-rw-r--r-- Documentation/ABI/testing/debugfs-kmemtrace | 71
-rw-r--r-- Documentation/trace/kmemtrace.txt | 126
-rw-r--r-- MAINTAINERS | 7
-rw-r--r-- Makefile | 4
-rw-r--r-- arch/alpha/include/asm/local64.h | 1
-rw-r--r-- arch/arm/include/asm/local64.h | 1
-rw-r--r-- arch/arm/kernel/perf_event.c | 18
-rw-r--r-- arch/avr32/include/asm/local64.h | 1
-rw-r--r-- arch/blackfin/include/asm/local64.h | 1
-rw-r--r-- arch/cris/include/asm/local64.h | 1
-rw-r--r-- arch/frv/include/asm/local64.h | 1
-rw-r--r-- arch/frv/kernel/local64.h | 1
-rw-r--r-- arch/h8300/include/asm/local64.h | 1
-rw-r--r-- arch/ia64/include/asm/local64.h | 1
-rw-r--r-- arch/m32r/include/asm/local64.h | 1
-rw-r--r-- arch/m68k/include/asm/local64.h | 1
-rw-r--r-- arch/microblaze/include/asm/local64.h | 1
-rw-r--r-- arch/mips/include/asm/local64.h | 1
-rw-r--r-- arch/mn10300/include/asm/local64.h | 1
-rw-r--r-- arch/parisc/include/asm/local64.h | 1
-rw-r--r-- arch/powerpc/include/asm/local64.h | 1
-rw-r--r-- arch/powerpc/include/asm/perf_event.h | 12
-rw-r--r-- arch/powerpc/kernel/misc.S | 26
-rw-r--r-- arch/powerpc/kernel/perf_event.c | 41
-rw-r--r-- arch/s390/include/asm/local64.h | 1
-rw-r--r-- arch/score/include/asm/local64.h | 1
-rw-r--r-- arch/sh/include/asm/local64.h | 1
-rw-r--r-- arch/sh/kernel/perf_event.c | 6
-rw-r--r-- arch/sparc/include/asm/local64.h | 1
-rw-r--r-- arch/sparc/include/asm/perf_event.h | 8
-rw-r--r-- arch/sparc/kernel/helpers.S | 6
-rw-r--r-- arch/sparc/kernel/perf_event.c | 25
-rw-r--r-- arch/x86/include/asm/local64.h | 1
-rw-r--r-- arch/x86/include/asm/perf_event.h | 18
-rw-r--r-- arch/x86/include/asm/stacktrace.h | 49
-rw-r--r-- arch/x86/kernel/cpu/perf_event.c | 62
-rw-r--r-- arch/x86/kernel/cpu/perf_event_p4.c | 9
-rw-r--r-- arch/x86/kernel/dumpstack.c | 1
-rw-r--r-- arch/x86/kernel/dumpstack.h | 56
-rw-r--r-- arch/x86/kernel/dumpstack_32.c | 2
-rw-r--r-- arch/x86/kernel/dumpstack_64.c | 1
-rw-r--r-- arch/x86/kernel/stacktrace.c | 31
-rw-r--r-- arch/xtensa/include/asm/local64.h | 1
-rw-r--r-- fs/exec.c | 1
-rw-r--r-- include/asm-generic/local64.h | 96
-rw-r--r-- include/linux/ftrace_event.h | 3
-rw-r--r-- include/linux/kmemtrace.h | 25
-rw-r--r-- include/linux/perf_event.h | 89
-rw-r--r-- include/linux/slab_def.h | 3
-rw-r--r-- include/linux/slub_def.h | 3
-rw-r--r-- include/trace/boot.h | 60
-rw-r--r-- include/trace/ftrace.h | 2
-rw-r--r-- init/main.c | 29
-rw-r--r-- kernel/perf_event.c | 458
-rw-r--r-- kernel/sched.c | 6
-rw-r--r-- kernel/trace/Kconfig | 37
-rw-r--r-- kernel/trace/Makefile | 2
-rw-r--r-- kernel/trace/ftrace.c | 5
-rw-r--r-- kernel/trace/kmemtrace.c | 529
-rw-r--r-- kernel/trace/ring_buffer.c | 38
-rw-r--r-- kernel/trace/trace.c | 8
-rw-r--r-- kernel/trace/trace.h | 68
-rw-r--r-- kernel/trace/trace_boot.c | 185
-rw-r--r-- kernel/trace/trace_clock.c | 5
-rw-r--r-- kernel/trace/trace_entries.h | 62
-rw-r--r-- kernel/trace/trace_event_perf.c | 2
-rw-r--r-- kernel/trace/trace_events.c | 5
-rw-r--r-- kernel/trace/trace_functions.c | 6
-rw-r--r-- kernel/trace/trace_sched_wakeup.c | 5
-rw-r--r-- kernel/trace/trace_stack.c | 6
-rw-r--r-- kernel/trace/trace_sysprof.c | 7
-rw-r--r-- mm/mmap.c | 6
-rw-r--r-- mm/slab.c | 1
-rw-r--r-- mm/slob.c | 4
-rw-r--r-- mm/slub.c | 1
-rw-r--r-- scripts/package/Makefile | 37
-rw-r--r-- tools/perf/.gitignore | 2
-rw-r--r-- tools/perf/Documentation/perf-probe.txt | 4
-rw-r--r-- tools/perf/Documentation/perf-record.txt | 13
-rw-r--r-- tools/perf/Documentation/perf-stat.txt | 7
-rw-r--r-- tools/perf/Documentation/perf-top.txt | 8
-rw-r--r-- tools/perf/MANIFEST | 12
-rw-r--r-- tools/perf/Makefile | 109
-rw-r--r-- tools/perf/builtin-annotate.c | 6
-rw-r--r-- tools/perf/builtin-buildid-cache.c | 3
-rw-r--r-- tools/perf/builtin-diff.c | 7
-rw-r--r-- tools/perf/builtin-probe.c | 2
-rw-r--r-- tools/perf/builtin-record.c | 35
-rw-r--r-- tools/perf/builtin-report.c | 25
-rw-r--r-- tools/perf/builtin-stat.c | 14
-rw-r--r-- tools/perf/builtin-top.c | 20
-rw-r--r-- tools/perf/feature-tests.mak | 119
-rw-r--r-- tools/perf/perf-archive.sh | 20
-rw-r--r-- tools/perf/perf.c | 2
-rw-r--r-- tools/perf/util/build-id.c | 10
-rw-r--r-- tools/perf/util/cache.h | 1
-rw-r--r-- tools/perf/util/callchain.c | 2
-rw-r--r-- tools/perf/util/callchain.h | 2
-rw-r--r-- tools/perf/util/config.c | 64
-rw-r--r-- tools/perf/util/cpumap.c | 57
-rw-r--r-- tools/perf/util/cpumap.h | 2
-rw-r--r-- tools/perf/util/debug.c | 10
-rw-r--r-- tools/perf/util/event.c | 37
-rw-r--r-- tools/perf/util/event.h | 5
-rw-r--r-- tools/perf/util/header.c | 13
-rw-r--r-- tools/perf/util/hist.c | 3
-rw-r--r-- tools/perf/util/probe-finder.c | 58
-rw-r--r-- tools/perf/util/session.c | 6
-rw-r--r-- tools/perf/util/sort.c | 27
-rw-r--r-- tools/perf/util/sort.h | 6
-rw-r--r-- tools/perf/util/symbol.c | 37
-rw-r--r-- tools/perf/util/symbol.h | 6
-rw-r--r-- tools/perf/util/util.h | 3
113 files changed, 1268 insertions(+), 1884 deletions(-)
diff --git a/Documentation/ABI/testing/debugfs-kmemtrace b/Documentation/ABI/testing/debugfs-kmemtrace
deleted file mode 100644
index 5e6a92a02d85..000000000000
--- a/Documentation/ABI/testing/debugfs-kmemtrace
+++ /dev/null
@@ -1,71 +0,0 @@
1What: /sys/kernel/debug/kmemtrace/
2Date: July 2008
3Contact: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
4Description:
5
6In kmemtrace-enabled kernels, the following files are created:
7
8/sys/kernel/debug/kmemtrace/
9 cpu<n> (0400) Per-CPU tracing data, see below. (binary)
10 total_overruns (0400) Total number of bytes which were dropped from
11 cpu<n> files because of full buffer condition,
12 non-binary. (text)
13 abi_version (0400) Kernel's kmemtrace ABI version. (text)
14
15Each per-CPU file should be read according to the relay interface. That is,
16the reader should set affinity to that specific CPU and, as currently done by
17the userspace application (though there are other methods), use poll() with
18an infinite timeout before every read(). Otherwise, erroneous data may be
19read. The binary data has the following _core_ format:
20
21 Event ID (1 byte) Unsigned integer, one of:
22 0 - represents an allocation (KMEMTRACE_EVENT_ALLOC)
23 1 - represents a freeing of previously allocated memory
24 (KMEMTRACE_EVENT_FREE)
25 Type ID (1 byte) Unsigned integer, one of:
26 0 - this is a kmalloc() / kfree()
27 1 - this is a kmem_cache_alloc() / kmem_cache_free()
28 2 - this is a __get_free_pages() et al.
29 Event size (2 bytes) Unsigned integer representing the
30 size of this event. Used to extend
31 kmemtrace. Discard the bytes you
32 don't know about.
33 Sequence number (4 bytes) Signed integer used to reorder data
34 logged on SMP machines. Wraparound
35 must be taken into account, although
36 it is unlikely.
37 Caller address (8 bytes) Return address to the caller.
38 Pointer to mem (8 bytes) Pointer to target memory area. Can be
39 NULL, but not all such calls might be
40 recorded.
41
42In case of KMEMTRACE_EVENT_ALLOC events, the next fields follow:
43
44 Requested bytes (8 bytes) Total number of requested bytes,
45 unsigned, must not be zero.
46 Allocated bytes (8 bytes) Total number of actually allocated
47 bytes, unsigned, must not be lower
48 than requested bytes.
49 Requested flags (4 bytes) GFP flags supplied by the caller.
50 Target CPU (4 bytes) Signed integer, valid for event id 1.
51 If equal to -1, target CPU is the same
52 as origin CPU, but the reverse might
53 not be true.
54
55The data is made available in the same endianness the machine has.
56
57Other event ids and type ids may be defined and added. Other fields may be
58added by increasing event size, but see below for details.
59Every modification to the ABI, including new id definitions, are followed
60by bumping the ABI version by one.
61
62Adding new data to the packet (features) is done at the end of the mandatory
63data:
64 Feature size (2 byte)
65 Feature ID (1 byte)
66 Feature data (Feature size - 3 bytes)
67
68
69Users:
70 kmemtrace-user - git://repo.or.cz/kmemtrace-user.git
71
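The removed ABI text above fully specifies the per-event binary layout, so a minimal reader-side sketch of the core record and the KMEMTRACE_EVENT_ALLOC trailer can be written directly from it. The struct and field names below are illustrative only (not part of the removed ABI document), and, as the text states, the fields are in the machine's native endianness:

#include <stdint.h>

/* Core header present in every kmemtrace record (sizes per the ABI text). */
struct kmemtrace_core_record {
	uint8_t  event_id;	/* 0 = KMEMTRACE_EVENT_ALLOC, 1 = KMEMTRACE_EVENT_FREE */
	uint8_t  type_id;	/* 0 = kmalloc/kfree, 1 = kmem_cache_*, 2 = __get_free_pages et al. */
	uint16_t event_size;	/* total size of this event; skip trailing bytes you don't know */
	int32_t  seq;		/* sequence number for reordering SMP data, may wrap */
	uint64_t call_site;	/* return address to the caller */
	uint64_t ptr;		/* pointer to the target memory area, may be 0 */
} __attribute__((packed));

/* Extra fields that follow the core record for KMEMTRACE_EVENT_ALLOC. */
struct kmemtrace_alloc_trailer {
	uint64_t bytes_req;	/* requested bytes, must not be zero */
	uint64_t bytes_alloc;	/* allocated bytes, never lower than bytes_req */
	uint32_t gfp_flags;	/* GFP flags supplied by the caller */
	int32_t  target_cpu;	/* valid for event id 1; -1 means same as origin CPU */
} __attribute__((packed));

A consumer reading the per-CPU relay files would read event_size bytes per record and discard any trailing feature data it does not understand, as the ABI text requires.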
diff --git a/Documentation/trace/kmemtrace.txt b/Documentation/trace/kmemtrace.txt
deleted file mode 100644
index 6308735e58ca..000000000000
--- a/Documentation/trace/kmemtrace.txt
+++ /dev/null
@@ -1,126 +0,0 @@
1 kmemtrace - Kernel Memory Tracer
2
3 by Eduard - Gabriel Munteanu
4 <eduard.munteanu@linux360.ro>
5
6I. Introduction
7===============
8
9kmemtrace helps kernel developers figure out two things:
101) how different allocators (SLAB, SLUB etc.) perform
112) how kernel code allocates memory and how much
12
13To do this, we trace every allocation and export information to the userspace
14through the relay interface. We export things such as the number of requested
15bytes, the number of bytes actually allocated (i.e. including internal
16fragmentation), whether this is a slab allocation or a plain kmalloc() and so
17on.
18
19The actual analysis is performed by a userspace tool (see section III for
20details on where to get it from). It logs the data exported by the kernel,
21processes it and (as of writing this) can provide the following information:
22- the total amount of memory allocated and fragmentation per call-site
23- the amount of memory allocated and fragmentation per allocation
24- total memory allocated and fragmentation in the collected dataset
25- number of cross-CPU allocation and frees (makes sense in NUMA environments)
26
27Moreover, it can potentially find inconsistent and erroneous behavior in
28kernel code, such as using slab free functions on kmalloc'ed memory or
29allocating less memory than requested (but not truly failed allocations).
30
31kmemtrace also makes provisions for tracing on some arch and analysing the
32data on another.
33
34II. Design and goals
35====================
36
37kmemtrace was designed to handle rather large amounts of data. Thus, it uses
38the relay interface to export whatever is logged to userspace, which then
39stores it. Analysis and reporting is done asynchronously, that is, after the
40data is collected and stored. By design, it allows one to log and analyse
41on different machines and different arches.
42
43As of writing this, the ABI is not considered stable, though it might not
44change much. However, no guarantees are made about compatibility yet. When
45deemed stable, the ABI should still allow easy extension while maintaining
46backward compatibility. This is described further in Documentation/ABI.
47
48Summary of design goals:
49 - allow logging and analysis to be done across different machines
50 - be fast and anticipate usage in high-load environments (*)
51 - be reasonably extensible
52 - make it possible for GNU/Linux distributions to have kmemtrace
53 included in their repositories
54
55(*) - one of the reasons Pekka Enberg's original userspace data analysis
56 tool's code was rewritten from Perl to C (although this is more than a
57 simple conversion)
58
59
60III. Quick usage guide
61======================
62
631) Get a kernel that supports kmemtrace and build it accordingly (i.e. enable
64CONFIG_KMEMTRACE).
65
662) Get the userspace tool and build it:
67$ git clone git://repo.or.cz/kmemtrace-user.git # current repository
68$ cd kmemtrace-user/
69$ ./autogen.sh
70$ ./configure
71$ make
72
733) Boot the kmemtrace-enabled kernel if you haven't, preferably in the
74'single' runlevel (so that relay buffers don't fill up easily), and run
75kmemtrace:
76# '$' does not mean user, but root here.
77$ mount -t debugfs none /sys/kernel/debug
78$ mount -t proc none /proc
79$ cd path/to/kmemtrace-user/
80$ ./kmemtraced
81Wait a bit, then stop it with CTRL+C.
82$ cat /sys/kernel/debug/kmemtrace/total_overruns # Check if we didn't
83 # overrun, should
84 # be zero.
85$ (Optionally) [Run kmemtrace_check separately on each cpu[0-9]*.out file to
86 check its correctness]
87$ ./kmemtrace-report
88
89Now you should have a nice and short summary of how the allocator performs.
90
91IV. FAQ and known issues
92========================
93
94Q: 'cat /sys/kernel/debug/kmemtrace/total_overruns' is non-zero, how do I fix
95this? Should I worry?
96A: If it's non-zero, this affects kmemtrace's accuracy, depending on how
97large the number is. You can fix it by supplying a higher
98'kmemtrace.subbufs=N' kernel parameter.
99---
100
101Q: kmemtrace_check reports errors, how do I fix this? Should I worry?
102A: This is a bug and should be reported. It can occur for a variety of
103reasons:
104 - possible bugs in relay code
105 - possible misuse of relay by kmemtrace
106 - timestamps being collected unorderly
107Or you may fix it yourself and send us a patch.
108---
109
110Q: kmemtrace_report shows many errors, how do I fix this? Should I worry?
111A: This is a known issue and I'm working on it. These might be true errors
112in kernel code, which may have inconsistent behavior (e.g. allocating memory
113with kmem_cache_alloc() and freeing it with kfree()). Pekka Enberg pointed
114out this behavior may work with SLAB, but may fail with other allocators.
115
116It may also be due to lack of tracing in some unusual allocator functions.
117
118We don't want bug reports regarding this issue yet.
119---
120
121V. See also
122===========
123
124Documentation/kernel-parameters.txt
125Documentation/ABI/testing/debugfs-kmemtrace
126
diff --git a/MAINTAINERS b/MAINTAINERS
index 6d119c98b89b..b7ae5fb9f8bc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3368,13 +3368,6 @@ F: include/linux/kmemleak.h
3368F: mm/kmemleak.c 3368F: mm/kmemleak.c
3369F: mm/kmemleak-test.c 3369F: mm/kmemleak-test.c
3370 3370
3371KMEMTRACE
3372M: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
3373S: Maintained
3374F: Documentation/trace/kmemtrace.txt
3375F: include/linux/kmemtrace.h
3376F: kernel/trace/kmemtrace.c
3377
3378KPROBES 3371KPROBES
3379M: Ananth N Mavinakayanahalli <ananth@in.ibm.com> 3372M: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
3380M: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> 3373M: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
diff --git a/Makefile b/Makefile
index d49d96c35ce5..865718f4bcea 100644
--- a/Makefile
+++ b/Makefile
@@ -414,7 +414,7 @@ endif
414no-dot-config-targets := clean mrproper distclean \ 414no-dot-config-targets := clean mrproper distclean \
415 cscope TAGS tags help %docs check% \ 415 cscope TAGS tags help %docs check% \
416 include/linux/version.h headers_% \ 416 include/linux/version.h headers_% \
417 kernelrelease kernelversion 417 kernelrelease kernelversion %src-pkg
418 418
419config-targets := 0 419config-targets := 0
420mixed-targets := 0 420mixed-targets := 0
@@ -1228,6 +1228,8 @@ distclean: mrproper
1228# rpm target kept for backward compatibility 1228# rpm target kept for backward compatibility
1229package-dir := $(srctree)/scripts/package 1229package-dir := $(srctree)/scripts/package
1230 1230
1231%src-pkg: FORCE
1232 $(Q)$(MAKE) $(build)=$(package-dir) $@
1231%pkg: include/config/kernel.release FORCE 1233%pkg: include/config/kernel.release FORCE
1232 $(Q)$(MAKE) $(build)=$(package-dir) $@ 1234 $(Q)$(MAKE) $(build)=$(package-dir) $@
1233rpm: include/config/kernel.release FORCE 1235rpm: include/config/kernel.release FORCE
diff --git a/arch/alpha/include/asm/local64.h b/arch/alpha/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/alpha/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/arm/include/asm/local64.h b/arch/arm/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/arm/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index c45768614c8a..5b7cfafc0720 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -164,20 +164,20 @@ armpmu_event_set_period(struct perf_event *event,
164 struct hw_perf_event *hwc, 164 struct hw_perf_event *hwc,
165 int idx) 165 int idx)
166{ 166{
167 s64 left = atomic64_read(&hwc->period_left); 167 s64 left = local64_read(&hwc->period_left);
168 s64 period = hwc->sample_period; 168 s64 period = hwc->sample_period;
169 int ret = 0; 169 int ret = 0;
170 170
171 if (unlikely(left <= -period)) { 171 if (unlikely(left <= -period)) {
172 left = period; 172 left = period;
173 atomic64_set(&hwc->period_left, left); 173 local64_set(&hwc->period_left, left);
174 hwc->last_period = period; 174 hwc->last_period = period;
175 ret = 1; 175 ret = 1;
176 } 176 }
177 177
178 if (unlikely(left <= 0)) { 178 if (unlikely(left <= 0)) {
179 left += period; 179 left += period;
180 atomic64_set(&hwc->period_left, left); 180 local64_set(&hwc->period_left, left);
181 hwc->last_period = period; 181 hwc->last_period = period;
182 ret = 1; 182 ret = 1;
183 } 183 }
@@ -185,7 +185,7 @@ armpmu_event_set_period(struct perf_event *event,
185 if (left > (s64)armpmu->max_period) 185 if (left > (s64)armpmu->max_period)
186 left = armpmu->max_period; 186 left = armpmu->max_period;
187 187
188 atomic64_set(&hwc->prev_count, (u64)-left); 188 local64_set(&hwc->prev_count, (u64)-left);
189 189
190 armpmu->write_counter(idx, (u64)(-left) & 0xffffffff); 190 armpmu->write_counter(idx, (u64)(-left) & 0xffffffff);
191 191
@@ -204,18 +204,18 @@ armpmu_event_update(struct perf_event *event,
204 s64 delta; 204 s64 delta;
205 205
206again: 206again:
207 prev_raw_count = atomic64_read(&hwc->prev_count); 207 prev_raw_count = local64_read(&hwc->prev_count);
208 new_raw_count = armpmu->read_counter(idx); 208 new_raw_count = armpmu->read_counter(idx);
209 209
210 if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, 210 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
211 new_raw_count) != prev_raw_count) 211 new_raw_count) != prev_raw_count)
212 goto again; 212 goto again;
213 213
214 delta = (new_raw_count << shift) - (prev_raw_count << shift); 214 delta = (new_raw_count << shift) - (prev_raw_count << shift);
215 delta >>= shift; 215 delta >>= shift;
216 216
217 atomic64_add(delta, &event->count); 217 local64_add(delta, &event->count);
218 atomic64_sub(delta, &hwc->period_left); 218 local64_sub(delta, &hwc->period_left);
219 219
220 return new_raw_count; 220 return new_raw_count;
221} 221}
@@ -478,7 +478,7 @@ __hw_perf_event_init(struct perf_event *event)
478 if (!hwc->sample_period) { 478 if (!hwc->sample_period) {
479 hwc->sample_period = armpmu->max_period; 479 hwc->sample_period = armpmu->max_period;
480 hwc->last_period = hwc->sample_period; 480 hwc->last_period = hwc->sample_period;
481 atomic64_set(&hwc->period_left, hwc->sample_period); 481 local64_set(&hwc->period_left, hwc->sample_period);
482 } 482 }
483 483
484 err = 0; 484 err = 0;
diff --git a/arch/avr32/include/asm/local64.h b/arch/avr32/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/avr32/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/blackfin/include/asm/local64.h b/arch/blackfin/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/blackfin/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/cris/include/asm/local64.h b/arch/cris/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/cris/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/frv/include/asm/local64.h b/arch/frv/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/frv/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/frv/kernel/local64.h b/arch/frv/kernel/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/frv/kernel/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/h8300/include/asm/local64.h b/arch/h8300/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/h8300/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/ia64/include/asm/local64.h b/arch/ia64/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/ia64/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/m32r/include/asm/local64.h b/arch/m32r/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/m32r/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/m68k/include/asm/local64.h b/arch/m68k/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/m68k/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/microblaze/include/asm/local64.h b/arch/microblaze/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/microblaze/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/mips/include/asm/local64.h b/arch/mips/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/mips/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/mn10300/include/asm/local64.h b/arch/mn10300/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/mn10300/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/parisc/include/asm/local64.h b/arch/parisc/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/parisc/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/powerpc/include/asm/local64.h b/arch/powerpc/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/powerpc/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h
index e6d4ce69b126..5c16b891d501 100644
--- a/arch/powerpc/include/asm/perf_event.h
+++ b/arch/powerpc/include/asm/perf_event.h
@@ -21,3 +21,15 @@
21#ifdef CONFIG_FSL_EMB_PERF_EVENT 21#ifdef CONFIG_FSL_EMB_PERF_EVENT
22#include <asm/perf_event_fsl_emb.h> 22#include <asm/perf_event_fsl_emb.h>
23#endif 23#endif
24
25#ifdef CONFIG_PERF_EVENTS
26#include <asm/ptrace.h>
27#include <asm/reg.h>
28
29#define perf_arch_fetch_caller_regs(regs, __ip) \
30 do { \
31 (regs)->nip = __ip; \
32 (regs)->gpr[1] = *(unsigned long *)__get_SP(); \
33 asm volatile("mfmsr %0" : "=r" ((regs)->msr)); \
34 } while (0)
35#endif
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index 22e507c8a556..2d29752cbe16 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -127,29 +127,3 @@ _GLOBAL(__setup_cpu_power7)
127_GLOBAL(__restore_cpu_power7) 127_GLOBAL(__restore_cpu_power7)
128 /* place holder */ 128 /* place holder */
129 blr 129 blr
130
131/*
132 * Get a minimal set of registers for our caller's nth caller.
133 * r3 = regs pointer, r5 = n.
134 *
135 * We only get R1 (stack pointer), NIP (next instruction pointer)
136 * and LR (link register). These are all we can get in the
137 * general case without doing complicated stack unwinding, but
138 * fortunately they are enough to do a stack backtrace, which
139 * is all we need them for.
140 */
141_GLOBAL(perf_arch_fetch_caller_regs)
142 mr r6,r1
143 cmpwi r5,0
144 mflr r4
145 ble 2f
146 mtctr r5
1471: PPC_LL r6,0(r6)
148 bdnz 1b
149 PPC_LL r4,PPC_LR_STKOFF(r6)
1502: PPC_LL r7,0(r6)
151 PPC_LL r7,PPC_LR_STKOFF(r7)
152 PPC_STL r6,GPR1-STACK_FRAME_OVERHEAD(r3)
153 PPC_STL r4,_NIP-STACK_FRAME_OVERHEAD(r3)
154 PPC_STL r7,_LINK-STACK_FRAME_OVERHEAD(r3)
155 blr
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 43b83c35cf54..af1d9a7c65d1 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -410,15 +410,15 @@ static void power_pmu_read(struct perf_event *event)
410 * Therefore we treat them like NMIs. 410 * Therefore we treat them like NMIs.
411 */ 411 */
412 do { 412 do {
413 prev = atomic64_read(&event->hw.prev_count); 413 prev = local64_read(&event->hw.prev_count);
414 barrier(); 414 barrier();
415 val = read_pmc(event->hw.idx); 415 val = read_pmc(event->hw.idx);
416 } while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev); 416 } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
417 417
418 /* The counters are only 32 bits wide */ 418 /* The counters are only 32 bits wide */
419 delta = (val - prev) & 0xfffffffful; 419 delta = (val - prev) & 0xfffffffful;
420 atomic64_add(delta, &event->count); 420 local64_add(delta, &event->count);
421 atomic64_sub(delta, &event->hw.period_left); 421 local64_sub(delta, &event->hw.period_left);
422} 422}
423 423
424/* 424/*
@@ -444,10 +444,10 @@ static void freeze_limited_counters(struct cpu_hw_events *cpuhw,
444 if (!event->hw.idx) 444 if (!event->hw.idx)
445 continue; 445 continue;
446 val = (event->hw.idx == 5) ? pmc5 : pmc6; 446 val = (event->hw.idx == 5) ? pmc5 : pmc6;
447 prev = atomic64_read(&event->hw.prev_count); 447 prev = local64_read(&event->hw.prev_count);
448 event->hw.idx = 0; 448 event->hw.idx = 0;
449 delta = (val - prev) & 0xfffffffful; 449 delta = (val - prev) & 0xfffffffful;
450 atomic64_add(delta, &event->count); 450 local64_add(delta, &event->count);
451 } 451 }
452} 452}
453 453
@@ -462,7 +462,7 @@ static void thaw_limited_counters(struct cpu_hw_events *cpuhw,
462 event = cpuhw->limited_counter[i]; 462 event = cpuhw->limited_counter[i];
463 event->hw.idx = cpuhw->limited_hwidx[i]; 463 event->hw.idx = cpuhw->limited_hwidx[i];
464 val = (event->hw.idx == 5) ? pmc5 : pmc6; 464 val = (event->hw.idx == 5) ? pmc5 : pmc6;
465 atomic64_set(&event->hw.prev_count, val); 465 local64_set(&event->hw.prev_count, val);
466 perf_event_update_userpage(event); 466 perf_event_update_userpage(event);
467 } 467 }
468} 468}
@@ -666,11 +666,11 @@ void hw_perf_enable(void)
666 } 666 }
667 val = 0; 667 val = 0;
668 if (event->hw.sample_period) { 668 if (event->hw.sample_period) {
669 left = atomic64_read(&event->hw.period_left); 669 left = local64_read(&event->hw.period_left);
670 if (left < 0x80000000L) 670 if (left < 0x80000000L)
671 val = 0x80000000L - left; 671 val = 0x80000000L - left;
672 } 672 }
673 atomic64_set(&event->hw.prev_count, val); 673 local64_set(&event->hw.prev_count, val);
674 event->hw.idx = idx; 674 event->hw.idx = idx;
675 write_pmc(idx, val); 675 write_pmc(idx, val);
676 perf_event_update_userpage(event); 676 perf_event_update_userpage(event);
@@ -754,7 +754,7 @@ static int power_pmu_enable(struct perf_event *event)
754 * skip the schedulability test here, it will be peformed 754 * skip the schedulability test here, it will be peformed
755 * at commit time(->commit_txn) as a whole 755 * at commit time(->commit_txn) as a whole
756 */ 756 */
757 if (cpuhw->group_flag & PERF_EVENT_TXN_STARTED) 757 if (cpuhw->group_flag & PERF_EVENT_TXN)
758 goto nocheck; 758 goto nocheck;
759 759
760 if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1)) 760 if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
@@ -842,8 +842,8 @@ static void power_pmu_unthrottle(struct perf_event *event)
842 if (left < 0x80000000L) 842 if (left < 0x80000000L)
843 val = 0x80000000L - left; 843 val = 0x80000000L - left;
844 write_pmc(event->hw.idx, val); 844 write_pmc(event->hw.idx, val);
845 atomic64_set(&event->hw.prev_count, val); 845 local64_set(&event->hw.prev_count, val);
846 atomic64_set(&event->hw.period_left, left); 846 local64_set(&event->hw.period_left, left);
847 perf_event_update_userpage(event); 847 perf_event_update_userpage(event);
848 perf_enable(); 848 perf_enable();
849 local_irq_restore(flags); 849 local_irq_restore(flags);
@@ -858,7 +858,7 @@ void power_pmu_start_txn(const struct pmu *pmu)
858{ 858{
859 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 859 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
860 860
861 cpuhw->group_flag |= PERF_EVENT_TXN_STARTED; 861 cpuhw->group_flag |= PERF_EVENT_TXN;
862 cpuhw->n_txn_start = cpuhw->n_events; 862 cpuhw->n_txn_start = cpuhw->n_events;
863} 863}
864 864
@@ -871,7 +871,7 @@ void power_pmu_cancel_txn(const struct pmu *pmu)
871{ 871{
872 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 872 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
873 873
874 cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED; 874 cpuhw->group_flag &= ~PERF_EVENT_TXN;
875} 875}
876 876
877/* 877/*
@@ -897,6 +897,7 @@ int power_pmu_commit_txn(const struct pmu *pmu)
897 for (i = cpuhw->n_txn_start; i < n; ++i) 897 for (i = cpuhw->n_txn_start; i < n; ++i)
898 cpuhw->event[i]->hw.config = cpuhw->events[i]; 898 cpuhw->event[i]->hw.config = cpuhw->events[i];
899 899
900 cpuhw->group_flag &= ~PERF_EVENT_TXN;
900 return 0; 901 return 0;
901} 902}
902 903
@@ -1108,7 +1109,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
1108 event->hw.config = events[n]; 1109 event->hw.config = events[n];
1109 event->hw.event_base = cflags[n]; 1110 event->hw.event_base = cflags[n];
1110 event->hw.last_period = event->hw.sample_period; 1111 event->hw.last_period = event->hw.sample_period;
1111 atomic64_set(&event->hw.period_left, event->hw.last_period); 1112 local64_set(&event->hw.period_left, event->hw.last_period);
1112 1113
1113 /* 1114 /*
1114 * See if we need to reserve the PMU. 1115 * See if we need to reserve the PMU.
@@ -1146,16 +1147,16 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
1146 int record = 0; 1147 int record = 0;
1147 1148
1148 /* we don't have to worry about interrupts here */ 1149 /* we don't have to worry about interrupts here */
1149 prev = atomic64_read(&event->hw.prev_count); 1150 prev = local64_read(&event->hw.prev_count);
1150 delta = (val - prev) & 0xfffffffful; 1151 delta = (val - prev) & 0xfffffffful;
1151 atomic64_add(delta, &event->count); 1152 local64_add(delta, &event->count);
1152 1153
1153 /* 1154 /*
1154 * See if the total period for this event has expired, 1155 * See if the total period for this event has expired,
1155 * and update for the next period. 1156 * and update for the next period.
1156 */ 1157 */
1157 val = 0; 1158 val = 0;
1158 left = atomic64_read(&event->hw.period_left) - delta; 1159 left = local64_read(&event->hw.period_left) - delta;
1159 if (period) { 1160 if (period) {
1160 if (left <= 0) { 1161 if (left <= 0) {
1161 left += period; 1162 left += period;
@@ -1193,8 +1194,8 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
1193 } 1194 }
1194 1195
1195 write_pmc(event->hw.idx, val); 1196 write_pmc(event->hw.idx, val);
1196 atomic64_set(&event->hw.prev_count, val); 1197 local64_set(&event->hw.prev_count, val);
1197 atomic64_set(&event->hw.period_left, left); 1198 local64_set(&event->hw.period_left, left);
1198 perf_event_update_userpage(event); 1199 perf_event_update_userpage(event);
1199} 1200}
1200 1201
diff --git a/arch/s390/include/asm/local64.h b/arch/s390/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/s390/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/score/include/asm/local64.h b/arch/score/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/score/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/sh/include/asm/local64.h b/arch/sh/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/sh/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c
index 81b6de41ae5d..7a3dc3567258 100644
--- a/arch/sh/kernel/perf_event.c
+++ b/arch/sh/kernel/perf_event.c
@@ -185,10 +185,10 @@ static void sh_perf_event_update(struct perf_event *event,
185 * this is the simplest approach for maintaining consistency. 185 * this is the simplest approach for maintaining consistency.
186 */ 186 */
187again: 187again:
188 prev_raw_count = atomic64_read(&hwc->prev_count); 188 prev_raw_count = local64_read(&hwc->prev_count);
189 new_raw_count = sh_pmu->read(idx); 189 new_raw_count = sh_pmu->read(idx);
190 190
191 if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, 191 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
192 new_raw_count) != prev_raw_count) 192 new_raw_count) != prev_raw_count)
193 goto again; 193 goto again;
194 194
@@ -203,7 +203,7 @@ again:
203 delta = (new_raw_count << shift) - (prev_raw_count << shift); 203 delta = (new_raw_count << shift) - (prev_raw_count << shift);
204 delta >>= shift; 204 delta >>= shift;
205 205
206 atomic64_add(delta, &event->count); 206 local64_add(delta, &event->count);
207} 207}
208 208
209static void sh_pmu_disable(struct perf_event *event) 209static void sh_pmu_disable(struct perf_event *event)
diff --git a/arch/sparc/include/asm/local64.h b/arch/sparc/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/sparc/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/sparc/include/asm/perf_event.h b/arch/sparc/include/asm/perf_event.h
index 7e2669894ce8..74c4e0cd889c 100644
--- a/arch/sparc/include/asm/perf_event.h
+++ b/arch/sparc/include/asm/perf_event.h
@@ -6,7 +6,15 @@ extern void set_perf_event_pending(void);
6#define PERF_EVENT_INDEX_OFFSET 0 6#define PERF_EVENT_INDEX_OFFSET 0
7 7
8#ifdef CONFIG_PERF_EVENTS 8#ifdef CONFIG_PERF_EVENTS
9#include <asm/ptrace.h>
10
9extern void init_hw_perf_events(void); 11extern void init_hw_perf_events(void);
12
13extern void
14__perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip);
15
16#define perf_arch_fetch_caller_regs(pt_regs, ip) \
17 __perf_arch_fetch_caller_regs(pt_regs, ip, 1);
10#else 18#else
11static inline void init_hw_perf_events(void) { } 19static inline void init_hw_perf_events(void) { }
12#endif 20#endif
diff --git a/arch/sparc/kernel/helpers.S b/arch/sparc/kernel/helpers.S
index 92090cc9e829..682fee06a16b 100644
--- a/arch/sparc/kernel/helpers.S
+++ b/arch/sparc/kernel/helpers.S
@@ -47,9 +47,9 @@ stack_trace_flush:
47 .size stack_trace_flush,.-stack_trace_flush 47 .size stack_trace_flush,.-stack_trace_flush
48 48
49#ifdef CONFIG_PERF_EVENTS 49#ifdef CONFIG_PERF_EVENTS
50 .globl perf_arch_fetch_caller_regs 50 .globl __perf_arch_fetch_caller_regs
51 .type perf_arch_fetch_caller_regs,#function 51 .type __perf_arch_fetch_caller_regs,#function
52perf_arch_fetch_caller_regs: 52__perf_arch_fetch_caller_regs:
53 /* We always read the %pstate into %o5 since we will use 53 /* We always read the %pstate into %o5 since we will use
54 * that to construct a fake %tstate to store into the regs. 54 * that to construct a fake %tstate to store into the regs.
55 */ 55 */
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 0ec92c8861dd..8a6660da8e08 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -572,18 +572,18 @@ static u64 sparc_perf_event_update(struct perf_event *event,
572 s64 delta; 572 s64 delta;
573 573
574again: 574again:
575 prev_raw_count = atomic64_read(&hwc->prev_count); 575 prev_raw_count = local64_read(&hwc->prev_count);
576 new_raw_count = read_pmc(idx); 576 new_raw_count = read_pmc(idx);
577 577
578 if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, 578 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
579 new_raw_count) != prev_raw_count) 579 new_raw_count) != prev_raw_count)
580 goto again; 580 goto again;
581 581
582 delta = (new_raw_count << shift) - (prev_raw_count << shift); 582 delta = (new_raw_count << shift) - (prev_raw_count << shift);
583 delta >>= shift; 583 delta >>= shift;
584 584
585 atomic64_add(delta, &event->count); 585 local64_add(delta, &event->count);
586 atomic64_sub(delta, &hwc->period_left); 586 local64_sub(delta, &hwc->period_left);
587 587
588 return new_raw_count; 588 return new_raw_count;
589} 589}
@@ -591,27 +591,27 @@ again:
591static int sparc_perf_event_set_period(struct perf_event *event, 591static int sparc_perf_event_set_period(struct perf_event *event,
592 struct hw_perf_event *hwc, int idx) 592 struct hw_perf_event *hwc, int idx)
593{ 593{
594 s64 left = atomic64_read(&hwc->period_left); 594 s64 left = local64_read(&hwc->period_left);
595 s64 period = hwc->sample_period; 595 s64 period = hwc->sample_period;
596 int ret = 0; 596 int ret = 0;
597 597
598 if (unlikely(left <= -period)) { 598 if (unlikely(left <= -period)) {
599 left = period; 599 left = period;
600 atomic64_set(&hwc->period_left, left); 600 local64_set(&hwc->period_left, left);
601 hwc->last_period = period; 601 hwc->last_period = period;
602 ret = 1; 602 ret = 1;
603 } 603 }
604 604
605 if (unlikely(left <= 0)) { 605 if (unlikely(left <= 0)) {
606 left += period; 606 left += period;
607 atomic64_set(&hwc->period_left, left); 607 local64_set(&hwc->period_left, left);
608 hwc->last_period = period; 608 hwc->last_period = period;
609 ret = 1; 609 ret = 1;
610 } 610 }
611 if (left > MAX_PERIOD) 611 if (left > MAX_PERIOD)
612 left = MAX_PERIOD; 612 left = MAX_PERIOD;
613 613
614 atomic64_set(&hwc->prev_count, (u64)-left); 614 local64_set(&hwc->prev_count, (u64)-left);
615 615
616 write_pmc(idx, (u64)(-left) & 0xffffffff); 616 write_pmc(idx, (u64)(-left) & 0xffffffff);
617 617
@@ -1005,7 +1005,7 @@ static int sparc_pmu_enable(struct perf_event *event)
1005 * skip the schedulability test here, it will be peformed 1005 * skip the schedulability test here, it will be peformed
1006 * at commit time(->commit_txn) as a whole 1006 * at commit time(->commit_txn) as a whole
1007 */ 1007 */
1008 if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) 1008 if (cpuc->group_flag & PERF_EVENT_TXN)
1009 goto nocheck; 1009 goto nocheck;
1010 1010
1011 if (check_excludes(cpuc->event, n0, 1)) 1011 if (check_excludes(cpuc->event, n0, 1))
@@ -1087,7 +1087,7 @@ static int __hw_perf_event_init(struct perf_event *event)
1087 if (!hwc->sample_period) { 1087 if (!hwc->sample_period) {
1088 hwc->sample_period = MAX_PERIOD; 1088 hwc->sample_period = MAX_PERIOD;
1089 hwc->last_period = hwc->sample_period; 1089 hwc->last_period = hwc->sample_period;
1090 atomic64_set(&hwc->period_left, hwc->sample_period); 1090 local64_set(&hwc->period_left, hwc->sample_period);
1091 } 1091 }
1092 1092
1093 return 0; 1093 return 0;
@@ -1102,7 +1102,7 @@ static void sparc_pmu_start_txn(const struct pmu *pmu)
1102{ 1102{
1103 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 1103 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
1104 1104
1105 cpuhw->group_flag |= PERF_EVENT_TXN_STARTED; 1105 cpuhw->group_flag |= PERF_EVENT_TXN;
1106} 1106}
1107 1107
1108/* 1108/*
@@ -1114,7 +1114,7 @@ static void sparc_pmu_cancel_txn(const struct pmu *pmu)
1114{ 1114{
1115 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); 1115 struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
1116 1116
1117 cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED; 1117 cpuhw->group_flag &= ~PERF_EVENT_TXN;
1118} 1118}
1119 1119
1120/* 1120/*
@@ -1137,6 +1137,7 @@ static int sparc_pmu_commit_txn(const struct pmu *pmu)
1137 if (sparc_check_constraints(cpuc->event, cpuc->events, n)) 1137 if (sparc_check_constraints(cpuc->event, cpuc->events, n))
1138 return -EAGAIN; 1138 return -EAGAIN;
1139 1139
1140 cpuc->group_flag &= ~PERF_EVENT_TXN;
1140 return 0; 1141 return 0;
1141} 1142}
1142 1143
diff --git a/arch/x86/include/asm/local64.h b/arch/x86/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/x86/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 254883d0c7e0..6e742cc4251b 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -68,8 +68,9 @@ union cpuid10_eax {
68 68
69union cpuid10_edx { 69union cpuid10_edx {
70 struct { 70 struct {
71 unsigned int num_counters_fixed:4; 71 unsigned int num_counters_fixed:5;
72 unsigned int reserved:28; 72 unsigned int bit_width_fixed:8;
73 unsigned int reserved:19;
73 } split; 74 } split;
74 unsigned int full; 75 unsigned int full;
75}; 76};
@@ -140,6 +141,19 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
140extern unsigned long perf_misc_flags(struct pt_regs *regs); 141extern unsigned long perf_misc_flags(struct pt_regs *regs);
141#define perf_misc_flags(regs) perf_misc_flags(regs) 142#define perf_misc_flags(regs) perf_misc_flags(regs)
142 143
144#include <asm/stacktrace.h>
145
146/*
147 * We abuse bit 3 from flags to pass exact information, see perf_misc_flags
148 * and the comment with PERF_EFLAGS_EXACT.
149 */
150#define perf_arch_fetch_caller_regs(regs, __ip) { \
151 (regs)->ip = (__ip); \
152 (regs)->bp = caller_frame_pointer(); \
153 (regs)->cs = __KERNEL_CS; \
154 regs->flags = 0; \
155}
156
143#else 157#else
144static inline void init_hw_perf_events(void) { } 158static inline void init_hw_perf_events(void) { }
145static inline void perf_events_lapic_init(void) { } 159static inline void perf_events_lapic_init(void) { }
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 4dab78edbad9..2b16a2ad23dc 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -1,6 +1,13 @@
1/*
2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
4 */
5
1#ifndef _ASM_X86_STACKTRACE_H 6#ifndef _ASM_X86_STACKTRACE_H
2#define _ASM_X86_STACKTRACE_H 7#define _ASM_X86_STACKTRACE_H
3 8
9#include <linux/uaccess.h>
10
4extern int kstack_depth_to_print; 11extern int kstack_depth_to_print;
5 12
6struct thread_info; 13struct thread_info;
@@ -42,4 +49,46 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
42 unsigned long *stack, unsigned long bp, 49 unsigned long *stack, unsigned long bp,
43 const struct stacktrace_ops *ops, void *data); 50 const struct stacktrace_ops *ops, void *data);
44 51
52#ifdef CONFIG_X86_32
53#define STACKSLOTS_PER_LINE 8
54#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
55#else
56#define STACKSLOTS_PER_LINE 4
57#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
58#endif
59
60extern void
61show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
62 unsigned long *stack, unsigned long bp, char *log_lvl);
63
64extern void
65show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
66 unsigned long *sp, unsigned long bp, char *log_lvl);
67
68extern unsigned int code_bytes;
69
70/* The form of the top of the frame on the stack */
71struct stack_frame {
72 struct stack_frame *next_frame;
73 unsigned long return_address;
74};
75
76struct stack_frame_ia32 {
77 u32 next_frame;
78 u32 return_address;
79};
80
81static inline unsigned long caller_frame_pointer(void)
82{
83 struct stack_frame *frame;
84
85 get_bp(frame);
86
87#ifdef CONFIG_FRAME_POINTER
88 frame = frame->next_frame;
89#endif
90
91 return (unsigned long)frame;
92}
93
45#endif /* _ASM_X86_STACKTRACE_H */ 94#endif /* _ASM_X86_STACKTRACE_H */
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 5db5b7d65a18..f2da20fda02d 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -220,6 +220,7 @@ struct x86_pmu {
220 struct perf_event *event); 220 struct perf_event *event);
221 struct event_constraint *event_constraints; 221 struct event_constraint *event_constraints;
222 void (*quirks)(void); 222 void (*quirks)(void);
223 int perfctr_second_write;
223 224
224 int (*cpu_prepare)(int cpu); 225 int (*cpu_prepare)(int cpu);
225 void (*cpu_starting)(int cpu); 226 void (*cpu_starting)(int cpu);
@@ -295,10 +296,10 @@ x86_perf_event_update(struct perf_event *event)
295 * count to the generic event atomically: 296 * count to the generic event atomically:
296 */ 297 */
297again: 298again:
298 prev_raw_count = atomic64_read(&hwc->prev_count); 299 prev_raw_count = local64_read(&hwc->prev_count);
299 rdmsrl(hwc->event_base + idx, new_raw_count); 300 rdmsrl(hwc->event_base + idx, new_raw_count);
300 301
301 if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, 302 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
302 new_raw_count) != prev_raw_count) 303 new_raw_count) != prev_raw_count)
303 goto again; 304 goto again;
304 305
@@ -313,8 +314,8 @@ again:
313 delta = (new_raw_count << shift) - (prev_raw_count << shift); 314 delta = (new_raw_count << shift) - (prev_raw_count << shift);
314 delta >>= shift; 315 delta >>= shift;
315 316
316 atomic64_add(delta, &event->count); 317 local64_add(delta, &event->count);
317 atomic64_sub(delta, &hwc->period_left); 318 local64_sub(delta, &hwc->period_left);
318 319
319 return new_raw_count; 320 return new_raw_count;
320} 321}
@@ -438,7 +439,7 @@ static int x86_setup_perfctr(struct perf_event *event)
438 if (!hwc->sample_period) { 439 if (!hwc->sample_period) {
439 hwc->sample_period = x86_pmu.max_period; 440 hwc->sample_period = x86_pmu.max_period;
440 hwc->last_period = hwc->sample_period; 441 hwc->last_period = hwc->sample_period;
441 atomic64_set(&hwc->period_left, hwc->sample_period); 442 local64_set(&hwc->period_left, hwc->sample_period);
442 } else { 443 } else {
443 /* 444 /*
444 * If we have a PMU initialized but no APIC 445 * If we have a PMU initialized but no APIC
@@ -885,7 +886,7 @@ static int
885x86_perf_event_set_period(struct perf_event *event) 886x86_perf_event_set_period(struct perf_event *event)
886{ 887{
887 struct hw_perf_event *hwc = &event->hw; 888 struct hw_perf_event *hwc = &event->hw;
888 s64 left = atomic64_read(&hwc->period_left); 889 s64 left = local64_read(&hwc->period_left);
889 s64 period = hwc->sample_period; 890 s64 period = hwc->sample_period;
890 int ret = 0, idx = hwc->idx; 891 int ret = 0, idx = hwc->idx;
891 892
@@ -897,14 +898,14 @@ x86_perf_event_set_period(struct perf_event *event)
897 */ 898 */
898 if (unlikely(left <= -period)) { 899 if (unlikely(left <= -period)) {
899 left = period; 900 left = period;
900 atomic64_set(&hwc->period_left, left); 901 local64_set(&hwc->period_left, left);
901 hwc->last_period = period; 902 hwc->last_period = period;
902 ret = 1; 903 ret = 1;
903 } 904 }
904 905
905 if (unlikely(left <= 0)) { 906 if (unlikely(left <= 0)) {
906 left += period; 907 left += period;
907 atomic64_set(&hwc->period_left, left); 908 local64_set(&hwc->period_left, left);
908 hwc->last_period = period; 909 hwc->last_period = period;
909 ret = 1; 910 ret = 1;
910 } 911 }
@@ -923,10 +924,19 @@ x86_perf_event_set_period(struct perf_event *event)
923 * The hw event starts counting from this event offset, 924 * The hw event starts counting from this event offset,
924 * mark it to be able to extra future deltas: 925 * mark it to be able to extra future deltas:
925 */ 926 */
926 atomic64_set(&hwc->prev_count, (u64)-left); 927 local64_set(&hwc->prev_count, (u64)-left);
927 928
928 wrmsrl(hwc->event_base + idx, 929 wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask);
930
931 /*
932 * Due to erratum on certan cpu we need
933 * a second write to be sure the register
934 * is updated properly
935 */
936 if (x86_pmu.perfctr_second_write) {
937 wrmsrl(hwc->event_base + idx,
929 (u64)(-left) & x86_pmu.cntval_mask); 938 (u64)(-left) & x86_pmu.cntval_mask);
939 }
930 940
931 perf_event_update_userpage(event); 941 perf_event_update_userpage(event);
932 942
@@ -969,7 +979,7 @@ static int x86_pmu_enable(struct perf_event *event)
969 * skip the schedulability test here, it will be peformed 979 * skip the schedulability test here, it will be peformed
970 * at commit time(->commit_txn) as a whole 980 * at commit time(->commit_txn) as a whole
971 */ 981 */
972 if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) 982 if (cpuc->group_flag & PERF_EVENT_TXN)
973 goto out; 983 goto out;
974 984
975 ret = x86_pmu.schedule_events(cpuc, n, assign); 985 ret = x86_pmu.schedule_events(cpuc, n, assign);
@@ -1096,7 +1106,7 @@ static void x86_pmu_disable(struct perf_event *event)
1096 * The events never got scheduled and ->cancel_txn will truncate 1106 * The events never got scheduled and ->cancel_txn will truncate
1097 * the event_list. 1107 * the event_list.
1098 */ 1108 */
1099 if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) 1109 if (cpuc->group_flag & PERF_EVENT_TXN)
1100 return; 1110 return;
1101 1111
1102 x86_pmu_stop(event); 1112 x86_pmu_stop(event);
@@ -1388,7 +1398,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu)
1388{ 1398{
1389 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1399 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1390 1400
1391 cpuc->group_flag |= PERF_EVENT_TXN_STARTED; 1401 cpuc->group_flag |= PERF_EVENT_TXN;
1392 cpuc->n_txn = 0; 1402 cpuc->n_txn = 0;
1393} 1403}
1394 1404
@@ -1401,7 +1411,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
1401{ 1411{
1402 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1412 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1403 1413
1404 cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED; 1414 cpuc->group_flag &= ~PERF_EVENT_TXN;
1405 /* 1415 /*
1406 * Truncate the collected events. 1416 * Truncate the collected events.
1407 */ 1417 */
@@ -1435,11 +1445,7 @@ static int x86_pmu_commit_txn(const struct pmu *pmu)
1435 */ 1445 */
1436 memcpy(cpuc->assign, assign, n*sizeof(int)); 1446 memcpy(cpuc->assign, assign, n*sizeof(int));
1437 1447
1438 /* 1448 cpuc->group_flag &= ~PERF_EVENT_TXN;
1439 * Clear out the txn count so that ->cancel_txn() which gets
1440 * run after ->commit_txn() doesn't undo things.
1441 */
1442 cpuc->n_txn = 0;
1443 1449
1444 return 0; 1450 return 0;
1445} 1451}
@@ -1607,8 +1613,6 @@ static const struct stacktrace_ops backtrace_ops = {
1607 .walk_stack = print_context_stack_bp, 1613 .walk_stack = print_context_stack_bp,
1608}; 1614};
1609 1615
1610#include "../dumpstack.h"
1611
1612static void 1616static void
1613perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) 1617perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
1614{ 1618{
@@ -1730,22 +1734,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1730 return entry; 1734 return entry;
1731} 1735}
1732 1736
1733void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
1734{
1735 regs->ip = ip;
1736 /*
1737 * perf_arch_fetch_caller_regs adds another call, we need to increment
1738 * the skip level
1739 */
1740 regs->bp = rewind_frame_pointer(skip + 1);
1741 regs->cs = __KERNEL_CS;
1742 /*
1743 * We abuse bit 3 to pass exact information, see perf_misc_flags
1744 * and the comment with PERF_EFLAGS_EXACT.
1745 */
1746 regs->flags = 0;
1747}
1748
1749unsigned long perf_instruction_pointer(struct pt_regs *regs) 1737unsigned long perf_instruction_pointer(struct pt_regs *regs)
1750{ 1738{
1751 unsigned long ip; 1739 unsigned long ip;
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index ae85d69644d1..9286e736a70a 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -829,6 +829,15 @@ static __initconst const struct x86_pmu p4_pmu = {
829 .max_period = (1ULL << 39) - 1, 829 .max_period = (1ULL << 39) - 1,
830 .hw_config = p4_hw_config, 830 .hw_config = p4_hw_config,
831 .schedule_events = p4_pmu_schedule_events, 831 .schedule_events = p4_pmu_schedule_events,
832 /*
833 * This handles erratum N15 in intel doc 249199-029,
834 * the counter may not be updated correctly on write
835 * so we need a second write operation to do the trick
836 * (the official workaround didn't work)
837 *
838 * the former idea is taken from OProfile code
839 */
840 .perfctr_second_write = 1,
832}; 841};
833 842
834static __init int p4_pmu_init(void) 843static __init int p4_pmu_init(void)
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index c89a386930b7..6e8752c1bd52 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -18,7 +18,6 @@
18 18
19#include <asm/stacktrace.h> 19#include <asm/stacktrace.h>
20 20
21#include "dumpstack.h"
22 21
23int panic_on_unrecovered_nmi; 22int panic_on_unrecovered_nmi;
24int panic_on_io_nmi; 23int panic_on_io_nmi;
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h
deleted file mode 100644
index e1a93be4fd44..000000000000
--- a/arch/x86/kernel/dumpstack.h
+++ /dev/null
@@ -1,56 +0,0 @@
1/*
2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
4 */
5
6#ifndef DUMPSTACK_H
7#define DUMPSTACK_H
8
9#ifdef CONFIG_X86_32
10#define STACKSLOTS_PER_LINE 8
11#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
12#else
13#define STACKSLOTS_PER_LINE 4
14#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
15#endif
16
17#include <linux/uaccess.h>
18
19extern void
20show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
21 unsigned long *stack, unsigned long bp, char *log_lvl);
22
23extern void
24show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
25 unsigned long *sp, unsigned long bp, char *log_lvl);
26
27extern unsigned int code_bytes;
28
29/* The form of the top of the frame on the stack */
30struct stack_frame {
31 struct stack_frame *next_frame;
32 unsigned long return_address;
33};
34
35struct stack_frame_ia32 {
36 u32 next_frame;
37 u32 return_address;
38};
39
40static inline unsigned long rewind_frame_pointer(int n)
41{
42 struct stack_frame *frame;
43
44 get_bp(frame);
45
46#ifdef CONFIG_FRAME_POINTER
47 while (n--) {
48 if (probe_kernel_address(&frame->next_frame, frame))
49 break;
50 }
51#endif
52
53 return (unsigned long)frame;
54}
55
56#endif /* DUMPSTACK_H */
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 11540a189d93..0f6376ffa2d9 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -16,8 +16,6 @@
16 16
17#include <asm/stacktrace.h> 17#include <asm/stacktrace.h>
18 18
19#include "dumpstack.h"
20
21 19
22void dump_trace(struct task_struct *task, struct pt_regs *regs, 20void dump_trace(struct task_struct *task, struct pt_regs *regs,
23 unsigned long *stack, unsigned long bp, 21 unsigned long *stack, unsigned long bp,
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 272c9f1f05f3..57a21f11c791 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -16,7 +16,6 @@
16 16
17#include <asm/stacktrace.h> 17#include <asm/stacktrace.h>
18 18
19#include "dumpstack.h"
20 19
21#define N_EXCEPTION_STACKS_END \ 20#define N_EXCEPTION_STACKS_END \
22 (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2) 21 (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2)
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 922eefbb3f6c..b53c525368a7 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -23,11 +23,16 @@ static int save_stack_stack(void *data, char *name)
23 return 0; 23 return 0;
24} 24}
25 25
26static void save_stack_address(void *data, unsigned long addr, int reliable) 26static void
27__save_stack_address(void *data, unsigned long addr, bool reliable, bool nosched)
27{ 28{
28 struct stack_trace *trace = data; 29 struct stack_trace *trace = data;
30#ifdef CONFIG_FRAME_POINTER
29 if (!reliable) 31 if (!reliable)
30 return; 32 return;
33#endif
34 if (nosched && in_sched_functions(addr))
35 return;
31 if (trace->skip > 0) { 36 if (trace->skip > 0) {
32 trace->skip--; 37 trace->skip--;
33 return; 38 return;
@@ -36,20 +41,15 @@ static void save_stack_address(void *data, unsigned long addr, int reliable)
36 trace->entries[trace->nr_entries++] = addr; 41 trace->entries[trace->nr_entries++] = addr;
37} 42}
38 43
44static void save_stack_address(void *data, unsigned long addr, int reliable)
45{
46 return __save_stack_address(data, addr, reliable, false);
47}
48
39static void 49static void
40save_stack_address_nosched(void *data, unsigned long addr, int reliable) 50save_stack_address_nosched(void *data, unsigned long addr, int reliable)
41{ 51{
42 struct stack_trace *trace = (struct stack_trace *)data; 52 return __save_stack_address(data, addr, reliable, true);
43 if (!reliable)
44 return;
45 if (in_sched_functions(addr))
46 return;
47 if (trace->skip > 0) {
48 trace->skip--;
49 return;
50 }
51 if (trace->nr_entries < trace->max_entries)
52 trace->entries[trace->nr_entries++] = addr;
53} 53}
54 54
55static const struct stacktrace_ops save_stack_ops = { 55static const struct stacktrace_ops save_stack_ops = {
@@ -96,12 +96,13 @@ EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
96 96
97/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */ 97/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */
98 98
99struct stack_frame { 99struct stack_frame_user {
100 const void __user *next_fp; 100 const void __user *next_fp;
101 unsigned long ret_addr; 101 unsigned long ret_addr;
102}; 102};
103 103
104static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) 104static int
105copy_stack_frame(const void __user *fp, struct stack_frame_user *frame)
105{ 106{
106 int ret; 107 int ret;
107 108
@@ -126,7 +127,7 @@ static inline void __save_stack_trace_user(struct stack_trace *trace)
126 trace->entries[trace->nr_entries++] = regs->ip; 127 trace->entries[trace->nr_entries++] = regs->ip;
127 128
128 while (trace->nr_entries < trace->max_entries) { 129 while (trace->nr_entries < trace->max_entries) {
129 struct stack_frame frame; 130 struct stack_frame_user frame;
130 131
131 frame.next_fp = NULL; 132 frame.next_fp = NULL;
132 frame.ret_addr = 0; 133 frame.ret_addr = 0;
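
The consolidated __save_stack_address() above fills trace->entries[] while honouring a skip count. As a userspace analogue of that fill-and-skip idea (not the kernel code), glibc's backtrace() can stand in for the stacktrace ops; the demo_* names below are invented for illustration:

#include <execinfo.h>
#include <stdio.h>

#define MAX_ENTRIES 32

static void save_trace_demo(int skip)
{
	void *entries[MAX_ENTRIES];
	int nr = backtrace(entries, MAX_ENTRIES);	/* capture return addresses */
	int i;

	for (i = skip; i < nr; i++)			/* skip the innermost frames */
		printf("  [<%p>]\n", entries[i]);
}

int main(void)
{
	save_trace_demo(1);	/* skip save_trace_demo() itself */
	return 0;
}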
diff --git a/arch/xtensa/include/asm/local64.h b/arch/xtensa/include/asm/local64.h
new file mode 100644
index 000000000000..36c93b5cc239
--- /dev/null
+++ b/arch/xtensa/include/asm/local64.h
@@ -0,0 +1 @@
#include <asm-generic/local64.h>
diff --git a/fs/exec.c b/fs/exec.c
index e19de6a80339..97d91a03fb13 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -653,6 +653,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
653 else 653 else
654 stack_base = vma->vm_start - stack_expand; 654 stack_base = vma->vm_start - stack_expand;
655#endif 655#endif
656 current->mm->start_stack = bprm->p;
656 ret = expand_stack(vma, stack_base); 657 ret = expand_stack(vma, stack_base);
657 if (ret) 658 if (ret)
658 ret = -EFAULT; 659 ret = -EFAULT;
diff --git a/include/asm-generic/local64.h b/include/asm-generic/local64.h
new file mode 100644
index 000000000000..02ac760c1a8b
--- /dev/null
+++ b/include/asm-generic/local64.h
@@ -0,0 +1,96 @@
1#ifndef _ASM_GENERIC_LOCAL64_H
2#define _ASM_GENERIC_LOCAL64_H
3
4#include <linux/percpu.h>
5#include <asm/types.h>
6
7/*
8 * A signed long type for operations which are atomic for a single CPU.
9 * Usually used in combination with per-cpu variables.
10 *
11 * This is the default implementation, which uses atomic64_t. Which is
12 * rather pointless. The whole point behind local64_t is that some processors
13 * can perform atomic adds and subtracts in a manner which is atomic wrt IRQs
14 * running on this CPU. local64_t allows exploitation of such capabilities.
15 */
16
17/* Implement in terms of atomics. */
18
19#if BITS_PER_LONG == 64
20
21#include <asm/local.h>
22
23typedef struct {
24 local_t a;
25} local64_t;
26
27#define LOCAL64_INIT(i) { LOCAL_INIT(i) }
28
29#define local64_read(l) local_read(&(l)->a)
30#define local64_set(l,i) local_set((&(l)->a),(i))
31#define local64_inc(l) local_inc(&(l)->a)
32#define local64_dec(l) local_dec(&(l)->a)
33#define local64_add(i,l) local_add((i),(&(l)->a))
34#define local64_sub(i,l) local_sub((i),(&(l)->a))
35
36#define local64_sub_and_test(i, l) local_sub_and_test((i), (&(l)->a))
37#define local64_dec_and_test(l) local_dec_and_test(&(l)->a)
38#define local64_inc_and_test(l) local_inc_and_test(&(l)->a)
39#define local64_add_negative(i, l) local_add_negative((i), (&(l)->a))
40#define local64_add_return(i, l) local_add_return((i), (&(l)->a))
41#define local64_sub_return(i, l) local_sub_return((i), (&(l)->a))
42#define local64_inc_return(l) local_inc_return(&(l)->a)
43
44#define local64_cmpxchg(l, o, n) local_cmpxchg((&(l)->a), (o), (n))
45#define local64_xchg(l, n) local_xchg((&(l)->a), (n))
46#define local64_add_unless(l, _a, u) local_add_unless((&(l)->a), (_a), (u))
47#define local64_inc_not_zero(l) local_inc_not_zero(&(l)->a)
48
49/* Non-atomic variants, ie. preemption disabled and won't be touched
50 * in interrupt, etc. Some archs can optimize this case well. */
51#define __local64_inc(l) local64_set((l), local64_read(l) + 1)
52#define __local64_dec(l) local64_set((l), local64_read(l) - 1)
53#define __local64_add(i,l) local64_set((l), local64_read(l) + (i))
54#define __local64_sub(i,l) local64_set((l), local64_read(l) - (i))
55
56#else /* BITS_PER_LONG != 64 */
57
58#include <asm/atomic.h>
59
60/* Don't use typedef: don't want them to be mixed with atomic_t's. */
61typedef struct {
62 atomic64_t a;
63} local64_t;
64
65#define LOCAL64_INIT(i) { ATOMIC_LONG_INIT(i) }
66
67#define local64_read(l) atomic64_read(&(l)->a)
68#define local64_set(l,i) atomic64_set((&(l)->a),(i))
69#define local64_inc(l) atomic64_inc(&(l)->a)
70#define local64_dec(l) atomic64_dec(&(l)->a)
71#define local64_add(i,l) atomic64_add((i),(&(l)->a))
72#define local64_sub(i,l) atomic64_sub((i),(&(l)->a))
73
74#define local64_sub_and_test(i, l) atomic64_sub_and_test((i), (&(l)->a))
75#define local64_dec_and_test(l) atomic64_dec_and_test(&(l)->a)
76#define local64_inc_and_test(l) atomic64_inc_and_test(&(l)->a)
77#define local64_add_negative(i, l) atomic64_add_negative((i), (&(l)->a))
78#define local64_add_return(i, l) atomic64_add_return((i), (&(l)->a))
79#define local64_sub_return(i, l) atomic64_sub_return((i), (&(l)->a))
80#define local64_inc_return(l) atomic64_inc_return(&(l)->a)
81
82#define local64_cmpxchg(l, o, n) atomic64_cmpxchg((&(l)->a), (o), (n))
83#define local64_xchg(l, n) atomic64_xchg((&(l)->a), (n))
84#define local64_add_unless(l, _a, u) atomic64_add_unless((&(l)->a), (_a), (u))
85#define local64_inc_not_zero(l) atomic64_inc_not_zero(&(l)->a)
86
87/* Non-atomic variants, ie. preemption disabled and won't be touched
88 * in interrupt, etc. Some archs can optimize this case well. */
89#define __local64_inc(l) local64_set((l), local64_read(l) + 1)
90#define __local64_dec(l) local64_set((l), local64_read(l) - 1)
91#define __local64_add(i,l) local64_set((l), local64_read(l) + (i))
92#define __local64_sub(i,l) local64_set((l), local64_read(l) - (i))
93
94#endif /* BITS_PER_LONG != 64 */
95
96#endif /* _ASM_GENERIC_LOCAL64_H */
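
The wrapping pattern in the new header can be mimicked in userspace with C11 atomics; the sketch below is illustrative only, the local64_demo_* names are invented, and the kernel type builds on local_t/atomic64_t rather than <stdatomic.h>:

#include <stdatomic.h>
#include <stdio.h>

typedef struct {
	atomic_llong a;			/* stand-in for local_t / atomic64_t */
} local64_demo_t;

#define LOCAL64_DEMO_INIT(i)	{ (i) }
#define local64_demo_read(l)	atomic_load(&(l)->a)
#define local64_demo_add(i, l)	atomic_fetch_add(&(l)->a, (i))

int main(void)
{
	local64_demo_t counter = LOCAL64_DEMO_INIT(0);

	local64_demo_add(5, &counter);
	local64_demo_add(-2, &counter);
	printf("count = %lld\n", (long long)local64_demo_read(&counter));
	return 0;
}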
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 3167f2df4126..0af31cd335d6 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -257,8 +257,7 @@ static inline void
257perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr, 257perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr,
258 u64 count, struct pt_regs *regs, void *head) 258 u64 count, struct pt_regs *regs, void *head)
259{ 259{
260 perf_tp_event(addr, count, raw_data, size, regs, head); 260 perf_tp_event(addr, count, raw_data, size, regs, head, rctx);
261 perf_swevent_put_recursion_context(rctx);
262} 261}
263#endif 262#endif
264 263
diff --git a/include/linux/kmemtrace.h b/include/linux/kmemtrace.h
deleted file mode 100644
index b616d3930c3b..000000000000
--- a/include/linux/kmemtrace.h
+++ /dev/null
@@ -1,25 +0,0 @@
1/*
2 * Copyright (C) 2008 Eduard - Gabriel Munteanu
3 *
4 * This file is released under GPL version 2.
5 */
6
7#ifndef _LINUX_KMEMTRACE_H
8#define _LINUX_KMEMTRACE_H
9
10#ifdef __KERNEL__
11
12#include <trace/events/kmem.h>
13
14#ifdef CONFIG_KMEMTRACE
15extern void kmemtrace_init(void);
16#else
17static inline void kmemtrace_init(void)
18{
19}
20#endif
21
22#endif /* __KERNEL__ */
23
24#endif /* _LINUX_KMEMTRACE_H */
25
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 5d0266d94985..63b5aa5dce69 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -214,8 +214,9 @@ struct perf_event_attr {
214 * See also PERF_RECORD_MISC_EXACT_IP 214 * See also PERF_RECORD_MISC_EXACT_IP
215 */ 215 */
216 precise_ip : 2, /* skid constraint */ 216 precise_ip : 2, /* skid constraint */
217 mmap_data : 1, /* non-exec mmap data */
217 218
218 __reserved_1 : 47; 219 __reserved_1 : 46;
219 220
220 union { 221 union {
221 __u32 wakeup_events; /* wakeup every n events */ 222 __u32 wakeup_events; /* wakeup every n events */
@@ -461,6 +462,7 @@ enum perf_callchain_context {
461 462
462#ifdef CONFIG_PERF_EVENTS 463#ifdef CONFIG_PERF_EVENTS
463# include <asm/perf_event.h> 464# include <asm/perf_event.h>
465# include <asm/local64.h>
464#endif 466#endif
465 467
466struct perf_guest_info_callbacks { 468struct perf_guest_info_callbacks {
@@ -535,10 +537,10 @@ struct hw_perf_event {
535 struct arch_hw_breakpoint info; 537 struct arch_hw_breakpoint info;
536#endif 538#endif
537 }; 539 };
538 atomic64_t prev_count; 540 local64_t prev_count;
539 u64 sample_period; 541 u64 sample_period;
540 u64 last_period; 542 u64 last_period;
541 atomic64_t period_left; 543 local64_t period_left;
542 u64 interrupts; 544 u64 interrupts;
543 545
544 u64 freq_time_stamp; 546 u64 freq_time_stamp;
@@ -548,7 +550,10 @@ struct hw_perf_event {
548 550
549struct perf_event; 551struct perf_event;
550 552
551#define PERF_EVENT_TXN_STARTED 1 553/*
554 * Common implementation detail of pmu::{start,commit,cancel}_txn
555 */
556#define PERF_EVENT_TXN 0x1
552 557
553/** 558/**
554 * struct pmu - generic performance monitoring unit 559 * struct pmu - generic performance monitoring unit
@@ -562,14 +567,28 @@ struct pmu {
562 void (*unthrottle) (struct perf_event *event); 567 void (*unthrottle) (struct perf_event *event);
563 568
564 /* 569 /*
565 * group events scheduling is treated as a transaction, 570 * Group events scheduling is treated as a transaction, add group
566 * add group events as a whole and perform one schedulability test. 571 * events as a whole and perform one schedulability test. If the test
567 * If test fails, roll back the whole group 572 * fails, roll back the whole group
568 */ 573 */
569 574
575 /*
576 * Start the transaction, after this ->enable() doesn't need
577 * to do schedulability tests.
578 */
570 void (*start_txn) (const struct pmu *pmu); 579 void (*start_txn) (const struct pmu *pmu);
571 void (*cancel_txn) (const struct pmu *pmu); 580 /*
581 * If ->start_txn() disabled the ->enable() schedulability test
582 * then ->commit_txn() is required to perform one. On success
583 * the transaction is closed. On error the transaction is kept
584 * open until ->cancel_txn() is called.
585 */
572 int (*commit_txn) (const struct pmu *pmu); 586 int (*commit_txn) (const struct pmu *pmu);
587 /*
588 * Will cancel the transaction, assumes ->disable() is called for
 589 * each successful ->enable() during the transaction.
590 */
591 void (*cancel_txn) (const struct pmu *pmu);
573}; 592};
574 593
575/** 594/**
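
The transaction protocol documented above (start_txn(), add the group's events, then commit_txn() or cancel_txn()) can be sketched in userspace as follows; struct demo_pmu and its helpers are invented stand-ins, not the kernel's struct pmu callbacks:

#include <stdio.h>

struct demo_pmu {
	int pending;		/* events added since start_txn() */
	int capacity;		/* schedulability limit */
};

static void start_txn(struct demo_pmu *pmu)  { pmu->pending = 0; }
static void add_event(struct demo_pmu *pmu)  { pmu->pending++; }
static int commit_txn(struct demo_pmu *pmu)  /* 0 on success */
{
	return pmu->pending <= pmu->capacity ? 0 : -1;
}
static void cancel_txn(struct demo_pmu *pmu) { pmu->pending = 0; }

int main(void)
{
	struct demo_pmu pmu = { .capacity = 2 };

	start_txn(&pmu);
	add_event(&pmu);
	add_event(&pmu);
	add_event(&pmu);		/* one too many for this PMU */
	if (commit_txn(&pmu)) {
		cancel_txn(&pmu);	/* roll the whole group back */
		puts("group rejected");
	} else {
		puts("group scheduled");
	}
	return 0;
}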
@@ -584,7 +603,9 @@ enum perf_event_active_state {
584 603
585struct file; 604struct file;
586 605
587struct perf_mmap_data { 606#define PERF_BUFFER_WRITABLE 0x01
607
608struct perf_buffer {
588 atomic_t refcount; 609 atomic_t refcount;
589 struct rcu_head rcu_head; 610 struct rcu_head rcu_head;
590#ifdef CONFIG_PERF_USE_VMALLOC 611#ifdef CONFIG_PERF_USE_VMALLOC
@@ -650,7 +671,8 @@ struct perf_event {
650 671
651 enum perf_event_active_state state; 672 enum perf_event_active_state state;
652 unsigned int attach_state; 673 unsigned int attach_state;
653 atomic64_t count; 674 local64_t count;
675 atomic64_t child_count;
654 676
655 /* 677 /*
656 * These are the total time in nanoseconds that the event 678 * These are the total time in nanoseconds that the event
@@ -709,7 +731,7 @@ struct perf_event {
709 atomic_t mmap_count; 731 atomic_t mmap_count;
710 int mmap_locked; 732 int mmap_locked;
711 struct user_struct *mmap_user; 733 struct user_struct *mmap_user;
712 struct perf_mmap_data *data; 734 struct perf_buffer *buffer;
713 735
714 /* poll related */ 736 /* poll related */
715 wait_queue_head_t waitq; 737 wait_queue_head_t waitq;
@@ -807,7 +829,7 @@ struct perf_cpu_context {
807 829
808struct perf_output_handle { 830struct perf_output_handle {
809 struct perf_event *event; 831 struct perf_event *event;
810 struct perf_mmap_data *data; 832 struct perf_buffer *buffer;
811 unsigned long wakeup; 833 unsigned long wakeup;
812 unsigned long size; 834 unsigned long size;
813 void *addr; 835 void *addr;
@@ -910,8 +932,10 @@ extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
910 932
911extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64); 933extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64);
912 934
913extern void 935#ifndef perf_arch_fetch_caller_regs
914perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip); 936static inline void
937perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { }
938#endif
915 939
916/* 940/*
917 * Take a snapshot of the regs. Skip ip and frame pointer to 941 * Take a snapshot of the regs. Skip ip and frame pointer to
@@ -921,31 +945,11 @@ perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip);
921 * - bp for callchains 945 * - bp for callchains
922 * - eflags, for future purposes, just in case 946 * - eflags, for future purposes, just in case
923 */ 947 */
924static inline void perf_fetch_caller_regs(struct pt_regs *regs, int skip) 948static inline void perf_fetch_caller_regs(struct pt_regs *regs)
925{ 949{
926 unsigned long ip;
927
928 memset(regs, 0, sizeof(*regs)); 950 memset(regs, 0, sizeof(*regs));
929 951
930 switch (skip) { 952 perf_arch_fetch_caller_regs(regs, CALLER_ADDR0);
931 case 1 :
932 ip = CALLER_ADDR0;
933 break;
934 case 2 :
935 ip = CALLER_ADDR1;
936 break;
937 case 3 :
938 ip = CALLER_ADDR2;
939 break;
940 case 4:
941 ip = CALLER_ADDR3;
942 break;
943 /* No need to support further for now */
944 default:
945 ip = 0;
946 }
947
948 return perf_arch_fetch_caller_regs(regs, ip, skip);
949} 953}
950 954
951static inline void 955static inline void
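
With the skip argument gone, perf_fetch_caller_regs() records only CALLER_ADDR0, which the kernel derives from __builtin_return_address(0). A minimal userspace sketch of that single-caller capture (illustrative, not the kernel helper):

#include <stdio.h>

static void __attribute__((noinline)) fetch_caller_ip(void)
{
	void *ip = __builtin_return_address(0);	/* address in the caller */

	printf("caller ip: %p\n", ip);
}

int main(void)
{
	fetch_caller_ip();
	return 0;
}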
@@ -955,21 +959,14 @@ perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
955 struct pt_regs hot_regs; 959 struct pt_regs hot_regs;
956 960
957 if (!regs) { 961 if (!regs) {
958 perf_fetch_caller_regs(&hot_regs, 1); 962 perf_fetch_caller_regs(&hot_regs);
959 regs = &hot_regs; 963 regs = &hot_regs;
960 } 964 }
961 __perf_sw_event(event_id, nr, nmi, regs, addr); 965 __perf_sw_event(event_id, nr, nmi, regs, addr);
962 } 966 }
963} 967}
964 968
965extern void __perf_event_mmap(struct vm_area_struct *vma); 969extern void perf_event_mmap(struct vm_area_struct *vma);
966
967static inline void perf_event_mmap(struct vm_area_struct *vma)
968{
969 if (vma->vm_flags & VM_EXEC)
970 __perf_event_mmap(vma);
971}
972
973extern struct perf_guest_info_callbacks *perf_guest_cbs; 970extern struct perf_guest_info_callbacks *perf_guest_cbs;
974extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); 971extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
975extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); 972extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
@@ -1001,7 +998,7 @@ static inline bool perf_paranoid_kernel(void)
1001extern void perf_event_init(void); 998extern void perf_event_init(void);
1002extern void perf_tp_event(u64 addr, u64 count, void *record, 999extern void perf_tp_event(u64 addr, u64 count, void *record,
1003 int entry_size, struct pt_regs *regs, 1000 int entry_size, struct pt_regs *regs,
1004 struct hlist_head *head); 1001 struct hlist_head *head, int rctx);
1005extern void perf_bp_event(struct perf_event *event, void *data); 1002extern void perf_bp_event(struct perf_event *event, void *data);
1006 1003
1007#ifndef perf_misc_flags 1004#ifndef perf_misc_flags
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 1812dac8c496..1acfa73ce2ac 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -14,7 +14,8 @@
14#include <asm/page.h> /* kmalloc_sizes.h needs PAGE_SIZE */ 14#include <asm/page.h> /* kmalloc_sizes.h needs PAGE_SIZE */
15#include <asm/cache.h> /* kmalloc_sizes.h needs L1_CACHE_BYTES */ 15#include <asm/cache.h> /* kmalloc_sizes.h needs L1_CACHE_BYTES */
16#include <linux/compiler.h> 16#include <linux/compiler.h>
17#include <linux/kmemtrace.h> 17
18#include <trace/events/kmem.h>
18 19
19#ifndef ARCH_KMALLOC_MINALIGN 20#ifndef ARCH_KMALLOC_MINALIGN
20/* 21/*
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 4ba59cfc1f75..6447a723ecb1 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -10,9 +10,10 @@
10#include <linux/gfp.h> 10#include <linux/gfp.h>
11#include <linux/workqueue.h> 11#include <linux/workqueue.h>
12#include <linux/kobject.h> 12#include <linux/kobject.h>
13#include <linux/kmemtrace.h>
14#include <linux/kmemleak.h> 13#include <linux/kmemleak.h>
15 14
15#include <trace/events/kmem.h>
16
16enum stat_item { 17enum stat_item {
17 ALLOC_FASTPATH, /* Allocation from cpu slab */ 18 ALLOC_FASTPATH, /* Allocation from cpu slab */
18 ALLOC_SLOWPATH, /* Allocation by getting a new cpu slab */ 19 ALLOC_SLOWPATH, /* Allocation by getting a new cpu slab */
diff --git a/include/trace/boot.h b/include/trace/boot.h
deleted file mode 100644
index 088ea089e31d..000000000000
--- a/include/trace/boot.h
+++ /dev/null
@@ -1,60 +0,0 @@
1#ifndef _LINUX_TRACE_BOOT_H
2#define _LINUX_TRACE_BOOT_H
3
4#include <linux/module.h>
5#include <linux/kallsyms.h>
6#include <linux/init.h>
7
8/*
9 * Structure which defines the trace of an initcall
10 * while it is called.
11 * You don't have to fill the func field since it is
12 * only used internally by the tracer.
13 */
14struct boot_trace_call {
15 pid_t caller;
16 char func[KSYM_SYMBOL_LEN];
17};
18
19/*
20 * Structure which defines the trace of an initcall
21 * while it returns.
22 */
23struct boot_trace_ret {
24 char func[KSYM_SYMBOL_LEN];
25 int result;
26 unsigned long long duration; /* nsecs */
27};
28
29#ifdef CONFIG_BOOT_TRACER
30/* Append the traces on the ring-buffer */
31extern void trace_boot_call(struct boot_trace_call *bt, initcall_t fn);
32extern void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn);
33
34/* Tells the tracer that smp_pre_initcall is finished.
35 * So we can start the tracing
36 */
37extern void start_boot_trace(void);
38
39/* Resume the tracing of other necessary events
40 * such as sched switches
41 */
42extern void enable_boot_trace(void);
43
44/* Suspend this tracing. Actually, only sched_switches tracing have
45 * to be suspended. Initcalls doesn't need it.)
46 */
47extern void disable_boot_trace(void);
48#else
49static inline
50void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) { }
51
52static inline
53void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) { }
54
55static inline void start_boot_trace(void) { }
56static inline void enable_boot_trace(void) { }
57static inline void disable_boot_trace(void) { }
58#endif /* CONFIG_BOOT_TRACER */
59
60#endif /* __LINUX_TRACE_BOOT_H */
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 5a64905d7278..fc013a8201e9 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -705,7 +705,7 @@ perf_trace_##call(void *__data, proto) \
705 int __data_size; \ 705 int __data_size; \
706 int rctx; \ 706 int rctx; \
707 \ 707 \
708 perf_fetch_caller_regs(&__regs, 1); \ 708 perf_fetch_caller_regs(&__regs); \
709 \ 709 \
710 __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ 710 __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
711 __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\ 711 __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\
diff --git a/init/main.c b/init/main.c
index 3bdb152f412f..e2a2bf3a169f 100644
--- a/init/main.c
+++ b/init/main.c
@@ -66,11 +66,9 @@
66#include <linux/ftrace.h> 66#include <linux/ftrace.h>
67#include <linux/async.h> 67#include <linux/async.h>
68#include <linux/kmemcheck.h> 68#include <linux/kmemcheck.h>
69#include <linux/kmemtrace.h>
70#include <linux/sfi.h> 69#include <linux/sfi.h>
71#include <linux/shmem_fs.h> 70#include <linux/shmem_fs.h>
72#include <linux/slab.h> 71#include <linux/slab.h>
73#include <trace/boot.h>
74 72
75#include <asm/io.h> 73#include <asm/io.h>
76#include <asm/bugs.h> 74#include <asm/bugs.h>
@@ -653,7 +651,6 @@ asmlinkage void __init start_kernel(void)
653#endif 651#endif
654 page_cgroup_init(); 652 page_cgroup_init();
655 enable_debug_pagealloc(); 653 enable_debug_pagealloc();
656 kmemtrace_init();
657 kmemleak_init(); 654 kmemleak_init();
658 debug_objects_mem_init(); 655 debug_objects_mem_init();
659 idr_init_cache(); 656 idr_init_cache();
@@ -715,38 +712,33 @@ int initcall_debug;
715core_param(initcall_debug, initcall_debug, bool, 0644); 712core_param(initcall_debug, initcall_debug, bool, 0644);
716 713
717static char msgbuf[64]; 714static char msgbuf[64];
718static struct boot_trace_call call;
719static struct boot_trace_ret ret;
720 715
721int do_one_initcall(initcall_t fn) 716int do_one_initcall(initcall_t fn)
722{ 717{
723 int count = preempt_count(); 718 int count = preempt_count();
724 ktime_t calltime, delta, rettime; 719 ktime_t calltime, delta, rettime;
720 unsigned long long duration;
721 int ret;
725 722
726 if (initcall_debug) { 723 if (initcall_debug) {
727 call.caller = task_pid_nr(current); 724 printk("calling %pF @ %i\n", fn, task_pid_nr(current));
728 printk("calling %pF @ %i\n", fn, call.caller);
729 calltime = ktime_get(); 725 calltime = ktime_get();
730 trace_boot_call(&call, fn);
731 enable_boot_trace();
732 } 726 }
733 727
734 ret.result = fn(); 728 ret = fn();
735 729
736 if (initcall_debug) { 730 if (initcall_debug) {
737 disable_boot_trace();
738 rettime = ktime_get(); 731 rettime = ktime_get();
739 delta = ktime_sub(rettime, calltime); 732 delta = ktime_sub(rettime, calltime);
740 ret.duration = (unsigned long long) ktime_to_ns(delta) >> 10; 733 duration = (unsigned long long) ktime_to_ns(delta) >> 10;
741 trace_boot_ret(&ret, fn); 734 printk("initcall %pF returned %d after %lld usecs\n", fn,
742 printk("initcall %pF returned %d after %Ld usecs\n", fn, 735 ret, duration);
743 ret.result, ret.duration);
744 } 736 }
745 737
746 msgbuf[0] = 0; 738 msgbuf[0] = 0;
747 739
748 if (ret.result && ret.result != -ENODEV && initcall_debug) 740 if (ret && ret != -ENODEV && initcall_debug)
749 sprintf(msgbuf, "error code %d ", ret.result); 741 sprintf(msgbuf, "error code %d ", ret);
750 742
751 if (preempt_count() != count) { 743 if (preempt_count() != count) {
752 strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf)); 744 strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf));
@@ -760,7 +752,7 @@ int do_one_initcall(initcall_t fn)
760 printk("initcall %pF returned with %s\n", fn, msgbuf); 752 printk("initcall %pF returned with %s\n", fn, msgbuf);
761 } 753 }
762 754
763 return ret.result; 755 return ret;
764} 756}
765 757
766 758
@@ -880,7 +872,6 @@ static int __init kernel_init(void * unused)
880 smp_prepare_cpus(setup_max_cpus); 872 smp_prepare_cpus(setup_max_cpus);
881 873
882 do_pre_smp_initcalls(); 874 do_pre_smp_initcalls();
883 start_boot_trace();
884 875
885 smp_init(); 876 smp_init();
886 sched_init_smp(); 877 sched_init_smp();
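
The rewritten do_one_initcall() times each call with ktime_get() and shifts the nanosecond delta right by 10 to approximate microseconds. A rough userspace analogue of that timing pattern, with clock_gettime() standing in for ktime_get() and an invented demo_initcall():

#include <stdio.h>
#include <time.h>

static long long now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

static int demo_initcall(void)
{
	struct timespec d = { 0, 5 * 1000 * 1000 };	/* pretend to work for 5 ms */

	nanosleep(&d, NULL);
	return 0;
}

int main(void)
{
	long long calltime = now_ns();
	int ret = demo_initcall();
	long long duration = (now_ns() - calltime) >> 10;	/* ns >> 10 ~ usecs */

	printf("initcall returned %d after %lld usecs\n", ret, duration);
	return ret;
}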
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index ff86c558af4c..c772a3d4000d 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -675,7 +675,6 @@ group_sched_in(struct perf_event *group_event,
675 struct perf_event *event, *partial_group = NULL; 675 struct perf_event *event, *partial_group = NULL;
676 const struct pmu *pmu = group_event->pmu; 676 const struct pmu *pmu = group_event->pmu;
677 bool txn = false; 677 bool txn = false;
678 int ret;
679 678
680 if (group_event->state == PERF_EVENT_STATE_OFF) 679 if (group_event->state == PERF_EVENT_STATE_OFF)
681 return 0; 680 return 0;
@@ -703,14 +702,8 @@ group_sched_in(struct perf_event *group_event,
703 } 702 }
704 } 703 }
705 704
706 if (!txn) 705 if (!txn || !pmu->commit_txn(pmu))
707 return 0;
708
709 ret = pmu->commit_txn(pmu);
710 if (!ret) {
711 pmu->cancel_txn(pmu);
712 return 0; 706 return 0;
713 }
714 707
715group_error: 708group_error:
716 /* 709 /*
@@ -1155,9 +1148,9 @@ static void __perf_event_sync_stat(struct perf_event *event,
1155 * In order to keep per-task stats reliable we need to flip the event 1148 * In order to keep per-task stats reliable we need to flip the event
1156 * values when we flip the contexts. 1149 * values when we flip the contexts.
1157 */ 1150 */
1158 value = atomic64_read(&next_event->count); 1151 value = local64_read(&next_event->count);
1159 value = atomic64_xchg(&event->count, value); 1152 value = local64_xchg(&event->count, value);
1160 atomic64_set(&next_event->count, value); 1153 local64_set(&next_event->count, value);
1161 1154
1162 swap(event->total_time_enabled, next_event->total_time_enabled); 1155 swap(event->total_time_enabled, next_event->total_time_enabled);
1163 swap(event->total_time_running, next_event->total_time_running); 1156 swap(event->total_time_running, next_event->total_time_running);
@@ -1547,10 +1540,10 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
1547 1540
1548 hwc->sample_period = sample_period; 1541 hwc->sample_period = sample_period;
1549 1542
1550 if (atomic64_read(&hwc->period_left) > 8*sample_period) { 1543 if (local64_read(&hwc->period_left) > 8*sample_period) {
1551 perf_disable(); 1544 perf_disable();
1552 perf_event_stop(event); 1545 perf_event_stop(event);
1553 atomic64_set(&hwc->period_left, 0); 1546 local64_set(&hwc->period_left, 0);
1554 perf_event_start(event); 1547 perf_event_start(event);
1555 perf_enable(); 1548 perf_enable();
1556 } 1549 }
@@ -1591,7 +1584,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
1591 1584
1592 perf_disable(); 1585 perf_disable();
1593 event->pmu->read(event); 1586 event->pmu->read(event);
1594 now = atomic64_read(&event->count); 1587 now = local64_read(&event->count);
1595 delta = now - hwc->freq_count_stamp; 1588 delta = now - hwc->freq_count_stamp;
1596 hwc->freq_count_stamp = now; 1589 hwc->freq_count_stamp = now;
1597 1590
@@ -1743,6 +1736,11 @@ static void __perf_event_read(void *info)
1743 event->pmu->read(event); 1736 event->pmu->read(event);
1744} 1737}
1745 1738
1739static inline u64 perf_event_count(struct perf_event *event)
1740{
1741 return local64_read(&event->count) + atomic64_read(&event->child_count);
1742}
1743
1746static u64 perf_event_read(struct perf_event *event) 1744static u64 perf_event_read(struct perf_event *event)
1747{ 1745{
1748 /* 1746 /*
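
perf_event_count() now sums a cheap CPU-local counter with the atomically accumulated child total. A userspace sketch of that split, using invented demo_* names rather than the kernel API:

#include <stdatomic.h>
#include <stdio.h>

struct demo_event {
	long long count;		/* stand-in for local64_t, owner CPU only */
	atomic_llong child_count;	/* stand-in for atomic64_t */
};

static long long demo_event_count(struct demo_event *e)
{
	return e->count + atomic_load(&e->child_count);
}

int main(void)
{
	struct demo_event ev = { .count = 100 };

	atomic_fetch_add(&ev.child_count, 40);	/* a child folds in its total */
	printf("total = %lld\n", demo_event_count(&ev));
	return 0;
}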
@@ -1762,7 +1760,7 @@ static u64 perf_event_read(struct perf_event *event)
1762 raw_spin_unlock_irqrestore(&ctx->lock, flags); 1760 raw_spin_unlock_irqrestore(&ctx->lock, flags);
1763 } 1761 }
1764 1762
1765 return atomic64_read(&event->count); 1763 return perf_event_count(event);
1766} 1764}
1767 1765
1768/* 1766/*
@@ -1883,7 +1881,7 @@ static void free_event_rcu(struct rcu_head *head)
1883} 1881}
1884 1882
1885static void perf_pending_sync(struct perf_event *event); 1883static void perf_pending_sync(struct perf_event *event);
1886static void perf_mmap_data_put(struct perf_mmap_data *data); 1884static void perf_buffer_put(struct perf_buffer *buffer);
1887 1885
1888static void free_event(struct perf_event *event) 1886static void free_event(struct perf_event *event)
1889{ 1887{
@@ -1891,7 +1889,7 @@ static void free_event(struct perf_event *event)
1891 1889
1892 if (!event->parent) { 1890 if (!event->parent) {
1893 atomic_dec(&nr_events); 1891 atomic_dec(&nr_events);
1894 if (event->attr.mmap) 1892 if (event->attr.mmap || event->attr.mmap_data)
1895 atomic_dec(&nr_mmap_events); 1893 atomic_dec(&nr_mmap_events);
1896 if (event->attr.comm) 1894 if (event->attr.comm)
1897 atomic_dec(&nr_comm_events); 1895 atomic_dec(&nr_comm_events);
@@ -1899,9 +1897,9 @@ static void free_event(struct perf_event *event)
1899 atomic_dec(&nr_task_events); 1897 atomic_dec(&nr_task_events);
1900 } 1898 }
1901 1899
1902 if (event->data) { 1900 if (event->buffer) {
1903 perf_mmap_data_put(event->data); 1901 perf_buffer_put(event->buffer);
1904 event->data = NULL; 1902 event->buffer = NULL;
1905 } 1903 }
1906 1904
1907 if (event->destroy) 1905 if (event->destroy)
@@ -2126,13 +2124,13 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
2126static unsigned int perf_poll(struct file *file, poll_table *wait) 2124static unsigned int perf_poll(struct file *file, poll_table *wait)
2127{ 2125{
2128 struct perf_event *event = file->private_data; 2126 struct perf_event *event = file->private_data;
2129 struct perf_mmap_data *data; 2127 struct perf_buffer *buffer;
2130 unsigned int events = POLL_HUP; 2128 unsigned int events = POLL_HUP;
2131 2129
2132 rcu_read_lock(); 2130 rcu_read_lock();
2133 data = rcu_dereference(event->data); 2131 buffer = rcu_dereference(event->buffer);
2134 if (data) 2132 if (buffer)
2135 events = atomic_xchg(&data->poll, 0); 2133 events = atomic_xchg(&buffer->poll, 0);
2136 rcu_read_unlock(); 2134 rcu_read_unlock();
2137 2135
2138 poll_wait(file, &event->waitq, wait); 2136 poll_wait(file, &event->waitq, wait);
@@ -2143,7 +2141,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
2143static void perf_event_reset(struct perf_event *event) 2141static void perf_event_reset(struct perf_event *event)
2144{ 2142{
2145 (void)perf_event_read(event); 2143 (void)perf_event_read(event);
2146 atomic64_set(&event->count, 0); 2144 local64_set(&event->count, 0);
2147 perf_event_update_userpage(event); 2145 perf_event_update_userpage(event);
2148} 2146}
2149 2147
@@ -2342,14 +2340,14 @@ static int perf_event_index(struct perf_event *event)
2342void perf_event_update_userpage(struct perf_event *event) 2340void perf_event_update_userpage(struct perf_event *event)
2343{ 2341{
2344 struct perf_event_mmap_page *userpg; 2342 struct perf_event_mmap_page *userpg;
2345 struct perf_mmap_data *data; 2343 struct perf_buffer *buffer;
2346 2344
2347 rcu_read_lock(); 2345 rcu_read_lock();
2348 data = rcu_dereference(event->data); 2346 buffer = rcu_dereference(event->buffer);
2349 if (!data) 2347 if (!buffer)
2350 goto unlock; 2348 goto unlock;
2351 2349
2352 userpg = data->user_page; 2350 userpg = buffer->user_page;
2353 2351
2354 /* 2352 /*
2355 * Disable preemption so as to not let the corresponding user-space 2353 * Disable preemption so as to not let the corresponding user-space
@@ -2359,9 +2357,9 @@ void perf_event_update_userpage(struct perf_event *event)
2359 ++userpg->lock; 2357 ++userpg->lock;
2360 barrier(); 2358 barrier();
2361 userpg->index = perf_event_index(event); 2359 userpg->index = perf_event_index(event);
2362 userpg->offset = atomic64_read(&event->count); 2360 userpg->offset = perf_event_count(event);
2363 if (event->state == PERF_EVENT_STATE_ACTIVE) 2361 if (event->state == PERF_EVENT_STATE_ACTIVE)
2364 userpg->offset -= atomic64_read(&event->hw.prev_count); 2362 userpg->offset -= local64_read(&event->hw.prev_count);
2365 2363
2366 userpg->time_enabled = event->total_time_enabled + 2364 userpg->time_enabled = event->total_time_enabled +
2367 atomic64_read(&event->child_total_time_enabled); 2365 atomic64_read(&event->child_total_time_enabled);
@@ -2376,6 +2374,25 @@ unlock:
2376 rcu_read_unlock(); 2374 rcu_read_unlock();
2377} 2375}
2378 2376
2377static unsigned long perf_data_size(struct perf_buffer *buffer);
2378
2379static void
2380perf_buffer_init(struct perf_buffer *buffer, long watermark, int flags)
2381{
2382 long max_size = perf_data_size(buffer);
2383
2384 if (watermark)
2385 buffer->watermark = min(max_size, watermark);
2386
2387 if (!buffer->watermark)
2388 buffer->watermark = max_size / 2;
2389
2390 if (flags & PERF_BUFFER_WRITABLE)
2391 buffer->writable = 1;
2392
2393 atomic_set(&buffer->refcount, 1);
2394}
2395
2379#ifndef CONFIG_PERF_USE_VMALLOC 2396#ifndef CONFIG_PERF_USE_VMALLOC
2380 2397
2381/* 2398/*
@@ -2383,15 +2400,15 @@ unlock:
2383 */ 2400 */
2384 2401
2385static struct page * 2402static struct page *
2386perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff) 2403perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff)
2387{ 2404{
2388 if (pgoff > data->nr_pages) 2405 if (pgoff > buffer->nr_pages)
2389 return NULL; 2406 return NULL;
2390 2407
2391 if (pgoff == 0) 2408 if (pgoff == 0)
2392 return virt_to_page(data->user_page); 2409 return virt_to_page(buffer->user_page);
2393 2410
2394 return virt_to_page(data->data_pages[pgoff - 1]); 2411 return virt_to_page(buffer->data_pages[pgoff - 1]);
2395} 2412}
2396 2413
2397static void *perf_mmap_alloc_page(int cpu) 2414static void *perf_mmap_alloc_page(int cpu)
@@ -2407,42 +2424,44 @@ static void *perf_mmap_alloc_page(int cpu)
2407 return page_address(page); 2424 return page_address(page);
2408} 2425}
2409 2426
2410static struct perf_mmap_data * 2427static struct perf_buffer *
2411perf_mmap_data_alloc(struct perf_event *event, int nr_pages) 2428perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags)
2412{ 2429{
2413 struct perf_mmap_data *data; 2430 struct perf_buffer *buffer;
2414 unsigned long size; 2431 unsigned long size;
2415 int i; 2432 int i;
2416 2433
2417 size = sizeof(struct perf_mmap_data); 2434 size = sizeof(struct perf_buffer);
2418 size += nr_pages * sizeof(void *); 2435 size += nr_pages * sizeof(void *);
2419 2436
2420 data = kzalloc(size, GFP_KERNEL); 2437 buffer = kzalloc(size, GFP_KERNEL);
2421 if (!data) 2438 if (!buffer)
2422 goto fail; 2439 goto fail;
2423 2440
2424 data->user_page = perf_mmap_alloc_page(event->cpu); 2441 buffer->user_page = perf_mmap_alloc_page(cpu);
2425 if (!data->user_page) 2442 if (!buffer->user_page)
2426 goto fail_user_page; 2443 goto fail_user_page;
2427 2444
2428 for (i = 0; i < nr_pages; i++) { 2445 for (i = 0; i < nr_pages; i++) {
2429 data->data_pages[i] = perf_mmap_alloc_page(event->cpu); 2446 buffer->data_pages[i] = perf_mmap_alloc_page(cpu);
2430 if (!data->data_pages[i]) 2447 if (!buffer->data_pages[i])
2431 goto fail_data_pages; 2448 goto fail_data_pages;
2432 } 2449 }
2433 2450
2434 data->nr_pages = nr_pages; 2451 buffer->nr_pages = nr_pages;
2452
2453 perf_buffer_init(buffer, watermark, flags);
2435 2454
2436 return data; 2455 return buffer;
2437 2456
2438fail_data_pages: 2457fail_data_pages:
2439 for (i--; i >= 0; i--) 2458 for (i--; i >= 0; i--)
2440 free_page((unsigned long)data->data_pages[i]); 2459 free_page((unsigned long)buffer->data_pages[i]);
2441 2460
2442 free_page((unsigned long)data->user_page); 2461 free_page((unsigned long)buffer->user_page);
2443 2462
2444fail_user_page: 2463fail_user_page:
2445 kfree(data); 2464 kfree(buffer);
2446 2465
2447fail: 2466fail:
2448 return NULL; 2467 return NULL;
@@ -2456,17 +2475,17 @@ static void perf_mmap_free_page(unsigned long addr)
2456 __free_page(page); 2475 __free_page(page);
2457} 2476}
2458 2477
2459static void perf_mmap_data_free(struct perf_mmap_data *data) 2478static void perf_buffer_free(struct perf_buffer *buffer)
2460{ 2479{
2461 int i; 2480 int i;
2462 2481
2463 perf_mmap_free_page((unsigned long)data->user_page); 2482 perf_mmap_free_page((unsigned long)buffer->user_page);
2464 for (i = 0; i < data->nr_pages; i++) 2483 for (i = 0; i < buffer->nr_pages; i++)
2465 perf_mmap_free_page((unsigned long)data->data_pages[i]); 2484 perf_mmap_free_page((unsigned long)buffer->data_pages[i]);
2466 kfree(data); 2485 kfree(buffer);
2467} 2486}
2468 2487
2469static inline int page_order(struct perf_mmap_data *data) 2488static inline int page_order(struct perf_buffer *buffer)
2470{ 2489{
2471 return 0; 2490 return 0;
2472} 2491}
@@ -2479,18 +2498,18 @@ static inline int page_order(struct perf_mmap_data *data)
2479 * Required for architectures that have d-cache aliasing issues. 2498 * Required for architectures that have d-cache aliasing issues.
2480 */ 2499 */
2481 2500
2482static inline int page_order(struct perf_mmap_data *data) 2501static inline int page_order(struct perf_buffer *buffer)
2483{ 2502{
2484 return data->page_order; 2503 return buffer->page_order;
2485} 2504}
2486 2505
2487static struct page * 2506static struct page *
2488perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff) 2507perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff)
2489{ 2508{
2490 if (pgoff > (1UL << page_order(data))) 2509 if (pgoff > (1UL << page_order(buffer)))
2491 return NULL; 2510 return NULL;
2492 2511
2493 return vmalloc_to_page((void *)data->user_page + pgoff * PAGE_SIZE); 2512 return vmalloc_to_page((void *)buffer->user_page + pgoff * PAGE_SIZE);
2494} 2513}
2495 2514
2496static void perf_mmap_unmark_page(void *addr) 2515static void perf_mmap_unmark_page(void *addr)
@@ -2500,57 +2519,59 @@ static void perf_mmap_unmark_page(void *addr)
2500 page->mapping = NULL; 2519 page->mapping = NULL;
2501} 2520}
2502 2521
2503static void perf_mmap_data_free_work(struct work_struct *work) 2522static void perf_buffer_free_work(struct work_struct *work)
2504{ 2523{
2505 struct perf_mmap_data *data; 2524 struct perf_buffer *buffer;
2506 void *base; 2525 void *base;
2507 int i, nr; 2526 int i, nr;
2508 2527
2509 data = container_of(work, struct perf_mmap_data, work); 2528 buffer = container_of(work, struct perf_buffer, work);
2510 nr = 1 << page_order(data); 2529 nr = 1 << page_order(buffer);
2511 2530
2512 base = data->user_page; 2531 base = buffer->user_page;
2513 for (i = 0; i < nr + 1; i++) 2532 for (i = 0; i < nr + 1; i++)
2514 perf_mmap_unmark_page(base + (i * PAGE_SIZE)); 2533 perf_mmap_unmark_page(base + (i * PAGE_SIZE));
2515 2534
2516 vfree(base); 2535 vfree(base);
2517 kfree(data); 2536 kfree(buffer);
2518} 2537}
2519 2538
2520static void perf_mmap_data_free(struct perf_mmap_data *data) 2539static void perf_buffer_free(struct perf_buffer *buffer)
2521{ 2540{
2522 schedule_work(&data->work); 2541 schedule_work(&buffer->work);
2523} 2542}
2524 2543
2525static struct perf_mmap_data * 2544static struct perf_buffer *
2526perf_mmap_data_alloc(struct perf_event *event, int nr_pages) 2545perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags)
2527{ 2546{
2528 struct perf_mmap_data *data; 2547 struct perf_buffer *buffer;
2529 unsigned long size; 2548 unsigned long size;
2530 void *all_buf; 2549 void *all_buf;
2531 2550
2532 size = sizeof(struct perf_mmap_data); 2551 size = sizeof(struct perf_buffer);
2533 size += sizeof(void *); 2552 size += sizeof(void *);
2534 2553
2535 data = kzalloc(size, GFP_KERNEL); 2554 buffer = kzalloc(size, GFP_KERNEL);
2536 if (!data) 2555 if (!buffer)
2537 goto fail; 2556 goto fail;
2538 2557
2539 INIT_WORK(&data->work, perf_mmap_data_free_work); 2558 INIT_WORK(&buffer->work, perf_buffer_free_work);
2540 2559
2541 all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE); 2560 all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE);
2542 if (!all_buf) 2561 if (!all_buf)
2543 goto fail_all_buf; 2562 goto fail_all_buf;
2544 2563
2545 data->user_page = all_buf; 2564 buffer->user_page = all_buf;
2546 data->data_pages[0] = all_buf + PAGE_SIZE; 2565 buffer->data_pages[0] = all_buf + PAGE_SIZE;
2547 data->page_order = ilog2(nr_pages); 2566 buffer->page_order = ilog2(nr_pages);
2548 data->nr_pages = 1; 2567 buffer->nr_pages = 1;
2568
2569 perf_buffer_init(buffer, watermark, flags);
2549 2570
2550 return data; 2571 return buffer;
2551 2572
2552fail_all_buf: 2573fail_all_buf:
2553 kfree(data); 2574 kfree(buffer);
2554 2575
2555fail: 2576fail:
2556 return NULL; 2577 return NULL;
@@ -2558,15 +2579,15 @@ fail:
2558 2579
2559#endif 2580#endif
2560 2581
2561static unsigned long perf_data_size(struct perf_mmap_data *data) 2582static unsigned long perf_data_size(struct perf_buffer *buffer)
2562{ 2583{
2563 return data->nr_pages << (PAGE_SHIFT + page_order(data)); 2584 return buffer->nr_pages << (PAGE_SHIFT + page_order(buffer));
2564} 2585}
2565 2586
2566static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 2587static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
2567{ 2588{
2568 struct perf_event *event = vma->vm_file->private_data; 2589 struct perf_event *event = vma->vm_file->private_data;
2569 struct perf_mmap_data *data; 2590 struct perf_buffer *buffer;
2570 int ret = VM_FAULT_SIGBUS; 2591 int ret = VM_FAULT_SIGBUS;
2571 2592
2572 if (vmf->flags & FAULT_FLAG_MKWRITE) { 2593 if (vmf->flags & FAULT_FLAG_MKWRITE) {
@@ -2576,14 +2597,14 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
2576 } 2597 }
2577 2598
2578 rcu_read_lock(); 2599 rcu_read_lock();
2579 data = rcu_dereference(event->data); 2600 buffer = rcu_dereference(event->buffer);
2580 if (!data) 2601 if (!buffer)
2581 goto unlock; 2602 goto unlock;
2582 2603
2583 if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE)) 2604 if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))
2584 goto unlock; 2605 goto unlock;
2585 2606
2586 vmf->page = perf_mmap_to_page(data, vmf->pgoff); 2607 vmf->page = perf_mmap_to_page(buffer, vmf->pgoff);
2587 if (!vmf->page) 2608 if (!vmf->page)
2588 goto unlock; 2609 goto unlock;
2589 2610
@@ -2598,52 +2619,35 @@ unlock:
2598 return ret; 2619 return ret;
2599} 2620}
2600 2621
2601static void 2622static void perf_buffer_free_rcu(struct rcu_head *rcu_head)
2602perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
2603{
2604 long max_size = perf_data_size(data);
2605
2606 if (event->attr.watermark) {
2607 data->watermark = min_t(long, max_size,
2608 event->attr.wakeup_watermark);
2609 }
2610
2611 if (!data->watermark)
2612 data->watermark = max_size / 2;
2613
2614 atomic_set(&data->refcount, 1);
2615 rcu_assign_pointer(event->data, data);
2616}
2617
2618static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head)
2619{ 2623{
2620 struct perf_mmap_data *data; 2624 struct perf_buffer *buffer;
2621 2625
2622 data = container_of(rcu_head, struct perf_mmap_data, rcu_head); 2626 buffer = container_of(rcu_head, struct perf_buffer, rcu_head);
2623 perf_mmap_data_free(data); 2627 perf_buffer_free(buffer);
2624} 2628}
2625 2629
2626static struct perf_mmap_data *perf_mmap_data_get(struct perf_event *event) 2630static struct perf_buffer *perf_buffer_get(struct perf_event *event)
2627{ 2631{
2628 struct perf_mmap_data *data; 2632 struct perf_buffer *buffer;
2629 2633
2630 rcu_read_lock(); 2634 rcu_read_lock();
2631 data = rcu_dereference(event->data); 2635 buffer = rcu_dereference(event->buffer);
2632 if (data) { 2636 if (buffer) {
2633 if (!atomic_inc_not_zero(&data->refcount)) 2637 if (!atomic_inc_not_zero(&buffer->refcount))
2634 data = NULL; 2638 buffer = NULL;
2635 } 2639 }
2636 rcu_read_unlock(); 2640 rcu_read_unlock();
2637 2641
2638 return data; 2642 return buffer;
2639} 2643}
2640 2644
2641static void perf_mmap_data_put(struct perf_mmap_data *data) 2645static void perf_buffer_put(struct perf_buffer *buffer)
2642{ 2646{
2643 if (!atomic_dec_and_test(&data->refcount)) 2647 if (!atomic_dec_and_test(&buffer->refcount))
2644 return; 2648 return;
2645 2649
2646 call_rcu(&data->rcu_head, perf_mmap_data_free_rcu); 2650 call_rcu(&buffer->rcu_head, perf_buffer_free_rcu);
2647} 2651}
2648 2652
2649static void perf_mmap_open(struct vm_area_struct *vma) 2653static void perf_mmap_open(struct vm_area_struct *vma)
@@ -2658,16 +2662,16 @@ static void perf_mmap_close(struct vm_area_struct *vma)
2658 struct perf_event *event = vma->vm_file->private_data; 2662 struct perf_event *event = vma->vm_file->private_data;
2659 2663
2660 if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) { 2664 if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
2661 unsigned long size = perf_data_size(event->data); 2665 unsigned long size = perf_data_size(event->buffer);
2662 struct user_struct *user = event->mmap_user; 2666 struct user_struct *user = event->mmap_user;
2663 struct perf_mmap_data *data = event->data; 2667 struct perf_buffer *buffer = event->buffer;
2664 2668
2665 atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm); 2669 atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
2666 vma->vm_mm->locked_vm -= event->mmap_locked; 2670 vma->vm_mm->locked_vm -= event->mmap_locked;
2667 rcu_assign_pointer(event->data, NULL); 2671 rcu_assign_pointer(event->buffer, NULL);
2668 mutex_unlock(&event->mmap_mutex); 2672 mutex_unlock(&event->mmap_mutex);
2669 2673
2670 perf_mmap_data_put(data); 2674 perf_buffer_put(buffer);
2671 free_uid(user); 2675 free_uid(user);
2672 } 2676 }
2673} 2677}
@@ -2685,11 +2689,11 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
2685 unsigned long user_locked, user_lock_limit; 2689 unsigned long user_locked, user_lock_limit;
2686 struct user_struct *user = current_user(); 2690 struct user_struct *user = current_user();
2687 unsigned long locked, lock_limit; 2691 unsigned long locked, lock_limit;
2688 struct perf_mmap_data *data; 2692 struct perf_buffer *buffer;
2689 unsigned long vma_size; 2693 unsigned long vma_size;
2690 unsigned long nr_pages; 2694 unsigned long nr_pages;
2691 long user_extra, extra; 2695 long user_extra, extra;
2692 int ret = 0; 2696 int ret = 0, flags = 0;
2693 2697
2694 /* 2698 /*
2695 * Don't allow mmap() of inherited per-task counters. This would 2699 * Don't allow mmap() of inherited per-task counters. This would
@@ -2706,7 +2710,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
2706 nr_pages = (vma_size / PAGE_SIZE) - 1; 2710 nr_pages = (vma_size / PAGE_SIZE) - 1;
2707 2711
2708 /* 2712 /*
2709 * If we have data pages ensure they're a power-of-two number, so we 2713 * If we have buffer pages ensure they're a power-of-two number, so we
2710 * can do bitmasks instead of modulo. 2714 * can do bitmasks instead of modulo.
2711 */ 2715 */
2712 if (nr_pages != 0 && !is_power_of_2(nr_pages)) 2716 if (nr_pages != 0 && !is_power_of_2(nr_pages))
@@ -2720,9 +2724,9 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
2720 2724
2721 WARN_ON_ONCE(event->ctx->parent_ctx); 2725 WARN_ON_ONCE(event->ctx->parent_ctx);
2722 mutex_lock(&event->mmap_mutex); 2726 mutex_lock(&event->mmap_mutex);
2723 if (event->data) { 2727 if (event->buffer) {
2724 if (event->data->nr_pages == nr_pages) 2728 if (event->buffer->nr_pages == nr_pages)
2725 atomic_inc(&event->data->refcount); 2729 atomic_inc(&event->buffer->refcount);
2726 else 2730 else
2727 ret = -EINVAL; 2731 ret = -EINVAL;
2728 goto unlock; 2732 goto unlock;
@@ -2752,17 +2756,18 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
2752 goto unlock; 2756 goto unlock;
2753 } 2757 }
2754 2758
2755 WARN_ON(event->data); 2759 WARN_ON(event->buffer);
2760
2761 if (vma->vm_flags & VM_WRITE)
2762 flags |= PERF_BUFFER_WRITABLE;
2756 2763
2757 data = perf_mmap_data_alloc(event, nr_pages); 2764 buffer = perf_buffer_alloc(nr_pages, event->attr.wakeup_watermark,
2758 if (!data) { 2765 event->cpu, flags);
2766 if (!buffer) {
2759 ret = -ENOMEM; 2767 ret = -ENOMEM;
2760 goto unlock; 2768 goto unlock;
2761 } 2769 }
2762 2770 rcu_assign_pointer(event->buffer, buffer);
2763 perf_mmap_data_init(event, data);
2764 if (vma->vm_flags & VM_WRITE)
2765 event->data->writable = 1;
2766 2771
2767 atomic_long_add(user_extra, &user->locked_vm); 2772 atomic_long_add(user_extra, &user->locked_vm);
2768 event->mmap_locked = extra; 2773 event->mmap_locked = extra;
@@ -2941,11 +2946,6 @@ __weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
2941 return NULL; 2946 return NULL;
2942} 2947}
2943 2948
2944__weak
2945void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
2946{
2947}
2948
2949 2949
2950/* 2950/*
2951 * We assume there is only KVM supporting the callbacks. 2951 * We assume there is only KVM supporting the callbacks.
@@ -2971,15 +2971,15 @@ EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
2971/* 2971/*
2972 * Output 2972 * Output
2973 */ 2973 */
2974static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail, 2974static bool perf_output_space(struct perf_buffer *buffer, unsigned long tail,
2975 unsigned long offset, unsigned long head) 2975 unsigned long offset, unsigned long head)
2976{ 2976{
2977 unsigned long mask; 2977 unsigned long mask;
2978 2978
2979 if (!data->writable) 2979 if (!buffer->writable)
2980 return true; 2980 return true;
2981 2981
2982 mask = perf_data_size(data) - 1; 2982 mask = perf_data_size(buffer) - 1;
2983 2983
2984 offset = (offset - tail) & mask; 2984 offset = (offset - tail) & mask;
2985 head = (head - tail) & mask; 2985 head = (head - tail) & mask;
@@ -2992,7 +2992,7 @@ static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail,
2992 2992
2993static void perf_output_wakeup(struct perf_output_handle *handle) 2993static void perf_output_wakeup(struct perf_output_handle *handle)
2994{ 2994{
2995 atomic_set(&handle->data->poll, POLL_IN); 2995 atomic_set(&handle->buffer->poll, POLL_IN);
2996 2996
2997 if (handle->nmi) { 2997 if (handle->nmi) {
2998 handle->event->pending_wakeup = 1; 2998 handle->event->pending_wakeup = 1;
@@ -3012,45 +3012,45 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
3012 */ 3012 */
3013static void perf_output_get_handle(struct perf_output_handle *handle) 3013static void perf_output_get_handle(struct perf_output_handle *handle)
3014{ 3014{
3015 struct perf_mmap_data *data = handle->data; 3015 struct perf_buffer *buffer = handle->buffer;
3016 3016
3017 preempt_disable(); 3017 preempt_disable();
3018 local_inc(&data->nest); 3018 local_inc(&buffer->nest);
3019 handle->wakeup = local_read(&data->wakeup); 3019 handle->wakeup = local_read(&buffer->wakeup);
3020} 3020}
3021 3021
3022static void perf_output_put_handle(struct perf_output_handle *handle) 3022static void perf_output_put_handle(struct perf_output_handle *handle)
3023{ 3023{
3024 struct perf_mmap_data *data = handle->data; 3024 struct perf_buffer *buffer = handle->buffer;
3025 unsigned long head; 3025 unsigned long head;
3026 3026
3027again: 3027again:
3028 head = local_read(&data->head); 3028 head = local_read(&buffer->head);
3029 3029
3030 /* 3030 /*
3031 * IRQ/NMI can happen here, which means we can miss a head update. 3031 * IRQ/NMI can happen here, which means we can miss a head update.
3032 */ 3032 */
3033 3033
3034 if (!local_dec_and_test(&data->nest)) 3034 if (!local_dec_and_test(&buffer->nest))
3035 goto out; 3035 goto out;
3036 3036
3037 /* 3037 /*
3038 * Publish the known good head. Rely on the full barrier implied 3038 * Publish the known good head. Rely on the full barrier implied
3039 * by atomic_dec_and_test() order the data->head read and this 3039 * by atomic_dec_and_test() order the buffer->head read and this
3040 * write. 3040 * write.
3041 */ 3041 */
3042 data->user_page->data_head = head; 3042 buffer->user_page->data_head = head;
3043 3043
3044 /* 3044 /*
3045 * Now check if we missed an update, rely on the (compiler) 3045 * Now check if we missed an update, rely on the (compiler)
3046 * barrier in atomic_dec_and_test() to re-read data->head. 3046 * barrier in atomic_dec_and_test() to re-read buffer->head.
3047 */ 3047 */
3048 if (unlikely(head != local_read(&data->head))) { 3048 if (unlikely(head != local_read(&buffer->head))) {
3049 local_inc(&data->nest); 3049 local_inc(&buffer->nest);
3050 goto again; 3050 goto again;
3051 } 3051 }
3052 3052
3053 if (handle->wakeup != local_read(&data->wakeup)) 3053 if (handle->wakeup != local_read(&buffer->wakeup))
3054 perf_output_wakeup(handle); 3054 perf_output_wakeup(handle);
3055 3055
3056 out: 3056 out:
@@ -3070,12 +3070,12 @@ __always_inline void perf_output_copy(struct perf_output_handle *handle,
3070 buf += size; 3070 buf += size;
3071 handle->size -= size; 3071 handle->size -= size;
3072 if (!handle->size) { 3072 if (!handle->size) {
3073 struct perf_mmap_data *data = handle->data; 3073 struct perf_buffer *buffer = handle->buffer;
3074 3074
3075 handle->page++; 3075 handle->page++;
3076 handle->page &= data->nr_pages - 1; 3076 handle->page &= buffer->nr_pages - 1;
3077 handle->addr = data->data_pages[handle->page]; 3077 handle->addr = buffer->data_pages[handle->page];
3078 handle->size = PAGE_SIZE << page_order(data); 3078 handle->size = PAGE_SIZE << page_order(buffer);
3079 } 3079 }
3080 } while (len); 3080 } while (len);
3081} 3081}
@@ -3084,7 +3084,7 @@ int perf_output_begin(struct perf_output_handle *handle,
3084 struct perf_event *event, unsigned int size, 3084 struct perf_event *event, unsigned int size,
3085 int nmi, int sample) 3085 int nmi, int sample)
3086{ 3086{
3087 struct perf_mmap_data *data; 3087 struct perf_buffer *buffer;
3088 unsigned long tail, offset, head; 3088 unsigned long tail, offset, head;
3089 int have_lost; 3089 int have_lost;
3090 struct { 3090 struct {
@@ -3100,19 +3100,19 @@ int perf_output_begin(struct perf_output_handle *handle,
3100 if (event->parent) 3100 if (event->parent)
3101 event = event->parent; 3101 event = event->parent;
3102 3102
3103 data = rcu_dereference(event->data); 3103 buffer = rcu_dereference(event->buffer);
3104 if (!data) 3104 if (!buffer)
3105 goto out; 3105 goto out;
3106 3106
3107 handle->data = data; 3107 handle->buffer = buffer;
3108 handle->event = event; 3108 handle->event = event;
3109 handle->nmi = nmi; 3109 handle->nmi = nmi;
3110 handle->sample = sample; 3110 handle->sample = sample;
3111 3111
3112 if (!data->nr_pages) 3112 if (!buffer->nr_pages)
3113 goto out; 3113 goto out;
3114 3114
3115 have_lost = local_read(&data->lost); 3115 have_lost = local_read(&buffer->lost);
3116 if (have_lost) 3116 if (have_lost)
3117 size += sizeof(lost_event); 3117 size += sizeof(lost_event);
3118 3118
@@ -3124,30 +3124,30 @@ int perf_output_begin(struct perf_output_handle *handle,
3124 * tail pointer. So that all reads will be completed before the 3124 * tail pointer. So that all reads will be completed before the
3125 * write is issued. 3125 * write is issued.
3126 */ 3126 */
3127 tail = ACCESS_ONCE(data->user_page->data_tail); 3127 tail = ACCESS_ONCE(buffer->user_page->data_tail);
3128 smp_rmb(); 3128 smp_rmb();
3129 offset = head = local_read(&data->head); 3129 offset = head = local_read(&buffer->head);
3130 head += size; 3130 head += size;
3131 if (unlikely(!perf_output_space(data, tail, offset, head))) 3131 if (unlikely(!perf_output_space(buffer, tail, offset, head)))
3132 goto fail; 3132 goto fail;
3133 } while (local_cmpxchg(&data->head, offset, head) != offset); 3133 } while (local_cmpxchg(&buffer->head, offset, head) != offset);
3134 3134
3135 if (head - local_read(&data->wakeup) > data->watermark) 3135 if (head - local_read(&buffer->wakeup) > buffer->watermark)
3136 local_add(data->watermark, &data->wakeup); 3136 local_add(buffer->watermark, &buffer->wakeup);
3137 3137
3138 handle->page = offset >> (PAGE_SHIFT + page_order(data)); 3138 handle->page = offset >> (PAGE_SHIFT + page_order(buffer));
3139 handle->page &= data->nr_pages - 1; 3139 handle->page &= buffer->nr_pages - 1;
3140 handle->size = offset & ((PAGE_SIZE << page_order(data)) - 1); 3140 handle->size = offset & ((PAGE_SIZE << page_order(buffer)) - 1);
3141 handle->addr = data->data_pages[handle->page]; 3141 handle->addr = buffer->data_pages[handle->page];
3142 handle->addr += handle->size; 3142 handle->addr += handle->size;
3143 handle->size = (PAGE_SIZE << page_order(data)) - handle->size; 3143 handle->size = (PAGE_SIZE << page_order(buffer)) - handle->size;
3144 3144
3145 if (have_lost) { 3145 if (have_lost) {
3146 lost_event.header.type = PERF_RECORD_LOST; 3146 lost_event.header.type = PERF_RECORD_LOST;
3147 lost_event.header.misc = 0; 3147 lost_event.header.misc = 0;
3148 lost_event.header.size = sizeof(lost_event); 3148 lost_event.header.size = sizeof(lost_event);
3149 lost_event.id = event->id; 3149 lost_event.id = event->id;
3150 lost_event.lost = local_xchg(&data->lost, 0); 3150 lost_event.lost = local_xchg(&buffer->lost, 0);
3151 3151
3152 perf_output_put(handle, lost_event); 3152 perf_output_put(handle, lost_event);
3153 } 3153 }
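
perf_output_begin() reserves buffer space by reading the consumer tail, advancing a prospective head, and publishing it with a compare-and-swap, retrying if another writer won the race. A simplified userspace sketch of that reservation loop follows; the buffer size, the reserve() name and the omitted watermark/lost handling are all assumptions of the sketch:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define BUF_SIZE 4096UL			/* must stay a power of two */

static atomic_ulong head;
static atomic_ulong tail;

static bool reserve(unsigned long size, unsigned long *offset)
{
	unsigned long old, new;

	do {
		old = atomic_load(&head);
		new = old + size;
		if (new - atomic_load(&tail) > BUF_SIZE)
			return false;		/* would overwrite unread data */
	} while (!atomic_compare_exchange_weak(&head, &old, new));

	*offset = old & (BUF_SIZE - 1);		/* power-of-two mask, no modulo */
	return true;
}

int main(void)
{
	unsigned long off;

	if (reserve(64, &off))
		printf("record starts at offset %lu\n", off);
	return 0;
}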
@@ -3155,7 +3155,7 @@ int perf_output_begin(struct perf_output_handle *handle,
3155 return 0; 3155 return 0;
3156 3156
3157fail: 3157fail:
3158 local_inc(&data->lost); 3158 local_inc(&buffer->lost);
3159 perf_output_put_handle(handle); 3159 perf_output_put_handle(handle);
3160out: 3160out:
3161 rcu_read_unlock(); 3161 rcu_read_unlock();
@@ -3166,15 +3166,15 @@ out:
3166void perf_output_end(struct perf_output_handle *handle) 3166void perf_output_end(struct perf_output_handle *handle)
3167{ 3167{
3168 struct perf_event *event = handle->event; 3168 struct perf_event *event = handle->event;
3169 struct perf_mmap_data *data = handle->data; 3169 struct perf_buffer *buffer = handle->buffer;
3170 3170
3171 int wakeup_events = event->attr.wakeup_events; 3171 int wakeup_events = event->attr.wakeup_events;
3172 3172
3173 if (handle->sample && wakeup_events) { 3173 if (handle->sample && wakeup_events) {
3174 int events = local_inc_return(&data->events); 3174 int events = local_inc_return(&buffer->events);
3175 if (events >= wakeup_events) { 3175 if (events >= wakeup_events) {
3176 local_sub(wakeup_events, &data->events); 3176 local_sub(wakeup_events, &buffer->events);
3177 local_inc(&data->wakeup); 3177 local_inc(&buffer->wakeup);
3178 } 3178 }
3179 } 3179 }
3180 3180
@@ -3211,7 +3211,7 @@ static void perf_output_read_one(struct perf_output_handle *handle,
3211 u64 values[4]; 3211 u64 values[4];
3212 int n = 0; 3212 int n = 0;
3213 3213
3214 values[n++] = atomic64_read(&event->count); 3214 values[n++] = perf_event_count(event);
3215 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { 3215 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
3216 values[n++] = event->total_time_enabled + 3216 values[n++] = event->total_time_enabled +
3217 atomic64_read(&event->child_total_time_enabled); 3217 atomic64_read(&event->child_total_time_enabled);
@@ -3248,7 +3248,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
3248 if (leader != event) 3248 if (leader != event)
3249 leader->pmu->read(leader); 3249 leader->pmu->read(leader);
3250 3250
3251 values[n++] = atomic64_read(&leader->count); 3251 values[n++] = perf_event_count(leader);
3252 if (read_format & PERF_FORMAT_ID) 3252 if (read_format & PERF_FORMAT_ID)
3253 values[n++] = primary_event_id(leader); 3253 values[n++] = primary_event_id(leader);
3254 3254
@@ -3260,7 +3260,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
3260 if (sub != event) 3260 if (sub != event)
3261 sub->pmu->read(sub); 3261 sub->pmu->read(sub);
3262 3262
3263 values[n++] = atomic64_read(&sub->count); 3263 values[n++] = perf_event_count(sub);
3264 if (read_format & PERF_FORMAT_ID) 3264 if (read_format & PERF_FORMAT_ID)
3265 values[n++] = primary_event_id(sub); 3265 values[n++] = primary_event_id(sub);
3266 3266
@@ -3491,7 +3491,7 @@ perf_event_read_event(struct perf_event *event,
3491/* 3491/*
3492 * task tracking -- fork/exit 3492 * task tracking -- fork/exit
3493 * 3493 *
3494 * enabled by: attr.comm | attr.mmap | attr.task 3494 * enabled by: attr.comm | attr.mmap | attr.mmap_data | attr.task
3495 */ 3495 */
3496 3496
3497struct perf_task_event { 3497struct perf_task_event {
@@ -3541,7 +3541,8 @@ static int perf_event_task_match(struct perf_event *event)
3541 if (event->cpu != -1 && event->cpu != smp_processor_id()) 3541 if (event->cpu != -1 && event->cpu != smp_processor_id())
3542 return 0; 3542 return 0;
3543 3543
3544 if (event->attr.comm || event->attr.mmap || event->attr.task) 3544 if (event->attr.comm || event->attr.mmap ||
3545 event->attr.mmap_data || event->attr.task)
3545 return 1; 3546 return 1;
3546 3547
3547 return 0; 3548 return 0;
@@ -3766,7 +3767,8 @@ static void perf_event_mmap_output(struct perf_event *event,
3766} 3767}
3767 3768
3768static int perf_event_mmap_match(struct perf_event *event, 3769static int perf_event_mmap_match(struct perf_event *event,
3769 struct perf_mmap_event *mmap_event) 3770 struct perf_mmap_event *mmap_event,
3771 int executable)
3770{ 3772{
3771 if (event->state < PERF_EVENT_STATE_INACTIVE) 3773 if (event->state < PERF_EVENT_STATE_INACTIVE)
3772 return 0; 3774 return 0;
@@ -3774,19 +3776,21 @@ static int perf_event_mmap_match(struct perf_event *event,
3774 if (event->cpu != -1 && event->cpu != smp_processor_id()) 3776 if (event->cpu != -1 && event->cpu != smp_processor_id())
3775 return 0; 3777 return 0;
3776 3778
3777 if (event->attr.mmap) 3779 if ((!executable && event->attr.mmap_data) ||
3780 (executable && event->attr.mmap))
3778 return 1; 3781 return 1;
3779 3782
3780 return 0; 3783 return 0;
3781} 3784}
3782 3785
3783static void perf_event_mmap_ctx(struct perf_event_context *ctx, 3786static void perf_event_mmap_ctx(struct perf_event_context *ctx,
3784 struct perf_mmap_event *mmap_event) 3787 struct perf_mmap_event *mmap_event,
3788 int executable)
3785{ 3789{
3786 struct perf_event *event; 3790 struct perf_event *event;
3787 3791
3788 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 3792 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
3789 if (perf_event_mmap_match(event, mmap_event)) 3793 if (perf_event_mmap_match(event, mmap_event, executable))
3790 perf_event_mmap_output(event, mmap_event); 3794 perf_event_mmap_output(event, mmap_event);
3791 } 3795 }
3792} 3796}
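A match now depends on whether the mapping is executable: attr.mmap keeps its old meaning (PROT_EXEC mappings) and the new attr.mmap_data bit requests records for non-executable mappings. As a hypothetical userspace sketch, not part of this patch, an event asking for both kinds of records could be opened like this:

    #include <string.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/perf_event.h>

    static int open_mmap_events(void)
    {
            struct perf_event_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.size      = sizeof(attr);
            attr.type      = PERF_TYPE_SOFTWARE;
            attr.config    = PERF_COUNT_SW_CPU_CLOCK;
            attr.mmap      = 1;     /* executable mappings, as before */
            attr.mmap_data = 1;     /* new: data (non-exec) mappings   */

            /* pid 0 = current task, cpu -1 = any CPU, no group, no flags */
            return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    }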
@@ -3830,6 +3834,14 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
3830 if (!vma->vm_mm) { 3834 if (!vma->vm_mm) {
3831 name = strncpy(tmp, "[vdso]", sizeof(tmp)); 3835 name = strncpy(tmp, "[vdso]", sizeof(tmp));
3832 goto got_name; 3836 goto got_name;
3837 } else if (vma->vm_start <= vma->vm_mm->start_brk &&
3838 vma->vm_end >= vma->vm_mm->brk) {
3839 name = strncpy(tmp, "[heap]", sizeof(tmp));
3840 goto got_name;
3841 } else if (vma->vm_start <= vma->vm_mm->start_stack &&
3842 vma->vm_end >= vma->vm_mm->start_stack) {
3843 name = strncpy(tmp, "[stack]", sizeof(tmp));
3844 goto got_name;
3833 } 3845 }
3834 3846
3835 name = strncpy(tmp, "//anon", sizeof(tmp)); 3847 name = strncpy(tmp, "//anon", sizeof(tmp));
@@ -3846,17 +3858,17 @@ got_name:
3846 3858
3847 rcu_read_lock(); 3859 rcu_read_lock();
3848 cpuctx = &get_cpu_var(perf_cpu_context); 3860 cpuctx = &get_cpu_var(perf_cpu_context);
3849 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event); 3861 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, vma->vm_flags & VM_EXEC);
3850 ctx = rcu_dereference(current->perf_event_ctxp); 3862 ctx = rcu_dereference(current->perf_event_ctxp);
3851 if (ctx) 3863 if (ctx)
3852 perf_event_mmap_ctx(ctx, mmap_event); 3864 perf_event_mmap_ctx(ctx, mmap_event, vma->vm_flags & VM_EXEC);
3853 put_cpu_var(perf_cpu_context); 3865 put_cpu_var(perf_cpu_context);
3854 rcu_read_unlock(); 3866 rcu_read_unlock();
3855 3867
3856 kfree(buf); 3868 kfree(buf);
3857} 3869}
3858 3870
3859void __perf_event_mmap(struct vm_area_struct *vma) 3871void perf_event_mmap(struct vm_area_struct *vma)
3860{ 3872{
3861 struct perf_mmap_event mmap_event; 3873 struct perf_mmap_event mmap_event;
3862 3874
@@ -4018,14 +4030,14 @@ static u64 perf_swevent_set_period(struct perf_event *event)
4018 hwc->last_period = hwc->sample_period; 4030 hwc->last_period = hwc->sample_period;
4019 4031
4020again: 4032again:
4021 old = val = atomic64_read(&hwc->period_left); 4033 old = val = local64_read(&hwc->period_left);
4022 if (val < 0) 4034 if (val < 0)
4023 return 0; 4035 return 0;
4024 4036
4025 nr = div64_u64(period + val, period); 4037 nr = div64_u64(period + val, period);
4026 offset = nr * period; 4038 offset = nr * period;
4027 val -= offset; 4039 val -= offset;
4028 if (atomic64_cmpxchg(&hwc->period_left, old, val) != old) 4040 if (local64_cmpxchg(&hwc->period_left, old, val) != old)
4029 goto again; 4041 goto again;
4030 4042
4031 return nr; 4043 return nr;
@@ -4064,7 +4076,7 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
4064{ 4076{
4065 struct hw_perf_event *hwc = &event->hw; 4077 struct hw_perf_event *hwc = &event->hw;
4066 4078
4067 atomic64_add(nr, &event->count); 4079 local64_add(nr, &event->count);
4068 4080
4069 if (!regs) 4081 if (!regs)
4070 return; 4082 return;
@@ -4075,7 +4087,7 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
4075 if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) 4087 if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
4076 return perf_swevent_overflow(event, 1, nmi, data, regs); 4088 return perf_swevent_overflow(event, 1, nmi, data, regs);
4077 4089
4078 if (atomic64_add_negative(nr, &hwc->period_left)) 4090 if (local64_add_negative(nr, &hwc->period_left))
4079 return; 4091 return;
4080 4092
4081 perf_swevent_overflow(event, 0, nmi, data, regs); 4093 perf_swevent_overflow(event, 0, nmi, data, regs);
@@ -4213,14 +4225,12 @@ int perf_swevent_get_recursion_context(void)
4213} 4225}
4214EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); 4226EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
4215 4227
4216void perf_swevent_put_recursion_context(int rctx) 4228void inline perf_swevent_put_recursion_context(int rctx)
4217{ 4229{
4218 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); 4230 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
4219 barrier(); 4231 barrier();
4220 cpuctx->recursion[rctx]--; 4232 cpuctx->recursion[rctx]--;
4221} 4233}
4222EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
4223
4224 4234
4225void __perf_sw_event(u32 event_id, u64 nr, int nmi, 4235void __perf_sw_event(u32 event_id, u64 nr, int nmi,
4226 struct pt_regs *regs, u64 addr) 4236 struct pt_regs *regs, u64 addr)
@@ -4368,8 +4378,8 @@ static void cpu_clock_perf_event_update(struct perf_event *event)
4368 u64 now; 4378 u64 now;
4369 4379
4370 now = cpu_clock(cpu); 4380 now = cpu_clock(cpu);
4371 prev = atomic64_xchg(&event->hw.prev_count, now); 4381 prev = local64_xchg(&event->hw.prev_count, now);
4372 atomic64_add(now - prev, &event->count); 4382 local64_add(now - prev, &event->count);
4373} 4383}
4374 4384
4375static int cpu_clock_perf_event_enable(struct perf_event *event) 4385static int cpu_clock_perf_event_enable(struct perf_event *event)
@@ -4377,7 +4387,7 @@ static int cpu_clock_perf_event_enable(struct perf_event *event)
4377 struct hw_perf_event *hwc = &event->hw; 4387 struct hw_perf_event *hwc = &event->hw;
4378 int cpu = raw_smp_processor_id(); 4388 int cpu = raw_smp_processor_id();
4379 4389
4380 atomic64_set(&hwc->prev_count, cpu_clock(cpu)); 4390 local64_set(&hwc->prev_count, cpu_clock(cpu));
4381 perf_swevent_start_hrtimer(event); 4391 perf_swevent_start_hrtimer(event);
4382 4392
4383 return 0; 4393 return 0;
@@ -4409,9 +4419,9 @@ static void task_clock_perf_event_update(struct perf_event *event, u64 now)
4409 u64 prev; 4419 u64 prev;
4410 s64 delta; 4420 s64 delta;
4411 4421
4412 prev = atomic64_xchg(&event->hw.prev_count, now); 4422 prev = local64_xchg(&event->hw.prev_count, now);
4413 delta = now - prev; 4423 delta = now - prev;
4414 atomic64_add(delta, &event->count); 4424 local64_add(delta, &event->count);
4415} 4425}
4416 4426
4417static int task_clock_perf_event_enable(struct perf_event *event) 4427static int task_clock_perf_event_enable(struct perf_event *event)
@@ -4421,7 +4431,7 @@ static int task_clock_perf_event_enable(struct perf_event *event)
4421 4431
4422 now = event->ctx->time; 4432 now = event->ctx->time;
4423 4433
4424 atomic64_set(&hwc->prev_count, now); 4434 local64_set(&hwc->prev_count, now);
4425 4435
4426 perf_swevent_start_hrtimer(event); 4436 perf_swevent_start_hrtimer(event);
4427 4437
@@ -4601,7 +4611,7 @@ static int perf_tp_event_match(struct perf_event *event,
4601} 4611}
4602 4612
4603void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, 4613void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
4604 struct pt_regs *regs, struct hlist_head *head) 4614 struct pt_regs *regs, struct hlist_head *head, int rctx)
4605{ 4615{
4606 struct perf_sample_data data; 4616 struct perf_sample_data data;
4607 struct perf_event *event; 4617 struct perf_event *event;
@@ -4615,12 +4625,12 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
4615 perf_sample_data_init(&data, addr); 4625 perf_sample_data_init(&data, addr);
4616 data.raw = &raw; 4626 data.raw = &raw;
4617 4627
4618 rcu_read_lock();
4619 hlist_for_each_entry_rcu(event, node, head, hlist_entry) { 4628 hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
4620 if (perf_tp_event_match(event, &data, regs)) 4629 if (perf_tp_event_match(event, &data, regs))
4621 perf_swevent_add(event, count, 1, &data, regs); 4630 perf_swevent_add(event, count, 1, &data, regs);
4622 } 4631 }
4623 rcu_read_unlock(); 4632
4633 perf_swevent_put_recursion_context(rctx);
4624} 4634}
4625EXPORT_SYMBOL_GPL(perf_tp_event); 4635EXPORT_SYMBOL_GPL(perf_tp_event);
4626 4636
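perf_tp_event() now receives the swevent recursion context from its caller and releases it itself (the rcu_read_lock()/rcu_read_unlock() pair is gone and perf_swevent_put_recursion_context() is called at the end). A tracepoint dispatch path is therefore expected to look roughly like the fragment below; only the two recursion helpers and perf_tp_event() itself are real, the surrounding locals are illustrative:

    int rctx;

    rctx = perf_swevent_get_recursion_context();
    if (rctx < 0)
            return;         /* already handling a swevent at this level */

    /* ... build the raw record for this tracepoint hit ... */

    perf_tp_event(addr, count, record, entry_size, regs, head, rctx);
    /* rctx is dropped inside perf_tp_event() */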
@@ -4864,7 +4874,7 @@ perf_event_alloc(struct perf_event_attr *attr,
4864 hwc->sample_period = 1; 4874 hwc->sample_period = 1;
4865 hwc->last_period = hwc->sample_period; 4875 hwc->last_period = hwc->sample_period;
4866 4876
4867 atomic64_set(&hwc->period_left, hwc->sample_period); 4877 local64_set(&hwc->period_left, hwc->sample_period);
4868 4878
4869 /* 4879 /*
4870 * we currently do not support PERF_FORMAT_GROUP on inherited events 4880 * we currently do not support PERF_FORMAT_GROUP on inherited events
@@ -4913,7 +4923,7 @@ done:
4913 4923
4914 if (!event->parent) { 4924 if (!event->parent) {
4915 atomic_inc(&nr_events); 4925 atomic_inc(&nr_events);
4916 if (event->attr.mmap) 4926 if (event->attr.mmap || event->attr.mmap_data)
4917 atomic_inc(&nr_mmap_events); 4927 atomic_inc(&nr_mmap_events);
4918 if (event->attr.comm) 4928 if (event->attr.comm)
4919 atomic_inc(&nr_comm_events); 4929 atomic_inc(&nr_comm_events);
@@ -5007,7 +5017,7 @@ err_size:
5007static int 5017static int
5008perf_event_set_output(struct perf_event *event, struct perf_event *output_event) 5018perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
5009{ 5019{
5010 struct perf_mmap_data *data = NULL, *old_data = NULL; 5020 struct perf_buffer *buffer = NULL, *old_buffer = NULL;
5011 int ret = -EINVAL; 5021 int ret = -EINVAL;
5012 5022
5013 if (!output_event) 5023 if (!output_event)
@@ -5037,19 +5047,19 @@ set:
5037 5047
5038 if (output_event) { 5048 if (output_event) {
5039 /* get the buffer we want to redirect to */ 5049 /* get the buffer we want to redirect to */
5040 data = perf_mmap_data_get(output_event); 5050 buffer = perf_buffer_get(output_event);
5041 if (!data) 5051 if (!buffer)
5042 goto unlock; 5052 goto unlock;
5043 } 5053 }
5044 5054
5045 old_data = event->data; 5055 old_buffer = event->buffer;
5046 rcu_assign_pointer(event->data, data); 5056 rcu_assign_pointer(event->buffer, buffer);
5047 ret = 0; 5057 ret = 0;
5048unlock: 5058unlock:
5049 mutex_unlock(&event->mmap_mutex); 5059 mutex_unlock(&event->mmap_mutex);
5050 5060
5051 if (old_data) 5061 if (old_buffer)
5052 perf_mmap_data_put(old_data); 5062 perf_buffer_put(old_buffer);
5053out: 5063out:
5054 return ret; 5064 return ret;
5055} 5065}
@@ -5298,7 +5308,7 @@ inherit_event(struct perf_event *parent_event,
5298 hwc->sample_period = sample_period; 5308 hwc->sample_period = sample_period;
5299 hwc->last_period = sample_period; 5309 hwc->last_period = sample_period;
5300 5310
5301 atomic64_set(&hwc->period_left, sample_period); 5311 local64_set(&hwc->period_left, sample_period);
5302 } 5312 }
5303 5313
5304 child_event->overflow_handler = parent_event->overflow_handler; 5314 child_event->overflow_handler = parent_event->overflow_handler;
@@ -5359,12 +5369,12 @@ static void sync_child_event(struct perf_event *child_event,
5359 if (child_event->attr.inherit_stat) 5369 if (child_event->attr.inherit_stat)
5360 perf_event_read_event(child_event, child); 5370 perf_event_read_event(child_event, child);
5361 5371
5362 child_val = atomic64_read(&child_event->count); 5372 child_val = perf_event_count(child_event);
5363 5373
5364 /* 5374 /*
5365 * Add back the child's count to the parent's count: 5375 * Add back the child's count to the parent's count:
5366 */ 5376 */
5367 atomic64_add(child_val, &parent_event->count); 5377 atomic64_add(child_val, &parent_event->child_count);
5368 atomic64_add(child_event->total_time_enabled, 5378 atomic64_add(child_event->total_time_enabled,
5369 &parent_event->child_total_time_enabled); 5379 &parent_event->child_total_time_enabled);
5370 atomic64_add(child_event->total_time_running, 5380 atomic64_add(child_event->total_time_running,
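The reads above now go through perf_event_count() rather than event->count, and the last hunk folds a dead child's value into a separate child_count instead of the parent's own counter. A minimal sketch of what that helper is expected to look like, assuming the count field is now a local64_t and child totals accumulate in an atomic64_t child_count:

    static u64 perf_event_count(struct perf_event *event)
    {
            /* the parent's own count plus everything inherited from
             * children that have already exited */
            return local64_read(&event->count) +
                   atomic64_read(&event->child_count);
    }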
diff --git a/kernel/sched.c b/kernel/sched.c
index f8b8996228dd..3c5d34a4e932 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3717,7 +3717,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
3717 * off of preempt_enable. Kernel preemptions off return from interrupt 3717 * off of preempt_enable. Kernel preemptions off return from interrupt
3718 * occur there and call schedule directly. 3718 * occur there and call schedule directly.
3719 */ 3719 */
3720asmlinkage void __sched preempt_schedule(void) 3720asmlinkage void __sched notrace preempt_schedule(void)
3721{ 3721{
3722 struct thread_info *ti = current_thread_info(); 3722 struct thread_info *ti = current_thread_info();
3723 3723
@@ -3729,9 +3729,9 @@ asmlinkage void __sched preempt_schedule(void)
3729 return; 3729 return;
3730 3730
3731 do { 3731 do {
3732 add_preempt_count(PREEMPT_ACTIVE); 3732 add_preempt_count_notrace(PREEMPT_ACTIVE);
3733 schedule(); 3733 schedule();
3734 sub_preempt_count(PREEMPT_ACTIVE); 3734 sub_preempt_count_notrace(PREEMPT_ACTIVE);
3735 3735
3736 /* 3736 /*
3737 * Check again in case we missed a preemption opportunity 3737 * Check again in case we missed a preemption opportunity
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 8b1797c4545b..f669092fdead 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -229,23 +229,6 @@ config FTRACE_SYSCALLS
229 help 229 help
230 Basic tracer to catch the syscall entry and exit events. 230 Basic tracer to catch the syscall entry and exit events.
231 231
232config BOOT_TRACER
233 bool "Trace boot initcalls"
234 select GENERIC_TRACER
235 select CONTEXT_SWITCH_TRACER
236 help
237 This tracer helps developers to optimize boot times: it records
238 the timings of the initcalls and traces key events and the identity
239 of tasks that can cause boot delays, such as context-switches.
240
241 Its aim is to be parsed by the scripts/bootgraph.pl tool to
242 produce pretty graphics about boot inefficiencies, giving a visual
243 representation of the delays during initcalls - but the raw
244 /debug/tracing/trace text output is readable too.
245
246 You must pass in initcall_debug and ftrace=initcall to the kernel
247 command line to enable this on bootup.
248
249config TRACE_BRANCH_PROFILING 232config TRACE_BRANCH_PROFILING
250 bool 233 bool
251 select GENERIC_TRACER 234 select GENERIC_TRACER
@@ -371,26 +354,6 @@ config STACK_TRACER
371 354
372 Say N if unsure. 355 Say N if unsure.
373 356
374config KMEMTRACE
375 bool "Trace SLAB allocations"
376 select GENERIC_TRACER
377 help
378 kmemtrace provides tracing for slab allocator functions, such as
379 kmalloc, kfree, kmem_cache_alloc, kmem_cache_free, etc. Collected
380 data is then fed to the userspace application in order to analyse
381 allocation hotspots, internal fragmentation and so on, making it
382 possible to see how well an allocator performs, as well as debug
383 and profile kernel code.
384
385 This requires a userspace application to use. See
386 Documentation/trace/kmemtrace.txt for more information.
387
388 Saying Y will make the kernel somewhat larger and slower. However,
389 if you disable kmemtrace at run-time or boot-time, the performance
390 impact is minimal (depending on the arch the kernel is built for).
391
392 If unsure, say N.
393
394config WORKQUEUE_TRACER 357config WORKQUEUE_TRACER
395 bool "Trace workqueues" 358 bool "Trace workqueues"
396 select GENERIC_TRACER 359 select GENERIC_TRACER
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index ffb1a5b0550e..469a1c7555a5 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -38,10 +38,8 @@ obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
38obj-$(CONFIG_NOP_TRACER) += trace_nop.o 38obj-$(CONFIG_NOP_TRACER) += trace_nop.o
39obj-$(CONFIG_STACK_TRACER) += trace_stack.o 39obj-$(CONFIG_STACK_TRACER) += trace_stack.o
40obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o 40obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
41obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
42obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o 41obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
43obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o 42obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
44obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
45obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o 43obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
46obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o 44obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
47ifeq ($(CONFIG_BLOCK),y) 45ifeq ($(CONFIG_BLOCK),y)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 6d2cb14f9449..0d88ce9b9fb8 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1883,7 +1883,6 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
1883 struct hlist_head *hhd; 1883 struct hlist_head *hhd;
1884 struct hlist_node *n; 1884 struct hlist_node *n;
1885 unsigned long key; 1885 unsigned long key;
1886 int resched;
1887 1886
1888 key = hash_long(ip, FTRACE_HASH_BITS); 1887 key = hash_long(ip, FTRACE_HASH_BITS);
1889 1888
@@ -1897,12 +1896,12 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
1897 * period. This syncs the hash iteration and freeing of items 1896 * period. This syncs the hash iteration and freeing of items
1898 * on the hash. rcu_read_lock is too dangerous here. 1897 * on the hash. rcu_read_lock is too dangerous here.
1899 */ 1898 */
1900 resched = ftrace_preempt_disable(); 1899 preempt_disable_notrace();
1901 hlist_for_each_entry_rcu(entry, n, hhd, node) { 1900 hlist_for_each_entry_rcu(entry, n, hhd, node) {
1902 if (entry->ip == ip) 1901 if (entry->ip == ip)
1903 entry->ops->func(ip, parent_ip, &entry->data); 1902 entry->ops->func(ip, parent_ip, &entry->data);
1904 } 1903 }
1905 ftrace_preempt_enable(resched); 1904 preempt_enable_notrace();
1906} 1905}
1907 1906
1908static struct ftrace_ops trace_probe_ops __read_mostly = 1907static struct ftrace_ops trace_probe_ops __read_mostly =
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
deleted file mode 100644
index bbfc1bb1660b..000000000000
--- a/kernel/trace/kmemtrace.c
+++ /dev/null
@@ -1,529 +0,0 @@
1/*
2 * Memory allocator tracing
3 *
4 * Copyright (C) 2008 Eduard - Gabriel Munteanu
5 * Copyright (C) 2008 Pekka Enberg <penberg@cs.helsinki.fi>
6 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
7 */
8
9#include <linux/tracepoint.h>
10#include <linux/seq_file.h>
11#include <linux/debugfs.h>
12#include <linux/dcache.h>
13#include <linux/fs.h>
14
15#include <linux/kmemtrace.h>
16
17#include "trace_output.h"
18#include "trace.h"
19
20/* Select an alternative, minimalistic output instead of the original one */
21#define TRACE_KMEM_OPT_MINIMAL 0x1
22
23static struct tracer_opt kmem_opts[] = {
24 /* Default disable the minimalistic output */
25 { TRACER_OPT(kmem_minimalistic, TRACE_KMEM_OPT_MINIMAL) },
26 { }
27};
28
29static struct tracer_flags kmem_tracer_flags = {
30 .val = 0,
31 .opts = kmem_opts
32};
33
34static struct trace_array *kmemtrace_array;
35
36/* Trace allocations */
37static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id,
38 unsigned long call_site,
39 const void *ptr,
40 size_t bytes_req,
41 size_t bytes_alloc,
42 gfp_t gfp_flags,
43 int node)
44{
45 struct ftrace_event_call *call = &event_kmem_alloc;
46 struct trace_array *tr = kmemtrace_array;
47 struct kmemtrace_alloc_entry *entry;
48 struct ring_buffer_event *event;
49
50 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry));
51 if (!event)
52 return;
53
54 entry = ring_buffer_event_data(event);
55 tracing_generic_entry_update(&entry->ent, 0, 0);
56
57 entry->ent.type = TRACE_KMEM_ALLOC;
58 entry->type_id = type_id;
59 entry->call_site = call_site;
60 entry->ptr = ptr;
61 entry->bytes_req = bytes_req;
62 entry->bytes_alloc = bytes_alloc;
63 entry->gfp_flags = gfp_flags;
64 entry->node = node;
65
66 if (!filter_check_discard(call, entry, tr->buffer, event))
67 ring_buffer_unlock_commit(tr->buffer, event);
68
69 trace_wake_up();
70}
71
72static inline void kmemtrace_free(enum kmemtrace_type_id type_id,
73 unsigned long call_site,
74 const void *ptr)
75{
76 struct ftrace_event_call *call = &event_kmem_free;
77 struct trace_array *tr = kmemtrace_array;
78 struct kmemtrace_free_entry *entry;
79 struct ring_buffer_event *event;
80
81 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry));
82 if (!event)
83 return;
84 entry = ring_buffer_event_data(event);
85 tracing_generic_entry_update(&entry->ent, 0, 0);
86
87 entry->ent.type = TRACE_KMEM_FREE;
88 entry->type_id = type_id;
89 entry->call_site = call_site;
90 entry->ptr = ptr;
91
92 if (!filter_check_discard(call, entry, tr->buffer, event))
93 ring_buffer_unlock_commit(tr->buffer, event);
94
95 trace_wake_up();
96}
97
98static void kmemtrace_kmalloc(void *ignore,
99 unsigned long call_site,
100 const void *ptr,
101 size_t bytes_req,
102 size_t bytes_alloc,
103 gfp_t gfp_flags)
104{
105 kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr,
106 bytes_req, bytes_alloc, gfp_flags, -1);
107}
108
109static void kmemtrace_kmem_cache_alloc(void *ignore,
110 unsigned long call_site,
111 const void *ptr,
112 size_t bytes_req,
113 size_t bytes_alloc,
114 gfp_t gfp_flags)
115{
116 kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr,
117 bytes_req, bytes_alloc, gfp_flags, -1);
118}
119
120static void kmemtrace_kmalloc_node(void *ignore,
121 unsigned long call_site,
122 const void *ptr,
123 size_t bytes_req,
124 size_t bytes_alloc,
125 gfp_t gfp_flags,
126 int node)
127{
128 kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr,
129 bytes_req, bytes_alloc, gfp_flags, node);
130}
131
132static void kmemtrace_kmem_cache_alloc_node(void *ignore,
133 unsigned long call_site,
134 const void *ptr,
135 size_t bytes_req,
136 size_t bytes_alloc,
137 gfp_t gfp_flags,
138 int node)
139{
140 kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr,
141 bytes_req, bytes_alloc, gfp_flags, node);
142}
143
144static void
145kmemtrace_kfree(void *ignore, unsigned long call_site, const void *ptr)
146{
147 kmemtrace_free(KMEMTRACE_TYPE_KMALLOC, call_site, ptr);
148}
149
150static void kmemtrace_kmem_cache_free(void *ignore,
151 unsigned long call_site, const void *ptr)
152{
153 kmemtrace_free(KMEMTRACE_TYPE_CACHE, call_site, ptr);
154}
155
156static int kmemtrace_start_probes(void)
157{
158 int err;
159
160 err = register_trace_kmalloc(kmemtrace_kmalloc, NULL);
161 if (err)
162 return err;
163 err = register_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc, NULL);
164 if (err)
165 return err;
166 err = register_trace_kmalloc_node(kmemtrace_kmalloc_node, NULL);
167 if (err)
168 return err;
169 err = register_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node, NULL);
170 if (err)
171 return err;
172 err = register_trace_kfree(kmemtrace_kfree, NULL);
173 if (err)
174 return err;
175 err = register_trace_kmem_cache_free(kmemtrace_kmem_cache_free, NULL);
176
177 return err;
178}
179
180static void kmemtrace_stop_probes(void)
181{
182 unregister_trace_kmalloc(kmemtrace_kmalloc, NULL);
183 unregister_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc, NULL);
184 unregister_trace_kmalloc_node(kmemtrace_kmalloc_node, NULL);
185 unregister_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node, NULL);
186 unregister_trace_kfree(kmemtrace_kfree, NULL);
187 unregister_trace_kmem_cache_free(kmemtrace_kmem_cache_free, NULL);
188}
189
190static int kmem_trace_init(struct trace_array *tr)
191{
192 kmemtrace_array = tr;
193
194 tracing_reset_online_cpus(tr);
195
196 kmemtrace_start_probes();
197
198 return 0;
199}
200
201static void kmem_trace_reset(struct trace_array *tr)
202{
203 kmemtrace_stop_probes();
204}
205
206static void kmemtrace_headers(struct seq_file *s)
207{
208 /* Don't need headers for the original kmemtrace output */
209 if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL))
210 return;
211
212 seq_printf(s, "#\n");
213 seq_printf(s, "# ALLOC TYPE REQ GIVEN FLAGS "
214 " POINTER NODE CALLER\n");
215 seq_printf(s, "# FREE | | | | "
216 " | | | |\n");
217 seq_printf(s, "# |\n\n");
218}
219
220/*
221 * The following functions give the original output from kmemtrace,
222 * plus the origin CPU, since reordering occurs in-kernel now.
223 */
224
225#define KMEMTRACE_USER_ALLOC 0
226#define KMEMTRACE_USER_FREE 1
227
228struct kmemtrace_user_event {
229 u8 event_id;
230 u8 type_id;
231 u16 event_size;
232 u32 cpu;
233 u64 timestamp;
234 unsigned long call_site;
235 unsigned long ptr;
236};
237
238struct kmemtrace_user_event_alloc {
239 size_t bytes_req;
240 size_t bytes_alloc;
241 unsigned gfp_flags;
242 int node;
243};
244
245static enum print_line_t
246kmemtrace_print_alloc(struct trace_iterator *iter, int flags,
247 struct trace_event *event)
248{
249 struct trace_seq *s = &iter->seq;
250 struct kmemtrace_alloc_entry *entry;
251 int ret;
252
253 trace_assign_type(entry, iter->ent);
254
255 ret = trace_seq_printf(s, "type_id %d call_site %pF ptr %lu "
256 "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d\n",
257 entry->type_id, (void *)entry->call_site, (unsigned long)entry->ptr,
258 (unsigned long)entry->bytes_req, (unsigned long)entry->bytes_alloc,
259 (unsigned long)entry->gfp_flags, entry->node);
260
261 if (!ret)
262 return TRACE_TYPE_PARTIAL_LINE;
263 return TRACE_TYPE_HANDLED;
264}
265
266static enum print_line_t
267kmemtrace_print_free(struct trace_iterator *iter, int flags,
268 struct trace_event *event)
269{
270 struct trace_seq *s = &iter->seq;
271 struct kmemtrace_free_entry *entry;
272 int ret;
273
274 trace_assign_type(entry, iter->ent);
275
276 ret = trace_seq_printf(s, "type_id %d call_site %pF ptr %lu\n",
277 entry->type_id, (void *)entry->call_site,
278 (unsigned long)entry->ptr);
279
280 if (!ret)
281 return TRACE_TYPE_PARTIAL_LINE;
282 return TRACE_TYPE_HANDLED;
283}
284
285static enum print_line_t
286kmemtrace_print_alloc_user(struct trace_iterator *iter, int flags,
287 struct trace_event *event)
288{
289 struct trace_seq *s = &iter->seq;
290 struct kmemtrace_alloc_entry *entry;
291 struct kmemtrace_user_event *ev;
292 struct kmemtrace_user_event_alloc *ev_alloc;
293
294 trace_assign_type(entry, iter->ent);
295
296 ev = trace_seq_reserve(s, sizeof(*ev));
297 if (!ev)
298 return TRACE_TYPE_PARTIAL_LINE;
299
300 ev->event_id = KMEMTRACE_USER_ALLOC;
301 ev->type_id = entry->type_id;
302 ev->event_size = sizeof(*ev) + sizeof(*ev_alloc);
303 ev->cpu = iter->cpu;
304 ev->timestamp = iter->ts;
305 ev->call_site = entry->call_site;
306 ev->ptr = (unsigned long)entry->ptr;
307
308 ev_alloc = trace_seq_reserve(s, sizeof(*ev_alloc));
309 if (!ev_alloc)
310 return TRACE_TYPE_PARTIAL_LINE;
311
312 ev_alloc->bytes_req = entry->bytes_req;
313 ev_alloc->bytes_alloc = entry->bytes_alloc;
314 ev_alloc->gfp_flags = entry->gfp_flags;
315 ev_alloc->node = entry->node;
316
317 return TRACE_TYPE_HANDLED;
318}
319
320static enum print_line_t
321kmemtrace_print_free_user(struct trace_iterator *iter, int flags,
322 struct trace_event *event)
323{
324 struct trace_seq *s = &iter->seq;
325 struct kmemtrace_free_entry *entry;
326 struct kmemtrace_user_event *ev;
327
328 trace_assign_type(entry, iter->ent);
329
330 ev = trace_seq_reserve(s, sizeof(*ev));
331 if (!ev)
332 return TRACE_TYPE_PARTIAL_LINE;
333
334 ev->event_id = KMEMTRACE_USER_FREE;
335 ev->type_id = entry->type_id;
336 ev->event_size = sizeof(*ev);
337 ev->cpu = iter->cpu;
338 ev->timestamp = iter->ts;
339 ev->call_site = entry->call_site;
340 ev->ptr = (unsigned long)entry->ptr;
341
342 return TRACE_TYPE_HANDLED;
343}
344
345/* The following two provide a more minimalistic output */
346static enum print_line_t
347kmemtrace_print_alloc_compress(struct trace_iterator *iter)
348{
349 struct kmemtrace_alloc_entry *entry;
350 struct trace_seq *s = &iter->seq;
351 int ret;
352
353 trace_assign_type(entry, iter->ent);
354
355 /* Alloc entry */
356 ret = trace_seq_printf(s, " + ");
357 if (!ret)
358 return TRACE_TYPE_PARTIAL_LINE;
359
360 /* Type */
361 switch (entry->type_id) {
362 case KMEMTRACE_TYPE_KMALLOC:
363 ret = trace_seq_printf(s, "K ");
364 break;
365 case KMEMTRACE_TYPE_CACHE:
366 ret = trace_seq_printf(s, "C ");
367 break;
368 case KMEMTRACE_TYPE_PAGES:
369 ret = trace_seq_printf(s, "P ");
370 break;
371 default:
372 ret = trace_seq_printf(s, "? ");
373 }
374
375 if (!ret)
376 return TRACE_TYPE_PARTIAL_LINE;
377
378 /* Requested */
379 ret = trace_seq_printf(s, "%4zu ", entry->bytes_req);
380 if (!ret)
381 return TRACE_TYPE_PARTIAL_LINE;
382
383 /* Allocated */
384 ret = trace_seq_printf(s, "%4zu ", entry->bytes_alloc);
385 if (!ret)
386 return TRACE_TYPE_PARTIAL_LINE;
387
388 /* Flags
389 * TODO: would be better to print the names of the GFP flags
390 */
391 ret = trace_seq_printf(s, "%08x ", entry->gfp_flags);
392 if (!ret)
393 return TRACE_TYPE_PARTIAL_LINE;
394
395 /* Pointer to allocated */
396 ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr);
397 if (!ret)
398 return TRACE_TYPE_PARTIAL_LINE;
399
400 /* Node and call site*/
401 ret = trace_seq_printf(s, "%4d %pf\n", entry->node,
402 (void *)entry->call_site);
403 if (!ret)
404 return TRACE_TYPE_PARTIAL_LINE;
405
406 return TRACE_TYPE_HANDLED;
407}
408
409static enum print_line_t
410kmemtrace_print_free_compress(struct trace_iterator *iter)
411{
412 struct kmemtrace_free_entry *entry;
413 struct trace_seq *s = &iter->seq;
414 int ret;
415
416 trace_assign_type(entry, iter->ent);
417
418 /* Free entry */
419 ret = trace_seq_printf(s, " - ");
420 if (!ret)
421 return TRACE_TYPE_PARTIAL_LINE;
422
423 /* Type */
424 switch (entry->type_id) {
425 case KMEMTRACE_TYPE_KMALLOC:
426 ret = trace_seq_printf(s, "K ");
427 break;
428 case KMEMTRACE_TYPE_CACHE:
429 ret = trace_seq_printf(s, "C ");
430 break;
431 case KMEMTRACE_TYPE_PAGES:
432 ret = trace_seq_printf(s, "P ");
433 break;
434 default:
435 ret = trace_seq_printf(s, "? ");
436 }
437
438 if (!ret)
439 return TRACE_TYPE_PARTIAL_LINE;
440
441 /* Skip requested/allocated/flags */
442 ret = trace_seq_printf(s, " ");
443 if (!ret)
444 return TRACE_TYPE_PARTIAL_LINE;
445
446 /* Pointer to allocated */
447 ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr);
448 if (!ret)
449 return TRACE_TYPE_PARTIAL_LINE;
450
451 /* Skip node and print call site*/
452 ret = trace_seq_printf(s, " %pf\n", (void *)entry->call_site);
453 if (!ret)
454 return TRACE_TYPE_PARTIAL_LINE;
455
456 return TRACE_TYPE_HANDLED;
457}
458
459static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter)
460{
461 struct trace_entry *entry = iter->ent;
462
463 if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL))
464 return TRACE_TYPE_UNHANDLED;
465
466 switch (entry->type) {
467 case TRACE_KMEM_ALLOC:
468 return kmemtrace_print_alloc_compress(iter);
469 case TRACE_KMEM_FREE:
470 return kmemtrace_print_free_compress(iter);
471 default:
472 return TRACE_TYPE_UNHANDLED;
473 }
474}
475
476static struct trace_event_functions kmem_trace_alloc_funcs = {
477 .trace = kmemtrace_print_alloc,
478 .binary = kmemtrace_print_alloc_user,
479};
480
481static struct trace_event kmem_trace_alloc = {
482 .type = TRACE_KMEM_ALLOC,
483 .funcs = &kmem_trace_alloc_funcs,
484};
485
486static struct trace_event_functions kmem_trace_free_funcs = {
487 .trace = kmemtrace_print_free,
488 .binary = kmemtrace_print_free_user,
489};
490
491static struct trace_event kmem_trace_free = {
492 .type = TRACE_KMEM_FREE,
493 .funcs = &kmem_trace_free_funcs,
494};
495
496static struct tracer kmem_tracer __read_mostly = {
497 .name = "kmemtrace",
498 .init = kmem_trace_init,
499 .reset = kmem_trace_reset,
500 .print_line = kmemtrace_print_line,
501 .print_header = kmemtrace_headers,
502 .flags = &kmem_tracer_flags
503};
504
505void kmemtrace_init(void)
506{
507 /* earliest opportunity to start kmem tracing */
508}
509
510static int __init init_kmem_tracer(void)
511{
512 if (!register_ftrace_event(&kmem_trace_alloc)) {
513 pr_warning("Warning: could not register kmem events\n");
514 return 1;
515 }
516
517 if (!register_ftrace_event(&kmem_trace_free)) {
518 pr_warning("Warning: could not register kmem events\n");
519 return 1;
520 }
521
522 if (register_tracer(&kmem_tracer) != 0) {
523 pr_warning("Warning: could not register the kmem tracer\n");
524 return 1;
525 }
526
527 return 0;
528}
529device_initcall(init_kmem_tracer);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 1da7b6ea8b85..28d0615a513f 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2242,8 +2242,6 @@ static void trace_recursive_unlock(void)
2242 2242
2243#endif 2243#endif
2244 2244
2245static DEFINE_PER_CPU(int, rb_need_resched);
2246
2247/** 2245/**
2248 * ring_buffer_lock_reserve - reserve a part of the buffer 2246 * ring_buffer_lock_reserve - reserve a part of the buffer
2249 * @buffer: the ring buffer to reserve from 2247 * @buffer: the ring buffer to reserve from
@@ -2264,13 +2262,13 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
2264{ 2262{
2265 struct ring_buffer_per_cpu *cpu_buffer; 2263 struct ring_buffer_per_cpu *cpu_buffer;
2266 struct ring_buffer_event *event; 2264 struct ring_buffer_event *event;
2267 int cpu, resched; 2265 int cpu;
2268 2266
2269 if (ring_buffer_flags != RB_BUFFERS_ON) 2267 if (ring_buffer_flags != RB_BUFFERS_ON)
2270 return NULL; 2268 return NULL;
2271 2269
2272 /* If we are tracing schedule, we don't want to recurse */ 2270 /* If we are tracing schedule, we don't want to recurse */
2273 resched = ftrace_preempt_disable(); 2271 preempt_disable_notrace();
2274 2272
2275 if (atomic_read(&buffer->record_disabled)) 2273 if (atomic_read(&buffer->record_disabled))
2276 goto out_nocheck; 2274 goto out_nocheck;
@@ -2295,21 +2293,13 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
2295 if (!event) 2293 if (!event)
2296 goto out; 2294 goto out;
2297 2295
2298 /*
2299 * Need to store resched state on this cpu.
2300 * Only the first needs to.
2301 */
2302
2303 if (preempt_count() == 1)
2304 per_cpu(rb_need_resched, cpu) = resched;
2305
2306 return event; 2296 return event;
2307 2297
2308 out: 2298 out:
2309 trace_recursive_unlock(); 2299 trace_recursive_unlock();
2310 2300
2311 out_nocheck: 2301 out_nocheck:
2312 ftrace_preempt_enable(resched); 2302 preempt_enable_notrace();
2313 return NULL; 2303 return NULL;
2314} 2304}
2315EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve); 2305EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
@@ -2355,13 +2345,7 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
2355 2345
2356 trace_recursive_unlock(); 2346 trace_recursive_unlock();
2357 2347
2358 /* 2348 preempt_enable_notrace();
2359 * Only the last preempt count needs to restore preemption.
2360 */
2361 if (preempt_count() == 1)
2362 ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
2363 else
2364 preempt_enable_no_resched_notrace();
2365 2349
2366 return 0; 2350 return 0;
2367} 2351}
@@ -2469,13 +2453,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
2469 2453
2470 trace_recursive_unlock(); 2454 trace_recursive_unlock();
2471 2455
2472 /* 2456 preempt_enable_notrace();
2473 * Only the last preempt count needs to restore preemption.
2474 */
2475 if (preempt_count() == 1)
2476 ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
2477 else
2478 preempt_enable_no_resched_notrace();
2479 2457
2480} 2458}
2481EXPORT_SYMBOL_GPL(ring_buffer_discard_commit); 2459EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
@@ -2501,12 +2479,12 @@ int ring_buffer_write(struct ring_buffer *buffer,
2501 struct ring_buffer_event *event; 2479 struct ring_buffer_event *event;
2502 void *body; 2480 void *body;
2503 int ret = -EBUSY; 2481 int ret = -EBUSY;
2504 int cpu, resched; 2482 int cpu;
2505 2483
2506 if (ring_buffer_flags != RB_BUFFERS_ON) 2484 if (ring_buffer_flags != RB_BUFFERS_ON)
2507 return -EBUSY; 2485 return -EBUSY;
2508 2486
2509 resched = ftrace_preempt_disable(); 2487 preempt_disable_notrace();
2510 2488
2511 if (atomic_read(&buffer->record_disabled)) 2489 if (atomic_read(&buffer->record_disabled))
2512 goto out; 2490 goto out;
@@ -2536,7 +2514,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
2536 2514
2537 ret = 0; 2515 ret = 0;
2538 out: 2516 out:
2539 ftrace_preempt_enable(resched); 2517 preempt_enable_notrace();
2540 2518
2541 return ret; 2519 return ret;
2542} 2520}
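With the per-CPU rb_need_resched bookkeeping gone, reserve and commit simply bracket a preempt_disable_notrace()/preempt_enable_notrace() pair inside the ring buffer itself, so writers keep the usual reserve/fill/commit pattern. A minimal sketch, with struct my_entry standing in for a real trace entry:

    struct my_entry {
            unsigned long   value;
    };

    static void my_record(struct ring_buffer *buffer, unsigned long value)
    {
            struct ring_buffer_event *event;
            struct my_entry *entry;

            event = ring_buffer_lock_reserve(buffer, sizeof(*entry));
            if (!event)
                    return;         /* recording disabled or recursion */

            entry = ring_buffer_event_data(event);
            entry->value = value;

            ring_buffer_unlock_commit(buffer, event);
    }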
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 086d36316805..8683dec6946b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1404,7 +1404,6 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1404 struct bprint_entry *entry; 1404 struct bprint_entry *entry;
1405 unsigned long flags; 1405 unsigned long flags;
1406 int disable; 1406 int disable;
1407 int resched;
1408 int cpu, len = 0, size, pc; 1407 int cpu, len = 0, size, pc;
1409 1408
1410 if (unlikely(tracing_selftest_running || tracing_disabled)) 1409 if (unlikely(tracing_selftest_running || tracing_disabled))
@@ -1414,7 +1413,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
1414 pause_graph_tracing(); 1413 pause_graph_tracing();
1415 1414
1416 pc = preempt_count(); 1415 pc = preempt_count();
1417 resched = ftrace_preempt_disable(); 1416 preempt_disable_notrace();
1418 cpu = raw_smp_processor_id(); 1417 cpu = raw_smp_processor_id();
1419 data = tr->data[cpu]; 1418 data = tr->data[cpu];
1420 1419
@@ -1452,7 +1451,7 @@ out_unlock:
1452 1451
1453out: 1452out:
1454 atomic_dec_return(&data->disabled); 1453 atomic_dec_return(&data->disabled);
1455 ftrace_preempt_enable(resched); 1454 preempt_enable_notrace();
1456 unpause_graph_tracing(); 1455 unpause_graph_tracing();
1457 1456
1458 return len; 1457 return len;
@@ -4597,9 +4596,6 @@ __init static int tracer_alloc_buffers(void)
4597 4596
4598 register_tracer(&nop_trace); 4597 register_tracer(&nop_trace);
4599 current_trace = &nop_trace; 4598 current_trace = &nop_trace;
4600#ifdef CONFIG_BOOT_TRACER
4601 register_tracer(&boot_tracer);
4602#endif
4603 /* All seems OK, enable tracing */ 4599 /* All seems OK, enable tracing */
4604 tracing_disabled = 0; 4600 tracing_disabled = 0;
4605 4601
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 2cd96399463f..01ce088c1cdf 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -9,10 +9,7 @@
9#include <linux/mmiotrace.h> 9#include <linux/mmiotrace.h>
10#include <linux/tracepoint.h> 10#include <linux/tracepoint.h>
11#include <linux/ftrace.h> 11#include <linux/ftrace.h>
12#include <trace/boot.h>
13#include <linux/kmemtrace.h>
14#include <linux/hw_breakpoint.h> 12#include <linux/hw_breakpoint.h>
15
16#include <linux/trace_seq.h> 13#include <linux/trace_seq.h>
17#include <linux/ftrace_event.h> 14#include <linux/ftrace_event.h>
18 15
@@ -29,26 +26,15 @@ enum trace_type {
29 TRACE_MMIO_RW, 26 TRACE_MMIO_RW,
30 TRACE_MMIO_MAP, 27 TRACE_MMIO_MAP,
31 TRACE_BRANCH, 28 TRACE_BRANCH,
32 TRACE_BOOT_CALL,
33 TRACE_BOOT_RET,
34 TRACE_GRAPH_RET, 29 TRACE_GRAPH_RET,
35 TRACE_GRAPH_ENT, 30 TRACE_GRAPH_ENT,
36 TRACE_USER_STACK, 31 TRACE_USER_STACK,
37 TRACE_KMEM_ALLOC,
38 TRACE_KMEM_FREE,
39 TRACE_BLK, 32 TRACE_BLK,
40 TRACE_KSYM, 33 TRACE_KSYM,
41 34
42 __TRACE_LAST_TYPE, 35 __TRACE_LAST_TYPE,
43}; 36};
44 37
45enum kmemtrace_type_id {
46 KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */
47 KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */
48 KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */
49};
50
51extern struct tracer boot_tracer;
52 38
53#undef __field 39#undef __field
54#define __field(type, item) type item; 40#define __field(type, item) type item;
@@ -209,17 +195,11 @@ extern void __ftrace_bad_type(void);
209 TRACE_MMIO_RW); \ 195 TRACE_MMIO_RW); \
210 IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \ 196 IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \
211 TRACE_MMIO_MAP); \ 197 TRACE_MMIO_MAP); \
212 IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\
213 IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\
214 IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \ 198 IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \
215 IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry, \ 199 IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry, \
216 TRACE_GRAPH_ENT); \ 200 TRACE_GRAPH_ENT); \
217 IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ 201 IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \
218 TRACE_GRAPH_RET); \ 202 TRACE_GRAPH_RET); \
219 IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \
220 TRACE_KMEM_ALLOC); \
221 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
222 TRACE_KMEM_FREE); \
223 IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\ 203 IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\
224 __ftrace_bad_type(); \ 204 __ftrace_bad_type(); \
225 } while (0) 205 } while (0)
@@ -628,54 +608,6 @@ enum trace_iterator_flags {
628 608
629extern struct tracer nop_trace; 609extern struct tracer nop_trace;
630 610
631/**
632 * ftrace_preempt_disable - disable preemption scheduler safe
633 *
634 * When tracing can happen inside the scheduler, there exists
635 * cases that the tracing might happen before the need_resched
636 * flag is checked. If this happens and the tracer calls
637 * preempt_enable (after a disable), a schedule might take place
638 * causing an infinite recursion.
639 *
640 * To prevent this, we read the need_resched flag before
641 * disabling preemption. When we want to enable preemption we
642 * check the flag, if it is set, then we call preempt_enable_no_resched.
643 * Otherwise, we call preempt_enable.
644 *
645 * The rationale for doing the above is that if need_resched is set
646 * and we have yet to reschedule, we are either in an atomic location
647 * (where we do not need to check for scheduling) or we are inside
648 * the scheduler and do not want to resched.
649 */
650static inline int ftrace_preempt_disable(void)
651{
652 int resched;
653
654 resched = need_resched();
655 preempt_disable_notrace();
656
657 return resched;
658}
659
660/**
661 * ftrace_preempt_enable - enable preemption scheduler safe
662 * @resched: the return value from ftrace_preempt_disable
663 *
664 * This is a scheduler safe way to enable preemption and not miss
665 * any preemption checks. The disabled saved the state of preemption.
666 * If resched is set, then we are either inside an atomic or
667 * are inside the scheduler (we would have already scheduled
668 * otherwise). In this case, we do not want to call normal
669 * preempt_enable, but preempt_enable_no_resched instead.
670 */
671static inline void ftrace_preempt_enable(int resched)
672{
673 if (resched)
674 preempt_enable_no_resched_notrace();
675 else
676 preempt_enable_notrace();
677}
678
679#ifdef CONFIG_BRANCH_TRACER 611#ifdef CONFIG_BRANCH_TRACER
680extern int enable_branch_tracing(struct trace_array *tr); 612extern int enable_branch_tracing(struct trace_array *tr);
681extern void disable_branch_tracing(void); 613extern void disable_branch_tracing(void);
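The two helpers deleted above existed only so that a tracer would not call the traced preempt_enable() path and recurse through the scheduler. With preempt_schedule() marked notrace (see the kernel/sched.c hunk earlier in this diff), a callback can use the plain notrace primitives directly; a sketch with a hypothetical ftrace callback:

    static void my_tracer_func(unsigned long ip, unsigned long parent_ip)
    {
            preempt_disable_notrace();

            /* ... record the hit; a wakeup that becomes pending here is
             * honoured at the enable below without re-entering the tracer */

            preempt_enable_notrace();
    }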
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
deleted file mode 100644
index c21d5f3956ad..000000000000
--- a/kernel/trace/trace_boot.c
+++ /dev/null
@@ -1,185 +0,0 @@
1/*
2 * ring buffer based initcalls tracer
3 *
4 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
5 *
6 */
7
8#include <linux/init.h>
9#include <linux/debugfs.h>
10#include <linux/ftrace.h>
11#include <linux/kallsyms.h>
12#include <linux/time.h>
13
14#include "trace.h"
15#include "trace_output.h"
16
17static struct trace_array *boot_trace;
18static bool pre_initcalls_finished;
19
20/* Tells the boot tracer that the pre_smp_initcalls are finished.
21 * So we are ready.
22 * It doesn't enable sched events tracing, however.
23 * You have to call enable_boot_trace to do so.
24 */
25void start_boot_trace(void)
26{
27 pre_initcalls_finished = true;
28}
29
30void enable_boot_trace(void)
31{
32 if (boot_trace && pre_initcalls_finished)
33 tracing_start_sched_switch_record();
34}
35
36void disable_boot_trace(void)
37{
38 if (boot_trace && pre_initcalls_finished)
39 tracing_stop_sched_switch_record();
40}
41
42static int boot_trace_init(struct trace_array *tr)
43{
44 boot_trace = tr;
45
46 if (!tr)
47 return 0;
48
49 tracing_reset_online_cpus(tr);
50
51 tracing_sched_switch_assign_trace(tr);
52 return 0;
53}
54
55static enum print_line_t
56initcall_call_print_line(struct trace_iterator *iter)
57{
58 struct trace_entry *entry = iter->ent;
59 struct trace_seq *s = &iter->seq;
60 struct trace_boot_call *field;
61 struct boot_trace_call *call;
62 u64 ts;
63 unsigned long nsec_rem;
64 int ret;
65
66 trace_assign_type(field, entry);
67 call = &field->boot_call;
68 ts = iter->ts;
69 nsec_rem = do_div(ts, NSEC_PER_SEC);
70
71 ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n",
72 (unsigned long)ts, nsec_rem, call->func, call->caller);
73
74 if (!ret)
75 return TRACE_TYPE_PARTIAL_LINE;
76 else
77 return TRACE_TYPE_HANDLED;
78}
79
80static enum print_line_t
81initcall_ret_print_line(struct trace_iterator *iter)
82{
83 struct trace_entry *entry = iter->ent;
84 struct trace_seq *s = &iter->seq;
85 struct trace_boot_ret *field;
86 struct boot_trace_ret *init_ret;
87 u64 ts;
88 unsigned long nsec_rem;
89 int ret;
90
91 trace_assign_type(field, entry);
92 init_ret = &field->boot_ret;
93 ts = iter->ts;
94 nsec_rem = do_div(ts, NSEC_PER_SEC);
95
96 ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
97 "returned %d after %llu msecs\n",
98 (unsigned long) ts,
99 nsec_rem,
100 init_ret->func, init_ret->result, init_ret->duration);
101
102 if (!ret)
103 return TRACE_TYPE_PARTIAL_LINE;
104 else
105 return TRACE_TYPE_HANDLED;
106}
107
108static enum print_line_t initcall_print_line(struct trace_iterator *iter)
109{
110 struct trace_entry *entry = iter->ent;
111
112 switch (entry->type) {
113 case TRACE_BOOT_CALL:
114 return initcall_call_print_line(iter);
115 case TRACE_BOOT_RET:
116 return initcall_ret_print_line(iter);
117 default:
118 return TRACE_TYPE_UNHANDLED;
119 }
120}
121
122struct tracer boot_tracer __read_mostly =
123{
124 .name = "initcall",
125 .init = boot_trace_init,
126 .reset = tracing_reset_online_cpus,
127 .print_line = initcall_print_line,
128};
129
130void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
131{
132 struct ftrace_event_call *call = &event_boot_call;
133 struct ring_buffer_event *event;
134 struct ring_buffer *buffer;
135 struct trace_boot_call *entry;
136 struct trace_array *tr = boot_trace;
137
138 if (!tr || !pre_initcalls_finished)
139 return;
140
141 /* Get its name now since this function could
142 * disappear because it is in the .init section.
143 */
144 sprint_symbol(bt->func, (unsigned long)fn);
145 preempt_disable();
146
147 buffer = tr->buffer;
148 event = trace_buffer_lock_reserve(buffer, TRACE_BOOT_CALL,
149 sizeof(*entry), 0, 0);
150 if (!event)
151 goto out;
152 entry = ring_buffer_event_data(event);
153 entry->boot_call = *bt;
154 if (!filter_check_discard(call, entry, buffer, event))
155 trace_buffer_unlock_commit(buffer, event, 0, 0);
156 out:
157 preempt_enable();
158}
159
160void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
161{
162 struct ftrace_event_call *call = &event_boot_ret;
163 struct ring_buffer_event *event;
164 struct ring_buffer *buffer;
165 struct trace_boot_ret *entry;
166 struct trace_array *tr = boot_trace;
167
168 if (!tr || !pre_initcalls_finished)
169 return;
170
171 sprint_symbol(bt->func, (unsigned long)fn);
172 preempt_disable();
173
174 buffer = tr->buffer;
175 event = trace_buffer_lock_reserve(buffer, TRACE_BOOT_RET,
176 sizeof(*entry), 0, 0);
177 if (!event)
178 goto out;
179 entry = ring_buffer_event_data(event);
180 entry->boot_ret = *bt;
181 if (!filter_check_discard(call, entry, buffer, event))
182 trace_buffer_unlock_commit(buffer, event, 0, 0);
183 out:
184 preempt_enable();
185}
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 9d589d8dcd1a..52fda6c04ac3 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -32,16 +32,15 @@
32u64 notrace trace_clock_local(void) 32u64 notrace trace_clock_local(void)
33{ 33{
34 u64 clock; 34 u64 clock;
35 int resched;
36 35
37 /* 36 /*
38 * sched_clock() is an architecture implemented, fast, scalable, 37 * sched_clock() is an architecture implemented, fast, scalable,
39 * lockless clock. It is not guaranteed to be coherent across 38 * lockless clock. It is not guaranteed to be coherent across
40 * CPUs, nor across CPU idle events. 39 * CPUs, nor across CPU idle events.
41 */ 40 */
42 resched = ftrace_preempt_disable(); 41 preempt_disable_notrace();
43 clock = sched_clock(); 42 clock = sched_clock();
44 ftrace_preempt_enable(resched); 43 preempt_enable_notrace();
45 44
46 return clock; 45 return clock;
47} 46}
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index dc008c1240da..13abc157dbaf 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -271,33 +271,6 @@ FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map,
271 __entry->map_id, __entry->opcode) 271 __entry->map_id, __entry->opcode)
272); 272);
273 273
274FTRACE_ENTRY(boot_call, trace_boot_call,
275
276 TRACE_BOOT_CALL,
277
278 F_STRUCT(
279 __field_struct( struct boot_trace_call, boot_call )
280 __field_desc( pid_t, boot_call, caller )
281 __array_desc( char, boot_call, func, KSYM_SYMBOL_LEN)
282 ),
283
284 F_printk("%d %s", __entry->caller, __entry->func)
285);
286
287FTRACE_ENTRY(boot_ret, trace_boot_ret,
288
289 TRACE_BOOT_RET,
290
291 F_STRUCT(
292 __field_struct( struct boot_trace_ret, boot_ret )
293 __array_desc( char, boot_ret, func, KSYM_SYMBOL_LEN)
294 __field_desc( int, boot_ret, result )
295 __field_desc( unsigned long, boot_ret, duration )
296 ),
297
298 F_printk("%s %d %lx",
299 __entry->func, __entry->result, __entry->duration)
300);
301 274
302#define TRACE_FUNC_SIZE 30 275#define TRACE_FUNC_SIZE 30
303#define TRACE_FILE_SIZE 20 276#define TRACE_FILE_SIZE 20
@@ -318,41 +291,6 @@ FTRACE_ENTRY(branch, trace_branch,
318 __entry->func, __entry->file, __entry->correct) 291 __entry->func, __entry->file, __entry->correct)
319); 292);
320 293
321FTRACE_ENTRY(kmem_alloc, kmemtrace_alloc_entry,
322
323 TRACE_KMEM_ALLOC,
324
325 F_STRUCT(
326 __field( enum kmemtrace_type_id, type_id )
327 __field( unsigned long, call_site )
328 __field( const void *, ptr )
329 __field( size_t, bytes_req )
330 __field( size_t, bytes_alloc )
331 __field( gfp_t, gfp_flags )
332 __field( int, node )
333 ),
334
335 F_printk("type:%u call_site:%lx ptr:%p req:%zi alloc:%zi"
336 " flags:%x node:%d",
337 __entry->type_id, __entry->call_site, __entry->ptr,
338 __entry->bytes_req, __entry->bytes_alloc,
339 __entry->gfp_flags, __entry->node)
340);
341
342FTRACE_ENTRY(kmem_free, kmemtrace_free_entry,
343
344 TRACE_KMEM_FREE,
345
346 F_STRUCT(
347 __field( enum kmemtrace_type_id, type_id )
348 __field( unsigned long, call_site )
349 __field( const void *, ptr )
350 ),
351
352 F_printk("type:%u call_site:%lx ptr:%p",
353 __entry->type_id, __entry->call_site, __entry->ptr)
354);
355
356FTRACE_ENTRY(ksym_trace, ksym_trace_entry, 294FTRACE_ENTRY(ksym_trace, ksym_trace_entry,
357 295
358 TRACE_KSYM, 296 TRACE_KSYM,
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index e6f65887842c..4799d7047eb0 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -9,8 +9,6 @@
9#include <linux/kprobes.h> 9#include <linux/kprobes.h>
10#include "trace.h" 10#include "trace.h"
11 11
12EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs);
13
14static char *perf_trace_buf[4]; 12static char *perf_trace_buf[4];
15 13
16/* 14/*
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 53cffc0b0801..a594f9a7ee3d 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1524,12 +1524,11 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip)
1524 struct ftrace_entry *entry; 1524 struct ftrace_entry *entry;
1525 unsigned long flags; 1525 unsigned long flags;
1526 long disabled; 1526 long disabled;
1527 int resched;
1528 int cpu; 1527 int cpu;
1529 int pc; 1528 int pc;
1530 1529
1531 pc = preempt_count(); 1530 pc = preempt_count();
1532 resched = ftrace_preempt_disable(); 1531 preempt_disable_notrace();
1533 cpu = raw_smp_processor_id(); 1532 cpu = raw_smp_processor_id();
1534 disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu)); 1533 disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
1535 1534
@@ -1551,7 +1550,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip)
1551 1550
1552 out: 1551 out:
1553 atomic_dec(&per_cpu(ftrace_test_event_disable, cpu)); 1552 atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
1554 ftrace_preempt_enable(resched); 1553 preempt_enable_notrace();
1555} 1554}
1556 1555
1557static struct ftrace_ops trace_ops __initdata = 1556static struct ftrace_ops trace_ops __initdata =
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index b3f3776b0cd6..16aee4d44e8f 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -54,14 +54,14 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
54 struct trace_array_cpu *data; 54 struct trace_array_cpu *data;
55 unsigned long flags; 55 unsigned long flags;
56 long disabled; 56 long disabled;
57 int cpu, resched; 57 int cpu;
58 int pc; 58 int pc;
59 59
60 if (unlikely(!ftrace_function_enabled)) 60 if (unlikely(!ftrace_function_enabled))
61 return; 61 return;
62 62
63 pc = preempt_count(); 63 pc = preempt_count();
64 resched = ftrace_preempt_disable(); 64 preempt_disable_notrace();
65 local_save_flags(flags); 65 local_save_flags(flags);
66 cpu = raw_smp_processor_id(); 66 cpu = raw_smp_processor_id();
67 data = tr->data[cpu]; 67 data = tr->data[cpu];
@@ -71,7 +71,7 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
71 trace_function(tr, ip, parent_ip, flags, pc); 71 trace_function(tr, ip, parent_ip, flags, pc);
72 72
73 atomic_dec(&data->disabled); 73 atomic_dec(&data->disabled);
74 ftrace_preempt_enable(resched); 74 preempt_enable_notrace();
75} 75}
76 76
77static void 77static void
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 0e73bc2ef8c5..c9fd5bd02036 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -46,7 +46,6 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
46 struct trace_array_cpu *data; 46 struct trace_array_cpu *data;
47 unsigned long flags; 47 unsigned long flags;
48 long disabled; 48 long disabled;
49 int resched;
50 int cpu; 49 int cpu;
51 int pc; 50 int pc;
52 51
@@ -54,7 +53,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
54 return; 53 return;
55 54
56 pc = preempt_count(); 55 pc = preempt_count();
57 resched = ftrace_preempt_disable(); 56 preempt_disable_notrace();
58 57
59 cpu = raw_smp_processor_id(); 58 cpu = raw_smp_processor_id();
60 if (cpu != wakeup_current_cpu) 59 if (cpu != wakeup_current_cpu)
@@ -74,7 +73,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
74 out: 73 out:
75 atomic_dec(&data->disabled); 74 atomic_dec(&data->disabled);
76 out_enable: 75 out_enable:
77 ftrace_preempt_enable(resched); 76 preempt_enable_notrace();
78} 77}
79 78
80static struct ftrace_ops trace_ops __read_mostly = 79static struct ftrace_ops trace_ops __read_mostly =
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index f4bc9b27de5f..056468eae7cf 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -110,12 +110,12 @@ static inline void check_stack(void)
110static void 110static void
111stack_trace_call(unsigned long ip, unsigned long parent_ip) 111stack_trace_call(unsigned long ip, unsigned long parent_ip)
112{ 112{
113 int cpu, resched; 113 int cpu;
114 114
115 if (unlikely(!ftrace_enabled || stack_trace_disabled)) 115 if (unlikely(!ftrace_enabled || stack_trace_disabled))
116 return; 116 return;
117 117
118 resched = ftrace_preempt_disable(); 118 preempt_disable_notrace();
119 119
120 cpu = raw_smp_processor_id(); 120 cpu = raw_smp_processor_id();
121 /* no atomic needed, we only modify this variable by this cpu */ 121 /* no atomic needed, we only modify this variable by this cpu */
@@ -127,7 +127,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
127 out: 127 out:
128 per_cpu(trace_active, cpu)--; 128 per_cpu(trace_active, cpu)--;
129 /* prevent recursion in schedule */ 129 /* prevent recursion in schedule */
130 ftrace_preempt_enable(resched); 130 preempt_enable_notrace();
131} 131}
132 132
133static struct ftrace_ops trace_ops __read_mostly = 133static struct ftrace_ops trace_ops __read_mostly =
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index a7974a552ca9..c080956f4d8e 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -33,12 +33,13 @@ static DEFINE_MUTEX(sample_timer_lock);
33 */ 33 */
34static DEFINE_PER_CPU(struct hrtimer, stack_trace_hrtimer); 34static DEFINE_PER_CPU(struct hrtimer, stack_trace_hrtimer);
35 35
36struct stack_frame { 36struct stack_frame_user {
37 const void __user *next_fp; 37 const void __user *next_fp;
38 unsigned long return_address; 38 unsigned long return_address;
39}; 39};
40 40
41static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) 41static int
42copy_stack_frame(const void __user *fp, struct stack_frame_user *frame)
42{ 43{
43 int ret; 44 int ret;
44 45
@@ -125,7 +126,7 @@ trace_kernel(struct pt_regs *regs, struct trace_array *tr,
125static void timer_notify(struct pt_regs *regs, int cpu) 126static void timer_notify(struct pt_regs *regs, int cpu)
126{ 127{
127 struct trace_array_cpu *data; 128 struct trace_array_cpu *data;
128 struct stack_frame frame; 129 struct stack_frame_user frame;
129 struct trace_array *tr; 130 struct trace_array *tr;
130 const void __user *fp; 131 const void __user *fp;
131 int is_user; 132 int is_user;
diff --git a/mm/mmap.c b/mm/mmap.c
index 456ec6f27889..e38e910cb756 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1734,8 +1734,10 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
1734 grow = (address - vma->vm_end) >> PAGE_SHIFT; 1734 grow = (address - vma->vm_end) >> PAGE_SHIFT;
1735 1735
1736 error = acct_stack_growth(vma, size, grow); 1736 error = acct_stack_growth(vma, size, grow);
1737 if (!error) 1737 if (!error) {
1738 vma->vm_end = address; 1738 vma->vm_end = address;
1739 perf_event_mmap(vma);
1740 }
1739 } 1741 }
1740 anon_vma_unlock(vma); 1742 anon_vma_unlock(vma);
1741 return error; 1743 return error;
@@ -1781,6 +1783,7 @@ static int expand_downwards(struct vm_area_struct *vma,
1781 if (!error) { 1783 if (!error) {
1782 vma->vm_start = address; 1784 vma->vm_start = address;
1783 vma->vm_pgoff -= grow; 1785 vma->vm_pgoff -= grow;
1786 perf_event_mmap(vma);
1784 } 1787 }
1785 } 1788 }
1786 anon_vma_unlock(vma); 1789 anon_vma_unlock(vma);
@@ -2208,6 +2211,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
2208 vma->vm_page_prot = vm_get_page_prot(flags); 2211 vma->vm_page_prot = vm_get_page_prot(flags);
2209 vma_link(mm, vma, prev, rb_link, rb_parent); 2212 vma_link(mm, vma, prev, rb_link, rb_parent);
2210out: 2213out:
2214 perf_event_mmap(vma);
2211 mm->total_vm += len >> PAGE_SHIFT; 2215 mm->total_vm += len >> PAGE_SHIFT;
2212 if (flags & VM_LOCKED) { 2216 if (flags & VM_LOCKED) {
2213 if (!mlock_vma_pages_range(vma, addr, addr + len)) 2217 if (!mlock_vma_pages_range(vma, addr, addr + len))
diff --git a/mm/slab.c b/mm/slab.c
index e49f8f46f46d..47360c3e5abd 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -102,7 +102,6 @@
102#include <linux/cpu.h> 102#include <linux/cpu.h>
103#include <linux/sysctl.h> 103#include <linux/sysctl.h>
104#include <linux/module.h> 104#include <linux/module.h>
105#include <linux/kmemtrace.h>
106#include <linux/rcupdate.h> 105#include <linux/rcupdate.h>
107#include <linux/string.h> 106#include <linux/string.h>
108#include <linux/uaccess.h> 107#include <linux/uaccess.h>
diff --git a/mm/slob.c b/mm/slob.c
index 23631e2bb57a..a82ab5811bd9 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -66,8 +66,10 @@
66#include <linux/module.h> 66#include <linux/module.h>
67#include <linux/rcupdate.h> 67#include <linux/rcupdate.h>
68#include <linux/list.h> 68#include <linux/list.h>
69#include <linux/kmemtrace.h>
70#include <linux/kmemleak.h> 69#include <linux/kmemleak.h>
70
71#include <trace/events/kmem.h>
72
71#include <asm/atomic.h> 73#include <asm/atomic.h>
72 74
73/* 75/*
diff --git a/mm/slub.c b/mm/slub.c
index 578f68f3c51f..7bb7940f4eee 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -17,7 +17,6 @@
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/proc_fs.h> 18#include <linux/proc_fs.h>
19#include <linux/seq_file.h> 19#include <linux/seq_file.h>
20#include <linux/kmemtrace.h>
21#include <linux/kmemcheck.h> 20#include <linux/kmemcheck.h>
22#include <linux/cpu.h> 21#include <linux/cpu.h>
23#include <linux/cpuset.h> 22#include <linux/cpuset.h>
diff --git a/scripts/package/Makefile b/scripts/package/Makefile
index 62fcc3a7f4d3..18513b0191db 100644
--- a/scripts/package/Makefile
+++ b/scripts/package/Makefile
@@ -111,13 +111,38 @@ tar%pkg: FORCE
111clean-dirs += $(objtree)/tar-install/ 111clean-dirs += $(objtree)/tar-install/
112 112
113 113
114# perf-pkg - generate a source tarball with perf source
115# ---------------------------------------------------------------------------
116
117perf-tar=perf-$(KERNELVERSION)
118
119quiet_cmd_perf_tar = TAR
120 cmd_perf_tar = \
121git archive --prefix=$(perf-tar)/ HEAD^{tree} \
122 $$(cat $(srctree)/tools/perf/MANIFEST) -o $(perf-tar).tar; \
123mkdir -p $(perf-tar); \
124git rev-parse HEAD > $(perf-tar)/HEAD; \
125tar rf $(perf-tar).tar $(perf-tar)/HEAD; \
126rm -r $(perf-tar); \
127$(if $(findstring tar-src,$@),, \
128$(if $(findstring bz2,$@),bzip2, \
129$(if $(findstring gz,$@),gzip, \
130$(error unknown target $@))) \
131 -f -9 $(perf-tar).tar)
132
133perf-%pkg: FORCE
134 $(call cmd,perf_tar)
135
114# Help text displayed when executing 'make help' 136# Help text displayed when executing 'make help'
115# --------------------------------------------------------------------------- 137# ---------------------------------------------------------------------------
116help: FORCE 138help: FORCE
117 @echo ' rpm-pkg - Build both source and binary RPM kernel packages' 139 @echo ' rpm-pkg - Build both source and binary RPM kernel packages'
118 @echo ' binrpm-pkg - Build only the binary kernel package' 140 @echo ' binrpm-pkg - Build only the binary kernel package'
119 @echo ' deb-pkg - Build the kernel as an deb package' 141 @echo ' deb-pkg - Build the kernel as an deb package'
120 @echo ' tar-pkg - Build the kernel as an uncompressed tarball' 142 @echo ' tar-pkg - Build the kernel as an uncompressed tarball'
121 @echo ' targz-pkg - Build the kernel as a gzip compressed tarball' 143 @echo ' targz-pkg - Build the kernel as a gzip compressed tarball'
122 @echo ' tarbz2-pkg - Build the kernel as a bzip2 compressed tarball' 144 @echo ' tarbz2-pkg - Build the kernel as a bzip2 compressed tarball'
145 @echo ' perf-tar-src-pkg - Build $(perf-tar).tar source tarball'
146 @echo ' perf-targz-src-pkg - Build $(perf-tar).tar.gz source tarball'
147 @echo ' perf-tarbz2-src-pkg - Build $(perf-tar).tar.bz2 source tarball'
123 148
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index e1d60d780784..cb43289e447f 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -18,3 +18,5 @@ perf-archive
18tags 18tags
19TAGS 19TAGS
20cscope* 20cscope*
21config.mak
22config.mak.autogen
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 94a258c96a44..ea531d9d975c 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -31,6 +31,10 @@ OPTIONS
31--vmlinux=PATH:: 31--vmlinux=PATH::
32 Specify vmlinux path which has debuginfo (Dwarf binary). 32 Specify vmlinux path which has debuginfo (Dwarf binary).
33 33
34-s::
35--source=PATH::
36 Specify path to kernel source.
37
34-v:: 38-v::
35--verbose:: 39--verbose::
36 Be more verbose (show parsed arguments, etc). 40 Be more verbose (show parsed arguments, etc).
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 34e255fc3e2f..3ee27dccfde9 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -103,6 +103,19 @@ OPTIONS
103--raw-samples:: 103--raw-samples::
104Collect raw sample records from all opened counters (default for tracepoint counters). 104Collect raw sample records from all opened counters (default for tracepoint counters).
105 105
106-C::
107--cpu::
108Collect samples only on the list of cpus provided. Multiple CPUs can be provided as a
109comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
110In per-thread mode with inheritance mode on (default), samples are captured only when
111the thread executes on the designated CPUs. Default is to monitor all CPUs.
112
113-N::
114--no-buildid-cache::
115Do not update the buildid cache. This saves some overhead in situations
116where the information in the perf.data file (which includes buildids)
117is sufficient.
118
106SEE ALSO 119SEE ALSO
107-------- 120--------
108linkperf:perf-stat[1], linkperf:perf-list[1] 121linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 909fa766fa1c..4b3a2d46b437 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -46,6 +46,13 @@ OPTIONS
46-B:: 46-B::
47 print large numbers with thousands' separators according to locale 47 print large numbers with thousands' separators according to locale
48 48
49-C::
50--cpu=::
51Count only on the list of cpus provided. Multiple CPUs can be provided as a
52comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
53In per-thread mode, this option is ignored. The -a option is still necessary
54to activate system-wide monitoring. Default is to count on all CPUs.
55
49EXAMPLES 56EXAMPLES
50-------- 57--------
51 58
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 785b9fc32a46..1f9687663f2a 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -25,9 +25,11 @@ OPTIONS
25--count=<count>:: 25--count=<count>::
26 Event period to sample. 26 Event period to sample.
27 27
28-C <cpu>:: 28-C <cpu-list>::
29--CPU=<cpu>:: 29--cpu=<cpu>::
30 CPU to profile. 30Monitor only on the list of cpus provided. Multiple CPUs can be provided as a
31comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
32Default is to monitor all CPUs.
31 33
32-d <seconds>:: 34-d <seconds>::
33--delay=<seconds>:: 35--delay=<seconds>::
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
new file mode 100644
index 000000000000..8c7fc0c8f0b8
--- /dev/null
+++ b/tools/perf/MANIFEST
@@ -0,0 +1,12 @@
1tools/perf
2include/linux/perf_event.h
3include/linux/rbtree.h
4include/linux/list.h
5include/linux/hash.h
6include/linux/stringify.h
7lib/rbtree.c
8include/linux/swab.h
9arch/*/include/asm/unistd*.h
10include/linux/poison.h
11include/linux/magic.h
12include/linux/hw_breakpoint.h
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 3d8f31ed771d..6aa2fe323db1 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -285,14 +285,10 @@ else
285 QUIET_STDERR = ">/dev/null 2>&1" 285 QUIET_STDERR = ">/dev/null 2>&1"
286endif 286endif
287 287
288BITBUCKET = "/dev/null" 288-include feature-tests.mak
289 289
290ifneq ($(shell sh -c "(echo '\#include <stdio.h>'; echo 'int main(void) { return puts(\"hi\"); }') | $(CC) -x c - $(ALL_CFLAGS) -o $(BITBUCKET) "$(QUIET_STDERR)" && echo y"), y) 290ifeq ($(call try-cc,$(SOURCE_HELLO),-Werror -fstack-protector-all),y)
291 BITBUCKET = .perf.dev.null 291 CFLAGS := $(CFLAGS) -fstack-protector-all
292endif
293
294ifeq ($(shell sh -c "echo 'int foo(void) {char X[2]; return 3;}' | $(CC) -x c -c -Werror -fstack-protector-all - -o $(BITBUCKET) "$(QUIET_STDERR)" && echo y"), y)
295 CFLAGS := $(CFLAGS) -fstack-protector-all
296endif 292endif
297 293
298 294
@@ -508,7 +504,8 @@ PERFLIBS = $(LIB_FILE)
508-include config.mak 504-include config.mak
509 505
510ifndef NO_DWARF 506ifndef NO_DWARF
511ifneq ($(shell sh -c "(echo '\#include <dwarf.h>'; echo '\#include <libdw.h>'; echo '\#include <version.h>'; echo '\#ifndef _ELFUTILS_PREREQ'; echo '\#error'; echo '\#endif'; echo 'int main(void) { Dwarf *dbg; dbg = dwarf_begin(0, DWARF_C_READ); return (long)dbg; }') | $(CC) -x c - $(ALL_CFLAGS) -I/usr/include/elfutils -ldw -lelf -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) 507FLAGS_DWARF=$(ALL_CFLAGS) -I/usr/include/elfutils -ldw -lelf $(ALL_LDFLAGS) $(EXTLIBS)
508ifneq ($(call try-cc,$(SOURCE_DWARF),$(FLAGS_DWARF)),y)
512 msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev); 509 msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev);
513 NO_DWARF := 1 510 NO_DWARF := 1
514endif # Dwarf support 511endif # Dwarf support
@@ -536,16 +533,18 @@ ifneq ($(OUTPUT),)
536 BASIC_CFLAGS += -I$(OUTPUT) 533 BASIC_CFLAGS += -I$(OUTPUT)
537endif 534endif
538 535
539ifeq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) 536FLAGS_LIBELF=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS)
540ifneq ($(shell sh -c "(echo '\#include <gnu/libc-version.h>'; echo 'int main(void) { const char * version = gnu_get_libc_version(); return (long)version; }') | $(CC) -x c - $(ALL_CFLAGS) -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) 537ifneq ($(call try-cc,$(SOURCE_LIBELF),$(FLAGS_LIBELF)),y)
541 msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static); 538 FLAGS_GLIBC=$(ALL_CFLAGS) $(ALL_LDFLAGS)
539 ifneq ($(call try-cc,$(SOURCE_GLIBC),$(FLAGS_GLIBC)),y)
540 msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static);
541 else
542 msg := $(error No libelf.h/libelf found, please install libelf-dev/elfutils-libelf-devel);
543 endif
542endif 544endif
543 545
544 ifneq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ_MMAP, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) 546ifneq ($(call try-cc,$(SOURCE_ELF_MMAP),$(FLAGS_COMMON)),y)
545 BASIC_CFLAGS += -DLIBELF_NO_MMAP 547 BASIC_CFLAGS += -DLIBELF_NO_MMAP
546 endif
547else
548 msg := $(error No libelf.h/libelf found, please install libelf-dev/elfutils-libelf-devel and glibc-dev[el]);
549endif 548endif
550 549
551ifndef NO_DWARF 550ifndef NO_DWARF
@@ -561,41 +560,47 @@ endif # NO_DWARF
561ifdef NO_NEWT 560ifdef NO_NEWT
562 BASIC_CFLAGS += -DNO_NEWT_SUPPORT 561 BASIC_CFLAGS += -DNO_NEWT_SUPPORT
563else 562else
564ifneq ($(shell sh -c "(echo '\#include <newt.h>'; echo 'int main(void) { newtInit(); newtCls(); return newtFinished(); }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -lnewt -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) 563 FLAGS_NEWT=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) -lnewt
565 msg := $(warning newt not found, disables TUI support. Please install newt-devel or libnewt-dev); 564 ifneq ($(call try-cc,$(SOURCE_NEWT),$(FLAGS_NEWT)),y)
566 BASIC_CFLAGS += -DNO_NEWT_SUPPORT 565 msg := $(warning newt not found, disables TUI support. Please install newt-devel or libnewt-dev);
567else 566 BASIC_CFLAGS += -DNO_NEWT_SUPPORT
568 # Fedora has /usr/include/slang/slang.h, but ubuntu /usr/include/slang.h 567 else
569 BASIC_CFLAGS += -I/usr/include/slang 568 # Fedora has /usr/include/slang/slang.h, but ubuntu /usr/include/slang.h
570 EXTLIBS += -lnewt -lslang 569 BASIC_CFLAGS += -I/usr/include/slang
571 LIB_OBJS += $(OUTPUT)util/newt.o 570 EXTLIBS += -lnewt -lslang
572endif 571 LIB_OBJS += $(OUTPUT)util/newt.o
573endif # NO_NEWT 572 endif
574
575ifndef NO_LIBPERL
576PERL_EMBED_LDOPTS = `perl -MExtUtils::Embed -e ldopts 2>/dev/null`
577PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
578endif 573endif
579 574
580ifneq ($(shell sh -c "(echo '\#include <EXTERN.h>'; echo '\#include <perl.h>'; echo 'int main(void) { perl_alloc(); return 0; }') | $(CC) -x c - $(PERL_EMBED_CCOPTS) -o $(BITBUCKET) $(PERL_EMBED_LDOPTS) > /dev/null 2>&1 && echo y"), y) 575ifdef NO_LIBPERL
581 BASIC_CFLAGS += -DNO_LIBPERL 576 BASIC_CFLAGS += -DNO_LIBPERL
582else 577else
583 ALL_LDFLAGS += $(PERL_EMBED_LDOPTS) 578 PERL_EMBED_LDOPTS = `perl -MExtUtils::Embed -e ldopts 2>/dev/null`
584 LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-perl.o 579 PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
585 LIB_OBJS += $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o 580 PERL_EMBED_FLAGS=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
586endif
587 581
588ifndef NO_LIBPYTHON 582 ifneq ($(call try-cc,$(SOURCE_PERL_EMBED),$(FLAGS_PERL_EMBED)),y)
589PYTHON_EMBED_LDOPTS = `python-config --ldflags 2>/dev/null` 583 BASIC_CFLAGS += -DNO_LIBPERL
590PYTHON_EMBED_CCOPTS = `python-config --cflags 2>/dev/null` 584 else
585 ALL_LDFLAGS += $(PERL_EMBED_LDOPTS)
586 LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-perl.o
587 LIB_OBJS += $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o
588 endif
591endif 589endif
592 590
593ifneq ($(shell sh -c "(echo '\#include <Python.h>'; echo 'int main(void) { Py_Initialize(); return 0; }') | $(CC) -x c - $(PYTHON_EMBED_CCOPTS) -o $(BITBUCKET) $(PYTHON_EMBED_LDOPTS) > /dev/null 2>&1 && echo y"), y) 591ifdef NO_LIBPYTHON
594 BASIC_CFLAGS += -DNO_LIBPYTHON 592 BASIC_CFLAGS += -DNO_LIBPYTHON
595else 593else
596 ALL_LDFLAGS += $(PYTHON_EMBED_LDOPTS) 594 PYTHON_EMBED_LDOPTS = `python-config --ldflags 2>/dev/null`
597 LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o 595 PYTHON_EMBED_CCOPTS = `python-config --cflags 2>/dev/null`
598 LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o 596 FLAGS_PYTHON_EMBED=$(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
597 ifneq ($(call try-cc,$(SOURCE_PYTHON_EMBED),$(FLAGS_PYTHON_EMBED)),y)
598 BASIC_CFLAGS += -DNO_LIBPYTHON
599 else
600 ALL_LDFLAGS += $(PYTHON_EMBED_LDOPTS)
601 LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o
602 LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o
603 endif
599endif 604endif
600 605
601ifdef NO_DEMANGLE 606ifdef NO_DEMANGLE
@@ -604,20 +609,23 @@ else ifdef HAVE_CPLUS_DEMANGLE
604 EXTLIBS += -liberty 609 EXTLIBS += -liberty
605 BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE 610 BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE
606else 611else
607 has_bfd := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) -lbfd "$(QUIET_STDERR)" && echo y") 612 FLAGS_BFD=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) -lbfd
608 613 has_bfd := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD))
609 ifeq ($(has_bfd),y) 614 ifeq ($(has_bfd),y)
610 EXTLIBS += -lbfd 615 EXTLIBS += -lbfd
611 else 616 else
612 has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) -lbfd -liberty "$(QUIET_STDERR)" && echo y") 617 FLAGS_BFD_IBERTY=$(FLAGS_BFD) -liberty
618 has_bfd_iberty := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD_IBERTY))
613 ifeq ($(has_bfd_iberty),y) 619 ifeq ($(has_bfd_iberty),y)
614 EXTLIBS += -lbfd -liberty 620 EXTLIBS += -lbfd -liberty
615 else 621 else
616 has_bfd_iberty_z := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) -lbfd -liberty -lz "$(QUIET_STDERR)" && echo y") 622 FLAGS_BFD_IBERTY_Z=$(FLAGS_BFD_IBERTY) -lz
623 has_bfd_iberty_z := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD_IBERTY_Z))
617 ifeq ($(has_bfd_iberty_z),y) 624 ifeq ($(has_bfd_iberty_z),y)
618 EXTLIBS += -lbfd -liberty -lz 625 EXTLIBS += -lbfd -liberty -lz
619 else 626 else
620 has_cplus_demangle := $(shell sh -c "(echo 'extern char *cplus_demangle(const char *, int);'; echo 'int main(void) { cplus_demangle(0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) -liberty "$(QUIET_STDERR)" && echo y") 627 FLAGS_CPLUS_DEMANGLE=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) -liberty
628 has_cplus_demangle := $(call try-cc,$(SOURCE_CPLUS_DEMANGLE),$(FLAGS_CPLUS_DEMANGLE))
621 ifeq ($(has_cplus_demangle),y) 629 ifeq ($(has_cplus_demangle),y)
622 EXTLIBS += -liberty 630 EXTLIBS += -liberty
623 BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE 631 BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE
@@ -865,7 +873,7 @@ export TAR INSTALL DESTDIR SHELL_PATH
865 873
866SHELL = $(SHELL_PATH) 874SHELL = $(SHELL_PATH)
867 875
868all:: .perf.dev.null shell_compatibility_test $(ALL_PROGRAMS) $(BUILT_INS) $(OTHER_PROGRAMS) $(OUTPUT)PERF-BUILD-OPTIONS 876all:: shell_compatibility_test $(ALL_PROGRAMS) $(BUILT_INS) $(OTHER_PROGRAMS) $(OUTPUT)PERF-BUILD-OPTIONS
869ifneq (,$X) 877ifneq (,$X)
870 $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), test '$p' -ef '$p$X' || $(RM) '$p';) 878 $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), test '$p' -ef '$p$X' || $(RM) '$p';)
871endif 879endif
@@ -1195,11 +1203,6 @@ clean:
1195.PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS 1203.PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS
1196.PHONY: .FORCE-PERF-BUILD-OPTIONS 1204.PHONY: .FORCE-PERF-BUILD-OPTIONS
1197 1205
1198.perf.dev.null:
1199 touch .perf.dev.null
1200
1201.INTERMEDIATE: .perf.dev.null
1202
1203### Make sure built-ins do not have dups and listed in perf.c 1206### Make sure built-ins do not have dups and listed in perf.c
1204# 1207#
1205check-builtins:: 1208check-builtins::
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 96db5248e995..fd20670ce986 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -61,11 +61,9 @@ static int hists__add_entry(struct hists *self, struct addr_location *al)
61static int process_sample_event(event_t *event, struct perf_session *session) 61static int process_sample_event(event_t *event, struct perf_session *session)
62{ 62{
63 struct addr_location al; 63 struct addr_location al;
64 struct sample_data data;
64 65
65 dump_printf("(IP, %d): %d: %#Lx\n", event->header.misc, 66 if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) {
66 event->ip.pid, event->ip.ip);
67
68 if (event__preprocess_sample(event, session, &al, NULL) < 0) {
69 pr_warning("problem processing %d event, skipping it.\n", 67 pr_warning("problem processing %d event, skipping it.\n",
70 event->header.type); 68 event->header.type);
71 return -1; 69 return -1;
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index f8e3d1852029..29ad20e67919 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -78,8 +78,7 @@ static int __cmd_buildid_cache(void)
78 struct str_node *pos; 78 struct str_node *pos;
79 char debugdir[PATH_MAX]; 79 char debugdir[PATH_MAX];
80 80
81 snprintf(debugdir, sizeof(debugdir), "%s/%s", getenv("HOME"), 81 snprintf(debugdir, sizeof(debugdir), "%s", buildid_dir);
82 DEBUG_CACHE_DIR);
83 82
84 if (add_name_list_str) { 83 if (add_name_list_str) {
85 list = strlist__new(true, add_name_list_str); 84 list = strlist__new(true, add_name_list_str);
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index a6e2fdc7a04e..39e6627ebb96 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -35,10 +35,7 @@ static int diff__process_sample_event(event_t *event, struct perf_session *sessi
35 struct addr_location al; 35 struct addr_location al;
36 struct sample_data data = { .period = 1, }; 36 struct sample_data data = { .period = 1, };
37 37
38 dump_printf("(IP, %d): %d: %#Lx\n", event->header.misc, 38 if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) {
39 event->ip.pid, event->ip.ip);
40
41 if (event__preprocess_sample(event, session, &al, NULL) < 0) {
42 pr_warning("problem processing %d event, skipping it.\n", 39 pr_warning("problem processing %d event, skipping it.\n",
43 event->header.type); 40 event->header.type);
44 return -1; 41 return -1;
@@ -47,8 +44,6 @@ static int diff__process_sample_event(event_t *event, struct perf_session *sessi
47 if (al.filtered || al.sym == NULL) 44 if (al.filtered || al.sym == NULL)
48 return 0; 45 return 0;
49 46
50 event__parse_sample(event, session->sample_type, &data);
51
52 if (hists__add_entry(&session->hists, &al, data.period)) { 47 if (hists__add_entry(&session->hists, &al, data.period)) {
53 pr_warning("problem incrementing symbol period, skipping event\n"); 48 pr_warning("problem incrementing symbol period, skipping event\n");
54 return -1; 49 return -1;
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index e4a4da32a568..54551867e7e0 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -182,6 +182,8 @@ static const struct option options[] = {
182 "Show source code lines.", opt_show_lines), 182 "Show source code lines.", opt_show_lines),
183 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 183 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
184 "file", "vmlinux pathname"), 184 "file", "vmlinux pathname"),
185 OPT_STRING('s', "source", &symbol_conf.source_prefix,
186 "directory", "path to kernel source"),
185#endif 187#endif
186 OPT__DRY_RUN(&probe_event_dry_run), 188 OPT__DRY_RUN(&probe_event_dry_run),
187 OPT_INTEGER('\0', "max-probes", &params.max_probe_points, 189 OPT_INTEGER('\0', "max-probes", &params.max_probe_points,
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index dc3435e18bde..86b1c3b6264e 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -49,7 +49,6 @@ static int group = 0;
49static int realtime_prio = 0; 49static int realtime_prio = 0;
50static bool raw_samples = false; 50static bool raw_samples = false;
51static bool system_wide = false; 51static bool system_wide = false;
52static int profile_cpu = -1;
53static pid_t target_pid = -1; 52static pid_t target_pid = -1;
54static pid_t target_tid = -1; 53static pid_t target_tid = -1;
55static pid_t *all_tids = NULL; 54static pid_t *all_tids = NULL;
@@ -61,6 +60,7 @@ static bool call_graph = false;
61static bool inherit_stat = false; 60static bool inherit_stat = false;
62static bool no_samples = false; 61static bool no_samples = false;
63static bool sample_address = false; 62static bool sample_address = false;
63static bool no_buildid = false;
64 64
65static long samples = 0; 65static long samples = 0;
66static u64 bytes_written = 0; 66static u64 bytes_written = 0;
@@ -74,6 +74,7 @@ static int file_new = 1;
74static off_t post_processing_offset; 74static off_t post_processing_offset;
75 75
76static struct perf_session *session; 76static struct perf_session *session;
77static const char *cpu_list;
77 78
78struct mmap_data { 79struct mmap_data {
79 int counter; 80 int counter;
@@ -268,12 +269,17 @@ static void create_counter(int counter, int cpu)
268 if (inherit_stat) 269 if (inherit_stat)
269 attr->inherit_stat = 1; 270 attr->inherit_stat = 1;
270 271
271 if (sample_address) 272 if (sample_address) {
272 attr->sample_type |= PERF_SAMPLE_ADDR; 273 attr->sample_type |= PERF_SAMPLE_ADDR;
274 attr->mmap_data = track;
275 }
273 276
274 if (call_graph) 277 if (call_graph)
275 attr->sample_type |= PERF_SAMPLE_CALLCHAIN; 278 attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
276 279
280 if (system_wide)
281 attr->sample_type |= PERF_SAMPLE_CPU;
282
277 if (raw_samples) { 283 if (raw_samples) {
278 attr->sample_type |= PERF_SAMPLE_TIME; 284 attr->sample_type |= PERF_SAMPLE_TIME;
279 attr->sample_type |= PERF_SAMPLE_RAW; 285 attr->sample_type |= PERF_SAMPLE_RAW;
@@ -300,7 +306,7 @@ try_again:
300 die("Permission error - are you root?\n" 306 die("Permission error - are you root?\n"
301 "\t Consider tweaking" 307 "\t Consider tweaking"
302 " /proc/sys/kernel/perf_event_paranoid.\n"); 308 " /proc/sys/kernel/perf_event_paranoid.\n");
303 else if (err == ENODEV && profile_cpu != -1) { 309 else if (err == ENODEV && cpu_list) {
304 die("No such device - did you specify" 310 die("No such device - did you specify"
305 " an out-of-range profile CPU?\n"); 311 " an out-of-range profile CPU?\n");
306 } 312 }
@@ -622,10 +628,15 @@ static int __cmd_record(int argc, const char **argv)
622 close(child_ready_pipe[0]); 628 close(child_ready_pipe[0]);
623 } 629 }
624 630
625 if ((!system_wide && no_inherit) || profile_cpu != -1) { 631 nr_cpus = read_cpu_map(cpu_list);
626 open_counters(profile_cpu); 632 if (nr_cpus < 1) {
633 perror("failed to collect number of CPUs\n");
634 return -1;
635 }
636
637 if (!system_wide && no_inherit && !cpu_list) {
638 open_counters(-1);
627 } else { 639 } else {
628 nr_cpus = read_cpu_map();
629 for (i = 0; i < nr_cpus; i++) 640 for (i = 0; i < nr_cpus; i++)
630 open_counters(cpumap[i]); 641 open_counters(cpumap[i]);
631 } 642 }
@@ -704,7 +715,7 @@ static int __cmd_record(int argc, const char **argv)
704 if (perf_guest) 715 if (perf_guest)
705 perf_session__process_machines(session, event__synthesize_guest_os); 716 perf_session__process_machines(session, event__synthesize_guest_os);
706 717
707 if (!system_wide && profile_cpu == -1) 718 if (!system_wide)
708 event__synthesize_thread(target_tid, process_synthesized_event, 719 event__synthesize_thread(target_tid, process_synthesized_event,
709 session); 720 session);
710 else 721 else
@@ -794,8 +805,8 @@ static const struct option options[] = {
794 "system-wide collection from all CPUs"), 805 "system-wide collection from all CPUs"),
795 OPT_BOOLEAN('A', "append", &append_file, 806 OPT_BOOLEAN('A', "append", &append_file,
796 "append to the output file to do incremental profiling"), 807 "append to the output file to do incremental profiling"),
797 OPT_INTEGER('C', "profile_cpu", &profile_cpu, 808 OPT_STRING('C', "cpu", &cpu_list, "cpu",
798 "CPU to profile on"), 809 "list of cpus to monitor"),
799 OPT_BOOLEAN('f', "force", &force, 810 OPT_BOOLEAN('f', "force", &force,
800 "overwrite existing data file (deprecated)"), 811 "overwrite existing data file (deprecated)"),
801 OPT_U64('c', "count", &user_interval, "event period to sample"), 812 OPT_U64('c', "count", &user_interval, "event period to sample"),
@@ -815,6 +826,8 @@ static const struct option options[] = {
815 "Sample addresses"), 826 "Sample addresses"),
816 OPT_BOOLEAN('n', "no-samples", &no_samples, 827 OPT_BOOLEAN('n', "no-samples", &no_samples,
817 "don't sample"), 828 "don't sample"),
829 OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid,
830 "do not update the buildid cache"),
818 OPT_END() 831 OPT_END()
819}; 832};
820 833
@@ -825,7 +838,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
825 argc = parse_options(argc, argv, options, record_usage, 838 argc = parse_options(argc, argv, options, record_usage,
826 PARSE_OPT_STOP_AT_NON_OPTION); 839 PARSE_OPT_STOP_AT_NON_OPTION);
827 if (!argc && target_pid == -1 && target_tid == -1 && 840 if (!argc && target_pid == -1 && target_tid == -1 &&
828 !system_wide && profile_cpu == -1) 841 !system_wide && !cpu_list)
829 usage_with_options(record_usage, options); 842 usage_with_options(record_usage, options);
830 843
831 if (force && append_file) { 844 if (force && append_file) {
@@ -839,6 +852,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
839 } 852 }
840 853
841 symbol__init(); 854 symbol__init();
855 if (no_buildid)
856 disable_buildid_cache();
842 857
843 if (!nr_counters) { 858 if (!nr_counters) {
844 nr_counters = 1; 859 nr_counters = 1;
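Editor's note: the builtin-record.c changes above make -d additionally request PERF_SAMPLE_ADDR plus the new mmap_data attribute bit, and make system-wide sessions always request PERF_SAMPLE_CPU. Below is a hypothetical stand-alone sketch of that attribute setup using the raw perf_event_open syscall; the event choice (CPU cycles), the sample period and the flag values are illustrative assumptions, not taken from the commit.

	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/perf_event.h>

	static int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid,
				       int cpu, int group_fd, unsigned long flags)
	{
		return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
	}

	int main(void)
	{
		struct perf_event_attr attr;
		int sample_address = 1;   /* as if -d had been passed */
		int system_wide = 0;      /* as if -a had not been passed */
		int fd;

		memset(&attr, 0, sizeof(attr));
		attr.type = PERF_TYPE_HARDWARE;
		attr.config = PERF_COUNT_HW_CPU_CYCLES;
		attr.size = sizeof(attr);
		attr.sample_period = 100000;
		attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;

		if (sample_address) {
			attr.sample_type |= PERF_SAMPLE_ADDR;
			attr.mmap_data = 1;              /* also track non-exec mmaps */
		}
		if (system_wide)
			attr.sample_type |= PERF_SAMPLE_CPU;

		fd = sys_perf_event_open(&attr, 0 /* self */, -1 /* any cpu */, -1, 0);
		if (fd < 0)
			perror("perf_event_open");
		else
			close(fd);
		return 0;
	}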
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 359205782964..371a3c995806 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -155,30 +155,7 @@ static int process_sample_event(event_t *event, struct perf_session *session)
155 struct addr_location al; 155 struct addr_location al;
156 struct perf_event_attr *attr; 156 struct perf_event_attr *attr;
157 157
158 event__parse_sample(event, session->sample_type, &data); 158 if (event__preprocess_sample(event, session, &al, &data, NULL) < 0) {
159
160 dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
161 data.pid, data.tid, data.ip, data.period);
162
163 if (session->sample_type & PERF_SAMPLE_CALLCHAIN) {
164 unsigned int i;
165
166 dump_printf("... chain: nr:%Lu\n", data.callchain->nr);
167
168 if (!ip_callchain__valid(data.callchain, event)) {
169 pr_debug("call-chain problem with event, "
170 "skipping it.\n");
171 return 0;
172 }
173
174 if (dump_trace) {
175 for (i = 0; i < data.callchain->nr; i++)
176 dump_printf("..... %2d: %016Lx\n",
177 i, data.callchain->ips[i]);
178 }
179 }
180
181 if (event__preprocess_sample(event, session, &al, NULL) < 0) {
182 fprintf(stderr, "problem processing %d event, skipping it.\n", 159 fprintf(stderr, "problem processing %d event, skipping it.\n",
183 event->header.type); 160 event->header.type);
184 return -1; 161 return -1;
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 9a39ca3c3ac4..a6b4d44f9502 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -69,7 +69,7 @@ static struct perf_event_attr default_attrs[] = {
69}; 69};
70 70
71static bool system_wide = false; 71static bool system_wide = false;
72static unsigned int nr_cpus = 0; 72static int nr_cpus = 0;
73static int run_idx = 0; 73static int run_idx = 0;
74 74
75static int run_count = 1; 75static int run_count = 1;
@@ -82,6 +82,7 @@ static int thread_num = 0;
82static pid_t child_pid = -1; 82static pid_t child_pid = -1;
83static bool null_run = false; 83static bool null_run = false;
84static bool big_num = false; 84static bool big_num = false;
85static const char *cpu_list;
85 86
86 87
87static int *fd[MAX_NR_CPUS][MAX_COUNTERS]; 88static int *fd[MAX_NR_CPUS][MAX_COUNTERS];
@@ -158,7 +159,7 @@ static int create_perf_stat_counter(int counter)
158 PERF_FORMAT_TOTAL_TIME_RUNNING; 159 PERF_FORMAT_TOTAL_TIME_RUNNING;
159 160
160 if (system_wide) { 161 if (system_wide) {
161 unsigned int cpu; 162 int cpu;
162 163
163 for (cpu = 0; cpu < nr_cpus; cpu++) { 164 for (cpu = 0; cpu < nr_cpus; cpu++) {
164 fd[cpu][counter][0] = sys_perf_event_open(attr, 165 fd[cpu][counter][0] = sys_perf_event_open(attr,
@@ -208,7 +209,7 @@ static inline int nsec_counter(int counter)
208static void read_counter(int counter) 209static void read_counter(int counter)
209{ 210{
210 u64 count[3], single_count[3]; 211 u64 count[3], single_count[3];
211 unsigned int cpu; 212 int cpu;
212 size_t res, nv; 213 size_t res, nv;
213 int scaled; 214 int scaled;
214 int i, thread; 215 int i, thread;
@@ -542,6 +543,8 @@ static const struct option options[] = {
542 "null run - dont start any counters"), 543 "null run - dont start any counters"),
543 OPT_BOOLEAN('B', "big-num", &big_num, 544 OPT_BOOLEAN('B', "big-num", &big_num,
544 "print large numbers with thousands\' separators"), 545 "print large numbers with thousands\' separators"),
546 OPT_STRING('C', "cpu", &cpu_list, "cpu",
547 "list of cpus to monitor in system-wide"),
545 OPT_END() 548 OPT_END()
546}; 549};
547 550
@@ -566,10 +569,13 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
566 } 569 }
567 570
568 if (system_wide) 571 if (system_wide)
569 nr_cpus = read_cpu_map(); 572 nr_cpus = read_cpu_map(cpu_list);
570 else 573 else
571 nr_cpus = 1; 574 nr_cpus = 1;
572 575
576 if (nr_cpus < 1)
577 usage_with_options(stat_usage, options);
578
573 if (target_pid != -1) { 579 if (target_pid != -1) {
574 target_tid = target_pid; 580 target_tid = target_pid;
575 thread_num = find_all_tid(target_pid, &all_tids); 581 thread_num = find_all_tid(target_pid, &all_tids);
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index a66f4272b994..1e8e92e317b9 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -102,6 +102,7 @@ struct sym_entry *sym_filter_entry_sched = NULL;
102static int sym_pcnt_filter = 5; 102static int sym_pcnt_filter = 5;
103static int sym_counter = 0; 103static int sym_counter = 0;
104static int display_weighted = -1; 104static int display_weighted = -1;
105static const char *cpu_list;
105 106
106/* 107/*
107 * Symbols 108 * Symbols
@@ -982,6 +983,7 @@ static void event__process_sample(const event_t *self,
982 u64 ip = self->ip.ip; 983 u64 ip = self->ip.ip;
983 struct sym_entry *syme; 984 struct sym_entry *syme;
984 struct addr_location al; 985 struct addr_location al;
986 struct sample_data data;
985 struct machine *machine; 987 struct machine *machine;
986 u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 988 u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
987 989
@@ -1024,7 +1026,8 @@ static void event__process_sample(const event_t *self,
1024 if (self->header.misc & PERF_RECORD_MISC_EXACT_IP) 1026 if (self->header.misc & PERF_RECORD_MISC_EXACT_IP)
1025 exact_samples++; 1027 exact_samples++;
1026 1028
1027 if (event__preprocess_sample(self, session, &al, symbol_filter) < 0 || 1029 if (event__preprocess_sample(self, session, &al, &data,
1030 symbol_filter) < 0 ||
1028 al.filtered) 1031 al.filtered)
1029 return; 1032 return;
1030 1033
@@ -1351,8 +1354,8 @@ static const struct option options[] = {
1351 "profile events on existing thread id"), 1354 "profile events on existing thread id"),
1352 OPT_BOOLEAN('a', "all-cpus", &system_wide, 1355 OPT_BOOLEAN('a', "all-cpus", &system_wide,
1353 "system-wide collection from all CPUs"), 1356 "system-wide collection from all CPUs"),
1354 OPT_INTEGER('C', "CPU", &profile_cpu, 1357 OPT_STRING('C', "cpu", &cpu_list, "cpu",
1355 "CPU to profile on"), 1358 "list of cpus to monitor"),
1356 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 1359 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
1357 "file", "vmlinux pathname"), 1360 "file", "vmlinux pathname"),
1358 OPT_BOOLEAN('K', "hide_kernel_symbols", &hide_kernel_symbols, 1361 OPT_BOOLEAN('K', "hide_kernel_symbols", &hide_kernel_symbols,
@@ -1428,10 +1431,10 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
1428 return -ENOMEM; 1431 return -ENOMEM;
1429 1432
1430 /* CPU and PID are mutually exclusive */ 1433 /* CPU and PID are mutually exclusive */
1431 if (target_tid > 0 && profile_cpu != -1) { 1434 if (target_tid > 0 && cpu_list) {
1432 printf("WARNING: PID switch overriding CPU\n"); 1435 printf("WARNING: PID switch overriding CPU\n");
1433 sleep(1); 1436 sleep(1);
1434 profile_cpu = -1; 1437 cpu_list = NULL;
1435 } 1438 }
1436 1439
1437 if (!nr_counters) 1440 if (!nr_counters)
@@ -1469,10 +1472,13 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
1469 attrs[counter].sample_period = default_interval; 1472 attrs[counter].sample_period = default_interval;
1470 } 1473 }
1471 1474
1472 if (target_tid != -1 || profile_cpu != -1) 1475 if (target_tid != -1)
1473 nr_cpus = 1; 1476 nr_cpus = 1;
1474 else 1477 else
1475 nr_cpus = read_cpu_map(); 1478 nr_cpus = read_cpu_map(cpu_list);
1479
1480 if (nr_cpus < 1)
1481 usage_with_options(top_usage, options);
1476 1482
1477 get_term_dimensions(&winsize); 1483 get_term_dimensions(&winsize);
1478 if (print_entries == 0) { 1484 if (print_entries == 0) {
diff --git a/tools/perf/feature-tests.mak b/tools/perf/feature-tests.mak
new file mode 100644
index 000000000000..ddb68e601f0e
--- /dev/null
+++ b/tools/perf/feature-tests.mak
@@ -0,0 +1,119 @@
1define SOURCE_HELLO
2#include <stdio.h>
3int main(void)
4{
5 return puts(\"hi\");
6}
7endef
8
9ifndef NO_DWARF
10define SOURCE_DWARF
11#include <dwarf.h>
12#include <libdw.h>
13#include <version.h>
14#ifndef _ELFUTILS_PREREQ
15#error
16#endif
17
18int main(void)
19{
20 Dwarf *dbg = dwarf_begin(0, DWARF_C_READ);
21 return (long)dbg;
22}
23endef
24endif
25
26define SOURCE_LIBELF
27#include <libelf.h>
28
29int main(void)
30{
31 Elf *elf = elf_begin(0, ELF_C_READ, 0);
32 return (long)elf;
33}
34endef
35
36define SOURCE_GLIBC
37#include <gnu/libc-version.h>
38
39int main(void)
40{
41 const char *version = gnu_get_libc_version();
42 return (long)version;
43}
44endef
45
46define SOURCE_ELF_MMAP
47#include <libelf.h>
48int main(void)
49{
50 Elf *elf = elf_begin(0, ELF_C_READ_MMAP, 0);
51 return (long)elf;
52}
53endef
54
55ifndef NO_NEWT
56define SOURCE_NEWT
57#include <newt.h>
58
59int main(void)
60{
61 newtInit();
62 newtCls();
63 return newtFinished();
64}
65endef
66endif
67
68ifndef NO_LIBPERL
69define SOURCE_PERL_EMBED
70#include <EXTERN.h>
71#include <perl.h>
72
73int main(void)
74{
75perl_alloc();
76return 0;
77}
78endef
79endif
80
81ifndef NO_LIBPYTHON
82define SOURCE_PYTHON_EMBED
83#include <Python.h>
84
85int main(void)
86{
87 Py_Initialize();
88 return 0;
89}
90endef
91endif
92
93define SOURCE_BFD
94#include <bfd.h>
95
96int main(void)
97{
98 bfd_demangle(0, 0, 0);
99 return 0;
100}
101endef
102
103define SOURCE_CPLUS_DEMANGLE
104extern char *cplus_demangle(const char *, int);
105
106int main(void)
107{
108 cplus_demangle(0, 0);
109 return 0;
110}
111endef
112
113# try-cc
114# Usage: option = $(call try-cc, source-to-build, cc-options)
115try-cc = $(shell sh -c \
116 'TMP="$(TMPOUT).$$$$"; \
117 echo "$(1)" | \
118 $(CC) -x c - $(2) -o "$$TMP" > /dev/null 2>&1 && echo y; \
119 rm -f "$$TMP"')
diff --git a/tools/perf/perf-archive.sh b/tools/perf/perf-archive.sh
index 2e7a4f417e20..677e59d62a8d 100644
--- a/tools/perf/perf-archive.sh
+++ b/tools/perf/perf-archive.sh
@@ -7,7 +7,17 @@ if [ $# -ne 0 ] ; then
7 PERF_DATA=$1 7 PERF_DATA=$1
8fi 8fi
9 9
10DEBUGDIR=~/.debug/ 10#
11# PERF_BUILDID_DIR environment variable set by perf
12# path to buildid directory, default to $HOME/.debug
13#
14if [ -z $PERF_BUILDID_DIR ]; then
15 PERF_BUILDID_DIR=~/.debug/
16else
17 # append / to make substitutions work
18 PERF_BUILDID_DIR=$PERF_BUILDID_DIR/
19fi
20
11BUILDIDS=$(mktemp /tmp/perf-archive-buildids.XXXXXX) 21BUILDIDS=$(mktemp /tmp/perf-archive-buildids.XXXXXX)
12NOBUILDID=0000000000000000000000000000000000000000 22NOBUILDID=0000000000000000000000000000000000000000
13 23
@@ -22,13 +32,13 @@ MANIFEST=$(mktemp /tmp/perf-archive-manifest.XXXXXX)
22 32
23cut -d ' ' -f 1 $BUILDIDS | \ 33cut -d ' ' -f 1 $BUILDIDS | \
24while read build_id ; do 34while read build_id ; do
25 linkname=$DEBUGDIR.build-id/${build_id:0:2}/${build_id:2} 35 linkname=$PERF_BUILDID_DIR.build-id/${build_id:0:2}/${build_id:2}
26 filename=$(readlink -f $linkname) 36 filename=$(readlink -f $linkname)
27 echo ${linkname#$DEBUGDIR} >> $MANIFEST 37 echo ${linkname#$PERF_BUILDID_DIR} >> $MANIFEST
28 echo ${filename#$DEBUGDIR} >> $MANIFEST 38 echo ${filename#$PERF_BUILDID_DIR} >> $MANIFEST
29done 39done
30 40
31tar cfj $PERF_DATA.tar.bz2 -C $DEBUGDIR -T $MANIFEST 41tar cfj $PERF_DATA.tar.bz2 -C $PERF_BUILDID_DIR -T $MANIFEST
32rm -f $MANIFEST $BUILDIDS 42rm -f $MANIFEST $BUILDIDS
33echo -e "Now please run:\n" 43echo -e "Now please run:\n"
34echo -e "$ tar xvf $PERF_DATA.tar.bz2 -C ~/.debug\n" 44echo -e "$ tar xvf $PERF_DATA.tar.bz2 -C ~/.debug\n"
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 6e4871191138..cdd6c03f1e14 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -458,6 +458,8 @@ int main(int argc, const char **argv)
458 handle_options(&argv, &argc, NULL); 458 handle_options(&argv, &argc, NULL);
459 commit_pager_choice(); 459 commit_pager_choice();
460 set_debugfs_path(); 460 set_debugfs_path();
461 set_buildid_dir();
462
461 if (argc > 0) { 463 if (argc > 0) {
462 if (!prefixcmp(argv[0], "--")) 464 if (!prefixcmp(argv[0], "--"))
463 argv[0] += 2; 465 argv[0] += 2;
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 70c5cf87d020..5c26e2d314af 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -43,19 +43,17 @@ struct perf_event_ops build_id__mark_dso_hit_ops = {
43char *dso__build_id_filename(struct dso *self, char *bf, size_t size) 43char *dso__build_id_filename(struct dso *self, char *bf, size_t size)
44{ 44{
45 char build_id_hex[BUILD_ID_SIZE * 2 + 1]; 45 char build_id_hex[BUILD_ID_SIZE * 2 + 1];
46 const char *home;
47 46
48 if (!self->has_build_id) 47 if (!self->has_build_id)
49 return NULL; 48 return NULL;
50 49
51 build_id__sprintf(self->build_id, sizeof(self->build_id), build_id_hex); 50 build_id__sprintf(self->build_id, sizeof(self->build_id), build_id_hex);
52 home = getenv("HOME");
53 if (bf == NULL) { 51 if (bf == NULL) {
54 if (asprintf(&bf, "%s/%s/.build-id/%.2s/%s", home, 52 if (asprintf(&bf, "%s/.build-id/%.2s/%s", buildid_dir,
55 DEBUG_CACHE_DIR, build_id_hex, build_id_hex + 2) < 0) 53 build_id_hex, build_id_hex + 2) < 0)
56 return NULL; 54 return NULL;
57 } else 55 } else
58 snprintf(bf, size, "%s/%s/.build-id/%.2s/%s", home, 56 snprintf(bf, size, "%s/.build-id/%.2s/%s", buildid_dir,
59 DEBUG_CACHE_DIR, build_id_hex, build_id_hex + 2); 57 build_id_hex, build_id_hex + 2);
60 return bf; 58 return bf;
61} 59}
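Editor's note: after this change the build-id cache path is rooted at the global buildid_dir instead of $HOME/DEBUG_CACHE_DIR, keeping the layout <buildid_dir>/.build-id/<first two hex chars>/<remaining hex chars>. A small self-contained sketch of that path construction follows; buildid_dir and the example build-id value are assumptions for illustration.

	#define _GNU_SOURCE
	#include <stdio.h>
	#include <stdlib.h>

	static const char *buildid_dir = "/home/user/.debug";   /* assumed default */

	static char *build_id_filename(const char *build_id_hex, char *bf, size_t size)
	{
		if (bf == NULL) {
			/* caller wants a freshly allocated string */
			if (asprintf(&bf, "%s/.build-id/%.2s/%s", buildid_dir,
				     build_id_hex, build_id_hex + 2) < 0)
				return NULL;
		} else {
			/* caller supplied a buffer */
			snprintf(bf, size, "%s/.build-id/%.2s/%s", buildid_dir,
				 build_id_hex, build_id_hex + 2);
		}
		return bf;
	}

	int main(void)
	{
		char *path = build_id_filename(
			"0123456789abcdef0123456789abcdef01234567", NULL, 0);

		if (path) {
			puts(path);
			free(path);
		}
		return 0;
	}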
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
index 65fe664fddf6..27e9ebe4076e 100644
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h
@@ -23,6 +23,7 @@ extern int perf_config(config_fn_t fn, void *);
23extern int perf_config_int(const char *, const char *); 23extern int perf_config_int(const char *, const char *);
24extern int perf_config_bool(const char *, const char *); 24extern int perf_config_bool(const char *, const char *);
25extern int config_error_nonbool(const char *); 25extern int config_error_nonbool(const char *);
26extern const char *perf_config_dirname(const char *, const char *);
26 27
27/* pager.c */ 28/* pager.c */
28extern void setup_pager(void); 29extern void setup_pager(void);
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 62b69ad4aa73..e63c997d6c1b 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -18,7 +18,7 @@
18#include "util.h" 18#include "util.h"
19#include "callchain.h" 19#include "callchain.h"
20 20
21bool ip_callchain__valid(struct ip_callchain *chain, event_t *event) 21bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event)
22{ 22{
23 unsigned int chain_size = event->header.size; 23 unsigned int chain_size = event->header.size;
24 chain_size -= (unsigned long)&event->ip.__more_data - (unsigned long)event; 24 chain_size -= (unsigned long)&event->ip.__more_data - (unsigned long)event;
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 1ca73e4a2723..809850fb75fb 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -60,5 +60,5 @@ int register_callchain_param(struct callchain_param *param);
60int append_chain(struct callchain_node *root, struct ip_callchain *chain, 60int append_chain(struct callchain_node *root, struct ip_callchain *chain,
61 struct map_symbol *syms); 61 struct map_symbol *syms);
62 62
63bool ip_callchain__valid(struct ip_callchain *chain, event_t *event); 63bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event);
64#endif /* __PERF_CALLCHAIN_H */ 64#endif /* __PERF_CALLCHAIN_H */
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index dabe892d0e53..e02d78cae70f 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -11,6 +11,11 @@
11 11
12#define MAXNAME (256) 12#define MAXNAME (256)
13 13
14#define DEBUG_CACHE_DIR ".debug"
15
16
17char buildid_dir[MAXPATHLEN]; /* root dir for buildid, binary cache */
18
14static FILE *config_file; 19static FILE *config_file;
15static const char *config_file_name; 20static const char *config_file_name;
16static int config_linenr; 21static int config_linenr;
@@ -127,7 +132,7 @@ static int get_value(config_fn_t fn, void *data, char *name, unsigned int len)
127 break; 132 break;
128 if (!iskeychar(c)) 133 if (!iskeychar(c))
129 break; 134 break;
130 name[len++] = tolower(c); 135 name[len++] = c;
131 if (len >= MAXNAME) 136 if (len >= MAXNAME)
132 return -1; 137 return -1;
133 } 138 }
@@ -327,6 +332,13 @@ int perf_config_bool(const char *name, const char *value)
327 return !!perf_config_bool_or_int(name, value, &discard); 332 return !!perf_config_bool_or_int(name, value, &discard);
328} 333}
329 334
335const char *perf_config_dirname(const char *name, const char *value)
336{
337 if (!name)
338 return NULL;
339 return value;
340}
341
330static int perf_default_core_config(const char *var __used, const char *value __used) 342static int perf_default_core_config(const char *var __used, const char *value __used)
331{ 343{
332 /* Add other config variables here and to Documentation/config.txt. */ 344 /* Add other config variables here and to Documentation/config.txt. */
@@ -428,3 +440,53 @@ int config_error_nonbool(const char *var)
428{ 440{
429 return error("Missing value for '%s'", var); 441 return error("Missing value for '%s'", var);
430} 442}
443
444struct buildid_dir_config {
445 char *dir;
446};
447
448static int buildid_dir_command_config(const char *var, const char *value,
449 void *data)
450{
451 struct buildid_dir_config *c = data;
452 const char *v;
453
454 /* same dir for all commands */
455 if (!prefixcmp(var, "buildid.") && !strcmp(var + 8, "dir")) {
456 v = perf_config_dirname(var, value);
457 if (!v)
458 return -1;
459 strncpy(c->dir, v, MAXPATHLEN-1);
460 c->dir[MAXPATHLEN-1] = '\0';
461 }
462 return 0;
463}
464
465static void check_buildid_dir_config(void)
466{
467 struct buildid_dir_config c;
468 c.dir = buildid_dir;
469 perf_config(buildid_dir_command_config, &c);
470}
471
472void set_buildid_dir(void)
473{
474 buildid_dir[0] = '\0';
475
476 /* try config file */
477 check_buildid_dir_config();
478
479 /* default to $HOME/.debug */
480 if (buildid_dir[0] == '\0') {
481 char *v = getenv("HOME");
482 if (v) {
483 snprintf(buildid_dir, MAXPATHLEN-1, "%s/%s",
484 v, DEBUG_CACHE_DIR);
485 } else {
486 strncpy(buildid_dir, DEBUG_CACHE_DIR, MAXPATHLEN-1);
487 }
488 buildid_dir[MAXPATHLEN-1] = '\0';
489 }
490 /* for communicating with external commands */
491 setenv("PERF_BUILDID_DIR", buildid_dir, 1);
492}
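Editor's note: set_buildid_dir() above resolves the cache directory in a fixed order (buildid.dir from the config file, otherwise $HOME/.debug, otherwise a bare .debug) and then exports it as PERF_BUILDID_DIR for external helpers such as perf-archive. A distilled, userspace-runnable version of that resolution order is sketched below, with the config lookup replaced by a plain string argument.

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	#define MAXPATHLEN 4096
	#define DEBUG_CACHE_DIR ".debug"

	static char buildid_dir[MAXPATHLEN];

	static void set_buildid_dir(const char *configured)   /* NULL if unset */
	{
		if (configured && *configured) {
			strncpy(buildid_dir, configured, MAXPATHLEN - 1);
		} else {
			const char *home = getenv("HOME");

			if (home)
				snprintf(buildid_dir, MAXPATHLEN - 1, "%s/%s",
					 home, DEBUG_CACHE_DIR);
			else
				strncpy(buildid_dir, DEBUG_CACHE_DIR, MAXPATHLEN - 1);
		}
		buildid_dir[MAXPATHLEN - 1] = '\0';

		/* let external commands pick up the same cache directory */
		setenv("PERF_BUILDID_DIR", buildid_dir, 1);
	}

	int main(void)
	{
		set_buildid_dir(NULL);          /* no buildid.dir in the config */
		printf("buildid cache: %s\n", buildid_dir);
		return 0;
	}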
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 4e01490e51e5..0f9b8d7a7d7e 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -20,7 +20,7 @@ static int default_cpu_map(void)
20 return nr_cpus; 20 return nr_cpus;
21} 21}
22 22
23int read_cpu_map(void) 23static int read_all_cpu_map(void)
24{ 24{
25 FILE *onlnf; 25 FILE *onlnf;
26 int nr_cpus = 0; 26 int nr_cpus = 0;
@@ -57,3 +57,58 @@ int read_cpu_map(void)
57 57
58 return default_cpu_map(); 58 return default_cpu_map();
59} 59}
60
61int read_cpu_map(const char *cpu_list)
62{
63 unsigned long start_cpu, end_cpu = 0;
64 char *p = NULL;
65 int i, nr_cpus = 0;
66
67 if (!cpu_list)
68 return read_all_cpu_map();
69
70 if (!isdigit(*cpu_list))
71 goto invalid;
72
73 while (isdigit(*cpu_list)) {
74 p = NULL;
75 start_cpu = strtoul(cpu_list, &p, 0);
76 if (start_cpu >= INT_MAX
77 || (*p != '\0' && *p != ',' && *p != '-'))
78 goto invalid;
79
80 if (*p == '-') {
81 cpu_list = ++p;
82 p = NULL;
83 end_cpu = strtoul(cpu_list, &p, 0);
84
85 if (end_cpu >= INT_MAX || (*p != '\0' && *p != ','))
86 goto invalid;
87
88 if (end_cpu < start_cpu)
89 goto invalid;
90 } else {
91 end_cpu = start_cpu;
92 }
93
94 for (; start_cpu <= end_cpu; start_cpu++) {
95 /* check for duplicates */
96 for (i = 0; i < nr_cpus; i++)
97 if (cpumap[i] == (int)start_cpu)
98 goto invalid;
99
100 assert(nr_cpus < MAX_NR_CPUS);
101 cpumap[nr_cpus++] = (int)start_cpu;
102 }
103 if (*p)
104 ++p;
105
106 cpu_list = p;
107 }
108 if (nr_cpus > 0)
109 return nr_cpus;
110
111 return default_cpu_map();
112invalid:
113 return -1;
114}
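Editor's note: the new read_cpu_map() accepts the cpu-list syntax documented earlier ("0,1" or ranges like "0-2", comma-separated, no spaces), rejects duplicates, and falls back to all online CPUs when no list is given. The sketch below is a simplified, self-contained re-implementation of just the list parsing for illustration; it skips the duplicate check and the online-CPU fallback of the real code.

	#include <ctype.h>
	#include <limits.h>
	#include <stdio.h>
	#include <stdlib.h>

	#define MAX_NR_CPUS 256

	static int parse_cpu_list(const char *list, int *cpus)
	{
		int nr = 0;

		while (isdigit((unsigned char)*list)) {
			char *end = NULL;
			unsigned long start = strtoul(list, &end, 10);
			unsigned long stop = start;

			if (*end == '-') {              /* a range such as 0-2 */
				list = end + 1;
				stop = strtoul(list, &end, 10);
			}
			if (stop < start || stop >= INT_MAX ||
			    nr + (int)(stop - start + 1) > MAX_NR_CPUS)
				return -1;

			for (; start <= stop; start++)
				cpus[nr++] = (int)start;

			if (*end != ',' && *end != '\0')
				return -1;
			list = (*end == ',') ? end + 1 : end;
		}
		return nr;          /* number of CPUs, or -1 on malformed input */
	}

	int main(void)
	{
		int cpus[MAX_NR_CPUS];
		int i, nr = parse_cpu_list("0-2,4", cpus);

		for (i = 0; i < nr; i++)
			printf("cpu %d\n", cpus[i]);
		return nr < 0;
	}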
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 86c78bb33098..3e60f56e490e 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -1,7 +1,7 @@
1#ifndef __PERF_CPUMAP_H 1#ifndef __PERF_CPUMAP_H
2#define __PERF_CPUMAP_H 2#define __PERF_CPUMAP_H
3 3
4extern int read_cpu_map(void); 4extern int read_cpu_map(const char *cpu_list);
5extern int cpumap[]; 5extern int cpumap[];
6 6
7#endif /* __PERF_CPUMAP_H */ 7#endif /* __PERF_CPUMAP_H */
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index 6cddff2bc970..318dab15d177 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -86,12 +86,10 @@ void trace_event(event_t *event)
86 dump_printf_color(" ", color); 86 dump_printf_color(" ", color);
87 for (j = 0; j < 15-(i & 15); j++) 87 for (j = 0; j < 15-(i & 15); j++)
88 dump_printf_color(" ", color); 88 dump_printf_color(" ", color);
89 for (j = 0; j < (i & 15); j++) { 89 for (j = i & ~15; j <= i; j++) {
90 if (isprint(raw_event[i-15+j])) 90 dump_printf_color("%c", color,
91 dump_printf_color("%c", color, 91 isprint(raw_event[j]) ?
92 raw_event[i-15+j]); 92 raw_event[j] : '.');
93 else
94 dump_printf_color(".", color);
95 } 93 }
96 dump_printf_color("\n", color); 94 dump_printf_color("\n", color);
97 } 95 }
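Editor's note: the debug.c hunk above fixes the ASCII column of the raw-event hex dump so it walks the same bytes as the hex column (j = i & ~15 .. i) and prints '.' for non-printable bytes. A stand-alone version of that corrected dump loop, runnable on an arbitrary buffer, is sketched below.

	#include <ctype.h>
	#include <stdio.h>
	#include <string.h>

	static void dump(const unsigned char *buf, size_t len)
	{
		size_t i, j;

		for (i = 0; i < len; i++) {
			printf(" %02x", buf[i]);

			if ((i & 15) == 15 || i == len - 1) {
				/* pad the hex column of a short final row */
				for (j = 0; j < 15 - (i & 15); j++)
					printf("   ");
				printf("  ");
				/* ASCII column: same byte range as the hex column */
				for (j = i & ~15UL; j <= i; j++)
					printf("%c", isprint(buf[j]) ? buf[j] : '.');
				printf("\n");
			}
		}
	}

	int main(void)
	{
		const char *msg = "perf trace_event() raw payload\x01\x02\x03";

		dump((const unsigned char *)msg, strlen(msg));
		return 0;
	}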
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 1f08f008d289..a7460868124b 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -655,11 +655,36 @@ static void dso__calc_col_width(struct dso *self)
 }
 
 int event__preprocess_sample(const event_t *self, struct perf_session *session,
-			     struct addr_location *al, symbol_filter_t filter)
+			     struct addr_location *al, struct sample_data *data,
+			     symbol_filter_t filter)
 {
 	u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
-	struct thread *thread = perf_session__findnew(session, self->ip.pid);
+	struct thread *thread;
+
+	event__parse_sample(self, session->sample_type, data);
+
+	dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld cpu:%d\n",
+		    self->header.misc, data->pid, data->tid, data->ip,
+		    data->period, data->cpu);
+
+	if (session->sample_type & PERF_SAMPLE_CALLCHAIN) {
+		unsigned int i;
+
+		dump_printf("... chain: nr:%Lu\n", data->callchain->nr);
+
+		if (!ip_callchain__valid(data->callchain, self)) {
+			pr_debug("call-chain problem with event, "
+				 "skipping it.\n");
+			goto out_filtered;
+		}
 
+		if (dump_trace) {
+			for (i = 0; i < data->callchain->nr; i++)
+				dump_printf("..... %2d: %016Lx\n",
+					    i, data->callchain->ips[i]);
+		}
+	}
+	thread = perf_session__findnew(session, self->ip.pid);
 	if (thread == NULL)
 		return -1;
@@ -685,6 +710,7 @@ int event__preprocess_sample(const event_t *self, struct perf_session *session,
 			al->map ? al->map->dso->long_name :
 			al->level == 'H' ? "[hypervisor]" : "<not found>");
 	al->sym = NULL;
+	al->cpu = data->cpu;
 
 	if (al->map) {
 		if (symbol_conf.dso_list &&
@@ -724,9 +750,9 @@ out_filtered:
 	return 0;
 }
 
-int event__parse_sample(event_t *event, u64 type, struct sample_data *data)
+int event__parse_sample(const event_t *event, u64 type, struct sample_data *data)
 {
-	u64 *array = event->sample.array;
+	const u64 *array = event->sample.array;
 
 	if (type & PERF_SAMPLE_IP) {
 		data->ip = event->ip.ip;
@@ -765,7 +791,8 @@ int event__parse_sample(event_t *event, u64 type, struct sample_data *data)
 		u32 *p = (u32 *)array;
 		data->cpu = *p;
 		array++;
-	}
+	} else
+		data->cpu = -1;
 
 	if (type & PERF_SAMPLE_PERIOD) {
 		data->period = *array;
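
event__parse_sample() walks the variable-length sample record in the order its fields were written, and with this change data->cpu falls back to -1 when the CPU word is absent, so consumers such as the new cpu sort key can tell "not sampled" apart from CPU 0. A minimal sketch of the same walk over a toy record layout; the SAMPLE_* flags and struct below are illustrative stand-ins, not the kernel's PERF_SAMPLE_* ABI:

/* Hypothetical sketch: parsing a flag-driven, variable-length sample record. */
#include <stdint.h>
#include <stdio.h>

#define SAMPLE_IP	(1u << 0)
#define SAMPLE_TID	(1u << 1)
#define SAMPLE_CPU	(1u << 2)
#define SAMPLE_PERIOD	(1u << 3)

struct sample {
	uint64_t ip;
	int32_t pid, tid;
	int32_t cpu;
	uint64_t period;
};

static void parse_sample(const uint64_t *array, unsigned int type,
			 struct sample *s)
{
	if (type & SAMPLE_IP)
		s->ip = *array++;

	if (type & SAMPLE_TID) {
		const uint32_t *p = (const uint32_t *)array;
		s->pid = p[0];		/* two u32s packed in one u64 word */
		s->tid = p[1];
		array++;
	}

	if (type & SAMPLE_CPU) {
		s->cpu = *(const uint32_t *)array;
		array++;
	} else {
		s->cpu = -1;		/* mirrors the new "cpu not sampled" default */
	}

	if (type & SAMPLE_PERIOD)
		s->period = *array++;
}

int main(void)
{
	/* ip, {pid,tid} packed in one word, period; no cpu word present */
	const uint64_t record[] = { 0xffffffff8100aa10ull,
				    ((uint64_t)42 << 32) | 41, 1 };
	struct sample s = { 0 };

	parse_sample(record, SAMPLE_IP | SAMPLE_TID | SAMPLE_PERIOD, &s);
	printf("ip=%#llx pid=%d tid=%d cpu=%d period=%llu\n",
	       (unsigned long long)s.ip, s.pid, s.tid, s.cpu,
	       (unsigned long long)s.period);
	return 0;
}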
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 8577085db067..887ee63bbb62 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -157,8 +157,9 @@ int event__process_task(event_t *self, struct perf_session *session);
 
 struct addr_location;
 int event__preprocess_sample(const event_t *self, struct perf_session *session,
-			     struct addr_location *al, symbol_filter_t filter);
-int event__parse_sample(event_t *event, u64 type, struct sample_data *data);
+			     struct addr_location *al, struct sample_data *data,
+			     symbol_filter_t filter);
+int event__parse_sample(const event_t *event, u64 type, struct sample_data *data);
 
 extern const char *event__name[];
 
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 1f62435f96c2..d7e67b167ea3 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -16,6 +16,8 @@
 #include "symbol.h"
 #include "debug.h"
 
+static bool no_buildid_cache = false;
+
 /*
  * Create new perf.data header attribute:
  */
@@ -385,8 +387,7 @@ static int perf_session__cache_build_ids(struct perf_session *self)
 	int ret;
 	char debugdir[PATH_MAX];
 
-	snprintf(debugdir, sizeof(debugdir), "%s/%s", getenv("HOME"),
-		 DEBUG_CACHE_DIR);
+	snprintf(debugdir, sizeof(debugdir), "%s", buildid_dir);
 
 	if (mkdir(debugdir, 0755) != 0 && errno != EEXIST)
 		return -1;
@@ -471,7 +472,8 @@ static int perf_header__adds_write(struct perf_header *self, int fd)
 		}
 		buildid_sec->size = lseek(fd, 0, SEEK_CUR) -
 				    buildid_sec->offset;
-		perf_session__cache_build_ids(session);
+		if (!no_buildid_cache)
+			perf_session__cache_build_ids(session);
 	}
 
 	lseek(fd, sec_start, SEEK_SET);
@@ -1190,3 +1192,8 @@ int event__process_build_id(event_t *self,
 					 session);
 	return 0;
 }
+
+void disable_buildid_cache(void)
+{
+	no_buildid_cache = true;
+}
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 07f89b66b318..68d288c975de 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -70,6 +70,7 @@ struct hist_entry *__hists__add_entry(struct hists *self,
 			.map	= al->map,
 			.sym	= al->sym,
 		},
+		.cpu	= al->cpu,
 		.ip	= al->addr,
 		.level	= al->level,
 		.period	= period,
@@ -1037,7 +1038,7 @@ fallback:
 		 dso, dso->long_name, sym, sym->name);
 
 	snprintf(command, sizeof(command),
-		 "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s|grep -v %s|expand",
+		 "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS -C %s|grep -v %s|expand",
 		 map__rip_2objdump(map, sym->start),
 		 map__rip_2objdump(map, sym->end),
 		 filename, filename);
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index d964cb199c67..baf665383498 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -37,6 +37,7 @@
 #include "event.h"
 #include "debug.h"
 #include "util.h"
+#include "symbol.h"
 #include "probe-finder.h"
 
 /* Kprobe tracer basic type is up to u64 */
@@ -57,6 +58,55 @@ static int strtailcmp(const char *s1, const char *s2)
 	return 0;
 }
 
+/*
+ * Find a src file from a DWARF tag path. Prepend optional source path prefix
+ * and chop off leading directories that do not exist. Result is passed back as
+ * a newly allocated path on success.
+ * Return 0 if file was found and readable, -errno otherwise.
+ */
+static int get_real_path(const char *raw_path, char **new_path)
+{
+	if (!symbol_conf.source_prefix) {
+		if (access(raw_path, R_OK) == 0) {
+			*new_path = strdup(raw_path);
+			return 0;
+		} else
+			return -errno;
+	}
+
+	*new_path = malloc((strlen(symbol_conf.source_prefix) +
+			    strlen(raw_path) + 2));
+	if (!*new_path)
+		return -ENOMEM;
+
+	for (;;) {
+		sprintf(*new_path, "%s/%s", symbol_conf.source_prefix,
+			raw_path);
+
+		if (access(*new_path, R_OK) == 0)
+			return 0;
+
+		switch (errno) {
+		case ENAMETOOLONG:
+		case ENOENT:
+		case EROFS:
+		case EFAULT:
+			raw_path = strchr(++raw_path, '/');
+			if (!raw_path) {
+				free(*new_path);
+				*new_path = NULL;
+				return -ENOENT;
+			}
+			continue;
+
+		default:
+			free(*new_path);
+			*new_path = NULL;
+			return -errno;
+		}
+	}
+}
+
 /* Line number list operations */
 
 /* Add a line to line number list */
@@ -1096,11 +1146,13 @@ end:
 static int line_range_add_line(const char *src, unsigned int lineno,
 			       struct line_range *lr)
 {
+	int ret;
+
 	/* Copy real path */
 	if (!lr->path) {
-		lr->path = strdup(src);
-		if (lr->path == NULL)
-			return -ENOMEM;
+		ret = get_real_path(src, &lr->path);
+		if (ret != 0)
+			return ret;
 	}
 	return line_list__add_line(&lr->line_list, lineno);
 }
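
get_real_path() tries the DWARF path as-is when no prefix is configured; with symbol_conf.source_prefix set it repeatedly glues the prefix onto the path and, whenever access(2) fails, drops one leading directory and retries. A minimal sketch that prints the candidate paths instead of probing them; the prefix and path below are made up:

/* Hypothetical sketch of the prefix-and-trim search performed above. */
#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *prefix = "/usr/src/linux";			/* source prefix */
	const char *raw_path = "/home/build/linux/kernel/sched.c"; /* from DWARF */
	const char *p = raw_path;

	while (p) {
		/* the doubled slash is harmless to access(2) in the real code */
		printf("try: %s/%s\n", prefix, p);
		/* real code stops at the first readable candidate; otherwise it
		 * chops one leading directory off and tries again */
		p = strchr(p + 1, '/');
	}
	return 0;
}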
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 8f83a1835766..0564a5cfb12e 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -27,8 +27,10 @@ static int perf_session__open(struct perf_session *self, bool force)
 
 	self->fd = open(self->filename, O_RDONLY);
 	if (self->fd < 0) {
-		pr_err("failed to open file: %s", self->filename);
-		if (!strcmp(self->filename, "perf.data"))
+		int err = errno;
+
+		pr_err("failed to open %s: %s", self->filename, strerror(err));
+		if (err == ENOENT && !strcmp(self->filename, "perf.data"))
 			pr_err(" (try 'perf record' first)");
 		pr_err("\n");
 		return -errno;
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 2316cb5a4116..c27b4b03fbc1 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -13,6 +13,7 @@ enum sort_type sort__first_dimension;
 unsigned int dsos__col_width;
 unsigned int comms__col_width;
 unsigned int threads__col_width;
+unsigned int cpus__col_width;
 static unsigned int parent_symbol__col_width;
 char * field_sep;
 
@@ -28,6 +29,8 @@ static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
 				    size_t size, unsigned int width);
 static int hist_entry__parent_snprintf(struct hist_entry *self, char *bf,
 				       size_t size, unsigned int width);
+static int hist_entry__cpu_snprintf(struct hist_entry *self, char *bf,
+				    size_t size, unsigned int width);
 
 struct sort_entry sort_thread = {
 	.se_header	= "Command: Pid",
@@ -63,6 +66,13 @@ struct sort_entry sort_parent = {
 	.se_snprintf	= hist_entry__parent_snprintf,
 	.se_width	= &parent_symbol__col_width,
 };
+
+struct sort_entry sort_cpu = {
+	.se_header	= "CPU",
+	.se_cmp		= sort__cpu_cmp,
+	.se_snprintf	= hist_entry__cpu_snprintf,
+	.se_width	= &cpus__col_width,
+};
 
 struct sort_dimension {
 	const char *name;
@@ -76,6 +86,7 @@ static struct sort_dimension sort_dimensions[] = {
 	{ .name = "dso",	.entry = &sort_dso,	},
 	{ .name = "symbol",	.entry = &sort_sym,	},
 	{ .name = "parent",	.entry = &sort_parent,	},
+	{ .name = "cpu",	.entry = &sort_cpu,	},
 };
 
 int64_t cmp_null(void *l, void *r)
@@ -242,6 +253,20 @@ static int hist_entry__parent_snprintf(struct hist_entry *self, char *bf,
 			      self->parent ? self->parent->name : "[other]");
 }
 
+/* --sort cpu */
+
+int64_t
+sort__cpu_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return right->cpu - left->cpu;
+}
+
+static int hist_entry__cpu_snprintf(struct hist_entry *self, char *bf,
+				    size_t size, unsigned int width)
+{
+	return repsep_snprintf(bf, size, "%-*d", width, self->cpu);
+}
+
 int sort_dimension__add(const char *tok)
 {
 	unsigned int i;
@@ -281,6 +306,8 @@ int sort_dimension__add(const char *tok)
 			sort__first_dimension = SORT_SYM;
 		else if (!strcmp(sd->name, "parent"))
 			sort__first_dimension = SORT_PARENT;
+		else if (!strcmp(sd->name, "cpu"))
+			sort__first_dimension = SORT_CPU;
 	}
 
 	list_add_tail(&sd->entry->list, &hist_entry__sort_list);
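
The cpu key follows the existing sort_entry pattern: a three-way comparator plus a fixed-width column printer, registered in sort_dimensions[] so "cpu" can be picked as a --sort dimension. A minimal sketch of the same comparator shape driving qsort(3) on a made-up entry struct; note it orders by descending cpu, matching right->cpu - left->cpu above:

/* Hypothetical sketch: the sort__cpu_cmp() ordering applied via qsort(3). */
#include <stdio.h>
#include <stdlib.h>

struct entry {
	int cpu;
	unsigned long count;
};

static int cmp_cpu(const void *a, const void *b)
{
	const struct entry *l = a, *r = b;

	return r->cpu - l->cpu;		/* same shape as sort__cpu_cmp() */
}

int main(void)
{
	struct entry samples[] = { { 3, 10 }, { 0, 42 }, { 1, 7 } };
	size_t i, n = sizeof(samples) / sizeof(samples[0]);

	qsort(samples, n, sizeof(samples[0]), cmp_cpu);
	for (i = 0; i < n; i++)		/* prints CPU 3, 1, 0 */
		printf("CPU %-3d %lu\n", samples[i].cpu, samples[i].count);
	return 0;
}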
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 0d61c4082f43..560c855417e4 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -39,6 +39,7 @@ extern struct sort_entry sort_parent;
 extern unsigned int dsos__col_width;
 extern unsigned int comms__col_width;
 extern unsigned int threads__col_width;
+extern unsigned int cpus__col_width;
 extern enum sort_type sort__first_dimension;
 
 struct hist_entry {
@@ -51,6 +52,7 @@ struct hist_entry {
 	struct map_symbol	ms;
 	struct thread		*thread;
 	u64			ip;
+	s32			cpu;
 	u32			nr_events;
 	char			level;
 	u8			filtered;
@@ -68,7 +70,8 @@ enum sort_type {
 	SORT_COMM,
 	SORT_DSO,
 	SORT_SYM,
-	SORT_PARENT
+	SORT_PARENT,
+	SORT_CPU,
 };
 
 /*
@@ -104,6 +107,7 @@ extern int64_t sort__comm_collapse(struct hist_entry *, struct hist_entry *);
 extern int64_t sort__dso_cmp(struct hist_entry *, struct hist_entry *);
 extern int64_t sort__sym_cmp(struct hist_entry *, struct hist_entry *);
 extern int64_t sort__parent_cmp(struct hist_entry *, struct hist_entry *);
+int64_t sort__cpu_cmp(struct hist_entry *left, struct hist_entry *right);
 extern size_t sort__parent_print(FILE *, struct hist_entry *, unsigned int);
 extern int sort_dimension__add(const char *);
 void sort_entry__setup_elide(struct sort_entry *self, struct strlist *list,
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index b63e5713849f..971d0a05d6b4 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -933,6 +933,25 @@ static bool elf_sec__is_a(GElf_Shdr *self, Elf_Data *secstrs, enum map_type type
 	}
 }
 
+static size_t elf_addr_to_index(Elf *elf, GElf_Addr addr)
+{
+	Elf_Scn *sec = NULL;
+	GElf_Shdr shdr;
+	size_t cnt = 1;
+
+	while ((sec = elf_nextscn(elf, sec)) != NULL) {
+		gelf_getshdr(sec, &shdr);
+
+		if ((addr >= shdr.sh_addr) &&
+		    (addr < (shdr.sh_addr + shdr.sh_size)))
+			return cnt;
+
+		++cnt;
+	}
+
+	return -1;
+}
+
 static int dso__load_sym(struct dso *self, struct map *map, const char *name,
 			 int fd, symbol_filter_t filter, int kmodule)
 {
@@ -944,12 +963,13 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name,
 	int err = -1;
 	uint32_t idx;
 	GElf_Ehdr ehdr;
-	GElf_Shdr shdr;
-	Elf_Data *syms;
+	GElf_Shdr shdr, opdshdr;
+	Elf_Data *syms, *opddata = NULL;
 	GElf_Sym sym;
-	Elf_Scn *sec, *sec_strndx;
+	Elf_Scn *sec, *sec_strndx, *opdsec;
 	Elf *elf;
 	int nr = 0;
+	size_t opdidx = 0;
 
 	elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
 	if (elf == NULL) {
@@ -969,6 +989,10 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name,
 		goto out_elf_end;
 	}
 
+	opdsec = elf_section_by_name(elf, &ehdr, &opdshdr, ".opd", &opdidx);
+	if (opdsec)
+		opddata = elf_rawdata(opdsec, NULL);
+
 	syms = elf_getdata(sec, NULL);
 	if (syms == NULL)
 		goto out_elf_end;
@@ -1013,6 +1037,13 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name,
 		if (!is_label && !elf_sym__is_a(&sym, map->type))
 			continue;
 
+		if (opdsec && sym.st_shndx == opdidx) {
+			u32 offset = sym.st_value - opdshdr.sh_addr;
+			u64 *opd = opddata->d_buf + offset;
+			sym.st_value = *opd;
+			sym.st_shndx = elf_addr_to_index(elf, sym.st_value);
+		}
+
 		sec = elf_getscn(elf, sym.st_shndx);
 		if (!sec)
 			goto out_elf_end;
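
The .opd handling above is for 64-bit PowerPC ELF binaries that use function descriptors: a function symbol's st_value points into .opd, and the first 64-bit word of that descriptor is the real code entry address, which is why the hunk dereferences it and then re-resolves the owning section. A minimal sketch of that translation with a made-up descriptor table and load address:

/* Hypothetical sketch of a ppc64-style .opd lookup. */
#include <stdint.h>
#include <stdio.h>

struct opd_entry {
	uint64_t entry;		/* code address */
	uint64_t toc;		/* TOC pointer */
	uint64_t env;		/* environment pointer (unused here) */
};

int main(void)
{
	/* pretend .opd is mapped at 0x10020000 and holds two descriptors */
	const uint64_t opd_vaddr = 0x10020000;
	const struct opd_entry opd[] = {
		{ 0x10000800, 0x10030000, 0 },
		{ 0x10000900, 0x10030000, 0 },
	};
	uint64_t sym_value = 0x10020018;	/* points at the 2nd descriptor */
	const uint8_t *base = (const uint8_t *)opd;
	const uint64_t *desc;

	/* same arithmetic as the hunk: offset into .opd, then read one u64 */
	desc = (const uint64_t *)(base + (sym_value - opd_vaddr));
	printf("entry point: %#llx\n", (unsigned long long)*desc);
	return 0;
}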
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 5e02d2c17154..80e569bbdecc 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -9,8 +9,6 @@
 #include <linux/rbtree.h>
 #include <stdio.h>
 
-#define DEBUG_CACHE_DIR ".debug"
-
 #ifdef HAVE_CPLUS_DEMANGLE
 extern char *cplus_demangle(const char *, int);
 
@@ -73,6 +71,7 @@ struct symbol_conf {
 			full_paths,
 			show_cpu_utilization;
 	const char	*vmlinux_name,
+			*source_prefix,
 			*field_sep;
 	const char	*default_guest_vmlinux_name,
 			*default_guest_kallsyms,
@@ -112,7 +111,8 @@ struct addr_location {
 	u64	      addr;
 	char	      level;
 	bool	      filtered;
-	unsigned int  cpumode;
+	u8	      cpumode;
+	s32	      cpu;
 };
 
 enum dso_kernel_type {
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 4e8b6b0c551c..f380fed74359 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -89,6 +89,7 @@
 
 extern const char *graph_line;
 extern const char *graph_dotted_line;
+extern char buildid_dir[];
 
 /* On most systems <limits.h> would have given us this, but
  * not on some systems (e.g. GNU/Hurd).
@@ -152,6 +153,8 @@ extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2)))
 extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN);
 
 extern int prefixcmp(const char *str, const char *prefix);
+extern void set_buildid_dir(void);
+extern void disable_buildid_cache(void);
 
 static inline const char *skip_prefix(const char *str, const char *prefix)
 {