author    Linus Torvalds <torvalds@linux-foundation.org>    2011-07-22 19:44:39 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2011-07-22 19:44:39 -0400
commit    4d4abdcb1dee03a4f9d6d2021622ed07e14dfd17
tree      4ed4c74b70240451065165fda5fb2059f8c6b1e5
parent    0342cbcfced2ee937d7c8e1c63f3d3082da7c7dc
parent    7fcfd1abd6480d3b9ef17f5759c175e036e835cf
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (123 commits)
  perf: Remove the nmi parameter from the oprofile_perf backend
  x86, perf: Make copy_from_user_nmi() a library function
  perf: Remove perf_event_attr::type check
  x86, perf: P4 PMU - Fix typos in comments and style cleanup
  perf tools: Make test use the preset debugfs path
  perf tools: Add automated tests for events parsing
  perf tools: De-opt the parse_events function
  perf script: Fix display of IP address for non-callchain path
  perf tools: Fix endian conversion reading event attr from file header
  perf tools: Add missing 'node' alias to the hw_cache[] array
  perf probe: Support adding probes on offline kernel modules
  perf probe: Add probed module in front of function
  perf probe: Introduce debuginfo to encapsulate dwarf information
  perf-probe: Move dwarf library routines to dwarf-aux.{c, h}
  perf probe: Remove redundant dwarf functions
  perf probe: Move strtailcmp to string.c
  perf probe: Rename DIE_FIND_CB_FOUND to DIE_FIND_CB_END
  tracing/kprobe: Update symbol reference when loading module
  tracing/kprobes: Support module init function probing
  kprobes: Return -ENOENT if probe point doesn't exist
  ...
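The mechanical change repeated across the architecture diffs below is the removal of the explicit nmi argument from the perf overflow path. A minimal caller-side sketch of the before/after (illustrative only; pmu_stop_counter() is a hypothetical stand-in for each PMU's stop/disable routine):

	/* before this merge: callers passed an explicit nmi flag */
	if (perf_event_overflow(event, 1 /* nmi */, &data, regs))
		pmu_stop_counter(event);	/* hypothetical helper */

	/* after: the flag is gone; NMI-ness is no longer threaded through */
	if (perf_event_overflow(event, &data, regs))
		pmu_stop_counter(event);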
Diffstat:
-rw-r--r--  Documentation/trace/kprobetrace.txt | 9
-rw-r--r--  Makefile | 1
-rw-r--r--  arch/alpha/kernel/perf_event.c | 2
-rw-r--r--  arch/alpha/kernel/time.c | 2
-rw-r--r--  arch/arm/kernel/perf_event_v6.c | 30
-rw-r--r--  arch/arm/kernel/perf_event_v7.c | 30
-rw-r--r--  arch/arm/kernel/perf_event_xscale.c | 18
-rw-r--r--  arch/arm/kernel/ptrace.c | 5
-rw-r--r--  arch/arm/kernel/swp_emulate.c | 2
-rw-r--r--  arch/arm/mm/fault.c | 6
-rw-r--r--  arch/mips/include/asm/stacktrace.h | 4
-rw-r--r--  arch/mips/kernel/perf_event.c | 2
-rw-r--r--  arch/mips/kernel/perf_event_mipsxx.c | 28
-rw-r--r--  arch/mips/kernel/process.c | 19
-rw-r--r--  arch/mips/kernel/traps.c | 8
-rw-r--r--  arch/mips/kernel/unaligned.c | 5
-rw-r--r--  arch/mips/math-emu/cp1emu.c | 3
-rw-r--r--  arch/mips/mm/fault.c | 8
-rw-r--r--  arch/mips/oprofile/Makefile | 2
-rw-r--r--  arch/mips/oprofile/backtrace.c | 175
-rw-r--r--  arch/mips/oprofile/common.c | 1
-rw-r--r--  arch/mips/oprofile/op_impl.h | 2
-rw-r--r--  arch/powerpc/include/asm/emulated_ops.h | 4
-rw-r--r--  arch/powerpc/include/asm/hw_breakpoint.h | 2
-rw-r--r--  arch/powerpc/kernel/e500-pmu.c | 5
-rw-r--r--  arch/powerpc/kernel/mpc7450-pmu.c | 5
-rw-r--r--  arch/powerpc/kernel/perf_event.c | 6
-rw-r--r--  arch/powerpc/kernel/perf_event_fsl_emb.c | 6
-rw-r--r--  arch/powerpc/kernel/power4-pmu.c | 5
-rw-r--r--  arch/powerpc/kernel/power5+-pmu.c | 5
-rw-r--r--  arch/powerpc/kernel/power5-pmu.c | 5
-rw-r--r--  arch/powerpc/kernel/power6-pmu.c | 5
-rw-r--r--  arch/powerpc/kernel/power7-pmu.c | 5
-rw-r--r--  arch/powerpc/kernel/ppc970-pmu.c | 5
-rw-r--r--  arch/powerpc/kernel/ptrace.c | 4
-rw-r--r--  arch/powerpc/kernel/time.c | 2
-rw-r--r--  arch/powerpc/mm/fault.c | 6
-rw-r--r--  arch/s390/mm/fault.c | 6
-rw-r--r--  arch/sh/kernel/cpu/sh4/perf_event.c | 15
-rw-r--r--  arch/sh/kernel/cpu/sh4a/perf_event.c | 15
-rw-r--r--  arch/sh/kernel/ptrace_32.c | 5
-rw-r--r--  arch/sh/kernel/traps_32.c | 2
-rw-r--r--  arch/sh/kernel/traps_64.c | 8
-rw-r--r--  arch/sh/math-emu/math.c | 2
-rw-r--r--  arch/sh/mm/fault_32.c | 6
-rw-r--r--  arch/sh/mm/tlbflush_64.c | 6
-rw-r--r--  arch/sparc/kernel/perf_event.c | 44
-rw-r--r--  arch/sparc/kernel/unaligned_32.c | 4
-rw-r--r--  arch/sparc/kernel/unaligned_64.c | 12
-rw-r--r--  arch/sparc/kernel/visemul.c | 2
-rw-r--r--  arch/sparc/math-emu/math_32.c | 2
-rw-r--r--  arch/sparc/math-emu/math_64.c | 2
-rw-r--r--  arch/sparc/mm/fault_32.c | 8
-rw-r--r--  arch/sparc/mm/fault_64.c | 8
-rw-r--r--  arch/x86/include/asm/irqflags.h | 11
-rw-r--r--  arch/x86/include/asm/perf_event.h | 5
-rw-r--r--  arch/x86/include/asm/perf_event_p4.h | 33
-rw-r--r--  arch/x86/include/asm/uaccess.h | 3
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 168
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c | 14
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c | 385
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c | 4
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p4.c | 119
-rw-r--r--  arch/x86/kernel/dumpstack_64.c | 37
-rw-r--r--  arch/x86/kernel/entry_64.S | 69
-rw-r--r--  arch/x86/kernel/kgdb.c | 4
-rw-r--r--  arch/x86/kernel/ptrace.c | 5
-rw-r--r--  arch/x86/kernel/stacktrace.c | 2
-rw-r--r--  arch/x86/lib/Makefile | 2
-rw-r--r--  arch/x86/lib/usercopy.c | 43
-rw-r--r--  arch/x86/mm/fault.c | 6
-rw-r--r--  arch/x86/mm/kmemcheck/error.c | 2
-rw-r--r--  arch/x86/oprofile/backtrace.c | 21
-rw-r--r--  drivers/oprofile/oprofile_perf.c | 4
-rw-r--r--  include/linux/ftrace.h | 7
-rw-r--r--  include/linux/ftrace_event.h | 5
-rw-r--r--  include/linux/hw_breakpoint.h | 10
-rw-r--r--  include/linux/perf_event.h | 81
-rw-r--r--  include/linux/ring_buffer.h | 2
-rw-r--r--  include/linux/stacktrace.h | 4
-rw-r--r--  kernel/async.c | 12
-rw-r--r--  kernel/events/Makefile | 2
-rw-r--r--  kernel/events/core.c | 938
-rw-r--r--  kernel/events/hw_breakpoint.c | 10
-rw-r--r--  kernel/events/internal.h | 96
-rw-r--r--  kernel/events/ring_buffer.c | 380
-rw-r--r--  kernel/kprobes.c | 33
-rw-r--r--  kernel/sched.c | 2
-rw-r--r--  kernel/stacktrace.c | 12
-rw-r--r--  kernel/trace/ftrace.c | 157
-rw-r--r--  kernel/trace/ring_buffer.c | 66
-rw-r--r--  kernel/trace/ring_buffer_benchmark.c | 2
-rw-r--r--  kernel/trace/trace.c | 326
-rw-r--r--  kernel/trace/trace.h | 61
-rw-r--r--  kernel/trace/trace_entries.h | 3
-rw-r--r--  kernel/trace/trace_events.c | 139
-rw-r--r--  kernel/trace/trace_events_filter.c | 6
-rw-r--r--  kernel/trace/trace_functions.c | 3
-rw-r--r--  kernel/trace/trace_functions_graph.c | 225
-rw-r--r--  kernel/trace/trace_irqsoff.c | 4
-rw-r--r--  kernel/trace/trace_kprobe.c | 324
-rw-r--r--  kernel/trace/trace_output.c | 11
-rw-r--r--  kernel/trace/trace_sched_wakeup.c | 4
-rw-r--r--  kernel/trace/trace_stack.c | 13
-rw-r--r--  kernel/watchdog.c | 8
-rw-r--r--  samples/hw_breakpoint/data_breakpoint.c | 4
-rw-r--r--  tools/perf/Documentation/perf-annotate.txt | 6
-rw-r--r--  tools/perf/Documentation/perf-probe.txt | 6
-rw-r--r--  tools/perf/Documentation/perf-report.txt | 21
-rw-r--r--  tools/perf/Documentation/perf-script.txt | 18
-rw-r--r--  tools/perf/Makefile | 2
-rw-r--r--  tools/perf/builtin-annotate.c | 15
-rw-r--r--  tools/perf/builtin-probe.c | 3
-rw-r--r--  tools/perf/builtin-record.c | 2
-rw-r--r--  tools/perf/builtin-report.c | 57
-rw-r--r--  tools/perf/builtin-script.c | 121
-rw-r--r--  tools/perf/builtin-stat.c | 20
-rw-r--r--  tools/perf/builtin-test.c | 249
-rw-r--r--  tools/perf/builtin-top.c | 2
-rw-r--r--  tools/perf/util/callchain.h | 6
-rw-r--r--  tools/perf/util/dwarf-aux.c | 663
-rw-r--r--  tools/perf/util/dwarf-aux.h | 100
-rw-r--r--  tools/perf/util/evsel.c | 1
-rw-r--r--  tools/perf/util/evsel.h | 1
-rw-r--r--  tools/perf/util/header.c | 5
-rw-r--r--  tools/perf/util/hist.c | 6
-rw-r--r--  tools/perf/util/parse-events.c | 39
-rw-r--r--  tools/perf/util/parse-events.h | 6
-rw-r--r--  tools/perf/util/probe-event.c | 165
-rw-r--r--  tools/perf/util/probe-event.h | 1
-rw-r--r--  tools/perf/util/probe-finder.c | 752
-rw-r--r--  tools/perf/util/probe-finder.h | 43
-rw-r--r--  tools/perf/util/python.c | 17
-rw-r--r--  tools/perf/util/session.c | 132
-rw-r--r--  tools/perf/util/session.h | 9
-rw-r--r--  tools/perf/util/sort.c | 223
-rw-r--r--  tools/perf/util/sort.h | 14
-rw-r--r--  tools/perf/util/string.c | 19
-rw-r--r--  tools/perf/util/trace-event-info.c | 120
-rw-r--r--  tools/perf/util/util.h | 1

140 files changed, 4650 insertions(+), 2661 deletions(-)
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index c83bd6b4e6e8..d0d0bb9e3e25 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -22,14 +22,15 @@ current_tracer. Instead of that, add probe points via
 
 Synopsis of kprobe_events
 -------------------------
-  p[:[GRP/]EVENT] SYMBOL[+offs]|MEMADDR [FETCHARGS]  : Set a probe
-  r[:[GRP/]EVENT] SYMBOL[+0] [FETCHARGS]             : Set a return probe
+  p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS]  : Set a probe
+  r[:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS]             : Set a return probe
   -:[GRP/]EVENT                                      : Clear a probe
 
  GRP            : Group name. If omitted, use "kprobes" for it.
  EVENT          : Event name. If omitted, the event name is generated
-                  based on SYMBOL+offs or MEMADDR.
- SYMBOL[+offs]  : Symbol+offset where the probe is inserted.
+                  based on SYM+offs or MEMADDR.
+ MOD            : Module name which has given SYM.
+ SYM[+offs]     : Symbol+offset where the probe is inserted.
  MEMADDR        : Address where the probe is inserted.
 
  FETCHARGS      : Arguments. Each probe can have up to 128 args.
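As a hedged usage sketch of the new [MOD:]SYM form documented above (the module and symbol names below are illustrative, not taken from this patch), a probe on a module symbol can be appended from userspace:

	#include <stdio.h>

	int main(void)
	{
		/* "ext3" and "ext3_sync_file" are hypothetical example names */
		FILE *f = fopen("/sys/kernel/debug/tracing/kprobe_events", "a");

		if (!f)
			return 1;
		/* p[:[GRP/]EVENT] [MOD:]SYM[+offs] [FETCHARGS] */
		fprintf(f, "p:myprobe ext3:ext3_sync_file\n");
		return fclose(f) ? 1 : 0;
	}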
diff --git a/Makefile b/Makefile
index 6a5bdad524af..d0189560613c 100644
--- a/Makefile
+++ b/Makefile
@@ -1290,6 +1290,7 @@ help:
 	@echo  '  make O=dir [targets] Locate all output files in "dir", including .config'
 	@echo  '  make C=1 [targets] Check all c source with $$CHECK (sparse by default)'
 	@echo  '  make C=2 [targets] Force check of all c source with $$CHECK'
+	@echo  '  make RECORDMCOUNT_WARN=1 [targets] Warn about ignored mcount sections'
 	@echo  '  make W=n [targets] Enable extra gcc checks, n=1,2,3 where'
 	@echo  '		1: warnings which may be relevant and do not occur too often'
 	@echo  '		2: warnings which occur quite often but may still be relevant'
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c
index 90561c45e7d8..8e47709160f8 100644
--- a/arch/alpha/kernel/perf_event.c
+++ b/arch/alpha/kernel/perf_event.c
@@ -847,7 +847,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
 		data.period = event->hw.last_period;
 
 		if (alpha_perf_event_set_period(event, hwc, idx)) {
-			if (perf_event_overflow(event, 1, &data, regs)) {
+			if (perf_event_overflow(event, &data, regs)) {
 				/* Interrupts coming too quickly; "throttle" the
 				 * counter, i.e., disable it for a little while.
 				 */
diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c
index 818e74ed45dc..f20d1b5396b8 100644
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -91,7 +91,7 @@ DEFINE_PER_CPU(u8, irq_work_pending);
 #define test_irq_work_pending()      __get_cpu_var(irq_work_pending)
 #define clear_irq_work_pending()     __get_cpu_var(irq_work_pending) = 0
 
-void set_irq_work_pending(void)
+void arch_irq_work_raise(void)
 {
 	set_irq_work_pending_flag();
 }
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
index f1e8dd94afe8..dd7f3b9f4cb3 100644
--- a/arch/arm/kernel/perf_event_v6.c
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -173,6 +173,20 @@ static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 		[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
 	},
 },
+[C(NODE)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+		[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+	},
+	[C(OP_WRITE)] = {
+		[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+		[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+	},
+	[C(OP_PREFETCH)] = {
+		[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+		[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+	},
+},
 };
 
 enum armv6mpcore_perf_types {
@@ -310,6 +324,20 @@ static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 		[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
 	},
 },
+[C(NODE)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+		[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+	},
+	[C(OP_WRITE)] = {
+		[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+		[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+	},
+	[C(OP_PREFETCH)] = {
+		[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+		[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+	},
+},
 };
 
 static inline unsigned long
@@ -479,7 +507,7 @@ armv6pmu_handle_irq(int irq_num,
 		if (!armpmu_event_set_period(event, hwc, idx))
 			continue;
 
-		if (perf_event_overflow(event, 0, &data, regs))
+		if (perf_event_overflow(event, &data, regs))
 			armpmu->disable(hwc, idx);
 	}
 
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 4960686afb58..e20ca9cafef5 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -255,6 +255,20 @@ static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 		[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
 	},
 },
+[C(NODE)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+		[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+	},
+	[C(OP_WRITE)] = {
+		[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+		[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+	},
+	[C(OP_PREFETCH)] = {
+		[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+		[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+	},
+},
 };
 
 /*
@@ -371,6 +385,20 @@ static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 		[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
 	},
 },
+[C(NODE)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+		[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+	},
+	[C(OP_WRITE)] = {
+		[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+		[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+	},
+	[C(OP_PREFETCH)] = {
+		[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+		[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+	},
+},
 };
 
 /*
@@ -787,7 +815,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
 		if (!armpmu_event_set_period(event, hwc, idx))
 			continue;
 
-		if (perf_event_overflow(event, 0, &data, regs))
+		if (perf_event_overflow(event, &data, regs))
 			armpmu->disable(hwc, idx);
 	}
 
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
index 39affbe4fdb2..3c4397491d08 100644
--- a/arch/arm/kernel/perf_event_xscale.c
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -144,6 +144,20 @@ static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 		[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
 	},
 },
+[C(NODE)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+		[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+	},
+	[C(OP_WRITE)] = {
+		[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+		[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+	},
+	[C(OP_PREFETCH)] = {
+		[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+		[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+	},
+},
 };
 
 #define XSCALE_PMU_ENABLE	0x001
@@ -251,7 +265,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev)
 		if (!armpmu_event_set_period(event, hwc, idx))
 			continue;
 
-		if (perf_event_overflow(event, 0, &data, regs))
+		if (perf_event_overflow(event, &data, regs))
 			armpmu->disable(hwc, idx);
 	}
 
@@ -583,7 +597,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev)
 		if (!armpmu_event_set_period(event, hwc, idx))
 			continue;
 
-		if (perf_event_overflow(event, 0, &data, regs))
+		if (perf_event_overflow(event, &data, regs))
 			armpmu->disable(hwc, idx);
 	}
 
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 97260060bf26..5c199610719f 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -396,7 +396,7 @@ static long ptrace_hbp_idx_to_num(int idx)
 /*
  * Handle hitting a HW-breakpoint.
  */
-static void ptrace_hbptriggered(struct perf_event *bp, int unused,
+static void ptrace_hbptriggered(struct perf_event *bp,
 				struct perf_sample_data *data,
 				struct pt_regs *regs)
 {
@@ -479,7 +479,8 @@ static struct perf_event *ptrace_hbp_create(struct task_struct *tsk, int type)
 	attr.bp_type	= type;
 	attr.disabled	= 1;
 
-	return register_user_hw_breakpoint(&attr, ptrace_hbptriggered, tsk);
+	return register_user_hw_breakpoint(&attr, ptrace_hbptriggered, NULL,
+					   tsk);
 }
 
 static int ptrace_gethbpregs(struct task_struct *tsk, long num,
diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c
index 40ee7e5045e4..5f452f8fde05 100644
--- a/arch/arm/kernel/swp_emulate.c
+++ b/arch/arm/kernel/swp_emulate.c
@@ -183,7 +183,7 @@ static int swp_handler(struct pt_regs *regs, unsigned int instr)
 	unsigned int address, destreg, data, type;
 	unsigned int res = 0;
 
-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, regs->ARM_pc);
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->ARM_pc);
 
 	if (current->pid != previous_pid) {
 		pr_debug("\"%s\" (%ld) uses deprecated SWP{B} instruction\n",
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index bc0e1d88fd3b..9ea4f7ddd665 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -318,11 +318,11 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 	fault = __do_page_fault(mm, addr, fsr, tsk);
 	up_read(&mm->mmap_sem);
 
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, addr);
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
 	if (fault & VM_FAULT_MAJOR)
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, addr);
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, addr);
 	else if (fault & VM_FAULT_MINOR)
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, addr);
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, addr);
 
 	/*
 	 * Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR
diff --git a/arch/mips/include/asm/stacktrace.h b/arch/mips/include/asm/stacktrace.h
index 0bf82818aa53..780ee2c2a2ac 100644
--- a/arch/mips/include/asm/stacktrace.h
+++ b/arch/mips/include/asm/stacktrace.h
@@ -7,6 +7,10 @@
 extern int raw_show_trace;
 extern unsigned long unwind_stack(struct task_struct *task, unsigned long *sp,
 				  unsigned long pc, unsigned long *ra);
+extern unsigned long unwind_stack_by_address(unsigned long stack_page,
+					     unsigned long *sp,
+					     unsigned long pc,
+					     unsigned long *ra);
 #else
 #define raw_show_trace 1
 static inline unsigned long unwind_stack(struct task_struct *task,
diff --git a/arch/mips/kernel/perf_event.c b/arch/mips/kernel/perf_event.c
index a8244854d3dc..d0deaab9ace2 100644
--- a/arch/mips/kernel/perf_event.c
+++ b/arch/mips/kernel/perf_event.c
@@ -527,7 +527,7 @@ handle_associated_event(struct cpu_hw_events *cpuc,
 	if (!mipspmu_event_set_period(event, hwc, idx))
 		return;
 
-	if (perf_event_overflow(event, 0, data, regs))
+	if (perf_event_overflow(event, data, regs))
 		mipspmu->disable_event(idx);
 }
 
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index 75266ff4cc33..e5ad09a9baf7 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -377,6 +377,20 @@ static const struct mips_perf_event mipsxxcore_cache_map
 	[C(RESULT_MISS)]	= { UNSUPPORTED_PERF_EVENT_ID },
 	},
 },
+[C(NODE)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)]	= { UNSUPPORTED_PERF_EVENT_ID },
+		[C(RESULT_MISS)]	= { UNSUPPORTED_PERF_EVENT_ID },
+	},
+	[C(OP_WRITE)] = {
+		[C(RESULT_ACCESS)]	= { UNSUPPORTED_PERF_EVENT_ID },
+		[C(RESULT_MISS)]	= { UNSUPPORTED_PERF_EVENT_ID },
+	},
+	[C(OP_PREFETCH)] = {
+		[C(RESULT_ACCESS)]	= { UNSUPPORTED_PERF_EVENT_ID },
+		[C(RESULT_MISS)]	= { UNSUPPORTED_PERF_EVENT_ID },
+	},
+},
 };
 
 /* 74K core has completely different cache event map. */
@@ -480,6 +494,20 @@ static const struct mips_perf_event mipsxx74Kcore_cache_map
 	[C(RESULT_MISS)]	= { UNSUPPORTED_PERF_EVENT_ID },
 	},
 },
+[C(NODE)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)]	= { UNSUPPORTED_PERF_EVENT_ID },
+		[C(RESULT_MISS)]	= { UNSUPPORTED_PERF_EVENT_ID },
+	},
+	[C(OP_WRITE)] = {
+		[C(RESULT_ACCESS)]	= { UNSUPPORTED_PERF_EVENT_ID },
+		[C(RESULT_MISS)]	= { UNSUPPORTED_PERF_EVENT_ID },
+	},
+	[C(OP_PREFETCH)] = {
+		[C(RESULT_ACCESS)]	= { UNSUPPORTED_PERF_EVENT_ID },
+		[C(RESULT_MISS)]	= { UNSUPPORTED_PERF_EVENT_ID },
+	},
+},
 };
 
 #ifdef CONFIG_MIPS_MT_SMP
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index d2112d3cf115..c28fbe6107bc 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -373,18 +373,18 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
 
 
 #ifdef CONFIG_KALLSYMS
-/* used by show_backtrace() */
-unsigned long unwind_stack(struct task_struct *task, unsigned long *sp,
-			   unsigned long pc, unsigned long *ra)
+/* generic stack unwinding function */
+unsigned long notrace unwind_stack_by_address(unsigned long stack_page,
+					      unsigned long *sp,
+					      unsigned long pc,
+					      unsigned long *ra)
 {
-	unsigned long stack_page;
 	struct mips_frame_info info;
 	unsigned long size, ofs;
 	int leaf;
 	extern void ret_from_irq(void);
 	extern void ret_from_exception(void);
 
-	stack_page = (unsigned long)task_stack_page(task);
 	if (!stack_page)
 		return 0;
 
@@ -443,6 +443,15 @@ unsigned long unwind_stack(struct task_struct *task, unsigned long *sp,
 	*ra = 0;
 	return __kernel_text_address(pc) ? pc : 0;
 }
+EXPORT_SYMBOL(unwind_stack_by_address);
+
+/* used by show_backtrace() */
+unsigned long unwind_stack(struct task_struct *task, unsigned long *sp,
+			   unsigned long pc, unsigned long *ra)
+{
+	unsigned long stack_page = (unsigned long)task_stack_page(task);
+	return unwind_stack_by_address(stack_page, sp, pc, ra);
+}
 #endif
 
 /*
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index e9b3af27d844..b7517e3abc85 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -578,12 +578,12 @@ static int simulate_llsc(struct pt_regs *regs, unsigned int opcode)
 {
 	if ((opcode & OPCODE) == LL) {
 		perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,
-				1, 0, regs, 0);
+				1, regs, 0);
 		return simulate_ll(regs, opcode);
 	}
 	if ((opcode & OPCODE) == SC) {
 		perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,
-				1, 0, regs, 0);
+				1, regs, 0);
 		return simulate_sc(regs, opcode);
 	}
 
@@ -602,7 +602,7 @@ static int simulate_rdhwr(struct pt_regs *regs, unsigned int opcode)
 	int rd = (opcode & RD) >> 11;
 	int rt = (opcode & RT) >> 16;
 	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,
-			1, 0, regs, 0);
+			1, regs, 0);
 	switch (rd) {
 	case 0:		/* CPU number */
 		regs->regs[rt] = smp_processor_id();
@@ -640,7 +640,7 @@ static int simulate_sync(struct pt_regs *regs, unsigned int opcode)
 {
 	if ((opcode & OPCODE) == SPEC0 && (opcode & FUNC) == SYNC) {
 		perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,
-				1, 0, regs, 0);
+				1, regs, 0);
 		return 0;
 	}
 
diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c
index cfea1adfa153..eb319b580353 100644
--- a/arch/mips/kernel/unaligned.c
+++ b/arch/mips/kernel/unaligned.c
@@ -111,8 +111,7 @@ static void emulate_load_store_insn(struct pt_regs *regs,
 	unsigned long value;
 	unsigned int res;
 
-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,
-			1, 0, regs, 0);
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
 
 	/*
 	 * This load never faults.
@@ -517,7 +516,7 @@ asmlinkage void do_ade(struct pt_regs *regs)
 	mm_segment_t seg;
 
 	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS,
-			1, 0, regs, regs->cp0_badvaddr);
+			1, regs, regs->cp0_badvaddr);
 	/*
 	 * Did we catch a fault trying to load an instruction?
 	 * Or are we running in MIPS16 mode?
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index d32cb0503110..dbf2f93a5091 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -272,8 +272,7 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 	}
 
       emul:
-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,
-			1, 0, xcp, 0);
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, xcp, 0);
 	MIPS_FPU_EMU_INC_STATS(emulated);
 	switch (MIPSInst_OPCODE(ir)) {
 	case ldc1_op:{
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index 137ee76a0045..937cf3368164 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -145,7 +145,7 @@ good_area:
 	 * the fault.
 	 */
 	fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0);
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;
@@ -154,12 +154,10 @@ good_area:
 		BUG();
 	}
 	if (fault & VM_FAULT_MAJOR) {
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
-				1, 0, regs, address);
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
 		tsk->maj_flt++;
 	} else {
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
-				1, 0, regs, address);
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
 		tsk->min_flt++;
 	}
 
diff --git a/arch/mips/oprofile/Makefile b/arch/mips/oprofile/Makefile
index 4b9d7044e26c..29f2f13eb31c 100644
--- a/arch/mips/oprofile/Makefile
+++ b/arch/mips/oprofile/Makefile
@@ -8,7 +8,7 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
 			oprofilefs.o oprofile_stats.o \
 			timer_int.o )
 
-oprofile-y				:= $(DRIVER_OBJS) common.o
+oprofile-y				:= $(DRIVER_OBJS) common.o backtrace.o
 
 oprofile-$(CONFIG_CPU_MIPS32)		+= op_model_mipsxx.o
 oprofile-$(CONFIG_CPU_MIPS64)		+= op_model_mipsxx.o
diff --git a/arch/mips/oprofile/backtrace.c b/arch/mips/oprofile/backtrace.c
new file mode 100644
index 000000000000..6854ed5097d2
--- /dev/null
+++ b/arch/mips/oprofile/backtrace.c
@@ -0,0 +1,175 @@
+#include <linux/oprofile.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/uaccess.h>
+#include <asm/ptrace.h>
+#include <asm/stacktrace.h>
+#include <linux/stacktrace.h>
+#include <linux/kernel.h>
+#include <asm/sections.h>
+#include <asm/inst.h>
+
+struct stackframe {
+	unsigned long sp;
+	unsigned long pc;
+	unsigned long ra;
+};
+
+static inline int get_mem(unsigned long addr, unsigned long *result)
+{
+	unsigned long *address = (unsigned long *) addr;
+	if (!access_ok(VERIFY_READ, addr, sizeof(unsigned long)))
+		return -1;
+	if (__copy_from_user_inatomic(result, address, sizeof(unsigned long)))
+		return -3;
+	return 0;
+}
+
+/*
+ * These two instruction helpers were taken from process.c
+ */
+static inline int is_ra_save_ins(union mips_instruction *ip)
+{
+	/* sw / sd $ra, offset($sp) */
+	return (ip->i_format.opcode == sw_op || ip->i_format.opcode == sd_op)
+		&& ip->i_format.rs == 29 && ip->i_format.rt == 31;
+}
+
+static inline int is_sp_move_ins(union mips_instruction *ip)
+{
+	/* addiu/daddiu sp,sp,-imm */
+	if (ip->i_format.rs != 29 || ip->i_format.rt != 29)
+		return 0;
+	if (ip->i_format.opcode == addiu_op || ip->i_format.opcode == daddiu_op)
+		return 1;
+	return 0;
+}
+
+/*
+ * Looks for specific instructions that mark the end of a function.
+ * This usually means we ran into the code area of the previous function.
+ */
+static inline int is_end_of_function_marker(union mips_instruction *ip)
+{
+	/* jr ra */
+	if (ip->r_format.func == jr_op && ip->r_format.rs == 31)
+		return 1;
+	/* lui gp */
+	if (ip->i_format.opcode == lui_op && ip->i_format.rt == 28)
+		return 1;
+	return 0;
+}
+
+/*
+ * TODO for userspace stack unwinding:
+ * - handle cases where the stack is adjusted inside a function
+ *     (generally doesn't happen)
+ * - find optimal value for max_instr_check
+ * - try to find a way to handle leaf functions
+ */
+
+static inline int unwind_user_frame(struct stackframe *old_frame,
+				    const unsigned int max_instr_check)
+{
+	struct stackframe new_frame = *old_frame;
+	off_t ra_offset = 0;
+	size_t stack_size = 0;
+	unsigned long addr;
+
+	if (old_frame->pc == 0 || old_frame->sp == 0 || old_frame->ra == 0)
+		return -9;
+
+	for (addr = new_frame.pc; (addr + max_instr_check > new_frame.pc)
+		&& (!ra_offset || !stack_size); --addr) {
+		union mips_instruction ip;
+
+		if (get_mem(addr, (unsigned long *) &ip))
+			return -11;
+
+		if (is_sp_move_ins(&ip)) {
+			int stack_adjustment = ip.i_format.simmediate;
+			if (stack_adjustment > 0)
+				/* This marks the end of the previous function,
+				   which means we overran. */
+				break;
+			stack_size = (unsigned) stack_adjustment;
+		} else if (is_ra_save_ins(&ip)) {
+			int ra_slot = ip.i_format.simmediate;
+			if (ra_slot < 0)
+				/* This shouldn't happen. */
+				break;
+			ra_offset = ra_slot;
+		} else if (is_end_of_function_marker(&ip))
+			break;
+	}
+
+	if (!ra_offset || !stack_size)
+		return -1;
+
+	if (ra_offset) {
+		new_frame.ra = old_frame->sp + ra_offset;
+		if (get_mem(new_frame.ra, &(new_frame.ra)))
+			return -13;
+	}
+
+	if (stack_size) {
+		new_frame.sp = old_frame->sp + stack_size;
+		if (get_mem(new_frame.sp, &(new_frame.sp)))
+			return -14;
+	}
+
+	if (new_frame.sp > old_frame->sp)
+		return -2;
+
+	new_frame.pc = old_frame->ra;
+	*old_frame = new_frame;
+
+	return 0;
+}
+
+static inline void do_user_backtrace(unsigned long low_addr,
+				     struct stackframe *frame,
+				     unsigned int depth)
+{
+	const unsigned int max_instr_check = 512;
+	const unsigned long high_addr = low_addr + THREAD_SIZE;
+
+	while (depth-- && !unwind_user_frame(frame, max_instr_check)) {
+		oprofile_add_trace(frame->ra);
+		if (frame->sp < low_addr || frame->sp > high_addr)
+			break;
+	}
+}
+
+#ifndef CONFIG_KALLSYMS
+static inline void do_kernel_backtrace(unsigned long low_addr,
+				       struct stackframe *frame,
+				       unsigned int depth) { }
+#else
+static inline void do_kernel_backtrace(unsigned long low_addr,
+				       struct stackframe *frame,
+				       unsigned int depth)
+{
+	while (depth-- && frame->pc) {
+		frame->pc = unwind_stack_by_address(low_addr,
+						    &(frame->sp),
+						    frame->pc,
+						    &(frame->ra));
+		oprofile_add_trace(frame->ra);
+	}
+}
+#endif
+
+void notrace op_mips_backtrace(struct pt_regs *const regs, unsigned int depth)
+{
+	struct stackframe frame = { .sp = regs->regs[29],
+				    .pc = regs->cp0_epc,
+				    .ra = regs->regs[31] };
+	const int userspace = user_mode(regs);
+	const unsigned long low_addr = ALIGN(frame.sp, THREAD_SIZE);
+
+	if (userspace)
+		do_user_backtrace(low_addr, &frame, depth);
+	else
+		do_kernel_backtrace(low_addr, &frame, depth);
+}
diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c
index f9eb1aba6345..d1f2d4c52d42 100644
--- a/arch/mips/oprofile/common.c
+++ b/arch/mips/oprofile/common.c
@@ -115,6 +115,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
 	ops->start		= op_mips_start;
 	ops->stop		= op_mips_stop;
 	ops->cpu_type		= lmodel->cpu_type;
+	ops->backtrace		= op_mips_backtrace;
 
 	printk(KERN_INFO "oprofile: using %s performance monitoring.\n",
 	       lmodel->cpu_type);
diff --git a/arch/mips/oprofile/op_impl.h b/arch/mips/oprofile/op_impl.h
index f04b54fb37d1..7c2da27ece04 100644
--- a/arch/mips/oprofile/op_impl.h
+++ b/arch/mips/oprofile/op_impl.h
@@ -36,4 +36,6 @@ struct op_mips_model {
 	unsigned char num_counters;
 };
 
+void op_mips_backtrace(struct pt_regs * const regs, unsigned int depth);
+
 #endif
diff --git a/arch/powerpc/include/asm/emulated_ops.h b/arch/powerpc/include/asm/emulated_ops.h
index 45921672b97a..2cc41c715d2b 100644
--- a/arch/powerpc/include/asm/emulated_ops.h
+++ b/arch/powerpc/include/asm/emulated_ops.h
@@ -78,14 +78,14 @@ extern void ppc_warn_emulated_print(const char *type);
 #define PPC_WARN_EMULATED(type, regs)					\
 	do {								\
 		perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS,		\
-			1, 0, regs, 0);					\
+			1, regs, 0);					\
 		__PPC_WARN_EMULATED(type);				\
 	} while (0)
 
 #define PPC_WARN_ALIGNMENT(type, regs)					\
 	do {								\
 		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS,		\
-			1, 0, regs, regs->dar);				\
+			1, regs, regs->dar);				\
 		__PPC_WARN_EMULATED(type);				\
 	} while (0)
 
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h
index 1c33ec17ca36..80fd4d2b4a62 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -57,7 +57,7 @@ void hw_breakpoint_pmu_read(struct perf_event *bp);
 extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);
 
 extern struct pmu perf_ops_bp;
-extern void ptrace_triggered(struct perf_event *bp, int nmi,
+extern void ptrace_triggered(struct perf_event *bp,
 			struct perf_sample_data *data, struct pt_regs *regs);
 static inline void hw_breakpoint_disable(void)
 {
diff --git a/arch/powerpc/kernel/e500-pmu.c b/arch/powerpc/kernel/e500-pmu.c
index b150b510510f..cb2e2949c8d1 100644
--- a/arch/powerpc/kernel/e500-pmu.c
+++ b/arch/powerpc/kernel/e500-pmu.c
@@ -75,6 +75,11 @@ static int e500_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 	[C(OP_WRITE)] = {	-1,	-1	},
 	[C(OP_PREFETCH)] = {	-1,	-1	},
 	},
+	[C(NODE)] = {		/* RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	-1,	-1	},
+		[C(OP_WRITE)] = {	-1,	-1	},
+		[C(OP_PREFETCH)] = {	-1,	-1	},
+	},
 };
 
 static int num_events = 128;
diff --git a/arch/powerpc/kernel/mpc7450-pmu.c b/arch/powerpc/kernel/mpc7450-pmu.c
index 2cc5e0301d0b..845a58478890 100644
--- a/arch/powerpc/kernel/mpc7450-pmu.c
+++ b/arch/powerpc/kernel/mpc7450-pmu.c
@@ -388,6 +388,11 @@ static int mpc7450_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 	[C(OP_WRITE)] = {	-1,	-1	},
 	[C(OP_PREFETCH)] = {	-1,	-1	},
 	},
+	[C(NODE)] = {		/* RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	-1,	-1	},
+		[C(OP_WRITE)] = {	-1,	-1	},
+		[C(OP_PREFETCH)] = {	-1,	-1	},
+	},
 };
 
 struct power_pmu mpc7450_pmu = {
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 822f63008ae1..14967de98876 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -1207,7 +1207,7 @@ struct pmu power_pmu = {
  * here so there is no possibility of being interrupted.
  */
 static void record_and_restart(struct perf_event *event, unsigned long val,
-			       struct pt_regs *regs, int nmi)
+			       struct pt_regs *regs)
 {
 	u64 period = event->hw.sample_period;
 	s64 prev, delta, left;
@@ -1258,7 +1258,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 		if (event->attr.sample_type & PERF_SAMPLE_ADDR)
 			perf_get_data_addr(regs, &data.addr);
 
-		if (perf_event_overflow(event, nmi, &data, regs))
+		if (perf_event_overflow(event, &data, regs))
 			power_pmu_stop(event, 0);
 	}
 }
@@ -1346,7 +1346,7 @@ static void perf_event_interrupt(struct pt_regs *regs)
 		if ((int)val < 0) {
 			/* event has overflowed */
 			found = 1;
-			record_and_restart(event, val, regs, nmi);
+			record_and_restart(event, val, regs);
 		}
 	}
 
diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c
index b0dc8f7069cd..0a6d2a9d569c 100644
--- a/arch/powerpc/kernel/perf_event_fsl_emb.c
+++ b/arch/powerpc/kernel/perf_event_fsl_emb.c
@@ -568,7 +568,7 @@ static struct pmu fsl_emb_pmu = {
  * here so there is no possibility of being interrupted.
  */
 static void record_and_restart(struct perf_event *event, unsigned long val,
-			       struct pt_regs *regs, int nmi)
+			       struct pt_regs *regs)
 {
 	u64 period = event->hw.sample_period;
 	s64 prev, delta, left;
@@ -616,7 +616,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 		perf_sample_data_init(&data, 0);
 		data.period = event->hw.last_period;
 
-		if (perf_event_overflow(event, nmi, &data, regs))
+		if (perf_event_overflow(event, &data, regs))
 			fsl_emb_pmu_stop(event, 0);
 	}
 }
@@ -644,7 +644,7 @@ static void perf_event_interrupt(struct pt_regs *regs)
 		if (event) {
 			/* event has overflowed */
 			found = 1;
-			record_and_restart(event, val, regs, nmi);
+			record_and_restart(event, val, regs);
 		} else {
 			/*
 			 * Disabled counter is negative,
diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c
index ead8b3c2649e..e9dbc2d35c9c 100644
--- a/arch/powerpc/kernel/power4-pmu.c
+++ b/arch/powerpc/kernel/power4-pmu.c
@@ -587,6 +587,11 @@ static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 	[C(OP_WRITE)] = {	-1,	-1	},
 	[C(OP_PREFETCH)] = {	-1,	-1	},
 	},
+	[C(NODE)] = {		/* RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	-1,	-1	},
+		[C(OP_WRITE)] = {	-1,	-1	},
+		[C(OP_PREFETCH)] = {	-1,	-1	},
+	},
 };
 
 static struct power_pmu power4_pmu = {
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c
index eca0ac595cb6..f58a2bd41b59 100644
--- a/arch/powerpc/kernel/power5+-pmu.c
+++ b/arch/powerpc/kernel/power5+-pmu.c
@@ -653,6 +653,11 @@ static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 	[C(OP_WRITE)] = {	-1,	-1	},
 	[C(OP_PREFETCH)] = {	-1,	-1	},
 	},
+	[C(NODE)] = {		/* RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	-1,	-1	},
+		[C(OP_WRITE)] = {	-1,	-1	},
+		[C(OP_PREFETCH)] = {	-1,	-1	},
+	},
 };
 
 static struct power_pmu power5p_pmu = {
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c
index d5ff0f64a5e6..b1acab684142 100644
--- a/arch/powerpc/kernel/power5-pmu.c
+++ b/arch/powerpc/kernel/power5-pmu.c
@@ -595,6 +595,11 @@ static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 	[C(OP_WRITE)] = {	-1,	-1	},
 	[C(OP_PREFETCH)] = {	-1,	-1	},
 	},
+	[C(NODE)] = {		/* RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	-1,	-1	},
+		[C(OP_WRITE)] = {	-1,	-1	},
+		[C(OP_PREFETCH)] = {	-1,	-1	},
+	},
 };
 
 static struct power_pmu power5_pmu = {
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c
index 31603927e376..b24a3a23d073 100644
--- a/arch/powerpc/kernel/power6-pmu.c
+++ b/arch/powerpc/kernel/power6-pmu.c
@@ -516,6 +516,11 @@ static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 	[C(OP_WRITE)] = {	-1,	-1	},
 	[C(OP_PREFETCH)] = {	-1,	-1	},
 	},
+	[C(NODE)] = {		/* RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	-1,	-1	},
+		[C(OP_WRITE)] = {	-1,	-1	},
+		[C(OP_PREFETCH)] = {	-1,	-1	},
+	},
 };
 
 static struct power_pmu power6_pmu = {
diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c
index 593740fcb799..6d9dccb2ea59 100644
--- a/arch/powerpc/kernel/power7-pmu.c
+++ b/arch/powerpc/kernel/power7-pmu.c
@@ -342,6 +342,11 @@ static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 	[C(OP_WRITE)] = {	-1,	-1	},
 	[C(OP_PREFETCH)] = {	-1,	-1	},
 	},
+	[C(NODE)] = {		/* RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	-1,	-1	},
+		[C(OP_WRITE)] = {	-1,	-1	},
+		[C(OP_PREFETCH)] = {	-1,	-1	},
+	},
 };
 
 static struct power_pmu power7_pmu = {
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c
index 9a6e093858fe..b121de9658eb 100644
--- a/arch/powerpc/kernel/ppc970-pmu.c
+++ b/arch/powerpc/kernel/ppc970-pmu.c
@@ -467,6 +467,11 @@ static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 	[C(OP_WRITE)] = {	-1,	-1	},
 	[C(OP_PREFETCH)] = {	-1,	-1	},
 	},
+	[C(NODE)] = {		/* RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	-1,	-1	},
+		[C(OP_WRITE)] = {	-1,	-1	},
+		[C(OP_PREFETCH)] = {	-1,	-1	},
+	},
 };
 
 static struct power_pmu ppc970_pmu = {
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index cb22024f2b42..05b7dd217f60 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -882,7 +882,7 @@ void user_disable_single_step(struct task_struct *task)
 }
 
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
-void ptrace_triggered(struct perf_event *bp, int nmi,
+void ptrace_triggered(struct perf_event *bp,
 		      struct perf_sample_data *data, struct pt_regs *regs)
 {
 	struct perf_event_attr attr;
@@ -973,7 +973,7 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 							&attr.bp_type);
 
 	thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr,
-					       ptrace_triggered, task);
+					       ptrace_triggered, NULL, task);
 	if (IS_ERR(bp)) {
 		thread->ptrace_bps[0] = NULL;
 		ptrace_put_breakpoints(task);
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index f33acfd872ad..03b29a6759ab 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -544,7 +544,7 @@ DEFINE_PER_CPU(u8, irq_work_pending);
 
 #endif /* 32 vs 64 bit */
 
-void set_irq_work_pending(void)
+void arch_irq_work_raise(void)
 {
 	preempt_disable();
 	set_irq_work_pending_flag();
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index ad35f66c69e8..5efe8c96d37f 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -174,7 +174,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
 		die("Weird page fault", regs, SIGSEGV);
 	}
 
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
 	/* When running in the kernel we expect faults to occur only to
 	 * addresses in user space. All other faults represent errors in the
@@ -320,7 +320,7 @@ good_area:
 	}
 	if (ret & VM_FAULT_MAJOR) {
 		current->maj_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
 			      regs, address);
 #ifdef CONFIG_PPC_SMLPAR
 		if (firmware_has_feature(FW_FEATURE_CMO)) {
@@ -331,7 +331,7 @@ good_area:
 #endif
 	} else {
 		current->min_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
 			      regs, address);
 	}
 	up_read(&mm->mmap_sem);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index fe103e891e7a..095f782a5512 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -299,7 +299,7 @@ static inline int do_exception(struct pt_regs *regs, int access,
 		goto out;
 
 	address = trans_exc_code & __FAIL_ADDR_MASK;
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 	flags = FAULT_FLAG_ALLOW_RETRY;
 	if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400)
 		flags |= FAULT_FLAG_WRITE;
@@ -345,11 +345,11 @@ retry:
 	if (flags & FAULT_FLAG_ALLOW_RETRY) {
 		if (fault & VM_FAULT_MAJOR) {
 			tsk->maj_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
 				      regs, address);
 		} else {
 			tsk->min_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
 				      regs, address);
 		}
 		if (fault & VM_FAULT_RETRY) {
diff --git a/arch/sh/kernel/cpu/sh4/perf_event.c b/arch/sh/kernel/cpu/sh4/perf_event.c
index 748955df018d..fa4f724b295a 100644
--- a/arch/sh/kernel/cpu/sh4/perf_event.c
+++ b/arch/sh/kernel/cpu/sh4/perf_event.c
@@ -180,6 +180,21 @@ static const int sh7750_cache_events
 		[ C(RESULT_MISS)   ] = -1,
 	},
 },
+
+[ C(NODE) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+},
 };
 
 static int sh7750_event_map(int event)
diff --git a/arch/sh/kernel/cpu/sh4a/perf_event.c b/arch/sh/kernel/cpu/sh4a/perf_event.c
index 17e6bebfede0..84a2c396ceee 100644
--- a/arch/sh/kernel/cpu/sh4a/perf_event.c
+++ b/arch/sh/kernel/cpu/sh4a/perf_event.c
@@ -205,6 +205,21 @@ static const int sh4a_cache_events
 		[ C(RESULT_MISS)   ] = -1,
 	},
 },
+
+[ C(NODE) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+},
 };
 
 static int sh4a_event_map(int event)
diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c
index 3d7b209b2178..92b3c276339a 100644
--- a/arch/sh/kernel/ptrace_32.c
+++ b/arch/sh/kernel/ptrace_32.c
@@ -63,7 +63,7 @@ static inline int put_stack_long(struct task_struct *task, int offset,
 	return 0;
 }
 
-void ptrace_triggered(struct perf_event *bp, int nmi,
+void ptrace_triggered(struct perf_event *bp,
 		      struct perf_sample_data *data, struct pt_regs *regs)
 {
 	struct perf_event_attr attr;
@@ -91,7 +91,8 @@ static int set_single_step(struct task_struct *tsk, unsigned long addr)
 	attr.bp_len = HW_BREAKPOINT_LEN_2;
 	attr.bp_type = HW_BREAKPOINT_R;
 
-	bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk);
+	bp = register_user_hw_breakpoint(&attr, ptrace_triggered,
+					 NULL, tsk);
 	if (IS_ERR(bp))
 		return PTR_ERR(bp);
 
diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c
index b51a17104b5f..d9006f8ffc14 100644
--- a/arch/sh/kernel/traps_32.c
+++ b/arch/sh/kernel/traps_32.c
@@ -393,7 +393,7 @@ int handle_unaligned_access(insn_size_t instruction, struct pt_regs *regs,
 	 */
 	if (!expected) {
 		unaligned_fixups_notify(current, instruction, regs);
-		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1,
 			      regs, address);
 	}
 
diff --git a/arch/sh/kernel/traps_64.c b/arch/sh/kernel/traps_64.c
index 6713ca97e553..67110be83fd7 100644
--- a/arch/sh/kernel/traps_64.c
+++ b/arch/sh/kernel/traps_64.c
@@ -434,7 +434,7 @@ static int misaligned_load(struct pt_regs *regs,
 		return error;
 	}
 
-	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, address);
 
 	destreg = (opcode >> 4) & 0x3f;
 	if (user_mode(regs)) {
@@ -512,7 +512,7 @@ static int misaligned_store(struct pt_regs *regs,
 		return error;
 	}
 
-	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, address);
 
 	srcreg = (opcode >> 4) & 0x3f;
 	if (user_mode(regs)) {
@@ -588,7 +588,7 @@ static int misaligned_fpu_load(struct pt_regs *regs,
 		return error;
 	}
 
-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, address);
 
 	destreg = (opcode >> 4) & 0x3f;
 	if (user_mode(regs)) {
@@ -665,7 +665,7 @@ static int misaligned_fpu_store(struct pt_regs *regs,
 		return error;
 	}
 
-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, address);
 
 	srcreg = (opcode >> 4) & 0x3f;
 	if (user_mode(regs)) {
diff --git a/arch/sh/math-emu/math.c b/arch/sh/math-emu/math.c
index f76a5090d5d1..977195210653 100644
--- a/arch/sh/math-emu/math.c
+++ b/arch/sh/math-emu/math.c
@@ -620,7 +620,7 @@ int do_fpu_inst(unsigned short inst, struct pt_regs *regs)
 	struct task_struct *tsk = current;
 	struct sh_fpu_soft_struct *fpu = &(tsk->thread.xstate->softfpu);
 
-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0);
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
 
 	if (!(task_thread_info(tsk)->status & TS_USEDFPU)) {
 		/* initialize once. */
diff --git a/arch/sh/mm/fault_32.c b/arch/sh/mm/fault_32.c
index d4c34d757f0d..7bebd044f2a1 100644
--- a/arch/sh/mm/fault_32.c
+++ b/arch/sh/mm/fault_32.c
@@ -160,7 +160,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 	if ((regs->sr & SR_IMASK) != SR_IMASK)
 		local_irq_enable();
 
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
 	/*
 	 * If we're in an interrupt, have no user context or are running
@@ -210,11 +210,11 @@ good_area:
 	}
 	if (fault & VM_FAULT_MAJOR) {
 		tsk->maj_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
 			      regs, address);
 	} else {
 		tsk->min_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
 			      regs, address);
 	}
 
diff --git a/arch/sh/mm/tlbflush_64.c b/arch/sh/mm/tlbflush_64.c
index 7f5810f5dfdc..e3430e093d43 100644
--- a/arch/sh/mm/tlbflush_64.c
+++ b/arch/sh/mm/tlbflush_64.c
@@ -116,7 +116,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long writeaccess,
 	/* Not an IO address, so reenable interrupts */
 	local_irq_enable();
 
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
 	/*
 	 * If we're in an interrupt or have no user
@@ -200,11 +200,11 @@ good_area:
 
 	if (fault & VM_FAULT_MAJOR) {
 		tsk->maj_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
 			      regs, address);
 	} else {
 		tsk->min_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
 			      regs, address);
 	}
 
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 2cb0e1c001e2..62a034318b18 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -246,6 +246,20 @@ static const cache_map_t ultra3_cache_map = {
 		[C(RESULT_MISS)  ] = { CACHE_OP_UNSUPPORTED },
 	},
 },
+[C(NODE)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+		[C(RESULT_MISS)  ] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+},
 };
 
 static const struct sparc_pmu ultra3_pmu = {
@@ -361,6 +375,20 @@ static const cache_map_t niagara1_cache_map = {
 		[C(RESULT_MISS)  ] = { CACHE_OP_UNSUPPORTED },
 	},
 },
+[C(NODE)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+		[C(RESULT_MISS)  ] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+},
 };
 
 static const struct sparc_pmu niagara1_pmu = {
@@ -473,6 +501,20 @@ static const cache_map_t niagara2_cache_map = {
 		[C(RESULT_MISS)  ] = { CACHE_OP_UNSUPPORTED },
 	},
 },
+[C(NODE)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+		[C(RESULT_MISS)  ] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+},
 };
 
 static const struct sparc_pmu niagara2_pmu = {
@@ -1277,7 +1319,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
 		if (!sparc_perf_event_set_period(event, hwc, idx))
 			continue;
 
-		if (perf_event_overflow(event, 1, &data, regs))
+		if (perf_event_overflow(event, &data, regs))
 			sparc_pmu_stop(event, 0);
 	}
 
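perf_event_overflow() is slimmed down the same way: callers no longer pass the nmi flag, and a non-zero return still means the core wants the event throttled. After this change the generic shape of a PMU overflow loop is roughly:

	/* per overflowed counter, inside the PMU interrupt handler */
	perf_sample_data_init(&data, 0);
	data.period = event->hw.last_period;
	if (perf_event_overflow(event, &data, regs))
		sparc_pmu_stop(event, 0);	/* core asked us to throttle */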
diff --git a/arch/sparc/kernel/unaligned_32.c b/arch/sparc/kernel/unaligned_32.c
index 4491f4cb2695..7efbb2f9e77f 100644
--- a/arch/sparc/kernel/unaligned_32.c
+++ b/arch/sparc/kernel/unaligned_32.c
@@ -247,7 +247,7 @@ asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn)
 	unsigned long addr = compute_effective_address(regs, insn);
 	int err;
 
-	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, addr);
+	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
 	switch (dir) {
 	case load:
 		err = do_int_load(fetch_reg_addr(((insn>>25)&0x1f),
@@ -338,7 +338,7 @@ asmlinkage void user_unaligned_trap(struct pt_regs *regs, unsigned int insn)
 	}
 
 	addr = compute_effective_address(regs, insn);
-	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, addr);
+	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
 	switch(dir) {
 	case load:
 		err = do_int_load(fetch_reg_addr(((insn>>25)&0x1f),
diff --git a/arch/sparc/kernel/unaligned_64.c b/arch/sparc/kernel/unaligned_64.c
index b2b019ea8caa..35cff1673aa4 100644
--- a/arch/sparc/kernel/unaligned_64.c
+++ b/arch/sparc/kernel/unaligned_64.c
@@ -317,7 +317,7 @@ asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn)
 
 	addr = compute_effective_address(regs, insn,
 					 ((insn >> 25) & 0x1f));
-	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, addr);
+	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
 	switch (asi) {
 	case ASI_NL:
 	case ASI_AIUPL:
@@ -384,7 +384,7 @@ int handle_popc(u32 insn, struct pt_regs *regs)
 	int ret, i, rd = ((insn >> 25) & 0x1f);
 	int from_kernel = (regs->tstate & TSTATE_PRIV) != 0;
 
-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0);
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
 	if (insn & 0x2000) {
 		maybe_flush_windows(0, 0, rd, from_kernel);
 		value = sign_extend_imm13(insn);
@@ -431,7 +431,7 @@ int handle_ldf_stq(u32 insn, struct pt_regs *regs)
 	int asi = decode_asi(insn, regs);
 	int flag = (freg < 32) ? FPRS_DL : FPRS_DU;
 
-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0);
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
 
 	save_and_clear_fpu();
 	current_thread_info()->xfsr[0] &= ~0x1c000;
@@ -554,7 +554,7 @@ void handle_ld_nf(u32 insn, struct pt_regs *regs)
 	int from_kernel = (regs->tstate & TSTATE_PRIV) != 0;
 	unsigned long *reg;
 
-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0);
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
 
 	maybe_flush_windows(0, 0, rd, from_kernel);
 	reg = fetch_reg_addr(rd, regs);
@@ -586,7 +586,7 @@ void handle_lddfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr
 
 	if (tstate & TSTATE_PRIV)
 		die_if_kernel("lddfmna from kernel", regs);
-	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, sfar);
+	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, sfar);
 	if (test_thread_flag(TIF_32BIT))
 		pc = (u32)pc;
 	if (get_user(insn, (u32 __user *) pc) != -EFAULT) {
@@ -647,7 +647,7 @@ void handle_stdfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr
 
 	if (tstate & TSTATE_PRIV)
 		die_if_kernel("stdfmna from kernel", regs);
-	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, 0, regs, sfar);
+	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, sfar);
 	if (test_thread_flag(TIF_32BIT))
 		pc = (u32)pc;
 	if (get_user(insn, (u32 __user *) pc) != -EFAULT) {
diff --git a/arch/sparc/kernel/visemul.c b/arch/sparc/kernel/visemul.c
index 36357717d691..32b626c9d815 100644
--- a/arch/sparc/kernel/visemul.c
+++ b/arch/sparc/kernel/visemul.c
@@ -802,7 +802,7 @@ int vis_emul(struct pt_regs *regs, unsigned int insn)
 
 	BUG_ON(regs->tstate & TSTATE_PRIV);
 
-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0);
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
 
 	if (test_thread_flag(TIF_32BIT))
 		pc = (u32)pc;
diff --git a/arch/sparc/math-emu/math_32.c b/arch/sparc/math-emu/math_32.c
index a3fccde894ec..aa4d55b0bdf0 100644
--- a/arch/sparc/math-emu/math_32.c
+++ b/arch/sparc/math-emu/math_32.c
@@ -164,7 +164,7 @@ int do_mathemu(struct pt_regs *regs, struct task_struct *fpt)
 	int retcode = 0;	/* assume all succeed */
 	unsigned long insn;
 
-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0);
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
 
 #ifdef DEBUG_MATHEMU
 	printk("In do_mathemu()... pc is %08lx\n", regs->pc);
diff --git a/arch/sparc/math-emu/math_64.c b/arch/sparc/math-emu/math_64.c
index 56d2c44747b8..e575bd2fe381 100644
--- a/arch/sparc/math-emu/math_64.c
+++ b/arch/sparc/math-emu/math_64.c
@@ -184,7 +184,7 @@ int do_mathemu(struct pt_regs *regs, struct fpustate *f)
 
 	if (tstate & TSTATE_PRIV)
 		die_if_kernel("unfinished/unimplemented FPop from kernel", regs);
-	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0);
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
 	if (test_thread_flag(TIF_32BIT))
 		pc = (u32)pc;
 	if (get_user(insn, (u32 __user *) pc) != -EFAULT) {
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index 7543ddbdadb2..aa1c1b1ce5cc 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -251,7 +251,7 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
 	if (in_atomic() || !mm)
 		goto no_context;
 
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
 	down_read(&mm->mmap_sem);
 
@@ -301,12 +301,10 @@ good_area:
 	}
 	if (fault & VM_FAULT_MAJOR) {
 		current->maj_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
-			      regs, address);
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
 	} else {
 		current->min_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
-			      regs, address);
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
 	}
 	up_read(&mm->mmap_sem);
 	return;
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index f92ce56a8b22..504c0622f729 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -325,7 +325,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
 	if (in_atomic() || !mm)
 		goto intr_or_no_mm;
 
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
 	if (!down_read_trylock(&mm->mmap_sem)) {
 		if ((regs->tstate & TSTATE_PRIV) &&
@@ -433,12 +433,10 @@ good_area:
 	}
 	if (fault & VM_FAULT_MAJOR) {
 		current->maj_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
-			      regs, address);
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
 	} else {
 		current->min_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
-			      regs, address);
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
 	}
 	up_read(&mm->mmap_sem);
 
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 5745ce8bf108..bba3cf88e624 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -60,23 +60,24 @@ static inline void native_halt(void)
 #include <asm/paravirt.h>
 #else
 #ifndef __ASSEMBLY__
+#include <linux/types.h>
 
-static inline unsigned long arch_local_save_flags(void)
+static inline notrace unsigned long arch_local_save_flags(void)
 {
 	return native_save_fl();
 }
 
-static inline void arch_local_irq_restore(unsigned long flags)
+static inline notrace void arch_local_irq_restore(unsigned long flags)
 {
 	native_restore_fl(flags);
 }
 
-static inline void arch_local_irq_disable(void)
+static inline notrace void arch_local_irq_disable(void)
 {
 	native_irq_disable();
 }
 
-static inline void arch_local_irq_enable(void)
+static inline notrace void arch_local_irq_enable(void)
 {
 	native_irq_enable();
 }
@@ -102,7 +103,7 @@ static inline void halt(void)
 /*
  * For spinlocks, etc:
  */
-static inline unsigned long arch_local_irq_save(void)
+static inline notrace unsigned long arch_local_irq_save(void)
 {
 	unsigned long flags = arch_local_save_flags();
 	arch_local_irq_disable();
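The notrace additions keep these helpers out of ftrace instrumentation; the tracer itself manipulates interrupt state, so tracing arch_local_irq_*() would recurse. For reference, notrace is (roughly, as defined in include/linux/compiler.h of this era):

	#define notrace __attribute__((no_instrument_function))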
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index d9d4dae305f6..094fb30817ab 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -152,6 +152,11 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
 	(regs)->bp = caller_frame_pointer();			\
 	(regs)->cs = __KERNEL_CS;				\
 	regs->flags = 0;					\
+	asm volatile(						\
+		_ASM_MOV "%%"_ASM_SP ", %0\n"			\
+		: "=m" ((regs)->sp)				\
+		:: "memory"					\
+	);							\
 }
 
 #else
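The new asm statement stores the live stack pointer into the faked pt_regs, so callchain unwinding from this point sees a valid sp. On x86-64 the _ASM_MOV/_ASM_SP macros make it expand to roughly:

	asm volatile("mov %%rsp, %0" : "=m" ((regs)->sp) :: "memory");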
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index 56fd9e3abbda..4f7e67e2345e 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -102,6 +102,14 @@
 #define P4_CONFIG_HT		(1ULL << P4_CONFIG_HT_SHIFT)
 
 /*
+ * If an event has alias it should be marked
+ * with a special bit. (Don't forget to check
+ * P4_PEBS_CONFIG_MASK and related bits on
+ * modification.)
+ */
+#define P4_CONFIG_ALIASABLE	(1 << 9)
+
+/*
  * The bits we allow to pass for RAW events
  */
 #define P4_CONFIG_MASK_ESCR		\
@@ -123,6 +131,31 @@
 	(p4_config_pack_escr(P4_CONFIG_MASK_ESCR)) |	\
 	(p4_config_pack_cccr(P4_CONFIG_MASK_CCCR))
 
+/*
+ * In case of event aliasing we need to preserve some
+ * caller bits, otherwise the mapping won't be complete.
+ */
+#define P4_CONFIG_EVENT_ALIAS_MASK			  \
+	(p4_config_pack_escr(P4_CONFIG_MASK_ESCR)	| \
+	 p4_config_pack_cccr(P4_CCCR_EDGE		| \
+			     P4_CCCR_THRESHOLD_MASK	| \
+			     P4_CCCR_COMPLEMENT		| \
+			     P4_CCCR_COMPARE))
+
+#define P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS		  \
+	((P4_CONFIG_HT)					| \
+	 p4_config_pack_escr(P4_ESCR_T0_OS		| \
+			     P4_ESCR_T0_USR		| \
+			     P4_ESCR_T1_OS		| \
+			     P4_ESCR_T1_USR)		| \
+	 p4_config_pack_cccr(P4_CCCR_OVF		| \
+			     P4_CCCR_CASCADE		| \
+			     P4_CCCR_FORCE_OVF		| \
+			     P4_CCCR_THREAD_ANY		| \
+			     P4_CCCR_OVF_PMI_T0		| \
+			     P4_CCCR_OVF_PMI_T1		| \
+			     P4_CONFIG_ALIASABLE))
+
 static inline bool p4_is_event_cascaded(u64 config)
 {
 	u32 cccr = p4_config_unpack_cccr(config);
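The two masks partition an event config: P4_CONFIG_EVENT_ALIAS_MASK covers the bits that identify the event itself (and thus may be swapped for an alias), while the IMMUTABLE_BITS (privilege, overflow and HT state) must survive the swap untouched. A hypothetical helper showing the intended combination:

	static u64 p4_swap_alias(u64 config, u64 alternative)
	{
		/* event-selecting bits come from the alias entry ... */
		u64 res = alternative & P4_CONFIG_EVENT_ALIAS_MASK;

		/* ... caller-owned bits of the original are preserved */
		return res | (config & P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS);
	}

p4_get_alias_event() in the perf_event_p4.c hunk below performs this combination after a table lookup.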
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 99ddd148a760..36361bf6fdd1 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -555,6 +555,9 @@ struct __large_struct { unsigned long buf[100]; };
 
 #endif /* CONFIG_X86_WP_WORKS_OK */
 
+extern unsigned long
+copy_from_user_nmi(void *to, const void __user *from, unsigned long n);
+
 /*
  * movsl can be slow when source and dest are not both 8-byte aligned
  */
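copy_from_user_nmi() is promoted from a perf-private helper to a library routine; the declaration above is its new public home. It returns the number of bytes actually copied, so NMI-context callers check for a short copy, e.g. when walking a user stack frame by frame (sketch):

	struct stack_frame frame;
	unsigned long bytes;

	bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
	if (bytes != sizeof(frame))
		break;	/* faulted or partial copy: stop unwinding */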
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 3a0338b4b179..4ee3abf20ed6 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -22,7 +22,6 @@
 #include <linux/sched.h>
 #include <linux/uaccess.h>
 #include <linux/slab.h>
-#include <linux/highmem.h>
 #include <linux/cpu.h>
 #include <linux/bitops.h>
 
@@ -45,38 +44,27 @@ do { \
 #endif
 
 /*
- * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
+ *          | NHM/WSM    |      SNB     |
+ * register -------------------------------
+ *          |  HT  | no HT |  HT  | no HT |
+ *-----------------------------------------
+ * offcore  | core | core  | cpu  | core  |
+ * lbr_sel  | core | core  | cpu  | core  |
+ * ld_lat   | cpu  | core  | cpu  | core  |
+ *-----------------------------------------
+ *
+ * Given that there is a small number of shared regs,
+ * we can pre-allocate their slot in the per-cpu
+ * per-core reg tables.
  */
-static unsigned long
-copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
-{
-	unsigned long offset, addr = (unsigned long)from;
-	unsigned long size, len = 0;
-	struct page *page;
-	void *map;
-	int ret;
-
-	do {
-		ret = __get_user_pages_fast(addr, 1, 0, &page);
-		if (!ret)
-			break;
-
-		offset = addr & (PAGE_SIZE - 1);
-		size = min(PAGE_SIZE - offset, n - len);
-
-		map = kmap_atomic(page);
-		memcpy(to, map+offset, size);
-		kunmap_atomic(map);
-		put_page(page);
-
-		len += size;
-		to += size;
-		addr += size;
-
-	} while (len < n);
-
-	return len;
-}
+enum extra_reg_type {
+	EXTRA_REG_NONE  = -1,	/* not used */
+
+	EXTRA_REG_RSP_0 = 0,	/* offcore_response_0 */
+	EXTRA_REG_RSP_1 = 1,	/* offcore_response_1 */
+
+	EXTRA_REG_MAX		/* number of entries needed */
+};
 
 struct event_constraint {
 	union {
@@ -132,11 +120,10 @@ struct cpu_hw_events {
 	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];
 
 	/*
-	 * Intel percore register state.
-	 * Coordinate shared resources between HT threads.
+	 * manage shared (per-core, per-cpu) registers
+	 * used on Intel NHM/WSM/SNB
 	 */
-	int				percore_used; /* Used by this CPU? */
-	struct intel_percore		*per_core;
+	struct intel_shared_regs	*shared_regs;
 
 	/*
 	 * AMD specific bits
@@ -187,26 +174,45 @@ struct cpu_hw_events {
 	for ((e) = (c); (e)->weight; (e)++)
 
 /*
+ * Per register state.
+ */
+struct er_account {
+	raw_spinlock_t		lock;	/* per-core: protect structure */
+	u64			config;	/* extra MSR config */
+	u64			reg;	/* extra MSR number */
+	atomic_t		ref;	/* reference count */
+};
+
+/*
  * Extra registers for specific events.
+ *
  * Some events need large masks and require external MSRs.
- * Define a mapping to these extra registers.
+ * Those extra MSRs end up being shared for all events on
+ * a PMU and sometimes between PMU of sibling HT threads.
+ * In either case, the kernel needs to handle conflicting
+ * accesses to those extra, shared, regs. The data structure
+ * to manage those registers is stored in cpu_hw_event.
  */
 struct extra_reg {
 	unsigned int		event;
 	unsigned int		msr;
 	u64			config_mask;
 	u64			valid_mask;
+	int			idx;  /* per_xxx->regs[] reg index */
 };
 
-#define EVENT_EXTRA_REG(e, ms, m, vm) {	\
+#define EVENT_EXTRA_REG(e, ms, m, vm, i) {	\
 	.event = (e),		\
 	.msr = (ms),		\
 	.config_mask = (m),	\
 	.valid_mask = (vm),	\
+	.idx = EXTRA_REG_##i	\
 	}
-#define INTEL_EVENT_EXTRA_REG(event, msr, vm)	\
-	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm)
-#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0)
+
+#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx)	\
+	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)
+
+#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)
 
 union perf_capabilities {
 	struct {
@@ -252,7 +258,6 @@ struct x86_pmu {
 	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
 						 struct perf_event *event);
 	struct event_constraint *event_constraints;
-	struct event_constraint *percore_constraints;
 	void		(*quirks)(void);
 	int		perfctr_second_write;
 
@@ -286,8 +291,12 @@ struct x86_pmu {
 	 * Extra registers for events
 	 */
 	struct extra_reg *extra_regs;
+	unsigned int er_flags;
 };
 
+#define ERF_NO_HT_SHARING	1
+#define ERF_HAS_RSP_1		2
+
 static struct x86_pmu x86_pmu __read_mostly;
 
 static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
@@ -393,10 +402,10 @@ static inline unsigned int x86_pmu_event_addr(int index)
  */
 static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
 {
+	struct hw_perf_event_extra *reg;
 	struct extra_reg *er;
 
-	event->hw.extra_reg = 0;
-	event->hw.extra_config = 0;
+	reg = &event->hw.extra_reg;
 
 	if (!x86_pmu.extra_regs)
 		return 0;
@@ -406,8 +415,10 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
 			continue;
 		if (event->attr.config1 & ~er->valid_mask)
 			return -EINVAL;
-		event->hw.extra_reg = er->msr;
-		event->hw.extra_config = event->attr.config1;
+
+		reg->idx = er->idx;
+		reg->config = event->attr.config1;
+		reg->reg = er->msr;
 		break;
 	}
 	return 0;
@@ -706,6 +717,9 @@ static int __x86_pmu_event_init(struct perf_event *event)
 	event->hw.last_cpu = -1;
 	event->hw.last_tag = ~0ULL;
 
+	/* mark unused */
+	event->hw.extra_reg.idx = EXTRA_REG_NONE;
+
 	return x86_pmu.hw_config(event);
 }
 
@@ -747,8 +761,8 @@ static void x86_pmu_disable(struct pmu *pmu)
 static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
 					  u64 enable_mask)
 {
-	if (hwc->extra_reg)
-		wrmsrl(hwc->extra_reg, hwc->extra_config);
+	if (hwc->extra_reg.reg)
+		wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
 	wrmsrl(hwc->config_base, hwc->config | enable_mask);
 }
 
@@ -1332,7 +1346,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
 		if (!x86_perf_event_set_period(event))
 			continue;
 
-		if (perf_event_overflow(event, 1, &data, regs))
+		if (perf_event_overflow(event, &data, regs))
 			x86_pmu_stop(event, 0);
 	}
 
@@ -1637,6 +1651,40 @@ static int x86_pmu_commit_txn(struct pmu *pmu)
 	perf_pmu_enable(pmu);
 	return 0;
 }
+/*
+ * a fake_cpuc is used to validate event groups. Due to
+ * the extra reg logic, we need to also allocate a fake
+ * per_core and per_cpu structure. Otherwise, group events
+ * using extra reg may conflict without the kernel being
+ * able to catch this when the last event gets added to
+ * the group.
+ */
+static void free_fake_cpuc(struct cpu_hw_events *cpuc)
+{
+	kfree(cpuc->shared_regs);
+	kfree(cpuc);
+}
+
+static struct cpu_hw_events *allocate_fake_cpuc(void)
+{
+	struct cpu_hw_events *cpuc;
+	int cpu = raw_smp_processor_id();
+
+	cpuc = kzalloc(sizeof(*cpuc), GFP_KERNEL);
+	if (!cpuc)
+		return ERR_PTR(-ENOMEM);
+
+	/* only needed, if we have extra_regs */
+	if (x86_pmu.extra_regs) {
+		cpuc->shared_regs = allocate_shared_regs(cpu);
+		if (!cpuc->shared_regs)
+			goto error;
+	}
+	return cpuc;
+error:
+	free_fake_cpuc(cpuc);
+	return ERR_PTR(-ENOMEM);
+}
 
 /*
  * validate that we can schedule this event
@@ -1647,9 +1695,9 @@ static int validate_event(struct perf_event *event)
 	struct event_constraint *c;
 	int ret = 0;
 
-	fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
-	if (!fake_cpuc)
-		return -ENOMEM;
+	fake_cpuc = allocate_fake_cpuc();
+	if (IS_ERR(fake_cpuc))
+		return PTR_ERR(fake_cpuc);
 
 	c = x86_pmu.get_event_constraints(fake_cpuc, event);
 
@@ -1659,7 +1707,7 @@ static int validate_event(struct perf_event *event)
 	if (x86_pmu.put_event_constraints)
 		x86_pmu.put_event_constraints(fake_cpuc, event);
 
-	kfree(fake_cpuc);
+	free_fake_cpuc(fake_cpuc);
 
 	return ret;
 }
@@ -1679,36 +1727,32 @@ static int validate_group(struct perf_event *event)
 {
 	struct perf_event *leader = event->group_leader;
 	struct cpu_hw_events *fake_cpuc;
-	int ret, n;
-
-	ret = -ENOMEM;
-	fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
-	if (!fake_cpuc)
-		goto out;
+	int ret = -ENOSPC, n;
 
+	fake_cpuc = allocate_fake_cpuc();
+	if (IS_ERR(fake_cpuc))
+		return PTR_ERR(fake_cpuc);
 	/*
 	 * the event is not yet connected with its
 	 * siblings therefore we must first collect
 	 * existing siblings, then add the new event
 	 * before we can simulate the scheduling
 	 */
-	ret = -ENOSPC;
 	n = collect_events(fake_cpuc, leader, true);
 	if (n < 0)
-		goto out_free;
+		goto out;
 
 	fake_cpuc->n_events = n;
 	n = collect_events(fake_cpuc, event, false);
 	if (n < 0)
-		goto out_free;
+		goto out;
 
 	fake_cpuc->n_events = n;
 
 	ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
 
-out_free:
-	kfree(fake_cpuc);
 out:
+	free_fake_cpuc(fake_cpuc);
 	return ret;
 }
 
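The sharing rule that struct er_account encodes: a shared MSR slot may be taken when unused, or joined only by an event with an identical config; any other combination leaves the event constrained out. Stripped to its core (a sketch with an illustrative name; the real code in perf_event_intel.c does this under era->lock with interrupts off):

	static bool er_try_take(struct er_account *era, u64 config, u64 msr)
	{
		/* er_try_take is illustrative, not a kernel function */
		if (atomic_read(&era->ref) && era->config != config)
			return false;	/* a conflicting user owns the MSR */

		era->config = config;	/* lock in the msr value */
		era->reg = msr;
		atomic_inc(&era->ref);	/* one more user */
		return true;
	}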
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index fe29c1d2219e..941caa2e449b 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -89,6 +89,20 @@ static __initconst const u64 amd_hw_cache_event_ids
 		[ C(RESULT_MISS)   ] = -1,
 	},
 },
+ [ C(NODE) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0xb8e9, /* CPU Request to Memory, l+r */
+		[ C(RESULT_MISS)   ] = 0x98e9, /* CPU Request to Memory, r   */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
 };
 
 /*
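With the node level wired into the cache map, the event is reachable through the standard PERF_TYPE_HW_CACHE encoding, id | (op << 8) | (result << 16). A user-space attr that should select the AMD read-access entry above (sketch, error handling omitted):

	#include <linux/perf_event.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	struct perf_event_attr attr = {
		.type	= PERF_TYPE_HW_CACHE,
		.size	= sizeof(attr),
		.config	= PERF_COUNT_HW_CACHE_NODE |
			  (PERF_COUNT_HW_CACHE_OP_READ << 8) |
			  (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16),
	};
	/* measure the calling task on any cpu */
	int fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);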
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 41178c826c48..45fbb8f7f549 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1,25 +1,15 @@
 #ifdef CONFIG_CPU_SUP_INTEL
 
-#define MAX_EXTRA_REGS 2
-
-/*
- * Per register state.
- */
-struct er_account {
-	int			ref;		/* reference count */
-	unsigned int		extra_reg;	/* extra MSR number */
-	u64			extra_config;	/* extra MSR config */
-};
-
 /*
- * Per core state
- * This used to coordinate shared registers for HT threads.
+ * Per core/cpu state
+ *
+ * Used to coordinate shared registers between HT threads or
+ * among events on a single PMU.
  */
-struct intel_percore {
-	raw_spinlock_t		lock;		/* protect structure */
-	struct er_account	regs[MAX_EXTRA_REGS];
-	int			refcnt;		/* number of threads */
-	unsigned		core_id;
+struct intel_shared_regs {
+	struct er_account	regs[EXTRA_REG_MAX];
+	int			refcnt;		/* per-core: #HT threads */
+	unsigned		core_id;	/* per-core: core id */
 };
 
 /*
@@ -88,16 +78,10 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
 
 static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
 {
-	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
+	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
 	EVENT_EXTRA_END
 };
 
-static struct event_constraint intel_nehalem_percore_constraints[] __read_mostly =
-{
-	INTEL_EVENT_CONSTRAINT(0xb7, 0),
-	EVENT_CONSTRAINT_END
-};
-
 static struct event_constraint intel_westmere_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -116,8 +100,6 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
 	/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
 	INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
-	INTEL_EVENT_CONSTRAINT(0xb7, 0x1), /* OFF_CORE_RESPONSE_0 */
-	INTEL_EVENT_CONSTRAINT(0xbb, 0x8), /* OFF_CORE_RESPONSE_1 */
 	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
 	INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
 	EVENT_CONSTRAINT_END
@@ -125,15 +107,13 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =
 
 static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
 {
-	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
-	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff),
+	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
+	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
 	EVENT_EXTRA_END
 };
 
-static struct event_constraint intel_westmere_percore_constraints[] __read_mostly =
+static struct event_constraint intel_v1_event_constraints[] __read_mostly =
 {
-	INTEL_EVENT_CONSTRAINT(0xb7, 0),
-	INTEL_EVENT_CONSTRAINT(0xbb, 0),
 	EVENT_CONSTRAINT_END
 };
139 119
@@ -145,6 +125,12 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
145 EVENT_CONSTRAINT_END 125 EVENT_CONSTRAINT_END
146}; 126};
147 127
128static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
129 INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
130 INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
131 EVENT_EXTRA_END
132};
133
148static u64 intel_pmu_event_map(int hw_event) 134static u64 intel_pmu_event_map(int hw_event)
149{ 135{
150 return intel_perfmon_event_map[hw_event]; 136 return intel_perfmon_event_map[hw_event];
@@ -245,6 +231,21 @@ static __initconst const u64 snb_hw_cache_event_ids
 		[ C(RESULT_MISS)   ] = -1,
 	},
 },
+ [ C(NODE) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+
 };
 
 static __initconst const u64 westmere_hw_cache_event_ids
@@ -346,6 +347,20 @@ static __initconst const u64 westmere_hw_cache_event_ids
 		[ C(RESULT_MISS)   ] = -1,
 	},
 },
+ [ C(NODE) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		[ C(RESULT_MISS)   ] = 0x01b7,
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		[ C(RESULT_MISS)   ] = 0x01b7,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		[ C(RESULT_MISS)   ] = 0x01b7,
+	},
+ },
 };
 
 /*
@@ -398,7 +413,21 @@ static __initconst const u64 nehalem_hw_cache_extra_regs
 		[ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS,
 		[ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_L3_MISS,
 	},
- }
+ },
+ [ C(NODE) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_ALL_DRAM,
+		[ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_REMOTE_DRAM,
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_ALL_DRAM,
+		[ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_REMOTE_DRAM,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_ALL_DRAM,
+		[ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_REMOTE_DRAM,
+	},
+ },
 };
 
 static __initconst const u64 nehalem_hw_cache_event_ids
@@ -500,6 +529,20 @@ static __initconst const u64 nehalem_hw_cache_event_ids
 		[ C(RESULT_MISS)   ] = -1,
 	},
 },
+ [ C(NODE) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		[ C(RESULT_MISS)   ] = 0x01b7,
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		[ C(RESULT_MISS)   ] = 0x01b7,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x01b7,
+		[ C(RESULT_MISS)   ] = 0x01b7,
+	},
+ },
 };
 
 static __initconst const u64 core2_hw_cache_event_ids
@@ -1003,7 +1046,7 @@ again:
 
 		data.period = event->hw.last_period;
 
-		if (perf_event_overflow(event, 1, &data, regs))
+		if (perf_event_overflow(event, &data, regs))
 			x86_pmu_stop(event, 0);
 	}
 
@@ -1037,65 +1080,121 @@ intel_bts_constraints(struct perf_event *event)
 	return NULL;
 }
 
+static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
+{
+	if (!(x86_pmu.er_flags & ERF_HAS_RSP_1))
+		return false;
+
+	if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) {
+		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
+		event->hw.config |= 0x01bb;
+		event->hw.extra_reg.idx = EXTRA_REG_RSP_1;
+		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
+	} else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) {
+		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
+		event->hw.config |= 0x01b7;
+		event->hw.extra_reg.idx = EXTRA_REG_RSP_0;
+		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
+	}
+
+	if (event->hw.extra_reg.idx == orig_idx)
+		return false;
+
+	return true;
+}
+
+/*
+ * manage allocation of shared extra msr for certain events
+ *
+ * sharing can be:
+ * per-cpu: to be shared between the various events on a single PMU
+ * per-core: per-cpu + shared by HT threads
+ */
 static struct event_constraint *
-intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
+				   struct perf_event *event)
 {
-	struct hw_perf_event *hwc = &event->hw;
-	unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;
-	struct event_constraint *c;
-	struct intel_percore *pc;
+	struct event_constraint *c = &emptyconstraint;
+	struct hw_perf_event_extra *reg = &event->hw.extra_reg;
 	struct er_account *era;
-	int i;
-	int free_slot;
-	int found;
+	unsigned long flags;
+	int orig_idx = reg->idx;
 
-	if (!x86_pmu.percore_constraints || hwc->extra_alloc)
-		return NULL;
+	/* already allocated shared msr */
+	if (reg->alloc)
+		return &unconstrained;
 
-	for (c = x86_pmu.percore_constraints; c->cmask; c++) {
-		if (e != c->code)
-			continue;
+again:
+	era = &cpuc->shared_regs->regs[reg->idx];
+	/*
+	 * we use spin_lock_irqsave() to avoid lockdep issues when
+	 * passing a fake cpuc
+	 */
+	raw_spin_lock_irqsave(&era->lock, flags);
+
+	if (!atomic_read(&era->ref) || era->config == reg->config) {
+
+		/* lock in msr value */
+		era->config = reg->config;
+		era->reg = reg->reg;
+
+		/* one more user */
+		atomic_inc(&era->ref);
+
+		/* no need to reallocate during incremental event scheduling */
+		reg->alloc = 1;
 
 		/*
-		 * Allocate resource per core.
+		 * All events using extra_reg are unconstrained.
+		 * Avoids calling x86_get_event_constraints()
+		 *
+		 * Must revisit if extra_reg controlling events
+		 * ever have constraints. Worst case we go through
+		 * the regular event constraint table.
 		 */
-		pc = cpuc->per_core;
-		if (!pc)
-			break;
-		c = &emptyconstraint;
-		raw_spin_lock(&pc->lock);
-		free_slot = -1;
-		found = 0;
-		for (i = 0; i < MAX_EXTRA_REGS; i++) {
-			era = &pc->regs[i];
-			if (era->ref > 0 && hwc->extra_reg == era->extra_reg) {
-				/* Allow sharing same config */
-				if (hwc->extra_config == era->extra_config) {
-					era->ref++;
-					cpuc->percore_used = 1;
-					hwc->extra_alloc = 1;
-					c = NULL;
-				}
-				/* else conflict */
-				found = 1;
-				break;
-			} else if (era->ref == 0 && free_slot == -1)
-				free_slot = i;
-		}
-		if (!found && free_slot != -1) {
-			era = &pc->regs[free_slot];
-			era->ref = 1;
-			era->extra_reg = hwc->extra_reg;
-			era->extra_config = hwc->extra_config;
-			cpuc->percore_used = 1;
-			hwc->extra_alloc = 1;
-			c = NULL;
-		}
-		raw_spin_unlock(&pc->lock);
-		return c;
+		c = &unconstrained;
+	} else if (intel_try_alt_er(event, orig_idx)) {
+		raw_spin_unlock(&era->lock);
+		goto again;
 	}
+	raw_spin_unlock_irqrestore(&era->lock, flags);
 
-	return NULL;
+	return c;
+}
+
+static void
+__intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
+				   struct hw_perf_event_extra *reg)
+{
+	struct er_account *era;
+
+	/*
+	 * only put constraint if extra reg was actually
+	 * allocated. Also takes care of event which do
+	 * not use an extra shared reg
+	 */
+	if (!reg->alloc)
+		return;
+
+	era = &cpuc->shared_regs->regs[reg->idx];
+
+	/* one fewer user */
+	atomic_dec(&era->ref);
+
+	/* allocate again next time */
+	reg->alloc = 0;
+}
+
+static struct event_constraint *
+intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
+			      struct perf_event *event)
+{
+	struct event_constraint *c = NULL;
+
+	if (event->hw.extra_reg.idx != EXTRA_REG_NONE)
+		c = __intel_shared_reg_get_constraints(cpuc, event);
+
+	return c;
 }
 
 static struct event_constraint *
@@ -1111,49 +1210,28 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
 	if (c)
 		return c;
 
-	c = intel_percore_constraints(cpuc, event);
+	c = intel_shared_regs_constraints(cpuc, event);
 	if (c)
 		return c;
 
 	return x86_get_event_constraints(cpuc, event);
 }
 
-static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
+static void
+intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
 					struct perf_event *event)
 {
-	struct extra_reg *er;
-	struct intel_percore *pc;
-	struct er_account *era;
-	struct hw_perf_event *hwc = &event->hw;
-	int i, allref;
+	struct hw_perf_event_extra *reg;
 
-	if (!cpuc->percore_used)
-		return;
-
-	for (er = x86_pmu.extra_regs; er->msr; er++) {
-		if (er->event != (hwc->config & er->config_mask))
-			continue;
+	reg = &event->hw.extra_reg;
+	if (reg->idx != EXTRA_REG_NONE)
+		__intel_shared_reg_put_constraints(cpuc, reg);
+}
 
-		pc = cpuc->per_core;
-		raw_spin_lock(&pc->lock);
-		for (i = 0; i < MAX_EXTRA_REGS; i++) {
-			era = &pc->regs[i];
-			if (era->ref > 0 &&
-			    era->extra_config == hwc->extra_config &&
-			    era->extra_reg == er->msr) {
-				era->ref--;
-				hwc->extra_alloc = 0;
-				break;
-			}
-		}
-		allref = 0;
-		for (i = 0; i < MAX_EXTRA_REGS; i++)
-			allref += pc->regs[i].ref;
-		if (allref == 0)
-			cpuc->percore_used = 0;
-		raw_spin_unlock(&pc->lock);
-		break;
-	}
+static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
+					struct perf_event *event)
+{
+	intel_put_shared_regs_event_constraints(cpuc, event);
 }
 
 static int intel_pmu_hw_config(struct perf_event *event)
@@ -1231,20 +1309,36 @@ static __initconst const struct x86_pmu core_pmu = {
 	.event_constraints	= intel_core_event_constraints,
 };
 
+static struct intel_shared_regs *allocate_shared_regs(int cpu)
+{
+	struct intel_shared_regs *regs;
+	int i;
+
+	regs = kzalloc_node(sizeof(struct intel_shared_regs),
+			    GFP_KERNEL, cpu_to_node(cpu));
+	if (regs) {
+		/*
+		 * initialize the locks to keep lockdep happy
+		 */
+		for (i = 0; i < EXTRA_REG_MAX; i++)
+			raw_spin_lock_init(&regs->regs[i].lock);
+
+		regs->core_id = -1;
+	}
+	return regs;
+}
+
 static int intel_pmu_cpu_prepare(int cpu)
 {
 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
 
-	if (!cpu_has_ht_siblings())
+	if (!x86_pmu.extra_regs)
 		return NOTIFY_OK;
 
-	cpuc->per_core = kzalloc_node(sizeof(struct intel_percore),
-				      GFP_KERNEL, cpu_to_node(cpu));
-	if (!cpuc->per_core)
+	cpuc->shared_regs = allocate_shared_regs(cpu);
+	if (!cpuc->shared_regs)
 		return NOTIFY_BAD;
 
-	raw_spin_lock_init(&cpuc->per_core->lock);
-	cpuc->per_core->core_id = -1;
 	return NOTIFY_OK;
 }
 
@@ -1260,32 +1354,34 @@ static void intel_pmu_cpu_starting(int cpu)
 	 */
 	intel_pmu_lbr_reset();
 
-	if (!cpu_has_ht_siblings())
+	if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING))
 		return;
 
 	for_each_cpu(i, topology_thread_cpumask(cpu)) {
-		struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core;
+		struct intel_shared_regs *pc;
 
+		pc = per_cpu(cpu_hw_events, i).shared_regs;
 		if (pc && pc->core_id == core_id) {
-			kfree(cpuc->per_core);
-			cpuc->per_core = pc;
+			kfree(cpuc->shared_regs);
+			cpuc->shared_regs = pc;
 			break;
 		}
 	}
 
-	cpuc->per_core->core_id = core_id;
-	cpuc->per_core->refcnt++;
+	cpuc->shared_regs->core_id = core_id;
+	cpuc->shared_regs->refcnt++;
 }
 
 static void intel_pmu_cpu_dying(int cpu)
 {
 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
-	struct intel_percore *pc = cpuc->per_core;
+	struct intel_shared_regs *pc;
 
+	pc = cpuc->shared_regs;
 	if (pc) {
 		if (pc->core_id == -1 || --pc->refcnt == 0)
 			kfree(pc);
-		cpuc->per_core = NULL;
+		cpuc->shared_regs = NULL;
 	}
 
 	fini_debug_store_on_cpu(cpu);
@@ -1436,7 +1532,6 @@ static __init int intel_pmu_init(void)
 
 		x86_pmu.event_constraints = intel_nehalem_event_constraints;
 		x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
-		x86_pmu.percore_constraints = intel_nehalem_percore_constraints;
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.extra_regs = intel_nehalem_extra_regs;
 
@@ -1481,10 +1576,10 @@ static __init int intel_pmu_init(void)
 		intel_pmu_lbr_init_nhm();
 
 		x86_pmu.event_constraints = intel_westmere_event_constraints;
-		x86_pmu.percore_constraints = intel_westmere_percore_constraints;
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
 		x86_pmu.extra_regs = intel_westmere_extra_regs;
+		x86_pmu.er_flags |= ERF_HAS_RSP_1;
 
 		/* UOPS_ISSUED.STALLED_CYCLES */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
@@ -1502,6 +1597,10 @@ static __init int intel_pmu_init(void)
 
 		x86_pmu.event_constraints = intel_snb_event_constraints;
 		x86_pmu.pebs_constraints = intel_snb_pebs_events;
+		x86_pmu.extra_regs = intel_snb_extra_regs;
+		/* all extra regs are per-cpu when HT is on */
+		x86_pmu.er_flags |= ERF_HAS_RSP_1;
+		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
 
 		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
@@ -1512,11 +1611,19 @@ static __init int intel_pmu_init(void)
 		break;
 
 	default:
-		/*
-		 * default constraints for v2 and up
-		 */
-		x86_pmu.event_constraints = intel_gen_event_constraints;
-		pr_cont("generic architected perfmon, ");
+		switch (x86_pmu.version) {
+		case 1:
+			x86_pmu.event_constraints = intel_v1_event_constraints;
+			pr_cont("generic architected perfmon v1, ");
+			break;
+		default:
+			/*
+			 * default constraints for v2 and up
+			 */
+			x86_pmu.event_constraints = intel_gen_event_constraints;
+			pr_cont("generic architected perfmon, ");
+			break;
+		}
 	}
 	return 0;
 }
@@ -1528,4 +1635,8 @@ static int intel_pmu_init(void)
 	return 0;
 }
 
+static struct intel_shared_regs *allocate_shared_regs(int cpu)
+{
+	return NULL;
+}
 #endif /* CONFIG_CPU_SUP_INTEL */
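Net effect of the er_flags plumbing above: on Nehalem/Westmere the offcore MSRs remain shared per core (HT siblings attach to one intel_shared_regs in the cpu_starting path), while Sandy Bridge sets ERF_NO_HT_SHARING so each CPU keeps its own copy, and ERF_HAS_RSP_1 allows intel_try_alt_er() to retry a conflicting 0xb7 event on MSR_OFFCORE_RSP_1 and vice versa. A condensed view of the two decisions (sketch, not literal kernel code):

	/* does this cpu share extra-reg state with an HT sibling? */
	bool shared_across_ht = cpuc->shared_regs &&
				!(x86_pmu.er_flags & ERF_NO_HT_SHARING);

	/* may a blocked RSP_0 user be retried as RSP_1 (and back)? */
	bool can_swap_rsp = !!(x86_pmu.er_flags & ERF_HAS_RSP_1);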
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index bab491b8ee25..1b1ef3addcfd 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -340,7 +340,7 @@ static int intel_pmu_drain_bts_buffer(void)
 	 */
 	perf_prepare_sample(&header, &data, event, &regs);
 
-	if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1))
+	if (perf_output_begin(&handle, event, header.size * (top - at)))
 		return 1;
 
 	for (; at < top; at++) {
@@ -616,7 +616,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	else
 		regs.flags &= ~PERF_EFLAGS_EXACT;
 
-	if (perf_event_overflow(event, 1, &data, &regs))
+	if (perf_event_overflow(event, &data, &regs))
 		x86_pmu_stop(event, 0);
 }
 
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index ead584fb6a7d..7809d2bcb209 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -554,13 +554,102 @@ static __initconst const u64 p4_hw_cache_event_ids
554 [ C(RESULT_MISS) ] = -1, 554 [ C(RESULT_MISS) ] = -1,
555 }, 555 },
556 }, 556 },
557 [ C(NODE) ] = {
558 [ C(OP_READ) ] = {
559 [ C(RESULT_ACCESS) ] = -1,
560 [ C(RESULT_MISS) ] = -1,
561 },
562 [ C(OP_WRITE) ] = {
563 [ C(RESULT_ACCESS) ] = -1,
564 [ C(RESULT_MISS) ] = -1,
565 },
566 [ C(OP_PREFETCH) ] = {
567 [ C(RESULT_ACCESS) ] = -1,
568 [ C(RESULT_MISS) ] = -1,
569 },
570 },
557}; 571};
558 572
573/*
574 * Because of Netburst being quite restricted in how many
575 * identical events may run simultaneously, we introduce event aliases,
576 * ie the different events which have the same functionality but
577 * utilize non-intersected resources (ESCR/CCCR/counter registers).
578 *
579 * This allow us to relax restrictions a bit and run two or more
580 * identical events together.
581 *
582 * Never set any custom internal bits such as P4_CONFIG_HT,
583 * P4_CONFIG_ALIASABLE or bits for P4_PEBS_METRIC, they are
584 * either up to date automatically or not applicable at all.
585 */
586struct p4_event_alias {
587 u64 original;
588 u64 alternative;
589} p4_event_aliases[] = {
590 {
591 /*
592 * Non-halted cycles can be substituted with non-sleeping cycles (see
593 * Intel SDM Vol3b for details). We need this alias to be able
594 * to run nmi-watchdog and 'perf top' (or any other user space tool
595 * which is interested in running PERF_COUNT_HW_CPU_CYCLES)
596 * simultaneously.
597 */
598 .original =
599 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) |
600 P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)),
601 .alternative =
602 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_EXECUTION_EVENT) |
603 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0)|
604 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1)|
605 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2)|
606 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3)|
607 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) |
608 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) |
609 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) |
610 P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3))|
611 p4_config_pack_cccr(P4_CCCR_THRESHOLD(15) | P4_CCCR_COMPLEMENT |
612 P4_CCCR_COMPARE),
613 },
614};
615
616static u64 p4_get_alias_event(u64 config)
617{
618 u64 config_match;
619 int i;
620
 621 /*
 622 * Only an event carrying the special mark is allowed;
 623 * this makes sure it didn't come in as a malformed
 624 * RAW event.
 625 */
626 if (!(config & P4_CONFIG_ALIASABLE))
627 return 0;
628
629 config_match = config & P4_CONFIG_EVENT_ALIAS_MASK;
630
631 for (i = 0; i < ARRAY_SIZE(p4_event_aliases); i++) {
632 if (config_match == p4_event_aliases[i].original) {
633 config_match = p4_event_aliases[i].alternative;
634 break;
635 } else if (config_match == p4_event_aliases[i].alternative) {
636 config_match = p4_event_aliases[i].original;
637 break;
638 }
639 }
640
641 if (i >= ARRAY_SIZE(p4_event_aliases))
642 return 0;
643
644 return config_match | (config & P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS);
645}
646
559static u64 p4_general_events[PERF_COUNT_HW_MAX] = { 647static u64 p4_general_events[PERF_COUNT_HW_MAX] = {
560 /* non-halted CPU clocks */ 648 /* non-halted CPU clocks */
561 [PERF_COUNT_HW_CPU_CYCLES] = 649 [PERF_COUNT_HW_CPU_CYCLES] =
562 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) | 650 p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) |
563 P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)), 651 P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)) |
652 P4_CONFIG_ALIASABLE,
564 653
565 /* 654 /*
566 * retired instructions 655 * retired instructions
@@ -945,7 +1034,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
945 1034
946 if (!x86_perf_event_set_period(event)) 1035 if (!x86_perf_event_set_period(event))
947 continue; 1036 continue;
948 if (perf_event_overflow(event, 1, &data, regs)) 1037 if (perf_event_overflow(event, &data, regs))
949 x86_pmu_stop(event, 0); 1038 x86_pmu_stop(event, 0);
950 } 1039 }
951 1040
@@ -1120,6 +1209,8 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign
1120 struct p4_event_bind *bind; 1209 struct p4_event_bind *bind;
1121 unsigned int i, thread, num; 1210 unsigned int i, thread, num;
1122 int cntr_idx, escr_idx; 1211 int cntr_idx, escr_idx;
1212 u64 config_alias;
1213 int pass;
1123 1214
1124 bitmap_zero(used_mask, X86_PMC_IDX_MAX); 1215 bitmap_zero(used_mask, X86_PMC_IDX_MAX);
1125 bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE); 1216 bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE);
@@ -1128,6 +1219,17 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign
1128 1219
1129 hwc = &cpuc->event_list[i]->hw; 1220 hwc = &cpuc->event_list[i]->hw;
1130 thread = p4_ht_thread(cpu); 1221 thread = p4_ht_thread(cpu);
1222 pass = 0;
1223
1224again:
 1225 /*
 1226 * It's possible to cycle endlessly between the
 1227 * original and alternative events if both are
 1228 * scheduled already, hence the bounded pass count.
 1229 */
1230 if (pass > 2)
1231 goto done;
1232
1131 bind = p4_config_get_bind(hwc->config); 1233 bind = p4_config_get_bind(hwc->config);
1132 escr_idx = p4_get_escr_idx(bind->escr_msr[thread]); 1234 escr_idx = p4_get_escr_idx(bind->escr_msr[thread]);
1133 if (unlikely(escr_idx == -1)) 1235 if (unlikely(escr_idx == -1))
@@ -1141,8 +1243,17 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign
1141 } 1243 }
1142 1244
1143 cntr_idx = p4_next_cntr(thread, used_mask, bind); 1245 cntr_idx = p4_next_cntr(thread, used_mask, bind);
1144 if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) 1246 if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) {
1145 goto done; 1247 /*
1248 * Check whether an event alias is still available.
1249 */
1250 config_alias = p4_get_alias_event(hwc->config);
1251 if (!config_alias)
1252 goto done;
1253 hwc->config = config_alias;
1254 pass++;
1255 goto again;
1256 }
1146 1257
1147 p4_pmu_swap_config_ts(hwc, cpu); 1258 p4_pmu_swap_config_ts(hwc, cpu);
1148 if (assign) 1259 if (assign)
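
A minimal standalone sketch of the swap-and-retry idea used in p4_pmu_schedule_events() above; the alias table contents, the config values and the try_reserve() callback are hypothetical, only the control flow mirrors the patch:

/*
 * Sketch of the alias swap-and-retry loop. try_reserve() stands in
 * for the counter/ESCR bitmap test in the real scheduler.
 */
#include <stdint.h>
#include <stddef.h>

struct alias { uint64_t original, alternative; };

static const struct alias aliases[] = {
	{ .original = 0x1, .alternative = 0x2 },	/* made-up configs */
};

static uint64_t get_alias(uint64_t config)
{
	size_t i;

	for (i = 0; i < sizeof(aliases) / sizeof(aliases[0]); i++) {
		if (config == aliases[i].original)
			return aliases[i].alternative;
		if (config == aliases[i].alternative)
			return aliases[i].original;
	}
	return 0;			/* no alias known */
}

static int schedule_one(uint64_t config, int (*try_reserve)(uint64_t))
{
	int pass = 0;

	while (pass <= 2) {		/* bounded, as in the patch */
		if (try_reserve(config))
			return 0;	/* scheduled */
		config = get_alias(config);
		if (!config)
			break;		/* no alternative left */
		pass++;			/* retry with the alias */
	}
	return -1;			/* give up */
}
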
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index e71c98d3c0d2..19853ad8afc5 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -105,34 +105,6 @@ in_irq_stack(unsigned long *stack, unsigned long *irq_stack,
105} 105}
106 106
107/* 107/*
108 * We are returning from the irq stack and go to the previous one.
109 * If the previous stack is also in the irq stack, then bp in the first
110 * frame of the irq stack points to the previous, interrupted one.
111 * Otherwise we have another level of indirection: We first save
112 * the bp of the previous stack, then we switch the stack to the irq one
113 * and save a new bp that links to the previous one.
114 * (See save_args())
115 */
116static inline unsigned long
117fixup_bp_irq_link(unsigned long bp, unsigned long *stack,
118 unsigned long *irq_stack, unsigned long *irq_stack_end)
119{
120#ifdef CONFIG_FRAME_POINTER
121 struct stack_frame *frame = (struct stack_frame *)bp;
122 unsigned long next;
123
124 if (!in_irq_stack(stack, irq_stack, irq_stack_end)) {
125 if (!probe_kernel_address(&frame->next_frame, next))
126 return next;
127 else
128 WARN_ONCE(1, "Perf: bad frame pointer = %p in "
129 "callchain\n", &frame->next_frame);
130 }
131#endif
132 return bp;
133}
134
135/*
136 * x86-64 can have up to three kernel stacks: 108 * x86-64 can have up to three kernel stacks:
137 * process stack 109 * process stack
138 * interrupt stack 110 * interrupt stack
@@ -155,9 +127,12 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
155 task = current; 127 task = current;
156 128
157 if (!stack) { 129 if (!stack) {
158 stack = &dummy; 130 if (regs)
159 if (task && task != current) 131 stack = (unsigned long *)regs->sp;
132 else if (task && task != current)
160 stack = (unsigned long *)task->thread.sp; 133 stack = (unsigned long *)task->thread.sp;
134 else
135 stack = &dummy;
161 } 136 }
162 137
163 if (!bp) 138 if (!bp)
@@ -205,8 +180,6 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
205 * pointer (index -1 to end) in the IRQ stack: 180 * pointer (index -1 to end) in the IRQ stack:
206 */ 181 */
207 stack = (unsigned long *) (irq_stack_end[-1]); 182 stack = (unsigned long *) (irq_stack_end[-1]);
208 bp = fixup_bp_irq_link(bp, stack, irq_stack,
209 irq_stack_end);
210 irq_stack_end = NULL; 183 irq_stack_end = NULL;
211 ops->stack(data, "EOI"); 184 ops->stack(data, "EOI");
212 continue; 185 continue;
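
For reference, the new stack-source selection that dump_trace() performs when no stack pointer is passed in, distilled into a standalone C sketch (kernel types reduced to plain pointers; the helper name is hypothetical):

static unsigned long *pick_stack(unsigned long *stack,
				 unsigned long *regs_sp,
				 unsigned long *other_task_sp,
				 unsigned long *dummy)
{
	if (stack)
		return stack;		/* caller supplied one */
	if (regs_sp)
		return regs_sp;		/* prefer the trapped context */
	if (other_task_sp)
		return other_task_sp;	/* a sleeping task's saved sp */
	return dummy;			/* last resort: a local slot */
}
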
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 8a445a0c989e..d656f68371a4 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -297,27 +297,26 @@ ENDPROC(native_usergs_sysret64)
297 .endm 297 .endm
298 298
299/* save partial stack frame */ 299/* save partial stack frame */
300 .pushsection .kprobes.text, "ax" 300 .macro SAVE_ARGS_IRQ
301ENTRY(save_args)
302 XCPT_FRAME
303 cld 301 cld
304 /* 302 /* start from rbp in pt_regs and jump over */
305 * start from rbp in pt_regs and jump over 303 movq_cfi rdi, RDI-RBP
306 * return address. 304 movq_cfi rsi, RSI-RBP
307 */ 305 movq_cfi rdx, RDX-RBP
308 movq_cfi rdi, RDI+8-RBP 306 movq_cfi rcx, RCX-RBP
309 movq_cfi rsi, RSI+8-RBP 307 movq_cfi rax, RAX-RBP
310 movq_cfi rdx, RDX+8-RBP 308 movq_cfi r8, R8-RBP
311 movq_cfi rcx, RCX+8-RBP 309 movq_cfi r9, R9-RBP
312 movq_cfi rax, RAX+8-RBP 310 movq_cfi r10, R10-RBP
313 movq_cfi r8, R8+8-RBP 311 movq_cfi r11, R11-RBP
314 movq_cfi r9, R9+8-RBP 312
315 movq_cfi r10, R10+8-RBP 313 /* Save rbp so that we can unwind from get_irq_regs() */
316 movq_cfi r11, R11+8-RBP 314 movq_cfi rbp, 0
317 315
318 leaq -RBP+8(%rsp),%rdi /* arg1 for handler */ 316 /* Save previous stack value */
319 movq_cfi rbp, 8 /* push %rbp */ 317 movq %rsp, %rsi
320 leaq 8(%rsp), %rbp /* mov %rsp, %ebp */ 318
319 leaq -RBP(%rsp),%rdi /* arg1 for handler */
321 testl $3, CS(%rdi) 320 testl $3, CS(%rdi)
322 je 1f 321 je 1f
323 SWAPGS 322 SWAPGS
@@ -329,19 +328,14 @@ ENTRY(save_args)
329 */ 328 */
3301: incl PER_CPU_VAR(irq_count) 3291: incl PER_CPU_VAR(irq_count)
331 jne 2f 330 jne 2f
332 popq_cfi %rax /* move return address... */
333 mov PER_CPU_VAR(irq_stack_ptr),%rsp 331 mov PER_CPU_VAR(irq_stack_ptr),%rsp
334 EMPTY_FRAME 0 332 EMPTY_FRAME 0
335 pushq_cfi %rbp /* backlink for unwinder */ 333
336 pushq_cfi %rax /* ... to the new stack */ 3342: /* Store previous stack value */
337 /* 335 pushq %rsi
338 * We entered an interrupt context - irqs are off: 336 /* We entered an interrupt context - irqs are off: */
339 */ 337 TRACE_IRQS_OFF
3402: TRACE_IRQS_OFF 338 .endm
341 ret
342 CFI_ENDPROC
343END(save_args)
344 .popsection
345 339
346ENTRY(save_rest) 340ENTRY(save_rest)
347 PARTIAL_FRAME 1 REST_SKIP+8 341 PARTIAL_FRAME 1 REST_SKIP+8
@@ -791,7 +785,7 @@ END(interrupt)
791 /* reserve pt_regs for scratch regs and rbp */ 785 /* reserve pt_regs for scratch regs and rbp */
792 subq $ORIG_RAX-RBP, %rsp 786 subq $ORIG_RAX-RBP, %rsp
793 CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP 787 CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP
794 call save_args 788 SAVE_ARGS_IRQ
795 PARTIAL_FRAME 0 789 PARTIAL_FRAME 0
796 call \func 790 call \func
797 .endm 791 .endm
@@ -814,15 +808,14 @@ ret_from_intr:
814 DISABLE_INTERRUPTS(CLBR_NONE) 808 DISABLE_INTERRUPTS(CLBR_NONE)
815 TRACE_IRQS_OFF 809 TRACE_IRQS_OFF
816 decl PER_CPU_VAR(irq_count) 810 decl PER_CPU_VAR(irq_count)
817 leaveq
818 811
819 CFI_RESTORE rbp 812 /* Restore saved previous stack */
813 popq %rsi
814 leaq 16(%rsi), %rsp
815
820 CFI_DEF_CFA_REGISTER rsp 816 CFI_DEF_CFA_REGISTER rsp
821 CFI_ADJUST_CFA_OFFSET -8 817 CFI_ADJUST_CFA_OFFSET -16
822 818
823 /* we did not save rbx, restore only from ARGOFFSET */
824 addq $8, %rsp
825 CFI_ADJUST_CFA_OFFSET -8
826exit_intr: 819exit_intr:
827 GET_THREAD_INFO(%rcx) 820 GET_THREAD_INFO(%rcx)
828 testl $3,CS-ARGOFFSET(%rsp) 821 testl $3,CS-ARGOFFSET(%rsp)
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 5f9ecff328b5..00354d4919a9 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -608,7 +608,7 @@ int kgdb_arch_init(void)
608 return register_die_notifier(&kgdb_notifier); 608 return register_die_notifier(&kgdb_notifier);
609} 609}
610 610
611static void kgdb_hw_overflow_handler(struct perf_event *event, int nmi, 611static void kgdb_hw_overflow_handler(struct perf_event *event,
612 struct perf_sample_data *data, struct pt_regs *regs) 612 struct perf_sample_data *data, struct pt_regs *regs)
613{ 613{
614 struct task_struct *tsk = current; 614 struct task_struct *tsk = current;
@@ -638,7 +638,7 @@ void kgdb_arch_late(void)
638 for (i = 0; i < HBP_NUM; i++) { 638 for (i = 0; i < HBP_NUM; i++) {
639 if (breakinfo[i].pev) 639 if (breakinfo[i].pev)
640 continue; 640 continue;
641 breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL); 641 breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL, NULL);
642 if (IS_ERR((void * __force)breakinfo[i].pev)) { 642 if (IS_ERR((void * __force)breakinfo[i].pev)) {
643 printk(KERN_ERR "kgdb: Could not allocate hw" 643 printk(KERN_ERR "kgdb: Could not allocate hw"
644 "breakpoints\nDisabling the kernel debugger\n"); 644 "breakpoints\nDisabling the kernel debugger\n");
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 807c2a2b80f1..82528799c5de 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -528,7 +528,7 @@ static int genregs_set(struct task_struct *target,
528 return ret; 528 return ret;
529} 529}
530 530
531static void ptrace_triggered(struct perf_event *bp, int nmi, 531static void ptrace_triggered(struct perf_event *bp,
532 struct perf_sample_data *data, 532 struct perf_sample_data *data,
533 struct pt_regs *regs) 533 struct pt_regs *regs)
534{ 534{
@@ -715,7 +715,8 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
715 attr.bp_type = HW_BREAKPOINT_W; 715 attr.bp_type = HW_BREAKPOINT_W;
716 attr.disabled = 1; 716 attr.disabled = 1;
717 717
718 bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk); 718 bp = register_user_hw_breakpoint(&attr, ptrace_triggered,
719 NULL, tsk);
719 720
720 /* 721 /*
721 * CHECKME: the previous code returned -EIO if the addr wasn't 722 * CHECKME: the previous code returned -EIO if the addr wasn't
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 55d9bc03f696..fdd0c6430e5a 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -66,7 +66,7 @@ void save_stack_trace(struct stack_trace *trace)
66} 66}
67EXPORT_SYMBOL_GPL(save_stack_trace); 67EXPORT_SYMBOL_GPL(save_stack_trace);
68 68
69void save_stack_trace_regs(struct stack_trace *trace, struct pt_regs *regs) 69void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
70{ 70{
71 dump_trace(current, regs, NULL, 0, &save_stack_ops, trace); 71 dump_trace(current, regs, NULL, 0, &save_stack_ops, trace);
72 if (trace->nr_entries < trace->max_entries) 72 if (trace->nr_entries < trace->max_entries)
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index f2479f19ddde..6ba477342b8e 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -18,7 +18,7 @@ obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o
18 18
19lib-y := delay.o 19lib-y := delay.o
20lib-y += thunk_$(BITS).o 20lib-y += thunk_$(BITS).o
21lib-y += usercopy_$(BITS).o getuser.o putuser.o 21lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o
22lib-y += memcpy_$(BITS).o 22lib-y += memcpy_$(BITS).o
23lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o 23lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
24 24
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
new file mode 100644
index 000000000000..97be9cb54483
--- /dev/null
+++ b/arch/x86/lib/usercopy.c
@@ -0,0 +1,43 @@
1/*
2 * User address space access functions.
3 *
4 * For licencing details see kernel-base/COPYING
5 */
6
7#include <linux/highmem.h>
8#include <linux/module.h>
9
10/*
 11 * best-effort, GUP-based copy_from_user() that is NMI-safe
12 */
13unsigned long
14copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
15{
16 unsigned long offset, addr = (unsigned long)from;
17 unsigned long size, len = 0;
18 struct page *page;
19 void *map;
20 int ret;
21
22 do {
23 ret = __get_user_pages_fast(addr, 1, 0, &page);
24 if (!ret)
25 break;
26
27 offset = addr & (PAGE_SIZE - 1);
28 size = min(PAGE_SIZE - offset, n - len);
29
30 map = kmap_atomic(page);
31 memcpy(to, map+offset, size);
32 kunmap_atomic(map);
33 put_page(page);
34
35 len += size;
36 to += size;
37 addr += size;
38
39 } while (len < n);
40
41 return len;
42}
43EXPORT_SYMBOL_GPL(copy_from_user_nmi);
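
A sketch of a caller of the new library function: reading one user stack frame from NMI-ish context, mirroring the oprofile backtrace change further down. The surrounding function is hypothetical; struct stack_frame and the return-value convention (bytes actually copied) come from the patch itself:

#include <linux/uaccess.h>
#include <asm/stacktrace.h>

/* as introduced in arch/x86/lib/usercopy.c above */
extern unsigned long
copy_from_user_nmi(void *to, const void __user *from, unsigned long n);

static unsigned long read_user_return_addr(struct stack_frame __user *head)
{
	struct stack_frame buf;

	/* Partial copies are possible; treat anything short as failure. */
	if (copy_from_user_nmi(&buf, head, sizeof(buf)) != sizeof(buf))
		return 0;

	return buf.return_address;
}
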
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 2dbf6bf4c7e5..4d09df054e39 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1059,7 +1059,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
1059 if (unlikely(error_code & PF_RSVD)) 1059 if (unlikely(error_code & PF_RSVD))
1060 pgtable_bad(regs, error_code, address); 1060 pgtable_bad(regs, error_code, address);
1061 1061
1062 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); 1062 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
1063 1063
1064 /* 1064 /*
1065 * If we're in an interrupt, have no user context or are running 1065 * If we're in an interrupt, have no user context or are running
@@ -1161,11 +1161,11 @@ good_area:
1161 if (flags & FAULT_FLAG_ALLOW_RETRY) { 1161 if (flags & FAULT_FLAG_ALLOW_RETRY) {
1162 if (fault & VM_FAULT_MAJOR) { 1162 if (fault & VM_FAULT_MAJOR) {
1163 tsk->maj_flt++; 1163 tsk->maj_flt++;
1164 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, 1164 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
1165 regs, address); 1165 regs, address);
1166 } else { 1166 } else {
1167 tsk->min_flt++; 1167 tsk->min_flt++;
1168 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, 1168 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
1169 regs, address); 1169 regs, address);
1170 } 1170 }
1171 if (fault & VM_FAULT_RETRY) { 1171 if (fault & VM_FAULT_RETRY) {
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c
index 704a37cedddb..dab41876cdd5 100644
--- a/arch/x86/mm/kmemcheck/error.c
+++ b/arch/x86/mm/kmemcheck/error.c
@@ -185,7 +185,7 @@ void kmemcheck_error_save(enum kmemcheck_shadow state,
185 e->trace.entries = e->trace_entries; 185 e->trace.entries = e->trace_entries;
186 e->trace.max_entries = ARRAY_SIZE(e->trace_entries); 186 e->trace.max_entries = ARRAY_SIZE(e->trace_entries);
187 e->trace.skip = 0; 187 e->trace.skip = 0;
188 save_stack_trace_regs(&e->trace, regs); 188 save_stack_trace_regs(regs, &e->trace);
189 189
190 /* Round address down to nearest 16 bytes */ 190 /* Round address down to nearest 16 bytes */
191 shadow_copy = kmemcheck_shadow_lookup(address 191 shadow_copy = kmemcheck_shadow_lookup(address
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index a5b64ab4cd6e..bff89dfe3619 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -11,10 +11,11 @@
11#include <linux/oprofile.h> 11#include <linux/oprofile.h>
12#include <linux/sched.h> 12#include <linux/sched.h>
13#include <linux/mm.h> 13#include <linux/mm.h>
14#include <linux/compat.h>
15#include <linux/uaccess.h>
16
14#include <asm/ptrace.h> 17#include <asm/ptrace.h>
15#include <asm/uaccess.h>
16#include <asm/stacktrace.h> 18#include <asm/stacktrace.h>
17#include <linux/compat.h>
18 19
19static int backtrace_stack(void *data, char *name) 20static int backtrace_stack(void *data, char *name)
20{ 21{
@@ -40,13 +41,13 @@ static struct stacktrace_ops backtrace_ops = {
40static struct stack_frame_ia32 * 41static struct stack_frame_ia32 *
41dump_user_backtrace_32(struct stack_frame_ia32 *head) 42dump_user_backtrace_32(struct stack_frame_ia32 *head)
42{ 43{
44 /* Also check accessibility of one struct frame_head beyond: */
43 struct stack_frame_ia32 bufhead[2]; 45 struct stack_frame_ia32 bufhead[2];
44 struct stack_frame_ia32 *fp; 46 struct stack_frame_ia32 *fp;
47 unsigned long bytes;
45 48
46 /* Also check accessibility of one struct frame_head beyond */ 49 bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead));
47 if (!access_ok(VERIFY_READ, head, sizeof(bufhead))) 50 if (bytes != sizeof(bufhead))
48 return NULL;
49 if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead)))
50 return NULL; 51 return NULL;
51 52
52 fp = (struct stack_frame_ia32 *) compat_ptr(bufhead[0].next_frame); 53 fp = (struct stack_frame_ia32 *) compat_ptr(bufhead[0].next_frame);
@@ -87,12 +88,12 @@ x86_backtrace_32(struct pt_regs * const regs, unsigned int depth)
87 88
88static struct stack_frame *dump_user_backtrace(struct stack_frame *head) 89static struct stack_frame *dump_user_backtrace(struct stack_frame *head)
89{ 90{
91 /* Also check accessibility of one struct frame_head beyond: */
90 struct stack_frame bufhead[2]; 92 struct stack_frame bufhead[2];
93 unsigned long bytes;
91 94
92 /* Also check accessibility of one struct stack_frame beyond */ 95 bytes = copy_from_user_nmi(bufhead, head, sizeof(bufhead));
93 if (!access_ok(VERIFY_READ, head, sizeof(bufhead))) 96 if (bytes != sizeof(bufhead))
94 return NULL;
95 if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead)))
96 return NULL; 97 return NULL;
97 98
98 oprofile_add_trace(bufhead[0].return_address); 99 oprofile_add_trace(bufhead[0].return_address);
diff --git a/drivers/oprofile/oprofile_perf.c b/drivers/oprofile/oprofile_perf.c
index 9046f7b2ed79..94796f39bc47 100644
--- a/drivers/oprofile/oprofile_perf.c
+++ b/drivers/oprofile/oprofile_perf.c
@@ -31,7 +31,7 @@ static int num_counters;
31/* 31/*
32 * Overflow callback for oprofile. 32 * Overflow callback for oprofile.
33 */ 33 */
34static void op_overflow_handler(struct perf_event *event, int unused, 34static void op_overflow_handler(struct perf_event *event,
35 struct perf_sample_data *data, struct pt_regs *regs) 35 struct perf_sample_data *data, struct pt_regs *regs)
36{ 36{
37 int id; 37 int id;
@@ -79,7 +79,7 @@ static int op_create_counter(int cpu, int event)
79 79
80 pevent = perf_event_create_kernel_counter(&counter_config[event].attr, 80 pevent = perf_event_create_kernel_counter(&counter_config[event].attr,
81 cpu, NULL, 81 cpu, NULL,
82 op_overflow_handler); 82 op_overflow_handler, NULL);
83 83
84 if (IS_ERR(pevent)) 84 if (IS_ERR(pevent))
85 return PTR_ERR(pevent); 85 return PTR_ERR(pevent);
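
A sketch of creating a kernel counter against the reworked API: the handler signature loses its 'nmi' argument and perf_event_create_kernel_counter() gains a trailing 'context' pointer, both per the interface change above. The attr values and the private struct are hypothetical:

#include <linux/perf_event.h>

struct my_priv { int hits; };

static void my_overflow(struct perf_event *event,
			struct perf_sample_data *data, struct pt_regs *regs)
{
	struct my_priv *priv = event->overflow_handler_context;

	priv->hits++;
}

static struct perf_event *create_cycle_counter(int cpu, struct my_priv *priv)
{
	struct perf_event_attr attr = {
		.type		= PERF_TYPE_HARDWARE,
		.config		= PERF_COUNT_HW_CPU_CYCLES,
		.size		= sizeof(attr),
		.sample_period	= 1000000,
	};

	return perf_event_create_kernel_counter(&attr, cpu, NULL,
						my_overflow, priv);
}

As with the oprofile caller above, the return value is an ERR_PTR on failure and should be checked with IS_ERR().
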
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 9d88e1cb5dbb..f0c0e8a47ae6 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -19,6 +19,8 @@
19 19
20#include <asm/ftrace.h> 20#include <asm/ftrace.h>
21 21
22struct ftrace_hash;
23
22#ifdef CONFIG_FUNCTION_TRACER 24#ifdef CONFIG_FUNCTION_TRACER
23 25
24extern int ftrace_enabled; 26extern int ftrace_enabled;
@@ -29,8 +31,6 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
29 31
30typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); 32typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip);
31 33
32struct ftrace_hash;
33
34enum { 34enum {
35 FTRACE_OPS_FL_ENABLED = 1 << 0, 35 FTRACE_OPS_FL_ENABLED = 1 << 0,
36 FTRACE_OPS_FL_GLOBAL = 1 << 1, 36 FTRACE_OPS_FL_GLOBAL = 1 << 1,
@@ -123,7 +123,8 @@ stack_trace_sysctl(struct ctl_table *table, int write,
123struct ftrace_func_command { 123struct ftrace_func_command {
124 struct list_head list; 124 struct list_head list;
125 char *name; 125 char *name;
126 int (*func)(char *func, char *cmd, 126 int (*func)(struct ftrace_hash *hash,
127 char *func, char *cmd,
127 char *params, int enable); 128 char *params, int enable);
128}; 129};
129 130
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 59d3ef100eb9..96efa6794ea5 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -76,6 +76,7 @@ struct trace_iterator {
76 struct trace_entry *ent; 76 struct trace_entry *ent;
77 unsigned long lost_events; 77 unsigned long lost_events;
78 int leftover; 78 int leftover;
79 int ent_size;
79 int cpu; 80 int cpu;
80 u64 ts; 81 u64 ts;
81 82
@@ -129,6 +130,10 @@ void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
129void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer, 130void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer,
130 struct ring_buffer_event *event, 131 struct ring_buffer_event *event,
131 unsigned long flags, int pc); 132 unsigned long flags, int pc);
133void trace_nowake_buffer_unlock_commit_regs(struct ring_buffer *buffer,
134 struct ring_buffer_event *event,
135 unsigned long flags, int pc,
136 struct pt_regs *regs);
132void trace_current_buffer_discard_commit(struct ring_buffer *buffer, 137void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
133 struct ring_buffer_event *event); 138 struct ring_buffer_event *event);
134 139
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index d1e55fed2c7d..6ae9c631a1be 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -73,6 +73,7 @@ static inline unsigned long hw_breakpoint_len(struct perf_event *bp)
73extern struct perf_event * 73extern struct perf_event *
74register_user_hw_breakpoint(struct perf_event_attr *attr, 74register_user_hw_breakpoint(struct perf_event_attr *attr,
75 perf_overflow_handler_t triggered, 75 perf_overflow_handler_t triggered,
76 void *context,
76 struct task_struct *tsk); 77 struct task_struct *tsk);
77 78
78/* FIXME: only change from the attr, and don't unregister */ 79/* FIXME: only change from the attr, and don't unregister */
@@ -85,11 +86,13 @@ modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr);
85extern struct perf_event * 86extern struct perf_event *
86register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr, 87register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr,
87 perf_overflow_handler_t triggered, 88 perf_overflow_handler_t triggered,
89 void *context,
88 int cpu); 90 int cpu);
89 91
90extern struct perf_event * __percpu * 92extern struct perf_event * __percpu *
91register_wide_hw_breakpoint(struct perf_event_attr *attr, 93register_wide_hw_breakpoint(struct perf_event_attr *attr,
92 perf_overflow_handler_t triggered); 94 perf_overflow_handler_t triggered,
95 void *context);
93 96
94extern int register_perf_hw_breakpoint(struct perf_event *bp); 97extern int register_perf_hw_breakpoint(struct perf_event *bp);
95extern int __register_perf_hw_breakpoint(struct perf_event *bp); 98extern int __register_perf_hw_breakpoint(struct perf_event *bp);
@@ -115,6 +118,7 @@ static inline int __init init_hw_breakpoint(void) { return 0; }
115static inline struct perf_event * 118static inline struct perf_event *
116register_user_hw_breakpoint(struct perf_event_attr *attr, 119register_user_hw_breakpoint(struct perf_event_attr *attr,
117 perf_overflow_handler_t triggered, 120 perf_overflow_handler_t triggered,
121 void *context,
118 struct task_struct *tsk) { return NULL; } 122 struct task_struct *tsk) { return NULL; }
119static inline int 123static inline int
120modify_user_hw_breakpoint(struct perf_event *bp, 124modify_user_hw_breakpoint(struct perf_event *bp,
@@ -122,10 +126,12 @@ modify_user_hw_breakpoint(struct perf_event *bp,
122static inline struct perf_event * 126static inline struct perf_event *
123register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr, 127register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr,
124 perf_overflow_handler_t triggered, 128 perf_overflow_handler_t triggered,
129 void *context,
125 int cpu) { return NULL; } 130 int cpu) { return NULL; }
126static inline struct perf_event * __percpu * 131static inline struct perf_event * __percpu *
127register_wide_hw_breakpoint(struct perf_event_attr *attr, 132register_wide_hw_breakpoint(struct perf_event_attr *attr,
128 perf_overflow_handler_t triggered) { return NULL; } 133 perf_overflow_handler_t triggered,
134 void *context) { return NULL; }
129static inline int 135static inline int
130register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; } 136register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; }
131static inline int 137static inline int
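
A sketch of arming a per-task write breakpoint with the widened signature; passing NULL for the new 'context' argument preserves the old behaviour, as the converted ptrace and kgdb call sites do. The watched address and the trigger body are hypothetical:

#include <linux/hw_breakpoint.h>
#include <linux/perf_event.h>

static void my_trigger(struct perf_event *bp,
		       struct perf_sample_data *data, struct pt_regs *regs)
{
	/* react to the watched write here */
}

static struct perf_event *watch_word(struct task_struct *tsk,
				     unsigned long addr)
{
	struct perf_event_attr attr;

	hw_breakpoint_init(&attr);
	attr.bp_addr = addr;
	attr.bp_len  = HW_BREAKPOINT_LEN_4;
	attr.bp_type = HW_BREAKPOINT_W;

	return register_user_hw_breakpoint(&attr, my_trigger, NULL, tsk);
}
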
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index e0786e35f247..3f2711ccf910 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -61,7 +61,7 @@ enum perf_hw_id {
61/* 61/*
62 * Generalized hardware cache events: 62 * Generalized hardware cache events:
63 * 63 *
64 * { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x 64 * { L1-D, L1-I, LLC, ITLB, DTLB, BPU, NODE } x
65 * { read, write, prefetch } x 65 * { read, write, prefetch } x
66 * { accesses, misses } 66 * { accesses, misses }
67 */ 67 */
@@ -72,6 +72,7 @@ enum perf_hw_cache_id {
72 PERF_COUNT_HW_CACHE_DTLB = 3, 72 PERF_COUNT_HW_CACHE_DTLB = 3,
73 PERF_COUNT_HW_CACHE_ITLB = 4, 73 PERF_COUNT_HW_CACHE_ITLB = 4,
74 PERF_COUNT_HW_CACHE_BPU = 5, 74 PERF_COUNT_HW_CACHE_BPU = 5,
75 PERF_COUNT_HW_CACHE_NODE = 6,
75 76
76 PERF_COUNT_HW_CACHE_MAX, /* non-ABI */ 77 PERF_COUNT_HW_CACHE_MAX, /* non-ABI */
77}; 78};
@@ -536,6 +537,16 @@ struct perf_branch_stack {
536 537
537struct task_struct; 538struct task_struct;
538 539
540/*
541 * extra PMU register associated with an event
542 */
543struct hw_perf_event_extra {
544 u64 config; /* register value */
545 unsigned int reg; /* register address or index */
546 int alloc; /* extra register already allocated */
547 int idx; /* index in shared_regs->regs[] */
548};
549
539/** 550/**
540 * struct hw_perf_event - performance event hardware details: 551 * struct hw_perf_event - performance event hardware details:
541 */ 552 */
@@ -549,9 +560,7 @@ struct hw_perf_event {
549 unsigned long event_base; 560 unsigned long event_base;
550 int idx; 561 int idx;
551 int last_cpu; 562 int last_cpu;
552 unsigned int extra_reg; 563 struct hw_perf_event_extra extra_reg;
553 u64 extra_config;
554 int extra_alloc;
555 }; 564 };
556 struct { /* software */ 565 struct { /* software */
557 struct hrtimer hrtimer; 566 struct hrtimer hrtimer;
@@ -680,36 +689,9 @@ enum perf_event_active_state {
680}; 689};
681 690
682struct file; 691struct file;
683
684#define PERF_BUFFER_WRITABLE 0x01
685
686struct perf_buffer {
687 atomic_t refcount;
688 struct rcu_head rcu_head;
689#ifdef CONFIG_PERF_USE_VMALLOC
690 struct work_struct work;
691 int page_order; /* allocation order */
692#endif
693 int nr_pages; /* nr of data pages */
694 int writable; /* are we writable */
695
696 atomic_t poll; /* POLL_ for wakeups */
697
698 local_t head; /* write position */
699 local_t nest; /* nested writers */
700 local_t events; /* event limit */
701 local_t wakeup; /* wakeup stamp */
702 local_t lost; /* nr records lost */
703
704 long watermark; /* wakeup watermark */
705
706 struct perf_event_mmap_page *user_page;
707 void *data_pages[0];
708};
709
710struct perf_sample_data; 692struct perf_sample_data;
711 693
712typedef void (*perf_overflow_handler_t)(struct perf_event *, int, 694typedef void (*perf_overflow_handler_t)(struct perf_event *,
713 struct perf_sample_data *, 695 struct perf_sample_data *,
714 struct pt_regs *regs); 696 struct pt_regs *regs);
715 697
@@ -745,6 +727,8 @@ struct perf_cgroup {
745}; 727};
746#endif 728#endif
747 729
730struct ring_buffer;
731
748/** 732/**
749 * struct perf_event - performance event kernel representation: 733 * struct perf_event - performance event kernel representation:
750 */ 734 */
@@ -834,7 +818,7 @@ struct perf_event {
834 atomic_t mmap_count; 818 atomic_t mmap_count;
835 int mmap_locked; 819 int mmap_locked;
836 struct user_struct *mmap_user; 820 struct user_struct *mmap_user;
837 struct perf_buffer *buffer; 821 struct ring_buffer *rb;
838 822
839 /* poll related */ 823 /* poll related */
840 wait_queue_head_t waitq; 824 wait_queue_head_t waitq;
@@ -855,6 +839,7 @@ struct perf_event {
855 u64 id; 839 u64 id;
856 840
857 perf_overflow_handler_t overflow_handler; 841 perf_overflow_handler_t overflow_handler;
842 void *overflow_handler_context;
858 843
859#ifdef CONFIG_EVENT_TRACING 844#ifdef CONFIG_EVENT_TRACING
860 struct ftrace_event_call *tp_event; 845 struct ftrace_event_call *tp_event;
@@ -919,8 +904,8 @@ struct perf_event_context {
919 u64 parent_gen; 904 u64 parent_gen;
920 u64 generation; 905 u64 generation;
921 int pin_count; 906 int pin_count;
922 struct rcu_head rcu_head;
923 int nr_cgroups; /* cgroup events present */ 907 int nr_cgroups; /* cgroup events present */
908 struct rcu_head rcu_head;
924}; 909};
925 910
926/* 911/*
@@ -945,13 +930,11 @@ struct perf_cpu_context {
945 930
946struct perf_output_handle { 931struct perf_output_handle {
947 struct perf_event *event; 932 struct perf_event *event;
948 struct perf_buffer *buffer; 933 struct ring_buffer *rb;
949 unsigned long wakeup; 934 unsigned long wakeup;
950 unsigned long size; 935 unsigned long size;
951 void *addr; 936 void *addr;
952 int page; 937 int page;
953 int nmi;
954 int sample;
955}; 938};
956 939
957#ifdef CONFIG_PERF_EVENTS 940#ifdef CONFIG_PERF_EVENTS
@@ -972,13 +955,15 @@ extern void perf_pmu_disable(struct pmu *pmu);
972extern void perf_pmu_enable(struct pmu *pmu); 955extern void perf_pmu_enable(struct pmu *pmu);
973extern int perf_event_task_disable(void); 956extern int perf_event_task_disable(void);
974extern int perf_event_task_enable(void); 957extern int perf_event_task_enable(void);
958extern int perf_event_refresh(struct perf_event *event, int refresh);
975extern void perf_event_update_userpage(struct perf_event *event); 959extern void perf_event_update_userpage(struct perf_event *event);
976extern int perf_event_release_kernel(struct perf_event *event); 960extern int perf_event_release_kernel(struct perf_event *event);
977extern struct perf_event * 961extern struct perf_event *
978perf_event_create_kernel_counter(struct perf_event_attr *attr, 962perf_event_create_kernel_counter(struct perf_event_attr *attr,
979 int cpu, 963 int cpu,
980 struct task_struct *task, 964 struct task_struct *task,
981 perf_overflow_handler_t callback); 965 perf_overflow_handler_t callback,
966 void *context);
982extern u64 perf_event_read_value(struct perf_event *event, 967extern u64 perf_event_read_value(struct perf_event *event,
983 u64 *enabled, u64 *running); 968 u64 *enabled, u64 *running);
984 969
@@ -1018,7 +1003,7 @@ extern void perf_prepare_sample(struct perf_event_header *header,
1018 struct perf_event *event, 1003 struct perf_event *event,
1019 struct pt_regs *regs); 1004 struct pt_regs *regs);
1020 1005
1021extern int perf_event_overflow(struct perf_event *event, int nmi, 1006extern int perf_event_overflow(struct perf_event *event,
1022 struct perf_sample_data *data, 1007 struct perf_sample_data *data,
1023 struct pt_regs *regs); 1008 struct pt_regs *regs);
1024 1009
@@ -1037,7 +1022,7 @@ static inline int is_software_event(struct perf_event *event)
1037 1022
1038extern struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; 1023extern struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
1039 1024
1040extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64); 1025extern void __perf_sw_event(u32, u64, struct pt_regs *, u64);
1041 1026
1042#ifndef perf_arch_fetch_caller_regs 1027#ifndef perf_arch_fetch_caller_regs
1043static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { } 1028static inline void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { }
@@ -1059,7 +1044,7 @@ static inline void perf_fetch_caller_regs(struct pt_regs *regs)
1059} 1044}
1060 1045
1061static __always_inline void 1046static __always_inline void
1062perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) 1047perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
1063{ 1048{
1064 struct pt_regs hot_regs; 1049 struct pt_regs hot_regs;
1065 1050
@@ -1068,7 +1053,7 @@ perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
1068 perf_fetch_caller_regs(&hot_regs); 1053 perf_fetch_caller_regs(&hot_regs);
1069 regs = &hot_regs; 1054 regs = &hot_regs;
1070 } 1055 }
1071 __perf_sw_event(event_id, nr, nmi, regs, addr); 1056 __perf_sw_event(event_id, nr, regs, addr);
1072 } 1057 }
1073} 1058}
1074 1059
@@ -1082,7 +1067,7 @@ static inline void perf_event_task_sched_in(struct task_struct *task)
1082 1067
1083static inline void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next) 1068static inline void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next)
1084{ 1069{
1085 perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); 1070 perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0);
1086 1071
1087 __perf_event_task_sched_out(task, next); 1072 __perf_event_task_sched_out(task, next);
1088} 1073}
@@ -1143,8 +1128,7 @@ extern void perf_bp_event(struct perf_event *event, void *data);
1143#endif 1128#endif
1144 1129
1145extern int perf_output_begin(struct perf_output_handle *handle, 1130extern int perf_output_begin(struct perf_output_handle *handle,
1146 struct perf_event *event, unsigned int size, 1131 struct perf_event *event, unsigned int size);
1147 int nmi, int sample);
1148extern void perf_output_end(struct perf_output_handle *handle); 1132extern void perf_output_end(struct perf_output_handle *handle);
1149extern void perf_output_copy(struct perf_output_handle *handle, 1133extern void perf_output_copy(struct perf_output_handle *handle,
1150 const void *buf, unsigned int len); 1134 const void *buf, unsigned int len);
@@ -1166,10 +1150,13 @@ static inline void perf_event_delayed_put(struct task_struct *task) { }
1166static inline void perf_event_print_debug(void) { } 1150static inline void perf_event_print_debug(void) { }
1167static inline int perf_event_task_disable(void) { return -EINVAL; } 1151static inline int perf_event_task_disable(void) { return -EINVAL; }
1168static inline int perf_event_task_enable(void) { return -EINVAL; } 1152static inline int perf_event_task_enable(void) { return -EINVAL; }
1153static inline int perf_event_refresh(struct perf_event *event, int refresh)
1154{
1155 return -EINVAL;
1156}
1169 1157
1170static inline void 1158static inline void
1171perf_sw_event(u32 event_id, u64 nr, int nmi, 1159perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { }
1172 struct pt_regs *regs, u64 addr) { }
1173static inline void 1160static inline void
1174perf_bp_event(struct perf_event *event, void *data) { } 1161perf_bp_event(struct perf_event *event, void *data) { }
1175 1162
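
For the software-event side of the same cleanup, a minimal call site under the slimmed-down perf_sw_event() (the event id is a real ABI constant; the wrapper function is hypothetical):

#include <linux/perf_event.h>

static void note_fault(struct pt_regs *regs, unsigned long address)
{
	/* One PAGE_FAULTS event; the old 'nmi' argument is gone. */
	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
}
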
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index ab38ac80b0f9..b891de96000f 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -169,7 +169,7 @@ void ring_buffer_set_clock(struct ring_buffer *buffer,
169size_t ring_buffer_page_len(void *page); 169size_t ring_buffer_page_len(void *page);
170 170
171 171
172void *ring_buffer_alloc_read_page(struct ring_buffer *buffer); 172void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu);
173void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data); 173void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data);
174int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page, 174int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page,
175 size_t len, int cpu, int full); 175 size_t len, int cpu, int full);
diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
index 25310f1d7f37..115b570e3bff 100644
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h
@@ -14,8 +14,8 @@ struct stack_trace {
14}; 14};
15 15
16extern void save_stack_trace(struct stack_trace *trace); 16extern void save_stack_trace(struct stack_trace *trace);
17extern void save_stack_trace_regs(struct stack_trace *trace, 17extern void save_stack_trace_regs(struct pt_regs *regs,
18 struct pt_regs *regs); 18 struct stack_trace *trace);
19extern void save_stack_trace_tsk(struct task_struct *tsk, 19extern void save_stack_trace_tsk(struct task_struct *tsk,
20 struct stack_trace *trace); 20 struct stack_trace *trace);
21 21
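
A sketch of a caller using the swapped argument order (regs first, trace second), matching the kmemcheck conversion above; the entry buffer size is arbitrary:

#include <linux/stacktrace.h>

#define MY_DEPTH 32

static void capture(struct pt_regs *regs)
{
	static unsigned long entries[MY_DEPTH];
	struct stack_trace trace = {
		.entries	= entries,
		.max_entries	= MY_DEPTH,
		.skip		= 0,
	};

	save_stack_trace_regs(regs, &trace);
}
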
diff --git a/kernel/async.c b/kernel/async.c
index cd9dbb913c77..d5fe7af0de2e 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -49,12 +49,13 @@ asynchronous and synchronous parts of the kernel.
49*/ 49*/
50 50
51#include <linux/async.h> 51#include <linux/async.h>
52#include <linux/atomic.h>
53#include <linux/ktime.h>
52#include <linux/module.h> 54#include <linux/module.h>
53#include <linux/wait.h> 55#include <linux/wait.h>
54#include <linux/sched.h> 56#include <linux/sched.h>
55#include <linux/slab.h> 57#include <linux/slab.h>
56#include <linux/workqueue.h> 58#include <linux/workqueue.h>
57#include <asm/atomic.h>
58 59
59static async_cookie_t next_cookie = 1; 60static async_cookie_t next_cookie = 1;
60 61
@@ -128,7 +129,8 @@ static void async_run_entry_fn(struct work_struct *work)
128 129
129 /* 2) run (and print duration) */ 130 /* 2) run (and print duration) */
130 if (initcall_debug && system_state == SYSTEM_BOOTING) { 131 if (initcall_debug && system_state == SYSTEM_BOOTING) {
131 printk("calling %lli_%pF @ %i\n", (long long)entry->cookie, 132 printk(KERN_DEBUG "calling %lli_%pF @ %i\n",
133 (long long)entry->cookie,
132 entry->func, task_pid_nr(current)); 134 entry->func, task_pid_nr(current));
133 calltime = ktime_get(); 135 calltime = ktime_get();
134 } 136 }
@@ -136,7 +138,7 @@ static void async_run_entry_fn(struct work_struct *work)
136 if (initcall_debug && system_state == SYSTEM_BOOTING) { 138 if (initcall_debug && system_state == SYSTEM_BOOTING) {
137 rettime = ktime_get(); 139 rettime = ktime_get();
138 delta = ktime_sub(rettime, calltime); 140 delta = ktime_sub(rettime, calltime);
139 printk("initcall %lli_%pF returned 0 after %lld usecs\n", 141 printk(KERN_DEBUG "initcall %lli_%pF returned 0 after %lld usecs\n",
140 (long long)entry->cookie, 142 (long long)entry->cookie,
141 entry->func, 143 entry->func,
142 (long long)ktime_to_ns(delta) >> 10); 144 (long long)ktime_to_ns(delta) >> 10);
@@ -270,7 +272,7 @@ void async_synchronize_cookie_domain(async_cookie_t cookie,
270 ktime_t starttime, delta, endtime; 272 ktime_t starttime, delta, endtime;
271 273
272 if (initcall_debug && system_state == SYSTEM_BOOTING) { 274 if (initcall_debug && system_state == SYSTEM_BOOTING) {
273 printk("async_waiting @ %i\n", task_pid_nr(current)); 275 printk(KERN_DEBUG "async_waiting @ %i\n", task_pid_nr(current));
274 starttime = ktime_get(); 276 starttime = ktime_get();
275 } 277 }
276 278
@@ -280,7 +282,7 @@ void async_synchronize_cookie_domain(async_cookie_t cookie,
280 endtime = ktime_get(); 282 endtime = ktime_get();
281 delta = ktime_sub(endtime, starttime); 283 delta = ktime_sub(endtime, starttime);
282 284
283 printk("async_continuing @ %i after %lli usec\n", 285 printk(KERN_DEBUG "async_continuing @ %i after %lli usec\n",
284 task_pid_nr(current), 286 task_pid_nr(current),
285 (long long)ktime_to_ns(delta) >> 10); 287 (long long)ktime_to_ns(delta) >> 10);
286 } 288 }
diff --git a/kernel/events/Makefile b/kernel/events/Makefile
index 1ce23d3d8394..89e5e8aa4c36 100644
--- a/kernel/events/Makefile
+++ b/kernel/events/Makefile
@@ -2,5 +2,5 @@ ifdef CONFIG_FUNCTION_TRACER
2CFLAGS_REMOVE_core.o = -pg 2CFLAGS_REMOVE_core.o = -pg
3endif 3endif
4 4
5obj-y := core.o 5obj-y := core.o ring_buffer.o
6obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o 6obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 9efe7108ccaf..b8785e26ee1c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -36,6 +36,8 @@
36#include <linux/ftrace_event.h> 36#include <linux/ftrace_event.h>
37#include <linux/hw_breakpoint.h> 37#include <linux/hw_breakpoint.h>
38 38
39#include "internal.h"
40
39#include <asm/irq_regs.h> 41#include <asm/irq_regs.h>
40 42
41struct remote_function_call { 43struct remote_function_call {
@@ -200,6 +202,22 @@ __get_cpu_context(struct perf_event_context *ctx)
200 return this_cpu_ptr(ctx->pmu->pmu_cpu_context); 202 return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
201} 203}
202 204
205static void perf_ctx_lock(struct perf_cpu_context *cpuctx,
206 struct perf_event_context *ctx)
207{
208 raw_spin_lock(&cpuctx->ctx.lock);
209 if (ctx)
210 raw_spin_lock(&ctx->lock);
211}
212
213static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
214 struct perf_event_context *ctx)
215{
216 if (ctx)
217 raw_spin_unlock(&ctx->lock);
218 raw_spin_unlock(&cpuctx->ctx.lock);
219}
220
203#ifdef CONFIG_CGROUP_PERF 221#ifdef CONFIG_CGROUP_PERF
204 222
205/* 223/*
@@ -340,11 +358,8 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
340 rcu_read_lock(); 358 rcu_read_lock();
341 359
342 list_for_each_entry_rcu(pmu, &pmus, entry) { 360 list_for_each_entry_rcu(pmu, &pmus, entry) {
343
344 cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); 361 cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
345 362
346 perf_pmu_disable(cpuctx->ctx.pmu);
347
348 /* 363 /*
349 * perf_cgroup_events says at least one 364 * perf_cgroup_events says at least one
350 * context on this CPU has cgroup events. 365 * context on this CPU has cgroup events.
@@ -353,6 +368,8 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
353 * events for a context. 368 * events for a context.
354 */ 369 */
355 if (cpuctx->ctx.nr_cgroups > 0) { 370 if (cpuctx->ctx.nr_cgroups > 0) {
371 perf_ctx_lock(cpuctx, cpuctx->task_ctx);
372 perf_pmu_disable(cpuctx->ctx.pmu);
356 373
357 if (mode & PERF_CGROUP_SWOUT) { 374 if (mode & PERF_CGROUP_SWOUT) {
358 cpu_ctx_sched_out(cpuctx, EVENT_ALL); 375 cpu_ctx_sched_out(cpuctx, EVENT_ALL);
@@ -372,9 +389,9 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
372 cpuctx->cgrp = perf_cgroup_from_task(task); 389 cpuctx->cgrp = perf_cgroup_from_task(task);
373 cpu_ctx_sched_in(cpuctx, EVENT_ALL, task); 390 cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
374 } 391 }
392 perf_pmu_enable(cpuctx->ctx.pmu);
393 perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
375 } 394 }
376
377 perf_pmu_enable(cpuctx->ctx.pmu);
378 } 395 }
379 396
380 rcu_read_unlock(); 397 rcu_read_unlock();
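
The new perf_ctx_lock()/perf_ctx_unlock() helpers encode a fixed nesting order: the per-cpu context lock is always taken before the optional task context lock, and released in reverse. A sketch of a helper one might write inside core.c to make the pairing explicit (the wrapper itself is hypothetical):

static void with_both_ctx_locked(struct perf_cpu_context *cpuctx,
				 struct perf_event_context *task_ctx,
				 void (*body)(void))
{
	perf_ctx_lock(cpuctx, task_ctx);   /* cpuctx->ctx.lock, then ctx->lock */
	body();                            /* runs under both locks */
	perf_ctx_unlock(cpuctx, task_ctx); /* reverse order on the way out */
}
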
@@ -731,6 +748,7 @@ static u64 perf_event_time(struct perf_event *event)
731 748
732/* 749/*
 733 * Update the total_time_enabled and total_time_running fields for an event. 750
751 * The caller of this function needs to hold the ctx->lock.
734 */ 752 */
735static void update_event_times(struct perf_event *event) 753static void update_event_times(struct perf_event *event)
736{ 754{
@@ -1105,6 +1123,10 @@ static int __perf_remove_from_context(void *info)
1105 raw_spin_lock(&ctx->lock); 1123 raw_spin_lock(&ctx->lock);
1106 event_sched_out(event, cpuctx, ctx); 1124 event_sched_out(event, cpuctx, ctx);
1107 list_del_event(event, ctx); 1125 list_del_event(event, ctx);
1126 if (!ctx->nr_events && cpuctx->task_ctx == ctx) {
1127 ctx->is_active = 0;
1128 cpuctx->task_ctx = NULL;
1129 }
1108 raw_spin_unlock(&ctx->lock); 1130 raw_spin_unlock(&ctx->lock);
1109 1131
1110 return 0; 1132 return 0;
@@ -1454,8 +1476,24 @@ static void add_event_to_ctx(struct perf_event *event,
1454 event->tstamp_stopped = tstamp; 1476 event->tstamp_stopped = tstamp;
1455} 1477}
1456 1478
1457static void perf_event_context_sched_in(struct perf_event_context *ctx, 1479static void task_ctx_sched_out(struct perf_event_context *ctx);
1458 struct task_struct *tsk); 1480static void
1481ctx_sched_in(struct perf_event_context *ctx,
1482 struct perf_cpu_context *cpuctx,
1483 enum event_type_t event_type,
1484 struct task_struct *task);
1485
1486static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
1487 struct perf_event_context *ctx,
1488 struct task_struct *task)
1489{
1490 cpu_ctx_sched_in(cpuctx, EVENT_PINNED, task);
1491 if (ctx)
1492 ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task);
1493 cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task);
1494 if (ctx)
1495 ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task);
1496}
1459 1497
1460/* 1498/*
1461 * Cross CPU call to install and enable a performance event 1499 * Cross CPU call to install and enable a performance event
@@ -1466,20 +1504,37 @@ static int __perf_install_in_context(void *info)
1466{ 1504{
1467 struct perf_event *event = info; 1505 struct perf_event *event = info;
1468 struct perf_event_context *ctx = event->ctx; 1506 struct perf_event_context *ctx = event->ctx;
1469 struct perf_event *leader = event->group_leader;
1470 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); 1507 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
1471 int err; 1508 struct perf_event_context *task_ctx = cpuctx->task_ctx;
1509 struct task_struct *task = current;
1510
1511 perf_ctx_lock(cpuctx, task_ctx);
1512 perf_pmu_disable(cpuctx->ctx.pmu);
1472 1513
1473 /* 1514 /*
1474 * In case we're installing a new context to an already running task, 1515 * If there was an active task_ctx schedule it out.
1475 * could also happen before perf_event_task_sched_in() on architectures
1476 * which do context switches with IRQs enabled.
1477 */ 1516 */
1478 if (ctx->task && !cpuctx->task_ctx) 1517 if (task_ctx)
1479 perf_event_context_sched_in(ctx, ctx->task); 1518 task_ctx_sched_out(task_ctx);
1519
1520 /*
1521 * If the context we're installing events in is not the
1522 * active task_ctx, flip them.
1523 */
1524 if (ctx->task && task_ctx != ctx) {
1525 if (task_ctx)
1526 raw_spin_unlock(&task_ctx->lock);
1527 raw_spin_lock(&ctx->lock);
1528 task_ctx = ctx;
1529 }
1530
1531 if (task_ctx) {
1532 cpuctx->task_ctx = task_ctx;
1533 task = task_ctx->task;
1534 }
1535
1536 cpu_ctx_sched_out(cpuctx, EVENT_ALL);
1480 1537
1481 raw_spin_lock(&ctx->lock);
1482 ctx->is_active = 1;
1483 update_context_time(ctx); 1538 update_context_time(ctx);
1484 /* 1539 /*
1485 * update cgrp time only if current cgrp 1540 * update cgrp time only if current cgrp
@@ -1490,43 +1545,13 @@ static int __perf_install_in_context(void *info)
1490 1545
1491 add_event_to_ctx(event, ctx); 1546 add_event_to_ctx(event, ctx);
1492 1547
1493 if (!event_filter_match(event))
1494 goto unlock;
1495
1496 /*
1497 * Don't put the event on if it is disabled or if
1498 * it is in a group and the group isn't on.
1499 */
1500 if (event->state != PERF_EVENT_STATE_INACTIVE ||
1501 (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE))
1502 goto unlock;
1503
1504 /* 1548 /*
1505 * An exclusive event can't go on if there are already active 1549 * Schedule everything back in
1506 * hardware events, and no hardware event can go on if there
1507 * is already an exclusive event on.
1508 */ 1550 */
1509 if (!group_can_go_on(event, cpuctx, 1)) 1551 perf_event_sched_in(cpuctx, task_ctx, task);
1510 err = -EEXIST;
1511 else
1512 err = event_sched_in(event, cpuctx, ctx);
1513
1514 if (err) {
1515 /*
1516 * This event couldn't go on. If it is in a group
1517 * then we have to pull the whole group off.
1518 * If the event group is pinned then put it in error state.
1519 */
1520 if (leader != event)
1521 group_sched_out(leader, cpuctx, ctx);
1522 if (leader->attr.pinned) {
1523 update_group_times(leader);
1524 leader->state = PERF_EVENT_STATE_ERROR;
1525 }
1526 }
1527 1552
1528unlock: 1553 perf_pmu_enable(cpuctx->ctx.pmu);
1529 raw_spin_unlock(&ctx->lock); 1554 perf_ctx_unlock(cpuctx, task_ctx);
1530 1555
1531 return 0; 1556 return 0;
1532} 1557}
@@ -1739,7 +1764,7 @@ out:
1739 raw_spin_unlock_irq(&ctx->lock); 1764 raw_spin_unlock_irq(&ctx->lock);
1740} 1765}
1741 1766
1742static int perf_event_refresh(struct perf_event *event, int refresh) 1767int perf_event_refresh(struct perf_event *event, int refresh)
1743{ 1768{
1744 /* 1769 /*
1745 * not supported on inherited events 1770 * not supported on inherited events
@@ -1752,36 +1777,35 @@ static int perf_event_refresh(struct perf_event *event, int refresh)
1752 1777
1753 return 0; 1778 return 0;
1754} 1779}
1780EXPORT_SYMBOL_GPL(perf_event_refresh);
1755 1781
1756static void ctx_sched_out(struct perf_event_context *ctx, 1782static void ctx_sched_out(struct perf_event_context *ctx,
1757 struct perf_cpu_context *cpuctx, 1783 struct perf_cpu_context *cpuctx,
1758 enum event_type_t event_type) 1784 enum event_type_t event_type)
1759{ 1785{
1760 struct perf_event *event; 1786 struct perf_event *event;
1787 int is_active = ctx->is_active;
1761 1788
1762 raw_spin_lock(&ctx->lock); 1789 ctx->is_active &= ~event_type;
1763 perf_pmu_disable(ctx->pmu);
1764 ctx->is_active = 0;
1765 if (likely(!ctx->nr_events)) 1790 if (likely(!ctx->nr_events))
1766 goto out; 1791 return;
1792
1767 update_context_time(ctx); 1793 update_context_time(ctx);
1768 update_cgrp_time_from_cpuctx(cpuctx); 1794 update_cgrp_time_from_cpuctx(cpuctx);
1769
1770 if (!ctx->nr_active) 1795 if (!ctx->nr_active)
1771 goto out; 1796 return;
1772 1797
1773 if (event_type & EVENT_PINNED) { 1798 perf_pmu_disable(ctx->pmu);
1799 if ((is_active & EVENT_PINNED) && (event_type & EVENT_PINNED)) {
1774 list_for_each_entry(event, &ctx->pinned_groups, group_entry) 1800 list_for_each_entry(event, &ctx->pinned_groups, group_entry)
1775 group_sched_out(event, cpuctx, ctx); 1801 group_sched_out(event, cpuctx, ctx);
1776 } 1802 }
1777 1803
1778 if (event_type & EVENT_FLEXIBLE) { 1804 if ((is_active & EVENT_FLEXIBLE) && (event_type & EVENT_FLEXIBLE)) {
1779 list_for_each_entry(event, &ctx->flexible_groups, group_entry) 1805 list_for_each_entry(event, &ctx->flexible_groups, group_entry)
1780 group_sched_out(event, cpuctx, ctx); 1806 group_sched_out(event, cpuctx, ctx);
1781 } 1807 }
1782out:
1783 perf_pmu_enable(ctx->pmu); 1808 perf_pmu_enable(ctx->pmu);
1784 raw_spin_unlock(&ctx->lock);
1785} 1809}
1786 1810
1787/* 1811/*
@@ -1929,8 +1953,10 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
1929 rcu_read_unlock(); 1953 rcu_read_unlock();
1930 1954
1931 if (do_switch) { 1955 if (do_switch) {
1956 raw_spin_lock(&ctx->lock);
1932 ctx_sched_out(ctx, cpuctx, EVENT_ALL); 1957 ctx_sched_out(ctx, cpuctx, EVENT_ALL);
1933 cpuctx->task_ctx = NULL; 1958 cpuctx->task_ctx = NULL;
1959 raw_spin_unlock(&ctx->lock);
1934 } 1960 }
1935} 1961}
1936 1962
@@ -1965,8 +1991,7 @@ void __perf_event_task_sched_out(struct task_struct *task,
1965 perf_cgroup_sched_out(task); 1991 perf_cgroup_sched_out(task);
1966} 1992}
1967 1993
1968static void task_ctx_sched_out(struct perf_event_context *ctx, 1994static void task_ctx_sched_out(struct perf_event_context *ctx)
1969 enum event_type_t event_type)
1970{ 1995{
1971 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); 1996 struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
1972 1997
@@ -1976,7 +2001,7 @@ static void task_ctx_sched_out(struct perf_event_context *ctx,
1976 if (WARN_ON_ONCE(ctx != cpuctx->task_ctx)) 2001 if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
1977 return; 2002 return;
1978 2003
1979 ctx_sched_out(ctx, cpuctx, event_type); 2004 ctx_sched_out(ctx, cpuctx, EVENT_ALL);
1980 cpuctx->task_ctx = NULL; 2005 cpuctx->task_ctx = NULL;
1981} 2006}
1982 2007
@@ -2055,11 +2080,11 @@ ctx_sched_in(struct perf_event_context *ctx,
2055 struct task_struct *task) 2080 struct task_struct *task)
2056{ 2081{
2057 u64 now; 2082 u64 now;
2083 int is_active = ctx->is_active;
2058 2084
2059 raw_spin_lock(&ctx->lock); 2085 ctx->is_active |= event_type;
2060 ctx->is_active = 1;
2061 if (likely(!ctx->nr_events)) 2086 if (likely(!ctx->nr_events))
2062 goto out; 2087 return;
2063 2088
2064 now = perf_clock(); 2089 now = perf_clock();
2065 ctx->timestamp = now; 2090 ctx->timestamp = now;
@@ -2068,15 +2093,12 @@ ctx_sched_in(struct perf_event_context *ctx,
2068 * First go through the list and put on any pinned groups 2093 * First go through the list and put on any pinned groups
2069 * in order to give them the best chance of going on. 2094 * in order to give them the best chance of going on.
2070 */ 2095 */
2071 if (event_type & EVENT_PINNED) 2096 if (!(is_active & EVENT_PINNED) && (event_type & EVENT_PINNED))
2072 ctx_pinned_sched_in(ctx, cpuctx); 2097 ctx_pinned_sched_in(ctx, cpuctx);
2073 2098
2074 /* Then walk through the lower prio flexible groups */ 2099 /* Then walk through the lower prio flexible groups */
2075 if (event_type & EVENT_FLEXIBLE) 2100 if (!(is_active & EVENT_FLEXIBLE) && (event_type & EVENT_FLEXIBLE))
2076 ctx_flexible_sched_in(ctx, cpuctx); 2101 ctx_flexible_sched_in(ctx, cpuctx);
2077
2078out:
2079 raw_spin_unlock(&ctx->lock);
2080} 2102}
2081 2103
2082static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, 2104static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
@@ -2088,19 +2110,6 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
2088 ctx_sched_in(ctx, cpuctx, event_type, task); 2110 ctx_sched_in(ctx, cpuctx, event_type, task);
2089} 2111}
2090 2112
2091static void task_ctx_sched_in(struct perf_event_context *ctx,
2092 enum event_type_t event_type)
2093{
2094 struct perf_cpu_context *cpuctx;
2095
2096 cpuctx = __get_cpu_context(ctx);
2097 if (cpuctx->task_ctx == ctx)
2098 return;
2099
2100 ctx_sched_in(ctx, cpuctx, event_type, NULL);
2101 cpuctx->task_ctx = ctx;
2102}
2103
2104static void perf_event_context_sched_in(struct perf_event_context *ctx, 2113static void perf_event_context_sched_in(struct perf_event_context *ctx,
2105 struct task_struct *task) 2114 struct task_struct *task)
2106{ 2115{
@@ -2110,6 +2119,7 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
2110 if (cpuctx->task_ctx == ctx) 2119 if (cpuctx->task_ctx == ctx)
2111 return; 2120 return;
2112 2121
2122 perf_ctx_lock(cpuctx, ctx);
2113 perf_pmu_disable(ctx->pmu); 2123 perf_pmu_disable(ctx->pmu);
2114 /* 2124 /*
2115 * We want to keep the following priority order: 2125 * We want to keep the following priority order:
@@ -2118,18 +2128,18 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
2118 */ 2128 */
2119 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); 2129 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
2120 2130
2121 ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task); 2131 perf_event_sched_in(cpuctx, ctx, task);
2122 cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task);
2123 ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task);
2124 2132
2125 cpuctx->task_ctx = ctx; 2133 cpuctx->task_ctx = ctx;
2126 2134
2135 perf_pmu_enable(ctx->pmu);
2136 perf_ctx_unlock(cpuctx, ctx);
2137
2127 /* 2138 /*
2128 * Since these rotations are per-cpu, we need to ensure the 2139 * Since these rotations are per-cpu, we need to ensure the
2129 * cpu-context we got scheduled on is actually rotating. 2140 * cpu-context we got scheduled on is actually rotating.
2130 */ 2141 */
2131 perf_pmu_rotate_start(ctx->pmu); 2142 perf_pmu_rotate_start(ctx->pmu);
2132 perf_pmu_enable(ctx->pmu);
2133} 2143}
2134 2144
2135/* 2145/*
@@ -2269,7 +2279,6 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
2269 u64 interrupts, now; 2279 u64 interrupts, now;
2270 s64 delta; 2280 s64 delta;
2271 2281
2272 raw_spin_lock(&ctx->lock);
2273 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 2282 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
2274 if (event->state != PERF_EVENT_STATE_ACTIVE) 2283 if (event->state != PERF_EVENT_STATE_ACTIVE)
2275 continue; 2284 continue;
@@ -2301,7 +2310,6 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
2301 if (delta > 0) 2310 if (delta > 0)
2302 perf_adjust_period(event, period, delta); 2311 perf_adjust_period(event, period, delta);
2303 } 2312 }
2304 raw_spin_unlock(&ctx->lock);
2305} 2313}
2306 2314
2307/* 2315/*
@@ -2309,16 +2317,12 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
2309 */ 2317 */
2310static void rotate_ctx(struct perf_event_context *ctx) 2318static void rotate_ctx(struct perf_event_context *ctx)
2311{ 2319{
2312 raw_spin_lock(&ctx->lock);
2313
2314 /* 2320 /*
2315 * Rotate the first entry last of non-pinned groups. Rotation might be 2321 * Rotate the first entry last of non-pinned groups. Rotation might be
2316 * disabled by the inheritance code. 2322 * disabled by the inheritance code.
2317 */ 2323 */
2318 if (!ctx->rotate_disable) 2324 if (!ctx->rotate_disable)
2319 list_rotate_left(&ctx->flexible_groups); 2325 list_rotate_left(&ctx->flexible_groups);
2320
2321 raw_spin_unlock(&ctx->lock);
2322} 2326}
2323 2327
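Annotation: with perf_rotate_context() now holding ctx->lock around the whole rotation, rotate_ctx() shrinks to a bare list_rotate_left(): the first flexible group moves to the tail so groups starved by limited counters get the next turn. A toy model of that rotation over an array:

#include <stdio.h>

/* Model of list_rotate_left(): the first entry moves to the tail. */
static void rotate_left(int *list, int n)
{
        int first = list[0];

        for (int i = 1; i < n; i++)
                list[i - 1] = list[i];
        list[n - 1] = first;
}

int main(void)
{
        int groups[] = { 1, 2, 3, 4 };

        rotate_left(groups, 4);
        for (int i = 0; i < 4; i++)
                printf("%d ", groups[i]);       /* prints: 2 3 4 1 */
        printf("\n");
        return 0;
}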
2324/* 2328/*
@@ -2345,6 +2349,7 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
2345 rotate = 1; 2349 rotate = 1;
2346 } 2350 }
2347 2351
2352 perf_ctx_lock(cpuctx, cpuctx->task_ctx);
2348 perf_pmu_disable(cpuctx->ctx.pmu); 2353 perf_pmu_disable(cpuctx->ctx.pmu);
2349 perf_ctx_adjust_freq(&cpuctx->ctx, interval); 2354 perf_ctx_adjust_freq(&cpuctx->ctx, interval);
2350 if (ctx) 2355 if (ctx)
@@ -2355,21 +2360,20 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx)
2355 2360
2356 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); 2361 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
2357 if (ctx) 2362 if (ctx)
2358 task_ctx_sched_out(ctx, EVENT_FLEXIBLE); 2363 ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
2359 2364
2360 rotate_ctx(&cpuctx->ctx); 2365 rotate_ctx(&cpuctx->ctx);
2361 if (ctx) 2366 if (ctx)
2362 rotate_ctx(ctx); 2367 rotate_ctx(ctx);
2363 2368
2364 cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, current); 2369 perf_event_sched_in(cpuctx, ctx, current);
2365 if (ctx)
2366 task_ctx_sched_in(ctx, EVENT_FLEXIBLE);
2367 2370
2368done: 2371done:
2369 if (remove) 2372 if (remove)
2370 list_del_init(&cpuctx->rotation_list); 2373 list_del_init(&cpuctx->rotation_list);
2371 2374
2372 perf_pmu_enable(cpuctx->ctx.pmu); 2375 perf_pmu_enable(cpuctx->ctx.pmu);
2376 perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
2373} 2377}
2374 2378
2375void perf_event_task_tick(void) 2379void perf_event_task_tick(void)
@@ -2424,9 +2428,9 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx)
2424 * in. 2428 * in.
2425 */ 2429 */
2426 perf_cgroup_sched_out(current); 2430 perf_cgroup_sched_out(current);
2427 task_ctx_sched_out(ctx, EVENT_ALL);
2428 2431
2429 raw_spin_lock(&ctx->lock); 2432 raw_spin_lock(&ctx->lock);
2433 task_ctx_sched_out(ctx);
2430 2434
2431 list_for_each_entry(event, &ctx->pinned_groups, group_entry) { 2435 list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
2432 ret = event_enable_on_exec(event, ctx); 2436 ret = event_enable_on_exec(event, ctx);
@@ -2835,16 +2839,12 @@ retry:
2835 unclone_ctx(ctx); 2839 unclone_ctx(ctx);
2836 ++ctx->pin_count; 2840 ++ctx->pin_count;
2837 raw_spin_unlock_irqrestore(&ctx->lock, flags); 2841 raw_spin_unlock_irqrestore(&ctx->lock, flags);
2838 } 2842 } else {
2839
2840 if (!ctx) {
2841 ctx = alloc_perf_context(pmu, task); 2843 ctx = alloc_perf_context(pmu, task);
2842 err = -ENOMEM; 2844 err = -ENOMEM;
2843 if (!ctx) 2845 if (!ctx)
2844 goto errout; 2846 goto errout;
2845 2847
2846 get_ctx(ctx);
2847
2848 err = 0; 2848 err = 0;
2849 mutex_lock(&task->perf_event_mutex); 2849 mutex_lock(&task->perf_event_mutex);
2850 /* 2850 /*
@@ -2856,14 +2856,14 @@ retry:
2856 else if (task->perf_event_ctxp[ctxn]) 2856 else if (task->perf_event_ctxp[ctxn])
2857 err = -EAGAIN; 2857 err = -EAGAIN;
2858 else { 2858 else {
2859 get_ctx(ctx);
2859 ++ctx->pin_count; 2860 ++ctx->pin_count;
2860 rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx); 2861 rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx);
2861 } 2862 }
2862 mutex_unlock(&task->perf_event_mutex); 2863 mutex_unlock(&task->perf_event_mutex);
2863 2864
2864 if (unlikely(err)) { 2865 if (unlikely(err)) {
2865 put_task_struct(task); 2866 put_ctx(ctx);
2866 kfree(ctx);
2867 2867
2868 if (err == -EAGAIN) 2868 if (err == -EAGAIN)
2869 goto retry; 2869 goto retry;
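Annotation: the find_get_context() hunks above change the reference discipline. get_ctx() is now taken only once the context is successfully published in perf_event_ctxp[], and a failed install drops the allocation with put_ctx() instead of a raw kfree(), so the refcount machinery stays consistent on every path. A toy model of the publish/error pattern (names are illustrative, not the kernel API):

#include <stdlib.h>

struct ctx_ref {
        int refcount;
};

static struct ctx_ref *alloc_ctx_ref(void)
{
        struct ctx_ref *c = calloc(1, sizeof(*c));

        if (c)
                c->refcount = 1;        /* the allocation owns one reference */
        return c;
}

static void get_ctx_ref(struct ctx_ref *c) { c->refcount++; }

static void put_ctx_ref(struct ctx_ref *c)
{
        if (--c->refcount == 0)
                free(c);
}

static int publish(struct ctx_ref *c, struct ctx_ref **slot)
{
        if (*slot)
                return -1;              /* lost the race: -EAGAIN */
        get_ctx_ref(c);                 /* the published pointer holds its own ref */
        *slot = c;
        return 0;
}

int main(void)
{
        struct ctx_ref *slot = NULL;
        struct ctx_ref *c = alloc_ctx_ref();

        if (!c)
                return 1;
        if (publish(c, &slot)) {
                put_ctx_ref(c);         /* error path: drop the ref, never raw free() */
                return 1;
        }
        /* ... use the context, then drop both references */
        put_ctx_ref(slot);
        put_ctx_ref(c);
        return 0;
}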
@@ -2890,7 +2890,7 @@ static void free_event_rcu(struct rcu_head *head)
2890 kfree(event); 2890 kfree(event);
2891} 2891}
2892 2892
2893static void perf_buffer_put(struct perf_buffer *buffer); 2893static void ring_buffer_put(struct ring_buffer *rb);
2894 2894
2895static void free_event(struct perf_event *event) 2895static void free_event(struct perf_event *event)
2896{ 2896{
@@ -2913,9 +2913,9 @@ static void free_event(struct perf_event *event)
2913 } 2913 }
2914 } 2914 }
2915 2915
2916 if (event->buffer) { 2916 if (event->rb) {
2917 perf_buffer_put(event->buffer); 2917 ring_buffer_put(event->rb);
2918 event->buffer = NULL; 2918 event->rb = NULL;
2919 } 2919 }
2920 2920
2921 if (is_cgroup_event(event)) 2921 if (is_cgroup_event(event))
@@ -2934,12 +2934,6 @@ int perf_event_release_kernel(struct perf_event *event)
2934{ 2934{
2935 struct perf_event_context *ctx = event->ctx; 2935 struct perf_event_context *ctx = event->ctx;
2936 2936
2937 /*
2938 * Remove from the PMU, can't get re-enabled since we got
2939 * here because the last ref went.
2940 */
2941 perf_event_disable(event);
2942
2943 WARN_ON_ONCE(ctx->parent_ctx); 2937 WARN_ON_ONCE(ctx->parent_ctx);
2944 /* 2938 /*
2945 * There are two ways this annotation is useful: 2939 * There are two ways this annotation is useful:
@@ -2956,8 +2950,8 @@ int perf_event_release_kernel(struct perf_event *event)
2956 mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING); 2950 mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
2957 raw_spin_lock_irq(&ctx->lock); 2951 raw_spin_lock_irq(&ctx->lock);
2958 perf_group_detach(event); 2952 perf_group_detach(event);
2959 list_del_event(event, ctx);
2960 raw_spin_unlock_irq(&ctx->lock); 2953 raw_spin_unlock_irq(&ctx->lock);
2954 perf_remove_from_context(event);
2961 mutex_unlock(&ctx->mutex); 2955 mutex_unlock(&ctx->mutex);
2962 2956
2963 free_event(event); 2957 free_event(event);
@@ -3149,13 +3143,13 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
3149static unsigned int perf_poll(struct file *file, poll_table *wait) 3143static unsigned int perf_poll(struct file *file, poll_table *wait)
3150{ 3144{
3151 struct perf_event *event = file->private_data; 3145 struct perf_event *event = file->private_data;
3152 struct perf_buffer *buffer; 3146 struct ring_buffer *rb;
3153 unsigned int events = POLL_HUP; 3147 unsigned int events = POLL_HUP;
3154 3148
3155 rcu_read_lock(); 3149 rcu_read_lock();
3156 buffer = rcu_dereference(event->buffer); 3150 rb = rcu_dereference(event->rb);
3157 if (buffer) 3151 if (rb)
3158 events = atomic_xchg(&buffer->poll, 0); 3152 events = atomic_xchg(&rb->poll, 0);
3159 rcu_read_unlock(); 3153 rcu_read_unlock();
3160 3154
3161 poll_wait(file, &event->waitq, wait); 3155 poll_wait(file, &event->waitq, wait);
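Annotation: perf_poll() reads the pending poll flags with atomic_xchg(&rb->poll, 0) under rcu_read_lock(), consuming them in one step so a concurrent producer's update cannot be lost between the read and the clear. The read-and-clear half, modeled with C11 atomics:

#include <stdatomic.h>
#include <stdio.h>
#include <poll.h>

static _Atomic unsigned int pending_poll;

/* Read and clear in one step, as atomic_xchg(&rb->poll, 0) does. */
static unsigned int consume_poll(void)
{
        return atomic_exchange(&pending_poll, 0);
}

int main(void)
{
        atomic_store(&pending_poll, POLLIN);
        printf("events=%#x then %#x\n", consume_poll(), consume_poll());
        return 0;
}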
@@ -3358,6 +3352,18 @@ static int perf_event_index(struct perf_event *event)
3358 return event->hw.idx + 1 - PERF_EVENT_INDEX_OFFSET; 3352 return event->hw.idx + 1 - PERF_EVENT_INDEX_OFFSET;
3359} 3353}
3360 3354
3355static void calc_timer_values(struct perf_event *event,
3356 u64 *running,
3357 u64 *enabled)
3358{
3359 u64 now, ctx_time;
3360
3361 now = perf_clock();
3362 ctx_time = event->shadow_ctx_time + now;
3363 *enabled = ctx_time - event->tstamp_enabled;
3364 *running = ctx_time - event->tstamp_running;
3365}
3366
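Annotation: calc_timer_values() factors out the lock-free time arithmetic (perf_output_read() below drops the same open-coded computation in favor of this helper). shadow_ctx_time is the context-time offset snapshotted when the event was last scheduled in, so adding perf_clock() yields an up-to-date context time, and subtracting the enable/run timestamps yields the totals. A worked example with invented nanosecond values:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t now             = 1000;  /* perf_clock() */
        uint64_t shadow_ctx_time = 50;    /* offset captured at sched-in */
        uint64_t tstamp_enabled  = 300;   /* ctx time when the event was enabled */
        uint64_t tstamp_running  = 600;   /* ctx time when it last started running */

        uint64_t ctx_time = shadow_ctx_time + now;      /* 1050 */
        uint64_t enabled  = ctx_time - tstamp_enabled;  /* 750 ns enabled */
        uint64_t running  = ctx_time - tstamp_running;  /* 450 ns running */

        printf("enabled=%llu running=%llu\n",
               (unsigned long long)enabled, (unsigned long long)running);
        return 0;
}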
3361/* 3367/*
3362 * Callers need to ensure there can be no nesting of this function, otherwise 3368 * Callers need to ensure there can be no nesting of this function, otherwise
3363 * the seqlock logic goes bad. We can not serialize this because the arch 3369 * the seqlock logic goes bad. We can not serialize this because the arch
@@ -3366,14 +3372,25 @@ static int perf_event_index(struct perf_event *event)
3366void perf_event_update_userpage(struct perf_event *event) 3372void perf_event_update_userpage(struct perf_event *event)
3367{ 3373{
3368 struct perf_event_mmap_page *userpg; 3374 struct perf_event_mmap_page *userpg;
3369 struct perf_buffer *buffer; 3375 struct ring_buffer *rb;
3376 u64 enabled, running;
3370 3377
3371 rcu_read_lock(); 3378 rcu_read_lock();
3372 buffer = rcu_dereference(event->buffer); 3379 /*
3373 if (!buffer) 3380 * compute total_time_enabled, total_time_running
3381 * based on snapshot values taken when the event
3382 * was last scheduled in.
3383 *
3384 * we cannot simply call update_context_time()
3385 * because of locking issues, as we can be called in
3386 * NMI context
3387 */
3388 calc_timer_values(event, &enabled, &running);
3389 rb = rcu_dereference(event->rb);
3390 if (!rb)
3374 goto unlock; 3391 goto unlock;
3375 3392
3376 userpg = buffer->user_page; 3393 userpg = rb->user_page;
3377 3394
3378 /* 3395 /*
3379 * Disable preemption so as to not let the corresponding user-space 3396 * Disable preemption so as to not let the corresponding user-space
@@ -3387,10 +3404,10 @@ void perf_event_update_userpage(struct perf_event *event)
3387 if (event->state == PERF_EVENT_STATE_ACTIVE) 3404 if (event->state == PERF_EVENT_STATE_ACTIVE)
3388 userpg->offset -= local64_read(&event->hw.prev_count); 3405 userpg->offset -= local64_read(&event->hw.prev_count);
3389 3406
3390 userpg->time_enabled = event->total_time_enabled + 3407 userpg->time_enabled = enabled +
3391 atomic64_read(&event->child_total_time_enabled); 3408 atomic64_read(&event->child_total_time_enabled);
3392 3409
3393 userpg->time_running = event->total_time_running + 3410 userpg->time_running = running +
3394 atomic64_read(&event->child_total_time_running); 3411 atomic64_read(&event->child_total_time_running);
3395 3412
3396 barrier(); 3413 barrier();
@@ -3400,220 +3417,10 @@ unlock:
3400 rcu_read_unlock(); 3417 rcu_read_unlock();
3401} 3418}
3402 3419
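Annotation: the user page is published with a seqcount-style protocol; the counter increments on userpg->lock sit just outside the hunk shown, and the barrier() above orders the field stores against them, so a user-space reader retries if it observes an odd or changed count. A single-threaded sketch of that protocol (C11 atomics stand in for the kernel's barriers; a genuinely concurrent version would also need fences around the plain stores):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct upage {
        _Atomic uint32_t seq;
        uint64_t time_enabled;
        uint64_t time_running;
};

static void publish_times(struct upage *pg, uint64_t en, uint64_t ru)
{
        atomic_fetch_add(&pg->seq, 1);  /* odd: update in progress */
        pg->time_enabled = en;
        pg->time_running = ru;
        atomic_fetch_add(&pg->seq, 1);  /* even again: consistent */
}

static void read_times(struct upage *pg, uint64_t *en, uint64_t *ru)
{
        uint32_t s;

        do {
                s = atomic_load(&pg->seq);
                *en = pg->time_enabled;
                *ru = pg->time_running;
        } while ((s & 1) || s != atomic_load(&pg->seq));
}

int main(void)
{
        struct upage pg = { 0 };
        uint64_t en, ru;

        publish_times(&pg, 750, 450);
        read_times(&pg, &en, &ru);
        printf("enabled=%llu running=%llu\n",
               (unsigned long long)en, (unsigned long long)ru);
        return 0;
}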
3403static unsigned long perf_data_size(struct perf_buffer *buffer);
3404
3405static void
3406perf_buffer_init(struct perf_buffer *buffer, long watermark, int flags)
3407{
3408 long max_size = perf_data_size(buffer);
3409
3410 if (watermark)
3411 buffer->watermark = min(max_size, watermark);
3412
3413 if (!buffer->watermark)
3414 buffer->watermark = max_size / 2;
3415
3416 if (flags & PERF_BUFFER_WRITABLE)
3417 buffer->writable = 1;
3418
3419 atomic_set(&buffer->refcount, 1);
3420}
3421
3422#ifndef CONFIG_PERF_USE_VMALLOC
3423
3424/*
3425 * Back perf_mmap() with regular GFP_KERNEL-0 pages.
3426 */
3427
3428static struct page *
3429perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff)
3430{
3431 if (pgoff > buffer->nr_pages)
3432 return NULL;
3433
3434 if (pgoff == 0)
3435 return virt_to_page(buffer->user_page);
3436
3437 return virt_to_page(buffer->data_pages[pgoff - 1]);
3438}
3439
3440static void *perf_mmap_alloc_page(int cpu)
3441{
3442 struct page *page;
3443 int node;
3444
3445 node = (cpu == -1) ? cpu : cpu_to_node(cpu);
3446 page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
3447 if (!page)
3448 return NULL;
3449
3450 return page_address(page);
3451}
3452
3453static struct perf_buffer *
3454perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags)
3455{
3456 struct perf_buffer *buffer;
3457 unsigned long size;
3458 int i;
3459
3460 size = sizeof(struct perf_buffer);
3461 size += nr_pages * sizeof(void *);
3462
3463 buffer = kzalloc(size, GFP_KERNEL);
3464 if (!buffer)
3465 goto fail;
3466
3467 buffer->user_page = perf_mmap_alloc_page(cpu);
3468 if (!buffer->user_page)
3469 goto fail_user_page;
3470
3471 for (i = 0; i < nr_pages; i++) {
3472 buffer->data_pages[i] = perf_mmap_alloc_page(cpu);
3473 if (!buffer->data_pages[i])
3474 goto fail_data_pages;
3475 }
3476
3477 buffer->nr_pages = nr_pages;
3478
3479 perf_buffer_init(buffer, watermark, flags);
3480
3481 return buffer;
3482
3483fail_data_pages:
3484 for (i--; i >= 0; i--)
3485 free_page((unsigned long)buffer->data_pages[i]);
3486
3487 free_page((unsigned long)buffer->user_page);
3488
3489fail_user_page:
3490 kfree(buffer);
3491
3492fail:
3493 return NULL;
3494}
3495
3496static void perf_mmap_free_page(unsigned long addr)
3497{
3498 struct page *page = virt_to_page((void *)addr);
3499
3500 page->mapping = NULL;
3501 __free_page(page);
3502}
3503
3504static void perf_buffer_free(struct perf_buffer *buffer)
3505{
3506 int i;
3507
3508 perf_mmap_free_page((unsigned long)buffer->user_page);
3509 for (i = 0; i < buffer->nr_pages; i++)
3510 perf_mmap_free_page((unsigned long)buffer->data_pages[i]);
3511 kfree(buffer);
3512}
3513
3514static inline int page_order(struct perf_buffer *buffer)
3515{
3516 return 0;
3517}
3518
3519#else
3520
3521/*
3522 * Back perf_mmap() with vmalloc memory.
3523 *
3524 * Required for architectures that have d-cache aliasing issues.
3525 */
3526
3527static inline int page_order(struct perf_buffer *buffer)
3528{
3529 return buffer->page_order;
3530}
3531
3532static struct page *
3533perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff)
3534{
3535 if (pgoff > (1UL << page_order(buffer)))
3536 return NULL;
3537
3538 return vmalloc_to_page((void *)buffer->user_page + pgoff * PAGE_SIZE);
3539}
3540
3541static void perf_mmap_unmark_page(void *addr)
3542{
3543 struct page *page = vmalloc_to_page(addr);
3544
3545 page->mapping = NULL;
3546}
3547
3548static void perf_buffer_free_work(struct work_struct *work)
3549{
3550 struct perf_buffer *buffer;
3551 void *base;
3552 int i, nr;
3553
3554 buffer = container_of(work, struct perf_buffer, work);
3555 nr = 1 << page_order(buffer);
3556
3557 base = buffer->user_page;
3558 for (i = 0; i < nr + 1; i++)
3559 perf_mmap_unmark_page(base + (i * PAGE_SIZE));
3560
3561 vfree(base);
3562 kfree(buffer);
3563}
3564
3565static void perf_buffer_free(struct perf_buffer *buffer)
3566{
3567 schedule_work(&buffer->work);
3568}
3569
3570static struct perf_buffer *
3571perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags)
3572{
3573 struct perf_buffer *buffer;
3574 unsigned long size;
3575 void *all_buf;
3576
3577 size = sizeof(struct perf_buffer);
3578 size += sizeof(void *);
3579
3580 buffer = kzalloc(size, GFP_KERNEL);
3581 if (!buffer)
3582 goto fail;
3583
3584 INIT_WORK(&buffer->work, perf_buffer_free_work);
3585
3586 all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE);
3587 if (!all_buf)
3588 goto fail_all_buf;
3589
3590 buffer->user_page = all_buf;
3591 buffer->data_pages[0] = all_buf + PAGE_SIZE;
3592 buffer->page_order = ilog2(nr_pages);
3593 buffer->nr_pages = 1;
3594
3595 perf_buffer_init(buffer, watermark, flags);
3596
3597 return buffer;
3598
3599fail_all_buf:
3600 kfree(buffer);
3601
3602fail:
3603 return NULL;
3604}
3605
3606#endif
3607
3608static unsigned long perf_data_size(struct perf_buffer *buffer)
3609{
3610 return buffer->nr_pages << (PAGE_SHIFT + page_order(buffer));
3611}
3612
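Annotation: everything from perf_buffer_init() down to perf_data_size() above is moved rather than deleted; it reappears, with perf_buffer renamed to ring_buffer, in the new kernel/events/internal.h and kernel/events/ring_buffer.c later in this diff. The size calculation is worth noting because the two backends encode it differently: the page-by-page allocator uses nr_pages with order 0, while the vmalloc backend uses a single block with nr_pages == 1 and the real size carried in page_order. A small model (a PAGE_SHIFT of 12, i.e. 4 KiB pages, is assumed):

#include <stdio.h>

#define PAGE_SHIFT 12

/* Model of perf_data_size(): size = nr_pages << (PAGE_SHIFT + page_order). */
static unsigned long data_size(int nr_pages, int page_order)
{
        return (unsigned long)nr_pages << (PAGE_SHIFT + page_order);
}

int main(void)
{
        printf("%lu\n", data_size(8, 0));   /* eight 4k pages: 32768 */
        printf("%lu\n", data_size(1, 3));   /* one vmalloc block of 2^3 pages: 32768 */
        return 0;
}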
3613static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 3420static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
3614{ 3421{
3615 struct perf_event *event = vma->vm_file->private_data; 3422 struct perf_event *event = vma->vm_file->private_data;
3616 struct perf_buffer *buffer; 3423 struct ring_buffer *rb;
3617 int ret = VM_FAULT_SIGBUS; 3424 int ret = VM_FAULT_SIGBUS;
3618 3425
3619 if (vmf->flags & FAULT_FLAG_MKWRITE) { 3426 if (vmf->flags & FAULT_FLAG_MKWRITE) {
@@ -3623,14 +3430,14 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
3623 } 3430 }
3624 3431
3625 rcu_read_lock(); 3432 rcu_read_lock();
3626 buffer = rcu_dereference(event->buffer); 3433 rb = rcu_dereference(event->rb);
3627 if (!buffer) 3434 if (!rb)
3628 goto unlock; 3435 goto unlock;
3629 3436
3630 if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE)) 3437 if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))
3631 goto unlock; 3438 goto unlock;
3632 3439
3633 vmf->page = perf_mmap_to_page(buffer, vmf->pgoff); 3440 vmf->page = perf_mmap_to_page(rb, vmf->pgoff);
3634 if (!vmf->page) 3441 if (!vmf->page)
3635 goto unlock; 3442 goto unlock;
3636 3443
@@ -3645,35 +3452,35 @@ unlock:
3645 return ret; 3452 return ret;
3646} 3453}
3647 3454
3648static void perf_buffer_free_rcu(struct rcu_head *rcu_head) 3455static void rb_free_rcu(struct rcu_head *rcu_head)
3649{ 3456{
3650 struct perf_buffer *buffer; 3457 struct ring_buffer *rb;
3651 3458
3652 buffer = container_of(rcu_head, struct perf_buffer, rcu_head); 3459 rb = container_of(rcu_head, struct ring_buffer, rcu_head);
3653 perf_buffer_free(buffer); 3460 rb_free(rb);
3654} 3461}
3655 3462
3656static struct perf_buffer *perf_buffer_get(struct perf_event *event) 3463static struct ring_buffer *ring_buffer_get(struct perf_event *event)
3657{ 3464{
3658 struct perf_buffer *buffer; 3465 struct ring_buffer *rb;
3659 3466
3660 rcu_read_lock(); 3467 rcu_read_lock();
3661 buffer = rcu_dereference(event->buffer); 3468 rb = rcu_dereference(event->rb);
3662 if (buffer) { 3469 if (rb) {
3663 if (!atomic_inc_not_zero(&buffer->refcount)) 3470 if (!atomic_inc_not_zero(&rb->refcount))
3664 buffer = NULL; 3471 rb = NULL;
3665 } 3472 }
3666 rcu_read_unlock(); 3473 rcu_read_unlock();
3667 3474
3668 return buffer; 3475 return rb;
3669} 3476}
3670 3477
3671static void perf_buffer_put(struct perf_buffer *buffer) 3478static void ring_buffer_put(struct ring_buffer *rb)
3672{ 3479{
3673 if (!atomic_dec_and_test(&buffer->refcount)) 3480 if (!atomic_dec_and_test(&rb->refcount))
3674 return; 3481 return;
3675 3482
3676 call_rcu(&buffer->rcu_head, perf_buffer_free_rcu); 3483 call_rcu(&rb->rcu_head, rb_free_rcu);
3677} 3484}
3678 3485
3679static void perf_mmap_open(struct vm_area_struct *vma) 3486static void perf_mmap_open(struct vm_area_struct *vma)
@@ -3688,16 +3495,16 @@ static void perf_mmap_close(struct vm_area_struct *vma)
3688 struct perf_event *event = vma->vm_file->private_data; 3495 struct perf_event *event = vma->vm_file->private_data;
3689 3496
3690 if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) { 3497 if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
3691 unsigned long size = perf_data_size(event->buffer); 3498 unsigned long size = perf_data_size(event->rb);
3692 struct user_struct *user = event->mmap_user; 3499 struct user_struct *user = event->mmap_user;
3693 struct perf_buffer *buffer = event->buffer; 3500 struct ring_buffer *rb = event->rb;
3694 3501
3695 atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm); 3502 atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
3696 vma->vm_mm->locked_vm -= event->mmap_locked; 3503 vma->vm_mm->locked_vm -= event->mmap_locked;
3697 rcu_assign_pointer(event->buffer, NULL); 3504 rcu_assign_pointer(event->rb, NULL);
3698 mutex_unlock(&event->mmap_mutex); 3505 mutex_unlock(&event->mmap_mutex);
3699 3506
3700 perf_buffer_put(buffer); 3507 ring_buffer_put(rb);
3701 free_uid(user); 3508 free_uid(user);
3702 } 3509 }
3703} 3510}
@@ -3715,7 +3522,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
3715 unsigned long user_locked, user_lock_limit; 3522 unsigned long user_locked, user_lock_limit;
3716 struct user_struct *user = current_user(); 3523 struct user_struct *user = current_user();
3717 unsigned long locked, lock_limit; 3524 unsigned long locked, lock_limit;
3718 struct perf_buffer *buffer; 3525 struct ring_buffer *rb;
3719 unsigned long vma_size; 3526 unsigned long vma_size;
3720 unsigned long nr_pages; 3527 unsigned long nr_pages;
3721 long user_extra, extra; 3528 long user_extra, extra;
@@ -3724,7 +3531,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
3724 /* 3531 /*
3725 * Don't allow mmap() of inherited per-task counters. This would 3532 * Don't allow mmap() of inherited per-task counters. This would
3726 * create a performance issue due to all children writing to the 3533 * create a performance issue due to all children writing to the
3727 * same buffer. 3534 * same rb.
3728 */ 3535 */
3729 if (event->cpu == -1 && event->attr.inherit) 3536 if (event->cpu == -1 && event->attr.inherit)
3730 return -EINVAL; 3537 return -EINVAL;
@@ -3736,7 +3543,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
3736 nr_pages = (vma_size / PAGE_SIZE) - 1; 3543 nr_pages = (vma_size / PAGE_SIZE) - 1;
3737 3544
3738 /* 3545 /*
3739 * If we have buffer pages ensure they're a power-of-two number, so we 3546 * If we have rb pages ensure they're a power-of-two number, so we
3740 * can do bitmasks instead of modulo. 3547 * can do bitmasks instead of modulo.
3741 */ 3548 */
3742 if (nr_pages != 0 && !is_power_of_2(nr_pages)) 3549 if (nr_pages != 0 && !is_power_of_2(nr_pages))
@@ -3750,9 +3557,9 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
3750 3557
3751 WARN_ON_ONCE(event->ctx->parent_ctx); 3558 WARN_ON_ONCE(event->ctx->parent_ctx);
3752 mutex_lock(&event->mmap_mutex); 3559 mutex_lock(&event->mmap_mutex);
3753 if (event->buffer) { 3560 if (event->rb) {
3754 if (event->buffer->nr_pages == nr_pages) 3561 if (event->rb->nr_pages == nr_pages)
3755 atomic_inc(&event->buffer->refcount); 3562 atomic_inc(&event->rb->refcount);
3756 else 3563 else
3757 ret = -EINVAL; 3564 ret = -EINVAL;
3758 goto unlock; 3565 goto unlock;
@@ -3782,18 +3589,20 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
3782 goto unlock; 3589 goto unlock;
3783 } 3590 }
3784 3591
3785 WARN_ON(event->buffer); 3592 WARN_ON(event->rb);
3786 3593
3787 if (vma->vm_flags & VM_WRITE) 3594 if (vma->vm_flags & VM_WRITE)
3788 flags |= PERF_BUFFER_WRITABLE; 3595 flags |= RING_BUFFER_WRITABLE;
3789 3596
3790 buffer = perf_buffer_alloc(nr_pages, event->attr.wakeup_watermark, 3597 rb = rb_alloc(nr_pages,
3791 event->cpu, flags); 3598 event->attr.watermark ? event->attr.wakeup_watermark : 0,
3792 if (!buffer) { 3599 event->cpu, flags);
3600
3601 if (!rb) {
3793 ret = -ENOMEM; 3602 ret = -ENOMEM;
3794 goto unlock; 3603 goto unlock;
3795 } 3604 }
3796 rcu_assign_pointer(event->buffer, buffer); 3605 rcu_assign_pointer(event->rb, rb);
3797 3606
3798 atomic_long_add(user_extra, &user->locked_vm); 3607 atomic_long_add(user_extra, &user->locked_vm);
3799 event->mmap_locked = extra; 3608 event->mmap_locked = extra;
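Annotation: the rb_alloc() call now passes wakeup_watermark only when attr.watermark is set; a zero lets the allocator fall back to its default. Per the removed perf_buffer_init() above, that default is half the buffer, with an explicit request clamped to the buffer size. As arithmetic:

#include <stdio.h>

static long effective_watermark(long requested, long max_size)
{
        long wm = 0;

        if (requested)
                wm = requested < max_size ? requested : max_size;
        if (!wm)
                wm = max_size / 2;      /* default: half the buffer */
        return wm;
}

int main(void)
{
        printf("%ld\n", effective_watermark(0, 32768));     /* 16384: default */
        printf("%ld\n", effective_watermark(1024, 32768));  /* 1024: as requested */
        printf("%ld\n", effective_watermark(65536, 32768)); /* 32768: clamped */
        return 0;
}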
@@ -3892,117 +3701,6 @@ int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
3892} 3701}
3893EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks); 3702EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
3894 3703
3895/*
3896 * Output
3897 */
3898static bool perf_output_space(struct perf_buffer *buffer, unsigned long tail,
3899 unsigned long offset, unsigned long head)
3900{
3901 unsigned long mask;
3902
3903 if (!buffer->writable)
3904 return true;
3905
3906 mask = perf_data_size(buffer) - 1;
3907
3908 offset = (offset - tail) & mask;
3909 head = (head - tail) & mask;
3910
3911 if ((int)(head - offset) < 0)
3912 return false;
3913
3914 return true;
3915}
3916
3917static void perf_output_wakeup(struct perf_output_handle *handle)
3918{
3919 atomic_set(&handle->buffer->poll, POLL_IN);
3920
3921 if (handle->nmi) {
3922 handle->event->pending_wakeup = 1;
3923 irq_work_queue(&handle->event->pending);
3924 } else
3925 perf_event_wakeup(handle->event);
3926}
3927
3928/*
3929 * We need to ensure a later event_id doesn't publish a head when a former
3930 * event isn't done writing. However since we need to deal with NMIs we
3931 * cannot fully serialize things.
3932 *
3933 * We only publish the head (and generate a wakeup) when the outer-most
3934 * event completes.
3935 */
3936static void perf_output_get_handle(struct perf_output_handle *handle)
3937{
3938 struct perf_buffer *buffer = handle->buffer;
3939
3940 preempt_disable();
3941 local_inc(&buffer->nest);
3942 handle->wakeup = local_read(&buffer->wakeup);
3943}
3944
3945static void perf_output_put_handle(struct perf_output_handle *handle)
3946{
3947 struct perf_buffer *buffer = handle->buffer;
3948 unsigned long head;
3949
3950again:
3951 head = local_read(&buffer->head);
3952
3953 /*
3954 * IRQ/NMI can happen here, which means we can miss a head update.
3955 */
3956
3957 if (!local_dec_and_test(&buffer->nest))
3958 goto out;
3959
3960 /*
3961 * Publish the known good head. Rely on the full barrier implied
3962 * by atomic_dec_and_test() order the buffer->head read and this
3963 * write.
3964 */
3965 buffer->user_page->data_head = head;
3966
3967 /*
3968 * Now check if we missed an update, rely on the (compiler)
3969 * barrier in atomic_dec_and_test() to re-read buffer->head.
3970 */
3971 if (unlikely(head != local_read(&buffer->head))) {
3972 local_inc(&buffer->nest);
3973 goto again;
3974 }
3975
3976 if (handle->wakeup != local_read(&buffer->wakeup))
3977 perf_output_wakeup(handle);
3978
3979out:
3980 preempt_enable();
3981}
3982
3983__always_inline void perf_output_copy(struct perf_output_handle *handle,
3984 const void *buf, unsigned int len)
3985{
3986 do {
3987 unsigned long size = min_t(unsigned long, handle->size, len);
3988
3989 memcpy(handle->addr, buf, size);
3990
3991 len -= size;
3992 handle->addr += size;
3993 buf += size;
3994 handle->size -= size;
3995 if (!handle->size) {
3996 struct perf_buffer *buffer = handle->buffer;
3997
3998 handle->page++;
3999 handle->page &= buffer->nr_pages - 1;
4000 handle->addr = buffer->data_pages[handle->page];
4001 handle->size = PAGE_SIZE << page_order(buffer);
4002 }
4003 } while (len);
4004}
4005
4006static void __perf_event_header__init_id(struct perf_event_header *header, 3704static void __perf_event_header__init_id(struct perf_event_header *header,
4007 struct perf_sample_data *data, 3705 struct perf_sample_data *data,
4008 struct perf_event *event) 3706 struct perf_event *event)
@@ -4033,9 +3731,9 @@ static void __perf_event_header__init_id(struct perf_event_header *header,
4033 } 3731 }
4034} 3732}
4035 3733
4036static void perf_event_header__init_id(struct perf_event_header *header, 3734void perf_event_header__init_id(struct perf_event_header *header,
4037 struct perf_sample_data *data, 3735 struct perf_sample_data *data,
4038 struct perf_event *event) 3736 struct perf_event *event)
4039{ 3737{
4040 if (event->attr.sample_id_all) 3738 if (event->attr.sample_id_all)
4041 __perf_event_header__init_id(header, data, event); 3739 __perf_event_header__init_id(header, data, event);
@@ -4062,121 +3760,14 @@ static void __perf_event__output_id_sample(struct perf_output_handle *handle,
4062 perf_output_put(handle, data->cpu_entry); 3760 perf_output_put(handle, data->cpu_entry);
4063} 3761}
4064 3762
4065static void perf_event__output_id_sample(struct perf_event *event, 3763void perf_event__output_id_sample(struct perf_event *event,
4066 struct perf_output_handle *handle, 3764 struct perf_output_handle *handle,
4067 struct perf_sample_data *sample) 3765 struct perf_sample_data *sample)
4068{ 3766{
4069 if (event->attr.sample_id_all) 3767 if (event->attr.sample_id_all)
4070 __perf_event__output_id_sample(handle, sample); 3768 __perf_event__output_id_sample(handle, sample);
4071} 3769}
4072 3770
4073int perf_output_begin(struct perf_output_handle *handle,
4074 struct perf_event *event, unsigned int size,
4075 int nmi, int sample)
4076{
4077 struct perf_buffer *buffer;
4078 unsigned long tail, offset, head;
4079 int have_lost;
4080 struct perf_sample_data sample_data;
4081 struct {
4082 struct perf_event_header header;
4083 u64 id;
4084 u64 lost;
4085 } lost_event;
4086
4087 rcu_read_lock();
4088 /*
4089 * For inherited events we send all the output towards the parent.
4090 */
4091 if (event->parent)
4092 event = event->parent;
4093
4094 buffer = rcu_dereference(event->buffer);
4095 if (!buffer)
4096 goto out;
4097
4098 handle->buffer = buffer;
4099 handle->event = event;
4100 handle->nmi = nmi;
4101 handle->sample = sample;
4102
4103 if (!buffer->nr_pages)
4104 goto out;
4105
4106 have_lost = local_read(&buffer->lost);
4107 if (have_lost) {
4108 lost_event.header.size = sizeof(lost_event);
4109 perf_event_header__init_id(&lost_event.header, &sample_data,
4110 event);
4111 size += lost_event.header.size;
4112 }
4113
4114 perf_output_get_handle(handle);
4115
4116 do {
4117 /*
4118 * Userspace could choose to issue a mb() before updating the
4119 * tail pointer. So that all reads will be completed before the
4120 * write is issued.
4121 */
4122 tail = ACCESS_ONCE(buffer->user_page->data_tail);
4123 smp_rmb();
4124 offset = head = local_read(&buffer->head);
4125 head += size;
4126 if (unlikely(!perf_output_space(buffer, tail, offset, head)))
4127 goto fail;
4128 } while (local_cmpxchg(&buffer->head, offset, head) != offset);
4129
4130 if (head - local_read(&buffer->wakeup) > buffer->watermark)
4131 local_add(buffer->watermark, &buffer->wakeup);
4132
4133 handle->page = offset >> (PAGE_SHIFT + page_order(buffer));
4134 handle->page &= buffer->nr_pages - 1;
4135 handle->size = offset & ((PAGE_SIZE << page_order(buffer)) - 1);
4136 handle->addr = buffer->data_pages[handle->page];
4137 handle->addr += handle->size;
4138 handle->size = (PAGE_SIZE << page_order(buffer)) - handle->size;
4139
4140 if (have_lost) {
4141 lost_event.header.type = PERF_RECORD_LOST;
4142 lost_event.header.misc = 0;
4143 lost_event.id = event->id;
4144 lost_event.lost = local_xchg(&buffer->lost, 0);
4145
4146 perf_output_put(handle, lost_event);
4147 perf_event__output_id_sample(event, handle, &sample_data);
4148 }
4149
4150 return 0;
4151
4152fail:
4153 local_inc(&buffer->lost);
4154 perf_output_put_handle(handle);
4155out:
4156 rcu_read_unlock();
4157
4158 return -ENOSPC;
4159}
4160
4161void perf_output_end(struct perf_output_handle *handle)
4162{
4163 struct perf_event *event = handle->event;
4164 struct perf_buffer *buffer = handle->buffer;
4165
4166 int wakeup_events = event->attr.wakeup_events;
4167
4168 if (handle->sample && wakeup_events) {
4169 int events = local_inc_return(&buffer->events);
4170 if (events >= wakeup_events) {
4171 local_sub(wakeup_events, &buffer->events);
4172 local_inc(&buffer->wakeup);
4173 }
4174 }
4175
4176 perf_output_put_handle(handle);
4177 rcu_read_unlock();
4178}
4179
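Annotation: perf_output_begin()/perf_output_end() also move to ring_buffer.c, shedding the nmi and sample parameters on the way; the call sites below shrink to (handle, event, size) accordingly. The space check they relied on, the removed perf_output_space(), is modular arithmetic on a power-of-two buffer: the write may not overtake the reader's data_tail. Extracted and runnable:

#include <stdbool.h>
#include <stdio.h>

static bool output_space(unsigned long size, unsigned long tail,
                         unsigned long offset, unsigned long head)
{
        unsigned long mask = size - 1;  /* size must be a power of two */

        /* distances from the tail, modulo the buffer size */
        offset = (offset - tail) & mask;
        head   = (head   - tail) & mask;

        return (long)(head - offset) >= 0;
}

int main(void)
{
        /* 4096-byte buffer: a 128-byte record at offset 100 fits ... */
        printf("%d\n", output_space(4096, 0, 100, 228));    /* 1 */
        /* ... but a write that wraps past the unread tail does not */
        printf("%d\n", output_space(4096, 0, 4000, 4200));  /* 0 */
        return 0;
}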
4180static void perf_output_read_one(struct perf_output_handle *handle, 3771static void perf_output_read_one(struct perf_output_handle *handle,
4181 struct perf_event *event, 3772 struct perf_event *event,
4182 u64 enabled, u64 running) 3773 u64 enabled, u64 running)
@@ -4197,7 +3788,7 @@ static void perf_output_read_one(struct perf_output_handle *handle,
4197 if (read_format & PERF_FORMAT_ID) 3788 if (read_format & PERF_FORMAT_ID)
4198 values[n++] = primary_event_id(event); 3789 values[n++] = primary_event_id(event);
4199 3790
4200 perf_output_copy(handle, values, n * sizeof(u64)); 3791 __output_copy(handle, values, n * sizeof(u64));
4201} 3792}
4202 3793
4203/* 3794/*
@@ -4227,7 +3818,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
4227 if (read_format & PERF_FORMAT_ID) 3818 if (read_format & PERF_FORMAT_ID)
4228 values[n++] = primary_event_id(leader); 3819 values[n++] = primary_event_id(leader);
4229 3820
4230 perf_output_copy(handle, values, n * sizeof(u64)); 3821 __output_copy(handle, values, n * sizeof(u64));
4231 3822
4232 list_for_each_entry(sub, &leader->sibling_list, group_entry) { 3823 list_for_each_entry(sub, &leader->sibling_list, group_entry) {
4233 n = 0; 3824 n = 0;
@@ -4239,7 +3830,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
4239 if (read_format & PERF_FORMAT_ID) 3830 if (read_format & PERF_FORMAT_ID)
4240 values[n++] = primary_event_id(sub); 3831 values[n++] = primary_event_id(sub);
4241 3832
4242 perf_output_copy(handle, values, n * sizeof(u64)); 3833 __output_copy(handle, values, n * sizeof(u64));
4243 } 3834 }
4244} 3835}
4245 3836
@@ -4249,7 +3840,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
4249static void perf_output_read(struct perf_output_handle *handle, 3840static void perf_output_read(struct perf_output_handle *handle,
4250 struct perf_event *event) 3841 struct perf_event *event)
4251{ 3842{
4252 u64 enabled = 0, running = 0, now, ctx_time; 3843 u64 enabled = 0, running = 0;
4253 u64 read_format = event->attr.read_format; 3844 u64 read_format = event->attr.read_format;
4254 3845
4255 /* 3846 /*
@@ -4261,12 +3852,8 @@ static void perf_output_read(struct perf_output_handle *handle,
4261 * because of locking issues, as we are called in 3852
4262 * NMI context 3853
4263 */ 3854 */
4264 if (read_format & PERF_FORMAT_TOTAL_TIMES) { 3855 if (read_format & PERF_FORMAT_TOTAL_TIMES)
4265 now = perf_clock(); 3856 calc_timer_values(event, &enabled, &running);
4266 ctx_time = event->shadow_ctx_time + now;
4267 enabled = ctx_time - event->tstamp_enabled;
4268 running = ctx_time - event->tstamp_running;
4269 }
4270 3857
4271 if (event->attr.read_format & PERF_FORMAT_GROUP) 3858 if (event->attr.read_format & PERF_FORMAT_GROUP)
4272 perf_output_read_group(handle, event, enabled, running); 3859 perf_output_read_group(handle, event, enabled, running);
@@ -4319,7 +3906,7 @@ void perf_output_sample(struct perf_output_handle *handle,
4319 3906
4320 size *= sizeof(u64); 3907 size *= sizeof(u64);
4321 3908
4322 perf_output_copy(handle, data->callchain, size); 3909 __output_copy(handle, data->callchain, size);
4323 } else { 3910 } else {
4324 u64 nr = 0; 3911 u64 nr = 0;
4325 perf_output_put(handle, nr); 3912 perf_output_put(handle, nr);
@@ -4329,8 +3916,8 @@ void perf_output_sample(struct perf_output_handle *handle,
4329 if (sample_type & PERF_SAMPLE_RAW) { 3916 if (sample_type & PERF_SAMPLE_RAW) {
4330 if (data->raw) { 3917 if (data->raw) {
4331 perf_output_put(handle, data->raw->size); 3918 perf_output_put(handle, data->raw->size);
4332 perf_output_copy(handle, data->raw->data, 3919 __output_copy(handle, data->raw->data,
4333 data->raw->size); 3920 data->raw->size);
4334 } else { 3921 } else {
4335 struct { 3922 struct {
4336 u32 size; 3923 u32 size;
@@ -4342,6 +3929,20 @@ void perf_output_sample(struct perf_output_handle *handle,
4342 perf_output_put(handle, raw); 3929 perf_output_put(handle, raw);
4343 } 3930 }
4344 } 3931 }
3932
3933 if (!event->attr.watermark) {
3934 int wakeup_events = event->attr.wakeup_events;
3935
3936 if (wakeup_events) {
3937 struct ring_buffer *rb = handle->rb;
3938 int events = local_inc_return(&rb->events);
3939
3940 if (events >= wakeup_events) {
3941 local_sub(wakeup_events, &rb->events);
3942 local_inc(&rb->wakeup);
3943 }
3944 }
3945 }
4345} 3946}
4346 3947
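Annotation: the every-Nth-sample wakeup accounting moves from the removed perf_output_end() into perf_output_sample(), and now runs only when no byte watermark is set (attr.watermark == 0), so the two wakeup schemes no longer stack. With local_t collapsed to plain ints (its single-CPU semantics), the logic is:

#include <stdio.h>

struct rb_model {
        int events;
        int wakeup;
};

static void count_sample(struct rb_model *rb, int wakeup_events)
{
        if (!wakeup_events)
                return;
        if (++rb->events >= wakeup_events) {
                rb->events -= wakeup_events;
                rb->wakeup++;           /* advances the wakeup stamp: poll() fires */
        }
}

int main(void)
{
        struct rb_model rb = { 0, 0 };

        for (int i = 0; i < 10; i++)
                count_sample(&rb, 4);
        printf("wakeups after 10 samples: %d\n", rb.wakeup);    /* 2 */
        return 0;
}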
4347void perf_prepare_sample(struct perf_event_header *header, 3948void perf_prepare_sample(struct perf_event_header *header,
@@ -4386,7 +3987,7 @@ void perf_prepare_sample(struct perf_event_header *header,
4386 } 3987 }
4387} 3988}
4388 3989
4389static void perf_event_output(struct perf_event *event, int nmi, 3990static void perf_event_output(struct perf_event *event,
4390 struct perf_sample_data *data, 3991 struct perf_sample_data *data,
4391 struct pt_regs *regs) 3992 struct pt_regs *regs)
4392{ 3993{
@@ -4398,7 +3999,7 @@ static void perf_event_output(struct perf_event *event, int nmi,
4398 3999
4399 perf_prepare_sample(&header, data, event, regs); 4000 perf_prepare_sample(&header, data, event, regs);
4400 4001
4401 if (perf_output_begin(&handle, event, header.size, nmi, 1)) 4002 if (perf_output_begin(&handle, event, header.size))
4402 goto exit; 4003 goto exit;
4403 4004
4404 perf_output_sample(&handle, &header, data, event); 4005 perf_output_sample(&handle, &header, data, event);
@@ -4438,7 +4039,7 @@ perf_event_read_event(struct perf_event *event,
4438 int ret; 4039 int ret;
4439 4040
4440 perf_event_header__init_id(&read_event.header, &sample, event); 4041 perf_event_header__init_id(&read_event.header, &sample, event);
4441 ret = perf_output_begin(&handle, event, read_event.header.size, 0, 0); 4042 ret = perf_output_begin(&handle, event, read_event.header.size);
4442 if (ret) 4043 if (ret)
4443 return; 4044 return;
4444 4045
@@ -4481,7 +4082,7 @@ static void perf_event_task_output(struct perf_event *event,
4481 perf_event_header__init_id(&task_event->event_id.header, &sample, event); 4082 perf_event_header__init_id(&task_event->event_id.header, &sample, event);
4482 4083
4483 ret = perf_output_begin(&handle, event, 4084 ret = perf_output_begin(&handle, event,
4484 task_event->event_id.header.size, 0, 0); 4085 task_event->event_id.header.size);
4485 if (ret) 4086 if (ret)
4486 goto out; 4087 goto out;
4487 4088
@@ -4618,7 +4219,7 @@ static void perf_event_comm_output(struct perf_event *event,
4618 4219
4619 perf_event_header__init_id(&comm_event->event_id.header, &sample, event); 4220 perf_event_header__init_id(&comm_event->event_id.header, &sample, event);
4620 ret = perf_output_begin(&handle, event, 4221 ret = perf_output_begin(&handle, event,
4621 comm_event->event_id.header.size, 0, 0); 4222 comm_event->event_id.header.size);
4622 4223
4623 if (ret) 4224 if (ret)
4624 goto out; 4225 goto out;
@@ -4627,7 +4228,7 @@ static void perf_event_comm_output(struct perf_event *event,
4627 comm_event->event_id.tid = perf_event_tid(event, comm_event->task); 4228 comm_event->event_id.tid = perf_event_tid(event, comm_event->task);
4628 4229
4629 perf_output_put(&handle, comm_event->event_id); 4230 perf_output_put(&handle, comm_event->event_id);
4630 perf_output_copy(&handle, comm_event->comm, 4231 __output_copy(&handle, comm_event->comm,
4631 comm_event->comm_size); 4232 comm_event->comm_size);
4632 4233
4633 perf_event__output_id_sample(event, &handle, &sample); 4234 perf_event__output_id_sample(event, &handle, &sample);
@@ -4765,7 +4366,7 @@ static void perf_event_mmap_output(struct perf_event *event,
4765 4366
4766 perf_event_header__init_id(&mmap_event->event_id.header, &sample, event); 4367 perf_event_header__init_id(&mmap_event->event_id.header, &sample, event);
4767 ret = perf_output_begin(&handle, event, 4368 ret = perf_output_begin(&handle, event,
4768 mmap_event->event_id.header.size, 0, 0); 4369 mmap_event->event_id.header.size);
4769 if (ret) 4370 if (ret)
4770 goto out; 4371 goto out;
4771 4372
@@ -4773,7 +4374,7 @@ static void perf_event_mmap_output(struct perf_event *event,
4773 mmap_event->event_id.tid = perf_event_tid(event, current); 4374 mmap_event->event_id.tid = perf_event_tid(event, current);
4774 4375
4775 perf_output_put(&handle, mmap_event->event_id); 4376 perf_output_put(&handle, mmap_event->event_id);
4776 perf_output_copy(&handle, mmap_event->file_name, 4377 __output_copy(&handle, mmap_event->file_name,
4777 mmap_event->file_size); 4378 mmap_event->file_size);
4778 4379
4779 perf_event__output_id_sample(event, &handle, &sample); 4380 perf_event__output_id_sample(event, &handle, &sample);
@@ -4829,7 +4430,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
4829 4430
4830 if (file) { 4431 if (file) {
4831 /* 4432 /*
4832 * d_path works from the end of the buffer backwards, so we 4433 * d_path works from the end of the rb backwards, so we
4833 * need to add enough zero bytes after the string to handle 4434 * need to add enough zero bytes after the string to handle
4834 * the 64bit alignment we do later. 4435 * the 64bit alignment we do later.
4835 */ 4436 */
@@ -4960,7 +4561,7 @@ static void perf_log_throttle(struct perf_event *event, int enable)
4960 perf_event_header__init_id(&throttle_event.header, &sample, event); 4561 perf_event_header__init_id(&throttle_event.header, &sample, event);
4961 4562
4962 ret = perf_output_begin(&handle, event, 4563 ret = perf_output_begin(&handle, event,
4963 throttle_event.header.size, 1, 0); 4564 throttle_event.header.size);
4964 if (ret) 4565 if (ret)
4965 return; 4566 return;
4966 4567
@@ -4973,7 +4574,7 @@ static void perf_log_throttle(struct perf_event *event, int enable)
4973 * Generic event overflow handling, sampling. 4574 * Generic event overflow handling, sampling.
4974 */ 4575 */
4975 4576
4976static int __perf_event_overflow(struct perf_event *event, int nmi, 4577static int __perf_event_overflow(struct perf_event *event,
4977 int throttle, struct perf_sample_data *data, 4578 int throttle, struct perf_sample_data *data,
4978 struct pt_regs *regs) 4579 struct pt_regs *regs)
4979{ 4580{
@@ -5016,34 +4617,28 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
5016 if (events && atomic_dec_and_test(&event->event_limit)) { 4617 if (events && atomic_dec_and_test(&event->event_limit)) {
5017 ret = 1; 4618 ret = 1;
5018 event->pending_kill = POLL_HUP; 4619 event->pending_kill = POLL_HUP;
5019 if (nmi) { 4620 event->pending_disable = 1;
5020 event->pending_disable = 1; 4621 irq_work_queue(&event->pending);
5021 irq_work_queue(&event->pending);
5022 } else
5023 perf_event_disable(event);
5024 } 4622 }
5025 4623
5026 if (event->overflow_handler) 4624 if (event->overflow_handler)
5027 event->overflow_handler(event, nmi, data, regs); 4625 event->overflow_handler(event, data, regs);
5028 else 4626 else
5029 perf_event_output(event, nmi, data, regs); 4627 perf_event_output(event, data, regs);
5030 4628
5031 if (event->fasync && event->pending_kill) { 4629 if (event->fasync && event->pending_kill) {
5032 if (nmi) { 4630 event->pending_wakeup = 1;
5033 event->pending_wakeup = 1; 4631 irq_work_queue(&event->pending);
5034 irq_work_queue(&event->pending);
5035 } else
5036 perf_event_wakeup(event);
5037 } 4632 }
5038 4633
5039 return ret; 4634 return ret;
5040} 4635}
5041 4636
5042int perf_event_overflow(struct perf_event *event, int nmi, 4637int perf_event_overflow(struct perf_event *event,
5043 struct perf_sample_data *data, 4638 struct perf_sample_data *data,
5044 struct pt_regs *regs) 4639 struct pt_regs *regs)
5045{ 4640{
5046 return __perf_event_overflow(event, nmi, 1, data, regs); 4641 return __perf_event_overflow(event, 1, data, regs);
5047} 4642}
5048 4643
5049/* 4644/*
@@ -5092,7 +4687,7 @@ again:
5092} 4687}
5093 4688
5094static void perf_swevent_overflow(struct perf_event *event, u64 overflow, 4689static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
5095 int nmi, struct perf_sample_data *data, 4690 struct perf_sample_data *data,
5096 struct pt_regs *regs) 4691 struct pt_regs *regs)
5097{ 4692{
5098 struct hw_perf_event *hwc = &event->hw; 4693 struct hw_perf_event *hwc = &event->hw;
@@ -5106,7 +4701,7 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
5106 return; 4701 return;
5107 4702
5108 for (; overflow; overflow--) { 4703 for (; overflow; overflow--) {
5109 if (__perf_event_overflow(event, nmi, throttle, 4704 if (__perf_event_overflow(event, throttle,
5110 data, regs)) { 4705 data, regs)) {
5111 /* 4706 /*
5112 * We inhibit the overflow from happening when 4707 * We inhibit the overflow from happening when
@@ -5119,7 +4714,7 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
5119} 4714}
5120 4715
5121static void perf_swevent_event(struct perf_event *event, u64 nr, 4716static void perf_swevent_event(struct perf_event *event, u64 nr,
5122 int nmi, struct perf_sample_data *data, 4717 struct perf_sample_data *data,
5123 struct pt_regs *regs) 4718 struct pt_regs *regs)
5124{ 4719{
5125 struct hw_perf_event *hwc = &event->hw; 4720 struct hw_perf_event *hwc = &event->hw;
@@ -5133,12 +4728,12 @@ static void perf_swevent_event(struct perf_event *event, u64 nr,
5133 return; 4728 return;
5134 4729
5135 if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) 4730 if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
5136 return perf_swevent_overflow(event, 1, nmi, data, regs); 4731 return perf_swevent_overflow(event, 1, data, regs);
5137 4732
5138 if (local64_add_negative(nr, &hwc->period_left)) 4733 if (local64_add_negative(nr, &hwc->period_left))
5139 return; 4734 return;
5140 4735
5141 perf_swevent_overflow(event, 0, nmi, data, regs); 4736 perf_swevent_overflow(event, 0, data, regs);
5142} 4737}
5143 4738
5144static int perf_exclude_event(struct perf_event *event, 4739static int perf_exclude_event(struct perf_event *event,
@@ -5226,7 +4821,7 @@ find_swevent_head(struct swevent_htable *swhash, struct perf_event *event)
5226} 4821}
5227 4822
5228static void do_perf_sw_event(enum perf_type_id type, u32 event_id, 4823static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
5229 u64 nr, int nmi, 4824 u64 nr,
5230 struct perf_sample_data *data, 4825 struct perf_sample_data *data,
5231 struct pt_regs *regs) 4826 struct pt_regs *regs)
5232{ 4827{
@@ -5242,7 +4837,7 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
5242 4837
5243 hlist_for_each_entry_rcu(event, node, head, hlist_entry) { 4838 hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
5244 if (perf_swevent_match(event, type, event_id, data, regs)) 4839 if (perf_swevent_match(event, type, event_id, data, regs))
5245 perf_swevent_event(event, nr, nmi, data, regs); 4840 perf_swevent_event(event, nr, data, regs);
5246 } 4841 }
5247end: 4842end:
5248 rcu_read_unlock(); 4843 rcu_read_unlock();
@@ -5263,8 +4858,7 @@ inline void perf_swevent_put_recursion_context(int rctx)
5263 put_recursion_context(swhash->recursion, rctx); 4858 put_recursion_context(swhash->recursion, rctx);
5264} 4859}
5265 4860
5266void __perf_sw_event(u32 event_id, u64 nr, int nmi, 4861void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
5267 struct pt_regs *regs, u64 addr)
5268{ 4862{
5269 struct perf_sample_data data; 4863 struct perf_sample_data data;
5270 int rctx; 4864 int rctx;
@@ -5276,7 +4870,7 @@ void __perf_sw_event(u32 event_id, u64 nr, int nmi,
5276 4870
5277 perf_sample_data_init(&data, addr); 4871 perf_sample_data_init(&data, addr);
5278 4872
5279 do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs); 4873 do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs);
5280 4874
5281 perf_swevent_put_recursion_context(rctx); 4875 perf_swevent_put_recursion_context(rctx);
5282 preempt_enable_notrace(); 4876 preempt_enable_notrace();
@@ -5524,7 +5118,7 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
5524 5118
5525 hlist_for_each_entry_rcu(event, node, head, hlist_entry) { 5119 hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
5526 if (perf_tp_event_match(event, &data, regs)) 5120 if (perf_tp_event_match(event, &data, regs))
5527 perf_swevent_event(event, count, 1, &data, regs); 5121 perf_swevent_event(event, count, &data, regs);
5528 } 5122 }
5529 5123
5530 perf_swevent_put_recursion_context(rctx); 5124 perf_swevent_put_recursion_context(rctx);
@@ -5617,7 +5211,7 @@ void perf_bp_event(struct perf_event *bp, void *data)
5617 perf_sample_data_init(&sample, bp->attr.bp_addr); 5211 perf_sample_data_init(&sample, bp->attr.bp_addr);
5618 5212
5619 if (!bp->hw.state && !perf_exclude_event(bp, regs)) 5213 if (!bp->hw.state && !perf_exclude_event(bp, regs))
5620 perf_swevent_event(bp, 1, 1, &sample, regs); 5214 perf_swevent_event(bp, 1, &sample, regs);
5621} 5215}
5622#endif 5216#endif
5623 5217
@@ -5646,7 +5240,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
5646 5240
5647 if (regs && !perf_exclude_event(event, regs)) { 5241 if (regs && !perf_exclude_event(event, regs)) {
5648 if (!(event->attr.exclude_idle && current->pid == 0)) 5242 if (!(event->attr.exclude_idle && current->pid == 0))
5649 if (perf_event_overflow(event, 0, &data, regs)) 5243 if (perf_event_overflow(event, &data, regs))
5650 ret = HRTIMER_NORESTART; 5244 ret = HRTIMER_NORESTART;
5651 } 5245 }
5652 5246
@@ -5986,6 +5580,7 @@ free_dev:
5986} 5580}
5987 5581
5988static struct lock_class_key cpuctx_mutex; 5582static struct lock_class_key cpuctx_mutex;
5583static struct lock_class_key cpuctx_lock;
5989 5584
5990int perf_pmu_register(struct pmu *pmu, char *name, int type) 5585int perf_pmu_register(struct pmu *pmu, char *name, int type)
5991{ 5586{
@@ -6036,6 +5631,7 @@ skip_type:
6036 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); 5631 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
6037 __perf_event_init_context(&cpuctx->ctx); 5632 __perf_event_init_context(&cpuctx->ctx);
6038 lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex); 5633 lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex);
5634 lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock);
6039 cpuctx->ctx.type = cpu_context; 5635 cpuctx->ctx.type = cpu_context;
6040 cpuctx->ctx.pmu = pmu; 5636 cpuctx->ctx.pmu = pmu;
6041 cpuctx->jiffies_interval = 1; 5637 cpuctx->jiffies_interval = 1;
@@ -6150,7 +5746,8 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
6150 struct task_struct *task, 5746 struct task_struct *task,
6151 struct perf_event *group_leader, 5747 struct perf_event *group_leader,
6152 struct perf_event *parent_event, 5748 struct perf_event *parent_event,
6153 perf_overflow_handler_t overflow_handler) 5749 perf_overflow_handler_t overflow_handler,
5750 void *context)
6154{ 5751{
6155 struct pmu *pmu; 5752 struct pmu *pmu;
6156 struct perf_event *event; 5753 struct perf_event *event;
@@ -6208,10 +5805,13 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
6208#endif 5805#endif
6209 } 5806 }
6210 5807
6211 if (!overflow_handler && parent_event) 5808 if (!overflow_handler && parent_event) {
6212 overflow_handler = parent_event->overflow_handler; 5809 overflow_handler = parent_event->overflow_handler;
5810 context = parent_event->overflow_handler_context;
5811 }
6213 5812
6214 event->overflow_handler = overflow_handler; 5813 event->overflow_handler = overflow_handler;
5814 event->overflow_handler_context = context;
6215 5815
6216 if (attr->disabled) 5816 if (attr->disabled)
6217 event->state = PERF_EVENT_STATE_OFF; 5817 event->state = PERF_EVENT_STATE_OFF;
@@ -6326,13 +5926,6 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
6326 if (ret) 5926 if (ret)
6327 return -EFAULT; 5927 return -EFAULT;
6328 5928
6329 /*
6330 * If the type exists, the corresponding creation will verify
6331 * the attr->config.
6332 */
6333 if (attr->type >= PERF_TYPE_MAX)
6334 return -EINVAL;
6335
6336 if (attr->__reserved_1) 5929 if (attr->__reserved_1)
6337 return -EINVAL; 5930 return -EINVAL;
6338 5931
@@ -6354,7 +5947,7 @@ err_size:
6354static int 5947static int
6355perf_event_set_output(struct perf_event *event, struct perf_event *output_event) 5948perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
6356{ 5949{
6357 struct perf_buffer *buffer = NULL, *old_buffer = NULL; 5950 struct ring_buffer *rb = NULL, *old_rb = NULL;
6358 int ret = -EINVAL; 5951 int ret = -EINVAL;
6359 5952
6360 if (!output_event) 5953 if (!output_event)
@@ -6371,7 +5964,7 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
6371 goto out; 5964 goto out;
6372 5965
6373 /* 5966 /*
6374 * If it's not a per-cpu rb, it must be the same task. 5967
6375 */ 5968 */
6376 if (output_event->cpu == -1 && output_event->ctx != event->ctx) 5969 if (output_event->cpu == -1 && output_event->ctx != event->ctx)
6377 goto out; 5970 goto out;
@@ -6383,20 +5976,20 @@ set:
6383 goto unlock; 5976 goto unlock;
6384 5977
6385 if (output_event) { 5978 if (output_event) {
6386 /* get the buffer we want to redirect to */ 5979 /* get the rb we want to redirect to */
6387 buffer = perf_buffer_get(output_event); 5980 rb = ring_buffer_get(output_event);
6388 if (!buffer) 5981 if (!rb)
6389 goto unlock; 5982 goto unlock;
6390 } 5983 }
6391 5984
6392 old_buffer = event->buffer; 5985 old_rb = event->rb;
6393 rcu_assign_pointer(event->buffer, buffer); 5986 rcu_assign_pointer(event->rb, rb);
6394 ret = 0; 5987 ret = 0;
6395unlock: 5988unlock:
6396 mutex_unlock(&event->mmap_mutex); 5989 mutex_unlock(&event->mmap_mutex);
6397 5990
6398 if (old_buffer) 5991 if (old_rb)
6399 perf_buffer_put(old_buffer); 5992 ring_buffer_put(old_rb);
6400out: 5993out:
6401 return ret; 5994 return ret;
6402} 5995}
@@ -6478,7 +6071,8 @@ SYSCALL_DEFINE5(perf_event_open,
6478 } 6071 }
6479 } 6072 }
6480 6073
6481 event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, NULL); 6074 event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,
6075 NULL, NULL);
6482 if (IS_ERR(event)) { 6076 if (IS_ERR(event)) {
6483 err = PTR_ERR(event); 6077 err = PTR_ERR(event);
6484 goto err_task; 6078 goto err_task;
@@ -6663,7 +6257,8 @@ err_fd:
6663struct perf_event * 6257struct perf_event *
6664perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, 6258perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
6665 struct task_struct *task, 6259 struct task_struct *task,
6666 perf_overflow_handler_t overflow_handler) 6260 perf_overflow_handler_t overflow_handler,
6261 void *context)
6667{ 6262{
6668 struct perf_event_context *ctx; 6263 struct perf_event_context *ctx;
6669 struct perf_event *event; 6264 struct perf_event *event;
@@ -6673,7 +6268,8 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
6673 * Get the target context (task or percpu): 6268 * Get the target context (task or percpu):
6674 */ 6269 */
6675 6270
6676 event = perf_event_alloc(attr, cpu, task, NULL, NULL, overflow_handler); 6271 event = perf_event_alloc(attr, cpu, task, NULL, NULL,
6272 overflow_handler, context);
6677 if (IS_ERR(event)) { 6273 if (IS_ERR(event)) {
6678 err = PTR_ERR(event); 6274 err = PTR_ERR(event);
6679 goto err; 6275 goto err;
@@ -6780,7 +6376,6 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
6780 * our context. 6376 * our context.
6781 */ 6377 */
6782 child_ctx = rcu_dereference_raw(child->perf_event_ctxp[ctxn]); 6378 child_ctx = rcu_dereference_raw(child->perf_event_ctxp[ctxn]);
6783 task_ctx_sched_out(child_ctx, EVENT_ALL);
6784 6379
6785 /* 6380 /*
6786 * Take the context lock here so that if find_get_context is 6381 * Take the context lock here so that if find_get_context is
@@ -6788,6 +6383,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
6788 * incremented the context's refcount before we do put_ctx below. 6383 * incremented the context's refcount before we do put_ctx below.
6789 */ 6384 */
6790 raw_spin_lock(&child_ctx->lock); 6385 raw_spin_lock(&child_ctx->lock);
6386 task_ctx_sched_out(child_ctx);
6791 child->perf_event_ctxp[ctxn] = NULL; 6387 child->perf_event_ctxp[ctxn] = NULL;
6792 /* 6388 /*
6793 * If this context is a clone; unclone it so it can't get 6389 * If this context is a clone; unclone it so it can't get
@@ -6957,7 +6553,7 @@ inherit_event(struct perf_event *parent_event,
6957 parent_event->cpu, 6553 parent_event->cpu,
6958 child, 6554 child,
6959 group_leader, parent_event, 6555 group_leader, parent_event,
6960 NULL); 6556 NULL, NULL);
6961 if (IS_ERR(child_event)) 6557 if (IS_ERR(child_event))
6962 return child_event; 6558 return child_event;
6963 get_ctx(child_ctx); 6559 get_ctx(child_ctx);
@@ -6984,6 +6580,8 @@ inherit_event(struct perf_event *parent_event,
6984 6580
6985 child_event->ctx = child_ctx; 6581 child_event->ctx = child_ctx;
6986 child_event->overflow_handler = parent_event->overflow_handler; 6582 child_event->overflow_handler = parent_event->overflow_handler;
6583 child_event->overflow_handler_context
6584 = parent_event->overflow_handler_context;
6987 6585
6988 /* 6586 /*
6989 * Precalculate sample_data sizes 6587 * Precalculate sample_data sizes
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index 086adf25a55e..b7971d6f38bf 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -431,9 +431,11 @@ int register_perf_hw_breakpoint(struct perf_event *bp)
431struct perf_event * 431struct perf_event *
432register_user_hw_breakpoint(struct perf_event_attr *attr, 432register_user_hw_breakpoint(struct perf_event_attr *attr,
433 perf_overflow_handler_t triggered, 433 perf_overflow_handler_t triggered,
434 void *context,
434 struct task_struct *tsk) 435 struct task_struct *tsk)
435{ 436{
436 return perf_event_create_kernel_counter(attr, -1, tsk, triggered); 437 return perf_event_create_kernel_counter(attr, -1, tsk, triggered,
438 context);
437} 439}
438EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); 440EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
439 441
@@ -502,7 +504,8 @@ EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
502 */ 504 */
503struct perf_event * __percpu * 505struct perf_event * __percpu *
504register_wide_hw_breakpoint(struct perf_event_attr *attr, 506register_wide_hw_breakpoint(struct perf_event_attr *attr,
505 perf_overflow_handler_t triggered) 507 perf_overflow_handler_t triggered,
508 void *context)
506{ 509{
507 struct perf_event * __percpu *cpu_events, **pevent, *bp; 510 struct perf_event * __percpu *cpu_events, **pevent, *bp;
508 long err; 511 long err;
@@ -515,7 +518,8 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr,
515 get_online_cpus(); 518 get_online_cpus();
516 for_each_online_cpu(cpu) { 519 for_each_online_cpu(cpu) {
517 pevent = per_cpu_ptr(cpu_events, cpu); 520 pevent = per_cpu_ptr(cpu_events, cpu);
518 bp = perf_event_create_kernel_counter(attr, cpu, NULL, triggered); 521 bp = perf_event_create_kernel_counter(attr, cpu, NULL,
522 triggered, context);
519 523
520 *pevent = bp; 524 *pevent = bp;
521 525
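Editor's note: the breakpoint wrappers gain the same parameter, so callers can reach per-registration state from the triggered handler without globals. A hedged sketch of the updated API (watch_word() and the tag string are hypothetical; the attr setup follows the usual hw_breakpoint_init() pattern):

        #include <linux/hw_breakpoint.h>
        #include <linux/kernel.h>
        #include <linux/perf_event.h>

        static void bp_triggered(struct perf_event *bp,
                                 struct perf_sample_data *data,
                                 struct pt_regs *regs)
        {
                const char *tag = bp->overflow_handler_context;

                pr_info("hw breakpoint hit: %s\n", tag);
        }

        static struct perf_event * __percpu *watch_word(void *addr)
        {
                struct perf_event_attr attr;

                hw_breakpoint_init(&attr);
                attr.bp_addr = (unsigned long)addr;
                attr.bp_len  = HW_BREAKPOINT_LEN_4;
                attr.bp_type = HW_BREAKPOINT_W;

                /* the new trailing argument reaches bp_triggered() unchanged */
                return register_wide_hw_breakpoint(&attr, bp_triggered,
                                                   (void *)"demo-watch");
        }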
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
new file mode 100644
index 000000000000..09097dd8116c
--- /dev/null
+++ b/kernel/events/internal.h
@@ -0,0 +1,96 @@
1#ifndef _KERNEL_EVENTS_INTERNAL_H
2#define _KERNEL_EVENTS_INTERNAL_H
3
4#define RING_BUFFER_WRITABLE 0x01
5
6struct ring_buffer {
7 atomic_t refcount;
8 struct rcu_head rcu_head;
9#ifdef CONFIG_PERF_USE_VMALLOC
10 struct work_struct work;
11 int page_order; /* allocation order */
12#endif
13 int nr_pages; /* nr of data pages */
14 int writable; /* are we writable */
15
16 atomic_t poll; /* POLL_ for wakeups */
17
18 local_t head; /* write position */
19 local_t nest; /* nested writers */
20 local_t events; /* event limit */
21 local_t wakeup; /* wakeup stamp */
22 local_t lost; /* nr records lost */
23
24 long watermark; /* wakeup watermark */
25
26 struct perf_event_mmap_page *user_page;
27 void *data_pages[0];
28};
29
30extern void rb_free(struct ring_buffer *rb);
31extern struct ring_buffer *
32rb_alloc(int nr_pages, long watermark, int cpu, int flags);
33extern void perf_event_wakeup(struct perf_event *event);
34
35extern void
36perf_event_header__init_id(struct perf_event_header *header,
37 struct perf_sample_data *data,
38 struct perf_event *event);
39extern void
40perf_event__output_id_sample(struct perf_event *event,
41 struct perf_output_handle *handle,
42 struct perf_sample_data *sample);
43
44extern struct page *
45perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff);
46
47#ifdef CONFIG_PERF_USE_VMALLOC
48/*
49 * Back perf_mmap() with vmalloc memory.
50 *
51 * Required for architectures that have d-cache aliasing issues.
52 */
53
54static inline int page_order(struct ring_buffer *rb)
55{
56 return rb->page_order;
57}
58
59#else
60
61static inline int page_order(struct ring_buffer *rb)
62{
63 return 0;
64}
65#endif
66
67static unsigned long perf_data_size(struct ring_buffer *rb)
68{
69 return rb->nr_pages << (PAGE_SHIFT + page_order(rb));
70}
71
72static inline void
73__output_copy(struct perf_output_handle *handle,
74 const void *buf, unsigned int len)
75{
76 do {
77 unsigned long size = min_t(unsigned long, handle->size, len);
78
79 memcpy(handle->addr, buf, size);
80
81 len -= size;
82 handle->addr += size;
83 buf += size;
84 handle->size -= size;
85 if (!handle->size) {
86 struct ring_buffer *rb = handle->rb;
87
88 handle->page++;
89 handle->page &= rb->nr_pages - 1;
90 handle->addr = rb->data_pages[handle->page];
91 handle->size = PAGE_SIZE << page_order(rb);
92 }
93 } while (len);
94}
95
96#endif /* _KERNEL_EVENTS_INTERNAL_H */
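Editor's note: __output_copy() above is the core of the data path — copy in page-sized chunks and wrap the page index with a power-of-two mask. A standalone model of just that loop (a simplified sketch; it drops the handle bookkeeping and assumes nr_pages is a power of two):

        #include <linux/kernel.h>
        #include <linux/mm.h>
        #include <linux/string.h>

        static void ring_copy(char **pages, int nr_pages, unsigned long pos,
                              const char *buf, unsigned long len)
        {
                while (len) {
                        unsigned long page  = (pos >> PAGE_SHIFT) & (nr_pages - 1);
                        unsigned long off   = pos & (PAGE_SIZE - 1);
                        unsigned long chunk = min(len, PAGE_SIZE - off);

                        memcpy(pages[page] + off, buf, chunk);  /* fill this page */

                        pos += chunk;           /* next page index wraps via mask */
                        buf += chunk;
                        len -= chunk;
                }
        }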
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
new file mode 100644
index 000000000000..a2a29205cc0f
--- /dev/null
+++ b/kernel/events/ring_buffer.c
@@ -0,0 +1,380 @@
1/*
2 * Performance events ring-buffer code:
3 *
4 * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
5 * Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
6 * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
7 * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
8 *
9 * For licensing details see kernel-base/COPYING
10 */
11
12#include <linux/perf_event.h>
13#include <linux/vmalloc.h>
14#include <linux/slab.h>
15
16#include "internal.h"
17
18static bool perf_output_space(struct ring_buffer *rb, unsigned long tail,
19 unsigned long offset, unsigned long head)
20{
21 unsigned long mask;
22
23 if (!rb->writable)
24 return true;
25
26 mask = perf_data_size(rb) - 1;
27
28 offset = (offset - tail) & mask;
29 head = (head - tail) & mask;
30
31 if ((int)(head - offset) < 0)
32 return false;
33
34 return true;
35}
36
37static void perf_output_wakeup(struct perf_output_handle *handle)
38{
39 atomic_set(&handle->rb->poll, POLL_IN);
40
41 handle->event->pending_wakeup = 1;
42 irq_work_queue(&handle->event->pending);
43}
44
45/*
46 * We need to ensure a later event_id doesn't publish a head when a former
 47 * event isn't done writing. However, since we need to deal with NMIs we
48 * cannot fully serialize things.
49 *
50 * We only publish the head (and generate a wakeup) when the outer-most
51 * event completes.
52 */
53static void perf_output_get_handle(struct perf_output_handle *handle)
54{
55 struct ring_buffer *rb = handle->rb;
56
57 preempt_disable();
58 local_inc(&rb->nest);
59 handle->wakeup = local_read(&rb->wakeup);
60}
61
62static void perf_output_put_handle(struct perf_output_handle *handle)
63{
64 struct ring_buffer *rb = handle->rb;
65 unsigned long head;
66
67again:
68 head = local_read(&rb->head);
69
70 /*
71 * IRQ/NMI can happen here, which means we can miss a head update.
72 */
73
74 if (!local_dec_and_test(&rb->nest))
75 goto out;
76
77 /*
78 * Publish the known good head. Rely on the full barrier implied
 79 * by local_dec_and_test() to order the rb->head read and this
80 * write.
81 */
82 rb->user_page->data_head = head;
83
84 /*
85 * Now check if we missed an update, rely on the (compiler)
 86 * barrier in local_dec_and_test() to re-read rb->head.
87 */
88 if (unlikely(head != local_read(&rb->head))) {
89 local_inc(&rb->nest);
90 goto again;
91 }
92
93 if (handle->wakeup != local_read(&rb->wakeup))
94 perf_output_wakeup(handle);
95
96out:
97 preempt_enable();
98}
99
100int perf_output_begin(struct perf_output_handle *handle,
101 struct perf_event *event, unsigned int size)
102{
103 struct ring_buffer *rb;
104 unsigned long tail, offset, head;
105 int have_lost;
106 struct perf_sample_data sample_data;
107 struct {
108 struct perf_event_header header;
109 u64 id;
110 u64 lost;
111 } lost_event;
112
113 rcu_read_lock();
114 /*
115 * For inherited events we send all the output towards the parent.
116 */
117 if (event->parent)
118 event = event->parent;
119
120 rb = rcu_dereference(event->rb);
121 if (!rb)
122 goto out;
123
124 handle->rb = rb;
125 handle->event = event;
126
127 if (!rb->nr_pages)
128 goto out;
129
130 have_lost = local_read(&rb->lost);
131 if (have_lost) {
132 lost_event.header.size = sizeof(lost_event);
133 perf_event_header__init_id(&lost_event.header, &sample_data,
134 event);
135 size += lost_event.header.size;
136 }
137
138 perf_output_get_handle(handle);
139
140 do {
141 /*
142 * Userspace could choose to issue a mb() before updating the
 143 * tail pointer, so that all reads will be completed before the
144 * write is issued.
145 */
146 tail = ACCESS_ONCE(rb->user_page->data_tail);
147 smp_rmb();
148 offset = head = local_read(&rb->head);
149 head += size;
150 if (unlikely(!perf_output_space(rb, tail, offset, head)))
151 goto fail;
152 } while (local_cmpxchg(&rb->head, offset, head) != offset);
153
154 if (head - local_read(&rb->wakeup) > rb->watermark)
155 local_add(rb->watermark, &rb->wakeup);
156
157 handle->page = offset >> (PAGE_SHIFT + page_order(rb));
158 handle->page &= rb->nr_pages - 1;
159 handle->size = offset & ((PAGE_SIZE << page_order(rb)) - 1);
160 handle->addr = rb->data_pages[handle->page];
161 handle->addr += handle->size;
162 handle->size = (PAGE_SIZE << page_order(rb)) - handle->size;
163
164 if (have_lost) {
165 lost_event.header.type = PERF_RECORD_LOST;
166 lost_event.header.misc = 0;
167 lost_event.id = event->id;
168 lost_event.lost = local_xchg(&rb->lost, 0);
169
170 perf_output_put(handle, lost_event);
171 perf_event__output_id_sample(event, handle, &sample_data);
172 }
173
174 return 0;
175
176fail:
177 local_inc(&rb->lost);
178 perf_output_put_handle(handle);
179out:
180 rcu_read_unlock();
181
182 return -ENOSPC;
183}
184
185void perf_output_copy(struct perf_output_handle *handle,
186 const void *buf, unsigned int len)
187{
188 __output_copy(handle, buf, len);
189}
190
191void perf_output_end(struct perf_output_handle *handle)
192{
193 perf_output_put_handle(handle);
194 rcu_read_unlock();
195}
196
197static void
198ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
199{
200 long max_size = perf_data_size(rb);
201
202 if (watermark)
203 rb->watermark = min(max_size, watermark);
204
205 if (!rb->watermark)
206 rb->watermark = max_size / 2;
207
208 if (flags & RING_BUFFER_WRITABLE)
209 rb->writable = 1;
210
211 atomic_set(&rb->refcount, 1);
212}
213
214#ifndef CONFIG_PERF_USE_VMALLOC
215
216/*
217 * Back perf_mmap() with regular GFP_KERNEL-0 pages.
218 */
219
220struct page *
221perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
222{
223 if (pgoff > rb->nr_pages)
224 return NULL;
225
226 if (pgoff == 0)
227 return virt_to_page(rb->user_page);
228
229 return virt_to_page(rb->data_pages[pgoff - 1]);
230}
231
232static void *perf_mmap_alloc_page(int cpu)
233{
234 struct page *page;
235 int node;
236
237 node = (cpu == -1) ? cpu : cpu_to_node(cpu);
238 page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
239 if (!page)
240 return NULL;
241
242 return page_address(page);
243}
244
245struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
246{
247 struct ring_buffer *rb;
248 unsigned long size;
249 int i;
250
251 size = sizeof(struct ring_buffer);
252 size += nr_pages * sizeof(void *);
253
254 rb = kzalloc(size, GFP_KERNEL);
255 if (!rb)
256 goto fail;
257
258 rb->user_page = perf_mmap_alloc_page(cpu);
259 if (!rb->user_page)
260 goto fail_user_page;
261
262 for (i = 0; i < nr_pages; i++) {
263 rb->data_pages[i] = perf_mmap_alloc_page(cpu);
264 if (!rb->data_pages[i])
265 goto fail_data_pages;
266 }
267
268 rb->nr_pages = nr_pages;
269
270 ring_buffer_init(rb, watermark, flags);
271
272 return rb;
273
274fail_data_pages:
275 for (i--; i >= 0; i--)
276 free_page((unsigned long)rb->data_pages[i]);
277
278 free_page((unsigned long)rb->user_page);
279
280fail_user_page:
281 kfree(rb);
282
283fail:
284 return NULL;
285}
286
287static void perf_mmap_free_page(unsigned long addr)
288{
289 struct page *page = virt_to_page((void *)addr);
290
291 page->mapping = NULL;
292 __free_page(page);
293}
294
295void rb_free(struct ring_buffer *rb)
296{
297 int i;
298
299 perf_mmap_free_page((unsigned long)rb->user_page);
300 for (i = 0; i < rb->nr_pages; i++)
301 perf_mmap_free_page((unsigned long)rb->data_pages[i]);
302 kfree(rb);
303}
304
305#else
306
307struct page *
308perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
309{
310 if (pgoff > (1UL << page_order(rb)))
311 return NULL;
312
313 return vmalloc_to_page((void *)rb->user_page + pgoff * PAGE_SIZE);
314}
315
316static void perf_mmap_unmark_page(void *addr)
317{
318 struct page *page = vmalloc_to_page(addr);
319
320 page->mapping = NULL;
321}
322
323static void rb_free_work(struct work_struct *work)
324{
325 struct ring_buffer *rb;
326 void *base;
327 int i, nr;
328
329 rb = container_of(work, struct ring_buffer, work);
330 nr = 1 << page_order(rb);
331
332 base = rb->user_page;
333 for (i = 0; i < nr + 1; i++)
334 perf_mmap_unmark_page(base + (i * PAGE_SIZE));
335
336 vfree(base);
337 kfree(rb);
338}
339
340void rb_free(struct ring_buffer *rb)
341{
342 schedule_work(&rb->work);
343}
344
345struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
346{
347 struct ring_buffer *rb;
348 unsigned long size;
349 void *all_buf;
350
351 size = sizeof(struct ring_buffer);
352 size += sizeof(void *);
353
354 rb = kzalloc(size, GFP_KERNEL);
355 if (!rb)
356 goto fail;
357
358 INIT_WORK(&rb->work, rb_free_work);
359
360 all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE);
361 if (!all_buf)
362 goto fail_all_buf;
363
364 rb->user_page = all_buf;
365 rb->data_pages[0] = all_buf + PAGE_SIZE;
366 rb->page_order = ilog2(nr_pages);
367 rb->nr_pages = 1;
368
369 ring_buffer_init(rb, watermark, flags);
370
371 return rb;
372
373fail_all_buf:
374 kfree(rb);
375
376fail:
377 return NULL;
378}
379
380#endif
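Editor's note: the data_tail handling in perf_output_begin() implies a matching contract on the mmap consumer's side. A userspace-side sketch of that contract (illustrative only: eat() is a placeholder, and __sync_synchronize() stands in for the rmb()/mb() the comment asks for):

        #include <stdint.h>
        #include <linux/perf_event.h>   /* struct perf_event_mmap_page */

        static void drain(struct perf_event_mmap_page *pg, uint8_t *data,
                          void (*eat)(uint8_t *d, uint64_t tail, uint64_t head))
        {
                uint64_t head = pg->data_head;

                __sync_synchronize();           /* read head before the records */

                eat(data, pg->data_tail, head); /* consume [tail, head) */

                __sync_synchronize();           /* finish all reads, then ... */
                pg->data_tail = head;           /* ... release the space; the
                                                   kernel re-reads this in
                                                   perf_output_begin() */
        }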
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 77981813a1e7..b30fd54eb985 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1255,19 +1255,29 @@ static int __kprobes in_kprobes_functions(unsigned long addr)
1255/* 1255/*
1256 * If we have a symbol_name argument, look it up and add the offset field 1256 * If we have a symbol_name argument, look it up and add the offset field
1257 * to it. This way, we can specify a relative address to a symbol. 1257 * to it. This way, we can specify a relative address to a symbol.
 1258 * This returns encoded errors if it fails to look up the symbol or the
 1259 * combination of parameters is invalid.
1258 */ 1260 */
1259static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p) 1261static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p)
1260{ 1262{
1261 kprobe_opcode_t *addr = p->addr; 1263 kprobe_opcode_t *addr = p->addr;
1264
1265 if ((p->symbol_name && p->addr) ||
1266 (!p->symbol_name && !p->addr))
1267 goto invalid;
1268
1262 if (p->symbol_name) { 1269 if (p->symbol_name) {
1263 if (addr)
1264 return NULL;
1265 kprobe_lookup_name(p->symbol_name, addr); 1270 kprobe_lookup_name(p->symbol_name, addr);
1271 if (!addr)
1272 return ERR_PTR(-ENOENT);
1266 } 1273 }
1267 1274
1268 if (!addr) 1275 addr = (kprobe_opcode_t *)(((char *)addr) + p->offset);
1269 return NULL; 1276 if (addr)
1270 return (kprobe_opcode_t *)(((char *)addr) + p->offset); 1277 return addr;
1278
1279invalid:
1280 return ERR_PTR(-EINVAL);
1271} 1281}
1272 1282
1273/* Check passed kprobe is valid and return kprobe in kprobe_table. */ 1283/* Check passed kprobe is valid and return kprobe in kprobe_table. */
@@ -1311,8 +1321,8 @@ int __kprobes register_kprobe(struct kprobe *p)
1311 kprobe_opcode_t *addr; 1321 kprobe_opcode_t *addr;
1312 1322
1313 addr = kprobe_addr(p); 1323 addr = kprobe_addr(p);
1314 if (!addr) 1324 if (IS_ERR(addr))
1315 return -EINVAL; 1325 return PTR_ERR(addr);
1316 p->addr = addr; 1326 p->addr = addr;
1317 1327
1318 ret = check_kprobe_rereg(p); 1328 ret = check_kprobe_rereg(p);
@@ -1335,6 +1345,8 @@ int __kprobes register_kprobe(struct kprobe *p)
1335 */ 1345 */
1336 probed_mod = __module_text_address((unsigned long) p->addr); 1346 probed_mod = __module_text_address((unsigned long) p->addr);
1337 if (probed_mod) { 1347 if (probed_mod) {
 1348 /* Return -ENOENT if the checks below fail. */
1349 ret = -ENOENT;
1338 /* 1350 /*
1339 * We must hold a refcount of the probed module while updating 1351 * We must hold a refcount of the probed module while updating
1340 * its code to prohibit unexpected unloading. 1352 * its code to prohibit unexpected unloading.
@@ -1351,6 +1363,7 @@ int __kprobes register_kprobe(struct kprobe *p)
1351 module_put(probed_mod); 1363 module_put(probed_mod);
1352 goto fail_with_jump_label; 1364 goto fail_with_jump_label;
1353 } 1365 }
 1366 /* ret will be updated by the following code */
1354 } 1367 }
1355 preempt_enable(); 1368 preempt_enable();
1356 jump_label_unlock(); 1369 jump_label_unlock();
@@ -1399,7 +1412,7 @@ out:
1399fail_with_jump_label: 1412fail_with_jump_label:
1400 preempt_enable(); 1413 preempt_enable();
1401 jump_label_unlock(); 1414 jump_label_unlock();
1402 return -EINVAL; 1415 return ret;
1403} 1416}
1404EXPORT_SYMBOL_GPL(register_kprobe); 1417EXPORT_SYMBOL_GPL(register_kprobe);
1405 1418
@@ -1686,8 +1699,8 @@ int __kprobes register_kretprobe(struct kretprobe *rp)
1686 1699
1687 if (kretprobe_blacklist_size) { 1700 if (kretprobe_blacklist_size) {
1688 addr = kprobe_addr(&rp->kp); 1701 addr = kprobe_addr(&rp->kp);
1689 if (!addr) 1702 if (IS_ERR(addr))
1690 return -EINVAL; 1703 return PTR_ERR(addr);
1691 1704
1692 for (i = 0; kretprobe_blacklist[i].name != NULL; i++) { 1705 for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
1693 if (kretprobe_blacklist[i].addr == addr) 1706 if (kretprobe_blacklist[i].addr == addr)
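Editor's note: from a module author's side, this change makes a missing probe point distinguishable from bad arguments. A hedged sketch (the target symbol and handler are hypothetical):

        #include <linux/kernel.h>
        #include <linux/kprobes.h>
        #include <linux/module.h>

        static int kp_pre(struct kprobe *p, struct pt_regs *regs)
        {
                return 0;                       /* observe only */
        }

        static struct kprobe kp = {
                .symbol_name = "some_driver_func",      /* hypothetical target */
                .pre_handler = kp_pre,
        };

        static int __init kp_init(void)
        {
                int ret = register_kprobe(&kp);

                /* previously both cases collapsed into -EINVAL */
                if (ret == -ENOENT)
                        pr_info("probe point not present (module not loaded?)\n");
                return ret;
        }
        module_init(kp_init);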
diff --git a/kernel/sched.c b/kernel/sched.c
index c518b05fd062..84b9e076812e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2220,7 +2220,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
2220 2220
2221 if (task_cpu(p) != new_cpu) { 2221 if (task_cpu(p) != new_cpu) {
2222 p->se.nr_migrations++; 2222 p->se.nr_migrations++;
2223 perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 1, NULL, 0); 2223 perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0);
2224 } 2224 }
2225 2225
2226 __set_task_cpu(p, new_cpu); 2226 __set_task_cpu(p, new_cpu);
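Editor's note: this one-line hunk reflects the tree-wide signature change from the series — perf_sw_event() loses its nmi flag and is down to four arguments (event id, count, regs, addr). A sketch of what a typical fault-path caller now looks like:

        #include <linux/perf_event.h>

        static inline void count_fault(struct pt_regs *regs, unsigned long address)
        {
                /* old shape: perf_sw_event(id, nr, nmi, regs, addr) */
                perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
        }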
diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
index eb212f8f8bc8..d20c6983aad9 100644
--- a/kernel/stacktrace.c
+++ b/kernel/stacktrace.c
@@ -26,12 +26,18 @@ void print_stack_trace(struct stack_trace *trace, int spaces)
26EXPORT_SYMBOL_GPL(print_stack_trace); 26EXPORT_SYMBOL_GPL(print_stack_trace);
27 27
28/* 28/*
29 * Architectures that do not implement save_stack_trace_tsk get this 29 * Architectures that do not implement save_stack_trace_tsk or
30 * weak alias and a once-per-bootup warning (whenever this facility 30 * save_stack_trace_regs get this weak alias and a once-per-bootup warning
31 * is utilized - for example by procfs): 31 * (whenever this facility is utilized - for example by procfs):
32 */ 32 */
33__weak void 33__weak void
34save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) 34save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
35{ 35{
36 WARN_ONCE(1, KERN_INFO "save_stack_trace_tsk() not implemented yet.\n"); 36 WARN_ONCE(1, KERN_INFO "save_stack_trace_tsk() not implemented yet.\n");
37} 37}
38
39__weak void
40save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
41{
42 WARN_ONCE(1, KERN_INFO "save_stack_trace_regs() not implemented yet.\n");
43}
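Editor's note: an architecture avoids the warning by supplying its own strong definition of save_stack_trace_regs(), walking from the exception frame instead of the current stack. A rough sketch of the expected shape (arch_frame_pointer() and arch_next_frame() are invented placeholders, not real API; the ULONG_MAX terminator follows the usual convention):

        #include <linux/kernel.h>
        #include <linux/ptrace.h>
        #include <linux/stacktrace.h>

        /* placeholders for the arch unwinder internals */
        extern unsigned long arch_frame_pointer(struct pt_regs *regs);
        extern int arch_next_frame(unsigned long *fp, unsigned long *ret_addr);

        void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
        {
                unsigned long fp = arch_frame_pointer(regs);

                while (trace->nr_entries < trace->max_entries &&
                       arch_next_frame(&fp, &trace->entries[trace->nr_entries]))
                        trace->nr_entries++;

                if (trace->nr_entries < trace->max_entries)
                        trace->entries[trace->nr_entries++] = ULONG_MAX;
        }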
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 908038f57440..c3e4575e7829 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -32,7 +32,6 @@
32 32
33#include <trace/events/sched.h> 33#include <trace/events/sched.h>
34 34
35#include <asm/ftrace.h>
36#include <asm/setup.h> 35#include <asm/setup.h>
37 36
38#include "trace_output.h" 37#include "trace_output.h"
@@ -82,14 +81,14 @@ static int ftrace_disabled __read_mostly;
82 81
83static DEFINE_MUTEX(ftrace_lock); 82static DEFINE_MUTEX(ftrace_lock);
84 83
85static struct ftrace_ops ftrace_list_end __read_mostly = 84static struct ftrace_ops ftrace_list_end __read_mostly = {
86{
87 .func = ftrace_stub, 85 .func = ftrace_stub,
88}; 86};
89 87
90static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end; 88static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end;
91static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; 89static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end;
92ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; 90ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
91static ftrace_func_t __ftrace_trace_function_delay __read_mostly = ftrace_stub;
93ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; 92ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
94ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; 93ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
95static struct ftrace_ops global_ops; 94static struct ftrace_ops global_ops;
@@ -148,9 +147,11 @@ void clear_ftrace_function(void)
148{ 147{
149 ftrace_trace_function = ftrace_stub; 148 ftrace_trace_function = ftrace_stub;
150 __ftrace_trace_function = ftrace_stub; 149 __ftrace_trace_function = ftrace_stub;
150 __ftrace_trace_function_delay = ftrace_stub;
151 ftrace_pid_function = ftrace_stub; 151 ftrace_pid_function = ftrace_stub;
152} 152}
153 153
154#undef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
154#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST 155#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
155/* 156/*
156 * For those archs that do not test ftrace_trace_stop in their 157 * For those archs that do not test ftrace_trace_stop in their
@@ -210,7 +211,12 @@ static void update_ftrace_function(void)
210#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST 211#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
211 ftrace_trace_function = func; 212 ftrace_trace_function = func;
212#else 213#else
214#ifdef CONFIG_DYNAMIC_FTRACE
215 /* do not update till all functions have been modified */
216 __ftrace_trace_function_delay = func;
217#else
213 __ftrace_trace_function = func; 218 __ftrace_trace_function = func;
219#endif
214 ftrace_trace_function = ftrace_test_stop_func; 220 ftrace_trace_function = ftrace_test_stop_func;
215#endif 221#endif
216} 222}
@@ -785,8 +791,7 @@ static void unregister_ftrace_profiler(void)
785 unregister_ftrace_graph(); 791 unregister_ftrace_graph();
786} 792}
787#else 793#else
788static struct ftrace_ops ftrace_profile_ops __read_mostly = 794static struct ftrace_ops ftrace_profile_ops __read_mostly = {
789{
790 .func = function_profile_call, 795 .func = function_profile_call,
791}; 796};
792 797
@@ -806,19 +811,10 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf,
806 size_t cnt, loff_t *ppos) 811 size_t cnt, loff_t *ppos)
807{ 812{
808 unsigned long val; 813 unsigned long val;
809 char buf[64]; /* big enough to hold a number */
810 int ret; 814 int ret;
811 815
812 if (cnt >= sizeof(buf)) 816 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
813 return -EINVAL; 817 if (ret)
814
815 if (copy_from_user(&buf, ubuf, cnt))
816 return -EFAULT;
817
818 buf[cnt] = 0;
819
820 ret = strict_strtoul(buf, 10, &val);
821 if (ret < 0)
822 return ret; 818 return ret;
823 819
824 val = !!val; 820 val = !!val;
@@ -1182,8 +1178,14 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash)
1182 return NULL; 1178 return NULL;
1183} 1179}
1184 1180
1181static void
1182ftrace_hash_rec_disable(struct ftrace_ops *ops, int filter_hash);
1183static void
1184ftrace_hash_rec_enable(struct ftrace_ops *ops, int filter_hash);
1185
1185static int 1186static int
1186ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src) 1187ftrace_hash_move(struct ftrace_ops *ops, int enable,
1188 struct ftrace_hash **dst, struct ftrace_hash *src)
1187{ 1189{
1188 struct ftrace_func_entry *entry; 1190 struct ftrace_func_entry *entry;
1189 struct hlist_node *tp, *tn; 1191 struct hlist_node *tp, *tn;
@@ -1193,9 +1195,16 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src)
1193 unsigned long key; 1195 unsigned long key;
1194 int size = src->count; 1196 int size = src->count;
1195 int bits = 0; 1197 int bits = 0;
1198 int ret;
1196 int i; 1199 int i;
1197 1200
1198 /* 1201 /*
1202 * Remove the current set, update the hash and add
1203 * them back.
1204 */
1205 ftrace_hash_rec_disable(ops, enable);
1206
1207 /*
1199 * If the new source is empty, just free dst and assign it 1208 * If the new source is empty, just free dst and assign it
1200 * the empty_hash. 1209 * the empty_hash.
1201 */ 1210 */
@@ -1215,9 +1224,10 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src)
1215 if (bits > FTRACE_HASH_MAX_BITS) 1224 if (bits > FTRACE_HASH_MAX_BITS)
1216 bits = FTRACE_HASH_MAX_BITS; 1225 bits = FTRACE_HASH_MAX_BITS;
1217 1226
1227 ret = -ENOMEM;
1218 new_hash = alloc_ftrace_hash(bits); 1228 new_hash = alloc_ftrace_hash(bits);
1219 if (!new_hash) 1229 if (!new_hash)
1220 return -ENOMEM; 1230 goto out;
1221 1231
1222 size = 1 << src->size_bits; 1232 size = 1 << src->size_bits;
1223 for (i = 0; i < size; i++) { 1233 for (i = 0; i < size; i++) {
@@ -1236,7 +1246,16 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src)
1236 rcu_assign_pointer(*dst, new_hash); 1246 rcu_assign_pointer(*dst, new_hash);
1237 free_ftrace_hash_rcu(old_hash); 1247 free_ftrace_hash_rcu(old_hash);
1238 1248
1239 return 0; 1249 ret = 0;
1250 out:
1251 /*
1252 * Enable regardless of ret:
1253 * On success, we enable the new hash.
1254 * On failure, we re-enable the original hash.
1255 */
1256 ftrace_hash_rec_enable(ops, enable);
1257
1258 return ret;
1240} 1259}
1241 1260
1242/* 1261/*
@@ -1596,6 +1615,12 @@ static int __ftrace_modify_code(void *data)
1596{ 1615{
1597 int *command = data; 1616 int *command = data;
1598 1617
1618 /*
1619 * Do not call function tracer while we update the code.
 1620 * We are in stop machine; there is no need to worry about races.
1621 */
1622 function_trace_stop++;
1623
1599 if (*command & FTRACE_ENABLE_CALLS) 1624 if (*command & FTRACE_ENABLE_CALLS)
1600 ftrace_replace_code(1); 1625 ftrace_replace_code(1);
1601 else if (*command & FTRACE_DISABLE_CALLS) 1626 else if (*command & FTRACE_DISABLE_CALLS)
@@ -1609,6 +1634,18 @@ static int __ftrace_modify_code(void *data)
1609 else if (*command & FTRACE_STOP_FUNC_RET) 1634 else if (*command & FTRACE_STOP_FUNC_RET)
1610 ftrace_disable_ftrace_graph_caller(); 1635 ftrace_disable_ftrace_graph_caller();
1611 1636
1637#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
1638 /*
1639 * For archs that call ftrace_test_stop_func(), we must
1640 * wait till after we update all the function callers
1641 * before we update the callback. This keeps different
1642 * ops that record different functions from corrupting
1643 * each other.
1644 */
1645 __ftrace_trace_function = __ftrace_trace_function_delay;
1646#endif
1647 function_trace_stop--;
1648
1612 return 0; 1649 return 0;
1613} 1650}
1614 1651
@@ -1744,10 +1781,36 @@ static cycle_t ftrace_update_time;
1744static unsigned long ftrace_update_cnt; 1781static unsigned long ftrace_update_cnt;
1745unsigned long ftrace_update_tot_cnt; 1782unsigned long ftrace_update_tot_cnt;
1746 1783
1784static int ops_traces_mod(struct ftrace_ops *ops)
1785{
1786 struct ftrace_hash *hash;
1787
1788 hash = ops->filter_hash;
1789 return !!(!hash || !hash->count);
1790}
1791
1747static int ftrace_update_code(struct module *mod) 1792static int ftrace_update_code(struct module *mod)
1748{ 1793{
1749 struct dyn_ftrace *p; 1794 struct dyn_ftrace *p;
1750 cycle_t start, stop; 1795 cycle_t start, stop;
1796 unsigned long ref = 0;
1797
1798 /*
1799 * When adding a module, we need to check if tracers are
1800 * currently enabled and if they are set to trace all functions.
1801 * If they are, we need to enable the module functions as well
1802 * as update the reference counts for those function records.
1803 */
1804 if (mod) {
1805 struct ftrace_ops *ops;
1806
1807 for (ops = ftrace_ops_list;
1808 ops != &ftrace_list_end; ops = ops->next) {
1809 if (ops->flags & FTRACE_OPS_FL_ENABLED &&
1810 ops_traces_mod(ops))
1811 ref++;
1812 }
1813 }
1751 1814
1752 start = ftrace_now(raw_smp_processor_id()); 1815 start = ftrace_now(raw_smp_processor_id());
1753 ftrace_update_cnt = 0; 1816 ftrace_update_cnt = 0;
@@ -1760,7 +1823,7 @@ static int ftrace_update_code(struct module *mod)
1760 1823
1761 p = ftrace_new_addrs; 1824 p = ftrace_new_addrs;
1762 ftrace_new_addrs = p->newlist; 1825 ftrace_new_addrs = p->newlist;
1763 p->flags = 0L; 1826 p->flags = ref;
1764 1827
1765 /* 1828 /*
1766 * Do the initial record conversion from mcount jump 1829 * Do the initial record conversion from mcount jump
@@ -1783,7 +1846,7 @@ static int ftrace_update_code(struct module *mod)
1783 * conversion puts the module to the correct state, thus 1846 * conversion puts the module to the correct state, thus
1784 * passing the ftrace_make_call check. 1847 * passing the ftrace_make_call check.
1785 */ 1848 */
1786 if (ftrace_start_up) { 1849 if (ftrace_start_up && ref) {
1787 int failed = __ftrace_replace_code(p, 1); 1850 int failed = __ftrace_replace_code(p, 1);
1788 if (failed) { 1851 if (failed) {
1789 ftrace_bug(failed, p->ip); 1852 ftrace_bug(failed, p->ip);
@@ -2407,10 +2470,9 @@ ftrace_match_module_records(struct ftrace_hash *hash, char *buff, char *mod)
2407 */ 2470 */
2408 2471
2409static int 2472static int
2410ftrace_mod_callback(char *func, char *cmd, char *param, int enable) 2473ftrace_mod_callback(struct ftrace_hash *hash,
2474 char *func, char *cmd, char *param, int enable)
2411{ 2475{
2412 struct ftrace_ops *ops = &global_ops;
2413 struct ftrace_hash *hash;
2414 char *mod; 2476 char *mod;
2415 int ret = -EINVAL; 2477 int ret = -EINVAL;
2416 2478
@@ -2430,11 +2492,6 @@ ftrace_mod_callback(char *func, char *cmd, char *param, int enable)
2430 if (!strlen(mod)) 2492 if (!strlen(mod))
2431 return ret; 2493 return ret;
2432 2494
2433 if (enable)
2434 hash = ops->filter_hash;
2435 else
2436 hash = ops->notrace_hash;
2437
2438 ret = ftrace_match_module_records(hash, func, mod); 2495 ret = ftrace_match_module_records(hash, func, mod);
2439 if (!ret) 2496 if (!ret)
2440 ret = -EINVAL; 2497 ret = -EINVAL;
@@ -2760,7 +2817,7 @@ static int ftrace_process_regex(struct ftrace_hash *hash,
2760 mutex_lock(&ftrace_cmd_mutex); 2817 mutex_lock(&ftrace_cmd_mutex);
2761 list_for_each_entry(p, &ftrace_commands, list) { 2818 list_for_each_entry(p, &ftrace_commands, list) {
2762 if (strcmp(p->name, command) == 0) { 2819 if (strcmp(p->name, command) == 0) {
2763 ret = p->func(func, command, next, enable); 2820 ret = p->func(hash, func, command, next, enable);
2764 goto out_unlock; 2821 goto out_unlock;
2765 } 2822 }
2766 } 2823 }
@@ -2857,7 +2914,11 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
2857 ftrace_match_records(hash, buf, len); 2914 ftrace_match_records(hash, buf, len);
2858 2915
2859 mutex_lock(&ftrace_lock); 2916 mutex_lock(&ftrace_lock);
2860 ret = ftrace_hash_move(orig_hash, hash); 2917 ret = ftrace_hash_move(ops, enable, orig_hash, hash);
2918 if (!ret && ops->flags & FTRACE_OPS_FL_ENABLED
2919 && ftrace_enabled)
2920 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
2921
2861 mutex_unlock(&ftrace_lock); 2922 mutex_unlock(&ftrace_lock);
2862 2923
2863 mutex_unlock(&ftrace_regex_lock); 2924 mutex_unlock(&ftrace_regex_lock);
@@ -3040,18 +3101,12 @@ ftrace_regex_release(struct inode *inode, struct file *file)
3040 orig_hash = &iter->ops->notrace_hash; 3101 orig_hash = &iter->ops->notrace_hash;
3041 3102
3042 mutex_lock(&ftrace_lock); 3103 mutex_lock(&ftrace_lock);
3043 /* 3104 ret = ftrace_hash_move(iter->ops, filter_hash,
3044 * Remove the current set, update the hash and add 3105 orig_hash, iter->hash);
3045 * them back. 3106 if (!ret && (iter->ops->flags & FTRACE_OPS_FL_ENABLED)
3046 */ 3107 && ftrace_enabled)
3047 ftrace_hash_rec_disable(iter->ops, filter_hash); 3108 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
3048 ret = ftrace_hash_move(orig_hash, iter->hash); 3109
3049 if (!ret) {
3050 ftrace_hash_rec_enable(iter->ops, filter_hash);
3051 if (iter->ops->flags & FTRACE_OPS_FL_ENABLED
3052 && ftrace_enabled)
3053 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
3054 }
3055 mutex_unlock(&ftrace_lock); 3110 mutex_unlock(&ftrace_lock);
3056 } 3111 }
3057 free_ftrace_hash(iter->hash); 3112 free_ftrace_hash(iter->hash);
@@ -3330,7 +3385,7 @@ static int ftrace_process_locs(struct module *mod,
3330{ 3385{
3331 unsigned long *p; 3386 unsigned long *p;
3332 unsigned long addr; 3387 unsigned long addr;
3333 unsigned long flags; 3388 unsigned long flags = 0; /* Shut up gcc */
3334 3389
3335 mutex_lock(&ftrace_lock); 3390 mutex_lock(&ftrace_lock);
3336 p = start; 3391 p = start;
@@ -3348,12 +3403,18 @@ static int ftrace_process_locs(struct module *mod,
3348 } 3403 }
3349 3404
3350 /* 3405 /*
3351 * Disable interrupts to prevent interrupts from executing 3406 * We only need to disable interrupts on start up
3352 * code that is being modified. 3407 * because we are modifying code that an interrupt
3408 * may execute, and the modification is not atomic.
3409 * But for modules, nothing runs the code we modify
3410 * until we are finished with it, and there's no
3411 * reason to cause large interrupt latencies while we do it.
3353 */ 3412 */
3354 local_irq_save(flags); 3413 if (!mod)
3414 local_irq_save(flags);
3355 ftrace_update_code(mod); 3415 ftrace_update_code(mod);
3356 local_irq_restore(flags); 3416 if (!mod)
3417 local_irq_restore(flags);
3357 mutex_unlock(&ftrace_lock); 3418 mutex_unlock(&ftrace_lock);
3358 3419
3359 return 0; 3420 return 0;
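Editor's note: a recurring cleanup in this file (and again below in ring_buffer.c and trace.c) folds the open-coded bounds check, copy_from_user() and strict_strtoul() into one kstrtoul_from_user() call. The resulting handler shape, sketched with a hypothetical apply_knob():

        #include <linux/fs.h>
        #include <linux/kernel.h>

        extern void apply_knob(int on);         /* hypothetical consumer */

        static ssize_t knob_write(struct file *filp, const char __user *ubuf,
                                  size_t cnt, loff_t *ppos)
        {
                unsigned long val;
                int ret;

                ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
                if (ret)                /* covers both copy and parse errors */
                        return ret;

                apply_knob(!!val);

                *ppos += cnt;
                return cnt;
        }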
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index b0c7aa407943..731201bf4acc 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -997,15 +997,21 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
997 unsigned nr_pages) 997 unsigned nr_pages)
998{ 998{
999 struct buffer_page *bpage, *tmp; 999 struct buffer_page *bpage, *tmp;
1000 unsigned long addr;
1001 LIST_HEAD(pages); 1000 LIST_HEAD(pages);
1002 unsigned i; 1001 unsigned i;
1003 1002
1004 WARN_ON(!nr_pages); 1003 WARN_ON(!nr_pages);
1005 1004
1006 for (i = 0; i < nr_pages; i++) { 1005 for (i = 0; i < nr_pages; i++) {
1006 struct page *page;
1007 /*
1008 * __GFP_NORETRY flag makes sure that the allocation fails
1009 * gracefully without invoking oom-killer and the system is
1010 * not destabilized.
1011 */
1007 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), 1012 bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
1008 GFP_KERNEL, cpu_to_node(cpu_buffer->cpu)); 1013 GFP_KERNEL | __GFP_NORETRY,
1014 cpu_to_node(cpu_buffer->cpu));
1009 if (!bpage) 1015 if (!bpage)
1010 goto free_pages; 1016 goto free_pages;
1011 1017
@@ -1013,10 +1019,11 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
1013 1019
1014 list_add(&bpage->list, &pages); 1020 list_add(&bpage->list, &pages);
1015 1021
1016 addr = __get_free_page(GFP_KERNEL); 1022 page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu),
1017 if (!addr) 1023 GFP_KERNEL | __GFP_NORETRY, 0);
1024 if (!page)
1018 goto free_pages; 1025 goto free_pages;
1019 bpage->page = (void *)addr; 1026 bpage->page = page_address(page);
1020 rb_init_page(bpage->page); 1027 rb_init_page(bpage->page);
1021 } 1028 }
1022 1029
@@ -1045,7 +1052,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
1045{ 1052{
1046 struct ring_buffer_per_cpu *cpu_buffer; 1053 struct ring_buffer_per_cpu *cpu_buffer;
1047 struct buffer_page *bpage; 1054 struct buffer_page *bpage;
1048 unsigned long addr; 1055 struct page *page;
1049 int ret; 1056 int ret;
1050 1057
1051 cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()), 1058 cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
@@ -1067,10 +1074,10 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
1067 rb_check_bpage(cpu_buffer, bpage); 1074 rb_check_bpage(cpu_buffer, bpage);
1068 1075
1069 cpu_buffer->reader_page = bpage; 1076 cpu_buffer->reader_page = bpage;
1070 addr = __get_free_page(GFP_KERNEL); 1077 page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, 0);
1071 if (!addr) 1078 if (!page)
1072 goto fail_free_reader; 1079 goto fail_free_reader;
1073 bpage->page = (void *)addr; 1080 bpage->page = page_address(page);
1074 rb_init_page(bpage->page); 1081 rb_init_page(bpage->page);
1075 1082
1076 INIT_LIST_HEAD(&cpu_buffer->reader_page->list); 1083 INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
@@ -1314,7 +1321,6 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1314 unsigned nr_pages, rm_pages, new_pages; 1321 unsigned nr_pages, rm_pages, new_pages;
1315 struct buffer_page *bpage, *tmp; 1322 struct buffer_page *bpage, *tmp;
1316 unsigned long buffer_size; 1323 unsigned long buffer_size;
1317 unsigned long addr;
1318 LIST_HEAD(pages); 1324 LIST_HEAD(pages);
1319 int i, cpu; 1325 int i, cpu;
1320 1326
@@ -1375,16 +1381,24 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
1375 1381
1376 for_each_buffer_cpu(buffer, cpu) { 1382 for_each_buffer_cpu(buffer, cpu) {
1377 for (i = 0; i < new_pages; i++) { 1383 for (i = 0; i < new_pages; i++) {
1384 struct page *page;
1385 /*
 1386 * The __GFP_NORETRY flag makes sure that the allocation
 1387 * fails gracefully without invoking the OOM killer, so
 1388 * the system is not destabilized.
1389 */
1378 bpage = kzalloc_node(ALIGN(sizeof(*bpage), 1390 bpage = kzalloc_node(ALIGN(sizeof(*bpage),
1379 cache_line_size()), 1391 cache_line_size()),
1380 GFP_KERNEL, cpu_to_node(cpu)); 1392 GFP_KERNEL | __GFP_NORETRY,
1393 cpu_to_node(cpu));
1381 if (!bpage) 1394 if (!bpage)
1382 goto free_pages; 1395 goto free_pages;
1383 list_add(&bpage->list, &pages); 1396 list_add(&bpage->list, &pages);
1384 addr = __get_free_page(GFP_KERNEL); 1397 page = alloc_pages_node(cpu_to_node(cpu),
1385 if (!addr) 1398 GFP_KERNEL | __GFP_NORETRY, 0);
1399 if (!page)
1386 goto free_pages; 1400 goto free_pages;
1387 bpage->page = (void *)addr; 1401 bpage->page = page_address(page);
1388 rb_init_page(bpage->page); 1402 rb_init_page(bpage->page);
1389 } 1403 }
1390 } 1404 }
@@ -3730,16 +3744,17 @@ EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
3730 * Returns: 3744 * Returns:
3731 * The page allocated, or NULL on error. 3745 * The page allocated, or NULL on error.
3732 */ 3746 */
3733void *ring_buffer_alloc_read_page(struct ring_buffer *buffer) 3747void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu)
3734{ 3748{
3735 struct buffer_data_page *bpage; 3749 struct buffer_data_page *bpage;
3736 unsigned long addr; 3750 struct page *page;
3737 3751
3738 addr = __get_free_page(GFP_KERNEL); 3752 page = alloc_pages_node(cpu_to_node(cpu),
3739 if (!addr) 3753 GFP_KERNEL | __GFP_NORETRY, 0);
3754 if (!page)
3740 return NULL; 3755 return NULL;
3741 3756
3742 bpage = (void *)addr; 3757 bpage = page_address(page);
3743 3758
3744 rb_init_page(bpage); 3759 rb_init_page(bpage);
3745 3760
@@ -3978,20 +3993,11 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
3978 size_t cnt, loff_t *ppos) 3993 size_t cnt, loff_t *ppos)
3979{ 3994{
3980 unsigned long *p = filp->private_data; 3995 unsigned long *p = filp->private_data;
3981 char buf[64];
3982 unsigned long val; 3996 unsigned long val;
3983 int ret; 3997 int ret;
3984 3998
3985 if (cnt >= sizeof(buf)) 3999 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
3986 return -EINVAL; 4000 if (ret)
3987
3988 if (copy_from_user(&buf, ubuf, cnt))
3989 return -EFAULT;
3990
3991 buf[cnt] = 0;
3992
3993 ret = strict_strtoul(buf, 10, &val);
3994 if (ret < 0)
3995 return ret; 4001 return ret;
3996 4002
3997 if (val) 4003 if (val)
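Editor's note: the allocation change is a small reusable pattern — take buffer pages from the consuming CPU's node, and fail fast with __GFP_NORETRY rather than let a huge resize invoke the OOM killer. Condensed into a helper (a sketch; callers are assumed to unwind on NULL):

        #include <linux/gfp.h>
        #include <linux/mm.h>
        #include <linux/topology.h>

        static void *alloc_buffer_page(int cpu)
        {
                struct page *page;

                page = alloc_pages_node(cpu_to_node(cpu),
                                        GFP_KERNEL | __GFP_NORETRY, 0);
                if (!page)
                        return NULL;    /* graceful failure, no OOM kill */

                return page_address(page);
        }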
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index 302f8a614635..a5457d577b98 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -106,7 +106,7 @@ static enum event_status read_page(int cpu)
106 int inc; 106 int inc;
107 int i; 107 int i;
108 108
109 bpage = ring_buffer_alloc_read_page(buffer); 109 bpage = ring_buffer_alloc_read_page(buffer, cpu);
110 if (!bpage) 110 if (!bpage)
111 return EVENT_DROPPED; 111 return EVENT_DROPPED;
112 112
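Editor's note: with the extra cpu argument, the read page is allocated on the same node as the buffer it will be swapped into. A hedged usage sketch pairing it with ring_buffer_read_page() and ring_buffer_free_read_page() (inspect_page() is a placeholder):

        #include <linux/mm.h>
        #include <linux/ring_buffer.h>

        extern void inspect_page(void *bpage, int read);        /* hypothetical */

        static void sample_one_page(struct ring_buffer *buffer, int cpu)
        {
                void *bpage = ring_buffer_alloc_read_page(buffer, cpu);
                int r;

                if (!bpage)
                        return;

                r = ring_buffer_read_page(buffer, &bpage, PAGE_SIZE, cpu, 0);
                if (r >= 0)
                        inspect_page(bpage, r);

                ring_buffer_free_read_page(buffer, bpage);
        }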
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ee9c921d7f21..e5df02c69b1d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -343,26 +343,27 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
343static int trace_stop_count; 343static int trace_stop_count;
344static DEFINE_SPINLOCK(tracing_start_lock); 344static DEFINE_SPINLOCK(tracing_start_lock);
345 345
346static void wakeup_work_handler(struct work_struct *work)
347{
348 wake_up(&trace_wait);
349}
350
351static DECLARE_DELAYED_WORK(wakeup_work, wakeup_work_handler);
352
346/** 353/**
347 * trace_wake_up - wake up tasks waiting for trace input 354 * trace_wake_up - wake up tasks waiting for trace input
348 * 355 *
349 * Simply wakes up any task that is blocked on the trace_wait 356 * Schedules a delayed work to wake up any task that is blocked on the
 350 * queue. These is used with trace_poll for tasks polling the trace. 357 * trace_wait queue. This is used with trace_poll for tasks polling the
358 * trace.
351 */ 359 */
352void trace_wake_up(void) 360void trace_wake_up(void)
353{ 361{
354 int cpu; 362 const unsigned long delay = msecs_to_jiffies(2);
355 363
356 if (trace_flags & TRACE_ITER_BLOCK) 364 if (trace_flags & TRACE_ITER_BLOCK)
357 return; 365 return;
358 /* 366 schedule_delayed_work(&wakeup_work, delay);
359 * The runqueue_is_locked() can fail, but this is the best we
360 * have for now:
361 */
362 cpu = get_cpu();
363 if (!runqueue_is_locked(cpu))
364 wake_up(&trace_wait);
365 put_cpu();
366} 367}
367 368
368static int __init set_buf_size(char *str) 369static int __init set_buf_size(char *str)
@@ -424,6 +425,7 @@ static const char *trace_options[] = {
424 "graph-time", 425 "graph-time",
425 "record-cmd", 426 "record-cmd",
426 "overwrite", 427 "overwrite",
428 "disable_on_free",
427 NULL 429 NULL
428}; 430};
429 431
@@ -1191,6 +1193,18 @@ void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer,
1191} 1193}
1192EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit); 1194EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit);
1193 1195
1196void trace_nowake_buffer_unlock_commit_regs(struct ring_buffer *buffer,
1197 struct ring_buffer_event *event,
1198 unsigned long flags, int pc,
1199 struct pt_regs *regs)
1200{
1201 ring_buffer_unlock_commit(buffer, event);
1202
1203 ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
1204 ftrace_trace_userstack(buffer, flags, pc);
1205}
1206EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit_regs);
1207
1194void trace_current_buffer_discard_commit(struct ring_buffer *buffer, 1208void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1195 struct ring_buffer_event *event) 1209 struct ring_buffer_event *event)
1196{ 1210{
@@ -1234,30 +1248,103 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data,
1234} 1248}
1235 1249
1236#ifdef CONFIG_STACKTRACE 1250#ifdef CONFIG_STACKTRACE
1251
1252#define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1253struct ftrace_stack {
1254 unsigned long calls[FTRACE_STACK_MAX_ENTRIES];
1255};
1256
1257static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1258static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1259
1237static void __ftrace_trace_stack(struct ring_buffer *buffer, 1260static void __ftrace_trace_stack(struct ring_buffer *buffer,
1238 unsigned long flags, 1261 unsigned long flags,
1239 int skip, int pc) 1262 int skip, int pc, struct pt_regs *regs)
1240{ 1263{
1241 struct ftrace_event_call *call = &event_kernel_stack; 1264 struct ftrace_event_call *call = &event_kernel_stack;
1242 struct ring_buffer_event *event; 1265 struct ring_buffer_event *event;
1243 struct stack_entry *entry; 1266 struct stack_entry *entry;
1244 struct stack_trace trace; 1267 struct stack_trace trace;
1268 int use_stack;
1269 int size = FTRACE_STACK_ENTRIES;
1270
1271 trace.nr_entries = 0;
1272 trace.skip = skip;
1273
1274 /*
1275 * Since events can happen in NMIs there's no safe way to
1276 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
1277 * or NMI comes in, it will just have to use the default
1278 * FTRACE_STACK_SIZE.
1279 */
1280 preempt_disable_notrace();
1281
1282 use_stack = ++__get_cpu_var(ftrace_stack_reserve);
1283 /*
1284 * We don't need any atomic variables, just a barrier.
1285 * If an interrupt comes in, we don't care, because it would
1286 * have exited and put the counter back to what we want.
1287 * We just need a barrier to keep gcc from moving things
1288 * around.
1289 */
1290 barrier();
1291 if (use_stack == 1) {
1292 trace.entries = &__get_cpu_var(ftrace_stack).calls[0];
1293 trace.max_entries = FTRACE_STACK_MAX_ENTRIES;
1294
1295 if (regs)
1296 save_stack_trace_regs(regs, &trace);
1297 else
1298 save_stack_trace(&trace);
1299
1300 if (trace.nr_entries > size)
1301 size = trace.nr_entries;
1302 } else
1303 /* From now on, use_stack is a boolean */
1304 use_stack = 0;
1305
1306 size *= sizeof(unsigned long);
1245 1307
1246 event = trace_buffer_lock_reserve(buffer, TRACE_STACK, 1308 event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1247 sizeof(*entry), flags, pc); 1309 sizeof(*entry) + size, flags, pc);
1248 if (!event) 1310 if (!event)
1249 return; 1311 goto out;
1250 entry = ring_buffer_event_data(event); 1312 entry = ring_buffer_event_data(event);
1251 memset(&entry->caller, 0, sizeof(entry->caller));
1252 1313
1253 trace.nr_entries = 0; 1314 memset(&entry->caller, 0, size);
1254 trace.max_entries = FTRACE_STACK_ENTRIES; 1315
1255 trace.skip = skip; 1316 if (use_stack)
1256 trace.entries = entry->caller; 1317 memcpy(&entry->caller, trace.entries,
1318 trace.nr_entries * sizeof(unsigned long));
1319 else {
1320 trace.max_entries = FTRACE_STACK_ENTRIES;
1321 trace.entries = entry->caller;
1322 if (regs)
1323 save_stack_trace_regs(regs, &trace);
1324 else
1325 save_stack_trace(&trace);
1326 }
1327
1328 entry->size = trace.nr_entries;
1257 1329
1258 save_stack_trace(&trace);
1259 if (!filter_check_discard(call, entry, buffer, event)) 1330 if (!filter_check_discard(call, entry, buffer, event))
1260 ring_buffer_unlock_commit(buffer, event); 1331 ring_buffer_unlock_commit(buffer, event);
1332
1333 out:
1334 /* Again, don't let gcc optimize things here */
1335 barrier();
1336 __get_cpu_var(ftrace_stack_reserve)--;
1337 preempt_enable_notrace();
1338
1339}
1340
1341void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
1342 int skip, int pc, struct pt_regs *regs)
1343{
1344 if (!(trace_flags & TRACE_ITER_STACKTRACE))
1345 return;
1346
1347 __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1261} 1348}
1262 1349
1263void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags, 1350void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
@@ -1266,13 +1353,13 @@ void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1266 if (!(trace_flags & TRACE_ITER_STACKTRACE)) 1353 if (!(trace_flags & TRACE_ITER_STACKTRACE))
1267 return; 1354 return;
1268 1355
1269 __ftrace_trace_stack(buffer, flags, skip, pc); 1356 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
1270} 1357}
1271 1358
1272void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, 1359void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1273 int pc) 1360 int pc)
1274{ 1361{
1275 __ftrace_trace_stack(tr->buffer, flags, skip, pc); 1362 __ftrace_trace_stack(tr->buffer, flags, skip, pc, NULL);
1276} 1363}
1277 1364
1278/** 1365/**
@@ -1288,7 +1375,7 @@ void trace_dump_stack(void)
1288 local_save_flags(flags); 1375 local_save_flags(flags);
1289 1376
1290 /* skipping 3 traces, seems to get us at the caller of this function */ 1377 /* skipping 3 traces, seems to get us at the caller of this function */
1291 __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count()); 1378 __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count(), NULL);
1292} 1379}
1293 1380
1294static DEFINE_PER_CPU(int, user_stack_count); 1381static DEFINE_PER_CPU(int, user_stack_count);
@@ -1536,7 +1623,12 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
1536 1623
1537 ftrace_enable_cpu(); 1624 ftrace_enable_cpu();
1538 1625
1539 return event ? ring_buffer_event_data(event) : NULL; 1626 if (event) {
1627 iter->ent_size = ring_buffer_event_length(event);
1628 return ring_buffer_event_data(event);
1629 }
1630 iter->ent_size = 0;
1631 return NULL;
1540} 1632}
1541 1633
1542static struct trace_entry * 1634static struct trace_entry *
@@ -2051,6 +2143,9 @@ void trace_default_header(struct seq_file *m)
2051{ 2143{
2052 struct trace_iterator *iter = m->private; 2144 struct trace_iterator *iter = m->private;
2053 2145
2146 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2147 return;
2148
2054 if (iter->iter_flags & TRACE_FILE_LAT_FMT) { 2149 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2055 /* print nothing if the buffers are empty */ 2150 /* print nothing if the buffers are empty */
2056 if (trace_empty(iter)) 2151 if (trace_empty(iter))
@@ -2701,20 +2796,11 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2701 size_t cnt, loff_t *ppos) 2796 size_t cnt, loff_t *ppos)
2702{ 2797{
2703 struct trace_array *tr = filp->private_data; 2798 struct trace_array *tr = filp->private_data;
2704 char buf[64];
2705 unsigned long val; 2799 unsigned long val;
2706 int ret; 2800 int ret;
2707 2801
2708 if (cnt >= sizeof(buf)) 2802 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
2709 return -EINVAL; 2803 if (ret)
2710
2711 if (copy_from_user(&buf, ubuf, cnt))
2712 return -EFAULT;
2713
2714 buf[cnt] = 0;
2715
2716 ret = strict_strtoul(buf, 10, &val);
2717 if (ret < 0)
2718 return ret; 2804 return ret;
2719 2805
2720 val = !!val; 2806 val = !!val;
@@ -2767,7 +2853,7 @@ int tracer_init(struct tracer *t, struct trace_array *tr)
2767 return t->init(tr); 2853 return t->init(tr);
2768} 2854}
2769 2855
2770static int tracing_resize_ring_buffer(unsigned long size) 2856static int __tracing_resize_ring_buffer(unsigned long size)
2771{ 2857{
2772 int ret; 2858 int ret;
2773 2859
@@ -2819,6 +2905,41 @@ static int tracing_resize_ring_buffer(unsigned long size)
2819 return ret; 2905 return ret;
2820} 2906}
2821 2907
2908static ssize_t tracing_resize_ring_buffer(unsigned long size)
2909{
2910 int cpu, ret = size;
2911
2912 mutex_lock(&trace_types_lock);
2913
2914 tracing_stop();
2915
2916 /* disable all cpu buffers */
2917 for_each_tracing_cpu(cpu) {
2918 if (global_trace.data[cpu])
2919 atomic_inc(&global_trace.data[cpu]->disabled);
2920 if (max_tr.data[cpu])
2921 atomic_inc(&max_tr.data[cpu]->disabled);
2922 }
2923
2924 if (size != global_trace.entries)
2925 ret = __tracing_resize_ring_buffer(size);
2926
2927 if (ret < 0)
2928 ret = -ENOMEM;
2929
2930 for_each_tracing_cpu(cpu) {
2931 if (global_trace.data[cpu])
2932 atomic_dec(&global_trace.data[cpu]->disabled);
2933 if (max_tr.data[cpu])
2934 atomic_dec(&max_tr.data[cpu]->disabled);
2935 }
2936
2937 tracing_start();
2938 mutex_unlock(&trace_types_lock);
2939
2940 return ret;
2941}
2942
2822 2943
2823/** 2944/**
2824 * tracing_update_buffers - used by tracing facility to expand ring buffers 2945 * tracing_update_buffers - used by tracing facility to expand ring buffers
@@ -2836,7 +2957,7 @@ int tracing_update_buffers(void)
2836 2957
2837 mutex_lock(&trace_types_lock); 2958 mutex_lock(&trace_types_lock);
2838 if (!ring_buffer_expanded) 2959 if (!ring_buffer_expanded)
2839 ret = tracing_resize_ring_buffer(trace_buf_size); 2960 ret = __tracing_resize_ring_buffer(trace_buf_size);
2840 mutex_unlock(&trace_types_lock); 2961 mutex_unlock(&trace_types_lock);
2841 2962
2842 return ret; 2963 return ret;
@@ -2860,7 +2981,7 @@ static int tracing_set_tracer(const char *buf)
2860 mutex_lock(&trace_types_lock); 2981 mutex_lock(&trace_types_lock);
2861 2982
2862 if (!ring_buffer_expanded) { 2983 if (!ring_buffer_expanded) {
2863 ret = tracing_resize_ring_buffer(trace_buf_size); 2984 ret = __tracing_resize_ring_buffer(trace_buf_size);
2864 if (ret < 0) 2985 if (ret < 0)
2865 goto out; 2986 goto out;
2866 ret = 0; 2987 ret = 0;
@@ -2966,20 +3087,11 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf,
2966 size_t cnt, loff_t *ppos) 3087 size_t cnt, loff_t *ppos)
2967{ 3088{
2968 unsigned long *ptr = filp->private_data; 3089 unsigned long *ptr = filp->private_data;
2969 char buf[64];
2970 unsigned long val; 3090 unsigned long val;
2971 int ret; 3091 int ret;
2972 3092
2973 if (cnt >= sizeof(buf)) 3093 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
2974 return -EINVAL; 3094 if (ret)
2975
2976 if (copy_from_user(&buf, ubuf, cnt))
2977 return -EFAULT;
2978
2979 buf[cnt] = 0;
2980
2981 ret = strict_strtoul(buf, 10, &val);
2982 if (ret < 0)
2983 return ret; 3095 return ret;
2984 3096
2985 *ptr = val * 1000; 3097 *ptr = val * 1000;
@@ -3434,67 +3546,54 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
3434 size_t cnt, loff_t *ppos) 3546 size_t cnt, loff_t *ppos)
3435{ 3547{
3436 unsigned long val; 3548 unsigned long val;
3437 char buf[64]; 3549 int ret;
3438 int ret, cpu;
3439
3440 if (cnt >= sizeof(buf))
3441 return -EINVAL;
3442
3443 if (copy_from_user(&buf, ubuf, cnt))
3444 return -EFAULT;
3445
3446 buf[cnt] = 0;
3447 3550
3448 ret = strict_strtoul(buf, 10, &val); 3551 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
3449 if (ret < 0) 3552 if (ret)
3450 return ret; 3553 return ret;
3451 3554
3452 /* must have at least 1 entry */ 3555 /* must have at least 1 entry */
3453 if (!val) 3556 if (!val)
3454 return -EINVAL; 3557 return -EINVAL;
3455 3558
3456 mutex_lock(&trace_types_lock);
3457
3458 tracing_stop();
3459
3460 /* disable all cpu buffers */
3461 for_each_tracing_cpu(cpu) {
3462 if (global_trace.data[cpu])
3463 atomic_inc(&global_trace.data[cpu]->disabled);
3464 if (max_tr.data[cpu])
3465 atomic_inc(&max_tr.data[cpu]->disabled);
3466 }
3467
3468 /* value is in KB */ 3559 /* value is in KB */
3469 val <<= 10; 3560 val <<= 10;
3470 3561
3471 if (val != global_trace.entries) { 3562 ret = tracing_resize_ring_buffer(val);
3472 ret = tracing_resize_ring_buffer(val); 3563 if (ret < 0)
3473 if (ret < 0) { 3564 return ret;
3474 cnt = ret;
3475 goto out;
3476 }
3477 }
3478 3565
3479 *ppos += cnt; 3566 *ppos += cnt;
3480 3567
3481 /* If check pages failed, return ENOMEM */ 3568 return cnt;
3482 if (tracing_disabled) 3569}
3483 cnt = -ENOMEM;
3484 out:
3485 for_each_tracing_cpu(cpu) {
3486 if (global_trace.data[cpu])
3487 atomic_dec(&global_trace.data[cpu]->disabled);
3488 if (max_tr.data[cpu])
3489 atomic_dec(&max_tr.data[cpu]->disabled);
3490 }
3491 3570
3492 tracing_start(); 3571static ssize_t
3493 mutex_unlock(&trace_types_lock); 3572tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
3573 size_t cnt, loff_t *ppos)
3574{
3575 /*
 3576 * There is no need to read what the user has written; this function
 3577 * just makes sure that there is no error when "echo" is used
3578 */
3579
3580 *ppos += cnt;
3494 3581
3495 return cnt; 3582 return cnt;
3496} 3583}
3497 3584
3585static int
3586tracing_free_buffer_release(struct inode *inode, struct file *filp)
3587{
 3588 /* disable tracing? */
3589 if (trace_flags & TRACE_ITER_STOP_ON_FREE)
3590 tracing_off();
3591 /* resize the ring buffer to 0 */
3592 tracing_resize_ring_buffer(0);
3593
3594 return 0;
3595}
3596
3498static int mark_printk(const char *fmt, ...) 3597static int mark_printk(const char *fmt, ...)
3499{ 3598{
3500 int ret; 3599 int ret;
@@ -3640,6 +3739,11 @@ static const struct file_operations tracing_entries_fops = {
3640 .llseek = generic_file_llseek, 3739 .llseek = generic_file_llseek,
3641}; 3740};
3642 3741
3742static const struct file_operations tracing_free_buffer_fops = {
3743 .write = tracing_free_buffer_write,
3744 .release = tracing_free_buffer_release,
3745};
3746
3643static const struct file_operations tracing_mark_fops = { 3747static const struct file_operations tracing_mark_fops = {
3644 .open = tracing_open_generic, 3748 .open = tracing_open_generic,
3645 .write = tracing_mark_write, 3749 .write = tracing_mark_write,
@@ -3696,7 +3800,7 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
3696 return 0; 3800 return 0;
3697 3801
3698 if (!info->spare) 3802 if (!info->spare)
3699 info->spare = ring_buffer_alloc_read_page(info->tr->buffer); 3803 info->spare = ring_buffer_alloc_read_page(info->tr->buffer, info->cpu);
3700 if (!info->spare) 3804 if (!info->spare)
3701 return -ENOMEM; 3805 return -ENOMEM;
3702 3806
@@ -3853,7 +3957,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3853 3957
3854 ref->ref = 1; 3958 ref->ref = 1;
3855 ref->buffer = info->tr->buffer; 3959 ref->buffer = info->tr->buffer;
3856 ref->page = ring_buffer_alloc_read_page(ref->buffer); 3960 ref->page = ring_buffer_alloc_read_page(ref->buffer, info->cpu);
3857 if (!ref->page) { 3961 if (!ref->page) {
3858 kfree(ref); 3962 kfree(ref);
3859 break; 3963 break;
@@ -3862,8 +3966,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3862 r = ring_buffer_read_page(ref->buffer, &ref->page, 3966 r = ring_buffer_read_page(ref->buffer, &ref->page,
3863 len, info->cpu, 1); 3967 len, info->cpu, 1);
3864 if (r < 0) { 3968 if (r < 0) {
3865 ring_buffer_free_read_page(ref->buffer, 3969 ring_buffer_free_read_page(ref->buffer, ref->page);
3866 ref->page);
3867 kfree(ref); 3970 kfree(ref);
3868 break; 3971 break;
3869 } 3972 }
@@ -4099,19 +4202,10 @@ trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
4099{ 4202{
4100 struct trace_option_dentry *topt = filp->private_data; 4203 struct trace_option_dentry *topt = filp->private_data;
4101 unsigned long val; 4204 unsigned long val;
4102 char buf[64];
4103 int ret; 4205 int ret;
4104 4206
4105	if (cnt >= sizeof(buf))
4106		return -EINVAL;
4107
4108	if (copy_from_user(&buf, ubuf, cnt))
4109		return -EFAULT;
4110
4111	buf[cnt] = 0;
4112
4113	ret = strict_strtoul(buf, 10, &val);
4114	if (ret < 0)
4115		return ret;
4207	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4208	if (ret)
4209		return ret;
4116 4210
4117 if (val != 0 && val != 1) 4211 if (val != 0 && val != 1)
@@ -4159,20 +4253,11 @@ trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
4159 loff_t *ppos) 4253 loff_t *ppos)
4160{ 4254{
4161 long index = (long)filp->private_data; 4255 long index = (long)filp->private_data;
4162 char buf[64];
4163 unsigned long val; 4256 unsigned long val;
4164 int ret; 4257 int ret;
4165 4258
4166	if (cnt >= sizeof(buf))
4167		return -EINVAL;
4168
4169	if (copy_from_user(&buf, ubuf, cnt))
4170		return -EFAULT;
4171
4172	buf[cnt] = 0;
4173
4174	ret = strict_strtoul(buf, 10, &val);
4175	if (ret < 0)
4176		return ret;
4259	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4260	if (ret)
4261		return ret;
4177 4262
4178 if (val != 0 && val != 1) 4263 if (val != 0 && val != 1)
@@ -4365,6 +4450,9 @@ static __init int tracer_init_debugfs(void)
4365 trace_create_file("buffer_size_kb", 0644, d_tracer, 4450 trace_create_file("buffer_size_kb", 0644, d_tracer,
4366 &global_trace, &tracing_entries_fops); 4451 &global_trace, &tracing_entries_fops);
4367 4452
4453 trace_create_file("free_buffer", 0644, d_tracer,
4454 &global_trace, &tracing_free_buffer_fops);
4455
4368 trace_create_file("trace_marker", 0220, d_tracer, 4456 trace_create_file("trace_marker", 0220, d_tracer,
4369 NULL, &tracing_mark_fops); 4457 NULL, &tracing_mark_fops);
4370 4458
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 229f8591f61d..3f381d0b20a8 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -278,6 +278,29 @@ struct tracer {
278}; 278};
279 279
280 280
281/* Only current can touch trace_recursion */
282#define trace_recursion_inc() do { (current)->trace_recursion++; } while (0)
283#define trace_recursion_dec() do { (current)->trace_recursion--; } while (0)
284
285/* Ring buffer has the 10 LSB bits to count */
286#define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff)
287
288/* for function tracing recursion */
289#define TRACE_INTERNAL_BIT (1<<11)
290#define TRACE_GLOBAL_BIT (1<<12)
291/*
292 * Abuse of the trace_recursion.
293 * As we need a way to maintain state if we are tracing the function
294 * graph in irq because we want to trace a particular function that
295 * was called in irq context but we have irq tracing off. Since this
296 * can only be modified by current, we can reuse trace_recursion.
297 */
298#define TRACE_IRQ_BIT (1<<13)
299
300#define trace_recursion_set(bit) do { (current)->trace_recursion |= (bit); } while (0)
301#define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(bit); } while (0)
302#define trace_recursion_test(bit) ((current)->trace_recursion & (bit))
303
281#define TRACE_PIPE_ALL_CPU -1 304#define TRACE_PIPE_ALL_CPU -1
282 305
283int tracer_init(struct tracer *t, struct trace_array *tr); 306int tracer_init(struct tracer *t, struct trace_array *tr);
@@ -389,6 +412,9 @@ void update_max_tr_single(struct trace_array *tr,
389void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags, 412void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
390 int skip, int pc); 413 int skip, int pc);
391 414
415void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
416 int skip, int pc, struct pt_regs *regs);
417
392void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, 418void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags,
393 int pc); 419 int pc);
394 420
@@ -400,6 +426,12 @@ static inline void ftrace_trace_stack(struct ring_buffer *buffer,
400{ 426{
401} 427}
402 428
429static inline void ftrace_trace_stack_regs(struct ring_buffer *buffer,
430 unsigned long flags, int skip,
431 int pc, struct pt_regs *regs)
432{
433}
434
403static inline void ftrace_trace_userstack(struct ring_buffer *buffer, 435static inline void ftrace_trace_userstack(struct ring_buffer *buffer,
404 unsigned long flags, int pc) 436 unsigned long flags, int pc)
405{ 437{
@@ -507,8 +539,18 @@ static inline int ftrace_graph_addr(unsigned long addr)
507 return 1; 539 return 1;
508 540
509 for (i = 0; i < ftrace_graph_count; i++) { 541 for (i = 0; i < ftrace_graph_count; i++) {
510		if (addr == ftrace_graph_funcs[i])
542		if (addr == ftrace_graph_funcs[i]) {
543 /*
544 * If no irqs are to be traced, but a set_graph_function
545 * is set, and called by an interrupt handler, we still
546 * want to trace it.
547 */
548 if (in_irq())
549 trace_recursion_set(TRACE_IRQ_BIT);
550 else
551 trace_recursion_clear(TRACE_IRQ_BIT);
511 return 1; 552 return 1;
553 }
512 } 554 }
513 555
514 return 0; 556 return 0;
@@ -609,6 +651,7 @@ enum trace_iterator_flags {
609 TRACE_ITER_GRAPH_TIME = 0x80000, 651 TRACE_ITER_GRAPH_TIME = 0x80000,
610 TRACE_ITER_RECORD_CMD = 0x100000, 652 TRACE_ITER_RECORD_CMD = 0x100000,
611 TRACE_ITER_OVERWRITE = 0x200000, 653 TRACE_ITER_OVERWRITE = 0x200000,
654 TRACE_ITER_STOP_ON_FREE = 0x400000,
612}; 655};
613 656
614/* 657/*
@@ -677,6 +720,7 @@ struct event_subsystem {
677 struct dentry *entry; 720 struct dentry *entry;
678 struct event_filter *filter; 721 struct event_filter *filter;
679 int nr_events; 722 int nr_events;
723 int ref_count;
680}; 724};
681 725
682#define FILTER_PRED_INVALID ((unsigned short)-1) 726#define FILTER_PRED_INVALID ((unsigned short)-1)
@@ -784,19 +828,4 @@ extern const char *__stop___trace_bprintk_fmt[];
784 FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) 828 FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
785#include "trace_entries.h" 829#include "trace_entries.h"
786 830
787/* Only current can touch trace_recursion */
788#define trace_recursion_inc() do { (current)->trace_recursion++; } while (0)
789#define trace_recursion_dec() do { (current)->trace_recursion--; } while (0)
790
791/* Ring buffer has the 10 LSB bits to count */
792#define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff)
793
794/* for function tracing recursion */
795#define TRACE_INTERNAL_BIT (1<<11)
796#define TRACE_GLOBAL_BIT (1<<12)
797
798#define trace_recursion_set(bit) do { (current)->trace_recursion |= (bit); } while (0)
799#define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(bit); } while (0)
800#define trace_recursion_test(bit) ((current)->trace_recursion & (bit))
801
802#endif /* _LINUX_KERNEL_TRACE_H */ 831#endif /* _LINUX_KERNEL_TRACE_H */
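The trace_recursion macros hoisted to the top of trace.h reserve per-task bits that only current may touch, so no locking or atomics are needed. A sketch of the guard pattern a tracer builds from them; example_enter and example_exit are hypothetical helpers:

static int example_enter(void)
{
	if (trace_recursion_test(TRACE_INTERNAL_BIT))
		return -1;	/* already inside the tracer on this task */
	trace_recursion_set(TRACE_INTERNAL_BIT);
	return 0;
}

static void example_exit(void)
{
	trace_recursion_clear(TRACE_INTERNAL_BIT);
}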
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index e32744c84d94..93365907f219 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -161,7 +161,8 @@ FTRACE_ENTRY(kernel_stack, stack_entry,
161 TRACE_STACK, 161 TRACE_STACK,
162 162
163 F_STRUCT( 163 F_STRUCT(
164		__array( unsigned long, caller, FTRACE_STACK_ENTRIES )
164		__field( int, size )
165 __dynamic_array(unsigned long, caller )
165 ), 166 ),
166 167
167 F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" 168 F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n"
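The trace_entries.h change swaps the fixed caller[FTRACE_STACK_ENTRIES] array for an explicit size field plus a dynamic array. The record written to the ring buffer conceptually becomes the following; this is a sketch only, since the real struct is generated by the FTRACE_ENTRY macro machinery:

struct stack_entry {
	struct trace_entry	ent;
	int			size;		/* bytes valid in caller[] */
	unsigned long		caller[];	/* flexible array, sized per event */
};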
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 686ec399f2a8..581876f9f387 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -244,6 +244,35 @@ static void ftrace_clear_events(void)
244 mutex_unlock(&event_mutex); 244 mutex_unlock(&event_mutex);
245} 245}
246 246
247static void __put_system(struct event_subsystem *system)
248{
249 struct event_filter *filter = system->filter;
250
251 WARN_ON_ONCE(system->ref_count == 0);
252 if (--system->ref_count)
253 return;
254
255 if (filter) {
256 kfree(filter->filter_string);
257 kfree(filter);
258 }
259 kfree(system->name);
260 kfree(system);
261}
262
263static void __get_system(struct event_subsystem *system)
264{
265 WARN_ON_ONCE(system->ref_count == 0);
266 system->ref_count++;
267}
268
269static void put_system(struct event_subsystem *system)
270{
271 mutex_lock(&event_mutex);
272 __put_system(system);
273 mutex_unlock(&event_mutex);
274}
275
247/* 276/*
248 * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events. 277 * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
249 */ 278 */
@@ -486,20 +515,11 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
486 loff_t *ppos) 515 loff_t *ppos)
487{ 516{
488 struct ftrace_event_call *call = filp->private_data; 517 struct ftrace_event_call *call = filp->private_data;
489 char buf[64];
490 unsigned long val; 518 unsigned long val;
491 int ret; 519 int ret;
492 520
493	if (cnt >= sizeof(buf))
494		return -EINVAL;
495
496	if (copy_from_user(&buf, ubuf, cnt))
497		return -EFAULT;
498
499	buf[cnt] = 0;
500
501	ret = strict_strtoul(buf, 10, &val);
502	if (ret < 0)
503		return ret;
521	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
522	if (ret)
523		return ret;
504 524
505 ret = tracing_update_buffers(); 525 ret = tracing_update_buffers();
@@ -528,7 +548,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
528 loff_t *ppos) 548 loff_t *ppos)
529{ 549{
530 const char set_to_char[4] = { '?', '0', '1', 'X' }; 550 const char set_to_char[4] = { '?', '0', '1', 'X' };
531	const char *system = filp->private_data;
551	struct event_subsystem *system = filp->private_data;
532 struct ftrace_event_call *call; 552 struct ftrace_event_call *call;
533 char buf[2]; 553 char buf[2];
534 int set = 0; 554 int set = 0;
@@ -539,7 +559,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
539 if (!call->name || !call->class || !call->class->reg) 559 if (!call->name || !call->class || !call->class->reg)
540 continue; 560 continue;
541 561
542		if (system && strcmp(call->class->system, system) != 0)
562		if (system && strcmp(call->class->system, system->name) != 0)
543 continue; 563 continue;
544 564
545 /* 565 /*
@@ -569,21 +589,13 @@ static ssize_t
569system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, 589system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
570 loff_t *ppos) 590 loff_t *ppos)
571{ 591{
572	const char *system = filp->private_data;
573	unsigned long val;
574	char buf[64];
575	ssize_t ret;
576
577	if (cnt >= sizeof(buf))
578		return -EINVAL;
579
580	if (copy_from_user(&buf, ubuf, cnt))
581		return -EFAULT;
582
583	buf[cnt] = 0;
584
585	ret = strict_strtoul(buf, 10, &val);
586	if (ret < 0)
587		return ret;
592	struct event_subsystem *system = filp->private_data;
593	const char *name = NULL;
594	unsigned long val;
595	ssize_t ret;
596
597	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
598	if (ret)
599		return ret;
588 600
589 ret = tracing_update_buffers(); 601 ret = tracing_update_buffers();
@@ -593,7 +605,14 @@ system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
593 if (val != 0 && val != 1) 605 if (val != 0 && val != 1)
594 return -EINVAL; 606 return -EINVAL;
595 607
596	ret = __ftrace_set_clr_event(NULL, system, NULL, val);
608	/*
609 * Opening of "enable" adds a ref count to system,
610 * so the name is safe to use.
611 */
612 if (system)
613 name = system->name;
614
615 ret = __ftrace_set_clr_event(NULL, name, NULL, val);
597 if (ret) 616 if (ret)
598 goto out; 617 goto out;
599 618
@@ -826,6 +845,52 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
826 return cnt; 845 return cnt;
827} 846}
828 847
848static LIST_HEAD(event_subsystems);
849
850static int subsystem_open(struct inode *inode, struct file *filp)
851{
852 struct event_subsystem *system = NULL;
853 int ret;
854
855 if (!inode->i_private)
856 goto skip_search;
857
858 /* Make sure the system still exists */
859 mutex_lock(&event_mutex);
860 list_for_each_entry(system, &event_subsystems, list) {
861 if (system == inode->i_private) {
862 /* Don't open systems with no events */
863 if (!system->nr_events) {
864 system = NULL;
865 break;
866 }
867 __get_system(system);
868 break;
869 }
870 }
871 mutex_unlock(&event_mutex);
872
873 if (system != inode->i_private)
874 return -ENODEV;
875
876 skip_search:
877 ret = tracing_open_generic(inode, filp);
878 if (ret < 0 && system)
879 put_system(system);
880
881 return ret;
882}
883
884static int subsystem_release(struct inode *inode, struct file *file)
885{
886 struct event_subsystem *system = inode->i_private;
887
888 if (system)
889 put_system(system);
890
891 return 0;
892}
893
829static ssize_t 894static ssize_t
830subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt, 895subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
831 loff_t *ppos) 896 loff_t *ppos)
@@ -963,17 +1028,19 @@ static const struct file_operations ftrace_event_filter_fops = {
963}; 1028};
964 1029
965static const struct file_operations ftrace_subsystem_filter_fops = { 1030static const struct file_operations ftrace_subsystem_filter_fops = {
966	.open = tracing_open_generic,
1031	.open = subsystem_open,
967 .read = subsystem_filter_read, 1032 .read = subsystem_filter_read,
968 .write = subsystem_filter_write, 1033 .write = subsystem_filter_write,
969 .llseek = default_llseek, 1034 .llseek = default_llseek,
1035 .release = subsystem_release,
970}; 1036};
971 1037
972static const struct file_operations ftrace_system_enable_fops = { 1038static const struct file_operations ftrace_system_enable_fops = {
973	.open = tracing_open_generic,
1039	.open = subsystem_open,
974 .read = system_enable_read, 1040 .read = system_enable_read,
975 .write = system_enable_write, 1041 .write = system_enable_write,
976 .llseek = default_llseek, 1042 .llseek = default_llseek,
1043 .release = subsystem_release,
977}; 1044};
978 1045
979static const struct file_operations ftrace_show_header_fops = { 1046static const struct file_operations ftrace_show_header_fops = {
@@ -1002,8 +1069,6 @@ static struct dentry *event_trace_events_dir(void)
1002 return d_events; 1069 return d_events;
1003} 1070}
1004 1071
1005static LIST_HEAD(event_subsystems);
1006
1007static struct dentry * 1072static struct dentry *
1008event_subsystem_dir(const char *name, struct dentry *d_events) 1073event_subsystem_dir(const char *name, struct dentry *d_events)
1009{ 1074{
@@ -1013,6 +1078,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
1013 /* First see if we did not already create this dir */ 1078 /* First see if we did not already create this dir */
1014 list_for_each_entry(system, &event_subsystems, list) { 1079 list_for_each_entry(system, &event_subsystems, list) {
1015 if (strcmp(system->name, name) == 0) { 1080 if (strcmp(system->name, name) == 0) {
1081 __get_system(system);
1016 system->nr_events++; 1082 system->nr_events++;
1017 return system->entry; 1083 return system->entry;
1018 } 1084 }
@@ -1035,6 +1101,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
1035 } 1101 }
1036 1102
1037 system->nr_events = 1; 1103 system->nr_events = 1;
1104 system->ref_count = 1;
1038 system->name = kstrdup(name, GFP_KERNEL); 1105 system->name = kstrdup(name, GFP_KERNEL);
1039 if (!system->name) { 1106 if (!system->name) {
1040 debugfs_remove(system->entry); 1107 debugfs_remove(system->entry);
@@ -1062,8 +1129,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
1062 "'%s/filter' entry\n", name); 1129 "'%s/filter' entry\n", name);
1063 } 1130 }
1064 1131
1065	trace_create_file("enable", 0644, system->entry,
1066			  (void *)system->name,
1132	trace_create_file("enable", 0644, system->entry, system,
1067 &ftrace_system_enable_fops); 1133 &ftrace_system_enable_fops);
1068 1134
1069 return system->entry; 1135 return system->entry;
@@ -1184,16 +1250,9 @@ static void remove_subsystem_dir(const char *name)
1184 list_for_each_entry(system, &event_subsystems, list) { 1250 list_for_each_entry(system, &event_subsystems, list) {
1185 if (strcmp(system->name, name) == 0) { 1251 if (strcmp(system->name, name) == 0) {
1186 if (!--system->nr_events) { 1252 if (!--system->nr_events) {
1187 struct event_filter *filter = system->filter;
1188
1189 debugfs_remove_recursive(system->entry); 1253 debugfs_remove_recursive(system->entry);
1190 list_del(&system->list); 1254 list_del(&system->list);
1191				if (filter) {
1192					kfree(filter->filter_string);
1193					kfree(filter);
1194				}
1195				kfree(system->name);
1196				kfree(system);
1255				__put_system(system);
1197 } 1256 }
1198 break; 1257 break;
1199 } 1258 }
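Taken together, the trace_events.c hunks establish a get/put lifetime rule for event_subsystem: event_subsystem_dir() and every subsystem_open() take a reference, and the final __put_system() frees the filter, the name and the struct itself, whether it comes from remove_subsystem_dir() or from the last file handle being released. A sketch of the pairing, assuming event_mutex is held where the originals require it:

	/* open path (see subsystem_open() above) */
	__get_system(system);		/* under event_mutex */
	ret = tracing_open_generic(inode, filp);
	if (ret < 0)
		put_system(system);	/* failed open drops the reference */

	/* release path (see subsystem_release() above) */
	put_system(system);		/* last put frees the subsystem */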
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 8008ddcfbf20..256764ecccd6 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1886,6 +1886,12 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
1886 1886
1887 mutex_lock(&event_mutex); 1887 mutex_lock(&event_mutex);
1888 1888
1889 /* Make sure the system still has events */
1890 if (!system->nr_events) {
1891 err = -ENODEV;
1892 goto out_unlock;
1893 }
1894
1889 if (!strcmp(strstrip(filter_string), "0")) { 1895 if (!strcmp(strstrip(filter_string), "0")) {
1890 filter_free_subsystem_preds(system); 1896 filter_free_subsystem_preds(system);
1891 remove_filter_string(system->filter); 1897 remove_filter_string(system->filter);
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 8d0e1cc4e974..c7b0c6a7db09 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -324,7 +324,8 @@ ftrace_trace_onoff_unreg(char *glob, char *cmd, char *param)
324} 324}
325 325
326static int 326static int
327ftrace_trace_onoff_callback(char *glob, char *cmd, char *param, int enable)
327ftrace_trace_onoff_callback(struct ftrace_hash *hash,
328			    char *glob, char *cmd, char *param, int enable)
328{ 329{
329 struct ftrace_probe_ops *ops; 330 struct ftrace_probe_ops *ops;
330 void *count = (void *)-1; 331 void *count = (void *)-1;
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 962cdb24ed81..a7d2a4c653d8 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -74,6 +74,20 @@ static struct tracer_flags tracer_flags = {
74 74
75static struct trace_array *graph_array; 75static struct trace_array *graph_array;
76 76
77/*
78 * DURATION column is being also used to display IRQ signs,
79 * following values are used by print_graph_irq and others
80 * to fill in space into DURATION column.
81 */
82enum {
83 DURATION_FILL_FULL = -1,
84 DURATION_FILL_START = -2,
85 DURATION_FILL_END = -3,
86};
87
88static enum print_line_t
89print_graph_duration(unsigned long long duration, struct trace_seq *s,
90 u32 flags);
77 91
78/* Add a function return address to the trace stack on thread info.*/ 92/* Add a function return address to the trace stack on thread info.*/
79int 93int
@@ -213,7 +227,7 @@ int __trace_graph_entry(struct trace_array *tr,
213 227
214static inline int ftrace_graph_ignore_irqs(void) 228static inline int ftrace_graph_ignore_irqs(void)
215{ 229{
216	if (!ftrace_graph_skip_irqs)
230	if (!ftrace_graph_skip_irqs || trace_recursion_test(TRACE_IRQ_BIT))
217 return 0; 231 return 0;
218 232
219 return in_irq(); 233 return in_irq();
@@ -577,32 +591,6 @@ get_return_for_leaf(struct trace_iterator *iter,
577 return next; 591 return next;
578} 592}
579 593
580/* Signal a overhead of time execution to the output */
581static int
582print_graph_overhead(unsigned long long duration, struct trace_seq *s,
583 u32 flags)
584{
585 /* If duration disappear, we don't need anything */
586 if (!(flags & TRACE_GRAPH_PRINT_DURATION))
587 return 1;
588
589 /* Non nested entry or return */
590 if (duration == -1)
591 return trace_seq_printf(s, " ");
592
593 if (flags & TRACE_GRAPH_PRINT_OVERHEAD) {
594 /* Duration exceeded 100 msecs */
595 if (duration > 100000ULL)
596 return trace_seq_printf(s, "! ");
597
598 /* Duration exceeded 10 msecs */
599 if (duration > 10000ULL)
600 return trace_seq_printf(s, "+ ");
601 }
602
603 return trace_seq_printf(s, " ");
604}
605
606static int print_graph_abs_time(u64 t, struct trace_seq *s) 594static int print_graph_abs_time(u64 t, struct trace_seq *s)
607{ 595{
608 unsigned long usecs_rem; 596 unsigned long usecs_rem;
@@ -625,34 +613,36 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
625 addr >= (unsigned long)__irqentry_text_end) 613 addr >= (unsigned long)__irqentry_text_end)
626 return TRACE_TYPE_UNHANDLED; 614 return TRACE_TYPE_UNHANDLED;
627 615
628	/* Absolute time */
629	if (flags & TRACE_GRAPH_PRINT_ABS_TIME) {
630		ret = print_graph_abs_time(iter->ts, s);
631		if (!ret)
632			return TRACE_TYPE_PARTIAL_LINE;
633	}
634
635	/* Cpu */
636	if (flags & TRACE_GRAPH_PRINT_CPU) {
637		ret = print_graph_cpu(s, cpu);
638		if (ret == TRACE_TYPE_PARTIAL_LINE)
639			return TRACE_TYPE_PARTIAL_LINE;
640	}
641
642	/* Proc */
643	if (flags & TRACE_GRAPH_PRINT_PROC) {
644		ret = print_graph_proc(s, pid);
645		if (ret == TRACE_TYPE_PARTIAL_LINE)
646			return TRACE_TYPE_PARTIAL_LINE;
647		ret = trace_seq_printf(s, " | ");
648		if (!ret)
649			return TRACE_TYPE_PARTIAL_LINE;
650	}
651
652	/* No overhead */
653	ret = print_graph_overhead(-1, s, flags);
654	if (!ret)
655		return TRACE_TYPE_PARTIAL_LINE;
616	if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
617		/* Absolute time */
618		if (flags & TRACE_GRAPH_PRINT_ABS_TIME) {
619			ret = print_graph_abs_time(iter->ts, s);
620			if (!ret)
621				return TRACE_TYPE_PARTIAL_LINE;
622		}
623
624		/* Cpu */
625		if (flags & TRACE_GRAPH_PRINT_CPU) {
626			ret = print_graph_cpu(s, cpu);
627			if (ret == TRACE_TYPE_PARTIAL_LINE)
628				return TRACE_TYPE_PARTIAL_LINE;
629		}
630
631		/* Proc */
632		if (flags & TRACE_GRAPH_PRINT_PROC) {
633			ret = print_graph_proc(s, pid);
634			if (ret == TRACE_TYPE_PARTIAL_LINE)
635				return TRACE_TYPE_PARTIAL_LINE;
636			ret = trace_seq_printf(s, " | ");
637			if (!ret)
638				return TRACE_TYPE_PARTIAL_LINE;
639		}
640	}
641
642	/* No overhead */
643	ret = print_graph_duration(DURATION_FILL_START, s, flags);
644	if (ret != TRACE_TYPE_HANDLED)
645		return ret;
656 646
657 if (type == TRACE_GRAPH_ENT) 647 if (type == TRACE_GRAPH_ENT)
658 ret = trace_seq_printf(s, "==========>"); 648 ret = trace_seq_printf(s, "==========>");
@@ -662,9 +652,10 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
662 if (!ret) 652 if (!ret)
663 return TRACE_TYPE_PARTIAL_LINE; 653 return TRACE_TYPE_PARTIAL_LINE;
664 654
665	/* Don't close the duration column if haven't one */
666	if (flags & TRACE_GRAPH_PRINT_DURATION)
667		trace_seq_printf(s, " |");
655	ret = print_graph_duration(DURATION_FILL_END, s, flags);
656	if (ret != TRACE_TYPE_HANDLED)
657		return ret;
658
668 ret = trace_seq_printf(s, "\n"); 659 ret = trace_seq_printf(s, "\n");
669 660
670 if (!ret) 661 if (!ret)
@@ -716,9 +707,49 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)
716} 707}
717 708
718static enum print_line_t 709static enum print_line_t
719print_graph_duration(unsigned long long duration, struct trace_seq *s)
710print_graph_duration(unsigned long long duration, struct trace_seq *s,
711		     u32 flags)
720{ 712{
721	int ret;
713	int ret = -1;
714
715 if (!(flags & TRACE_GRAPH_PRINT_DURATION) ||
716 !(trace_flags & TRACE_ITER_CONTEXT_INFO))
717 return TRACE_TYPE_HANDLED;
718
719	/* No real data, just filling the column with spaces */
720 switch (duration) {
721 case DURATION_FILL_FULL:
722 ret = trace_seq_printf(s, " | ");
723 return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
724 case DURATION_FILL_START:
725 ret = trace_seq_printf(s, " ");
726 return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
727 case DURATION_FILL_END:
728 ret = trace_seq_printf(s, " |");
729 return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
730 }
731
732 /* Signal a overhead of time execution to the output */
733 if (flags & TRACE_GRAPH_PRINT_OVERHEAD) {
734 /* Duration exceeded 100 msecs */
735 if (duration > 100000ULL)
736 ret = trace_seq_printf(s, "! ");
737 /* Duration exceeded 10 msecs */
738 else if (duration > 10000ULL)
739 ret = trace_seq_printf(s, "+ ");
740 }
741
742 /*
743 * The -1 means we either did not exceed the duration tresholds
744 * or we dont want to print out the overhead. Either way we need
745 * to fill out the space.
746 */
747 if (ret == -1)
748 ret = trace_seq_printf(s, " ");
749
750	/* Catch here any failure that happened above */
751 if (!ret)
752 return TRACE_TYPE_PARTIAL_LINE;
722 753
723 ret = trace_print_graph_duration(duration, s); 754 ret = trace_print_graph_duration(duration, s);
724 if (ret != TRACE_TYPE_HANDLED) 755 if (ret != TRACE_TYPE_HANDLED)
@@ -767,18 +798,11 @@ print_graph_entry_leaf(struct trace_iterator *iter,
767 cpu_data->enter_funcs[call->depth] = 0; 798 cpu_data->enter_funcs[call->depth] = 0;
768 } 799 }
769 800
770	/* Overhead */
771	ret = print_graph_overhead(duration, s, flags);
772	if (!ret)
773		return TRACE_TYPE_PARTIAL_LINE;
801	/* Overhead and duration */
802	ret = print_graph_duration(duration, s, flags);
803	if (ret == TRACE_TYPE_PARTIAL_LINE)
804		return TRACE_TYPE_PARTIAL_LINE;
774 805
775 /* Duration */
776 if (flags & TRACE_GRAPH_PRINT_DURATION) {
777 ret = print_graph_duration(duration, s);
778 if (ret == TRACE_TYPE_PARTIAL_LINE)
779 return TRACE_TYPE_PARTIAL_LINE;
780 }
781
782 /* Function */ 806 /* Function */
783 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { 807 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
784 ret = trace_seq_printf(s, " "); 808 ret = trace_seq_printf(s, " ");
@@ -815,17 +839,10 @@ print_graph_entry_nested(struct trace_iterator *iter,
815 cpu_data->enter_funcs[call->depth] = call->func; 839 cpu_data->enter_funcs[call->depth] = call->func;
816 } 840 }
817 841
818 /* No overhead */
819 ret = print_graph_overhead(-1, s, flags);
820 if (!ret)
821 return TRACE_TYPE_PARTIAL_LINE;
822
823 /* No time */ 842 /* No time */
824	if (flags & TRACE_GRAPH_PRINT_DURATION) {
825		ret = trace_seq_printf(s, " | ");
826		if (!ret)
827			return TRACE_TYPE_PARTIAL_LINE;
828	}
843	ret = print_graph_duration(DURATION_FILL_FULL, s, flags);
844	if (ret != TRACE_TYPE_HANDLED)
845		return ret;
829 846
830 /* Function */ 847 /* Function */
831 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { 848 for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
@@ -865,6 +882,9 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
865 return TRACE_TYPE_PARTIAL_LINE; 882 return TRACE_TYPE_PARTIAL_LINE;
866 } 883 }
867 884
885 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
886 return 0;
887
868 /* Absolute time */ 888 /* Absolute time */
869 if (flags & TRACE_GRAPH_PRINT_ABS_TIME) { 889 if (flags & TRACE_GRAPH_PRINT_ABS_TIME) {
870 ret = print_graph_abs_time(iter->ts, s); 890 ret = print_graph_abs_time(iter->ts, s);
@@ -1078,18 +1098,11 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
1078 if (print_graph_prologue(iter, s, 0, 0, flags)) 1098 if (print_graph_prologue(iter, s, 0, 0, flags))
1079 return TRACE_TYPE_PARTIAL_LINE; 1099 return TRACE_TYPE_PARTIAL_LINE;
1080 1100
1081	/* Overhead */
1082	ret = print_graph_overhead(duration, s, flags);
1083	if (!ret)
1084		return TRACE_TYPE_PARTIAL_LINE;
1101	/* Overhead and duration */
1102	ret = print_graph_duration(duration, s, flags);
1103	if (ret == TRACE_TYPE_PARTIAL_LINE)
1104		return TRACE_TYPE_PARTIAL_LINE;
1085 1105
1086 /* Duration */
1087 if (flags & TRACE_GRAPH_PRINT_DURATION) {
1088 ret = print_graph_duration(duration, s);
1089 if (ret == TRACE_TYPE_PARTIAL_LINE)
1090 return TRACE_TYPE_PARTIAL_LINE;
1091 }
1092
1093 /* Closing brace */ 1106 /* Closing brace */
1094 for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) { 1107 for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) {
1095 ret = trace_seq_printf(s, " "); 1108 ret = trace_seq_printf(s, " ");
@@ -1146,17 +1159,10 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
1146 if (print_graph_prologue(iter, s, 0, 0, flags)) 1159 if (print_graph_prologue(iter, s, 0, 0, flags))
1147 return TRACE_TYPE_PARTIAL_LINE; 1160 return TRACE_TYPE_PARTIAL_LINE;
1148 1161
1149 /* No overhead */
1150 ret = print_graph_overhead(-1, s, flags);
1151 if (!ret)
1152 return TRACE_TYPE_PARTIAL_LINE;
1153
1154 /* No time */ 1162 /* No time */
1155	if (flags & TRACE_GRAPH_PRINT_DURATION) {
1156		ret = trace_seq_printf(s, " | ");
1157		if (!ret)
1158			return TRACE_TYPE_PARTIAL_LINE;
1159	}
1163	ret = print_graph_duration(DURATION_FILL_FULL, s, flags);
1164	if (ret != TRACE_TYPE_HANDLED)
1165		return ret;
1160 1166
1161 /* Indentation */ 1167 /* Indentation */
1162 if (depth > 0) 1168 if (depth > 0)
@@ -1207,7 +1213,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
1207 1213
1208 1214
1209enum print_line_t 1215enum print_line_t
1210__print_graph_function_flags(struct trace_iterator *iter, u32 flags)
1216print_graph_function_flags(struct trace_iterator *iter, u32 flags)
1211{ 1217{
1212 struct ftrace_graph_ent_entry *field; 1218 struct ftrace_graph_ent_entry *field;
1213 struct fgraph_data *data = iter->private; 1219 struct fgraph_data *data = iter->private;
@@ -1270,18 +1276,7 @@ __print_graph_function_flags(struct trace_iterator *iter, u32 flags)
1270static enum print_line_t 1276static enum print_line_t
1271print_graph_function(struct trace_iterator *iter) 1277print_graph_function(struct trace_iterator *iter)
1272{ 1278{
1273	return __print_graph_function_flags(iter, tracer_flags.val);
1279	return print_graph_function_flags(iter, tracer_flags.val);
1274}
1275
1276enum print_line_t print_graph_function_flags(struct trace_iterator *iter,
1277 u32 flags)
1278{
1279 if (trace_flags & TRACE_ITER_LATENCY_FMT)
1280 flags |= TRACE_GRAPH_PRINT_DURATION;
1281 else
1282 flags |= TRACE_GRAPH_PRINT_ABS_TIME;
1283
1284 return __print_graph_function_flags(iter, flags);
1285} 1280}
1286 1281
1287static enum print_line_t 1282static enum print_line_t
@@ -1309,8 +1304,7 @@ static void print_lat_header(struct seq_file *s, u32 flags)
1309 seq_printf(s, "#%.*s / _----=> need-resched \n", size, spaces); 1304 seq_printf(s, "#%.*s / _----=> need-resched \n", size, spaces);
1310 seq_printf(s, "#%.*s| / _---=> hardirq/softirq \n", size, spaces); 1305 seq_printf(s, "#%.*s| / _---=> hardirq/softirq \n", size, spaces);
1311 seq_printf(s, "#%.*s|| / _--=> preempt-depth \n", size, spaces); 1306 seq_printf(s, "#%.*s|| / _--=> preempt-depth \n", size, spaces);
1312	seq_printf(s, "#%.*s||| / _-=> lock-depth \n", size, spaces);
1313	seq_printf(s, "#%.*s|||| / \n", size, spaces);
1307	seq_printf(s, "#%.*s||| / \n", size, spaces);
1314} 1308}
1315 1309
1316static void __print_graph_headers_flags(struct seq_file *s, u32 flags) 1310static void __print_graph_headers_flags(struct seq_file *s, u32 flags)
@@ -1329,7 +1323,7 @@ static void __print_graph_headers_flags(struct seq_file *s, u32 flags)
1329 if (flags & TRACE_GRAPH_PRINT_PROC) 1323 if (flags & TRACE_GRAPH_PRINT_PROC)
1330 seq_printf(s, " TASK/PID "); 1324 seq_printf(s, " TASK/PID ");
1331 if (lat) 1325 if (lat)
1332 seq_printf(s, "|||||"); 1326 seq_printf(s, "||||");
1333 if (flags & TRACE_GRAPH_PRINT_DURATION) 1327 if (flags & TRACE_GRAPH_PRINT_DURATION)
1334 seq_printf(s, " DURATION "); 1328 seq_printf(s, " DURATION ");
1335 seq_printf(s, " FUNCTION CALLS\n"); 1329 seq_printf(s, " FUNCTION CALLS\n");
@@ -1343,7 +1337,7 @@ static void __print_graph_headers_flags(struct seq_file *s, u32 flags)
1343 if (flags & TRACE_GRAPH_PRINT_PROC) 1337 if (flags & TRACE_GRAPH_PRINT_PROC)
1344 seq_printf(s, " | | "); 1338 seq_printf(s, " | | ");
1345 if (lat) 1339 if (lat)
1346		seq_printf(s, "|||||");
1340		seq_printf(s, "||||");
1347 if (flags & TRACE_GRAPH_PRINT_DURATION) 1341 if (flags & TRACE_GRAPH_PRINT_DURATION)
1348 seq_printf(s, " | | "); 1342 seq_printf(s, " | | ");
1349 seq_printf(s, " | | | |\n"); 1343 seq_printf(s, " | | | |\n");
@@ -1358,15 +1352,16 @@ void print_graph_headers_flags(struct seq_file *s, u32 flags)
1358{ 1352{
1359 struct trace_iterator *iter = s->private; 1353 struct trace_iterator *iter = s->private;
1360 1354
1355 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
1356 return;
1357
1361 if (trace_flags & TRACE_ITER_LATENCY_FMT) { 1358 if (trace_flags & TRACE_ITER_LATENCY_FMT) {
1362 /* print nothing if the buffers are empty */ 1359 /* print nothing if the buffers are empty */
1363 if (trace_empty(iter)) 1360 if (trace_empty(iter))
1364 return; 1361 return;
1365 1362
1366 print_trace_header(s, iter); 1363 print_trace_header(s, iter);
1367		flags |= TRACE_GRAPH_PRINT_DURATION;
1368	} else
1369		flags |= TRACE_GRAPH_PRINT_ABS_TIME;
1364	}
1370 1365
1371 __print_graph_headers_flags(s, flags); 1366 __print_graph_headers_flags(s, flags);
1372} 1367}
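After this consolidation, print_graph_duration() is the only writer of the DURATION column: callers pass either a measured duration or one of the DURATION_FILL_* tokens and treat anything other than TRACE_TYPE_HANDLED as an error. A sketch of the calling convention; example_row() is a hypothetical printer, not part of the patch:

static enum print_line_t example_row(struct trace_seq *s, u32 flags)
{
	enum print_line_t ret;

	/* a row with no measured time asks for a full-width filler */
	ret = print_graph_duration(DURATION_FILL_FULL, s, flags);
	if (ret != TRACE_TYPE_HANDLED)
		return ret;

	if (!trace_seq_printf(s, " example_func();\n"))
		return TRACE_TYPE_PARTIAL_LINE;

	return TRACE_TYPE_HANDLED;
}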
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index c77424be284d..667aa8cc0cfc 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -226,7 +226,9 @@ static void irqsoff_trace_close(struct trace_iterator *iter)
226} 226}
227 227
228#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_CPU | \ 228#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_CPU | \
229			TRACE_GRAPH_PRINT_PROC)
229			TRACE_GRAPH_PRINT_PROC | \
230 TRACE_GRAPH_PRINT_ABS_TIME | \
231 TRACE_GRAPH_PRINT_DURATION)
230 232
231static enum print_line_t irqsoff_print_line(struct trace_iterator *iter) 233static enum print_line_t irqsoff_print_line(struct trace_iterator *iter)
232{ 234{
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 27d13b36b8be..5fb3697bf0e5 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -343,6 +343,14 @@ DEFINE_BASIC_FETCH_FUNCS(deref)
343DEFINE_FETCH_deref(string) 343DEFINE_FETCH_deref(string)
344DEFINE_FETCH_deref(string_size) 344DEFINE_FETCH_deref(string_size)
345 345
346static __kprobes void update_deref_fetch_param(struct deref_fetch_param *data)
347{
348 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
349 update_deref_fetch_param(data->orig.data);
350 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
351 update_symbol_cache(data->orig.data);
352}
353
346static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data) 354static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
347{ 355{
348 if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) 356 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
@@ -377,6 +385,19 @@ DEFINE_BASIC_FETCH_FUNCS(bitfield)
377#define fetch_bitfield_string_size NULL 385#define fetch_bitfield_string_size NULL
378 386
379static __kprobes void 387static __kprobes void
388update_bitfield_fetch_param(struct bitfield_fetch_param *data)
389{
390 /*
391 * Don't check the bitfield itself, because this must be the
392 * last fetch function.
393 */
394 if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
395 update_deref_fetch_param(data->orig.data);
396 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
397 update_symbol_cache(data->orig.data);
398}
399
400static __kprobes void
380free_bitfield_fetch_param(struct bitfield_fetch_param *data) 401free_bitfield_fetch_param(struct bitfield_fetch_param *data)
381{ 402{
382 /* 403 /*
@@ -389,6 +410,7 @@ free_bitfield_fetch_param(struct bitfield_fetch_param *data)
389 free_symbol_cache(data->orig.data); 410 free_symbol_cache(data->orig.data);
390 kfree(data); 411 kfree(data);
391} 412}
413
392/* Default (unsigned long) fetch type */ 414/* Default (unsigned long) fetch type */
393#define __DEFAULT_FETCH_TYPE(t) u##t 415#define __DEFAULT_FETCH_TYPE(t) u##t
394#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t) 416#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t)
@@ -536,6 +558,7 @@ struct probe_arg {
536/* Flags for trace_probe */ 558/* Flags for trace_probe */
537#define TP_FLAG_TRACE 1 559#define TP_FLAG_TRACE 1
538#define TP_FLAG_PROFILE 2 560#define TP_FLAG_PROFILE 2
561#define TP_FLAG_REGISTERED 4
539 562
540struct trace_probe { 563struct trace_probe {
541 struct list_head list; 564 struct list_head list;
@@ -555,16 +578,49 @@ struct trace_probe {
555 (sizeof(struct probe_arg) * (n))) 578 (sizeof(struct probe_arg) * (n)))
556 579
557 580
558static __kprobes int probe_is_return(struct trace_probe *tp)
581static __kprobes int trace_probe_is_return(struct trace_probe *tp)
559{ 582{
560 return tp->rp.handler != NULL; 583 return tp->rp.handler != NULL;
561} 584}
562 585
563static __kprobes const char *probe_symbol(struct trace_probe *tp)
586static __kprobes const char *trace_probe_symbol(struct trace_probe *tp)
564{ 587{
565 return tp->symbol ? tp->symbol : "unknown"; 588 return tp->symbol ? tp->symbol : "unknown";
566} 589}
567 590
591static __kprobes unsigned long trace_probe_offset(struct trace_probe *tp)
592{
593 return tp->rp.kp.offset;
594}
595
596static __kprobes bool trace_probe_is_enabled(struct trace_probe *tp)
597{
598 return !!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE));
599}
600
601static __kprobes bool trace_probe_is_registered(struct trace_probe *tp)
602{
603 return !!(tp->flags & TP_FLAG_REGISTERED);
604}
605
606static __kprobes bool trace_probe_has_gone(struct trace_probe *tp)
607{
608 return !!(kprobe_gone(&tp->rp.kp));
609}
610
611static __kprobes bool trace_probe_within_module(struct trace_probe *tp,
612 struct module *mod)
613{
614 int len = strlen(mod->name);
615 const char *name = trace_probe_symbol(tp);
616 return strncmp(mod->name, name, len) == 0 && name[len] == ':';
617}
618
619static __kprobes bool trace_probe_is_on_module(struct trace_probe *tp)
620{
621 return !!strchr(trace_probe_symbol(tp), ':');
622}
623
568static int register_probe_event(struct trace_probe *tp); 624static int register_probe_event(struct trace_probe *tp);
569static void unregister_probe_event(struct trace_probe *tp); 625static void unregister_probe_event(struct trace_probe *tp);
570 626
@@ -646,6 +702,16 @@ error:
646 return ERR_PTR(ret); 702 return ERR_PTR(ret);
647} 703}
648 704
705static void update_probe_arg(struct probe_arg *arg)
706{
707 if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn))
708 update_bitfield_fetch_param(arg->fetch.data);
709 else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
710 update_deref_fetch_param(arg->fetch.data);
711 else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
712 update_symbol_cache(arg->fetch.data);
713}
714
649static void free_probe_arg(struct probe_arg *arg) 715static void free_probe_arg(struct probe_arg *arg)
650{ 716{
651 if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn)) 717 if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn))
@@ -671,7 +737,7 @@ static void free_trace_probe(struct trace_probe *tp)
671 kfree(tp); 737 kfree(tp);
672} 738}
673 739
674static struct trace_probe *find_probe_event(const char *event,
740static struct trace_probe *find_trace_probe(const char *event,
675 const char *group) 741 const char *group)
676{ 742{
677 struct trace_probe *tp; 743 struct trace_probe *tp;
@@ -683,13 +749,96 @@ static struct trace_probe *find_probe_event(const char *event,
683 return NULL; 749 return NULL;
684} 750}
685 751
752/* Enable trace_probe - @flag must be TP_FLAG_TRACE or TP_FLAG_PROFILE */
753static int enable_trace_probe(struct trace_probe *tp, int flag)
754{
755 int ret = 0;
756
757 tp->flags |= flag;
758 if (trace_probe_is_enabled(tp) && trace_probe_is_registered(tp) &&
759 !trace_probe_has_gone(tp)) {
760 if (trace_probe_is_return(tp))
761 ret = enable_kretprobe(&tp->rp);
762 else
763 ret = enable_kprobe(&tp->rp.kp);
764 }
765
766 return ret;
767}
768
769/* Disable trace_probe - @flag must be TP_FLAG_TRACE or TP_FLAG_PROFILE */
770static void disable_trace_probe(struct trace_probe *tp, int flag)
771{
772 tp->flags &= ~flag;
773 if (!trace_probe_is_enabled(tp) && trace_probe_is_registered(tp)) {
774 if (trace_probe_is_return(tp))
775 disable_kretprobe(&tp->rp);
776 else
777 disable_kprobe(&tp->rp.kp);
778 }
779}
780
781/* Internal register function - just handle k*probes and flags */
782static int __register_trace_probe(struct trace_probe *tp)
783{
784 int i, ret;
785
786 if (trace_probe_is_registered(tp))
787 return -EINVAL;
788
789 for (i = 0; i < tp->nr_args; i++)
790 update_probe_arg(&tp->args[i]);
791
792 /* Set/clear disabled flag according to tp->flag */
793 if (trace_probe_is_enabled(tp))
794 tp->rp.kp.flags &= ~KPROBE_FLAG_DISABLED;
795 else
796 tp->rp.kp.flags |= KPROBE_FLAG_DISABLED;
797
798 if (trace_probe_is_return(tp))
799 ret = register_kretprobe(&tp->rp);
800 else
801 ret = register_kprobe(&tp->rp.kp);
802
803 if (ret == 0)
804 tp->flags |= TP_FLAG_REGISTERED;
805 else {
806 pr_warning("Could not insert probe at %s+%lu: %d\n",
807 trace_probe_symbol(tp), trace_probe_offset(tp), ret);
808 if (ret == -ENOENT && trace_probe_is_on_module(tp)) {
809 pr_warning("This probe might be able to register after"
810 "target module is loaded. Continue.\n");
811 ret = 0;
812 } else if (ret == -EILSEQ) {
813 pr_warning("Probing address(0x%p) is not an "
814 "instruction boundary.\n",
815 tp->rp.kp.addr);
816 ret = -EINVAL;
817 }
818 }
819
820 return ret;
821}
822
823/* Internal unregister function - just handle k*probes and flags */
824static void __unregister_trace_probe(struct trace_probe *tp)
825{
826 if (trace_probe_is_registered(tp)) {
827 if (trace_probe_is_return(tp))
828 unregister_kretprobe(&tp->rp);
829 else
830 unregister_kprobe(&tp->rp.kp);
831 tp->flags &= ~TP_FLAG_REGISTERED;
832 /* Cleanup kprobe for reuse */
833 if (tp->rp.kp.symbol_name)
834 tp->rp.kp.addr = NULL;
835 }
836}
837
686/* Unregister a trace_probe and probe_event: call with locking probe_lock */ 838/* Unregister a trace_probe and probe_event: call with locking probe_lock */
687static void unregister_trace_probe(struct trace_probe *tp) 839static void unregister_trace_probe(struct trace_probe *tp)
688{ 840{
689	if (probe_is_return(tp))
690		unregister_kretprobe(&tp->rp);
691	else
692		unregister_kprobe(&tp->rp.kp);
841	__unregister_trace_probe(tp);
693 list_del(&tp->list); 842 list_del(&tp->list);
694 unregister_probe_event(tp); 843 unregister_probe_event(tp);
695} 844}
@@ -702,41 +851,65 @@ static int register_trace_probe(struct trace_probe *tp)
702 851
703 mutex_lock(&probe_lock); 852 mutex_lock(&probe_lock);
704 853
705	/* register as an event */
706	old_tp = find_probe_event(tp->call.name, tp->call.class->system);
854	/* Delete old (same name) event if it exists */
855	old_tp = find_trace_probe(tp->call.name, tp->call.class->system);
707 if (old_tp) { 856 if (old_tp) {
708 /* delete old event */
709 unregister_trace_probe(old_tp); 857 unregister_trace_probe(old_tp);
710 free_trace_probe(old_tp); 858 free_trace_probe(old_tp);
711 } 859 }
860
861 /* Register new event */
712 ret = register_probe_event(tp); 862 ret = register_probe_event(tp);
713 if (ret) { 863 if (ret) {
714 pr_warning("Failed to register probe event(%d)\n", ret); 864 pr_warning("Failed to register probe event(%d)\n", ret);
715 goto end; 865 goto end;
716 } 866 }
717 867
718	tp->rp.kp.flags |= KPROBE_FLAG_DISABLED;
719	if (probe_is_return(tp))
720		ret = register_kretprobe(&tp->rp);
721	else
722		ret = register_kprobe(&tp->rp.kp);
723
724	if (ret) {
725		pr_warning("Could not insert probe(%d)\n", ret);
726		if (ret == -EILSEQ) {
727			pr_warning("Probing address(0x%p) is not an "
728				   "instruction boundary.\n",
729				   tp->rp.kp.addr);
730			ret = -EINVAL;
731		}
732		unregister_probe_event(tp);
733	} else
734		list_add_tail(&tp->list, &probe_list);
868	/* Register k*probe */
869	ret = __register_trace_probe(tp);
870	if (ret < 0)
871		unregister_probe_event(tp);
872	else
873		list_add_tail(&tp->list, &probe_list);
874
735end: 875end:
736 mutex_unlock(&probe_lock); 876 mutex_unlock(&probe_lock);
737 return ret; 877 return ret;
738} 878}
739 879
880/* Module notifier call back, checking event on the module */
881static int trace_probe_module_callback(struct notifier_block *nb,
882 unsigned long val, void *data)
883{
884 struct module *mod = data;
885 struct trace_probe *tp;
886 int ret;
887
888 if (val != MODULE_STATE_COMING)
889 return NOTIFY_DONE;
890
891 /* Update probes on coming module */
892 mutex_lock(&probe_lock);
893 list_for_each_entry(tp, &probe_list, list) {
894 if (trace_probe_within_module(tp, mod)) {
895 __unregister_trace_probe(tp);
896 ret = __register_trace_probe(tp);
897 if (ret)
898 pr_warning("Failed to re-register probe %s on"
899 "%s: %d\n",
900 tp->call.name, mod->name, ret);
901 }
902 }
903 mutex_unlock(&probe_lock);
904
905 return NOTIFY_DONE;
906}
907
908static struct notifier_block trace_probe_module_nb = {
909 .notifier_call = trace_probe_module_callback,
910 .priority = 1 /* Invoked after kprobe module callback */
911};
912
740/* Split symbol and offset. */ 913/* Split symbol and offset. */
741static int split_symbol_offset(char *symbol, unsigned long *offset) 914static int split_symbol_offset(char *symbol, unsigned long *offset)
742{ 915{
@@ -962,8 +1135,8 @@ static int create_trace_probe(int argc, char **argv)
962{ 1135{
963 /* 1136 /*
964 * Argument syntax: 1137 * Argument syntax:
965 * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS] 1138 * - Add kprobe: p[:[GRP/]EVENT] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS]
966 * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS] 1139 * - Add kretprobe: r[:[GRP/]EVENT] [MOD:]KSYM[+0] [FETCHARGS]
967 * Fetch args: 1140 * Fetch args:
968 * $retval : fetch return value 1141 * $retval : fetch return value
969 * $stack : fetch stack address 1142 * $stack : fetch stack address
@@ -1025,7 +1198,7 @@ static int create_trace_probe(int argc, char **argv)
1025 return -EINVAL; 1198 return -EINVAL;
1026 } 1199 }
1027 mutex_lock(&probe_lock); 1200 mutex_lock(&probe_lock);
1028	tp = find_probe_event(event, group);
1201	tp = find_trace_probe(event, group);
1029 if (!tp) { 1202 if (!tp) {
1030 mutex_unlock(&probe_lock); 1203 mutex_unlock(&probe_lock);
1031 pr_info("Event %s/%s doesn't exist.\n", group, event); 1204 pr_info("Event %s/%s doesn't exist.\n", group, event);
@@ -1144,7 +1317,7 @@ error:
1144 return ret; 1317 return ret;
1145} 1318}
1146 1319
1147static void cleanup_all_probes(void)
1320static void release_all_trace_probes(void)
1148{ 1321{
1149 struct trace_probe *tp; 1322 struct trace_probe *tp;
1150 1323
@@ -1158,7 +1331,6 @@ static void cleanup_all_probes(void)
1158 mutex_unlock(&probe_lock); 1331 mutex_unlock(&probe_lock);
1159} 1332}
1160 1333
1161
1162/* Probes listing interfaces */ 1334/* Probes listing interfaces */
1163static void *probes_seq_start(struct seq_file *m, loff_t *pos) 1335static void *probes_seq_start(struct seq_file *m, loff_t *pos)
1164{ 1336{
@@ -1181,15 +1353,16 @@ static int probes_seq_show(struct seq_file *m, void *v)
1181 struct trace_probe *tp = v; 1353 struct trace_probe *tp = v;
1182 int i; 1354 int i;
1183 1355
1184	seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p');
1356	seq_printf(m, "%c", trace_probe_is_return(tp) ? 'r' : 'p');
1185 seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name); 1357 seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name);
1186 1358
1187 if (!tp->symbol) 1359 if (!tp->symbol)
1188 seq_printf(m, " 0x%p", tp->rp.kp.addr); 1360 seq_printf(m, " 0x%p", tp->rp.kp.addr);
1189 else if (tp->rp.kp.offset) 1361 else if (tp->rp.kp.offset)
1190		seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset);
1362		seq_printf(m, " %s+%u", trace_probe_symbol(tp),
1363			   tp->rp.kp.offset);
1191 else 1364 else
1192		seq_printf(m, " %s", probe_symbol(tp));
1365		seq_printf(m, " %s", trace_probe_symbol(tp));
1193 1366
1194 for (i = 0; i < tp->nr_args; i++) 1367 for (i = 0; i < tp->nr_args; i++)
1195 seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm); 1368 seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm);
@@ -1209,7 +1382,7 @@ static int probes_open(struct inode *inode, struct file *file)
1209{ 1382{
1210 if ((file->f_mode & FMODE_WRITE) && 1383 if ((file->f_mode & FMODE_WRITE) &&
1211 (file->f_flags & O_TRUNC)) 1384 (file->f_flags & O_TRUNC))
1212		cleanup_all_probes();
1385		release_all_trace_probes();
1213 1386
1214 return seq_open(file, &probes_seq_op); 1387 return seq_open(file, &probes_seq_op);
1215} 1388}
@@ -1397,7 +1570,8 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
1397 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); 1570 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1398 1571
1399 if (!filter_current_check_discard(buffer, call, entry, event)) 1572 if (!filter_current_check_discard(buffer, call, entry, event))
1400		trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
1573		trace_nowake_buffer_unlock_commit_regs(buffer, event,
1574 irq_flags, pc, regs);
1401} 1575}
1402 1576
1403/* Kretprobe handler */ 1577/* Kretprobe handler */
@@ -1429,7 +1603,8 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
1429 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); 1603 store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1430 1604
1431 if (!filter_current_check_discard(buffer, call, entry, event)) 1605 if (!filter_current_check_discard(buffer, call, entry, event))
1432		trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
1606		trace_nowake_buffer_unlock_commit_regs(buffer, event,
1607 irq_flags, pc, regs);
1433} 1608}
1434 1609
1435/* Event entry printers */ 1610/* Event entry printers */
@@ -1511,30 +1686,6 @@ partial:
1511 return TRACE_TYPE_PARTIAL_LINE; 1686 return TRACE_TYPE_PARTIAL_LINE;
1512} 1687}
1513 1688
1514static int probe_event_enable(struct ftrace_event_call *call)
1515{
1516 struct trace_probe *tp = (struct trace_probe *)call->data;
1517
1518 tp->flags |= TP_FLAG_TRACE;
1519 if (probe_is_return(tp))
1520 return enable_kretprobe(&tp->rp);
1521 else
1522 return enable_kprobe(&tp->rp.kp);
1523}
1524
1525static void probe_event_disable(struct ftrace_event_call *call)
1526{
1527 struct trace_probe *tp = (struct trace_probe *)call->data;
1528
1529 tp->flags &= ~TP_FLAG_TRACE;
1530 if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) {
1531 if (probe_is_return(tp))
1532 disable_kretprobe(&tp->rp);
1533 else
1534 disable_kprobe(&tp->rp.kp);
1535 }
1536}
1537
1538#undef DEFINE_FIELD 1689#undef DEFINE_FIELD
1539#define DEFINE_FIELD(type, item, name, is_signed) \ 1690#define DEFINE_FIELD(type, item, name, is_signed) \
1540 do { \ 1691 do { \
@@ -1596,7 +1747,7 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)
1596 1747
1597 const char *fmt, *arg; 1748 const char *fmt, *arg;
1598 1749
1599	if (!probe_is_return(tp)) {
1750	if (!trace_probe_is_return(tp)) {
1600 fmt = "(%lx)"; 1751 fmt = "(%lx)";
1601 arg = "REC->" FIELD_STRING_IP; 1752 arg = "REC->" FIELD_STRING_IP;
1602 } else { 1753 } else {
@@ -1713,49 +1864,25 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
1713 head = this_cpu_ptr(call->perf_events); 1864 head = this_cpu_ptr(call->perf_events);
1714 perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head); 1865 perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head);
1715} 1866}
1716
1717static int probe_perf_enable(struct ftrace_event_call *call)
1718{
1719 struct trace_probe *tp = (struct trace_probe *)call->data;
1720
1721 tp->flags |= TP_FLAG_PROFILE;
1722
1723 if (probe_is_return(tp))
1724 return enable_kretprobe(&tp->rp);
1725 else
1726 return enable_kprobe(&tp->rp.kp);
1727}
1728
1729static void probe_perf_disable(struct ftrace_event_call *call)
1730{
1731 struct trace_probe *tp = (struct trace_probe *)call->data;
1732
1733 tp->flags &= ~TP_FLAG_PROFILE;
1734
1735 if (!(tp->flags & TP_FLAG_TRACE)) {
1736 if (probe_is_return(tp))
1737 disable_kretprobe(&tp->rp);
1738 else
1739 disable_kprobe(&tp->rp.kp);
1740 }
1741}
1742#endif /* CONFIG_PERF_EVENTS */ 1867#endif /* CONFIG_PERF_EVENTS */
1743 1868
1744static __kprobes 1869static __kprobes
1745int kprobe_register(struct ftrace_event_call *event, enum trace_reg type) 1870int kprobe_register(struct ftrace_event_call *event, enum trace_reg type)
1746{ 1871{
1872 struct trace_probe *tp = (struct trace_probe *)event->data;
1873
1747 switch (type) { 1874 switch (type) {
1748 case TRACE_REG_REGISTER: 1875 case TRACE_REG_REGISTER:
1749		return probe_event_enable(event);
1876		return enable_trace_probe(tp, TP_FLAG_TRACE);
1750 case TRACE_REG_UNREGISTER: 1877 case TRACE_REG_UNREGISTER:
1751		probe_event_disable(event);
1878		disable_trace_probe(tp, TP_FLAG_TRACE);
1752 return 0; 1879 return 0;
1753 1880
1754#ifdef CONFIG_PERF_EVENTS 1881#ifdef CONFIG_PERF_EVENTS
1755 case TRACE_REG_PERF_REGISTER: 1882 case TRACE_REG_PERF_REGISTER:
1756		return probe_perf_enable(event);
1883		return enable_trace_probe(tp, TP_FLAG_PROFILE);
1757 case TRACE_REG_PERF_UNREGISTER: 1884 case TRACE_REG_PERF_UNREGISTER:
1758		probe_perf_disable(event);
1885		disable_trace_probe(tp, TP_FLAG_PROFILE);
1759 return 0; 1886 return 0;
1760#endif 1887#endif
1761 } 1888 }
@@ -1805,7 +1932,7 @@ static int register_probe_event(struct trace_probe *tp)
1805 1932
1806 /* Initialize ftrace_event_call */ 1933 /* Initialize ftrace_event_call */
1807 INIT_LIST_HEAD(&call->class->fields); 1934 INIT_LIST_HEAD(&call->class->fields);
1808	if (probe_is_return(tp)) {
1935	if (trace_probe_is_return(tp)) {
1809 call->event.funcs = &kretprobe_funcs; 1936 call->event.funcs = &kretprobe_funcs;
1810 call->class->define_fields = kretprobe_event_define_fields; 1937 call->class->define_fields = kretprobe_event_define_fields;
1811 } else { 1938 } else {
@@ -1844,6 +1971,9 @@ static __init int init_kprobe_trace(void)
1844 struct dentry *d_tracer; 1971 struct dentry *d_tracer;
1845 struct dentry *entry; 1972 struct dentry *entry;
1846 1973
1974 if (register_module_notifier(&trace_probe_module_nb))
1975 return -EINVAL;
1976
1847 d_tracer = tracing_init_dentry(); 1977 d_tracer = tracing_init_dentry();
1848 if (!d_tracer) 1978 if (!d_tracer)
1849 return 0; 1979 return 0;
@@ -1897,12 +2027,12 @@ static __init int kprobe_trace_self_tests_init(void)
1897 warn++; 2027 warn++;
1898 } else { 2028 } else {
1899 /* Enable trace point */ 2029 /* Enable trace point */
1900		tp = find_probe_event("testprobe", KPROBE_EVENT_SYSTEM);
2030		tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM);
1901 if (WARN_ON_ONCE(tp == NULL)) { 2031 if (WARN_ON_ONCE(tp == NULL)) {
1902 pr_warning("error on getting new probe.\n"); 2032 pr_warning("error on getting new probe.\n");
1903 warn++; 2033 warn++;
1904 } else 2034 } else
1905			probe_event_enable(&tp->call);
2035			enable_trace_probe(tp, TP_FLAG_TRACE);
1906 } 2036 }
1907 2037
1908 ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target " 2038 ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
@@ -1912,12 +2042,12 @@ static __init int kprobe_trace_self_tests_init(void)
1912 warn++; 2042 warn++;
1913 } else { 2043 } else {
1914 /* Enable trace point */ 2044 /* Enable trace point */
1915		tp = find_probe_event("testprobe2", KPROBE_EVENT_SYSTEM);
2045		tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM);
1916 if (WARN_ON_ONCE(tp == NULL)) { 2046 if (WARN_ON_ONCE(tp == NULL)) {
1917 pr_warning("error on getting new probe.\n"); 2047 pr_warning("error on getting new probe.\n");
1918 warn++; 2048 warn++;
1919 } else 2049 } else
1920			probe_event_enable(&tp->call);
2050			enable_trace_probe(tp, TP_FLAG_TRACE);
1921 } 2051 }
1922 2052
1923 if (warn) 2053 if (warn)
@@ -1938,7 +2068,7 @@ static __init int kprobe_trace_self_tests_init(void)
1938 } 2068 }
1939 2069
1940end: 2070end:
1941	cleanup_all_probes();
2071	release_all_trace_probes();
1942 if (warn) 2072 if (warn)
1943 pr_cont("NG: Some tests are failed. Please check them.\n"); 2073 pr_cont("NG: Some tests are failed. Please check them.\n");
1944 else 2074 else
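The module notifier added above is what makes "p module:symbol" probes usable before the module is loaded: a registration failure with -ENOENT on a module symbol is tolerated, and MODULE_STATE_COMING re-arms the probe once the target text exists. A sketch of the shape of such a notifier; example_module_callback and example_nb are illustrative names:

static int example_module_callback(struct notifier_block *nb,
				   unsigned long val, void *data)
{
	struct module *mod = data;

	if (val != MODULE_STATE_COMING)
		return NOTIFY_DONE;

	/* under your own lock: for each probe whose symbol reads
	 * "mod->name:sym", unregister it and register it again */

	return NOTIFY_DONE;
}

static struct notifier_block example_nb = {
	.notifier_call	= example_module_callback,
	.priority	= 1,	/* run after kprobes' own module callback */
};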
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index e37de492a9e1..51999309a6cf 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -1107,19 +1107,20 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,
1107{ 1107{
1108 struct stack_entry *field; 1108 struct stack_entry *field;
1109 struct trace_seq *s = &iter->seq; 1109 struct trace_seq *s = &iter->seq;
1110	int i;
1110	unsigned long *p;
1111 unsigned long *end;
1111 1112
1112 trace_assign_type(field, iter->ent); 1113 trace_assign_type(field, iter->ent);
1114 end = (unsigned long *)((long)iter->ent + iter->ent_size);
1113 1115
1114 if (!trace_seq_puts(s, "<stack trace>\n")) 1116 if (!trace_seq_puts(s, "<stack trace>\n"))
1115 goto partial; 1117 goto partial;
1116	for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1117		if (!field->caller[i] || (field->caller[i] == ULONG_MAX))
1118			break;
1118
1119	for (p = field->caller; p && *p != ULONG_MAX && p < end; p++) {
1119 if (!trace_seq_puts(s, " => ")) 1120 if (!trace_seq_puts(s, " => "))
1120 goto partial; 1121 goto partial;
1121 1122
1122		if (!seq_print_ip_sym(s, field->caller[i], flags))
1123		if (!seq_print_ip_sym(s, *p, flags))
1123 goto partial; 1124 goto partial;
1124 if (!trace_seq_puts(s, "\n")) 1125 if (!trace_seq_puts(s, "\n"))
1125 goto partial; 1126 goto partial;
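With caller[] now dynamically sized, trace_stack_print() bounds its walk by the record size instead of a compile-time constant. The idiom in isolation, error handling elided, names as in the hunk above:

	unsigned long *p = field->caller;
	unsigned long *end = (unsigned long *)((long)iter->ent + iter->ent_size);

	/* stop at the ULONG_MAX terminator or at the end of the
	 * record, whichever comes first */
	for (; p && *p != ULONG_MAX && p < end; p++)
		seq_print_ip_sym(s, *p, flags);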
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index f029dd4fd2ca..e4a70c0c71b6 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -227,7 +227,9 @@ static void wakeup_trace_close(struct trace_iterator *iter)
227 graph_trace_close(iter); 227 graph_trace_close(iter);
228} 228}
229 229
230#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC) 230#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_PROC | \
231 TRACE_GRAPH_PRINT_ABS_TIME | \
232 TRACE_GRAPH_PRINT_DURATION)
231 233
232static enum print_line_t wakeup_print_line(struct trace_iterator *iter) 234static enum print_line_t wakeup_print_line(struct trace_iterator *iter)
233{ 235{
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index b0b53b8e4c25..77575b386d97 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -156,20 +156,11 @@ stack_max_size_write(struct file *filp, const char __user *ubuf,
156{ 156{
157 long *ptr = filp->private_data; 157 long *ptr = filp->private_data;
158 unsigned long val, flags; 158 unsigned long val, flags;
159 char buf[64];
160 int ret; 159 int ret;
161 int cpu; 160 int cpu;
162 161
163 if (count >= sizeof(buf)) 162 ret = kstrtoul_from_user(ubuf, count, 10, &val);
164 return -EINVAL; 163 if (ret)
165
166 if (copy_from_user(&buf, ubuf, count))
167 return -EFAULT;
168
169 buf[count] = 0;
170
171 ret = strict_strtoul(buf, 10, &val);
172 if (ret < 0)
173 return ret; 164 return ret;
174 165
175 local_irq_save(flags); 166 local_irq_save(flags);
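kstrtoul_from_user() folds the length check, copy_from_user(), NUL termination and string-to-ulong conversion into one helper, which is what lets the hunk above drop the 64-byte stack buffer. The pattern side by side (a sketch of the fragment inside stack_max_size_write(), with ret and val as in the hunk):

    /* before: bounce through a bounded stack buffer */
    char buf[64];
    if (count >= sizeof(buf))
            return -EINVAL;
    if (copy_from_user(buf, ubuf, count))
            return -EFAULT;
    buf[count] = 0;
    ret = strict_strtoul(buf, 10, &val);

    /* after: one call does all of the above */
    ret = kstrtoul_from_user(ubuf, count, 10, &val);
    if (ret)
            return ret;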
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 3d0c56ad4792..36491cd5b7d4 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -200,6 +200,7 @@ static int is_softlockup(unsigned long touch_ts)
200} 200}
201 201
202#ifdef CONFIG_HARDLOCKUP_DETECTOR 202#ifdef CONFIG_HARDLOCKUP_DETECTOR
203
203static struct perf_event_attr wd_hw_attr = { 204static struct perf_event_attr wd_hw_attr = {
204 .type = PERF_TYPE_HARDWARE, 205 .type = PERF_TYPE_HARDWARE,
205 .config = PERF_COUNT_HW_CPU_CYCLES, 206 .config = PERF_COUNT_HW_CPU_CYCLES,
@@ -209,7 +210,7 @@ static struct perf_event_attr wd_hw_attr = {
209}; 210};
210 211
211/* Callback function for perf event subsystem */ 212/* Callback function for perf event subsystem */
212static void watchdog_overflow_callback(struct perf_event *event, int nmi, 213static void watchdog_overflow_callback(struct perf_event *event,
213 struct perf_sample_data *data, 214 struct perf_sample_data *data,
214 struct pt_regs *regs) 215 struct pt_regs *regs)
215{ 216{
@@ -368,10 +369,11 @@ static int watchdog_nmi_enable(int cpu)
368 if (event != NULL) 369 if (event != NULL)
369 goto out_enable; 370 goto out_enable;
370 371
371 /* Try to register using hardware perf events */
372 wd_attr = &wd_hw_attr; 372 wd_attr = &wd_hw_attr;
373 wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh); 373 wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
374 event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback); 374
375 /* Try to register using hardware perf events */
376 event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
375 if (!IS_ERR(event)) { 377 if (!IS_ERR(event)) {
376 printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n"); 378 printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n");
377 goto out_save; 379 goto out_save;
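This is part of the series-wide change that drops the 'int nmi' argument from perf overflow handlers and adds a context parameter to the counter-creation API. The resulting shape as used by the watchdog (the NULL before the callback is the task, i.e. a per-CPU counter; the trailing NULL is the new context argument):

    static void watchdog_overflow_callback(struct perf_event *event,
                                           struct perf_sample_data *data,
                                           struct pt_regs *regs);

    event = perf_event_create_kernel_counter(wd_attr, cpu, NULL /* task */,
                                             watchdog_overflow_callback,
                                             NULL /* context */);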
diff --git a/samples/hw_breakpoint/data_breakpoint.c b/samples/hw_breakpoint/data_breakpoint.c
index 063653955f9f..ef7f32291852 100644
--- a/samples/hw_breakpoint/data_breakpoint.c
+++ b/samples/hw_breakpoint/data_breakpoint.c
@@ -41,7 +41,7 @@ module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO);
41MODULE_PARM_DESC(ksym, "Kernel symbol to monitor; this module will report any" 41MODULE_PARM_DESC(ksym, "Kernel symbol to monitor; this module will report any"
42 " write operations on the kernel symbol"); 42 " write operations on the kernel symbol");
43 43
44static void sample_hbp_handler(struct perf_event *bp, int nmi, 44static void sample_hbp_handler(struct perf_event *bp,
45 struct perf_sample_data *data, 45 struct perf_sample_data *data,
46 struct pt_regs *regs) 46 struct pt_regs *regs)
47{ 47{
@@ -60,7 +60,7 @@ static int __init hw_break_module_init(void)
60 attr.bp_len = HW_BREAKPOINT_LEN_4; 60 attr.bp_len = HW_BREAKPOINT_LEN_4;
61 attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; 61 attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
62 62
63 sample_hbp = register_wide_hw_breakpoint(&attr, sample_hbp_handler); 63 sample_hbp = register_wide_hw_breakpoint(&attr, sample_hbp_handler, NULL);
64 if (IS_ERR((void __force *)sample_hbp)) { 64 if (IS_ERR((void __force *)sample_hbp)) {
65 ret = PTR_ERR((void __force *)sample_hbp); 65 ret = PTR_ERR((void __force *)sample_hbp);
66 goto fail; 66 goto fail;
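The sample module gets the same treatment: the handler loses the nmi flag and register_wide_hw_breakpoint() gains the context pointer (NULL here, since the sample does not use it). Usage is unchanged, e.g. insmod data_breakpoint.ko ksym=pid_max (symbol name illustrative).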
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt
index 6f5a498608b2..85c5f026930d 100644
--- a/tools/perf/Documentation/perf-annotate.txt
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -66,6 +66,12 @@ OPTIONS
66 used. This interface starts by centering on the line with the most 66 used. This interface starts by centering on the line with the most
67 samples; TAB/UNTAB cycles through the lines with more samples. 67 samples; TAB/UNTAB cycles through the lines with more samples.
68 68
69-c::
70--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
71 be provided as a comma-separated list with no space: 0,1. Ranges of
72 CPUs are specified with -: 0-2. Default is to report samples on all
73 CPUs.
74
69SEE ALSO 75SEE ALSO
70-------- 76--------
71linkperf:perf-record[1], linkperf:perf-report[1] 77linkperf:perf-record[1], linkperf:perf-report[1]
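For instance (illustrative command), perf annotate -c 0,2-3 --stdio limits annotation to samples taken on CPUs 0, 2 and 3.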
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 02bafce4b341..2780d9ce48bf 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -34,9 +34,11 @@ OPTIONS
34 Specify vmlinux path which has debuginfo (Dwarf binary). 34 Specify vmlinux path which has debuginfo (Dwarf binary).
35 35
36-m:: 36-m::
37--module=MODNAME:: 37--module=MODNAME|PATH::
38 Specify module name in which perf-probe searches probe points 38 Specify module name in which perf-probe searches probe points
39 or lines. 39 or lines. If a path to a module file is passed, perf-probe
 40 treats it as an offline module (this means you can add a probe to
 41 a module which has not been loaded yet).
40 42
41-s:: 43-s::
42--source=PATH:: 44--source=PATH::
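For example (module path illustrative), perf probe -m /lib/modules/$(uname -r)/kernel/fs/ext4/ext4.ko --add ext4_sync_file adds a probe to a module that need not be loaded yet.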
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 8ba03d6e5398..04253c07d19a 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -80,15 +80,24 @@ OPTIONS
80--dump-raw-trace:: 80--dump-raw-trace::
81 Dump raw trace in ASCII. 81 Dump raw trace in ASCII.
82 82
83-g [type,min]:: 83-g [type,min,order]::
84--call-graph:: 84--call-graph::
85 Display call chains using type and min percent threshold. 85 Display call chains using type, min percent threshold and order.
86 type can be either: 86 type can be either:
87 - flat: single column, linear exposure of call chains. 87 - flat: single column, linear exposure of call chains.
88 - graph: use a graph tree, displaying absolute overhead rates. 88 - graph: use a graph tree, displaying absolute overhead rates.
89 - fractal: like graph, but displays relative rates. Each branch of 89 - fractal: like graph, but displays relative rates. Each branch of
90 the tree is considered as a new profiled object. + 90 the tree is considered as a new profiled object. +
91 Default: fractal,0.5. 91
92 order can be either:
92 93 - callee: callee-based call graph.
93 94 - caller: inverted, caller-based call graph.
95
96 Default: fractal,0.5,callee.
97
98-G::
99--inverted::
100 Alias for an inverted, caller-based call graph.
92 101
93--pretty=<key>:: 102--pretty=<key>::
94 Pretty printing style. key: normal, raw 103 Pretty printing style. key: normal, raw
@@ -119,6 +128,12 @@ OPTIONS
119--symfs=<directory>:: 128--symfs=<directory>::
120 Look for files with symbols relative to this directory. 129 Look for files with symbols relative to this directory.
121 130
131-c::
132--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
133 be provided as a comma-separated list with no space: 0,1. Ranges of
134 CPUs are specified with -: 0-2. Default is to report samples on all
135 CPUs.
136
122SEE ALSO 137SEE ALSO
123-------- 138--------
124linkperf:perf-stat[1] 139linkperf:perf-stat[1]
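For example, perf report -g graph,0.5,caller (or, for the order part alone, perf report -G) inverts the call graph so callers are shown above their callees, and perf report -c 0-2 keeps only samples from CPUs 0, 1 and 2 (commands illustrative).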
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 86c87e214b11..db017867d9e8 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -115,10 +115,10 @@ OPTIONS
115-f:: 115-f::
116--fields:: 116--fields::
117 Comma separated list of fields to print. Options are: 117 Comma separated list of fields to print. Options are:
118 comm, tid, pid, time, cpu, event, trace, sym. Field 118 comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr.
119 list can be prepended with the type, trace, sw or hw, 119 Field list can be prepended with the type, trace, sw or hw,
120 to indicate to which event type the field list applies. 120 to indicate to which event type the field list applies.
121 e.g., -f sw:comm,tid,time,sym and -f trace:time,cpu,trace 121 e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace
122 122
123 perf script -f <fields> 123 perf script -f <fields>
124 124
@@ -132,17 +132,17 @@ OPTIONS
132 The arguments are processed in the order received. A later usage can 132 The arguments are processed in the order received. A later usage can
133 reset a prior request. e.g.: 133 reset a prior request. e.g.:
134 134
135 -f trace: -f comm,tid,time,sym 135 -f trace: -f comm,tid,time,ip,sym
136 136
137 The first -f suppresses trace events (field list is ""), but then the 137 The first -f suppresses trace events (field list is ""), but then the
138 second invocation sets the fields to comm,tid,time,sym. In this case a 138 second invocation sets the fields to comm,tid,time,ip,sym. In this case a
139 warning is given to the user: 139 warning is given to the user:
140 140
141 "Overriding previous field request for all events." 141 "Overriding previous field request for all events."
142 142
143 Alternatively, consider the order: 143 Alternatively, consider the order:
144 144
145 -f comm,tid,time,sym -f trace: 145 -f comm,tid,time,ip,sym -f trace:
146 146
147 The first -f sets the fields for all events and the second -f 147 The first -f sets the fields for all events and the second -f
148 suppresses trace events. The user is given a warning message about 148 suppresses trace events. The user is given a warning message about
@@ -182,6 +182,12 @@ OPTIONS
182--hide-call-graph:: 182--hide-call-graph::
183 When printing symbols do not display call chain. 183 When printing symbols do not display call chain.
184 184
185-c::
186--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
187 be provided as a comma-separated list with no space: 0,1. Ranges of
188 CPUs are specified with -: 0-2. Default is to report samples on all
189 CPUs.
190
185SEE ALSO 191SEE ALSO
186-------- 192--------
187linkperf:perf-record[1], linkperf:perf-script-perl[1], 193linkperf:perf-record[1], linkperf:perf-script-perl[1],
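For instance (illustrative), perf script -f comm,tid,time,ip,sym,dso prints resolved symbols with their DSOs; the addr field additionally requires the trace to have been recorded with sample addresses (perf record -d).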
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 940257b5774e..d0861bbd1d94 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -279,6 +279,7 @@ LIB_H += util/thread.h
279LIB_H += util/thread_map.h 279LIB_H += util/thread_map.h
280LIB_H += util/trace-event.h 280LIB_H += util/trace-event.h
281LIB_H += util/probe-finder.h 281LIB_H += util/probe-finder.h
282LIB_H += util/dwarf-aux.h
282LIB_H += util/probe-event.h 283LIB_H += util/probe-event.h
283LIB_H += util/pstack.h 284LIB_H += util/pstack.h
284LIB_H += util/cpumap.h 285LIB_H += util/cpumap.h
@@ -435,6 +436,7 @@ else
435 BASIC_CFLAGS += -DDWARF_SUPPORT 436 BASIC_CFLAGS += -DDWARF_SUPPORT
436 EXTLIBS += -lelf -ldw 437 EXTLIBS += -lelf -ldw
437 LIB_OBJS += $(OUTPUT)util/probe-finder.o 438 LIB_OBJS += $(OUTPUT)util/probe-finder.o
439 LIB_OBJS += $(OUTPUT)util/dwarf-aux.o
438endif # PERF_HAVE_DWARF_REGS 440endif # PERF_HAVE_DWARF_REGS
439endif # NO_DWARF 441endif # NO_DWARF
440 442
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 7b139e1e7e86..555aefd7fe01 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -28,6 +28,8 @@
28#include "util/hist.h" 28#include "util/hist.h"
29#include "util/session.h" 29#include "util/session.h"
30 30
31#include <linux/bitmap.h>
32
31static char const *input_name = "perf.data"; 33static char const *input_name = "perf.data";
32 34
33static bool force, use_tui, use_stdio; 35static bool force, use_tui, use_stdio;
@@ -38,6 +40,9 @@ static bool print_line;
38 40
39static const char *sym_hist_filter; 41static const char *sym_hist_filter;
40 42
43static const char *cpu_list;
44static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
45
41static int perf_evlist__add_sample(struct perf_evlist *evlist, 46static int perf_evlist__add_sample(struct perf_evlist *evlist,
42 struct perf_sample *sample, 47 struct perf_sample *sample,
43 struct perf_evsel *evsel, 48 struct perf_evsel *evsel,
@@ -90,6 +95,9 @@ static int process_sample_event(union perf_event *event,
90 return -1; 95 return -1;
91 } 96 }
92 97
98 if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
99 return 0;
100
93 if (!al.filtered && 101 if (!al.filtered &&
94 perf_evlist__add_sample(session->evlist, sample, evsel, &al)) { 102 perf_evlist__add_sample(session->evlist, sample, evsel, &al)) {
95 pr_warning("problem incrementing symbol count, " 103 pr_warning("problem incrementing symbol count, "
@@ -177,6 +185,12 @@ static int __cmd_annotate(void)
177 if (session == NULL) 185 if (session == NULL)
178 return -ENOMEM; 186 return -ENOMEM;
179 187
188 if (cpu_list) {
189 ret = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap);
190 if (ret)
191 goto out_delete;
192 }
193
180 ret = perf_session__process_events(session, &event_ops); 194 ret = perf_session__process_events(session, &event_ops);
181 if (ret) 195 if (ret)
182 goto out_delete; 196 goto out_delete;
@@ -252,6 +266,7 @@ static const struct option options[] = {
252 "print matching source lines (may be slow)"), 266 "print matching source lines (may be slow)"),
253 OPT_BOOLEAN('P', "full-paths", &full_paths, 267 OPT_BOOLEAN('P', "full-paths", &full_paths,
254 "Don't shorten the displayed pathnames"), 268 "Don't shorten the displayed pathnames"),
269 OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
255 OPT_END() 270 OPT_END()
256}; 271};
257 272
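The CPU filter follows the same pattern in annotate, report and script: parse the list into a bitmap once per session, then discard non-matching samples early. A condensed sketch, with names as in the hunks:

    static const char *cpu_list;                  /* set by -c/--cpu */
    static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);

    /* once, after opening the session */
    if (cpu_list && perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap))
            goto out_delete;

    /* per sample */
    if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
            return 0;       /* sample is on an unselected CPU */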
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 2c0e64d0b4aa..5f2a5c7046df 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -242,7 +242,8 @@ static const struct option options[] = {
242 OPT_STRING('s', "source", &symbol_conf.source_prefix, 242 OPT_STRING('s', "source", &symbol_conf.source_prefix,
243 "directory", "path to kernel source"), 243 "directory", "path to kernel source"),
244 OPT_STRING('m', "module", &params.target_module, 244 OPT_STRING('m', "module", &params.target_module,
245 "modname", "target module name"), 245 "modname|path",
246 "target module name (for online) or path (for offline)"),
246#endif 247#endif
247 OPT__DRY_RUN(&probe_event_dry_run), 248 OPT__DRY_RUN(&probe_event_dry_run),
248 OPT_INTEGER('\0', "max-probes", &params.max_probe_points, 249 OPT_INTEGER('\0', "max-probes", &params.max_probe_points,
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 8e2c85798185..80dc5b790e47 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -740,7 +740,7 @@ static bool force, append_file;
740const struct option record_options[] = { 740const struct option record_options[] = {
741 OPT_CALLBACK('e', "event", &evsel_list, "event", 741 OPT_CALLBACK('e', "event", &evsel_list, "event",
742 "event selector. use 'perf list' to list available events", 742 "event selector. use 'perf list' to list available events",
743 parse_events), 743 parse_events_option),
744 OPT_CALLBACK(0, "filter", &evsel_list, "filter", 744 OPT_CALLBACK(0, "filter", &evsel_list, "filter",
745 "event filter", parse_filter), 745 "event filter", parse_filter),
746 OPT_INTEGER('p', "pid", &target_pid, 746 OPT_INTEGER('p', "pid", &target_pid,
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 287a173523a7..f854efda7686 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -33,6 +33,8 @@
33#include "util/sort.h" 33#include "util/sort.h"
34#include "util/hist.h" 34#include "util/hist.h"
35 35
36#include <linux/bitmap.h>
37
36static char const *input_name = "perf.data"; 38static char const *input_name = "perf.data";
37 39
38static bool force, use_tui, use_stdio; 40static bool force, use_tui, use_stdio;
@@ -45,9 +47,13 @@ static struct perf_read_values show_threads_values;
45static const char default_pretty_printing_style[] = "normal"; 47static const char default_pretty_printing_style[] = "normal";
46static const char *pretty_printing_style = default_pretty_printing_style; 48static const char *pretty_printing_style = default_pretty_printing_style;
47 49
48static char callchain_default_opt[] = "fractal,0.5"; 50static char callchain_default_opt[] = "fractal,0.5,callee";
51static bool inverted_callchain;
49static symbol_filter_t annotate_init; 52static symbol_filter_t annotate_init;
50 53
54static const char *cpu_list;
55static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
56
51static int perf_session__add_hist_entry(struct perf_session *session, 57static int perf_session__add_hist_entry(struct perf_session *session,
52 struct addr_location *al, 58 struct addr_location *al,
53 struct perf_sample *sample, 59 struct perf_sample *sample,
@@ -116,6 +122,9 @@ static int process_sample_event(union perf_event *event,
116 if (al.filtered || (hide_unresolved && al.sym == NULL)) 122 if (al.filtered || (hide_unresolved && al.sym == NULL))
117 return 0; 123 return 0;
118 124
125 if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
126 return 0;
127
119 if (al.map != NULL) 128 if (al.map != NULL)
120 al.map->dso->hit = 1; 129 al.map->dso->hit = 1;
121 130
@@ -262,6 +271,12 @@ static int __cmd_report(void)
262 if (session == NULL) 271 if (session == NULL)
263 return -ENOMEM; 272 return -ENOMEM;
264 273
274 if (cpu_list) {
275 ret = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap);
276 if (ret)
277 goto out_delete;
278 }
279
265 if (show_threads) 280 if (show_threads)
266 perf_read_values_init(&show_threads_values); 281 perf_read_values_init(&show_threads_values);
267 282
@@ -386,13 +401,29 @@ parse_callchain_opt(const struct option *opt __used, const char *arg,
386 if (!tok) 401 if (!tok)
387 goto setup; 402 goto setup;
388 403
389 tok2 = strtok(NULL, ",");
390 callchain_param.min_percent = strtod(tok, &endptr); 404 callchain_param.min_percent = strtod(tok, &endptr);
391 if (tok == endptr) 405 if (tok == endptr)
392 return -1; 406 return -1;
393 407
394 if (tok2) 408 /* get the print limit */
409 tok2 = strtok(NULL, ",");
410 if (!tok2)
411 goto setup;
412
413 if (tok2[0] != 'c') {
395 callchain_param.print_limit = strtod(tok2, &endptr); 414 callchain_param.print_limit = strtod(tok2, &endptr);
415 tok2 = strtok(NULL, ",");
416 if (!tok2)
417 goto setup;
418 }
419
420 /* get the call chain order */
421 if (!strcmp(tok2, "caller"))
422 callchain_param.order = ORDER_CALLER;
423 else if (!strcmp(tok2, "callee"))
424 callchain_param.order = ORDER_CALLEE;
425 else
426 return -1;
396setup: 427setup:
397 if (callchain_register_param(&callchain_param) < 0) { 428 if (callchain_register_param(&callchain_param) < 0) {
398 fprintf(stderr, "Can't register callchain params\n"); 429 fprintf(stderr, "Can't register callchain params\n");
@@ -436,9 +467,10 @@ static const struct option options[] = {
436 "regex filter to identify parent, see: '--sort parent'"), 467 "regex filter to identify parent, see: '--sort parent'"),
437 OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other, 468 OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other,
438 "Only display entries with parent-match"), 469 "Only display entries with parent-match"),
439 OPT_CALLBACK_DEFAULT('g', "call-graph", NULL, "output_type,min_percent", 470 OPT_CALLBACK_DEFAULT('g', "call-graph", NULL, "output_type,min_percent,call_order",
440 "Display callchains using output_type (graph, flat, fractal, or none) and min percent threshold. " 471 "Display callchains using output_type (graph, flat, fractal, or none), min percent threshold and callchain order. "
441 "Default: fractal,0.5", &parse_callchain_opt, callchain_default_opt), 472 "Default: fractal,0.5,callee", &parse_callchain_opt, callchain_default_opt),
473 OPT_BOOLEAN('G', "inverted", &inverted_callchain, "alias for inverted call graph"),
442 OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]", 474 OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
443 "only consider symbols in these dsos"), 475 "only consider symbols in these dsos"),
444 OPT_STRING('C', "comms", &symbol_conf.comm_list_str, "comm[,comm...]", 476 OPT_STRING('C', "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
@@ -455,6 +487,7 @@ static const struct option options[] = {
455 "Only display entries resolved to a symbol"), 487 "Only display entries resolved to a symbol"),
456 OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", 488 OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
457 "Look for files with symbols relative to this directory"), 489 "Look for files with symbols relative to this directory"),
490 OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
458 OPT_END() 491 OPT_END()
459}; 492};
460 493
@@ -467,6 +500,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
467 else if (use_tui) 500 else if (use_tui)
468 use_browser = 1; 501 use_browser = 1;
469 502
503 if (inverted_callchain)
504 callchain_param.order = ORDER_CALLER;
505
470 if (strcmp(input_name, "-") != 0) 506 if (strcmp(input_name, "-") != 0)
471 setup_browser(true); 507 setup_browser(true);
472 else 508 else
@@ -504,7 +540,14 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
504 if (parent_pattern != default_parent_pattern) { 540 if (parent_pattern != default_parent_pattern) {
505 if (sort_dimension__add("parent") < 0) 541 if (sort_dimension__add("parent") < 0)
506 return -1; 542 return -1;
507 sort_parent.elide = 1; 543
544 /*
545 * Only show the parent fields if we explicitly
546 * sort that way. If we only use parent machinery
547 * for filtering, we don't want it.
548 */
549 if (!strstr(sort_order, "parent"))
550 sort_parent.elide = 1;
508 } else 551 } else
509 symbol_conf.exclude_other = false; 552 symbol_conf.exclude_other = false;
510 553
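Note how parse_callchain_opt() disambiguates the third -g token: a token starting with 'c' ("caller" or "callee") is taken as the order, anything else as the numeric print limit, so both -g fractal,0.5,caller and -g fractal,0.5,100,callee are accepted (examples illustrative). -G simply forces ORDER_CALLER after option parsing.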
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 22747de7234b..09024ec2ab2e 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -13,6 +13,7 @@
13#include "util/util.h" 13#include "util/util.h"
14#include "util/evlist.h" 14#include "util/evlist.h"
15#include "util/evsel.h" 15#include "util/evsel.h"
16#include <linux/bitmap.h>
16 17
17static char const *script_name; 18static char const *script_name;
18static char const *generate_script_lang; 19static char const *generate_script_lang;
@@ -21,6 +22,8 @@ static u64 last_timestamp;
21static u64 nr_unordered; 22static u64 nr_unordered;
22extern const struct option record_options[]; 23extern const struct option record_options[];
23static bool no_callchain; 24static bool no_callchain;
25static const char *cpu_list;
26static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
24 27
25enum perf_output_field { 28enum perf_output_field {
26 PERF_OUTPUT_COMM = 1U << 0, 29 PERF_OUTPUT_COMM = 1U << 0,
@@ -30,7 +33,10 @@ enum perf_output_field {
30 PERF_OUTPUT_CPU = 1U << 4, 33 PERF_OUTPUT_CPU = 1U << 4,
31 PERF_OUTPUT_EVNAME = 1U << 5, 34 PERF_OUTPUT_EVNAME = 1U << 5,
32 PERF_OUTPUT_TRACE = 1U << 6, 35 PERF_OUTPUT_TRACE = 1U << 6,
33 PERF_OUTPUT_SYM = 1U << 7, 36 PERF_OUTPUT_IP = 1U << 7,
37 PERF_OUTPUT_SYM = 1U << 8,
38 PERF_OUTPUT_DSO = 1U << 9,
39 PERF_OUTPUT_ADDR = 1U << 10,
34}; 40};
35 41
36struct output_option { 42struct output_option {
@@ -44,7 +50,10 @@ struct output_option {
44 {.str = "cpu", .field = PERF_OUTPUT_CPU}, 50 {.str = "cpu", .field = PERF_OUTPUT_CPU},
45 {.str = "event", .field = PERF_OUTPUT_EVNAME}, 51 {.str = "event", .field = PERF_OUTPUT_EVNAME},
46 {.str = "trace", .field = PERF_OUTPUT_TRACE}, 52 {.str = "trace", .field = PERF_OUTPUT_TRACE},
53 {.str = "ip", .field = PERF_OUTPUT_IP},
47 {.str = "sym", .field = PERF_OUTPUT_SYM}, 54 {.str = "sym", .field = PERF_OUTPUT_SYM},
55 {.str = "dso", .field = PERF_OUTPUT_DSO},
56 {.str = "addr", .field = PERF_OUTPUT_ADDR},
48}; 57};
49 58
50/* default set to maintain compatibility with current format */ 59/* default set to maintain compatibility with current format */
@@ -60,7 +69,8 @@ static struct {
60 69
61 .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | 70 .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
62 PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | 71 PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
63 PERF_OUTPUT_EVNAME | PERF_OUTPUT_SYM, 72 PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
73 PERF_OUTPUT_SYM | PERF_OUTPUT_DSO,
64 74
65 .invalid_fields = PERF_OUTPUT_TRACE, 75 .invalid_fields = PERF_OUTPUT_TRACE,
66 }, 76 },
@@ -70,7 +80,8 @@ static struct {
70 80
71 .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | 81 .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
72 PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | 82 PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
73 PERF_OUTPUT_EVNAME | PERF_OUTPUT_SYM, 83 PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
84 PERF_OUTPUT_SYM | PERF_OUTPUT_DSO,
74 85
75 .invalid_fields = PERF_OUTPUT_TRACE, 86 .invalid_fields = PERF_OUTPUT_TRACE,
76 }, 87 },
@@ -88,7 +99,8 @@ static struct {
88 99
89 .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | 100 .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
90 PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | 101 PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
91 PERF_OUTPUT_EVNAME | PERF_OUTPUT_SYM, 102 PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
103 PERF_OUTPUT_SYM | PERF_OUTPUT_DSO,
92 104
93 .invalid_fields = PERF_OUTPUT_TRACE, 105 .invalid_fields = PERF_OUTPUT_TRACE,
94 }, 106 },
@@ -157,9 +169,9 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
157 !perf_session__has_traces(session, "record -R")) 169 !perf_session__has_traces(session, "record -R"))
158 return -EINVAL; 170 return -EINVAL;
159 171
160 if (PRINT_FIELD(SYM)) { 172 if (PRINT_FIELD(IP)) {
161 if (perf_event_attr__check_stype(attr, PERF_SAMPLE_IP, "IP", 173 if (perf_event_attr__check_stype(attr, PERF_SAMPLE_IP, "IP",
162 PERF_OUTPUT_SYM)) 174 PERF_OUTPUT_IP))
163 return -EINVAL; 175 return -EINVAL;
164 176
165 if (!no_callchain && 177 if (!no_callchain &&
@@ -167,6 +179,24 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
167 symbol_conf.use_callchain = false; 179 symbol_conf.use_callchain = false;
168 } 180 }
169 181
182 if (PRINT_FIELD(ADDR) &&
183 perf_event_attr__check_stype(attr, PERF_SAMPLE_ADDR, "ADDR",
184 PERF_OUTPUT_ADDR))
185 return -EINVAL;
186
187 if (PRINT_FIELD(SYM) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) {
188 pr_err("Display of symbols requested but neither sample IP nor "
189 "sample address\nis selected. Hence, no addresses to convert "
190 "to symbols.\n");
191 return -EINVAL;
192 }
193 if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) {
194 pr_err("Display of DSO requested but neither sample IP nor "
195 "sample address\nis selected. Hence, no addresses to convert "
196 "to DSO.\n");
197 return -EINVAL;
198 }
199
170 if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) && 200 if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) &&
171 perf_event_attr__check_stype(attr, PERF_SAMPLE_TID, "TID", 201 perf_event_attr__check_stype(attr, PERF_SAMPLE_TID, "TID",
172 PERF_OUTPUT_TID|PERF_OUTPUT_PID)) 202 PERF_OUTPUT_TID|PERF_OUTPUT_PID))
@@ -230,7 +260,7 @@ static void print_sample_start(struct perf_sample *sample,
230 if (PRINT_FIELD(COMM)) { 260 if (PRINT_FIELD(COMM)) {
231 if (latency_format) 261 if (latency_format)
232 printf("%8.8s ", thread->comm); 262 printf("%8.8s ", thread->comm);
233 else if (PRINT_FIELD(SYM) && symbol_conf.use_callchain) 263 else if (PRINT_FIELD(IP) && symbol_conf.use_callchain)
234 printf("%s ", thread->comm); 264 printf("%s ", thread->comm);
235 else 265 else
236 printf("%16s ", thread->comm); 266 printf("%16s ", thread->comm);
@@ -271,6 +301,63 @@ static void print_sample_start(struct perf_sample *sample,
271 } 301 }
272} 302}
273 303
304static bool sample_addr_correlates_sym(struct perf_event_attr *attr)
305{
306 if ((attr->type == PERF_TYPE_SOFTWARE) &&
307 ((attr->config == PERF_COUNT_SW_PAGE_FAULTS) ||
308 (attr->config == PERF_COUNT_SW_PAGE_FAULTS_MIN) ||
309 (attr->config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)))
310 return true;
311
312 return false;
313}
314
315static void print_sample_addr(union perf_event *event,
316 struct perf_sample *sample,
317 struct perf_session *session,
318 struct thread *thread,
319 struct perf_event_attr *attr)
320{
321 struct addr_location al;
322 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
323 const char *symname, *dsoname;
324
325 printf("%16" PRIx64, sample->addr);
326
327 if (!sample_addr_correlates_sym(attr))
328 return;
329
330 thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION,
331 event->ip.pid, sample->addr, &al);
332 if (!al.map)
333 thread__find_addr_map(thread, session, cpumode, MAP__VARIABLE,
334 event->ip.pid, sample->addr, &al);
335
336 al.cpu = sample->cpu;
337 al.sym = NULL;
338
339 if (al.map)
340 al.sym = map__find_symbol(al.map, al.addr, NULL);
341
342 if (PRINT_FIELD(SYM)) {
343 if (al.sym && al.sym->name)
344 symname = al.sym->name;
345 else
346 symname = "";
347
348 printf(" %16s", symname);
349 }
350
351 if (PRINT_FIELD(DSO)) {
352 if (al.map && al.map->dso && al.map->dso->name)
353 dsoname = al.map->dso->name;
354 else
355 dsoname = "";
356
357 printf(" (%s)", dsoname);
358 }
359}
360
274static void process_event(union perf_event *event __unused, 361static void process_event(union perf_event *event __unused,
275 struct perf_sample *sample, 362 struct perf_sample *sample,
276 struct perf_evsel *evsel, 363 struct perf_evsel *evsel,
@@ -288,12 +375,16 @@ static void process_event(union perf_event *event __unused,
288 print_trace_event(sample->cpu, sample->raw_data, 375 print_trace_event(sample->cpu, sample->raw_data,
289 sample->raw_size); 376 sample->raw_size);
290 377
291 if (PRINT_FIELD(SYM)) { 378 if (PRINT_FIELD(ADDR))
379 print_sample_addr(event, sample, session, thread, attr);
380
381 if (PRINT_FIELD(IP)) {
292 if (!symbol_conf.use_callchain) 382 if (!symbol_conf.use_callchain)
293 printf(" "); 383 printf(" ");
294 else 384 else
295 printf("\n"); 385 printf("\n");
296 perf_session__print_symbols(event, sample, session); 386 perf_session__print_ip(event, sample, session,
387 PRINT_FIELD(SYM), PRINT_FIELD(DSO));
297 } 388 }
298 389
299 printf("\n"); 390 printf("\n");
@@ -365,6 +456,10 @@ static int process_sample_event(union perf_event *event,
365 last_timestamp = sample->time; 456 last_timestamp = sample->time;
366 return 0; 457 return 0;
367 } 458 }
459
460 if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
461 return 0;
462
368 scripting_ops->process_event(event, sample, evsel, session, thread); 463 scripting_ops->process_event(event, sample, evsel, session, thread);
369 464
370 session->hists.stats.total_period += sample->period; 465 session->hists.stats.total_period += sample->period;
@@ -985,8 +1080,9 @@ static const struct option options[] = {
985 OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", 1080 OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
986 "Look for files with symbols relative to this directory"), 1081 "Look for files with symbols relative to this directory"),
987 OPT_CALLBACK('f', "fields", NULL, "str", 1082 OPT_CALLBACK('f', "fields", NULL, "str",
988 "comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace,raw. Fields: comm,tid,pid,time,cpu,event,trace,sym", 1083 "comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace,raw. Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr",
989 parse_output_fields), 1084 parse_output_fields),
1085 OPT_STRING('c', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
990 1086
991 OPT_END() 1087 OPT_END()
992}; 1088};
@@ -1167,6 +1263,11 @@ int cmd_script(int argc, const char **argv, const char *prefix __used)
1167 if (session == NULL) 1263 if (session == NULL)
1168 return -ENOMEM; 1264 return -ENOMEM;
1169 1265
1266 if (cpu_list) {
1267 if (perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap))
1268 return -1;
1269 }
1270
1170 if (!no_callchain) 1271 if (!no_callchain)
1171 symbol_conf.use_callchain = true; 1272 symbol_conf.use_callchain = true;
1172 else 1273 else
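sample_addr_correlates_sym() gates symbol/DSO resolution of the new addr field: resolution is only attempted for the page-fault software events, whose sampled address is meaningful to look up through the thread's maps. To see it in action one would record with sample addresses first, e.g. perf record -d -e page-faults ... followed by perf script -f ip,sym,addr,dso (commands illustrative).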
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a9f06715e44d..1ad04ce29c34 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -61,6 +61,8 @@
61#include <locale.h> 61#include <locale.h>
62 62
63#define DEFAULT_SEPARATOR " " 63#define DEFAULT_SEPARATOR " "
64#define CNTR_NOT_SUPPORTED "<not supported>"
65#define CNTR_NOT_COUNTED "<not counted>"
64 66
65static struct perf_event_attr default_attrs[] = { 67static struct perf_event_attr default_attrs[] = {
66 68
@@ -448,6 +450,7 @@ static int run_perf_stat(int argc __used, const char **argv)
448 if (verbose) 450 if (verbose)
449 ui__warning("%s event is not supported by the kernel.\n", 451 ui__warning("%s event is not supported by the kernel.\n",
450 event_name(counter)); 452 event_name(counter));
453 counter->supported = false;
451 continue; 454 continue;
452 } 455 }
453 456
@@ -466,6 +469,7 @@ static int run_perf_stat(int argc __used, const char **argv)
466 die("Not all events could be opened.\n"); 469 die("Not all events could be opened.\n");
467 return -1; 470 return -1;
468 } 471 }
472 counter->supported = true;
469 } 473 }
470 474
471 if (perf_evlist__set_filters(evsel_list)) { 475 if (perf_evlist__set_filters(evsel_list)) {
@@ -513,7 +517,10 @@ static void print_noise_pct(double total, double avg)
513 if (avg) 517 if (avg)
514 pct = 100.0*total/avg; 518 pct = 100.0*total/avg;
515 519
516 fprintf(stderr, " ( +-%6.2f%% )", pct); 520 if (csv_output)
521 fprintf(stderr, "%s%.2f%%", csv_sep, pct);
522 else
523 fprintf(stderr, " ( +-%6.2f%% )", pct);
517} 524}
518 525
519static void print_noise(struct perf_evsel *evsel, double avg) 526static void print_noise(struct perf_evsel *evsel, double avg)
@@ -861,7 +868,7 @@ static void print_counter_aggr(struct perf_evsel *counter)
861 if (scaled == -1) { 868 if (scaled == -1) {
862 fprintf(stderr, "%*s%s%*s", 869 fprintf(stderr, "%*s%s%*s",
863 csv_output ? 0 : 18, 870 csv_output ? 0 : 18,
864 "<not counted>", 871 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
865 csv_sep, 872 csv_sep,
866 csv_output ? 0 : -24, 873 csv_output ? 0 : -24,
867 event_name(counter)); 874 event_name(counter));
@@ -878,13 +885,13 @@ static void print_counter_aggr(struct perf_evsel *counter)
878 else 885 else
879 abs_printout(-1, counter, avg); 886 abs_printout(-1, counter, avg);
880 887
888 print_noise(counter, avg);
889
881 if (csv_output) { 890 if (csv_output) {
882 fputc('\n', stderr); 891 fputc('\n', stderr);
883 return; 892 return;
884 } 893 }
885 894
886 print_noise(counter, avg);
887
888 if (scaled) { 895 if (scaled) {
889 double avg_enabled, avg_running; 896 double avg_enabled, avg_running;
890 897
@@ -914,7 +921,8 @@ static void print_counter(struct perf_evsel *counter)
914 csv_output ? 0 : -4, 921 csv_output ? 0 : -4,
915 evsel_list->cpus->map[cpu], csv_sep, 922 evsel_list->cpus->map[cpu], csv_sep,
916 csv_output ? 0 : 18, 923 csv_output ? 0 : 18,
917 "<not counted>", csv_sep, 924 counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
925 csv_sep,
918 csv_output ? 0 : -24, 926 csv_output ? 0 : -24,
919 event_name(counter)); 927 event_name(counter));
920 928
@@ -1024,7 +1032,7 @@ static int stat__set_big_num(const struct option *opt __used,
1024static const struct option options[] = { 1032static const struct option options[] = {
1025 OPT_CALLBACK('e', "event", &evsel_list, "event", 1033 OPT_CALLBACK('e', "event", &evsel_list, "event",
1026 "event selector. use 'perf list' to list available events", 1034 "event selector. use 'perf list' to list available events",
1027 parse_events), 1035 parse_events_option),
1028 OPT_CALLBACK(0, "filter", &evsel_list, "filter", 1036 OPT_CALLBACK(0, "filter", &evsel_list, "filter",
1029 "event filter", parse_filter), 1037 "event filter", parse_filter),
1030 OPT_BOOLEAN('i', "no-inherit", &no_inherit, 1038 OPT_BOOLEAN('i', "no-inherit", &no_inherit,
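Two user-visible tweaks ride along here: counters that failed at open time are now reported as <not supported> instead of <not counted>, and print_noise() moves ahead of the CSV early return, so with repeated runs the noise percentage also appears in CSV mode, e.g. perf stat -x, -r 3 ... (illustrative).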
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c
index 2da9162262b0..55f4c76f2821 100644
--- a/tools/perf/builtin-test.c
+++ b/tools/perf/builtin-test.c
@@ -12,6 +12,7 @@
12#include "util/parse-events.h" 12#include "util/parse-events.h"
13#include "util/symbol.h" 13#include "util/symbol.h"
14#include "util/thread_map.h" 14#include "util/thread_map.h"
15#include "../../include/linux/hw_breakpoint.h"
15 16
16static long page_size; 17static long page_size;
17 18
@@ -245,8 +246,8 @@ static int trace_event__id(const char *evname)
245 int err = -1, fd; 246 int err = -1, fd;
246 247
247 if (asprintf(&filename, 248 if (asprintf(&filename,
248 "/sys/kernel/debug/tracing/events/syscalls/%s/id", 249 "%s/syscalls/%s/id",
249 evname) < 0) 250 debugfs_path, evname) < 0)
250 return -1; 251 return -1;
251 252
252 fd = open(filename, O_RDONLY); 253 fd = open(filename, O_RDONLY);
@@ -600,6 +601,246 @@ out_free_threads:
600#undef nsyscalls 601#undef nsyscalls
601} 602}
602 603
604#define TEST_ASSERT_VAL(text, cond) \
605do { \
606 if (!(cond)) { \
607 pr_debug("FAILED %s:%d %s\n", __FILE__, __LINE__, text); \
608 return -1; \
609 } \
610} while (0)
611
612static int test__checkevent_tracepoint(struct perf_evlist *evlist)
613{
614 struct perf_evsel *evsel = list_entry(evlist->entries.next,
615 struct perf_evsel, node);
616
617 TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
618 TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type);
619 TEST_ASSERT_VAL("wrong sample_type",
620 (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | PERF_SAMPLE_CPU) ==
621 evsel->attr.sample_type);
622 TEST_ASSERT_VAL("wrong sample_period", 1 == evsel->attr.sample_period);
623 return 0;
624}
625
626static int test__checkevent_tracepoint_multi(struct perf_evlist *evlist)
627{
628 struct perf_evsel *evsel;
629
630 TEST_ASSERT_VAL("wrong number of entries", evlist->nr_entries > 1);
631
632 list_for_each_entry(evsel, &evlist->entries, node) {
633 TEST_ASSERT_VAL("wrong type",
634 PERF_TYPE_TRACEPOINT == evsel->attr.type);
635 TEST_ASSERT_VAL("wrong sample_type",
636 (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | PERF_SAMPLE_CPU)
637 == evsel->attr.sample_type);
638 TEST_ASSERT_VAL("wrong sample_period",
639 1 == evsel->attr.sample_period);
640 }
641 return 0;
642}
643
644static int test__checkevent_raw(struct perf_evlist *evlist)
645{
646 struct perf_evsel *evsel = list_entry(evlist->entries.next,
647 struct perf_evsel, node);
648
649 TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
650 TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
651 TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config);
652 return 0;
653}
654
655static int test__checkevent_numeric(struct perf_evlist *evlist)
656{
657 struct perf_evsel *evsel = list_entry(evlist->entries.next,
658 struct perf_evsel, node);
659
660 TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
661 TEST_ASSERT_VAL("wrong type", 1 == evsel->attr.type);
662 TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config);
663 return 0;
664}
665
666static int test__checkevent_symbolic_name(struct perf_evlist *evlist)
667{
668 struct perf_evsel *evsel = list_entry(evlist->entries.next,
669 struct perf_evsel, node);
670
671 TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
672 TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
673 TEST_ASSERT_VAL("wrong config",
674 PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config);
675 return 0;
676}
677
678static int test__checkevent_symbolic_alias(struct perf_evlist *evlist)
679{
680 struct perf_evsel *evsel = list_entry(evlist->entries.next,
681 struct perf_evsel, node);
682
683 TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
684 TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->attr.type);
685 TEST_ASSERT_VAL("wrong config",
686 PERF_COUNT_SW_PAGE_FAULTS == evsel->attr.config);
687 return 0;
688}
689
690static int test__checkevent_genhw(struct perf_evlist *evlist)
691{
692 struct perf_evsel *evsel = list_entry(evlist->entries.next,
693 struct perf_evsel, node);
694
695 TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
696 TEST_ASSERT_VAL("wrong type", PERF_TYPE_HW_CACHE == evsel->attr.type);
697 TEST_ASSERT_VAL("wrong config", (1 << 16) == evsel->attr.config);
698 return 0;
699}
700
701static int test__checkevent_breakpoint(struct perf_evlist *evlist)
702{
703 struct perf_evsel *evsel = list_entry(evlist->entries.next,
704 struct perf_evsel, node);
705
706 TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
707 TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->attr.type);
708 TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config);
709 TEST_ASSERT_VAL("wrong bp_type", (HW_BREAKPOINT_R | HW_BREAKPOINT_W) ==
710 evsel->attr.bp_type);
711 TEST_ASSERT_VAL("wrong bp_len", HW_BREAKPOINT_LEN_4 ==
712 evsel->attr.bp_len);
713 return 0;
714}
715
716static int test__checkevent_breakpoint_x(struct perf_evlist *evlist)
717{
718 struct perf_evsel *evsel = list_entry(evlist->entries.next,
719 struct perf_evsel, node);
720
721 TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
722 TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->attr.type);
723 TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config);
724 TEST_ASSERT_VAL("wrong bp_type",
725 HW_BREAKPOINT_X == evsel->attr.bp_type);
726 TEST_ASSERT_VAL("wrong bp_len", sizeof(long) == evsel->attr.bp_len);
727 return 0;
728}
729
730static int test__checkevent_breakpoint_r(struct perf_evlist *evlist)
731{
732 struct perf_evsel *evsel = list_entry(evlist->entries.next,
733 struct perf_evsel, node);
734
735 TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
736 TEST_ASSERT_VAL("wrong type",
737 PERF_TYPE_BREAKPOINT == evsel->attr.type);
738 TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config);
739 TEST_ASSERT_VAL("wrong bp_type",
740 HW_BREAKPOINT_R == evsel->attr.bp_type);
741 TEST_ASSERT_VAL("wrong bp_len",
742 HW_BREAKPOINT_LEN_4 == evsel->attr.bp_len);
743 return 0;
744}
745
746static int test__checkevent_breakpoint_w(struct perf_evlist *evlist)
747{
748 struct perf_evsel *evsel = list_entry(evlist->entries.next,
749 struct perf_evsel, node);
750
751 TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
752 TEST_ASSERT_VAL("wrong type",
753 PERF_TYPE_BREAKPOINT == evsel->attr.type);
754 TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config);
755 TEST_ASSERT_VAL("wrong bp_type",
756 HW_BREAKPOINT_W == evsel->attr.bp_type);
757 TEST_ASSERT_VAL("wrong bp_len",
758 HW_BREAKPOINT_LEN_4 == evsel->attr.bp_len);
759 return 0;
760}
761
762static struct test__event_st {
763 const char *name;
764 __u32 type;
765 int (*check)(struct perf_evlist *evlist);
766} test__events[] = {
767 {
768 .name = "syscalls:sys_enter_open",
769 .check = test__checkevent_tracepoint,
770 },
771 {
772 .name = "syscalls:*",
773 .check = test__checkevent_tracepoint_multi,
774 },
775 {
776 .name = "r1",
777 .check = test__checkevent_raw,
778 },
779 {
780 .name = "1:1",
781 .check = test__checkevent_numeric,
782 },
783 {
784 .name = "instructions",
785 .check = test__checkevent_symbolic_name,
786 },
787 {
788 .name = "faults",
789 .check = test__checkevent_symbolic_alias,
790 },
791 {
792 .name = "L1-dcache-load-miss",
793 .check = test__checkevent_genhw,
794 },
795 {
796 .name = "mem:0",
797 .check = test__checkevent_breakpoint,
798 },
799 {
800 .name = "mem:0:x",
801 .check = test__checkevent_breakpoint_x,
802 },
803 {
804 .name = "mem:0:r",
805 .check = test__checkevent_breakpoint_r,
806 },
807 {
808 .name = "mem:0:w",
809 .check = test__checkevent_breakpoint_w,
810 },
811};
812
813#define TEST__EVENTS_CNT (sizeof(test__events) / sizeof(struct test__event_st))
814
815static int test__parse_events(void)
816{
817 struct perf_evlist *evlist;
818 u_int i;
819 int ret = 0;
820
821 for (i = 0; i < TEST__EVENTS_CNT; i++) {
822 struct test__event_st *e = &test__events[i];
823
824 evlist = perf_evlist__new(NULL, NULL);
825 if (evlist == NULL)
826 break;
827
828 ret = parse_events(evlist, e->name, 0);
829 if (ret) {
830 pr_debug("failed to parse event '%s', err %d\n",
831 e->name, ret);
832 break;
833 }
834
835 ret = e->check(evlist);
836 if (ret)
837 break;
838
839 perf_evlist__delete(evlist);
840 }
841
842 return ret;
843}
603static struct test { 844static struct test {
604 const char *desc; 845 const char *desc;
605 int (*func)(void); 846 int (*func)(void);
@@ -621,6 +862,10 @@ static struct test {
621 .func = test__basic_mmap, 862 .func = test__basic_mmap,
622 }, 863 },
623 { 864 {
865 .desc = "parse events tests",
866 .func = test__parse_events,
867 },
868 {
624 .func = NULL, 869 .func = NULL,
625 }, 870 },
626}; 871};
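The new parser checks are table-driven, so extending coverage is one check function plus one test__events[] entry. A hypothetical additional case (the event name and function below are invented for illustration):

    /* hypothetical: 'cycles' should map to the hardware cycle counter */
    static int test__checkevent_cycles(struct perf_evlist *evlist)
    {
            struct perf_evsel *evsel = list_entry(evlist->entries.next,
                                                  struct perf_evsel, node);

            TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
            TEST_ASSERT_VAL("wrong config",
                            PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
            return 0;
    }

    /* ...plus one entry in test__events[]: */
    { .name = "cycles", .check = test__checkevent_cycles },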
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index f2f3f4937aa2..a43433f08300 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -990,7 +990,7 @@ static const char * const top_usage[] = {
990static const struct option options[] = { 990static const struct option options[] = {
991 OPT_CALLBACK('e', "event", &top.evlist, "event", 991 OPT_CALLBACK('e', "event", &top.evlist, "event",
992 "event selector. use 'perf list' to list available events", 992 "event selector. use 'perf list' to list available events",
993 parse_events), 993 parse_events_option),
994 OPT_INTEGER('c', "count", &default_interval, 994 OPT_INTEGER('c', "count", &default_interval,
995 "event period to sample"), 995 "event period to sample"),
996 OPT_INTEGER('p', "pid", &top.target_pid, 996 OPT_INTEGER('p', "pid", &top.target_pid,
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 1a79df9f739f..9b4ff16cac96 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -14,6 +14,11 @@ enum chain_mode {
14 CHAIN_GRAPH_REL 14 CHAIN_GRAPH_REL
15}; 15};
16 16
17enum chain_order {
18 ORDER_CALLER,
19 ORDER_CALLEE
20};
21
17struct callchain_node { 22struct callchain_node {
18 struct callchain_node *parent; 23 struct callchain_node *parent;
19 struct list_head siblings; 24 struct list_head siblings;
@@ -41,6 +46,7 @@ struct callchain_param {
41 u32 print_limit; 46 u32 print_limit;
42 double min_percent; 47 double min_percent;
43 sort_chain_func_t sort; 48 sort_chain_func_t sort;
49 enum chain_order order;
44}; 50};
45 51
46struct callchain_list { 52struct callchain_list {
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
new file mode 100644
index 000000000000..fddf40f30d3e
--- /dev/null
+++ b/tools/perf/util/dwarf-aux.c
@@ -0,0 +1,663 @@
1/*
2 * dwarf-aux.c : libdw auxiliary interfaces
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 */
19
20#include <stdbool.h>
21#include "util.h"
22#include "debug.h"
23#include "dwarf-aux.h"
24
25/**
26 * cu_find_realpath - Find the realpath of the target file
27 * @cu_die: A DIE (dwarf information entry) of a CU (compilation unit)
28 * @fname: The tail filename of the target file
29 *
30 * Find the real (long) path of @fname in @cu_die.
31 */
32const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname)
33{
34 Dwarf_Files *files;
35 size_t nfiles, i;
36 const char *src = NULL;
37 int ret;
38
39 if (!fname)
40 return NULL;
41
42 ret = dwarf_getsrcfiles(cu_die, &files, &nfiles);
43 if (ret != 0)
44 return NULL;
45
46 for (i = 0; i < nfiles; i++) {
47 src = dwarf_filesrc(files, i, NULL, NULL);
48 if (strtailcmp(src, fname) == 0)
49 break;
50 }
51 if (i == nfiles)
52 return NULL;
53 return src;
54}
55
56/**
57 * cu_get_comp_dir - Get the path of compilation directory
58 * @cu_die: a CU DIE
59 *
60 * Get the path of compilation directory of given @cu_die.
61 * Since this depends on DW_AT_comp_dir, older gcc will not
62 * embed it. In that case, this returns NULL.
63 */
64const char *cu_get_comp_dir(Dwarf_Die *cu_die)
65{
66 Dwarf_Attribute attr;
67 if (dwarf_attr(cu_die, DW_AT_comp_dir, &attr) == NULL)
68 return NULL;
69 return dwarf_formstring(&attr);
70}
71
72/**
73 * cu_find_lineinfo - Get a line number and file name for given address
74 * @cu_die: a CU DIE
75 * @addr: An address
76 * @fname: a pointer which returns the file name string
77 * @lineno: a pointer which returns the line number
78 *
79 * Find a line number and file name for @addr in @cu_die.
80 */
81int cu_find_lineinfo(Dwarf_Die *cu_die, unsigned long addr,
82 const char **fname, int *lineno)
83{
84 Dwarf_Line *line;
85 Dwarf_Addr laddr;
86
87 line = dwarf_getsrc_die(cu_die, (Dwarf_Addr)addr);
88 if (line && dwarf_lineaddr(line, &laddr) == 0 &&
89 addr == (unsigned long)laddr && dwarf_lineno(line, lineno) == 0) {
90 *fname = dwarf_linesrc(line, NULL, NULL);
91 if (!*fname)
92 /* line number is useless without filename */
93 *lineno = 0;
94 }
95
96 return *lineno ?: -ENOENT;
97}
98
99/**
100 * die_compare_name - Compare diename and tname
101 * @dw_die: a DIE
102 * @tname: a string of target name
103 *
104 * Compare the name of @dw_die and @tname. Return false if @dw_die has no name.
105 */
106bool die_compare_name(Dwarf_Die *dw_die, const char *tname)
107{
108 const char *name;
109 name = dwarf_diename(dw_die);
110 return name ? (strcmp(tname, name) == 0) : false;
111}
112
113/**
114 * die_get_call_lineno - Get callsite line number of inline-function instance
115 * @in_die: a DIE of an inlined function instance
116 *
117 * Get the call-site line number of @in_die, i.e. the line from which the
118 * inline function is called.
119 */
120int die_get_call_lineno(Dwarf_Die *in_die)
121{
122 Dwarf_Attribute attr;
123 Dwarf_Word ret;
124
125 if (!dwarf_attr(in_die, DW_AT_call_line, &attr))
126 return -ENOENT;
127
128 dwarf_formudata(&attr, &ret);
129 return (int)ret;
130}
131
132/**
133 * die_get_type - Get type DIE
134 * @vr_die: a DIE of a variable
135 * @die_mem: where to store a type DIE
136 *
137 * Get a DIE of the type of the given variable (@vr_die), and store
138 * it in @die_mem. Returns NULL if it fails to get a type DIE.
139 */
140Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
141{
142 Dwarf_Attribute attr;
143
144 if (dwarf_attr_integrate(vr_die, DW_AT_type, &attr) &&
145 dwarf_formref_die(&attr, die_mem))
146 return die_mem;
147 else
148 return NULL;
149}
150
151/* Get a type die, but skip qualifiers */
152static Dwarf_Die *__die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
153{
154 int tag;
155
156 do {
157 vr_die = die_get_type(vr_die, die_mem);
158 if (!vr_die)
159 break;
160 tag = dwarf_tag(vr_die);
161 } while (tag == DW_TAG_const_type ||
162 tag == DW_TAG_restrict_type ||
163 tag == DW_TAG_volatile_type ||
164 tag == DW_TAG_shared_type);
165
166 return vr_die;
167}
168
169/**
170 * die_get_real_type - Get a type die, but skip qualifiers and typedef
171 * @vr_die: a DIE of a variable
172 * @die_mem: where to store a type DIE
173 *
174 * Get a DIE of the type of the given variable (@vr_die), and store
175 * it in @die_mem. Returns NULL if it fails to get a type DIE.
176 * If the type is a qualifier (e.g. const) or a typedef, this skips it
177 * and tries to find the real type (a structure or a basic type, e.g. int).
178 */
179Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
180{
181 do {
182 vr_die = __die_get_real_type(vr_die, die_mem);
183 } while (vr_die && dwarf_tag(vr_die) == DW_TAG_typedef);
184
185 return vr_die;
186}
187
188/* Get attribute and translate it as a udata */
189static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name,
190 Dwarf_Word *result)
191{
192 Dwarf_Attribute attr;
193
194 if (dwarf_attr(tp_die, attr_name, &attr) == NULL ||
195 dwarf_formudata(&attr, result) != 0)
196 return -ENOENT;
197
198 return 0;
199}
200
201/**
202 * die_is_signed_type - Check whether a type DIE is signed or not
203 * @tp_die: a DIE of a type
204 *
205 * Get the encoding of @tp_die and return true if the encoding
206 * is signed.
207 */
208bool die_is_signed_type(Dwarf_Die *tp_die)
209{
210 Dwarf_Word ret;
211
212 if (die_get_attr_udata(tp_die, DW_AT_encoding, &ret))
213 return false;
214
215 return (ret == DW_ATE_signed_char || ret == DW_ATE_signed ||
216 ret == DW_ATE_signed_fixed);
217}
218
219/**
220 * die_get_data_member_location - Get the data-member offset
221 * @mb_die: a DIE of a member of a data structure
222 * @offs: The offset of the member in the data structure
223 *
224 * Get the offset of @mb_die within the data structure containing it, and
225 * store the resulting offset in @offs. If any error occurs this returns an errno.
226 */
227int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs)
228{
229 Dwarf_Attribute attr;
230 Dwarf_Op *expr;
231 size_t nexpr;
232 int ret;
233
234 if (dwarf_attr(mb_die, DW_AT_data_member_location, &attr) == NULL)
235 return -ENOENT;
236
237 if (dwarf_formudata(&attr, offs) != 0) {
238 /* DW_AT_data_member_location should be DW_OP_plus_uconst */
239 ret = dwarf_getlocation(&attr, &expr, &nexpr);
240 if (ret < 0 || nexpr == 0)
241 return -ENOENT;
242
243 if (expr[0].atom != DW_OP_plus_uconst || nexpr != 1) {
244 pr_debug("Unable to get offset:Unexpected OP %x (%zd)\n",
245 expr[0].atom, nexpr);
246 return -ENOTSUP;
247 }
248 *offs = (Dwarf_Word)expr[0].number;
249 }
250 return 0;
251}
252
253/**
254 * die_find_child - Generic DIE search function in DIE tree
255 * @rt_die: a root DIE
256 * @callback: a callback function
257 * @data: a user data passed to the callback function
258 * @die_mem: a buffer for result DIE
259 *
260 * Trace DIE tree from @rt_die and call @callback for each child DIE.
261 * If @callback returns DIE_FIND_CB_END, this stores the DIE into
262 * @die_mem and returns it. If @callback returns DIE_FIND_CB_CONTINUE,
263 * this continues to trace the tree. Optionally, @callback can return
264 * DIE_FIND_CB_CHILD and DIE_FIND_CB_SIBLING, those means trace only
265 * the children and trace only the siblings respectively.
266 * Returns NULL if @callback can't find any appropriate DIE.
267 */
268Dwarf_Die *die_find_child(Dwarf_Die *rt_die,
269 int (*callback)(Dwarf_Die *, void *),
270 void *data, Dwarf_Die *die_mem)
271{
272 Dwarf_Die child_die;
273 int ret;
274
275 ret = dwarf_child(rt_die, die_mem);
276 if (ret != 0)
277 return NULL;
278
279 do {
280 ret = callback(die_mem, data);
281 if (ret == DIE_FIND_CB_END)
282 return die_mem;
283
284 if ((ret & DIE_FIND_CB_CHILD) &&
285 die_find_child(die_mem, callback, data, &child_die)) {
286 memcpy(die_mem, &child_die, sizeof(Dwarf_Die));
287 return die_mem;
288 }
289 } while ((ret & DIE_FIND_CB_SIBLING) &&
290 dwarf_siblingof(die_mem, die_mem) == 0);
291
292 return NULL;
293}
294
295struct __addr_die_search_param {
296 Dwarf_Addr addr;
297 Dwarf_Die *die_mem;
298};
299
300/* die_find callback for non-inlined function search */
301static int __die_search_func_cb(Dwarf_Die *fn_die, void *data)
302{
303 struct __addr_die_search_param *ad = data;
304
305 if (dwarf_tag(fn_die) == DW_TAG_subprogram &&
306 dwarf_haspc(fn_die, ad->addr)) {
307 memcpy(ad->die_mem, fn_die, sizeof(Dwarf_Die));
308 return DWARF_CB_ABORT;
309 }
310 return DWARF_CB_OK;
311}
312
313/**
314 * die_find_realfunc - Search a non-inlined function at given address
315 * @cu_die: a CU DIE which includes @addr
316 * @addr: target address
317 * @die_mem: a buffer for result DIE
318 *
319 * Search for a non-inlined function DIE which includes @addr. Stores the
320 * DIE in @die_mem and returns it if found. Returns NULL on failure.
321 */
322Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
323 Dwarf_Die *die_mem)
324{
325 struct __addr_die_search_param ad;
326 ad.addr = addr;
327 ad.die_mem = die_mem;
328 /* dwarf_getscopes can't find subprogram. */
329 if (!dwarf_getfuncs(cu_die, __die_search_func_cb, &ad, 0))
330 return NULL;
331 else
332 return die_mem;
333}
334
335/* die_find callback for inline function search */
336static int __die_find_inline_cb(Dwarf_Die *die_mem, void *data)
337{
338 Dwarf_Addr *addr = data;
339
340 if (dwarf_tag(die_mem) == DW_TAG_inlined_subroutine &&
341 dwarf_haspc(die_mem, *addr))
342 return DIE_FIND_CB_END;
343
344 return DIE_FIND_CB_CONTINUE;
345}
346
347/**
348 * die_find_inlinefunc - Search an inlined function at given address
349 * @sp_die: a subprogram DIE which includes @addr
350 * @addr: target address
351 * @die_mem: a buffer for result DIE
352 *
353 * Search for an inlined function DIE which includes @addr. Stores the
354 * DIE in @die_mem and returns it if found. Returns NULL on failure.
355 * If several inlined functions are expanded recursively, this traces
356 * them and returns the deepest one.
357 */
358Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
359 Dwarf_Die *die_mem)
360{
361 Dwarf_Die tmp_die;
362
363 sp_die = die_find_child(sp_die, __die_find_inline_cb, &addr, &tmp_die);
364 if (!sp_die)
365 return NULL;
366
367 /* Inlined functions could be nested recursively. Trace until the search fails */
368 while (sp_die) {
369 memcpy(die_mem, sp_die, sizeof(Dwarf_Die));
370 sp_die = die_find_child(sp_die, __die_find_inline_cb, &addr,
371 &tmp_die);
372 }
373
374 return die_mem;
375}
376
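The two finders compose naturally; a sketch resolving an address to its innermost scope (hypothetical helper, not part of this patch):

static Dwarf_Die *find_scope_at(Dwarf_Die *cu_die, Dwarf_Addr addr,
                                Dwarf_Die *die_mem)
{
        Dwarf_Die sp_mem;
        Dwarf_Die *sp = die_find_realfunc(cu_die, addr, &sp_mem);

        if (!sp)
                return NULL;
        /* Prefer the deepest inline instance, if one covers addr */
        if (die_find_inlinefunc(sp, addr, die_mem))
                return die_mem;
        memcpy(die_mem, sp, sizeof(*die_mem));
        return die_mem;
}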
377/* Line walker internal parameters */
378struct __line_walk_param {
379 const char *fname;
380 line_walk_callback_t callback;
381 void *data;
382 int retval;
383};
384
385static int __die_walk_funclines_cb(Dwarf_Die *in_die, void *data)
386{
387 struct __line_walk_param *lw = data;
388 Dwarf_Addr addr;
389 int lineno;
390
391 if (dwarf_tag(in_die) == DW_TAG_inlined_subroutine) {
392 lineno = die_get_call_lineno(in_die);
393 if (lineno > 0 && dwarf_entrypc(in_die, &addr) == 0) {
394 lw->retval = lw->callback(lw->fname, lineno, addr,
395 lw->data);
396 if (lw->retval != 0)
397 return DIE_FIND_CB_END;
398 }
399 }
400 return DIE_FIND_CB_SIBLING;
401}
402
403/* Walk on lines of blocks included in given DIE */
404static int __die_walk_funclines(Dwarf_Die *sp_die,
405 line_walk_callback_t callback, void *data)
406{
407 struct __line_walk_param lw = {
408 .callback = callback,
409 .data = data,
410 .retval = 0,
411 };
412 Dwarf_Die die_mem;
413 Dwarf_Addr addr;
414 int lineno;
415
416 /* Handle function declaration line */
417 lw.fname = dwarf_decl_file(sp_die);
418 if (lw.fname && dwarf_decl_line(sp_die, &lineno) == 0 &&
419 dwarf_entrypc(sp_die, &addr) == 0) {
420 lw.retval = callback(lw.fname, lineno, addr, data);
421 if (lw.retval != 0)
422 goto done;
423 }
424 die_find_child(sp_die, __die_walk_funclines_cb, &lw, &die_mem);
425done:
426 return lw.retval;
427}
428
429static int __die_walk_culines_cb(Dwarf_Die *sp_die, void *data)
430{
431 struct __line_walk_param *lw = data;
432
433 lw->retval = __die_walk_funclines(sp_die, lw->callback, lw->data);
434 if (lw->retval != 0)
435 return DWARF_CB_ABORT;
436
437 return DWARF_CB_OK;
438}
439
440/**
441 * die_walk_lines - Walk on lines inside given DIE
442 * @rt_die: a root DIE (CU or subprogram)
443 * @callback: callback routine
444 * @data: user data
445 *
446 * Walk on all lines inside the given @rt_die and call @callback on each line.
447 * If @rt_die is a function, walk only on the lines inside that function;
448 * otherwise @rt_die must be a CU DIE.
449 * Note that this walks not only the dwarf line list, but also function
450 * entry points and inline call-sites.
451 */
452int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data)
453{
454 Dwarf_Lines *lines;
455 Dwarf_Line *line;
456 Dwarf_Addr addr;
457 const char *fname;
458 int lineno, ret = 0;
459 Dwarf_Die die_mem, *cu_die;
460 size_t nlines, i;
461
462 /* Get the CU die */
463 if (dwarf_tag(rt_die) == DW_TAG_subprogram)
464 cu_die = dwarf_diecu(rt_die, &die_mem, NULL, NULL);
465 else
466 cu_die = rt_die;
467 if (!cu_die) {
468 pr_debug2("Failed to get CU from subprogram\n");
469 return -EINVAL;
470 }
471
472 /* Get lines list in the CU */
473 if (dwarf_getsrclines(cu_die, &lines, &nlines) != 0) {
474 pr_debug2("Failed to get source lines on this CU.\n");
475 return -ENOENT;
476 }
477 pr_debug2("Get %zd lines from this CU\n", nlines);
478
479 /* Walk on the lines in the lines list */
480 for (i = 0; i < nlines; i++) {
481 line = dwarf_onesrcline(lines, i);
482 if (line == NULL ||
483 dwarf_lineno(line, &lineno) != 0 ||
484 dwarf_lineaddr(line, &addr) != 0) {
485 pr_debug2("Failed to get line info. "
486 "Possible error in debuginfo.\n");
487 continue;
488 }
489 /* Filter lines based on address */
490 if (rt_die != cu_die)
491 /*
492 * Address filtering
493 * The line is included in given function, and
494 * no inline block includes it.
495 */
496 if (!dwarf_haspc(rt_die, addr) ||
497 die_find_inlinefunc(rt_die, addr, &die_mem))
498 continue;
499 /* Get source line */
500 fname = dwarf_linesrc(line, NULL, NULL);
501
502 ret = callback(fname, lineno, addr, data);
503 if (ret != 0)
504 return ret;
505 }
506
507 /*
508 * The dwarf line list doesn't include function declarations and inlined
509 * subroutines. We have to check the function list or the given function.
510 */
511 if (rt_die != cu_die)
512 ret = __die_walk_funclines(rt_die, callback, data);
513 else {
514 struct __line_walk_param param = {
515 .callback = callback,
516 .data = data,
517 .retval = 0,
518 };
519 dwarf_getfuncs(cu_die, __die_walk_culines_cb, &param, 0);
520 ret = param.retval;
521 }
522
523 return ret;
524}
525
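A sketch of the callback signature die_walk_lines() expects (hypothetical; assumes <stdio.h> and <stdint.h>); returning non-zero stops the walk:

static int print_line_cb(const char *fname, int lineno, Dwarf_Addr addr,
                         void *data __used)
{
        printf("%s:%d\t0x%jx\n", fname ?: "??", lineno, (uintmax_t)addr);
        return 0;       /* keep walking */
}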
526struct __find_variable_param {
527 const char *name;
528 Dwarf_Addr addr;
529};
530
531static int __die_find_variable_cb(Dwarf_Die *die_mem, void *data)
532{
533 struct __find_variable_param *fvp = data;
534 int tag;
535
536 tag = dwarf_tag(die_mem);
537 if ((tag == DW_TAG_formal_parameter ||
538 tag == DW_TAG_variable) &&
539 die_compare_name(die_mem, fvp->name))
540 return DIE_FIND_CB_END;
541
542 if (dwarf_haspc(die_mem, fvp->addr))
543 return DIE_FIND_CB_CONTINUE;
544 else
545 return DIE_FIND_CB_SIBLING;
546}
547
548/**
549 * die_find_variable_at - Find a variable with the given name at the given address
550 * @sp_die: a function DIE
551 * @name: variable name
552 * @addr: address
553 * @die_mem: a buffer for result DIE
554 *
555 * Find a variable DIE called @name at @addr in @sp_die.
556 */
557Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name,
558 Dwarf_Addr addr, Dwarf_Die *die_mem)
559{
560 struct __find_variable_param fvp = { .name = name, .addr = addr};
561
562 return die_find_child(sp_die, __die_find_variable_cb, (void *)&fvp,
563 die_mem);
564}
565
566static int __die_find_member_cb(Dwarf_Die *die_mem, void *data)
567{
568 const char *name = data;
569
570 if ((dwarf_tag(die_mem) == DW_TAG_member) &&
571 die_compare_name(die_mem, name))
572 return DIE_FIND_CB_END;
573
574 return DIE_FIND_CB_SIBLING;
575}
576
577/**
578 * die_find_member - Find a member with the given name in a data structure
579 * @st_die: a data structure type DIE
580 * @name: member name
581 * @die_mem: a buffer for result DIE
582 *
583 * Find a member DIE called @name in @st_die.
584 */
585Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
586 Dwarf_Die *die_mem)
587{
588 return die_find_child(st_die, __die_find_member_cb, (void *)name,
589 die_mem);
590}
591
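Chaining the finders gives "var.field" resolution; a sketch (die_get_real_type() is declared in dwarf-aux.h; the helper itself is hypothetical):

static Dwarf_Die *find_var_field(Dwarf_Die *sp_die, Dwarf_Addr addr,
                                 const char *var, const char *field,
                                 Dwarf_Die *die_mem)
{
        Dwarf_Die var_die, type_die;

        if (!die_find_variable_at(sp_die, var, addr, &var_die))
                return NULL;
        if (!die_get_real_type(&var_die, &type_die))
                return NULL;
        return die_find_member(&type_die, field, die_mem);
}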
592/**
593 * die_get_typename - Get the type name of a given variable DIE
594 * @vr_die: a variable DIE
595 * @buf: a buffer for result type name
596 * @len: a max-length of @buf
597 *
598 * Get the type name of @vr_die and store it in @buf. Returns the actual
599 * length of the type name on success, -E2BIG if @len is not long enough,
600 * and -ENOENT if the type name could not be found.
601 * Note that the result stores the typedef name if possible, and stores
602 * "*(function_type)" if the type is a function pointer.
603 */
604int die_get_typename(Dwarf_Die *vr_die, char *buf, int len)
605{
606 Dwarf_Die type;
607 int tag, ret, ret2;
608 const char *tmp = "";
609
610 if (__die_get_real_type(vr_die, &type) == NULL)
611 return -ENOENT;
612
613 tag = dwarf_tag(&type);
614 if (tag == DW_TAG_array_type || tag == DW_TAG_pointer_type)
615 tmp = "*";
616 else if (tag == DW_TAG_subroutine_type) {
617 /* Function pointer */
618 ret = snprintf(buf, len, "(function_type)");
619 return (ret >= len) ? -E2BIG : ret;
620 } else {
621 if (!dwarf_diename(&type))
622 return -ENOENT;
623 if (tag == DW_TAG_union_type)
624 tmp = "union ";
625 else if (tag == DW_TAG_structure_type)
626 tmp = "struct ";
627 /* Write a base name */
628 ret = snprintf(buf, len, "%s%s", tmp, dwarf_diename(&type));
629 return (ret >= len) ? -E2BIG : ret;
630 }
631 ret = die_get_typename(&type, buf, len);
632 if (ret > 0) {
633 ret2 = snprintf(buf + ret, len - ret, "%s", tmp);
634 ret = (ret2 >= len - ret) ? -E2BIG : ret2 + ret;
635 }
636 return ret;
637}
638
639/**
640 * die_get_varname - Get the name and type of given variable DIE
641 * @vr_die: a variable DIE
642 * @buf: a buffer for type and variable name
643 * @len: the max-length of @buf
644 *
645 * Get the name and type of @vr_die and store them in @buf as "type\tname".
646 */
647int die_get_varname(Dwarf_Die *vr_die, char *buf, int len)
648{
649 int ret, ret2;
650
651 ret = die_get_typename(vr_die, buf, len);
652 if (ret < 0) {
653 pr_debug("Failed to get type, make it unknown.\n");
654 ret = snprintf(buf, len, "(unknown_type)");
655 }
656 if (ret > 0) {
657 ret2 = snprintf(buf + ret, len - ret, "\t%s",
658 dwarf_diename(vr_die));
659 ret = (ret2 >= len - ret) ? -E2BIG : ret2 + ret;
660 }
661 return ret;
662}
663
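A usage sketch (hypothetical; assumes <stdio.h>): for a pointer variable the buffer ends up holding something like "struct task_struct*\ttsk".

static void show_var(Dwarf_Die *vr_die)
{
        char buf[128];

        if (die_get_varname(vr_die, buf, sizeof(buf)) > 0)
                printf("%s\n", buf);    /* e.g. "struct task_struct*\ttsk" */
}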
diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h
new file mode 100644
index 000000000000..bc3b21167e70
--- /dev/null
+++ b/tools/perf/util/dwarf-aux.h
@@ -0,0 +1,100 @@
1#ifndef _DWARF_AUX_H
2#define _DWARF_AUX_H
3/*
4 * dwarf-aux.h : libdw auxiliary interfaces
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 *
20 */
21
22#include <dwarf.h>
23#include <elfutils/libdw.h>
24#include <elfutils/libdwfl.h>
25#include <elfutils/version.h>
26
27/* Find the realpath of the target file */
28extern const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname);
29
30/* Get DW_AT_comp_dir (should be NULL with older gcc) */
31extern const char *cu_get_comp_dir(Dwarf_Die *cu_die);
32
33/* Get a line number and file name for given address */
34extern int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr,
35 const char **fname, int *lineno);
36
37/* Compare diename and tname */
38extern bool die_compare_name(Dwarf_Die *dw_die, const char *tname);
39
40/* Get callsite line number of inline-function instance */
41extern int die_get_call_lineno(Dwarf_Die *in_die);
42
43/* Get type die */
44extern Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem);
45
46/* Get a type die, but skip qualifiers and typedef */
47extern Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem);
48
49/* Check whether the DIE is signed or not */
50extern bool die_is_signed_type(Dwarf_Die *tp_die);
51
52/* Get data_member_location offset */
53extern int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs);
54
55/* Return values for die_find_child() callbacks */
56enum {
57 DIE_FIND_CB_END = 0, /* End of Search */
58 DIE_FIND_CB_CHILD = 1, /* Search only children */
59 DIE_FIND_CB_SIBLING = 2, /* Search only siblings */
60 DIE_FIND_CB_CONTINUE = 3, /* Search children and siblings */
61};
62
63/* Search child DIEs */
64extern Dwarf_Die *die_find_child(Dwarf_Die *rt_die,
65 int (*callback)(Dwarf_Die *, void *),
66 void *data, Dwarf_Die *die_mem);
67
68/* Search a non-inlined function including given address */
69extern Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
70 Dwarf_Die *die_mem);
71
72/* Search an inlined function including given address */
73extern Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
74 Dwarf_Die *die_mem);
75
76/* Walker on lines (Note: line numbers will not be sorted) */
77typedef int (* line_walk_callback_t) (const char *fname, int lineno,
78 Dwarf_Addr addr, void *data);
79
80/*
81 * Walk on lines inside given DIE. If the DIE is a subprogram, walk only on
82 * the lines inside the subprogram, otherwise the DIE must be a CU DIE.
83 */
84extern int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback,
85 void *data);
86
87/* Find a variable called 'name' at given address */
88extern Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name,
89 Dwarf_Addr addr, Dwarf_Die *die_mem);
90
91/* Find a member called 'name' */
92extern Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
93 Dwarf_Die *die_mem);
94
95/* Get the name of given variable DIE */
96extern int die_get_typename(Dwarf_Die *vr_die, char *buf, int len);
97
98/* Get the name and type of given variable DIE, stored as "type\tname" */
99extern int die_get_varname(Dwarf_Die *vr_die, char *buf, int len);
100#endif
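A sketch of a typical client of this header (hypothetical; assumes <stdio.h>): cu_find_lineinfo() returns a positive line number on success.

static void show_lineinfo(Dwarf_Die *cudie, unsigned long addr)
{
        const char *fname = NULL;
        int lineno = 0;

        if (cu_find_lineinfo(cudie, addr, &fname, &lineno) > 0)
                printf("%s:%d\n", fname, lineno);
}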
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 0239eb87b232..a03a36b7908a 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -377,6 +377,7 @@ int perf_event__parse_sample(const union perf_event *event, u64 type,
377 array++; 377 array++;
378 } 378 }
379 379
380 data->addr = 0;
380 if (type & PERF_SAMPLE_ADDR) { 381 if (type & PERF_SAMPLE_ADDR) {
381 data->addr = *array; 382 data->addr = *array;
382 array++; 383 array++;
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 7e9366e4490b..e9a31554e265 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -61,6 +61,7 @@ struct perf_evsel {
61 off_t id_offset; 61 off_t id_offset;
62 }; 62 };
63 struct cgroup_sel *cgrp; 63 struct cgroup_sel *cgrp;
64 bool supported;
64}; 65};
65 66
66struct cpu_map; 67struct cpu_map;
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index afb0849fe530..cb2959a3fb43 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -877,9 +877,12 @@ int perf_session__read_header(struct perf_session *session, int fd)
877 struct perf_evsel *evsel; 877 struct perf_evsel *evsel;
878 off_t tmp; 878 off_t tmp;
879 879
880 if (perf_header__getbuffer64(header, fd, &f_attr, sizeof(f_attr))) 880 if (readn(fd, &f_attr, sizeof(f_attr)) <= 0)
881 goto out_errno; 881 goto out_errno;
882 882
883 if (header->needs_swap)
884 perf_event__attr_swap(&f_attr.attr);
885
883 tmp = lseek(fd, 0, SEEK_CUR); 886 tmp = lseek(fd, 0, SEEK_CUR);
884 evsel = perf_evsel__new(&f_attr.attr, i); 887 evsel = perf_evsel__new(&f_attr.attr, i);
885 888
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 627a02e03c57..677e1da6bb3e 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -14,7 +14,8 @@ enum hist_filter {
14 14
15struct callchain_param callchain_param = { 15struct callchain_param callchain_param = {
16 .mode = CHAIN_GRAPH_REL, 16 .mode = CHAIN_GRAPH_REL,
17 .min_percent = 0.5 17 .min_percent = 0.5,
18 .order = ORDER_CALLEE
18}; 19};
19 20
20u16 hists__col_len(struct hists *self, enum hist_column col) 21u16 hists__col_len(struct hists *self, enum hist_column col)
@@ -846,6 +847,9 @@ print_entries:
846 for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) { 847 for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) {
847 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); 848 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
848 849
850 if (h->filtered)
851 continue;
852
849 if (show_displacement) { 853 if (show_displacement) {
850 if (h->pair != NULL) 854 if (h->pair != NULL)
851 displacement = ((long)h->pair->position - 855 displacement = ((long)h->pair->position -
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 41982c373faf..4ea7e19f5251 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -86,22 +86,24 @@ static const char *sw_event_names[PERF_COUNT_SW_MAX] = {
86 86
87#define MAX_ALIASES 8 87#define MAX_ALIASES 8
88 88
89static const char *hw_cache[][MAX_ALIASES] = { 89static const char *hw_cache[PERF_COUNT_HW_CACHE_MAX][MAX_ALIASES] = {
90 { "L1-dcache", "l1-d", "l1d", "L1-data", }, 90 { "L1-dcache", "l1-d", "l1d", "L1-data", },
91 { "L1-icache", "l1-i", "l1i", "L1-instruction", }, 91 { "L1-icache", "l1-i", "l1i", "L1-instruction", },
92 { "LLC", "L2" }, 92 { "LLC", "L2", },
93 { "dTLB", "d-tlb", "Data-TLB", }, 93 { "dTLB", "d-tlb", "Data-TLB", },
94 { "iTLB", "i-tlb", "Instruction-TLB", }, 94 { "iTLB", "i-tlb", "Instruction-TLB", },
95 { "branch", "branches", "bpu", "btb", "bpc", }, 95 { "branch", "branches", "bpu", "btb", "bpc", },
96 { "node", },
96}; 97};
97 98
98static const char *hw_cache_op[][MAX_ALIASES] = { 99static const char *hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][MAX_ALIASES] = {
99 { "load", "loads", "read", }, 100 { "load", "loads", "read", },
100 { "store", "stores", "write", }, 101 { "store", "stores", "write", },
101 { "prefetch", "prefetches", "speculative-read", "speculative-load", }, 102 { "prefetch", "prefetches", "speculative-read", "speculative-load", },
102}; 103};
103 104
104static const char *hw_cache_result[][MAX_ALIASES] = { 105static const char *hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX]
106 [MAX_ALIASES] = {
105 { "refs", "Reference", "ops", "access", }, 107 { "refs", "Reference", "ops", "access", },
106 { "misses", "miss", }, 108 { "misses", "miss", },
107}; 109};
@@ -124,6 +126,7 @@ static unsigned long hw_cache_stat[C(MAX)] = {
124 [C(DTLB)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), 126 [C(DTLB)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
125 [C(ITLB)] = (CACHE_READ), 127 [C(ITLB)] = (CACHE_READ),
126 [C(BPU)] = (CACHE_READ), 128 [C(BPU)] = (CACHE_READ),
129 [C(NODE)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
127}; 130};
128 131
129#define for_each_subsystem(sys_dir, sys_dirent, sys_next) \ 132#define for_each_subsystem(sys_dir, sys_dirent, sys_next) \
@@ -393,7 +396,7 @@ parse_generic_hw_event(const char **str, struct perf_event_attr *attr)
393 PERF_COUNT_HW_CACHE_OP_MAX); 396 PERF_COUNT_HW_CACHE_OP_MAX);
394 if (cache_op >= 0) { 397 if (cache_op >= 0) {
395 if (!is_cache_op_valid(cache_type, cache_op)) 398 if (!is_cache_op_valid(cache_type, cache_op))
396 return 0; 399 return EVT_FAILED;
397 continue; 400 continue;
398 } 401 }
399 } 402 }
@@ -475,7 +478,7 @@ parse_single_tracepoint_event(char *sys_name,
475/* sys + ':' + event + ':' + flags*/ 478/* sys + ':' + event + ':' + flags*/
476#define MAX_EVOPT_LEN (MAX_EVENT_LENGTH * 2 + 2 + 128) 479#define MAX_EVOPT_LEN (MAX_EVENT_LENGTH * 2 + 2 + 128)
477static enum event_result 480static enum event_result
478parse_multiple_tracepoint_event(const struct option *opt, char *sys_name, 481parse_multiple_tracepoint_event(struct perf_evlist *evlist, char *sys_name,
479 const char *evt_exp, char *flags) 482 const char *evt_exp, char *flags)
480{ 483{
481 char evt_path[MAXPATHLEN]; 484 char evt_path[MAXPATHLEN];
@@ -509,7 +512,7 @@ parse_multiple_tracepoint_event(const struct option *opt, char *sys_name,
509 if (len < 0) 512 if (len < 0)
510 return EVT_FAILED; 513 return EVT_FAILED;
511 514
512 if (parse_events(opt, event_opt, 0)) 515 if (parse_events(evlist, event_opt, 0))
513 return EVT_FAILED; 516 return EVT_FAILED;
514 } 517 }
515 518
@@ -517,7 +520,7 @@ parse_multiple_tracepoint_event(const struct option *opt, char *sys_name,
517} 520}
518 521
519static enum event_result 522static enum event_result
520parse_tracepoint_event(const struct option *opt, const char **strp, 523parse_tracepoint_event(struct perf_evlist *evlist, const char **strp,
521 struct perf_event_attr *attr) 524 struct perf_event_attr *attr)
522{ 525{
523 const char *evt_name; 526 const char *evt_name;
@@ -557,8 +560,8 @@ parse_tracepoint_event(const struct option *opt, const char **strp,
557 return EVT_FAILED; 560 return EVT_FAILED;
558 if (strpbrk(evt_name, "*?")) { 561 if (strpbrk(evt_name, "*?")) {
559 *strp += strlen(sys_name) + evt_length + 1; /* 1 == the ':' */ 562 *strp += strlen(sys_name) + evt_length + 1; /* 1 == the ':' */
560 return parse_multiple_tracepoint_event(opt, sys_name, evt_name, 563 return parse_multiple_tracepoint_event(evlist, sys_name,
561 flags); 564 evt_name, flags);
562 } else { 565 } else {
563 return parse_single_tracepoint_event(sys_name, evt_name, 566 return parse_single_tracepoint_event(sys_name, evt_name,
564 evt_length, attr, strp); 567 evt_length, attr, strp);
@@ -778,12 +781,12 @@ parse_event_modifier(const char **strp, struct perf_event_attr *attr)
778 * Symbolic names are (almost) exactly matched. 781 * Symbolic names are (almost) exactly matched.
779 */ 782 */
780static enum event_result 783static enum event_result
781parse_event_symbols(const struct option *opt, const char **str, 784parse_event_symbols(struct perf_evlist *evlist, const char **str,
782 struct perf_event_attr *attr) 785 struct perf_event_attr *attr)
783{ 786{
784 enum event_result ret; 787 enum event_result ret;
785 788
786 ret = parse_tracepoint_event(opt, str, attr); 789 ret = parse_tracepoint_event(evlist, str, attr);
787 if (ret != EVT_FAILED) 790 if (ret != EVT_FAILED)
788 goto modifier; 791 goto modifier;
789 792
@@ -822,9 +825,8 @@ modifier:
822 return ret; 825 return ret;
823} 826}
824 827
825int parse_events(const struct option *opt, const char *str, int unset __used) 828int parse_events(struct perf_evlist *evlist , const char *str, int unset __used)
826{ 829{
827 struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
828 struct perf_event_attr attr; 830 struct perf_event_attr attr;
829 enum event_result ret; 831 enum event_result ret;
830 const char *ostr; 832 const char *ostr;
@@ -832,7 +834,7 @@ int parse_events(const struct option *opt, const char *str, int unset __used)
832 for (;;) { 834 for (;;) {
833 ostr = str; 835 ostr = str;
834 memset(&attr, 0, sizeof(attr)); 836 memset(&attr, 0, sizeof(attr));
835 ret = parse_event_symbols(opt, &str, &attr); 837 ret = parse_event_symbols(evlist, &str, &attr);
836 if (ret == EVT_FAILED) 838 if (ret == EVT_FAILED)
837 return -1; 839 return -1;
838 840
@@ -863,6 +865,13 @@ int parse_events(const struct option *opt, const char *str, int unset __used)
863 return 0; 865 return 0;
864} 866}
865 867
868int parse_events_option(const struct option *opt, const char *str,
869 int unset __used)
870{
871 struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
872 return parse_events(evlist, str, unset);
873}
874
866int parse_filter(const struct option *opt, const char *str, 875int parse_filter(const struct option *opt, const char *str,
867 int unset __used) 876 int unset __used)
868{ 877{
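With this split, built-in commands keep registering the option-parsing wrapper while tests can call parse_events() directly; roughly as follows (a sketch based on how builtin commands of this era wire it up; the evsel_list variable name is hypothetical):

        const struct option options[] = {
        OPT_CALLBACK('e', "event", &evsel_list, "event",
                     "event selector. use 'perf list' to list available events",
                     parse_events_option),
        OPT_END()
        };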
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 746d3fcbfc2a..2f8e375e038d 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -8,6 +8,7 @@
8 8
9struct list_head; 9struct list_head;
10struct perf_evsel; 10struct perf_evsel;
11struct perf_evlist;
11 12
12struct option; 13struct option;
13 14
@@ -24,7 +25,10 @@ const char *event_type(int type);
24const char *event_name(struct perf_evsel *event); 25const char *event_name(struct perf_evsel *event);
25extern const char *__event_name(int type, u64 config); 26extern const char *__event_name(int type, u64 config);
26 27
27extern int parse_events(const struct option *opt, const char *str, int unset); 28extern int parse_events_option(const struct option *opt, const char *str,
29 int unset);
30extern int parse_events(struct perf_evlist *evlist, const char *str,
31 int unset);
28extern int parse_filter(const struct option *opt, const char *str, int unset); 32extern int parse_filter(const struct option *opt, const char *str, int unset);
29 33
30#define EVENTS_HELP_MAX (128*1024) 34#define EVENTS_HELP_MAX (128*1024)
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index f0223166e761..b82d54fa2c56 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -117,6 +117,10 @@ static struct map *kernel_get_module_map(const char *module)
117 struct rb_node *nd; 117 struct rb_node *nd;
118 struct map_groups *grp = &machine.kmaps; 118 struct map_groups *grp = &machine.kmaps;
119 119
120 /* A file path -- this is an offline module */
121 if (module && strchr(module, '/'))
122 return machine__new_module(&machine, 0, module);
123
120 if (!module) 124 if (!module)
121 module = "kernel"; 125 module = "kernel";
122 126
@@ -170,16 +174,24 @@ const char *kernel_get_module_path(const char *module)
170} 174}
171 175
172#ifdef DWARF_SUPPORT 176#ifdef DWARF_SUPPORT
173static int open_vmlinux(const char *module) 177/* Open new debuginfo of given module */
178static struct debuginfo *open_debuginfo(const char *module)
174{ 179{
175 const char *path = kernel_get_module_path(module); 180 const char *path;
176 if (!path) { 181
177 pr_err("Failed to find path of %s module.\n", 182 /* A file path -- this is an offline module */
178 module ?: "kernel"); 183 if (module && strchr(module, '/'))
179 return -ENOENT; 184 path = module;
185 else {
186 path = kernel_get_module_path(module);
187
188 if (!path) {
189 pr_err("Failed to find path of %s module.\n",
190 module ?: "kernel");
191 return NULL;
192 }
180 } 193 }
181 pr_debug("Try to open %s\n", path); 194 return debuginfo__new(path);
182 return open(path, O_RDONLY);
183} 195}
184 196
185/* 197/*
@@ -193,13 +205,24 @@ static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp,
193 struct map *map; 205 struct map *map;
194 u64 addr; 206 u64 addr;
195 int ret = -ENOENT; 207 int ret = -ENOENT;
208 struct debuginfo *dinfo;
196 209
197 sym = __find_kernel_function_by_name(tp->symbol, &map); 210 sym = __find_kernel_function_by_name(tp->symbol, &map);
198 if (sym) { 211 if (sym) {
199 addr = map->unmap_ip(map, sym->start + tp->offset); 212 addr = map->unmap_ip(map, sym->start + tp->offset);
200 pr_debug("try to find %s+%ld@%" PRIx64 "\n", tp->symbol, 213 pr_debug("try to find %s+%ld@%" PRIx64 "\n", tp->symbol,
201 tp->offset, addr); 214 tp->offset, addr);
202 ret = find_perf_probe_point((unsigned long)addr, pp); 215
216 dinfo = debuginfo__new_online_kernel(addr);
217 if (dinfo) {
218 ret = debuginfo__find_probe_point(dinfo,
219 (unsigned long)addr, pp);
220 debuginfo__delete(dinfo);
221 } else {
222 pr_debug("Failed to open debuginfo at 0x%" PRIx64 "\n",
223 addr);
224 ret = -ENOENT;
225 }
203 } 226 }
204 if (ret <= 0) { 227 if (ret <= 0) {
205 pr_debug("Failed to find corresponding probes from " 228 pr_debug("Failed to find corresponding probes from "
@@ -214,30 +237,70 @@ static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp,
214 return 0; 237 return 0;
215} 238}
216 239
240static int add_module_to_probe_trace_events(struct probe_trace_event *tevs,
241 int ntevs, const char *module)
242{
243 int i, ret = 0;
244 char *tmp;
245
246 if (!module)
247 return 0;
248
249 tmp = strrchr(module, '/');
250 if (tmp) {
251 /* This is a module path -- get the module name */
252 module = strdup(tmp + 1);
253 if (!module)
254 return -ENOMEM;
255 tmp = strchr(module, '.');
256 if (tmp)
257 *tmp = '\0';
258 tmp = (char *)module; /* For free() */
259 }
260
261 for (i = 0; i < ntevs; i++) {
262 tevs[i].point.module = strdup(module);
263 if (!tevs[i].point.module) {
264 ret = -ENOMEM;
265 break;
266 }
267 }
268
269 if (tmp)
270 free(tmp);
271
272 return ret;
273}
274
217/* Try to find perf_probe_event with debuginfo */ 275/* Try to find perf_probe_event with debuginfo */
218static int try_to_find_probe_trace_events(struct perf_probe_event *pev, 276static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
219 struct probe_trace_event **tevs, 277 struct probe_trace_event **tevs,
220 int max_tevs, const char *module) 278 int max_tevs, const char *module)
221{ 279{
222 bool need_dwarf = perf_probe_event_need_dwarf(pev); 280 bool need_dwarf = perf_probe_event_need_dwarf(pev);
223 int fd, ntevs; 281 struct debuginfo *dinfo = open_debuginfo(module);
282 int ntevs, ret = 0;
224 283
225 fd = open_vmlinux(module); 284 if (!dinfo) {
226 if (fd < 0) {
227 if (need_dwarf) { 285 if (need_dwarf) {
228 pr_warning("Failed to open debuginfo file.\n"); 286 pr_warning("Failed to open debuginfo file.\n");
229 return fd; 287 return -ENOENT;
230 } 288 }
231 pr_debug("Could not open vmlinux. Try to use symbols.\n"); 289 pr_debug("Could not open debuginfo. Try to use symbols.\n");
232 return 0; 290 return 0;
233 } 291 }
234 292
235 /* Searching trace events corresponding to probe event */ 293 /* Searching trace events corresponding to a probe event */
236 ntevs = find_probe_trace_events(fd, pev, tevs, max_tevs); 294 ntevs = debuginfo__find_trace_events(dinfo, pev, tevs, max_tevs);
295
296 debuginfo__delete(dinfo);
237 297
238 if (ntevs > 0) { /* Succeeded to find trace events */ 298 if (ntevs > 0) { /* Succeeded to find trace events */
239 pr_debug("find %d probe_trace_events.\n", ntevs); 299 pr_debug("find %d probe_trace_events.\n", ntevs);
240 return ntevs; 300 if (module)
301 ret = add_module_to_probe_trace_events(*tevs, ntevs,
302 module);
303 return ret < 0 ? ret : ntevs;
241 } 304 }
242 305
243 if (ntevs == 0) { /* No error but failed to find probe point. */ 306 if (ntevs == 0) { /* No error but failed to find probe point. */
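The path-to-name transform in add_module_to_probe_trace_events() above, as a standalone sketch (hypothetical helper): an offline path like "/lib/modules/.../fat.ko" becomes the module name "fat".

static char *module_name_from_path(const char *path)
{
        char *name, *p;

        p = strrchr(path, '/');
        name = strdup(p ? p + 1 : path);        /* "fat.ko" */
        if (name && (p = strchr(name, '.')))
                *p = '\0';                      /* "fat" */
        return name;                            /* caller must free() */
}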
@@ -371,8 +434,9 @@ int show_line_range(struct line_range *lr, const char *module)
371{ 434{
372 int l = 1; 435 int l = 1;
373 struct line_node *ln; 436 struct line_node *ln;
437 struct debuginfo *dinfo;
374 FILE *fp; 438 FILE *fp;
375 int fd, ret; 439 int ret;
376 char *tmp; 440 char *tmp;
377 441
378 /* Search a line range */ 442 /* Search a line range */
@@ -380,13 +444,14 @@ int show_line_range(struct line_range *lr, const char *module)
380 if (ret < 0) 444 if (ret < 0)
381 return ret; 445 return ret;
382 446
383 fd = open_vmlinux(module); 447 dinfo = open_debuginfo(module);
384 if (fd < 0) { 448 if (!dinfo) {
385 pr_warning("Failed to open debuginfo file.\n"); 449 pr_warning("Failed to open debuginfo file.\n");
386 return fd; 450 return -ENOENT;
387 } 451 }
388 452
389 ret = find_line_range(fd, lr); 453 ret = debuginfo__find_line_range(dinfo, lr);
454 debuginfo__delete(dinfo);
390 if (ret == 0) { 455 if (ret == 0) {
391 pr_warning("Specified source line is not found.\n"); 456 pr_warning("Specified source line is not found.\n");
392 return -ENOENT; 457 return -ENOENT;
@@ -448,7 +513,8 @@ end:
448 return ret; 513 return ret;
449} 514}
450 515
451static int show_available_vars_at(int fd, struct perf_probe_event *pev, 516static int show_available_vars_at(struct debuginfo *dinfo,
517 struct perf_probe_event *pev,
452 int max_vls, struct strfilter *_filter, 518 int max_vls, struct strfilter *_filter,
453 bool externs) 519 bool externs)
454{ 520{
@@ -463,7 +529,8 @@ static int show_available_vars_at(int fd, struct perf_probe_event *pev,
463 return -EINVAL; 529 return -EINVAL;
464 pr_debug("Searching variables at %s\n", buf); 530 pr_debug("Searching variables at %s\n", buf);
465 531
466 ret = find_available_vars_at(fd, pev, &vls, max_vls, externs); 532 ret = debuginfo__find_available_vars_at(dinfo, pev, &vls,
533 max_vls, externs);
467 if (ret <= 0) { 534 if (ret <= 0) {
468 pr_err("Failed to find variables at %s (%d)\n", buf, ret); 535 pr_err("Failed to find variables at %s (%d)\n", buf, ret);
469 goto end; 536 goto end;
@@ -504,24 +571,26 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs,
504 int max_vls, const char *module, 571 int max_vls, const char *module,
505 struct strfilter *_filter, bool externs) 572 struct strfilter *_filter, bool externs)
506{ 573{
507 int i, fd, ret = 0; 574 int i, ret = 0;
575 struct debuginfo *dinfo;
508 576
509 ret = init_vmlinux(); 577 ret = init_vmlinux();
510 if (ret < 0) 578 if (ret < 0)
511 return ret; 579 return ret;
512 580
581 dinfo = open_debuginfo(module);
582 if (!dinfo) {
583 pr_warning("Failed to open debuginfo file.\n");
584 return -ENOENT;
585 }
586
513 setup_pager(); 587 setup_pager();
514 588
515 for (i = 0; i < npevs && ret >= 0; i++) { 589 for (i = 0; i < npevs && ret >= 0; i++)
516 fd = open_vmlinux(module); 590 ret = show_available_vars_at(dinfo, &pevs[i], max_vls, _filter,
517 if (fd < 0) {
518 pr_warning("Failed to open debug information file.\n");
519 ret = fd;
520 break;
521 }
522 ret = show_available_vars_at(fd, &pevs[i], max_vls, _filter,
523 externs); 591 externs);
524 } 592
593 debuginfo__delete(dinfo);
525 return ret; 594 return ret;
526} 595}
527 596
@@ -990,7 +1059,7 @@ bool perf_probe_event_need_dwarf(struct perf_probe_event *pev)
990 1059
991/* Parse probe_events event into struct probe_point */ 1060/* Parse probe_events event into struct probe_point */
992static int parse_probe_trace_command(const char *cmd, 1061static int parse_probe_trace_command(const char *cmd,
993 struct probe_trace_event *tev) 1062 struct probe_trace_event *tev)
994{ 1063{
995 struct probe_trace_point *tp = &tev->point; 1064 struct probe_trace_point *tp = &tev->point;
996 char pr; 1065 char pr;
@@ -1023,8 +1092,14 @@ static int parse_probe_trace_command(const char *cmd,
1023 1092
1024 tp->retprobe = (pr == 'r'); 1093 tp->retprobe = (pr == 'r');
1025 1094
1026 /* Scan function name and offset */ 1095 /* Scan module name(if there), function name and offset */
1027 ret = sscanf(argv[1], "%a[^+]+%lu", (float *)(void *)&tp->symbol, 1096 p = strchr(argv[1], ':');
1097 if (p) {
1098 tp->module = strndup(argv[1], p - argv[1]);
1099 p++;
1100 } else
1101 p = argv[1];
1102 ret = sscanf(p, "%a[^+]+%lu", (float *)(void *)&tp->symbol,
1028 &tp->offset); 1103 &tp->offset);
1029 if (ret == 1) 1104 if (ret == 1)
1030 tp->offset = 0; 1105 tp->offset = 0;
@@ -1269,9 +1344,10 @@ char *synthesize_probe_trace_command(struct probe_trace_event *tev)
1269 if (buf == NULL) 1344 if (buf == NULL)
1270 return NULL; 1345 return NULL;
1271 1346
1272 len = e_snprintf(buf, MAX_CMDLEN, "%c:%s/%s %s+%lu", 1347 len = e_snprintf(buf, MAX_CMDLEN, "%c:%s/%s %s%s%s+%lu",
1273 tp->retprobe ? 'r' : 'p', 1348 tp->retprobe ? 'r' : 'p',
1274 tev->group, tev->event, 1349 tev->group, tev->event,
1350 tp->module ?: "", tp->module ? ":" : "",
1275 tp->symbol, tp->offset); 1351 tp->symbol, tp->offset);
1276 if (len <= 0) 1352 if (len <= 0)
1277 goto error; 1353 goto error;
@@ -1378,6 +1454,8 @@ static void clear_probe_trace_event(struct probe_trace_event *tev)
1378 free(tev->group); 1454 free(tev->group);
1379 if (tev->point.symbol) 1455 if (tev->point.symbol)
1380 free(tev->point.symbol); 1456 free(tev->point.symbol);
1457 if (tev->point.module)
1458 free(tev->point.module);
1381 for (i = 0; i < tev->nargs; i++) { 1459 for (i = 0; i < tev->nargs; i++) {
1382 if (tev->args[i].name) 1460 if (tev->args[i].name)
1383 free(tev->args[i].name); 1461 free(tev->args[i].name);
@@ -1729,7 +1807,7 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev,
1729 /* Convert perf_probe_event with debuginfo */ 1807 /* Convert perf_probe_event with debuginfo */
1730 ret = try_to_find_probe_trace_events(pev, tevs, max_tevs, module); 1808 ret = try_to_find_probe_trace_events(pev, tevs, max_tevs, module);
1731 if (ret != 0) 1809 if (ret != 0)
1732 return ret; 1810 return ret; /* Found in debuginfo or got an error */
1733 1811
1734 /* Allocate trace event buffer */ 1812 /* Allocate trace event buffer */
1735 tev = *tevs = zalloc(sizeof(struct probe_trace_event)); 1813 tev = *tevs = zalloc(sizeof(struct probe_trace_event));
@@ -1742,6 +1820,11 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev,
1742 ret = -ENOMEM; 1820 ret = -ENOMEM;
1743 goto error; 1821 goto error;
1744 } 1822 }
1823 tev->point.module = strdup(module);
1824 if (tev->point.module == NULL) {
1825 ret = -ENOMEM;
1826 goto error;
1827 }
1745 tev->point.offset = pev->point.offset; 1828 tev->point.offset = pev->point.offset;
1746 tev->point.retprobe = pev->point.retprobe; 1829 tev->point.retprobe = pev->point.retprobe;
1747 tev->nargs = pev->nargs; 1830 tev->nargs = pev->nargs;
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 3434fc9d79d5..a7dee835f49c 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -10,6 +10,7 @@ extern bool probe_event_dry_run;
10/* kprobe-tracer tracing point */ 10/* kprobe-tracer tracing point */
11struct probe_trace_point { 11struct probe_trace_point {
12 char *symbol; /* Base symbol */ 12 char *symbol; /* Base symbol */
13 char *module; /* Module name */
13 unsigned long offset; /* Offset from symbol */ 14 unsigned long offset; /* Offset from symbol */
14 bool retprobe; /* Return probe flag */ 15 bool retprobe; /* Return probe flag */
15}; 16};
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 3b9d0b800d5c..3e44a3e36519 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -43,21 +43,6 @@
43/* Kprobe tracer basic type is up to u64 */ 43/* Kprobe tracer basic type is up to u64 */
44#define MAX_BASIC_TYPE_BITS 64 44#define MAX_BASIC_TYPE_BITS 64
45 45
46/*
47 * Compare the tail of two strings.
48 * Return 0 if whole of either string is same as another's tail part.
49 */
50static int strtailcmp(const char *s1, const char *s2)
51{
52 int i1 = strlen(s1);
53 int i2 = strlen(s2);
54 while (--i1 >= 0 && --i2 >= 0) {
55 if (s1[i1] != s2[i2])
56 return s1[i1] - s2[i2];
57 }
58 return 0;
59}
60
61/* Line number list operations */ 46/* Line number list operations */
62 47
63/* Add a line to line number list */ 48/* Add a line to line number list */
@@ -131,29 +116,37 @@ static const Dwfl_Callbacks offline_callbacks = {
131}; 116};
132 117
133/* Get a Dwarf from offline image */ 118/* Get a Dwarf from offline image */
134static Dwarf *dwfl_init_offline_dwarf(int fd, Dwfl **dwflp, Dwarf_Addr *bias) 119static int debuginfo__init_offline_dwarf(struct debuginfo *self,
120 const char *path)
135{ 121{
136 Dwfl_Module *mod; 122 Dwfl_Module *mod;
137 Dwarf *dbg = NULL; 123 int fd;
138 124
139 if (!dwflp) 125 fd = open(path, O_RDONLY);
140 return NULL; 126 if (fd < 0)
127 return fd;
141 128
142 *dwflp = dwfl_begin(&offline_callbacks); 129 self->dwfl = dwfl_begin(&offline_callbacks);
143 if (!*dwflp) 130 if (!self->dwfl)
144 return NULL; 131 goto error;
145 132
146 mod = dwfl_report_offline(*dwflp, "", "", fd); 133 mod = dwfl_report_offline(self->dwfl, "", "", fd);
147 if (!mod) 134 if (!mod)
148 goto error; 135 goto error;
149 136
150 dbg = dwfl_module_getdwarf(mod, bias); 137 self->dbg = dwfl_module_getdwarf(mod, &self->bias);
151 if (!dbg) { 138 if (!self->dbg)
139 goto error;
140
141 return 0;
152error: 142error:
153 dwfl_end(*dwflp); 143 if (self->dwfl)
154 *dwflp = NULL; 144 dwfl_end(self->dwfl);
155 } 145 else
156 return dbg; 146 close(fd);
147 memset(self, 0, sizeof(*self));
148
149 return -ENOENT;
157} 150}
158 151
159#if _ELFUTILS_PREREQ(0, 148) 152#if _ELFUTILS_PREREQ(0, 148)
@@ -189,597 +182,81 @@ static const Dwfl_Callbacks kernel_callbacks = {
189}; 182};
190 183
191/* Get a Dwarf from live kernel image */ 184/* Get a Dwarf from live kernel image */
192static Dwarf *dwfl_init_live_kernel_dwarf(Dwarf_Addr addr, Dwfl **dwflp, 185static int debuginfo__init_online_kernel_dwarf(struct debuginfo *self,
193 Dwarf_Addr *bias) 186 Dwarf_Addr addr)
194{ 187{
195 Dwarf *dbg; 188 self->dwfl = dwfl_begin(&kernel_callbacks);
196 189 if (!self->dwfl)
197 if (!dwflp) 190 return -EINVAL;
198 return NULL;
199
200 *dwflp = dwfl_begin(&kernel_callbacks);
201 if (!*dwflp)
202 return NULL;
203 191
204 /* Load the kernel dwarves: Don't care the result here */ 192 /* Load the kernel dwarves: Don't care the result here */
205 dwfl_linux_kernel_report_kernel(*dwflp); 193 dwfl_linux_kernel_report_kernel(self->dwfl);
206 dwfl_linux_kernel_report_modules(*dwflp); 194 dwfl_linux_kernel_report_modules(self->dwfl);
207 195
208 dbg = dwfl_addrdwarf(*dwflp, addr, bias); 196 self->dbg = dwfl_addrdwarf(self->dwfl, addr, &self->bias);
209 /* Here, check whether we could get a real dwarf */ 197 /* Here, check whether we could get a real dwarf */
210 if (!dbg) { 198 if (!self->dbg) {
211 pr_debug("Failed to find kernel dwarf at %lx\n", 199 pr_debug("Failed to find kernel dwarf at %lx\n",
212 (unsigned long)addr); 200 (unsigned long)addr);
213 dwfl_end(*dwflp); 201 dwfl_end(self->dwfl);
214 *dwflp = NULL; 202 memset(self, 0, sizeof(*self));
203 return -ENOENT;
215 } 204 }
216 return dbg; 205
206 return 0;
217} 207}
218#else 208#else
219/* With older elfutils, this just support kernel module... */ 209/* With older elfutils, this just support kernel module... */
220static Dwarf *dwfl_init_live_kernel_dwarf(Dwarf_Addr addr __used, Dwfl **dwflp, 210static int debuginfo__init_online_kernel_dwarf(struct debuginfo *self,
221 Dwarf_Addr *bias) 211 Dwarf_Addr addr __used)
222{ 212{
223 int fd;
224 const char *path = kernel_get_module_path("kernel"); 213 const char *path = kernel_get_module_path("kernel");
225 214
226 if (!path) { 215 if (!path) {
227 pr_err("Failed to find vmlinux path\n"); 216 pr_err("Failed to find vmlinux path\n");
228 return NULL; 217 return -ENOENT;
229 } 218 }
230 219
231 pr_debug2("Use file %s for debuginfo\n", path); 220 pr_debug2("Use file %s for debuginfo\n", path);
232 fd = open(path, O_RDONLY); 221 return debuginfo__init_offline_dwarf(self, path);
233 if (fd < 0)
234 return NULL;
235
236 return dwfl_init_offline_dwarf(fd, dwflp, bias);
237} 222}
238#endif 223#endif
239 224
240/* Dwarf wrappers */ 225struct debuginfo *debuginfo__new(const char *path)
241
242/* Find the realpath of the target file. */
243static const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname)
244{
245 Dwarf_Files *files;
246 size_t nfiles, i;
247 const char *src = NULL;
248 int ret;
249
250 if (!fname)
251 return NULL;
252
253 ret = dwarf_getsrcfiles(cu_die, &files, &nfiles);
254 if (ret != 0)
255 return NULL;
256
257 for (i = 0; i < nfiles; i++) {
258 src = dwarf_filesrc(files, i, NULL, NULL);
259 if (strtailcmp(src, fname) == 0)
260 break;
261 }
262 if (i == nfiles)
263 return NULL;
264 return src;
265}
266
267/* Get DW_AT_comp_dir (should be NULL with older gcc) */
268static const char *cu_get_comp_dir(Dwarf_Die *cu_die)
269{
270 Dwarf_Attribute attr;
271 if (dwarf_attr(cu_die, DW_AT_comp_dir, &attr) == NULL)
272 return NULL;
273 return dwarf_formstring(&attr);
274}
275
276/* Get a line number and file name for given address */
277static int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr,
278 const char **fname, int *lineno)
279{
280 Dwarf_Line *line;
281 Dwarf_Addr laddr;
282
283 line = dwarf_getsrc_die(cudie, (Dwarf_Addr)addr);
284 if (line && dwarf_lineaddr(line, &laddr) == 0 &&
285 addr == (unsigned long)laddr && dwarf_lineno(line, lineno) == 0) {
286 *fname = dwarf_linesrc(line, NULL, NULL);
287 if (!*fname)
288 /* line number is useless without filename */
289 *lineno = 0;
290 }
291
292 return *lineno ?: -ENOENT;
293}
294
295/* Compare diename and tname */
296static bool die_compare_name(Dwarf_Die *dw_die, const char *tname)
297{
298 const char *name;
299 name = dwarf_diename(dw_die);
300 return name ? (strcmp(tname, name) == 0) : false;
301}
302
303/* Get callsite line number of inline-function instance */
304static int die_get_call_lineno(Dwarf_Die *in_die)
305{
306 Dwarf_Attribute attr;
307 Dwarf_Word ret;
308
309 if (!dwarf_attr(in_die, DW_AT_call_line, &attr))
310 return -ENOENT;
311
312 dwarf_formudata(&attr, &ret);
313 return (int)ret;
314}
315
316/* Get type die */
317static Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
318{
319 Dwarf_Attribute attr;
320
321 if (dwarf_attr_integrate(vr_die, DW_AT_type, &attr) &&
322 dwarf_formref_die(&attr, die_mem))
323 return die_mem;
324 else
325 return NULL;
326}
327
328/* Get a type die, but skip qualifiers */
329static Dwarf_Die *__die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
330{
331 int tag;
332
333 do {
334 vr_die = die_get_type(vr_die, die_mem);
335 if (!vr_die)
336 break;
337 tag = dwarf_tag(vr_die);
338 } while (tag == DW_TAG_const_type ||
339 tag == DW_TAG_restrict_type ||
340 tag == DW_TAG_volatile_type ||
341 tag == DW_TAG_shared_type);
342
343 return vr_die;
344}
345
346/* Get a type die, but skip qualifiers and typedef */
347static Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
348{
349 do {
350 vr_die = __die_get_real_type(vr_die, die_mem);
351 } while (vr_die && dwarf_tag(vr_die) == DW_TAG_typedef);
352
353 return vr_die;
354}
355
356static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name,
357 Dwarf_Word *result)
358{
359 Dwarf_Attribute attr;
360
361 if (dwarf_attr(tp_die, attr_name, &attr) == NULL ||
362 dwarf_formudata(&attr, result) != 0)
363 return -ENOENT;
364
365 return 0;
366}
367
368static bool die_is_signed_type(Dwarf_Die *tp_die)
369{
370 Dwarf_Word ret;
371
372 if (die_get_attr_udata(tp_die, DW_AT_encoding, &ret))
373 return false;
374
375 return (ret == DW_ATE_signed_char || ret == DW_ATE_signed ||
376 ret == DW_ATE_signed_fixed);
377}
378
379static int die_get_byte_size(Dwarf_Die *tp_die)
380{
381 Dwarf_Word ret;
382
383 if (die_get_attr_udata(tp_die, DW_AT_byte_size, &ret))
384 return 0;
385
386 return (int)ret;
387}
388
389static int die_get_bit_size(Dwarf_Die *tp_die)
390{
391 Dwarf_Word ret;
392
393 if (die_get_attr_udata(tp_die, DW_AT_bit_size, &ret))
394 return 0;
395
396 return (int)ret;
397}
398
399static int die_get_bit_offset(Dwarf_Die *tp_die)
400{
401 Dwarf_Word ret;
402
403 if (die_get_attr_udata(tp_die, DW_AT_bit_offset, &ret))
404 return 0;
405
406 return (int)ret;
407}
408
409/* Get data_member_location offset */
410static int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs)
411{
412 Dwarf_Attribute attr;
413 Dwarf_Op *expr;
414 size_t nexpr;
415 int ret;
416
417 if (dwarf_attr(mb_die, DW_AT_data_member_location, &attr) == NULL)
418 return -ENOENT;
419
420 if (dwarf_formudata(&attr, offs) != 0) {
421 /* DW_AT_data_member_location should be DW_OP_plus_uconst */
422 ret = dwarf_getlocation(&attr, &expr, &nexpr);
423 if (ret < 0 || nexpr == 0)
424 return -ENOENT;
425
426 if (expr[0].atom != DW_OP_plus_uconst || nexpr != 1) {
427 pr_debug("Unable to get offset:Unexpected OP %x (%zd)\n",
428 expr[0].atom, nexpr);
429 return -ENOTSUP;
430 }
431 *offs = (Dwarf_Word)expr[0].number;
432 }
433 return 0;
434}
435
436/* Return values for die_find callbacks */
437enum {
438 DIE_FIND_CB_FOUND = 0, /* End of Search */
439 DIE_FIND_CB_CHILD = 1, /* Search only children */
440 DIE_FIND_CB_SIBLING = 2, /* Search only siblings */
441 DIE_FIND_CB_CONTINUE = 3, /* Search children and siblings */
442};
443
444/* Search a child die */
445static Dwarf_Die *die_find_child(Dwarf_Die *rt_die,
446 int (*callback)(Dwarf_Die *, void *),
447 void *data, Dwarf_Die *die_mem)
448{ 226{
449 Dwarf_Die child_die; 227 struct debuginfo *self = zalloc(sizeof(struct debuginfo));
450 int ret; 228 if (!self)
451
452 ret = dwarf_child(rt_die, die_mem);
453 if (ret != 0)
454 return NULL; 229 return NULL;
455 230
456 do { 231 if (debuginfo__init_offline_dwarf(self, path) < 0) {
457 ret = callback(die_mem, data); 232 free(self);
458 if (ret == DIE_FIND_CB_FOUND) 233 self = NULL;
459 return die_mem;
460
461 if ((ret & DIE_FIND_CB_CHILD) &&
462 die_find_child(die_mem, callback, data, &child_die)) {
463 memcpy(die_mem, &child_die, sizeof(Dwarf_Die));
464 return die_mem;
465 }
466 } while ((ret & DIE_FIND_CB_SIBLING) &&
467 dwarf_siblingof(die_mem, die_mem) == 0);
468
469 return NULL;
470}
471
472struct __addr_die_search_param {
473 Dwarf_Addr addr;
474 Dwarf_Die *die_mem;
475};
476
477static int __die_search_func_cb(Dwarf_Die *fn_die, void *data)
478{
479 struct __addr_die_search_param *ad = data;
480
481 if (dwarf_tag(fn_die) == DW_TAG_subprogram &&
482 dwarf_haspc(fn_die, ad->addr)) {
483 memcpy(ad->die_mem, fn_die, sizeof(Dwarf_Die));
484 return DWARF_CB_ABORT;
485 } 234 }
486 return DWARF_CB_OK;
487}
488
489/* Search a real subprogram including this line, */
490static Dwarf_Die *die_find_real_subprogram(Dwarf_Die *cu_die, Dwarf_Addr addr,
491 Dwarf_Die *die_mem)
492{
493 struct __addr_die_search_param ad;
494 ad.addr = addr;
495 ad.die_mem = die_mem;
496 /* dwarf_getscopes can't find subprogram. */
497 if (!dwarf_getfuncs(cu_die, __die_search_func_cb, &ad, 0))
498 return NULL;
499 else
500 return die_mem;
501}
502
503/* die_find callback for inline function search */
504static int __die_find_inline_cb(Dwarf_Die *die_mem, void *data)
505{
506 Dwarf_Addr *addr = data;
507
508 if (dwarf_tag(die_mem) == DW_TAG_inlined_subroutine &&
509 dwarf_haspc(die_mem, *addr))
510 return DIE_FIND_CB_FOUND;
511 235
512 return DIE_FIND_CB_CONTINUE; 236 return self;
513} 237}
514 238
515/* Similar to dwarf_getfuncs, but returns inlined_subroutine if exists. */ 239struct debuginfo *debuginfo__new_online_kernel(unsigned long addr)
516static Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
517 Dwarf_Die *die_mem)
518{ 240{
519 Dwarf_Die tmp_die; 241 struct debuginfo *self = zalloc(sizeof(struct debuginfo));
520 242 if (!self)
521 sp_die = die_find_child(sp_die, __die_find_inline_cb, &addr, &tmp_die);
522 if (!sp_die)
523 return NULL; 243 return NULL;
524 244
525 /* Inlined function could be recursive. Trace it until fail */ 245 if (debuginfo__init_online_kernel_dwarf(self, (Dwarf_Addr)addr) < 0) {
526 while (sp_die) { 246 free(self);
527 memcpy(die_mem, sp_die, sizeof(Dwarf_Die)); 247 self = NULL;
528 sp_die = die_find_child(sp_die, __die_find_inline_cb, &addr,
529 &tmp_die);
530 }
531
532 return die_mem;
533}
534
535/* Walker on lines (Note: line number will not be sorted) */
536typedef int (* line_walk_handler_t) (const char *fname, int lineno,
537 Dwarf_Addr addr, void *data);
538
539struct __line_walk_param {
540 const char *fname;
541 line_walk_handler_t handler;
542 void *data;
543 int retval;
544};
545
546static int __die_walk_funclines_cb(Dwarf_Die *in_die, void *data)
547{
548 struct __line_walk_param *lw = data;
549 Dwarf_Addr addr;
550 int lineno;
551
552 if (dwarf_tag(in_die) == DW_TAG_inlined_subroutine) {
553 lineno = die_get_call_lineno(in_die);
554 if (lineno > 0 && dwarf_entrypc(in_die, &addr) == 0) {
555 lw->retval = lw->handler(lw->fname, lineno, addr,
556 lw->data);
557 if (lw->retval != 0)
558 return DIE_FIND_CB_FOUND;
559 }
560 }
561 return DIE_FIND_CB_SIBLING;
562}
563
564/* Walk on lines of blocks included in given DIE */
565static int __die_walk_funclines(Dwarf_Die *sp_die,
566 line_walk_handler_t handler, void *data)
567{
568 struct __line_walk_param lw = {
569 .handler = handler,
570 .data = data,
571 .retval = 0,
572 };
573 Dwarf_Die die_mem;
574 Dwarf_Addr addr;
575 int lineno;
576
577 /* Handle function declaration line */
578 lw.fname = dwarf_decl_file(sp_die);
579 if (lw.fname && dwarf_decl_line(sp_die, &lineno) == 0 &&
580 dwarf_entrypc(sp_die, &addr) == 0) {
581 lw.retval = handler(lw.fname, lineno, addr, data);
582 if (lw.retval != 0)
583 goto done;
584 }
585 die_find_child(sp_die, __die_walk_funclines_cb, &lw, &die_mem);
586done:
587 return lw.retval;
588}
589
590static int __die_walk_culines_cb(Dwarf_Die *sp_die, void *data)
591{
592 struct __line_walk_param *lw = data;
593
594 lw->retval = __die_walk_funclines(sp_die, lw->handler, lw->data);
595 if (lw->retval != 0)
596 return DWARF_CB_ABORT;
597
598 return DWARF_CB_OK;
599}
600
601/*
602 * Walk on lines inside given PDIE. If the PDIE is subprogram, walk only on
603 * the lines inside the subprogram, otherwise PDIE must be a CU DIE.
604 */
605static int die_walk_lines(Dwarf_Die *pdie, line_walk_handler_t handler,
606 void *data)
607{
608 Dwarf_Lines *lines;
609 Dwarf_Line *line;
610 Dwarf_Addr addr;
611 const char *fname;
612 int lineno, ret = 0;
613 Dwarf_Die die_mem, *cu_die;
614 size_t nlines, i;
615
616 /* Get the CU die */
617 if (dwarf_tag(pdie) == DW_TAG_subprogram)
618 cu_die = dwarf_diecu(pdie, &die_mem, NULL, NULL);
619 else
620 cu_die = pdie;
621 if (!cu_die) {
622 pr_debug2("Failed to get CU from subprogram\n");
623 return -EINVAL;
624 }
625
626 /* Get lines list in the CU */
627 if (dwarf_getsrclines(cu_die, &lines, &nlines) != 0) {
628 pr_debug2("Failed to get source lines on this CU.\n");
629 return -ENOENT;
630 }
631 pr_debug2("Get %zd lines from this CU\n", nlines);
632
633 /* Walk on the lines on lines list */
634 for (i = 0; i < nlines; i++) {
635 line = dwarf_onesrcline(lines, i);
636 if (line == NULL ||
637 dwarf_lineno(line, &lineno) != 0 ||
638 dwarf_lineaddr(line, &addr) != 0) {
639 pr_debug2("Failed to get line info. "
640 "Possible error in debuginfo.\n");
641 continue;
642 }
643 /* Filter lines based on address */
644 if (pdie != cu_die)
645 /*
646 * Address filtering
647 * The line is included in given function, and
648 * no inline block includes it.
649 */
650 if (!dwarf_haspc(pdie, addr) ||
651 die_find_inlinefunc(pdie, addr, &die_mem))
652 continue;
653 /* Get source line */
654 fname = dwarf_linesrc(line, NULL, NULL);
655
656 ret = handler(fname, lineno, addr, data);
657 if (ret != 0)
658 return ret;
659 }
660
661 /*
662 * Dwarf lines doesn't include function declarations and inlined
663 * subroutines. We have to check functions list or given function.
664 */
665 if (pdie != cu_die)
666 ret = __die_walk_funclines(pdie, handler, data);
667 else {
668 struct __line_walk_param param = {
669 .handler = handler,
670 .data = data,
671 .retval = 0,
672 };
673 dwarf_getfuncs(cu_die, __die_walk_culines_cb, &param, 0);
674 ret = param.retval;
675 } 248 }
676 249
677 return ret; 250 return self;
678}
679
680struct __find_variable_param {
681 const char *name;
682 Dwarf_Addr addr;
683};
684
685static int __die_find_variable_cb(Dwarf_Die *die_mem, void *data)
686{
687 struct __find_variable_param *fvp = data;
688 int tag;
689
690 tag = dwarf_tag(die_mem);
691 if ((tag == DW_TAG_formal_parameter ||
692 tag == DW_TAG_variable) &&
693 die_compare_name(die_mem, fvp->name))
694 return DIE_FIND_CB_FOUND;
695
696 if (dwarf_haspc(die_mem, fvp->addr))
697 return DIE_FIND_CB_CONTINUE;
698 else
699 return DIE_FIND_CB_SIBLING;
700}
701
702/* Find a variable called 'name' at given address */
703static Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name,
704 Dwarf_Addr addr, Dwarf_Die *die_mem)
705{
706 struct __find_variable_param fvp = { .name = name, .addr = addr};
707
708 return die_find_child(sp_die, __die_find_variable_cb, (void *)&fvp,
709 die_mem);
710}
711
712static int __die_find_member_cb(Dwarf_Die *die_mem, void *data)
713{
714 const char *name = data;
715
716 if ((dwarf_tag(die_mem) == DW_TAG_member) &&
717 die_compare_name(die_mem, name))
718 return DIE_FIND_CB_FOUND;
719
720 return DIE_FIND_CB_SIBLING;
721}
722
723/* Find a member called 'name' */
724static Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
725 Dwarf_Die *die_mem)
726{
727 return die_find_child(st_die, __die_find_member_cb, (void *)name,
728 die_mem);
729}
730
731/* Get the name of given variable DIE */
732static int die_get_typename(Dwarf_Die *vr_die, char *buf, int len)
733{
734 Dwarf_Die type;
735 int tag, ret, ret2;
736 const char *tmp = "";
737
738 if (__die_get_real_type(vr_die, &type) == NULL)
739 return -ENOENT;
740
741 tag = dwarf_tag(&type);
742 if (tag == DW_TAG_array_type || tag == DW_TAG_pointer_type)
743 tmp = "*";
744 else if (tag == DW_TAG_subroutine_type) {
745 /* Function pointer */
746 ret = snprintf(buf, len, "(function_type)");
747 return (ret >= len) ? -E2BIG : ret;
748 } else {
749 if (!dwarf_diename(&type))
750 return -ENOENT;
751 if (tag == DW_TAG_union_type)
752 tmp = "union ";
753 else if (tag == DW_TAG_structure_type)
754 tmp = "struct ";
755 /* Write a base name */
756 ret = snprintf(buf, len, "%s%s", tmp, dwarf_diename(&type));
757 return (ret >= len) ? -E2BIG : ret;
758 }
759 ret = die_get_typename(&type, buf, len);
760 if (ret > 0) {
761 ret2 = snprintf(buf + ret, len - ret, "%s", tmp);
762 ret = (ret2 >= len - ret) ? -E2BIG : ret2 + ret;
763 }
764 return ret;
765} 251}
766 252
767/* Get the name and type of given variable DIE, stored as "type\tname" */ 253void debuginfo__delete(struct debuginfo *self)
768static int die_get_varname(Dwarf_Die *vr_die, char *buf, int len)
769{ 254{
770 int ret, ret2; 255 if (self) {
771 256 if (self->dwfl)
772 ret = die_get_typename(vr_die, buf, len); 257 dwfl_end(self->dwfl);
773 if (ret < 0) { 258 free(self);
774		pr_debug("Failed to get the type; marking it unknown.\n");
775 ret = snprintf(buf, len, "(unknown_type)");
776 } 259 }
777 if (ret > 0) {
778 ret2 = snprintf(buf + ret, len - ret, "\t%s",
779 dwarf_diename(vr_die));
780 ret = (ret2 >= len - ret) ? -E2BIG : ret2 + ret;
781 }
782 return ret;
783} 260}
784 261
785/* 262/*
@@ -897,6 +374,7 @@ static int convert_variable_type(Dwarf_Die *vr_die,
897 struct probe_trace_arg_ref **ref_ptr = &tvar->ref; 374 struct probe_trace_arg_ref **ref_ptr = &tvar->ref;
898 Dwarf_Die type; 375 Dwarf_Die type;
899 char buf[16]; 376 char buf[16];
377 int bsize, boffs, total;
900 int ret; 378 int ret;
901 379
902 /* TODO: check all types */ 380 /* TODO: check all types */
@@ -906,11 +384,15 @@ static int convert_variable_type(Dwarf_Die *vr_die,
906 return (tvar->type == NULL) ? -ENOMEM : 0; 384 return (tvar->type == NULL) ? -ENOMEM : 0;
907 } 385 }
908 386
909 if (die_get_bit_size(vr_die) != 0) { 387 bsize = dwarf_bitsize(vr_die);
388 if (bsize > 0) {
910 /* This is a bitfield */ 389 /* This is a bitfield */
911 ret = snprintf(buf, 16, "b%d@%d/%zd", die_get_bit_size(vr_die), 390 boffs = dwarf_bitoffset(vr_die);
912 die_get_bit_offset(vr_die), 391 total = dwarf_bytesize(vr_die);
913 BYTES_TO_BITS(die_get_byte_size(vr_die))); 392 if (boffs < 0 || total < 0)
393 return -ENOENT;
394 ret = snprintf(buf, 16, "b%d@%d/%zd", bsize, boffs,
395 BYTES_TO_BITS(total));
914 goto formatted; 396 goto formatted;
915 } 397 }
916 398
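
The "b%d@%d/%zd" string assembled in the hunk above follows the kprobe-tracer bitfield argument syntax, b<bit-width>@<bit-offset>/<container-size-in-bits>, documented in Documentation/trace/kprobetrace.txt. A standalone sketch of the formatting with made-up values (the patch itself uses %zd via BYTES_TO_BITS()):

#include <stdio.h>

int main(void)
{
	char buf[16];
	int bsize = 3, boffs = 5, total = 4;	/* 3-bit field at bit 5 of a 4-byte word */

	snprintf(buf, sizeof(buf), "b%d@%d/%d", bsize, boffs, total * 8);
	printf("%s\n", buf);			/* prints "b3@5/32" */
	return 0;
}
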
@@ -958,10 +440,11 @@ static int convert_variable_type(Dwarf_Die *vr_die,
958 return (tvar->type == NULL) ? -ENOMEM : 0; 440 return (tvar->type == NULL) ? -ENOMEM : 0;
959 } 441 }
960 442
961 ret = BYTES_TO_BITS(die_get_byte_size(&type)); 443 ret = dwarf_bytesize(&type);
962 if (!ret) 444 if (ret <= 0)
963 /* No size ... try to use default type */ 445 /* No size ... try to use default type */
964 return 0; 446 return 0;
447 ret = BYTES_TO_BITS(ret);
965 448
966 /* Check the bitwidth */ 449 /* Check the bitwidth */
967 if (ret > MAX_BASIC_TYPE_BITS) { 450 if (ret > MAX_BASIC_TYPE_BITS) {
@@ -1025,7 +508,7 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname,
1025 else 508 else
1026 *ref_ptr = ref; 509 *ref_ptr = ref;
1027 } 510 }
1028 ref->offset += die_get_byte_size(&type) * field->index; 511 ref->offset += dwarf_bytesize(&type) * field->index;
1029 if (!field->next) 512 if (!field->next)
1030 /* Save vr_die for converting types */ 513 /* Save vr_die for converting types */
1031 memcpy(die_mem, vr_die, sizeof(*die_mem)); 514 memcpy(die_mem, vr_die, sizeof(*die_mem));
@@ -1245,8 +728,7 @@ static int call_probe_finder(Dwarf_Die *sp_die, struct probe_finder *pf)
1245 728
1246 /* If no real subprogram, find a real one */ 729 /* If no real subprogram, find a real one */
1247 if (!sp_die || dwarf_tag(sp_die) != DW_TAG_subprogram) { 730 if (!sp_die || dwarf_tag(sp_die) != DW_TAG_subprogram) {
1248 sp_die = die_find_real_subprogram(&pf->cu_die, 731 sp_die = die_find_realfunc(&pf->cu_die, pf->addr, &die_mem);
1249 pf->addr, &die_mem);
1250 if (!sp_die) { 732 if (!sp_die) {
1251 pr_warning("Failed to find probe point in any " 733 pr_warning("Failed to find probe point in any "
1252 "functions.\n"); 734 "functions.\n");
@@ -1504,28 +986,18 @@ static int pubname_search_cb(Dwarf *dbg, Dwarf_Global *gl, void *data)
1504} 986}
1505 987
1506/* Find probe points from debuginfo */ 988/* Find probe points from debuginfo */
1507static int find_probes(int fd, struct probe_finder *pf) 989static int debuginfo__find_probes(struct debuginfo *self,
990 struct probe_finder *pf)
1508{ 991{
1509 struct perf_probe_point *pp = &pf->pev->point; 992 struct perf_probe_point *pp = &pf->pev->point;
1510 Dwarf_Off off, noff; 993 Dwarf_Off off, noff;
1511 size_t cuhl; 994 size_t cuhl;
1512 Dwarf_Die *diep; 995 Dwarf_Die *diep;
1513 Dwarf *dbg = NULL;
1514 Dwfl *dwfl;
1515 Dwarf_Addr bias; /* Currently ignored */
1516 int ret = 0; 996 int ret = 0;
1517 997
1518 dbg = dwfl_init_offline_dwarf(fd, &dwfl, &bias);
1519 if (!dbg) {
1520 pr_warning("No debug information found in the vmlinux - "
1521 "please rebuild with CONFIG_DEBUG_INFO=y.\n");
1522 close(fd); /* Without dwfl_end(), fd isn't closed. */
1523 return -EBADF;
1524 }
1525
1526#if _ELFUTILS_PREREQ(0, 142) 998#if _ELFUTILS_PREREQ(0, 142)
1527 /* Get the call frame information from this dwarf */ 999 /* Get the call frame information from this dwarf */
1528 pf->cfi = dwarf_getcfi(dbg); 1000 pf->cfi = dwarf_getcfi(self->dbg);
1529#endif 1001#endif
1530 1002
1531 off = 0; 1003 off = 0;
@@ -1544,7 +1016,8 @@ static int find_probes(int fd, struct probe_finder *pf)
1544 .data = pf, 1016 .data = pf,
1545 }; 1017 };
1546 1018
1547 dwarf_getpubnames(dbg, pubname_search_cb, &pubname_param, 0); 1019 dwarf_getpubnames(self->dbg, pubname_search_cb,
1020 &pubname_param, 0);
1548 if (pubname_param.found) { 1021 if (pubname_param.found) {
1549 ret = probe_point_search_cb(&pf->sp_die, &probe_param); 1022 ret = probe_point_search_cb(&pf->sp_die, &probe_param);
1550 if (ret) 1023 if (ret)
@@ -1553,9 +1026,9 @@ static int find_probes(int fd, struct probe_finder *pf)
1553 } 1026 }
1554 1027
1555 /* Loop on CUs (Compilation Unit) */ 1028 /* Loop on CUs (Compilation Unit) */
1556 while (!dwarf_nextcu(dbg, off, &noff, &cuhl, NULL, NULL, NULL)) { 1029 while (!dwarf_nextcu(self->dbg, off, &noff, &cuhl, NULL, NULL, NULL)) {
1557 /* Get the DIE(Debugging Information Entry) of this CU */ 1030 /* Get the DIE(Debugging Information Entry) of this CU */
1558 diep = dwarf_offdie(dbg, off + cuhl, &pf->cu_die); 1031 diep = dwarf_offdie(self->dbg, off + cuhl, &pf->cu_die);
1559 if (!diep) 1032 if (!diep)
1560 continue; 1033 continue;
1561 1034
@@ -1582,8 +1055,6 @@ static int find_probes(int fd, struct probe_finder *pf)
1582 1055
1583found: 1056found:
1584 line_list__free(&pf->lcache); 1057 line_list__free(&pf->lcache);
1585 if (dwfl)
1586 dwfl_end(dwfl);
1587 1058
1588 return ret; 1059 return ret;
1589} 1060}
@@ -1629,8 +1100,9 @@ static int add_probe_trace_event(Dwarf_Die *sp_die, struct probe_finder *pf)
1629} 1100}
1630 1101
1631/* Find probe_trace_events specified by perf_probe_event from debuginfo */ 1102/* Find probe_trace_events specified by perf_probe_event from debuginfo */
1632int find_probe_trace_events(int fd, struct perf_probe_event *pev, 1103int debuginfo__find_trace_events(struct debuginfo *self,
1633 struct probe_trace_event **tevs, int max_tevs) 1104 struct perf_probe_event *pev,
1105 struct probe_trace_event **tevs, int max_tevs)
1634{ 1106{
1635 struct trace_event_finder tf = { 1107 struct trace_event_finder tf = {
1636 .pf = {.pev = pev, .callback = add_probe_trace_event}, 1108 .pf = {.pev = pev, .callback = add_probe_trace_event},
@@ -1645,7 +1117,7 @@ int find_probe_trace_events(int fd, struct perf_probe_event *pev,
1645 tf.tevs = *tevs; 1117 tf.tevs = *tevs;
1646 tf.ntevs = 0; 1118 tf.ntevs = 0;
1647 1119
1648 ret = find_probes(fd, &tf.pf); 1120 ret = debuginfo__find_probes(self, &tf.pf);
1649 if (ret < 0) { 1121 if (ret < 0) {
1650 free(*tevs); 1122 free(*tevs);
1651 *tevs = NULL; 1123 *tevs = NULL;
@@ -1739,9 +1211,10 @@ out:
1739} 1211}
1740 1212
1741/* Find available variables at given probe point */ 1213/* Find available variables at given probe point */
1742int find_available_vars_at(int fd, struct perf_probe_event *pev, 1214int debuginfo__find_available_vars_at(struct debuginfo *self,
1743 struct variable_list **vls, int max_vls, 1215 struct perf_probe_event *pev,
1744 bool externs) 1216 struct variable_list **vls,
1217 int max_vls, bool externs)
1745{ 1218{
1746 struct available_var_finder af = { 1219 struct available_var_finder af = {
1747 .pf = {.pev = pev, .callback = add_available_vars}, 1220 .pf = {.pev = pev, .callback = add_available_vars},
@@ -1756,7 +1229,7 @@ int find_available_vars_at(int fd, struct perf_probe_event *pev,
1756 af.vls = *vls; 1229 af.vls = *vls;
1757 af.nvls = 0; 1230 af.nvls = 0;
1758 1231
1759 ret = find_probes(fd, &af.pf); 1232 ret = debuginfo__find_probes(self, &af.pf);
1760 if (ret < 0) { 1233 if (ret < 0) {
1761 /* Free vlist for error */ 1234 /* Free vlist for error */
1762 while (af.nvls--) { 1235 while (af.nvls--) {
@@ -1774,28 +1247,19 @@ int find_available_vars_at(int fd, struct perf_probe_event *pev,
1774} 1247}
1775 1248
1776/* Reverse search */ 1249/* Reverse search */
1777int find_perf_probe_point(unsigned long addr, struct perf_probe_point *ppt) 1250int debuginfo__find_probe_point(struct debuginfo *self, unsigned long addr,
1251 struct perf_probe_point *ppt)
1778{ 1252{
1779 Dwarf_Die cudie, spdie, indie; 1253 Dwarf_Die cudie, spdie, indie;
1780 Dwarf *dbg = NULL; 1254 Dwarf_Addr _addr, baseaddr;
1781 Dwfl *dwfl = NULL;
1782 Dwarf_Addr _addr, baseaddr, bias = 0;
1783 const char *fname = NULL, *func = NULL, *tmp; 1255 const char *fname = NULL, *func = NULL, *tmp;
1784 int baseline = 0, lineno = 0, ret = 0; 1256 int baseline = 0, lineno = 0, ret = 0;
1785 1257
1786 /* Open the live linux kernel */
1787 dbg = dwfl_init_live_kernel_dwarf(addr, &dwfl, &bias);
1788 if (!dbg) {
1789 pr_warning("No debug information found in the vmlinux - "
1790 "please rebuild with CONFIG_DEBUG_INFO=y.\n");
1791 ret = -EINVAL;
1792 goto end;
1793 }
1794
1795 /* Adjust address with bias */ 1258 /* Adjust address with bias */
1796 addr += bias; 1259 addr += self->bias;
1260
1797 /* Find cu die */ 1261 /* Find cu die */
1798 if (!dwarf_addrdie(dbg, (Dwarf_Addr)addr - bias, &cudie)) { 1262 if (!dwarf_addrdie(self->dbg, (Dwarf_Addr)addr - self->bias, &cudie)) {
1799 pr_warning("Failed to find debug information for address %lx\n", 1263 pr_warning("Failed to find debug information for address %lx\n",
1800 addr); 1264 addr);
1801 ret = -EINVAL; 1265 ret = -EINVAL;
@@ -1807,7 +1271,7 @@ int find_perf_probe_point(unsigned long addr, struct perf_probe_point *ppt)
1807 /* Don't care whether it failed or not */ 1271 /* Don't care whether it failed or not */
1808 1272
1809 /* Find a corresponding function (name, baseline and baseaddr) */ 1273 /* Find a corresponding function (name, baseline and baseaddr) */
1810 if (die_find_real_subprogram(&cudie, (Dwarf_Addr)addr, &spdie)) { 1274 if (die_find_realfunc(&cudie, (Dwarf_Addr)addr, &spdie)) {
1811 /* Get function entry information */ 1275 /* Get function entry information */
1812 tmp = dwarf_diename(&spdie); 1276 tmp = dwarf_diename(&spdie);
1813 if (!tmp || 1277 if (!tmp ||
@@ -1871,8 +1335,6 @@ post:
1871 } 1335 }
1872 } 1336 }
1873end: 1337end:
1874 if (dwfl)
1875 dwfl_end(dwfl);
1876 if (ret == 0 && (fname || func)) 1338 if (ret == 0 && (fname || func))
1877 ret = 1; /* Found a point */ 1339 ret = 1; /* Found a point */
1878 return ret; 1340 return ret;
@@ -1982,26 +1444,15 @@ static int find_line_range_by_func(struct line_finder *lf)
1982 return param.retval; 1444 return param.retval;
1983} 1445}
1984 1446
1985int find_line_range(int fd, struct line_range *lr) 1447int debuginfo__find_line_range(struct debuginfo *self, struct line_range *lr)
1986{ 1448{
1987 struct line_finder lf = {.lr = lr, .found = 0}; 1449 struct line_finder lf = {.lr = lr, .found = 0};
1988 int ret = 0; 1450 int ret = 0;
1989 Dwarf_Off off = 0, noff; 1451 Dwarf_Off off = 0, noff;
1990 size_t cuhl; 1452 size_t cuhl;
1991 Dwarf_Die *diep; 1453 Dwarf_Die *diep;
1992 Dwarf *dbg = NULL;
1993 Dwfl *dwfl;
1994 Dwarf_Addr bias; /* Currently ignored */
1995 const char *comp_dir; 1454 const char *comp_dir;
1996 1455
1997 dbg = dwfl_init_offline_dwarf(fd, &dwfl, &bias);
1998 if (!dbg) {
1999 pr_warning("No debug information found in the vmlinux - "
2000 "please rebuild with CONFIG_DEBUG_INFO=y.\n");
2001 close(fd); /* Without dwfl_end(), fd isn't closed. */
2002 return -EBADF;
2003 }
2004
2005 /* Fastpath: lookup by function name from .debug_pubnames section */ 1456 /* Fastpath: lookup by function name from .debug_pubnames section */
2006 if (lr->function) { 1457 if (lr->function) {
2007 struct pubname_callback_param pubname_param = { 1458 struct pubname_callback_param pubname_param = {
@@ -2010,7 +1461,8 @@ int find_line_range(int fd, struct line_range *lr)
2010 struct dwarf_callback_param line_range_param = { 1461 struct dwarf_callback_param line_range_param = {
2011 .data = (void *)&lf, .retval = 0}; 1462 .data = (void *)&lf, .retval = 0};
2012 1463
2013 dwarf_getpubnames(dbg, pubname_search_cb, &pubname_param, 0); 1464 dwarf_getpubnames(self->dbg, pubname_search_cb,
1465 &pubname_param, 0);
2014 if (pubname_param.found) { 1466 if (pubname_param.found) {
2015 line_range_search_cb(&lf.sp_die, &line_range_param); 1467 line_range_search_cb(&lf.sp_die, &line_range_param);
2016 if (lf.found) 1468 if (lf.found)
@@ -2020,11 +1472,12 @@ int find_line_range(int fd, struct line_range *lr)
2020 1472
2021 /* Loop on CUs (Compilation Unit) */ 1473 /* Loop on CUs (Compilation Unit) */
2022 while (!lf.found && ret >= 0) { 1474 while (!lf.found && ret >= 0) {
2023 if (dwarf_nextcu(dbg, off, &noff, &cuhl, NULL, NULL, NULL) != 0) 1475 if (dwarf_nextcu(self->dbg, off, &noff, &cuhl,
1476 NULL, NULL, NULL) != 0)
2024 break; 1477 break;
2025 1478
2026 /* Get the DIE(Debugging Information Entry) of this CU */ 1479 /* Get the DIE(Debugging Information Entry) of this CU */
2027 diep = dwarf_offdie(dbg, off + cuhl, &lf.cu_die); 1480 diep = dwarf_offdie(self->dbg, off + cuhl, &lf.cu_die);
2028 if (!diep) 1481 if (!diep)
2029 continue; 1482 continue;
2030 1483
@@ -2058,7 +1511,6 @@ found:
2058 } 1511 }
2059 1512
2060 pr_debug("path: %s\n", lr->path); 1513 pr_debug("path: %s\n", lr->path);
2061 dwfl_end(dwfl);
2062 return (ret < 0) ? ret : lf.found; 1514 return (ret < 0) ? ret : lf.found;
2063} 1515}
2064 1516
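
Both debuginfo__find_probes() and debuginfo__find_line_range() above try a fastpath through dwarf_getpubnames() before iterating every CU. A hedged sketch of that libdw callback pattern; `struct lookup` and find_global_cb() are illustrative names, not from the patch:

#include <elfutils/libdw.h>
#include <string.h>

struct lookup {
	const char *want;
	Dwarf_Off die_off;
	int found;
};

static int find_global_cb(Dwarf *dbg, Dwarf_Global *gl, void *arg)
{
	struct lookup *l = arg;

	(void)dbg;
	if (strcmp(gl->name, l->want) == 0) {
		l->die_off = gl->die_offset;	/* offset of the matching DIE */
		l->found = 1;
		return DWARF_CB_ABORT;		/* stop scanning the index */
	}
	return DWARF_CB_OK;
}

/* usage: dwarf_getpubnames(dbg, find_global_cb, &l, 0); */
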
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index 605730a366db..c478b42a2473 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -16,27 +16,42 @@ static inline int is_c_varname(const char *name)
16} 16}
17 17
18#ifdef DWARF_SUPPORT 18#ifdef DWARF_SUPPORT
19
20#include "dwarf-aux.h"
21
22/* TODO: export debuginfo data structure even if no dwarf support */
23
24/* debug information structure */
25struct debuginfo {
26 Dwarf *dbg;
27 Dwfl *dwfl;
28 Dwarf_Addr bias;
29};
30
31extern struct debuginfo *debuginfo__new(const char *path);
32extern struct debuginfo *debuginfo__new_online_kernel(unsigned long addr);
33extern void debuginfo__delete(struct debuginfo *self);
34
19/* Find probe_trace_events specified by perf_probe_event from debuginfo */ 35/* Find probe_trace_events specified by perf_probe_event from debuginfo */
20extern int find_probe_trace_events(int fd, struct perf_probe_event *pev, 36extern int debuginfo__find_trace_events(struct debuginfo *self,
21 struct probe_trace_event **tevs, 37 struct perf_probe_event *pev,
22 int max_tevs); 38 struct probe_trace_event **tevs,
39 int max_tevs);
23 40
24/* Find a perf_probe_point from debuginfo */ 41/* Find a perf_probe_point from debuginfo */
25extern int find_perf_probe_point(unsigned long addr, 42extern int debuginfo__find_probe_point(struct debuginfo *self,
26 struct perf_probe_point *ppt); 43 unsigned long addr,
44 struct perf_probe_point *ppt);
27 45
28/* Find a line range */ 46/* Find a line range */
29extern int find_line_range(int fd, struct line_range *lr); 47extern int debuginfo__find_line_range(struct debuginfo *self,
48 struct line_range *lr);
30 49
31/* Find available variables */ 50/* Find available variables */
32extern int find_available_vars_at(int fd, struct perf_probe_event *pev, 51extern int debuginfo__find_available_vars_at(struct debuginfo *self,
33 struct variable_list **vls, int max_points, 52 struct perf_probe_event *pev,
34 bool externs); 53 struct variable_list **vls,
35 54 int max_points, bool externs);
36#include <dwarf.h>
37#include <elfutils/libdw.h>
38#include <elfutils/libdwfl.h>
39#include <elfutils/version.h>
40 55
41struct probe_finder { 56struct probe_finder {
42 struct perf_probe_event *pev; /* Target probe event */ 57 struct perf_probe_event *pev; /* Target probe event */
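
Taken together, the declarations above replace the per-call file-descriptor interface with an object whose lifetime the caller controls. A minimal usage sketch under that API; the vmlinux path argument and the max_tevs value of 128 are illustrative:

#include <errno.h>
#include "probe-finder.h"

static int example_query(const char *vmlinux_path,
			 struct perf_probe_event *pev,
			 struct probe_trace_event **tevs)
{
	struct debuginfo *dinfo = debuginfo__new(vmlinux_path);
	int ntevs;

	if (!dinfo)
		return -ENOENT;		/* no usable debug information */

	/* The same handle can serve several queries before deletion. */
	ntevs = debuginfo__find_trace_events(dinfo, pev, tevs, 128);
	debuginfo__delete(dinfo);
	return ntevs;
}
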
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index a9ac0504aabd..8e0b5a39d8a7 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -247,7 +247,7 @@ struct pyrf_cpu_map {
247static int pyrf_cpu_map__init(struct pyrf_cpu_map *pcpus, 247static int pyrf_cpu_map__init(struct pyrf_cpu_map *pcpus,
248 PyObject *args, PyObject *kwargs) 248 PyObject *args, PyObject *kwargs)
249{ 249{
250 static char *kwlist[] = { "cpustr", NULL, NULL, }; 250 static char *kwlist[] = { "cpustr", NULL };
251 char *cpustr = NULL; 251 char *cpustr = NULL;
252 252
253 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|s", 253 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|s",
@@ -316,7 +316,7 @@ struct pyrf_thread_map {
316static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads, 316static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads,
317 PyObject *args, PyObject *kwargs) 317 PyObject *args, PyObject *kwargs)
318{ 318{
319 static char *kwlist[] = { "pid", "tid", NULL, NULL, }; 319 static char *kwlist[] = { "pid", "tid", NULL };
320 int pid = -1, tid = -1; 320 int pid = -1, tid = -1;
321 321
322 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii", 322 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii",
@@ -418,7 +418,9 @@ static int pyrf_evsel__init(struct pyrf_evsel *pevsel,
418 "wakeup_events", 418 "wakeup_events",
419 "bp_type", 419 "bp_type",
420 "bp_addr", 420 "bp_addr",
421 "bp_len", NULL, NULL, }; 421 "bp_len",
422 NULL
423 };
422 u64 sample_period = 0; 424 u64 sample_period = 0;
423 u32 disabled = 0, 425 u32 disabled = 0,
424 inherit = 0, 426 inherit = 0,
@@ -499,7 +501,7 @@ static PyObject *pyrf_evsel__open(struct pyrf_evsel *pevsel,
499 struct thread_map *threads = NULL; 501 struct thread_map *threads = NULL;
500 PyObject *pcpus = NULL, *pthreads = NULL; 502 PyObject *pcpus = NULL, *pthreads = NULL;
501 int group = 0, inherit = 0; 503 int group = 0, inherit = 0;
502 static char *kwlist[] = {"cpus", "threads", "group", "inherit", NULL, NULL}; 504 static char *kwlist[] = { "cpus", "threads", "group", "inherit", NULL };
503 505
504 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|OOii", kwlist, 506 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|OOii", kwlist,
505 &pcpus, &pthreads, &group, &inherit)) 507 &pcpus, &pthreads, &group, &inherit))
@@ -582,8 +584,7 @@ static PyObject *pyrf_evlist__mmap(struct pyrf_evlist *pevlist,
582 PyObject *args, PyObject *kwargs) 584 PyObject *args, PyObject *kwargs)
583{ 585{
584 struct perf_evlist *evlist = &pevlist->evlist; 586 struct perf_evlist *evlist = &pevlist->evlist;
585 static char *kwlist[] = {"pages", "overwrite", 587 static char *kwlist[] = { "pages", "overwrite", NULL };
586 NULL, NULL};
587 int pages = 128, overwrite = false; 588 int pages = 128, overwrite = false;
588 589
589 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii", kwlist, 590 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii", kwlist,
@@ -603,7 +604,7 @@ static PyObject *pyrf_evlist__poll(struct pyrf_evlist *pevlist,
603 PyObject *args, PyObject *kwargs) 604 PyObject *args, PyObject *kwargs)
604{ 605{
605 struct perf_evlist *evlist = &pevlist->evlist; 606 struct perf_evlist *evlist = &pevlist->evlist;
606 static char *kwlist[] = {"timeout", NULL, NULL}; 607 static char *kwlist[] = { "timeout", NULL };
607 int timeout = -1, n; 608 int timeout = -1, n;
608 609
609 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, &timeout)) 610 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, &timeout))
@@ -674,7 +675,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
674 struct perf_evlist *evlist = &pevlist->evlist; 675 struct perf_evlist *evlist = &pevlist->evlist;
675 union perf_event *event; 676 union perf_event *event;
676 int sample_id_all = 1, cpu; 677 int sample_id_all = 1, cpu;
677 static char *kwlist[] = {"cpu", "sample_id_all", NULL, NULL}; 678 static char *kwlist[] = { "cpu", "sample_id_all", NULL };
678 int err; 679 int err;
679 680
680 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|i", kwlist, 681 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|i", kwlist,
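
The kwlist cleanups above all make the same point: PyArg_ParseTupleAndKeywords() takes a NULL-terminated array of keyword names, so exactly one NULL sentinel is required; the second NULL was redundant, though harmless. A minimal sketch of the convention (the method and its keywords are illustrative):

#include <Python.h>

static PyObject *example_method(PyObject *self, PyObject *args, PyObject *kwargs)
{
	static char *kwlist[] = { "pages", "overwrite", NULL };	/* one sentinel */
	int pages = 128, overwrite = 0;

	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii", kwlist,
					 &pages, &overwrite))
		return NULL;

	(void)self;
	Py_RETURN_NONE;
}
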
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index f5a8fbdd3f76..72458d9da5b1 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -12,6 +12,7 @@
12#include "session.h" 12#include "session.h"
13#include "sort.h" 13#include "sort.h"
14#include "util.h" 14#include "util.h"
15#include "cpumap.h"
15 16
16static int perf_session__open(struct perf_session *self, bool force) 17static int perf_session__open(struct perf_session *self, bool force)
17{ 18{
@@ -247,9 +248,14 @@ int perf_session__resolve_callchain(struct perf_session *self,
247 callchain_cursor_reset(&self->callchain_cursor); 248 callchain_cursor_reset(&self->callchain_cursor);
248 249
249 for (i = 0; i < chain->nr; i++) { 250 for (i = 0; i < chain->nr; i++) {
250 u64 ip = chain->ips[i]; 251 u64 ip;
251 struct addr_location al; 252 struct addr_location al;
252 253
254 if (callchain_param.order == ORDER_CALLEE)
255 ip = chain->ips[i];
256 else
257 ip = chain->ips[chain->nr - i - 1];
258
253 if (ip >= PERF_CONTEXT_MAX) { 259 if (ip >= PERF_CONTEXT_MAX) {
254 switch (ip) { 260 switch (ip) {
255 case PERF_CONTEXT_HV: 261 case PERF_CONTEXT_HV:
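
The hunk above makes callchain resolution honor callchain_param.order: entries are recorded callee-first, so a caller-first view simply walks the same array from the other end. The index mapping in isolation, with illustrative values:

#include <stdio.h>

int main(void)
{
	unsigned long long ips[] = { 0x1111, 0x2222, 0x3333 }; /* callee first */
	unsigned int i, nr = sizeof(ips) / sizeof(ips[0]);

	for (i = 0; i < nr; i++)		/* ORDER_CALLER view */
		printf("%#llx\n", ips[nr - i - 1]);
	return 0;
}
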
@@ -407,20 +413,26 @@ static void perf_event__read_swap(union perf_event *event)
407 event->read.id = bswap_64(event->read.id); 413 event->read.id = bswap_64(event->read.id);
408} 414}
409 415
410static void perf_event__attr_swap(union perf_event *event) 416/* exported for swapping attributes in file header */
417void perf_event__attr_swap(struct perf_event_attr *attr)
418{
419 attr->type = bswap_32(attr->type);
420 attr->size = bswap_32(attr->size);
421 attr->config = bswap_64(attr->config);
422 attr->sample_period = bswap_64(attr->sample_period);
423 attr->sample_type = bswap_64(attr->sample_type);
424 attr->read_format = bswap_64(attr->read_format);
425 attr->wakeup_events = bswap_32(attr->wakeup_events);
426 attr->bp_type = bswap_32(attr->bp_type);
427 attr->bp_addr = bswap_64(attr->bp_addr);
428 attr->bp_len = bswap_64(attr->bp_len);
429}
430
431static void perf_event__hdr_attr_swap(union perf_event *event)
411{ 432{
412 size_t size; 433 size_t size;
413 434
414 event->attr.attr.type = bswap_32(event->attr.attr.type); 435 perf_event__attr_swap(&event->attr.attr);
415 event->attr.attr.size = bswap_32(event->attr.attr.size);
416 event->attr.attr.config = bswap_64(event->attr.attr.config);
417 event->attr.attr.sample_period = bswap_64(event->attr.attr.sample_period);
418 event->attr.attr.sample_type = bswap_64(event->attr.attr.sample_type);
419 event->attr.attr.read_format = bswap_64(event->attr.attr.read_format);
420 event->attr.attr.wakeup_events = bswap_32(event->attr.attr.wakeup_events);
421 event->attr.attr.bp_type = bswap_32(event->attr.attr.bp_type);
422 event->attr.attr.bp_addr = bswap_64(event->attr.attr.bp_addr);
423 event->attr.attr.bp_len = bswap_64(event->attr.attr.bp_len);
424 436
425 size = event->header.size; 437 size = event->header.size;
426 size -= (void *)&event->attr.id - (void *)event; 438 size -= (void *)&event->attr.id - (void *)event;
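
perf_event__attr_swap() is split out and exported so that file-header code can byte-swap perf_event_attr records read from a cross-endian perf.data file. A hedged sketch of such a caller; only perf_event__attr_swap() is from the patch, the helper and its arguments are illustrative:

#include <stdbool.h>
#include <linux/perf_event.h>

void perf_event__attr_swap(struct perf_event_attr *attr);	/* from session.h */

static void swap_attrs_if_needed(struct perf_event_attr *attrs, int nr,
				 bool file_is_other_endian)
{
	int i;

	if (!file_is_other_endian)
		return;
	for (i = 0; i < nr; i++)
		perf_event__attr_swap(&attrs[i]);
}
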
@@ -448,7 +460,7 @@ static perf_event__swap_op perf_event__swap_ops[] = {
448 [PERF_RECORD_LOST] = perf_event__all64_swap, 460 [PERF_RECORD_LOST] = perf_event__all64_swap,
449 [PERF_RECORD_READ] = perf_event__read_swap, 461 [PERF_RECORD_READ] = perf_event__read_swap,
450 [PERF_RECORD_SAMPLE] = perf_event__all64_swap, 462 [PERF_RECORD_SAMPLE] = perf_event__all64_swap,
451 [PERF_RECORD_HEADER_ATTR] = perf_event__attr_swap, 463 [PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap,
452 [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap, 464 [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap,
453 [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap, 465 [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
454 [PERF_RECORD_HEADER_BUILD_ID] = NULL, 466 [PERF_RECORD_HEADER_BUILD_ID] = NULL,
@@ -708,9 +720,9 @@ static void dump_sample(struct perf_session *session, union perf_event *event,
708 if (!dump_trace) 720 if (!dump_trace)
709 return; 721 return;
710 722
711 printf("(IP, %d): %d/%d: %#" PRIx64 " period: %" PRIu64 "\n", 723 printf("(IP, %d): %d/%d: %#" PRIx64 " period: %" PRIu64 " addr: %#" PRIx64 "\n",
712 event->header.misc, sample->pid, sample->tid, sample->ip, 724 event->header.misc, sample->pid, sample->tid, sample->ip,
713 sample->period); 725 sample->period, sample->addr);
714 726
715 if (session->sample_type & PERF_SAMPLE_CALLCHAIN) 727 if (session->sample_type & PERF_SAMPLE_CALLCHAIN)
716 callchain__printf(sample); 728 callchain__printf(sample);
@@ -1202,9 +1214,10 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
1202 return NULL; 1214 return NULL;
1203} 1215}
1204 1216
1205void perf_session__print_symbols(union perf_event *event, 1217void perf_session__print_ip(union perf_event *event,
1206 struct perf_sample *sample, 1218 struct perf_sample *sample,
1207 struct perf_session *session) 1219 struct perf_session *session,
1220 int print_sym, int print_dso)
1208{ 1221{
1209 struct addr_location al; 1222 struct addr_location al;
1210 const char *symname, *dsoname; 1223 const char *symname, *dsoname;
@@ -1233,32 +1246,83 @@ void perf_session__print_symbols(union perf_event *event,
1233 if (!node) 1246 if (!node)
1234 break; 1247 break;
1235 1248
1236 if (node->sym && node->sym->name) 1249 printf("\t%16" PRIx64, node->ip);
1237 symname = node->sym->name; 1250 if (print_sym) {
1251 if (node->sym && node->sym->name)
1252 symname = node->sym->name;
1253 else
1254 symname = "";
1255
1256 printf(" %s", symname);
1257 }
1258 if (print_dso) {
1259 if (node->map && node->map->dso && node->map->dso->name)
1260 dsoname = node->map->dso->name;
1261 else
1262 dsoname = "";
1263
1264 printf(" (%s)", dsoname);
1265 }
1266 printf("\n");
1267
1268 callchain_cursor_advance(cursor);
1269 }
1270
1271 } else {
1272 printf("%16" PRIx64, sample->ip);
1273 if (print_sym) {
1274 if (al.sym && al.sym->name)
1275 symname = al.sym->name;
1238 else 1276 else
1239 symname = ""; 1277 symname = "";
1240 1278
1241 if (node->map && node->map->dso && node->map->dso->name) 1279 printf(" %s", symname);
1242 dsoname = node->map->dso->name; 1280 }
1281
1282 if (print_dso) {
1283 if (al.map && al.map->dso && al.map->dso->name)
1284 dsoname = al.map->dso->name;
1243 else 1285 else
1244 dsoname = ""; 1286 dsoname = "";
1245 1287
1246 printf("\t%16" PRIx64 " %s (%s)\n", node->ip, symname, dsoname); 1288 printf(" (%s)", dsoname);
1289 }
1290 }
1291}
1247 1292
1248 callchain_cursor_advance(cursor); 1293int perf_session__cpu_bitmap(struct perf_session *session,
1294 const char *cpu_list, unsigned long *cpu_bitmap)
1295{
1296 int i;
1297 struct cpu_map *map;
1298
1299 for (i = 0; i < PERF_TYPE_MAX; ++i) {
1300 struct perf_evsel *evsel;
1301
1302 evsel = perf_session__find_first_evtype(session, i);
1303 if (!evsel)
1304 continue;
1305
1306 if (!(evsel->attr.sample_type & PERF_SAMPLE_CPU)) {
1307 pr_err("File does not contain CPU events. "
1308 "Remove -c option to proceed.\n");
1309 return -1;
1249 } 1310 }
1311 }
1250 1312
1251 } else { 1313 map = cpu_map__new(cpu_list);
1252 if (al.sym && al.sym->name)
1253 symname = al.sym->name;
1254 else
1255 symname = "";
1256 1314
1257 if (al.map && al.map->dso && al.map->dso->name) 1315 for (i = 0; i < map->nr; i++) {
1258 dsoname = al.map->dso->name; 1316 int cpu = map->map[i];
1259 else 1317
1260 dsoname = ""; 1318 if (cpu >= MAX_NR_CPUS) {
1319 pr_err("Requested CPU %d too large. "
1320 "Consider raising MAX_NR_CPUS\n", cpu);
1321 return -1;
1322 }
1261 1323
1262 printf("%16" PRIx64 " %s (%s)", al.addr, symname, dsoname); 1324 set_bit(cpu, cpu_bitmap);
1263 } 1325 }
1326
1327 return 0;
1264} 1328}
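
perf_session__cpu_bitmap() gives report-style tools one place to validate a user CPU list against the recorded data and turn it into a bitmap. A sketch of a hypothetical caller; it assumes perf's internal headers, and the DECLARE_BITMAP over MAX_NR_CPUS matches the sizing the helper expects:

#include "session.h"

static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);

static int setup_cpu_filter(struct perf_session *session, const char *cpu_list)
{
	if (!cpu_list)
		return 0;			/* no CPU filter requested */
	return perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap);
}

/* per-sample use: if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) skip; */
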
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 66d4e1490879..170601e67d6b 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -112,6 +112,7 @@ int perf_session__set_kallsyms_ref_reloc_sym(struct map **maps,
112 u64 addr); 112 u64 addr);
113 113
114void mem_bswap_64(void *src, int byte_size); 114void mem_bswap_64(void *src, int byte_size);
115void perf_event__attr_swap(struct perf_event_attr *attr);
115 116
116int perf_session__create_kernel_maps(struct perf_session *self); 117int perf_session__create_kernel_maps(struct perf_session *self);
117 118
@@ -167,8 +168,12 @@ static inline int perf_session__parse_sample(struct perf_session *session,
167struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session, 168struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
168 unsigned int type); 169 unsigned int type);
169 170
170void perf_session__print_symbols(union perf_event *event, 171void perf_session__print_ip(union perf_event *event,
171 struct perf_sample *sample, 172 struct perf_sample *sample,
172 struct perf_session *session); 173 struct perf_session *session,
174 int print_sym, int print_dso);
175
176int perf_session__cpu_bitmap(struct perf_session *session,
177 const char *cpu_list, unsigned long *cpu_bitmap);
173 178
174#endif /* __PERF_SESSION_H */ 179#endif /* __PERF_SESSION_H */
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index f44fa541d56e..401e220566fd 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -15,95 +15,6 @@ char * field_sep;
15 15
16LIST_HEAD(hist_entry__sort_list); 16LIST_HEAD(hist_entry__sort_list);
17 17
18static int hist_entry__thread_snprintf(struct hist_entry *self, char *bf,
19 size_t size, unsigned int width);
20static int hist_entry__comm_snprintf(struct hist_entry *self, char *bf,
21 size_t size, unsigned int width);
22static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf,
23 size_t size, unsigned int width);
24static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
25 size_t size, unsigned int width);
26static int hist_entry__parent_snprintf(struct hist_entry *self, char *bf,
27 size_t size, unsigned int width);
28static int hist_entry__cpu_snprintf(struct hist_entry *self, char *bf,
29 size_t size, unsigned int width);
30
31struct sort_entry sort_thread = {
32 .se_header = "Command: Pid",
33 .se_cmp = sort__thread_cmp,
34 .se_snprintf = hist_entry__thread_snprintf,
35 .se_width_idx = HISTC_THREAD,
36};
37
38struct sort_entry sort_comm = {
39 .se_header = "Command",
40 .se_cmp = sort__comm_cmp,
41 .se_collapse = sort__comm_collapse,
42 .se_snprintf = hist_entry__comm_snprintf,
43 .se_width_idx = HISTC_COMM,
44};
45
46struct sort_entry sort_dso = {
47 .se_header = "Shared Object",
48 .se_cmp = sort__dso_cmp,
49 .se_snprintf = hist_entry__dso_snprintf,
50 .se_width_idx = HISTC_DSO,
51};
52
53struct sort_entry sort_sym = {
54 .se_header = "Symbol",
55 .se_cmp = sort__sym_cmp,
56 .se_snprintf = hist_entry__sym_snprintf,
57 .se_width_idx = HISTC_SYMBOL,
58};
59
60struct sort_entry sort_parent = {
61 .se_header = "Parent symbol",
62 .se_cmp = sort__parent_cmp,
63 .se_snprintf = hist_entry__parent_snprintf,
64 .se_width_idx = HISTC_PARENT,
65};
66
67struct sort_entry sort_cpu = {
68 .se_header = "CPU",
69 .se_cmp = sort__cpu_cmp,
70 .se_snprintf = hist_entry__cpu_snprintf,
71 .se_width_idx = HISTC_CPU,
72};
73
74struct sort_dimension {
75 const char *name;
76 struct sort_entry *entry;
77 int taken;
78};
79
80static struct sort_dimension sort_dimensions[] = {
81 { .name = "pid", .entry = &sort_thread, },
82 { .name = "comm", .entry = &sort_comm, },
83 { .name = "dso", .entry = &sort_dso, },
84 { .name = "symbol", .entry = &sort_sym, },
85 { .name = "parent", .entry = &sort_parent, },
86 { .name = "cpu", .entry = &sort_cpu, },
87};
88
89int64_t cmp_null(void *l, void *r)
90{
91 if (!l && !r)
92 return 0;
93 else if (!l)
94 return -1;
95 else
96 return 1;
97}
98
99/* --sort pid */
100
101int64_t
102sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
103{
104 return right->thread->pid - left->thread->pid;
105}
106
107static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...) 18static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...)
108{ 19{
109 int n; 20 int n;
@@ -125,6 +36,24 @@ static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...)
125 return n; 36 return n;
126} 37}
127 38
39static int64_t cmp_null(void *l, void *r)
40{
41 if (!l && !r)
42 return 0;
43 else if (!l)
44 return -1;
45 else
46 return 1;
47}
48
49/* --sort pid */
50
51static int64_t
52sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
53{
54 return right->thread->pid - left->thread->pid;
55}
56
128static int hist_entry__thread_snprintf(struct hist_entry *self, char *bf, 57static int hist_entry__thread_snprintf(struct hist_entry *self, char *bf,
129 size_t size, unsigned int width) 58 size_t size, unsigned int width)
130{ 59{
@@ -132,15 +61,50 @@ static int hist_entry__thread_snprintf(struct hist_entry *self, char *bf,
132 self->thread->comm ?: "", self->thread->pid); 61 self->thread->comm ?: "", self->thread->pid);
133} 62}
134 63
64struct sort_entry sort_thread = {
65 .se_header = "Command: Pid",
66 .se_cmp = sort__thread_cmp,
67 .se_snprintf = hist_entry__thread_snprintf,
68 .se_width_idx = HISTC_THREAD,
69};
70
71/* --sort comm */
72
73static int64_t
74sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
75{
76 return right->thread->pid - left->thread->pid;
77}
78
79static int64_t
80sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
81{
82 char *comm_l = left->thread->comm;
83 char *comm_r = right->thread->comm;
84
85 if (!comm_l || !comm_r)
86 return cmp_null(comm_l, comm_r);
87
88 return strcmp(comm_l, comm_r);
89}
90
135static int hist_entry__comm_snprintf(struct hist_entry *self, char *bf, 91static int hist_entry__comm_snprintf(struct hist_entry *self, char *bf,
136 size_t size, unsigned int width) 92 size_t size, unsigned int width)
137{ 93{
138 return repsep_snprintf(bf, size, "%*s", width, self->thread->comm); 94 return repsep_snprintf(bf, size, "%*s", width, self->thread->comm);
139} 95}
140 96
97struct sort_entry sort_comm = {
98 .se_header = "Command",
99 .se_cmp = sort__comm_cmp,
100 .se_collapse = sort__comm_collapse,
101 .se_snprintf = hist_entry__comm_snprintf,
102 .se_width_idx = HISTC_COMM,
103};
104
141/* --sort dso */ 105/* --sort dso */
142 106
143int64_t 107static int64_t
144sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) 108sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
145{ 109{
146 struct dso *dso_l = left->ms.map ? left->ms.map->dso : NULL; 110 struct dso *dso_l = left->ms.map ? left->ms.map->dso : NULL;
@@ -173,9 +137,16 @@ static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf,
173 return repsep_snprintf(bf, size, "%-*s", width, "[unknown]"); 137 return repsep_snprintf(bf, size, "%-*s", width, "[unknown]");
174} 138}
175 139
140struct sort_entry sort_dso = {
141 .se_header = "Shared Object",
142 .se_cmp = sort__dso_cmp,
143 .se_snprintf = hist_entry__dso_snprintf,
144 .se_width_idx = HISTC_DSO,
145};
146
176/* --sort symbol */ 147/* --sort symbol */
177 148
178int64_t 149static int64_t
179sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) 150sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
180{ 151{
181 u64 ip_l, ip_r; 152 u64 ip_l, ip_r;
@@ -211,29 +182,16 @@ static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
211 return ret; 182 return ret;
212} 183}
213 184
214/* --sort comm */ 185struct sort_entry sort_sym = {
215 186 .se_header = "Symbol",
216int64_t 187 .se_cmp = sort__sym_cmp,
217sort__comm_cmp(struct hist_entry *left, struct hist_entry *right) 188 .se_snprintf = hist_entry__sym_snprintf,
218{ 189 .se_width_idx = HISTC_SYMBOL,
219 return right->thread->pid - left->thread->pid; 190};
220}
221
222int64_t
223sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
224{
225 char *comm_l = left->thread->comm;
226 char *comm_r = right->thread->comm;
227
228 if (!comm_l || !comm_r)
229 return cmp_null(comm_l, comm_r);
230
231 return strcmp(comm_l, comm_r);
232}
233 191
234/* --sort parent */ 192/* --sort parent */
235 193
236int64_t 194static int64_t
237sort__parent_cmp(struct hist_entry *left, struct hist_entry *right) 195sort__parent_cmp(struct hist_entry *left, struct hist_entry *right)
238{ 196{
239 struct symbol *sym_l = left->parent; 197 struct symbol *sym_l = left->parent;
@@ -252,9 +210,16 @@ static int hist_entry__parent_snprintf(struct hist_entry *self, char *bf,
252 self->parent ? self->parent->name : "[other]"); 210 self->parent ? self->parent->name : "[other]");
253} 211}
254 212
213struct sort_entry sort_parent = {
214 .se_header = "Parent symbol",
215 .se_cmp = sort__parent_cmp,
216 .se_snprintf = hist_entry__parent_snprintf,
217 .se_width_idx = HISTC_PARENT,
218};
219
255/* --sort cpu */ 220/* --sort cpu */
256 221
257int64_t 222static int64_t
258sort__cpu_cmp(struct hist_entry *left, struct hist_entry *right) 223sort__cpu_cmp(struct hist_entry *left, struct hist_entry *right)
259{ 224{
260 return right->cpu - left->cpu; 225 return right->cpu - left->cpu;
@@ -266,6 +231,28 @@ static int hist_entry__cpu_snprintf(struct hist_entry *self, char *bf,
266 return repsep_snprintf(bf, size, "%-*d", width, self->cpu); 231 return repsep_snprintf(bf, size, "%-*d", width, self->cpu);
267} 232}
268 233
234struct sort_entry sort_cpu = {
235 .se_header = "CPU",
236 .se_cmp = sort__cpu_cmp,
237 .se_snprintf = hist_entry__cpu_snprintf,
238 .se_width_idx = HISTC_CPU,
239};
240
241struct sort_dimension {
242 const char *name;
243 struct sort_entry *entry;
244 int taken;
245};
246
247static struct sort_dimension sort_dimensions[] = {
248 { .name = "pid", .entry = &sort_thread, },
249 { .name = "comm", .entry = &sort_comm, },
250 { .name = "dso", .entry = &sort_dso, },
251 { .name = "symbol", .entry = &sort_sym, },
252 { .name = "parent", .entry = &sort_parent, },
253 { .name = "cpu", .entry = &sort_cpu, },
254};
255
269int sort_dimension__add(const char *tok) 256int sort_dimension__add(const char *tok)
270{ 257{
271 unsigned int i; 258 unsigned int i;
@@ -273,15 +260,9 @@ int sort_dimension__add(const char *tok)
273 for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) { 260 for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) {
274 struct sort_dimension *sd = &sort_dimensions[i]; 261 struct sort_dimension *sd = &sort_dimensions[i];
275 262
276 if (sd->taken)
277 continue;
278
279 if (strncasecmp(tok, sd->name, strlen(tok))) 263 if (strncasecmp(tok, sd->name, strlen(tok)))
280 continue; 264 continue;
281 265
282 if (sd->entry->se_collapse)
283 sort__need_collapse = 1;
284
285 if (sd->entry == &sort_parent) { 266 if (sd->entry == &sort_parent) {
286 int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED); 267 int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED);
287 if (ret) { 268 if (ret) {
@@ -294,6 +275,12 @@ int sort_dimension__add(const char *tok)
294 sort__has_parent = 1; 275 sort__has_parent = 1;
295 } 276 }
296 277
278 if (sd->taken)
279 return 0;
280
281 if (sd->entry->se_collapse)
282 sort__need_collapse = 1;
283
297 if (list_empty(&hist_entry__sort_list)) { 284 if (list_empty(&hist_entry__sort_list)) {
298 if (!strcmp(sd->name, "pid")) 285 if (!strcmp(sd->name, "pid"))
299 sort__first_dimension = SORT_PID; 286 sort__first_dimension = SORT_PID;
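
With the cmp/snprintf helpers now static and the dimension table private, sort_dimension__add() is the file's only entry point; moving the `taken` check after the parent handling also means naming a key twice now succeeds quietly instead of falling through. A sketch of feeding it a --sort string; the tokenizing wrapper is illustrative, not the patch's code:

#include <string.h>
#include "sort.h"

static int example_setup_sort(char *sort_order)	/* e.g. "comm,dso,symbol" */
{
	char *tok;

	for (tok = strtok(sort_order, ","); tok; tok = strtok(NULL, ","))
		if (sort_dimension__add(tok) < 0)
			return -1;		/* unknown sort key */
	return 0;
}
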
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 0b91053a7d11..77d0388ad415 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -103,20 +103,6 @@ extern struct sort_entry sort_thread;
103extern struct list_head hist_entry__sort_list; 103extern struct list_head hist_entry__sort_list;
104 104
105void setup_sorting(const char * const usagestr[], const struct option *opts); 105void setup_sorting(const char * const usagestr[], const struct option *opts);
106
107extern size_t sort__thread_print(FILE *, struct hist_entry *, unsigned int);
108extern size_t sort__comm_print(FILE *, struct hist_entry *, unsigned int);
109extern size_t sort__dso_print(FILE *, struct hist_entry *, unsigned int);
110extern size_t sort__sym_print(FILE *, struct hist_entry *, unsigned int __used);
111extern int64_t cmp_null(void *, void *);
112extern int64_t sort__thread_cmp(struct hist_entry *, struct hist_entry *);
113extern int64_t sort__comm_cmp(struct hist_entry *, struct hist_entry *);
114extern int64_t sort__comm_collapse(struct hist_entry *, struct hist_entry *);
115extern int64_t sort__dso_cmp(struct hist_entry *, struct hist_entry *);
116extern int64_t sort__sym_cmp(struct hist_entry *, struct hist_entry *);
117extern int64_t sort__parent_cmp(struct hist_entry *, struct hist_entry *);
118int64_t sort__cpu_cmp(struct hist_entry *left, struct hist_entry *right);
119extern size_t sort__parent_print(FILE *, struct hist_entry *, unsigned int);
120extern int sort_dimension__add(const char *); 106extern int sort_dimension__add(const char *);
121void sort_entry__setup_elide(struct sort_entry *self, struct strlist *list, 107void sort_entry__setup_elide(struct sort_entry *self, struct strlist *list,
122 const char *list_name, FILE *fp); 108 const char *list_name, FILE *fp);
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
index b9a985dadd08..d5836382ff2c 100644
--- a/tools/perf/util/string.c
+++ b/tools/perf/util/string.c
@@ -294,3 +294,22 @@ bool strlazymatch(const char *str, const char *pat)
294{ 294{
295 return __match_glob(str, pat, true); 295 return __match_glob(str, pat, true);
296} 296}
297
298/**
299 * strtailcmp - Compare the tail of two strings
300 * @s1: 1st string to be compared
301 * @s2: 2nd string to be compared
302 *
303 * Return 0 if the whole of either string matches the tail of the other.
304 */
305int strtailcmp(const char *s1, const char *s2)
306{
307 int i1 = strlen(s1);
308 int i2 = strlen(s2);
309 while (--i1 >= 0 && --i2 >= 0) {
310 if (s1[i1] != s2[i2])
311 return s1[i1] - s2[i2];
312 }
313 return 0;
314}
315
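
strtailcmp() compares from the ends of the two strings, so a file name matches any path that ends with it, which is what the probe code needs when comparing source files. Its behavior in a nutshell, as illustrative assertions:

#include <assert.h>
#include "util.h"

static void strtailcmp_examples(void)
{
	assert(strtailcmp("util/probe-finder.c", "probe-finder.c") == 0);
	assert(strtailcmp("finder.c", "util/probe-finder.c") == 0);	/* either tail */
	assert(strtailcmp("probe-event.c", "probe-finder.c") != 0);
}
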
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index 35729f4c40cb..3403f814ad72 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -183,106 +183,59 @@ int bigendian(void)
183 return *ptr == 0x01020304; 183 return *ptr == 0x01020304;
184} 184}
185 185
186static unsigned long long copy_file_fd(int fd)	186/* unfortunately, you cannot stat debugfs or proc files for size */
187static void record_file(const char *file, size_t hdr_sz)
187{ 188{
188 unsigned long long size = 0; 189 unsigned long long size = 0;
189 char buf[BUFSIZ]; 190 char buf[BUFSIZ], *sizep;
190 int r; 191 off_t hdr_pos = lseek(output_fd, 0, SEEK_CUR);
191 192 int r, fd;
192 do {
193 r = read(fd, buf, BUFSIZ);
194 if (r > 0) {
195 size += r;
196 write_or_die(buf, r);
197 }
198 } while (r > 0);
199
200 return size;
201}
202
203static unsigned long long copy_file(const char *file)
204{
205 unsigned long long size = 0;
206 int fd;
207 193
208 fd = open(file, O_RDONLY); 194 fd = open(file, O_RDONLY);
209 if (fd < 0) 195 if (fd < 0)
210 die("Can't read '%s'", file); 196 die("Can't read '%s'", file);
211 size = copy_file_fd(fd);
212 close(fd);
213 197
214	return size;	198	/* write zeros for the file size now; fill in the true size later */
215} 199 write_or_die(&size, hdr_sz);
216
217static unsigned long get_size_fd(int fd)
218{
219 unsigned long long size = 0;
220 char buf[BUFSIZ];
221 int r;
222 200
223 do { 201 do {
224 r = read(fd, buf, BUFSIZ); 202 r = read(fd, buf, BUFSIZ);
225 if (r > 0) 203 if (r > 0) {
226 size += r; 204 size += r;
205 write_or_die(buf, r);
206 }
227 } while (r > 0); 207 } while (r > 0);
228
229 lseek(fd, 0, SEEK_SET);
230
231 return size;
232}
233
234static unsigned long get_size(const char *file)
235{
236 unsigned long long size = 0;
237 int fd;
238
239 fd = open(file, O_RDONLY);
240 if (fd < 0)
241 die("Can't read '%s'", file);
242 size = get_size_fd(fd);
243 close(fd); 208 close(fd);
244 209
245	return size;	210	/* ugh, handle big-endian hdr_sz == 4 */
211 sizep = (char*)&size;
212 if (bigendian())
213 sizep += sizeof(u64) - hdr_sz;
214
215 if (pwrite(output_fd, sizep, hdr_sz, hdr_pos) < 0)
216 die("writing to %s", output_file);
246} 217}
247 218
248static void read_header_files(void) 219static void read_header_files(void)
249{ 220{
250 unsigned long long size, check_size;
251 char *path; 221 char *path;
252 int fd; 222 struct stat st;
253 223
254 path = get_tracing_file("events/header_page"); 224 path = get_tracing_file("events/header_page");
255 fd = open(path, O_RDONLY); 225 if (stat(path, &st) < 0)
256 if (fd < 0)
257 die("can't read '%s'", path); 226 die("can't read '%s'", path);
258 227
259 /* unfortunately, you can not stat debugfs files for size */
260 size = get_size_fd(fd);
261
262 write_or_die("header_page", 12); 228 write_or_die("header_page", 12);
263 write_or_die(&size, 8); 229 record_file(path, 8);
264 check_size = copy_file_fd(fd);
265 close(fd);
266
267 if (size != check_size)
268 die("wrong size for '%s' size=%lld read=%lld",
269 path, size, check_size);
270 put_tracing_file(path); 230 put_tracing_file(path);
271 231
272 path = get_tracing_file("events/header_event"); 232 path = get_tracing_file("events/header_event");
273 fd = open(path, O_RDONLY); 233 if (stat(path, &st) < 0)
274 if (fd < 0)
275 die("can't read '%s'", path); 234 die("can't read '%s'", path);
276 235
277 size = get_size_fd(fd);
278
279 write_or_die("header_event", 13); 236 write_or_die("header_event", 13);
280 write_or_die(&size, 8); 237 record_file(path, 8);
281 check_size = copy_file_fd(fd);
282 if (size != check_size)
283 die("wrong size for '%s'", path);
284 put_tracing_file(path); 238 put_tracing_file(path);
285 close(fd);
286} 239}
287 240
288static bool name_in_tp_list(char *sys, struct tracepoint_path *tps) 241static bool name_in_tp_list(char *sys, struct tracepoint_path *tps)
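
record_file() above replaces the read-twice size check with a reserve-and-backpatch scheme: write a zeroed size slot, stream the file, then pwrite() the accumulated size into the slot without disturbing the output offset. The pattern in isolation, as a standalone sketch rather than the patch's code:

#include <sys/types.h>
#include <unistd.h>

static int write_sized(int out_fd, const void *buf, unsigned long long len)
{
	unsigned long long size = 0;
	off_t slot = lseek(out_fd, 0, SEEK_CUR);	/* remember the size slot */

	if (write(out_fd, &size, sizeof(size)) != (ssize_t)sizeof(size))
		return -1;				/* zeroed placeholder */
	if (write(out_fd, buf, len) != (ssize_t)len)
		return -1;				/* payload */
	size = len;
	/* backpatch the real size; the file offset stays at the payload end */
	return pwrite(out_fd, &size, sizeof(size), slot) == (ssize_t)sizeof(size)
		? 0 : -1;
}
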
@@ -298,7 +251,6 @@ static bool name_in_tp_list(char *sys, struct tracepoint_path *tps)
298 251
299static void copy_event_system(const char *sys, struct tracepoint_path *tps) 252static void copy_event_system(const char *sys, struct tracepoint_path *tps)
300{ 253{
301 unsigned long long size, check_size;
302 struct dirent *dent; 254 struct dirent *dent;
303 struct stat st; 255 struct stat st;
304 char *format; 256 char *format;
@@ -338,14 +290,8 @@ static void copy_event_system(const char *sys, struct tracepoint_path *tps)
338 sprintf(format, "%s/%s/format", sys, dent->d_name); 290 sprintf(format, "%s/%s/format", sys, dent->d_name);
339 ret = stat(format, &st); 291 ret = stat(format, &st);
340 292
341 if (ret >= 0) { 293 if (ret >= 0)
342 /* unfortunately, you can not stat debugfs files for size */ 294 record_file(format, 8);
343 size = get_size(format);
344 write_or_die(&size, 8);
345 check_size = copy_file(format);
346 if (size != check_size)
347 die("error in size of file '%s'", format);
348 }
349 295
350 free(format); 296 free(format);
351 } 297 }
@@ -426,7 +372,7 @@ static void read_event_files(struct tracepoint_path *tps)
426 372
427static void read_proc_kallsyms(void) 373static void read_proc_kallsyms(void)
428{ 374{
429 unsigned int size, check_size; 375 unsigned int size;
430 const char *path = "/proc/kallsyms"; 376 const char *path = "/proc/kallsyms";
431 struct stat st; 377 struct stat st;
432 int ret; 378 int ret;
@@ -438,17 +384,12 @@ static void read_proc_kallsyms(void)
438 write_or_die(&size, 4); 384 write_or_die(&size, 4);
439 return; 385 return;
440 } 386 }
441 size = get_size(path); 387 record_file(path, 4);
442 write_or_die(&size, 4);
443 check_size = copy_file(path);
444 if (size != check_size)
445 die("error in size of file '%s'", path);
446
447} 388}
448 389
449static void read_ftrace_printk(void) 390static void read_ftrace_printk(void)
450{ 391{
451 unsigned int size, check_size; 392 unsigned int size;
452 char *path; 393 char *path;
453 struct stat st; 394 struct stat st;
454 int ret; 395 int ret;
@@ -461,11 +402,8 @@ static void read_ftrace_printk(void)
461 write_or_die(&size, 4); 402 write_or_die(&size, 4);
462 goto out; 403 goto out;
463 } 404 }
464 size = get_size(path); 405 record_file(path, 4);
465 write_or_die(&size, 4); 406
466 check_size = copy_file(path);
467 if (size != check_size)
468 die("error in size of file '%s'", path);
469out: 407out:
470 put_tracing_file(path); 408 put_tracing_file(path);
471} 409}
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index fc784284ac8b..0128906bac88 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -238,6 +238,7 @@ char **argv_split(const char *str, int *argcp);
238void argv_free(char **argv); 238void argv_free(char **argv);
239bool strglobmatch(const char *str, const char *pat); 239bool strglobmatch(const char *str, const char *pat);
240bool strlazymatch(const char *str, const char *pat); 240bool strlazymatch(const char *str, const char *pat);
241int strtailcmp(const char *s1, const char *s2);
241unsigned long convert_unit(unsigned long value, char *unit); 242unsigned long convert_unit(unsigned long value, char *unit);
242int readn(int fd, void *buf, size_t size); 243int readn(int fd, void *buf, size_t size);
243 244