aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/kprobes.txt10
-rw-r--r--Documentation/trace/kprobetrace.txt4
-rw-r--r--MAINTAINERS10
-rw-r--r--arch/Kconfig9
-rw-r--r--arch/x86/Kconfig3
-rw-r--r--arch/x86/Kconfig.cpu20
-rw-r--r--arch/x86/Kconfig.debug9
-rw-r--r--arch/x86/include/asm/apic.h13
-rw-r--r--arch/x86/include/asm/ds.h302
-rw-r--r--arch/x86/include/asm/insn.h2
-rw-r--r--arch/x86/include/asm/kprobes.h2
-rw-r--r--arch/x86/include/asm/msr-index.h15
-rw-r--r--arch/x86/include/asm/perf_event.h76
-rw-r--r--arch/x86/include/asm/perf_event_p4.h794
-rw-r--r--arch/x86/include/asm/processor.h35
-rw-r--r--arch/x86/include/asm/ptrace-abi.h57
-rw-r--r--arch/x86/include/asm/ptrace.h6
-rw-r--r--arch/x86/include/asm/thread_info.h8
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/cpu/intel.c2
-rw-r--r--arch/x86/kernel/cpu/perf_event.c552
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c46
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c354
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c664
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_lbr.c218
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c834
-rw-r--r--arch/x86/kernel/cpu/perf_event_p6.c31
-rw-r--r--arch/x86/kernel/ds.c1437
-rw-r--r--arch/x86/kernel/ds_selftest.c408
-rw-r--r--arch/x86/kernel/ds_selftest.h15
-rw-r--r--arch/x86/kernel/dumpstack.c5
-rw-r--r--arch/x86/kernel/kprobes.c16
-rw-r--r--arch/x86/kernel/process.c18
-rw-r--r--arch/x86/kernel/process_32.c8
-rw-r--r--arch/x86/kernel/process_64.c8
-rw-r--r--arch/x86/kernel/ptrace.c382
-rw-r--r--arch/x86/kernel/step.c46
-rw-r--r--arch/x86/kernel/traps.c4
-rw-r--r--arch/x86/kvm/vmx.c5
-rw-r--r--arch/x86/kvm/x86.c46
-rw-r--r--arch/x86/kvm/x86.h3
-rw-r--r--arch/x86/lib/Makefile2
-rw-r--r--arch/x86/oprofile/op_model_ppro.c4
-rw-r--r--include/linux/ftrace.h12
-rw-r--r--include/linux/mm.h4
-rw-r--r--include/linux/perf_event.h53
-rw-r--r--include/linux/ptrace.h12
-rw-r--r--include/linux/sched.h9
-rw-r--r--include/trace/ftrace.h23
-rw-r--r--kernel/fork.c3
-rw-r--r--kernel/perf_event.c283
-rw-r--r--kernel/ptrace.c1
-rw-r--r--kernel/sched.c43
-rw-r--r--kernel/trace/Kconfig11
-rw-r--r--kernel/trace/Makefile1
-rw-r--r--kernel/trace/trace.h20
-rw-r--r--kernel/trace/trace_entries.h12
-rw-r--r--kernel/trace/trace_hw_branches.c312
-rw-r--r--kernel/trace/trace_kprobe.c535
-rw-r--r--kernel/trace/trace_selftest.c57
-rw-r--r--mm/mlock.c41
-rw-r--r--tools/perf/Documentation/perf-bench.txt6
-rw-r--r--tools/perf/Documentation/perf-kvm.txt67
-rw-r--r--tools/perf/Documentation/perf-probe.txt18
-rw-r--r--tools/perf/Documentation/perf-record.txt4
-rw-r--r--tools/perf/Documentation/perf-sched.txt4
-rw-r--r--tools/perf/Makefile340
-rw-r--r--tools/perf/bench/mem-memcpy.c3
-rw-r--r--tools/perf/bench/sched-messaging.c4
-rw-r--r--tools/perf/bench/sched-pipe.c2
-rw-r--r--tools/perf/builtin-annotate.c57
-rw-r--r--tools/perf/builtin-buildid-cache.c2
-rw-r--r--tools/perf/builtin-buildid-list.c6
-rw-r--r--tools/perf/builtin-diff.c10
-rw-r--r--tools/perf/builtin-help.c2
-rw-r--r--tools/perf/builtin-kmem.c16
-rw-r--r--tools/perf/builtin-kvm.c144
-rw-r--r--tools/perf/builtin-lock.c2
-rw-r--r--tools/perf/builtin-probe.c279
-rw-r--r--tools/perf/builtin-record.c498
-rw-r--r--tools/perf/builtin-report.c99
-rw-r--r--tools/perf/builtin-sched.c8
-rw-r--r--tools/perf/builtin-stat.c155
-rw-r--r--tools/perf/builtin-timechart.c5
-rw-r--r--tools/perf/builtin-top.c290
-rw-r--r--tools/perf/builtin-trace.c77
-rw-r--r--tools/perf/builtin.h1
-rw-r--r--tools/perf/command-list.txt1
-rw-r--r--tools/perf/perf-archive.sh3
-rw-r--r--tools/perf/perf.c6
-rw-r--r--tools/perf/perf.h8
-rw-r--r--tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm6
-rw-r--r--tools/perf/scripts/perl/bin/failed-syscalls-record2
-rw-r--r--tools/perf/scripts/perl/bin/failed-syscalls-report8
-rw-r--r--tools/perf/scripts/perl/bin/rw-by-file-record3
-rw-r--r--tools/perf/scripts/perl/bin/rw-by-file-report8
-rw-r--r--tools/perf/scripts/perl/bin/rw-by-pid-record2
-rw-r--r--tools/perf/scripts/perl/bin/rw-by-pid-report2
-rw-r--r--tools/perf/scripts/perl/bin/rwtop-record2
-rw-r--r--tools/perf/scripts/perl/bin/rwtop-report23
-rw-r--r--tools/perf/scripts/perl/bin/wakeup-latency-record2
-rw-r--r--tools/perf/scripts/perl/bin/wakeup-latency-report2
-rw-r--r--tools/perf/scripts/perl/bin/workqueue-stats-record2
-rw-r--r--tools/perf/scripts/perl/bin/workqueue-stats-report2
-rw-r--r--tools/perf/scripts/perl/rwtop.pl177
-rw-r--r--tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py3
-rw-r--r--tools/perf/scripts/python/bin/failed-syscalls-by-pid-record2
-rw-r--r--tools/perf/scripts/python/bin/failed-syscalls-by-pid-report8
-rw-r--r--tools/perf/scripts/python/bin/sctop-record2
-rw-r--r--tools/perf/scripts/python/bin/sctop-report24
-rw-r--r--tools/perf/scripts/python/bin/syscall-counts-by-pid-record2
-rw-r--r--tools/perf/scripts/python/bin/syscall-counts-by-pid-report8
-rw-r--r--tools/perf/scripts/python/bin/syscall-counts-record2
-rw-r--r--tools/perf/scripts/python/bin/syscall-counts-report8
-rw-r--r--tools/perf/scripts/python/sctop.py78
-rwxr-xr-xtools/perf/util/PERF-VERSION-GEN6
-rw-r--r--tools/perf/util/build-id.c2
-rw-r--r--tools/perf/util/cache.h14
-rw-r--r--tools/perf/util/callchain.c110
-rw-r--r--tools/perf/util/callchain.h6
-rw-r--r--tools/perf/util/color.c48
-rw-r--r--tools/perf/util/color.h4
-rw-r--r--tools/perf/util/debug.c8
-rw-r--r--tools/perf/util/debug.h30
-rw-r--r--tools/perf/util/event.c324
-rw-r--r--tools/perf/util/event.h49
-rw-r--r--tools/perf/util/header.c524
-rw-r--r--tools/perf/util/header.h41
-rw-r--r--tools/perf/util/hist.c219
-rw-r--r--tools/perf/util/hist.h15
-rw-r--r--tools/perf/util/include/linux/compiler.h2
-rw-r--r--tools/perf/util/include/linux/kernel.h9
-rw-r--r--tools/perf/util/map.c426
-rw-r--r--tools/perf/util/map.h121
-rw-r--r--tools/perf/util/newt.c725
-rw-r--r--tools/perf/util/parse-events.c21
-rw-r--r--tools/perf/util/parse-options.c7
-rw-r--r--tools/perf/util/parse-options.h4
-rw-r--r--tools/perf/util/probe-event.c1572
-rw-r--r--tools/perf/util/probe-event.h130
-rw-r--r--tools/perf/util/probe-finder.c990
-rw-r--r--tools/perf/util/probe-finder.h65
-rw-r--r--tools/perf/util/session.c325
-rw-r--r--tools/perf/util/session.h50
-rw-r--r--tools/perf/util/sort.c145
-rw-r--r--tools/perf/util/sort.h30
-rw-r--r--tools/perf/util/string.c45
-rw-r--r--tools/perf/util/string.h18
-rw-r--r--tools/perf/util/symbol.c494
-rw-r--r--tools/perf/util/symbol.h74
-rw-r--r--tools/perf/util/thread.c242
-rw-r--r--tools/perf/util/thread.h53
-rw-r--r--tools/perf/util/trace-event-info.c24
-rw-r--r--tools/perf/util/trace-event-parse.c53
-rw-r--r--tools/perf/util/trace-event-read.c89
-rw-r--r--tools/perf/util/trace-event.h7
-rw-r--r--tools/perf/util/util.h21
157 files changed, 11060 insertions, 6494 deletions
diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt
index 2f9115c0ae62..61c291cddf18 100644
--- a/Documentation/kprobes.txt
+++ b/Documentation/kprobes.txt
@@ -165,8 +165,8 @@ the user entry_handler invocation is also skipped.
165 165
1661.4 How Does Jump Optimization Work? 1661.4 How Does Jump Optimization Work?
167 167
168If you configured your kernel with CONFIG_OPTPROBES=y (currently 168If your kernel is built with CONFIG_OPTPROBES=y (currently this flag
169this option is supported on x86/x86-64, non-preemptive kernel) and 169is automatically set 'y' on x86/x86-64, non-preemptive kernel) and
170the "debug.kprobes_optimization" kernel parameter is set to 1 (see 170the "debug.kprobes_optimization" kernel parameter is set to 1 (see
171sysctl(8)), Kprobes tries to reduce probe-hit overhead by using a jump 171sysctl(8)), Kprobes tries to reduce probe-hit overhead by using a jump
172instruction instead of a breakpoint instruction at each probepoint. 172instruction instead of a breakpoint instruction at each probepoint.
@@ -271,8 +271,6 @@ tweak the kernel's execution path, you need to suppress optimization,
271using one of the following techniques: 271using one of the following techniques:
272- Specify an empty function for the kprobe's post_handler or break_handler. 272- Specify an empty function for the kprobe's post_handler or break_handler.
273 or 273 or
274- Config CONFIG_OPTPROBES=n.
275 or
276- Execute 'sysctl -w debug.kprobes_optimization=n' 274- Execute 'sysctl -w debug.kprobes_optimization=n'
277 275
2782. Architectures Supported 2762. Architectures Supported
@@ -307,10 +305,6 @@ it useful to "Compile the kernel with debug info" (CONFIG_DEBUG_INFO),
307so you can use "objdump -d -l vmlinux" to see the source-to-object 305so you can use "objdump -d -l vmlinux" to see the source-to-object
308code mapping. 306code mapping.
309 307
310If you want to reduce probing overhead, set "Kprobes jump optimization
311support" (CONFIG_OPTPROBES) to "y". You can find this option under the
312"Kprobes" line.
313
3144. API Reference 3084. API Reference
315 309
316The Kprobes API includes a "register" function and an "unregister" 310The Kprobes API includes a "register" function and an "unregister"
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index a9100b28eb84..ec94748ae65b 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -40,7 +40,9 @@ Synopsis of kprobe_events
40 $stack : Fetch stack address. 40 $stack : Fetch stack address.
41 $retval : Fetch return value.(*) 41 $retval : Fetch return value.(*)
42 +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**) 42 +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**)
43 NAME=FETCHARG: Set NAME as the argument name of FETCHARG. 43 NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
44 FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
45 (u8/u16/u32/u64/s8/s16/s32/s64) are supported.
44 46
45 (*) only for return probe. 47 (*) only for return probe.
46 (**) this is useful for fetching a field of data structures. 48 (**) this is useful for fetching a field of data structures.
diff --git a/MAINTAINERS b/MAINTAINERS
index a0e3c3a47a51..0716c65c05c9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4353,13 +4353,13 @@ M: Paul Mackerras <paulus@samba.org>
4353M: Ingo Molnar <mingo@elte.hu> 4353M: Ingo Molnar <mingo@elte.hu>
4354M: Arnaldo Carvalho de Melo <acme@redhat.com> 4354M: Arnaldo Carvalho de Melo <acme@redhat.com>
4355S: Supported 4355S: Supported
4356F: kernel/perf_event.c 4356F: kernel/perf_event*.c
4357F: include/linux/perf_event.h 4357F: include/linux/perf_event.h
4358F: arch/*/kernel/perf_event.c 4358F: arch/*/kernel/perf_event*.c
4359F: arch/*/kernel/*/perf_event.c 4359F: arch/*/kernel/*/perf_event*.c
4360F: arch/*/kernel/*/*/perf_event.c 4360F: arch/*/kernel/*/*/perf_event*.c
4361F: arch/*/include/asm/perf_event.h 4361F: arch/*/include/asm/perf_event.h
4362F: arch/*/lib/perf_event.c 4362F: arch/*/lib/perf_event*.c
4363F: arch/*/kernel/perf_callchain.c 4363F: arch/*/kernel/perf_callchain.c
4364F: tools/perf/ 4364F: tools/perf/
4365 4365
diff --git a/arch/Kconfig b/arch/Kconfig
index e5eb1337a537..f06010fb4838 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -42,15 +42,10 @@ config KPROBES
42 If in doubt, say "N". 42 If in doubt, say "N".
43 43
44config OPTPROBES 44config OPTPROBES
45 bool "Kprobes jump optimization support (EXPERIMENTAL)" 45 def_bool y
46 default y 46 depends on KPROBES && HAVE_OPTPROBES
47 depends on KPROBES
48 depends on !PREEMPT 47 depends on !PREEMPT
49 depends on HAVE_OPTPROBES
50 select KALLSYMS_ALL 48 select KALLSYMS_ALL
51 help
52 This option will allow kprobes to optimize breakpoint to
53 a jump for reducing its overhead.
54 49
55config HAVE_EFFICIENT_UNALIGNED_ACCESS 50config HAVE_EFFICIENT_UNALIGNED_ACCESS
56 bool 51 bool
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9458685902bd..97a95dfd1181 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -58,6 +58,9 @@ config X86
58 select HAVE_ARCH_KMEMCHECK 58 select HAVE_ARCH_KMEMCHECK
59 select HAVE_USER_RETURN_NOTIFIER 59 select HAVE_USER_RETURN_NOTIFIER
60 60
61config INSTRUCTION_DECODER
62 def_bool (KPROBES || PERF_EVENTS)
63
61config OUTPUT_FORMAT 64config OUTPUT_FORMAT
62 string 65 string
63 default "elf32-i386" if X86_32 66 default "elf32-i386" if X86_32
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index a19829374e6a..918fbb1855cc 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -502,23 +502,3 @@ config CPU_SUP_UMC_32
502 CPU might render the kernel unbootable. 502 CPU might render the kernel unbootable.
503 503
504 If unsure, say N. 504 If unsure, say N.
505
506config X86_DS
507 def_bool X86_PTRACE_BTS
508 depends on X86_DEBUGCTLMSR
509 select HAVE_HW_BRANCH_TRACER
510
511config X86_PTRACE_BTS
512 bool "Branch Trace Store"
513 default y
514 depends on X86_DEBUGCTLMSR
515 depends on BROKEN
516 ---help---
517 This adds a ptrace interface to the hardware's branch trace store.
518
519 Debuggers may use it to collect an execution trace of the debugged
520 application in order to answer the question 'how did I get here?'.
521 Debuggers may trace user mode as well as kernel mode.
522
523 Say Y unless there is no application development on this machine
524 and you want to save a small amount of code size.
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index bc01e3ebfeb2..bd58c8abbfbd 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -174,15 +174,6 @@ config IOMMU_LEAK
174 Add a simple leak tracer to the IOMMU code. This is useful when you 174 Add a simple leak tracer to the IOMMU code. This is useful when you
175 are debugging a buggy device driver that leaks IOMMU mappings. 175 are debugging a buggy device driver that leaks IOMMU mappings.
176 176
177config X86_DS_SELFTEST
178 bool "DS selftest"
179 default y
180 depends on DEBUG_KERNEL
181 depends on X86_DS
182 ---help---
183 Perform Debug Store selftests at boot time.
184 If in doubt, say "N".
185
186config HAVE_MMIOTRACE_SUPPORT 177config HAVE_MMIOTRACE_SUPPORT
187 def_bool y 178 def_bool y
188 179
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index b4ac2cdcb64f..1fa03e04ae44 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -373,6 +373,7 @@ extern atomic_t init_deasserted;
373extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); 373extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip);
374#endif 374#endif
375 375
376#ifdef CONFIG_X86_LOCAL_APIC
376static inline u32 apic_read(u32 reg) 377static inline u32 apic_read(u32 reg)
377{ 378{
378 return apic->read(reg); 379 return apic->read(reg);
@@ -403,10 +404,19 @@ static inline u32 safe_apic_wait_icr_idle(void)
403 return apic->safe_wait_icr_idle(); 404 return apic->safe_wait_icr_idle();
404} 405}
405 406
407#else /* CONFIG_X86_LOCAL_APIC */
408
409static inline u32 apic_read(u32 reg) { return 0; }
410static inline void apic_write(u32 reg, u32 val) { }
411static inline u64 apic_icr_read(void) { return 0; }
412static inline void apic_icr_write(u32 low, u32 high) { }
413static inline void apic_wait_icr_idle(void) { }
414static inline u32 safe_apic_wait_icr_idle(void) { return 0; }
415
416#endif /* CONFIG_X86_LOCAL_APIC */
406 417
407static inline void ack_APIC_irq(void) 418static inline void ack_APIC_irq(void)
408{ 419{
409#ifdef CONFIG_X86_LOCAL_APIC
410 /* 420 /*
411 * ack_APIC_irq() actually gets compiled as a single instruction 421 * ack_APIC_irq() actually gets compiled as a single instruction
412 * ... yummie. 422 * ... yummie.
@@ -414,7 +424,6 @@ static inline void ack_APIC_irq(void)
414 424
415 /* Docs say use 0 for future compatibility */ 425 /* Docs say use 0 for future compatibility */
416 apic_write(APIC_EOI, 0); 426 apic_write(APIC_EOI, 0);
417#endif
418} 427}
419 428
420static inline unsigned default_get_apic_id(unsigned long x) 429static inline unsigned default_get_apic_id(unsigned long x)
diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h
deleted file mode 100644
index 70dac199b093..000000000000
--- a/arch/x86/include/asm/ds.h
+++ /dev/null
@@ -1,302 +0,0 @@
1/*
2 * Debug Store (DS) support
3 *
4 * This provides a low-level interface to the hardware's Debug Store
5 * feature that is used for branch trace store (BTS) and
6 * precise-event based sampling (PEBS).
7 *
8 * It manages:
9 * - DS and BTS hardware configuration
10 * - buffer overflow handling (to be done)
11 * - buffer access
12 *
13 * It does not do:
14 * - security checking (is the caller allowed to trace the task)
15 * - buffer allocation (memory accounting)
16 *
17 *
18 * Copyright (C) 2007-2009 Intel Corporation.
19 * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
20 */
21
22#ifndef _ASM_X86_DS_H
23#define _ASM_X86_DS_H
24
25
26#include <linux/types.h>
27#include <linux/init.h>
28#include <linux/err.h>
29
30
31#ifdef CONFIG_X86_DS
32
33struct task_struct;
34struct ds_context;
35struct ds_tracer;
36struct bts_tracer;
37struct pebs_tracer;
38
39typedef void (*bts_ovfl_callback_t)(struct bts_tracer *);
40typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *);
41
42
43/*
44 * A list of features plus corresponding macros to talk about them in
45 * the ds_request function's flags parameter.
46 *
47 * We use the enum to index an array of corresponding control bits;
48 * we use the macro to index a flags bit-vector.
49 */
50enum ds_feature {
51 dsf_bts = 0,
52 dsf_bts_kernel,
53#define BTS_KERNEL (1 << dsf_bts_kernel)
54 /* trace kernel-mode branches */
55
56 dsf_bts_user,
57#define BTS_USER (1 << dsf_bts_user)
58 /* trace user-mode branches */
59
60 dsf_bts_overflow,
61 dsf_bts_max,
62 dsf_pebs = dsf_bts_max,
63
64 dsf_pebs_max,
65 dsf_ctl_max = dsf_pebs_max,
66 dsf_bts_timestamps = dsf_ctl_max,
67#define BTS_TIMESTAMPS (1 << dsf_bts_timestamps)
68 /* add timestamps into BTS trace */
69
70#define BTS_USER_FLAGS (BTS_KERNEL | BTS_USER | BTS_TIMESTAMPS)
71};
72
73
74/*
75 * Request BTS or PEBS
76 *
77 * Due to alignement constraints, the actual buffer may be slightly
78 * smaller than the requested or provided buffer.
79 *
80 * Returns a pointer to a tracer structure on success, or
81 * ERR_PTR(errcode) on failure.
82 *
83 * The interrupt threshold is independent from the overflow callback
84 * to allow users to use their own overflow interrupt handling mechanism.
85 *
86 * The function might sleep.
87 *
88 * task: the task to request recording for
89 * cpu: the cpu to request recording for
90 * base: the base pointer for the (non-pageable) buffer;
91 * size: the size of the provided buffer in bytes
92 * ovfl: pointer to a function to be called on buffer overflow;
93 * NULL if cyclic buffer requested
94 * th: the interrupt threshold in records from the end of the buffer;
95 * -1 if no interrupt threshold is requested.
96 * flags: a bit-mask of the above flags
97 */
98extern struct bts_tracer *ds_request_bts_task(struct task_struct *task,
99 void *base, size_t size,
100 bts_ovfl_callback_t ovfl,
101 size_t th, unsigned int flags);
102extern struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size,
103 bts_ovfl_callback_t ovfl,
104 size_t th, unsigned int flags);
105extern struct pebs_tracer *ds_request_pebs_task(struct task_struct *task,
106 void *base, size_t size,
107 pebs_ovfl_callback_t ovfl,
108 size_t th, unsigned int flags);
109extern struct pebs_tracer *ds_request_pebs_cpu(int cpu,
110 void *base, size_t size,
111 pebs_ovfl_callback_t ovfl,
112 size_t th, unsigned int flags);
113
114/*
115 * Release BTS or PEBS resources
116 * Suspend and resume BTS or PEBS tracing
117 *
118 * Must be called with irq's enabled.
119 *
120 * tracer: the tracer handle returned from ds_request_~()
121 */
122extern void ds_release_bts(struct bts_tracer *tracer);
123extern void ds_suspend_bts(struct bts_tracer *tracer);
124extern void ds_resume_bts(struct bts_tracer *tracer);
125extern void ds_release_pebs(struct pebs_tracer *tracer);
126extern void ds_suspend_pebs(struct pebs_tracer *tracer);
127extern void ds_resume_pebs(struct pebs_tracer *tracer);
128
129/*
130 * Release BTS or PEBS resources
131 * Suspend and resume BTS or PEBS tracing
132 *
133 * Cpu tracers must call this on the traced cpu.
134 * Task tracers must call ds_release_~_noirq() for themselves.
135 *
136 * May be called with irq's disabled.
137 *
138 * Returns 0 if successful;
139 * -EPERM if the cpu tracer does not trace the current cpu.
140 * -EPERM if the task tracer does not trace itself.
141 *
142 * tracer: the tracer handle returned from ds_request_~()
143 */
144extern int ds_release_bts_noirq(struct bts_tracer *tracer);
145extern int ds_suspend_bts_noirq(struct bts_tracer *tracer);
146extern int ds_resume_bts_noirq(struct bts_tracer *tracer);
147extern int ds_release_pebs_noirq(struct pebs_tracer *tracer);
148extern int ds_suspend_pebs_noirq(struct pebs_tracer *tracer);
149extern int ds_resume_pebs_noirq(struct pebs_tracer *tracer);
150
151
152/*
153 * The raw DS buffer state as it is used for BTS and PEBS recording.
154 *
155 * This is the low-level, arch-dependent interface for working
156 * directly on the raw trace data.
157 */
158struct ds_trace {
159 /* the number of bts/pebs records */
160 size_t n;
161 /* the size of a bts/pebs record in bytes */
162 size_t size;
163 /* pointers into the raw buffer:
164 - to the first entry */
165 void *begin;
166 /* - one beyond the last entry */
167 void *end;
168 /* - one beyond the newest entry */
169 void *top;
170 /* - the interrupt threshold */
171 void *ith;
172 /* flags given on ds_request() */
173 unsigned int flags;
174};
175
176/*
177 * An arch-independent view on branch trace data.
178 */
179enum bts_qualifier {
180 bts_invalid,
181#define BTS_INVALID bts_invalid
182
183 bts_branch,
184#define BTS_BRANCH bts_branch
185
186 bts_task_arrives,
187#define BTS_TASK_ARRIVES bts_task_arrives
188
189 bts_task_departs,
190#define BTS_TASK_DEPARTS bts_task_departs
191
192 bts_qual_bit_size = 4,
193 bts_qual_max = (1 << bts_qual_bit_size),
194};
195
196struct bts_struct {
197 __u64 qualifier;
198 union {
199 /* BTS_BRANCH */
200 struct {
201 __u64 from;
202 __u64 to;
203 } lbr;
204 /* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */
205 struct {
206 __u64 clock;
207 pid_t pid;
208 } event;
209 } variant;
210};
211
212
213/*
214 * The BTS state.
215 *
216 * This gives access to the raw DS state and adds functions to provide
217 * an arch-independent view of the BTS data.
218 */
219struct bts_trace {
220 struct ds_trace ds;
221
222 int (*read)(struct bts_tracer *tracer, const void *at,
223 struct bts_struct *out);
224 int (*write)(struct bts_tracer *tracer, const struct bts_struct *in);
225};
226
227
228/*
229 * The PEBS state.
230 *
231 * This gives access to the raw DS state and the PEBS-specific counter
232 * reset value.
233 */
234struct pebs_trace {
235 struct ds_trace ds;
236
237 /* the number of valid counters in the below array */
238 unsigned int counters;
239
240#define MAX_PEBS_COUNTERS 4
241 /* the counter reset value */
242 unsigned long long counter_reset[MAX_PEBS_COUNTERS];
243};
244
245
246/*
247 * Read the BTS or PEBS trace.
248 *
249 * Returns a view on the trace collected for the parameter tracer.
250 *
251 * The view remains valid as long as the traced task is not running or
252 * the tracer is suspended.
253 * Writes into the trace buffer are not reflected.
254 *
255 * tracer: the tracer handle returned from ds_request_~()
256 */
257extern const struct bts_trace *ds_read_bts(struct bts_tracer *tracer);
258extern const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer);
259
260
261/*
262 * Reset the write pointer of the BTS/PEBS buffer.
263 *
264 * Returns 0 on success; -Eerrno on error
265 *
266 * tracer: the tracer handle returned from ds_request_~()
267 */
268extern int ds_reset_bts(struct bts_tracer *tracer);
269extern int ds_reset_pebs(struct pebs_tracer *tracer);
270
271/*
272 * Set the PEBS counter reset value.
273 *
274 * Returns 0 on success; -Eerrno on error
275 *
276 * tracer: the tracer handle returned from ds_request_pebs()
277 * counter: the index of the counter
278 * value: the new counter reset value
279 */
280extern int ds_set_pebs_reset(struct pebs_tracer *tracer,
281 unsigned int counter, u64 value);
282
283/*
284 * Initialization
285 */
286struct cpuinfo_x86;
287extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *);
288
289/*
290 * Context switch work
291 */
292extern void ds_switch_to(struct task_struct *prev, struct task_struct *next);
293
294#else /* CONFIG_X86_DS */
295
296struct cpuinfo_x86;
297static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {}
298static inline void ds_switch_to(struct task_struct *prev,
299 struct task_struct *next) {}
300
301#endif /* CONFIG_X86_DS */
302#endif /* _ASM_X86_DS_H */
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 96c2e0ad04ca..88c765e16410 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -68,6 +68,8 @@ struct insn {
68 const insn_byte_t *next_byte; 68 const insn_byte_t *next_byte;
69}; 69};
70 70
71#define MAX_INSN_SIZE 16
72
71#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) 73#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6)
72#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) 74#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3)
73#define X86_MODRM_RM(modrm) ((modrm) & 0x07) 75#define X86_MODRM_RM(modrm) ((modrm) & 0x07)
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 4ffa345a8ccb..547882539157 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -24,6 +24,7 @@
24#include <linux/types.h> 24#include <linux/types.h>
25#include <linux/ptrace.h> 25#include <linux/ptrace.h>
26#include <linux/percpu.h> 26#include <linux/percpu.h>
27#include <asm/insn.h>
27 28
28#define __ARCH_WANT_KPROBES_INSN_SLOT 29#define __ARCH_WANT_KPROBES_INSN_SLOT
29 30
@@ -36,7 +37,6 @@ typedef u8 kprobe_opcode_t;
36#define RELATIVEJUMP_SIZE 5 37#define RELATIVEJUMP_SIZE 5
37#define RELATIVECALL_OPCODE 0xe8 38#define RELATIVECALL_OPCODE 0xe8
38#define RELATIVE_ADDR_SIZE 4 39#define RELATIVE_ADDR_SIZE 4
39#define MAX_INSN_SIZE 16
40#define MAX_STACK_SIZE 64 40#define MAX_STACK_SIZE 64
41#define MIN_STACK_SIZE(ADDR) \ 41#define MIN_STACK_SIZE(ADDR) \
42 (((MAX_STACK_SIZE) < (((unsigned long)current_thread_info()) + \ 42 (((MAX_STACK_SIZE) < (((unsigned long)current_thread_info()) + \
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 4604e6a54d36..bc473acfa7f9 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -71,11 +71,14 @@
71#define MSR_IA32_LASTINTTOIP 0x000001de 71#define MSR_IA32_LASTINTTOIP 0x000001de
72 72
73/* DEBUGCTLMSR bits (others vary by model): */ 73/* DEBUGCTLMSR bits (others vary by model): */
74#define _DEBUGCTLMSR_LBR 0 /* last branch recording */ 74#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */
75#define _DEBUGCTLMSR_BTF 1 /* single-step on branches */ 75#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */
76 76#define DEBUGCTLMSR_TR (1UL << 6)
77#define DEBUGCTLMSR_LBR (1UL << _DEBUGCTLMSR_LBR) 77#define DEBUGCTLMSR_BTS (1UL << 7)
78#define DEBUGCTLMSR_BTF (1UL << _DEBUGCTLMSR_BTF) 78#define DEBUGCTLMSR_BTINT (1UL << 8)
79#define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9)
80#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10)
81#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11)
79 82
80#define MSR_IA32_MC0_CTL 0x00000400 83#define MSR_IA32_MC0_CTL 0x00000400
81#define MSR_IA32_MC0_STATUS 0x00000401 84#define MSR_IA32_MC0_STATUS 0x00000401
@@ -359,6 +362,8 @@
359#define MSR_P4_U2L_ESCR0 0x000003b0 362#define MSR_P4_U2L_ESCR0 0x000003b0
360#define MSR_P4_U2L_ESCR1 0x000003b1 363#define MSR_P4_U2L_ESCR1 0x000003b1
361 364
365#define MSR_P4_PEBS_MATRIX_VERT 0x000003f2
366
362/* Intel Core-based CPU performance counters */ 367/* Intel Core-based CPU performance counters */
363#define MSR_CORE_PERF_FIXED_CTR0 0x00000309 368#define MSR_CORE_PERF_FIXED_CTR0 0x00000309
364#define MSR_CORE_PERF_FIXED_CTR1 0x0000030a 369#define MSR_CORE_PERF_FIXED_CTR1 0x0000030a
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index db6109a885a7..254883d0c7e0 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -5,7 +5,7 @@
5 * Performance event hw details: 5 * Performance event hw details:
6 */ 6 */
7 7
8#define X86_PMC_MAX_GENERIC 8 8#define X86_PMC_MAX_GENERIC 32
9#define X86_PMC_MAX_FIXED 3 9#define X86_PMC_MAX_FIXED 3
10 10
11#define X86_PMC_IDX_GENERIC 0 11#define X86_PMC_IDX_GENERIC 0
@@ -18,39 +18,31 @@
18#define MSR_ARCH_PERFMON_EVENTSEL0 0x186 18#define MSR_ARCH_PERFMON_EVENTSEL0 0x186
19#define MSR_ARCH_PERFMON_EVENTSEL1 0x187 19#define MSR_ARCH_PERFMON_EVENTSEL1 0x187
20 20
21#define ARCH_PERFMON_EVENTSEL_ENABLE (1 << 22) 21#define ARCH_PERFMON_EVENTSEL_EVENT 0x000000FFULL
22#define ARCH_PERFMON_EVENTSEL_ANY (1 << 21) 22#define ARCH_PERFMON_EVENTSEL_UMASK 0x0000FF00ULL
23#define ARCH_PERFMON_EVENTSEL_INT (1 << 20) 23#define ARCH_PERFMON_EVENTSEL_USR (1ULL << 16)
24#define ARCH_PERFMON_EVENTSEL_OS (1 << 17) 24#define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17)
25#define ARCH_PERFMON_EVENTSEL_USR (1 << 16) 25#define ARCH_PERFMON_EVENTSEL_EDGE (1ULL << 18)
26 26#define ARCH_PERFMON_EVENTSEL_INT (1ULL << 20)
27/* 27#define ARCH_PERFMON_EVENTSEL_ANY (1ULL << 21)
28 * Includes eventsel and unit mask as well: 28#define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22)
29 */ 29#define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23)
30 30#define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL
31 31
32#define INTEL_ARCH_EVTSEL_MASK 0x000000FFULL 32#define AMD64_EVENTSEL_EVENT \
33#define INTEL_ARCH_UNIT_MASK 0x0000FF00ULL 33 (ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32))
34#define INTEL_ARCH_EDGE_MASK 0x00040000ULL 34#define INTEL_ARCH_EVENT_MASK \
35#define INTEL_ARCH_INV_MASK 0x00800000ULL 35 (ARCH_PERFMON_EVENTSEL_UMASK | ARCH_PERFMON_EVENTSEL_EVENT)
36#define INTEL_ARCH_CNT_MASK 0xFF000000ULL 36
37#define INTEL_ARCH_EVENT_MASK (INTEL_ARCH_UNIT_MASK|INTEL_ARCH_EVTSEL_MASK) 37#define X86_RAW_EVENT_MASK \
38 38 (ARCH_PERFMON_EVENTSEL_EVENT | \
39/* 39 ARCH_PERFMON_EVENTSEL_UMASK | \
40 * filter mask to validate fixed counter events. 40 ARCH_PERFMON_EVENTSEL_EDGE | \
41 * the following filters disqualify for fixed counters: 41 ARCH_PERFMON_EVENTSEL_INV | \
42 * - inv 42 ARCH_PERFMON_EVENTSEL_CMASK)
43 * - edge 43#define AMD64_RAW_EVENT_MASK \
44 * - cnt-mask 44 (X86_RAW_EVENT_MASK | \
45 * The other filters are supported by fixed counters. 45 AMD64_EVENTSEL_EVENT)
46 * The any-thread option is supported starting with v3.
47 */
48#define INTEL_ARCH_FIXED_MASK \
49 (INTEL_ARCH_CNT_MASK| \
50 INTEL_ARCH_INV_MASK| \
51 INTEL_ARCH_EDGE_MASK|\
52 INTEL_ARCH_UNIT_MASK|\
53 INTEL_ARCH_EVENT_MASK)
54 46
55#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c 47#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c
56#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) 48#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
@@ -67,7 +59,7 @@
67union cpuid10_eax { 59union cpuid10_eax {
68 struct { 60 struct {
69 unsigned int version_id:8; 61 unsigned int version_id:8;
70 unsigned int num_events:8; 62 unsigned int num_counters:8;
71 unsigned int bit_width:8; 63 unsigned int bit_width:8;
72 unsigned int mask_length:8; 64 unsigned int mask_length:8;
73 } split; 65 } split;
@@ -76,7 +68,7 @@ union cpuid10_eax {
76 68
77union cpuid10_edx { 69union cpuid10_edx {
78 struct { 70 struct {
79 unsigned int num_events_fixed:4; 71 unsigned int num_counters_fixed:4;
80 unsigned int reserved:28; 72 unsigned int reserved:28;
81 } split; 73 } split;
82 unsigned int full; 74 unsigned int full;
@@ -136,6 +128,18 @@ extern void perf_events_lapic_init(void);
136 128
137#define PERF_EVENT_INDEX_OFFSET 0 129#define PERF_EVENT_INDEX_OFFSET 0
138 130
131/*
132 * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups.
133 * This flag is otherwise unused and ABI specified to be 0, so nobody should
134 * care what we do with it.
135 */
136#define PERF_EFLAGS_EXACT (1UL << 3)
137
138struct pt_regs;
139extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
140extern unsigned long perf_misc_flags(struct pt_regs *regs);
141#define perf_misc_flags(regs) perf_misc_flags(regs)
142
139#else 143#else
140static inline void init_hw_perf_events(void) { } 144static inline void init_hw_perf_events(void) { }
141static inline void perf_events_lapic_init(void) { } 145static inline void perf_events_lapic_init(void) { }
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
new file mode 100644
index 000000000000..b05400a542ff
--- /dev/null
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -0,0 +1,794 @@
/*
 * Netburst Performance Events (P4, old Xeon)
 */
4
5#ifndef PERF_EVENT_P4_H
6#define PERF_EVENT_P4_H
7
8#include <linux/cpu.h>
9#include <linux/bitops.h>
10
/*
 * NetBurst has performance MSRs shared between
 * threads if HT is turned on, ie for both logical
 * processors (note: on Atom with HT support the
 * perf-MSRs are not shared and every thread has its
 * own perf-MSRs set)
 */
18#define ARCH_P4_TOTAL_ESCR (46)
19#define ARCH_P4_RESERVED_ESCR (2) /* IQ_ESCR(0,1) not always present */
20#define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR)
21#define ARCH_P4_MAX_CCCR (18)
22#define ARCH_P4_MAX_COUNTER (ARCH_P4_MAX_CCCR / 2)
23
24#define P4_ESCR_EVENT_MASK 0x7e000000U
25#define P4_ESCR_EVENT_SHIFT 25
26#define P4_ESCR_EVENTMASK_MASK 0x01fffe00U
27#define P4_ESCR_EVENTMASK_SHIFT 9
28#define P4_ESCR_TAG_MASK 0x000001e0U
29#define P4_ESCR_TAG_SHIFT 5
30#define P4_ESCR_TAG_ENABLE 0x00000010U
31#define P4_ESCR_T0_OS 0x00000008U
32#define P4_ESCR_T0_USR 0x00000004U
33#define P4_ESCR_T1_OS 0x00000002U
34#define P4_ESCR_T1_USR 0x00000001U
35
36#define P4_ESCR_EVENT(v) ((v) << P4_ESCR_EVENT_SHIFT)
37#define P4_ESCR_EMASK(v) ((v) << P4_ESCR_EVENTMASK_SHIFT)
38#define P4_ESCR_TAG(v) ((v) << P4_ESCR_TAG_SHIFT)
39
40/* Non HT mask */
41#define P4_ESCR_MASK \
42 (P4_ESCR_EVENT_MASK | \
43 P4_ESCR_EVENTMASK_MASK | \
44 P4_ESCR_TAG_MASK | \
45 P4_ESCR_TAG_ENABLE | \
46 P4_ESCR_T0_OS | \
47 P4_ESCR_T0_USR)
48
49/* HT mask */
50#define P4_ESCR_MASK_HT \
51 (P4_ESCR_MASK | P4_ESCR_T1_OS | P4_ESCR_T1_USR)
52
53#define P4_CCCR_OVF 0x80000000U
54#define P4_CCCR_CASCADE 0x40000000U
55#define P4_CCCR_OVF_PMI_T0 0x04000000U
56#define P4_CCCR_OVF_PMI_T1 0x08000000U
57#define P4_CCCR_FORCE_OVF 0x02000000U
58#define P4_CCCR_EDGE 0x01000000U
59#define P4_CCCR_THRESHOLD_MASK 0x00f00000U
60#define P4_CCCR_THRESHOLD_SHIFT 20
61#define P4_CCCR_COMPLEMENT 0x00080000U
62#define P4_CCCR_COMPARE 0x00040000U
63#define P4_CCCR_ESCR_SELECT_MASK 0x0000e000U
64#define P4_CCCR_ESCR_SELECT_SHIFT 13
65#define P4_CCCR_ENABLE 0x00001000U
66#define P4_CCCR_THREAD_SINGLE 0x00010000U
67#define P4_CCCR_THREAD_BOTH 0x00020000U
68#define P4_CCCR_THREAD_ANY 0x00030000U
69#define P4_CCCR_RESERVED 0x00000fffU
70
71#define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT)
72#define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT)
73
/* Custom bits in reserved CCCR area */
75#define P4_CCCR_CACHE_OPS_MASK 0x0000003fU
76
77
78/* Non HT mask */
79#define P4_CCCR_MASK \
80 (P4_CCCR_OVF | \
81 P4_CCCR_CASCADE | \
82 P4_CCCR_OVF_PMI_T0 | \
83 P4_CCCR_FORCE_OVF | \
84 P4_CCCR_EDGE | \
85 P4_CCCR_THRESHOLD_MASK | \
86 P4_CCCR_COMPLEMENT | \
87 P4_CCCR_COMPARE | \
88 P4_CCCR_ESCR_SELECT_MASK | \
89 P4_CCCR_ENABLE)
90
91/* HT mask */
92#define P4_CCCR_MASK_HT (P4_CCCR_MASK | P4_CCCR_THREAD_ANY)
93
94#define P4_GEN_ESCR_EMASK(class, name, bit) \
95 class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT)
96#define P4_ESCR_EMASK_BIT(class, name) class##__##name
97
98/*
99 * config field is 64bit width and consists of
100 * HT << 63 | ESCR << 32 | CCCR
101 * where HT is HyperThreading bit (since ESCR
102 * has it reserved we may use it for own purpose)
103 *
104 * note that this is NOT the addresses of respective
105 * ESCR and CCCR but rather an only packed value should
106 * be unpacked and written to a proper addresses
107 *
108 * the base idea is to pack as much info as
109 * possible
110 */
111#define p4_config_pack_escr(v) (((u64)(v)) << 32)
112#define p4_config_pack_cccr(v) (((u64)(v)) & 0xffffffffULL)
113#define p4_config_unpack_escr(v) (((u64)(v)) >> 32)
114#define p4_config_unpack_cccr(v) (((u64)(v)) & 0xffffffffULL)
115
116#define p4_config_unpack_emask(v) \
117 ({ \
118 u32 t = p4_config_unpack_escr((v)); \
119 t = t & P4_ESCR_EVENTMASK_MASK; \
120 t = t >> P4_ESCR_EVENTMASK_SHIFT; \
121 t; \
122 })
123
124#define p4_config_unpack_event(v) \
125 ({ \
126 u32 t = p4_config_unpack_escr((v)); \
127 t = t & P4_ESCR_EVENT_MASK; \
128 t = t >> P4_ESCR_EVENT_SHIFT; \
129 t; \
130 })
131
132#define p4_config_unpack_cache_event(v) (((u64)(v)) & P4_CCCR_CACHE_OPS_MASK)
133
134#define P4_CONFIG_HT_SHIFT 63
135#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT)
136
137static inline bool p4_is_event_cascaded(u64 config)
138{
139 u32 cccr = p4_config_unpack_cccr(config);
140 return !!(cccr & P4_CCCR_CASCADE);
141}
142
143static inline int p4_ht_config_thread(u64 config)
144{
145 return !!(config & P4_CONFIG_HT);
146}
147
148static inline u64 p4_set_ht_bit(u64 config)
149{
150 return config | P4_CONFIG_HT;
151}
152
153static inline u64 p4_clear_ht_bit(u64 config)
154{
155 return config & ~P4_CONFIG_HT;
156}
157
/* Nonzero when HyperThreading is compiled in and more than one sibling runs. */
static inline int p4_ht_active(void)
{
#ifdef CONFIG_SMP
	return smp_num_siblings > 1;
#else
	return 0;
#endif
}
165
/*
 * Which HT sibling (0 or 1) @cpu is.
 *
 * NOTE(review): cpumask_first() is taken from the *current* CPU's
 * sibling map (__get_cpu_var), not from @cpu's own map — this only
 * gives a meaningful answer if the caller passes the current CPU or
 * its HT sibling; confirm against the callers.
 */
static inline int p4_ht_thread(int cpu)
{
#ifdef CONFIG_SMP
	if (smp_num_siblings == 2)
		return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map));
#endif
	return 0;
}
174
175static inline int p4_should_swap_ts(u64 config, int cpu)
176{
177 return p4_ht_config_thread(config) ^ p4_ht_thread(cpu);
178}
179
180static inline u32 p4_default_cccr_conf(int cpu)
181{
182 /*
183 * Note that P4_CCCR_THREAD_ANY is "required" on
184 * non-HT machines (on HT machines we count TS events
185 * regardless the state of second logical processor
186 */
187 u32 cccr = P4_CCCR_THREAD_ANY;
188
189 if (!p4_ht_thread(cpu))
190 cccr |= P4_CCCR_OVF_PMI_T0;
191 else
192 cccr |= P4_CCCR_OVF_PMI_T1;
193
194 return cccr;
195}
196
197static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr)
198{
199 u32 escr = 0;
200
201 if (!p4_ht_thread(cpu)) {
202 if (!exclude_os)
203 escr |= P4_ESCR_T0_OS;
204 if (!exclude_usr)
205 escr |= P4_ESCR_T0_USR;
206 } else {
207 if (!exclude_os)
208 escr |= P4_ESCR_T1_OS;
209 if (!exclude_usr)
210 escr |= P4_ESCR_T1_USR;
211 }
212
213 return escr;
214}
215
/*
 * Symbolic indices for the Netburst events this driver knows about;
 * the per-event opcodes (event select + ESCR select) are kept in
 * enum P4_EVENT_OPCODES below, in the same order.
 */
enum P4_EVENTS {
	P4_EVENT_TC_DELIVER_MODE,
	P4_EVENT_BPU_FETCH_REQUEST,
	P4_EVENT_ITLB_REFERENCE,
	P4_EVENT_MEMORY_CANCEL,
	P4_EVENT_MEMORY_COMPLETE,
	P4_EVENT_LOAD_PORT_REPLAY,
	P4_EVENT_STORE_PORT_REPLAY,
	P4_EVENT_MOB_LOAD_REPLAY,
	P4_EVENT_PAGE_WALK_TYPE,
	P4_EVENT_BSQ_CACHE_REFERENCE,
	P4_EVENT_IOQ_ALLOCATION,
	P4_EVENT_IOQ_ACTIVE_ENTRIES,
	P4_EVENT_FSB_DATA_ACTIVITY,
	P4_EVENT_BSQ_ALLOCATION,
	P4_EVENT_BSQ_ACTIVE_ENTRIES,
	P4_EVENT_SSE_INPUT_ASSIST,
	P4_EVENT_PACKED_SP_UOP,
	P4_EVENT_PACKED_DP_UOP,
	P4_EVENT_SCALAR_SP_UOP,
	P4_EVENT_SCALAR_DP_UOP,
	P4_EVENT_64BIT_MMX_UOP,
	P4_EVENT_128BIT_MMX_UOP,
	P4_EVENT_X87_FP_UOP,
	P4_EVENT_TC_MISC,
	P4_EVENT_GLOBAL_POWER_EVENTS,
	P4_EVENT_TC_MS_XFER,
	P4_EVENT_UOP_QUEUE_WRITES,
	P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE,
	P4_EVENT_RETIRED_BRANCH_TYPE,
	P4_EVENT_RESOURCE_STALL,
	P4_EVENT_WC_BUFFER,
	P4_EVENT_B2B_CYCLES,
	P4_EVENT_BNR,
	P4_EVENT_SNOOP,
	P4_EVENT_RESPONSE,
	P4_EVENT_FRONT_END_EVENT,
	P4_EVENT_EXECUTION_EVENT,
	P4_EVENT_REPLAY_EVENT,
	P4_EVENT_INSTR_RETIRED,
	P4_EVENT_UOPS_RETIRED,
	P4_EVENT_UOP_TYPE,
	P4_EVENT_BRANCH_RETIRED,
	P4_EVENT_MISPRED_BRANCH_RETIRED,
	P4_EVENT_X87_ASSIST,
	P4_EVENT_MACHINE_CLEAR,
	P4_EVENT_INSTR_COMPLETED,
};
264
265#define P4_OPCODE(event) event##_OPCODE
266#define P4_OPCODE_ESEL(opcode) ((opcode & 0x00ff) >> 0)
267#define P4_OPCODE_EVNT(opcode) ((opcode & 0xff00) >> 8)
268#define P4_OPCODE_PACK(event, sel) (((event) << 8) | sel)
269
/*
 * The comments below each event represent the ESCR restriction
 * for this event and the counter index per ESCR
 *
 * MSR_P4_IQ_ESCR0 and MSR_P4_IQ_ESCR1 are available only on early
 * processor builds (family 0FH, models 01H-02H). These MSRs
 * are not available on later versions, so we don't use
 * them at all
 *
 * Also note that CCCR1 does not have the P4_CCCR_ENABLE bit
 * working properly, so we should not use this CCCR and the
 * respective counter as a result
 */
enum P4_EVENT_OPCODES {
	/*
	 * NOTE: several packed values are intentionally duplicated
	 * (e.g. 0x05,0x02 for both STORE_PORT_REPLAY and
	 * RETIRED_MISPRED_BRANCH_TYPE) - such events are told apart
	 * by the ESCR MSRs they may be bound to, listed in the
	 * comment below each entry.
	 */
	P4_OPCODE(P4_EVENT_TC_DELIVER_MODE) = P4_OPCODE_PACK(0x01, 0x01),
	/*
	 * MSR_P4_TC_ESCR0:	4, 5
	 * MSR_P4_TC_ESCR1:	6, 7
	 */

	P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST) = P4_OPCODE_PACK(0x03, 0x00),
	/*
	 * MSR_P4_BPU_ESCR0:	0, 1
	 * MSR_P4_BPU_ESCR1:	2, 3
	 */

	P4_OPCODE(P4_EVENT_ITLB_REFERENCE) = P4_OPCODE_PACK(0x18, 0x03),
	/*
	 * MSR_P4_ITLB_ESCR0:	0, 1
	 * MSR_P4_ITLB_ESCR1:	2, 3
	 */

	P4_OPCODE(P4_EVENT_MEMORY_CANCEL) = P4_OPCODE_PACK(0x02, 0x05),
	/*
	 * MSR_P4_DAC_ESCR0:	8, 9
	 * MSR_P4_DAC_ESCR1:	10, 11
	 */

	P4_OPCODE(P4_EVENT_MEMORY_COMPLETE) = P4_OPCODE_PACK(0x08, 0x02),
	/*
	 * MSR_P4_SAAT_ESCR0:	8, 9
	 * MSR_P4_SAAT_ESCR1:	10, 11
	 */

	P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY) = P4_OPCODE_PACK(0x04, 0x02),
	/*
	 * MSR_P4_SAAT_ESCR0:	8, 9
	 * MSR_P4_SAAT_ESCR1:	10, 11
	 */

	P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY) = P4_OPCODE_PACK(0x05, 0x02),
	/*
	 * MSR_P4_SAAT_ESCR0:	8, 9
	 * MSR_P4_SAAT_ESCR1:	10, 11
	 */

	P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY) = P4_OPCODE_PACK(0x03, 0x02),
	/*
	 * MSR_P4_MOB_ESCR0:	0, 1
	 * MSR_P4_MOB_ESCR1:	2, 3
	 */

	P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE) = P4_OPCODE_PACK(0x01, 0x04),
	/*
	 * MSR_P4_PMH_ESCR0:	0, 1
	 * MSR_P4_PMH_ESCR1:	2, 3
	 */

	P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE) = P4_OPCODE_PACK(0x0c, 0x07),
	/*
	 * MSR_P4_BSU_ESCR0:	0, 1
	 * MSR_P4_BSU_ESCR1:	2, 3
	 */

	P4_OPCODE(P4_EVENT_IOQ_ALLOCATION) = P4_OPCODE_PACK(0x03, 0x06),
	/*
	 * MSR_P4_FSB_ESCR0:	0, 1
	 * MSR_P4_FSB_ESCR1:	2, 3
	 */

	P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES) = P4_OPCODE_PACK(0x1a, 0x06),
	/*
	 * MSR_P4_FSB_ESCR1:	2, 3
	 */

	P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY) = P4_OPCODE_PACK(0x17, 0x06),
	/*
	 * MSR_P4_FSB_ESCR0:	0, 1
	 * MSR_P4_FSB_ESCR1:	2, 3
	 */

	P4_OPCODE(P4_EVENT_BSQ_ALLOCATION) = P4_OPCODE_PACK(0x05, 0x07),
	/*
	 * MSR_P4_BSU_ESCR0:	0, 1
	 */

	P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES) = P4_OPCODE_PACK(0x06, 0x07),
	/*
	 * NOTE: no ESCR name in docs, it's guessed
	 * MSR_P4_BSU_ESCR1:	2, 3
	 */

	P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST) = P4_OPCODE_PACK(0x34, 0x01),
	/*
	 * MSR_P4_FIRM_ESCR0:	8, 9
	 * MSR_P4_FIRM_ESCR1:	10, 11
	 */

	P4_OPCODE(P4_EVENT_PACKED_SP_UOP) = P4_OPCODE_PACK(0x08, 0x01),
	/*
	 * MSR_P4_FIRM_ESCR0:	8, 9
	 * MSR_P4_FIRM_ESCR1:	10, 11
	 */

	P4_OPCODE(P4_EVENT_PACKED_DP_UOP) = P4_OPCODE_PACK(0x0c, 0x01),
	/*
	 * MSR_P4_FIRM_ESCR0:	8, 9
	 * MSR_P4_FIRM_ESCR1:	10, 11
	 */

	P4_OPCODE(P4_EVENT_SCALAR_SP_UOP) = P4_OPCODE_PACK(0x0a, 0x01),
	/*
	 * MSR_P4_FIRM_ESCR0:	8, 9
	 * MSR_P4_FIRM_ESCR1:	10, 11
	 */

	P4_OPCODE(P4_EVENT_SCALAR_DP_UOP) = P4_OPCODE_PACK(0x0e, 0x01),
	/*
	 * MSR_P4_FIRM_ESCR0:	8, 9
	 * MSR_P4_FIRM_ESCR1:	10, 11
	 */

	P4_OPCODE(P4_EVENT_64BIT_MMX_UOP) = P4_OPCODE_PACK(0x02, 0x01),
	/*
	 * MSR_P4_FIRM_ESCR0:	8, 9
	 * MSR_P4_FIRM_ESCR1:	10, 11
	 */

	P4_OPCODE(P4_EVENT_128BIT_MMX_UOP) = P4_OPCODE_PACK(0x1a, 0x01),
	/*
	 * MSR_P4_FIRM_ESCR0:	8, 9
	 * MSR_P4_FIRM_ESCR1:	10, 11
	 */

	P4_OPCODE(P4_EVENT_X87_FP_UOP) = P4_OPCODE_PACK(0x04, 0x01),
	/*
	 * MSR_P4_FIRM_ESCR0:	8, 9
	 * MSR_P4_FIRM_ESCR1:	10, 11
	 */

	P4_OPCODE(P4_EVENT_TC_MISC) = P4_OPCODE_PACK(0x06, 0x01),
	/*
	 * MSR_P4_TC_ESCR0:	4, 5
	 * MSR_P4_TC_ESCR1:	6, 7
	 */

	P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS) = P4_OPCODE_PACK(0x13, 0x06),
	/*
	 * MSR_P4_FSB_ESCR0:	0, 1
	 * MSR_P4_FSB_ESCR1:	2, 3
	 */

	P4_OPCODE(P4_EVENT_TC_MS_XFER) = P4_OPCODE_PACK(0x05, 0x00),
	/*
	 * MSR_P4_MS_ESCR0:	4, 5
	 * MSR_P4_MS_ESCR1:	6, 7
	 */

	P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES) = P4_OPCODE_PACK(0x09, 0x00),
	/*
	 * MSR_P4_MS_ESCR0:	4, 5
	 * MSR_P4_MS_ESCR1:	6, 7
	 */

	P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE) = P4_OPCODE_PACK(0x05, 0x02),
	/*
	 * MSR_P4_TBPU_ESCR0:	4, 5
	 * MSR_P4_TBPU_ESCR1:	6, 7
	 */

	P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE) = P4_OPCODE_PACK(0x04, 0x02),
	/*
	 * MSR_P4_TBPU_ESCR0:	4, 5
	 * MSR_P4_TBPU_ESCR1:	6, 7
	 */

	P4_OPCODE(P4_EVENT_RESOURCE_STALL) = P4_OPCODE_PACK(0x01, 0x01),
	/*
	 * MSR_P4_ALF_ESCR0:	12, 13, 16
	 * MSR_P4_ALF_ESCR1:	14, 15, 17
	 */

	P4_OPCODE(P4_EVENT_WC_BUFFER) = P4_OPCODE_PACK(0x05, 0x05),
	/*
	 * MSR_P4_DAC_ESCR0:	8, 9
	 * MSR_P4_DAC_ESCR1:	10, 11
	 */

	P4_OPCODE(P4_EVENT_B2B_CYCLES) = P4_OPCODE_PACK(0x16, 0x03),
	/*
	 * MSR_P4_FSB_ESCR0:	0, 1
	 * MSR_P4_FSB_ESCR1:	2, 3
	 */

	P4_OPCODE(P4_EVENT_BNR) = P4_OPCODE_PACK(0x08, 0x03),
	/*
	 * MSR_P4_FSB_ESCR0:	0, 1
	 * MSR_P4_FSB_ESCR1:	2, 3
	 */

	P4_OPCODE(P4_EVENT_SNOOP) = P4_OPCODE_PACK(0x06, 0x03),
	/*
	 * MSR_P4_FSB_ESCR0:	0, 1
	 * MSR_P4_FSB_ESCR1:	2, 3
	 */

	P4_OPCODE(P4_EVENT_RESPONSE) = P4_OPCODE_PACK(0x04, 0x03),
	/*
	 * MSR_P4_FSB_ESCR0:	0, 1
	 * MSR_P4_FSB_ESCR1:	2, 3
	 */

	P4_OPCODE(P4_EVENT_FRONT_END_EVENT) = P4_OPCODE_PACK(0x08, 0x05),
	/*
	 * MSR_P4_CRU_ESCR2:	12, 13, 16
	 * MSR_P4_CRU_ESCR3:	14, 15, 17
	 */

	P4_OPCODE(P4_EVENT_EXECUTION_EVENT) = P4_OPCODE_PACK(0x0c, 0x05),
	/*
	 * MSR_P4_CRU_ESCR2:	12, 13, 16
	 * MSR_P4_CRU_ESCR3:	14, 15, 17
	 */

	P4_OPCODE(P4_EVENT_REPLAY_EVENT) = P4_OPCODE_PACK(0x09, 0x05),
	/*
	 * MSR_P4_CRU_ESCR2:	12, 13, 16
	 * MSR_P4_CRU_ESCR3:	14, 15, 17
	 */

	P4_OPCODE(P4_EVENT_INSTR_RETIRED) = P4_OPCODE_PACK(0x02, 0x04),
	/*
	 * MSR_P4_CRU_ESCR0:	12, 13, 16
	 * MSR_P4_CRU_ESCR1:	14, 15, 17
	 */

	P4_OPCODE(P4_EVENT_UOPS_RETIRED) = P4_OPCODE_PACK(0x01, 0x04),
	/*
	 * MSR_P4_CRU_ESCR0:	12, 13, 16
	 * MSR_P4_CRU_ESCR1:	14, 15, 17
	 */

	P4_OPCODE(P4_EVENT_UOP_TYPE) = P4_OPCODE_PACK(0x02, 0x02),
	/*
	 * MSR_P4_RAT_ESCR0:	12, 13, 16
	 * MSR_P4_RAT_ESCR1:	14, 15, 17
	 */

	P4_OPCODE(P4_EVENT_BRANCH_RETIRED) = P4_OPCODE_PACK(0x06, 0x05),
	/*
	 * MSR_P4_CRU_ESCR2:	12, 13, 16
	 * MSR_P4_CRU_ESCR3:	14, 15, 17
	 */

	P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED) = P4_OPCODE_PACK(0x03, 0x04),
	/*
	 * MSR_P4_CRU_ESCR0:	12, 13, 16
	 * MSR_P4_CRU_ESCR1:	14, 15, 17
	 */

	P4_OPCODE(P4_EVENT_X87_ASSIST) = P4_OPCODE_PACK(0x03, 0x05),
	/*
	 * MSR_P4_CRU_ESCR2:	12, 13, 16
	 * MSR_P4_CRU_ESCR3:	14, 15, 17
	 */

	P4_OPCODE(P4_EVENT_MACHINE_CLEAR) = P4_OPCODE_PACK(0x02, 0x05),
	/*
	 * MSR_P4_CRU_ESCR2:	12, 13, 16
	 * MSR_P4_CRU_ESCR3:	14, 15, 17
	 */

	P4_OPCODE(P4_EVENT_INSTR_COMPLETED) = P4_OPCODE_PACK(0x07, 0x04),
	/*
	 * MSR_P4_CRU_ESCR0:	12, 13, 16
	 * MSR_P4_CRU_ESCR1:	14, 15, 17
	 */
};
558
/*
 * a caller should use the P4_ESCR_EMASK_BIT helper to
 * pick the EventMask needed, for example
 *
 *	P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD)
 */
/*
 * Per-event EventMask bits; P4_GEN_ESCR_EMASK shifts each bit into
 * its final ESCR position (P4_ESCR_EVENTMASK_SHIFT), so these values
 * can be OR-ed straight into a packed config.
 */
enum P4_ESCR_EMASKS {
	P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DD, 0),
	P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DB, 1),
	P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DI, 2),
	P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, BD, 3),
	P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, BB, 4),
	P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, BI, 5),
	P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, ID, 6),

	P4_GEN_ESCR_EMASK(P4_EVENT_BPU_FETCH_REQUEST, TCMISS, 0),

	P4_GEN_ESCR_EMASK(P4_EVENT_ITLB_REFERENCE, HIT, 0),
	P4_GEN_ESCR_EMASK(P4_EVENT_ITLB_REFERENCE, MISS, 1),
	P4_GEN_ESCR_EMASK(P4_EVENT_ITLB_REFERENCE, HIT_UK, 2),

	P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL, 2),
	P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_CANCEL, 64K_CONF, 3),

	P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_COMPLETE, LSC, 0),
	P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_COMPLETE, SSC, 1),

	P4_GEN_ESCR_EMASK(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD, 1),

	P4_GEN_ESCR_EMASK(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST, 1),

	P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, NO_STA, 1),
	P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, NO_STD, 3),
	P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA, 4),
	P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR, 5),

	P4_GEN_ESCR_EMASK(P4_EVENT_PAGE_WALK_TYPE, DTMISS, 0),
	P4_GEN_ESCR_EMASK(P4_EVENT_PAGE_WALK_TYPE, ITMISS, 1),

	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS, 0),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE, 1),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM, 2),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS, 3),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE, 4),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM, 5),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS, 8),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS, 9),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS, 10),

	P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, DEFAULT, 0),
	P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, ALL_READ, 5),
	P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE, 6),
	P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_UC, 7),
	P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WC, 8),
	P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WT, 9),
	P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WP, 10),
	P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WB, 11),
	P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, OWN, 13),
	P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, OTHER, 14),
	P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, PREFETCH, 15),

	P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT, 0),
	P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ, 5),
	P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE, 6),
	P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC, 7),
	P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC, 8),
	P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT, 9),
	P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP, 10),
	P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB, 11),
	P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN, 13),
	P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER, 14),
	P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH, 15),

	P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV, 0),
	P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN, 1),
	P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER, 2),
	P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV, 3),
	P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN, 4),
	P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER, 5),

	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0, 0),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1, 1),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0, 2),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1, 3),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE, 5),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE, 6),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE, 7),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE, 8),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE, 9),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE, 10),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0, 11),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1, 12),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2, 13),

	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0, 0),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1, 1),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0, 2),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1, 3),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE, 5),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE, 6),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE, 7),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE, 8),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE, 9),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE, 10),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0, 11),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1, 12),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2, 13),

	P4_GEN_ESCR_EMASK(P4_EVENT_SSE_INPUT_ASSIST, ALL, 15),

	P4_GEN_ESCR_EMASK(P4_EVENT_PACKED_SP_UOP, ALL, 15),

	P4_GEN_ESCR_EMASK(P4_EVENT_PACKED_DP_UOP, ALL, 15),

	P4_GEN_ESCR_EMASK(P4_EVENT_SCALAR_SP_UOP, ALL, 15),

	P4_GEN_ESCR_EMASK(P4_EVENT_SCALAR_DP_UOP, ALL, 15),

	P4_GEN_ESCR_EMASK(P4_EVENT_64BIT_MMX_UOP, ALL, 15),

	P4_GEN_ESCR_EMASK(P4_EVENT_128BIT_MMX_UOP, ALL, 15),

	P4_GEN_ESCR_EMASK(P4_EVENT_X87_FP_UOP, ALL, 15),

	P4_GEN_ESCR_EMASK(P4_EVENT_TC_MISC, FLUSH, 4),

	P4_GEN_ESCR_EMASK(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING, 0),

	P4_GEN_ESCR_EMASK(P4_EVENT_TC_MS_XFER, CISC, 0),

	P4_GEN_ESCR_EMASK(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD, 0),
	P4_GEN_ESCR_EMASK(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER, 1),
	P4_GEN_ESCR_EMASK(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM, 2),

	P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL, 1),
	P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL, 2),
	P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN, 3),
	P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT, 4),

	P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL, 1),
	P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, CALL, 2),
	P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN, 3),
	P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT, 4),

	P4_GEN_ESCR_EMASK(P4_EVENT_RESOURCE_STALL, SBFULL, 5),

	P4_GEN_ESCR_EMASK(P4_EVENT_WC_BUFFER, WCB_EVICTS, 0),
	P4_GEN_ESCR_EMASK(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS, 1),

	P4_GEN_ESCR_EMASK(P4_EVENT_FRONT_END_EVENT, NBOGUS, 0),
	P4_GEN_ESCR_EMASK(P4_EVENT_FRONT_END_EVENT, BOGUS, 1),

	P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS0, 0),
	P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS1, 1),
	P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS2, 2),
	P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS3, 3),
	P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS0, 4),
	P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS1, 5),
	P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS2, 6),
	P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS3, 7),

	P4_GEN_ESCR_EMASK(P4_EVENT_REPLAY_EVENT, NBOGUS, 0),
	P4_GEN_ESCR_EMASK(P4_EVENT_REPLAY_EVENT, BOGUS, 1),

	P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG, 0),
	P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, NBOGUSTAG, 1),
	P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, BOGUSNTAG, 2),
	P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, BOGUSTAG, 3),

	P4_GEN_ESCR_EMASK(P4_EVENT_UOPS_RETIRED, NBOGUS, 0),
	P4_GEN_ESCR_EMASK(P4_EVENT_UOPS_RETIRED, BOGUS, 1),

	P4_GEN_ESCR_EMASK(P4_EVENT_UOP_TYPE, TAGLOADS, 1),
	P4_GEN_ESCR_EMASK(P4_EVENT_UOP_TYPE, TAGSTORES, 2),

	P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMNP, 0),
	P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMNM, 1),
	P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMTP, 2),
	P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMTM, 3),

	P4_GEN_ESCR_EMASK(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS, 0),

	P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, FPSU, 0),
	P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, FPSO, 1),
	P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, POAO, 2),
	P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, POAU, 3),
	P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, PREA, 4),

	P4_GEN_ESCR_EMASK(P4_EVENT_MACHINE_CLEAR, CLEAR, 0),
	P4_GEN_ESCR_EMASK(P4_EVENT_MACHINE_CLEAR, MOCLEAR, 1),
	P4_GEN_ESCR_EMASK(P4_EVENT_MACHINE_CLEAR, SMCLEAR, 2),

	P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, NBOGUS, 0),
	P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, BOGUS, 1),
};
754
755/* P4 PEBS: stale for a while */
756#define P4_PEBS_METRIC_MASK 0x00001fffU
757#define P4_PEBS_UOB_TAG 0x01000000U
758#define P4_PEBS_ENABLE 0x02000000U
759
760/* Replay metrics for MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT */
761#define P4_PEBS__1stl_cache_load_miss_retired 0x3000001
762#define P4_PEBS__2ndl_cache_load_miss_retired 0x3000002
763#define P4_PEBS__dtlb_load_miss_retired 0x3000004
764#define P4_PEBS__dtlb_store_miss_retired 0x3000004
765#define P4_PEBS__dtlb_all_miss_retired 0x3000004
766#define P4_PEBS__tagged_mispred_branch 0x3018000
767#define P4_PEBS__mob_load_replay_retired 0x3000200
768#define P4_PEBS__split_load_retired 0x3000400
769#define P4_PEBS__split_store_retired 0x3000400
770
771#define P4_VERT__1stl_cache_load_miss_retired 0x0000001
772#define P4_VERT__2ndl_cache_load_miss_retired 0x0000001
773#define P4_VERT__dtlb_load_miss_retired 0x0000001
774#define P4_VERT__dtlb_store_miss_retired 0x0000002
775#define P4_VERT__dtlb_all_miss_retired 0x0000003
776#define P4_VERT__tagged_mispred_branch 0x0000010
777#define P4_VERT__mob_load_replay_retired 0x0000001
778#define P4_VERT__split_load_retired 0x0000001
779#define P4_VERT__split_store_retired 0x0000002
780
/*
 * Cache events the driver can express; most names mirror the
 * P4_PEBS__/P4_VERT__ replay-metric defines above (presumably each
 * non-NONE entry selects the matching metric pair — confirm against
 * the user in perf_event_p4.c).
 */
enum P4_CACHE_EVENTS {
	P4_CACHE__NONE,

	P4_CACHE__1stl_cache_load_miss_retired,
	P4_CACHE__2ndl_cache_load_miss_retired,
	P4_CACHE__dtlb_load_miss_retired,
	P4_CACHE__dtlb_store_miss_retired,
	P4_CACHE__itlb_reference_hit,
	P4_CACHE__itlb_reference_miss,

	P4_CACHE__MAX
};
793
794#endif /* PERF_EVENT_P4_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index b753ea59703a..32428b410b55 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -21,7 +21,6 @@ struct mm_struct;
21#include <asm/msr.h> 21#include <asm/msr.h>
22#include <asm/desc_defs.h> 22#include <asm/desc_defs.h>
23#include <asm/nops.h> 23#include <asm/nops.h>
24#include <asm/ds.h>
25 24
26#include <linux/personality.h> 25#include <linux/personality.h>
27#include <linux/cpumask.h> 26#include <linux/cpumask.h>
@@ -29,6 +28,7 @@ struct mm_struct;
29#include <linux/threads.h> 28#include <linux/threads.h>
30#include <linux/math64.h> 29#include <linux/math64.h>
31#include <linux/init.h> 30#include <linux/init.h>
31#include <linux/err.h>
32 32
33#define HBP_NUM 4 33#define HBP_NUM 4
34/* 34/*
@@ -473,10 +473,6 @@ struct thread_struct {
473 unsigned long iopl; 473 unsigned long iopl;
474 /* Max allowed port in the bitmap, in bytes: */ 474 /* Max allowed port in the bitmap, in bytes: */
475 unsigned io_bitmap_max; 475 unsigned io_bitmap_max;
476/* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */
477 unsigned long debugctlmsr;
478 /* Debug Store context; see asm/ds.h */
479 struct ds_context *ds_ctx;
480}; 476};
481 477
482static inline unsigned long native_get_debugreg(int regno) 478static inline unsigned long native_get_debugreg(int regno)
@@ -803,7 +799,7 @@ extern void cpu_init(void);
803 799
804static inline unsigned long get_debugctlmsr(void) 800static inline unsigned long get_debugctlmsr(void)
805{ 801{
806 unsigned long debugctlmsr = 0; 802 unsigned long debugctlmsr = 0;
807 803
808#ifndef CONFIG_X86_DEBUGCTLMSR 804#ifndef CONFIG_X86_DEBUGCTLMSR
809 if (boot_cpu_data.x86 < 6) 805 if (boot_cpu_data.x86 < 6)
@@ -811,21 +807,6 @@ static inline unsigned long get_debugctlmsr(void)
811#endif 807#endif
812 rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); 808 rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
813 809
814 return debugctlmsr;
815}
816
817static inline unsigned long get_debugctlmsr_on_cpu(int cpu)
818{
819 u64 debugctlmsr = 0;
820 u32 val1, val2;
821
822#ifndef CONFIG_X86_DEBUGCTLMSR
823 if (boot_cpu_data.x86 < 6)
824 return 0;
825#endif
826 rdmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, &val1, &val2);
827 debugctlmsr = val1 | ((u64)val2 << 32);
828
829 return debugctlmsr; 810 return debugctlmsr;
830} 811}
831 812
@@ -838,18 +819,6 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr)
838 wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); 819 wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
839} 820}
840 821
841static inline void update_debugctlmsr_on_cpu(int cpu,
842 unsigned long debugctlmsr)
843{
844#ifndef CONFIG_X86_DEBUGCTLMSR
845 if (boot_cpu_data.x86 < 6)
846 return;
847#endif
848 wrmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR,
849 (u32)((u64)debugctlmsr),
850 (u32)((u64)debugctlmsr >> 32));
851}
852
853/* 822/*
854 * from system description table in BIOS. Mostly for MCA use, but 823 * from system description table in BIOS. Mostly for MCA use, but
855 * others may find it useful: 824 * others may find it useful:
diff --git a/arch/x86/include/asm/ptrace-abi.h b/arch/x86/include/asm/ptrace-abi.h
index 86723035a515..52b098a6eebb 100644
--- a/arch/x86/include/asm/ptrace-abi.h
+++ b/arch/x86/include/asm/ptrace-abi.h
@@ -82,61 +82,6 @@
82 82
83#ifndef __ASSEMBLY__ 83#ifndef __ASSEMBLY__
84#include <linux/types.h> 84#include <linux/types.h>
85 85#endif
86/* configuration/status structure used in PTRACE_BTS_CONFIG and
87 PTRACE_BTS_STATUS commands.
88*/
89struct ptrace_bts_config {
90 /* requested or actual size of BTS buffer in bytes */
91 __u32 size;
92 /* bitmask of below flags */
93 __u32 flags;
94 /* buffer overflow signal */
95 __u32 signal;
96 /* actual size of bts_struct in bytes */
97 __u32 bts_size;
98};
99#endif /* __ASSEMBLY__ */
100
101#define PTRACE_BTS_O_TRACE 0x1 /* branch trace */
102#define PTRACE_BTS_O_SCHED 0x2 /* scheduling events w/ jiffies */
103#define PTRACE_BTS_O_SIGNAL 0x4 /* send SIG<signal> on buffer overflow
104 instead of wrapping around */
105#define PTRACE_BTS_O_ALLOC 0x8 /* (re)allocate buffer */
106
107#define PTRACE_BTS_CONFIG 40
108/* Configure branch trace recording.
109 ADDR points to a struct ptrace_bts_config.
110 DATA gives the size of that buffer.
111 A new buffer is allocated, if requested in the flags.
112 An overflow signal may only be requested for new buffers.
113 Returns the number of bytes read.
114*/
115#define PTRACE_BTS_STATUS 41
116/* Return the current configuration in a struct ptrace_bts_config
117 pointed to by ADDR; DATA gives the size of that buffer.
118 Returns the number of bytes written.
119*/
120#define PTRACE_BTS_SIZE 42
121/* Return the number of available BTS records for draining.
122 DATA and ADDR are ignored.
123*/
124#define PTRACE_BTS_GET 43
125/* Get a single BTS record.
126 DATA defines the index into the BTS array, where 0 is the newest
127 entry, and higher indices refer to older entries.
128 ADDR is pointing to struct bts_struct (see asm/ds.h).
129*/
130#define PTRACE_BTS_CLEAR 44
131/* Clear the BTS buffer.
132 DATA and ADDR are ignored.
133*/
134#define PTRACE_BTS_DRAIN 45
135/* Read all available BTS records and clear the buffer.
136 ADDR points to an array of struct bts_struct.
137 DATA gives the size of that buffer.
138 BTS records are read from oldest to newest.
139 Returns number of BTS records drained.
140*/
141 86
142#endif /* _ASM_X86_PTRACE_ABI_H */ 87#endif /* _ASM_X86_PTRACE_ABI_H */
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 69a686a7dff0..78cd1ea94500 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -289,12 +289,6 @@ extern int do_get_thread_area(struct task_struct *p, int idx,
289extern int do_set_thread_area(struct task_struct *p, int idx, 289extern int do_set_thread_area(struct task_struct *p, int idx,
290 struct user_desc __user *info, int can_allocate); 290 struct user_desc __user *info, int can_allocate);
291 291
292#ifdef CONFIG_X86_PTRACE_BTS
293extern void ptrace_bts_untrace(struct task_struct *tsk);
294
295#define arch_ptrace_untrace(tsk) ptrace_bts_untrace(tsk)
296#endif /* CONFIG_X86_PTRACE_BTS */
297
298#endif /* __KERNEL__ */ 292#endif /* __KERNEL__ */
299 293
300#endif /* !__ASSEMBLY__ */ 294#endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index e0d28901e969..d017ed5502e2 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -92,8 +92,7 @@ struct thread_info {
92#define TIF_IO_BITMAP 22 /* uses I/O bitmap */ 92#define TIF_IO_BITMAP 22 /* uses I/O bitmap */
93#define TIF_FREEZE 23 /* is freezing for suspend */ 93#define TIF_FREEZE 23 /* is freezing for suspend */
94#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ 94#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */
95#define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ 95#define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */
96#define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */
97#define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ 96#define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */
98#define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */ 97#define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */
99 98
@@ -115,8 +114,7 @@ struct thread_info {
115#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) 114#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
116#define _TIF_FREEZE (1 << TIF_FREEZE) 115#define _TIF_FREEZE (1 << TIF_FREEZE)
117#define _TIF_FORCED_TF (1 << TIF_FORCED_TF) 116#define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
118#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) 117#define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP)
119#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR)
120#define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) 118#define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES)
121#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) 119#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
122 120
@@ -147,7 +145,7 @@ struct thread_info {
147 145
148/* flags to check in __switch_to() */ 146/* flags to check in __switch_to() */
149#define _TIF_WORK_CTXSW \ 147#define _TIF_WORK_CTXSW \
150 (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC) 148 (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
151 149
152#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) 150#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
153#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) 151#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 4c58352209e0..e77b22083721 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -47,8 +47,6 @@ obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
47obj-y += process.o 47obj-y += process.o
48obj-y += i387.o xsave.o 48obj-y += i387.o xsave.o
49obj-y += ptrace.o 49obj-y += ptrace.o
50obj-$(CONFIG_X86_DS) += ds.o
51obj-$(CONFIG_X86_DS_SELFTEST) += ds_selftest.o
52obj-$(CONFIG_X86_32) += tls.o 50obj-$(CONFIG_X86_32) += tls.o
53obj-$(CONFIG_IA32_EMULATION) += tls.o 51obj-$(CONFIG_IA32_EMULATION) += tls.o
54obj-y += step.o 52obj-y += step.o
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 7e1cca13af35..d72377c41c76 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -12,7 +12,6 @@
12#include <asm/processor.h> 12#include <asm/processor.h>
13#include <asm/pgtable.h> 13#include <asm/pgtable.h>
14#include <asm/msr.h> 14#include <asm/msr.h>
15#include <asm/ds.h>
16#include <asm/bugs.h> 15#include <asm/bugs.h>
17#include <asm/cpu.h> 16#include <asm/cpu.h>
18 17
@@ -367,7 +366,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
367 set_cpu_cap(c, X86_FEATURE_BTS); 366 set_cpu_cap(c, X86_FEATURE_BTS);
368 if (!(l1 & (1<<12))) 367 if (!(l1 & (1<<12)))
369 set_cpu_cap(c, X86_FEATURE_PEBS); 368 set_cpu_cap(c, X86_FEATURE_PEBS);
370 ds_init_intel(c);
371 } 369 }
372 370
373 if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush) 371 if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index db5bdc8addf8..2ea78abf69d9 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -31,46 +31,51 @@
31#include <asm/nmi.h> 31#include <asm/nmi.h>
32#include <asm/compat.h> 32#include <asm/compat.h>
33 33
34static u64 perf_event_mask __read_mostly; 34#if 0
35#undef wrmsrl
36#define wrmsrl(msr, val) \
37do { \
38 trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\
39 (unsigned long)(val)); \
40 native_write_msr((msr), (u32)((u64)(val)), \
41 (u32)((u64)(val) >> 32)); \
42} while (0)
43#endif
35 44
36/* The maximal number of PEBS events: */ 45/*
37#define MAX_PEBS_EVENTS 4 46 * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
47 */
48static unsigned long
49copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
50{
51 unsigned long offset, addr = (unsigned long)from;
52 int type = in_nmi() ? KM_NMI : KM_IRQ0;
53 unsigned long size, len = 0;
54 struct page *page;
55 void *map;
56 int ret;
38 57
39/* The size of a BTS record in bytes: */ 58 do {
40#define BTS_RECORD_SIZE 24 59 ret = __get_user_pages_fast(addr, 1, 0, &page);
60 if (!ret)
61 break;
41 62
42/* The size of a per-cpu BTS buffer in bytes: */ 63 offset = addr & (PAGE_SIZE - 1);
43#define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 2048) 64 size = min(PAGE_SIZE - offset, n - len);
44 65
45/* The BTS overflow threshold in bytes from the end of the buffer: */ 66 map = kmap_atomic(page, type);
46#define BTS_OVFL_TH (BTS_RECORD_SIZE * 128) 67 memcpy(to, map+offset, size);
68 kunmap_atomic(map, type);
69 put_page(page);
47 70
71 len += size;
72 to += size;
73 addr += size;
48 74
49/* 75 } while (len < n);
50 * Bits in the debugctlmsr controlling branch tracing.
51 */
52#define X86_DEBUGCTL_TR (1 << 6)
53#define X86_DEBUGCTL_BTS (1 << 7)
54#define X86_DEBUGCTL_BTINT (1 << 8)
55#define X86_DEBUGCTL_BTS_OFF_OS (1 << 9)
56#define X86_DEBUGCTL_BTS_OFF_USR (1 << 10)
57 76
58/* 77 return len;
59 * A debug store configuration. 78}
60 *
61 * We only support architectures that use 64bit fields.
62 */
63struct debug_store {
64 u64 bts_buffer_base;
65 u64 bts_index;
66 u64 bts_absolute_maximum;
67 u64 bts_interrupt_threshold;
68 u64 pebs_buffer_base;
69 u64 pebs_index;
70 u64 pebs_absolute_maximum;
71 u64 pebs_interrupt_threshold;
72 u64 pebs_event_reset[MAX_PEBS_EVENTS];
73};
74 79
75struct event_constraint { 80struct event_constraint {
76 union { 81 union {
@@ -89,18 +94,39 @@ struct amd_nb {
89 struct event_constraint event_constraints[X86_PMC_IDX_MAX]; 94 struct event_constraint event_constraints[X86_PMC_IDX_MAX];
90}; 95};
91 96
97#define MAX_LBR_ENTRIES 16
98
92struct cpu_hw_events { 99struct cpu_hw_events {
100 /*
101 * Generic x86 PMC bits
102 */
93 struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ 103 struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */
94 unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; 104 unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
95 unsigned long interrupts;
96 int enabled; 105 int enabled;
97 struct debug_store *ds;
98 106
99 int n_events; 107 int n_events;
100 int n_added; 108 int n_added;
101 int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ 109 int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
102 u64 tags[X86_PMC_IDX_MAX]; 110 u64 tags[X86_PMC_IDX_MAX];
103 struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ 111 struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
112
113 /*
114 * Intel DebugStore bits
115 */
116 struct debug_store *ds;
117 u64 pebs_enabled;
118
119 /*
120 * Intel LBR bits
121 */
122 int lbr_users;
123 void *lbr_context;
124 struct perf_branch_stack lbr_stack;
125 struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
126
127 /*
128 * AMD specific bits
129 */
104 struct amd_nb *amd_nb; 130 struct amd_nb *amd_nb;
105}; 131};
106 132
@@ -114,11 +140,31 @@ struct cpu_hw_events {
114#define EVENT_CONSTRAINT(c, n, m) \ 140#define EVENT_CONSTRAINT(c, n, m) \
115 __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) 141 __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
116 142
143/*
144 * Constraint on the Event code.
145 */
117#define INTEL_EVENT_CONSTRAINT(c, n) \ 146#define INTEL_EVENT_CONSTRAINT(c, n) \
118 EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK) 147 EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
119 148
149/*
150 * Constraint on the Event code + UMask + fixed-mask
151 *
152 * filter mask to validate fixed counter events.
153 * the following filters disqualify for fixed counters:
154 * - inv
155 * - edge
156 * - cnt-mask
157 * The other filters are supported by fixed counters.
158 * The any-thread option is supported starting with v3.
159 */
120#define FIXED_EVENT_CONSTRAINT(c, n) \ 160#define FIXED_EVENT_CONSTRAINT(c, n) \
121 EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK) 161 EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK)
162
163/*
164 * Constraint on the Event code + UMask
165 */
166#define PEBS_EVENT_CONSTRAINT(c, n) \
167 EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
122 168
123#define EVENT_CONSTRAINT_END \ 169#define EVENT_CONSTRAINT_END \
124 EVENT_CONSTRAINT(0, 0, 0) 170 EVENT_CONSTRAINT(0, 0, 0)
@@ -126,32 +172,43 @@ struct cpu_hw_events {
126#define for_each_event_constraint(e, c) \ 172#define for_each_event_constraint(e, c) \
127 for ((e) = (c); (e)->cmask; (e)++) 173 for ((e) = (c); (e)->cmask; (e)++)
128 174
175union perf_capabilities {
176 struct {
177 u64 lbr_format : 6;
178 u64 pebs_trap : 1;
179 u64 pebs_arch_reg : 1;
180 u64 pebs_format : 4;
181 u64 smm_freeze : 1;
182 };
183 u64 capabilities;
184};
185
129/* 186/*
130 * struct x86_pmu - generic x86 pmu 187 * struct x86_pmu - generic x86 pmu
131 */ 188 */
132struct x86_pmu { 189struct x86_pmu {
190 /*
191 * Generic x86 PMC bits
192 */
133 const char *name; 193 const char *name;
134 int version; 194 int version;
135 int (*handle_irq)(struct pt_regs *); 195 int (*handle_irq)(struct pt_regs *);
136 void (*disable_all)(void); 196 void (*disable_all)(void);
137 void (*enable_all)(void); 197 void (*enable_all)(int added);
138 void (*enable)(struct perf_event *); 198 void (*enable)(struct perf_event *);
139 void (*disable)(struct perf_event *); 199 void (*disable)(struct perf_event *);
200 int (*hw_config)(struct perf_event *event);
201 int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
140 unsigned eventsel; 202 unsigned eventsel;
141 unsigned perfctr; 203 unsigned perfctr;
142 u64 (*event_map)(int); 204 u64 (*event_map)(int);
143 u64 (*raw_event)(u64);
144 int max_events; 205 int max_events;
145 int num_events; 206 int num_counters;
146 int num_events_fixed; 207 int num_counters_fixed;
147 int event_bits; 208 int cntval_bits;
148 u64 event_mask; 209 u64 cntval_mask;
149 int apic; 210 int apic;
150 u64 max_period; 211 u64 max_period;
151 u64 intel_ctrl;
152 void (*enable_bts)(u64 config);
153 void (*disable_bts)(void);
154
155 struct event_constraint * 212 struct event_constraint *
156 (*get_event_constraints)(struct cpu_hw_events *cpuc, 213 (*get_event_constraints)(struct cpu_hw_events *cpuc,
157 struct perf_event *event); 214 struct perf_event *event);
@@ -159,11 +216,32 @@ struct x86_pmu {
159 void (*put_event_constraints)(struct cpu_hw_events *cpuc, 216 void (*put_event_constraints)(struct cpu_hw_events *cpuc,
160 struct perf_event *event); 217 struct perf_event *event);
161 struct event_constraint *event_constraints; 218 struct event_constraint *event_constraints;
219 void (*quirks)(void);
162 220
163 int (*cpu_prepare)(int cpu); 221 int (*cpu_prepare)(int cpu);
164 void (*cpu_starting)(int cpu); 222 void (*cpu_starting)(int cpu);
165 void (*cpu_dying)(int cpu); 223 void (*cpu_dying)(int cpu);
166 void (*cpu_dead)(int cpu); 224 void (*cpu_dead)(int cpu);
225
226 /*
227 * Intel Arch Perfmon v2+
228 */
229 u64 intel_ctrl;
230 union perf_capabilities intel_cap;
231
232 /*
233 * Intel DebugStore bits
234 */
235 int bts, pebs;
236 int pebs_record_size;
237 void (*drain_pebs)(struct pt_regs *regs);
238 struct event_constraint *pebs_constraints;
239
240 /*
241 * Intel LBR
242 */
243 unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
244 int lbr_nr; /* hardware stack size */
167}; 245};
168 246
169static struct x86_pmu x86_pmu __read_mostly; 247static struct x86_pmu x86_pmu __read_mostly;
@@ -198,7 +276,7 @@ static u64
198x86_perf_event_update(struct perf_event *event) 276x86_perf_event_update(struct perf_event *event)
199{ 277{
200 struct hw_perf_event *hwc = &event->hw; 278 struct hw_perf_event *hwc = &event->hw;
201 int shift = 64 - x86_pmu.event_bits; 279 int shift = 64 - x86_pmu.cntval_bits;
202 u64 prev_raw_count, new_raw_count; 280 u64 prev_raw_count, new_raw_count;
203 int idx = hwc->idx; 281 int idx = hwc->idx;
204 s64 delta; 282 s64 delta;
@@ -241,33 +319,32 @@ again:
241static atomic_t active_events; 319static atomic_t active_events;
242static DEFINE_MUTEX(pmc_reserve_mutex); 320static DEFINE_MUTEX(pmc_reserve_mutex);
243 321
322#ifdef CONFIG_X86_LOCAL_APIC
323
244static bool reserve_pmc_hardware(void) 324static bool reserve_pmc_hardware(void)
245{ 325{
246#ifdef CONFIG_X86_LOCAL_APIC
247 int i; 326 int i;
248 327
249 if (nmi_watchdog == NMI_LOCAL_APIC) 328 if (nmi_watchdog == NMI_LOCAL_APIC)
250 disable_lapic_nmi_watchdog(); 329 disable_lapic_nmi_watchdog();
251 330
252 for (i = 0; i < x86_pmu.num_events; i++) { 331 for (i = 0; i < x86_pmu.num_counters; i++) {
253 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) 332 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
254 goto perfctr_fail; 333 goto perfctr_fail;
255 } 334 }
256 335
257 for (i = 0; i < x86_pmu.num_events; i++) { 336 for (i = 0; i < x86_pmu.num_counters; i++) {
258 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) 337 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
259 goto eventsel_fail; 338 goto eventsel_fail;
260 } 339 }
261#endif
262 340
263 return true; 341 return true;
264 342
265#ifdef CONFIG_X86_LOCAL_APIC
266eventsel_fail: 343eventsel_fail:
267 for (i--; i >= 0; i--) 344 for (i--; i >= 0; i--)
268 release_evntsel_nmi(x86_pmu.eventsel + i); 345 release_evntsel_nmi(x86_pmu.eventsel + i);
269 346
270 i = x86_pmu.num_events; 347 i = x86_pmu.num_counters;
271 348
272perfctr_fail: 349perfctr_fail:
273 for (i--; i >= 0; i--) 350 for (i--; i >= 0; i--)
@@ -277,128 +354,36 @@ perfctr_fail:
277 enable_lapic_nmi_watchdog(); 354 enable_lapic_nmi_watchdog();
278 355
279 return false; 356 return false;
280#endif
281} 357}
282 358
283static void release_pmc_hardware(void) 359static void release_pmc_hardware(void)
284{ 360{
285#ifdef CONFIG_X86_LOCAL_APIC
286 int i; 361 int i;
287 362
288 for (i = 0; i < x86_pmu.num_events; i++) { 363 for (i = 0; i < x86_pmu.num_counters; i++) {
289 release_perfctr_nmi(x86_pmu.perfctr + i); 364 release_perfctr_nmi(x86_pmu.perfctr + i);
290 release_evntsel_nmi(x86_pmu.eventsel + i); 365 release_evntsel_nmi(x86_pmu.eventsel + i);
291 } 366 }
292 367
293 if (nmi_watchdog == NMI_LOCAL_APIC) 368 if (nmi_watchdog == NMI_LOCAL_APIC)
294 enable_lapic_nmi_watchdog(); 369 enable_lapic_nmi_watchdog();
295#endif
296}
297
298static inline bool bts_available(void)
299{
300 return x86_pmu.enable_bts != NULL;
301}
302
303static void init_debug_store_on_cpu(int cpu)
304{
305 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
306
307 if (!ds)
308 return;
309
310 wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
311 (u32)((u64)(unsigned long)ds),
312 (u32)((u64)(unsigned long)ds >> 32));
313}
314
315static void fini_debug_store_on_cpu(int cpu)
316{
317 if (!per_cpu(cpu_hw_events, cpu).ds)
318 return;
319
320 wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
321}
322
323static void release_bts_hardware(void)
324{
325 int cpu;
326
327 if (!bts_available())
328 return;
329
330 get_online_cpus();
331
332 for_each_online_cpu(cpu)
333 fini_debug_store_on_cpu(cpu);
334
335 for_each_possible_cpu(cpu) {
336 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
337
338 if (!ds)
339 continue;
340
341 per_cpu(cpu_hw_events, cpu).ds = NULL;
342
343 kfree((void *)(unsigned long)ds->bts_buffer_base);
344 kfree(ds);
345 }
346
347 put_online_cpus();
348} 370}
349 371
350static int reserve_bts_hardware(void) 372#else
351{
352 int cpu, err = 0;
353
354 if (!bts_available())
355 return 0;
356
357 get_online_cpus();
358
359 for_each_possible_cpu(cpu) {
360 struct debug_store *ds;
361 void *buffer;
362
363 err = -ENOMEM;
364 buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
365 if (unlikely(!buffer))
366 break;
367
368 ds = kzalloc(sizeof(*ds), GFP_KERNEL);
369 if (unlikely(!ds)) {
370 kfree(buffer);
371 break;
372 }
373
374 ds->bts_buffer_base = (u64)(unsigned long)buffer;
375 ds->bts_index = ds->bts_buffer_base;
376 ds->bts_absolute_maximum =
377 ds->bts_buffer_base + BTS_BUFFER_SIZE;
378 ds->bts_interrupt_threshold =
379 ds->bts_absolute_maximum - BTS_OVFL_TH;
380 373
381 per_cpu(cpu_hw_events, cpu).ds = ds; 374static bool reserve_pmc_hardware(void) { return true; }
382 err = 0; 375static void release_pmc_hardware(void) {}
383 }
384 376
385 if (err) 377#endif
386 release_bts_hardware();
387 else {
388 for_each_online_cpu(cpu)
389 init_debug_store_on_cpu(cpu);
390 }
391
392 put_online_cpus();
393 378
394 return err; 379static int reserve_ds_buffers(void);
395} 380static void release_ds_buffers(void);
396 381
397static void hw_perf_event_destroy(struct perf_event *event) 382static void hw_perf_event_destroy(struct perf_event *event)
398{ 383{
399 if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { 384 if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
400 release_pmc_hardware(); 385 release_pmc_hardware();
401 release_bts_hardware(); 386 release_ds_buffers();
402 mutex_unlock(&pmc_reserve_mutex); 387 mutex_unlock(&pmc_reserve_mutex);
403 } 388 }
404} 389}
@@ -441,6 +426,28 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
441 return 0; 426 return 0;
442} 427}
443 428
429static int x86_pmu_hw_config(struct perf_event *event)
430{
431 /*
432 * Generate PMC IRQs:
433 * (keep 'enabled' bit clear for now)
434 */
435 event->hw.config = ARCH_PERFMON_EVENTSEL_INT;
436
437 /*
438 * Count user and OS events unless requested not to
439 */
440 if (!event->attr.exclude_user)
441 event->hw.config |= ARCH_PERFMON_EVENTSEL_USR;
442 if (!event->attr.exclude_kernel)
443 event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;
444
445 if (event->attr.type == PERF_TYPE_RAW)
446 event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
447
448 return 0;
449}
450
444/* 451/*
445 * Setup the hardware configuration for a given attr_type 452 * Setup the hardware configuration for a given attr_type
446 */ 453 */
@@ -460,8 +467,11 @@ static int __hw_perf_event_init(struct perf_event *event)
460 if (atomic_read(&active_events) == 0) { 467 if (atomic_read(&active_events) == 0) {
461 if (!reserve_pmc_hardware()) 468 if (!reserve_pmc_hardware())
462 err = -EBUSY; 469 err = -EBUSY;
463 else 470 else {
464 err = reserve_bts_hardware(); 471 err = reserve_ds_buffers();
472 if (err)
473 release_pmc_hardware();
474 }
465 } 475 }
466 if (!err) 476 if (!err)
467 atomic_inc(&active_events); 477 atomic_inc(&active_events);
@@ -472,23 +482,14 @@ static int __hw_perf_event_init(struct perf_event *event)
472 482
473 event->destroy = hw_perf_event_destroy; 483 event->destroy = hw_perf_event_destroy;
474 484
475 /*
476 * Generate PMC IRQs:
477 * (keep 'enabled' bit clear for now)
478 */
479 hwc->config = ARCH_PERFMON_EVENTSEL_INT;
480
481 hwc->idx = -1; 485 hwc->idx = -1;
482 hwc->last_cpu = -1; 486 hwc->last_cpu = -1;
483 hwc->last_tag = ~0ULL; 487 hwc->last_tag = ~0ULL;
484 488
485 /* 489 /* Processor specifics */
486 * Count user and OS events unless requested not to. 490 err = x86_pmu.hw_config(event);
487 */ 491 if (err)
488 if (!attr->exclude_user) 492 return err;
489 hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
490 if (!attr->exclude_kernel)
491 hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
492 493
493 if (!hwc->sample_period) { 494 if (!hwc->sample_period) {
494 hwc->sample_period = x86_pmu.max_period; 495 hwc->sample_period = x86_pmu.max_period;
@@ -505,16 +506,8 @@ static int __hw_perf_event_init(struct perf_event *event)
505 return -EOPNOTSUPP; 506 return -EOPNOTSUPP;
506 } 507 }
507 508
508 /* 509 if (attr->type == PERF_TYPE_RAW)
509 * Raw hw_event type provide the config in the hw_event structure
510 */
511 if (attr->type == PERF_TYPE_RAW) {
512 hwc->config |= x86_pmu.raw_event(attr->config);
513 if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) &&
514 perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
515 return -EACCES;
516 return 0; 510 return 0;
517 }
518 511
519 if (attr->type == PERF_TYPE_HW_CACHE) 512 if (attr->type == PERF_TYPE_HW_CACHE)
520 return set_ext_hw_attr(hwc, attr); 513 return set_ext_hw_attr(hwc, attr);
@@ -539,11 +532,11 @@ static int __hw_perf_event_init(struct perf_event *event)
539 if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && 532 if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
540 (hwc->sample_period == 1)) { 533 (hwc->sample_period == 1)) {
541 /* BTS is not supported by this architecture. */ 534 /* BTS is not supported by this architecture. */
542 if (!bts_available()) 535 if (!x86_pmu.bts)
543 return -EOPNOTSUPP; 536 return -EOPNOTSUPP;
544 537
545 /* BTS is currently only allowed for user-mode. */ 538 /* BTS is currently only allowed for user-mode. */
546 if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) 539 if (!attr->exclude_kernel)
547 return -EOPNOTSUPP; 540 return -EOPNOTSUPP;
548 } 541 }
549 542
@@ -557,7 +550,7 @@ static void x86_pmu_disable_all(void)
557 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 550 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
558 int idx; 551 int idx;
559 552
560 for (idx = 0; idx < x86_pmu.num_events; idx++) { 553 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
561 u64 val; 554 u64 val;
562 555
563 if (!test_bit(idx, cpuc->active_mask)) 556 if (!test_bit(idx, cpuc->active_mask))
@@ -587,12 +580,12 @@ void hw_perf_disable(void)
587 x86_pmu.disable_all(); 580 x86_pmu.disable_all();
588} 581}
589 582
590static void x86_pmu_enable_all(void) 583static void x86_pmu_enable_all(int added)
591{ 584{
592 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 585 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
593 int idx; 586 int idx;
594 587
595 for (idx = 0; idx < x86_pmu.num_events; idx++) { 588 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
596 struct perf_event *event = cpuc->events[idx]; 589 struct perf_event *event = cpuc->events[idx];
597 u64 val; 590 u64 val;
598 591
@@ -667,14 +660,14 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
667 * assign events to counters starting with most 660 * assign events to counters starting with most
668 * constrained events. 661 * constrained events.
669 */ 662 */
670 wmax = x86_pmu.num_events; 663 wmax = x86_pmu.num_counters;
671 664
672 /* 665 /*
673 * when fixed event counters are present, 666 * when fixed event counters are present,
674 * wmax is incremented by 1 to account 667 * wmax is incremented by 1 to account
675 * for one more choice 668 * for one more choice
676 */ 669 */
677 if (x86_pmu.num_events_fixed) 670 if (x86_pmu.num_counters_fixed)
678 wmax++; 671 wmax++;
679 672
680 for (w = 1, num = n; num && w <= wmax; w++) { 673 for (w = 1, num = n; num && w <= wmax; w++) {
@@ -724,7 +717,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
724 struct perf_event *event; 717 struct perf_event *event;
725 int n, max_count; 718 int n, max_count;
726 719
727 max_count = x86_pmu.num_events + x86_pmu.num_events_fixed; 720 max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed;
728 721
729 /* current number of events already accepted */ 722 /* current number of events already accepted */
730 n = cpuc->n_events; 723 n = cpuc->n_events;
@@ -795,7 +788,7 @@ void hw_perf_enable(void)
795 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 788 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
796 struct perf_event *event; 789 struct perf_event *event;
797 struct hw_perf_event *hwc; 790 struct hw_perf_event *hwc;
798 int i; 791 int i, added = cpuc->n_added;
799 792
800 if (!x86_pmu_initialized()) 793 if (!x86_pmu_initialized())
801 return; 794 return;
@@ -847,19 +840,20 @@ void hw_perf_enable(void)
847 cpuc->enabled = 1; 840 cpuc->enabled = 1;
848 barrier(); 841 barrier();
849 842
850 x86_pmu.enable_all(); 843 x86_pmu.enable_all(added);
851} 844}
852 845
853static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc) 846static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc)
854{ 847{
855 (void)checking_wrmsrl(hwc->config_base + hwc->idx, 848 wrmsrl(hwc->config_base + hwc->idx,
856 hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE); 849 hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE);
857} 850}
858 851
859static inline void x86_pmu_disable_event(struct perf_event *event) 852static inline void x86_pmu_disable_event(struct perf_event *event)
860{ 853{
861 struct hw_perf_event *hwc = &event->hw; 854 struct hw_perf_event *hwc = &event->hw;
862 (void)checking_wrmsrl(hwc->config_base + hwc->idx, hwc->config); 855
856 wrmsrl(hwc->config_base + hwc->idx, hwc->config);
863} 857}
864 858
865static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); 859static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
@@ -874,7 +868,7 @@ x86_perf_event_set_period(struct perf_event *event)
874 struct hw_perf_event *hwc = &event->hw; 868 struct hw_perf_event *hwc = &event->hw;
875 s64 left = atomic64_read(&hwc->period_left); 869 s64 left = atomic64_read(&hwc->period_left);
876 s64 period = hwc->sample_period; 870 s64 period = hwc->sample_period;
877 int err, ret = 0, idx = hwc->idx; 871 int ret = 0, idx = hwc->idx;
878 872
879 if (idx == X86_PMC_IDX_FIXED_BTS) 873 if (idx == X86_PMC_IDX_FIXED_BTS)
880 return 0; 874 return 0;
@@ -912,8 +906,8 @@ x86_perf_event_set_period(struct perf_event *event)
912 */ 906 */
913 atomic64_set(&hwc->prev_count, (u64)-left); 907 atomic64_set(&hwc->prev_count, (u64)-left);
914 908
915 err = checking_wrmsrl(hwc->event_base + idx, 909 wrmsrl(hwc->event_base + idx,
916 (u64)(-left) & x86_pmu.event_mask); 910 (u64)(-left) & x86_pmu.cntval_mask);
917 911
918 perf_event_update_userpage(event); 912 perf_event_update_userpage(event);
919 913
@@ -950,7 +944,7 @@ static int x86_pmu_enable(struct perf_event *event)
950 if (n < 0) 944 if (n < 0)
951 return n; 945 return n;
952 946
953 ret = x86_schedule_events(cpuc, n, assign); 947 ret = x86_pmu.schedule_events(cpuc, n, assign);
954 if (ret) 948 if (ret)
955 return ret; 949 return ret;
956 /* 950 /*
@@ -991,11 +985,12 @@ static void x86_pmu_unthrottle(struct perf_event *event)
991void perf_event_print_debug(void) 985void perf_event_print_debug(void)
992{ 986{
993 u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; 987 u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
988 u64 pebs;
994 struct cpu_hw_events *cpuc; 989 struct cpu_hw_events *cpuc;
995 unsigned long flags; 990 unsigned long flags;
996 int cpu, idx; 991 int cpu, idx;
997 992
998 if (!x86_pmu.num_events) 993 if (!x86_pmu.num_counters)
999 return; 994 return;
1000 995
1001 local_irq_save(flags); 996 local_irq_save(flags);
@@ -1008,16 +1003,18 @@ void perf_event_print_debug(void)
1008 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); 1003 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
1009 rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); 1004 rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
1010 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); 1005 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
1006 rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);
1011 1007
1012 pr_info("\n"); 1008 pr_info("\n");
1013 pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); 1009 pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl);
1014 pr_info("CPU#%d: status: %016llx\n", cpu, status); 1010 pr_info("CPU#%d: status: %016llx\n", cpu, status);
1015 pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); 1011 pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
1016 pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); 1012 pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
1013 pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs);
1017 } 1014 }
1018 pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); 1015 pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);
1019 1016
1020 for (idx = 0; idx < x86_pmu.num_events; idx++) { 1017 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1021 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); 1018 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
1022 rdmsrl(x86_pmu.perfctr + idx, pmc_count); 1019 rdmsrl(x86_pmu.perfctr + idx, pmc_count);
1023 1020
@@ -1030,7 +1027,7 @@ void perf_event_print_debug(void)
1030 pr_info("CPU#%d: gen-PMC%d left: %016llx\n", 1027 pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
1031 cpu, idx, prev_left); 1028 cpu, idx, prev_left);
1032 } 1029 }
1033 for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { 1030 for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
1034 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); 1031 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
1035 1032
1036 pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", 1033 pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
@@ -1095,7 +1092,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1095 1092
1096 cpuc = &__get_cpu_var(cpu_hw_events); 1093 cpuc = &__get_cpu_var(cpu_hw_events);
1097 1094
1098 for (idx = 0; idx < x86_pmu.num_events; idx++) { 1095 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1099 if (!test_bit(idx, cpuc->active_mask)) 1096 if (!test_bit(idx, cpuc->active_mask))
1100 continue; 1097 continue;
1101 1098
@@ -1103,7 +1100,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1103 hwc = &event->hw; 1100 hwc = &event->hw;
1104 1101
1105 val = x86_perf_event_update(event); 1102 val = x86_perf_event_update(event);
1106 if (val & (1ULL << (x86_pmu.event_bits - 1))) 1103 if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
1107 continue; 1104 continue;
1108 1105
1109 /* 1106 /*
@@ -1146,7 +1143,6 @@ void set_perf_event_pending(void)
1146 1143
1147void perf_events_lapic_init(void) 1144void perf_events_lapic_init(void)
1148{ 1145{
1149#ifdef CONFIG_X86_LOCAL_APIC
1150 if (!x86_pmu.apic || !x86_pmu_initialized()) 1146 if (!x86_pmu.apic || !x86_pmu_initialized())
1151 return; 1147 return;
1152 1148
@@ -1154,7 +1150,6 @@ void perf_events_lapic_init(void)
1154 * Always use NMI for PMU 1150 * Always use NMI for PMU
1155 */ 1151 */
1156 apic_write(APIC_LVTPC, APIC_DM_NMI); 1152 apic_write(APIC_LVTPC, APIC_DM_NMI);
1157#endif
1158} 1153}
1159 1154
1160static int __kprobes 1155static int __kprobes
@@ -1178,9 +1173,7 @@ perf_event_nmi_handler(struct notifier_block *self,
1178 1173
1179 regs = args->regs; 1174 regs = args->regs;
1180 1175
1181#ifdef CONFIG_X86_LOCAL_APIC
1182 apic_write(APIC_LVTPC, APIC_DM_NMI); 1176 apic_write(APIC_LVTPC, APIC_DM_NMI);
1183#endif
1184 /* 1177 /*
1185 * Can't rely on the handled return value to say it was our NMI, two 1178 * Can't rely on the handled return value to say it was our NMI, two
1186 * events could trigger 'simultaneously' raising two back-to-back NMIs. 1179 * events could trigger 'simultaneously' raising two back-to-back NMIs.
@@ -1274,12 +1267,15 @@ int hw_perf_group_sched_in(struct perf_event *leader,
1274 int assign[X86_PMC_IDX_MAX]; 1267 int assign[X86_PMC_IDX_MAX];
1275 int n0, n1, ret; 1268 int n0, n1, ret;
1276 1269
1270 if (!x86_pmu_initialized())
1271 return 0;
1272
1277 /* n0 = total number of events */ 1273 /* n0 = total number of events */
1278 n0 = collect_events(cpuc, leader, true); 1274 n0 = collect_events(cpuc, leader, true);
1279 if (n0 < 0) 1275 if (n0 < 0)
1280 return n0; 1276 return n0;
1281 1277
1282 ret = x86_schedule_events(cpuc, n0, assign); 1278 ret = x86_pmu.schedule_events(cpuc, n0, assign);
1283 if (ret) 1279 if (ret)
1284 return ret; 1280 return ret;
1285 1281
@@ -1329,6 +1325,9 @@ undo:
1329 1325
1330#include "perf_event_amd.c" 1326#include "perf_event_amd.c"
1331#include "perf_event_p6.c" 1327#include "perf_event_p6.c"
1328#include "perf_event_p4.c"
1329#include "perf_event_intel_lbr.c"
1330#include "perf_event_intel_ds.c"
1332#include "perf_event_intel.c" 1331#include "perf_event_intel.c"
1333 1332
1334static int __cpuinit 1333static int __cpuinit
@@ -1402,48 +1401,50 @@ void __init init_hw_perf_events(void)
1402 1401
1403 pr_cont("%s PMU driver.\n", x86_pmu.name); 1402 pr_cont("%s PMU driver.\n", x86_pmu.name);
1404 1403
1405 if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) { 1404 if (x86_pmu.quirks)
1405 x86_pmu.quirks();
1406
1407 if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
1406 WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", 1408 WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
1407 x86_pmu.num_events, X86_PMC_MAX_GENERIC); 1409 x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
1408 x86_pmu.num_events = X86_PMC_MAX_GENERIC; 1410 x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
1409 } 1411 }
1410 perf_event_mask = (1 << x86_pmu.num_events) - 1; 1412 x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
1411 perf_max_events = x86_pmu.num_events; 1413 perf_max_events = x86_pmu.num_counters;
1412 1414
1413 if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) { 1415 if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
1414 WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", 1416 WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
1415 x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED); 1417 x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
1416 x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED; 1418 x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
1417 } 1419 }
1418 1420
1419 perf_event_mask |= 1421 x86_pmu.intel_ctrl |=
1420 ((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED; 1422 ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
1421 x86_pmu.intel_ctrl = perf_event_mask;
1422 1423
1423 perf_events_lapic_init(); 1424 perf_events_lapic_init();
1424 register_die_notifier(&perf_event_nmi_notifier); 1425 register_die_notifier(&perf_event_nmi_notifier);
1425 1426
1426 unconstrained = (struct event_constraint) 1427 unconstrained = (struct event_constraint)
1427 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1, 1428 __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
1428 0, x86_pmu.num_events); 1429 0, x86_pmu.num_counters);
1429 1430
1430 if (x86_pmu.event_constraints) { 1431 if (x86_pmu.event_constraints) {
1431 for_each_event_constraint(c, x86_pmu.event_constraints) { 1432 for_each_event_constraint(c, x86_pmu.event_constraints) {
1432 if (c->cmask != INTEL_ARCH_FIXED_MASK) 1433 if (c->cmask != X86_RAW_EVENT_MASK)
1433 continue; 1434 continue;
1434 1435
1435 c->idxmsk64 |= (1ULL << x86_pmu.num_events) - 1; 1436 c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
1436 c->weight += x86_pmu.num_events; 1437 c->weight += x86_pmu.num_counters;
1437 } 1438 }
1438 } 1439 }
1439 1440
1440 pr_info("... version: %d\n", x86_pmu.version); 1441 pr_info("... version: %d\n", x86_pmu.version);
1441 pr_info("... bit width: %d\n", x86_pmu.event_bits); 1442 pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
1442 pr_info("... generic registers: %d\n", x86_pmu.num_events); 1443 pr_info("... generic registers: %d\n", x86_pmu.num_counters);
1443 pr_info("... value mask: %016Lx\n", x86_pmu.event_mask); 1444 pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask);
1444 pr_info("... max period: %016Lx\n", x86_pmu.max_period); 1445 pr_info("... max period: %016Lx\n", x86_pmu.max_period);
1445 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed); 1446 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
1446 pr_info("... event mask: %016Lx\n", perf_event_mask); 1447 pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
1447 1448
1448 perf_cpu_notifier(x86_pmu_notifier); 1449 perf_cpu_notifier(x86_pmu_notifier);
1449} 1450}
@@ -1463,6 +1464,32 @@ static const struct pmu pmu = {
1463}; 1464};
1464 1465
1465/* 1466/*
1467 * validate that we can schedule this event
1468 */
1469static int validate_event(struct perf_event *event)
1470{
1471 struct cpu_hw_events *fake_cpuc;
1472 struct event_constraint *c;
1473 int ret = 0;
1474
1475 fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
1476 if (!fake_cpuc)
1477 return -ENOMEM;
1478
1479 c = x86_pmu.get_event_constraints(fake_cpuc, event);
1480
1481 if (!c || !c->weight)
1482 ret = -ENOSPC;
1483
1484 if (x86_pmu.put_event_constraints)
1485 x86_pmu.put_event_constraints(fake_cpuc, event);
1486
1487 kfree(fake_cpuc);
1488
1489 return ret;
1490}
1491
1492/*
1466 * validate a single event group 1493 * validate a single event group
1467 * 1494 *
1468 * validation include: 1495 * validation include:
@@ -1502,7 +1529,7 @@ static int validate_group(struct perf_event *event)
1502 1529
1503 fake_cpuc->n_events = n; 1530 fake_cpuc->n_events = n;
1504 1531
1505 ret = x86_schedule_events(fake_cpuc, n, NULL); 1532 ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
1506 1533
1507out_free: 1534out_free:
1508 kfree(fake_cpuc); 1535 kfree(fake_cpuc);
@@ -1527,6 +1554,8 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
1527 1554
1528 if (event->group_leader != event) 1555 if (event->group_leader != event)
1529 err = validate_group(event); 1556 err = validate_group(event);
1557 else
1558 err = validate_event(event);
1530 1559
1531 event->pmu = tmp; 1560 event->pmu = tmp;
1532 } 1561 }
@@ -1574,8 +1603,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
1574{ 1603{
1575 struct perf_callchain_entry *entry = data; 1604 struct perf_callchain_entry *entry = data;
1576 1605
1577 if (reliable) 1606 callchain_store(entry, addr);
1578 callchain_store(entry, addr);
1579} 1607}
1580 1608
1581static const struct stacktrace_ops backtrace_ops = { 1609static const struct stacktrace_ops backtrace_ops = {
@@ -1597,41 +1625,6 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
1597 dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); 1625 dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
1598} 1626}
1599 1627
1600/*
1601 * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
1602 */
1603static unsigned long
1604copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
1605{
1606 unsigned long offset, addr = (unsigned long)from;
1607 int type = in_nmi() ? KM_NMI : KM_IRQ0;
1608 unsigned long size, len = 0;
1609 struct page *page;
1610 void *map;
1611 int ret;
1612
1613 do {
1614 ret = __get_user_pages_fast(addr, 1, 0, &page);
1615 if (!ret)
1616 break;
1617
1618 offset = addr & (PAGE_SIZE - 1);
1619 size = min(PAGE_SIZE - offset, n - len);
1620
1621 map = kmap_atomic(page, type);
1622 memcpy(to, map+offset, size);
1623 kunmap_atomic(map, type);
1624 put_page(page);
1625
1626 len += size;
1627 to += size;
1628 addr += size;
1629
1630 } while (len < n);
1631
1632 return len;
1633}
1634
1635#ifdef CONFIG_COMPAT 1628#ifdef CONFIG_COMPAT
1636static inline int 1629static inline int
1637perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) 1630perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
@@ -1727,6 +1720,11 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1727{ 1720{
1728 struct perf_callchain_entry *entry; 1721 struct perf_callchain_entry *entry;
1729 1722
1723 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1724 /* TODO: We don't support guest os callchain now */
1725 return NULL;
1726 }
1727
1730 if (in_nmi()) 1728 if (in_nmi())
1731 entry = &__get_cpu_var(pmc_nmi_entry); 1729 entry = &__get_cpu_var(pmc_nmi_entry);
1732 else 1730 else
@@ -1750,3 +1748,29 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski
1750 regs->cs = __KERNEL_CS; 1748 regs->cs = __KERNEL_CS;
1751 local_save_flags(regs->flags); 1749 local_save_flags(regs->flags);
1752} 1750}
1751
1752unsigned long perf_instruction_pointer(struct pt_regs *regs)
1753{
1754 unsigned long ip;
1755 if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
1756 ip = perf_guest_cbs->get_guest_ip();
1757 else
1758 ip = instruction_pointer(regs);
1759 return ip;
1760}
1761
1762unsigned long perf_misc_flags(struct pt_regs *regs)
1763{
1764 int misc = 0;
1765 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1766 misc |= perf_guest_cbs->is_user_mode() ?
1767 PERF_RECORD_MISC_GUEST_USER :
1768 PERF_RECORD_MISC_GUEST_KERNEL;
1769 } else
1770 misc |= user_mode(regs) ? PERF_RECORD_MISC_USER :
1771 PERF_RECORD_MISC_KERNEL;
1772 if (regs->flags & PERF_EFLAGS_EXACT)
1773 misc |= PERF_RECORD_MISC_EXACT;
1774
1775 return misc;
1776}
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index db6f7d4056e1..611df11ba15e 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -2,7 +2,7 @@
2 2
3static DEFINE_RAW_SPINLOCK(amd_nb_lock); 3static DEFINE_RAW_SPINLOCK(amd_nb_lock);
4 4
5static __initconst u64 amd_hw_cache_event_ids 5static __initconst const u64 amd_hw_cache_event_ids
6 [PERF_COUNT_HW_CACHE_MAX] 6 [PERF_COUNT_HW_CACHE_MAX]
7 [PERF_COUNT_HW_CACHE_OP_MAX] 7 [PERF_COUNT_HW_CACHE_OP_MAX]
8 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 8 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -111,22 +111,19 @@ static u64 amd_pmu_event_map(int hw_event)
111 return amd_perfmon_event_map[hw_event]; 111 return amd_perfmon_event_map[hw_event];
112} 112}
113 113
114static u64 amd_pmu_raw_event(u64 hw_event) 114static int amd_pmu_hw_config(struct perf_event *event)
115{ 115{
116#define K7_EVNTSEL_EVENT_MASK 0xF000000FFULL 116 int ret = x86_pmu_hw_config(event);
117#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL 117
118#define K7_EVNTSEL_EDGE_MASK 0x000040000ULL 118 if (ret)
119#define K7_EVNTSEL_INV_MASK 0x000800000ULL 119 return ret;
120#define K7_EVNTSEL_REG_MASK 0x0FF000000ULL 120
121 121 if (event->attr.type != PERF_TYPE_RAW)
122#define K7_EVNTSEL_MASK \ 122 return 0;
123 (K7_EVNTSEL_EVENT_MASK | \ 123
124 K7_EVNTSEL_UNIT_MASK | \ 124 event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
125 K7_EVNTSEL_EDGE_MASK | \ 125
126 K7_EVNTSEL_INV_MASK | \ 126 return 0;
127 K7_EVNTSEL_REG_MASK)
128
129 return hw_event & K7_EVNTSEL_MASK;
130} 127}
131 128
132/* 129/*
@@ -165,7 +162,7 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
165 * be removed on one CPU at a time AND PMU is disabled 162 * be removed on one CPU at a time AND PMU is disabled
166 * when we come here 163 * when we come here
167 */ 164 */
168 for (i = 0; i < x86_pmu.num_events; i++) { 165 for (i = 0; i < x86_pmu.num_counters; i++) {
169 if (nb->owners[i] == event) { 166 if (nb->owners[i] == event) {
170 cmpxchg(nb->owners+i, event, NULL); 167 cmpxchg(nb->owners+i, event, NULL);
171 break; 168 break;
@@ -215,7 +212,7 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
215 struct hw_perf_event *hwc = &event->hw; 212 struct hw_perf_event *hwc = &event->hw;
216 struct amd_nb *nb = cpuc->amd_nb; 213 struct amd_nb *nb = cpuc->amd_nb;
217 struct perf_event *old = NULL; 214 struct perf_event *old = NULL;
218 int max = x86_pmu.num_events; 215 int max = x86_pmu.num_counters;
219 int i, j, k = -1; 216 int i, j, k = -1;
220 217
221 /* 218 /*
@@ -293,7 +290,7 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
293 /* 290 /*
294 * initialize all possible NB constraints 291 * initialize all possible NB constraints
295 */ 292 */
296 for (i = 0; i < x86_pmu.num_events; i++) { 293 for (i = 0; i < x86_pmu.num_counters; i++) {
297 __set_bit(i, nb->event_constraints[i].idxmsk); 294 __set_bit(i, nb->event_constraints[i].idxmsk);
298 nb->event_constraints[i].weight = 1; 295 nb->event_constraints[i].weight = 1;
299 } 296 }
@@ -371,21 +368,22 @@ static void amd_pmu_cpu_dead(int cpu)
371 raw_spin_unlock(&amd_nb_lock); 368 raw_spin_unlock(&amd_nb_lock);
372} 369}
373 370
374static __initconst struct x86_pmu amd_pmu = { 371static __initconst const struct x86_pmu amd_pmu = {
375 .name = "AMD", 372 .name = "AMD",
376 .handle_irq = x86_pmu_handle_irq, 373 .handle_irq = x86_pmu_handle_irq,
377 .disable_all = x86_pmu_disable_all, 374 .disable_all = x86_pmu_disable_all,
378 .enable_all = x86_pmu_enable_all, 375 .enable_all = x86_pmu_enable_all,
379 .enable = x86_pmu_enable_event, 376 .enable = x86_pmu_enable_event,
380 .disable = x86_pmu_disable_event, 377 .disable = x86_pmu_disable_event,
378 .hw_config = amd_pmu_hw_config,
379 .schedule_events = x86_schedule_events,
381 .eventsel = MSR_K7_EVNTSEL0, 380 .eventsel = MSR_K7_EVNTSEL0,
382 .perfctr = MSR_K7_PERFCTR0, 381 .perfctr = MSR_K7_PERFCTR0,
383 .event_map = amd_pmu_event_map, 382 .event_map = amd_pmu_event_map,
384 .raw_event = amd_pmu_raw_event,
385 .max_events = ARRAY_SIZE(amd_perfmon_event_map), 383 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
386 .num_events = 4, 384 .num_counters = 4,
387 .event_bits = 48, 385 .cntval_bits = 48,
388 .event_mask = (1ULL << 48) - 1, 386 .cntval_mask = (1ULL << 48) - 1,
389 .apic = 1, 387 .apic = 1,
390 /* use highest bit to detect overflow */ 388 /* use highest bit to detect overflow */
391 .max_period = (1ULL << 47) - 1, 389 .max_period = (1ULL << 47) - 1,
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 9c794ac87837..a099df96f916 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -88,7 +88,7 @@ static u64 intel_pmu_event_map(int hw_event)
88 return intel_perfmon_event_map[hw_event]; 88 return intel_perfmon_event_map[hw_event];
89} 89}
90 90
91static __initconst u64 westmere_hw_cache_event_ids 91static __initconst const u64 westmere_hw_cache_event_ids
92 [PERF_COUNT_HW_CACHE_MAX] 92 [PERF_COUNT_HW_CACHE_MAX]
93 [PERF_COUNT_HW_CACHE_OP_MAX] 93 [PERF_COUNT_HW_CACHE_OP_MAX]
94 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 94 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -179,7 +179,7 @@ static __initconst u64 westmere_hw_cache_event_ids
179 }, 179 },
180}; 180};
181 181
182static __initconst u64 nehalem_hw_cache_event_ids 182static __initconst const u64 nehalem_hw_cache_event_ids
183 [PERF_COUNT_HW_CACHE_MAX] 183 [PERF_COUNT_HW_CACHE_MAX]
184 [PERF_COUNT_HW_CACHE_OP_MAX] 184 [PERF_COUNT_HW_CACHE_OP_MAX]
185 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 185 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -270,7 +270,7 @@ static __initconst u64 nehalem_hw_cache_event_ids
270 }, 270 },
271}; 271};
272 272
273static __initconst u64 core2_hw_cache_event_ids 273static __initconst const u64 core2_hw_cache_event_ids
274 [PERF_COUNT_HW_CACHE_MAX] 274 [PERF_COUNT_HW_CACHE_MAX]
275 [PERF_COUNT_HW_CACHE_OP_MAX] 275 [PERF_COUNT_HW_CACHE_OP_MAX]
276 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 276 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -361,7 +361,7 @@ static __initconst u64 core2_hw_cache_event_ids
361 }, 361 },
362}; 362};
363 363
364static __initconst u64 atom_hw_cache_event_ids 364static __initconst const u64 atom_hw_cache_event_ids
365 [PERF_COUNT_HW_CACHE_MAX] 365 [PERF_COUNT_HW_CACHE_MAX]
366 [PERF_COUNT_HW_CACHE_OP_MAX] 366 [PERF_COUNT_HW_CACHE_OP_MAX]
367 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 367 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -452,60 +452,6 @@ static __initconst u64 atom_hw_cache_event_ids
452 }, 452 },
453}; 453};
454 454
455static u64 intel_pmu_raw_event(u64 hw_event)
456{
457#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL
458#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL
459#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL
460#define CORE_EVNTSEL_INV_MASK 0x00800000ULL
461#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL
462
463#define CORE_EVNTSEL_MASK \
464 (INTEL_ARCH_EVTSEL_MASK | \
465 INTEL_ARCH_UNIT_MASK | \
466 INTEL_ARCH_EDGE_MASK | \
467 INTEL_ARCH_INV_MASK | \
468 INTEL_ARCH_CNT_MASK)
469
470 return hw_event & CORE_EVNTSEL_MASK;
471}
472
473static void intel_pmu_enable_bts(u64 config)
474{
475 unsigned long debugctlmsr;
476
477 debugctlmsr = get_debugctlmsr();
478
479 debugctlmsr |= X86_DEBUGCTL_TR;
480 debugctlmsr |= X86_DEBUGCTL_BTS;
481 debugctlmsr |= X86_DEBUGCTL_BTINT;
482
483 if (!(config & ARCH_PERFMON_EVENTSEL_OS))
484 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
485
486 if (!(config & ARCH_PERFMON_EVENTSEL_USR))
487 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
488
489 update_debugctlmsr(debugctlmsr);
490}
491
492static void intel_pmu_disable_bts(void)
493{
494 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
495 unsigned long debugctlmsr;
496
497 if (!cpuc->ds)
498 return;
499
500 debugctlmsr = get_debugctlmsr();
501
502 debugctlmsr &=
503 ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
504 X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
505
506 update_debugctlmsr(debugctlmsr);
507}
508
509static void intel_pmu_disable_all(void) 455static void intel_pmu_disable_all(void)
510{ 456{
511 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 457 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -514,12 +460,17 @@ static void intel_pmu_disable_all(void)
514 460
515 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) 461 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
516 intel_pmu_disable_bts(); 462 intel_pmu_disable_bts();
463
464 intel_pmu_pebs_disable_all();
465 intel_pmu_lbr_disable_all();
517} 466}
518 467
519static void intel_pmu_enable_all(void) 468static void intel_pmu_enable_all(int added)
520{ 469{
521 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 470 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
522 471
472 intel_pmu_pebs_enable_all();
473 intel_pmu_lbr_enable_all();
523 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); 474 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
524 475
525 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { 476 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
@@ -533,6 +484,41 @@ static void intel_pmu_enable_all(void)
533 } 484 }
534} 485}
535 486
487/*
488 * Workaround for:
489 * Intel Errata AAK100 (model 26)
490 * Intel Errata AAP53 (model 30)
491 * Intel Errata BD53 (model 44)
492 *
493 * These chips need to be 'reset' when adding counters by programming
494 * the magic three (non counting) events 0x4300D2, 0x4300B1 and 0x4300B5
495 * either in sequence on the same PMC or on different PMCs.
496 */
497static void intel_pmu_nhm_enable_all(int added)
498{
499 if (added) {
500 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
501 int i;
502
503 wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 0, 0x4300D2);
504 wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x4300B1);
505 wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x4300B5);
506
507 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x3);
508 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
509
510 for (i = 0; i < 3; i++) {
511 struct perf_event *event = cpuc->events[i];
512
513 if (!event)
514 continue;
515
516 __x86_pmu_enable_event(&event->hw);
517 }
518 }
519 intel_pmu_enable_all(added);
520}
521
536static inline u64 intel_pmu_get_status(void) 522static inline u64 intel_pmu_get_status(void)
537{ 523{
538 u64 status; 524 u64 status;
@@ -547,8 +533,7 @@ static inline void intel_pmu_ack_status(u64 ack)
547 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); 533 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
548} 534}
549 535
550static inline void 536static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
551intel_pmu_disable_fixed(struct hw_perf_event *hwc)
552{ 537{
553 int idx = hwc->idx - X86_PMC_IDX_FIXED; 538 int idx = hwc->idx - X86_PMC_IDX_FIXED;
554 u64 ctrl_val, mask; 539 u64 ctrl_val, mask;
@@ -557,71 +542,10 @@ intel_pmu_disable_fixed(struct hw_perf_event *hwc)
557 542
558 rdmsrl(hwc->config_base, ctrl_val); 543 rdmsrl(hwc->config_base, ctrl_val);
559 ctrl_val &= ~mask; 544 ctrl_val &= ~mask;
560 (void)checking_wrmsrl(hwc->config_base, ctrl_val); 545 wrmsrl(hwc->config_base, ctrl_val);
561}
562
563static void intel_pmu_drain_bts_buffer(void)
564{
565 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
566 struct debug_store *ds = cpuc->ds;
567 struct bts_record {
568 u64 from;
569 u64 to;
570 u64 flags;
571 };
572 struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
573 struct bts_record *at, *top;
574 struct perf_output_handle handle;
575 struct perf_event_header header;
576 struct perf_sample_data data;
577 struct pt_regs regs;
578
579 if (!event)
580 return;
581
582 if (!ds)
583 return;
584
585 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
586 top = (struct bts_record *)(unsigned long)ds->bts_index;
587
588 if (top <= at)
589 return;
590
591 ds->bts_index = ds->bts_buffer_base;
592
593 perf_sample_data_init(&data, 0);
594
595 data.period = event->hw.last_period;
596 regs.ip = 0;
597
598 /*
599 * Prepare a generic sample, i.e. fill in the invariant fields.
600 * We will overwrite the from and to address before we output
601 * the sample.
602 */
603 perf_prepare_sample(&header, &data, event, &regs);
604
605 if (perf_output_begin(&handle, event,
606 header.size * (top - at), 1, 1))
607 return;
608
609 for (; at < top; at++) {
610 data.ip = at->from;
611 data.addr = at->to;
612
613 perf_output_sample(&handle, &header, &data, event);
614 }
615
616 perf_output_end(&handle);
617
618 /* There's new data available. */
619 event->hw.interrupts++;
620 event->pending_kill = POLL_IN;
621} 546}
622 547
623static inline void 548static void intel_pmu_disable_event(struct perf_event *event)
624intel_pmu_disable_event(struct perf_event *event)
625{ 549{
626 struct hw_perf_event *hwc = &event->hw; 550 struct hw_perf_event *hwc = &event->hw;
627 551
@@ -637,14 +561,15 @@ intel_pmu_disable_event(struct perf_event *event)
637 } 561 }
638 562
639 x86_pmu_disable_event(event); 563 x86_pmu_disable_event(event);
564
565 if (unlikely(event->attr.precise))
566 intel_pmu_pebs_disable(event);
640} 567}
641 568
642static inline void 569static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
643intel_pmu_enable_fixed(struct hw_perf_event *hwc)
644{ 570{
645 int idx = hwc->idx - X86_PMC_IDX_FIXED; 571 int idx = hwc->idx - X86_PMC_IDX_FIXED;
646 u64 ctrl_val, bits, mask; 572 u64 ctrl_val, bits, mask;
647 int err;
648 573
649 /* 574 /*
650 * Enable IRQ generation (0x8), 575 * Enable IRQ generation (0x8),
@@ -669,7 +594,7 @@ intel_pmu_enable_fixed(struct hw_perf_event *hwc)
669 rdmsrl(hwc->config_base, ctrl_val); 594 rdmsrl(hwc->config_base, ctrl_val);
670 ctrl_val &= ~mask; 595 ctrl_val &= ~mask;
671 ctrl_val |= bits; 596 ctrl_val |= bits;
672 err = checking_wrmsrl(hwc->config_base, ctrl_val); 597 wrmsrl(hwc->config_base, ctrl_val);
673} 598}
674 599
675static void intel_pmu_enable_event(struct perf_event *event) 600static void intel_pmu_enable_event(struct perf_event *event)
@@ -689,6 +614,9 @@ static void intel_pmu_enable_event(struct perf_event *event)
689 return; 614 return;
690 } 615 }
691 616
617 if (unlikely(event->attr.precise))
618 intel_pmu_pebs_enable(event);
619
692 __x86_pmu_enable_event(hwc); 620 __x86_pmu_enable_event(hwc);
693} 621}
694 622
@@ -708,20 +636,20 @@ static void intel_pmu_reset(void)
708 unsigned long flags; 636 unsigned long flags;
709 int idx; 637 int idx;
710 638
711 if (!x86_pmu.num_events) 639 if (!x86_pmu.num_counters)
712 return; 640 return;
713 641
714 local_irq_save(flags); 642 local_irq_save(flags);
715 643
716 printk("clearing PMU state on CPU#%d\n", smp_processor_id()); 644 printk("clearing PMU state on CPU#%d\n", smp_processor_id());
717 645
718 for (idx = 0; idx < x86_pmu.num_events; idx++) { 646 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
719 checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); 647 checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
720 checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); 648 checking_wrmsrl(x86_pmu.perfctr + idx, 0ull);
721 } 649 }
722 for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { 650 for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
723 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); 651 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
724 } 652
725 if (ds) 653 if (ds)
726 ds->bts_index = ds->bts_buffer_base; 654 ds->bts_index = ds->bts_buffer_base;
727 655
@@ -747,7 +675,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
747 intel_pmu_drain_bts_buffer(); 675 intel_pmu_drain_bts_buffer();
748 status = intel_pmu_get_status(); 676 status = intel_pmu_get_status();
749 if (!status) { 677 if (!status) {
750 intel_pmu_enable_all(); 678 intel_pmu_enable_all(0);
751 return 0; 679 return 0;
752 } 680 }
753 681
@@ -762,6 +690,15 @@ again:
762 690
763 inc_irq_stat(apic_perf_irqs); 691 inc_irq_stat(apic_perf_irqs);
764 ack = status; 692 ack = status;
693
694 intel_pmu_lbr_read();
695
696 /*
697 * PEBS overflow sets bit 62 in the global status register
698 */
699 if (__test_and_clear_bit(62, (unsigned long *)&status))
700 x86_pmu.drain_pebs(regs);
701
765 for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { 702 for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
766 struct perf_event *event = cpuc->events[bit]; 703 struct perf_event *event = cpuc->events[bit];
767 704
@@ -787,26 +724,22 @@ again:
787 goto again; 724 goto again;
788 725
789done: 726done:
790 intel_pmu_enable_all(); 727 intel_pmu_enable_all(0);
791 return 1; 728 return 1;
792} 729}
793 730
794static struct event_constraint bts_constraint =
795 EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
796
797static struct event_constraint * 731static struct event_constraint *
798intel_special_constraints(struct perf_event *event) 732intel_bts_constraints(struct perf_event *event)
799{ 733{
800 unsigned int hw_event; 734 struct hw_perf_event *hwc = &event->hw;
801 735 unsigned int hw_event, bts_event;
802 hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK;
803 736
804 if (unlikely((hw_event == 737 hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
805 x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && 738 bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
806 (event->hw.sample_period == 1))) {
807 739
740 if (unlikely(hw_event == bts_event && hwc->sample_period == 1))
808 return &bts_constraint; 741 return &bts_constraint;
809 } 742
810 return NULL; 743 return NULL;
811} 744}
812 745
@@ -815,24 +748,53 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
815{ 748{
816 struct event_constraint *c; 749 struct event_constraint *c;
817 750
818 c = intel_special_constraints(event); 751 c = intel_bts_constraints(event);
752 if (c)
753 return c;
754
755 c = intel_pebs_constraints(event);
819 if (c) 756 if (c)
820 return c; 757 return c;
821 758
822 return x86_get_event_constraints(cpuc, event); 759 return x86_get_event_constraints(cpuc, event);
823} 760}
824 761
825static __initconst struct x86_pmu core_pmu = { 762static int intel_pmu_hw_config(struct perf_event *event)
763{
764 int ret = x86_pmu_hw_config(event);
765
766 if (ret)
767 return ret;
768
769 if (event->attr.type != PERF_TYPE_RAW)
770 return 0;
771
772 if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
773 return 0;
774
775 if (x86_pmu.version < 3)
776 return -EINVAL;
777
778 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
779 return -EACCES;
780
781 event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;
782
783 return 0;
784}
785
786static __initconst const struct x86_pmu core_pmu = {
826 .name = "core", 787 .name = "core",
827 .handle_irq = x86_pmu_handle_irq, 788 .handle_irq = x86_pmu_handle_irq,
828 .disable_all = x86_pmu_disable_all, 789 .disable_all = x86_pmu_disable_all,
829 .enable_all = x86_pmu_enable_all, 790 .enable_all = x86_pmu_enable_all,
830 .enable = x86_pmu_enable_event, 791 .enable = x86_pmu_enable_event,
831 .disable = x86_pmu_disable_event, 792 .disable = x86_pmu_disable_event,
793 .hw_config = x86_pmu_hw_config,
794 .schedule_events = x86_schedule_events,
832 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, 795 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
833 .perfctr = MSR_ARCH_PERFMON_PERFCTR0, 796 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
834 .event_map = intel_pmu_event_map, 797 .event_map = intel_pmu_event_map,
835 .raw_event = intel_pmu_raw_event,
836 .max_events = ARRAY_SIZE(intel_perfmon_event_map), 798 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
837 .apic = 1, 799 .apic = 1,
838 /* 800 /*
@@ -845,17 +807,32 @@ static __initconst struct x86_pmu core_pmu = {
845 .event_constraints = intel_core_event_constraints, 807 .event_constraints = intel_core_event_constraints,
846}; 808};
847 809
848static __initconst struct x86_pmu intel_pmu = { 810static void intel_pmu_cpu_starting(int cpu)
811{
812 init_debug_store_on_cpu(cpu);
813 /*
814 * Deal with CPUs that don't clear their LBRs on power-up.
815 */
816 intel_pmu_lbr_reset();
817}
818
819static void intel_pmu_cpu_dying(int cpu)
820{
821 fini_debug_store_on_cpu(cpu);
822}
823
824static __initconst const struct x86_pmu intel_pmu = {
849 .name = "Intel", 825 .name = "Intel",
850 .handle_irq = intel_pmu_handle_irq, 826 .handle_irq = intel_pmu_handle_irq,
851 .disable_all = intel_pmu_disable_all, 827 .disable_all = intel_pmu_disable_all,
852 .enable_all = intel_pmu_enable_all, 828 .enable_all = intel_pmu_enable_all,
853 .enable = intel_pmu_enable_event, 829 .enable = intel_pmu_enable_event,
854 .disable = intel_pmu_disable_event, 830 .disable = intel_pmu_disable_event,
831 .hw_config = intel_pmu_hw_config,
832 .schedule_events = x86_schedule_events,
855 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, 833 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
856 .perfctr = MSR_ARCH_PERFMON_PERFCTR0, 834 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
857 .event_map = intel_pmu_event_map, 835 .event_map = intel_pmu_event_map,
858 .raw_event = intel_pmu_raw_event,
859 .max_events = ARRAY_SIZE(intel_perfmon_event_map), 836 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
860 .apic = 1, 837 .apic = 1,
861 /* 838 /*
@@ -864,14 +841,38 @@ static __initconst struct x86_pmu intel_pmu = {
864 * the generic event period: 841 * the generic event period:
865 */ 842 */
866 .max_period = (1ULL << 31) - 1, 843 .max_period = (1ULL << 31) - 1,
867 .enable_bts = intel_pmu_enable_bts,
868 .disable_bts = intel_pmu_disable_bts,
869 .get_event_constraints = intel_get_event_constraints, 844 .get_event_constraints = intel_get_event_constraints,
870 845
871 .cpu_starting = init_debug_store_on_cpu, 846 .cpu_starting = intel_pmu_cpu_starting,
872 .cpu_dying = fini_debug_store_on_cpu, 847 .cpu_dying = intel_pmu_cpu_dying,
873}; 848};
874 849
850static void intel_clovertown_quirks(void)
851{
852 /*
853 * PEBS is unreliable due to:
854 *
855 * AJ67 - PEBS may experience CPL leaks
856 * AJ68 - PEBS PMI may be delayed by one event
857 * AJ69 - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12]
858 * AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS
859 *
860 * AJ67 could be worked around by restricting the OS/USR flags.
861 * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI.
862 *
863 * AJ106 could possibly be worked around by not allowing LBR
864 * usage from PEBS, including the fixup.
865 * AJ68 could possibly be worked around by always programming
866 * a pebs_event_reset[0] value and coping with the lost events.
867 *
868 * But taken together it might just make sense to not enable PEBS on
869 * these chips.
870 */
871 printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
872 x86_pmu.pebs = 0;
873 x86_pmu.pebs_constraints = NULL;
874}
875
875static __init int intel_pmu_init(void) 876static __init int intel_pmu_init(void)
876{ 877{
877 union cpuid10_edx edx; 878 union cpuid10_edx edx;
@@ -881,12 +882,13 @@ static __init int intel_pmu_init(void)
881 int version; 882 int version;
882 883
883 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { 884 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
884 /* check for P6 processor family */ 885 switch (boot_cpu_data.x86) {
885 if (boot_cpu_data.x86 == 6) { 886 case 0x6:
886 return p6_pmu_init(); 887 return p6_pmu_init();
887 } else { 888 case 0xf:
889 return p4_pmu_init();
890 }
888 return -ENODEV; 891 return -ENODEV;
889 }
890 } 892 }
891 893
892 /* 894 /*
@@ -904,16 +906,28 @@ static __init int intel_pmu_init(void)
904 x86_pmu = intel_pmu; 906 x86_pmu = intel_pmu;
905 907
906 x86_pmu.version = version; 908 x86_pmu.version = version;
907 x86_pmu.num_events = eax.split.num_events; 909 x86_pmu.num_counters = eax.split.num_counters;
908 x86_pmu.event_bits = eax.split.bit_width; 910 x86_pmu.cntval_bits = eax.split.bit_width;
909 x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1; 911 x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;
910 912
911 /* 913 /*
912 * Quirk: v2 perfmon does not report fixed-purpose events, so 914 * Quirk: v2 perfmon does not report fixed-purpose events, so
913 * assume at least 3 events: 915 * assume at least 3 events:
914 */ 916 */
915 if (version > 1) 917 if (version > 1)
916 x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); 918 x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
919
920 /*
921 * v2 and above have a perf capabilities MSR
922 */
923 if (version > 1) {
924 u64 capabilities;
925
926 rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
927 x86_pmu.intel_cap.capabilities = capabilities;
928 }
929
930 intel_ds_init();
917 931
918 /* 932 /*
919 * Install the hw-cache-events table: 933 * Install the hw-cache-events table:
@@ -924,12 +938,15 @@ static __init int intel_pmu_init(void)
924 break; 938 break;
925 939
926 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ 940 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
941 x86_pmu.quirks = intel_clovertown_quirks;
927 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ 942 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
928 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ 943 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
929 case 29: /* six-core 45 nm xeon "Dunnington" */ 944 case 29: /* six-core 45 nm xeon "Dunnington" */
930 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, 945 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
931 sizeof(hw_cache_event_ids)); 946 sizeof(hw_cache_event_ids));
932 947
948 intel_pmu_lbr_init_core();
949
933 x86_pmu.event_constraints = intel_core2_event_constraints; 950 x86_pmu.event_constraints = intel_core2_event_constraints;
934 pr_cont("Core2 events, "); 951 pr_cont("Core2 events, ");
935 break; 952 break;
@@ -940,13 +957,19 @@ static __init int intel_pmu_init(void)
940 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, 957 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
941 sizeof(hw_cache_event_ids)); 958 sizeof(hw_cache_event_ids));
942 959
960 intel_pmu_lbr_init_nhm();
961
943 x86_pmu.event_constraints = intel_nehalem_event_constraints; 962 x86_pmu.event_constraints = intel_nehalem_event_constraints;
944 pr_cont("Nehalem/Corei7 events, "); 963 x86_pmu.enable_all = intel_pmu_nhm_enable_all;
964 pr_cont("Nehalem events, ");
945 break; 965 break;
966
946 case 28: /* Atom */ 967 case 28: /* Atom */
947 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, 968 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
948 sizeof(hw_cache_event_ids)); 969 sizeof(hw_cache_event_ids));
949 970
971 intel_pmu_lbr_init_atom();
972
950 x86_pmu.event_constraints = intel_gen_event_constraints; 973 x86_pmu.event_constraints = intel_gen_event_constraints;
951 pr_cont("Atom events, "); 974 pr_cont("Atom events, ");
952 break; 975 break;
@@ -956,7 +979,10 @@ static __init int intel_pmu_init(void)
956 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, 979 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
957 sizeof(hw_cache_event_ids)); 980 sizeof(hw_cache_event_ids));
958 981
982 intel_pmu_lbr_init_nhm();
983
959 x86_pmu.event_constraints = intel_westmere_event_constraints; 984 x86_pmu.event_constraints = intel_westmere_event_constraints;
985 x86_pmu.enable_all = intel_pmu_nhm_enable_all;
960 pr_cont("Westmere events, "); 986 pr_cont("Westmere events, ");
961 break; 987 break;
962 988
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
new file mode 100644
index 000000000000..ec8b2e12e104
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -0,0 +1,664 @@
1#ifdef CONFIG_CPU_SUP_INTEL
2
3/* The maximal number of PEBS events: */
4#define MAX_PEBS_EVENTS 4
5
6/* The size of a BTS record in bytes: */
7#define BTS_RECORD_SIZE 24
8
9#define BTS_BUFFER_SIZE (PAGE_SIZE << 4)
10#define PEBS_BUFFER_SIZE PAGE_SIZE
11
12/*
13 * pebs_record_32 for p4 and core not supported
14
15struct pebs_record_32 {
16 u32 flags, ip;
17 u32 ax, bc, cx, dx;
18 u32 si, di, bp, sp;
19};
20
21 */
22
/*
 * PEBS record as written by the hardware on Core CPUs (PEBS format 0):
 * flags/IP plus a full 64-bit general-purpose register snapshot.
 */
struct pebs_record_core {
	u64 flags, ip;
	u64 ax, bx, cx, dx;
	u64 si, di, bp, sp;
	u64 r8, r9, r10, r11;
	u64 r12, r13, r14, r15;
};
30
/*
 * PEBS record for Nehalem (PEBS format 1): the Core layout extended
 * with a per-counter status word and data-source/latency fields.
 */
struct pebs_record_nhm {
	u64 flags, ip;
	u64 ax, bx, cx, dx;
	u64 si, di, bp, sp;
	u64 r8, r9, r10, r11;
	u64 r12, r13, r14, r15;
	/* status: which PEBS counters overflowed; dla/dse/lat: data addr/source/latency */
	u64 status, dla, dse, lat;
};
39
/*
 * A debug store configuration.
 *
 * This mirrors the hardware-defined DS area management region pointed
 * at by MSR_IA32_DS_AREA; the base/index/maximum/threshold quadruples
 * describe the BTS and PEBS ring buffers respectively.
 *
 * We only support architectures that use 64bit fields.
 */
struct debug_store {
	u64 bts_buffer_base;
	u64 bts_index;
	u64 bts_absolute_maximum;
	u64 bts_interrupt_threshold;
	u64 pebs_buffer_base;
	u64 pebs_index;
	u64 pebs_absolute_maximum;
	u64 pebs_interrupt_threshold;
	/* per-counter reset values applied by hardware after each PEBS record */
	u64 pebs_event_reset[MAX_PEBS_EVENTS];
};
56
/*
 * Point a CPU's MSR_IA32_DS_AREA at its per-cpu debug_store, if one
 * has been allocated.  Safe to call from any CPU (uses wrmsr_on_cpu).
 */
static void init_debug_store_on_cpu(int cpu)
{
	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

	if (!ds)
		return;

	/* split the 64-bit pointer into the lo/hi MSR halves */
	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
		     (u32)((u64)(unsigned long)ds),
		     (u32)((u64)(unsigned long)ds >> 32));
}
68
/*
 * Detach the DS area from a CPU by clearing MSR_IA32_DS_AREA;
 * no-op when the CPU never had a debug_store allocated.
 */
static void fini_debug_store_on_cpu(int cpu)
{
	if (!per_cpu(cpu_hw_events, cpu).ds)
		return;

	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
}
76
/*
 * Tear down all per-cpu DS state: detach the DS area from every online
 * CPU, then free the BTS/PEBS buffers and debug_store of every possible
 * CPU.  Also used as the error path of reserve_ds_buffers(), so it must
 * tolerate partially-initialized state (NULL ds, NULL buffer bases).
 */
static void release_ds_buffers(void)
{
	int cpu;

	if (!x86_pmu.bts && !x86_pmu.pebs)
		return;

	/* hold off CPU hotplug while we walk the cpu masks */
	get_online_cpus();

	/* detach the DS area before freeing, so hardware stops writing */
	for_each_online_cpu(cpu)
		fini_debug_store_on_cpu(cpu);

	for_each_possible_cpu(cpu) {
		struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;

		if (!ds)
			continue;

		per_cpu(cpu_hw_events, cpu).ds = NULL;

		/* kfree(NULL-equivalent base) is safe: base is 0 when unallocated */
		kfree((void *)(unsigned long)ds->pebs_buffer_base);
		kfree((void *)(unsigned long)ds->bts_buffer_base);
		kfree(ds);
	}

	put_online_cpus();
}
104
/*
 * Allocate a debug_store plus BTS and/or PEBS buffers for every
 * possible CPU and program the DS area on all online CPUs.
 *
 * Returns 0 on success or -ENOMEM; on failure all partial allocations
 * are rolled back via release_ds_buffers().
 */
static int reserve_ds_buffers(void)
{
	int cpu, err = 0;

	if (!x86_pmu.bts && !x86_pmu.pebs)
		return 0;

	get_online_cpus();

	for_each_possible_cpu(cpu) {
		struct debug_store *ds;
		void *buffer;
		int max, thresh;

		/* assume failure; err is reset to 0 at the end of each iteration */
		err = -ENOMEM;
		ds = kzalloc(sizeof(*ds), GFP_KERNEL);
		if (unlikely(!ds))
			break;
		/* publish before buffer setup so the error path can free it */
		per_cpu(cpu_hw_events, cpu).ds = ds;

		if (x86_pmu.bts) {
			buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
			if (unlikely(!buffer))
				break;

			/* interrupt when the buffer is ~15/16 full */
			max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
			thresh = max / 16;

			ds->bts_buffer_base = (u64)(unsigned long)buffer;
			ds->bts_index = ds->bts_buffer_base;
			ds->bts_absolute_maximum = ds->bts_buffer_base +
				max * BTS_RECORD_SIZE;
			ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
				thresh * BTS_RECORD_SIZE;
		}

		if (x86_pmu.pebs) {
			buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL);
			if (unlikely(!buffer))
				break;

			max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;

			ds->pebs_buffer_base = (u64)(unsigned long)buffer;
			ds->pebs_index = ds->pebs_buffer_base;
			ds->pebs_absolute_maximum = ds->pebs_buffer_base +
				max * x86_pmu.pebs_record_size;
			/*
			 * Always use single record PEBS
			 */
			ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
				x86_pmu.pebs_record_size;
		}

		err = 0;
	}

	if (err)
		release_ds_buffers();
	else {
		for_each_online_cpu(cpu)
			init_debug_store_on_cpu(cpu);
	}

	put_online_cpus();

	return err;
}
173
/*
 * BTS
 */

/* BTS events may only live on the fixed pseudo-counter reserved for BTS */
static struct event_constraint bts_constraint =
	EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
180
181static void intel_pmu_enable_bts(u64 config)
182{
183 unsigned long debugctlmsr;
184
185 debugctlmsr = get_debugctlmsr();
186
187 debugctlmsr |= DEBUGCTLMSR_TR;
188 debugctlmsr |= DEBUGCTLMSR_BTS;
189 debugctlmsr |= DEBUGCTLMSR_BTINT;
190
191 if (!(config & ARCH_PERFMON_EVENTSEL_OS))
192 debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;
193
194 if (!(config & ARCH_PERFMON_EVENTSEL_USR))
195 debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR;
196
197 update_debugctlmsr(debugctlmsr);
198}
199
/*
 * Stop Branch Trace Store on the current CPU by clearing every
 * BTS-related bit in DEBUGCTL.  No-op if no DS area is attached.
 */
static void intel_pmu_disable_bts(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	unsigned long debugctlmsr;

	if (!cpuc->ds)
		return;

	debugctlmsr = get_debugctlmsr();

	debugctlmsr &=
		~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
		  DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);

	update_debugctlmsr(debugctlmsr);
}
216
/*
 * Flush the BTS buffer: turn every hardware branch record accumulated
 * in the DS area into a perf sample (branch source in data.ip, target
 * in data.addr) and emit them all through the BTS event's ring buffer.
 */
static void intel_pmu_drain_bts_buffer(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
	/* hardware BTS record: 24 bytes, three u64 fields */
	struct bts_record {
		u64	from;
		u64	to;
		u64	flags;
	};
	struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
	struct bts_record *at, *top;
	struct perf_output_handle handle;
	struct perf_event_header header;
	struct perf_sample_data data;
	struct pt_regs regs;

	if (!event)
		return;

	if (!ds)
		return;

	at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
	top = (struct bts_record *)(unsigned long)ds->bts_index;

	if (top <= at)
		return;

	/* rewind the hardware write cursor before publishing the records */
	ds->bts_index = ds->bts_buffer_base;

	perf_sample_data_init(&data, 0);
	data.period = event->hw.last_period;
	regs.ip     = 0;

	/*
	 * Prepare a generic sample, i.e. fill in the invariant fields.
	 * We will overwrite the from and to address before we output
	 * the sample.
	 */
	perf_prepare_sample(&header, &data, event, &regs);

	/* reserve ring-buffer space for all records up front */
	if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1))
		return;

	for (; at < top; at++) {
		data.ip		= at->from;
		data.addr	= at->to;

		perf_output_sample(&handle, &header, &data, event);
	}

	perf_output_end(&handle);

	/* There's new data available. */
	event->hw.interrupts++;
	event->pending_kill = POLL_IN;
}
274
/*
 * PEBS
 */

/* PEBS-capable events on Core: each restricted to counter 0 (mask 0x1) */
static struct event_constraint intel_core_pebs_events[] = {
	PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INSTR_RETIRED.ANY */
	PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
	PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
	PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */
	PEBS_EVENT_CONSTRAINT(0x01cb, 0x1), /* MEM_LOAD_RETIRED.L1D_MISS */
	PEBS_EVENT_CONSTRAINT(0x02cb, 0x1), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */
	PEBS_EVENT_CONSTRAINT(0x04cb, 0x1), /* MEM_LOAD_RETIRED.L2_MISS */
	PEBS_EVENT_CONSTRAINT(0x08cb, 0x1), /* MEM_LOAD_RETIRED.L2_LINE_MISS */
	PEBS_EVENT_CONSTRAINT(0x10cb, 0x1), /* MEM_LOAD_RETIRED.DTLB_MISS */
	EVENT_CONSTRAINT_END
};
291
/* PEBS-capable events on Nehalem: allowed on counters 0-3 (mask 0xf) */
static struct event_constraint intel_nehalem_pebs_events[] = {
	PEBS_EVENT_CONSTRAINT(0x00c0, 0xf), /* INSTR_RETIRED.ANY */
	PEBS_EVENT_CONSTRAINT(0xfec1, 0xf), /* X87_OPS_RETIRED.ANY */
	PEBS_EVENT_CONSTRAINT(0x00c5, 0xf), /* BR_INST_RETIRED.MISPRED */
	PEBS_EVENT_CONSTRAINT(0x1fc7, 0xf), /* SIMD_INST_RETIRED.ANY */
	PEBS_EVENT_CONSTRAINT(0x01cb, 0xf), /* MEM_LOAD_RETIRED.L1D_MISS */
	PEBS_EVENT_CONSTRAINT(0x02cb, 0xf), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */
	PEBS_EVENT_CONSTRAINT(0x04cb, 0xf), /* MEM_LOAD_RETIRED.L2_MISS */
	PEBS_EVENT_CONSTRAINT(0x08cb, 0xf), /* MEM_LOAD_RETIRED.L2_LINE_MISS */
	PEBS_EVENT_CONSTRAINT(0x10cb, 0xf), /* MEM_LOAD_RETIRED.DTLB_MISS */
	EVENT_CONSTRAINT_END
};
304
305static struct event_constraint *
306intel_pebs_constraints(struct perf_event *event)
307{
308 struct event_constraint *c;
309
310 if (!event->attr.precise)
311 return NULL;
312
313 if (x86_pmu.pebs_constraints) {
314 for_each_event_constraint(c, x86_pmu.pebs_constraints) {
315 if ((event->hw.config & c->cmask) == c->code)
316 return c;
317 }
318 }
319
320 return &emptyconstraint;
321}
322
/*
 * Arm PEBS for one event: mark its counter in the per-cpu PEBS enable
 * mask and, on trap-like PEBS implementations, also enable the LBR so
 * the reported IP can later be rewound to the precise instruction.
 */
static void intel_pmu_pebs_enable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;

	/* PEBS delivers via the DS area, not the regular counter interrupt */
	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;

	cpuc->pebs_enabled |= 1ULL << hwc->idx;
	/* callers must enable events with the PMU globally disabled */
	WARN_ON_ONCE(cpuc->enabled);

	if (x86_pmu.intel_cap.pebs_trap)
		intel_pmu_lbr_enable(event);
}
336
/*
 * Disarm PEBS for one event: clear its bit in the enable mask (writing
 * the MSR immediately if the PMU is live), restore the counter's normal
 * interrupt delivery, and release the LBR on trap-like implementations.
 */
static void intel_pmu_pebs_disable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;

	cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
	if (cpuc->enabled)
		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);

	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;

	if (x86_pmu.intel_cap.pebs_trap)
		intel_pmu_lbr_disable(event);
}
351
/* Write the accumulated PEBS enable mask to the MSR, if any bit is set. */
static void intel_pmu_pebs_enable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (cpuc->pebs_enabled)
		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
}
359
/* Clear the PEBS enable MSR; the cached mask is kept for re-enabling. */
static void intel_pmu_pebs_disable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (cpuc->pebs_enabled)
		wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
}
367
368#include <asm/insn.h>
369
/*
 * Return true when @ip is a kernel-space address.
 *
 * On 32-bit, kernel text/data live above PAGE_OFFSET; on 64-bit the
 * kernel occupies the negative half of the canonical address space.
 */
static inline bool kernel_ip(unsigned long ip)
{
#ifdef CONFIG_X86_32
	return ip > PAGE_OFFSET;
#else
	return (long)ip < 0;
#endif
}
378
/*
 * Rewind a PEBS-reported IP to the instruction that actually caused the
 * event.  Trap-like PEBS reports the address of the *next* instruction;
 * using the last LBR entry as the start of the basic block, we decode
 * forward until we reach the instruction just before regs->ip and
 * rewrite regs->ip to it.
 *
 * Returns 1 when regs->ip is exact (possibly after fixup), 0 when the
 * rewind could not be performed and the IP remains approximate.
 */
static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	unsigned long from = cpuc->lbr_entries[0].from;
	unsigned long old_to, to = cpuc->lbr_entries[0].to;
	unsigned long ip = regs->ip;

	/*
	 * We don't need to fixup if the PEBS assist is fault like
	 */
	if (!x86_pmu.intel_cap.pebs_trap)
		return 1;

	/*
	 * No LBR entry, no basic block, no rewinding
	 */
	if (!cpuc->lbr_stack.nr || !from || !to)
		return 0;

	/*
	 * Basic blocks should never cross user/kernel boundaries
	 */
	if (kernel_ip(ip) != kernel_ip(to))
		return 0;

	/*
	 * unsigned math, either ip is before the start (impossible) or
	 * the basic block is larger than 1 page (sanity)
	 */
	if ((ip - to) > PAGE_SIZE)
		return 0;

	/*
	 * We sampled a branch insn, rewind using the LBR stack
	 */
	if (ip == to) {
		regs->ip = from;
		return 1;
	}

	/* decode forward from the branch target until just before ip */
	do {
		struct insn insn;
		u8 buf[MAX_INSN_SIZE];
		void *kaddr;

		old_to = to;
		if (!kernel_ip(ip)) {
			int bytes, size = MAX_INSN_SIZE;

			/* user-space block: copy the insn bytes NMI-safely */
			bytes = copy_from_user_nmi(buf, (void __user *)to, size);
			if (bytes != size)
				return 0;

			kaddr = buf;
		} else
			kaddr = (void *)to;

		kernel_insn_init(&insn, kaddr);
		insn_get_length(&insn);
		to += insn.length;
	} while (to < ip);

	if (to == ip) {
		regs->ip = old_to;
		return 1;
	}

	/*
	 * Even though we decoded the basic block, the instruction stream
	 * never matched the given IP, either the TO or the IP got corrupted.
	 */
	return 0;
}
452
/* forward declaration; implemented elsewhere in the x86 PMU code */
static int intel_pmu_save_and_restart(struct perf_event *event);

/*
 * Drain the PEBS buffer on Core (format 0).  Only counter 0 can do
 * PEBS here, so at most one record is expected; it is converted into a
 * perf sample with IP/BP/SP taken from the record and, where possible,
 * the IP fixed up to the precise instruction.
 */
static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
	struct perf_event *event = cpuc->events[0]; /* PMC0 only */
	struct pebs_record_core *at, *top;
	struct perf_sample_data data;
	struct perf_raw_record raw;
	struct pt_regs regs;
	int n;

	if (!ds || !x86_pmu.pebs)
		return;

	at  = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
	top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;

	/*
	 * Whatever else happens, drain the thing
	 */
	ds->pebs_index = ds->pebs_buffer_base;

	if (!test_bit(0, cpuc->active_mask))
		return;

	WARN_ON_ONCE(!event);

	if (!event->attr.precise)
		return;

	n = top - at;
	if (n <= 0)
		return;

	if (!intel_pmu_save_and_restart(event))
		return;

	/*
	 * Should not happen, we program the threshold at 1 and do not
	 * set a reset value.
	 */
	WARN_ON_ONCE(n > 1);
	/* if multiple records somehow exist, report only the newest */
	at += n - 1;

	perf_sample_data_init(&data, 0);
	data.period = event->hw.last_period;

	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
		raw.size = x86_pmu.pebs_record_size;
		raw.data = at;
		data.raw = &raw;
	}

	/*
	 * We use the interrupt regs as a base because the PEBS record
	 * does not contain a full regs set, specifically it seems to
	 * lack segment descriptors, which get used by things like
	 * user_mode().
	 *
	 * In the simple case fix up only the IP and BP,SP regs, for
	 * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly.
	 * A possible PERF_SAMPLE_REGS will have to transfer all regs.
	 */
	regs = *iregs;
	regs.ip = at->ip;
	regs.bp = at->bp;
	regs.sp = at->sp;

	if (intel_pmu_pebs_fixup_ip(&regs))
		regs.flags |= PERF_EFLAGS_EXACT;
	else
		regs.flags &= ~PERF_EFLAGS_EXACT;

	if (perf_event_overflow(event, 1, &data, &regs))
		x86_pmu_stop(event);
}
531
/*
 * Drain the PEBS buffer on Nehalem (format 1).  Each record carries a
 * status word identifying which counter produced it; every record is
 * matched to its event (counting each counter at most once per drain
 * via the local 'status' bitmap) and emitted as a perf sample.
 */
static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
	struct pebs_record_nhm *at, *top;
	struct perf_sample_data data;
	struct perf_event *event = NULL;
	struct perf_raw_record raw;
	struct pt_regs regs;
	u64 status = 0;
	int bit, n;

	if (!ds || !x86_pmu.pebs)
		return;

	at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;

	/* drain the buffer regardless of what we manage to report */
	ds->pebs_index = ds->pebs_buffer_base;

	n = top - at;
	if (n <= 0)
		return;

	/*
	 * Should not happen, we program the threshold at 1 and do not
	 * set a reset value.
	 */
	WARN_ON_ONCE(n > MAX_PEBS_EVENTS);

	for ( ; at < top; at++) {
		/* find the first active, precise, not-yet-seen counter */
		for_each_set_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) {
			event = cpuc->events[bit];
			if (!test_bit(bit, cpuc->active_mask))
				continue;

			WARN_ON_ONCE(!event);

			if (!event->attr.precise)
				continue;

			/* skip counters already credited during this drain */
			if (__test_and_set_bit(bit, (unsigned long *)&status))
				continue;

			break;
		}

		if (!event || bit >= MAX_PEBS_EVENTS)
			continue;

		if (!intel_pmu_save_and_restart(event))
			continue;

		perf_sample_data_init(&data, 0);
		data.period = event->hw.last_period;

		if (event->attr.sample_type & PERF_SAMPLE_RAW) {
			raw.size = x86_pmu.pebs_record_size;
			raw.data = at;
			data.raw = &raw;
		}

		/*
		 * See the comment in intel_pmu_drain_pebs_core()
		 */
		regs = *iregs;
		regs.ip = at->ip;
		regs.bp = at->bp;
		regs.sp = at->sp;

		if (intel_pmu_pebs_fixup_ip(&regs))
			regs.flags |= PERF_EFLAGS_EXACT;
		else
			regs.flags &= ~PERF_EFLAGS_EXACT;

		if (perf_event_overflow(event, 1, &data, &regs))
			x86_pmu_stop(event);
	}
}
611
/*
 * BTS, PEBS probe and setup
 */

/*
 * Probe DS-area features at init time: set x86_pmu.bts/pebs from
 * CPUID feature flags and select the PEBS record size, drain routine
 * and constraint table according to the PEBS format reported in the
 * perf-capabilities MSR.  Unknown formats disable PEBS.
 */
static void intel_ds_init(void)
{
	/*
	 * No support for 32bit formats
	 */
	if (!boot_cpu_has(X86_FEATURE_DTES64))
		return;

	x86_pmu.bts  = boot_cpu_has(X86_FEATURE_BTS);
	x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
	if (x86_pmu.pebs) {
		/* '+' marks trap-like PEBS (needs IP fixup), '-' fault-like */
		char pebs_type = x86_pmu.intel_cap.pebs_trap ?  '+' : '-';
		int format = x86_pmu.intel_cap.pebs_format;

		switch (format) {
		case 0:
			printk(KERN_CONT "PEBS fmt0%c, ", pebs_type);
			x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
			x86_pmu.pebs_constraints = intel_core_pebs_events;
			break;

		case 1:
			printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
			x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
			x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
			break;

		default:
			printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
			x86_pmu.pebs = 0;
			break;
		}
	}
}
652
653#else /* CONFIG_CPU_SUP_INTEL */
654
/* !CONFIG_CPU_SUP_INTEL: DS (BTS/PEBS) support compiled out, always succeeds */
static int reserve_ds_buffers(void)
{
	return 0;
}
659
/* !CONFIG_CPU_SUP_INTEL: nothing was reserved, nothing to release */
static void release_ds_buffers(void)
{
}
663
664#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
new file mode 100644
index 000000000000..d202c1bece1a
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -0,0 +1,218 @@
1#ifdef CONFIG_CPU_SUP_INTEL
2
/* LBR record formats as reported by the perf-capabilities MSR */
enum {
	LBR_FORMAT_32		= 0x00, /* 32-bit packed from/to in one MSR */
	LBR_FORMAT_LIP		= 0x01, /* linear instruction pointer */
	LBR_FORMAT_EIP		= 0x02, /* effective instruction pointer */
	LBR_FORMAT_EIP_FLAGS	= 0x03, /* EIP plus mispredict flag in bit 63 */
};
9
/*
 * We only support LBR implementations that have FREEZE_LBRS_ON_PMI
 * otherwise it becomes near impossible to get a reliable stack.
 */

/* Turn on LBR recording in DEBUGCTL, frozen across PMIs. */
static void __intel_pmu_lbr_enable(void)
{
	u64 debugctl;

	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
	debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}
23
/* Turn off LBR recording and its PMI-freeze behaviour in DEBUGCTL. */
static void __intel_pmu_lbr_disable(void)
{
	u64 debugctl;

	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
	debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}
32
/* Clear the LBR stack on 32-bit-format parts (one packed MSR per entry). */
static void intel_pmu_lbr_reset_32(void)
{
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++)
		wrmsrl(x86_pmu.lbr_from + i, 0);
}
40
/* Clear the LBR stack on 64-bit-format parts (separate FROM/TO MSRs). */
static void intel_pmu_lbr_reset_64(void)
{
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		wrmsrl(x86_pmu.lbr_from + i, 0);
		wrmsrl(x86_pmu.lbr_to   + i, 0);
	}
}
50
51static void intel_pmu_lbr_reset(void)
52{
53 if (!x86_pmu.lbr_nr)
54 return;
55
56 if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
57 intel_pmu_lbr_reset_32();
58 else
59 intel_pmu_lbr_reset_64();
60}
61
/*
 * Register an event as an LBR user on this CPU, resetting the stack on
 * task-context change so branch records never leak between tasks.  The
 * hardware itself is enabled later, from intel_pmu_lbr_enable_all().
 */
static void intel_pmu_lbr_enable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (!x86_pmu.lbr_nr)
		return;

	/* callers must enable events with the PMU globally disabled */
	WARN_ON_ONCE(cpuc->enabled);

	/*
	 * Reset the LBR stack if we changed task context to
	 * avoid data leaks.
	 */

	if (event->ctx->task && cpuc->lbr_context != event->ctx) {
		intel_pmu_lbr_reset();
		cpuc->lbr_context = event->ctx;
	}

	cpuc->lbr_users++;
}
83
/*
 * Drop an event's LBR usage; when the last user goes away while the
 * PMU is live, switch LBR recording off immediately.
 */
static void intel_pmu_lbr_disable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (!x86_pmu.lbr_nr)
		return;

	cpuc->lbr_users--;
	WARN_ON_ONCE(cpuc->lbr_users < 0);

	if (cpuc->enabled && !cpuc->lbr_users)
		__intel_pmu_lbr_disable();
}
97
/* Enable LBR recording in hardware if any event on this CPU uses it. */
static void intel_pmu_lbr_enable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (cpuc->lbr_users)
		__intel_pmu_lbr_enable();
}
105
/* Disable LBR recording in hardware if any event on this CPU uses it. */
static void intel_pmu_lbr_disable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (cpuc->lbr_users)
		__intel_pmu_lbr_disable();
}
113
/* Read the LBR top-of-stack MSR: index of the most recent entry. */
static inline u64 intel_pmu_lbr_tos(void)
{
	u64 tos;

	rdmsrl(x86_pmu.lbr_tos, tos);

	return tos;
}
122
/*
 * Read the LBR stack on 32-bit-format parts: each entry packs the
 * from/to addresses into a single MSR.  Entries are copied into
 * cpuc->lbr_entries newest-first, starting at the top of stack.
 * Assumes lbr_nr is a power of two (mask = lbr_nr - 1).
 */
static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
{
	unsigned long mask = x86_pmu.lbr_nr - 1;
	u64 tos = intel_pmu_lbr_tos();
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		unsigned long lbr_idx = (tos - i) & mask;
		/* view of the packed 32/32 from-to MSR layout */
		union {
			struct {
				u32 from;
				u32 to;
			};
			u64     lbr;
		} msr_lastbranch;

		rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);

		cpuc->lbr_entries[i].from  = msr_lastbranch.from;
		cpuc->lbr_entries[i].to    = msr_lastbranch.to;
		cpuc->lbr_entries[i].flags = 0;
	}
	cpuc->lbr_stack.nr = i;
}
147
/* bit 63 of the FROM MSR carries the mispredict flag in EIP_FLAGS format */
#define LBR_FROM_FLAG_MISPRED  (1ULL << 63)

/*
 * Due to lack of segmentation in Linux the effective address (offset)
 * is the same as the linear address, allowing us to merge the LIP and EIP
 * LBR formats.
 */
static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
{
	unsigned long mask = x86_pmu.lbr_nr - 1;
	int lbr_format = x86_pmu.intel_cap.lbr_format;
	u64 tos = intel_pmu_lbr_tos();
	int i;

	/* copy entries newest-first, starting at the top of stack */
	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		unsigned long lbr_idx = (tos - i) & mask;
		u64 from, to, flags = 0;

		rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
		rdmsrl(x86_pmu.lbr_to   + lbr_idx, to);

		if (lbr_format == LBR_FORMAT_EIP_FLAGS) {
			/* extract the flag, then sign-extend the 63-bit address */
			flags = !!(from & LBR_FROM_FLAG_MISPRED);
			from = (u64)((((s64)from) << 1) >> 1);
		}

		cpuc->lbr_entries[i].from  = from;
		cpuc->lbr_entries[i].to    = to;
		cpuc->lbr_entries[i].flags = flags;
	}
	cpuc->lbr_stack.nr = i;
}
180
/*
 * Snapshot the LBR stack into cpuc->lbr_entries, choosing the reader
 * that matches the hardware record format.  No-op without LBR users.
 */
static void intel_pmu_lbr_read(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (!cpuc->lbr_users)
		return;

	if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
		intel_pmu_lbr_read_32(cpuc);
	else
		intel_pmu_lbr_read_64(cpuc);
}
193
/* Core: 4-deep LBR stack; MSR addresses per the hardware documentation. */
static void intel_pmu_lbr_init_core(void)
{
	x86_pmu.lbr_nr	   = 4;
	x86_pmu.lbr_tos    = 0x01c9;	/* MSR_LASTBRANCH_TOS */
	x86_pmu.lbr_from   = 0x40;
	x86_pmu.lbr_to     = 0x60;
}
201
/* Nehalem/Westmere: 16-deep LBR stack with separate from/to MSR banks. */
static void intel_pmu_lbr_init_nhm(void)
{
	x86_pmu.lbr_nr	   = 16;
	x86_pmu.lbr_tos    = 0x01c9;	/* MSR_LASTBRANCH_TOS */
	x86_pmu.lbr_from   = 0x680;
	x86_pmu.lbr_to     = 0x6c0;
}
209
/* Atom: 8-deep LBR stack, same MSR base addresses as Core. */
static void intel_pmu_lbr_init_atom(void)
{
	x86_pmu.lbr_nr	   = 8;
	x86_pmu.lbr_tos    = 0x01c9;	/* MSR_LASTBRANCH_TOS */
	x86_pmu.lbr_from   = 0x40;
	x86_pmu.lbr_to     = 0x60;
}
217
218#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
new file mode 100644
index 000000000000..15367cce66bd
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -0,0 +1,834 @@
1/*
2 * Netburst Perfomance Events (P4, old Xeon)
3 *
4 * Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org>
5 * Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com>
6 *
7 * For licencing details see kernel-base/COPYING
8 */
9
10#ifdef CONFIG_CPU_SUP_INTEL
11
12#include <asm/perf_event_p4.h>
13
#define P4_CNTR_LIMIT 3
/*
 * ESCR/CCCR resource binding for a single Netburst event.
 * array indices: 0,1 - HT threads, used with HT enabled cpu
 */
struct p4_event_bind {
	unsigned int opcode;			/* Event code and ESCR selector */
	unsigned int escr_msr[2];		/* ESCR MSR for this event */
	/*
	 * Counter index (offset), -1 on absence.
	 *
	 * Must be *signed* char: the bind tables initialize unused slots
	 * with -1 and consumers compare against -1; with an unsigned
	 * element type the sentinel would silently become 255 and the
	 * comparison could never match.
	 */
	signed char cntr[2][P4_CNTR_LIMIT];
};
23
/*
 * PEBS replay-metric pair used to program a generic cache event:
 * one value for MSR_IA32_PEBS_ENABLE, one for MSR_P4_PEBS_MATRIX_VERT.
 */
struct p4_cache_event_bind {
	unsigned int metric_pebs;	/* written to MSR_IA32_PEBS_ENABLE */
	unsigned int metric_vert;	/* written to MSR_P4_PEBS_MATRIX_VERT */
};
28
/* Expand a cache-event name into its PEBS / PEBS_MATRIX_VERT metric pair. */
#define P4_GEN_CACHE_EVENT_BIND(name)		\
	[P4_CACHE__##name] = {			\
		.metric_pebs = P4_PEBS__##name,	\
		.metric_vert = P4_VERT__##name,	\
	}

/* Replay metrics for the supported cache events, indexed by P4_CACHE__* */
static struct p4_cache_event_bind p4_cache_event_bind_map[] = {
	P4_GEN_CACHE_EVENT_BIND(1stl_cache_load_miss_retired),
	P4_GEN_CACHE_EVENT_BIND(2ndl_cache_load_miss_retired),
	P4_GEN_CACHE_EVENT_BIND(dtlb_load_miss_retired),
	P4_GEN_CACHE_EVENT_BIND(dtlb_store_miss_retired),
};
41
/*
 * Per-event resource table, indexed by P4_EVENT_* id: which ESCR MSR each
 * HT thread must use and which CCCR/counter offsets may host the event.
 *
 * Note that we don't use CCCR1 here, there is an
 * exception for P4_BSQ_ALLOCATION but we just have
 * no workaround
 *
 * consider this binding as resources which particular
 * event may borrow, it doesn't contain EventMask,
 * Tags and friends -- they are left to a caller
 */
static struct p4_event_bind p4_event_bind_map[] = {
	[P4_EVENT_TC_DELIVER_MODE] = {
		.opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE),
		.escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
		.cntr = { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_BPU_FETCH_REQUEST] = {
		.opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST),
		.escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_ITLB_REFERENCE] = {
		.opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE),
		.escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_MEMORY_CANCEL] = {
		.opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL),
		.escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_MEMORY_COMPLETE] = {
		.opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE),
		.escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_LOAD_PORT_REPLAY] = {
		.opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY),
		.escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_STORE_PORT_REPLAY] = {
		.opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY),
		.escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_MOB_LOAD_REPLAY] = {
		.opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY),
		.escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_PAGE_WALK_TYPE] = {
		.opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE),
		.escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_BSQ_CACHE_REFERENCE] = {
		.opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE),
		.escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_IOQ_ALLOCATION] = {
		.opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION),
		.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_IOQ_ACTIVE_ENTRIES] = {	/* shared ESCR */
		.opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES),
		.escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 },
		.cntr = { {2, -1, -1}, {3, -1, -1} },
	},
	[P4_EVENT_FSB_DATA_ACTIVITY] = {
		.opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY),
		.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_BSQ_ALLOCATION] = {		/* shared ESCR, broken CCCR1 */
		.opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION),
		.escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 },
		.cntr = { {0, -1, -1}, {1, -1, -1} },
	},
	[P4_EVENT_BSQ_ACTIVE_ENTRIES] = {	/* shared ESCR */
		.opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES),
		.escr_msr = { MSR_P4_BSU_ESCR1, MSR_P4_BSU_ESCR1 },
		.cntr = { {2, -1, -1}, {3, -1, -1} },
	},
	[P4_EVENT_SSE_INPUT_ASSIST] = {
		.opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST),
		.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_PACKED_SP_UOP] = {
		.opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP),
		.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_PACKED_DP_UOP] = {
		.opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP),
		.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_SCALAR_SP_UOP] = {
		.opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP),
		.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_SCALAR_DP_UOP] = {
		.opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP),
		.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_64BIT_MMX_UOP] = {
		.opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP),
		.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_128BIT_MMX_UOP] = {
		.opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP),
		.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_X87_FP_UOP] = {
		.opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP),
		.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_TC_MISC] = {
		.opcode = P4_OPCODE(P4_EVENT_TC_MISC),
		.escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
		.cntr = { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_GLOBAL_POWER_EVENTS] = {
		.opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS),
		.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_TC_MS_XFER] = {
		.opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER),
		.escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
		.cntr = { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_UOP_QUEUE_WRITES] = {
		.opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES),
		.escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
		.cntr = { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = {
		.opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE),
		.escr_msr = { MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR0 },
		.cntr = { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_RETIRED_BRANCH_TYPE] = {
		.opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE),
		.escr_msr = { MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR1 },
		.cntr = { {4, 5, -1}, {6, 7, -1} },
	},
	[P4_EVENT_RESOURCE_STALL] = {
		.opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL),
		.escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_WC_BUFFER] = {
		.opcode = P4_OPCODE(P4_EVENT_WC_BUFFER),
		.escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
		.cntr = { {8, 9, -1}, {10, 11, -1} },
	},
	[P4_EVENT_B2B_CYCLES] = {
		.opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES),
		.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_BNR] = {
		.opcode = P4_OPCODE(P4_EVENT_BNR),
		.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_SNOOP] = {
		.opcode = P4_OPCODE(P4_EVENT_SNOOP),
		.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_RESPONSE] = {
		.opcode = P4_OPCODE(P4_EVENT_RESPONSE),
		.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
		.cntr = { {0, -1, -1}, {2, -1, -1} },
	},
	[P4_EVENT_FRONT_END_EVENT] = {
		.opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT),
		.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_EXECUTION_EVENT] = {
		.opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT),
		.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_REPLAY_EVENT] = {
		.opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT),
		.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_INSTR_RETIRED] = {
		.opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED),
		.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_UOPS_RETIRED] = {
		.opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED),
		.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_UOP_TYPE] = {
		.opcode = P4_OPCODE(P4_EVENT_UOP_TYPE),
		.escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_BRANCH_RETIRED] = {
		.opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED),
		.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_MISPRED_BRANCH_RETIRED] = {
		.opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED),
		.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_X87_ASSIST] = {
		.opcode = P4_OPCODE(P4_EVENT_X87_ASSIST),
		.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_MACHINE_CLEAR] = {
		.opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR),
		.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
	[P4_EVENT_INSTR_COMPLETED] = {
		.opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED),
		.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
		.cntr = { {12, 13, 16}, {14, 15, 17} },
	},
};
283
/*
 * Pack an (event, emask-bit, cache-metric) triple into the u64 config
 * layout used by the generic cache-event tables below.
 */
#define P4_GEN_CACHE_EVENT(event, bit, cache_event)			  \
	p4_config_pack_escr(P4_ESCR_EVENT(event)			| \
			    P4_ESCR_EMASK_BIT(event, bit))		| \
	p4_config_pack_cccr(cache_event					| \
			    P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event))))

/*
 * Generic cache-event id table; entries of 0 mean "not counted",
 * -1 means "not supported on this PMU".
 */
static __initconst const u64 p4_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
						P4_CACHE__1stl_cache_load_miss_retired),
	},
 },
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
						P4_CACHE__2ndl_cache_load_miss_retired),
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
						P4_CACHE__dtlb_load_miss_retired),
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
						P4_CACHE__dtlb_store_miss_retired),
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT,
						P4_CACHE__itlb_reference_hit),
		[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS,
						P4_CACHE__itlb_reference_miss),
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS) ] = -1,
	},
 },
};
338
/*
 * Pre-packed ESCR/CCCR configs for the generic PERF_COUNT_HW_* events.
 * The ESCR selector bits are filled in later by p4_pmu_event_map().
 */
static u64 p4_general_events[PERF_COUNT_HW_MAX] = {
	/* non-halted CPU clocks */
	[PERF_COUNT_HW_CPU_CYCLES] =
	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS)		|
		P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)),

	/*
	 * retired instructions
	 * in a sake of simplicity we don't use the FSB tagging
	 */
	[PERF_COUNT_HW_INSTRUCTIONS] =
	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_INSTR_RETIRED)		|
		P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG)		|
		P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)),

	/* cache hits */
	[PERF_COUNT_HW_CACHE_REFERENCES] =
	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE)		|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)),

	/* cache misses */
	[PERF_COUNT_HW_CACHE_MISSES] =
	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE)		|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS)),

	/* branch instructions retired */
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =
	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_RETIRED_BRANCH_TYPE)		|
		P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL)		|
		P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN)		|
		P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT)),

	/* mispredicted branches retired */
	[PERF_COUNT_HW_BRANCH_MISSES] =
	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_MISPRED_BRANCH_RETIRED)	|
		P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS)),

	/* bus ready clocks (cpu is driving #DRDY_DRV\#DRDY_OWN): */
	[PERF_COUNT_HW_BUS_CYCLES] =
	p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_FSB_DATA_ACTIVITY)		|
		P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV)		|
		P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN))	|
	p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE),
};
391
392static struct p4_event_bind *p4_config_get_bind(u64 config)
393{
394 unsigned int evnt = p4_config_unpack_event(config);
395 struct p4_event_bind *bind = NULL;
396
397 if (evnt < ARRAY_SIZE(p4_event_bind_map))
398 bind = &p4_event_bind_map[evnt];
399
400 return bind;
401}
402
403static u64 p4_pmu_event_map(int hw_event)
404{
405 struct p4_event_bind *bind;
406 unsigned int esel;
407 u64 config;
408
409 if (hw_event > ARRAY_SIZE(p4_general_events)) {
410 printk_once(KERN_ERR "P4 PMU: Bad index: %i\n", hw_event);
411 return 0;
412 }
413
414 config = p4_general_events[hw_event];
415 bind = p4_config_get_bind(config);
416 esel = P4_OPCODE_ESEL(bind->opcode);
417 config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel));
418
419 return config;
420}
421
/*
 * Build the initial packed ESCR/CCCR config for @event, wiring in the
 * kernel/user exclusion bits and (on HT) the thread flag for the CPU
 * we are running on right now. Always returns 0.
 */
static int p4_hw_config(struct perf_event *event)
{
	int cpu = raw_smp_processor_id();
	u32 escr, cccr;

	/*
	 * the reason we use cpu that early is that: if we get scheduled
	 * first time on the same cpu -- we will not need swap thread
	 * specific flags in config (and will save some cpu cycles)
	 */

	cccr = p4_default_cccr_conf(cpu);
	escr = p4_default_escr_conf(cpu, event->attr.exclude_kernel,
					event->attr.exclude_user);
	event->hw.config = p4_config_pack_escr(escr) |
			   p4_config_pack_cccr(cccr);

	/* remember which HT sibling this config was built for */
	if (p4_ht_active() && p4_ht_thread(cpu))
		event->hw.config = p4_set_ht_bit(event->hw.config);

	/* non-raw events are fully described by the defaults above */
	if (event->attr.type != PERF_TYPE_RAW)
		return 0;

	/*
	 * We don't control raw events so it's up to the caller
	 * to pass sane values (and we don't count the thread number
	 * on HT machine but allow HT-compatible specifics to be
	 * passed on)
	 *
	 * XXX: HT wide things should check perf_paranoid_cpu() &&
	 *      CAP_SYS_ADMIN
	 */
	event->hw.config |= event->attr.config &
		(p4_config_pack_escr(P4_ESCR_MASK_HT) |
		 p4_config_pack_cccr(P4_CCCR_MASK_HT));

	return 0;
}
460
461static inline void p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
462{
463 unsigned long dummy;
464
465 rdmsrl(hwc->config_base + hwc->idx, dummy);
466 if (dummy & P4_CCCR_OVF) {
467 (void)checking_wrmsrl(hwc->config_base + hwc->idx,
468 ((u64)dummy) & ~P4_CCCR_OVF);
469 }
470}
471
472static inline void p4_pmu_disable_event(struct perf_event *event)
473{
474 struct hw_perf_event *hwc = &event->hw;
475
476 /*
477 * If event gets disabled while counter is in overflowed
478 * state we need to clear P4_CCCR_OVF, otherwise interrupt get
479 * asserted again and again
480 */
481 (void)checking_wrmsrl(hwc->config_base + hwc->idx,
482 (u64)(p4_config_unpack_cccr(hwc->config)) &
483 ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
484}
485
486static void p4_pmu_disable_all(void)
487{
488 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
489 int idx;
490
491 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
492 struct perf_event *event = cpuc->events[idx];
493 if (!test_bit(idx, cpuc->active_mask))
494 continue;
495 p4_pmu_disable_event(event);
496 }
497}
498
/*
 * Program the ESCR (and, for cache events, the PEBS metric MSRs) and
 * finally arm the CCCR for @event on its assigned counter.
 */
static void p4_pmu_enable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int thread = p4_ht_config_thread(hwc->config);
	u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config));
	unsigned int idx = p4_config_unpack_event(hwc->config);
	unsigned int idx_cache = p4_config_unpack_cache_event(hwc->config);
	struct p4_event_bind *bind;
	struct p4_cache_event_bind *bind_cache;
	u64 escr_addr, cccr;

	bind = &p4_event_bind_map[idx];
	escr_addr = (u64)bind->escr_msr[thread];

	/*
	 * - we dont support cascaded counters yet
	 * - and counter 1 is broken (erratum)
	 */
	WARN_ON_ONCE(p4_is_event_cascaded(hwc->config));
	WARN_ON_ONCE(hwc->idx == 1);

	/* we need a real Event value */
	escr_conf &= ~P4_ESCR_EVENT_MASK;
	escr_conf |= P4_ESCR_EVENT(P4_OPCODE_EVNT(bind->opcode));

	cccr = p4_config_unpack_cccr(hwc->config);

	/*
	 * it could be Cache event so that we need to
	 * set metrics into additional MSRs
	 */
	BUILD_BUG_ON(P4_CACHE__MAX > P4_CCCR_CACHE_OPS_MASK);
	if (idx_cache > P4_CACHE__NONE &&
	    idx_cache < ARRAY_SIZE(p4_cache_event_bind_map)) {
		bind_cache = &p4_cache_event_bind_map[idx_cache];
		(void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind_cache->metric_pebs);
		(void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind_cache->metric_vert);
	}

	/* the ESCR must be in place before the CCCR enables counting */
	(void)checking_wrmsrl(escr_addr, escr_conf);
	(void)checking_wrmsrl(hwc->config_base + hwc->idx,
			      (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);
}
542
543static void p4_pmu_enable_all(int added)
544{
545 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
546 int idx;
547
548 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
549 struct perf_event *event = cpuc->events[idx];
550 if (!test_bit(idx, cpuc->active_mask))
551 continue;
552 p4_pmu_enable_event(event);
553 }
554}
555
/*
 * PMI (NMI) handler: scan all active counters, feed overflows into the
 * generic perf machinery and re-arm the counters. Returns non-zero if
 * at least one overflow was handled, so the NMI is acknowledged.
 */
static int p4_pmu_handle_irq(struct pt_regs *regs)
{
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	struct perf_event *event;
	struct hw_perf_event *hwc;
	int idx, handled = 0;
	u64 val;

	data.addr = 0;
	data.raw = NULL;

	cpuc = &__get_cpu_var(cpu_hw_events);

	for (idx = 0; idx < x86_pmu.num_counters; idx++) {

		if (!test_bit(idx, cpuc->active_mask))
			continue;

		event = cpuc->events[idx];
		hwc = &event->hw;

		WARN_ON_ONCE(hwc->idx != idx);

		/*
		 * FIXME: Redundant call, actually not needed
		 * but just to check if we're screwed
		 */
		p4_pmu_clear_cccr_ovf(hwc);

		val = x86_perf_event_update(event);
		/* top counter bit still set -> has not wrapped, no overflow */
		if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
			continue;

		/*
		 * event overflow
		 */
		handled = 1;
		data.period = event->hw.last_period;

		/* re-arm; skip the sample if the period could not be set */
		if (!x86_perf_event_set_period(event))
			continue;
		/* throttled or otherwise rejected -> stop the counter */
		if (perf_event_overflow(event, 1, &data, regs))
			p4_pmu_disable_event(event);
	}

	if (handled) {
		/* p4 quirk: unmask it again */
		apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
		inc_irq_stat(apic_perf_irqs);
	}

	return handled;
}
610
/*
 * swap thread specific fields according to a thread
 * we are going to run on
 */
static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu)
{
	u32 escr, cccr;

	/*
	 * we either lucky and continue on same cpu or no HT support
	 */
	if (!p4_should_swap_ts(hwc->config, cpu))
		return;

	/*
	 * the event is migrated from an another logical
	 * cpu, so we need to swap thread specific flags
	 */

	escr = p4_config_unpack_escr(hwc->config);
	cccr = p4_config_unpack_cccr(hwc->config);

	if (p4_ht_thread(cpu)) {
		/* moving onto thread 1: T0 flags become T1 flags */
		cccr &= ~P4_CCCR_OVF_PMI_T0;
		cccr |= P4_CCCR_OVF_PMI_T1;
		if (escr & P4_ESCR_T0_OS) {
			escr &= ~P4_ESCR_T0_OS;
			escr |= P4_ESCR_T1_OS;
		}
		if (escr & P4_ESCR_T0_USR) {
			escr &= ~P4_ESCR_T0_USR;
			escr |= P4_ESCR_T1_USR;
		}
		hwc->config = p4_config_pack_escr(escr);
		hwc->config |= p4_config_pack_cccr(cccr);
		hwc->config |= P4_CONFIG_HT;	/* mark config as thread-1 */
	} else {
		/* moving onto thread 0: T1 flags become T0 flags */
		cccr &= ~P4_CCCR_OVF_PMI_T1;
		cccr |= P4_CCCR_OVF_PMI_T0;
		if (escr & P4_ESCR_T1_OS) {
			escr &= ~P4_ESCR_T1_OS;
			escr |= P4_ESCR_T0_OS;
		}
		if (escr & P4_ESCR_T1_USR) {
			escr &= ~P4_ESCR_T1_USR;
			escr |= P4_ESCR_T0_USR;
		}
		hwc->config = p4_config_pack_escr(escr);
		hwc->config |= p4_config_pack_cccr(cccr);
		hwc->config &= ~P4_CONFIG_HT;	/* mark config as thread-0 */
	}
}
663
/*
 * ESCRs are not sequential in memory so we need a map: MSR address ->
 * dense index, used for the escr_mask bitmap in the scheduler below.
 */
static const unsigned int p4_escr_map[ARCH_P4_TOTAL_ESCR] = {
	MSR_P4_ALF_ESCR0,	/*  0 */
	MSR_P4_ALF_ESCR1,	/*  1 */
	MSR_P4_BPU_ESCR0,	/*  2 */
	MSR_P4_BPU_ESCR1,	/*  3 */
	MSR_P4_BSU_ESCR0,	/*  4 */
	MSR_P4_BSU_ESCR1,	/*  5 */
	MSR_P4_CRU_ESCR0,	/*  6 */
	MSR_P4_CRU_ESCR1,	/*  7 */
	MSR_P4_CRU_ESCR2,	/*  8 */
	MSR_P4_CRU_ESCR3,	/*  9 */
	MSR_P4_CRU_ESCR4,	/* 10 */
	MSR_P4_CRU_ESCR5,	/* 11 */
	MSR_P4_DAC_ESCR0,	/* 12 */
	MSR_P4_DAC_ESCR1,	/* 13 */
	MSR_P4_FIRM_ESCR0,	/* 14 */
	MSR_P4_FIRM_ESCR1,	/* 15 */
	MSR_P4_FLAME_ESCR0,	/* 16 */
	MSR_P4_FLAME_ESCR1,	/* 17 */
	MSR_P4_FSB_ESCR0,	/* 18 */
	MSR_P4_FSB_ESCR1,	/* 19 */
	MSR_P4_IQ_ESCR0,	/* 20 */
	MSR_P4_IQ_ESCR1,	/* 21 */
	MSR_P4_IS_ESCR0,	/* 22 */
	MSR_P4_IS_ESCR1,	/* 23 */
	MSR_P4_ITLB_ESCR0,	/* 24 */
	MSR_P4_ITLB_ESCR1,	/* 25 */
	MSR_P4_IX_ESCR0,	/* 26 */
	MSR_P4_IX_ESCR1,	/* 27 */
	MSR_P4_MOB_ESCR0,	/* 28 */
	MSR_P4_MOB_ESCR1,	/* 29 */
	MSR_P4_MS_ESCR0,	/* 30 */
	MSR_P4_MS_ESCR1,	/* 31 */
	MSR_P4_PMH_ESCR0,	/* 32 */
	MSR_P4_PMH_ESCR1,	/* 33 */
	MSR_P4_RAT_ESCR0,	/* 34 */
	MSR_P4_RAT_ESCR1,	/* 35 */
	MSR_P4_SAAT_ESCR0,	/* 36 */
	MSR_P4_SAAT_ESCR1,	/* 37 */
	MSR_P4_SSU_ESCR0,	/* 38 */
	MSR_P4_SSU_ESCR1,	/* 39 */
	MSR_P4_TBPU_ESCR0,	/* 40 */
	MSR_P4_TBPU_ESCR1,	/* 41 */
	MSR_P4_TC_ESCR0,	/* 42 */
	MSR_P4_TC_ESCR1,	/* 43 */
	MSR_P4_U2L_ESCR0,	/* 44 */
	MSR_P4_U2L_ESCR1,	/* 45 */
};
713
714static int p4_get_escr_idx(unsigned int addr)
715{
716 unsigned int i;
717
718 for (i = 0; i < ARRAY_SIZE(p4_escr_map); i++) {
719 if (addr == p4_escr_map[i])
720 return i;
721 }
722
723 return -1;
724}
725
726static int p4_next_cntr(int thread, unsigned long *used_mask,
727 struct p4_event_bind *bind)
728{
729 int i = 0, j;
730
731 for (i = 0; i < P4_CNTR_LIMIT; i++) {
732 j = bind->cntr[thread][i++];
733 if (j == -1 || !test_bit(j, used_mask))
734 return j;
735 }
736
737 return -1;
738}
739
/*
 * Assign counters (and implicitly ESCRs) to the @n events queued in
 * @cpuc->event_list, writing counter indices into @assign when non-NULL.
 * Returns 0 when every event got a counter, -ENOSPC otherwise.
 *
 * NOTE(review): p4_get_escr_idx() can return -1 and that value is used
 * unchecked in test_bit()/set_bit() on escr_mask below -- this relies on
 * every escr_msr in p4_event_bind_map being present in p4_escr_map;
 * confirm.
 */
static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
	unsigned long escr_mask[BITS_TO_LONGS(ARCH_P4_TOTAL_ESCR)];
	int cpu = raw_smp_processor_id();
	struct hw_perf_event *hwc;
	struct p4_event_bind *bind;
	unsigned int i, thread, num;
	int cntr_idx, escr_idx;

	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
	bitmap_zero(escr_mask, ARCH_P4_TOTAL_ESCR);

	/* 'num' counts down: anything left over means we ran out of room */
	for (i = 0, num = n; i < n; i++, num--) {

		hwc = &cpuc->event_list[i]->hw;
		thread = p4_ht_thread(cpu);
		bind = p4_config_get_bind(hwc->config);
		escr_idx = p4_get_escr_idx(bind->escr_msr[thread]);

		/* already placed on this thread: keep the old counter */
		if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) {
			cntr_idx = hwc->idx;
			if (assign)
				assign[i] = hwc->idx;
			goto reserve;
		}

		cntr_idx = p4_next_cntr(thread, used_mask, bind);
		if (cntr_idx == -1 || test_bit(escr_idx, escr_mask))
			goto done;

		/* migrated across HT siblings: fix thread-specific bits */
		p4_pmu_swap_config_ts(hwc, cpu);
		if (assign)
			assign[i] = cntr_idx;
reserve:
		set_bit(cntr_idx, used_mask);
		set_bit(escr_idx, escr_mask);
	}

done:
	return num ? -ENOSPC : 0;
}
782
/* x86_pmu description for the Netburst (P4/old Xeon) PMU. */
static __initconst const struct x86_pmu p4_pmu = {
	.name			= "Netburst P4/Xeon",
	.handle_irq		= p4_pmu_handle_irq,
	.disable_all		= p4_pmu_disable_all,
	.enable_all		= p4_pmu_enable_all,
	.enable			= p4_pmu_enable_event,
	.disable		= p4_pmu_disable_event,
	.eventsel		= MSR_P4_BPU_CCCR0,	/* first CCCR MSR */
	.perfctr		= MSR_P4_BPU_PERFCTR0,	/* first counter MSR */
	.event_map		= p4_pmu_event_map,
	.max_events		= ARRAY_SIZE(p4_general_events),
	.get_event_constraints	= x86_get_event_constraints,
	/*
	 * IF HT disabled we may need to use all
	 * ARCH_P4_MAX_CCCR counters simultaneously
	 * though leave it restricted at moment assuming
	 * HT is on
	 */
	.num_counters		= ARCH_P4_MAX_CCCR,
	.apic			= 1,
	.cntval_bits		= 40,			/* 40-bit counters */
	.cntval_mask		= (1ULL << 40) - 1,
	.max_period		= (1ULL << 39) - 1,	/* keep sign bit clear */
	.hw_config		= p4_hw_config,
	.schedule_events	= p4_pmu_schedule_events,
};
809
/*
 * Probe entry: verify the CPU exposes working performance monitoring
 * and install the Netburst x86_pmu description plus cache-event ids.
 * Returns 0 on success, -ENODEV if the PMU is unusable.
 */
static __init int p4_pmu_init(void)
{
	unsigned int low, high;

	/* If we get stripped -- indexing fails */
	BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC);

	/*
	 * Bit 7 of MSR_IA32_MISC_ENABLE is the "Performance Monitoring
	 * Available" flag (see Intel SDM); without it the PMU is unusable.
	 */
	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
	if (!(low & (1 << 7))) {
		pr_cont("unsupported Netburst CPU model %d ",
			boot_cpu_data.x86_model);
		return -ENODEV;
	}

	memcpy(hw_cache_event_ids, p4_hw_cache_event_ids,
	       sizeof(hw_cache_event_ids));

	pr_cont("Netburst events, ");

	x86_pmu = p4_pmu;

	return 0;
}
833
834#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index a330485d14da..34ba07be2cda 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -27,24 +27,6 @@ static u64 p6_pmu_event_map(int hw_event)
27 */ 27 */
28#define P6_NOP_EVENT 0x0000002EULL 28#define P6_NOP_EVENT 0x0000002EULL
29 29
30static u64 p6_pmu_raw_event(u64 hw_event)
31{
32#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL
33#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL
34#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL
35#define P6_EVNTSEL_INV_MASK 0x00800000ULL
36#define P6_EVNTSEL_REG_MASK 0xFF000000ULL
37
38#define P6_EVNTSEL_MASK \
39 (P6_EVNTSEL_EVENT_MASK | \
40 P6_EVNTSEL_UNIT_MASK | \
41 P6_EVNTSEL_EDGE_MASK | \
42 P6_EVNTSEL_INV_MASK | \
43 P6_EVNTSEL_REG_MASK)
44
45 return hw_event & P6_EVNTSEL_MASK;
46}
47
48static struct event_constraint p6_event_constraints[] = 30static struct event_constraint p6_event_constraints[] =
49{ 31{
50 INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ 32 INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */
@@ -66,7 +48,7 @@ static void p6_pmu_disable_all(void)
66 wrmsrl(MSR_P6_EVNTSEL0, val); 48 wrmsrl(MSR_P6_EVNTSEL0, val);
67} 49}
68 50
69static void p6_pmu_enable_all(void) 51static void p6_pmu_enable_all(int added)
70{ 52{
71 unsigned long val; 53 unsigned long val;
72 54
@@ -102,22 +84,23 @@ static void p6_pmu_enable_event(struct perf_event *event)
102 (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); 84 (void)checking_wrmsrl(hwc->config_base + hwc->idx, val);
103} 85}
104 86
105static __initconst struct x86_pmu p6_pmu = { 87static __initconst const struct x86_pmu p6_pmu = {
106 .name = "p6", 88 .name = "p6",
107 .handle_irq = x86_pmu_handle_irq, 89 .handle_irq = x86_pmu_handle_irq,
108 .disable_all = p6_pmu_disable_all, 90 .disable_all = p6_pmu_disable_all,
109 .enable_all = p6_pmu_enable_all, 91 .enable_all = p6_pmu_enable_all,
110 .enable = p6_pmu_enable_event, 92 .enable = p6_pmu_enable_event,
111 .disable = p6_pmu_disable_event, 93 .disable = p6_pmu_disable_event,
94 .hw_config = x86_pmu_hw_config,
95 .schedule_events = x86_schedule_events,
112 .eventsel = MSR_P6_EVNTSEL0, 96 .eventsel = MSR_P6_EVNTSEL0,
113 .perfctr = MSR_P6_PERFCTR0, 97 .perfctr = MSR_P6_PERFCTR0,
114 .event_map = p6_pmu_event_map, 98 .event_map = p6_pmu_event_map,
115 .raw_event = p6_pmu_raw_event,
116 .max_events = ARRAY_SIZE(p6_perfmon_event_map), 99 .max_events = ARRAY_SIZE(p6_perfmon_event_map),
117 .apic = 1, 100 .apic = 1,
118 .max_period = (1ULL << 31) - 1, 101 .max_period = (1ULL << 31) - 1,
119 .version = 0, 102 .version = 0,
120 .num_events = 2, 103 .num_counters = 2,
121 /* 104 /*
122 * Events have 40 bits implemented. However they are designed such 105 * Events have 40 bits implemented. However they are designed such
123 * that bits [32-39] are sign extensions of bit 31. As such the 106 * that bits [32-39] are sign extensions of bit 31. As such the
@@ -125,8 +108,8 @@ static __initconst struct x86_pmu p6_pmu = {
125 * 108 *
126 * See IA-32 Intel Architecture Software developer manual Vol 3B 109 * See IA-32 Intel Architecture Software developer manual Vol 3B
127 */ 110 */
128 .event_bits = 32, 111 .cntval_bits = 32,
129 .event_mask = (1ULL << 32) - 1, 112 .cntval_mask = (1ULL << 32) - 1,
130 .get_event_constraints = x86_get_event_constraints, 113 .get_event_constraints = x86_get_event_constraints,
131 .event_constraints = p6_event_constraints, 114 .event_constraints = p6_event_constraints,
132}; 115};
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
deleted file mode 100644
index 1c47390dd0e5..000000000000
--- a/arch/x86/kernel/ds.c
+++ /dev/null
@@ -1,1437 +0,0 @@
1/*
2 * Debug Store support
3 *
4 * This provides a low-level interface to the hardware's Debug Store
5 * feature that is used for branch trace store (BTS) and
6 * precise-event based sampling (PEBS).
7 *
8 * It manages:
9 * - DS and BTS hardware configuration
10 * - buffer overflow handling (to be done)
11 * - buffer access
12 *
13 * It does not do:
14 * - security checking (is the caller allowed to trace the task)
15 * - buffer allocation (memory accounting)
16 *
17 *
18 * Copyright (C) 2007-2009 Intel Corporation.
19 * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
20 */
21
22#include <linux/kernel.h>
23#include <linux/string.h>
24#include <linux/errno.h>
25#include <linux/sched.h>
26#include <linux/slab.h>
27#include <linux/mm.h>
28#include <linux/trace_clock.h>
29
30#include <asm/ds.h>
31
32#include "ds_selftest.h"
33
34/*
35 * The configuration for a particular DS hardware implementation:
36 */
37struct ds_configuration {
38 /* The name of the configuration: */
39 const char *name;
40
41 /* The size of pointer-typed fields in DS, BTS, and PEBS: */
42 unsigned char sizeof_ptr_field;
43
44 /* The size of a BTS/PEBS record in bytes: */
45 unsigned char sizeof_rec[2];
46
47 /* The number of pebs counter reset values in the DS structure. */
48 unsigned char nr_counter_reset;
49
50 /* Control bit-masks indexed by enum ds_feature: */
51 unsigned long ctl[dsf_ctl_max];
52};
53static struct ds_configuration ds_cfg __read_mostly;
54
55
56/* Maximal size of a DS configuration: */
57#define MAX_SIZEOF_DS 0x80
58
59/* Maximal size of a BTS record: */
60#define MAX_SIZEOF_BTS (3 * 8)
61
62/* BTS and PEBS buffer alignment: */
63#define DS_ALIGNMENT (1 << 3)
64
65/* Number of buffer pointers in DS: */
66#define NUM_DS_PTR_FIELDS 8
67
68/* Size of a pebs reset value in DS: */
69#define PEBS_RESET_FIELD_SIZE 8
70
71/* Mask of control bits in the DS MSR register: */
72#define BTS_CONTROL \
73 ( ds_cfg.ctl[dsf_bts] | \
74 ds_cfg.ctl[dsf_bts_kernel] | \
75 ds_cfg.ctl[dsf_bts_user] | \
76 ds_cfg.ctl[dsf_bts_overflow] )
77
78/*
79 * A BTS or PEBS tracer.
80 *
81 * This holds the configuration of the tracer and serves as a handle
82 * to identify tracers.
83 */
84struct ds_tracer {
85 /* The DS context (partially) owned by this tracer. */
86 struct ds_context *context;
87 /* The buffer provided on ds_request() and its size in bytes. */
88 void *buffer;
89 size_t size;
90};
91
92struct bts_tracer {
93 /* The common DS part: */
94 struct ds_tracer ds;
95
96 /* The trace including the DS configuration: */
97 struct bts_trace trace;
98
99 /* Buffer overflow notification function: */
100 bts_ovfl_callback_t ovfl;
101
102 /* Active flags affecting trace collection. */
103 unsigned int flags;
104};
105
106struct pebs_tracer {
107 /* The common DS part: */
108 struct ds_tracer ds;
109
110 /* The trace including the DS configuration: */
111 struct pebs_trace trace;
112
113 /* Buffer overflow notification function: */
114 pebs_ovfl_callback_t ovfl;
115};
116
117/*
118 * Debug Store (DS) save area configuration (see Intel64 and IA32
119 * Architectures Software Developer's Manual, section 18.5)
120 *
121 * The DS configuration consists of the following fields; different
122 * architetures vary in the size of those fields.
123 *
124 * - double-word aligned base linear address of the BTS buffer
125 * - write pointer into the BTS buffer
126 * - end linear address of the BTS buffer (one byte beyond the end of
127 * the buffer)
128 * - interrupt pointer into BTS buffer
129 * (interrupt occurs when write pointer passes interrupt pointer)
130 * - double-word aligned base linear address of the PEBS buffer
131 * - write pointer into the PEBS buffer
132 * - end linear address of the PEBS buffer (one byte beyond the end of
133 * the buffer)
134 * - interrupt pointer into PEBS buffer
135 * (interrupt occurs when write pointer passes interrupt pointer)
136 * - value to which counter is reset following counter overflow
137 *
138 * Later architectures use 64bit pointers throughout, whereas earlier
139 * architectures use 32bit pointers in 32bit mode.
140 *
141 *
142 * We compute the base address for the first 8 fields based on:
143 * - the field size stored in the DS configuration
144 * - the relative field position
145 * - an offset giving the start of the respective region
146 *
147 * This offset is further used to index various arrays holding
148 * information for BTS and PEBS at the respective index.
149 *
150 * On later 32bit processors, we only access the lower 32bit of the
151 * 64bit pointer fields. The upper halves will be zeroed out.
152 */
153
154enum ds_field {
155 ds_buffer_base = 0,
156 ds_index,
157 ds_absolute_maximum,
158 ds_interrupt_threshold,
159};
160
161enum ds_qualifier {
162 ds_bts = 0,
163 ds_pebs
164};
165
166static inline unsigned long
167ds_get(const unsigned char *base, enum ds_qualifier qual, enum ds_field field)
168{
169 base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
170 return *(unsigned long *)base;
171}
172
173static inline void
174ds_set(unsigned char *base, enum ds_qualifier qual, enum ds_field field,
175 unsigned long value)
176{
177 base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
178 (*(unsigned long *)base) = value;
179}
180
181
182/*
183 * Locking is done only for allocating BTS or PEBS resources.
184 */
185static DEFINE_SPINLOCK(ds_lock);
186
187/*
188 * We either support (system-wide) per-cpu or per-thread allocation.
189 * We distinguish the two based on the task_struct pointer, where a
190 * NULL pointer indicates per-cpu allocation for the current cpu.
191 *
192 * Allocations are use-counted. As soon as resources are allocated,
193 * further allocations must be of the same type (per-cpu or
194 * per-thread). We model this by counting allocations (i.e. the number
195 * of tracers of a certain type) for one type negatively:
196 * =0 no tracers
197 * >0 number of per-thread tracers
198 * <0 number of per-cpu tracers
199 *
200 * Tracers essentially gives the number of ds contexts for a certain
201 * type of allocation.
202 */
203static atomic_t tracers = ATOMIC_INIT(0);
204
205static inline int get_tracer(struct task_struct *task)
206{
207 int error;
208
209 spin_lock_irq(&ds_lock);
210
211 if (task) {
212 error = -EPERM;
213 if (atomic_read(&tracers) < 0)
214 goto out;
215 atomic_inc(&tracers);
216 } else {
217 error = -EPERM;
218 if (atomic_read(&tracers) > 0)
219 goto out;
220 atomic_dec(&tracers);
221 }
222
223 error = 0;
224out:
225 spin_unlock_irq(&ds_lock);
226 return error;
227}
228
229static inline void put_tracer(struct task_struct *task)
230{
231 if (task)
232 atomic_dec(&tracers);
233 else
234 atomic_inc(&tracers);
235}
236
237/*
238 * The DS context is either attached to a thread or to a cpu:
239 * - in the former case, the thread_struct contains a pointer to the
240 * attached context.
241 * - in the latter case, we use a static array of per-cpu context
242 * pointers.
243 *
244 * Contexts are use-counted. They are allocated on first access and
245 * deallocated when the last user puts the context.
246 */
247struct ds_context {
248 /* The DS configuration; goes into MSR_IA32_DS_AREA: */
249 unsigned char ds[MAX_SIZEOF_DS];
250
251 /* The owner of the BTS and PEBS configuration, respectively: */
252 struct bts_tracer *bts_master;
253 struct pebs_tracer *pebs_master;
254
255 /* Use count: */
256 unsigned long count;
257
258 /* Pointer to the context pointer field: */
259 struct ds_context **this;
260
261 /* The traced task; NULL for cpu tracing: */
262 struct task_struct *task;
263
264 /* The traced cpu; only valid if task is NULL: */
265 int cpu;
266};
267
268static DEFINE_PER_CPU(struct ds_context *, cpu_ds_context);
269
270
271static struct ds_context *ds_get_context(struct task_struct *task, int cpu)
272{
273 struct ds_context **p_context =
274 (task ? &task->thread.ds_ctx : &per_cpu(cpu_ds_context, cpu));
275 struct ds_context *context = NULL;
276 struct ds_context *new_context = NULL;
277
278 /* Chances are small that we already have a context. */
279 new_context = kzalloc(sizeof(*new_context), GFP_KERNEL);
280 if (!new_context)
281 return NULL;
282
283 spin_lock_irq(&ds_lock);
284
285 context = *p_context;
286 if (likely(!context)) {
287 context = new_context;
288
289 context->this = p_context;
290 context->task = task;
291 context->cpu = cpu;
292 context->count = 0;
293
294 *p_context = context;
295 }
296
297 context->count++;
298
299 spin_unlock_irq(&ds_lock);
300
301 if (context != new_context)
302 kfree(new_context);
303
304 return context;
305}
306
307static void ds_put_context(struct ds_context *context)
308{
309 struct task_struct *task;
310 unsigned long irq;
311
312 if (!context)
313 return;
314
315 spin_lock_irqsave(&ds_lock, irq);
316
317 if (--context->count) {
318 spin_unlock_irqrestore(&ds_lock, irq);
319 return;
320 }
321
322 *(context->this) = NULL;
323
324 task = context->task;
325
326 if (task)
327 clear_tsk_thread_flag(task, TIF_DS_AREA_MSR);
328
329 /*
330 * We leave the (now dangling) pointer to the DS configuration in
331 * the DS_AREA msr. This is as good or as bad as replacing it with
332 * NULL - the hardware would crash if we enabled tracing.
333 *
334 * This saves us some problems with having to write an msr on a
335 * different cpu while preventing others from doing the same for the
336 * next context for that same cpu.
337 */
338
339 spin_unlock_irqrestore(&ds_lock, irq);
340
341 /* The context might still be in use for context switching. */
342 if (task && (task != current))
343 wait_task_context_switch(task);
344
345 kfree(context);
346}
347
348static void ds_install_ds_area(struct ds_context *context)
349{
350 unsigned long ds;
351
352 ds = (unsigned long)context->ds;
353
354 /*
355 * There is a race between the bts master and the pebs master.
356 *
357 * The thread/cpu access is synchronized via get/put_cpu() for
358 * task tracing and via wrmsr_on_cpu for cpu tracing.
359 *
360 * If bts and pebs are collected for the same task or same cpu,
361 * the same confiuration is written twice.
362 */
363 if (context->task) {
364 get_cpu();
365 if (context->task == current)
366 wrmsrl(MSR_IA32_DS_AREA, ds);
367 set_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
368 put_cpu();
369 } else
370 wrmsr_on_cpu(context->cpu, MSR_IA32_DS_AREA,
371 (u32)((u64)ds), (u32)((u64)ds >> 32));
372}
373
374/*
375 * Call the tracer's callback on a buffer overflow.
376 *
377 * context: the ds context
378 * qual: the buffer type
379 */
380static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
381{
382 switch (qual) {
383 case ds_bts:
384 if (context->bts_master &&
385 context->bts_master->ovfl)
386 context->bts_master->ovfl(context->bts_master);
387 break;
388 case ds_pebs:
389 if (context->pebs_master &&
390 context->pebs_master->ovfl)
391 context->pebs_master->ovfl(context->pebs_master);
392 break;
393 }
394}
395
396
397/*
398 * Write raw data into the BTS or PEBS buffer.
399 *
400 * The remainder of any partially written record is zeroed out.
401 *
402 * context: the DS context
403 * qual: the buffer type
404 * record: the data to write
405 * size: the size of the data
406 */
407static int ds_write(struct ds_context *context, enum ds_qualifier qual,
408 const void *record, size_t size)
409{
410 int bytes_written = 0;
411
412 if (!record)
413 return -EINVAL;
414
415 while (size) {
416 unsigned long base, index, end, write_end, int_th;
417 unsigned long write_size, adj_write_size;
418
419 /*
420 * Write as much as possible without producing an
421 * overflow interrupt.
422 *
423 * Interrupt_threshold must either be
424 * - bigger than absolute_maximum or
425 * - point to a record between buffer_base and absolute_maximum
426 *
427 * Index points to a valid record.
428 */
429 base = ds_get(context->ds, qual, ds_buffer_base);
430 index = ds_get(context->ds, qual, ds_index);
431 end = ds_get(context->ds, qual, ds_absolute_maximum);
432 int_th = ds_get(context->ds, qual, ds_interrupt_threshold);
433
434 write_end = min(end, int_th);
435
436 /*
437 * If we are already beyond the interrupt threshold,
438 * we fill the entire buffer.
439 */
440 if (write_end <= index)
441 write_end = end;
442
443 if (write_end <= index)
444 break;
445
446 write_size = min((unsigned long) size, write_end - index);
447 memcpy((void *)index, record, write_size);
448
449 record = (const char *)record + write_size;
450 size -= write_size;
451 bytes_written += write_size;
452
453 adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
454 adj_write_size *= ds_cfg.sizeof_rec[qual];
455
456 /* Zero out trailing bytes. */
457 memset((char *)index + write_size, 0,
458 adj_write_size - write_size);
459 index += adj_write_size;
460
461 if (index >= end)
462 index = base;
463 ds_set(context->ds, qual, ds_index, index);
464
465 if (index >= int_th)
466 ds_overflow(context, qual);
467 }
468
469 return bytes_written;
470}
471
472
473/*
474 * Branch Trace Store (BTS) uses the following format. Different
475 * architectures vary in the size of those fields.
476 * - source linear address
477 * - destination linear address
478 * - flags
479 *
480 * Later architectures use 64bit pointers throughout, whereas earlier
481 * architectures use 32bit pointers in 32bit mode.
482 *
483 * We compute the base address for the fields based on:
484 * - the field size stored in the DS configuration
485 * - the relative field position
486 *
487 * In order to store additional information in the BTS buffer, we use
488 * a special source address to indicate that the record requires
489 * special interpretation.
490 *
491 * Netburst indicated via a bit in the flags field whether the branch
492 * was predicted; this is ignored.
493 *
494 * We use two levels of abstraction:
495 * - the raw data level defined here
496 * - an arch-independent level defined in ds.h
497 */
498
499enum bts_field {
500 bts_from,
501 bts_to,
502 bts_flags,
503
504 bts_qual = bts_from,
505 bts_clock = bts_to,
506 bts_pid = bts_flags,
507
508 bts_qual_mask = (bts_qual_max - 1),
509 bts_escape = ((unsigned long)-1 & ~bts_qual_mask)
510};
511
512static inline unsigned long bts_get(const char *base, unsigned long field)
513{
514 base += (ds_cfg.sizeof_ptr_field * field);
515 return *(unsigned long *)base;
516}
517
518static inline void bts_set(char *base, unsigned long field, unsigned long val)
519{
520 base += (ds_cfg.sizeof_ptr_field * field);
521 (*(unsigned long *)base) = val;
522}
523
524
525/*
526 * The raw BTS data is architecture dependent.
527 *
528 * For higher-level users, we give an arch-independent view.
529 * - ds.h defines struct bts_struct
530 * - bts_read translates one raw bts record into a bts_struct
531 * - bts_write translates one bts_struct into the raw format and
532 * writes it into the top of the parameter tracer's buffer.
533 *
534 * return: bytes read/written on success; -Eerrno, otherwise
535 */
536static int
537bts_read(struct bts_tracer *tracer, const void *at, struct bts_struct *out)
538{
539 if (!tracer)
540 return -EINVAL;
541
542 if (at < tracer->trace.ds.begin)
543 return -EINVAL;
544
545 if (tracer->trace.ds.end < (at + tracer->trace.ds.size))
546 return -EINVAL;
547
548 memset(out, 0, sizeof(*out));
549 if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) {
550 out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask);
551 out->variant.event.clock = bts_get(at, bts_clock);
552 out->variant.event.pid = bts_get(at, bts_pid);
553 } else {
554 out->qualifier = bts_branch;
555 out->variant.lbr.from = bts_get(at, bts_from);
556 out->variant.lbr.to = bts_get(at, bts_to);
557
558 if (!out->variant.lbr.from && !out->variant.lbr.to)
559 out->qualifier = bts_invalid;
560 }
561
562 return ds_cfg.sizeof_rec[ds_bts];
563}
564
565static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in)
566{
567 unsigned char raw[MAX_SIZEOF_BTS];
568
569 if (!tracer)
570 return -EINVAL;
571
572 if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts])
573 return -EOVERFLOW;
574
575 switch (in->qualifier) {
576 case bts_invalid:
577 bts_set(raw, bts_from, 0);
578 bts_set(raw, bts_to, 0);
579 bts_set(raw, bts_flags, 0);
580 break;
581 case bts_branch:
582 bts_set(raw, bts_from, in->variant.lbr.from);
583 bts_set(raw, bts_to, in->variant.lbr.to);
584 bts_set(raw, bts_flags, 0);
585 break;
586 case bts_task_arrives:
587 case bts_task_departs:
588 bts_set(raw, bts_qual, (bts_escape | in->qualifier));
589 bts_set(raw, bts_clock, in->variant.event.clock);
590 bts_set(raw, bts_pid, in->variant.event.pid);
591 break;
592 default:
593 return -EINVAL;
594 }
595
596 return ds_write(tracer->ds.context, ds_bts, raw,
597 ds_cfg.sizeof_rec[ds_bts]);
598}
599
600
601static void ds_write_config(struct ds_context *context,
602 struct ds_trace *cfg, enum ds_qualifier qual)
603{
604 unsigned char *ds = context->ds;
605
606 ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin);
607 ds_set(ds, qual, ds_index, (unsigned long)cfg->top);
608 ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end);
609 ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith);
610}
611
612static void ds_read_config(struct ds_context *context,
613 struct ds_trace *cfg, enum ds_qualifier qual)
614{
615 unsigned char *ds = context->ds;
616
617 cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base);
618 cfg->top = (void *)ds_get(ds, qual, ds_index);
619 cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum);
620 cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold);
621}
622
623static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
624 void *base, size_t size, size_t ith,
625 unsigned int flags) {
626 unsigned long buffer, adj;
627
628 /*
629 * Adjust the buffer address and size to meet alignment
630 * constraints:
631 * - buffer is double-word aligned
632 * - size is multiple of record size
633 *
634 * We checked the size at the very beginning; we have enough
635 * space to do the adjustment.
636 */
637 buffer = (unsigned long)base;
638
639 adj = ALIGN(buffer, DS_ALIGNMENT) - buffer;
640 buffer += adj;
641 size -= adj;
642
643 trace->n = size / ds_cfg.sizeof_rec[qual];
644 trace->size = ds_cfg.sizeof_rec[qual];
645
646 size = (trace->n * trace->size);
647
648 trace->begin = (void *)buffer;
649 trace->top = trace->begin;
650 trace->end = (void *)(buffer + size);
651 /*
652 * The value for 'no threshold' is -1, which will set the
653 * threshold outside of the buffer, just like we want it.
654 */
655 ith *= ds_cfg.sizeof_rec[qual];
656 trace->ith = (void *)(buffer + size - ith);
657
658 trace->flags = flags;
659}
660
661
662static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
663 enum ds_qualifier qual, struct task_struct *task,
664 int cpu, void *base, size_t size, size_t th)
665{
666 struct ds_context *context;
667 int error;
668 size_t req_size;
669
670 error = -EOPNOTSUPP;
671 if (!ds_cfg.sizeof_rec[qual])
672 goto out;
673
674 error = -EINVAL;
675 if (!base)
676 goto out;
677
678 req_size = ds_cfg.sizeof_rec[qual];
679 /* We might need space for alignment adjustments. */
680 if (!IS_ALIGNED((unsigned long)base, DS_ALIGNMENT))
681 req_size += DS_ALIGNMENT;
682
683 error = -EINVAL;
684 if (size < req_size)
685 goto out;
686
687 if (th != (size_t)-1) {
688 th *= ds_cfg.sizeof_rec[qual];
689
690 error = -EINVAL;
691 if (size <= th)
692 goto out;
693 }
694
695 tracer->buffer = base;
696 tracer->size = size;
697
698 error = -ENOMEM;
699 context = ds_get_context(task, cpu);
700 if (!context)
701 goto out;
702 tracer->context = context;
703
704 /*
705 * Defer any tracer-specific initialization work for the context until
706 * context ownership has been clarified.
707 */
708
709 error = 0;
710 out:
711 return error;
712}
713
714static struct bts_tracer *ds_request_bts(struct task_struct *task, int cpu,
715 void *base, size_t size,
716 bts_ovfl_callback_t ovfl, size_t th,
717 unsigned int flags)
718{
719 struct bts_tracer *tracer;
720 int error;
721
722 /* Buffer overflow notification is not yet implemented. */
723 error = -EOPNOTSUPP;
724 if (ovfl)
725 goto out;
726
727 error = get_tracer(task);
728 if (error < 0)
729 goto out;
730
731 error = -ENOMEM;
732 tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
733 if (!tracer)
734 goto out_put_tracer;
735 tracer->ovfl = ovfl;
736
737 /* Do some more error checking and acquire a tracing context. */
738 error = ds_request(&tracer->ds, &tracer->trace.ds,
739 ds_bts, task, cpu, base, size, th);
740 if (error < 0)
741 goto out_tracer;
742
743 /* Claim the bts part of the tracing context we acquired above. */
744 spin_lock_irq(&ds_lock);
745
746 error = -EPERM;
747 if (tracer->ds.context->bts_master)
748 goto out_unlock;
749 tracer->ds.context->bts_master = tracer;
750
751 spin_unlock_irq(&ds_lock);
752
753 /*
754 * Now that we own the bts part of the context, let's complete the
755 * initialization for that part.
756 */
757 ds_init_ds_trace(&tracer->trace.ds, ds_bts, base, size, th, flags);
758 ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
759 ds_install_ds_area(tracer->ds.context);
760
761 tracer->trace.read = bts_read;
762 tracer->trace.write = bts_write;
763
764 /* Start tracing. */
765 ds_resume_bts(tracer);
766
767 return tracer;
768
769 out_unlock:
770 spin_unlock_irq(&ds_lock);
771 ds_put_context(tracer->ds.context);
772 out_tracer:
773 kfree(tracer);
774 out_put_tracer:
775 put_tracer(task);
776 out:
777 return ERR_PTR(error);
778}
779
780struct bts_tracer *ds_request_bts_task(struct task_struct *task,
781 void *base, size_t size,
782 bts_ovfl_callback_t ovfl,
783 size_t th, unsigned int flags)
784{
785 return ds_request_bts(task, 0, base, size, ovfl, th, flags);
786}
787
788struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size,
789 bts_ovfl_callback_t ovfl,
790 size_t th, unsigned int flags)
791{
792 return ds_request_bts(NULL, cpu, base, size, ovfl, th, flags);
793}
794
795static struct pebs_tracer *ds_request_pebs(struct task_struct *task, int cpu,
796 void *base, size_t size,
797 pebs_ovfl_callback_t ovfl, size_t th,
798 unsigned int flags)
799{
800 struct pebs_tracer *tracer;
801 int error;
802
803 /* Buffer overflow notification is not yet implemented. */
804 error = -EOPNOTSUPP;
805 if (ovfl)
806 goto out;
807
808 error = get_tracer(task);
809 if (error < 0)
810 goto out;
811
812 error = -ENOMEM;
813 tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
814 if (!tracer)
815 goto out_put_tracer;
816 tracer->ovfl = ovfl;
817
818 /* Do some more error checking and acquire a tracing context. */
819 error = ds_request(&tracer->ds, &tracer->trace.ds,
820 ds_pebs, task, cpu, base, size, th);
821 if (error < 0)
822 goto out_tracer;
823
824 /* Claim the pebs part of the tracing context we acquired above. */
825 spin_lock_irq(&ds_lock);
826
827 error = -EPERM;
828 if (tracer->ds.context->pebs_master)
829 goto out_unlock;
830 tracer->ds.context->pebs_master = tracer;
831
832 spin_unlock_irq(&ds_lock);
833
834 /*
835 * Now that we own the pebs part of the context, let's complete the
836 * initialization for that part.
837 */
838 ds_init_ds_trace(&tracer->trace.ds, ds_pebs, base, size, th, flags);
839 ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
840 ds_install_ds_area(tracer->ds.context);
841
842 /* Start tracing. */
843 ds_resume_pebs(tracer);
844
845 return tracer;
846
847 out_unlock:
848 spin_unlock_irq(&ds_lock);
849 ds_put_context(tracer->ds.context);
850 out_tracer:
851 kfree(tracer);
852 out_put_tracer:
853 put_tracer(task);
854 out:
855 return ERR_PTR(error);
856}
857
858struct pebs_tracer *ds_request_pebs_task(struct task_struct *task,
859 void *base, size_t size,
860 pebs_ovfl_callback_t ovfl,
861 size_t th, unsigned int flags)
862{
863 return ds_request_pebs(task, 0, base, size, ovfl, th, flags);
864}
865
866struct pebs_tracer *ds_request_pebs_cpu(int cpu, void *base, size_t size,
867 pebs_ovfl_callback_t ovfl,
868 size_t th, unsigned int flags)
869{
870 return ds_request_pebs(NULL, cpu, base, size, ovfl, th, flags);
871}
872
873static void ds_free_bts(struct bts_tracer *tracer)
874{
875 struct task_struct *task;
876
877 task = tracer->ds.context->task;
878
879 WARN_ON_ONCE(tracer->ds.context->bts_master != tracer);
880 tracer->ds.context->bts_master = NULL;
881
882 /* Make sure tracing stopped and the tracer is not in use. */
883 if (task && (task != current))
884 wait_task_context_switch(task);
885
886 ds_put_context(tracer->ds.context);
887 put_tracer(task);
888
889 kfree(tracer);
890}
891
892void ds_release_bts(struct bts_tracer *tracer)
893{
894 might_sleep();
895
896 if (!tracer)
897 return;
898
899 ds_suspend_bts(tracer);
900 ds_free_bts(tracer);
901}
902
903int ds_release_bts_noirq(struct bts_tracer *tracer)
904{
905 struct task_struct *task;
906 unsigned long irq;
907 int error;
908
909 if (!tracer)
910 return 0;
911
912 task = tracer->ds.context->task;
913
914 local_irq_save(irq);
915
916 error = -EPERM;
917 if (!task &&
918 (tracer->ds.context->cpu != smp_processor_id()))
919 goto out;
920
921 error = -EPERM;
922 if (task && (task != current))
923 goto out;
924
925 ds_suspend_bts_noirq(tracer);
926 ds_free_bts(tracer);
927
928 error = 0;
929 out:
930 local_irq_restore(irq);
931 return error;
932}
933
934static void update_task_debugctlmsr(struct task_struct *task,
935 unsigned long debugctlmsr)
936{
937 task->thread.debugctlmsr = debugctlmsr;
938
939 get_cpu();
940 if (task == current)
941 update_debugctlmsr(debugctlmsr);
942 put_cpu();
943}
944
945void ds_suspend_bts(struct bts_tracer *tracer)
946{
947 struct task_struct *task;
948 unsigned long debugctlmsr;
949 int cpu;
950
951 if (!tracer)
952 return;
953
954 tracer->flags = 0;
955
956 task = tracer->ds.context->task;
957 cpu = tracer->ds.context->cpu;
958
959 WARN_ON(!task && irqs_disabled());
960
961 debugctlmsr = (task ?
962 task->thread.debugctlmsr :
963 get_debugctlmsr_on_cpu(cpu));
964 debugctlmsr &= ~BTS_CONTROL;
965
966 if (task)
967 update_task_debugctlmsr(task, debugctlmsr);
968 else
969 update_debugctlmsr_on_cpu(cpu, debugctlmsr);
970}
971
972int ds_suspend_bts_noirq(struct bts_tracer *tracer)
973{
974 struct task_struct *task;
975 unsigned long debugctlmsr, irq;
976 int cpu, error = 0;
977
978 if (!tracer)
979 return 0;
980
981 tracer->flags = 0;
982
983 task = tracer->ds.context->task;
984 cpu = tracer->ds.context->cpu;
985
986 local_irq_save(irq);
987
988 error = -EPERM;
989 if (!task && (cpu != smp_processor_id()))
990 goto out;
991
992 debugctlmsr = (task ?
993 task->thread.debugctlmsr :
994 get_debugctlmsr());
995 debugctlmsr &= ~BTS_CONTROL;
996
997 if (task)
998 update_task_debugctlmsr(task, debugctlmsr);
999 else
1000 update_debugctlmsr(debugctlmsr);
1001
1002 error = 0;
1003 out:
1004 local_irq_restore(irq);
1005 return error;
1006}
1007
1008static unsigned long ds_bts_control(struct bts_tracer *tracer)
1009{
1010 unsigned long control;
1011
1012 control = ds_cfg.ctl[dsf_bts];
1013 if (!(tracer->trace.ds.flags & BTS_KERNEL))
1014 control |= ds_cfg.ctl[dsf_bts_kernel];
1015 if (!(tracer->trace.ds.flags & BTS_USER))
1016 control |= ds_cfg.ctl[dsf_bts_user];
1017
1018 return control;
1019}
1020
1021void ds_resume_bts(struct bts_tracer *tracer)
1022{
1023 struct task_struct *task;
1024 unsigned long debugctlmsr;
1025 int cpu;
1026
1027 if (!tracer)
1028 return;
1029
1030 tracer->flags = tracer->trace.ds.flags;
1031
1032 task = tracer->ds.context->task;
1033 cpu = tracer->ds.context->cpu;
1034
1035 WARN_ON(!task && irqs_disabled());
1036
1037 debugctlmsr = (task ?
1038 task->thread.debugctlmsr :
1039 get_debugctlmsr_on_cpu(cpu));
1040 debugctlmsr |= ds_bts_control(tracer);
1041
1042 if (task)
1043 update_task_debugctlmsr(task, debugctlmsr);
1044 else
1045 update_debugctlmsr_on_cpu(cpu, debugctlmsr);
1046}
1047
1048int ds_resume_bts_noirq(struct bts_tracer *tracer)
1049{
1050 struct task_struct *task;
1051 unsigned long debugctlmsr, irq;
1052 int cpu, error = 0;
1053
1054 if (!tracer)
1055 return 0;
1056
1057 tracer->flags = tracer->trace.ds.flags;
1058
1059 task = tracer->ds.context->task;
1060 cpu = tracer->ds.context->cpu;
1061
1062 local_irq_save(irq);
1063
1064 error = -EPERM;
1065 if (!task && (cpu != smp_processor_id()))
1066 goto out;
1067
1068 debugctlmsr = (task ?
1069 task->thread.debugctlmsr :
1070 get_debugctlmsr());
1071 debugctlmsr |= ds_bts_control(tracer);
1072
1073 if (task)
1074 update_task_debugctlmsr(task, debugctlmsr);
1075 else
1076 update_debugctlmsr(debugctlmsr);
1077
1078 error = 0;
1079 out:
1080 local_irq_restore(irq);
1081 return error;
1082}
1083
1084static void ds_free_pebs(struct pebs_tracer *tracer)
1085{
1086 struct task_struct *task;
1087
1088 task = tracer->ds.context->task;
1089
1090 WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer);
1091 tracer->ds.context->pebs_master = NULL;
1092
1093 ds_put_context(tracer->ds.context);
1094 put_tracer(task);
1095
1096 kfree(tracer);
1097}
1098
1099void ds_release_pebs(struct pebs_tracer *tracer)
1100{
1101 might_sleep();
1102
1103 if (!tracer)
1104 return;
1105
1106 ds_suspend_pebs(tracer);
1107 ds_free_pebs(tracer);
1108}
1109
1110int ds_release_pebs_noirq(struct pebs_tracer *tracer)
1111{
1112 struct task_struct *task;
1113 unsigned long irq;
1114 int error;
1115
1116 if (!tracer)
1117 return 0;
1118
1119 task = tracer->ds.context->task;
1120
1121 local_irq_save(irq);
1122
1123 error = -EPERM;
1124 if (!task &&
1125 (tracer->ds.context->cpu != smp_processor_id()))
1126 goto out;
1127
1128 error = -EPERM;
1129 if (task && (task != current))
1130 goto out;
1131
1132 ds_suspend_pebs_noirq(tracer);
1133 ds_free_pebs(tracer);
1134
1135 error = 0;
1136 out:
1137 local_irq_restore(irq);
1138 return error;
1139}
1140
1141void ds_suspend_pebs(struct pebs_tracer *tracer)
1142{
1143
1144}
1145
1146int ds_suspend_pebs_noirq(struct pebs_tracer *tracer)
1147{
1148 return 0;
1149}
1150
1151void ds_resume_pebs(struct pebs_tracer *tracer)
1152{
1153
1154}
1155
1156int ds_resume_pebs_noirq(struct pebs_tracer *tracer)
1157{
1158 return 0;
1159}
1160
1161const struct bts_trace *ds_read_bts(struct bts_tracer *tracer)
1162{
1163 if (!tracer)
1164 return NULL;
1165
1166 ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
1167 return &tracer->trace;
1168}
1169
1170const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer)
1171{
1172 if (!tracer)
1173 return NULL;
1174
1175 ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
1176
1177 tracer->trace.counters = ds_cfg.nr_counter_reset;
1178 memcpy(tracer->trace.counter_reset,
1179 tracer->ds.context->ds +
1180 (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field),
1181 ds_cfg.nr_counter_reset * PEBS_RESET_FIELD_SIZE);
1182
1183 return &tracer->trace;
1184}
1185
1186int ds_reset_bts(struct bts_tracer *tracer)
1187{
1188 if (!tracer)
1189 return -EINVAL;
1190
1191 tracer->trace.ds.top = tracer->trace.ds.begin;
1192
1193 ds_set(tracer->ds.context->ds, ds_bts, ds_index,
1194 (unsigned long)tracer->trace.ds.top);
1195
1196 return 0;
1197}
1198
1199int ds_reset_pebs(struct pebs_tracer *tracer)
1200{
1201 if (!tracer)
1202 return -EINVAL;
1203
1204 tracer->trace.ds.top = tracer->trace.ds.begin;
1205
1206 ds_set(tracer->ds.context->ds, ds_pebs, ds_index,
1207 (unsigned long)tracer->trace.ds.top);
1208
1209 return 0;
1210}
1211
1212int ds_set_pebs_reset(struct pebs_tracer *tracer,
1213 unsigned int counter, u64 value)
1214{
1215 if (!tracer)
1216 return -EINVAL;
1217
1218 if (ds_cfg.nr_counter_reset < counter)
1219 return -EINVAL;
1220
1221 *(u64 *)(tracer->ds.context->ds +
1222 (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field) +
1223 (counter * PEBS_RESET_FIELD_SIZE)) = value;
1224
1225 return 0;
1226}
1227
/*
 * Per-microarchitecture DS configuration templates.
 *
 * ctl[] holds the DEBUGCTL MSR bits used to enable BTS and to restrict
 * tracing to kernel or user mode; the bit positions differ between cpu
 * families (NOTE(review): exact bit semantics assumed — confirm against
 * the Intel SDM for each family).  nr_counter_reset is the number of
 * PEBS counter reset fields in the DS area.
 */
static const struct ds_configuration ds_cfg_netburst = {
	.name = "Netburst",
	.ctl[dsf_bts]		= (1 << 2) | (1 << 3),
	.ctl[dsf_bts_kernel]	= (1 << 5),
	.ctl[dsf_bts_user]	= (1 << 6),
	.nr_counter_reset	= 1,
};
static const struct ds_configuration ds_cfg_pentium_m = {
	.name = "Pentium M",
	.ctl[dsf_bts]		= (1 << 6) | (1 << 7),
	/* No kernel/user qualification bits on Pentium M. */
	.nr_counter_reset	= 1,
};
static const struct ds_configuration ds_cfg_core2_atom = {
	.name = "Core 2/Atom",
	.ctl[dsf_bts]		= (1 << 6) | (1 << 7),
	.ctl[dsf_bts_kernel]	= (1 << 9),
	.ctl[dsf_bts_user]	= (1 << 10),
	.nr_counter_reset	= 1,
};
static const struct ds_configuration ds_cfg_core_i7 = {
	.name = "Core i7",
	.ctl[dsf_bts]		= (1 << 6) | (1 << 7),
	.ctl[dsf_bts_kernel]	= (1 << 9),
	.ctl[dsf_bts_user]	= (1 << 10),
	/* Core i7 provides four PEBS counter reset fields. */
	.nr_counter_reset	= 4,
};
1254
/*
 * Probe the DS/BTS/PEBS capabilities of the running cpu and initialize
 * the global ds_cfg from the family template @cfg plus runtime-detected
 * parameters (pointer field width, PEBS record layout, feature bits).
 *
 * Called once on the boot cpu from ds_init_intel().
 */
static void
ds_configure(const struct ds_configuration *cfg,
	     struct cpuinfo_x86 *cpu)
{
	unsigned long nr_pebs_fields = 0;

	printk(KERN_INFO "[ds] using %s configuration\n", cfg->name);

	/* Default PEBS record layout depends on the register set width. */
#ifdef __i386__
	nr_pebs_fields = 10;
#else
	nr_pebs_fields = 18;
#endif

	/*
	 * Starting with version 2, architectural performance
	 * monitoring supports a format specifier.
	 */
	if ((cpuid_eax(0xa) & 0xff) > 1) {
		unsigned long perf_capabilities, format;

		rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_capabilities);

		/* PEBS record format id: bits 11:8 of PERF_CAPABILITIES. */
		format = (perf_capabilities >> 8) & 0xf;

		switch (format) {
		case 0:
			nr_pebs_fields = 18;
			break;
		case 1:
			nr_pebs_fields = 22;
			break;
		default:
			printk(KERN_INFO
			       "[ds] unknown PEBS format: %lu\n", format);
			/* Unknown layout: zero size disables PEBS below. */
			nr_pebs_fields = 0;
			break;
		}
	}

	/* memset also clears padding the struct assignment may skip. */
	memset(&ds_cfg, 0, sizeof(ds_cfg));
	ds_cfg = *cfg;

	/* DS pointer fields are 8 bytes with 64-bit DS support, else 4. */
	ds_cfg.sizeof_ptr_field =
		(cpu_has(cpu, X86_FEATURE_DTES64) ? 8 : 4);

	/* A BTS record consists of three pointer-sized fields. */
	ds_cfg.sizeof_rec[ds_bts]  = ds_cfg.sizeof_ptr_field * 3;
	ds_cfg.sizeof_rec[ds_pebs] = ds_cfg.sizeof_ptr_field * nr_pebs_fields;

	/* A record size of zero marks the feature as unavailable. */
	if (!cpu_has(cpu, X86_FEATURE_BTS)) {
		ds_cfg.sizeof_rec[ds_bts] = 0;
		printk(KERN_INFO "[ds] bts not available\n");
	}
	if (!cpu_has(cpu, X86_FEATURE_PEBS)) {
		ds_cfg.sizeof_rec[ds_pebs] = 0;
		printk(KERN_INFO "[ds] pebs not available\n");
	}

	printk(KERN_INFO "[ds] sizes: address: %u bit, ",
	       8 * ds_cfg.sizeof_ptr_field);
	printk("bts/pebs record: %u/%u bytes\n",
	       ds_cfg.sizeof_rec[ds_bts], ds_cfg.sizeof_rec[ds_pebs]);

	WARN_ON_ONCE(MAX_PEBS_COUNTERS < ds_cfg.nr_counter_reset);
}
1320
/*
 * Select and apply the DS configuration matching the cpu family/model.
 * Unknown models are silently left unconfigured (DS stays disabled).
 */
void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
{
	/* Only configure the first cpu. Others are identical. */
	if (ds_cfg.name)
		return;

	switch (c->x86) {
	case 0x6:
		switch (c->x86_model) {
		case 0x9:
		case 0xd: /* Pentium M */
			ds_configure(&ds_cfg_pentium_m, c);
			break;
		case 0xf:
		case 0x17: /* Core2 */
		case 0x1c: /* Atom */
			ds_configure(&ds_cfg_core2_atom, c);
			break;
		case 0x1a: /* Core i7 */
			ds_configure(&ds_cfg_core_i7, c);
			break;
		default:
			/* Sorry, don't know about them. */
			break;
		}
		break;
	case 0xf:
		switch (c->x86_model) {
		case 0x0:
		case 0x1:
		case 0x2: /* Netburst */
			ds_configure(&ds_cfg_netburst, c);
			break;
		default:
			/* Sorry, don't know about them. */
			break;
		}
		break;
	default:
		/* Sorry, don't know about them. */
		break;
	}
}
1364
/*
 * Emit a scheduling event record into the context's BTS trace, if the
 * bts master tracer requested timestamps (BTS_TIMESTAMPS flag).
 *
 * @qualifier: the event kind (bts_task_arrives/bts_task_departs).
 * @task:      the task the event refers to; only its pid is recorded.
 */
static inline void ds_take_timestamp(struct ds_context *context,
				     enum bts_qualifier qualifier,
				     struct task_struct *task)
{
	struct bts_tracer *tracer = context->bts_master;
	struct bts_struct ts;

	/* Prevent compilers from reading the tracer pointer twice. */
	barrier();

	if (!tracer || !(tracer->flags & BTS_TIMESTAMPS))
		return;

	memset(&ts, 0, sizeof(ts));
	ts.qualifier		= qualifier;
	ts.variant.event.clock	= trace_clock_global();
	ts.variant.event.pid	= task->pid;

	bts_write(tracer, &ts);
}
1385
/*
 * Change the DS configuration from tracing prev to tracing next.
 *
 * Tracing is stopped (debugctl cleared) before touching prev's trace,
 * the DS area MSR is repointed at next's buffer management area, and
 * next's debugctl value is installed last.  Departure/arrival
 * timestamps are injected into the respective BTS traces on request.
 */
void ds_switch_to(struct task_struct *prev, struct task_struct *next)
{
	struct ds_context *prev_ctx = prev->thread.ds_ctx;
	struct ds_context *next_ctx = next->thread.ds_ctx;
	unsigned long debugctlmsr = next->thread.debugctlmsr;

	/* Make sure all data is read before we start. */
	barrier();

	if (prev_ctx) {
		/* Stop tracing before recording prev's departure. */
		update_debugctlmsr(0);

		ds_take_timestamp(prev_ctx, bts_task_departs, prev);
	}

	if (next_ctx) {
		ds_take_timestamp(next_ctx, bts_task_arrives, next);

		/* Point the hardware at next's DS buffer management area. */
		wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds);
	}

	update_debugctlmsr(debugctlmsr);
}
1412
1413static __init int ds_selftest(void)
1414{
1415 if (ds_cfg.sizeof_rec[ds_bts]) {
1416 int error;
1417
1418 error = ds_selftest_bts();
1419 if (error) {
1420 WARN(1, "[ds] selftest failed. disabling bts.\n");
1421 ds_cfg.sizeof_rec[ds_bts] = 0;
1422 }
1423 }
1424
1425 if (ds_cfg.sizeof_rec[ds_pebs]) {
1426 int error;
1427
1428 error = ds_selftest_pebs();
1429 if (error) {
1430 WARN(1, "[ds] selftest failed. disabling pebs.\n");
1431 ds_cfg.sizeof_rec[ds_pebs] = 0;
1432 }
1433 }
1434
1435 return 0;
1436}
1437device_initcall(ds_selftest);
diff --git a/arch/x86/kernel/ds_selftest.c b/arch/x86/kernel/ds_selftest.c
deleted file mode 100644
index 6bc7c199ab99..000000000000
--- a/arch/x86/kernel/ds_selftest.c
+++ /dev/null
@@ -1,408 +0,0 @@
1/*
2 * Debug Store support - selftest
3 *
4 *
5 * Copyright (C) 2009 Intel Corporation.
6 * Markus Metzger <markus.t.metzger@intel.com>, 2009
7 */
8
9#include "ds_selftest.h"
10
11#include <linux/kernel.h>
12#include <linux/string.h>
13#include <linux/smp.h>
14#include <linux/cpu.h>
15
16#include <asm/ds.h>
17
18
19#define BUFFER_SIZE 521 /* Intentionally chose an odd size. */
20#define SMALL_BUFFER_SIZE 24 /* A single bts entry. */
21
/*
 * Shared state for one BTS selftest run.
 *
 * suspend/resume abstract over the plain and the _noirq tracer API so
 * the same test body (ds_selftest_bts_cpu) can exercise both variants.
 */
struct ds_selftest_bts_conf {
	struct bts_tracer *tracer;	/* tracer under test; NULLed on init failure */
	int error;			/* 0 on success, negative on failure */
	int (*suspend)(struct bts_tracer *);
	int (*resume)(struct bts_tracer *);
};
28
29static int ds_selftest_bts_consistency(const struct bts_trace *trace)
30{
31 int error = 0;
32
33 if (!trace) {
34 printk(KERN_CONT "failed to access trace...");
35 /* Bail out. Other tests are pointless. */
36 return -1;
37 }
38
39 if (!trace->read) {
40 printk(KERN_CONT "bts read not available...");
41 error = -1;
42 }
43
44 /* Do some sanity checks on the trace configuration. */
45 if (!trace->ds.n) {
46 printk(KERN_CONT "empty bts buffer...");
47 error = -1;
48 }
49 if (!trace->ds.size) {
50 printk(KERN_CONT "bad bts trace setup...");
51 error = -1;
52 }
53 if (trace->ds.end !=
54 (char *)trace->ds.begin + (trace->ds.n * trace->ds.size)) {
55 printk(KERN_CONT "bad bts buffer setup...");
56 error = -1;
57 }
58 /*
59 * We allow top in [begin; end], since its not clear when the
60 * overflow adjustment happens: after the increment or before the
61 * write.
62 */
63 if ((trace->ds.top < trace->ds.begin) ||
64 (trace->ds.end < trace->ds.top)) {
65 printk(KERN_CONT "bts top out of bounds...");
66 error = -1;
67 }
68
69 return error;
70}
71
72static int ds_selftest_bts_read(struct bts_tracer *tracer,
73 const struct bts_trace *trace,
74 const void *from, const void *to)
75{
76 const unsigned char *at;
77
78 /*
79 * Check a few things which do not belong to this test.
80 * They should be covered by other tests.
81 */
82 if (!trace)
83 return -1;
84
85 if (!trace->read)
86 return -1;
87
88 if (to < from)
89 return -1;
90
91 if (from < trace->ds.begin)
92 return -1;
93
94 if (trace->ds.end < to)
95 return -1;
96
97 if (!trace->ds.size)
98 return -1;
99
100 /* Now to the test itself. */
101 for (at = from; (void *)at < to; at += trace->ds.size) {
102 struct bts_struct bts;
103 unsigned long index;
104 int error;
105
106 if (((void *)at - trace->ds.begin) % trace->ds.size) {
107 printk(KERN_CONT
108 "read from non-integer index...");
109 return -1;
110 }
111 index = ((void *)at - trace->ds.begin) / trace->ds.size;
112
113 memset(&bts, 0, sizeof(bts));
114 error = trace->read(tracer, at, &bts);
115 if (error < 0) {
116 printk(KERN_CONT
117 "error reading bts trace at [%lu] (0x%p)...",
118 index, at);
119 return error;
120 }
121
122 switch (bts.qualifier) {
123 case BTS_BRANCH:
124 break;
125 default:
126 printk(KERN_CONT
127 "unexpected bts entry %llu at [%lu] (0x%p)...",
128 bts.qualifier, index, at);
129 return -1;
130 }
131 }
132
133 return 0;
134}
135
/*
 * Core of the BTS selftest; @arg points to a struct ds_selftest_bts_conf
 * whose tracer was already requested by the caller.
 *
 * Suspends tracing, validates the collected trace, verifies a second
 * read while suspended is stable, then resumes and checks that new
 * trace is collected.  The result is reported in conf->error
 * (0 on success, negative on failure).
 *
 * Also usable as an smp_call_function_single() callback for the
 * noirq variants.
 */
static void ds_selftest_bts_cpu(void *arg)
{
	struct ds_selftest_bts_conf *conf = arg;
	const struct bts_trace *trace;
	void *top;

	if (IS_ERR(conf->tracer)) {
		conf->error = PTR_ERR(conf->tracer);
		conf->tracer = NULL;

		printk(KERN_CONT
		       "initialization failed (err: %d)...", conf->error);
		return;
	}

	/* We should meanwhile have enough trace. */
	conf->error = conf->suspend(conf->tracer);
	if (conf->error < 0)
		return;

	/* Let's see if we can access the trace. */
	trace = ds_read_bts(conf->tracer);

	conf->error = ds_selftest_bts_consistency(trace);
	if (conf->error < 0)
		return;

	/* If everything went well, we should have a few trace entries. */
	if (trace->ds.top == trace->ds.begin) {
		/*
		 * It is possible but highly unlikely that we got a
		 * buffer overflow and end up at exactly the same
		 * position we started from.
		 * Let's issue a warning, but continue.
		 */
		printk(KERN_CONT "no trace/overflow...");
	}

	/* Let's try to read the trace we collected. */
	conf->error =
		ds_selftest_bts_read(conf->tracer, trace,
				     trace->ds.begin, trace->ds.top);
	if (conf->error < 0)
		return;

	/*
	 * Let's read the trace again.
	 * Since we suspended tracing, we should get the same result.
	 */
	top = trace->ds.top;

	trace = ds_read_bts(conf->tracer);
	conf->error = ds_selftest_bts_consistency(trace);
	if (conf->error < 0)
		return;

	if (top != trace->ds.top) {
		printk(KERN_CONT "suspend not working...");
		conf->error = -1;
		return;
	}

	/* Let's collect some more trace - see if resume is working. */
	conf->error = conf->resume(conf->tracer);
	if (conf->error < 0)
		return;

	conf->error = conf->suspend(conf->tracer);
	if (conf->error < 0)
		return;

	trace = ds_read_bts(conf->tracer);

	conf->error = ds_selftest_bts_consistency(trace);
	if (conf->error < 0)
		return;

	if (trace->ds.top == top) {
		/*
		 * It is possible but highly unlikely that we got a
		 * buffer overflow and end up at exactly the same
		 * position we started from.
		 * Let's issue a warning and check the full trace.
		 */
		printk(KERN_CONT
		       "no resume progress/overflow...");

		conf->error =
			ds_selftest_bts_read(conf->tracer, trace,
					     trace->ds.begin, trace->ds.end);
	} else if (trace->ds.top < top) {
		/*
		 * We had a buffer overflow - the entire buffer should
		 * contain trace records.
		 */
		conf->error =
			ds_selftest_bts_read(conf->tracer, trace,
					     trace->ds.begin, trace->ds.end);
	} else {
		/*
		 * It is quite likely that the buffer did not overflow.
		 * Let's just check the delta trace.
		 */
		conf->error =
			ds_selftest_bts_read(conf->tracer, trace, top,
					     trace->ds.top);
	}
	if (conf->error < 0)
		return;

	conf->error = 0;
}
248
/*
 * Adapters giving the void-returning ds_suspend_bts()/ds_resume_bts()
 * the int-returning signature expected by struct ds_selftest_bts_conf.
 */
static int ds_suspend_bts_wrap(struct bts_tracer *tracer)
{
	ds_suspend_bts(tracer);
	return 0;
}

static int ds_resume_bts_wrap(struct bts_tracer *tracer)
{
	ds_resume_bts(tracer);
	return 0;
}

/* smp_call_function_single() callback: release a noirq tracer remotely. */
static void ds_release_bts_noirq_wrap(void *tracer)
{
	(void)ds_release_bts_noirq(tracer);
}
265
266static int ds_selftest_bts_bad_release_noirq(int cpu,
267 struct bts_tracer *tracer)
268{
269 int error = -EPERM;
270
271 /* Try to release the tracer on the wrong cpu. */
272 get_cpu();
273 if (cpu != smp_processor_id()) {
274 error = ds_release_bts_noirq(tracer);
275 if (error != -EPERM)
276 printk(KERN_CONT "release on wrong cpu...");
277 }
278 put_cpu();
279
280 return error ? 0 : -1;
281}
282
283static int ds_selftest_bts_bad_request_cpu(int cpu, void *buffer)
284{
285 struct bts_tracer *tracer;
286 int error;
287
288 /* Try to request cpu tracing while task tracing is active. */
289 tracer = ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, NULL,
290 (size_t)-1, BTS_KERNEL);
291 error = PTR_ERR(tracer);
292 if (!IS_ERR(tracer)) {
293 ds_release_bts(tracer);
294 error = 0;
295 }
296
297 if (error != -EPERM)
298 printk(KERN_CONT "cpu/task tracing overlap...");
299
300 return error ? 0 : -1;
301}
302
303static int ds_selftest_bts_bad_request_task(void *buffer)
304{
305 struct bts_tracer *tracer;
306 int error;
307
308 /* Try to request cpu tracing while task tracing is active. */
309 tracer = ds_request_bts_task(current, buffer, BUFFER_SIZE, NULL,
310 (size_t)-1, BTS_KERNEL);
311 error = PTR_ERR(tracer);
312 if (!IS_ERR(tracer)) {
313 error = 0;
314 ds_release_bts(tracer);
315 }
316
317 if (error != -EPERM)
318 printk(KERN_CONT "task/cpu tracing overlap...");
319
320 return error ? 0 : -1;
321}
322
/*
 * BTS selftest entry point.
 *
 * Exercises cpu-wide and per-task BTS tracing, both with the normal
 * and with the interrupts-disabled (noirq) API variants, and checks
 * that conflicting cpu/task requests are rejected.
 *
 * Returns 0 on success, negative on the first failure.
 */
int ds_selftest_bts(void)
{
	struct ds_selftest_bts_conf conf;
	unsigned char buffer[BUFFER_SIZE], *small_buffer;
	unsigned long irq;
	int cpu;

	printk(KERN_INFO "[ds] bts selftest...");
	conf.error = 0;

	/* An aligned sub-buffer for the single-record test below. */
	small_buffer = (unsigned char *)ALIGN((unsigned long)buffer, 8) + 8;

	get_online_cpus();
	for_each_online_cpu(cpu) {
		conf.suspend = ds_suspend_bts_wrap;
		conf.resume = ds_resume_bts_wrap;
		conf.tracer =
			ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE,
					   NULL, (size_t)-1, BTS_KERNEL);
		ds_selftest_bts_cpu(&conf);
		if (conf.error >= 0)
			conf.error = ds_selftest_bts_bad_request_task(buffer);
		ds_release_bts(conf.tracer);
		if (conf.error < 0)
			goto out;

		/* The noirq variants must be driven on the traced cpu. */
		conf.suspend = ds_suspend_bts_noirq;
		conf.resume = ds_resume_bts_noirq;
		conf.tracer =
			ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE,
					   NULL, (size_t)-1, BTS_KERNEL);
		smp_call_function_single(cpu, ds_selftest_bts_cpu, &conf, 1);
		if (conf.error >= 0) {
			conf.error =
				ds_selftest_bts_bad_release_noirq(cpu,
								  conf.tracer);
			/* We must not release the tracer twice. */
			if (conf.error < 0)
				conf.tracer = NULL;
		}
		if (conf.error >= 0)
			conf.error = ds_selftest_bts_bad_request_task(buffer);
		smp_call_function_single(cpu, ds_release_bts_noirq_wrap,
					 conf.tracer, 1);
		if (conf.error < 0)
			goto out;
	}

	/* Per-task tracing, normal API. */
	conf.suspend = ds_suspend_bts_wrap;
	conf.resume = ds_resume_bts_wrap;
	conf.tracer =
		ds_request_bts_task(current, buffer, BUFFER_SIZE,
				    NULL, (size_t)-1, BTS_KERNEL);
	ds_selftest_bts_cpu(&conf);
	if (conf.error >= 0)
		conf.error = ds_selftest_bts_bad_request_cpu(0, buffer);
	ds_release_bts(conf.tracer);
	if (conf.error < 0)
		goto out;

	/* Per-task tracing, noirq API, with a single-record buffer. */
	conf.suspend = ds_suspend_bts_noirq;
	conf.resume = ds_resume_bts_noirq;
	conf.tracer =
		ds_request_bts_task(current, small_buffer, SMALL_BUFFER_SIZE,
				    NULL, (size_t)-1, BTS_KERNEL);
	local_irq_save(irq);
	ds_selftest_bts_cpu(&conf);
	if (conf.error >= 0)
		conf.error = ds_selftest_bts_bad_request_cpu(0, buffer);
	ds_release_bts_noirq(conf.tracer);
	local_irq_restore(irq);
	if (conf.error < 0)
		goto out;

	conf.error = 0;
 out:
	put_online_cpus();
	printk(KERN_CONT "%s.\n", (conf.error ? "failed" : "passed"));

	return conf.error;
}
404
/* PEBS selftest: not implemented; report success unconditionally. */
int ds_selftest_pebs(void)
{
	return 0;
}
diff --git a/arch/x86/kernel/ds_selftest.h b/arch/x86/kernel/ds_selftest.h
deleted file mode 100644
index 2ba8745c6663..000000000000
--- a/arch/x86/kernel/ds_selftest.h
+++ /dev/null
@@ -1,15 +0,0 @@
/*
 * Debug Store support - selftest
 *
 *
 * Copyright (C) 2009 Intel Corporation.
 * Markus Metzger <markus.t.metzger@intel.com>, 2009
 */

/*
 * Selftest entry points: return 0 on success, negative on failure.
 * When selftests are not configured, they become no-ops that succeed.
 */
#ifdef CONFIG_X86_DS_SELFTEST
extern int ds_selftest_bts(void);
extern int ds_selftest_pebs(void);
#else
static inline int ds_selftest_bts(void) { return 0; }
static inline int ds_selftest_pebs(void) { return 0; }
#endif
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 6d817554780a..c89a386930b7 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -224,11 +224,6 @@ unsigned __kprobes long oops_begin(void)
224 int cpu; 224 int cpu;
225 unsigned long flags; 225 unsigned long flags;
226 226
227 /* notify the hw-branch tracer so it may disable tracing and
228 add the last trace to the trace buffer -
229 the earlier this happens, the more useful the trace. */
230 trace_hw_branch_oops();
231
232 oops_enter(); 227 oops_enter();
233 228
234 /* racy, but better than risking deadlock. */ 229 /* racy, but better than risking deadlock. */
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index b43bbaebe2c0..f2f56c0967b6 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -422,14 +422,22 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
422 422
423static void __kprobes clear_btf(void) 423static void __kprobes clear_btf(void)
424{ 424{
425 if (test_thread_flag(TIF_DEBUGCTLMSR)) 425 if (test_thread_flag(TIF_BLOCKSTEP)) {
426 update_debugctlmsr(0); 426 unsigned long debugctl = get_debugctlmsr();
427
428 debugctl &= ~DEBUGCTLMSR_BTF;
429 update_debugctlmsr(debugctl);
430 }
427} 431}
428 432
429static void __kprobes restore_btf(void) 433static void __kprobes restore_btf(void)
430{ 434{
431 if (test_thread_flag(TIF_DEBUGCTLMSR)) 435 if (test_thread_flag(TIF_BLOCKSTEP)) {
432 update_debugctlmsr(current->thread.debugctlmsr); 436 unsigned long debugctl = get_debugctlmsr();
437
438 debugctl |= DEBUGCTLMSR_BTF;
439 update_debugctlmsr(debugctl);
440 }
433} 441}
434 442
435void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, 443void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 28ad9f4d8b94..eccdb57094e3 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -20,7 +20,6 @@
20#include <asm/idle.h> 20#include <asm/idle.h>
21#include <asm/uaccess.h> 21#include <asm/uaccess.h>
22#include <asm/i387.h> 22#include <asm/i387.h>
23#include <asm/ds.h>
24#include <asm/debugreg.h> 23#include <asm/debugreg.h>
25 24
26unsigned long idle_halt; 25unsigned long idle_halt;
@@ -50,8 +49,6 @@ void free_thread_xstate(struct task_struct *tsk)
50 kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); 49 kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
51 tsk->thread.xstate = NULL; 50 tsk->thread.xstate = NULL;
52 } 51 }
53
54 WARN(tsk->thread.ds_ctx, "leaking DS context\n");
55} 52}
56 53
57void free_thread_info(struct thread_info *ti) 54void free_thread_info(struct thread_info *ti)
@@ -198,11 +195,16 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
198 prev = &prev_p->thread; 195 prev = &prev_p->thread;
199 next = &next_p->thread; 196 next = &next_p->thread;
200 197
201 if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) || 198 if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^
202 test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR)) 199 test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) {
203 ds_switch_to(prev_p, next_p); 200 unsigned long debugctl = get_debugctlmsr();
204 else if (next->debugctlmsr != prev->debugctlmsr) 201
205 update_debugctlmsr(next->debugctlmsr); 202 debugctl &= ~DEBUGCTLMSR_BTF;
203 if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP))
204 debugctl |= DEBUGCTLMSR_BTF;
205
206 update_debugctlmsr(debugctl);
207 }
206 208
207 if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ 209 if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
208 test_tsk_thread_flag(next_p, TIF_NOTSC)) { 210 test_tsk_thread_flag(next_p, TIF_NOTSC)) {
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index f6c62667e30c..75090c589b7a 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -55,7 +55,6 @@
55#include <asm/cpu.h> 55#include <asm/cpu.h>
56#include <asm/idle.h> 56#include <asm/idle.h>
57#include <asm/syscalls.h> 57#include <asm/syscalls.h>
58#include <asm/ds.h>
59#include <asm/debugreg.h> 58#include <asm/debugreg.h>
60 59
61asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); 60asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
@@ -238,13 +237,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
238 kfree(p->thread.io_bitmap_ptr); 237 kfree(p->thread.io_bitmap_ptr);
239 p->thread.io_bitmap_max = 0; 238 p->thread.io_bitmap_max = 0;
240 } 239 }
241
242 clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
243 p->thread.ds_ctx = NULL;
244
245 clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
246 p->thread.debugctlmsr = 0;
247
248 return err; 240 return err;
249} 241}
250 242
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index dc9690b4c4cc..cc4258f2beb5 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -49,7 +49,6 @@
49#include <asm/ia32.h> 49#include <asm/ia32.h>
50#include <asm/idle.h> 50#include <asm/idle.h>
51#include <asm/syscalls.h> 51#include <asm/syscalls.h>
52#include <asm/ds.h>
53#include <asm/debugreg.h> 52#include <asm/debugreg.h>
54 53
55asmlinkage extern void ret_from_fork(void); 54asmlinkage extern void ret_from_fork(void);
@@ -313,13 +312,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
313 if (err) 312 if (err)
314 goto out; 313 goto out;
315 } 314 }
316
317 clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
318 p->thread.ds_ctx = NULL;
319
320 clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
321 p->thread.debugctlmsr = 0;
322
323 err = 0; 315 err = 0;
324out: 316out:
325 if (err && p->thread.io_bitmap_ptr) { 317 if (err && p->thread.io_bitmap_ptr) {
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 2e9b55027b7e..055be0afd330 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -2,9 +2,6 @@
2/* 2/*
3 * Pentium III FXSR, SSE support 3 * Pentium III FXSR, SSE support
4 * Gareth Hughes <gareth@valinux.com>, May 2000 4 * Gareth Hughes <gareth@valinux.com>, May 2000
5 *
6 * BTS tracing
7 * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007
8 */ 5 */
9 6
10#include <linux/kernel.h> 7#include <linux/kernel.h>
@@ -22,7 +19,6 @@
22#include <linux/audit.h> 19#include <linux/audit.h>
23#include <linux/seccomp.h> 20#include <linux/seccomp.h>
24#include <linux/signal.h> 21#include <linux/signal.h>
25#include <linux/workqueue.h>
26#include <linux/perf_event.h> 22#include <linux/perf_event.h>
27#include <linux/hw_breakpoint.h> 23#include <linux/hw_breakpoint.h>
28 24
@@ -36,7 +32,6 @@
36#include <asm/desc.h> 32#include <asm/desc.h>
37#include <asm/prctl.h> 33#include <asm/prctl.h>
38#include <asm/proto.h> 34#include <asm/proto.h>
39#include <asm/ds.h>
40#include <asm/hw_breakpoint.h> 35#include <asm/hw_breakpoint.h>
41 36
42#include "tls.h" 37#include "tls.h"
@@ -789,342 +784,6 @@ static int ioperm_get(struct task_struct *target,
789 0, IO_BITMAP_BYTES); 784 0, IO_BITMAP_BYTES);
790} 785}
791 786
792#ifdef CONFIG_X86_PTRACE_BTS
793/*
794 * A branch trace store context.
795 *
796 * Contexts may only be installed by ptrace_bts_config() and only for
797 * ptraced tasks.
798 *
799 * Contexts are destroyed when the tracee is detached from the tracer.
800 * The actual destruction work requires interrupts enabled, so the
801 * work is deferred and will be scheduled during __ptrace_unlink().
802 *
803 * Contexts hold an additional task_struct reference on the traced
804 * task, as well as a reference on the tracer's mm.
805 *
806 * Ptrace already holds a task_struct for the duration of ptrace operations,
807 * but since destruction is deferred, it may be executed after both
808 * tracer and tracee exited.
809 */
/* A per-tracee branch trace store context (see the description above). */
struct bts_context {
	/* The branch trace handle. */
	struct bts_tracer *tracer;

	/* The buffer used to store the branch trace and its size. */
	void *buffer;
	unsigned int size;

	/* The mm that paid (locked-memory accounting) for the above buffer. */
	struct mm_struct *mm;

	/* The task this context belongs to. */
	struct task_struct *task;

	/* The signal to send on a bts buffer overflow. */
	unsigned int bts_ovfl_signal;

	/* The work struct to destroy a context (deferred destruction). */
	struct work_struct work;
};
830
831static int alloc_bts_buffer(struct bts_context *context, unsigned int size)
832{
833 void *buffer = NULL;
834 int err = -ENOMEM;
835
836 err = account_locked_memory(current->mm, current->signal->rlim, size);
837 if (err < 0)
838 return err;
839
840 buffer = kzalloc(size, GFP_KERNEL);
841 if (!buffer)
842 goto out_refund;
843
844 context->buffer = buffer;
845 context->size = size;
846 context->mm = get_task_mm(current);
847
848 return 0;
849
850 out_refund:
851 refund_locked_memory(current->mm, size);
852 return err;
853}
854
855static inline void free_bts_buffer(struct bts_context *context)
856{
857 if (!context->buffer)
858 return;
859
860 kfree(context->buffer);
861 context->buffer = NULL;
862
863 refund_locked_memory(context->mm, context->size);
864 context->size = 0;
865
866 mmput(context->mm);
867 context->mm = NULL;
868}
869
870static void free_bts_context_work(struct work_struct *w)
871{
872 struct bts_context *context;
873
874 context = container_of(w, struct bts_context, work);
875
876 ds_release_bts(context->tracer);
877 put_task_struct(context->task);
878 free_bts_buffer(context);
879 kfree(context);
880}
881
/*
 * Schedule destruction of @context.  The real work needs interrupts
 * enabled, so it is deferred to free_bts_context_work().
 */
static inline void free_bts_context(struct bts_context *context)
{
	INIT_WORK(&context->work, free_bts_context_work);
	schedule_work(&context->work);
}
887
888static inline struct bts_context *alloc_bts_context(struct task_struct *task)
889{
890 struct bts_context *context = kzalloc(sizeof(*context), GFP_KERNEL);
891 if (context) {
892 context->task = task;
893 task->bts = context;
894
895 get_task_struct(task);
896 }
897
898 return context;
899}
900
901static int ptrace_bts_read_record(struct task_struct *child, size_t index,
902 struct bts_struct __user *out)
903{
904 struct bts_context *context;
905 const struct bts_trace *trace;
906 struct bts_struct bts;
907 const unsigned char *at;
908 int error;
909
910 context = child->bts;
911 if (!context)
912 return -ESRCH;
913
914 trace = ds_read_bts(context->tracer);
915 if (!trace)
916 return -ESRCH;
917
918 at = trace->ds.top - ((index + 1) * trace->ds.size);
919 if ((void *)at < trace->ds.begin)
920 at += (trace->ds.n * trace->ds.size);
921
922 if (!trace->read)
923 return -EOPNOTSUPP;
924
925 error = trace->read(context->tracer, at, &bts);
926 if (error < 0)
927 return error;
928
929 if (copy_to_user(out, &bts, sizeof(bts)))
930 return -EFAULT;
931
932 return sizeof(bts);
933}
934
/*
 * Copy all collected BTS records of @child to user space, then clear
 * the buffer and reset the trace.
 *
 * @size must be at least the number of bytes currently collected.
 * Returns the number of records drained, or a negative error code.
 */
static int ptrace_bts_drain(struct task_struct *child,
			    long size,
			    struct bts_struct __user *out)
{
	struct bts_context *context;
	const struct bts_trace *trace;
	const unsigned char *at;
	int error, drained = 0;

	context = child->bts;
	if (!context)
		return -ESRCH;

	trace = ds_read_bts(context->tracer);
	if (!trace)
		return -ESRCH;

	if (!trace->read)
		return -EOPNOTSUPP;

	if (size < (trace->ds.top - trace->ds.begin))
		return -EIO;

	for (at = trace->ds.begin; (void *)at < trace->ds.top;
	     out++, drained++, at += trace->ds.size) {
		struct bts_struct bts;

		error = trace->read(context->tracer, at, &bts);
		if (error < 0)
			return error;

		if (copy_to_user(out, &bts, sizeof(bts)))
			return -EFAULT;
	}

	/* Wipe the drained records, then rewind the write pointer. */
	memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);

	error = ds_reset_bts(context->tracer);
	if (error < 0)
		return error;

	return drained;
}
978
/*
 * Configure BTS tracing for @child from a user-supplied
 * ptrace_bts_config: allocate/resize the buffer and (re)request the
 * tracer with the requested flags.
 *
 * Returns sizeof(cfg) on success, a negative error code otherwise.
 */
static int ptrace_bts_config(struct task_struct *child,
			     long cfg_size,
			     const struct ptrace_bts_config __user *ucfg)
{
	struct bts_context *context;
	struct ptrace_bts_config cfg;
	unsigned int flags = 0;

	if (cfg_size < sizeof(cfg))
		return -EIO;

	if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
		return -EFAULT;

	context = child->bts;
	if (!context)
		context = alloc_bts_context(child);
	if (!context)
		return -ENOMEM;

	if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
		if (!cfg.signal)
			return -EINVAL;

		/*
		 * Overflow signals are not supported: we return before the
		 * assignment below, which is unreachable — presumably kept
		 * for when the feature gets wired up (the feature is marked
		 * "not enabled yet" at the call site).
		 */
		return -EOPNOTSUPP;
		context->bts_ovfl_signal = cfg.signal;
	}

	/* Reconfiguration: drop any existing tracer first. */
	ds_release_bts(context->tracer);
	context->tracer = NULL;

	if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != context->size)) {
		int err;

		free_bts_buffer(context);
		if (!cfg.size)
			return 0;

		err = alloc_bts_buffer(context, cfg.size);
		if (err < 0)
			return err;
	}

	if (cfg.flags & PTRACE_BTS_O_TRACE)
		flags |= BTS_USER;

	if (cfg.flags & PTRACE_BTS_O_SCHED)
		flags |= BTS_TIMESTAMPS;

	context->tracer =
		ds_request_bts_task(child, context->buffer, context->size,
				    NULL, (size_t)-1, flags);
	if (unlikely(IS_ERR(context->tracer))) {
		int error = PTR_ERR(context->tracer);

		free_bts_buffer(context);
		context->tracer = NULL;
		return error;
	}

	return sizeof(cfg);
}
1041
1042static int ptrace_bts_status(struct task_struct *child,
1043 long cfg_size,
1044 struct ptrace_bts_config __user *ucfg)
1045{
1046 struct bts_context *context;
1047 const struct bts_trace *trace;
1048 struct ptrace_bts_config cfg;
1049
1050 context = child->bts;
1051 if (!context)
1052 return -ESRCH;
1053
1054 if (cfg_size < sizeof(cfg))
1055 return -EIO;
1056
1057 trace = ds_read_bts(context->tracer);
1058 if (!trace)
1059 return -ESRCH;
1060
1061 memset(&cfg, 0, sizeof(cfg));
1062 cfg.size = trace->ds.end - trace->ds.begin;
1063 cfg.signal = context->bts_ovfl_signal;
1064 cfg.bts_size = sizeof(struct bts_struct);
1065
1066 if (cfg.signal)
1067 cfg.flags |= PTRACE_BTS_O_SIGNAL;
1068
1069 if (trace->ds.flags & BTS_USER)
1070 cfg.flags |= PTRACE_BTS_O_TRACE;
1071
1072 if (trace->ds.flags & BTS_TIMESTAMPS)
1073 cfg.flags |= PTRACE_BTS_O_SCHED;
1074
1075 if (copy_to_user(ucfg, &cfg, sizeof(cfg)))
1076 return -EFAULT;
1077
1078 return sizeof(cfg);
1079}
1080
1081static int ptrace_bts_clear(struct task_struct *child)
1082{
1083 struct bts_context *context;
1084 const struct bts_trace *trace;
1085
1086 context = child->bts;
1087 if (!context)
1088 return -ESRCH;
1089
1090 trace = ds_read_bts(context->tracer);
1091 if (!trace)
1092 return -ESRCH;
1093
1094 memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
1095
1096 return ds_reset_bts(context->tracer);
1097}
1098
1099static int ptrace_bts_size(struct task_struct *child)
1100{
1101 struct bts_context *context;
1102 const struct bts_trace *trace;
1103
1104 context = child->bts;
1105 if (!context)
1106 return -ESRCH;
1107
1108 trace = ds_read_bts(context->tracer);
1109 if (!trace)
1110 return -ESRCH;
1111
1112 return (trace->ds.top - trace->ds.begin) / trace->ds.size;
1113}
1114
/*
 * Called from __ptrace_unlink() after the child has been moved back
 * to its original parent.
 *
 * Detaches and (asynchronously, see free_bts_context()) destroys the
 * child's BTS context, if it has one.
 */
void ptrace_bts_untrace(struct task_struct *child)
{
	if (unlikely(child->bts)) {
		free_bts_context(child->bts);
		child->bts = NULL;
	}
}
1126#endif /* CONFIG_X86_PTRACE_BTS */
1127
1128/* 787/*
1129 * Called by kernel/ptrace.c when detaching.. 788 * Called by kernel/ptrace.c when detaching..
1130 * 789 *
@@ -1252,39 +911,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
1252 break; 911 break;
1253#endif 912#endif
1254 913
1255 /*
1256 * These bits need more cooking - not enabled yet:
1257 */
1258#ifdef CONFIG_X86_PTRACE_BTS
1259 case PTRACE_BTS_CONFIG:
1260 ret = ptrace_bts_config
1261 (child, data, (struct ptrace_bts_config __user *)addr);
1262 break;
1263
1264 case PTRACE_BTS_STATUS:
1265 ret = ptrace_bts_status
1266 (child, data, (struct ptrace_bts_config __user *)addr);
1267 break;
1268
1269 case PTRACE_BTS_SIZE:
1270 ret = ptrace_bts_size(child);
1271 break;
1272
1273 case PTRACE_BTS_GET:
1274 ret = ptrace_bts_read_record
1275 (child, data, (struct bts_struct __user *) addr);
1276 break;
1277
1278 case PTRACE_BTS_CLEAR:
1279 ret = ptrace_bts_clear(child);
1280 break;
1281
1282 case PTRACE_BTS_DRAIN:
1283 ret = ptrace_bts_drain
1284 (child, data, (struct bts_struct __user *) addr);
1285 break;
1286#endif /* CONFIG_X86_PTRACE_BTS */
1287
1288 default: 914 default:
1289 ret = ptrace_request(child, request, addr, data); 915 ret = ptrace_request(child, request, addr, data);
1290 break; 916 break;
@@ -1544,14 +1170,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
1544 1170
1545 case PTRACE_GET_THREAD_AREA: 1171 case PTRACE_GET_THREAD_AREA:
1546 case PTRACE_SET_THREAD_AREA: 1172 case PTRACE_SET_THREAD_AREA:
1547#ifdef CONFIG_X86_PTRACE_BTS
1548 case PTRACE_BTS_CONFIG:
1549 case PTRACE_BTS_STATUS:
1550 case PTRACE_BTS_SIZE:
1551 case PTRACE_BTS_GET:
1552 case PTRACE_BTS_CLEAR:
1553 case PTRACE_BTS_DRAIN:
1554#endif /* CONFIG_X86_PTRACE_BTS */
1555 return arch_ptrace(child, request, addr, data); 1173 return arch_ptrace(child, request, addr, data);
1556 1174
1557 default: 1175 default:
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 3149032ff107..58de45ee08b6 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -158,22 +158,6 @@ static int enable_single_step(struct task_struct *child)
158} 158}
159 159
160/* 160/*
161 * Install this value in MSR_IA32_DEBUGCTLMSR whenever child is running.
162 */
163static void write_debugctlmsr(struct task_struct *child, unsigned long val)
164{
165 if (child->thread.debugctlmsr == val)
166 return;
167
168 child->thread.debugctlmsr = val;
169
170 if (child != current)
171 return;
172
173 update_debugctlmsr(val);
174}
175
176/*
177 * Enable single or block step. 161 * Enable single or block step.
178 */ 162 */
179static void enable_step(struct task_struct *child, bool block) 163static void enable_step(struct task_struct *child, bool block)
@@ -186,15 +170,17 @@ static void enable_step(struct task_struct *child, bool block)
186 * that uses user-mode single stepping itself. 170 * that uses user-mode single stepping itself.
187 */ 171 */
188 if (enable_single_step(child) && block) { 172 if (enable_single_step(child) && block) {
189 set_tsk_thread_flag(child, TIF_DEBUGCTLMSR); 173 unsigned long debugctl = get_debugctlmsr();
190 write_debugctlmsr(child, 174
191 child->thread.debugctlmsr | DEBUGCTLMSR_BTF); 175 debugctl |= DEBUGCTLMSR_BTF;
192 } else { 176 update_debugctlmsr(debugctl);
193 write_debugctlmsr(child, 177 set_tsk_thread_flag(child, TIF_BLOCKSTEP);
194 child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF); 178 } else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) {
195 179 unsigned long debugctl = get_debugctlmsr();
196 if (!child->thread.debugctlmsr) 180
197 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); 181 debugctl &= ~DEBUGCTLMSR_BTF;
182 update_debugctlmsr(debugctl);
183 clear_tsk_thread_flag(child, TIF_BLOCKSTEP);
198 } 184 }
199} 185}
200 186
@@ -213,11 +199,13 @@ void user_disable_single_step(struct task_struct *child)
213 /* 199 /*
214 * Make sure block stepping (BTF) is disabled. 200 * Make sure block stepping (BTF) is disabled.
215 */ 201 */
216 write_debugctlmsr(child, 202 if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) {
217 child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF); 203 unsigned long debugctl = get_debugctlmsr();
218 204
219 if (!child->thread.debugctlmsr) 205 debugctl &= ~DEBUGCTLMSR_BTF;
220 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); 206 update_debugctlmsr(debugctl);
207 clear_tsk_thread_flag(child, TIF_BLOCKSTEP);
208 }
221 209
222 /* Always clear TIF_SINGLESTEP... */ 210 /* Always clear TIF_SINGLESTEP... */
223 clear_tsk_thread_flag(child, TIF_SINGLESTEP); 211 clear_tsk_thread_flag(child, TIF_SINGLESTEP);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 1168e4454188..36f1bd9f8e76 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -543,11 +543,11 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
543 543
544 /* DR6 may or may not be cleared by the CPU */ 544 /* DR6 may or may not be cleared by the CPU */
545 set_debugreg(0, 6); 545 set_debugreg(0, 6);
546
546 /* 547 /*
547 * The processor cleared BTF, so don't mark that we need it set. 548 * The processor cleared BTF, so don't mark that we need it set.
548 */ 549 */
549 clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); 550 clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP);
550 tsk->thread.debugctlmsr = 0;
551 551
552 /* Store the virtualized DR6 value */ 552 /* Store the virtualized DR6 value */
553 tsk->thread.debugreg6 = dr6; 553 tsk->thread.debugreg6 = dr6;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6e5e75e0d7d3..0b896ac7e4bb 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3652,8 +3652,11 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
3652 3652
3653 /* We need to handle NMIs before interrupts are enabled */ 3653 /* We need to handle NMIs before interrupts are enabled */
3654 if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && 3654 if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
3655 (exit_intr_info & INTR_INFO_VALID_MASK)) 3655 (exit_intr_info & INTR_INFO_VALID_MASK)) {
3656 kvm_before_handle_nmi(&vmx->vcpu);
3656 asm("int $2"); 3657 asm("int $2");
3658 kvm_after_handle_nmi(&vmx->vcpu);
3659 }
3657 3660
3658 idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; 3661 idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
3659 3662
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 30efeead4511..58a96e6a234c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -40,6 +40,7 @@
40#include <linux/user-return-notifier.h> 40#include <linux/user-return-notifier.h>
41#include <linux/srcu.h> 41#include <linux/srcu.h>
42#include <linux/slab.h> 42#include <linux/slab.h>
43#include <linux/perf_event.h>
43#include <trace/events/kvm.h> 44#include <trace/events/kvm.h>
44 45
45#define CREATE_TRACE_POINTS 46#define CREATE_TRACE_POINTS
@@ -3955,6 +3956,47 @@ static void kvm_timer_init(void)
3955 } 3956 }
3956} 3957}
3957 3958
3959static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
3960
3961static int kvm_is_in_guest(void)
3962{
3963 return percpu_read(current_vcpu) != NULL;
3964}
3965
3966static int kvm_is_user_mode(void)
3967{
3968 int user_mode = 3;
3969 if (percpu_read(current_vcpu))
3970 user_mode = kvm_x86_ops->get_cpl(percpu_read(current_vcpu));
3971 return user_mode != 0;
3972}
3973
3974static unsigned long kvm_get_guest_ip(void)
3975{
3976 unsigned long ip = 0;
3977 if (percpu_read(current_vcpu))
3978 ip = kvm_rip_read(percpu_read(current_vcpu));
3979 return ip;
3980}
3981
3982static struct perf_guest_info_callbacks kvm_guest_cbs = {
3983 .is_in_guest = kvm_is_in_guest,
3984 .is_user_mode = kvm_is_user_mode,
3985 .get_guest_ip = kvm_get_guest_ip,
3986};
3987
3988void kvm_before_handle_nmi(struct kvm_vcpu *vcpu)
3989{
3990 percpu_write(current_vcpu, vcpu);
3991}
3992EXPORT_SYMBOL_GPL(kvm_before_handle_nmi);
3993
3994void kvm_after_handle_nmi(struct kvm_vcpu *vcpu)
3995{
3996 percpu_write(current_vcpu, NULL);
3997}
3998EXPORT_SYMBOL_GPL(kvm_after_handle_nmi);
3999
3958int kvm_arch_init(void *opaque) 4000int kvm_arch_init(void *opaque)
3959{ 4001{
3960 int r; 4002 int r;
@@ -3991,6 +4033,8 @@ int kvm_arch_init(void *opaque)
3991 4033
3992 kvm_timer_init(); 4034 kvm_timer_init();
3993 4035
4036 perf_register_guest_info_callbacks(&kvm_guest_cbs);
4037
3994 return 0; 4038 return 0;
3995 4039
3996out: 4040out:
@@ -3999,6 +4043,8 @@ out:
3999 4043
4000void kvm_arch_exit(void) 4044void kvm_arch_exit(void)
4001{ 4045{
4046 perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
4047
4002 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) 4048 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
4003 cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block, 4049 cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
4004 CPUFREQ_TRANSITION_NOTIFIER); 4050 CPUFREQ_TRANSITION_NOTIFIER);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 2d101639bd8d..b7a404722d2b 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -65,4 +65,7 @@ static inline int is_paging(struct kvm_vcpu *vcpu)
65 return kvm_read_cr0_bits(vcpu, X86_CR0_PG); 65 return kvm_read_cr0_bits(vcpu, X86_CR0_PG);
66} 66}
67 67
68void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
69void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
70
68#endif 71#endif
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 419386c24b82..cbaf8f2b83df 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -20,7 +20,7 @@ lib-y := delay.o
20lib-y += thunk_$(BITS).o 20lib-y += thunk_$(BITS).o
21lib-y += usercopy_$(BITS).o getuser.o putuser.o 21lib-y += usercopy_$(BITS).o getuser.o putuser.o
22lib-y += memcpy_$(BITS).o 22lib-y += memcpy_$(BITS).o
23lib-$(CONFIG_KPROBES) += insn.o inat.o 23lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
24 24
25obj-y += msr.o msr-reg.o msr-reg-export.o 25obj-y += msr.o msr-reg.o msr-reg-export.o
26 26
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 2bf90fafa7b5..c8abc4d1bf35 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -239,11 +239,11 @@ static void arch_perfmon_setup_counters(void)
239 if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 && 239 if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 &&
240 current_cpu_data.x86_model == 15) { 240 current_cpu_data.x86_model == 15) {
241 eax.split.version_id = 2; 241 eax.split.version_id = 2;
242 eax.split.num_events = 2; 242 eax.split.num_counters = 2;
243 eax.split.bit_width = 40; 243 eax.split.bit_width = 40;
244 } 244 }
245 245
246 num_counters = eax.split.num_events; 246 num_counters = eax.split.num_counters;
247 247
248 op_arch_perfmon_spec.num_counters = num_counters; 248 op_arch_perfmon_spec.num_counters = num_counters;
249 op_arch_perfmon_spec.num_controls = num_counters; 249 op_arch_perfmon_spec.num_controls = num_counters;
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 01e6adea07ec..cc12b3c556b3 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -504,18 +504,6 @@ extern int ftrace_dump_on_oops;
504#define INIT_TRACE_RECURSION 504#define INIT_TRACE_RECURSION
505#endif 505#endif
506 506
507#ifdef CONFIG_HW_BRANCH_TRACER
508
509void trace_hw_branch(u64 from, u64 to);
510void trace_hw_branch_oops(void);
511
512#else /* CONFIG_HW_BRANCH_TRACER */
513
514static inline void trace_hw_branch(u64 from, u64 to) {}
515static inline void trace_hw_branch_oops(void) {}
516
517#endif /* CONFIG_HW_BRANCH_TRACER */
518
519#ifdef CONFIG_FTRACE_SYSCALLS 507#ifdef CONFIG_FTRACE_SYSCALLS
520 508
521unsigned long arch_syscall_addr(int nr); 509unsigned long arch_syscall_addr(int nr);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 462acaf36f3a..fb19bb92b809 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -19,7 +19,6 @@ struct anon_vma;
19struct file_ra_state; 19struct file_ra_state;
20struct user_struct; 20struct user_struct;
21struct writeback_control; 21struct writeback_control;
22struct rlimit;
23 22
24#ifndef CONFIG_DISCONTIGMEM /* Don't use mapnrs, do it properly */ 23#ifndef CONFIG_DISCONTIGMEM /* Don't use mapnrs, do it properly */
25extern unsigned long max_mapnr; 24extern unsigned long max_mapnr;
@@ -1449,9 +1448,6 @@ int vmemmap_populate_basepages(struct page *start_page,
1449int vmemmap_populate(struct page *start_page, unsigned long pages, int node); 1448int vmemmap_populate(struct page *start_page, unsigned long pages, int node);
1450void vmemmap_populate_print_last(void); 1449void vmemmap_populate_print_last(void);
1451 1450
1452extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim,
1453 size_t size);
1454extern void refund_locked_memory(struct mm_struct *mm, size_t size);
1455 1451
1456enum mf_flags { 1452enum mf_flags {
1457 MF_COUNT_INCREASED = 1 << 0, 1453 MF_COUNT_INCREASED = 1 << 0,
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index c8e375440403..24de5f181a41 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -203,8 +203,9 @@ struct perf_event_attr {
203 enable_on_exec : 1, /* next exec enables */ 203 enable_on_exec : 1, /* next exec enables */
204 task : 1, /* trace fork/exit */ 204 task : 1, /* trace fork/exit */
205 watermark : 1, /* wakeup_watermark */ 205 watermark : 1, /* wakeup_watermark */
206 precise : 1, /* OoO invariant counter */
206 207
207 __reserved_1 : 49; 208 __reserved_1 : 48;
208 209
209 union { 210 union {
210 __u32 wakeup_events; /* wakeup every n events */ 211 __u32 wakeup_events; /* wakeup every n events */
@@ -287,11 +288,19 @@ struct perf_event_mmap_page {
287 __u64 data_tail; /* user-space written tail */ 288 __u64 data_tail; /* user-space written tail */
288}; 289};
289 290
290#define PERF_RECORD_MISC_CPUMODE_MASK (3 << 0) 291#define PERF_RECORD_MISC_CPUMODE_MASK (7 << 0)
291#define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0) 292#define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0)
292#define PERF_RECORD_MISC_KERNEL (1 << 0) 293#define PERF_RECORD_MISC_KERNEL (1 << 0)
293#define PERF_RECORD_MISC_USER (2 << 0) 294#define PERF_RECORD_MISC_USER (2 << 0)
294#define PERF_RECORD_MISC_HYPERVISOR (3 << 0) 295#define PERF_RECORD_MISC_HYPERVISOR (3 << 0)
296#define PERF_RECORD_MISC_GUEST_KERNEL (4 << 0)
297#define PERF_RECORD_MISC_GUEST_USER (5 << 0)
298
299#define PERF_RECORD_MISC_EXACT (1 << 14)
300/*
301 * Reserve the last bit to indicate some extended misc field
302 */
303#define PERF_RECORD_MISC_EXT_RESERVED (1 << 15)
295 304
296struct perf_event_header { 305struct perf_event_header {
297 __u32 type; 306 __u32 type;
@@ -439,6 +448,12 @@ enum perf_callchain_context {
439# include <asm/perf_event.h> 448# include <asm/perf_event.h>
440#endif 449#endif
441 450
451struct perf_guest_info_callbacks {
452 int (*is_in_guest) (void);
453 int (*is_user_mode) (void);
454 unsigned long (*get_guest_ip) (void);
455};
456
442#ifdef CONFIG_HAVE_HW_BREAKPOINT 457#ifdef CONFIG_HAVE_HW_BREAKPOINT
443#include <asm/hw_breakpoint.h> 458#include <asm/hw_breakpoint.h>
444#endif 459#endif
@@ -468,6 +483,17 @@ struct perf_raw_record {
468 void *data; 483 void *data;
469}; 484};
470 485
486struct perf_branch_entry {
487 __u64 from;
488 __u64 to;
489 __u64 flags;
490};
491
492struct perf_branch_stack {
493 __u64 nr;
494 struct perf_branch_entry entries[0];
495};
496
471struct task_struct; 497struct task_struct;
472 498
473/** 499/**
@@ -571,6 +597,14 @@ enum perf_group_flag {
571 PERF_GROUP_SOFTWARE = 0x1, 597 PERF_GROUP_SOFTWARE = 0x1,
572}; 598};
573 599
600#define SWEVENT_HLIST_BITS 8
601#define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS)
602
603struct swevent_hlist {
604 struct hlist_head heads[SWEVENT_HLIST_SIZE];
605 struct rcu_head rcu_head;
606};
607
574/** 608/**
575 * struct perf_event - performance event kernel representation: 609 * struct perf_event - performance event kernel representation:
576 */ 610 */
@@ -579,6 +613,7 @@ struct perf_event {
579 struct list_head group_entry; 613 struct list_head group_entry;
580 struct list_head event_entry; 614 struct list_head event_entry;
581 struct list_head sibling_list; 615 struct list_head sibling_list;
616 struct hlist_node hlist_entry;
582 int nr_siblings; 617 int nr_siblings;
583 int group_flags; 618 int group_flags;
584 struct perf_event *group_leader; 619 struct perf_event *group_leader;
@@ -726,6 +761,9 @@ struct perf_cpu_context {
726 int active_oncpu; 761 int active_oncpu;
727 int max_pertask; 762 int max_pertask;
728 int exclusive; 763 int exclusive;
764 struct swevent_hlist *swevent_hlist;
765 struct mutex hlist_mutex;
766 int hlist_refcount;
729 767
730 /* 768 /*
731 * Recursion avoidance: 769 * Recursion avoidance:
@@ -902,6 +940,12 @@ static inline void perf_event_mmap(struct vm_area_struct *vma)
902 __perf_event_mmap(vma); 940 __perf_event_mmap(vma);
903} 941}
904 942
943extern struct perf_guest_info_callbacks *perf_guest_cbs;
944extern int perf_register_guest_info_callbacks(
945 struct perf_guest_info_callbacks *);
946extern int perf_unregister_guest_info_callbacks(
947 struct perf_guest_info_callbacks *);
948
905extern void perf_event_comm(struct task_struct *tsk); 949extern void perf_event_comm(struct task_struct *tsk);
906extern void perf_event_fork(struct task_struct *tsk); 950extern void perf_event_fork(struct task_struct *tsk);
907 951
@@ -971,6 +1015,11 @@ perf_sw_event(u32 event_id, u64 nr, int nmi,
971static inline void 1015static inline void
972perf_bp_event(struct perf_event *event, void *data) { } 1016perf_bp_event(struct perf_event *event, void *data) { }
973 1017
1018static inline int perf_register_guest_info_callbacks
1019(struct perf_guest_info_callbacks *) {return 0; }
1020static inline int perf_unregister_guest_info_callbacks
1021(struct perf_guest_info_callbacks *) {return 0; }
1022
974static inline void perf_event_mmap(struct vm_area_struct *vma) { } 1023static inline void perf_event_mmap(struct vm_area_struct *vma) { }
975static inline void perf_event_comm(struct task_struct *tsk) { } 1024static inline void perf_event_comm(struct task_struct *tsk) { }
976static inline void perf_event_fork(struct task_struct *tsk) { } 1025static inline void perf_event_fork(struct task_struct *tsk) { }
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index e1fb60729979..4272521e29e9 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -345,18 +345,6 @@ static inline void user_single_step_siginfo(struct task_struct *tsk,
345#define arch_ptrace_stop(code, info) do { } while (0) 345#define arch_ptrace_stop(code, info) do { } while (0)
346#endif 346#endif
347 347
348#ifndef arch_ptrace_untrace
349/*
350 * Do machine-specific work before untracing child.
351 *
352 * This is called for a normal detach as well as from ptrace_exit()
353 * when the tracing task dies.
354 *
355 * Called with write_lock(&tasklist_lock) held.
356 */
357#define arch_ptrace_untrace(task) do { } while (0)
358#endif
359
360extern int task_current_syscall(struct task_struct *target, long *callno, 348extern int task_current_syscall(struct task_struct *target, long *callno,
361 unsigned long args[6], unsigned int maxargs, 349 unsigned long args[6], unsigned int maxargs,
362 unsigned long *sp, unsigned long *pc); 350 unsigned long *sp, unsigned long *pc);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index dad7f668ebf7..e0447c64af6a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -99,7 +99,6 @@ struct futex_pi_state;
99struct robust_list_head; 99struct robust_list_head;
100struct bio_list; 100struct bio_list;
101struct fs_struct; 101struct fs_struct;
102struct bts_context;
103struct perf_event_context; 102struct perf_event_context;
104 103
105/* 104/*
@@ -1272,12 +1271,6 @@ struct task_struct {
1272 struct list_head ptraced; 1271 struct list_head ptraced;
1273 struct list_head ptrace_entry; 1272 struct list_head ptrace_entry;
1274 1273
1275 /*
1276 * This is the tracer handle for the ptrace BTS extension.
1277 * This field actually belongs to the ptracer task.
1278 */
1279 struct bts_context *bts;
1280
1281 /* PID/PID hash table linkage. */ 1274 /* PID/PID hash table linkage. */
1282 struct pid_link pids[PIDTYPE_MAX]; 1275 struct pid_link pids[PIDTYPE_MAX];
1283 struct list_head thread_group; 1276 struct list_head thread_group;
@@ -2123,10 +2116,8 @@ extern void set_task_comm(struct task_struct *tsk, char *from);
2123extern char *get_task_comm(char *to, struct task_struct *tsk); 2116extern char *get_task_comm(char *to, struct task_struct *tsk);
2124 2117
2125#ifdef CONFIG_SMP 2118#ifdef CONFIG_SMP
2126extern void wait_task_context_switch(struct task_struct *p);
2127extern unsigned long wait_task_inactive(struct task_struct *, long match_state); 2119extern unsigned long wait_task_inactive(struct task_struct *, long match_state);
2128#else 2120#else
2129static inline void wait_task_context_switch(struct task_struct *p) {}
2130static inline unsigned long wait_task_inactive(struct task_struct *p, 2121static inline unsigned long wait_task_inactive(struct task_struct *p,
2131 long match_state) 2122 long match_state)
2132{ 2123{
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index ea6f9d4a20e9..882c64832ffe 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -758,13 +758,12 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
758#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ 758#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
759static notrace void \ 759static notrace void \
760perf_trace_templ_##call(struct ftrace_event_call *event_call, \ 760perf_trace_templ_##call(struct ftrace_event_call *event_call, \
761 proto) \ 761 struct pt_regs *__regs, proto) \
762{ \ 762{ \
763 struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ 763 struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
764 struct ftrace_raw_##call *entry; \ 764 struct ftrace_raw_##call *entry; \
765 u64 __addr = 0, __count = 1; \ 765 u64 __addr = 0, __count = 1; \
766 unsigned long irq_flags; \ 766 unsigned long irq_flags; \
767 struct pt_regs *__regs; \
768 int __entry_size; \ 767 int __entry_size; \
769 int __data_size; \ 768 int __data_size; \
770 int rctx; \ 769 int rctx; \
@@ -785,20 +784,22 @@ perf_trace_templ_##call(struct ftrace_event_call *event_call, \
785 \ 784 \
786 { assign; } \ 785 { assign; } \
787 \ 786 \
788 __regs = &__get_cpu_var(perf_trace_regs); \
789 perf_fetch_caller_regs(__regs, 2); \
790 \
791 perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \ 787 perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \
792 __count, irq_flags, __regs); \ 788 __count, irq_flags, __regs); \
793} 789}
794 790
795#undef DEFINE_EVENT 791#undef DEFINE_EVENT
796#define DEFINE_EVENT(template, call, proto, args) \ 792#define DEFINE_EVENT(template, call, proto, args) \
797static notrace void perf_trace_##call(proto) \ 793static notrace void perf_trace_##call(proto) \
798{ \ 794{ \
799 struct ftrace_event_call *event_call = &event_##call; \ 795 struct ftrace_event_call *event_call = &event_##call; \
800 \ 796 struct pt_regs *__regs = &get_cpu_var(perf_trace_regs); \
801 perf_trace_templ_##template(event_call, args); \ 797 \
798 perf_fetch_caller_regs(__regs, 1); \
799 \
800 perf_trace_templ_##template(event_call, __regs, args); \
801 \
802 put_cpu_var(perf_trace_regs); \
802} 803}
803 804
804#undef DEFINE_EVENT_PRINT 805#undef DEFINE_EVENT_PRINT
diff --git a/kernel/fork.c b/kernel/fork.c
index 44b0791b0a2e..5d3592deaf71 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1111,9 +1111,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1111 p->memcg_batch.do_batch = 0; 1111 p->memcg_batch.do_batch = 0;
1112 p->memcg_batch.memcg = NULL; 1112 p->memcg_batch.memcg = NULL;
1113#endif 1113#endif
1114
1115 p->bts = NULL;
1116
1117 p->stack_start = stack_start; 1114 p->stack_start = stack_start;
1118 1115
1119 /* Perform scheduler related setup. Assign this task to a CPU. */ 1116 /* Perform scheduler related setup. Assign this task to a CPU. */
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 2f3fbf84215a..9dbe8cdaf145 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -16,6 +16,7 @@
16#include <linux/file.h> 16#include <linux/file.h>
17#include <linux/poll.h> 17#include <linux/poll.h>
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include <linux/hash.h>
19#include <linux/sysfs.h> 20#include <linux/sysfs.h>
20#include <linux/dcache.h> 21#include <linux/dcache.h>
21#include <linux/percpu.h> 22#include <linux/percpu.h>
@@ -1367,6 +1368,8 @@ void perf_event_task_sched_in(struct task_struct *task)
1367 if (cpuctx->task_ctx == ctx) 1368 if (cpuctx->task_ctx == ctx)
1368 return; 1369 return;
1369 1370
1371 perf_disable();
1372
1370 /* 1373 /*
1371 * We want to keep the following priority order: 1374 * We want to keep the following priority order:
1372 * cpu pinned (that don't need to move), task pinned, 1375 * cpu pinned (that don't need to move), task pinned,
@@ -1379,6 +1382,8 @@ void perf_event_task_sched_in(struct task_struct *task)
1379 ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE); 1382 ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE);
1380 1383
1381 cpuctx->task_ctx = ctx; 1384 cpuctx->task_ctx = ctx;
1385
1386 perf_enable();
1382} 1387}
1383 1388
1384#define MAX_INTERRUPTS (~0ULL) 1389#define MAX_INTERRUPTS (~0ULL)
@@ -2642,6 +2647,7 @@ static int perf_fasync(int fd, struct file *filp, int on)
2642} 2647}
2643 2648
2644static const struct file_operations perf_fops = { 2649static const struct file_operations perf_fops = {
2650 .llseek = no_llseek,
2645 .release = perf_release, 2651 .release = perf_release,
2646 .read = perf_read, 2652 .read = perf_read,
2647 .poll = perf_poll, 2653 .poll = perf_poll,
@@ -2792,6 +2798,27 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski
2792 2798
2793 2799
2794/* 2800/*
2801 * We assume there is only KVM supporting the callbacks.
2802 * Later on, we might change it to a list if there is
2803 * another virtualization implementation supporting the callbacks.
2804 */
2805struct perf_guest_info_callbacks *perf_guest_cbs;
2806
2807int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
2808{
2809 perf_guest_cbs = cbs;
2810 return 0;
2811}
2812EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks);
2813
2814int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
2815{
2816 perf_guest_cbs = NULL;
2817 return 0;
2818}
2819EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
2820
2821/*
2795 * Output 2822 * Output
2796 */ 2823 */
2797static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail, 2824static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail,
@@ -3743,7 +3770,7 @@ void __perf_event_mmap(struct vm_area_struct *vma)
3743 .event_id = { 3770 .event_id = {
3744 .header = { 3771 .header = {
3745 .type = PERF_RECORD_MMAP, 3772 .type = PERF_RECORD_MMAP,
3746 .misc = 0, 3773 .misc = PERF_RECORD_MISC_USER,
3747 /* .size */ 3774 /* .size */
3748 }, 3775 },
3749 /* .pid */ 3776 /* .pid */
@@ -3961,36 +3988,6 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
3961 perf_swevent_overflow(event, 0, nmi, data, regs); 3988 perf_swevent_overflow(event, 0, nmi, data, regs);
3962} 3989}
3963 3990
3964static int perf_swevent_is_counting(struct perf_event *event)
3965{
3966 /*
3967 * The event is active, we're good!
3968 */
3969 if (event->state == PERF_EVENT_STATE_ACTIVE)
3970 return 1;
3971
3972 /*
3973 * The event is off/error, not counting.
3974 */
3975 if (event->state != PERF_EVENT_STATE_INACTIVE)
3976 return 0;
3977
3978 /*
3979 * The event is inactive, if the context is active
3980 * we're part of a group that didn't make it on the 'pmu',
3981 * not counting.
3982 */
3983 if (event->ctx->is_active)
3984 return 0;
3985
3986 /*
3987 * We're inactive and the context is too, this means the
3988 * task is scheduled out, we're counting events that happen
3989 * to us, like migration events.
3990 */
3991 return 1;
3992}
3993
3994static int perf_tp_event_match(struct perf_event *event, 3991static int perf_tp_event_match(struct perf_event *event,
3995 struct perf_sample_data *data); 3992 struct perf_sample_data *data);
3996 3993
@@ -4014,12 +4011,6 @@ static int perf_swevent_match(struct perf_event *event,
4014 struct perf_sample_data *data, 4011 struct perf_sample_data *data,
4015 struct pt_regs *regs) 4012 struct pt_regs *regs)
4016{ 4013{
4017 if (event->cpu != -1 && event->cpu != smp_processor_id())
4018 return 0;
4019
4020 if (!perf_swevent_is_counting(event))
4021 return 0;
4022
4023 if (event->attr.type != type) 4014 if (event->attr.type != type)
4024 return 0; 4015 return 0;
4025 4016
@@ -4036,18 +4027,53 @@ static int perf_swevent_match(struct perf_event *event,
4036 return 1; 4027 return 1;
4037} 4028}
4038 4029
4039static void perf_swevent_ctx_event(struct perf_event_context *ctx, 4030static inline u64 swevent_hash(u64 type, u32 event_id)
4040 enum perf_type_id type, 4031{
4041 u32 event_id, u64 nr, int nmi, 4032 u64 val = event_id | (type << 32);
4042 struct perf_sample_data *data, 4033
4043 struct pt_regs *regs) 4034 return hash_64(val, SWEVENT_HLIST_BITS);
4035}
4036
4037static struct hlist_head *
4038find_swevent_head(struct perf_cpu_context *ctx, u64 type, u32 event_id)
4044{ 4039{
4040 u64 hash;
4041 struct swevent_hlist *hlist;
4042
4043 hash = swevent_hash(type, event_id);
4044
4045 hlist = rcu_dereference(ctx->swevent_hlist);
4046 if (!hlist)
4047 return NULL;
4048
4049 return &hlist->heads[hash];
4050}
4051
4052static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
4053 u64 nr, int nmi,
4054 struct perf_sample_data *data,
4055 struct pt_regs *regs)
4056{
4057 struct perf_cpu_context *cpuctx;
4045 struct perf_event *event; 4058 struct perf_event *event;
4059 struct hlist_node *node;
4060 struct hlist_head *head;
4046 4061
4047 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 4062 cpuctx = &__get_cpu_var(perf_cpu_context);
4063
4064 rcu_read_lock();
4065
4066 head = find_swevent_head(cpuctx, type, event_id);
4067
4068 if (!head)
4069 goto end;
4070
4071 hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
4048 if (perf_swevent_match(event, type, event_id, data, regs)) 4072 if (perf_swevent_match(event, type, event_id, data, regs))
4049 perf_swevent_add(event, nr, nmi, data, regs); 4073 perf_swevent_add(event, nr, nmi, data, regs);
4050 } 4074 }
4075end:
4076 rcu_read_unlock();
4051} 4077}
4052 4078
4053int perf_swevent_get_recursion_context(void) 4079int perf_swevent_get_recursion_context(void)
@@ -4085,27 +4111,6 @@ void perf_swevent_put_recursion_context(int rctx)
4085} 4111}
4086EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context); 4112EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
4087 4113
4088static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
4089 u64 nr, int nmi,
4090 struct perf_sample_data *data,
4091 struct pt_regs *regs)
4092{
4093 struct perf_cpu_context *cpuctx;
4094 struct perf_event_context *ctx;
4095
4096 cpuctx = &__get_cpu_var(perf_cpu_context);
4097 rcu_read_lock();
4098 perf_swevent_ctx_event(&cpuctx->ctx, type, event_id,
4099 nr, nmi, data, regs);
4100 /*
4101 * doesn't really matter which of the child contexts the
4102 * events ends up in.
4103 */
4104 ctx = rcu_dereference(current->perf_event_ctxp);
4105 if (ctx)
4106 perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs);
4107 rcu_read_unlock();
4108}
4109 4114
4110void __perf_sw_event(u32 event_id, u64 nr, int nmi, 4115void __perf_sw_event(u32 event_id, u64 nr, int nmi,
4111 struct pt_regs *regs, u64 addr) 4116 struct pt_regs *regs, u64 addr)
@@ -4131,16 +4136,28 @@ static void perf_swevent_read(struct perf_event *event)
4131static int perf_swevent_enable(struct perf_event *event) 4136static int perf_swevent_enable(struct perf_event *event)
4132{ 4137{
4133 struct hw_perf_event *hwc = &event->hw; 4138 struct hw_perf_event *hwc = &event->hw;
4139 struct perf_cpu_context *cpuctx;
4140 struct hlist_head *head;
4141
4142 cpuctx = &__get_cpu_var(perf_cpu_context);
4134 4143
4135 if (hwc->sample_period) { 4144 if (hwc->sample_period) {
4136 hwc->last_period = hwc->sample_period; 4145 hwc->last_period = hwc->sample_period;
4137 perf_swevent_set_period(event); 4146 perf_swevent_set_period(event);
4138 } 4147 }
4148
4149 head = find_swevent_head(cpuctx, event->attr.type, event->attr.config);
4150 if (WARN_ON_ONCE(!head))
4151 return -EINVAL;
4152
4153 hlist_add_head_rcu(&event->hlist_entry, head);
4154
4139 return 0; 4155 return 0;
4140} 4156}
4141 4157
4142static void perf_swevent_disable(struct perf_event *event) 4158static void perf_swevent_disable(struct perf_event *event)
4143{ 4159{
4160 hlist_del_rcu(&event->hlist_entry);
4144} 4161}
4145 4162
4146static const struct pmu perf_ops_generic = { 4163static const struct pmu perf_ops_generic = {
@@ -4168,15 +4185,8 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
4168 perf_sample_data_init(&data, 0); 4185 perf_sample_data_init(&data, 0);
4169 data.period = event->hw.last_period; 4186 data.period = event->hw.last_period;
4170 regs = get_irq_regs(); 4187 regs = get_irq_regs();
4171 /*
4172 * In case we exclude kernel IPs or are somehow not in interrupt
4173 * context, provide the next best thing, the user IP.
4174 */
4175 if ((event->attr.exclude_kernel || !regs) &&
4176 !event->attr.exclude_user)
4177 regs = task_pt_regs(current);
4178 4188
4179 if (regs) { 4189 if (regs && !perf_exclude_event(event, regs)) {
4180 if (!(event->attr.exclude_idle && current->pid == 0)) 4190 if (!(event->attr.exclude_idle && current->pid == 0))
4181 if (perf_event_overflow(event, 0, &data, regs)) 4191 if (perf_event_overflow(event, 0, &data, regs))
4182 ret = HRTIMER_NORESTART; 4192 ret = HRTIMER_NORESTART;
@@ -4324,6 +4334,105 @@ static const struct pmu perf_ops_task_clock = {
4324 .read = task_clock_perf_event_read, 4334 .read = task_clock_perf_event_read,
4325}; 4335};
4326 4336
4337static void swevent_hlist_release_rcu(struct rcu_head *rcu_head)
4338{
4339 struct swevent_hlist *hlist;
4340
4341 hlist = container_of(rcu_head, struct swevent_hlist, rcu_head);
4342 kfree(hlist);
4343}
4344
4345static void swevent_hlist_release(struct perf_cpu_context *cpuctx)
4346{
4347 struct swevent_hlist *hlist;
4348
4349 if (!cpuctx->swevent_hlist)
4350 return;
4351
4352 hlist = cpuctx->swevent_hlist;
4353 rcu_assign_pointer(cpuctx->swevent_hlist, NULL);
4354 call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu);
4355}
4356
4357static void swevent_hlist_put_cpu(struct perf_event *event, int cpu)
4358{
4359 struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
4360
4361 mutex_lock(&cpuctx->hlist_mutex);
4362
4363 if (!--cpuctx->hlist_refcount)
4364 swevent_hlist_release(cpuctx);
4365
4366 mutex_unlock(&cpuctx->hlist_mutex);
4367}
4368
4369static void swevent_hlist_put(struct perf_event *event)
4370{
4371 int cpu;
4372
4373 if (event->cpu != -1) {
4374 swevent_hlist_put_cpu(event, event->cpu);
4375 return;
4376 }
4377
4378 for_each_possible_cpu(cpu)
4379 swevent_hlist_put_cpu(event, cpu);
4380}
4381
4382static int swevent_hlist_get_cpu(struct perf_event *event, int cpu)
4383{
4384 struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
4385 int err = 0;
4386
4387 mutex_lock(&cpuctx->hlist_mutex);
4388
4389 if (!cpuctx->swevent_hlist && cpu_online(cpu)) {
4390 struct swevent_hlist *hlist;
4391
4392 hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
4393 if (!hlist) {
4394 err = -ENOMEM;
4395 goto exit;
4396 }
4397 rcu_assign_pointer(cpuctx->swevent_hlist, hlist);
4398 }
4399 cpuctx->hlist_refcount++;
4400 exit:
4401 mutex_unlock(&cpuctx->hlist_mutex);
4402
4403 return err;
4404}
4405
4406static int swevent_hlist_get(struct perf_event *event)
4407{
4408 int err;
4409 int cpu, failed_cpu;
4410
4411 if (event->cpu != -1)
4412 return swevent_hlist_get_cpu(event, event->cpu);
4413
4414 get_online_cpus();
4415 for_each_possible_cpu(cpu) {
4416 err = swevent_hlist_get_cpu(event, cpu);
4417 if (err) {
4418 failed_cpu = cpu;
4419 goto fail;
4420 }
4421 }
4422 put_online_cpus();
4423
4424 return 0;
4425 fail:
4426 for_each_possible_cpu(cpu) {
4427 if (cpu == failed_cpu)
4428 break;
4429 swevent_hlist_put_cpu(event, cpu);
4430 }
4431
4432 put_online_cpus();
4433 return err;
4434}
4435
4327#ifdef CONFIG_EVENT_TRACING 4436#ifdef CONFIG_EVENT_TRACING
4328 4437
4329void perf_tp_event(int event_id, u64 addr, u64 count, void *record, 4438void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
@@ -4357,10 +4466,13 @@ static int perf_tp_event_match(struct perf_event *event,
4357static void tp_perf_event_destroy(struct perf_event *event) 4466static void tp_perf_event_destroy(struct perf_event *event)
4358{ 4467{
4359 perf_trace_disable(event->attr.config); 4468 perf_trace_disable(event->attr.config);
4469 swevent_hlist_put(event);
4360} 4470}
4361 4471
4362static const struct pmu *tp_perf_event_init(struct perf_event *event) 4472static const struct pmu *tp_perf_event_init(struct perf_event *event)
4363{ 4473{
4474 int err;
4475
4364 /* 4476 /*
4365 * Raw tracepoint data is a severe data leak, only allow root to 4477 * Raw tracepoint data is a severe data leak, only allow root to
4366 * have these. 4478 * have these.
@@ -4374,6 +4486,11 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event)
4374 return NULL; 4486 return NULL;
4375 4487
4376 event->destroy = tp_perf_event_destroy; 4488 event->destroy = tp_perf_event_destroy;
4489 err = swevent_hlist_get(event);
4490 if (err) {
4491 perf_trace_disable(event->attr.config);
4492 return ERR_PTR(err);
4493 }
4377 4494
4378 return &perf_ops_generic; 4495 return &perf_ops_generic;
4379} 4496}
@@ -4474,6 +4591,7 @@ static void sw_perf_event_destroy(struct perf_event *event)
4474 WARN_ON(event->parent); 4591 WARN_ON(event->parent);
4475 4592
4476 atomic_dec(&perf_swevent_enabled[event_id]); 4593 atomic_dec(&perf_swevent_enabled[event_id]);
4594 swevent_hlist_put(event);
4477} 4595}
4478 4596
4479static const struct pmu *sw_perf_event_init(struct perf_event *event) 4597static const struct pmu *sw_perf_event_init(struct perf_event *event)
@@ -4512,6 +4630,12 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event)
4512 case PERF_COUNT_SW_ALIGNMENT_FAULTS: 4630 case PERF_COUNT_SW_ALIGNMENT_FAULTS:
4513 case PERF_COUNT_SW_EMULATION_FAULTS: 4631 case PERF_COUNT_SW_EMULATION_FAULTS:
4514 if (!event->parent) { 4632 if (!event->parent) {
4633 int err;
4634
4635 err = swevent_hlist_get(event);
4636 if (err)
4637 return ERR_PTR(err);
4638
4515 atomic_inc(&perf_swevent_enabled[event_id]); 4639 atomic_inc(&perf_swevent_enabled[event_id]);
4516 event->destroy = sw_perf_event_destroy; 4640 event->destroy = sw_perf_event_destroy;
4517 } 4641 }
@@ -5384,6 +5508,7 @@ static void __init perf_event_init_all_cpus(void)
5384 5508
5385 for_each_possible_cpu(cpu) { 5509 for_each_possible_cpu(cpu) {
5386 cpuctx = &per_cpu(perf_cpu_context, cpu); 5510 cpuctx = &per_cpu(perf_cpu_context, cpu);
5511 mutex_init(&cpuctx->hlist_mutex);
5387 __perf_event_init_context(&cpuctx->ctx, NULL); 5512 __perf_event_init_context(&cpuctx->ctx, NULL);
5388 } 5513 }
5389} 5514}
@@ -5397,6 +5522,16 @@ static void __cpuinit perf_event_init_cpu(int cpu)
5397 spin_lock(&perf_resource_lock); 5522 spin_lock(&perf_resource_lock);
5398 cpuctx->max_pertask = perf_max_events - perf_reserved_percpu; 5523 cpuctx->max_pertask = perf_max_events - perf_reserved_percpu;
5399 spin_unlock(&perf_resource_lock); 5524 spin_unlock(&perf_resource_lock);
5525
5526 mutex_lock(&cpuctx->hlist_mutex);
5527 if (cpuctx->hlist_refcount > 0) {
5528 struct swevent_hlist *hlist;
5529
5530 hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
5531 WARN_ON_ONCE(!hlist);
5532 rcu_assign_pointer(cpuctx->swevent_hlist, hlist);
5533 }
5534 mutex_unlock(&cpuctx->hlist_mutex);
5400} 5535}
5401 5536
5402#ifdef CONFIG_HOTPLUG_CPU 5537#ifdef CONFIG_HOTPLUG_CPU
@@ -5416,6 +5551,10 @@ static void perf_event_exit_cpu(int cpu)
5416 struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); 5551 struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
5417 struct perf_event_context *ctx = &cpuctx->ctx; 5552 struct perf_event_context *ctx = &cpuctx->ctx;
5418 5553
5554 mutex_lock(&cpuctx->hlist_mutex);
5555 swevent_hlist_release(cpuctx);
5556 mutex_unlock(&cpuctx->hlist_mutex);
5557
5419 mutex_lock(&ctx->mutex); 5558 mutex_lock(&ctx->mutex);
5420 smp_call_function_single(cpu, __perf_event_exit_cpu, NULL, 1); 5559 smp_call_function_single(cpu, __perf_event_exit_cpu, NULL, 1);
5421 mutex_unlock(&ctx->mutex); 5560 mutex_unlock(&ctx->mutex);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 42ad8ae729a0..9fb51237b18c 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -76,7 +76,6 @@ void __ptrace_unlink(struct task_struct *child)
76 child->parent = child->real_parent; 76 child->parent = child->real_parent;
77 list_del_init(&child->ptrace_entry); 77 list_del_init(&child->ptrace_entry);
78 78
79 arch_ptrace_untrace(child);
80 if (task_is_traced(child)) 79 if (task_is_traced(child))
81 ptrace_untrace(child); 80 ptrace_untrace(child);
82} 81}
diff --git a/kernel/sched.c b/kernel/sched.c
index 6af210a7de70..b0bbadc24955 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2078,49 +2078,6 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
2078} 2078}
2079 2079
2080/* 2080/*
2081 * wait_task_context_switch - wait for a thread to complete at least one
2082 * context switch.
2083 *
2084 * @p must not be current.
2085 */
2086void wait_task_context_switch(struct task_struct *p)
2087{
2088 unsigned long nvcsw, nivcsw, flags;
2089 int running;
2090 struct rq *rq;
2091
2092 nvcsw = p->nvcsw;
2093 nivcsw = p->nivcsw;
2094 for (;;) {
2095 /*
2096 * The runqueue is assigned before the actual context
2097 * switch. We need to take the runqueue lock.
2098 *
2099 * We could check initially without the lock but it is
2100 * very likely that we need to take the lock in every
2101 * iteration.
2102 */
2103 rq = task_rq_lock(p, &flags);
2104 running = task_running(rq, p);
2105 task_rq_unlock(rq, &flags);
2106
2107 if (likely(!running))
2108 break;
2109 /*
2110 * The switch count is incremented before the actual
2111 * context switch. We thus wait for two switches to be
2112 * sure at least one completed.
2113 */
2114 if ((p->nvcsw - nvcsw) > 1)
2115 break;
2116 if ((p->nivcsw - nivcsw) > 1)
2117 break;
2118
2119 cpu_relax();
2120 }
2121}
2122
2123/*
2124 * wait_task_inactive - wait for a thread to unschedule. 2081 * wait_task_inactive - wait for a thread to unschedule.
2125 * 2082 *
2126 * If @match_state is nonzero, it's the @p->state value just checked and 2083 * If @match_state is nonzero, it's the @p->state value just checked and
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 13e13d428cd3..8b1797c4545b 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -44,9 +44,6 @@ config HAVE_FTRACE_MCOUNT_RECORD
44 help 44 help
45 See Documentation/trace/ftrace-design.txt 45 See Documentation/trace/ftrace-design.txt
46 46
47config HAVE_HW_BRANCH_TRACER
48 bool
49
50config HAVE_SYSCALL_TRACEPOINTS 47config HAVE_SYSCALL_TRACEPOINTS
51 bool 48 bool
52 help 49 help
@@ -374,14 +371,6 @@ config STACK_TRACER
374 371
375 Say N if unsure. 372 Say N if unsure.
376 373
377config HW_BRANCH_TRACER
378 depends on HAVE_HW_BRANCH_TRACER
379 bool "Trace hw branches"
380 select GENERIC_TRACER
381 help
382 This tracer records all branches on the system in a circular
383 buffer, giving access to the last N branches for each cpu.
384
385config KMEMTRACE 374config KMEMTRACE
386 bool "Trace SLAB allocations" 375 bool "Trace SLAB allocations"
387 select GENERIC_TRACER 376 select GENERIC_TRACER
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 78edc6490038..ffb1a5b0550e 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -41,7 +41,6 @@ obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
41obj-$(CONFIG_BOOT_TRACER) += trace_boot.o 41obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
42obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o 42obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
43obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o 43obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
44obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o
45obj-$(CONFIG_KMEMTRACE) += kmemtrace.o 44obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
46obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o 45obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
47obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o 46obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 2825ef2c0b15..3ebdb6bd2362 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -34,7 +34,6 @@ enum trace_type {
34 TRACE_GRAPH_RET, 34 TRACE_GRAPH_RET,
35 TRACE_GRAPH_ENT, 35 TRACE_GRAPH_ENT,
36 TRACE_USER_STACK, 36 TRACE_USER_STACK,
37 TRACE_HW_BRANCHES,
38 TRACE_KMEM_ALLOC, 37 TRACE_KMEM_ALLOC,
39 TRACE_KMEM_FREE, 38 TRACE_KMEM_FREE,
40 TRACE_BLK, 39 TRACE_BLK,
@@ -103,29 +102,17 @@ struct syscall_trace_exit {
103 long ret; 102 long ret;
104}; 103};
105 104
106struct kprobe_trace_entry { 105struct kprobe_trace_entry_head {
107 struct trace_entry ent; 106 struct trace_entry ent;
108 unsigned long ip; 107 unsigned long ip;
109 int nargs;
110 unsigned long args[];
111}; 108};
112 109
113#define SIZEOF_KPROBE_TRACE_ENTRY(n) \ 110struct kretprobe_trace_entry_head {
114 (offsetof(struct kprobe_trace_entry, args) + \
115 (sizeof(unsigned long) * (n)))
116
117struct kretprobe_trace_entry {
118 struct trace_entry ent; 111 struct trace_entry ent;
119 unsigned long func; 112 unsigned long func;
120 unsigned long ret_ip; 113 unsigned long ret_ip;
121 int nargs;
122 unsigned long args[];
123}; 114};
124 115
125#define SIZEOF_KRETPROBE_TRACE_ENTRY(n) \
126 (offsetof(struct kretprobe_trace_entry, args) + \
127 (sizeof(unsigned long) * (n)))
128
129/* 116/*
130 * trace_flag_type is an enumeration that holds different 117 * trace_flag_type is an enumeration that holds different
131 * states when a trace occurs. These are: 118 * states when a trace occurs. These are:
@@ -229,7 +216,6 @@ extern void __ftrace_bad_type(void);
229 TRACE_GRAPH_ENT); \ 216 TRACE_GRAPH_ENT); \
230 IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ 217 IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \
231 TRACE_GRAPH_RET); \ 218 TRACE_GRAPH_RET); \
232 IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
233 IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \ 219 IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \
234 TRACE_KMEM_ALLOC); \ 220 TRACE_KMEM_ALLOC); \
235 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ 221 IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
@@ -467,8 +453,6 @@ extern int trace_selftest_startup_sysprof(struct tracer *trace,
467 struct trace_array *tr); 453 struct trace_array *tr);
468extern int trace_selftest_startup_branch(struct tracer *trace, 454extern int trace_selftest_startup_branch(struct tracer *trace,
469 struct trace_array *tr); 455 struct trace_array *tr);
470extern int trace_selftest_startup_hw_branches(struct tracer *trace,
471 struct trace_array *tr);
472extern int trace_selftest_startup_ksym(struct tracer *trace, 456extern int trace_selftest_startup_ksym(struct tracer *trace,
473 struct trace_array *tr); 457 struct trace_array *tr);
474#endif /* CONFIG_FTRACE_STARTUP_TEST */ 458#endif /* CONFIG_FTRACE_STARTUP_TEST */
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index c16a08f399df..dc008c1240da 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -318,18 +318,6 @@ FTRACE_ENTRY(branch, trace_branch,
318 __entry->func, __entry->file, __entry->correct) 318 __entry->func, __entry->file, __entry->correct)
319); 319);
320 320
321FTRACE_ENTRY(hw_branch, hw_branch_entry,
322
323 TRACE_HW_BRANCHES,
324
325 F_STRUCT(
326 __field( u64, from )
327 __field( u64, to )
328 ),
329
330 F_printk("from: %llx to: %llx", __entry->from, __entry->to)
331);
332
333FTRACE_ENTRY(kmem_alloc, kmemtrace_alloc_entry, 321FTRACE_ENTRY(kmem_alloc, kmemtrace_alloc_entry,
334 322
335 TRACE_KMEM_ALLOC, 323 TRACE_KMEM_ALLOC,
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
deleted file mode 100644
index 7b97000745f5..000000000000
--- a/kernel/trace/trace_hw_branches.c
+++ /dev/null
@@ -1,312 +0,0 @@
1/*
2 * h/w branch tracer for x86 based on BTS
3 *
4 * Copyright (C) 2008-2009 Intel Corporation.
5 * Markus Metzger <markus.t.metzger@gmail.com>, 2008-2009
6 */
7#include <linux/kallsyms.h>
8#include <linux/debugfs.h>
9#include <linux/ftrace.h>
10#include <linux/module.h>
11#include <linux/cpu.h>
12#include <linux/smp.h>
13#include <linux/fs.h>
14
15#include <asm/ds.h>
16
17#include "trace_output.h"
18#include "trace.h"
19
20
21#define BTS_BUFFER_SIZE (1 << 13)
22
23static DEFINE_PER_CPU(struct bts_tracer *, hwb_tracer);
24static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], hwb_buffer);
25
26#define this_tracer per_cpu(hwb_tracer, smp_processor_id())
27
28static int trace_hw_branches_enabled __read_mostly;
29static int trace_hw_branches_suspended __read_mostly;
30static struct trace_array *hw_branch_trace __read_mostly;
31
32
33static void bts_trace_init_cpu(int cpu)
34{
35 per_cpu(hwb_tracer, cpu) =
36 ds_request_bts_cpu(cpu, per_cpu(hwb_buffer, cpu),
37 BTS_BUFFER_SIZE, NULL, (size_t)-1,
38 BTS_KERNEL);
39
40 if (IS_ERR(per_cpu(hwb_tracer, cpu)))
41 per_cpu(hwb_tracer, cpu) = NULL;
42}
43
44static int bts_trace_init(struct trace_array *tr)
45{
46 int cpu;
47
48 hw_branch_trace = tr;
49 trace_hw_branches_enabled = 0;
50
51 get_online_cpus();
52 for_each_online_cpu(cpu) {
53 bts_trace_init_cpu(cpu);
54
55 if (likely(per_cpu(hwb_tracer, cpu)))
56 trace_hw_branches_enabled = 1;
57 }
58 trace_hw_branches_suspended = 0;
59 put_online_cpus();
60
61 /* If we could not enable tracing on a single cpu, we fail. */
62 return trace_hw_branches_enabled ? 0 : -EOPNOTSUPP;
63}
64
65static void bts_trace_reset(struct trace_array *tr)
66{
67 int cpu;
68
69 get_online_cpus();
70 for_each_online_cpu(cpu) {
71 if (likely(per_cpu(hwb_tracer, cpu))) {
72 ds_release_bts(per_cpu(hwb_tracer, cpu));
73 per_cpu(hwb_tracer, cpu) = NULL;
74 }
75 }
76 trace_hw_branches_enabled = 0;
77 trace_hw_branches_suspended = 0;
78 put_online_cpus();
79}
80
81static void bts_trace_start(struct trace_array *tr)
82{
83 int cpu;
84
85 get_online_cpus();
86 for_each_online_cpu(cpu)
87 if (likely(per_cpu(hwb_tracer, cpu)))
88 ds_resume_bts(per_cpu(hwb_tracer, cpu));
89 trace_hw_branches_suspended = 0;
90 put_online_cpus();
91}
92
93static void bts_trace_stop(struct trace_array *tr)
94{
95 int cpu;
96
97 get_online_cpus();
98 for_each_online_cpu(cpu)
99 if (likely(per_cpu(hwb_tracer, cpu)))
100 ds_suspend_bts(per_cpu(hwb_tracer, cpu));
101 trace_hw_branches_suspended = 1;
102 put_online_cpus();
103}
104
105static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb,
106 unsigned long action, void *hcpu)
107{
108 int cpu = (long)hcpu;
109
110 switch (action) {
111 case CPU_ONLINE:
112 case CPU_DOWN_FAILED:
113 /* The notification is sent with interrupts enabled. */
114 if (trace_hw_branches_enabled) {
115 bts_trace_init_cpu(cpu);
116
117 if (trace_hw_branches_suspended &&
118 likely(per_cpu(hwb_tracer, cpu)))
119 ds_suspend_bts(per_cpu(hwb_tracer, cpu));
120 }
121 break;
122
123 case CPU_DOWN_PREPARE:
124 /* The notification is sent with interrupts enabled. */
125 if (likely(per_cpu(hwb_tracer, cpu))) {
126 ds_release_bts(per_cpu(hwb_tracer, cpu));
127 per_cpu(hwb_tracer, cpu) = NULL;
128 }
129 }
130
131 return NOTIFY_DONE;
132}
133
134static struct notifier_block bts_hotcpu_notifier __cpuinitdata = {
135 .notifier_call = bts_hotcpu_handler
136};
137
138static void bts_trace_print_header(struct seq_file *m)
139{
140 seq_puts(m, "# CPU# TO <- FROM\n");
141}
142
143static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
144{
145 unsigned long symflags = TRACE_ITER_SYM_OFFSET;
146 struct trace_entry *entry = iter->ent;
147 struct trace_seq *seq = &iter->seq;
148 struct hw_branch_entry *it;
149
150 trace_assign_type(it, entry);
151
152 if (entry->type == TRACE_HW_BRANCHES) {
153 if (trace_seq_printf(seq, "%4d ", iter->cpu) &&
154 seq_print_ip_sym(seq, it->to, symflags) &&
155 trace_seq_printf(seq, "\t <- ") &&
156 seq_print_ip_sym(seq, it->from, symflags) &&
157 trace_seq_printf(seq, "\n"))
158 return TRACE_TYPE_HANDLED;
159 return TRACE_TYPE_PARTIAL_LINE;
160 }
161 return TRACE_TYPE_UNHANDLED;
162}
163
164void trace_hw_branch(u64 from, u64 to)
165{
166 struct ftrace_event_call *call = &event_hw_branch;
167 struct trace_array *tr = hw_branch_trace;
168 struct ring_buffer_event *event;
169 struct ring_buffer *buf;
170 struct hw_branch_entry *entry;
171 unsigned long irq1;
172 int cpu;
173
174 if (unlikely(!tr))
175 return;
176
177 if (unlikely(!trace_hw_branches_enabled))
178 return;
179
180 local_irq_save(irq1);
181 cpu = raw_smp_processor_id();
182 if (atomic_inc_return(&tr->data[cpu]->disabled) != 1)
183 goto out;
184
185 buf = tr->buffer;
186 event = trace_buffer_lock_reserve(buf, TRACE_HW_BRANCHES,
187 sizeof(*entry), 0, 0);
188 if (!event)
189 goto out;
190 entry = ring_buffer_event_data(event);
191 tracing_generic_entry_update(&entry->ent, 0, from);
192 entry->ent.type = TRACE_HW_BRANCHES;
193 entry->from = from;
194 entry->to = to;
195 if (!filter_check_discard(call, entry, buf, event))
196 trace_buffer_unlock_commit(buf, event, 0, 0);
197
198 out:
199 atomic_dec(&tr->data[cpu]->disabled);
200 local_irq_restore(irq1);
201}
202
203static void trace_bts_at(const struct bts_trace *trace, void *at)
204{
205 struct bts_struct bts;
206 int err = 0;
207
208 WARN_ON_ONCE(!trace->read);
209 if (!trace->read)
210 return;
211
212 err = trace->read(this_tracer, at, &bts);
213 if (err < 0)
214 return;
215
216 switch (bts.qualifier) {
217 case BTS_BRANCH:
218 trace_hw_branch(bts.variant.lbr.from, bts.variant.lbr.to);
219 break;
220 }
221}
222
223/*
224 * Collect the trace on the current cpu and write it into the ftrace buffer.
225 *
226 * pre: tracing must be suspended on the current cpu
227 */
228static void trace_bts_cpu(void *arg)
229{
230 struct trace_array *tr = (struct trace_array *)arg;
231 const struct bts_trace *trace;
232 unsigned char *at;
233
234 if (unlikely(!tr))
235 return;
236
237 if (unlikely(atomic_read(&tr->data[raw_smp_processor_id()]->disabled)))
238 return;
239
240 if (unlikely(!this_tracer))
241 return;
242
243 trace = ds_read_bts(this_tracer);
244 if (!trace)
245 return;
246
247 for (at = trace->ds.top; (void *)at < trace->ds.end;
248 at += trace->ds.size)
249 trace_bts_at(trace, at);
250
251 for (at = trace->ds.begin; (void *)at < trace->ds.top;
252 at += trace->ds.size)
253 trace_bts_at(trace, at);
254}
255
256static void trace_bts_prepare(struct trace_iterator *iter)
257{
258 int cpu;
259
260 get_online_cpus();
261 for_each_online_cpu(cpu)
262 if (likely(per_cpu(hwb_tracer, cpu)))
263 ds_suspend_bts(per_cpu(hwb_tracer, cpu));
264 /*
265 * We need to collect the trace on the respective cpu since ftrace
266 * implicitly adds the record for the current cpu.
267 * Once that is more flexible, we could collect the data from any cpu.
268 */
269 on_each_cpu(trace_bts_cpu, iter->tr, 1);
270
271 for_each_online_cpu(cpu)
272 if (likely(per_cpu(hwb_tracer, cpu)))
273 ds_resume_bts(per_cpu(hwb_tracer, cpu));
274 put_online_cpus();
275}
276
277static void trace_bts_close(struct trace_iterator *iter)
278{
279 tracing_reset_online_cpus(iter->tr);
280}
281
282void trace_hw_branch_oops(void)
283{
284 if (this_tracer) {
285 ds_suspend_bts_noirq(this_tracer);
286 trace_bts_cpu(hw_branch_trace);
287 ds_resume_bts_noirq(this_tracer);
288 }
289}
290
291struct tracer bts_tracer __read_mostly =
292{
293 .name = "hw-branch-tracer",
294 .init = bts_trace_init,
295 .reset = bts_trace_reset,
296 .print_header = bts_trace_print_header,
297 .print_line = bts_trace_print_line,
298 .start = bts_trace_start,
299 .stop = bts_trace_stop,
300 .open = trace_bts_prepare,
301 .close = trace_bts_close,
302#ifdef CONFIG_FTRACE_SELFTEST
303 .selftest = trace_selftest_startup_hw_branches,
304#endif /* CONFIG_FTRACE_SELFTEST */
305};
306
307__init static int init_bts_trace(void)
308{
309 register_hotcpu_notifier(&bts_hotcpu_notifier);
310 return register_tracer(&bts_tracer);
311}
312device_initcall(init_bts_trace);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 1251e367bae9..a7514326052b 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -29,6 +29,8 @@
29#include <linux/ctype.h> 29#include <linux/ctype.h>
30#include <linux/ptrace.h> 30#include <linux/ptrace.h>
31#include <linux/perf_event.h> 31#include <linux/perf_event.h>
32#include <linux/stringify.h>
33#include <asm/bitsperlong.h>
32 34
33#include "trace.h" 35#include "trace.h"
34#include "trace_output.h" 36#include "trace_output.h"
@@ -40,7 +42,6 @@
40 42
41/* Reserved field names */ 43/* Reserved field names */
42#define FIELD_STRING_IP "__probe_ip" 44#define FIELD_STRING_IP "__probe_ip"
43#define FIELD_STRING_NARGS "__probe_nargs"
44#define FIELD_STRING_RETIP "__probe_ret_ip" 45#define FIELD_STRING_RETIP "__probe_ret_ip"
45#define FIELD_STRING_FUNC "__probe_func" 46#define FIELD_STRING_FUNC "__probe_func"
46 47
@@ -52,56 +53,102 @@ const char *reserved_field_names[] = {
52 "common_tgid", 53 "common_tgid",
53 "common_lock_depth", 54 "common_lock_depth",
54 FIELD_STRING_IP, 55 FIELD_STRING_IP,
55 FIELD_STRING_NARGS,
56 FIELD_STRING_RETIP, 56 FIELD_STRING_RETIP,
57 FIELD_STRING_FUNC, 57 FIELD_STRING_FUNC,
58}; 58};
59 59
60struct fetch_func { 60/* Printing function type */
61 unsigned long (*func)(struct pt_regs *, void *); 61typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *);
62#define PRINT_TYPE_FUNC_NAME(type) print_type_##type
63#define PRINT_TYPE_FMT_NAME(type) print_type_format_##type
64
65/* Printing in basic type function template */
66#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt, cast) \
67static __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, \
68 const char *name, void *data)\
69{ \
70 return trace_seq_printf(s, " %s=" fmt, name, (cast)*(type *)data);\
71} \
72static const char PRINT_TYPE_FMT_NAME(type)[] = fmt;
73
74DEFINE_BASIC_PRINT_TYPE_FUNC(u8, "%x", unsigned int)
75DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "%x", unsigned int)
76DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "%lx", unsigned long)
77DEFINE_BASIC_PRINT_TYPE_FUNC(u64, "%llx", unsigned long long)
78DEFINE_BASIC_PRINT_TYPE_FUNC(s8, "%d", int)
79DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d", int)
80DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%ld", long)
81DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%lld", long long)
82
83/* Data fetch function type */
84typedef void (*fetch_func_t)(struct pt_regs *, void *, void *);
85
86struct fetch_param {
87 fetch_func_t fn;
62 void *data; 88 void *data;
63}; 89};
64 90
65static __kprobes unsigned long call_fetch(struct fetch_func *f, 91static __kprobes void call_fetch(struct fetch_param *fprm,
66 struct pt_regs *regs) 92 struct pt_regs *regs, void *dest)
67{ 93{
68 return f->func(regs, f->data); 94 return fprm->fn(regs, fprm->data, dest);
69} 95}
70 96
71/* fetch handlers */ 97#define FETCH_FUNC_NAME(kind, type) fetch_##kind##_##type
72static __kprobes unsigned long fetch_register(struct pt_regs *regs, 98/*
73 void *offset) 99 * Define macro for basic types - we don't need to define s* types, because
74{ 100 * we have to care only about bitwidth at recording time.
75 return regs_get_register(regs, (unsigned int)((unsigned long)offset)); 101 */
102#define DEFINE_BASIC_FETCH_FUNCS(kind) \
103DEFINE_FETCH_##kind(u8) \
104DEFINE_FETCH_##kind(u16) \
105DEFINE_FETCH_##kind(u32) \
106DEFINE_FETCH_##kind(u64)
107
108#define CHECK_BASIC_FETCH_FUNCS(kind, fn) \
109 ((FETCH_FUNC_NAME(kind, u8) == fn) || \
110 (FETCH_FUNC_NAME(kind, u16) == fn) || \
111 (FETCH_FUNC_NAME(kind, u32) == fn) || \
112 (FETCH_FUNC_NAME(kind, u64) == fn))
113
114/* Data fetch function templates */
115#define DEFINE_FETCH_reg(type) \
116static __kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, \
117 void *offset, void *dest) \
118{ \
119 *(type *)dest = (type)regs_get_register(regs, \
120 (unsigned int)((unsigned long)offset)); \
76} 121}
77 122DEFINE_BASIC_FETCH_FUNCS(reg)
78static __kprobes unsigned long fetch_stack(struct pt_regs *regs, 123
79 void *num) 124#define DEFINE_FETCH_stack(type) \
80{ 125static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\
81 return regs_get_kernel_stack_nth(regs, 126 void *offset, void *dest) \
82 (unsigned int)((unsigned long)num)); 127{ \
128 *(type *)dest = (type)regs_get_kernel_stack_nth(regs, \
129 (unsigned int)((unsigned long)offset)); \
83} 130}
131DEFINE_BASIC_FETCH_FUNCS(stack)
84 132
85static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr) 133#define DEFINE_FETCH_retval(type) \
86{ 134static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\
87 unsigned long retval; 135 void *dummy, void *dest) \
88 136{ \
89 if (probe_kernel_address(addr, retval)) 137 *(type *)dest = (type)regs_return_value(regs); \
90 return 0;
91 return retval;
92} 138}
93 139DEFINE_BASIC_FETCH_FUNCS(retval)
94static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs, 140
95 void *dummy) 141#define DEFINE_FETCH_memory(type) \
96{ 142static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
97 return regs_return_value(regs); 143 void *addr, void *dest) \
98} 144{ \
99 145 type retval; \
100static __kprobes unsigned long fetch_stack_address(struct pt_regs *regs, 146 if (probe_kernel_address(addr, retval)) \
101 void *dummy) 147 *(type *)dest = 0; \
102{ 148 else \
103 return kernel_stack_pointer(regs); 149 *(type *)dest = retval; \
104} 150}
151DEFINE_BASIC_FETCH_FUNCS(memory)
105 152
106/* Memory fetching by symbol */ 153/* Memory fetching by symbol */
107struct symbol_cache { 154struct symbol_cache {
@@ -145,51 +192,126 @@ static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
145 return sc; 192 return sc;
146} 193}
147 194
148static __kprobes unsigned long fetch_symbol(struct pt_regs *regs, void *data) 195#define DEFINE_FETCH_symbol(type) \
149{ 196static __kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs,\
150 struct symbol_cache *sc = data; 197 void *data, void *dest) \
151 198{ \
152 if (sc->addr) 199 struct symbol_cache *sc = data; \
153 return fetch_memory(regs, (void *)sc->addr); 200 if (sc->addr) \
154 else 201 fetch_memory_##type(regs, (void *)sc->addr, dest); \
155 return 0; 202 else \
203 *(type *)dest = 0; \
156} 204}
205DEFINE_BASIC_FETCH_FUNCS(symbol)
157 206
158/* Special indirect memory access interface */ 207/* Dereference memory access function */
159struct indirect_fetch_data { 208struct deref_fetch_param {
160 struct fetch_func orig; 209 struct fetch_param orig;
161 long offset; 210 long offset;
162}; 211};
163 212
164static __kprobes unsigned long fetch_indirect(struct pt_regs *regs, void *data) 213#define DEFINE_FETCH_deref(type) \
165{ 214static __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,\
166 struct indirect_fetch_data *ind = data; 215 void *data, void *dest) \
167 unsigned long addr; 216{ \
168 217 struct deref_fetch_param *dprm = data; \
169 addr = call_fetch(&ind->orig, regs); 218 unsigned long addr; \
170 if (addr) { 219 call_fetch(&dprm->orig, regs, &addr); \
171 addr += ind->offset; 220 if (addr) { \
172 return fetch_memory(regs, (void *)addr); 221 addr += dprm->offset; \
173 } else 222 fetch_memory_##type(regs, (void *)addr, dest); \
174 return 0; 223 } else \
224 *(type *)dest = 0; \
175} 225}
226DEFINE_BASIC_FETCH_FUNCS(deref)
176 227
177static __kprobes void free_indirect_fetch_data(struct indirect_fetch_data *data) 228static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
178{ 229{
179 if (data->orig.func == fetch_indirect) 230 if (CHECK_BASIC_FETCH_FUNCS(deref, data->orig.fn))
180 free_indirect_fetch_data(data->orig.data); 231 free_deref_fetch_param(data->orig.data);
181 else if (data->orig.func == fetch_symbol) 232 else if (CHECK_BASIC_FETCH_FUNCS(symbol, data->orig.fn))
182 free_symbol_cache(data->orig.data); 233 free_symbol_cache(data->orig.data);
183 kfree(data); 234 kfree(data);
184} 235}
185 236
237/* Default (unsigned long) fetch type */
238#define __DEFAULT_FETCH_TYPE(t) u##t
239#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t)
240#define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG)
241#define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE)
242
243#define ASSIGN_FETCH_FUNC(kind, type) \
244 .kind = FETCH_FUNC_NAME(kind, type)
245
246#define ASSIGN_FETCH_TYPE(ptype, ftype, sign) \
247 {.name = #ptype, \
248 .size = sizeof(ftype), \
249 .is_signed = sign, \
250 .print = PRINT_TYPE_FUNC_NAME(ptype), \
251 .fmt = PRINT_TYPE_FMT_NAME(ptype), \
252ASSIGN_FETCH_FUNC(reg, ftype), \
253ASSIGN_FETCH_FUNC(stack, ftype), \
254ASSIGN_FETCH_FUNC(retval, ftype), \
255ASSIGN_FETCH_FUNC(memory, ftype), \
256ASSIGN_FETCH_FUNC(symbol, ftype), \
257ASSIGN_FETCH_FUNC(deref, ftype), \
258 }
259
260/* Fetch type information table */
261static const struct fetch_type {
262 const char *name; /* Name of type */
263 size_t size; /* Byte size of type */
264 int is_signed; /* Signed flag */
265 print_type_func_t print; /* Print functions */
266 const char *fmt; /* Fromat string */
267 /* Fetch functions */
268 fetch_func_t reg;
269 fetch_func_t stack;
270 fetch_func_t retval;
271 fetch_func_t memory;
272 fetch_func_t symbol;
273 fetch_func_t deref;
274} fetch_type_table[] = {
275 ASSIGN_FETCH_TYPE(u8, u8, 0),
276 ASSIGN_FETCH_TYPE(u16, u16, 0),
277 ASSIGN_FETCH_TYPE(u32, u32, 0),
278 ASSIGN_FETCH_TYPE(u64, u64, 0),
279 ASSIGN_FETCH_TYPE(s8, u8, 1),
280 ASSIGN_FETCH_TYPE(s16, u16, 1),
281 ASSIGN_FETCH_TYPE(s32, u32, 1),
282 ASSIGN_FETCH_TYPE(s64, u64, 1),
283};
284
285static const struct fetch_type *find_fetch_type(const char *type)
286{
287 int i;
288
289 if (!type)
290 type = DEFAULT_FETCH_TYPE_STR;
291
292 for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++)
293 if (strcmp(type, fetch_type_table[i].name) == 0)
294 return &fetch_type_table[i];
295 return NULL;
296}
297
298/* Special function : only accept unsigned long */
299static __kprobes void fetch_stack_address(struct pt_regs *regs,
300 void *dummy, void *dest)
301{
302 *(unsigned long *)dest = kernel_stack_pointer(regs);
303}
304
186/** 305/**
187 * Kprobe event core functions 306 * Kprobe event core functions
188 */ 307 */
189 308
190struct probe_arg { 309struct probe_arg {
191 struct fetch_func fetch; 310 struct fetch_param fetch;
192 const char *name; 311 unsigned int offset; /* Offset from argument entry */
312 const char *name; /* Name of this argument */
313 const char *comm; /* Command of this argument */
314 const struct fetch_type *type; /* Type of this argument */
193}; 315};
194 316
195/* Flags for trace_probe */ 317/* Flags for trace_probe */
@@ -204,6 +326,7 @@ struct trace_probe {
204 const char *symbol; /* symbol name */ 326 const char *symbol; /* symbol name */
205 struct ftrace_event_call call; 327 struct ftrace_event_call call;
206 struct trace_event event; 328 struct trace_event event;
329 ssize_t size; /* trace entry size */
207 unsigned int nr_args; 330 unsigned int nr_args;
208 struct probe_arg args[]; 331 struct probe_arg args[];
209}; 332};
@@ -212,6 +335,7 @@ struct trace_probe {
212 (offsetof(struct trace_probe, args) + \ 335 (offsetof(struct trace_probe, args) + \
213 (sizeof(struct probe_arg) * (n))) 336 (sizeof(struct probe_arg) * (n)))
214 337
338
215static __kprobes int probe_is_return(struct trace_probe *tp) 339static __kprobes int probe_is_return(struct trace_probe *tp)
216{ 340{
217 return tp->rp.handler != NULL; 341 return tp->rp.handler != NULL;
@@ -222,49 +346,6 @@ static __kprobes const char *probe_symbol(struct trace_probe *tp)
222 return tp->symbol ? tp->symbol : "unknown"; 346 return tp->symbol ? tp->symbol : "unknown";
223} 347}
224 348
225static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff)
226{
227 int ret = -EINVAL;
228
229 if (ff->func == fetch_register) {
230 const char *name;
231 name = regs_query_register_name((unsigned int)((long)ff->data));
232 ret = snprintf(buf, n, "%%%s", name);
233 } else if (ff->func == fetch_stack)
234 ret = snprintf(buf, n, "$stack%lu", (unsigned long)ff->data);
235 else if (ff->func == fetch_memory)
236 ret = snprintf(buf, n, "@0x%p", ff->data);
237 else if (ff->func == fetch_symbol) {
238 struct symbol_cache *sc = ff->data;
239 if (sc->offset)
240 ret = snprintf(buf, n, "@%s%+ld", sc->symbol,
241 sc->offset);
242 else
243 ret = snprintf(buf, n, "@%s", sc->symbol);
244 } else if (ff->func == fetch_retvalue)
245 ret = snprintf(buf, n, "$retval");
246 else if (ff->func == fetch_stack_address)
247 ret = snprintf(buf, n, "$stack");
248 else if (ff->func == fetch_indirect) {
249 struct indirect_fetch_data *id = ff->data;
250 size_t l = 0;
251 ret = snprintf(buf, n, "%+ld(", id->offset);
252 if (ret >= n)
253 goto end;
254 l += ret;
255 ret = probe_arg_string(buf + l, n - l, &id->orig);
256 if (ret < 0)
257 goto end;
258 l += ret;
259 ret = snprintf(buf + l, n - l, ")");
260 ret += l;
261 }
262end:
263 if (ret >= n)
264 return -ENOSPC;
265 return ret;
266}
267
268static int register_probe_event(struct trace_probe *tp); 349static int register_probe_event(struct trace_probe *tp);
269static void unregister_probe_event(struct trace_probe *tp); 350static void unregister_probe_event(struct trace_probe *tp);
270 351
@@ -347,11 +428,12 @@ error:
347 428
348static void free_probe_arg(struct probe_arg *arg) 429static void free_probe_arg(struct probe_arg *arg)
349{ 430{
350 if (arg->fetch.func == fetch_symbol) 431 if (CHECK_BASIC_FETCH_FUNCS(deref, arg->fetch.fn))
432 free_deref_fetch_param(arg->fetch.data);
433 else if (CHECK_BASIC_FETCH_FUNCS(symbol, arg->fetch.fn))
351 free_symbol_cache(arg->fetch.data); 434 free_symbol_cache(arg->fetch.data);
352 else if (arg->fetch.func == fetch_indirect)
353 free_indirect_fetch_data(arg->fetch.data);
354 kfree(arg->name); 435 kfree(arg->name);
436 kfree(arg->comm);
355} 437}
356 438
357static void free_trace_probe(struct trace_probe *tp) 439static void free_trace_probe(struct trace_probe *tp)
@@ -457,28 +539,30 @@ static int split_symbol_offset(char *symbol, unsigned long *offset)
457#define PARAM_MAX_ARGS 16 539#define PARAM_MAX_ARGS 16
458#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long)) 540#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long))
459 541
460static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return) 542static int parse_probe_vars(char *arg, const struct fetch_type *t,
543 struct fetch_param *f, int is_return)
461{ 544{
462 int ret = 0; 545 int ret = 0;
463 unsigned long param; 546 unsigned long param;
464 547
465 if (strcmp(arg, "retval") == 0) { 548 if (strcmp(arg, "retval") == 0) {
466 if (is_return) { 549 if (is_return)
467 ff->func = fetch_retvalue; 550 f->fn = t->retval;
468 ff->data = NULL; 551 else
469 } else
470 ret = -EINVAL; 552 ret = -EINVAL;
471 } else if (strncmp(arg, "stack", 5) == 0) { 553 } else if (strncmp(arg, "stack", 5) == 0) {
472 if (arg[5] == '\0') { 554 if (arg[5] == '\0') {
473 ff->func = fetch_stack_address; 555 if (strcmp(t->name, DEFAULT_FETCH_TYPE_STR) == 0)
474 ff->data = NULL; 556 f->fn = fetch_stack_address;
557 else
558 ret = -EINVAL;
475 } else if (isdigit(arg[5])) { 559 } else if (isdigit(arg[5])) {
476 ret = strict_strtoul(arg + 5, 10, &param); 560 ret = strict_strtoul(arg + 5, 10, &param);
477 if (ret || param > PARAM_MAX_STACK) 561 if (ret || param > PARAM_MAX_STACK)
478 ret = -EINVAL; 562 ret = -EINVAL;
479 else { 563 else {
480 ff->func = fetch_stack; 564 f->fn = t->stack;
481 ff->data = (void *)param; 565 f->data = (void *)param;
482 } 566 }
483 } else 567 } else
484 ret = -EINVAL; 568 ret = -EINVAL;
@@ -488,7 +572,8 @@ static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return)
488} 572}
489 573
490/* Recursive argument parser */ 574/* Recursive argument parser */
491static int __parse_probe_arg(char *arg, struct fetch_func *ff, int is_return) 575static int __parse_probe_arg(char *arg, const struct fetch_type *t,
576 struct fetch_param *f, int is_return)
492{ 577{
493 int ret = 0; 578 int ret = 0;
494 unsigned long param; 579 unsigned long param;
@@ -497,13 +582,13 @@ static int __parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
497 582
498 switch (arg[0]) { 583 switch (arg[0]) {
499 case '$': 584 case '$':
500 ret = parse_probe_vars(arg + 1, ff, is_return); 585 ret = parse_probe_vars(arg + 1, t, f, is_return);
501 break; 586 break;
502 case '%': /* named register */ 587 case '%': /* named register */
503 ret = regs_query_register_offset(arg + 1); 588 ret = regs_query_register_offset(arg + 1);
504 if (ret >= 0) { 589 if (ret >= 0) {
505 ff->func = fetch_register; 590 f->fn = t->reg;
506 ff->data = (void *)(unsigned long)ret; 591 f->data = (void *)(unsigned long)ret;
507 ret = 0; 592 ret = 0;
508 } 593 }
509 break; 594 break;
@@ -512,26 +597,22 @@ static int __parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
512 ret = strict_strtoul(arg + 1, 0, &param); 597 ret = strict_strtoul(arg + 1, 0, &param);
513 if (ret) 598 if (ret)
514 break; 599 break;
515 ff->func = fetch_memory; 600 f->fn = t->memory;
516 ff->data = (void *)param; 601 f->data = (void *)param;
517 } else { 602 } else {
518 ret = split_symbol_offset(arg + 1, &offset); 603 ret = split_symbol_offset(arg + 1, &offset);
519 if (ret) 604 if (ret)
520 break; 605 break;
521 ff->data = alloc_symbol_cache(arg + 1, offset); 606 f->data = alloc_symbol_cache(arg + 1, offset);
522 if (ff->data) 607 if (f->data)
523 ff->func = fetch_symbol; 608 f->fn = t->symbol;
524 else
525 ret = -EINVAL;
526 } 609 }
527 break; 610 break;
528 case '+': /* indirect memory */ 611 case '+': /* deref memory */
529 case '-': 612 case '-':
530 tmp = strchr(arg, '('); 613 tmp = strchr(arg, '(');
531 if (!tmp) { 614 if (!tmp)
532 ret = -EINVAL;
533 break; 615 break;
534 }
535 *tmp = '\0'; 616 *tmp = '\0';
536 ret = strict_strtol(arg + 1, 0, &offset); 617 ret = strict_strtol(arg + 1, 0, &offset);
537 if (ret) 618 if (ret)
@@ -541,38 +622,58 @@ static int __parse_probe_arg(char *arg, struct fetch_func *ff, int is_return)
541 arg = tmp + 1; 622 arg = tmp + 1;
542 tmp = strrchr(arg, ')'); 623 tmp = strrchr(arg, ')');
543 if (tmp) { 624 if (tmp) {
544 struct indirect_fetch_data *id; 625 struct deref_fetch_param *dprm;
626 const struct fetch_type *t2 = find_fetch_type(NULL);
545 *tmp = '\0'; 627 *tmp = '\0';
546 id = kzalloc(sizeof(struct indirect_fetch_data), 628 dprm = kzalloc(sizeof(struct deref_fetch_param),
547 GFP_KERNEL); 629 GFP_KERNEL);
548 if (!id) 630 if (!dprm)
549 return -ENOMEM; 631 return -ENOMEM;
550 id->offset = offset; 632 dprm->offset = offset;
551 ret = __parse_probe_arg(arg, &id->orig, is_return); 633 ret = __parse_probe_arg(arg, t2, &dprm->orig,
634 is_return);
552 if (ret) 635 if (ret)
553 kfree(id); 636 kfree(dprm);
554 else { 637 else {
555 ff->func = fetch_indirect; 638 f->fn = t->deref;
556 ff->data = (void *)id; 639 f->data = (void *)dprm;
557 } 640 }
558 } else 641 }
559 ret = -EINVAL;
560 break; 642 break;
561 default:
562 /* TODO: support custom handler */
563 ret = -EINVAL;
564 } 643 }
644 if (!ret && !f->fn)
645 ret = -EINVAL;
565 return ret; 646 return ret;
566} 647}
567 648
568/* String length checking wrapper */ 649/* String length checking wrapper */
569static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return) 650static int parse_probe_arg(char *arg, struct trace_probe *tp,
651 struct probe_arg *parg, int is_return)
570{ 652{
653 const char *t;
654
571 if (strlen(arg) > MAX_ARGSTR_LEN) { 655 if (strlen(arg) > MAX_ARGSTR_LEN) {
572 pr_info("Argument is too long.: %s\n", arg); 656 pr_info("Argument is too long.: %s\n", arg);
573 return -ENOSPC; 657 return -ENOSPC;
574 } 658 }
575 return __parse_probe_arg(arg, ff, is_return); 659 parg->comm = kstrdup(arg, GFP_KERNEL);
660 if (!parg->comm) {
661 pr_info("Failed to allocate memory for command '%s'.\n", arg);
662 return -ENOMEM;
663 }
664 t = strchr(parg->comm, ':');
665 if (t) {
666 arg[t - parg->comm] = '\0';
667 t++;
668 }
669 parg->type = find_fetch_type(t);
670 if (!parg->type) {
671 pr_info("Unsupported type: %s\n", t);
672 return -EINVAL;
673 }
674 parg->offset = tp->size;
675 tp->size += parg->type->size;
676 return __parse_probe_arg(arg, parg->type, &parg->fetch, is_return);
576} 677}
577 678
578/* Return 1 if name is reserved or already used by another argument */ 679/* Return 1 if name is reserved or already used by another argument */
@@ -602,15 +703,18 @@ static int create_trace_probe(int argc, char **argv)
602 * @ADDR : fetch memory at ADDR (ADDR should be in kernel) 703 * @ADDR : fetch memory at ADDR (ADDR should be in kernel)
603 * @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol) 704 * @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
604 * %REG : fetch register REG 705 * %REG : fetch register REG
605 * Indirect memory fetch: 706 * Dereferencing memory fetch:
606 * +|-offs(ARG) : fetch memory at ARG +|- offs address. 707 * +|-offs(ARG) : fetch memory at ARG +|- offs address.
607 * Alias name of args: 708 * Alias name of args:
608 * NAME=FETCHARG : set NAME as alias of FETCHARG. 709 * NAME=FETCHARG : set NAME as alias of FETCHARG.
710 * Type of args:
711 * FETCHARG:TYPE : use TYPE instead of unsigned long.
609 */ 712 */
610 struct trace_probe *tp; 713 struct trace_probe *tp;
611 int i, ret = 0; 714 int i, ret = 0;
612 int is_return = 0, is_delete = 0; 715 int is_return = 0, is_delete = 0;
613 char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL; 716 char *symbol = NULL, *event = NULL, *group = NULL;
717 char *arg, *tmp;
614 unsigned long offset = 0; 718 unsigned long offset = 0;
615 void *addr = NULL; 719 void *addr = NULL;
616 char buf[MAX_EVENT_NAME_LEN]; 720 char buf[MAX_EVENT_NAME_LEN];
@@ -723,13 +827,6 @@ static int create_trace_probe(int argc, char **argv)
723 else 827 else
724 arg = argv[i]; 828 arg = argv[i];
725 829
726 if (conflict_field_name(argv[i], tp->args, i)) {
727 pr_info("Argument%d name '%s' conflicts with "
728 "another field.\n", i, argv[i]);
729 ret = -EINVAL;
730 goto error;
731 }
732
733 tp->args[i].name = kstrdup(argv[i], GFP_KERNEL); 830 tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
734 if (!tp->args[i].name) { 831 if (!tp->args[i].name) {
735 pr_info("Failed to allocate argument%d name '%s'.\n", 832 pr_info("Failed to allocate argument%d name '%s'.\n",
@@ -737,9 +834,19 @@ static int create_trace_probe(int argc, char **argv)
737 ret = -ENOMEM; 834 ret = -ENOMEM;
738 goto error; 835 goto error;
739 } 836 }
837 tmp = strchr(tp->args[i].name, ':');
838 if (tmp)
839 *tmp = '_'; /* convert : to _ */
840
841 if (conflict_field_name(tp->args[i].name, tp->args, i)) {
842 pr_info("Argument%d name '%s' conflicts with "
843 "another field.\n", i, argv[i]);
844 ret = -EINVAL;
845 goto error;
846 }
740 847
741 /* Parse fetch argument */ 848 /* Parse fetch argument */
742 ret = parse_probe_arg(arg, &tp->args[i].fetch, is_return); 849 ret = parse_probe_arg(arg, tp, &tp->args[i], is_return);
743 if (ret) { 850 if (ret) {
744 pr_info("Parse error at argument%d. (%d)\n", i, ret); 851 pr_info("Parse error at argument%d. (%d)\n", i, ret);
745 kfree(tp->args[i].name); 852 kfree(tp->args[i].name);
@@ -794,8 +901,7 @@ static void probes_seq_stop(struct seq_file *m, void *v)
794static int probes_seq_show(struct seq_file *m, void *v) 901static int probes_seq_show(struct seq_file *m, void *v)
795{ 902{
796 struct trace_probe *tp = v; 903 struct trace_probe *tp = v;
797 int i, ret; 904 int i;
798 char buf[MAX_ARGSTR_LEN + 1];
799 905
800 seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p'); 906 seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p');
801 seq_printf(m, ":%s/%s", tp->call.system, tp->call.name); 907 seq_printf(m, ":%s/%s", tp->call.system, tp->call.name);
@@ -807,15 +913,10 @@ static int probes_seq_show(struct seq_file *m, void *v)
807 else 913 else
808 seq_printf(m, " %s", probe_symbol(tp)); 914 seq_printf(m, " %s", probe_symbol(tp));
809 915
810 for (i = 0; i < tp->nr_args; i++) { 916 for (i = 0; i < tp->nr_args; i++)
811 ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i].fetch); 917 seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm);
812 if (ret < 0) {
813 pr_warning("Argument%d decoding error(%d).\n", i, ret);
814 return ret;
815 }
816 seq_printf(m, " %s=%s", tp->args[i].name, buf);
817 }
818 seq_printf(m, "\n"); 918 seq_printf(m, "\n");
919
819 return 0; 920 return 0;
820} 921}
821 922
@@ -945,9 +1046,10 @@ static const struct file_operations kprobe_profile_ops = {
945static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) 1046static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
946{ 1047{
947 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); 1048 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
948 struct kprobe_trace_entry *entry; 1049 struct kprobe_trace_entry_head *entry;
949 struct ring_buffer_event *event; 1050 struct ring_buffer_event *event;
950 struct ring_buffer *buffer; 1051 struct ring_buffer *buffer;
1052 u8 *data;
951 int size, i, pc; 1053 int size, i, pc;
952 unsigned long irq_flags; 1054 unsigned long irq_flags;
953 struct ftrace_event_call *call = &tp->call; 1055 struct ftrace_event_call *call = &tp->call;
@@ -957,7 +1059,7 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
957 local_save_flags(irq_flags); 1059 local_save_flags(irq_flags);
958 pc = preempt_count(); 1060 pc = preempt_count();
959 1061
960 size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); 1062 size = sizeof(*entry) + tp->size;
961 1063
962 event = trace_current_buffer_lock_reserve(&buffer, call->id, size, 1064 event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
963 irq_flags, pc); 1065 irq_flags, pc);
@@ -965,10 +1067,10 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
965 return; 1067 return;
966 1068
967 entry = ring_buffer_event_data(event); 1069 entry = ring_buffer_event_data(event);
968 entry->nargs = tp->nr_args;
969 entry->ip = (unsigned long)kp->addr; 1070 entry->ip = (unsigned long)kp->addr;
1071 data = (u8 *)&entry[1];
970 for (i = 0; i < tp->nr_args; i++) 1072 for (i = 0; i < tp->nr_args; i++)
971 entry->args[i] = call_fetch(&tp->args[i].fetch, regs); 1073 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
972 1074
973 if (!filter_current_check_discard(buffer, call, entry, event)) 1075 if (!filter_current_check_discard(buffer, call, entry, event))
974 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); 1076 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
@@ -979,9 +1081,10 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
979 struct pt_regs *regs) 1081 struct pt_regs *regs)
980{ 1082{
981 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); 1083 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
982 struct kretprobe_trace_entry *entry; 1084 struct kretprobe_trace_entry_head *entry;
983 struct ring_buffer_event *event; 1085 struct ring_buffer_event *event;
984 struct ring_buffer *buffer; 1086 struct ring_buffer *buffer;
1087 u8 *data;
985 int size, i, pc; 1088 int size, i, pc;
986 unsigned long irq_flags; 1089 unsigned long irq_flags;
987 struct ftrace_event_call *call = &tp->call; 1090 struct ftrace_event_call *call = &tp->call;
@@ -989,7 +1092,7 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
989 local_save_flags(irq_flags); 1092 local_save_flags(irq_flags);
990 pc = preempt_count(); 1093 pc = preempt_count();
991 1094
992 size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); 1095 size = sizeof(*entry) + tp->size;
993 1096
994 event = trace_current_buffer_lock_reserve(&buffer, call->id, size, 1097 event = trace_current_buffer_lock_reserve(&buffer, call->id, size,
995 irq_flags, pc); 1098 irq_flags, pc);
@@ -997,11 +1100,11 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
997 return; 1100 return;
998 1101
999 entry = ring_buffer_event_data(event); 1102 entry = ring_buffer_event_data(event);
1000 entry->nargs = tp->nr_args;
1001 entry->func = (unsigned long)tp->rp.kp.addr; 1103 entry->func = (unsigned long)tp->rp.kp.addr;
1002 entry->ret_ip = (unsigned long)ri->ret_addr; 1104 entry->ret_ip = (unsigned long)ri->ret_addr;
1105 data = (u8 *)&entry[1];
1003 for (i = 0; i < tp->nr_args; i++) 1106 for (i = 0; i < tp->nr_args; i++)
1004 entry->args[i] = call_fetch(&tp->args[i].fetch, regs); 1107 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
1005 1108
1006 if (!filter_current_check_discard(buffer, call, entry, event)) 1109 if (!filter_current_check_discard(buffer, call, entry, event))
1007 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); 1110 trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc);
@@ -1011,13 +1114,14 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
1011enum print_line_t 1114enum print_line_t
1012print_kprobe_event(struct trace_iterator *iter, int flags) 1115print_kprobe_event(struct trace_iterator *iter, int flags)
1013{ 1116{
1014 struct kprobe_trace_entry *field; 1117 struct kprobe_trace_entry_head *field;
1015 struct trace_seq *s = &iter->seq; 1118 struct trace_seq *s = &iter->seq;
1016 struct trace_event *event; 1119 struct trace_event *event;
1017 struct trace_probe *tp; 1120 struct trace_probe *tp;
1121 u8 *data;
1018 int i; 1122 int i;
1019 1123
1020 field = (struct kprobe_trace_entry *)iter->ent; 1124 field = (struct kprobe_trace_entry_head *)iter->ent;
1021 event = ftrace_find_event(field->ent.type); 1125 event = ftrace_find_event(field->ent.type);
1022 tp = container_of(event, struct trace_probe, event); 1126 tp = container_of(event, struct trace_probe, event);
1023 1127
@@ -1030,9 +1134,10 @@ print_kprobe_event(struct trace_iterator *iter, int flags)
1030 if (!trace_seq_puts(s, ")")) 1134 if (!trace_seq_puts(s, ")"))
1031 goto partial; 1135 goto partial;
1032 1136
1033 for (i = 0; i < field->nargs; i++) 1137 data = (u8 *)&field[1];
1034 if (!trace_seq_printf(s, " %s=%lx", 1138 for (i = 0; i < tp->nr_args; i++)
1035 tp->args[i].name, field->args[i])) 1139 if (!tp->args[i].type->print(s, tp->args[i].name,
1140 data + tp->args[i].offset))
1036 goto partial; 1141 goto partial;
1037 1142
1038 if (!trace_seq_puts(s, "\n")) 1143 if (!trace_seq_puts(s, "\n"))
@@ -1046,13 +1151,14 @@ partial:
1046enum print_line_t 1151enum print_line_t
1047print_kretprobe_event(struct trace_iterator *iter, int flags) 1152print_kretprobe_event(struct trace_iterator *iter, int flags)
1048{ 1153{
1049 struct kretprobe_trace_entry *field; 1154 struct kretprobe_trace_entry_head *field;
1050 struct trace_seq *s = &iter->seq; 1155 struct trace_seq *s = &iter->seq;
1051 struct trace_event *event; 1156 struct trace_event *event;
1052 struct trace_probe *tp; 1157 struct trace_probe *tp;
1158 u8 *data;
1053 int i; 1159 int i;
1054 1160
1055 field = (struct kretprobe_trace_entry *)iter->ent; 1161 field = (struct kretprobe_trace_entry_head *)iter->ent;
1056 event = ftrace_find_event(field->ent.type); 1162 event = ftrace_find_event(field->ent.type);
1057 tp = container_of(event, struct trace_probe, event); 1163 tp = container_of(event, struct trace_probe, event);
1058 1164
@@ -1071,9 +1177,10 @@ print_kretprobe_event(struct trace_iterator *iter, int flags)
1071 if (!trace_seq_puts(s, ")")) 1177 if (!trace_seq_puts(s, ")"))
1072 goto partial; 1178 goto partial;
1073 1179
1074 for (i = 0; i < field->nargs; i++) 1180 data = (u8 *)&field[1];
1075 if (!trace_seq_printf(s, " %s=%lx", 1181 for (i = 0; i < tp->nr_args; i++)
1076 tp->args[i].name, field->args[i])) 1182 if (!tp->args[i].type->print(s, tp->args[i].name,
1183 data + tp->args[i].offset))
1077 goto partial; 1184 goto partial;
1078 1185
1079 if (!trace_seq_puts(s, "\n")) 1186 if (!trace_seq_puts(s, "\n"))
@@ -1129,29 +1236,43 @@ static int probe_event_raw_init(struct ftrace_event_call *event_call)
1129static int kprobe_event_define_fields(struct ftrace_event_call *event_call) 1236static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
1130{ 1237{
1131 int ret, i; 1238 int ret, i;
1132 struct kprobe_trace_entry field; 1239 struct kprobe_trace_entry_head field;
1133 struct trace_probe *tp = (struct trace_probe *)event_call->data; 1240 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1134 1241
1135 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); 1242 DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
1136 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
1137 /* Set argument names as fields */ 1243 /* Set argument names as fields */
1138 for (i = 0; i < tp->nr_args; i++) 1244 for (i = 0; i < tp->nr_args; i++) {
1139 DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0); 1245 ret = trace_define_field(event_call, tp->args[i].type->name,
1246 tp->args[i].name,
1247 sizeof(field) + tp->args[i].offset,
1248 tp->args[i].type->size,
1249 tp->args[i].type->is_signed,
1250 FILTER_OTHER);
1251 if (ret)
1252 return ret;
1253 }
1140 return 0; 1254 return 0;
1141} 1255}
1142 1256
1143static int kretprobe_event_define_fields(struct ftrace_event_call *event_call) 1257static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
1144{ 1258{
1145 int ret, i; 1259 int ret, i;
1146 struct kretprobe_trace_entry field; 1260 struct kretprobe_trace_entry_head field;
1147 struct trace_probe *tp = (struct trace_probe *)event_call->data; 1261 struct trace_probe *tp = (struct trace_probe *)event_call->data;
1148 1262
1149 DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0); 1263 DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
1150 DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0); 1264 DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
1151 DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1);
1152 /* Set argument names as fields */ 1265 /* Set argument names as fields */
1153 for (i = 0; i < tp->nr_args; i++) 1266 for (i = 0; i < tp->nr_args; i++) {
1154 DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0); 1267 ret = trace_define_field(event_call, tp->args[i].type->name,
1268 tp->args[i].name,
1269 sizeof(field) + tp->args[i].offset,
1270 tp->args[i].type->size,
1271 tp->args[i].type->is_signed,
1272 FILTER_OTHER);
1273 if (ret)
1274 return ret;
1275 }
1155 return 0; 1276 return 0;
1156} 1277}
1157 1278
@@ -1176,8 +1297,8 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)
1176 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt); 1297 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);
1177 1298
1178 for (i = 0; i < tp->nr_args; i++) { 1299 for (i = 0; i < tp->nr_args; i++) {
1179 pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%%lx", 1300 pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s",
1180 tp->args[i].name); 1301 tp->args[i].name, tp->args[i].type->fmt);
1181 } 1302 }
1182 1303
1183 pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg); 1304 pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
@@ -1219,12 +1340,13 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
1219{ 1340{
1220 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); 1341 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1221 struct ftrace_event_call *call = &tp->call; 1342 struct ftrace_event_call *call = &tp->call;
1222 struct kprobe_trace_entry *entry; 1343 struct kprobe_trace_entry_head *entry;
1344 u8 *data;
1223 int size, __size, i; 1345 int size, __size, i;
1224 unsigned long irq_flags; 1346 unsigned long irq_flags;
1225 int rctx; 1347 int rctx;
1226 1348
1227 __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); 1349 __size = sizeof(*entry) + tp->size;
1228 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1350 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1229 size -= sizeof(u32); 1351 size -= sizeof(u32);
1230 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, 1352 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
@@ -1235,10 +1357,10 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
1235 if (!entry) 1357 if (!entry)
1236 return; 1358 return;
1237 1359
1238 entry->nargs = tp->nr_args;
1239 entry->ip = (unsigned long)kp->addr; 1360 entry->ip = (unsigned long)kp->addr;
1361 data = (u8 *)&entry[1];
1240 for (i = 0; i < tp->nr_args; i++) 1362 for (i = 0; i < tp->nr_args; i++)
1241 entry->args[i] = call_fetch(&tp->args[i].fetch, regs); 1363 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
1242 1364
1243 perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags, regs); 1365 perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags, regs);
1244} 1366}
@@ -1249,12 +1371,13 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
1249{ 1371{
1250 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); 1372 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1251 struct ftrace_event_call *call = &tp->call; 1373 struct ftrace_event_call *call = &tp->call;
1252 struct kretprobe_trace_entry *entry; 1374 struct kretprobe_trace_entry_head *entry;
1375 u8 *data;
1253 int size, __size, i; 1376 int size, __size, i;
1254 unsigned long irq_flags; 1377 unsigned long irq_flags;
1255 int rctx; 1378 int rctx;
1256 1379
1257 __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); 1380 __size = sizeof(*entry) + tp->size;
1258 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1381 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1259 size -= sizeof(u32); 1382 size -= sizeof(u32);
1260 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, 1383 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
@@ -1265,11 +1388,11 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
1265 if (!entry) 1388 if (!entry)
1266 return; 1389 return;
1267 1390
1268 entry->nargs = tp->nr_args;
1269 entry->func = (unsigned long)tp->rp.kp.addr; 1391 entry->func = (unsigned long)tp->rp.kp.addr;
1270 entry->ret_ip = (unsigned long)ri->ret_addr; 1392 entry->ret_ip = (unsigned long)ri->ret_addr;
1393 data = (u8 *)&entry[1];
1271 for (i = 0; i < tp->nr_args; i++) 1394 for (i = 0; i < tp->nr_args; i++)
1272 entry->args[i] = call_fetch(&tp->args[i].fetch, regs); 1395 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
1273 1396
1274 perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, 1397 perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1,
1275 irq_flags, regs); 1398 irq_flags, regs);
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 81003b4d617f..1cc9858258b3 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -17,7 +17,6 @@ static inline int trace_valid_entry(struct trace_entry *entry)
17 case TRACE_BRANCH: 17 case TRACE_BRANCH:
18 case TRACE_GRAPH_ENT: 18 case TRACE_GRAPH_ENT:
19 case TRACE_GRAPH_RET: 19 case TRACE_GRAPH_RET:
20 case TRACE_HW_BRANCHES:
21 case TRACE_KSYM: 20 case TRACE_KSYM:
22 return 1; 21 return 1;
23 } 22 }
@@ -755,62 +754,6 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
755} 754}
756#endif /* CONFIG_BRANCH_TRACER */ 755#endif /* CONFIG_BRANCH_TRACER */
757 756
758#ifdef CONFIG_HW_BRANCH_TRACER
759int
760trace_selftest_startup_hw_branches(struct tracer *trace,
761 struct trace_array *tr)
762{
763 struct trace_iterator *iter;
764 struct tracer tracer;
765 unsigned long count;
766 int ret;
767
768 if (!trace->open) {
769 printk(KERN_CONT "missing open function...");
770 return -1;
771 }
772
773 ret = tracer_init(trace, tr);
774 if (ret) {
775 warn_failed_init_tracer(trace, ret);
776 return ret;
777 }
778
779 /*
780 * The hw-branch tracer needs to collect the trace from the various
781 * cpu trace buffers - before tracing is stopped.
782 */
783 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
784 if (!iter)
785 return -ENOMEM;
786
787 memcpy(&tracer, trace, sizeof(tracer));
788
789 iter->trace = &tracer;
790 iter->tr = tr;
791 iter->pos = -1;
792 mutex_init(&iter->mutex);
793
794 trace->open(iter);
795
796 mutex_destroy(&iter->mutex);
797 kfree(iter);
798
799 tracing_stop();
800
801 ret = trace_test_buffer(tr, &count);
802 trace->reset(tr);
803 tracing_start();
804
805 if (!ret && !count) {
806 printk(KERN_CONT "no entries found..");
807 ret = -1;
808 }
809
810 return ret;
811}
812#endif /* CONFIG_HW_BRANCH_TRACER */
813
814#ifdef CONFIG_KSYM_TRACER 757#ifdef CONFIG_KSYM_TRACER
815static int ksym_selftest_dummy; 758static int ksym_selftest_dummy;
816 759
diff --git a/mm/mlock.c b/mm/mlock.c
index 8f4e2dfceec1..3f82720e0515 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -607,44 +607,3 @@ void user_shm_unlock(size_t size, struct user_struct *user)
607 spin_unlock(&shmlock_user_lock); 607 spin_unlock(&shmlock_user_lock);
608 free_uid(user); 608 free_uid(user);
609} 609}
610
611int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim,
612 size_t size)
613{
614 unsigned long lim, vm, pgsz;
615 int error = -ENOMEM;
616
617 pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
618
619 down_write(&mm->mmap_sem);
620
621 lim = ACCESS_ONCE(rlim[RLIMIT_AS].rlim_cur) >> PAGE_SHIFT;
622 vm = mm->total_vm + pgsz;
623 if (lim < vm)
624 goto out;
625
626 lim = ACCESS_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur) >> PAGE_SHIFT;
627 vm = mm->locked_vm + pgsz;
628 if (lim < vm)
629 goto out;
630
631 mm->total_vm += pgsz;
632 mm->locked_vm += pgsz;
633
634 error = 0;
635 out:
636 up_write(&mm->mmap_sem);
637 return error;
638}
639
640void refund_locked_memory(struct mm_struct *mm, size_t size)
641{
642 unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
643
644 down_write(&mm->mmap_sem);
645
646 mm->total_vm -= pgsz;
647 mm->locked_vm -= pgsz;
648
649 up_write(&mm->mmap_sem);
650}
diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt
index ae525ac5a2ce..0181dddf6b61 100644
--- a/tools/perf/Documentation/perf-bench.txt
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -19,12 +19,12 @@ COMMON OPTIONS
19-f:: 19-f::
20--format=:: 20--format=::
21Specify format style. 21Specify format style.
22Current available format styles are, 22Current available format styles are:
23 23
24'default':: 24'default'::
25Default style. This is mainly for human reading. 25Default style. This is mainly for human reading.
26--------------------- 26---------------------
27% perf bench sched pipe # with no style specify 27% perf bench sched pipe # with no style specified
28(executing 1000000 pipe operations between two tasks) 28(executing 1000000 pipe operations between two tasks)
29 Total time:5.855 sec 29 Total time:5.855 sec
30 5.855061 usecs/op 30 5.855061 usecs/op
@@ -79,7 +79,7 @@ options (20 sender and receiver processes per group)
79 79
80 Total time:0.308 sec 80 Total time:0.308 sec
81 81
82% perf bench sched messaging -t -g 20 # be multi-thread,with 20 groups 82% perf bench sched messaging -t -g 20 # be multi-thread, with 20 groups
83(20 sender and receiver threads per group) 83(20 sender and receiver threads per group)
84(20 groups == 800 threads run) 84(20 groups == 800 threads run)
85 85
diff --git a/tools/perf/Documentation/perf-kvm.txt b/tools/perf/Documentation/perf-kvm.txt
new file mode 100644
index 000000000000..93400a0f17f0
--- /dev/null
+++ b/tools/perf/Documentation/perf-kvm.txt
@@ -0,0 +1,67 @@
1perf-kvm(1)
2==============
3
4NAME
5----
6perf-kvm - Tool to trace/measure kvm guest os
7
8SYNOPSIS
9--------
10[verse]
11'perf kvm' [--host] [--guest] [--guestmount=<path>
12 [--guestkallsyms=<path> --guestmodules=<path> | --guestvmlinux=<path>]]
13 {top|record|report|diff|buildid-list}
14'perf kvm' [--host] [--guest] [--guestkallsyms=<path> --guestmodules=<path>
15 | --guestvmlinux=<path>] {top|record|report|diff|buildid-list}
16
17DESCRIPTION
18-----------
19There are a couple of variants of perf kvm:
20
21 'perf kvm [options] top <command>' to generate and display
22 a performance counter profile of guest os in realtime
23 of an arbitrary workload.
24
25 'perf kvm record <command>' to record the performance counter profile
26 of an arbitrary workload and save it into a perf data file. If both
27 --host and --guest are input, the perf data file name is perf.data.kvm.
28 If there is no --host but --guest, the file name is perf.data.guest.
29 If there is no --guest but --host, the file name is perf.data.host.
30
31 'perf kvm report' to display the performance counter profile information
32 recorded via perf kvm record.
33
34 'perf kvm diff' to display the performance difference amongst two perf.data
35 files captured via perf record.
36
37 'perf kvm buildid-list' to display the buildids found in a perf data file,
38 so that other tools can be used to fetch packages with matching symbol tables
39 for use by perf report.
40
41OPTIONS
42-------
43--host=::
44 Collect host side performance profile.
45--guest=::
46 Collect guest side performance profile.
47--guestmount=<path>::
48 Guest os root file system mount directory. Users mount guest os
49 root directories under <path> by a specific filesystem access method,
50 typically, sshfs. For example, start 2 guest os. The one's pid is 8888
51 and the other's is 9999.
52 #mkdir ~/guestmount; cd ~/guestmount
53 #sshfs -o allow_other,direct_io -p 5551 localhost:/ 8888/
54 #sshfs -o allow_other,direct_io -p 5552 localhost:/ 9999/
55 #perf kvm --host --guest --guestmount=~/guestmount top
56--guestkallsyms=<path>::
57 Guest os /proc/kallsyms file copy. 'perf kvm' reads it to get guest
58 kernel symbols. Users copy it out from guest os.
59--guestmodules=<path>::
60 Guest os /proc/modules file copy. 'perf kvm' reads it to get guest
61 kernel module information. Users copy it out from guest os.
62--guestvmlinux=<path>::
63 Guest os kernel vmlinux.
64
65SEE ALSO
66--------
67linkperf:perf-top[1] perf-record[1] perf-report[1] perf-diff[1] perf-buildid-list[1]
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 34202b1be0bb..63c25d304880 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -57,6 +57,11 @@ OPTIONS
57--force:: 57--force::
58 Forcibly add events with existing name. 58 Forcibly add events with existing name.
59 59
60-n::
61--dry-run::
62 Dry run. With this option, --add and --del doesn't execute actual
63 adding and removal operations.
64
60PROBE SYNTAX 65PROBE SYNTAX
61------------ 66------------
62Probe points are defined by following syntax. 67Probe points are defined by following syntax.
@@ -74,13 +79,22 @@ Probe points are defined by following syntax.
74'EVENT' specifies the name of new event, if omitted, it will be set the name of the probed function. Currently, event group name is set as 'probe'. 79'EVENT' specifies the name of new event, if omitted, it will be set the name of the probed function. Currently, event group name is set as 'probe'.
75'FUNC' specifies a probed function name, and it may have one of the following options; '+OFFS' is the offset from function entry address in bytes, ':RLN' is the relative-line number from function entry line, and '%return' means that it probes function return. And ';PTN' means lazy matching pattern (see LAZY MATCHING). Note that ';PTN' must be the end of the probe point definition. In addition, '@SRC' specifies a source file which has that function. 80'FUNC' specifies a probed function name, and it may have one of the following options; '+OFFS' is the offset from function entry address in bytes, ':RLN' is the relative-line number from function entry line, and '%return' means that it probes function return. And ';PTN' means lazy matching pattern (see LAZY MATCHING). Note that ';PTN' must be the end of the probe point definition. In addition, '@SRC' specifies a source file which has that function.
76It is also possible to specify a probe point by the source line number or lazy matching by using 'SRC:ALN' or 'SRC;PTN' syntax, where 'SRC' is the source file path, ':ALN' is the line number and ';PTN' is the lazy matching pattern. 81It is also possible to specify a probe point by the source line number or lazy matching by using 'SRC:ALN' or 'SRC;PTN' syntax, where 'SRC' is the source file path, ':ALN' is the line number and ';PTN' is the lazy matching pattern.
77'ARG' specifies the arguments of this probe point. You can use the name of local variable, or kprobe-tracer argument format (e.g. $retval, %ax, etc). 82'ARG' specifies the arguments of this probe point, (see PROBE ARGUMENT).
83
84PROBE ARGUMENT
85--------------
86Each probe argument follows below syntax.
87
88 [NAME=]LOCALVAR|$retval|%REG|@SYMBOL[:TYPE]
89
90'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.)
91'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo.
78 92
79LINE SYNTAX 93LINE SYNTAX
80----------- 94-----------
81Line range is described by the following syntax. 95Line range is described by the following syntax.
82 96
83 "FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]" 97 "FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]"
84 98
85FUNC specifies the function name of showing lines. 'RLN' is the start line 99FUNC specifies the function name of showing lines. 'RLN' is the start line
86number from function entry line, and 'RLN2' is the end line number. As same as 100number from function entry line, and 'RLN2' is the end line number. As same as
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index fc46c0b40f6e..020d871c7934 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -58,7 +58,7 @@ OPTIONS
58 58
59-f:: 59-f::
60--force:: 60--force::
61 Overwrite existing data file. 61 Overwrite existing data file. (deprecated)
62 62
63-c:: 63-c::
64--count=:: 64--count=::
@@ -101,7 +101,7 @@ OPTIONS
101 101
102-R:: 102-R::
103--raw-samples:: 103--raw-samples::
104Collect raw sample records from all opened counters (typically for tracepoint counters). 104Collect raw sample records from all opened counters (default for tracepoint counters).
105 105
106SEE ALSO 106SEE ALSO
107-------- 107--------
diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt
index 1ce79198997b..8417644a6166 100644
--- a/tools/perf/Documentation/perf-sched.txt
+++ b/tools/perf/Documentation/perf-sched.txt
@@ -12,7 +12,7 @@ SYNOPSIS
12 12
13DESCRIPTION 13DESCRIPTION
14----------- 14-----------
15There's four variants of perf sched: 15There are four variants of perf sched:
16 16
17 'perf sched record <command>' to record the scheduling events 17 'perf sched record <command>' to record the scheduling events
18 of an arbitrary workload. 18 of an arbitrary workload.
@@ -27,7 +27,7 @@ There's four variants of perf sched:
27 via perf sched record. (this is done by starting up mockup threads 27 via perf sched record. (this is done by starting up mockup threads
28 that mimic the workload based on the events in the trace. These 28 that mimic the workload based on the events in the trace. These
29 threads can then replay the timings (CPU runtime and sleep patterns) 29 threads can then replay the timings (CPU runtime and sleep patterns)
30 of the workload as it occured when it was recorded - and can repeat 30 of the workload as it occurred when it was recorded - and can repeat
31 it a number of times, measuring its performance.) 31 it a number of times, measuring its performance.)
32 32
33OPTIONS 33OPTIONS
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index bc0f670a8338..3cb3449a9645 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -1,3 +1,7 @@
1ifeq ("$(origin O)", "command line")
2 OUTPUT := $(O)/
3endif
4
1# The default target of this Makefile is... 5# The default target of this Makefile is...
2all:: 6all::
3 7
@@ -150,10 +154,17 @@ all::
150# Define LDFLAGS=-static to build a static binary. 154# Define LDFLAGS=-static to build a static binary.
151# 155#
152# Define EXTRA_CFLAGS=-m64 or EXTRA_CFLAGS=-m32 as appropriate for cross-builds. 156# Define EXTRA_CFLAGS=-m64 or EXTRA_CFLAGS=-m32 as appropriate for cross-builds.
157#
158# Define NO_DWARF if you do not want debug-info analysis feature at all.
153 159
154PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE 160$(shell sh -c 'mkdir -p $(OUTPUT)scripts/python/Perf-Trace-Util/' 2> /dev/null)
155 @$(SHELL_PATH) util/PERF-VERSION-GEN 161$(shell sh -c 'mkdir -p $(OUTPUT)scripts/perl/Perf-Trace-Util/' 2> /dev/null)
156-include PERF-VERSION-FILE 162$(shell sh -c 'mkdir -p $(OUTPUT)util/scripting-engines/' 2> /dev/null)
163$(shell sh -c 'mkdir $(OUTPUT)bench' 2> /dev/null)
164
165$(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
166 @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT)
167-include $(OUTPUT)PERF-VERSION-FILE
157 168
158uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not') 169uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not')
159uname_M := $(shell sh -c 'uname -m 2>/dev/null || echo not') 170uname_M := $(shell sh -c 'uname -m 2>/dev/null || echo not')
@@ -308,7 +319,7 @@ PROGRAMS += $(EXTRA_PROGRAMS)
308# 319#
309# Single 'perf' binary right now: 320# Single 'perf' binary right now:
310# 321#
311PROGRAMS += perf 322PROGRAMS += $(OUTPUT)perf
312 323
313# List built-in command $C whose implementation cmd_$C() is not in 324# List built-in command $C whose implementation cmd_$C() is not in
314# builtin-$C.o but is linked in as part of some other command. 325# builtin-$C.o but is linked in as part of some other command.
@@ -318,7 +329,7 @@ PROGRAMS += perf
318ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS) 329ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS)
319 330
320# what 'all' will build but not install in perfexecdir 331# what 'all' will build but not install in perfexecdir
321OTHER_PROGRAMS = perf$X 332OTHER_PROGRAMS = $(OUTPUT)perf$X
322 333
323# Set paths to tools early so that they can be used for version tests. 334# Set paths to tools early so that they can be used for version tests.
324ifndef SHELL_PATH 335ifndef SHELL_PATH
@@ -330,7 +341,7 @@ endif
330 341
331export PERL_PATH 342export PERL_PATH
332 343
333LIB_FILE=libperf.a 344LIB_FILE=$(OUTPUT)libperf.a
334 345
335LIB_H += ../../include/linux/perf_event.h 346LIB_H += ../../include/linux/perf_event.h
336LIB_H += ../../include/linux/rbtree.h 347LIB_H += ../../include/linux/rbtree.h
@@ -375,7 +386,6 @@ LIB_H += util/header.h
375LIB_H += util/help.h 386LIB_H += util/help.h
376LIB_H += util/session.h 387LIB_H += util/session.h
377LIB_H += util/strbuf.h 388LIB_H += util/strbuf.h
378LIB_H += util/string.h
379LIB_H += util/strlist.h 389LIB_H += util/strlist.h
380LIB_H += util/svghelper.h 390LIB_H += util/svghelper.h
381LIB_H += util/run-command.h 391LIB_H += util/run-command.h
@@ -391,77 +401,78 @@ LIB_H += util/probe-finder.h
391LIB_H += util/probe-event.h 401LIB_H += util/probe-event.h
392LIB_H += util/cpumap.h 402LIB_H += util/cpumap.h
393 403
394LIB_OBJS += util/abspath.o 404LIB_OBJS += $(OUTPUT)util/abspath.o
395LIB_OBJS += util/alias.o 405LIB_OBJS += $(OUTPUT)util/alias.o
396LIB_OBJS += util/build-id.o 406LIB_OBJS += $(OUTPUT)util/build-id.o
397LIB_OBJS += util/config.o 407LIB_OBJS += $(OUTPUT)util/config.o
398LIB_OBJS += util/ctype.o 408LIB_OBJS += $(OUTPUT)util/ctype.o
399LIB_OBJS += util/debugfs.o 409LIB_OBJS += $(OUTPUT)util/debugfs.o
400LIB_OBJS += util/environment.o 410LIB_OBJS += $(OUTPUT)util/environment.o
401LIB_OBJS += util/event.o 411LIB_OBJS += $(OUTPUT)util/event.o
402LIB_OBJS += util/exec_cmd.o 412LIB_OBJS += $(OUTPUT)util/exec_cmd.o
403LIB_OBJS += util/help.o 413LIB_OBJS += $(OUTPUT)util/help.o
404LIB_OBJS += util/levenshtein.o 414LIB_OBJS += $(OUTPUT)util/levenshtein.o
405LIB_OBJS += util/parse-options.o 415LIB_OBJS += $(OUTPUT)util/parse-options.o
406LIB_OBJS += util/parse-events.o 416LIB_OBJS += $(OUTPUT)util/parse-events.o
407LIB_OBJS += util/path.o 417LIB_OBJS += $(OUTPUT)util/path.o
408LIB_OBJS += util/rbtree.o 418LIB_OBJS += $(OUTPUT)util/rbtree.o
409LIB_OBJS += util/bitmap.o 419LIB_OBJS += $(OUTPUT)util/bitmap.o
410LIB_OBJS += util/hweight.o 420LIB_OBJS += $(OUTPUT)util/hweight.o
411LIB_OBJS += util/find_next_bit.o 421LIB_OBJS += $(OUTPUT)util/find_next_bit.o
412LIB_OBJS += util/run-command.o 422LIB_OBJS += $(OUTPUT)util/run-command.o
413LIB_OBJS += util/quote.o 423LIB_OBJS += $(OUTPUT)util/quote.o
414LIB_OBJS += util/strbuf.o 424LIB_OBJS += $(OUTPUT)util/strbuf.o
415LIB_OBJS += util/string.o 425LIB_OBJS += $(OUTPUT)util/string.o
416LIB_OBJS += util/strlist.o 426LIB_OBJS += $(OUTPUT)util/strlist.o
417LIB_OBJS += util/usage.o 427LIB_OBJS += $(OUTPUT)util/usage.o
418LIB_OBJS += util/wrapper.o 428LIB_OBJS += $(OUTPUT)util/wrapper.o
419LIB_OBJS += util/sigchain.o 429LIB_OBJS += $(OUTPUT)util/sigchain.o
420LIB_OBJS += util/symbol.o 430LIB_OBJS += $(OUTPUT)util/symbol.o
421LIB_OBJS += util/color.o 431LIB_OBJS += $(OUTPUT)util/color.o
422LIB_OBJS += util/pager.o 432LIB_OBJS += $(OUTPUT)util/pager.o
423LIB_OBJS += util/header.o 433LIB_OBJS += $(OUTPUT)util/header.o
424LIB_OBJS += util/callchain.o 434LIB_OBJS += $(OUTPUT)util/callchain.o
425LIB_OBJS += util/values.o 435LIB_OBJS += $(OUTPUT)util/values.o
426LIB_OBJS += util/debug.o 436LIB_OBJS += $(OUTPUT)util/debug.o
427LIB_OBJS += util/map.o 437LIB_OBJS += $(OUTPUT)util/map.o
428LIB_OBJS += util/session.o 438LIB_OBJS += $(OUTPUT)util/session.o
429LIB_OBJS += util/thread.o 439LIB_OBJS += $(OUTPUT)util/thread.o
430LIB_OBJS += util/trace-event-parse.o 440LIB_OBJS += $(OUTPUT)util/trace-event-parse.o
431LIB_OBJS += util/trace-event-read.o 441LIB_OBJS += $(OUTPUT)util/trace-event-read.o
432LIB_OBJS += util/trace-event-info.o 442LIB_OBJS += $(OUTPUT)util/trace-event-info.o
433LIB_OBJS += util/trace-event-scripting.o 443LIB_OBJS += $(OUTPUT)util/trace-event-scripting.o
434LIB_OBJS += util/svghelper.o 444LIB_OBJS += $(OUTPUT)util/svghelper.o
435LIB_OBJS += util/sort.o 445LIB_OBJS += $(OUTPUT)util/sort.o
436LIB_OBJS += util/hist.o 446LIB_OBJS += $(OUTPUT)util/hist.o
437LIB_OBJS += util/probe-event.o 447LIB_OBJS += $(OUTPUT)util/probe-event.o
438LIB_OBJS += util/util.o 448LIB_OBJS += $(OUTPUT)util/util.o
439LIB_OBJS += util/cpumap.o 449LIB_OBJS += $(OUTPUT)util/cpumap.o
440 450
441BUILTIN_OBJS += builtin-annotate.o 451BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o
442 452
443BUILTIN_OBJS += builtin-bench.o 453BUILTIN_OBJS += $(OUTPUT)builtin-bench.o
444 454
445# Benchmark modules 455# Benchmark modules
446BUILTIN_OBJS += bench/sched-messaging.o 456BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o
447BUILTIN_OBJS += bench/sched-pipe.o 457BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o
448BUILTIN_OBJS += bench/mem-memcpy.o 458BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o
449 459
450BUILTIN_OBJS += builtin-diff.o 460BUILTIN_OBJS += $(OUTPUT)builtin-diff.o
451BUILTIN_OBJS += builtin-help.o 461BUILTIN_OBJS += $(OUTPUT)builtin-help.o
452BUILTIN_OBJS += builtin-sched.o 462BUILTIN_OBJS += $(OUTPUT)builtin-sched.o
453BUILTIN_OBJS += builtin-buildid-list.o 463BUILTIN_OBJS += $(OUTPUT)builtin-buildid-list.o
454BUILTIN_OBJS += builtin-buildid-cache.o 464BUILTIN_OBJS += $(OUTPUT)builtin-buildid-cache.o
455BUILTIN_OBJS += builtin-list.o 465BUILTIN_OBJS += $(OUTPUT)builtin-list.o
456BUILTIN_OBJS += builtin-record.o 466BUILTIN_OBJS += $(OUTPUT)builtin-record.o
457BUILTIN_OBJS += builtin-report.o 467BUILTIN_OBJS += $(OUTPUT)builtin-report.o
458BUILTIN_OBJS += builtin-stat.o 468BUILTIN_OBJS += $(OUTPUT)builtin-stat.o
459BUILTIN_OBJS += builtin-timechart.o 469BUILTIN_OBJS += $(OUTPUT)builtin-timechart.o
460BUILTIN_OBJS += builtin-top.o 470BUILTIN_OBJS += $(OUTPUT)builtin-top.o
461BUILTIN_OBJS += builtin-trace.o 471BUILTIN_OBJS += $(OUTPUT)builtin-trace.o
462BUILTIN_OBJS += builtin-probe.o 472BUILTIN_OBJS += $(OUTPUT)builtin-probe.o
463BUILTIN_OBJS += builtin-kmem.o 473BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o
464BUILTIN_OBJS += builtin-lock.o 474BUILTIN_OBJS += $(OUTPUT)builtin-lock.o
475BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o
465 476
466PERFLIBS = $(LIB_FILE) 477PERFLIBS = $(LIB_FILE)
467 478
@@ -492,6 +503,10 @@ ifeq ($(uname_S),Darwin)
492 PTHREAD_LIBS = 503 PTHREAD_LIBS =
493endif 504endif
494 505
506ifneq ($(OUTPUT),)
507 BASIC_CFLAGS += -I$(OUTPUT)
508endif
509
495ifeq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) 510ifeq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y)
496ifneq ($(shell sh -c "(echo '\#include <gnu/libc-version.h>'; echo 'int main(void) { const char * version = gnu_get_libc_version(); return (long)version; }') | $(CC) -x c - $(ALL_CFLAGS) -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) 511ifneq ($(shell sh -c "(echo '\#include <gnu/libc-version.h>'; echo 'int main(void) { const char * version = gnu_get_libc_version(); return (long)version; }') | $(CC) -x c - $(ALL_CFLAGS) -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y)
497 msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static); 512 msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static);
@@ -506,11 +521,20 @@ endif
506 521
507ifneq ($(shell sh -c "(echo '\#include <dwarf.h>'; echo '\#include <libdw.h>'; echo 'int main(void) { Dwarf *dbg; dbg = dwarf_begin(0, DWARF_C_READ); return (long)dbg; }') | $(CC) -x c - $(ALL_CFLAGS) -I/usr/include/elfutils -ldw -lelf -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) 522ifneq ($(shell sh -c "(echo '\#include <dwarf.h>'; echo '\#include <libdw.h>'; echo 'int main(void) { Dwarf *dbg; dbg = dwarf_begin(0, DWARF_C_READ); return (long)dbg; }') | $(CC) -x c - $(ALL_CFLAGS) -I/usr/include/elfutils -ldw -lelf -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y)
508 msg := $(warning No libdw.h found or old libdw.h found, disables dwarf support. Please install elfutils-devel/elfutils-dev); 523 msg := $(warning No libdw.h found or old libdw.h found, disables dwarf support. Please install elfutils-devel/elfutils-dev);
509 BASIC_CFLAGS += -DNO_DWARF_SUPPORT
510else 524else
511 BASIC_CFLAGS += -I/usr/include/elfutils 525ifndef NO_DWARF
526 BASIC_CFLAGS += -I/usr/include/elfutils -DDWARF_SUPPORT
512 EXTLIBS += -lelf -ldw 527 EXTLIBS += -lelf -ldw
513 LIB_OBJS += util/probe-finder.o 528 LIB_OBJS += $(OUTPUT)util/probe-finder.o
529endif
530endif
531
532ifneq ($(shell sh -c "(echo '\#include <newt.h>'; echo 'int main(void) { newtInit(); newtCls(); return newtFinished(); }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -lnewt -o $(BITBUCKET) $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y)
533 msg := $(warning newt not found, disables TUI support. Please install newt-devel or libnewt-dev);
534 BASIC_CFLAGS += -DNO_NEWT_SUPPORT
535else
536 EXTLIBS += -lnewt
537 LIB_OBJS += $(OUTPUT)util/newt.o
514endif 538endif
515 539
516ifndef NO_LIBPERL 540ifndef NO_LIBPERL
@@ -522,8 +546,8 @@ ifneq ($(shell sh -c "(echo '\#include <EXTERN.h>'; echo '\#include <perl.h>'; e
522 BASIC_CFLAGS += -DNO_LIBPERL 546 BASIC_CFLAGS += -DNO_LIBPERL
523else 547else
524 ALL_LDFLAGS += $(PERL_EMBED_LDOPTS) 548 ALL_LDFLAGS += $(PERL_EMBED_LDOPTS)
525 LIB_OBJS += util/scripting-engines/trace-event-perl.o 549 LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-perl.o
526 LIB_OBJS += scripts/perl/Perf-Trace-Util/Context.o 550 LIB_OBJS += $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o
527endif 551endif
528 552
529ifndef NO_LIBPYTHON 553ifndef NO_LIBPYTHON
@@ -531,12 +555,12 @@ PYTHON_EMBED_LDOPTS = `python-config --ldflags 2>/dev/null`
531PYTHON_EMBED_CCOPTS = `python-config --cflags 2>/dev/null` 555PYTHON_EMBED_CCOPTS = `python-config --cflags 2>/dev/null`
532endif 556endif
533 557
534ifneq ($(shell sh -c "(echo '\#include <Python.h>'; echo 'int main(void) { Py_Initialize(); return 0; }') | $(CC) -x c - $(PYTHON_EMBED_CCOPTS) -o /dev/null $(PYTHON_EMBED_LDOPTS) > /dev/null 2>&1 && echo y"), y) 558ifneq ($(shell sh -c "(echo '\#include <Python.h>'; echo 'int main(void) { Py_Initialize(); return 0; }') | $(CC) -x c - $(PYTHON_EMBED_CCOPTS) -o $(BITBUCKET) $(PYTHON_EMBED_LDOPTS) > /dev/null 2>&1 && echo y"), y)
535 BASIC_CFLAGS += -DNO_LIBPYTHON 559 BASIC_CFLAGS += -DNO_LIBPYTHON
536else 560else
537 ALL_LDFLAGS += $(PYTHON_EMBED_LDOPTS) 561 ALL_LDFLAGS += $(PYTHON_EMBED_LDOPTS)
538 LIB_OBJS += util/scripting-engines/trace-event-python.o 562 LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o
539 LIB_OBJS += scripts/python/Perf-Trace-Util/Context.o 563 LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o
540endif 564endif
541 565
542ifdef NO_DEMANGLE 566ifdef NO_DEMANGLE
@@ -607,53 +631,53 @@ ifdef NO_C99_FORMAT
607endif 631endif
608ifdef SNPRINTF_RETURNS_BOGUS 632ifdef SNPRINTF_RETURNS_BOGUS
609 COMPAT_CFLAGS += -DSNPRINTF_RETURNS_BOGUS 633 COMPAT_CFLAGS += -DSNPRINTF_RETURNS_BOGUS
610 COMPAT_OBJS += compat/snprintf.o 634 COMPAT_OBJS += $(OUTPUT)compat/snprintf.o
611endif 635endif
612ifdef FREAD_READS_DIRECTORIES 636ifdef FREAD_READS_DIRECTORIES
613 COMPAT_CFLAGS += -DFREAD_READS_DIRECTORIES 637 COMPAT_CFLAGS += -DFREAD_READS_DIRECTORIES
614 COMPAT_OBJS += compat/fopen.o 638 COMPAT_OBJS += $(OUTPUT)compat/fopen.o
615endif 639endif
616ifdef NO_SYMLINK_HEAD 640ifdef NO_SYMLINK_HEAD
617 BASIC_CFLAGS += -DNO_SYMLINK_HEAD 641 BASIC_CFLAGS += -DNO_SYMLINK_HEAD
618endif 642endif
619ifdef NO_STRCASESTR 643ifdef NO_STRCASESTR
620 COMPAT_CFLAGS += -DNO_STRCASESTR 644 COMPAT_CFLAGS += -DNO_STRCASESTR
621 COMPAT_OBJS += compat/strcasestr.o 645 COMPAT_OBJS += $(OUTPUT)compat/strcasestr.o
622endif 646endif
623ifdef NO_STRTOUMAX 647ifdef NO_STRTOUMAX
624 COMPAT_CFLAGS += -DNO_STRTOUMAX 648 COMPAT_CFLAGS += -DNO_STRTOUMAX
625 COMPAT_OBJS += compat/strtoumax.o 649 COMPAT_OBJS += $(OUTPUT)compat/strtoumax.o
626endif 650endif
627ifdef NO_STRTOULL 651ifdef NO_STRTOULL
628 COMPAT_CFLAGS += -DNO_STRTOULL 652 COMPAT_CFLAGS += -DNO_STRTOULL
629endif 653endif
630ifdef NO_SETENV 654ifdef NO_SETENV
631 COMPAT_CFLAGS += -DNO_SETENV 655 COMPAT_CFLAGS += -DNO_SETENV
632 COMPAT_OBJS += compat/setenv.o 656 COMPAT_OBJS += $(OUTPUT)compat/setenv.o
633endif 657endif
634ifdef NO_MKDTEMP 658ifdef NO_MKDTEMP
635 COMPAT_CFLAGS += -DNO_MKDTEMP 659 COMPAT_CFLAGS += -DNO_MKDTEMP
636 COMPAT_OBJS += compat/mkdtemp.o 660 COMPAT_OBJS += $(OUTPUT)compat/mkdtemp.o
637endif 661endif
638ifdef NO_UNSETENV 662ifdef NO_UNSETENV
639 COMPAT_CFLAGS += -DNO_UNSETENV 663 COMPAT_CFLAGS += -DNO_UNSETENV
640 COMPAT_OBJS += compat/unsetenv.o 664 COMPAT_OBJS += $(OUTPUT)compat/unsetenv.o
641endif 665endif
642ifdef NO_SYS_SELECT_H 666ifdef NO_SYS_SELECT_H
643 BASIC_CFLAGS += -DNO_SYS_SELECT_H 667 BASIC_CFLAGS += -DNO_SYS_SELECT_H
644endif 668endif
645ifdef NO_MMAP 669ifdef NO_MMAP
646 COMPAT_CFLAGS += -DNO_MMAP 670 COMPAT_CFLAGS += -DNO_MMAP
647 COMPAT_OBJS += compat/mmap.o 671 COMPAT_OBJS += $(OUTPUT)compat/mmap.o
648else 672else
649 ifdef USE_WIN32_MMAP 673 ifdef USE_WIN32_MMAP
650 COMPAT_CFLAGS += -DUSE_WIN32_MMAP 674 COMPAT_CFLAGS += -DUSE_WIN32_MMAP
651 COMPAT_OBJS += compat/win32mmap.o 675 COMPAT_OBJS += $(OUTPUT)compat/win32mmap.o
652 endif 676 endif
653endif 677endif
654ifdef NO_PREAD 678ifdef NO_PREAD
655 COMPAT_CFLAGS += -DNO_PREAD 679 COMPAT_CFLAGS += -DNO_PREAD
656 COMPAT_OBJS += compat/pread.o 680 COMPAT_OBJS += $(OUTPUT)compat/pread.o
657endif 681endif
658ifdef NO_FAST_WORKING_DIRECTORY 682ifdef NO_FAST_WORKING_DIRECTORY
659 BASIC_CFLAGS += -DNO_FAST_WORKING_DIRECTORY 683 BASIC_CFLAGS += -DNO_FAST_WORKING_DIRECTORY
@@ -675,10 +699,10 @@ else
675endif 699endif
676endif 700endif
677ifdef NO_INET_NTOP 701ifdef NO_INET_NTOP
678 LIB_OBJS += compat/inet_ntop.o 702 LIB_OBJS += $(OUTPUT)compat/inet_ntop.o
679endif 703endif
680ifdef NO_INET_PTON 704ifdef NO_INET_PTON
681 LIB_OBJS += compat/inet_pton.o 705 LIB_OBJS += $(OUTPUT)compat/inet_pton.o
682endif 706endif
683 707
684ifdef NO_ICONV 708ifdef NO_ICONV
@@ -695,15 +719,15 @@ endif
695 719
696ifdef PPC_SHA1 720ifdef PPC_SHA1
697 SHA1_HEADER = "ppc/sha1.h" 721 SHA1_HEADER = "ppc/sha1.h"
698 LIB_OBJS += ppc/sha1.o ppc/sha1ppc.o 722 LIB_OBJS += $(OUTPUT)ppc/sha1.o ppc/sha1ppc.o
699else 723else
700ifdef ARM_SHA1 724ifdef ARM_SHA1
701 SHA1_HEADER = "arm/sha1.h" 725 SHA1_HEADER = "arm/sha1.h"
702 LIB_OBJS += arm/sha1.o arm/sha1_arm.o 726 LIB_OBJS += $(OUTPUT)arm/sha1.o $(OUTPUT)arm/sha1_arm.o
703else 727else
704ifdef MOZILLA_SHA1 728ifdef MOZILLA_SHA1
705 SHA1_HEADER = "mozilla-sha1/sha1.h" 729 SHA1_HEADER = "mozilla-sha1/sha1.h"
706 LIB_OBJS += mozilla-sha1/sha1.o 730 LIB_OBJS += $(OUTPUT)mozilla-sha1/sha1.o
707else 731else
708 SHA1_HEADER = <openssl/sha.h> 732 SHA1_HEADER = <openssl/sha.h>
709 EXTLIBS += $(LIB_4_CRYPTO) 733 EXTLIBS += $(LIB_4_CRYPTO)
@@ -715,15 +739,15 @@ ifdef NO_PERL_MAKEMAKER
715endif 739endif
716ifdef NO_HSTRERROR 740ifdef NO_HSTRERROR
717 COMPAT_CFLAGS += -DNO_HSTRERROR 741 COMPAT_CFLAGS += -DNO_HSTRERROR
718 COMPAT_OBJS += compat/hstrerror.o 742 COMPAT_OBJS += $(OUTPUT)compat/hstrerror.o
719endif 743endif
720ifdef NO_MEMMEM 744ifdef NO_MEMMEM
721 COMPAT_CFLAGS += -DNO_MEMMEM 745 COMPAT_CFLAGS += -DNO_MEMMEM
722 COMPAT_OBJS += compat/memmem.o 746 COMPAT_OBJS += $(OUTPUT)compat/memmem.o
723endif 747endif
724ifdef INTERNAL_QSORT 748ifdef INTERNAL_QSORT
725 COMPAT_CFLAGS += -DINTERNAL_QSORT 749 COMPAT_CFLAGS += -DINTERNAL_QSORT
726 COMPAT_OBJS += compat/qsort.o 750 COMPAT_OBJS += $(OUTPUT)compat/qsort.o
727endif 751endif
728ifdef RUNTIME_PREFIX 752ifdef RUNTIME_PREFIX
729 COMPAT_CFLAGS += -DRUNTIME_PREFIX 753 COMPAT_CFLAGS += -DRUNTIME_PREFIX
@@ -803,7 +827,7 @@ export TAR INSTALL DESTDIR SHELL_PATH
803 827
804SHELL = $(SHELL_PATH) 828SHELL = $(SHELL_PATH)
805 829
806all:: .perf.dev.null shell_compatibility_test $(ALL_PROGRAMS) $(BUILT_INS) $(OTHER_PROGRAMS) PERF-BUILD-OPTIONS 830all:: .perf.dev.null shell_compatibility_test $(ALL_PROGRAMS) $(BUILT_INS) $(OTHER_PROGRAMS) $(OUTPUT)PERF-BUILD-OPTIONS
807ifneq (,$X) 831ifneq (,$X)
808 $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), test '$p' -ef '$p$X' || $(RM) '$p';) 832 $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), test '$p' -ef '$p$X' || $(RM) '$p';)
809endif 833endif
@@ -815,39 +839,39 @@ please_set_SHELL_PATH_to_a_more_modern_shell:
815 839
816shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell 840shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell
817 841
818strip: $(PROGRAMS) perf$X 842strip: $(PROGRAMS) $(OUTPUT)perf$X
819 $(STRIP) $(STRIP_OPTS) $(PROGRAMS) perf$X 843 $(STRIP) $(STRIP_OPTS) $(PROGRAMS) $(OUTPUT)perf$X
820 844
821perf.o: perf.c common-cmds.h PERF-CFLAGS 845$(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
822 $(QUIET_CC)$(CC) -DPERF_VERSION='"$(PERF_VERSION)"' \ 846 $(QUIET_CC)$(CC) -DPERF_VERSION='"$(PERF_VERSION)"' \
823 '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ 847 '-DPERF_HTML_PATH="$(htmldir_SQ)"' \
824 $(ALL_CFLAGS) -c $(filter %.c,$^) 848 $(ALL_CFLAGS) -c $(filter %.c,$^) -o $@
825 849
826perf$X: perf.o $(BUILTIN_OBJS) $(PERFLIBS) 850$(OUTPUT)perf$X: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS)
827 $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ perf.o \ 851 $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(OUTPUT)perf.o \
828 $(BUILTIN_OBJS) $(ALL_LDFLAGS) $(LIBS) 852 $(BUILTIN_OBJS) $(ALL_LDFLAGS) $(LIBS)
829 853
830builtin-help.o: builtin-help.c common-cmds.h PERF-CFLAGS 854$(OUTPUT)builtin-help.o: builtin-help.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
831 $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) \ 855 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \
832 '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ 856 '-DPERF_HTML_PATH="$(htmldir_SQ)"' \
833 '-DPERF_MAN_PATH="$(mandir_SQ)"' \ 857 '-DPERF_MAN_PATH="$(mandir_SQ)"' \
834 '-DPERF_INFO_PATH="$(infodir_SQ)"' $< 858 '-DPERF_INFO_PATH="$(infodir_SQ)"' $<
835 859
836builtin-timechart.o: builtin-timechart.c common-cmds.h PERF-CFLAGS 860$(OUTPUT)builtin-timechart.o: builtin-timechart.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
837 $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) \ 861 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \
838 '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ 862 '-DPERF_HTML_PATH="$(htmldir_SQ)"' \
839 '-DPERF_MAN_PATH="$(mandir_SQ)"' \ 863 '-DPERF_MAN_PATH="$(mandir_SQ)"' \
840 '-DPERF_INFO_PATH="$(infodir_SQ)"' $< 864 '-DPERF_INFO_PATH="$(infodir_SQ)"' $<
841 865
842$(BUILT_INS): perf$X 866$(BUILT_INS): $(OUTPUT)perf$X
843 $(QUIET_BUILT_IN)$(RM) $@ && \ 867 $(QUIET_BUILT_IN)$(RM) $@ && \
844 ln perf$X $@ 2>/dev/null || \ 868 ln perf$X $@ 2>/dev/null || \
845 ln -s perf$X $@ 2>/dev/null || \ 869 ln -s perf$X $@ 2>/dev/null || \
846 cp perf$X $@ 870 cp perf$X $@
847 871
848common-cmds.h: util/generate-cmdlist.sh command-list.txt 872$(OUTPUT)common-cmds.h: util/generate-cmdlist.sh command-list.txt
849 873
850common-cmds.h: $(wildcard Documentation/perf-*.txt) 874$(OUTPUT)common-cmds.h: $(wildcard Documentation/perf-*.txt)
851 $(QUIET_GEN). util/generate-cmdlist.sh > $@+ && mv $@+ $@ 875 $(QUIET_GEN). util/generate-cmdlist.sh > $@+ && mv $@+ $@
852 876
853$(patsubst %.sh,%,$(SCRIPT_SH)) : % : %.sh 877$(patsubst %.sh,%,$(SCRIPT_SH)) : % : %.sh
@@ -859,7 +883,7 @@ $(patsubst %.sh,%,$(SCRIPT_SH)) : % : %.sh
859 -e 's/@@NO_CURL@@/$(NO_CURL)/g' \ 883 -e 's/@@NO_CURL@@/$(NO_CURL)/g' \
860 $@.sh >$@+ && \ 884 $@.sh >$@+ && \
861 chmod +x $@+ && \ 885 chmod +x $@+ && \
862 mv $@+ $@ 886 mv $@+ $(OUTPUT)$@
863 887
864configure: configure.ac 888configure: configure.ac
865 $(QUIET_GEN)$(RM) $@ $<+ && \ 889 $(QUIET_GEN)$(RM) $@ $<+ && \
@@ -869,60 +893,60 @@ configure: configure.ac
869 $(RM) $<+ 893 $(RM) $<+
870 894
871# These can record PERF_VERSION 895# These can record PERF_VERSION
872perf.o perf.spec \ 896$(OUTPUT)perf.o perf.spec \
873 $(patsubst %.sh,%,$(SCRIPT_SH)) \ 897 $(patsubst %.sh,%,$(SCRIPT_SH)) \
874 $(patsubst %.perl,%,$(SCRIPT_PERL)) \ 898 $(patsubst %.perl,%,$(SCRIPT_PERL)) \
875 : PERF-VERSION-FILE 899 : $(OUTPUT)PERF-VERSION-FILE
876 900
877%.o: %.c PERF-CFLAGS 901$(OUTPUT)%.o: %.c $(OUTPUT)PERF-CFLAGS
878 $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) $< 902 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $<
879%.s: %.c PERF-CFLAGS 903$(OUTPUT)%.s: %.c $(OUTPUT)PERF-CFLAGS
880 $(QUIET_CC)$(CC) -S $(ALL_CFLAGS) $< 904 $(QUIET_CC)$(CC) -S $(ALL_CFLAGS) $<
881%.o: %.S 905$(OUTPUT)%.o: %.S
882 $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) $< 906 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $<
883 907
884util/exec_cmd.o: util/exec_cmd.c PERF-CFLAGS 908$(OUTPUT)util/exec_cmd.o: util/exec_cmd.c $(OUTPUT)PERF-CFLAGS
885 $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) \ 909 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \
886 '-DPERF_EXEC_PATH="$(perfexecdir_SQ)"' \ 910 '-DPERF_EXEC_PATH="$(perfexecdir_SQ)"' \
887 '-DBINDIR="$(bindir_relative_SQ)"' \ 911 '-DBINDIR="$(bindir_relative_SQ)"' \
888 '-DPREFIX="$(prefix_SQ)"' \ 912 '-DPREFIX="$(prefix_SQ)"' \
889 $< 913 $<
890 914
891builtin-init-db.o: builtin-init-db.c PERF-CFLAGS 915$(OUTPUT)builtin-init-db.o: builtin-init-db.c $(OUTPUT)PERF-CFLAGS
892 $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DDEFAULT_PERF_TEMPLATE_DIR='"$(template_dir_SQ)"' $< 916 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DDEFAULT_PERF_TEMPLATE_DIR='"$(template_dir_SQ)"' $<
893 917
894util/config.o: util/config.c PERF-CFLAGS 918$(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS
895 $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< 919 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
896 920
897util/rbtree.o: ../../lib/rbtree.c PERF-CFLAGS 921$(OUTPUT)util/rbtree.o: ../../lib/rbtree.c $(OUTPUT)PERF-CFLAGS
898 $(QUIET_CC)$(CC) -o util/rbtree.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< 922 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
899 923
900# some perf warning policies can't fit to lib/bitmap.c, eg: it warns about variable shadowing 924# some perf warning policies can't fit to lib/bitmap.c, eg: it warns about variable shadowing
901# from <string.h> that comes from kernel headers wrapping. 925# from <string.h> that comes from kernel headers wrapping.
902KBITMAP_FLAGS=`echo $(ALL_CFLAGS) | sed s/-Wshadow// | sed s/-Wswitch-default// | sed s/-Wextra//` 926KBITMAP_FLAGS=`echo $(ALL_CFLAGS) | sed s/-Wshadow// | sed s/-Wswitch-default// | sed s/-Wextra//`
903 927
904util/bitmap.o: ../../lib/bitmap.c PERF-CFLAGS 928$(OUTPUT)util/bitmap.o: ../../lib/bitmap.c $(OUTPUT)PERF-CFLAGS
905 $(QUIET_CC)$(CC) -o util/bitmap.o -c $(KBITMAP_FLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< 929 $(QUIET_CC)$(CC) -o $@ -c $(KBITMAP_FLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
906 930
907util/hweight.o: ../../lib/hweight.c PERF-CFLAGS 931$(OUTPUT)util/hweight.o: ../../lib/hweight.c $(OUTPUT)PERF-CFLAGS
908 $(QUIET_CC)$(CC) -o util/hweight.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< 932 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
909 933
910util/find_next_bit.o: ../../lib/find_next_bit.c PERF-CFLAGS 934$(OUTPUT)util/find_next_bit.o: ../../lib/find_next_bit.c $(OUTPUT)PERF-CFLAGS
911 $(QUIET_CC)$(CC) -o util/find_next_bit.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< 935 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
912 936
913util/scripting-engines/trace-event-perl.o: util/scripting-engines/trace-event-perl.c PERF-CFLAGS 937$(OUTPUT)util/scripting-engines/trace-event-perl.o: util/scripting-engines/trace-event-perl.c $(OUTPUT)PERF-CFLAGS
914 $(QUIET_CC)$(CC) -o util/scripting-engines/trace-event-perl.o -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $< 938 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $<
915 939
916scripts/perl/Perf-Trace-Util/Context.o: scripts/perl/Perf-Trace-Util/Context.c PERF-CFLAGS 940$(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o: scripts/perl/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS
917 $(QUIET_CC)$(CC) -o scripts/perl/Perf-Trace-Util/Context.o -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $< 941 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $<
918 942
919util/scripting-engines/trace-event-python.o: util/scripting-engines/trace-event-python.c PERF-CFLAGS 943$(OUTPUT)util/scripting-engines/trace-event-python.o: util/scripting-engines/trace-event-python.c $(OUTPUT)PERF-CFLAGS
920 $(QUIET_CC)$(CC) -o util/scripting-engines/trace-event-python.o -c $(ALL_CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $< 944 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $<
921 945
922scripts/python/Perf-Trace-Util/Context.o: scripts/python/Perf-Trace-Util/Context.c PERF-CFLAGS 946$(OUTPUT)scripts/python/Perf-Trace-Util/Context.o: scripts/python/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS
923 $(QUIET_CC)$(CC) -o scripts/python/Perf-Trace-Util/Context.o -c $(ALL_CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $< 947 $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $<
924 948
925perf-%$X: %.o $(PERFLIBS) 949$(OUTPUT)perf-%$X: %.o $(PERFLIBS)
926 $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) 950 $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
927 951
928$(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H) 952$(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H)
@@ -963,17 +987,17 @@ cscope:
963TRACK_CFLAGS = $(subst ','\'',$(ALL_CFLAGS)):\ 987TRACK_CFLAGS = $(subst ','\'',$(ALL_CFLAGS)):\
964 $(bindir_SQ):$(perfexecdir_SQ):$(template_dir_SQ):$(prefix_SQ) 988 $(bindir_SQ):$(perfexecdir_SQ):$(template_dir_SQ):$(prefix_SQ)
965 989
966PERF-CFLAGS: .FORCE-PERF-CFLAGS 990$(OUTPUT)PERF-CFLAGS: .FORCE-PERF-CFLAGS
967 @FLAGS='$(TRACK_CFLAGS)'; \ 991 @FLAGS='$(TRACK_CFLAGS)'; \
968 if test x"$$FLAGS" != x"`cat PERF-CFLAGS 2>/dev/null`" ; then \ 992 if test x"$$FLAGS" != x"`cat $(OUTPUT)PERF-CFLAGS 2>/dev/null`" ; then \
969 echo 1>&2 " * new build flags or prefix"; \ 993 echo 1>&2 " * new build flags or prefix"; \
970 echo "$$FLAGS" >PERF-CFLAGS; \ 994 echo "$$FLAGS" >$(OUTPUT)PERF-CFLAGS; \
971 fi 995 fi
972 996
973# We need to apply sq twice, once to protect from the shell 997# We need to apply sq twice, once to protect from the shell
974# that runs PERF-BUILD-OPTIONS, and then again to protect it 998# that runs $(OUTPUT)PERF-BUILD-OPTIONS, and then again to protect it
975# and the first level quoting from the shell that runs "echo". 999# and the first level quoting from the shell that runs "echo".
976PERF-BUILD-OPTIONS: .FORCE-PERF-BUILD-OPTIONS 1000$(OUTPUT)PERF-BUILD-OPTIONS: .FORCE-PERF-BUILD-OPTIONS
977 @echo SHELL_PATH=\''$(subst ','\'',$(SHELL_PATH_SQ))'\' >$@ 1001 @echo SHELL_PATH=\''$(subst ','\'',$(SHELL_PATH_SQ))'\' >$@
978 @echo TAR=\''$(subst ','\'',$(subst ','\'',$(TAR)))'\' >>$@ 1002 @echo TAR=\''$(subst ','\'',$(subst ','\'',$(TAR)))'\' >>$@
979 @echo NO_CURL=\''$(subst ','\'',$(subst ','\'',$(NO_CURL)))'\' >>$@ 1003 @echo NO_CURL=\''$(subst ','\'',$(subst ','\'',$(NO_CURL)))'\' >>$@
@@ -994,7 +1018,7 @@ all:: $(TEST_PROGRAMS)
994 1018
995export NO_SVN_TESTS 1019export NO_SVN_TESTS
996 1020
997check: common-cmds.h 1021check: $(OUTPUT)common-cmds.h
998 if sparse; \ 1022 if sparse; \
999 then \ 1023 then \
1000 for i in *.c */*.c; \ 1024 for i in *.c */*.c; \
@@ -1028,10 +1052,10 @@ export perfexec_instdir
1028 1052
1029install: all 1053install: all
1030 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)' 1054 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'
1031 $(INSTALL) perf$X '$(DESTDIR_SQ)$(bindir_SQ)' 1055 $(INSTALL) $(OUTPUT)perf$X '$(DESTDIR_SQ)$(bindir_SQ)'
1032 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace' 1056 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'
1033 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin' 1057 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
1034 $(INSTALL) perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' 1058 $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
1035 $(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace' 1059 $(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'
1036 $(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl' 1060 $(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl'
1037 $(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin' 1061 $(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
@@ -1045,7 +1069,7 @@ ifdef BUILT_INS
1045 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' 1069 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
1046 $(INSTALL) $(BUILT_INS) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' 1070 $(INSTALL) $(BUILT_INS) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
1047ifneq (,$X) 1071ifneq (,$X)
1048 $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), $(RM) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/$p';) 1072 $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) $(OUTPUT)perf$X)), $(RM) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/$p';)
1049endif 1073endif
1050endif 1074endif
1051 1075
@@ -1129,14 +1153,14 @@ clean:
1129 $(RM) *.o */*.o */*/*.o */*/*/*.o $(LIB_FILE) 1153 $(RM) *.o */*.o */*/*.o */*/*/*.o $(LIB_FILE)
1130 $(RM) $(ALL_PROGRAMS) $(BUILT_INS) perf$X 1154 $(RM) $(ALL_PROGRAMS) $(BUILT_INS) perf$X
1131 $(RM) $(TEST_PROGRAMS) 1155 $(RM) $(TEST_PROGRAMS)
1132 $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo common-cmds.h TAGS tags cscope* 1156 $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope*
1133 $(RM) -r autom4te.cache 1157 $(RM) -r autom4te.cache
1134 $(RM) config.log config.mak.autogen config.mak.append config.status config.cache 1158 $(RM) config.log config.mak.autogen config.mak.append config.status config.cache
1135 $(RM) -r $(PERF_TARNAME) .doc-tmp-dir 1159 $(RM) -r $(PERF_TARNAME) .doc-tmp-dir
1136 $(RM) $(PERF_TARNAME).tar.gz perf-core_$(PERF_VERSION)-*.tar.gz 1160 $(RM) $(PERF_TARNAME).tar.gz perf-core_$(PERF_VERSION)-*.tar.gz
1137 $(RM) $(htmldocs).tar.gz $(manpages).tar.gz 1161 $(RM) $(htmldocs).tar.gz $(manpages).tar.gz
1138 $(MAKE) -C Documentation/ clean 1162 $(MAKE) -C Documentation/ clean
1139 $(RM) PERF-VERSION-FILE PERF-CFLAGS PERF-BUILD-OPTIONS 1163 $(RM) $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS $(OUTPUT)PERF-BUILD-OPTIONS
1140 1164
1141.PHONY: all install clean strip 1165.PHONY: all install clean strip
1142.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell 1166.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
index 89773178e894..38dae7465142 100644
--- a/tools/perf/bench/mem-memcpy.c
+++ b/tools/perf/bench/mem-memcpy.c
@@ -10,7 +10,6 @@
10#include "../perf.h" 10#include "../perf.h"
11#include "../util/util.h" 11#include "../util/util.h"
12#include "../util/parse-options.h" 12#include "../util/parse-options.h"
13#include "../util/string.h"
14#include "../util/header.h" 13#include "../util/header.h"
15#include "bench.h" 14#include "bench.h"
16 15
@@ -24,7 +23,7 @@
24 23
25static const char *length_str = "1MB"; 24static const char *length_str = "1MB";
26static const char *routine = "default"; 25static const char *routine = "default";
27static int use_clock = 0; 26static bool use_clock = false;
28static int clock_fd; 27static int clock_fd;
29 28
30static const struct option options[] = { 29static const struct option options[] = {
diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c
index 81cee78181fa..da1b2e9f01ff 100644
--- a/tools/perf/bench/sched-messaging.c
+++ b/tools/perf/bench/sched-messaging.c
@@ -31,9 +31,9 @@
31 31
32#define DATASIZE 100 32#define DATASIZE 100
33 33
34static int use_pipes = 0; 34static bool use_pipes = false;
35static unsigned int loops = 100; 35static unsigned int loops = 100;
36static unsigned int thread_mode = 0; 36static bool thread_mode = false;
37static unsigned int num_groups = 10; 37static unsigned int num_groups = 10;
38 38
39struct sender_context { 39struct sender_context {
diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c
index 4f77c7c27640..d9ab3ce446ac 100644
--- a/tools/perf/bench/sched-pipe.c
+++ b/tools/perf/bench/sched-pipe.c
@@ -93,7 +93,7 @@ int bench_sched_pipe(int argc, const char **argv,
93 93
94 switch (bench_format) { 94 switch (bench_format) {
95 case BENCH_FORMAT_DEFAULT: 95 case BENCH_FORMAT_DEFAULT:
96 printf("# Extecuted %d pipe operations between two tasks\n\n", 96 printf("# Executed %d pipe operations between two tasks\n\n",
97 loops); 97 loops);
98 98
99 result_usec = diff.tv_sec * 1000000; 99 result_usec = diff.tv_sec * 1000000;
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 6ad7148451c5..f924b4332be6 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -14,7 +14,6 @@
14#include "util/cache.h" 14#include "util/cache.h"
15#include <linux/rbtree.h> 15#include <linux/rbtree.h>
16#include "util/symbol.h" 16#include "util/symbol.h"
17#include "util/string.h"
18 17
19#include "perf.h" 18#include "perf.h"
20#include "util/debug.h" 19#include "util/debug.h"
@@ -29,11 +28,11 @@
29 28
30static char const *input_name = "perf.data"; 29static char const *input_name = "perf.data";
31 30
32static int force; 31static bool force;
33 32
34static int full_paths; 33static bool full_paths;
35 34
36static int print_line; 35static bool print_line;
37 36
38struct sym_hist { 37struct sym_hist {
39 u64 sum; 38 u64 sum;
@@ -69,13 +68,13 @@ static int sym__alloc_hist(struct symbol *self)
69static int annotate__hist_hit(struct hist_entry *he, u64 ip) 68static int annotate__hist_hit(struct hist_entry *he, u64 ip)
70{ 69{
71 unsigned int sym_size, offset; 70 unsigned int sym_size, offset;
72 struct symbol *sym = he->sym; 71 struct symbol *sym = he->ms.sym;
73 struct sym_priv *priv; 72 struct sym_priv *priv;
74 struct sym_hist *h; 73 struct sym_hist *h;
75 74
76 he->count++; 75 he->count++;
77 76
78 if (!sym || !he->map) 77 if (!sym || !he->ms.map)
79 return 0; 78 return 0;
80 79
81 priv = symbol__priv(sym); 80 priv = symbol__priv(sym);
@@ -85,7 +84,7 @@ static int annotate__hist_hit(struct hist_entry *he, u64 ip)
85 sym_size = sym->end - sym->start; 84 sym_size = sym->end - sym->start;
86 offset = ip - sym->start; 85 offset = ip - sym->start;
87 86
88 pr_debug3("%s: ip=%#Lx\n", __func__, he->map->unmap_ip(he->map, ip)); 87 pr_debug3("%s: ip=%#Lx\n", __func__, he->ms.map->unmap_ip(he->ms.map, ip));
89 88
90 if (offset >= sym_size) 89 if (offset >= sym_size)
91 return 0; 90 return 0;
@@ -94,8 +93,8 @@ static int annotate__hist_hit(struct hist_entry *he, u64 ip)
94 h->sum++; 93 h->sum++;
95 h->ip[offset]++; 94 h->ip[offset]++;
96 95
97 pr_debug3("%#Lx %s: count++ [ip: %#Lx, %#Lx] => %Ld\n", he->sym->start, 96 pr_debug3("%#Lx %s: count++ [ip: %#Lx, %#Lx] => %Ld\n", he->ms.sym->start,
98 he->sym->name, ip, ip - he->sym->start, h->ip[offset]); 97 he->ms.sym->name, ip, ip - he->ms.sym->start, h->ip[offset]);
99 return 0; 98 return 0;
100} 99}
101 100
@@ -187,7 +186,7 @@ static struct objdump_line *objdump__get_next_ip_line(struct list_head *head,
187static int parse_line(FILE *file, struct hist_entry *he, 186static int parse_line(FILE *file, struct hist_entry *he,
188 struct list_head *head) 187 struct list_head *head)
189{ 188{
190 struct symbol *sym = he->sym; 189 struct symbol *sym = he->ms.sym;
191 struct objdump_line *objdump_line; 190 struct objdump_line *objdump_line;
192 char *line = NULL, *tmp, *tmp2; 191 char *line = NULL, *tmp, *tmp2;
193 size_t line_len; 192 size_t line_len;
@@ -226,7 +225,7 @@ static int parse_line(FILE *file, struct hist_entry *he,
226 } 225 }
227 226
228 if (line_ip != -1) { 227 if (line_ip != -1) {
229 u64 start = map__rip_2objdump(he->map, sym->start); 228 u64 start = map__rip_2objdump(he->ms.map, sym->start);
230 offset = line_ip - start; 229 offset = line_ip - start;
231 } 230 }
232 231
@@ -244,7 +243,7 @@ static int objdump_line__print(struct objdump_line *self,
244 struct list_head *head, 243 struct list_head *head,
245 struct hist_entry *he, u64 len) 244 struct hist_entry *he, u64 len)
246{ 245{
247 struct symbol *sym = he->sym; 246 struct symbol *sym = he->ms.sym;
248 static const char *prev_line; 247 static const char *prev_line;
249 static const char *prev_color; 248 static const char *prev_color;
250 249
@@ -327,7 +326,7 @@ static void insert_source_line(struct sym_ext *sym_ext)
327 326
328static void free_source_line(struct hist_entry *he, int len) 327static void free_source_line(struct hist_entry *he, int len)
329{ 328{
330 struct sym_priv *priv = symbol__priv(he->sym); 329 struct sym_priv *priv = symbol__priv(he->ms.sym);
331 struct sym_ext *sym_ext = priv->ext; 330 struct sym_ext *sym_ext = priv->ext;
332 int i; 331 int i;
333 332
@@ -346,7 +345,7 @@ static void free_source_line(struct hist_entry *he, int len)
346static void 345static void
347get_source_line(struct hist_entry *he, int len, const char *filename) 346get_source_line(struct hist_entry *he, int len, const char *filename)
348{ 347{
349 struct symbol *sym = he->sym; 348 struct symbol *sym = he->ms.sym;
350 u64 start; 349 u64 start;
351 int i; 350 int i;
352 char cmd[PATH_MAX * 2]; 351 char cmd[PATH_MAX * 2];
@@ -361,7 +360,7 @@ get_source_line(struct hist_entry *he, int len, const char *filename)
361 if (!priv->ext) 360 if (!priv->ext)
362 return; 361 return;
363 362
364 start = he->map->unmap_ip(he->map, sym->start); 363 start = he->ms.map->unmap_ip(he->ms.map, sym->start);
365 364
366 for (i = 0; i < len; i++) { 365 for (i = 0; i < len; i++) {
367 char *path = NULL; 366 char *path = NULL;
@@ -425,7 +424,7 @@ static void print_summary(const char *filename)
425 424
426static void hist_entry__print_hits(struct hist_entry *self) 425static void hist_entry__print_hits(struct hist_entry *self)
427{ 426{
428 struct symbol *sym = self->sym; 427 struct symbol *sym = self->ms.sym;
429 struct sym_priv *priv = symbol__priv(sym); 428 struct sym_priv *priv = symbol__priv(sym);
430 struct sym_hist *h = priv->hist; 429 struct sym_hist *h = priv->hist;
431 u64 len = sym->end - sym->start, offset; 430 u64 len = sym->end - sym->start, offset;
@@ -439,9 +438,9 @@ static void hist_entry__print_hits(struct hist_entry *self)
439 438
440static void annotate_sym(struct hist_entry *he) 439static void annotate_sym(struct hist_entry *he)
441{ 440{
442 struct map *map = he->map; 441 struct map *map = he->ms.map;
443 struct dso *dso = map->dso; 442 struct dso *dso = map->dso;
444 struct symbol *sym = he->sym; 443 struct symbol *sym = he->ms.sym;
445 const char *filename = dso->long_name, *d_filename; 444 const char *filename = dso->long_name, *d_filename;
446 u64 len; 445 u64 len;
447 char command[PATH_MAX*2]; 446 char command[PATH_MAX*2];
@@ -452,6 +451,16 @@ static void annotate_sym(struct hist_entry *he)
452 if (!filename) 451 if (!filename)
453 return; 452 return;
454 453
454 if (dso->origin == DSO__ORIG_KERNEL) {
455 if (dso->annotate_warned)
456 return;
457 dso->annotate_warned = 1;
458 pr_err("Can't annotate %s: No vmlinux file was found in the "
459 "path:\n", sym->name);
460 vmlinux_path__fprintf(stderr);
461 return;
462 }
463
455 pr_debug("%s: filename=%s, sym=%s, start=%#Lx, end=%#Lx\n", __func__, 464 pr_debug("%s: filename=%s, sym=%s, start=%#Lx, end=%#Lx\n", __func__,
456 filename, sym->name, map->unmap_ip(map, sym->start), 465 filename, sym->name, map->unmap_ip(map, sym->start),
457 map->unmap_ip(map, sym->end)); 466 map->unmap_ip(map, sym->end));
@@ -516,17 +525,17 @@ static void perf_session__find_annotations(struct perf_session *self)
516 struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); 525 struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
517 struct sym_priv *priv; 526 struct sym_priv *priv;
518 527
519 if (he->sym == NULL) 528 if (he->ms.sym == NULL)
520 continue; 529 continue;
521 530
522 priv = symbol__priv(he->sym); 531 priv = symbol__priv(he->ms.sym);
523 if (priv->hist == NULL) 532 if (priv->hist == NULL)
524 continue; 533 continue;
525 534
526 annotate_sym(he); 535 annotate_sym(he);
527 /* 536 /*
528 * Since we have a hist_entry per IP for the same symbol, free 537 * Since we have a hist_entry per IP for the same symbol, free
529 * he->sym->hist to signal we already processed this symbol. 538 * he->ms.sym->hist to signal we already processed this symbol.
530 */ 539 */
531 free(priv->hist); 540 free(priv->hist);
532 priv->hist = NULL; 541 priv->hist = NULL;
@@ -562,7 +571,7 @@ static int __cmd_annotate(void)
562 perf_session__fprintf(session, stdout); 571 perf_session__fprintf(session, stdout);
563 572
564 if (verbose > 2) 573 if (verbose > 2)
565 dsos__fprintf(stdout); 574 dsos__fprintf(&session->kerninfo_root, stdout);
566 575
567 perf_session__collapse_resort(&session->hists); 576 perf_session__collapse_resort(&session->hists);
568 perf_session__output_resort(&session->hists, session->event_total[0]); 577 perf_session__output_resort(&session->hists, session->event_total[0]);
@@ -581,10 +590,12 @@ static const char * const annotate_usage[] = {
581static const struct option options[] = { 590static const struct option options[] = {
582 OPT_STRING('i', "input", &input_name, "file", 591 OPT_STRING('i', "input", &input_name, "file",
583 "input file name"), 592 "input file name"),
593 OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
594 "only consider symbols in these dsos"),
584 OPT_STRING('s', "symbol", &sym_hist_filter, "symbol", 595 OPT_STRING('s', "symbol", &sym_hist_filter, "symbol",
585 "symbol to annotate"), 596 "symbol to annotate"),
586 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), 597 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
587 OPT_BOOLEAN('v', "verbose", &verbose, 598 OPT_INCR('v', "verbose", &verbose,
588 "be more verbose (show symbol address, etc)"), 599 "be more verbose (show symbol address, etc)"),
589 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 600 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
590 "dump raw trace in ASCII"), 601 "dump raw trace in ASCII"),
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index 30a05f552c96..f8e3d1852029 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -27,7 +27,7 @@ static const struct option buildid_cache_options[] = {
27 "file list", "file(s) to add"), 27 "file list", "file(s) to add"),
28 OPT_STRING('r', "remove", &remove_name_list_str, "file list", 28 OPT_STRING('r', "remove", &remove_name_list_str, "file list",
29 "file(s) to remove"), 29 "file(s) to remove"),
30 OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose"), 30 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
31 OPT_END() 31 OPT_END()
32}; 32};
33 33
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index d0675c02f81e..623afe3fdcb8 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -16,7 +16,7 @@
16#include "util/symbol.h" 16#include "util/symbol.h"
17 17
18static char const *input_name = "perf.data"; 18static char const *input_name = "perf.data";
19static int force; 19static bool force;
20static bool with_hits; 20static bool with_hits;
21 21
22static const char * const buildid_list_usage[] = { 22static const char * const buildid_list_usage[] = {
@@ -29,7 +29,7 @@ static const struct option options[] = {
29 OPT_STRING('i', "input", &input_name, "file", 29 OPT_STRING('i', "input", &input_name, "file",
30 "input file name"), 30 "input file name"),
31 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), 31 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
32 OPT_BOOLEAN('v', "verbose", &verbose, 32 OPT_INCR('v', "verbose", &verbose,
33 "be more verbose"), 33 "be more verbose"),
34 OPT_END() 34 OPT_END()
35}; 35};
@@ -46,7 +46,7 @@ static int __cmd_buildid_list(void)
46 if (with_hits) 46 if (with_hits)
47 perf_session__process_events(session, &build_id__mark_dso_hit_ops); 47 perf_session__process_events(session, &build_id__mark_dso_hit_ops);
48 48
49 dsos__fprintf_buildid(stdout, with_hits); 49 dsos__fprintf_buildid(&session->kerninfo_root, stdout, with_hits);
50 50
51 perf_session__delete(session); 51 perf_session__delete(session);
52 return err; 52 return err;
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 1ea15d8aeed1..207e860591e2 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -19,7 +19,7 @@
19static char const *input_old = "perf.data.old", 19static char const *input_old = "perf.data.old",
20 *input_new = "perf.data"; 20 *input_new = "perf.data";
21static char diff__default_sort_order[] = "dso,symbol"; 21static char diff__default_sort_order[] = "dso,symbol";
22static int force; 22static bool force;
23static bool show_displacement; 23static bool show_displacement;
24 24
25static int perf_session__add_hist_entry(struct perf_session *self, 25static int perf_session__add_hist_entry(struct perf_session *self,
@@ -33,7 +33,7 @@ static int perf_session__add_hist_entry(struct perf_session *self,
33 return -ENOMEM; 33 return -ENOMEM;
34 34
35 if (hit) 35 if (hit)
36 he->count += count; 36 __perf_session__add_count(he, al, count);
37 37
38 return 0; 38 return 0;
39} 39}
@@ -188,7 +188,7 @@ static const char * const diff_usage[] = {
188}; 188};
189 189
190static const struct option options[] = { 190static const struct option options[] = {
191 OPT_BOOLEAN('v', "verbose", &verbose, 191 OPT_INCR('v', "verbose", &verbose,
192 "be more verbose (show symbol address, etc)"), 192 "be more verbose (show symbol address, etc)"),
193 OPT_BOOLEAN('m', "displacement", &show_displacement, 193 OPT_BOOLEAN('m', "displacement", &show_displacement,
194 "Show position displacement relative to baseline"), 194 "Show position displacement relative to baseline"),
@@ -225,6 +225,10 @@ int cmd_diff(int argc, const char **argv, const char *prefix __used)
225 input_new = argv[1]; 225 input_new = argv[1];
226 } else 226 } else
227 input_new = argv[0]; 227 input_new = argv[0];
228 } else if (symbol_conf.default_guest_vmlinux_name ||
229 symbol_conf.default_guest_kallsyms) {
230 input_old = "perf.data.host";
231 input_new = "perf.data.guest";
228 } 232 }
229 233
230 symbol_conf.exclude_other = false; 234 symbol_conf.exclude_other = false;
diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c
index 215b584007b1..81e3ecc40fc7 100644
--- a/tools/perf/builtin-help.c
+++ b/tools/perf/builtin-help.c
@@ -29,7 +29,7 @@ enum help_format {
29 HELP_FORMAT_WEB, 29 HELP_FORMAT_WEB,
30}; 30};
31 31
32static int show_all = 0; 32static bool show_all = false;
33static enum help_format help_format = HELP_FORMAT_MAN; 33static enum help_format help_format = HELP_FORMAT_MAN;
34static struct option builtin_help_options[] = { 34static struct option builtin_help_options[] = {
35 OPT_BOOLEAN('a', "all", &show_all, "print all available commands"), 35 OPT_BOOLEAN('a', "all", &show_all, "print all available commands"),
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 924a9518931a..db474bbf3322 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -351,6 +351,7 @@ static void __print_result(struct rb_root *root, struct perf_session *session,
351 int n_lines, int is_caller) 351 int n_lines, int is_caller)
352{ 352{
353 struct rb_node *next; 353 struct rb_node *next;
354 struct kernel_info *kerninfo;
354 355
355 printf("%.102s\n", graph_dotted_line); 356 printf("%.102s\n", graph_dotted_line);
356 printf(" %-34s |", is_caller ? "Callsite": "Alloc Ptr"); 357 printf(" %-34s |", is_caller ? "Callsite": "Alloc Ptr");
@@ -359,23 +360,31 @@ static void __print_result(struct rb_root *root, struct perf_session *session,
359 360
360 next = rb_first(root); 361 next = rb_first(root);
361 362
363 kerninfo = kerninfo__findhost(&session->kerninfo_root);
364 if (!kerninfo) {
365 pr_err("__print_result: couldn't find kernel information\n");
366 return;
367 }
362 while (next && n_lines--) { 368 while (next && n_lines--) {
363 struct alloc_stat *data = rb_entry(next, struct alloc_stat, 369 struct alloc_stat *data = rb_entry(next, struct alloc_stat,
364 node); 370 node);
365 struct symbol *sym = NULL; 371 struct symbol *sym = NULL;
372 struct map_groups *kmaps = &kerninfo->kmaps;
373 struct map *map;
366 char buf[BUFSIZ]; 374 char buf[BUFSIZ];
367 u64 addr; 375 u64 addr;
368 376
369 if (is_caller) { 377 if (is_caller) {
370 addr = data->call_site; 378 addr = data->call_site;
371 if (!raw_ip) 379 if (!raw_ip)
372 sym = map_groups__find_function(&session->kmaps, addr, NULL); 380 sym = map_groups__find_function(kmaps, addr,
381 &map, NULL);
373 } else 382 } else
374 addr = data->ptr; 383 addr = data->ptr;
375 384
376 if (sym != NULL) 385 if (sym != NULL)
377 snprintf(buf, sizeof(buf), "%s+%Lx", sym->name, 386 snprintf(buf, sizeof(buf), "%s+%Lx", sym->name,
378 addr - sym->start); 387 addr - map->unmap_ip(map, sym->start));
379 else 388 else
380 snprintf(buf, sizeof(buf), "%#Lx", addr); 389 snprintf(buf, sizeof(buf), "%#Lx", addr);
381 printf(" %-34s |", buf); 390 printf(" %-34s |", buf);
@@ -488,6 +497,9 @@ static int __cmd_kmem(void)
488 if (session == NULL) 497 if (session == NULL)
489 return -ENOMEM; 498 return -ENOMEM;
490 499
500 if (perf_session__create_kernel_maps(session) < 0)
501 goto out_delete;
502
491 if (!perf_session__has_traces(session, "kmem record")) 503 if (!perf_session__has_traces(session, "kmem record"))
492 goto out_delete; 504 goto out_delete;
493 505
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
new file mode 100644
index 000000000000..a4c7cae45024
--- /dev/null
+++ b/tools/perf/builtin-kvm.c
@@ -0,0 +1,144 @@
1#include "builtin.h"
2#include "perf.h"
3
4#include "util/util.h"
5#include "util/cache.h"
6#include "util/symbol.h"
7#include "util/thread.h"
8#include "util/header.h"
9#include "util/session.h"
10
11#include "util/parse-options.h"
12#include "util/trace-event.h"
13
14#include "util/debug.h"
15
16#include <sys/prctl.h>
17
18#include <semaphore.h>
19#include <pthread.h>
20#include <math.h>
21
22static char *file_name;
23static char name_buffer[256];
24
25int perf_host = 1;
26int perf_guest;
27
28static const char * const kvm_usage[] = {
29 "perf kvm [<options>] {top|record|report|diff|buildid-list}",
30 NULL
31};
32
33static const struct option kvm_options[] = {
34 OPT_STRING('i', "input", &file_name, "file",
35 "Input file name"),
36 OPT_STRING('o', "output", &file_name, "file",
37 "Output file name"),
38 OPT_BOOLEAN(0, "guest", &perf_guest,
39 "Collect guest os data"),
40 OPT_BOOLEAN(0, "host", &perf_host,
41 "Collect guest os data"),
42 OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory",
43 "guest mount directory under which every guest os"
44 " instance has a subdir"),
45 OPT_STRING(0, "guestvmlinux", &symbol_conf.default_guest_vmlinux_name,
46 "file", "file saving guest os vmlinux"),
47 OPT_STRING(0, "guestkallsyms", &symbol_conf.default_guest_kallsyms,
48 "file", "file saving guest os /proc/kallsyms"),
49 OPT_STRING(0, "guestmodules", &symbol_conf.default_guest_modules,
50 "file", "file saving guest os /proc/modules"),
51 OPT_END()
52};
53
54static int __cmd_record(int argc, const char **argv)
55{
56 int rec_argc, i = 0, j;
57 const char **rec_argv;
58
59 rec_argc = argc + 2;
60 rec_argv = calloc(rec_argc + 1, sizeof(char *));
61 rec_argv[i++] = strdup("record");
62 rec_argv[i++] = strdup("-o");
63 rec_argv[i++] = strdup(file_name);
64 for (j = 1; j < argc; j++, i++)
65 rec_argv[i] = argv[j];
66
67 BUG_ON(i != rec_argc);
68
69 return cmd_record(i, rec_argv, NULL);
70}
71
72static int __cmd_report(int argc, const char **argv)
73{
74 int rec_argc, i = 0, j;
75 const char **rec_argv;
76
77 rec_argc = argc + 2;
78 rec_argv = calloc(rec_argc + 1, sizeof(char *));
79 rec_argv[i++] = strdup("report");
80 rec_argv[i++] = strdup("-i");
81 rec_argv[i++] = strdup(file_name);
82 for (j = 1; j < argc; j++, i++)
83 rec_argv[i] = argv[j];
84
85 BUG_ON(i != rec_argc);
86
87 return cmd_report(i, rec_argv, NULL);
88}
89
90static int __cmd_buildid_list(int argc, const char **argv)
91{
92 int rec_argc, i = 0, j;
93 const char **rec_argv;
94
95 rec_argc = argc + 2;
96 rec_argv = calloc(rec_argc + 1, sizeof(char *));
97 rec_argv[i++] = strdup("buildid-list");
98 rec_argv[i++] = strdup("-i");
99 rec_argv[i++] = strdup(file_name);
100 for (j = 1; j < argc; j++, i++)
101 rec_argv[i] = argv[j];
102
103 BUG_ON(i != rec_argc);
104
105 return cmd_buildid_list(i, rec_argv, NULL);
106}
107
108int cmd_kvm(int argc, const char **argv, const char *prefix __used)
109{
110 perf_host = perf_guest = 0;
111
112 argc = parse_options(argc, argv, kvm_options, kvm_usage,
113 PARSE_OPT_STOP_AT_NON_OPTION);
114 if (!argc)
115 usage_with_options(kvm_usage, kvm_options);
116
117 if (!perf_host)
118 perf_guest = 1;
119
120 if (!file_name) {
121 if (perf_host && !perf_guest)
122 sprintf(name_buffer, "perf.data.host");
123 else if (!perf_host && perf_guest)
124 sprintf(name_buffer, "perf.data.guest");
125 else
126 sprintf(name_buffer, "perf.data.kvm");
127 file_name = name_buffer;
128 }
129
130 if (!strncmp(argv[0], "rec", 3))
131 return __cmd_record(argc, argv);
132 else if (!strncmp(argv[0], "rep", 3))
133 return __cmd_report(argc, argv);
134 else if (!strncmp(argv[0], "diff", 4))
135 return cmd_diff(argc, argv, NULL);
136 else if (!strncmp(argv[0], "top", 3))
137 return cmd_top(argc, argv, NULL);
138 else if (!strncmp(argv[0], "buildid-list", 12))
139 return __cmd_buildid_list(argc, argv);
140 else
141 usage_with_options(kvm_usage, kvm_options);
142
143 return 0;
144}
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index e12c844df1e2..6c38e4febf9f 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -744,7 +744,7 @@ static const char * const lock_usage[] = {
744 744
745static const struct option lock_options[] = { 745static const struct option lock_options[] = {
746 OPT_STRING('i', "input", &input_name, "file", "input file name"), 746 OPT_STRING('i', "input", &input_name, "file", "input file name"),
747 OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), 747 OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"),
748 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), 748 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"),
749 OPT_END() 749 OPT_END()
750}; 750};
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 152d6c9b1fa4..c1e54035e8cf 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -36,13 +36,10 @@
36#include "builtin.h" 36#include "builtin.h"
37#include "util/util.h" 37#include "util/util.h"
38#include "util/strlist.h" 38#include "util/strlist.h"
39#include "util/event.h" 39#include "util/symbol.h"
40#include "util/debug.h" 40#include "util/debug.h"
41#include "util/debugfs.h" 41#include "util/debugfs.h"
42#include "util/symbol.h"
43#include "util/thread.h"
44#include "util/parse-options.h" 42#include "util/parse-options.h"
45#include "util/parse-events.h" /* For debugfs_path */
46#include "util/probe-finder.h" 43#include "util/probe-finder.h"
47#include "util/probe-event.h" 44#include "util/probe-event.h"
48 45
@@ -50,103 +47,83 @@
50 47
51/* Session management structure */ 48/* Session management structure */
52static struct { 49static struct {
53 bool need_dwarf;
54 bool list_events; 50 bool list_events;
55 bool force_add; 51 bool force_add;
56 bool show_lines; 52 bool show_lines;
57 int nr_probe; 53 int nevents;
58 struct probe_point probes[MAX_PROBES]; 54 struct perf_probe_event events[MAX_PROBES];
59 struct strlist *dellist; 55 struct strlist *dellist;
60 struct map_groups kmap_groups;
61 struct map *kmaps[MAP__NR_TYPES];
62 struct line_range line_range; 56 struct line_range line_range;
63} session; 57} params;
64 58
65 59
66/* Parse an event definition. Note that any error must die. */ 60/* Parse an event definition. Note that any error must die. */
67static void parse_probe_event(const char *str) 61static int parse_probe_event(const char *str)
68{ 62{
69 struct probe_point *pp = &session.probes[session.nr_probe]; 63 struct perf_probe_event *pev = &params.events[params.nevents];
64 int ret;
70 65
71 pr_debug("probe-definition(%d): %s\n", session.nr_probe, str); 66 pr_debug("probe-definition(%d): %s\n", params.nevents, str);
72 if (++session.nr_probe == MAX_PROBES) 67 if (++params.nevents == MAX_PROBES)
73 die("Too many probes (> %d) are specified.", MAX_PROBES); 68 die("Too many probes (> %d) are specified.", MAX_PROBES);
74 69
75 /* Parse perf-probe event into probe_point */ 70 /* Parse a perf-probe command into event */
76 parse_perf_probe_event(str, pp, &session.need_dwarf); 71 ret = parse_perf_probe_command(str, pev);
72 pr_debug("%d arguments\n", pev->nargs);
77 73
78 pr_debug("%d arguments\n", pp->nr_args); 74 return ret;
79} 75}
80 76
81static void parse_probe_event_argv(int argc, const char **argv) 77static int parse_probe_event_argv(int argc, const char **argv)
82{ 78{
83 int i, len; 79 int i, len, ret;
84 char *buf; 80 char *buf;
85 81
86 /* Bind up rest arguments */ 82 /* Bind up rest arguments */
87 len = 0; 83 len = 0;
88 for (i = 0; i < argc; i++) 84 for (i = 0; i < argc; i++)
89 len += strlen(argv[i]) + 1; 85 len += strlen(argv[i]) + 1;
90 buf = zalloc(len + 1); 86 buf = xzalloc(len + 1);
91 if (!buf)
92 die("Failed to allocate memory for binding arguments.");
93 len = 0; 87 len = 0;
94 for (i = 0; i < argc; i++) 88 for (i = 0; i < argc; i++)
95 len += sprintf(&buf[len], "%s ", argv[i]); 89 len += sprintf(&buf[len], "%s ", argv[i]);
96 parse_probe_event(buf); 90 ret = parse_probe_event(buf);
97 free(buf); 91 free(buf);
92 return ret;
98} 93}
99 94
100static int opt_add_probe_event(const struct option *opt __used, 95static int opt_add_probe_event(const struct option *opt __used,
101 const char *str, int unset __used) 96 const char *str, int unset __used)
102{ 97{
103 if (str) 98 if (str)
104 parse_probe_event(str); 99 return parse_probe_event(str);
105 return 0; 100 else
101 return 0;
106} 102}
107 103
108static int opt_del_probe_event(const struct option *opt __used, 104static int opt_del_probe_event(const struct option *opt __used,
109 const char *str, int unset __used) 105 const char *str, int unset __used)
110{ 106{
111 if (str) { 107 if (str) {
112 if (!session.dellist) 108 if (!params.dellist)
113 session.dellist = strlist__new(true, NULL); 109 params.dellist = strlist__new(true, NULL);
114 strlist__add(session.dellist, str); 110 strlist__add(params.dellist, str);
115 } 111 }
116 return 0; 112 return 0;
117} 113}
118 114
119/* Currently just checking function name from symbol map */ 115#ifdef DWARF_SUPPORT
120static void evaluate_probe_point(struct probe_point *pp)
121{
122 struct symbol *sym;
123 sym = map__find_symbol_by_name(session.kmaps[MAP__FUNCTION],
124 pp->function, NULL);
125 if (!sym)
126 die("Kernel symbol \'%s\' not found - probe not added.",
127 pp->function);
128}
129
130#ifndef NO_DWARF_SUPPORT
131static int open_vmlinux(void)
132{
133 if (map__load(session.kmaps[MAP__FUNCTION], NULL) < 0) {
134 pr_debug("Failed to load kernel map.\n");
135 return -EINVAL;
136 }
137 pr_debug("Try to open %s\n",
138 session.kmaps[MAP__FUNCTION]->dso->long_name);
139 return open(session.kmaps[MAP__FUNCTION]->dso->long_name, O_RDONLY);
140}
141
142static int opt_show_lines(const struct option *opt __used, 116static int opt_show_lines(const struct option *opt __used,
143 const char *str, int unset __used) 117 const char *str, int unset __used)
144{ 118{
119 int ret = 0;
120
145 if (str) 121 if (str)
146 parse_line_range_desc(str, &session.line_range); 122 ret = parse_line_range_desc(str, &params.line_range);
147 INIT_LIST_HEAD(&session.line_range.line_list); 123 INIT_LIST_HEAD(&params.line_range.line_list);
148 session.show_lines = true; 124 params.show_lines = true;
149 return 0; 125
126 return ret;
150} 127}
151#endif 128#endif
152 129
@@ -155,29 +132,25 @@ static const char * const probe_usage[] = {
155 "perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]", 132 "perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]",
156 "perf probe [<options>] --del '[GROUP:]EVENT' ...", 133 "perf probe [<options>] --del '[GROUP:]EVENT' ...",
157 "perf probe --list", 134 "perf probe --list",
158#ifndef NO_DWARF_SUPPORT 135#ifdef DWARF_SUPPORT
159 "perf probe --line 'LINEDESC'", 136 "perf probe --line 'LINEDESC'",
160#endif 137#endif
161 NULL 138 NULL
162}; 139};
163 140
164static const struct option options[] = { 141static const struct option options[] = {
165 OPT_BOOLEAN('v', "verbose", &verbose, 142 OPT_INCR('v', "verbose", &verbose,
166 "be more verbose (show parsed arguments, etc)"), 143 "be more verbose (show parsed arguments, etc)"),
167#ifndef NO_DWARF_SUPPORT 144 OPT_BOOLEAN('l', "list", &params.list_events,
168 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
169 "file", "vmlinux pathname"),
170#endif
171 OPT_BOOLEAN('l', "list", &session.list_events,
172 "list up current probe events"), 145 "list up current probe events"),
173 OPT_CALLBACK('d', "del", NULL, "[GROUP:]EVENT", "delete a probe event.", 146 OPT_CALLBACK('d', "del", NULL, "[GROUP:]EVENT", "delete a probe event.",
174 opt_del_probe_event), 147 opt_del_probe_event),
175 OPT_CALLBACK('a', "add", NULL, 148 OPT_CALLBACK('a', "add", NULL,
176#ifdef NO_DWARF_SUPPORT 149#ifdef DWARF_SUPPORT
177 "[EVENT=]FUNC[+OFF|%return] [ARG ...]",
178#else
179 "[EVENT=]FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT" 150 "[EVENT=]FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT"
180 " [ARG ...]", 151 " [[NAME=]ARG ...]",
152#else
153 "[EVENT=]FUNC[+OFF|%return] [[NAME=]ARG ...]",
181#endif 154#endif
182 "probe point definition, where\n" 155 "probe point definition, where\n"
183 "\t\tGROUP:\tGroup name (optional)\n" 156 "\t\tGROUP:\tGroup name (optional)\n"
@@ -185,51 +158,33 @@ static const struct option options[] = {
185 "\t\tFUNC:\tFunction name\n" 158 "\t\tFUNC:\tFunction name\n"
186 "\t\tOFF:\tOffset from function entry (in byte)\n" 159 "\t\tOFF:\tOffset from function entry (in byte)\n"
187 "\t\t%return:\tPut the probe at function return\n" 160 "\t\t%return:\tPut the probe at function return\n"
188#ifdef NO_DWARF_SUPPORT 161#ifdef DWARF_SUPPORT
189 "\t\tARG:\tProbe argument (only \n"
190#else
191 "\t\tSRC:\tSource code path\n" 162 "\t\tSRC:\tSource code path\n"
192 "\t\tRL:\tRelative line number from function entry.\n" 163 "\t\tRL:\tRelative line number from function entry.\n"
193 "\t\tAL:\tAbsolute line number in file.\n" 164 "\t\tAL:\tAbsolute line number in file.\n"
194 "\t\tPT:\tLazy expression of line code.\n" 165 "\t\tPT:\tLazy expression of line code.\n"
195 "\t\tARG:\tProbe argument (local variable name or\n" 166 "\t\tARG:\tProbe argument (local variable name or\n"
196#endif
197 "\t\t\tkprobe-tracer argument format.)\n", 167 "\t\t\tkprobe-tracer argument format.)\n",
168#else
169 "\t\tARG:\tProbe argument (kprobe-tracer argument format.)\n",
170#endif
198 opt_add_probe_event), 171 opt_add_probe_event),
199 OPT_BOOLEAN('f', "force", &session.force_add, "forcibly add events" 172 OPT_BOOLEAN('f', "force", &params.force_add, "forcibly add events"
200 " with existing name"), 173 " with existing name"),
201#ifndef NO_DWARF_SUPPORT 174#ifdef DWARF_SUPPORT
202 OPT_CALLBACK('L', "line", NULL, 175 OPT_CALLBACK('L', "line", NULL,
203 "FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]", 176 "FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]",
204 "Show source code lines.", opt_show_lines), 177 "Show source code lines.", opt_show_lines),
178 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
179 "file", "vmlinux pathname"),
205#endif 180#endif
181 OPT__DRY_RUN(&probe_event_dry_run),
206 OPT_END() 182 OPT_END()
207}; 183};
208 184
209/* Initialize symbol maps for vmlinux */
210static void init_vmlinux(void)
211{
212 symbol_conf.sort_by_name = true;
213 if (symbol_conf.vmlinux_name == NULL)
214 symbol_conf.try_vmlinux_path = true;
215 else
216 pr_debug("Use vmlinux: %s\n", symbol_conf.vmlinux_name);
217 if (symbol__init() < 0)
218 die("Failed to init symbol map.");
219
220 map_groups__init(&session.kmap_groups);
221 if (map_groups__create_kernel_maps(&session.kmap_groups,
222 session.kmaps) < 0)
223 die("Failed to create kernel maps.");
224}
225
226int cmd_probe(int argc, const char **argv, const char *prefix __used) 185int cmd_probe(int argc, const char **argv, const char *prefix __used)
227{ 186{
228 int i, ret; 187 int ret;
229#ifndef NO_DWARF_SUPPORT
230 int fd;
231#endif
232 struct probe_point *pp;
233 188
234 argc = parse_options(argc, argv, options, probe_usage, 189 argc = parse_options(argc, argv, options, probe_usage,
235 PARSE_OPT_STOP_AT_NON_OPTION); 190 PARSE_OPT_STOP_AT_NON_OPTION);
@@ -238,123 +193,65 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
238 pr_warning(" Error: '-' is not supported.\n"); 193 pr_warning(" Error: '-' is not supported.\n");
239 usage_with_options(probe_usage, options); 194 usage_with_options(probe_usage, options);
240 } 195 }
241 parse_probe_event_argv(argc, argv); 196 ret = parse_probe_event_argv(argc, argv);
197 if (ret < 0) {
198 pr_err(" Error: Parse Error. (%d)\n", ret);
199 return ret;
200 }
242 } 201 }
243 202
244 if ((!session.nr_probe && !session.dellist && !session.list_events && 203 if ((!params.nevents && !params.dellist && !params.list_events &&
245 !session.show_lines)) 204 !params.show_lines))
246 usage_with_options(probe_usage, options); 205 usage_with_options(probe_usage, options);
247 206
248 if (debugfs_valid_mountpoint(debugfs_path) < 0) 207 if (params.list_events) {
249 die("Failed to find debugfs path."); 208 if (params.nevents != 0 || params.dellist) {
250 209 pr_err(" Error: Don't use --list with --add/--del.\n");
251 if (session.list_events) {
252 if (session.nr_probe != 0 || session.dellist) {
253 pr_warning(" Error: Don't use --list with"
254 " --add/--del.\n");
255 usage_with_options(probe_usage, options); 210 usage_with_options(probe_usage, options);
256 } 211 }
257 if (session.show_lines) { 212 if (params.show_lines) {
258 pr_warning(" Error: Don't use --list with --line.\n"); 213 pr_err(" Error: Don't use --list with --line.\n");
259 usage_with_options(probe_usage, options); 214 usage_with_options(probe_usage, options);
260 } 215 }
261 show_perf_probe_events(); 216 ret = show_perf_probe_events();
262 return 0; 217 if (ret < 0)
218 pr_err(" Error: Failed to show event list. (%d)\n",
219 ret);
220 return ret;
263 } 221 }
264 222
265#ifndef NO_DWARF_SUPPORT 223#ifdef DWARF_SUPPORT
266 if (session.show_lines) { 224 if (params.show_lines) {
267 if (session.nr_probe != 0 || session.dellist) { 225 if (params.nevents != 0 || params.dellist) {
268 pr_warning(" Error: Don't use --line with" 226 pr_warning(" Error: Don't use --line with"
269 " --add/--del.\n"); 227 " --add/--del.\n");
270 usage_with_options(probe_usage, options); 228 usage_with_options(probe_usage, options);
271 } 229 }
272 init_vmlinux();
273 fd = open_vmlinux();
274 if (fd < 0)
275 die("Could not open debuginfo file.");
276 ret = find_line_range(fd, &session.line_range);
277 if (ret <= 0)
278 die("Source line is not found.\n");
279 close(fd);
280 show_line_range(&session.line_range);
281 return 0;
282 }
283#endif
284 230
285 if (session.dellist) { 231 ret = show_line_range(&params.line_range);
286 del_trace_kprobe_events(session.dellist); 232 if (ret < 0)
287 strlist__delete(session.dellist); 233 pr_err(" Error: Failed to show lines. (%d)\n", ret);
288 if (session.nr_probe == 0) 234 return ret;
289 return 0;
290 } 235 }
236#endif
291 237
292 /* Add probes */ 238 if (params.dellist) {
293 init_vmlinux(); 239 ret = del_perf_probe_events(params.dellist);
294 240 strlist__delete(params.dellist);
295 if (session.need_dwarf) 241 if (ret < 0) {
296#ifdef NO_DWARF_SUPPORT 242 pr_err(" Error: Failed to delete events. (%d)\n", ret);
297 die("Debuginfo-analysis is not supported"); 243 return ret;
298#else /* !NO_DWARF_SUPPORT */
299 pr_debug("Some probes require debuginfo.\n");
300
301 fd = open_vmlinux();
302 if (fd < 0) {
303 if (session.need_dwarf)
304 die("Could not open debuginfo file.");
305
306 pr_debug("Could not open vmlinux/module file."
307 " Try to use symbols.\n");
308 goto end_dwarf;
309 }
310
311 /* Searching probe points */
312 for (i = 0; i < session.nr_probe; i++) {
313 pp = &session.probes[i];
314 if (pp->found)
315 continue;
316
317 lseek(fd, SEEK_SET, 0);
318 ret = find_probe_point(fd, pp);
319 if (ret > 0)
320 continue;
321 if (ret == 0) { /* No error but failed to find probe point. */
322 synthesize_perf_probe_point(pp);
323 die("Probe point '%s' not found. - probe not added.",
324 pp->probes[0]);
325 }
326 /* Error path */
327 if (session.need_dwarf) {
328 if (ret == -ENOENT)
329 pr_warning("No dwarf info found in the vmlinux - please rebuild with CONFIG_DEBUG_INFO=y.\n");
330 die("Could not analyze debuginfo.");
331 } 244 }
332 pr_debug("An error occurred in debuginfo analysis."
333 " Try to use symbols.\n");
334 break;
335 } 245 }
336 close(fd);
337
338end_dwarf:
339#endif /* !NO_DWARF_SUPPORT */
340 246
341 /* Synthesize probes without dwarf */ 247 if (params.nevents) {
342 for (i = 0; i < session.nr_probe; i++) { 248 ret = add_perf_probe_events(params.events, params.nevents,
343 pp = &session.probes[i]; 249 params.force_add);
344 if (pp->found) /* This probe is already found. */ 250 if (ret < 0) {
345 continue; 251 pr_err(" Error: Failed to add events. (%d)\n", ret);
346 252 return ret;
347 evaluate_probe_point(pp); 253 }
348 ret = synthesize_trace_kprobe_event(pp);
349 if (ret == -E2BIG)
350 die("probe point definition becomes too long.");
351 else if (ret < 0)
352 die("Failed to synthesize a probe point.");
353 } 254 }
354
355 /* Settng up probe points */
356 add_trace_kprobe_events(session.probes, session.nr_probe,
357 session.force_add);
358 return 0; 255 return 0;
359} 256}
360 257
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 3b8b6387c47c..27f992aca8b5 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -15,7 +15,6 @@
15#include "util/util.h" 15#include "util/util.h"
16#include "util/parse-options.h" 16#include "util/parse-options.h"
17#include "util/parse-events.h" 17#include "util/parse-events.h"
18#include "util/string.h"
19 18
20#include "util/header.h" 19#include "util/header.h"
21#include "util/event.h" 20#include "util/event.h"
@@ -27,31 +26,41 @@
27#include <unistd.h> 26#include <unistd.h>
28#include <sched.h> 27#include <sched.h>
29 28
30static int fd[MAX_NR_CPUS][MAX_COUNTERS]; 29enum write_mode_t {
30 WRITE_FORCE,
31 WRITE_APPEND
32};
33
34static int *fd[MAX_NR_CPUS][MAX_COUNTERS];
31 35
36static unsigned int user_interval = UINT_MAX;
32static long default_interval = 0; 37static long default_interval = 0;
33 38
34static int nr_cpus = 0; 39static int nr_cpus = 0;
35static unsigned int page_size; 40static unsigned int page_size;
36static unsigned int mmap_pages = 128; 41static unsigned int mmap_pages = 128;
42static unsigned int user_freq = UINT_MAX;
37static int freq = 1000; 43static int freq = 1000;
38static int output; 44static int output;
45static int pipe_output = 0;
39static const char *output_name = "perf.data"; 46static const char *output_name = "perf.data";
40static int group = 0; 47static int group = 0;
41static unsigned int realtime_prio = 0; 48static unsigned int realtime_prio = 0;
42static int raw_samples = 0; 49static bool raw_samples = false;
43static int system_wide = 0; 50static bool system_wide = false;
44static int profile_cpu = -1; 51static int profile_cpu = -1;
45static pid_t target_pid = -1; 52static pid_t target_pid = -1;
53static pid_t target_tid = -1;
54static pid_t *all_tids = NULL;
55static int thread_num = 0;
46static pid_t child_pid = -1; 56static pid_t child_pid = -1;
47static int inherit = 1; 57static bool inherit = true;
48static int force = 0; 58static enum write_mode_t write_mode = WRITE_FORCE;
49static int append_file = 0; 59static bool call_graph = false;
50static int call_graph = 0; 60static bool inherit_stat = false;
51static int inherit_stat = 0; 61static bool no_samples = false;
52static int no_samples = 0; 62static bool sample_address = false;
53static int sample_address = 0; 63static bool multiplex = false;
54static int multiplex = 0;
55static int multiplex_fd = -1; 64static int multiplex_fd = -1;
56 65
57static long samples = 0; 66static long samples = 0;
@@ -60,7 +69,7 @@ static struct timeval this_read;
60 69
61static u64 bytes_written = 0; 70static u64 bytes_written = 0;
62 71
63static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; 72static struct pollfd *event_array;
64 73
65static int nr_poll = 0; 74static int nr_poll = 0;
66static int nr_cpu = 0; 75static int nr_cpu = 0;
@@ -77,7 +86,7 @@ struct mmap_data {
77 unsigned int prev; 86 unsigned int prev;
78}; 87};
79 88
80static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; 89static struct mmap_data *mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
81 90
82static unsigned long mmap_read_head(struct mmap_data *md) 91static unsigned long mmap_read_head(struct mmap_data *md)
83{ 92{
@@ -101,6 +110,11 @@ static void mmap_write_tail(struct mmap_data *md, unsigned long tail)
101 pc->data_tail = tail; 110 pc->data_tail = tail;
102} 111}
103 112
113static void advance_output(size_t size)
114{
115 bytes_written += size;
116}
117
104static void write_output(void *buf, size_t size) 118static void write_output(void *buf, size_t size)
105{ 119{
106 while (size) { 120 while (size) {
@@ -225,12 +239,13 @@ static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int n
225 return h_attr; 239 return h_attr;
226} 240}
227 241
228static void create_counter(int counter, int cpu, pid_t pid) 242static void create_counter(int counter, int cpu)
229{ 243{
230 char *filter = filters[counter]; 244 char *filter = filters[counter];
231 struct perf_event_attr *attr = attrs + counter; 245 struct perf_event_attr *attr = attrs + counter;
232 struct perf_header_attr *h_attr; 246 struct perf_header_attr *h_attr;
233 int track = !counter; /* only the first counter needs these */ 247 int track = !counter; /* only the first counter needs these */
248 int thread_index;
234 int ret; 249 int ret;
235 struct { 250 struct {
236 u64 count; 251 u64 count;
@@ -248,10 +263,19 @@ static void create_counter(int counter, int cpu, pid_t pid)
248 if (nr_counters > 1) 263 if (nr_counters > 1)
249 attr->sample_type |= PERF_SAMPLE_ID; 264 attr->sample_type |= PERF_SAMPLE_ID;
250 265
251 if (freq) { 266 /*
252 attr->sample_type |= PERF_SAMPLE_PERIOD; 267 * We default some events to a 1 default interval. But keep
253 attr->freq = 1; 268 * it a weak assumption overridable by the user.
254 attr->sample_freq = freq; 269 */
270 if (!attr->sample_period || (user_freq != UINT_MAX &&
271 user_interval != UINT_MAX)) {
272 if (freq) {
273 attr->sample_type |= PERF_SAMPLE_PERIOD;
274 attr->freq = 1;
275 attr->sample_freq = freq;
276 } else {
277 attr->sample_period = default_interval;
278 }
255 } 279 }
256 280
257 if (no_samples) 281 if (no_samples)
@@ -275,118 +299,129 @@ static void create_counter(int counter, int cpu, pid_t pid)
275 attr->mmap = track; 299 attr->mmap = track;
276 attr->comm = track; 300 attr->comm = track;
277 attr->inherit = inherit; 301 attr->inherit = inherit;
278 attr->disabled = 1; 302 if (target_pid == -1 && !system_wide) {
303 attr->disabled = 1;
304 attr->enable_on_exec = 1;
305 }
279 306
307 for (thread_index = 0; thread_index < thread_num; thread_index++) {
280try_again: 308try_again:
281 fd[nr_cpu][counter] = sys_perf_event_open(attr, pid, cpu, group_fd, 0); 309 fd[nr_cpu][counter][thread_index] = sys_perf_event_open(attr,
282 310 all_tids[thread_index], cpu, group_fd, 0);
283 if (fd[nr_cpu][counter] < 0) { 311
284 int err = errno; 312 if (fd[nr_cpu][counter][thread_index] < 0) {
285 313 int err = errno;
286 if (err == EPERM || err == EACCES) 314
287 die("Permission error - are you root?\n"); 315 if (err == EPERM || err == EACCES)
288 else if (err == ENODEV && profile_cpu != -1) 316 die("Permission error - are you root?\n"
289 die("No such device - did you specify an out-of-range profile CPU?\n"); 317 "\t Consider tweaking"
318 " /proc/sys/kernel/perf_event_paranoid.\n");
319 else if (err == ENODEV && profile_cpu != -1) {
320 die("No such device - did you specify"
321 " an out-of-range profile CPU?\n");
322 }
290 323
291 /* 324 /*
292 * If it's cycles then fall back to hrtimer 325 * If it's cycles then fall back to hrtimer
293 * based cpu-clock-tick sw counter, which 326 * based cpu-clock-tick sw counter, which
294 * is always available even if no PMU support: 327 * is always available even if no PMU support:
295 */ 328 */
296 if (attr->type == PERF_TYPE_HARDWARE 329 if (attr->type == PERF_TYPE_HARDWARE
297 && attr->config == PERF_COUNT_HW_CPU_CYCLES) { 330 && attr->config == PERF_COUNT_HW_CPU_CYCLES) {
298 331
299 if (verbose) 332 if (verbose)
300 warning(" ... trying to fall back to cpu-clock-ticks\n"); 333 warning(" ... trying to fall back to cpu-clock-ticks\n");
301 attr->type = PERF_TYPE_SOFTWARE; 334 attr->type = PERF_TYPE_SOFTWARE;
302 attr->config = PERF_COUNT_SW_CPU_CLOCK; 335 attr->config = PERF_COUNT_SW_CPU_CLOCK;
303 goto try_again; 336 goto try_again;
304 } 337 }
305 printf("\n"); 338 printf("\n");
306 error("perfcounter syscall returned with %d (%s)\n", 339 error("perfcounter syscall returned with %d (%s)\n",
307 fd[nr_cpu][counter], strerror(err)); 340 fd[nr_cpu][counter][thread_index], strerror(err));
308 341
309#if defined(__i386__) || defined(__x86_64__) 342#if defined(__i386__) || defined(__x86_64__)
310 if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP) 343 if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
311 die("No hardware sampling interrupt available. No APIC? If so then you can boot the kernel with the \"lapic\" boot parameter to force-enable it.\n"); 344 die("No hardware sampling interrupt available."
345 " No APIC? If so then you can boot the kernel"
346 " with the \"lapic\" boot parameter to"
347 " force-enable it.\n");
312#endif 348#endif
313 349
314 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); 350 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
315 exit(-1); 351 exit(-1);
316 } 352 }
317 353
318 h_attr = get_header_attr(attr, counter); 354 h_attr = get_header_attr(attr, counter);
319 if (h_attr == NULL) 355 if (h_attr == NULL)
320 die("nomem\n"); 356 die("nomem\n");
321 357
322 if (!file_new) { 358 if (!file_new) {
323 if (memcmp(&h_attr->attr, attr, sizeof(*attr))) { 359 if (memcmp(&h_attr->attr, attr, sizeof(*attr))) {
324 fprintf(stderr, "incompatible append\n"); 360 fprintf(stderr, "incompatible append\n");
325 exit(-1); 361 exit(-1);
362 }
326 } 363 }
327 }
328 364
329 if (read(fd[nr_cpu][counter], &read_data, sizeof(read_data)) == -1) { 365 if (read(fd[nr_cpu][counter][thread_index], &read_data, sizeof(read_data)) == -1) {
330 perror("Unable to read perf file descriptor\n"); 366 perror("Unable to read perf file descriptor\n");
331 exit(-1); 367 exit(-1);
332 } 368 }
333 369
334 if (perf_header_attr__add_id(h_attr, read_data.id) < 0) { 370 if (perf_header_attr__add_id(h_attr, read_data.id) < 0) {
335 pr_warning("Not enough memory to add id\n"); 371 pr_warning("Not enough memory to add id\n");
336 exit(-1); 372 exit(-1);
337 } 373 }
338 374
339 assert(fd[nr_cpu][counter] >= 0); 375 assert(fd[nr_cpu][counter][thread_index] >= 0);
340 fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK); 376 fcntl(fd[nr_cpu][counter][thread_index], F_SETFL, O_NONBLOCK);
341 377
342 /* 378 /*
343 * First counter acts as the group leader: 379 * First counter acts as the group leader:
344 */ 380 */
345 if (group && group_fd == -1) 381 if (group && group_fd == -1)
346 group_fd = fd[nr_cpu][counter]; 382 group_fd = fd[nr_cpu][counter][thread_index];
347 if (multiplex && multiplex_fd == -1) 383 if (multiplex && multiplex_fd == -1)
348 multiplex_fd = fd[nr_cpu][counter]; 384 multiplex_fd = fd[nr_cpu][counter][thread_index];
349 385
350 if (multiplex && fd[nr_cpu][counter] != multiplex_fd) { 386 if (multiplex && fd[nr_cpu][counter][thread_index] != multiplex_fd) {
351 387
352 ret = ioctl(fd[nr_cpu][counter], PERF_EVENT_IOC_SET_OUTPUT, multiplex_fd); 388 ret = ioctl(fd[nr_cpu][counter][thread_index], PERF_EVENT_IOC_SET_OUTPUT, multiplex_fd);
353 assert(ret != -1); 389 assert(ret != -1);
354 } else { 390 } else {
355 event_array[nr_poll].fd = fd[nr_cpu][counter]; 391 event_array[nr_poll].fd = fd[nr_cpu][counter][thread_index];
356 event_array[nr_poll].events = POLLIN; 392 event_array[nr_poll].events = POLLIN;
357 nr_poll++; 393 nr_poll++;
358 394
359 mmap_array[nr_cpu][counter].counter = counter; 395 mmap_array[nr_cpu][counter][thread_index].counter = counter;
360 mmap_array[nr_cpu][counter].prev = 0; 396 mmap_array[nr_cpu][counter][thread_index].prev = 0;
361 mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1; 397 mmap_array[nr_cpu][counter][thread_index].mask = mmap_pages*page_size - 1;
362 mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size, 398 mmap_array[nr_cpu][counter][thread_index].base = mmap(NULL, (mmap_pages+1)*page_size,
363 PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter], 0); 399 PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter][thread_index], 0);
364 if (mmap_array[nr_cpu][counter].base == MAP_FAILED) { 400 if (mmap_array[nr_cpu][counter][thread_index].base == MAP_FAILED) {
365 error("failed to mmap with %d (%s)\n", errno, strerror(errno)); 401 error("failed to mmap with %d (%s)\n", errno, strerror(errno));
366 exit(-1); 402 exit(-1);
403 }
367 } 404 }
368 }
369 405
370 if (filter != NULL) { 406 if (filter != NULL) {
371 ret = ioctl(fd[nr_cpu][counter], 407 ret = ioctl(fd[nr_cpu][counter][thread_index],
372 PERF_EVENT_IOC_SET_FILTER, filter); 408 PERF_EVENT_IOC_SET_FILTER, filter);
373 if (ret) { 409 if (ret) {
374 error("failed to set filter with %d (%s)\n", errno, 410 error("failed to set filter with %d (%s)\n", errno,
375 strerror(errno)); 411 strerror(errno));
376 exit(-1); 412 exit(-1);
413 }
377 } 414 }
378 } 415 }
379
380 ioctl(fd[nr_cpu][counter], PERF_EVENT_IOC_ENABLE);
381} 416}
382 417
383static void open_counters(int cpu, pid_t pid) 418static void open_counters(int cpu)
384{ 419{
385 int counter; 420 int counter;
386 421
387 group_fd = -1; 422 group_fd = -1;
388 for (counter = 0; counter < nr_counters; counter++) 423 for (counter = 0; counter < nr_counters; counter++)
389 create_counter(counter, cpu, pid); 424 create_counter(counter, cpu);
390 425
391 nr_cpu++; 426 nr_cpu++;
392} 427}
@@ -406,10 +441,65 @@ static int process_buildids(void)
406 441
407static void atexit_header(void) 442static void atexit_header(void)
408{ 443{
409 session->header.data_size += bytes_written; 444 if (!pipe_output) {
445 session->header.data_size += bytes_written;
446
447 process_buildids();
448 perf_header__write(&session->header, output, true);
449 } else {
450 int err;
451
452 err = event__synthesize_build_ids(process_synthesized_event,
453 session);
454 if (err < 0)
455 pr_err("Couldn't synthesize build ids.\n");
456 }
457}
458
459static void event__synthesize_guest_os(struct kernel_info *kerninfo,
460 void *data __attribute__((unused)))
461{
462 int err;
463 char *guest_kallsyms;
464 char path[PATH_MAX];
465
466 if (is_host_kernel(kerninfo))
467 return;
468
469 /*
470 *As for guest kernel when processing subcommand record&report,
471 *we arrange module mmap prior to guest kernel mmap and trigger
472 *a preload dso because default guest module symbols are loaded
473 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
474 *method is used to avoid symbol missing when the first addr is
475 *in module instead of in guest kernel.
476 */
477 err = event__synthesize_modules(process_synthesized_event,
478 session,
479 kerninfo);
480 if (err < 0)
481 pr_err("Couldn't record guest kernel [%d]'s reference"
482 " relocation symbol.\n", kerninfo->pid);
483
484 if (is_default_guest(kerninfo))
485 guest_kallsyms = (char *) symbol_conf.default_guest_kallsyms;
486 else {
487 sprintf(path, "%s/proc/kallsyms", kerninfo->root_dir);
488 guest_kallsyms = path;
489 }
410 490
411 process_buildids(); 491 /*
412 perf_header__write(&session->header, output, true); 492 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
493 * have no _text sometimes.
494 */
495 err = event__synthesize_kernel_mmap(process_synthesized_event,
496 session, kerninfo, "_text");
497 if (err < 0)
498 err = event__synthesize_kernel_mmap(process_synthesized_event,
499 session, kerninfo, "_stext");
500 if (err < 0)
501 pr_err("Couldn't record guest kernel [%d]'s reference"
502 " relocation symbol.\n", kerninfo->pid);
413} 503}
414 504
415static int __cmd_record(int argc, const char **argv) 505static int __cmd_record(int argc, const char **argv)
@@ -421,8 +511,9 @@ static int __cmd_record(int argc, const char **argv)
421 int err; 511 int err;
422 unsigned long waking = 0; 512 unsigned long waking = 0;
423 int child_ready_pipe[2], go_pipe[2]; 513 int child_ready_pipe[2], go_pipe[2];
424 const bool forks = target_pid == -1 && argc > 0; 514 const bool forks = argc > 0;
425 char buf; 515 char buf;
516 struct kernel_info *kerninfo;
426 517
427 page_size = sysconf(_SC_PAGE_SIZE); 518 page_size = sysconf(_SC_PAGE_SIZE);
428 519
@@ -435,45 +526,44 @@ static int __cmd_record(int argc, const char **argv)
435 exit(-1); 526 exit(-1);
436 } 527 }
437 528
438 if (!stat(output_name, &st) && st.st_size) { 529 if (!strcmp(output_name, "-"))
439 if (!force) { 530 pipe_output = 1;
440 if (!append_file) { 531 else if (!stat(output_name, &st) && st.st_size) {
441 pr_err("Error, output file %s exists, use -A " 532 if (write_mode == WRITE_FORCE) {
442 "to append or -f to overwrite.\n",
443 output_name);
444 exit(-1);
445 }
446 } else {
447 char oldname[PATH_MAX]; 533 char oldname[PATH_MAX];
448 snprintf(oldname, sizeof(oldname), "%s.old", 534 snprintf(oldname, sizeof(oldname), "%s.old",
449 output_name); 535 output_name);
450 unlink(oldname); 536 unlink(oldname);
451 rename(output_name, oldname); 537 rename(output_name, oldname);
452 } 538 }
453 } else { 539 } else if (write_mode == WRITE_APPEND) {
454 append_file = 0; 540 write_mode = WRITE_FORCE;
455 } 541 }
456 542
457 flags = O_CREAT|O_RDWR; 543 flags = O_CREAT|O_RDWR;
458 if (append_file) 544 if (write_mode == WRITE_APPEND)
459 file_new = 0; 545 file_new = 0;
460 else 546 else
461 flags |= O_TRUNC; 547 flags |= O_TRUNC;
462 548
463 output = open(output_name, flags, S_IRUSR|S_IWUSR); 549 if (pipe_output)
550 output = STDOUT_FILENO;
551 else
552 output = open(output_name, flags, S_IRUSR | S_IWUSR);
464 if (output < 0) { 553 if (output < 0) {
465 perror("failed to create output file"); 554 perror("failed to create output file");
466 exit(-1); 555 exit(-1);
467 } 556 }
468 557
469 session = perf_session__new(output_name, O_WRONLY, force); 558 session = perf_session__new(output_name, O_WRONLY,
559 write_mode == WRITE_FORCE);
470 if (session == NULL) { 560 if (session == NULL) {
471 pr_err("Not enough memory for reading perf file header\n"); 561 pr_err("Not enough memory for reading perf file header\n");
472 return -1; 562 return -1;
473 } 563 }
474 564
475 if (!file_new) { 565 if (!file_new) {
476 err = perf_header__read(&session->header, output); 566 err = perf_header__read(session, output);
477 if (err < 0) 567 if (err < 0)
478 return err; 568 return err;
479 } 569 }
@@ -492,13 +582,15 @@ static int __cmd_record(int argc, const char **argv)
492 atexit(atexit_header); 582 atexit(atexit_header);
493 583
494 if (forks) { 584 if (forks) {
495 pid = fork(); 585 child_pid = fork();
496 if (pid < 0) { 586 if (pid < 0) {
497 perror("failed to fork"); 587 perror("failed to fork");
498 exit(-1); 588 exit(-1);
499 } 589 }
500 590
501 if (!pid) { 591 if (!child_pid) {
592 if (pipe_output)
593 dup2(2, 1);
502 close(child_ready_pipe[0]); 594 close(child_ready_pipe[0]);
503 close(go_pipe[1]); 595 close(go_pipe[1]);
504 fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); 596 fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
@@ -527,10 +619,8 @@ static int __cmd_record(int argc, const char **argv)
527 exit(-1); 619 exit(-1);
528 } 620 }
529 621
530 child_pid = pid; 622 if (!system_wide && target_tid == -1 && target_pid == -1)
531 623 all_tids[0] = child_pid;
532 if (!system_wide)
533 target_pid = pid;
534 624
535 close(child_ready_pipe[1]); 625 close(child_ready_pipe[1]);
536 close(go_pipe[0]); 626 close(go_pipe[0]);
@@ -544,16 +634,19 @@ static int __cmd_record(int argc, const char **argv)
544 close(child_ready_pipe[0]); 634 close(child_ready_pipe[0]);
545 } 635 }
546 636
547
548 if ((!system_wide && !inherit) || profile_cpu != -1) { 637 if ((!system_wide && !inherit) || profile_cpu != -1) {
549 open_counters(profile_cpu, target_pid); 638 open_counters(profile_cpu);
550 } else { 639 } else {
551 nr_cpus = read_cpu_map(); 640 nr_cpus = read_cpu_map();
552 for (i = 0; i < nr_cpus; i++) 641 for (i = 0; i < nr_cpus; i++)
553 open_counters(cpumap[i], target_pid); 642 open_counters(cpumap[i]);
554 } 643 }
555 644
556 if (file_new) { 645 if (pipe_output) {
646 err = perf_header__write_pipe(output);
647 if (err < 0)
648 return err;
649 } else if (file_new) {
557 err = perf_header__write(&session->header, output, false); 650 err = perf_header__write(&session->header, output, false);
558 if (err < 0) 651 if (err < 0)
559 return err; 652 return err;
@@ -561,21 +654,62 @@ static int __cmd_record(int argc, const char **argv)
561 654
562 post_processing_offset = lseek(output, 0, SEEK_CUR); 655 post_processing_offset = lseek(output, 0, SEEK_CUR);
563 656
657 if (pipe_output) {
658 err = event__synthesize_attrs(&session->header,
659 process_synthesized_event,
660 session);
661 if (err < 0) {
662 pr_err("Couldn't synthesize attrs.\n");
663 return err;
664 }
665
666 err = event__synthesize_event_types(process_synthesized_event,
667 session);
668 if (err < 0) {
669 pr_err("Couldn't synthesize event_types.\n");
670 return err;
671 }
672
673 err = event__synthesize_tracing_data(output, attrs,
674 nr_counters,
675 process_synthesized_event,
676 session);
677 if (err <= 0) {
678 pr_err("Couldn't record tracing data.\n");
679 return err;
680 }
681
682 advance_output(err);
683 }
684
685 kerninfo = kerninfo__findhost(&session->kerninfo_root);
686 if (!kerninfo) {
687 pr_err("Couldn't find native kernel information.\n");
688 return -1;
689 }
690
564 err = event__synthesize_kernel_mmap(process_synthesized_event, 691 err = event__synthesize_kernel_mmap(process_synthesized_event,
565 session, "_text"); 692 session, kerninfo, "_text");
693 if (err < 0)
694 err = event__synthesize_kernel_mmap(process_synthesized_event,
695 session, kerninfo, "_stext");
566 if (err < 0) { 696 if (err < 0) {
567 pr_err("Couldn't record kernel reference relocation symbol.\n"); 697 pr_err("Couldn't record kernel reference relocation symbol.\n");
568 return err; 698 return err;
569 } 699 }
570 700
571 err = event__synthesize_modules(process_synthesized_event, session); 701 err = event__synthesize_modules(process_synthesized_event,
702 session, kerninfo);
572 if (err < 0) { 703 if (err < 0) {
573 pr_err("Couldn't record kernel reference relocation symbol.\n"); 704 pr_err("Couldn't record kernel reference relocation symbol.\n");
574 return err; 705 return err;
575 } 706 }
707 if (perf_guest)
708 kerninfo__process_allkernels(&session->kerninfo_root,
709 event__synthesize_guest_os, session);
576 710
577 if (!system_wide && profile_cpu == -1) 711 if (!system_wide && profile_cpu == -1)
578 event__synthesize_thread(target_pid, process_synthesized_event, 712 event__synthesize_thread(target_tid, process_synthesized_event,
579 session); 713 session);
580 else 714 else
581 event__synthesize_threads(process_synthesized_event, session); 715 event__synthesize_threads(process_synthesized_event, session);
@@ -598,11 +732,16 @@ static int __cmd_record(int argc, const char **argv)
598 732
599 for (;;) { 733 for (;;) {
600 int hits = samples; 734 int hits = samples;
735 int thread;
601 736
602 for (i = 0; i < nr_cpu; i++) { 737 for (i = 0; i < nr_cpu; i++) {
603 for (counter = 0; counter < nr_counters; counter++) { 738 for (counter = 0; counter < nr_counters; counter++) {
604 if (mmap_array[i][counter].base) 739 for (thread = 0;
605 mmap_read(&mmap_array[i][counter]); 740 thread < thread_num; thread++) {
741 if (mmap_array[i][counter][thread].base)
742 mmap_read(&mmap_array[i][counter][thread]);
743 }
744
606 } 745 }
607 } 746 }
608 747
@@ -615,8 +754,15 @@ static int __cmd_record(int argc, const char **argv)
615 754
616 if (done) { 755 if (done) {
617 for (i = 0; i < nr_cpu; i++) { 756 for (i = 0; i < nr_cpu; i++) {
618 for (counter = 0; counter < nr_counters; counter++) 757 for (counter = 0;
619 ioctl(fd[i][counter], PERF_EVENT_IOC_DISABLE); 758 counter < nr_counters;
759 counter++) {
760 for (thread = 0;
761 thread < thread_num;
762 thread++)
763 ioctl(fd[i][counter][thread],
764 PERF_EVENT_IOC_DISABLE);
765 }
620 } 766 }
621 } 767 }
622 } 768 }
@@ -641,6 +787,8 @@ static const char * const record_usage[] = {
641 NULL 787 NULL
642}; 788};
643 789
790static bool force, append_file;
791
644static const struct option options[] = { 792static const struct option options[] = {
645 OPT_CALLBACK('e', "event", NULL, "event", 793 OPT_CALLBACK('e', "event", NULL, "event",
646 "event selector. use 'perf list' to list available events", 794 "event selector. use 'perf list' to list available events",
@@ -648,7 +796,9 @@ static const struct option options[] = {
648 OPT_CALLBACK(0, "filter", NULL, "filter", 796 OPT_CALLBACK(0, "filter", NULL, "filter",
649 "event filter", parse_filter), 797 "event filter", parse_filter),
650 OPT_INTEGER('p', "pid", &target_pid, 798 OPT_INTEGER('p', "pid", &target_pid,
651 "record events on existing pid"), 799 "record events on existing process id"),
800 OPT_INTEGER('t', "tid", &target_tid,
801 "record events on existing thread id"),
652 OPT_INTEGER('r', "realtime", &realtime_prio, 802 OPT_INTEGER('r', "realtime", &realtime_prio,
653 "collect data with this RT SCHED_FIFO priority"), 803 "collect data with this RT SCHED_FIFO priority"),
654 OPT_BOOLEAN('R', "raw-samples", &raw_samples, 804 OPT_BOOLEAN('R', "raw-samples", &raw_samples,
@@ -660,20 +810,20 @@ static const struct option options[] = {
660 OPT_INTEGER('C', "profile_cpu", &profile_cpu, 810 OPT_INTEGER('C', "profile_cpu", &profile_cpu,
661 "CPU to profile on"), 811 "CPU to profile on"),
662 OPT_BOOLEAN('f', "force", &force, 812 OPT_BOOLEAN('f', "force", &force,
663 "overwrite existing data file"), 813 "overwrite existing data file (deprecated)"),
664 OPT_LONG('c', "count", &default_interval, 814 OPT_LONG('c', "count", &user_interval,
665 "event period to sample"), 815 "event period to sample"),
666 OPT_STRING('o', "output", &output_name, "file", 816 OPT_STRING('o', "output", &output_name, "file",
667 "output file name"), 817 "output file name"),
668 OPT_BOOLEAN('i', "inherit", &inherit, 818 OPT_BOOLEAN('i', "inherit", &inherit,
669 "child tasks inherit counters"), 819 "child tasks inherit counters"),
670 OPT_INTEGER('F', "freq", &freq, 820 OPT_INTEGER('F', "freq", &user_freq,
671 "profile at this frequency"), 821 "profile at this frequency"),
672 OPT_INTEGER('m', "mmap-pages", &mmap_pages, 822 OPT_INTEGER('m', "mmap-pages", &mmap_pages,
673 "number of mmap data pages"), 823 "number of mmap data pages"),
674 OPT_BOOLEAN('g', "call-graph", &call_graph, 824 OPT_BOOLEAN('g', "call-graph", &call_graph,
675 "do call-graph (stack chain/backtrace) recording"), 825 "do call-graph (stack chain/backtrace) recording"),
676 OPT_BOOLEAN('v', "verbose", &verbose, 826 OPT_INCR('v', "verbose", &verbose,
677 "be more verbose (show counter open errors, etc)"), 827 "be more verbose (show counter open errors, etc)"),
678 OPT_BOOLEAN('s', "stat", &inherit_stat, 828 OPT_BOOLEAN('s', "stat", &inherit_stat,
679 "per thread counts"), 829 "per thread counts"),
@@ -688,13 +838,24 @@ static const struct option options[] = {
688 838
689int cmd_record(int argc, const char **argv, const char *prefix __used) 839int cmd_record(int argc, const char **argv, const char *prefix __used)
690{ 840{
691 int counter; 841 int i,j;
692 842
693 argc = parse_options(argc, argv, options, record_usage, 843 argc = parse_options(argc, argv, options, record_usage,
694 PARSE_OPT_STOP_AT_NON_OPTION); 844 PARSE_OPT_STOP_AT_NON_OPTION);
695 if (!argc && target_pid == -1 && !system_wide && profile_cpu == -1) 845 if (!argc && target_pid == -1 && target_tid == -1 &&
846 !system_wide && profile_cpu == -1)
696 usage_with_options(record_usage, options); 847 usage_with_options(record_usage, options);
697 848
849 if (force && append_file) {
850 fprintf(stderr, "Can't overwrite and append at the same time."
851 " You need to choose between -f and -A");
852 usage_with_options(record_usage, options);
853 } else if (append_file) {
854 write_mode = WRITE_APPEND;
855 } else {
856 write_mode = WRITE_FORCE;
857 }
858
698 symbol__init(); 859 symbol__init();
699 860
700 if (!nr_counters) { 861 if (!nr_counters) {
@@ -703,6 +864,42 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
703 attrs[0].config = PERF_COUNT_HW_CPU_CYCLES; 864 attrs[0].config = PERF_COUNT_HW_CPU_CYCLES;
704 } 865 }
705 866
867 if (target_pid != -1) {
868 target_tid = target_pid;
869 thread_num = find_all_tid(target_pid, &all_tids);
870 if (thread_num <= 0) {
871 fprintf(stderr, "Can't find all threads of pid %d\n",
872 target_pid);
873 usage_with_options(record_usage, options);
874 }
875 } else {
876 all_tids=malloc(sizeof(pid_t));
877 if (!all_tids)
878 return -ENOMEM;
879
880 all_tids[0] = target_tid;
881 thread_num = 1;
882 }
883
884 for (i = 0; i < MAX_NR_CPUS; i++) {
885 for (j = 0; j < MAX_COUNTERS; j++) {
886 fd[i][j] = malloc(sizeof(int)*thread_num);
887 mmap_array[i][j] = zalloc(
888 sizeof(struct mmap_data)*thread_num);
889 if (!fd[i][j] || !mmap_array[i][j])
890 return -ENOMEM;
891 }
892 }
893 event_array = malloc(
894 sizeof(struct pollfd)*MAX_NR_CPUS*MAX_COUNTERS*thread_num);
895 if (!event_array)
896 return -ENOMEM;
897
898 if (user_interval != UINT_MAX)
899 default_interval = user_interval;
900 if (user_freq != UINT_MAX)
901 freq = user_freq;
902
706 /* 903 /*
707 * User specified count overrides default frequency. 904 * User specified count overrides default frequency.
708 */ 905 */
@@ -715,12 +912,5 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
715 exit(EXIT_FAILURE); 912 exit(EXIT_FAILURE);
716 } 913 }
717 914
718 for (counter = 0; counter < nr_counters; counter++) {
719 if (attrs[counter].sample_period)
720 continue;
721
722 attrs[counter].sample_period = default_interval;
723 }
724
725 return __cmd_record(argc, argv); 915 return __cmd_record(argc, argv);
726} 916}
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index f815de25d0fc..816edae7c5b2 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -14,7 +14,6 @@
14#include "util/cache.h" 14#include "util/cache.h"
15#include <linux/rbtree.h> 15#include <linux/rbtree.h>
16#include "util/symbol.h" 16#include "util/symbol.h"
17#include "util/string.h"
18#include "util/callchain.h" 17#include "util/callchain.h"
19#include "util/strlist.h" 18#include "util/strlist.h"
20#include "util/values.h" 19#include "util/values.h"
@@ -33,11 +32,11 @@
33 32
34static char const *input_name = "perf.data"; 33static char const *input_name = "perf.data";
35 34
36static int force; 35static bool force;
37static bool hide_unresolved; 36static bool hide_unresolved;
38static bool dont_use_callchains; 37static bool dont_use_callchains;
39 38
40static int show_threads; 39static bool show_threads;
41static struct perf_read_values show_threads_values; 40static struct perf_read_values show_threads_values;
42 41
43static char default_pretty_printing_style[] = "normal"; 42static char default_pretty_printing_style[] = "normal";
@@ -81,15 +80,20 @@ static int perf_session__add_hist_entry(struct perf_session *self,
81 struct addr_location *al, 80 struct addr_location *al,
82 struct sample_data *data) 81 struct sample_data *data)
83{ 82{
84 struct symbol **syms = NULL, *parent = NULL; 83 struct map_symbol *syms = NULL;
84 struct symbol *parent = NULL;
85 bool hit; 85 bool hit;
86 int err;
86 struct hist_entry *he; 87 struct hist_entry *he;
87 struct event_stat_id *stats; 88 struct event_stat_id *stats;
88 struct perf_event_attr *attr; 89 struct perf_event_attr *attr;
89 90
90 if ((sort__has_parent || symbol_conf.use_callchain) && data->callchain) 91 if ((sort__has_parent || symbol_conf.use_callchain) && data->callchain) {
91 syms = perf_session__resolve_callchain(self, al->thread, 92 syms = perf_session__resolve_callchain(self, al->thread,
92 data->callchain, &parent); 93 data->callchain, &parent);
94 if (syms == NULL)
95 return -ENOMEM;
96 }
93 97
94 attr = perf_header__find_attr(data->id, &self->header); 98 attr = perf_header__find_attr(data->id, &self->header);
95 if (attr) 99 if (attr)
@@ -104,13 +108,16 @@ static int perf_session__add_hist_entry(struct perf_session *self,
104 return -ENOMEM; 108 return -ENOMEM;
105 109
106 if (hit) 110 if (hit)
107 he->count += data->period; 111 __perf_session__add_count(he, al, data->period);
108 112
109 if (symbol_conf.use_callchain) { 113 if (symbol_conf.use_callchain) {
110 if (!hit) 114 if (!hit)
111 callchain_init(&he->callchain); 115 callchain_init(he->callchain);
112 append_chain(&he->callchain, data->callchain, syms); 116 err = append_chain(he->callchain, data->callchain, syms);
113 free(syms); 117 free(syms);
118
119 if (err)
120 return err;
114 } 121 }
115 122
116 return 0; 123 return 0;
@@ -260,13 +267,27 @@ static struct perf_event_ops event_ops = {
260 .fork = event__process_task, 267 .fork = event__process_task,
261 .lost = event__process_lost, 268 .lost = event__process_lost,
262 .read = process_read_event, 269 .read = process_read_event,
270 .attr = event__process_attr,
271 .event_type = event__process_event_type,
272 .tracing_data = event__process_tracing_data,
273 .build_id = event__process_build_id,
263}; 274};
264 275
276extern volatile int session_done;
277
278static void sig_handler(int sig __attribute__((__unused__)))
279{
280 session_done = 1;
281}
282
265static int __cmd_report(void) 283static int __cmd_report(void)
266{ 284{
267 int ret = -EINVAL; 285 int ret = -EINVAL;
268 struct perf_session *session; 286 struct perf_session *session;
269 struct rb_node *next; 287 struct rb_node *next;
288 const char *help = "For a higher level overview, try: perf report --sort comm,dso";
289
290 signal(SIGINT, sig_handler);
270 291
271 session = perf_session__new(input_name, O_RDONLY, force); 292 session = perf_session__new(input_name, O_RDONLY, force);
272 if (session == NULL) 293 if (session == NULL)
@@ -292,39 +313,49 @@ static int __cmd_report(void)
292 perf_session__fprintf(session, stdout); 313 perf_session__fprintf(session, stdout);
293 314
294 if (verbose > 2) 315 if (verbose > 2)
295 dsos__fprintf(stdout); 316 dsos__fprintf(&session->kerninfo_root, stdout);
296 317
297 next = rb_first(&session->stats_by_id); 318 next = rb_first(&session->stats_by_id);
298 while (next) { 319 while (next) {
299 struct event_stat_id *stats; 320 struct event_stat_id *stats;
321 u64 nr_hists;
300 322
301 stats = rb_entry(next, struct event_stat_id, rb_node); 323 stats = rb_entry(next, struct event_stat_id, rb_node);
302 perf_session__collapse_resort(&stats->hists); 324 perf_session__collapse_resort(&stats->hists);
303 perf_session__output_resort(&stats->hists, stats->stats.total); 325 nr_hists = perf_session__output_resort(&stats->hists,
304 if (rb_first(&session->stats_by_id) == 326 stats->stats.total);
305 rb_last(&session->stats_by_id)) 327 if (use_browser)
306 fprintf(stdout, "# Samples: %Ld\n#\n", 328 perf_session__browse_hists(&stats->hists, nr_hists,
307 stats->stats.total); 329 stats->stats.total, help,
308 else 330 input_name);
309 fprintf(stdout, "# Samples: %Ld %s\n#\n", 331 else {
310 stats->stats.total, 332 if (rb_first(&session->stats_by_id) ==
311 __event_name(stats->type, stats->config)); 333 rb_last(&session->stats_by_id))
312 334 fprintf(stdout, "# Samples: %Ld\n#\n",
313 perf_session__fprintf_hists(&stats->hists, NULL, false, stdout, 335 stats->stats.total);
336 else
337 fprintf(stdout, "# Samples: %Ld %s\n#\n",
338 stats->stats.total,
339 __event_name(stats->type, stats->config));
340
341 perf_session__fprintf_hists(&stats->hists, NULL, false, stdout,
314 stats->stats.total); 342 stats->stats.total);
315 fprintf(stdout, "\n\n"); 343 fprintf(stdout, "\n\n");
344 }
345
316 next = rb_next(&stats->rb_node); 346 next = rb_next(&stats->rb_node);
317 } 347 }
318 348
319 if (sort_order == default_sort_order && 349 if (!use_browser && sort_order == default_sort_order &&
320 parent_pattern == default_parent_pattern) 350 parent_pattern == default_parent_pattern) {
321 fprintf(stdout, "#\n# (For a higher level overview, try: perf report --sort comm,dso)\n#\n"); 351 fprintf(stdout, "#\n# (%s)\n#\n", help);
322 352
323 if (show_threads) { 353 if (show_threads) {
324 bool raw_printing_style = !strcmp(pretty_printing_style, "raw"); 354 bool style = !strcmp(pretty_printing_style, "raw");
325 perf_read_values_display(stdout, &show_threads_values, 355 perf_read_values_display(stdout, &show_threads_values,
326 raw_printing_style); 356 style);
327 perf_read_values_destroy(&show_threads_values); 357 perf_read_values_destroy(&show_threads_values);
358 }
328 } 359 }
329out_delete: 360out_delete:
330 perf_session__delete(session); 361 perf_session__delete(session);
@@ -400,7 +431,7 @@ static const char * const report_usage[] = {
400static const struct option options[] = { 431static const struct option options[] = {
401 OPT_STRING('i', "input", &input_name, "file", 432 OPT_STRING('i', "input", &input_name, "file",
402 "input file name"), 433 "input file name"),
403 OPT_BOOLEAN('v', "verbose", &verbose, 434 OPT_INCR('v', "verbose", &verbose,
404 "be more verbose (show symbol address, etc)"), 435 "be more verbose (show symbol address, etc)"),
405 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 436 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
406 "dump raw trace in ASCII"), 437 "dump raw trace in ASCII"),
@@ -419,6 +450,8 @@ static const struct option options[] = {
419 "sort by key(s): pid, comm, dso, symbol, parent"), 450 "sort by key(s): pid, comm, dso, symbol, parent"),
420 OPT_BOOLEAN('P', "full-paths", &symbol_conf.full_paths, 451 OPT_BOOLEAN('P', "full-paths", &symbol_conf.full_paths,
421 "Don't shorten the pathnames taking into account the cwd"), 452 "Don't shorten the pathnames taking into account the cwd"),
453 OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization,
454 "Show sample percentage for different cpu modes"),
422 OPT_STRING('p', "parent", &parent_pattern, "regex", 455 OPT_STRING('p', "parent", &parent_pattern, "regex",
423 "regex filter to identify parent, see: '--sort parent'"), 456 "regex filter to identify parent, see: '--sort parent'"),
424 OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other, 457 OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other,
@@ -447,7 +480,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
447{ 480{
448 argc = parse_options(argc, argv, options, report_usage, 0); 481 argc = parse_options(argc, argv, options, report_usage, 0);
449 482
450 setup_pager(); 483 if (strcmp(input_name, "-") != 0)
484 setup_browser();
451 485
452 if (symbol__init() < 0) 486 if (symbol__init() < 0)
453 return -1; 487 return -1;
@@ -455,7 +489,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
455 setup_sorting(report_usage, options); 489 setup_sorting(report_usage, options);
456 490
457 if (parent_pattern != default_parent_pattern) { 491 if (parent_pattern != default_parent_pattern) {
458 sort_dimension__add("parent"); 492 if (sort_dimension__add("parent") < 0)
493 return -1;
459 sort_parent.elide = 1; 494 sort_parent.elide = 1;
460 } else 495 } else
461 symbol_conf.exclude_other = false; 496 symbol_conf.exclude_other = false;
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 4f5a03e43444..09ddc8e6d8e1 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -68,10 +68,10 @@ enum sched_event_type {
68 68
69struct sched_atom { 69struct sched_atom {
70 enum sched_event_type type; 70 enum sched_event_type type;
71 int specific_wait;
71 u64 timestamp; 72 u64 timestamp;
72 u64 duration; 73 u64 duration;
73 unsigned long nr; 74 unsigned long nr;
74 int specific_wait;
75 sem_t *wait_sem; 75 sem_t *wait_sem;
76 struct task_desc *wakee; 76 struct task_desc *wakee;
77}; 77};
@@ -1790,7 +1790,7 @@ static const char * const sched_usage[] = {
1790static const struct option sched_options[] = { 1790static const struct option sched_options[] = {
1791 OPT_STRING('i', "input", &input_name, "file", 1791 OPT_STRING('i', "input", &input_name, "file",
1792 "input file name"), 1792 "input file name"),
1793 OPT_BOOLEAN('v', "verbose", &verbose, 1793 OPT_INCR('v', "verbose", &verbose,
1794 "be more verbose (show symbol address, etc)"), 1794 "be more verbose (show symbol address, etc)"),
1795 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 1795 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
1796 "dump raw trace in ASCII"), 1796 "dump raw trace in ASCII"),
@@ -1805,7 +1805,7 @@ static const char * const latency_usage[] = {
1805static const struct option latency_options[] = { 1805static const struct option latency_options[] = {
1806 OPT_STRING('s', "sort", &sort_order, "key[,key2...]", 1806 OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
1807 "sort by key(s): runtime, switch, avg, max"), 1807 "sort by key(s): runtime, switch, avg, max"),
1808 OPT_BOOLEAN('v', "verbose", &verbose, 1808 OPT_INCR('v', "verbose", &verbose,
1809 "be more verbose (show symbol address, etc)"), 1809 "be more verbose (show symbol address, etc)"),
1810 OPT_INTEGER('C', "CPU", &profile_cpu, 1810 OPT_INTEGER('C', "CPU", &profile_cpu,
1811 "CPU to profile on"), 1811 "CPU to profile on"),
@@ -1822,7 +1822,7 @@ static const char * const replay_usage[] = {
1822static const struct option replay_options[] = { 1822static const struct option replay_options[] = {
1823 OPT_INTEGER('r', "repeat", &replay_repeat, 1823 OPT_INTEGER('r', "repeat", &replay_repeat,
1824 "repeat the workload replay N times (-1: infinite)"), 1824 "repeat the workload replay N times (-1: infinite)"),
1825 OPT_BOOLEAN('v', "verbose", &verbose, 1825 OPT_INCR('v', "verbose", &verbose,
1826 "be more verbose (show symbol address, etc)"), 1826 "be more verbose (show symbol address, etc)"),
1827 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 1827 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
1828 "dump raw trace in ASCII"), 1828 "dump raw trace in ASCII"),
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 95db31cff6fd..e619ac89dff5 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -46,6 +46,7 @@
46#include "util/debug.h" 46#include "util/debug.h"
47#include "util/header.h" 47#include "util/header.h"
48#include "util/cpumap.h" 48#include "util/cpumap.h"
49#include "util/thread.h"
49 50
50#include <sys/prctl.h> 51#include <sys/prctl.h>
51#include <math.h> 52#include <math.h>
@@ -66,18 +67,21 @@ static struct perf_event_attr default_attrs[] = {
66 67
67}; 68};
68 69
69static int system_wide = 0; 70static bool system_wide = false;
70static unsigned int nr_cpus = 0; 71static unsigned int nr_cpus = 0;
71static int run_idx = 0; 72static int run_idx = 0;
72 73
73static int run_count = 1; 74static int run_count = 1;
74static int inherit = 1; 75static bool inherit = true;
75static int scale = 1; 76static bool scale = true;
76static pid_t target_pid = -1; 77static pid_t target_pid = -1;
78static pid_t target_tid = -1;
79static pid_t *all_tids = NULL;
80static int thread_num = 0;
77static pid_t child_pid = -1; 81static pid_t child_pid = -1;
78static int null_run = 0; 82static bool null_run = false;
79 83
80static int fd[MAX_NR_CPUS][MAX_COUNTERS]; 84static int *fd[MAX_NR_CPUS][MAX_COUNTERS];
81 85
82static int event_scaled[MAX_COUNTERS]; 86static int event_scaled[MAX_COUNTERS];
83 87
@@ -140,9 +144,11 @@ struct stats runtime_branches_stats;
140#define ERR_PERF_OPEN \ 144#define ERR_PERF_OPEN \
141"Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n" 145"Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n"
142 146
143static void create_perf_stat_counter(int counter, int pid) 147static int create_perf_stat_counter(int counter)
144{ 148{
145 struct perf_event_attr *attr = attrs + counter; 149 struct perf_event_attr *attr = attrs + counter;
150 int thread;
151 int ncreated = 0;
146 152
147 if (scale) 153 if (scale)
148 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 154 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
@@ -152,21 +158,33 @@ static void create_perf_stat_counter(int counter, int pid)
152 unsigned int cpu; 158 unsigned int cpu;
153 159
154 for (cpu = 0; cpu < nr_cpus; cpu++) { 160 for (cpu = 0; cpu < nr_cpus; cpu++) {
155 fd[cpu][counter] = sys_perf_event_open(attr, -1, cpumap[cpu], -1, 0); 161 fd[cpu][counter][0] = sys_perf_event_open(attr,
156 if (fd[cpu][counter] < 0 && verbose) 162 -1, cpumap[cpu], -1, 0);
157 fprintf(stderr, ERR_PERF_OPEN, counter, 163 if (fd[cpu][counter][0] < 0)
158 fd[cpu][counter], strerror(errno)); 164 pr_debug(ERR_PERF_OPEN, counter,
165 fd[cpu][counter][0], strerror(errno));
166 else
167 ++ncreated;
159 } 168 }
160 } else { 169 } else {
161 attr->inherit = inherit; 170 attr->inherit = inherit;
162 attr->disabled = 1; 171 if (target_pid == -1) {
163 attr->enable_on_exec = 1; 172 attr->disabled = 1;
164 173 attr->enable_on_exec = 1;
165 fd[0][counter] = sys_perf_event_open(attr, pid, -1, -1, 0); 174 }
166 if (fd[0][counter] < 0 && verbose) 175 for (thread = 0; thread < thread_num; thread++) {
167 fprintf(stderr, ERR_PERF_OPEN, counter, 176 fd[0][counter][thread] = sys_perf_event_open(attr,
168 fd[0][counter], strerror(errno)); 177 all_tids[thread], -1, -1, 0);
178 if (fd[0][counter][thread] < 0)
179 pr_debug(ERR_PERF_OPEN, counter,
180 fd[0][counter][thread],
181 strerror(errno));
182 else
183 ++ncreated;
184 }
169 } 185 }
186
187 return ncreated;
170} 188}
171 189
172/* 190/*
@@ -190,25 +208,28 @@ static void read_counter(int counter)
190 unsigned int cpu; 208 unsigned int cpu;
191 size_t res, nv; 209 size_t res, nv;
192 int scaled; 210 int scaled;
193 int i; 211 int i, thread;
194 212
195 count[0] = count[1] = count[2] = 0; 213 count[0] = count[1] = count[2] = 0;
196 214
197 nv = scale ? 3 : 1; 215 nv = scale ? 3 : 1;
198 for (cpu = 0; cpu < nr_cpus; cpu++) { 216 for (cpu = 0; cpu < nr_cpus; cpu++) {
199 if (fd[cpu][counter] < 0) 217 for (thread = 0; thread < thread_num; thread++) {
200 continue; 218 if (fd[cpu][counter][thread] < 0)
201 219 continue;
202 res = read(fd[cpu][counter], single_count, nv * sizeof(u64)); 220
203 assert(res == nv * sizeof(u64)); 221 res = read(fd[cpu][counter][thread],
204 222 single_count, nv * sizeof(u64));
205 close(fd[cpu][counter]); 223 assert(res == nv * sizeof(u64));
206 fd[cpu][counter] = -1; 224
207 225 close(fd[cpu][counter][thread]);
208 count[0] += single_count[0]; 226 fd[cpu][counter][thread] = -1;
209 if (scale) { 227
210 count[1] += single_count[1]; 228 count[0] += single_count[0];
211 count[2] += single_count[2]; 229 if (scale) {
230 count[1] += single_count[1];
231 count[2] += single_count[2];
232 }
212 } 233 }
213 } 234 }
214 235
@@ -250,10 +271,9 @@ static int run_perf_stat(int argc __used, const char **argv)
250{ 271{
251 unsigned long long t0, t1; 272 unsigned long long t0, t1;
252 int status = 0; 273 int status = 0;
253 int counter; 274 int counter, ncreated = 0;
254 int pid = target_pid;
255 int child_ready_pipe[2], go_pipe[2]; 275 int child_ready_pipe[2], go_pipe[2];
256 const bool forks = (target_pid == -1 && argc > 0); 276 const bool forks = (argc > 0);
257 char buf; 277 char buf;
258 278
259 if (!system_wide) 279 if (!system_wide)
@@ -265,10 +285,10 @@ static int run_perf_stat(int argc __used, const char **argv)
265 } 285 }
266 286
267 if (forks) { 287 if (forks) {
268 if ((pid = fork()) < 0) 288 if ((child_pid = fork()) < 0)
269 perror("failed to fork"); 289 perror("failed to fork");
270 290
271 if (!pid) { 291 if (!child_pid) {
272 close(child_ready_pipe[0]); 292 close(child_ready_pipe[0]);
273 close(go_pipe[1]); 293 close(go_pipe[1]);
274 fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); 294 fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
@@ -297,7 +317,8 @@ static int run_perf_stat(int argc __used, const char **argv)
297 exit(-1); 317 exit(-1);
298 } 318 }
299 319
300 child_pid = pid; 320 if (target_tid == -1 && target_pid == -1 && !system_wide)
321 all_tids[0] = child_pid;
301 322
302 /* 323 /*
303 * Wait for the child to be ready to exec. 324 * Wait for the child to be ready to exec.
@@ -310,7 +331,16 @@ static int run_perf_stat(int argc __used, const char **argv)
310 } 331 }
311 332
312 for (counter = 0; counter < nr_counters; counter++) 333 for (counter = 0; counter < nr_counters; counter++)
313 create_perf_stat_counter(counter, pid); 334 ncreated += create_perf_stat_counter(counter);
335
336 if (ncreated == 0) {
337 pr_err("No permission to collect %sstats.\n"
338 "Consider tweaking /proc/sys/kernel/perf_event_paranoid.\n",
339 system_wide ? "system-wide " : "");
340 if (child_pid != -1)
341 kill(child_pid, SIGTERM);
342 return -1;
343 }
314 344
315 /* 345 /*
316 * Enable counters and exec the command: 346 * Enable counters and exec the command:
@@ -321,7 +351,7 @@ static int run_perf_stat(int argc __used, const char **argv)
321 close(go_pipe[1]); 351 close(go_pipe[1]);
322 wait(&status); 352 wait(&status);
323 } else { 353 } else {
324 while(!done); 354 while(!done) sleep(1);
325 } 355 }
326 356
327 t1 = rdclock(); 357 t1 = rdclock();
@@ -429,12 +459,14 @@ static void print_stat(int argc, const char **argv)
429 459
430 fprintf(stderr, "\n"); 460 fprintf(stderr, "\n");
431 fprintf(stderr, " Performance counter stats for "); 461 fprintf(stderr, " Performance counter stats for ");
432 if(target_pid == -1) { 462 if(target_pid == -1 && target_tid == -1) {
433 fprintf(stderr, "\'%s", argv[0]); 463 fprintf(stderr, "\'%s", argv[0]);
434 for (i = 1; i < argc; i++) 464 for (i = 1; i < argc; i++)
435 fprintf(stderr, " %s", argv[i]); 465 fprintf(stderr, " %s", argv[i]);
436 }else 466 } else if (target_pid != -1)
437 fprintf(stderr, "task pid \'%d", target_pid); 467 fprintf(stderr, "process id \'%d", target_pid);
468 else
469 fprintf(stderr, "thread id \'%d", target_tid);
438 470
439 fprintf(stderr, "\'"); 471 fprintf(stderr, "\'");
440 if (run_count > 1) 472 if (run_count > 1)
@@ -459,7 +491,7 @@ static volatile int signr = -1;
459 491
460static void skip_signal(int signo) 492static void skip_signal(int signo)
461{ 493{
462 if(target_pid != -1) 494 if(child_pid == -1)
463 done = 1; 495 done = 1;
464 496
465 signr = signo; 497 signr = signo;
@@ -489,12 +521,14 @@ static const struct option options[] = {
489 OPT_BOOLEAN('i', "inherit", &inherit, 521 OPT_BOOLEAN('i', "inherit", &inherit,
490 "child tasks inherit counters"), 522 "child tasks inherit counters"),
491 OPT_INTEGER('p', "pid", &target_pid, 523 OPT_INTEGER('p', "pid", &target_pid,
492 "stat events on existing pid"), 524 "stat events on existing process id"),
525 OPT_INTEGER('t', "tid", &target_tid,
526 "stat events on existing thread id"),
493 OPT_BOOLEAN('a', "all-cpus", &system_wide, 527 OPT_BOOLEAN('a', "all-cpus", &system_wide,
494 "system-wide collection from all CPUs"), 528 "system-wide collection from all CPUs"),
495 OPT_BOOLEAN('c', "scale", &scale, 529 OPT_BOOLEAN('c', "scale", &scale,
496 "scale/normalize counters"), 530 "scale/normalize counters"),
497 OPT_BOOLEAN('v', "verbose", &verbose, 531 OPT_INCR('v', "verbose", &verbose,
498 "be more verbose (show counter open errors, etc)"), 532 "be more verbose (show counter open errors, etc)"),
499 OPT_INTEGER('r', "repeat", &run_count, 533 OPT_INTEGER('r', "repeat", &run_count,
500 "repeat command and print average + stddev (max: 100)"), 534 "repeat command and print average + stddev (max: 100)"),
@@ -506,10 +540,11 @@ static const struct option options[] = {
506int cmd_stat(int argc, const char **argv, const char *prefix __used) 540int cmd_stat(int argc, const char **argv, const char *prefix __used)
507{ 541{
508 int status; 542 int status;
543 int i,j;
509 544
510 argc = parse_options(argc, argv, options, stat_usage, 545 argc = parse_options(argc, argv, options, stat_usage,
511 PARSE_OPT_STOP_AT_NON_OPTION); 546 PARSE_OPT_STOP_AT_NON_OPTION);
512 if (!argc && target_pid == -1) 547 if (!argc && target_pid == -1 && target_tid == -1)
513 usage_with_options(stat_usage, options); 548 usage_with_options(stat_usage, options);
514 if (run_count <= 0) 549 if (run_count <= 0)
515 usage_with_options(stat_usage, options); 550 usage_with_options(stat_usage, options);
@@ -525,6 +560,31 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
525 else 560 else
526 nr_cpus = 1; 561 nr_cpus = 1;
527 562
563 if (target_pid != -1) {
564 target_tid = target_pid;
565 thread_num = find_all_tid(target_pid, &all_tids);
566 if (thread_num <= 0) {
567 fprintf(stderr, "Can't find all threads of pid %d\n",
568 target_pid);
569 usage_with_options(stat_usage, options);
570 }
571 } else {
572 all_tids=malloc(sizeof(pid_t));
573 if (!all_tids)
574 return -ENOMEM;
575
576 all_tids[0] = target_tid;
577 thread_num = 1;
578 }
579
580 for (i = 0; i < MAX_NR_CPUS; i++) {
581 for (j = 0; j < MAX_COUNTERS; j++) {
582 fd[i][j] = malloc(sizeof(int)*thread_num);
583 if (!fd[i][j])
584 return -ENOMEM;
585 }
586 }
587
528 /* 588 /*
529 * We dont want to block the signals - that would cause 589 * We dont want to block the signals - that would cause
530 * child tasks to inherit that and Ctrl-C would not work. 590 * child tasks to inherit that and Ctrl-C would not work.
@@ -543,7 +603,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
543 status = run_perf_stat(argc, argv); 603 status = run_perf_stat(argc, argv);
544 } 604 }
545 605
546 print_stat(argc, argv); 606 if (status != -1)
607 print_stat(argc, argv);
547 608
548 return status; 609 return status;
549} 610}
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 0d4d8ff7914b..96f4a092df37 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -21,7 +21,6 @@
21#include "util/cache.h" 21#include "util/cache.h"
22#include <linux/rbtree.h> 22#include <linux/rbtree.h>
23#include "util/symbol.h" 23#include "util/symbol.h"
24#include "util/string.h"
25#include "util/callchain.h" 24#include "util/callchain.h"
26#include "util/strlist.h" 25#include "util/strlist.h"
27 26
@@ -43,7 +42,7 @@ static u64 turbo_frequency;
43 42
44static u64 first_time, last_time; 43static u64 first_time, last_time;
45 44
46static int power_only; 45static bool power_only;
47 46
48 47
49struct per_pid; 48struct per_pid;
@@ -78,8 +77,6 @@ struct per_pid {
78 77
79 struct per_pidcomm *all; 78 struct per_pidcomm *all;
80 struct per_pidcomm *current; 79 struct per_pidcomm *current;
81
82 int painted;
83}; 80};
84 81
85 82
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 1f529321607e..dfd7ea7dabdd 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -55,9 +55,9 @@
55#include <linux/unistd.h> 55#include <linux/unistd.h>
56#include <linux/types.h> 56#include <linux/types.h>
57 57
58static int fd[MAX_NR_CPUS][MAX_COUNTERS]; 58static int *fd[MAX_NR_CPUS][MAX_COUNTERS];
59 59
60static int system_wide = 0; 60static bool system_wide = false;
61 61
62static int default_interval = 0; 62static int default_interval = 0;
63 63
@@ -65,18 +65,21 @@ static int count_filter = 5;
65static int print_entries; 65static int print_entries;
66 66
67static int target_pid = -1; 67static int target_pid = -1;
68static int inherit = 0; 68static int target_tid = -1;
69static pid_t *all_tids = NULL;
70static int thread_num = 0;
71static bool inherit = false;
69static int profile_cpu = -1; 72static int profile_cpu = -1;
70static int nr_cpus = 0; 73static int nr_cpus = 0;
71static unsigned int realtime_prio = 0; 74static unsigned int realtime_prio = 0;
72static int group = 0; 75static bool group = false;
73static unsigned int page_size; 76static unsigned int page_size;
74static unsigned int mmap_pages = 16; 77static unsigned int mmap_pages = 16;
75static int freq = 1000; /* 1 KHz */ 78static int freq = 1000; /* 1 KHz */
76 79
77static int delay_secs = 2; 80static int delay_secs = 2;
78static int zero = 0; 81static bool zero = false;
79static int dump_symtab = 0; 82static bool dump_symtab = false;
80 83
81static bool hide_kernel_symbols = false; 84static bool hide_kernel_symbols = false;
82static bool hide_user_symbols = false; 85static bool hide_user_symbols = false;
@@ -133,7 +136,7 @@ static inline struct symbol *sym_entry__symbol(struct sym_entry *self)
133 return ((void *)self) + symbol_conf.priv_size; 136 return ((void *)self) + symbol_conf.priv_size;
134} 137}
135 138
136static void get_term_dimensions(struct winsize *ws) 139void get_term_dimensions(struct winsize *ws)
137{ 140{
138 char *s = getenv("LINES"); 141 char *s = getenv("LINES");
139 142
@@ -169,7 +172,7 @@ static void sig_winch_handler(int sig __used)
169 update_print_entries(&winsize); 172 update_print_entries(&winsize);
170} 173}
171 174
172static void parse_source(struct sym_entry *syme) 175static int parse_source(struct sym_entry *syme)
173{ 176{
174 struct symbol *sym; 177 struct symbol *sym;
175 struct sym_entry_source *source; 178 struct sym_entry_source *source;
@@ -180,12 +183,21 @@ static void parse_source(struct sym_entry *syme)
180 u64 len; 183 u64 len;
181 184
182 if (!syme) 185 if (!syme)
183 return; 186 return -1;
187
188 sym = sym_entry__symbol(syme);
189 map = syme->map;
190
191 /*
192 * We can't annotate with just /proc/kallsyms
193 */
194 if (map->dso->origin == DSO__ORIG_KERNEL)
195 return -1;
184 196
185 if (syme->src == NULL) { 197 if (syme->src == NULL) {
186 syme->src = zalloc(sizeof(*source)); 198 syme->src = zalloc(sizeof(*source));
187 if (syme->src == NULL) 199 if (syme->src == NULL)
188 return; 200 return -1;
189 pthread_mutex_init(&syme->src->lock, NULL); 201 pthread_mutex_init(&syme->src->lock, NULL);
190 } 202 }
191 203
@@ -195,9 +207,6 @@ static void parse_source(struct sym_entry *syme)
195 pthread_mutex_lock(&source->lock); 207 pthread_mutex_lock(&source->lock);
196 goto out_assign; 208 goto out_assign;
197 } 209 }
198
199 sym = sym_entry__symbol(syme);
200 map = syme->map;
201 path = map->dso->long_name; 210 path = map->dso->long_name;
202 211
203 len = sym->end - sym->start; 212 len = sym->end - sym->start;
@@ -209,7 +218,7 @@ static void parse_source(struct sym_entry *syme)
209 218
210 file = popen(command, "r"); 219 file = popen(command, "r");
211 if (!file) 220 if (!file)
212 return; 221 return -1;
213 222
214 pthread_mutex_lock(&source->lock); 223 pthread_mutex_lock(&source->lock);
215 source->lines_tail = &source->lines; 224 source->lines_tail = &source->lines;
@@ -245,6 +254,7 @@ static void parse_source(struct sym_entry *syme)
245out_assign: 254out_assign:
246 sym_filter_entry = syme; 255 sym_filter_entry = syme;
247 pthread_mutex_unlock(&source->lock); 256 pthread_mutex_unlock(&source->lock);
257 return 0;
248} 258}
249 259
250static void __zero_source_counters(struct sym_entry *syme) 260static void __zero_source_counters(struct sym_entry *syme)
@@ -410,7 +420,9 @@ static double sym_weight(const struct sym_entry *sym)
410} 420}
411 421
412static long samples; 422static long samples;
413static long userspace_samples; 423static long kernel_samples, us_samples;
424static long exact_samples;
425static long guest_us_samples, guest_kernel_samples;
414static const char CONSOLE_CLEAR[] = ""; 426static const char CONSOLE_CLEAR[] = "";
415 427
416static void __list_insert_active_sym(struct sym_entry *syme) 428static void __list_insert_active_sym(struct sym_entry *syme)
@@ -450,7 +462,11 @@ static void print_sym_table(void)
450 int printed = 0, j; 462 int printed = 0, j;
451 int counter, snap = !display_weighted ? sym_counter : 0; 463 int counter, snap = !display_weighted ? sym_counter : 0;
452 float samples_per_sec = samples/delay_secs; 464 float samples_per_sec = samples/delay_secs;
453 float ksamples_per_sec = (samples-userspace_samples)/delay_secs; 465 float ksamples_per_sec = kernel_samples/delay_secs;
466 float us_samples_per_sec = (us_samples)/delay_secs;
467 float guest_kernel_samples_per_sec = (guest_kernel_samples)/delay_secs;
468 float guest_us_samples_per_sec = (guest_us_samples)/delay_secs;
469 float esamples_percent = (100.0*exact_samples)/samples;
454 float sum_ksamples = 0.0; 470 float sum_ksamples = 0.0;
455 struct sym_entry *syme, *n; 471 struct sym_entry *syme, *n;
456 struct rb_root tmp = RB_ROOT; 472 struct rb_root tmp = RB_ROOT;
@@ -458,7 +474,8 @@ static void print_sym_table(void)
458 int sym_width = 0, dso_width = 0, dso_short_width = 0; 474 int sym_width = 0, dso_width = 0, dso_short_width = 0;
459 const int win_width = winsize.ws_col - 1; 475 const int win_width = winsize.ws_col - 1;
460 476
461 samples = userspace_samples = 0; 477 samples = us_samples = kernel_samples = exact_samples = 0;
478 guest_kernel_samples = guest_us_samples = 0;
462 479
463 /* Sort the active symbols */ 480 /* Sort the active symbols */
464 pthread_mutex_lock(&active_symbols_lock); 481 pthread_mutex_lock(&active_symbols_lock);
@@ -489,9 +506,30 @@ static void print_sym_table(void)
489 puts(CONSOLE_CLEAR); 506 puts(CONSOLE_CLEAR);
490 507
491 printf("%-*.*s\n", win_width, win_width, graph_dotted_line); 508 printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
492 printf( " PerfTop:%8.0f irqs/sec kernel:%4.1f%% [", 509 if (!perf_guest) {
493 samples_per_sec, 510 printf(" PerfTop:%8.0f irqs/sec kernel:%4.1f%%"
494 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec))); 511 " exact: %4.1f%% [",
512 samples_per_sec,
513 100.0 - (100.0 * ((samples_per_sec - ksamples_per_sec) /
514 samples_per_sec)),
515 esamples_percent);
516 } else {
517 printf(" PerfTop:%8.0f irqs/sec kernel:%4.1f%% us:%4.1f%%"
518 " guest kernel:%4.1f%% guest us:%4.1f%%"
519 " exact: %4.1f%% [",
520 samples_per_sec,
521 100.0 - (100.0 * ((samples_per_sec-ksamples_per_sec) /
522 samples_per_sec)),
523 100.0 - (100.0 * ((samples_per_sec-us_samples_per_sec) /
524 samples_per_sec)),
525 100.0 - (100.0 * ((samples_per_sec -
526 guest_kernel_samples_per_sec) /
527 samples_per_sec)),
528 100.0 - (100.0 * ((samples_per_sec -
529 guest_us_samples_per_sec) /
530 samples_per_sec)),
531 esamples_percent);
532 }
495 533
496 if (nr_counters == 1 || !display_weighted) { 534 if (nr_counters == 1 || !display_weighted) {
497 printf("%Ld", (u64)attrs[0].sample_period); 535 printf("%Ld", (u64)attrs[0].sample_period);
@@ -514,13 +552,15 @@ static void print_sym_table(void)
514 552
515 if (target_pid != -1) 553 if (target_pid != -1)
516 printf(" (target_pid: %d", target_pid); 554 printf(" (target_pid: %d", target_pid);
555 else if (target_tid != -1)
556 printf(" (target_tid: %d", target_tid);
517 else 557 else
518 printf(" (all"); 558 printf(" (all");
519 559
520 if (profile_cpu != -1) 560 if (profile_cpu != -1)
521 printf(", cpu: %d)\n", profile_cpu); 561 printf(", cpu: %d)\n", profile_cpu);
522 else { 562 else {
523 if (target_pid != -1) 563 if (target_tid != -1)
524 printf(")\n"); 564 printf(")\n");
525 else 565 else
526 printf(", %d CPUs)\n", nr_cpus); 566 printf(", %d CPUs)\n", nr_cpus);
@@ -582,7 +622,6 @@ static void print_sym_table(void)
582 622
583 syme = rb_entry(nd, struct sym_entry, rb_node); 623 syme = rb_entry(nd, struct sym_entry, rb_node);
584 sym = sym_entry__symbol(syme); 624 sym = sym_entry__symbol(syme);
585
586 if (++printed > print_entries || (int)syme->snap_count < count_filter) 625 if (++printed > print_entries || (int)syme->snap_count < count_filter)
587 continue; 626 continue;
588 627
@@ -746,7 +785,7 @@ static int key_mapped(int c)
746 return 0; 785 return 0;
747} 786}
748 787
749static void handle_keypress(int c) 788static void handle_keypress(struct perf_session *session, int c)
750{ 789{
751 if (!key_mapped(c)) { 790 if (!key_mapped(c)) {
752 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; 791 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
@@ -815,7 +854,7 @@ static void handle_keypress(int c)
815 case 'Q': 854 case 'Q':
816 printf("exiting.\n"); 855 printf("exiting.\n");
817 if (dump_symtab) 856 if (dump_symtab)
818 dsos__fprintf(stderr); 857 dsos__fprintf(&session->kerninfo_root, stderr);
819 exit(0); 858 exit(0);
820 case 's': 859 case 's':
821 prompt_symbol(&sym_filter_entry, "Enter details symbol"); 860 prompt_symbol(&sym_filter_entry, "Enter details symbol");
@@ -839,7 +878,7 @@ static void handle_keypress(int c)
839 display_weighted = ~display_weighted; 878 display_weighted = ~display_weighted;
840 break; 879 break;
841 case 'z': 880 case 'z':
842 zero = ~zero; 881 zero = !zero;
843 break; 882 break;
844 default: 883 default:
845 break; 884 break;
@@ -851,6 +890,7 @@ static void *display_thread(void *arg __used)
851 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; 890 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
852 struct termios tc, save; 891 struct termios tc, save;
853 int delay_msecs, c; 892 int delay_msecs, c;
893 struct perf_session *session = (struct perf_session *) arg;
854 894
855 tcgetattr(0, &save); 895 tcgetattr(0, &save);
856 tc = save; 896 tc = save;
@@ -871,7 +911,7 @@ repeat:
871 c = getc(stdin); 911 c = getc(stdin);
872 tcsetattr(0, TCSAFLUSH, &save); 912 tcsetattr(0, TCSAFLUSH, &save);
873 913
874 handle_keypress(c); 914 handle_keypress(session, c);
875 goto repeat; 915 goto repeat;
876 916
877 return NULL; 917 return NULL;
@@ -942,24 +982,49 @@ static void event__process_sample(const event_t *self,
942 u64 ip = self->ip.ip; 982 u64 ip = self->ip.ip;
943 struct sym_entry *syme; 983 struct sym_entry *syme;
944 struct addr_location al; 984 struct addr_location al;
985 struct kernel_info *kerninfo;
945 u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 986 u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
946 987
947 ++samples; 988 ++samples;
948 989
949 switch (origin) { 990 switch (origin) {
950 case PERF_RECORD_MISC_USER: 991 case PERF_RECORD_MISC_USER:
951 ++userspace_samples; 992 ++us_samples;
952 if (hide_user_symbols) 993 if (hide_user_symbols)
953 return; 994 return;
995 kerninfo = kerninfo__findhost(&session->kerninfo_root);
954 break; 996 break;
955 case PERF_RECORD_MISC_KERNEL: 997 case PERF_RECORD_MISC_KERNEL:
998 ++kernel_samples;
956 if (hide_kernel_symbols) 999 if (hide_kernel_symbols)
957 return; 1000 return;
1001 kerninfo = kerninfo__findhost(&session->kerninfo_root);
1002 break;
1003 case PERF_RECORD_MISC_GUEST_KERNEL:
1004 ++guest_kernel_samples;
1005 kerninfo = kerninfo__find(&session->kerninfo_root,
1006 self->ip.pid);
958 break; 1007 break;
1008 case PERF_RECORD_MISC_GUEST_USER:
1009 ++guest_us_samples;
1010 /*
1011 * TODO: we don't process guest user from host side
1012 * except simple counting.
1013 */
1014 return;
959 default: 1015 default:
960 return; 1016 return;
961 } 1017 }
962 1018
1019 if (!kerninfo && perf_guest) {
1020 pr_err("Can't find guest [%d]'s kernel information\n",
1021 self->ip.pid);
1022 return;
1023 }
1024
1025 if (self->header.misc & PERF_RECORD_MISC_EXACT)
1026 exact_samples++;
1027
963 if (event__preprocess_sample(self, session, &al, symbol_filter) < 0 || 1028 if (event__preprocess_sample(self, session, &al, symbol_filter) < 0 ||
964 al.filtered) 1029 al.filtered)
965 return; 1030 return;
@@ -976,7 +1041,7 @@ static void event__process_sample(const event_t *self,
976 * --hide-kernel-symbols, even if the user specifies an 1041 * --hide-kernel-symbols, even if the user specifies an
977 * invalid --vmlinux ;-) 1042 * invalid --vmlinux ;-)
978 */ 1043 */
979 if (al.map == session->vmlinux_maps[MAP__FUNCTION] && 1044 if (al.map == kerninfo->vmlinux_maps[MAP__FUNCTION] &&
980 RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) { 1045 RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
981 pr_err("The %s file can't be used\n", 1046 pr_err("The %s file can't be used\n",
982 symbol_conf.vmlinux_name); 1047 symbol_conf.vmlinux_name);
@@ -990,7 +1055,17 @@ static void event__process_sample(const event_t *self,
990 if (sym_filter_entry_sched) { 1055 if (sym_filter_entry_sched) {
991 sym_filter_entry = sym_filter_entry_sched; 1056 sym_filter_entry = sym_filter_entry_sched;
992 sym_filter_entry_sched = NULL; 1057 sym_filter_entry_sched = NULL;
993 parse_source(sym_filter_entry); 1058 if (parse_source(sym_filter_entry) < 0) {
1059 struct symbol *sym = sym_entry__symbol(sym_filter_entry);
1060
1061 pr_err("Can't annotate %s", sym->name);
1062 if (sym_filter_entry->map->dso->origin == DSO__ORIG_KERNEL) {
1063 pr_err(": No vmlinux file was found in the path:\n");
1064 vmlinux_path__fprintf(stderr);
1065 } else
1066 pr_err(".\n");
1067 exit(1);
1068 }
994 } 1069 }
995 1070
996 syme = symbol__priv(al.sym); 1071 syme = symbol__priv(al.sym);
@@ -1106,16 +1181,21 @@ static void perf_session__mmap_read_counter(struct perf_session *self,
1106 md->prev = old; 1181 md->prev = old;
1107} 1182}
1108 1183
1109static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; 1184static struct pollfd *event_array;
1110static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; 1185static struct mmap_data *mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
1111 1186
1112static void perf_session__mmap_read(struct perf_session *self) 1187static void perf_session__mmap_read(struct perf_session *self)
1113{ 1188{
1114 int i, counter; 1189 int i, counter, thread_index;
1115 1190
1116 for (i = 0; i < nr_cpus; i++) { 1191 for (i = 0; i < nr_cpus; i++) {
1117 for (counter = 0; counter < nr_counters; counter++) 1192 for (counter = 0; counter < nr_counters; counter++)
1118 perf_session__mmap_read_counter(self, &mmap_array[i][counter]); 1193 for (thread_index = 0;
1194 thread_index < thread_num;
1195 thread_index++) {
1196 perf_session__mmap_read_counter(self,
1197 &mmap_array[i][counter][thread_index]);
1198 }
1119 } 1199 }
1120} 1200}
1121 1201
@@ -1126,9 +1206,10 @@ static void start_counter(int i, int counter)
1126{ 1206{
1127 struct perf_event_attr *attr; 1207 struct perf_event_attr *attr;
1128 int cpu; 1208 int cpu;
1209 int thread_index;
1129 1210
1130 cpu = profile_cpu; 1211 cpu = profile_cpu;
1131 if (target_pid == -1 && profile_cpu == -1) 1212 if (target_tid == -1 && profile_cpu == -1)
1132 cpu = cpumap[i]; 1213 cpu = cpumap[i];
1133 1214
1134 attr = attrs + counter; 1215 attr = attrs + counter;
@@ -1144,55 +1225,58 @@ static void start_counter(int i, int counter)
1144 attr->inherit = (cpu < 0) && inherit; 1225 attr->inherit = (cpu < 0) && inherit;
1145 attr->mmap = 1; 1226 attr->mmap = 1;
1146 1227
1228 for (thread_index = 0; thread_index < thread_num; thread_index++) {
1147try_again: 1229try_again:
1148 fd[i][counter] = sys_perf_event_open(attr, target_pid, cpu, group_fd, 0); 1230 fd[i][counter][thread_index] = sys_perf_event_open(attr,
1149 1231 all_tids[thread_index], cpu, group_fd, 0);
1150 if (fd[i][counter] < 0) { 1232
1151 int err = errno; 1233 if (fd[i][counter][thread_index] < 0) {
1234 int err = errno;
1235
1236 if (err == EPERM || err == EACCES)
1237 die("No permission - are you root?\n");
1238 /*
1239 * If it's cycles then fall back to hrtimer
1240 * based cpu-clock-tick sw counter, which
1241 * is always available even if no PMU support:
1242 */
1243 if (attr->type == PERF_TYPE_HARDWARE
1244 && attr->config == PERF_COUNT_HW_CPU_CYCLES) {
1245
1246 if (verbose)
1247 warning(" ... trying to fall back to cpu-clock-ticks\n");
1248
1249 attr->type = PERF_TYPE_SOFTWARE;
1250 attr->config = PERF_COUNT_SW_CPU_CLOCK;
1251 goto try_again;
1252 }
1253 printf("\n");
1254 error("perfcounter syscall returned with %d (%s)\n",
1255 fd[i][counter][thread_index], strerror(err));
1256 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
1257 exit(-1);
1258 }
1259 assert(fd[i][counter][thread_index] >= 0);
1260 fcntl(fd[i][counter][thread_index], F_SETFL, O_NONBLOCK);
1152 1261
1153 if (err == EPERM || err == EACCES)
1154 die("No permission - are you root?\n");
1155 /* 1262 /*
1156 * If it's cycles then fall back to hrtimer 1263 * First counter acts as the group leader:
1157 * based cpu-clock-tick sw counter, which
1158 * is always available even if no PMU support:
1159 */ 1264 */
1160 if (attr->type == PERF_TYPE_HARDWARE 1265 if (group && group_fd == -1)
1161 && attr->config == PERF_COUNT_HW_CPU_CYCLES) { 1266 group_fd = fd[i][counter][thread_index];
1162 1267
1163 if (verbose) 1268 event_array[nr_poll].fd = fd[i][counter][thread_index];
1164 warning(" ... trying to fall back to cpu-clock-ticks\n"); 1269 event_array[nr_poll].events = POLLIN;
1165 1270 nr_poll++;
1166 attr->type = PERF_TYPE_SOFTWARE; 1271
1167 attr->config = PERF_COUNT_SW_CPU_CLOCK; 1272 mmap_array[i][counter][thread_index].counter = counter;
1168 goto try_again; 1273 mmap_array[i][counter][thread_index].prev = 0;
1169 } 1274 mmap_array[i][counter][thread_index].mask = mmap_pages*page_size - 1;
1170 printf("\n"); 1275 mmap_array[i][counter][thread_index].base = mmap(NULL, (mmap_pages+1)*page_size,
1171 error("perfcounter syscall returned with %d (%s)\n", 1276 PROT_READ, MAP_SHARED, fd[i][counter][thread_index], 0);
1172 fd[i][counter], strerror(err)); 1277 if (mmap_array[i][counter][thread_index].base == MAP_FAILED)
1173 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); 1278 die("failed to mmap with %d (%s)\n", errno, strerror(errno));
1174 exit(-1);
1175 } 1279 }
1176 assert(fd[i][counter] >= 0);
1177 fcntl(fd[i][counter], F_SETFL, O_NONBLOCK);
1178
1179 /*
1180 * First counter acts as the group leader:
1181 */
1182 if (group && group_fd == -1)
1183 group_fd = fd[i][counter];
1184
1185 event_array[nr_poll].fd = fd[i][counter];
1186 event_array[nr_poll].events = POLLIN;
1187 nr_poll++;
1188
1189 mmap_array[i][counter].counter = counter;
1190 mmap_array[i][counter].prev = 0;
1191 mmap_array[i][counter].mask = mmap_pages*page_size - 1;
1192 mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
1193 PROT_READ, MAP_SHARED, fd[i][counter], 0);
1194 if (mmap_array[i][counter].base == MAP_FAILED)
1195 die("failed to mmap with %d (%s)\n", errno, strerror(errno));
1196} 1280}
1197 1281
1198static int __cmd_top(void) 1282static int __cmd_top(void)
@@ -1208,8 +1292,8 @@ static int __cmd_top(void)
1208 if (session == NULL) 1292 if (session == NULL)
1209 return -ENOMEM; 1293 return -ENOMEM;
1210 1294
1211 if (target_pid != -1) 1295 if (target_tid != -1)
1212 event__synthesize_thread(target_pid, event__process, session); 1296 event__synthesize_thread(target_tid, event__process, session);
1213 else 1297 else
1214 event__synthesize_threads(event__process, session); 1298 event__synthesize_threads(event__process, session);
1215 1299
@@ -1220,11 +1304,11 @@ static int __cmd_top(void)
1220 } 1304 }
1221 1305
1222 /* Wait for a minimal set of events before starting the snapshot */ 1306 /* Wait for a minimal set of events before starting the snapshot */
1223 poll(event_array, nr_poll, 100); 1307 poll(&event_array[0], nr_poll, 100);
1224 1308
1225 perf_session__mmap_read(session); 1309 perf_session__mmap_read(session);
1226 1310
1227 if (pthread_create(&thread, NULL, display_thread, NULL)) { 1311 if (pthread_create(&thread, NULL, display_thread, session)) {
1228 printf("Could not create display thread.\n"); 1312 printf("Could not create display thread.\n");
1229 exit(-1); 1313 exit(-1);
1230 } 1314 }
@@ -1263,7 +1347,9 @@ static const struct option options[] = {
1263 OPT_INTEGER('c', "count", &default_interval, 1347 OPT_INTEGER('c', "count", &default_interval,
1264 "event period to sample"), 1348 "event period to sample"),
1265 OPT_INTEGER('p', "pid", &target_pid, 1349 OPT_INTEGER('p', "pid", &target_pid,
1266 "profile events on existing pid"), 1350 "profile events on existing process id"),
1351 OPT_INTEGER('t', "tid", &target_tid,
1352 "profile events on existing thread id"),
1267 OPT_BOOLEAN('a', "all-cpus", &system_wide, 1353 OPT_BOOLEAN('a', "all-cpus", &system_wide,
1268 "system-wide collection from all CPUs"), 1354 "system-wide collection from all CPUs"),
1269 OPT_INTEGER('C', "CPU", &profile_cpu, 1355 OPT_INTEGER('C', "CPU", &profile_cpu,
@@ -1296,7 +1382,7 @@ static const struct option options[] = {
1296 "display this many functions"), 1382 "display this many functions"),
1297 OPT_BOOLEAN('U', "hide_user_symbols", &hide_user_symbols, 1383 OPT_BOOLEAN('U', "hide_user_symbols", &hide_user_symbols,
1298 "hide user symbols"), 1384 "hide user symbols"),
1299 OPT_BOOLEAN('v', "verbose", &verbose, 1385 OPT_INCR('v', "verbose", &verbose,
1300 "be more verbose (show counter open errors, etc)"), 1386 "be more verbose (show counter open errors, etc)"),
1301 OPT_END() 1387 OPT_END()
1302}; 1388};
@@ -1304,6 +1390,7 @@ static const struct option options[] = {
1304int cmd_top(int argc, const char **argv, const char *prefix __used) 1390int cmd_top(int argc, const char **argv, const char *prefix __used)
1305{ 1391{
1306 int counter; 1392 int counter;
1393 int i,j;
1307 1394
1308 page_size = sysconf(_SC_PAGE_SIZE); 1395 page_size = sysconf(_SC_PAGE_SIZE);
1309 1396
@@ -1311,8 +1398,39 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
1311 if (argc) 1398 if (argc)
1312 usage_with_options(top_usage, options); 1399 usage_with_options(top_usage, options);
1313 1400
1401 if (target_pid != -1) {
1402 target_tid = target_pid;
1403 thread_num = find_all_tid(target_pid, &all_tids);
1404 if (thread_num <= 0) {
1405 fprintf(stderr, "Can't find all threads of pid %d\n",
1406 target_pid);
1407 usage_with_options(top_usage, options);
1408 }
1409 } else {
1410 all_tids=malloc(sizeof(pid_t));
1411 if (!all_tids)
1412 return -ENOMEM;
1413
1414 all_tids[0] = target_tid;
1415 thread_num = 1;
1416 }
1417
1418 for (i = 0; i < MAX_NR_CPUS; i++) {
1419 for (j = 0; j < MAX_COUNTERS; j++) {
1420 fd[i][j] = malloc(sizeof(int)*thread_num);
1421 mmap_array[i][j] = zalloc(
1422 sizeof(struct mmap_data)*thread_num);
1423 if (!fd[i][j] || !mmap_array[i][j])
1424 return -ENOMEM;
1425 }
1426 }
1427 event_array = malloc(
1428 sizeof(struct pollfd)*MAX_NR_CPUS*MAX_COUNTERS*thread_num);
1429 if (!event_array)
1430 return -ENOMEM;
1431
1314 /* CPU and PID are mutually exclusive */ 1432 /* CPU and PID are mutually exclusive */
1315 if (target_pid != -1 && profile_cpu != -1) { 1433 if (target_tid > 0 && profile_cpu != -1) {
1316 printf("WARNING: PID switch overriding CPU\n"); 1434 printf("WARNING: PID switch overriding CPU\n");
1317 sleep(1); 1435 sleep(1);
1318 profile_cpu = -1; 1436 profile_cpu = -1;
@@ -1353,7 +1471,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
1353 attrs[counter].sample_period = default_interval; 1471 attrs[counter].sample_period = default_interval;
1354 } 1472 }
1355 1473
1356 if (target_pid != -1 || profile_cpu != -1) 1474 if (target_tid != -1 || profile_cpu != -1)
1357 nr_cpus = 1; 1475 nr_cpus = 1;
1358 else 1476 else
1359 nr_cpus = read_cpu_map(); 1477 nr_cpus = read_cpu_map();
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 407041d20de0..2eefb33c9679 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -104,10 +104,23 @@ static int process_sample_event(event_t *event, struct perf_session *session)
104static struct perf_event_ops event_ops = { 104static struct perf_event_ops event_ops = {
105 .sample = process_sample_event, 105 .sample = process_sample_event,
106 .comm = event__process_comm, 106 .comm = event__process_comm,
107 .attr = event__process_attr,
108 .event_type = event__process_event_type,
109 .tracing_data = event__process_tracing_data,
110 .build_id = event__process_build_id,
107}; 111};
108 112
113extern volatile int session_done;
114
115static void sig_handler(int sig __unused)
116{
117 session_done = 1;
118}
119
109static int __cmd_trace(struct perf_session *session) 120static int __cmd_trace(struct perf_session *session)
110{ 121{
122 signal(SIGINT, sig_handler);
123
111 return perf_session__process_events(session, &event_ops); 124 return perf_session__process_events(session, &event_ops);
112} 125}
113 126
@@ -505,7 +518,7 @@ static const char * const trace_usage[] = {
505static const struct option options[] = { 518static const struct option options[] = {
506 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 519 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
507 "dump raw trace in ASCII"), 520 "dump raw trace in ASCII"),
508 OPT_BOOLEAN('v', "verbose", &verbose, 521 OPT_INCR('v', "verbose", &verbose,
509 "be more verbose (show symbol address, etc)"), 522 "be more verbose (show symbol address, etc)"),
510 OPT_BOOLEAN('L', "Latency", &latency_format, 523 OPT_BOOLEAN('L', "Latency", &latency_format,
511 "show latency attributes (irqs/preemption disabled, etc)"), 524 "show latency attributes (irqs/preemption disabled, etc)"),
@@ -548,6 +561,65 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
548 suffix = REPORT_SUFFIX; 561 suffix = REPORT_SUFFIX;
549 } 562 }
550 563
564 if (!suffix && argc >= 2 && strncmp(argv[1], "-", strlen("-")) != 0) {
565 char *record_script_path, *report_script_path;
566 int live_pipe[2];
567 pid_t pid;
568
569 record_script_path = get_script_path(argv[1], RECORD_SUFFIX);
570 if (!record_script_path) {
571 fprintf(stderr, "record script not found\n");
572 return -1;
573 }
574
575 report_script_path = get_script_path(argv[1], REPORT_SUFFIX);
576 if (!report_script_path) {
577 fprintf(stderr, "report script not found\n");
578 return -1;
579 }
580
581 if (pipe(live_pipe) < 0) {
582 perror("failed to create pipe");
583 exit(-1);
584 }
585
586 pid = fork();
587 if (pid < 0) {
588 perror("failed to fork");
589 exit(-1);
590 }
591
592 if (!pid) {
593 dup2(live_pipe[1], 1);
594 close(live_pipe[0]);
595
596 __argv = malloc(5 * sizeof(const char *));
597 __argv[0] = "/bin/sh";
598 __argv[1] = record_script_path;
599 __argv[2] = "-o";
600 __argv[3] = "-";
601 __argv[4] = NULL;
602
603 execvp("/bin/sh", (char **)__argv);
604 exit(-1);
605 }
606
607 dup2(live_pipe[0], 0);
608 close(live_pipe[1]);
609
610 __argv = malloc((argc + 3) * sizeof(const char *));
611 __argv[0] = "/bin/sh";
612 __argv[1] = report_script_path;
613 for (i = 2; i < argc; i++)
614 __argv[i] = argv[i];
615 __argv[i++] = "-i";
616 __argv[i++] = "-";
617 __argv[i++] = NULL;
618
619 execvp("/bin/sh", (char **)__argv);
620 exit(-1);
621 }
622
551 if (suffix) { 623 if (suffix) {
552 script_path = get_script_path(argv[2], suffix); 624 script_path = get_script_path(argv[2], suffix);
553 if (!script_path) { 625 if (!script_path) {
@@ -580,7 +652,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
580 if (session == NULL) 652 if (session == NULL)
581 return -ENOMEM; 653 return -ENOMEM;
582 654
583 if (!perf_session__has_traces(session, "record -R")) 655 if (strcmp(input_name, "-") &&
656 !perf_session__has_traces(session, "record -R"))
584 return -EINVAL; 657 return -EINVAL;
585 658
586 if (generate_script_lang) { 659 if (generate_script_lang) {
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index 10fe49e7048a..ab28bca92e52 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -32,5 +32,6 @@ extern int cmd_version(int argc, const char **argv, const char *prefix);
32extern int cmd_probe(int argc, const char **argv, const char *prefix); 32extern int cmd_probe(int argc, const char **argv, const char *prefix);
33extern int cmd_kmem(int argc, const char **argv, const char *prefix); 33extern int cmd_kmem(int argc, const char **argv, const char *prefix);
34extern int cmd_lock(int argc, const char **argv, const char *prefix); 34extern int cmd_lock(int argc, const char **argv, const char *prefix);
35extern int cmd_kvm(int argc, const char **argv, const char *prefix);
35 36
36#endif 37#endif
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
index db6ee94d4a8e..2a1162d413a8 100644
--- a/tools/perf/command-list.txt
+++ b/tools/perf/command-list.txt
@@ -19,3 +19,4 @@ perf-trace mainporcelain common
19perf-probe mainporcelain common 19perf-probe mainporcelain common
20perf-kmem mainporcelain common 20perf-kmem mainporcelain common
21perf-lock mainporcelain common 21perf-lock mainporcelain common
22perf-kvm mainporcelain common
diff --git a/tools/perf/perf-archive.sh b/tools/perf/perf-archive.sh
index 910468e6e01c..2e7a4f417e20 100644
--- a/tools/perf/perf-archive.sh
+++ b/tools/perf/perf-archive.sh
@@ -30,4 +30,7 @@ done
30 30
31tar cfj $PERF_DATA.tar.bz2 -C $DEBUGDIR -T $MANIFEST 31tar cfj $PERF_DATA.tar.bz2 -C $DEBUGDIR -T $MANIFEST
32rm -f $MANIFEST $BUILDIDS 32rm -f $MANIFEST $BUILDIDS
33echo -e "Now please run:\n"
34echo -e "$ tar xvf $PERF_DATA.tar.bz2 -C ~/.debug\n"
35echo "wherever you need to run 'perf report' on."
33exit 0 36exit 0
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index cd32c200cdb3..985cdb4bd005 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -13,9 +13,10 @@
13#include "util/quote.h" 13#include "util/quote.h"
14#include "util/run-command.h" 14#include "util/run-command.h"
15#include "util/parse-events.h" 15#include "util/parse-events.h"
16#include "util/string.h"
17#include "util/debugfs.h" 16#include "util/debugfs.h"
18 17
18bool use_browser;
19
19const char perf_usage_string[] = 20const char perf_usage_string[] =
20 "perf [--version] [--help] COMMAND [ARGS]"; 21 "perf [--version] [--help] COMMAND [ARGS]";
21 22
@@ -262,6 +263,8 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
262 set_debugfs_path(); 263 set_debugfs_path();
263 264
264 status = p->fn(argc, argv, prefix); 265 status = p->fn(argc, argv, prefix);
266 exit_browser(status);
267
265 if (status) 268 if (status)
266 return status & 0xff; 269 return status & 0xff;
267 270
@@ -304,6 +307,7 @@ static void handle_internal_command(int argc, const char **argv)
304 { "probe", cmd_probe, 0 }, 307 { "probe", cmd_probe, 0 },
305 { "kmem", cmd_kmem, 0 }, 308 { "kmem", cmd_kmem, 0 },
306 { "lock", cmd_lock, 0 }, 309 { "lock", cmd_lock, 0 },
310 { "kvm", cmd_kvm, 0 },
307 }; 311 };
308 unsigned int i; 312 unsigned int i;
309 static const char ext[] = STRIP_EXTENSION; 313 static const char ext[] = STRIP_EXTENSION;
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 6fb379bc1d1f..02821febb704 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -1,6 +1,10 @@
1#ifndef _PERF_PERF_H 1#ifndef _PERF_PERF_H
2#define _PERF_PERF_H 2#define _PERF_PERF_H
3 3
4struct winsize;
5
6void get_term_dimensions(struct winsize *ws);
7
4#if defined(__i386__) 8#if defined(__i386__)
5#include "../../arch/x86/include/asm/unistd.h" 9#include "../../arch/x86/include/asm/unistd.h"
6#define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") 10#define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
@@ -102,8 +106,6 @@ static inline unsigned long long rdclock(void)
102#define __user 106#define __user
103#define asmlinkage 107#define asmlinkage
104 108
105#define __used __attribute__((__unused__))
106
107#define unlikely(x) __builtin_expect(!!(x), 0) 109#define unlikely(x) __builtin_expect(!!(x), 0)
108#define min(x, y) ({ \ 110#define min(x, y) ({ \
109 typeof(x) _min1 = (x); \ 111 typeof(x) _min1 = (x); \
@@ -129,4 +131,6 @@ struct ip_callchain {
129 u64 ips[0]; 131 u64 ips[0];
130}; 132};
131 133
134extern int perf_host, perf_guest;
135
132#endif 136#endif
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm
index f869c48dc9b0..d94b40c8ac85 100644
--- a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm
@@ -15,6 +15,7 @@ our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
15 15
16our @EXPORT = qw( 16our @EXPORT = qw(
17avg nsecs nsecs_secs nsecs_nsecs nsecs_usecs print_nsecs 17avg nsecs nsecs_secs nsecs_nsecs nsecs_usecs print_nsecs
18clear_term
18); 19);
19 20
20our $VERSION = '0.01'; 21our $VERSION = '0.01';
@@ -55,6 +56,11 @@ sub nsecs_str {
55 return $str; 56 return $str;
56} 57}
57 58
59sub clear_term
60{
61 print "\x1b[H\x1b[2J";
62}
63
581; 641;
59__END__ 65__END__
60=head1 NAME 66=head1 NAME
diff --git a/tools/perf/scripts/perl/bin/failed-syscalls-record b/tools/perf/scripts/perl/bin/failed-syscalls-record
index f8885d389e6f..6ad9b8f5f009 100644
--- a/tools/perf/scripts/perl/bin/failed-syscalls-record
+++ b/tools/perf/scripts/perl/bin/failed-syscalls-record
@@ -1,2 +1,2 @@
1#!/bin/bash 1#!/bin/bash
2perf record -c 1 -f -a -M -R -e raw_syscalls:sys_exit 2perf record -c 1 -f -a -M -R -e raw_syscalls:sys_exit $@
diff --git a/tools/perf/scripts/perl/bin/failed-syscalls-report b/tools/perf/scripts/perl/bin/failed-syscalls-report
index 8bfc660e5056..f6346082a8fc 100644
--- a/tools/perf/scripts/perl/bin/failed-syscalls-report
+++ b/tools/perf/scripts/perl/bin/failed-syscalls-report
@@ -1,4 +1,10 @@
1#!/bin/bash 1#!/bin/bash
2# description: system-wide failed syscalls 2# description: system-wide failed syscalls
3# args: [comm] 3# args: [comm]
4perf trace -s ~/libexec/perf-core/scripts/perl/failed-syscalls.pl $1 4if [ $# -gt 0 ] ; then
5 if ! expr match "$1" "-" ; then
6 comm=$1
7 shift
8 fi
9fi
10perf trace $@ -s ~/libexec/perf-core/scripts/perl/failed-syscalls.pl $comm
diff --git a/tools/perf/scripts/perl/bin/rw-by-file-record b/tools/perf/scripts/perl/bin/rw-by-file-record
index b25056ebf963..a828679837a8 100644
--- a/tools/perf/scripts/perl/bin/rw-by-file-record
+++ b/tools/perf/scripts/perl/bin/rw-by-file-record
@@ -1,2 +1,3 @@
1#!/bin/bash 1#!/bin/bash
2perf record -c 1 -f -a -M -R -e syscalls:sys_enter_read -e syscalls:sys_enter_write 2perf record -c 1 -f -a -M -R -e syscalls:sys_enter_read -e syscalls:sys_enter_write $@
3
diff --git a/tools/perf/scripts/perl/bin/rw-by-file-report b/tools/perf/scripts/perl/bin/rw-by-file-report
index eddb9ccce6a5..d83070b7eeb5 100644
--- a/tools/perf/scripts/perl/bin/rw-by-file-report
+++ b/tools/perf/scripts/perl/bin/rw-by-file-report
@@ -1,7 +1,13 @@
1#!/bin/bash 1#!/bin/bash
2# description: r/w activity for a program, by file 2# description: r/w activity for a program, by file
3# args: <comm> 3# args: <comm>
4perf trace -s ~/libexec/perf-core/scripts/perl/rw-by-file.pl $1 4if [ $# -lt 1 ] ; then
5 echo "usage: rw-by-file <comm>"
6 exit
7fi
8comm=$1
9shift
10perf trace $@ -s ~/libexec/perf-core/scripts/perl/rw-by-file.pl $comm
5 11
6 12
7 13
diff --git a/tools/perf/scripts/perl/bin/rw-by-pid-record b/tools/perf/scripts/perl/bin/rw-by-pid-record
index 8903979c5b6c..63976bf11e8b 100644
--- a/tools/perf/scripts/perl/bin/rw-by-pid-record
+++ b/tools/perf/scripts/perl/bin/rw-by-pid-record
@@ -1,2 +1,2 @@
1#!/bin/bash 1#!/bin/bash
2perf record -c 1 -f -a -M -R -e syscalls:sys_enter_read -e syscalls:sys_exit_read -e syscalls:sys_enter_write -e syscalls:sys_exit_write 2perf record -c 1 -f -a -M -R -e syscalls:sys_enter_read -e syscalls:sys_exit_read -e syscalls:sys_enter_write -e syscalls:sys_exit_write $@
diff --git a/tools/perf/scripts/perl/bin/rw-by-pid-report b/tools/perf/scripts/perl/bin/rw-by-pid-report
index 7f44c25cc857..7ef46983f62f 100644
--- a/tools/perf/scripts/perl/bin/rw-by-pid-report
+++ b/tools/perf/scripts/perl/bin/rw-by-pid-report
@@ -1,6 +1,6 @@
1#!/bin/bash 1#!/bin/bash
2# description: system-wide r/w activity 2# description: system-wide r/w activity
3perf trace -s ~/libexec/perf-core/scripts/perl/rw-by-pid.pl 3perf trace $@ -s ~/libexec/perf-core/scripts/perl/rw-by-pid.pl
4 4
5 5
6 6
diff --git a/tools/perf/scripts/perl/bin/rwtop-record b/tools/perf/scripts/perl/bin/rwtop-record
new file mode 100644
index 000000000000..63976bf11e8b
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/rwtop-record
@@ -0,0 +1,2 @@
1#!/bin/bash
2perf record -c 1 -f -a -M -R -e syscalls:sys_enter_read -e syscalls:sys_exit_read -e syscalls:sys_enter_write -e syscalls:sys_exit_write $@
diff --git a/tools/perf/scripts/perl/bin/rwtop-report b/tools/perf/scripts/perl/bin/rwtop-report
new file mode 100644
index 000000000000..93e698cd3f38
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/rwtop-report
@@ -0,0 +1,23 @@
1#!/bin/bash
2# description: system-wide r/w top
3# args: [interval]
4n_args=0
5for i in "$@"
6do
7 if expr match "$i" "-" > /dev/null ; then
8 break
9 fi
10 n_args=$(( $n_args + 1 ))
11done
12if [ "$n_args" -gt 1 ] ; then
13 echo "usage: rwtop-report [interval]"
14 exit
15fi
16if [ "$n_args" -gt 0 ] ; then
17 interval=$1
18 shift
19fi
20perf trace $@ -s ~/libexec/perf-core/scripts/perl/rwtop.pl $interval
21
22
23
diff --git a/tools/perf/scripts/perl/bin/wakeup-latency-record b/tools/perf/scripts/perl/bin/wakeup-latency-record
index 6abedda911a4..9c0cf588ff8c 100644
--- a/tools/perf/scripts/perl/bin/wakeup-latency-record
+++ b/tools/perf/scripts/perl/bin/wakeup-latency-record
@@ -1,5 +1,5 @@
1#!/bin/bash 1#!/bin/bash
2perf record -c 1 -f -a -M -R -e sched:sched_switch -e sched:sched_wakeup 2perf record -c 1 -f -a -M -R -e sched:sched_switch -e sched:sched_wakeup $@
3 3
4 4
5 5
diff --git a/tools/perf/scripts/perl/bin/wakeup-latency-report b/tools/perf/scripts/perl/bin/wakeup-latency-report
index fce3adcb3249..a0d898f9ca1d 100644
--- a/tools/perf/scripts/perl/bin/wakeup-latency-report
+++ b/tools/perf/scripts/perl/bin/wakeup-latency-report
@@ -1,6 +1,6 @@
1#!/bin/bash 1#!/bin/bash
2# description: system-wide min/max/avg wakeup latency 2# description: system-wide min/max/avg wakeup latency
3perf trace -s ~/libexec/perf-core/scripts/perl/wakeup-latency.pl 3perf trace $@ -s ~/libexec/perf-core/scripts/perl/wakeup-latency.pl
4 4
5 5
6 6
diff --git a/tools/perf/scripts/perl/bin/workqueue-stats-record b/tools/perf/scripts/perl/bin/workqueue-stats-record
index fce6637b19ba..c2a1a9421133 100644
--- a/tools/perf/scripts/perl/bin/workqueue-stats-record
+++ b/tools/perf/scripts/perl/bin/workqueue-stats-record
@@ -1,2 +1,2 @@
1#!/bin/bash 1#!/bin/bash
2perf record -c 1 -f -a -M -R -e workqueue:workqueue_creation -e workqueue:workqueue_destruction -e workqueue:workqueue_execution -e workqueue:workqueue_insertion 2perf record -c 1 -f -a -M -R -e workqueue:workqueue_creation -e workqueue:workqueue_destruction -e workqueue:workqueue_execution -e workqueue:workqueue_insertion $@
diff --git a/tools/perf/scripts/perl/bin/workqueue-stats-report b/tools/perf/scripts/perl/bin/workqueue-stats-report
index 71cfbd182fb9..35081132ef97 100644
--- a/tools/perf/scripts/perl/bin/workqueue-stats-report
+++ b/tools/perf/scripts/perl/bin/workqueue-stats-report
@@ -1,6 +1,6 @@
1#!/bin/bash 1#!/bin/bash
2# description: workqueue stats (ins/exe/create/destroy) 2# description: workqueue stats (ins/exe/create/destroy)
3perf trace -s ~/libexec/perf-core/scripts/perl/workqueue-stats.pl 3perf trace $@ -s ~/libexec/perf-core/scripts/perl/workqueue-stats.pl
4 4
5 5
6 6
diff --git a/tools/perf/scripts/perl/rwtop.pl b/tools/perf/scripts/perl/rwtop.pl
new file mode 100644
index 000000000000..ec2ab49a6f25
--- /dev/null
+++ b/tools/perf/scripts/perl/rwtop.pl
@@ -0,0 +1,177 @@
1#!/usr/bin/perl -w
2# (c) 2010, Tom Zanussi <tzanussi@gmail.com>
3# Licensed under the terms of the GNU GPL License version 2
4
5# read/write top
6#
7# Periodically displays system-wide r/w call activity, broken down by
8# pid. If an [interval] arg is specified, the display will be
9# refreshed every [interval] seconds. The default interval is 3
10# seconds.
11
12use 5.010000;
13use strict;
14use warnings;
15
16use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
17use lib "./Perf-Trace-Util/lib";
18use Perf::Trace::Core;
19use Perf::Trace::Util;
20
21my $default_interval = 3;
22my $nlines = 20;
23my $print_thread;
24
25my %reads;
26my %writes;
27
28my $interval = shift;
29if (!$interval) {
30 $interval = $default_interval;
31}
32
33sub syscalls::sys_exit_read
34{
35 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
36 $common_pid, $common_comm,
37 $nr, $ret) = @_;
38
39 if ($ret > 0) {
40 $reads{$common_pid}{bytes_read} += $ret;
41 } else {
42 if (!defined ($reads{$common_pid}{bytes_read})) {
43 $reads{$common_pid}{bytes_read} = 0;
44 }
45 $reads{$common_pid}{errors}{$ret}++;
46 }
47}
48
49sub syscalls::sys_enter_read
50{
51 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
52 $common_pid, $common_comm,
53 $nr, $fd, $buf, $count) = @_;
54
55 $reads{$common_pid}{bytes_requested} += $count;
56 $reads{$common_pid}{total_reads}++;
57 $reads{$common_pid}{comm} = $common_comm;
58}
59
60sub syscalls::sys_exit_write
61{
62 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
63 $common_pid, $common_comm,
64 $nr, $ret) = @_;
65
66 if ($ret <= 0) {
67 $writes{$common_pid}{errors}{$ret}++;
68 }
69}
70
71sub syscalls::sys_enter_write
72{
73 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
74 $common_pid, $common_comm,
75 $nr, $fd, $buf, $count) = @_;
76
77 $writes{$common_pid}{bytes_written} += $count;
78 $writes{$common_pid}{total_writes}++;
79 $writes{$common_pid}{comm} = $common_comm;
80}
81
82sub trace_begin
83{
84 $SIG{ALRM} = \&print_totals;
85 alarm 1;
86}
87
88sub trace_end
89{
90 print_unhandled();
91 print_totals();
92}
93
94sub print_totals
95{
96 my $count;
97
98 $count = 0;
99
100 clear_term();
101
102 printf("\nread counts by pid:\n\n");
103
104 printf("%6s %20s %10s %10s %10s\n", "pid", "comm",
105 "# reads", "bytes_req", "bytes_read");
106 printf("%6s %-20s %10s %10s %10s\n", "------", "--------------------",
107 "----------", "----------", "----------");
108
109 foreach my $pid (sort {$reads{$b}{bytes_read} <=>
110 $reads{$a}{bytes_read}} keys %reads) {
111 my $comm = $reads{$pid}{comm};
112 my $total_reads = $reads{$pid}{total_reads};
113 my $bytes_requested = $reads{$pid}{bytes_requested};
114 my $bytes_read = $reads{$pid}{bytes_read};
115
116 printf("%6s %-20s %10s %10s %10s\n", $pid, $comm,
117 $total_reads, $bytes_requested, $bytes_read);
118
119 if (++$count == $nlines) {
120 last;
121 }
122 }
123
124 $count = 0;
125
126 printf("\nwrite counts by pid:\n\n");
127
128 printf("%6s %20s %10s %13s\n", "pid", "comm",
129 "# writes", "bytes_written");
130 printf("%6s %-20s %10s %13s\n", "------", "--------------------",
131 "----------", "-------------");
132
133 foreach my $pid (sort {$writes{$b}{bytes_written} <=>
134 $writes{$a}{bytes_written}} keys %writes) {
135 my $comm = $writes{$pid}{comm};
136 my $total_writes = $writes{$pid}{total_writes};
137 my $bytes_written = $writes{$pid}{bytes_written};
138
139 printf("%6s %-20s %10s %13s\n", $pid, $comm,
140 $total_writes, $bytes_written);
141
142 if (++$count == $nlines) {
143 last;
144 }
145 }
146
147 %reads = ();
148 %writes = ();
149 alarm $interval;
150}
151
152my %unhandled;
153
154sub print_unhandled
155{
156 if ((scalar keys %unhandled) == 0) {
157 return;
158 }
159
160 print "\nunhandled events:\n\n";
161
162 printf("%-40s %10s\n", "event", "count");
163 printf("%-40s %10s\n", "----------------------------------------",
164 "-----------");
165
166 foreach my $event_name (keys %unhandled) {
167 printf("%-40s %10d\n", $event_name, $unhandled{$event_name});
168 }
169}
170
171sub trace_unhandled
172{
173 my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
174 $common_pid, $common_comm) = @_;
175
176 $unhandled{$event_name}++;
177}
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
index 83e91435ed09..9689bc0acd9f 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
@@ -23,3 +23,6 @@ def nsecs_nsecs(nsecs):
23def nsecs_str(nsecs): 23def nsecs_str(nsecs):
24 str = "%5u.%09u" % (nsecs_secs(nsecs), nsecs_nsecs(nsecs)), 24 str = "%5u.%09u" % (nsecs_secs(nsecs), nsecs_nsecs(nsecs)),
25 return str 25 return str
26
27def clear_term():
28 print("\x1b[H\x1b[2J")
diff --git a/tools/perf/scripts/python/bin/failed-syscalls-by-pid-record b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-record
index f8885d389e6f..6ad9b8f5f009 100644
--- a/tools/perf/scripts/python/bin/failed-syscalls-by-pid-record
+++ b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-record
@@ -1,2 +1,2 @@
1#!/bin/bash 1#!/bin/bash
2perf record -c 1 -f -a -M -R -e raw_syscalls:sys_exit 2perf record -c 1 -f -a -M -R -e raw_syscalls:sys_exit $@
diff --git a/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report
index 1e0c0a860c87..8c128eff9c0a 100644
--- a/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report
+++ b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report
@@ -1,4 +1,10 @@
1#!/bin/bash 1#!/bin/bash
2# description: system-wide failed syscalls, by pid 2# description: system-wide failed syscalls, by pid
3# args: [comm] 3# args: [comm]
4perf trace -s ~/libexec/perf-core/scripts/python/failed-syscalls-by-pid.py $1 4if [ $# -gt 0 ] ; then
5 if ! expr match "$1" "-" ; then
6 comm=$1
7 shift
8 fi
9fi
10perf trace $@ -s ~/libexec/perf-core/scripts/python/failed-syscalls-by-pid.py $comm
diff --git a/tools/perf/scripts/python/bin/sctop-record b/tools/perf/scripts/python/bin/sctop-record
new file mode 100644
index 000000000000..27ccffa26ab4
--- /dev/null
+++ b/tools/perf/scripts/python/bin/sctop-record
@@ -0,0 +1,2 @@
1#!/bin/bash
2perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter $@
diff --git a/tools/perf/scripts/python/bin/sctop-report b/tools/perf/scripts/python/bin/sctop-report
new file mode 100644
index 000000000000..b01c842ae7b4
--- /dev/null
+++ b/tools/perf/scripts/python/bin/sctop-report
@@ -0,0 +1,24 @@
1#!/bin/bash
2# description: syscall top
3# args: [comm] [interval]
4n_args=0
5for i in "$@"
6do
7 if expr match "$i" "-" > /dev/null ; then
8 break
9 fi
10 n_args=$(( $n_args + 1 ))
11done
12if [ "$n_args" -gt 2 ] ; then
13 echo "usage: sctop-report [comm] [interval]"
14 exit
15fi
16if [ "$n_args" -gt 1 ] ; then
17 comm=$1
18 interval=$2
19 shift 2
20elif [ "$n_args" -gt 0 ] ; then
21 interval=$1
22 shift
23fi
24perf trace $@ -s ~/libexec/perf-core/scripts/python/sctop.py $comm $interval
diff --git a/tools/perf/scripts/python/bin/syscall-counts-by-pid-record b/tools/perf/scripts/python/bin/syscall-counts-by-pid-record
index 45a8c50359da..27ccffa26ab4 100644
--- a/tools/perf/scripts/python/bin/syscall-counts-by-pid-record
+++ b/tools/perf/scripts/python/bin/syscall-counts-by-pid-record
@@ -1,2 +1,2 @@
1#!/bin/bash 1#!/bin/bash
2perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter 2perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter $@
diff --git a/tools/perf/scripts/python/bin/syscall-counts-by-pid-report b/tools/perf/scripts/python/bin/syscall-counts-by-pid-report
index f8044d192271..c53362e48602 100644
--- a/tools/perf/scripts/python/bin/syscall-counts-by-pid-report
+++ b/tools/perf/scripts/python/bin/syscall-counts-by-pid-report
@@ -1,4 +1,10 @@
1#!/bin/bash 1#!/bin/bash
2# description: system-wide syscall counts, by pid 2# description: system-wide syscall counts, by pid
3# args: [comm] 3# args: [comm]
4perf trace -s ~/libexec/perf-core/scripts/python/syscall-counts-by-pid.py $1 4if [ $# -gt 0 ] ; then
5 if ! expr match "$1" "-" ; then
6 comm=$1
7 shift
8 fi
9fi
10perf trace $@ -s ~/libexec/perf-core/scripts/python/syscall-counts-by-pid.py $comm
diff --git a/tools/perf/scripts/python/bin/syscall-counts-record b/tools/perf/scripts/python/bin/syscall-counts-record
index 45a8c50359da..27ccffa26ab4 100644
--- a/tools/perf/scripts/python/bin/syscall-counts-record
+++ b/tools/perf/scripts/python/bin/syscall-counts-record
@@ -1,2 +1,2 @@
1#!/bin/bash 1#!/bin/bash
2perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter 2perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter $@
diff --git a/tools/perf/scripts/python/bin/syscall-counts-report b/tools/perf/scripts/python/bin/syscall-counts-report
index a366aa61612f..8c21552b3cdc 100644
--- a/tools/perf/scripts/python/bin/syscall-counts-report
+++ b/tools/perf/scripts/python/bin/syscall-counts-report
@@ -1,4 +1,10 @@
1#!/bin/bash 1#!/bin/bash
2# description: system-wide syscall counts 2# description: system-wide syscall counts
3# args: [comm] 3# args: [comm]
4perf trace -s ~/libexec/perf-core/scripts/python/syscall-counts.py $1 4if [ $# -gt 0 ] ; then
5 if ! expr match "$1" "-" ; then
6 comm=$1
7 shift
8 fi
9fi
10perf trace $@ -s ~/libexec/perf-core/scripts/python/syscall-counts.py $comm
diff --git a/tools/perf/scripts/python/sctop.py b/tools/perf/scripts/python/sctop.py
new file mode 100644
index 000000000000..6cafad40c296
--- /dev/null
+++ b/tools/perf/scripts/python/sctop.py
@@ -0,0 +1,78 @@
1# system call top
2# (c) 2010, Tom Zanussi <tzanussi@gmail.com>
3# Licensed under the terms of the GNU GPL License version 2
4#
5# Periodically displays system-wide system call totals, broken down by
6# syscall. If a [comm] arg is specified, only syscalls called by
7# [comm] are displayed. If an [interval] arg is specified, the display
8# will be refreshed every [interval] seconds. The default interval is
9# 3 seconds.
10
11import thread
12import time
13import os
14import sys
15
16sys.path.append(os.environ['PERF_EXEC_PATH'] + \
17 '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
18
19from perf_trace_context import *
20from Core import *
21from Util import *
22
23usage = "perf trace -s syscall-counts.py [comm] [interval]\n";
24
25for_comm = None
26default_interval = 3
27interval = default_interval
28
29if len(sys.argv) > 3:
30 sys.exit(usage)
31
32if len(sys.argv) > 2:
33 for_comm = sys.argv[1]
34 interval = int(sys.argv[2])
35elif len(sys.argv) > 1:
36 try:
37 interval = int(sys.argv[1])
38 except ValueError:
39 for_comm = sys.argv[1]
40 interval = default_interval
41
42syscalls = autodict()
43
44def trace_begin():
45 thread.start_new_thread(print_syscall_totals, (interval,))
46 pass
47
48def raw_syscalls__sys_enter(event_name, context, common_cpu,
49 common_secs, common_nsecs, common_pid, common_comm,
50 id, args):
51 if for_comm is not None:
52 if common_comm != for_comm:
53 return
54 try:
55 syscalls[id] += 1
56 except TypeError:
57 syscalls[id] = 1
58
59def print_syscall_totals(interval):
60 while 1:
61 clear_term()
62 if for_comm is not None:
63 print "\nsyscall events for %s:\n\n" % (for_comm),
64 else:
65 print "\nsyscall events:\n\n",
66
67 print "%-40s %10s\n" % ("event", "count"),
68 print "%-40s %10s\n" % ("----------------------------------------", \
69 "----------"),
70
71 for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \
72 reverse = True):
73 try:
74 print "%-40d %10d\n" % (id, val),
75 except TypeError:
76 pass
77 syscalls.clear()
78 time.sleep(interval)
diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN
index 54552a00a117..49ece7921914 100755
--- a/tools/perf/util/PERF-VERSION-GEN
+++ b/tools/perf/util/PERF-VERSION-GEN
@@ -1,6 +1,10 @@
1#!/bin/sh 1#!/bin/sh
2 2
3GVF=PERF-VERSION-FILE 3if [ $# -eq 1 ] ; then
4 OUTPUT=$1
5fi
6
7GVF=${OUTPUT}PERF-VERSION-FILE
4DEF_VER=v0.0.2.PERF 8DEF_VER=v0.0.2.PERF
5 9
6LF=' 10LF='
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 04904b35ba81..0f60a3906808 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -24,7 +24,7 @@ static int build_id__mark_dso_hit(event_t *event, struct perf_session *session)
24 } 24 }
25 25
26 thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION, 26 thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION,
27 event->ip.ip, &al); 27 event->ip.pid, event->ip.ip, &al);
28 28
29 if (al.map != NULL) 29 if (al.map != NULL)
30 al.map->dso->hit = 1; 30 al.map->dso->hit = 1;
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
index 918eb376abe3..4b9aab7f0405 100644
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h
@@ -1,6 +1,7 @@
1#ifndef __PERF_CACHE_H 1#ifndef __PERF_CACHE_H
2#define __PERF_CACHE_H 2#define __PERF_CACHE_H
3 3
4#include <stdbool.h>
4#include "util.h" 5#include "util.h"
5#include "strbuf.h" 6#include "strbuf.h"
6#include "../perf.h" 7#include "../perf.h"
@@ -69,6 +70,19 @@ extern const char *pager_program;
69extern int pager_in_use(void); 70extern int pager_in_use(void);
70extern int pager_use_color; 71extern int pager_use_color;
71 72
73extern bool use_browser;
74
75#ifdef NO_NEWT_SUPPORT
76static inline void setup_browser(void)
77{
78 setup_pager();
79}
80static inline void exit_browser(bool wait_for_ok __used) {}
81#else
82void setup_browser(void);
83void exit_browser(bool wait_for_ok);
84#endif
85
72extern const char *editor_program; 86extern const char *editor_program;
73extern const char *excludes_file; 87extern const char *excludes_file;
74 88
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index b3b71258272a..db628af6d20d 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com> 2 * Copyright (C) 2009-2010, Frederic Weisbecker <fweisbec@gmail.com>
3 * 3 *
4 * Handle the callchains from the stream in an ad-hoc radix tree and then 4 * Handle the callchains from the stream in an ad-hoc radix tree and then
5 * sort them in an rbtree. 5 * sort them in an rbtree.
@@ -183,12 +183,23 @@ create_child(struct callchain_node *parent, bool inherit_children)
183 return new; 183 return new;
184} 184}
185 185
186
187struct resolved_ip {
188 u64 ip;
189 struct map_symbol ms;
190};
191
192struct resolved_chain {
193 u64 nr;
194 struct resolved_ip ips[0];
195};
196
197
186/* 198/*
187 * Fill the node with callchain values 199 * Fill the node with callchain values
188 */ 200 */
189static void 201static void
190fill_node(struct callchain_node *node, struct ip_callchain *chain, 202fill_node(struct callchain_node *node, struct resolved_chain *chain, int start)
191 int start, struct symbol **syms)
192{ 203{
193 unsigned int i; 204 unsigned int i;
194 205
@@ -200,8 +211,8 @@ fill_node(struct callchain_node *node, struct ip_callchain *chain,
200 perror("not enough memory for the code path tree"); 211 perror("not enough memory for the code path tree");
201 return; 212 return;
202 } 213 }
203 call->ip = chain->ips[i]; 214 call->ip = chain->ips[i].ip;
204 call->sym = syms[i]; 215 call->ms = chain->ips[i].ms;
205 list_add_tail(&call->list, &node->val); 216 list_add_tail(&call->list, &node->val);
206 } 217 }
207 node->val_nr = chain->nr - start; 218 node->val_nr = chain->nr - start;
@@ -210,13 +221,13 @@ fill_node(struct callchain_node *node, struct ip_callchain *chain,
210} 221}
211 222
212static void 223static void
213add_child(struct callchain_node *parent, struct ip_callchain *chain, 224add_child(struct callchain_node *parent, struct resolved_chain *chain,
214 int start, struct symbol **syms) 225 int start)
215{ 226{
216 struct callchain_node *new; 227 struct callchain_node *new;
217 228
218 new = create_child(parent, false); 229 new = create_child(parent, false);
219 fill_node(new, chain, start, syms); 230 fill_node(new, chain, start);
220 231
221 new->children_hit = 0; 232 new->children_hit = 0;
222 new->hit = 1; 233 new->hit = 1;
@@ -228,9 +239,8 @@ add_child(struct callchain_node *parent, struct ip_callchain *chain,
228 * Then create another child to host the given callchain of new branch 239 * Then create another child to host the given callchain of new branch
229 */ 240 */
230static void 241static void
231split_add_child(struct callchain_node *parent, struct ip_callchain *chain, 242split_add_child(struct callchain_node *parent, struct resolved_chain *chain,
232 struct callchain_list *to_split, int idx_parents, int idx_local, 243 struct callchain_list *to_split, int idx_parents, int idx_local)
233 struct symbol **syms)
234{ 244{
235 struct callchain_node *new; 245 struct callchain_node *new;
236 struct list_head *old_tail; 246 struct list_head *old_tail;
@@ -257,7 +267,7 @@ split_add_child(struct callchain_node *parent, struct ip_callchain *chain,
257 /* create a new child for the new branch if any */ 267 /* create a new child for the new branch if any */
258 if (idx_total < chain->nr) { 268 if (idx_total < chain->nr) {
259 parent->hit = 0; 269 parent->hit = 0;
260 add_child(parent, chain, idx_total, syms); 270 add_child(parent, chain, idx_total);
261 parent->children_hit++; 271 parent->children_hit++;
262 } else { 272 } else {
263 parent->hit = 1; 273 parent->hit = 1;
@@ -265,32 +275,33 @@ split_add_child(struct callchain_node *parent, struct ip_callchain *chain,
265} 275}
266 276
267static int 277static int
268__append_chain(struct callchain_node *root, struct ip_callchain *chain, 278__append_chain(struct callchain_node *root, struct resolved_chain *chain,
269 unsigned int start, struct symbol **syms); 279 unsigned int start);
270 280
271static void 281static void
272__append_chain_children(struct callchain_node *root, struct ip_callchain *chain, 282__append_chain_children(struct callchain_node *root,
273 struct symbol **syms, unsigned int start) 283 struct resolved_chain *chain,
284 unsigned int start)
274{ 285{
275 struct callchain_node *rnode; 286 struct callchain_node *rnode;
276 287
277 /* lookup in childrens */ 288 /* lookup in childrens */
278 chain_for_each_child(rnode, root) { 289 chain_for_each_child(rnode, root) {
279 unsigned int ret = __append_chain(rnode, chain, start, syms); 290 unsigned int ret = __append_chain(rnode, chain, start);
280 291
281 if (!ret) 292 if (!ret)
282 goto inc_children_hit; 293 goto inc_children_hit;
283 } 294 }
284 /* nothing in children, add to the current node */ 295 /* nothing in children, add to the current node */
285 add_child(root, chain, start, syms); 296 add_child(root, chain, start);
286 297
287inc_children_hit: 298inc_children_hit:
288 root->children_hit++; 299 root->children_hit++;
289} 300}
290 301
291static int 302static int
292__append_chain(struct callchain_node *root, struct ip_callchain *chain, 303__append_chain(struct callchain_node *root, struct resolved_chain *chain,
293 unsigned int start, struct symbol **syms) 304 unsigned int start)
294{ 305{
295 struct callchain_list *cnode; 306 struct callchain_list *cnode;
296 unsigned int i = start; 307 unsigned int i = start;
@@ -302,13 +313,19 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain,
302 * anywhere inside a function. 313 * anywhere inside a function.
303 */ 314 */
304 list_for_each_entry(cnode, &root->val, list) { 315 list_for_each_entry(cnode, &root->val, list) {
316 struct symbol *sym;
317
305 if (i == chain->nr) 318 if (i == chain->nr)
306 break; 319 break;
307 if (cnode->sym && syms[i]) { 320
308 if (cnode->sym->start != syms[i]->start) 321 sym = chain->ips[i].ms.sym;
322
323 if (cnode->ms.sym && sym) {
324 if (cnode->ms.sym->start != sym->start)
309 break; 325 break;
310 } else if (cnode->ip != chain->ips[i]) 326 } else if (cnode->ip != chain->ips[i].ip)
311 break; 327 break;
328
312 if (!found) 329 if (!found)
313 found = true; 330 found = true;
314 i++; 331 i++;
@@ -320,7 +337,7 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain,
320 337
321 /* we match only a part of the node. Split it and add the new chain */ 338 /* we match only a part of the node. Split it and add the new chain */
322 if (i - start < root->val_nr) { 339 if (i - start < root->val_nr) {
323 split_add_child(root, chain, cnode, start, i - start, syms); 340 split_add_child(root, chain, cnode, start, i - start);
324 return 0; 341 return 0;
325 } 342 }
326 343
@@ -331,15 +348,50 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain,
331 } 348 }
332 349
333 /* We match the node and still have a part remaining */ 350 /* We match the node and still have a part remaining */
334 __append_chain_children(root, chain, syms, i); 351 __append_chain_children(root, chain, i);
335 352
336 return 0; 353 return 0;
337} 354}
338 355
339void append_chain(struct callchain_node *root, struct ip_callchain *chain, 356static void filter_context(struct ip_callchain *old, struct resolved_chain *new,
340 struct symbol **syms) 357 struct map_symbol *syms)
358{
359 int i, j = 0;
360
361 for (i = 0; i < (int)old->nr; i++) {
362 if (old->ips[i] >= PERF_CONTEXT_MAX)
363 continue;
364
365 new->ips[j].ip = old->ips[i];
366 new->ips[j].ms = syms[i];
367 j++;
368 }
369
370 new->nr = j;
371}
372
373
374int append_chain(struct callchain_node *root, struct ip_callchain *chain,
375 struct map_symbol *syms)
341{ 376{
377 struct resolved_chain *filtered;
378
342 if (!chain->nr) 379 if (!chain->nr)
343 return; 380 return 0;
344 __append_chain_children(root, chain, syms, 0); 381
382 filtered = malloc(sizeof(*filtered) +
383 chain->nr * sizeof(struct resolved_ip));
384 if (!filtered)
385 return -ENOMEM;
386
387 filter_context(chain, filtered, syms);
388
389 if (!filtered->nr)
390 goto end;
391
392 __append_chain_children(root, filtered, 0);
393end:
394 free(filtered);
395
396 return 0;
345} 397}
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index ad4626de4c2b..8a7e8bbd0fda 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -39,7 +39,7 @@ struct callchain_param {
39 39
40struct callchain_list { 40struct callchain_list {
41 u64 ip; 41 u64 ip;
42 struct symbol *sym; 42 struct map_symbol ms;
43 struct list_head list; 43 struct list_head list;
44}; 44};
45 45
@@ -56,6 +56,6 @@ static inline u64 cumul_hits(struct callchain_node *node)
56} 56}
57 57
58int register_callchain_param(struct callchain_param *param); 58int register_callchain_param(struct callchain_param *param);
59void append_chain(struct callchain_node *root, struct ip_callchain *chain, 59int append_chain(struct callchain_node *root, struct ip_callchain *chain,
60 struct symbol **syms); 60 struct map_symbol *syms);
61#endif /* __PERF_CALLCHAIN_H */ 61#endif /* __PERF_CALLCHAIN_H */
diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c
index e88bca55a599..e191eb9a667f 100644
--- a/tools/perf/util/color.c
+++ b/tools/perf/util/color.c
@@ -166,6 +166,31 @@ int perf_color_default_config(const char *var, const char *value, void *cb)
166 return perf_default_config(var, value, cb); 166 return perf_default_config(var, value, cb);
167} 167}
168 168
169static int __color_vsnprintf(char *bf, size_t size, const char *color,
170 const char *fmt, va_list args, const char *trail)
171{
172 int r = 0;
173
174 /*
175 * Auto-detect:
176 */
177 if (perf_use_color_default < 0) {
178 if (isatty(1) || pager_in_use())
179 perf_use_color_default = 1;
180 else
181 perf_use_color_default = 0;
182 }
183
184 if (perf_use_color_default && *color)
185 r += snprintf(bf, size, "%s", color);
186 r += vsnprintf(bf + r, size - r, fmt, args);
187 if (perf_use_color_default && *color)
188 r += snprintf(bf + r, size - r, "%s", PERF_COLOR_RESET);
189 if (trail)
190 r += snprintf(bf + r, size - r, "%s", trail);
191 return r;
192}
193
169static int __color_vfprintf(FILE *fp, const char *color, const char *fmt, 194static int __color_vfprintf(FILE *fp, const char *color, const char *fmt,
170 va_list args, const char *trail) 195 va_list args, const char *trail)
171{ 196{
@@ -191,11 +216,28 @@ static int __color_vfprintf(FILE *fp, const char *color, const char *fmt,
191 return r; 216 return r;
192} 217}
193 218
219int color_vsnprintf(char *bf, size_t size, const char *color,
220 const char *fmt, va_list args)
221{
222 return __color_vsnprintf(bf, size, color, fmt, args, NULL);
223}
224
194int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args) 225int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args)
195{ 226{
196 return __color_vfprintf(fp, color, fmt, args, NULL); 227 return __color_vfprintf(fp, color, fmt, args, NULL);
197} 228}
198 229
230int color_snprintf(char *bf, size_t size, const char *color,
231 const char *fmt, ...)
232{
233 va_list args;
234 int r;
235
236 va_start(args, fmt);
237 r = color_vsnprintf(bf, size, color, fmt, args);
238 va_end(args);
239 return r;
240}
199 241
200int color_fprintf(FILE *fp, const char *color, const char *fmt, ...) 242int color_fprintf(FILE *fp, const char *color, const char *fmt, ...)
201{ 243{
@@ -274,3 +316,9 @@ int percent_color_fprintf(FILE *fp, const char *fmt, double percent)
274 316
275 return r; 317 return r;
276} 318}
319
320int percent_color_snprintf(char *bf, size_t size, const char *fmt, double percent)
321{
322 const char *color = get_percent_color(percent);
323 return color_snprintf(bf, size, color, fmt, percent);
324}
diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h
index 24e8809210bb..dea082b79602 100644
--- a/tools/perf/util/color.h
+++ b/tools/perf/util/color.h
@@ -32,10 +32,14 @@ int perf_color_default_config(const char *var, const char *value, void *cb);
32int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty); 32int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty);
33void color_parse(const char *value, const char *var, char *dst); 33void color_parse(const char *value, const char *var, char *dst);
34void color_parse_mem(const char *value, int len, const char *var, char *dst); 34void color_parse_mem(const char *value, int len, const char *var, char *dst);
35int color_vsnprintf(char *bf, size_t size, const char *color,
36 const char *fmt, va_list args);
35int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args); 37int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args);
36int color_fprintf(FILE *fp, const char *color, const char *fmt, ...); 38int color_fprintf(FILE *fp, const char *color, const char *fmt, ...);
39int color_snprintf(char *bf, size_t size, const char *color, const char *fmt, ...);
37int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...); 40int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...);
38int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf); 41int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf);
42int percent_color_snprintf(char *bf, size_t size, const char *fmt, double percent);
39int percent_color_fprintf(FILE *fp, const char *fmt, double percent); 43int percent_color_fprintf(FILE *fp, const char *fmt, double percent);
40const char *get_percent_color(double percent); 44const char *get_percent_color(double percent);
41 45
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index 0905600c3851..dd824cf3b628 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -6,13 +6,14 @@
6#include <stdarg.h> 6#include <stdarg.h>
7#include <stdio.h> 7#include <stdio.h>
8 8
9#include "cache.h"
9#include "color.h" 10#include "color.h"
10#include "event.h" 11#include "event.h"
11#include "debug.h" 12#include "debug.h"
12#include "util.h" 13#include "util.h"
13 14
14int verbose = 0; 15int verbose = 0;
15int dump_trace = 0; 16bool dump_trace = false;
16 17
17int eprintf(int level, const char *fmt, ...) 18int eprintf(int level, const char *fmt, ...)
18{ 19{
@@ -21,7 +22,10 @@ int eprintf(int level, const char *fmt, ...)
21 22
22 if (verbose >= level) { 23 if (verbose >= level) {
23 va_start(args, fmt); 24 va_start(args, fmt);
24 ret = vfprintf(stderr, fmt, args); 25 if (use_browser)
26 ret = browser__show_help(fmt, args);
27 else
28 ret = vfprintf(stderr, fmt, args);
25 va_end(args); 29 va_end(args);
26 } 30 }
27 31
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index c6c24c522dea..047ac3324ebe 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -2,14 +2,38 @@
2#ifndef __PERF_DEBUG_H 2#ifndef __PERF_DEBUG_H
3#define __PERF_DEBUG_H 3#define __PERF_DEBUG_H
4 4
5#include <stdbool.h>
5#include "event.h" 6#include "event.h"
6 7
7extern int verbose; 8extern int verbose;
8extern int dump_trace; 9extern bool dump_trace;
9 10
10int eprintf(int level,
11 const char *fmt, ...) __attribute__((format(printf, 2, 3)));
12int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2))); 11int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
13void trace_event(event_t *event); 12void trace_event(event_t *event);
14 13
14struct ui_progress;
15
16#ifdef NO_NEWT_SUPPORT
17static inline int browser__show_help(const char *format __used, va_list ap __used)
18{
19 return 0;
20}
21
22static inline struct ui_progress *ui_progress__new(const char *title __used,
23 u64 total __used)
24{
25 return (struct ui_progress *)1;
26}
27
28static inline void ui_progress__update(struct ui_progress *self __used,
29 u64 curr __used) {}
30
31static inline void ui_progress__delete(struct ui_progress *self __used) {}
32#else
33int browser__show_help(const char *format, va_list ap);
34struct ui_progress *ui_progress__new(const char *title, u64 total);
35void ui_progress__update(struct ui_progress *self, u64 curr);
36void ui_progress__delete(struct ui_progress *self);
37#endif
38
15#endif /* __PERF_DEBUG_H */ 39#endif /* __PERF_DEBUG_H */
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 705ec63548b4..e3fa8d3d11b4 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -112,7 +112,11 @@ static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
112 event_t ev = { 112 event_t ev = {
113 .header = { 113 .header = {
114 .type = PERF_RECORD_MMAP, 114 .type = PERF_RECORD_MMAP,
115 .misc = 0, /* Just like the kernel, see kernel/perf_event.c __perf_event_mmap */ 115 /*
116 * Just like the kernel, see __perf_event_mmap
117 * in kernel/perf_event.c
118 */
119 .misc = PERF_RECORD_MISC_USER,
116 }, 120 },
117 }; 121 };
118 int n; 122 int n;
@@ -130,6 +134,7 @@ static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
130 continue; 134 continue;
131 pbf += n + 3; 135 pbf += n + 3;
132 if (*pbf == 'x') { /* vm_exec */ 136 if (*pbf == 'x') { /* vm_exec */
137 u64 vm_pgoff;
133 char *execname = strchr(bf, '/'); 138 char *execname = strchr(bf, '/');
134 139
135 /* Catch VDSO */ 140 /* Catch VDSO */
@@ -139,6 +144,14 @@ static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
139 if (execname == NULL) 144 if (execname == NULL)
140 continue; 145 continue;
141 146
147 pbf += 3;
148 n = hex2u64(pbf, &vm_pgoff);
149 /* pgoff is in bytes, not pages */
150 if (n >= 0)
151 ev.mmap.pgoff = vm_pgoff << getpagesize();
152 else
153 ev.mmap.pgoff = 0;
154
142 size = strlen(execname); 155 size = strlen(execname);
143 execname[size - 1] = '\0'; /* Remove \n */ 156 execname[size - 1] = '\0'; /* Remove \n */
144 memcpy(ev.mmap.filename, execname, size); 157 memcpy(ev.mmap.filename, execname, size);
@@ -158,11 +171,23 @@ static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
158} 171}
159 172
160int event__synthesize_modules(event__handler_t process, 173int event__synthesize_modules(event__handler_t process,
161 struct perf_session *session) 174 struct perf_session *session,
175 struct kernel_info *kerninfo)
162{ 176{
163 struct rb_node *nd; 177 struct rb_node *nd;
178 struct map_groups *kmaps = &kerninfo->kmaps;
179 u16 misc;
180
181 /*
182 * kernel uses 0 for user space maps, see kernel/perf_event.c
183 * __perf_event_mmap
184 */
185 if (is_host_kernel(kerninfo))
186 misc = PERF_RECORD_MISC_KERNEL;
187 else
188 misc = PERF_RECORD_MISC_GUEST_KERNEL;
164 189
165 for (nd = rb_first(&session->kmaps.maps[MAP__FUNCTION]); 190 for (nd = rb_first(&kmaps->maps[MAP__FUNCTION]);
166 nd; nd = rb_next(nd)) { 191 nd; nd = rb_next(nd)) {
167 event_t ev; 192 event_t ev;
168 size_t size; 193 size_t size;
@@ -173,12 +198,13 @@ int event__synthesize_modules(event__handler_t process,
173 198
174 size = ALIGN(pos->dso->long_name_len + 1, sizeof(u64)); 199 size = ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
175 memset(&ev, 0, sizeof(ev)); 200 memset(&ev, 0, sizeof(ev));
176 ev.mmap.header.misc = 1; /* kernel uses 0 for user space maps, see kernel/perf_event.c __perf_event_mmap */ 201 ev.mmap.header.misc = misc;
177 ev.mmap.header.type = PERF_RECORD_MMAP; 202 ev.mmap.header.type = PERF_RECORD_MMAP;
178 ev.mmap.header.size = (sizeof(ev.mmap) - 203 ev.mmap.header.size = (sizeof(ev.mmap) -
179 (sizeof(ev.mmap.filename) - size)); 204 (sizeof(ev.mmap.filename) - size));
180 ev.mmap.start = pos->start; 205 ev.mmap.start = pos->start;
181 ev.mmap.len = pos->end - pos->start; 206 ev.mmap.len = pos->end - pos->start;
207 ev.mmap.pid = kerninfo->pid;
182 208
183 memcpy(ev.mmap.filename, pos->dso->long_name, 209 memcpy(ev.mmap.filename, pos->dso->long_name,
184 pos->dso->long_name_len + 1); 210 pos->dso->long_name_len + 1);
@@ -241,13 +267,18 @@ static int find_symbol_cb(void *arg, const char *name, char type, u64 start)
241 267
242int event__synthesize_kernel_mmap(event__handler_t process, 268int event__synthesize_kernel_mmap(event__handler_t process,
243 struct perf_session *session, 269 struct perf_session *session,
270 struct kernel_info *kerninfo,
244 const char *symbol_name) 271 const char *symbol_name)
245{ 272{
246 size_t size; 273 size_t size;
274 const char *filename, *mmap_name;
275 char path[PATH_MAX];
276 char name_buff[PATH_MAX];
277 struct map *map;
278
247 event_t ev = { 279 event_t ev = {
248 .header = { 280 .header = {
249 .type = PERF_RECORD_MMAP, 281 .type = PERF_RECORD_MMAP,
250 .misc = 1, /* kernel uses 0 for user space maps, see kernel/perf_event.c __perf_event_mmap */
251 }, 282 },
252 }; 283 };
253 /* 284 /*
@@ -257,16 +288,37 @@ int event__synthesize_kernel_mmap(event__handler_t process,
257 */ 288 */
258 struct process_symbol_args args = { .name = symbol_name, }; 289 struct process_symbol_args args = { .name = symbol_name, };
259 290
260 if (kallsyms__parse("/proc/kallsyms", &args, find_symbol_cb) <= 0) 291 mmap_name = kern_mmap_name(kerninfo, name_buff);
292 if (is_host_kernel(kerninfo)) {
293 /*
294 * kernel uses PERF_RECORD_MISC_USER for user space maps,
295 * see kernel/perf_event.c __perf_event_mmap
296 */
297 ev.header.misc = PERF_RECORD_MISC_KERNEL;
298 filename = "/proc/kallsyms";
299 } else {
300 ev.header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
301 if (is_default_guest(kerninfo))
302 filename = (char *) symbol_conf.default_guest_kallsyms;
303 else {
304 sprintf(path, "%s/proc/kallsyms", kerninfo->root_dir);
305 filename = path;
306 }
307 }
308
309 if (kallsyms__parse(filename, &args, find_symbol_cb) <= 0)
261 return -ENOENT; 310 return -ENOENT;
262 311
312 map = kerninfo->vmlinux_maps[MAP__FUNCTION];
263 size = snprintf(ev.mmap.filename, sizeof(ev.mmap.filename), 313 size = snprintf(ev.mmap.filename, sizeof(ev.mmap.filename),
264 "[kernel.kallsyms.%s]", symbol_name) + 1; 314 "%s%s", mmap_name, symbol_name) + 1;
265 size = ALIGN(size, sizeof(u64)); 315 size = ALIGN(size, sizeof(u64));
266 ev.mmap.header.size = (sizeof(ev.mmap) - (sizeof(ev.mmap.filename) - size)); 316 ev.mmap.header.size = (sizeof(ev.mmap) -
317 (sizeof(ev.mmap.filename) - size));
267 ev.mmap.pgoff = args.start; 318 ev.mmap.pgoff = args.start;
268 ev.mmap.start = session->vmlinux_maps[MAP__FUNCTION]->start; 319 ev.mmap.start = map->start;
269 ev.mmap.len = session->vmlinux_maps[MAP__FUNCTION]->end - ev.mmap.start ; 320 ev.mmap.len = map->end - ev.mmap.start;
321 ev.mmap.pid = kerninfo->pid;
270 322
271 return process(&ev, session); 323 return process(&ev, session);
272} 324}
@@ -320,22 +372,50 @@ int event__process_lost(event_t *self, struct perf_session *session)
320 return 0; 372 return 0;
321} 373}
322 374
323int event__process_mmap(event_t *self, struct perf_session *session) 375static void event_set_kernel_mmap_len(struct map **maps, event_t *self)
376{
377 maps[MAP__FUNCTION]->start = self->mmap.start;
378 maps[MAP__FUNCTION]->end = self->mmap.start + self->mmap.len;
379 /*
380 * Be a bit paranoid here, some perf.data file came with
381 * a zero sized synthesized MMAP event for the kernel.
382 */
383 if (maps[MAP__FUNCTION]->end == 0)
384 maps[MAP__FUNCTION]->end = ~0UL;
385}
386
387static int event__process_kernel_mmap(event_t *self,
388 struct perf_session *session)
324{ 389{
325 struct thread *thread;
326 struct map *map; 390 struct map *map;
391 char kmmap_prefix[PATH_MAX];
392 struct kernel_info *kerninfo;
393 enum dso_kernel_type kernel_type;
394 bool is_kernel_mmap;
395
396 kerninfo = kerninfo__findnew(&session->kerninfo_root, self->mmap.pid);
397 if (!kerninfo) {
398 pr_err("Can't find id %d's kerninfo\n", self->mmap.pid);
399 goto out_problem;
400 }
327 401
328 dump_printf(" %d/%d: [%#Lx(%#Lx) @ %#Lx]: %s\n", 402 kern_mmap_name(kerninfo, kmmap_prefix);
329 self->mmap.pid, self->mmap.tid, self->mmap.start, 403 if (is_host_kernel(kerninfo))
330 self->mmap.len, self->mmap.pgoff, self->mmap.filename); 404 kernel_type = DSO_TYPE_KERNEL;
405 else
406 kernel_type = DSO_TYPE_GUEST_KERNEL;
331 407
332 if (self->mmap.pid == 0) { 408 is_kernel_mmap = memcmp(self->mmap.filename,
333 static const char kmmap_prefix[] = "[kernel.kallsyms."; 409 kmmap_prefix,
410 strlen(kmmap_prefix)) == 0;
411 if (self->mmap.filename[0] == '/' ||
412 (!is_kernel_mmap && self->mmap.filename[0] == '[')) {
334 413
335 if (self->mmap.filename[0] == '/') { 414 char short_module_name[1024];
336 char short_module_name[1024]; 415 char *name, *dot;
337 char *name = strrchr(self->mmap.filename, '/'), *dot;
338 416
417 if (self->mmap.filename[0] == '/') {
418 name = strrchr(self->mmap.filename, '/');
339 if (name == NULL) 419 if (name == NULL)
340 goto out_problem; 420 goto out_problem;
341 421
@@ -343,58 +423,86 @@ int event__process_mmap(event_t *self, struct perf_session *session)
343 dot = strrchr(name, '.'); 423 dot = strrchr(name, '.');
344 if (dot == NULL) 424 if (dot == NULL)
345 goto out_problem; 425 goto out_problem;
346
347 snprintf(short_module_name, sizeof(short_module_name), 426 snprintf(short_module_name, sizeof(short_module_name),
348 "[%.*s]", (int)(dot - name), name); 427 "[%.*s]", (int)(dot - name), name);
349 strxfrchar(short_module_name, '-', '_'); 428 strxfrchar(short_module_name, '-', '_');
350 429 } else
351 map = perf_session__new_module_map(session, 430 strcpy(short_module_name, self->mmap.filename);
352 self->mmap.start, 431
353 self->mmap.filename); 432 map = map_groups__new_module(&kerninfo->kmaps,
354 if (map == NULL) 433 self->mmap.start,
355 goto out_problem; 434 self->mmap.filename,
356 435 kerninfo);
357 name = strdup(short_module_name); 436 if (map == NULL)
358 if (name == NULL) 437 goto out_problem;
359 goto out_problem; 438
360 439 name = strdup(short_module_name);
361 map->dso->short_name = name; 440 if (name == NULL)
362 map->end = map->start + self->mmap.len; 441 goto out_problem;
363 } else if (memcmp(self->mmap.filename, kmmap_prefix, 442
364 sizeof(kmmap_prefix) - 1) == 0) { 443 map->dso->short_name = name;
365 const char *symbol_name = (self->mmap.filename + 444 map->end = map->start + self->mmap.len;
366 sizeof(kmmap_prefix) - 1); 445 } else if (is_kernel_mmap) {
446 const char *symbol_name = (self->mmap.filename +
447 strlen(kmmap_prefix));
448 /*
449 * Should be there already, from the build-id table in
450 * the header.
451 */
452 struct dso *kernel = __dsos__findnew(&kerninfo->dsos__kernel,
453 kmmap_prefix);
454 if (kernel == NULL)
455 goto out_problem;
456
457 kernel->kernel = kernel_type;
458 if (__map_groups__create_kernel_maps(&kerninfo->kmaps,
459 kerninfo->vmlinux_maps, kernel) < 0)
460 goto out_problem;
461
462 event_set_kernel_mmap_len(kerninfo->vmlinux_maps, self);
463 perf_session__set_kallsyms_ref_reloc_sym(kerninfo->vmlinux_maps,
464 symbol_name,
465 self->mmap.pgoff);
466 if (is_default_guest(kerninfo)) {
367 /* 467 /*
368 * Should be there already, from the build-id table in 468 * preload dso of guest kernel and modules
369 * the header.
370 */ 469 */
371 struct dso *kernel = __dsos__findnew(&dsos__kernel, 470 dso__load(kernel,
372 "[kernel.kallsyms]"); 471 kerninfo->vmlinux_maps[MAP__FUNCTION],
373 if (kernel == NULL) 472 NULL);
374 goto out_problem; 473 }
375 474 }
376 kernel->kernel = 1; 475 return 0;
377 if (__perf_session__create_kernel_maps(session, kernel) < 0) 476out_problem:
378 goto out_problem; 477 return -1;
478}
379 479
380 session->vmlinux_maps[MAP__FUNCTION]->start = self->mmap.start; 480int event__process_mmap(event_t *self, struct perf_session *session)
381 session->vmlinux_maps[MAP__FUNCTION]->end = self->mmap.start + self->mmap.len; 481{
382 /* 482 struct kernel_info *kerninfo;
383 * Be a bit paranoid here, some perf.data file came with 483 struct thread *thread;
384 * a zero sized synthesized MMAP event for the kernel. 484 struct map *map;
385 */ 485 u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
386 if (session->vmlinux_maps[MAP__FUNCTION]->end == 0) 486 int ret = 0;
387 session->vmlinux_maps[MAP__FUNCTION]->end = ~0UL;
388 487
389 perf_session__set_kallsyms_ref_reloc_sym(session, symbol_name, 488 dump_printf(" %d/%d: [%#Lx(%#Lx) @ %#Lx]: %s\n",
390 self->mmap.pgoff); 489 self->mmap.pid, self->mmap.tid, self->mmap.start,
391 } 490 self->mmap.len, self->mmap.pgoff, self->mmap.filename);
491
492 if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
493 cpumode == PERF_RECORD_MISC_KERNEL) {
494 ret = event__process_kernel_mmap(self, session);
495 if (ret < 0)
496 goto out_problem;
392 return 0; 497 return 0;
393 } 498 }
394 499
395 thread = perf_session__findnew(session, self->mmap.pid); 500 thread = perf_session__findnew(session, self->mmap.pid);
396 map = map__new(&self->mmap, MAP__FUNCTION, 501 kerninfo = kerninfo__findhost(&session->kerninfo_root);
397 session->cwd, session->cwdlen); 502 map = map__new(&kerninfo->dsos__user, self->mmap.start,
503 self->mmap.len, self->mmap.pgoff,
504 self->mmap.pid, self->mmap.filename,
505 MAP__FUNCTION, session->cwd, session->cwdlen);
398 506
399 if (thread == NULL || map == NULL) 507 if (thread == NULL || map == NULL)
400 goto out_problem; 508 goto out_problem;
@@ -434,22 +542,52 @@ int event__process_task(event_t *self, struct perf_session *session)
434 542
435void thread__find_addr_map(struct thread *self, 543void thread__find_addr_map(struct thread *self,
436 struct perf_session *session, u8 cpumode, 544 struct perf_session *session, u8 cpumode,
437 enum map_type type, u64 addr, 545 enum map_type type, pid_t pid, u64 addr,
438 struct addr_location *al) 546 struct addr_location *al)
439{ 547{
440 struct map_groups *mg = &self->mg; 548 struct map_groups *mg = &self->mg;
549 struct kernel_info *kerninfo = NULL;
441 550
442 al->thread = self; 551 al->thread = self;
443 al->addr = addr; 552 al->addr = addr;
553 al->cpumode = cpumode;
554 al->filtered = false;
444 555
445 if (cpumode == PERF_RECORD_MISC_KERNEL) { 556 if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) {
446 al->level = 'k'; 557 al->level = 'k';
447 mg = &session->kmaps; 558 kerninfo = kerninfo__findhost(&session->kerninfo_root);
448 } else if (cpumode == PERF_RECORD_MISC_USER) 559 mg = &kerninfo->kmaps;
560 } else if (cpumode == PERF_RECORD_MISC_USER && perf_host) {
449 al->level = '.'; 561 al->level = '.';
450 else { 562 kerninfo = kerninfo__findhost(&session->kerninfo_root);
451 al->level = 'H'; 563 } else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL && perf_guest) {
564 al->level = 'g';
565 kerninfo = kerninfo__find(&session->kerninfo_root, pid);
566 if (!kerninfo) {
567 al->map = NULL;
568 return;
569 }
570 mg = &kerninfo->kmaps;
571 } else {
572 /*
573 * 'u' means guest os user space.
574 * TODO: We don't support guest user space. Might support late.
575 */
576 if (cpumode == PERF_RECORD_MISC_GUEST_USER && perf_guest)
577 al->level = 'u';
578 else
579 al->level = 'H';
452 al->map = NULL; 580 al->map = NULL;
581
582 if ((cpumode == PERF_RECORD_MISC_GUEST_USER ||
583 cpumode == PERF_RECORD_MISC_GUEST_KERNEL) &&
584 !perf_guest)
585 al->filtered = true;
586 if ((cpumode == PERF_RECORD_MISC_USER ||
587 cpumode == PERF_RECORD_MISC_KERNEL) &&
588 !perf_host)
589 al->filtered = true;
590
453 return; 591 return;
454 } 592 }
455try_again: 593try_again:
@@ -464,8 +602,11 @@ try_again:
464 * "[vdso]" dso, but for now lets use the old trick of looking 602 * "[vdso]" dso, but for now lets use the old trick of looking
465 * in the whole kernel symbol list. 603 * in the whole kernel symbol list.
466 */ 604 */
467 if ((long long)al->addr < 0 && mg != &session->kmaps) { 605 if ((long long)al->addr < 0 &&
468 mg = &session->kmaps; 606 cpumode == PERF_RECORD_MISC_KERNEL &&
607 kerninfo &&
608 mg != &kerninfo->kmaps) {
609 mg = &kerninfo->kmaps;
469 goto try_again; 610 goto try_again;
470 } 611 }
471 } else 612 } else
@@ -474,11 +615,11 @@ try_again:
474 615
475void thread__find_addr_location(struct thread *self, 616void thread__find_addr_location(struct thread *self,
476 struct perf_session *session, u8 cpumode, 617 struct perf_session *session, u8 cpumode,
477 enum map_type type, u64 addr, 618 enum map_type type, pid_t pid, u64 addr,
478 struct addr_location *al, 619 struct addr_location *al,
479 symbol_filter_t filter) 620 symbol_filter_t filter)
480{ 621{
481 thread__find_addr_map(self, session, cpumode, type, addr, al); 622 thread__find_addr_map(self, session, cpumode, type, pid, addr, al);
482 if (al->map != NULL) 623 if (al->map != NULL)
483 al->sym = map__find_symbol(al->map, al->addr, filter); 624 al->sym = map__find_symbol(al->map, al->addr, filter);
484 else 625 else
@@ -513,30 +654,37 @@ int event__preprocess_sample(const event_t *self, struct perf_session *session,
513 654
514 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); 655 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
515 656
516 thread__find_addr_location(thread, session, cpumode, MAP__FUNCTION, 657 thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION,
517 self->ip.ip, al, filter); 658 self->ip.pid, self->ip.ip, al);
518 dump_printf(" ...... dso: %s\n", 659 dump_printf(" ...... dso: %s\n",
519 al->map ? al->map->dso->long_name : 660 al->map ? al->map->dso->long_name :
520 al->level == 'H' ? "[hypervisor]" : "<not found>"); 661 al->level == 'H' ? "[hypervisor]" : "<not found>");
521 /* 662 al->sym = NULL;
522 * We have to do this here as we may have a dso with no symbol hit that 663
523 * has a name longer than the ones with symbols sampled. 664 if (al->map) {
524 */ 665 if (symbol_conf.dso_list &&
525 if (al->map && !sort_dso.elide && !al->map->dso->slen_calculated) 666 (!al->map || !al->map->dso ||
526 dso__calc_col_width(al->map->dso); 667 !(strlist__has_entry(symbol_conf.dso_list,
527 668 al->map->dso->short_name) ||
528 if (symbol_conf.dso_list && 669 (al->map->dso->short_name != al->map->dso->long_name &&
529 (!al->map || !al->map->dso || 670 strlist__has_entry(symbol_conf.dso_list,
530 !(strlist__has_entry(symbol_conf.dso_list, al->map->dso->short_name) || 671 al->map->dso->long_name)))))
531 (al->map->dso->short_name != al->map->dso->long_name && 672 goto out_filtered;
532 strlist__has_entry(symbol_conf.dso_list, al->map->dso->long_name))))) 673 /*
533 goto out_filtered; 674 * We have to do this here as we may have a dso with no symbol
675 * hit that has a name longer than the ones with symbols
676 * sampled.
677 */
678 if (!sort_dso.elide && !al->map->dso->slen_calculated)
679 dso__calc_col_width(al->map->dso);
680
681 al->sym = map__find_symbol(al->map, al->addr, filter);
682 }
534 683
535 if (symbol_conf.sym_list && al->sym && 684 if (symbol_conf.sym_list && al->sym &&
536 !strlist__has_entry(symbol_conf.sym_list, al->sym->name)) 685 !strlist__has_entry(symbol_conf.sym_list, al->sym->name))
537 goto out_filtered; 686 goto out_filtered;
538 687
539 al->filtered = false;
540 return 0; 688 return 0;
541 689
542out_filtered: 690out_filtered:
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index a33b94952e34..4af2ed5d48ad 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -68,21 +68,53 @@ struct sample_data {
68 u64 addr; 68 u64 addr;
69 u64 id; 69 u64 id;
70 u64 stream_id; 70 u64 stream_id;
71 u32 cpu;
72 u64 period; 71 u64 period;
73 struct ip_callchain *callchain; 72 u32 cpu;
74 u32 raw_size; 73 u32 raw_size;
75 void *raw_data; 74 void *raw_data;
75 struct ip_callchain *callchain;
76}; 76};
77 77
78#define BUILD_ID_SIZE 20 78#define BUILD_ID_SIZE 20
79 79
80struct build_id_event { 80struct build_id_event {
81 struct perf_event_header header; 81 struct perf_event_header header;
82 pid_t pid;
82 u8 build_id[ALIGN(BUILD_ID_SIZE, sizeof(u64))]; 83 u8 build_id[ALIGN(BUILD_ID_SIZE, sizeof(u64))];
83 char filename[]; 84 char filename[];
84}; 85};
85 86
87enum perf_header_event_type { /* above any possible kernel type */
88 PERF_RECORD_HEADER_ATTR = 64,
89 PERF_RECORD_HEADER_EVENT_TYPE = 65,
90 PERF_RECORD_HEADER_TRACING_DATA = 66,
91 PERF_RECORD_HEADER_BUILD_ID = 67,
92 PERF_RECORD_HEADER_MAX
93};
94
95struct attr_event {
96 struct perf_event_header header;
97 struct perf_event_attr attr;
98 u64 id[];
99};
100
101#define MAX_EVENT_NAME 64
102
103struct perf_trace_event_type {
104 u64 event_id;
105 char name[MAX_EVENT_NAME];
106};
107
108struct event_type_event {
109 struct perf_event_header header;
110 struct perf_trace_event_type event_type;
111};
112
113struct tracing_data_event {
114 struct perf_event_header header;
115 u32 size;
116};
117
86typedef union event_union { 118typedef union event_union {
87 struct perf_event_header header; 119 struct perf_event_header header;
88 struct ip_event ip; 120 struct ip_event ip;
@@ -92,6 +124,10 @@ typedef union event_union {
92 struct lost_event lost; 124 struct lost_event lost;
93 struct read_event read; 125 struct read_event read;
94 struct sample_event sample; 126 struct sample_event sample;
127 struct attr_event attr;
128 struct event_type_event event_type;
129 struct tracing_data_event tracing_data;
130 struct build_id_event build_id;
95} event_t; 131} event_t;
96 132
97struct events_stats { 133struct events_stats {
@@ -119,10 +155,13 @@ int event__synthesize_thread(pid_t pid, event__handler_t process,
119void event__synthesize_threads(event__handler_t process, 155void event__synthesize_threads(event__handler_t process,
120 struct perf_session *session); 156 struct perf_session *session);
121int event__synthesize_kernel_mmap(event__handler_t process, 157int event__synthesize_kernel_mmap(event__handler_t process,
122 struct perf_session *session, 158 struct perf_session *session,
123 const char *symbol_name); 159 struct kernel_info *kerninfo,
160 const char *symbol_name);
161
124int event__synthesize_modules(event__handler_t process, 162int event__synthesize_modules(event__handler_t process,
125 struct perf_session *session); 163 struct perf_session *session,
164 struct kernel_info *kerninfo);
126 165
127int event__process_comm(event_t *self, struct perf_session *session); 166int event__process_comm(event_t *self, struct perf_session *session);
128int event__process_lost(event_t *self, struct perf_session *session); 167int event__process_lost(event_t *self, struct perf_session *session);
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 6c9aa16ee51f..75d016768021 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -99,13 +99,6 @@ int perf_header__add_attr(struct perf_header *self,
99 return 0; 99 return 0;
100} 100}
101 101
102#define MAX_EVENT_NAME 64
103
104struct perf_trace_event_type {
105 u64 event_id;
106 char name[MAX_EVENT_NAME];
107};
108
109static int event_count; 102static int event_count;
110static struct perf_trace_event_type *events; 103static struct perf_trace_event_type *events;
111 104
@@ -197,7 +190,8 @@ static int write_padded(int fd, const void *bf, size_t count,
197 continue; \ 190 continue; \
198 else 191 else
199 192
200static int __dsos__write_buildid_table(struct list_head *head, u16 misc, int fd) 193static int __dsos__write_buildid_table(struct list_head *head, pid_t pid,
194 u16 misc, int fd)
201{ 195{
202 struct dso *pos; 196 struct dso *pos;
203 197
@@ -212,6 +206,7 @@ static int __dsos__write_buildid_table(struct list_head *head, u16 misc, int fd)
212 len = ALIGN(len, NAME_ALIGN); 206 len = ALIGN(len, NAME_ALIGN);
213 memset(&b, 0, sizeof(b)); 207 memset(&b, 0, sizeof(b));
214 memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id)); 208 memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id));
209 b.pid = pid;
215 b.header.misc = misc; 210 b.header.misc = misc;
216 b.header.size = sizeof(b) + len; 211 b.header.size = sizeof(b) + len;
217 err = do_write(fd, &b, sizeof(b)); 212 err = do_write(fd, &b, sizeof(b));
@@ -226,13 +221,33 @@ static int __dsos__write_buildid_table(struct list_head *head, u16 misc, int fd)
226 return 0; 221 return 0;
227} 222}
228 223
229static int dsos__write_buildid_table(int fd) 224static int dsos__write_buildid_table(struct perf_header *header, int fd)
230{ 225{
231 int err = __dsos__write_buildid_table(&dsos__kernel, 226 struct perf_session *session = container_of(header,
232 PERF_RECORD_MISC_KERNEL, fd); 227 struct perf_session, header);
233 if (err == 0) 228 struct rb_node *nd;
234 err = __dsos__write_buildid_table(&dsos__user, 229 int err = 0;
235 PERF_RECORD_MISC_USER, fd); 230 u16 kmisc, umisc;
231
232 for (nd = rb_first(&session->kerninfo_root); nd; nd = rb_next(nd)) {
233 struct kernel_info *pos = rb_entry(nd, struct kernel_info,
234 rb_node);
235 if (is_host_kernel(pos)) {
236 kmisc = PERF_RECORD_MISC_KERNEL;
237 umisc = PERF_RECORD_MISC_USER;
238 } else {
239 kmisc = PERF_RECORD_MISC_GUEST_KERNEL;
240 umisc = PERF_RECORD_MISC_GUEST_USER;
241 }
242
243 err = __dsos__write_buildid_table(&pos->dsos__kernel, pos->pid,
244 kmisc, fd);
245 if (err == 0)
246 err = __dsos__write_buildid_table(&pos->dsos__user,
247 pos->pid, umisc, fd);
248 if (err)
249 break;
250 }
236 return err; 251 return err;
237} 252}
238 253
@@ -349,9 +364,12 @@ static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir)
349 return err; 364 return err;
350} 365}
351 366
352static int dsos__cache_build_ids(void) 367static int dsos__cache_build_ids(struct perf_header *self)
353{ 368{
354 int err_kernel, err_user; 369 struct perf_session *session = container_of(self,
370 struct perf_session, header);
371 struct rb_node *nd;
372 int ret = 0;
355 char debugdir[PATH_MAX]; 373 char debugdir[PATH_MAX];
356 374
357 snprintf(debugdir, sizeof(debugdir), "%s/%s", getenv("HOME"), 375 snprintf(debugdir, sizeof(debugdir), "%s/%s", getenv("HOME"),
@@ -360,9 +378,30 @@ static int dsos__cache_build_ids(void)
360 if (mkdir(debugdir, 0755) != 0 && errno != EEXIST) 378 if (mkdir(debugdir, 0755) != 0 && errno != EEXIST)
361 return -1; 379 return -1;
362 380
363 err_kernel = __dsos__cache_build_ids(&dsos__kernel, debugdir); 381 for (nd = rb_first(&session->kerninfo_root); nd; nd = rb_next(nd)) {
364 err_user = __dsos__cache_build_ids(&dsos__user, debugdir); 382 struct kernel_info *pos = rb_entry(nd, struct kernel_info,
365 return err_kernel || err_user ? -1 : 0; 383 rb_node);
384 ret |= __dsos__cache_build_ids(&pos->dsos__kernel, debugdir);
385 ret |= __dsos__cache_build_ids(&pos->dsos__user, debugdir);
386 }
387 return ret ? -1 : 0;
388}
389
390static bool dsos__read_build_ids(struct perf_header *self, bool with_hits)
391{
392 bool ret = false;
393 struct perf_session *session = container_of(self,
394 struct perf_session, header);
395 struct rb_node *nd;
396
397 for (nd = rb_first(&session->kerninfo_root); nd; nd = rb_next(nd)) {
398 struct kernel_info *pos = rb_entry(nd, struct kernel_info,
399 rb_node);
400 ret |= __dsos__read_build_ids(&pos->dsos__kernel, with_hits);
401 ret |= __dsos__read_build_ids(&pos->dsos__user, with_hits);
402 }
403
404 return ret;
366} 405}
367 406
368static int perf_header__adds_write(struct perf_header *self, int fd) 407static int perf_header__adds_write(struct perf_header *self, int fd)
@@ -373,7 +412,7 @@ static int perf_header__adds_write(struct perf_header *self, int fd)
373 u64 sec_start; 412 u64 sec_start;
374 int idx = 0, err; 413 int idx = 0, err;
375 414
376 if (dsos__read_build_ids(true)) 415 if (dsos__read_build_ids(self, true))
377 perf_header__set_feat(self, HEADER_BUILD_ID); 416 perf_header__set_feat(self, HEADER_BUILD_ID);
378 417
379 nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS); 418 nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS);
@@ -408,14 +447,14 @@ static int perf_header__adds_write(struct perf_header *self, int fd)
408 447
409 /* Write build-ids */ 448 /* Write build-ids */
410 buildid_sec->offset = lseek(fd, 0, SEEK_CUR); 449 buildid_sec->offset = lseek(fd, 0, SEEK_CUR);
411 err = dsos__write_buildid_table(fd); 450 err = dsos__write_buildid_table(self, fd);
412 if (err < 0) { 451 if (err < 0) {
413 pr_debug("failed to write buildid table\n"); 452 pr_debug("failed to write buildid table\n");
414 goto out_free; 453 goto out_free;
415 } 454 }
416 buildid_sec->size = lseek(fd, 0, SEEK_CUR) - 455 buildid_sec->size = lseek(fd, 0, SEEK_CUR) -
417 buildid_sec->offset; 456 buildid_sec->offset;
418 dsos__cache_build_ids(); 457 dsos__cache_build_ids(self);
419 } 458 }
420 459
421 lseek(fd, sec_start, SEEK_SET); 460 lseek(fd, sec_start, SEEK_SET);
@@ -427,6 +466,25 @@ out_free:
427 return err; 466 return err;
428} 467}
429 468
469int perf_header__write_pipe(int fd)
470{
471 struct perf_pipe_file_header f_header;
472 int err;
473
474 f_header = (struct perf_pipe_file_header){
475 .magic = PERF_MAGIC,
476 .size = sizeof(f_header),
477 };
478
479 err = do_write(fd, &f_header, sizeof(f_header));
480 if (err < 0) {
481 pr_debug("failed to write perf pipe header\n");
482 return err;
483 }
484
485 return 0;
486}
487
430int perf_header__write(struct perf_header *self, int fd, bool at_exit) 488int perf_header__write(struct perf_header *self, int fd, bool at_exit)
431{ 489{
432 struct perf_file_header f_header; 490 struct perf_file_header f_header;
@@ -518,25 +576,10 @@ int perf_header__write(struct perf_header *self, int fd, bool at_exit)
518 return 0; 576 return 0;
519} 577}
520 578
521static int do_read(int fd, void *buf, size_t size)
522{
523 while (size) {
524 int ret = read(fd, buf, size);
525
526 if (ret <= 0)
527 return -1;
528
529 size -= ret;
530 buf += ret;
531 }
532
533 return 0;
534}
535
536static int perf_header__getbuffer64(struct perf_header *self, 579static int perf_header__getbuffer64(struct perf_header *self,
537 int fd, void *buf, size_t size) 580 int fd, void *buf, size_t size)
538{ 581{
539 if (do_read(fd, buf, size)) 582 if (do_read(fd, buf, size) <= 0)
540 return -1; 583 return -1;
541 584
542 if (self->needs_swap) 585 if (self->needs_swap)
@@ -592,7 +635,7 @@ int perf_file_header__read(struct perf_file_header *self,
592{ 635{
593 lseek(fd, 0, SEEK_SET); 636 lseek(fd, 0, SEEK_SET);
594 637
595 if (do_read(fd, self, sizeof(*self)) || 638 if (do_read(fd, self, sizeof(*self)) <= 0 ||
596 memcmp(&self->magic, __perf_magic, sizeof(self->magic))) 639 memcmp(&self->magic, __perf_magic, sizeof(self->magic)))
597 return -1; 640 return -1;
598 641
@@ -636,6 +679,85 @@ int perf_file_header__read(struct perf_file_header *self,
636 return 0; 679 return 0;
637} 680}
638 681
682static int __event_process_build_id(struct build_id_event *bev,
683 char *filename,
684 struct perf_session *session)
685{
686 int err = -1;
687 struct list_head *head;
688 struct kernel_info *kerninfo;
689 u16 misc;
690 struct dso *dso;
691 enum dso_kernel_type dso_type;
692
693 kerninfo = kerninfo__findnew(&session->kerninfo_root, bev->pid);
694 if (!kerninfo)
695 goto out;
696
697 misc = bev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
698
699 switch (misc) {
700 case PERF_RECORD_MISC_KERNEL:
701 dso_type = DSO_TYPE_KERNEL;
702 head = &kerninfo->dsos__kernel;
703 break;
704 case PERF_RECORD_MISC_GUEST_KERNEL:
705 dso_type = DSO_TYPE_GUEST_KERNEL;
706 head = &kerninfo->dsos__kernel;
707 break;
708 case PERF_RECORD_MISC_USER:
709 case PERF_RECORD_MISC_GUEST_USER:
710 dso_type = DSO_TYPE_USER;
711 head = &kerninfo->dsos__user;
712 break;
713 default:
714 goto out;
715 }
716
717 dso = __dsos__findnew(head, filename);
718 if (dso != NULL) {
719 dso__set_build_id(dso, &bev->build_id);
720 if (filename[0] == '[')
721 dso->kernel = dso_type;
722 }
723
724 err = 0;
725out:
726 return err;
727}
728
729static int perf_header__read_build_ids(struct perf_header *self,
730 int input, u64 offset, u64 size)
731{
732 struct perf_session *session = container_of(self,
733 struct perf_session, header);
734 struct build_id_event bev;
735 char filename[PATH_MAX];
736 u64 limit = offset + size;
737 int err = -1;
738
739 while (offset < limit) {
740 ssize_t len;
741
742 if (read(input, &bev, sizeof(bev)) != sizeof(bev))
743 goto out;
744
745 if (self->needs_swap)
746 perf_event_header__bswap(&bev.header);
747
748 len = bev.header.size - sizeof(bev);
749 if (read(input, filename, len) != len)
750 goto out;
751
752 __event_process_build_id(&bev, filename, session);
753
754 offset += bev.header.size;
755 }
756 err = 0;
757out:
758 return err;
759}
760
639static int perf_file_section__process(struct perf_file_section *self, 761static int perf_file_section__process(struct perf_file_section *self,
640 struct perf_header *ph, 762 struct perf_header *ph,
641 int feat, int fd) 763 int feat, int fd)
@@ -662,13 +784,51 @@ static int perf_file_section__process(struct perf_file_section *self,
662 return 0; 784 return 0;
663} 785}
664 786
665int perf_header__read(struct perf_header *self, int fd) 787static int perf_file_header__read_pipe(struct perf_pipe_file_header *self,
788 struct perf_header *ph, int fd)
789{
790 if (do_read(fd, self, sizeof(*self)) <= 0 ||
791 memcmp(&self->magic, __perf_magic, sizeof(self->magic)))
792 return -1;
793
794 if (self->size != sizeof(*self)) {
795 u64 size = bswap_64(self->size);
796
797 if (size != sizeof(*self))
798 return -1;
799
800 ph->needs_swap = true;
801 }
802
803 return 0;
804}
805
806static int perf_header__read_pipe(struct perf_session *session, int fd)
807{
808 struct perf_header *self = &session->header;
809 struct perf_pipe_file_header f_header;
810
811 if (perf_file_header__read_pipe(&f_header, self, fd) < 0) {
812 pr_debug("incompatible file format\n");
813 return -EINVAL;
814 }
815
816 session->fd = fd;
817
818 return 0;
819}
820
821int perf_header__read(struct perf_session *session, int fd)
666{ 822{
823 struct perf_header *self = &session->header;
667 struct perf_file_header f_header; 824 struct perf_file_header f_header;
668 struct perf_file_attr f_attr; 825 struct perf_file_attr f_attr;
669 u64 f_id; 826 u64 f_id;
670 int nr_attrs, nr_ids, i, j; 827 int nr_attrs, nr_ids, i, j;
671 828
829 if (session->fd_pipe)
830 return perf_header__read_pipe(session, fd);
831
672 if (perf_file_header__read(&f_header, self, fd) < 0) { 832 if (perf_file_header__read(&f_header, self, fd) < 0) {
673 pr_debug("incompatible file format\n"); 833 pr_debug("incompatible file format\n");
674 return -EINVAL; 834 return -EINVAL;
@@ -765,3 +925,287 @@ perf_header__find_attr(u64 id, struct perf_header *header)
765 925
766 return NULL; 926 return NULL;
767} 927}
928
929int event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id,
930 event__handler_t process,
931 struct perf_session *session)
932{
933 event_t *ev;
934 size_t size;
935 int err;
936
937 size = sizeof(struct perf_event_attr);
938 size = ALIGN(size, sizeof(u64));
939 size += sizeof(struct perf_event_header);
940 size += ids * sizeof(u64);
941
942 ev = malloc(size);
943
944 ev->attr.attr = *attr;
945 memcpy(ev->attr.id, id, ids * sizeof(u64));
946
947 ev->attr.header.type = PERF_RECORD_HEADER_ATTR;
948 ev->attr.header.size = size;
949
950 err = process(ev, session);
951
952 free(ev);
953
954 return err;
955}
956
957int event__synthesize_attrs(struct perf_header *self,
958 event__handler_t process,
959 struct perf_session *session)
960{
961 struct perf_header_attr *attr;
962 int i, err = 0;
963
964 for (i = 0; i < self->attrs; i++) {
965 attr = self->attr[i];
966
967 err = event__synthesize_attr(&attr->attr, attr->ids, attr->id,
968 process, session);
969 if (err) {
970 pr_debug("failed to create perf header attribute\n");
971 return err;
972 }
973 }
974
975 return err;
976}
977
978int event__process_attr(event_t *self, struct perf_session *session)
979{
980 struct perf_header_attr *attr;
981 unsigned int i, ids, n_ids;
982
983 attr = perf_header_attr__new(&self->attr.attr);
984 if (attr == NULL)
985 return -ENOMEM;
986
987 ids = self->header.size;
988 ids -= (void *)&self->attr.id - (void *)self;
989 n_ids = ids / sizeof(u64);
990
991 for (i = 0; i < n_ids; i++) {
992 if (perf_header_attr__add_id(attr, self->attr.id[i]) < 0) {
993 perf_header_attr__delete(attr);
994 return -ENOMEM;
995 }
996 }
997
998 if (perf_header__add_attr(&session->header, attr) < 0) {
999 perf_header_attr__delete(attr);
1000 return -ENOMEM;
1001 }
1002
1003 perf_session__update_sample_type(session);
1004
1005 return 0;
1006}
1007
1008int event__synthesize_event_type(u64 event_id, char *name,
1009 event__handler_t process,
1010 struct perf_session *session)
1011{
1012 event_t ev;
1013 size_t size = 0;
1014 int err = 0;
1015
1016 memset(&ev, 0, sizeof(ev));
1017
1018 ev.event_type.event_type.event_id = event_id;
1019 memset(ev.event_type.event_type.name, 0, MAX_EVENT_NAME);
1020 strncpy(ev.event_type.event_type.name, name, MAX_EVENT_NAME - 1);
1021
1022 ev.event_type.header.type = PERF_RECORD_HEADER_EVENT_TYPE;
1023 size = strlen(name);
1024 size = ALIGN(size, sizeof(u64));
1025 ev.event_type.header.size = sizeof(ev.event_type) -
1026 (sizeof(ev.event_type.event_type.name) - size);
1027
1028 err = process(&ev, session);
1029
1030 return err;
1031}
1032
1033int event__synthesize_event_types(event__handler_t process,
1034 struct perf_session *session)
1035{
1036 struct perf_trace_event_type *type;
1037 int i, err = 0;
1038
1039 for (i = 0; i < event_count; i++) {
1040 type = &events[i];
1041
1042 err = event__synthesize_event_type(type->event_id, type->name,
1043 process, session);
1044 if (err) {
1045 pr_debug("failed to create perf header event type\n");
1046 return err;
1047 }
1048 }
1049
1050 return err;
1051}
1052
1053int event__process_event_type(event_t *self,
1054 struct perf_session *session __unused)
1055{
1056 if (perf_header__push_event(self->event_type.event_type.event_id,
1057 self->event_type.event_type.name) < 0)
1058 return -ENOMEM;
1059
1060 return 0;
1061}
1062
1063int event__synthesize_tracing_data(int fd, struct perf_event_attr *pattrs,
1064 int nb_events,
1065 event__handler_t process,
1066 struct perf_session *session __unused)
1067{
1068 event_t ev;
1069 ssize_t size = 0, aligned_size = 0, padding;
1070 int err = 0;
1071
1072 memset(&ev, 0, sizeof(ev));
1073
1074 ev.tracing_data.header.type = PERF_RECORD_HEADER_TRACING_DATA;
1075 size = read_tracing_data_size(fd, pattrs, nb_events);
1076 if (size <= 0)
1077 return size;
1078 aligned_size = ALIGN(size, sizeof(u64));
1079 padding = aligned_size - size;
1080 ev.tracing_data.header.size = sizeof(ev.tracing_data);
1081 ev.tracing_data.size = aligned_size;
1082
1083 process(&ev, session);
1084
1085 err = read_tracing_data(fd, pattrs, nb_events);
1086 write_padded(fd, NULL, 0, padding);
1087
1088 return aligned_size;
1089}
1090
1091int event__process_tracing_data(event_t *self,
1092 struct perf_session *session)
1093{
1094 ssize_t size_read, padding, size = self->tracing_data.size;
1095 off_t offset = lseek(session->fd, 0, SEEK_CUR);
1096 char buf[BUFSIZ];
1097
1098 /* setup for reading amidst mmap */
1099 lseek(session->fd, offset + sizeof(struct tracing_data_event),
1100 SEEK_SET);
1101
1102 size_read = trace_report(session->fd);
1103
1104 padding = ALIGN(size_read, sizeof(u64)) - size_read;
1105
1106 if (read(session->fd, buf, padding) < 0)
1107 die("reading input file");
1108
1109 if (size_read + padding != size)
1110 die("tracing data size mismatch");
1111
1112 return size_read + padding;
1113}
1114
1115int event__synthesize_build_id(struct dso *pos, u16 misc,
1116 event__handler_t process,
1117 struct kernel_info *kerninfo,
1118 struct perf_session *session)
1119{
1120 event_t ev;
1121 size_t len;
1122 int err = 0;
1123
1124 if (!pos->hit)
1125 return err;
1126
1127 memset(&ev, 0, sizeof(ev));
1128
1129 len = pos->long_name_len + 1;
1130 len = ALIGN(len, NAME_ALIGN);
1131 memcpy(&ev.build_id.build_id, pos->build_id, sizeof(pos->build_id));
1132 ev.build_id.header.type = PERF_RECORD_HEADER_BUILD_ID;
1133 ev.build_id.header.misc = misc;
1134 ev.build_id.pid = kerninfo->pid;
1135 ev.build_id.header.size = sizeof(ev.build_id) + len;
1136 memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len);
1137
1138 err = process(&ev, session);
1139
1140 return err;
1141}
1142
1143static int __event_synthesize_build_ids(struct list_head *head, u16 misc,
1144 event__handler_t process,
1145 struct kernel_info *kerninfo,
1146 struct perf_session *session)
1147{
1148 struct dso *pos;
1149
1150 dsos__for_each_with_build_id(pos, head) {
1151 int err;
1152 if (!pos->hit)
1153 continue;
1154
1155 err = event__synthesize_build_id(pos, misc, process,
1156 kerninfo, session);
1157 if (err < 0)
1158 return err;
1159 }
1160
1161 return 0;
1162}
1163
1164int event__synthesize_build_ids(event__handler_t process,
1165 struct perf_session *session)
1166{
1167 int err = 0;
1168 u16 kmisc, umisc;
1169 struct kernel_info *pos;
1170 struct rb_node *nd;
1171
1172 if (!dsos__read_build_ids(&session->header, true))
1173 return 0;
1174
1175 for (nd = rb_first(&session->kerninfo_root); nd; nd = rb_next(nd)) {
1176 pos = rb_entry(nd, struct kernel_info, rb_node);
1177 if (is_host_kernel(pos)) {
1178 kmisc = PERF_RECORD_MISC_KERNEL;
1179 umisc = PERF_RECORD_MISC_USER;
1180 } else {
1181 kmisc = PERF_RECORD_MISC_GUEST_KERNEL;
1182 umisc = PERF_RECORD_MISC_GUEST_USER;
1183 }
1184
1185 err = __event_synthesize_build_ids(&pos->dsos__kernel,
1186 kmisc, process, pos, session);
1187 if (err == 0)
1188 err = __event_synthesize_build_ids(&pos->dsos__user,
1189 umisc, process, pos, session);
1190 if (err)
1191 break;
1192 }
1193
1194 if (err < 0) {
1195 pr_debug("failed to synthesize build ids\n");
1196 return err;
1197 }
1198
1199 dsos__cache_build_ids(&session->header);
1200
1201 return 0;
1202}
1203
1204int event__process_build_id(event_t *self,
1205 struct perf_session *session)
1206{
1207 __event_process_build_id(&self->build_id,
1208 self->build_id.filename,
1209 session);
1210 return 0;
1211}
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 82a6af72d4cc..275915458148 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -39,6 +39,11 @@ struct perf_file_header {
39 DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS); 39 DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
40}; 40};
41 41
42struct perf_pipe_file_header {
43 u64 magic;
44 u64 size;
45};
46
42struct perf_header; 47struct perf_header;
43 48
44int perf_file_header__read(struct perf_file_header *self, 49int perf_file_header__read(struct perf_file_header *self,
@@ -47,21 +52,22 @@ int perf_file_header__read(struct perf_file_header *self,
47struct perf_header { 52struct perf_header {
48 int frozen; 53 int frozen;
49 int attrs, size; 54 int attrs, size;
55 bool needs_swap;
50 struct perf_header_attr **attr; 56 struct perf_header_attr **attr;
51 s64 attr_offset; 57 s64 attr_offset;
52 u64 data_offset; 58 u64 data_offset;
53 u64 data_size; 59 u64 data_size;
54 u64 event_offset; 60 u64 event_offset;
55 u64 event_size; 61 u64 event_size;
56 bool needs_swap;
57 DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS); 62 DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
58}; 63};
59 64
60int perf_header__init(struct perf_header *self); 65int perf_header__init(struct perf_header *self);
61void perf_header__exit(struct perf_header *self); 66void perf_header__exit(struct perf_header *self);
62 67
63int perf_header__read(struct perf_header *self, int fd); 68int perf_header__read(struct perf_session *session, int fd);
64int perf_header__write(struct perf_header *self, int fd, bool at_exit); 69int perf_header__write(struct perf_header *self, int fd, bool at_exit);
70int perf_header__write_pipe(int fd);
65 71
66int perf_header__add_attr(struct perf_header *self, 72int perf_header__add_attr(struct perf_header *self,
67 struct perf_header_attr *attr); 73 struct perf_header_attr *attr);
@@ -89,4 +95,35 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
89 const char *name, bool is_kallsyms); 95 const char *name, bool is_kallsyms);
90int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir); 96int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir);
91 97
98int event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id,
99 event__handler_t process,
100 struct perf_session *session);
101int event__synthesize_attrs(struct perf_header *self,
102 event__handler_t process,
103 struct perf_session *session);
104int event__process_attr(event_t *self, struct perf_session *session);
105
106int event__synthesize_event_type(u64 event_id, char *name,
107 event__handler_t process,
108 struct perf_session *session);
109int event__synthesize_event_types(event__handler_t process,
110 struct perf_session *session);
111int event__process_event_type(event_t *self,
112 struct perf_session *session);
113
114int event__synthesize_tracing_data(int fd, struct perf_event_attr *pattrs,
115 int nb_events,
116 event__handler_t process,
117 struct perf_session *session);
118int event__process_tracing_data(event_t *self,
119 struct perf_session *session);
120
121int event__synthesize_build_id(struct dso *pos, u16 misc,
122 event__handler_t process,
123 struct kernel_info *kerninfo,
124 struct perf_session *session);
125int event__synthesize_build_ids(event__handler_t process,
126 struct perf_session *session);
127int event__process_build_id(event_t *self, struct perf_session *session);
128
92#endif /* __PERF_HEADER_H */ 129#endif /* __PERF_HEADER_H */
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 2be33c7dbf03..ad6b22dde27f 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -8,6 +8,30 @@ struct callchain_param callchain_param = {
8 .min_percent = 0.5 8 .min_percent = 0.5
9}; 9};
10 10
11void __perf_session__add_count(struct hist_entry *he,
12 struct addr_location *al,
13 u64 count)
14{
15 he->count += count;
16
17 switch (al->cpumode) {
18 case PERF_RECORD_MISC_KERNEL:
19 he->count_sys += count;
20 break;
21 case PERF_RECORD_MISC_USER:
22 he->count_us += count;
23 break;
24 case PERF_RECORD_MISC_GUEST_KERNEL:
25 he->count_guest_sys += count;
26 break;
27 case PERF_RECORD_MISC_GUEST_USER:
28 he->count_guest_us += count;
29 break;
30 default:
31 break;
32 }
33}
34
11/* 35/*
12 * histogram, sorted on item, collects counts 36 * histogram, sorted on item, collects counts
13 */ 37 */
@@ -22,8 +46,10 @@ struct hist_entry *__perf_session__add_hist_entry(struct rb_root *hists,
22 struct hist_entry *he; 46 struct hist_entry *he;
23 struct hist_entry entry = { 47 struct hist_entry entry = {
24 .thread = al->thread, 48 .thread = al->thread,
25 .map = al->map, 49 .ms = {
26 .sym = al->sym, 50 .map = al->map,
51 .sym = al->sym,
52 },
27 .ip = al->addr, 53 .ip = al->addr,
28 .level = al->level, 54 .level = al->level,
29 .count = count, 55 .count = count,
@@ -48,7 +74,8 @@ struct hist_entry *__perf_session__add_hist_entry(struct rb_root *hists,
48 p = &(*p)->rb_right; 74 p = &(*p)->rb_right;
49 } 75 }
50 76
51 he = malloc(sizeof(*he)); 77 he = malloc(sizeof(*he) + (symbol_conf.use_callchain ?
78 sizeof(struct callchain_node) : 0));
52 if (!he) 79 if (!he)
53 return NULL; 80 return NULL;
54 *he = entry; 81 *he = entry;
@@ -65,7 +92,7 @@ hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
65 int64_t cmp = 0; 92 int64_t cmp = 0;
66 93
67 list_for_each_entry(se, &hist_entry__sort_list, list) { 94 list_for_each_entry(se, &hist_entry__sort_list, list) {
68 cmp = se->cmp(left, right); 95 cmp = se->se_cmp(left, right);
69 if (cmp) 96 if (cmp)
70 break; 97 break;
71 } 98 }
@@ -82,7 +109,7 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
82 list_for_each_entry(se, &hist_entry__sort_list, list) { 109 list_for_each_entry(se, &hist_entry__sort_list, list) {
83 int64_t (*f)(struct hist_entry *, struct hist_entry *); 110 int64_t (*f)(struct hist_entry *, struct hist_entry *);
84 111
85 f = se->collapse ?: se->cmp; 112 f = se->se_collapse ?: se->se_cmp;
86 113
87 cmp = f(left, right); 114 cmp = f(left, right);
88 if (cmp) 115 if (cmp)
@@ -166,7 +193,7 @@ static void perf_session__insert_output_hist_entry(struct rb_root *root,
166 struct hist_entry *iter; 193 struct hist_entry *iter;
167 194
168 if (symbol_conf.use_callchain) 195 if (symbol_conf.use_callchain)
169 callchain_param.sort(&he->sorted_chain, &he->callchain, 196 callchain_param.sort(&he->sorted_chain, he->callchain,
170 min_callchain_hits, &callchain_param); 197 min_callchain_hits, &callchain_param);
171 198
172 while (*p != NULL) { 199 while (*p != NULL) {
@@ -183,12 +210,13 @@ static void perf_session__insert_output_hist_entry(struct rb_root *root,
183 rb_insert_color(&he->rb_node, root); 210 rb_insert_color(&he->rb_node, root);
184} 211}
185 212
186void perf_session__output_resort(struct rb_root *hists, u64 total_samples) 213u64 perf_session__output_resort(struct rb_root *hists, u64 total_samples)
187{ 214{
188 struct rb_root tmp; 215 struct rb_root tmp;
189 struct rb_node *next; 216 struct rb_node *next;
190 struct hist_entry *n; 217 struct hist_entry *n;
191 u64 min_callchain_hits; 218 u64 min_callchain_hits;
219 u64 nr_hists = 0;
192 220
193 min_callchain_hits = 221 min_callchain_hits =
194 total_samples * (callchain_param.min_percent / 100); 222 total_samples * (callchain_param.min_percent / 100);
@@ -203,9 +231,11 @@ void perf_session__output_resort(struct rb_root *hists, u64 total_samples)
203 rb_erase(&n->rb_node, hists); 231 rb_erase(&n->rb_node, hists);
204 perf_session__insert_output_hist_entry(&tmp, n, 232 perf_session__insert_output_hist_entry(&tmp, n,
205 min_callchain_hits); 233 min_callchain_hits);
234 ++nr_hists;
206 } 235 }
207 236
208 *hists = tmp; 237 *hists = tmp;
238 return nr_hists;
209} 239}
210 240
211static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin) 241static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin)
@@ -258,8 +288,8 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain,
258 } else 288 } else
259 ret += fprintf(fp, "%s", " "); 289 ret += fprintf(fp, "%s", " ");
260 } 290 }
261 if (chain->sym) 291 if (chain->ms.sym)
262 ret += fprintf(fp, "%s\n", chain->sym->name); 292 ret += fprintf(fp, "%s\n", chain->ms.sym->name);
263 else 293 else
264 ret += fprintf(fp, "%p\n", (void *)(long)chain->ip); 294 ret += fprintf(fp, "%p\n", (void *)(long)chain->ip);
265 295
@@ -278,7 +308,7 @@ static void init_rem_hits(void)
278 } 308 }
279 309
280 strcpy(rem_sq_bracket->name, "[...]"); 310 strcpy(rem_sq_bracket->name, "[...]");
281 rem_hits.sym = rem_sq_bracket; 311 rem_hits.ms.sym = rem_sq_bracket;
282} 312}
283 313
284static size_t __callchain__fprintf_graph(FILE *fp, struct callchain_node *self, 314static size_t __callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
@@ -328,8 +358,6 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
328 left_margin); 358 left_margin);
329 i = 0; 359 i = 0;
330 list_for_each_entry(chain, &child->val, list) { 360 list_for_each_entry(chain, &child->val, list) {
331 if (chain->ip >= PERF_CONTEXT_MAX)
332 continue;
333 ret += ipchain__fprintf_graph(fp, chain, depth, 361 ret += ipchain__fprintf_graph(fp, chain, depth,
334 new_depth_mask, i++, 362 new_depth_mask, i++,
335 new_total, 363 new_total,
@@ -368,9 +396,6 @@ static size_t callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
368 int ret = 0; 396 int ret = 0;
369 397
370 list_for_each_entry(chain, &self->val, list) { 398 list_for_each_entry(chain, &self->val, list) {
371 if (chain->ip >= PERF_CONTEXT_MAX)
372 continue;
373
374 if (!i++ && sort__first_dimension == SORT_SYM) 399 if (!i++ && sort__first_dimension == SORT_SYM)
375 continue; 400 continue;
376 401
@@ -385,8 +410,8 @@ static size_t callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
385 } else 410 } else
386 ret += callchain__fprintf_left_margin(fp, left_margin); 411 ret += callchain__fprintf_left_margin(fp, left_margin);
387 412
388 if (chain->sym) 413 if (chain->ms.sym)
389 ret += fprintf(fp, " %s\n", chain->sym->name); 414 ret += fprintf(fp, " %s\n", chain->ms.sym->name);
390 else 415 else
391 ret += fprintf(fp, " %p\n", (void *)(long)chain->ip); 416 ret += fprintf(fp, " %p\n", (void *)(long)chain->ip);
392 } 417 }
@@ -411,8 +436,8 @@ static size_t callchain__fprintf_flat(FILE *fp, struct callchain_node *self,
411 list_for_each_entry(chain, &self->val, list) { 436 list_for_each_entry(chain, &self->val, list) {
412 if (chain->ip >= PERF_CONTEXT_MAX) 437 if (chain->ip >= PERF_CONTEXT_MAX)
413 continue; 438 continue;
414 if (chain->sym) 439 if (chain->ms.sym)
415 ret += fprintf(fp, " %s\n", chain->sym->name); 440 ret += fprintf(fp, " %s\n", chain->ms.sym->name);
416 else 441 else
417 ret += fprintf(fp, " %p\n", 442 ret += fprintf(fp, " %p\n",
418 (void *)(long)chain->ip); 443 (void *)(long)chain->ip);
@@ -455,16 +480,17 @@ static size_t hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self,
455 return ret; 480 return ret;
456} 481}
457 482
458static size_t hist_entry__fprintf(struct hist_entry *self, 483int hist_entry__snprintf(struct hist_entry *self,
459 struct perf_session *pair_session, 484 char *s, size_t size,
460 bool show_displacement, 485 struct perf_session *pair_session,
461 long displacement, FILE *fp, 486 bool show_displacement,
462 u64 session_total) 487 long displacement, bool color,
488 u64 session_total)
463{ 489{
464 struct sort_entry *se; 490 struct sort_entry *se;
465 u64 count, total; 491 u64 count, total, count_sys, count_us, count_guest_sys, count_guest_us;
466 const char *sep = symbol_conf.field_sep; 492 const char *sep = symbol_conf.field_sep;
467 size_t ret; 493 int ret;
468 494
469 if (symbol_conf.exclude_other && !self->parent) 495 if (symbol_conf.exclude_other && !self->parent)
470 return 0; 496 return 0;
@@ -472,22 +498,55 @@ static size_t hist_entry__fprintf(struct hist_entry *self,
472 if (pair_session) { 498 if (pair_session) {
473 count = self->pair ? self->pair->count : 0; 499 count = self->pair ? self->pair->count : 0;
474 total = pair_session->events_stats.total; 500 total = pair_session->events_stats.total;
501 count_sys = self->pair ? self->pair->count_sys : 0;
502 count_us = self->pair ? self->pair->count_us : 0;
503 count_guest_sys = self->pair ? self->pair->count_guest_sys : 0;
504 count_guest_us = self->pair ? self->pair->count_guest_us : 0;
475 } else { 505 } else {
476 count = self->count; 506 count = self->count;
477 total = session_total; 507 total = session_total;
508 count_sys = self->count_sys;
509 count_us = self->count_us;
510 count_guest_sys = self->count_guest_sys;
511 count_guest_us = self->count_guest_us;
478 } 512 }
479 513
480 if (total) 514 if (total) {
481 ret = percent_color_fprintf(fp, sep ? "%.2f" : " %6.2f%%", 515 if (color)
482 (count * 100.0) / total); 516 ret = percent_color_snprintf(s, size,
483 else 517 sep ? "%.2f" : " %6.2f%%",
484 ret = fprintf(fp, sep ? "%lld" : "%12lld ", count); 518 (count * 100.0) / total);
519 else
520 ret = snprintf(s, size, sep ? "%.2f" : " %6.2f%%",
521 (count * 100.0) / total);
522 if (symbol_conf.show_cpu_utilization) {
523 ret += percent_color_snprintf(s + ret, size - ret,
524 sep ? "%.2f" : " %6.2f%%",
525 (count_sys * 100.0) / total);
526 ret += percent_color_snprintf(s + ret, size - ret,
527 sep ? "%.2f" : " %6.2f%%",
528 (count_us * 100.0) / total);
529 if (perf_guest) {
530 ret += percent_color_snprintf(s + ret,
531 size - ret,
532 sep ? "%.2f" : " %6.2f%%",
533 (count_guest_sys * 100.0) /
534 total);
535 ret += percent_color_snprintf(s + ret,
536 size - ret,
537 sep ? "%.2f" : " %6.2f%%",
538 (count_guest_us * 100.0) /
539 total);
540 }
541 }
542 } else
543 ret = snprintf(s, size, sep ? "%lld" : "%12lld ", count);
485 544
486 if (symbol_conf.show_nr_samples) { 545 if (symbol_conf.show_nr_samples) {
487 if (sep) 546 if (sep)
488 fprintf(fp, "%c%lld", *sep, count); 547 ret += snprintf(s + ret, size - ret, "%c%lld", *sep, count);
489 else 548 else
490 fprintf(fp, "%11lld", count); 549 ret += snprintf(s + ret, size - ret, "%11lld", count);
491 } 550 }
492 551
493 if (pair_session) { 552 if (pair_session) {
@@ -507,9 +566,9 @@ static size_t hist_entry__fprintf(struct hist_entry *self,
507 snprintf(bf, sizeof(bf), " "); 566 snprintf(bf, sizeof(bf), " ");
508 567
509 if (sep) 568 if (sep)
510 ret += fprintf(fp, "%c%s", *sep, bf); 569 ret += snprintf(s + ret, size - ret, "%c%s", *sep, bf);
511 else 570 else
512 ret += fprintf(fp, "%11.11s", bf); 571 ret += snprintf(s + ret, size - ret, "%11.11s", bf);
513 572
514 if (show_displacement) { 573 if (show_displacement) {
515 if (displacement) 574 if (displacement)
@@ -518,9 +577,9 @@ static size_t hist_entry__fprintf(struct hist_entry *self,
518 snprintf(bf, sizeof(bf), " "); 577 snprintf(bf, sizeof(bf), " ");
519 578
520 if (sep) 579 if (sep)
521 fprintf(fp, "%c%s", *sep, bf); 580 ret += snprintf(s + ret, size - ret, "%c%s", *sep, bf);
522 else 581 else
523 fprintf(fp, "%6.6s", bf); 582 ret += snprintf(s + ret, size - ret, "%6.6s", bf);
524 } 583 }
525 } 584 }
526 585
@@ -528,27 +587,41 @@ static size_t hist_entry__fprintf(struct hist_entry *self,
528 if (se->elide) 587 if (se->elide)
529 continue; 588 continue;
530 589
531 fprintf(fp, "%s", sep ?: " "); 590 ret += snprintf(s + ret, size - ret, "%s", sep ?: " ");
532 ret += se->print(fp, self, se->width ? *se->width : 0); 591 ret += se->se_snprintf(self, s + ret, size - ret,
592 se->se_width ? *se->se_width : 0);
533 } 593 }
534 594
535 ret += fprintf(fp, "\n"); 595 return ret;
596}
536 597
537 if (symbol_conf.use_callchain) { 598int hist_entry__fprintf(struct hist_entry *self,
538 int left_margin = 0; 599 struct perf_session *pair_session,
600 bool show_displacement,
601 long displacement, FILE *fp,
602 u64 session_total)
603{
604 char bf[512];
605 hist_entry__snprintf(self, bf, sizeof(bf), pair_session,
606 show_displacement, displacement,
607 true, session_total);
608 return fprintf(fp, "%s\n", bf);
609}
539 610
540 if (sort__first_dimension == SORT_COMM) { 611static size_t hist_entry__fprintf_callchain(struct hist_entry *self, FILE *fp,
541 se = list_first_entry(&hist_entry__sort_list, typeof(*se), 612 u64 session_total)
542 list); 613{
543 left_margin = se->width ? *se->width : 0; 614 int left_margin = 0;
544 left_margin -= thread__comm_len(self->thread);
545 }
546 615
547 hist_entry_callchain__fprintf(fp, self, session_total, 616 if (sort__first_dimension == SORT_COMM) {
548 left_margin); 617 struct sort_entry *se = list_first_entry(&hist_entry__sort_list,
618 typeof(*se), list);
619 left_margin = se->se_width ? *se->se_width : 0;
620 left_margin -= thread__comm_len(self->thread);
549 } 621 }
550 622
551 return ret; 623 return hist_entry_callchain__fprintf(fp, self, session_total,
624 left_margin);
552} 625}
553 626
554size_t perf_session__fprintf_hists(struct rb_root *hists, 627size_t perf_session__fprintf_hists(struct rb_root *hists,
@@ -576,6 +649,24 @@ size_t perf_session__fprintf_hists(struct rb_root *hists,
576 fputs(" Samples ", fp); 649 fputs(" Samples ", fp);
577 } 650 }
578 651
652 if (symbol_conf.show_cpu_utilization) {
653 if (sep) {
654 ret += fprintf(fp, "%csys", *sep);
655 ret += fprintf(fp, "%cus", *sep);
656 if (perf_guest) {
657 ret += fprintf(fp, "%cguest sys", *sep);
658 ret += fprintf(fp, "%cguest us", *sep);
659 }
660 } else {
661 ret += fprintf(fp, " sys ");
662 ret += fprintf(fp, " us ");
663 if (perf_guest) {
664 ret += fprintf(fp, " guest sys ");
665 ret += fprintf(fp, " guest us ");
666 }
667 }
668 }
669
579 if (pair) { 670 if (pair) {
580 if (sep) 671 if (sep)
581 ret += fprintf(fp, "%cDelta", *sep); 672 ret += fprintf(fp, "%cDelta", *sep);
@@ -594,22 +685,22 @@ size_t perf_session__fprintf_hists(struct rb_root *hists,
594 if (se->elide) 685 if (se->elide)
595 continue; 686 continue;
596 if (sep) { 687 if (sep) {
597 fprintf(fp, "%c%s", *sep, se->header); 688 fprintf(fp, "%c%s", *sep, se->se_header);
598 continue; 689 continue;
599 } 690 }
600 width = strlen(se->header); 691 width = strlen(se->se_header);
601 if (se->width) { 692 if (se->se_width) {
602 if (symbol_conf.col_width_list_str) { 693 if (symbol_conf.col_width_list_str) {
603 if (col_width) { 694 if (col_width) {
604 *se->width = atoi(col_width); 695 *se->se_width = atoi(col_width);
605 col_width = strchr(col_width, ','); 696 col_width = strchr(col_width, ',');
606 if (col_width) 697 if (col_width)
607 ++col_width; 698 ++col_width;
608 } 699 }
609 } 700 }
610 width = *se->width = max(*se->width, width); 701 width = *se->se_width = max(*se->se_width, width);
611 } 702 }
612 fprintf(fp, " %*s", width, se->header); 703 fprintf(fp, " %*s", width, se->se_header);
613 } 704 }
614 fprintf(fp, "\n"); 705 fprintf(fp, "\n");
615 706
@@ -631,10 +722,10 @@ size_t perf_session__fprintf_hists(struct rb_root *hists,
631 continue; 722 continue;
632 723
633 fprintf(fp, " "); 724 fprintf(fp, " ");
634 if (se->width) 725 if (se->se_width)
635 width = *se->width; 726 width = *se->se_width;
636 else 727 else
637 width = strlen(se->header); 728 width = strlen(se->se_header);
638 for (i = 0; i < width; i++) 729 for (i = 0; i < width; i++)
639 fprintf(fp, "."); 730 fprintf(fp, ".");
640 } 731 }
@@ -655,9 +746,13 @@ print_entries:
655 } 746 }
656 ret += hist_entry__fprintf(h, pair, show_displacement, 747 ret += hist_entry__fprintf(h, pair, show_displacement,
657 displacement, fp, session_total); 748 displacement, fp, session_total);
658 if (h->map == NULL && verbose > 1) { 749
750 if (symbol_conf.use_callchain)
751 ret += hist_entry__fprintf_callchain(h, fp, session_total);
752
753 if (h->ms.map == NULL && verbose > 1) {
659 __map_groups__fprintf_maps(&h->thread->mg, 754 __map_groups__fprintf_maps(&h->thread->mg,
660 MAP__FUNCTION, fp); 755 MAP__FUNCTION, verbose, fp);
661 fprintf(fp, "%.10s end\n", graph_dotted_line); 756 fprintf(fp, "%.10s end\n", graph_dotted_line);
662 } 757 }
663 } 758 }
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 16f360cce5bf..9df1c340ec92 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -12,15 +12,28 @@ struct addr_location;
12struct symbol; 12struct symbol;
13struct rb_root; 13struct rb_root;
14 14
15void __perf_session__add_count(struct hist_entry *he,
16 struct addr_location *al,
17 u64 count);
15struct hist_entry *__perf_session__add_hist_entry(struct rb_root *hists, 18struct hist_entry *__perf_session__add_hist_entry(struct rb_root *hists,
16 struct addr_location *al, 19 struct addr_location *al,
17 struct symbol *parent, 20 struct symbol *parent,
18 u64 count, bool *hit); 21 u64 count, bool *hit);
19extern int64_t hist_entry__cmp(struct hist_entry *, struct hist_entry *); 22extern int64_t hist_entry__cmp(struct hist_entry *, struct hist_entry *);
20extern int64_t hist_entry__collapse(struct hist_entry *, struct hist_entry *); 23extern int64_t hist_entry__collapse(struct hist_entry *, struct hist_entry *);
24int hist_entry__fprintf(struct hist_entry *self,
25 struct perf_session *pair_session,
26 bool show_displacement,
27 long displacement, FILE *fp,
28 u64 session_total);
29int hist_entry__snprintf(struct hist_entry *self,
30 char *bf, size_t size,
31 struct perf_session *pair_session,
32 bool show_displacement, long displacement,
33 bool color, u64 session_total);
21void hist_entry__free(struct hist_entry *); 34void hist_entry__free(struct hist_entry *);
22 35
23void perf_session__output_resort(struct rb_root *hists, u64 total_samples); 36u64 perf_session__output_resort(struct rb_root *hists, u64 total_samples);
24void perf_session__collapse_resort(struct rb_root *hists); 37void perf_session__collapse_resort(struct rb_root *hists);
25size_t perf_session__fprintf_hists(struct rb_root *hists, 38size_t perf_session__fprintf_hists(struct rb_root *hists,
26 struct perf_session *pair, 39 struct perf_session *pair,
diff --git a/tools/perf/util/include/linux/compiler.h b/tools/perf/util/include/linux/compiler.h
index dfb0713ed47f..791f9dd27ebf 100644
--- a/tools/perf/util/include/linux/compiler.h
+++ b/tools/perf/util/include/linux/compiler.h
@@ -7,4 +7,6 @@
7#define __user 7#define __user
8#define __attribute_const__ 8#define __attribute_const__
9 9
10#define __used __attribute__((__unused__))
11
10#endif 12#endif
diff --git a/tools/perf/util/include/linux/kernel.h b/tools/perf/util/include/linux/kernel.h
index f2611655ab51..388ab1bfd114 100644
--- a/tools/perf/util/include/linux/kernel.h
+++ b/tools/perf/util/include/linux/kernel.h
@@ -85,16 +85,19 @@ simple_strtoul(const char *nptr, char **endptr, int base)
85 return strtoul(nptr, endptr, base); 85 return strtoul(nptr, endptr, base);
86} 86}
87 87
88int eprintf(int level,
89 const char *fmt, ...) __attribute__((format(printf, 2, 3)));
90
88#ifndef pr_fmt 91#ifndef pr_fmt
89#define pr_fmt(fmt) fmt 92#define pr_fmt(fmt) fmt
90#endif 93#endif
91 94
92#define pr_err(fmt, ...) \ 95#define pr_err(fmt, ...) \
93 do { fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__); } while (0) 96 eprintf(0, pr_fmt(fmt), ##__VA_ARGS__)
94#define pr_warning(fmt, ...) \ 97#define pr_warning(fmt, ...) \
95 do { fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__); } while (0) 98 eprintf(0, pr_fmt(fmt), ##__VA_ARGS__)
96#define pr_info(fmt, ...) \ 99#define pr_info(fmt, ...) \
97 do { fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__); } while (0) 100 eprintf(0, pr_fmt(fmt), ##__VA_ARGS__)
98#define pr_debug(fmt, ...) \ 101#define pr_debug(fmt, ...) \
99 eprintf(1, pr_fmt(fmt), ##__VA_ARGS__) 102 eprintf(1, pr_fmt(fmt), ##__VA_ARGS__)
100#define pr_debugN(n, fmt, ...) \ 103#define pr_debugN(n, fmt, ...) \
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index e509cd59c67d..7facd016ec97 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -1,9 +1,11 @@
1#include "event.h"
2#include "symbol.h" 1#include "symbol.h"
2#include <errno.h>
3#include <limits.h>
3#include <stdlib.h> 4#include <stdlib.h>
4#include <string.h> 5#include <string.h>
5#include <stdio.h> 6#include <stdio.h>
6#include "debug.h" 7#include <unistd.h>
8#include "map.h"
7 9
8const char *map_type__name[MAP__NR_TYPES] = { 10const char *map_type__name[MAP__NR_TYPES] = {
9 [MAP__FUNCTION] = "Functions", 11 [MAP__FUNCTION] = "Functions",
@@ -36,15 +38,16 @@ void map__init(struct map *self, enum map_type type,
36 self->map_ip = map__map_ip; 38 self->map_ip = map__map_ip;
37 self->unmap_ip = map__unmap_ip; 39 self->unmap_ip = map__unmap_ip;
38 RB_CLEAR_NODE(&self->rb_node); 40 RB_CLEAR_NODE(&self->rb_node);
41 self->groups = NULL;
39} 42}
40 43
41struct map *map__new(struct mmap_event *event, enum map_type type, 44struct map *map__new(struct list_head *dsos__list, u64 start, u64 len,
42 char *cwd, int cwdlen) 45 u64 pgoff, u32 pid, char *filename,
46 enum map_type type, char *cwd, int cwdlen)
43{ 47{
44 struct map *self = malloc(sizeof(*self)); 48 struct map *self = malloc(sizeof(*self));
45 49
46 if (self != NULL) { 50 if (self != NULL) {
47 const char *filename = event->filename;
48 char newfilename[PATH_MAX]; 51 char newfilename[PATH_MAX];
49 struct dso *dso; 52 struct dso *dso;
50 int anon; 53 int anon;
@@ -62,16 +65,15 @@ struct map *map__new(struct mmap_event *event, enum map_type type,
62 anon = is_anon_memory(filename); 65 anon = is_anon_memory(filename);
63 66
64 if (anon) { 67 if (anon) {
65 snprintf(newfilename, sizeof(newfilename), "/tmp/perf-%d.map", event->pid); 68 snprintf(newfilename, sizeof(newfilename), "/tmp/perf-%d.map", pid);
66 filename = newfilename; 69 filename = newfilename;
67 } 70 }
68 71
69 dso = dsos__findnew(filename); 72 dso = __dsos__findnew(dsos__list, filename);
70 if (dso == NULL) 73 if (dso == NULL)
71 goto out_delete; 74 goto out_delete;
72 75
73 map__init(self, type, event->start, event->start + event->len, 76 map__init(self, type, start, start + len, pgoff, dso);
74 event->pgoff, dso);
75 77
76 if (anon) { 78 if (anon) {
77set_identity: 79set_identity:
@@ -235,3 +237,409 @@ u64 map__objdump_2ip(struct map *map, u64 addr)
235 map->unmap_ip(map, addr); /* RIP -> IP */ 237 map->unmap_ip(map, addr); /* RIP -> IP */
236 return ip; 238 return ip;
237} 239}
240
241void map_groups__init(struct map_groups *self)
242{
243 int i;
244 for (i = 0; i < MAP__NR_TYPES; ++i) {
245 self->maps[i] = RB_ROOT;
246 INIT_LIST_HEAD(&self->removed_maps[i]);
247 }
248 self->this_kerninfo = NULL;
249}
250
251void map_groups__flush(struct map_groups *self)
252{
253 int type;
254
255 for (type = 0; type < MAP__NR_TYPES; type++) {
256 struct rb_root *root = &self->maps[type];
257 struct rb_node *next = rb_first(root);
258
259 while (next) {
260 struct map *pos = rb_entry(next, struct map, rb_node);
261 next = rb_next(&pos->rb_node);
262 rb_erase(&pos->rb_node, root);
263 /*
264 * We may have references to this map, for
265 * instance in some hist_entry instances, so
266 * just move them to a separate list.
267 */
268 list_add_tail(&pos->node, &self->removed_maps[pos->type]);
269 }
270 }
271}
272
273struct symbol *map_groups__find_symbol(struct map_groups *self,
274 enum map_type type, u64 addr,
275 struct map **mapp,
276 symbol_filter_t filter)
277{
278 struct map *map = map_groups__find(self, type, addr);
279
280 if (map != NULL) {
281 if (mapp != NULL)
282 *mapp = map;
283 return map__find_symbol(map, map->map_ip(map, addr), filter);
284 }
285
286 return NULL;
287}
288
289struct symbol *map_groups__find_symbol_by_name(struct map_groups *self,
290 enum map_type type,
291 const char *name,
292 struct map **mapp,
293 symbol_filter_t filter)
294{
295 struct rb_node *nd;
296
297 for (nd = rb_first(&self->maps[type]); nd; nd = rb_next(nd)) {
298 struct map *pos = rb_entry(nd, struct map, rb_node);
299 struct symbol *sym = map__find_symbol_by_name(pos, name, filter);
300
301 if (sym == NULL)
302 continue;
303 if (mapp != NULL)
304 *mapp = pos;
305 return sym;
306 }
307
308 return NULL;
309}
310
311size_t __map_groups__fprintf_maps(struct map_groups *self,
312 enum map_type type, int verbose, FILE *fp)
313{
314 size_t printed = fprintf(fp, "%s:\n", map_type__name[type]);
315 struct rb_node *nd;
316
317 for (nd = rb_first(&self->maps[type]); nd; nd = rb_next(nd)) {
318 struct map *pos = rb_entry(nd, struct map, rb_node);
319 printed += fprintf(fp, "Map:");
320 printed += map__fprintf(pos, fp);
321 if (verbose > 2) {
322 printed += dso__fprintf(pos->dso, type, fp);
323 printed += fprintf(fp, "--\n");
324 }
325 }
326
327 return printed;
328}
329
330size_t map_groups__fprintf_maps(struct map_groups *self, int verbose, FILE *fp)
331{
332 size_t printed = 0, i;
333 for (i = 0; i < MAP__NR_TYPES; ++i)
334 printed += __map_groups__fprintf_maps(self, i, verbose, fp);
335 return printed;
336}
337
338static size_t __map_groups__fprintf_removed_maps(struct map_groups *self,
339 enum map_type type,
340 int verbose, FILE *fp)
341{
342 struct map *pos;
343 size_t printed = 0;
344
345 list_for_each_entry(pos, &self->removed_maps[type], node) {
346 printed += fprintf(fp, "Map:");
347 printed += map__fprintf(pos, fp);
348 if (verbose > 1) {
349 printed += dso__fprintf(pos->dso, type, fp);
350 printed += fprintf(fp, "--\n");
351 }
352 }
353 return printed;
354}
355
356static size_t map_groups__fprintf_removed_maps(struct map_groups *self,
357 int verbose, FILE *fp)
358{
359 size_t printed = 0, i;
360 for (i = 0; i < MAP__NR_TYPES; ++i)
361 printed += __map_groups__fprintf_removed_maps(self, i, verbose, fp);
362 return printed;
363}
364
365size_t map_groups__fprintf(struct map_groups *self, int verbose, FILE *fp)
366{
367 size_t printed = map_groups__fprintf_maps(self, verbose, fp);
368 printed += fprintf(fp, "Removed maps:\n");
369 return printed + map_groups__fprintf_removed_maps(self, verbose, fp);
370}
371
372int map_groups__fixup_overlappings(struct map_groups *self, struct map *map,
373 int verbose, FILE *fp)
374{
375 struct rb_root *root = &self->maps[map->type];
376 struct rb_node *next = rb_first(root);
377
378 while (next) {
379 struct map *pos = rb_entry(next, struct map, rb_node);
380 next = rb_next(&pos->rb_node);
381
382 if (!map__overlap(pos, map))
383 continue;
384
385 if (verbose >= 2) {
386 fputs("overlapping maps:\n", fp);
387 map__fprintf(map, fp);
388 map__fprintf(pos, fp);
389 }
390
391 rb_erase(&pos->rb_node, root);
392 /*
393 * We may have references to this map, for instance in some
394 * hist_entry instances, so just move them to a separate
395 * list.
396 */
397 list_add_tail(&pos->node, &self->removed_maps[map->type]);
398 /*
399 * Now check if we need to create new maps for areas not
400 * overlapped by the new map:
401 */
402 if (map->start > pos->start) {
403 struct map *before = map__clone(pos);
404
405 if (before == NULL)
406 return -ENOMEM;
407
408 before->end = map->start - 1;
409 map_groups__insert(self, before);
410 if (verbose >= 2)
411 map__fprintf(before, fp);
412 }
413
414 if (map->end < pos->end) {
415 struct map *after = map__clone(pos);
416
417 if (after == NULL)
418 return -ENOMEM;
419
420 after->start = map->end + 1;
421 map_groups__insert(self, after);
422 if (verbose >= 2)
423 map__fprintf(after, fp);
424 }
425 }
426
427 return 0;
428}
429
430/*
431 * XXX This should not really _copy_ te maps, but refcount them.
432 */
433int map_groups__clone(struct map_groups *self,
434 struct map_groups *parent, enum map_type type)
435{
436 struct rb_node *nd;
437 for (nd = rb_first(&parent->maps[type]); nd; nd = rb_next(nd)) {
438 struct map *map = rb_entry(nd, struct map, rb_node);
439 struct map *new = map__clone(map);
440 if (new == NULL)
441 return -ENOMEM;
442 map_groups__insert(self, new);
443 }
444 return 0;
445}
446
447static u64 map__reloc_map_ip(struct map *map, u64 ip)
448{
449 return ip + (s64)map->pgoff;
450}
451
452static u64 map__reloc_unmap_ip(struct map *map, u64 ip)
453{
454 return ip - (s64)map->pgoff;
455}
456
457void map__reloc_vmlinux(struct map *self)
458{
459 struct kmap *kmap = map__kmap(self);
460 s64 reloc;
461
462 if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->unrelocated_addr)
463 return;
464
465 reloc = (kmap->ref_reloc_sym->unrelocated_addr -
466 kmap->ref_reloc_sym->addr);
467
468 if (!reloc)
469 return;
470
471 self->map_ip = map__reloc_map_ip;
472 self->unmap_ip = map__reloc_unmap_ip;
473 self->pgoff = reloc;
474}
475
476void maps__insert(struct rb_root *maps, struct map *map)
477{
478 struct rb_node **p = &maps->rb_node;
479 struct rb_node *parent = NULL;
480 const u64 ip = map->start;
481 struct map *m;
482
483 while (*p != NULL) {
484 parent = *p;
485 m = rb_entry(parent, struct map, rb_node);
486 if (ip < m->start)
487 p = &(*p)->rb_left;
488 else
489 p = &(*p)->rb_right;
490 }
491
492 rb_link_node(&map->rb_node, parent, p);
493 rb_insert_color(&map->rb_node, maps);
494}
495
496struct map *maps__find(struct rb_root *maps, u64 ip)
497{
498 struct rb_node **p = &maps->rb_node;
499 struct rb_node *parent = NULL;
500 struct map *m;
501
502 while (*p != NULL) {
503 parent = *p;
504 m = rb_entry(parent, struct map, rb_node);
505 if (ip < m->start)
506 p = &(*p)->rb_left;
507 else if (ip > m->end)
508 p = &(*p)->rb_right;
509 else
510 return m;
511 }
512
513 return NULL;
514}
515
516struct kernel_info *add_new_kernel_info(struct rb_root *kerninfo_root,
517 pid_t pid, const char *root_dir)
518{
519 struct rb_node **p = &kerninfo_root->rb_node;
520 struct rb_node *parent = NULL;
521 struct kernel_info *kerninfo, *pos;
522
523 kerninfo = malloc(sizeof(struct kernel_info));
524 if (!kerninfo)
525 return NULL;
526
527 kerninfo->pid = pid;
528 map_groups__init(&kerninfo->kmaps);
529 kerninfo->root_dir = strdup(root_dir);
530 RB_CLEAR_NODE(&kerninfo->rb_node);
531 INIT_LIST_HEAD(&kerninfo->dsos__user);
532 INIT_LIST_HEAD(&kerninfo->dsos__kernel);
533 kerninfo->kmaps.this_kerninfo = kerninfo;
534
535 while (*p != NULL) {
536 parent = *p;
537 pos = rb_entry(parent, struct kernel_info, rb_node);
538 if (pid < pos->pid)
539 p = &(*p)->rb_left;
540 else
541 p = &(*p)->rb_right;
542 }
543
544 rb_link_node(&kerninfo->rb_node, parent, p);
545 rb_insert_color(&kerninfo->rb_node, kerninfo_root);
546
547 return kerninfo;
548}
549
550struct kernel_info *kerninfo__find(struct rb_root *kerninfo_root, pid_t pid)
551{
552 struct rb_node **p = &kerninfo_root->rb_node;
553 struct rb_node *parent = NULL;
554 struct kernel_info *kerninfo;
555 struct kernel_info *default_kerninfo = NULL;
556
557 while (*p != NULL) {
558 parent = *p;
559 kerninfo = rb_entry(parent, struct kernel_info, rb_node);
560 if (pid < kerninfo->pid)
561 p = &(*p)->rb_left;
562 else if (pid > kerninfo->pid)
563 p = &(*p)->rb_right;
564 else
565 return kerninfo;
566 if (!kerninfo->pid)
567 default_kerninfo = kerninfo;
568 }
569
570 return default_kerninfo;
571}
572
573struct kernel_info *kerninfo__findhost(struct rb_root *kerninfo_root)
574{
575 struct rb_node **p = &kerninfo_root->rb_node;
576 struct rb_node *parent = NULL;
577 struct kernel_info *kerninfo;
578 pid_t pid = HOST_KERNEL_ID;
579
580 while (*p != NULL) {
581 parent = *p;
582 kerninfo = rb_entry(parent, struct kernel_info, rb_node);
583 if (pid < kerninfo->pid)
584 p = &(*p)->rb_left;
585 else if (pid > kerninfo->pid)
586 p = &(*p)->rb_right;
587 else
588 return kerninfo;
589 }
590
591 return NULL;
592}
593
594struct kernel_info *kerninfo__findnew(struct rb_root *kerninfo_root, pid_t pid)
595{
596 char path[PATH_MAX];
597 const char *root_dir;
598 int ret;
599 struct kernel_info *kerninfo = kerninfo__find(kerninfo_root, pid);
600
601 if (!kerninfo || kerninfo->pid != pid) {
602 if (pid == HOST_KERNEL_ID || pid == DEFAULT_GUEST_KERNEL_ID)
603 root_dir = "";
604 else {
605 if (!symbol_conf.guestmount)
606 goto out;
607 sprintf(path, "%s/%d", symbol_conf.guestmount, pid);
608 ret = access(path, R_OK);
609 if (ret) {
610 pr_err("Can't access file %s\n", path);
611 goto out;
612 }
613 root_dir = path;
614 }
615 kerninfo = add_new_kernel_info(kerninfo_root, pid, root_dir);
616 }
617
618out:
619 return kerninfo;
620}
621
622void kerninfo__process_allkernels(struct rb_root *kerninfo_root,
623 process_kernel_info process,
624 void *data)
625{
626 struct rb_node *nd;
627
628 for (nd = rb_first(kerninfo_root); nd; nd = rb_next(nd)) {
629 struct kernel_info *pos = rb_entry(nd, struct kernel_info,
630 rb_node);
631 process(pos, data);
632 }
633}
634
635char *kern_mmap_name(struct kernel_info *kerninfo, char *buff)
636{
637 if (is_host_kernel(kerninfo))
638 sprintf(buff, "[%s]", "kernel.kallsyms");
639 else if (is_default_guest(kerninfo))
640 sprintf(buff, "[%s]", "guest.kernel.kallsyms");
641 else
642 sprintf(buff, "[%s.%d]", "guest.kernel.kallsyms", kerninfo->pid);
643
644 return buff;
645}
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index b756368076c6..30d38d634e09 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -4,7 +4,8 @@
4#include <linux/compiler.h> 4#include <linux/compiler.h>
5#include <linux/list.h> 5#include <linux/list.h>
6#include <linux/rbtree.h> 6#include <linux/rbtree.h>
7#include <linux/types.h> 7#include <stdio.h>
8#include "types.h"
8 9
9enum map_type { 10enum map_type {
10 MAP__FUNCTION = 0, 11 MAP__FUNCTION = 0,
@@ -18,6 +19,7 @@ extern const char *map_type__name[MAP__NR_TYPES];
18struct dso; 19struct dso;
19struct ref_reloc_sym; 20struct ref_reloc_sym;
20struct map_groups; 21struct map_groups;
22struct kernel_info;
21 23
22struct map { 24struct map {
23 union { 25 union {
@@ -35,6 +37,7 @@ struct map {
35 u64 (*unmap_ip)(struct map *, u64); 37 u64 (*unmap_ip)(struct map *, u64);
36 38
37 struct dso *dso; 39 struct dso *dso;
40 struct map_groups *groups;
38}; 41};
39 42
40struct kmap { 43struct kmap {
@@ -42,6 +45,26 @@ struct kmap {
42 struct map_groups *kmaps; 45 struct map_groups *kmaps;
43}; 46};
44 47
48struct map_groups {
49 struct rb_root maps[MAP__NR_TYPES];
50 struct list_head removed_maps[MAP__NR_TYPES];
51 struct kernel_info *this_kerninfo;
52};
53
54/* Native host kernel uses -1 as pid index in kernel_info */
55#define HOST_KERNEL_ID (-1)
56#define DEFAULT_GUEST_KERNEL_ID (0)
57
58struct kernel_info {
59 struct rb_node rb_node;
60 pid_t pid;
61 char *root_dir;
62 struct list_head dsos__user;
63 struct list_head dsos__kernel;
64 struct map_groups kmaps;
65 struct map *vmlinux_maps[MAP__NR_TYPES];
66};
67
45static inline struct kmap *map__kmap(struct map *self) 68static inline struct kmap *map__kmap(struct map *self)
46{ 69{
47 return (struct kmap *)(self + 1); 70 return (struct kmap *)(self + 1);
@@ -68,14 +91,14 @@ u64 map__rip_2objdump(struct map *map, u64 rip);
68u64 map__objdump_2ip(struct map *map, u64 addr); 91u64 map__objdump_2ip(struct map *map, u64 addr);
69 92
70struct symbol; 93struct symbol;
71struct mmap_event;
72 94
73typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym); 95typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym);
74 96
75void map__init(struct map *self, enum map_type type, 97void map__init(struct map *self, enum map_type type,
76 u64 start, u64 end, u64 pgoff, struct dso *dso); 98 u64 start, u64 end, u64 pgoff, struct dso *dso);
77struct map *map__new(struct mmap_event *event, enum map_type, 99struct map *map__new(struct list_head *dsos__list, u64 start, u64 len,
78 char *cwd, int cwdlen); 100 u64 pgoff, u32 pid, char *filename,
101 enum map_type type, char *cwd, int cwdlen);
79void map__delete(struct map *self); 102void map__delete(struct map *self);
80struct map *map__clone(struct map *self); 103struct map *map__clone(struct map *self);
81int map__overlap(struct map *l, struct map *r); 104int map__overlap(struct map *l, struct map *r);
@@ -91,4 +114,94 @@ void map__fixup_end(struct map *self);
91 114
92void map__reloc_vmlinux(struct map *self); 115void map__reloc_vmlinux(struct map *self);
93 116
117size_t __map_groups__fprintf_maps(struct map_groups *self,
118 enum map_type type, int verbose, FILE *fp);
119void maps__insert(struct rb_root *maps, struct map *map);
120struct map *maps__find(struct rb_root *maps, u64 addr);
121void map_groups__init(struct map_groups *self);
122int map_groups__clone(struct map_groups *self,
123 struct map_groups *parent, enum map_type type);
124size_t map_groups__fprintf(struct map_groups *self, int verbose, FILE *fp);
125size_t map_groups__fprintf_maps(struct map_groups *self, int verbose, FILE *fp);
126
127struct kernel_info *add_new_kernel_info(struct rb_root *kerninfo_root,
128 pid_t pid, const char *root_dir);
129struct kernel_info *kerninfo__find(struct rb_root *kerninfo_root, pid_t pid);
130struct kernel_info *kerninfo__findnew(struct rb_root *kerninfo_root, pid_t pid);
131struct kernel_info *kerninfo__findhost(struct rb_root *kerninfo_root);
132char *kern_mmap_name(struct kernel_info *kerninfo, char *buff);
133
134/*
135 * Default guest kernel is defined by parameter --guestkallsyms
136 * and --guestmodules
137 */
138static inline int is_default_guest(struct kernel_info *kerninfo)
139{
140 if (!kerninfo)
141 return 0;
142 return kerninfo->pid == DEFAULT_GUEST_KERNEL_ID;
143}
144
145static inline int is_host_kernel(struct kernel_info *kerninfo)
146{
147 if (!kerninfo)
148 return 0;
149 return kerninfo->pid == HOST_KERNEL_ID;
150}
151
152typedef void (*process_kernel_info)(struct kernel_info *kerninfo, void *data);
153void kerninfo__process_allkernels(struct rb_root *kerninfo_root,
154 process_kernel_info process,
155 void *data);
156
157static inline void map_groups__insert(struct map_groups *self, struct map *map)
158{
159 maps__insert(&self->maps[map->type], map);
160 map->groups = self;
161}
162
163static inline struct map *map_groups__find(struct map_groups *self,
164 enum map_type type, u64 addr)
165{
166 return maps__find(&self->maps[type], addr);
167}
168
169struct symbol *map_groups__find_symbol(struct map_groups *self,
170 enum map_type type, u64 addr,
171 struct map **mapp,
172 symbol_filter_t filter);
173
174struct symbol *map_groups__find_symbol_by_name(struct map_groups *self,
175 enum map_type type,
176 const char *name,
177 struct map **mapp,
178 symbol_filter_t filter);
179
180static inline
181struct symbol *map_groups__find_function(struct map_groups *self, u64 addr,
182 struct map **mapp, symbol_filter_t filter)
183{
184 return map_groups__find_symbol(self, MAP__FUNCTION, addr, mapp, filter);
185}
186
187static inline
188struct symbol *map_groups__find_function_by_name(struct map_groups *self,
189 const char *name, struct map **mapp,
190 symbol_filter_t filter)
191{
192 return map_groups__find_symbol_by_name(self, MAP__FUNCTION, name, mapp, filter);
193}
194
195int map_groups__fixup_overlappings(struct map_groups *self, struct map *map,
196 int verbose, FILE *fp);
197
198struct map *map_groups__find_by_name(struct map_groups *self,
199 enum map_type type, const char *name);
200struct map *map_groups__new_module(struct map_groups *self,
201 u64 start,
202 const char *filename,
203 struct kernel_info *kerninfo);
204
205void map_groups__flush(struct map_groups *self);
206
94#endif /* __PERF_MAP_H */ 207#endif /* __PERF_MAP_H */
diff --git a/tools/perf/util/newt.c b/tools/perf/util/newt.c
new file mode 100644
index 000000000000..7a123a94e3fc
--- /dev/null
+++ b/tools/perf/util/newt.c
@@ -0,0 +1,725 @@
1#define _GNU_SOURCE
2#include <stdio.h>
3#undef _GNU_SOURCE
4
5#include <stdlib.h>
6#include <newt.h>
7#include <sys/ttydefaults.h>
8
9#include "cache.h"
10#include "hist.h"
11#include "session.h"
12#include "sort.h"
13#include "symbol.h"
14
15struct ui_progress {
16 newtComponent form, scale;
17};
18
19struct ui_progress *ui_progress__new(const char *title, u64 total)
20{
21 struct ui_progress *self = malloc(sizeof(*self));
22
23 if (self != NULL) {
24 int cols;
25 newtGetScreenSize(&cols, NULL);
26 cols -= 4;
27 newtCenteredWindow(cols, 1, title);
28 self->form = newtForm(NULL, NULL, 0);
29 if (self->form == NULL)
30 goto out_free_self;
31 self->scale = newtScale(0, 0, cols, total);
32 if (self->scale == NULL)
33 goto out_free_form;
34 newtFormAddComponents(self->form, self->scale, NULL);
35 newtRefresh();
36 }
37
38 return self;
39
40out_free_form:
41 newtFormDestroy(self->form);
42out_free_self:
43 free(self);
44 return NULL;
45}
46
47void ui_progress__update(struct ui_progress *self, u64 curr)
48{
49 newtScaleSet(self->scale, curr);
50 newtRefresh();
51}
52
53void ui_progress__delete(struct ui_progress *self)
54{
55 newtFormDestroy(self->form);
56 newtPopWindow();
57 free(self);
58}
59
60static char browser__last_msg[1024];
61
62int browser__show_help(const char *format, va_list ap)
63{
64 int ret;
65 static int backlog;
66
67 ret = vsnprintf(browser__last_msg + backlog,
68 sizeof(browser__last_msg) - backlog, format, ap);
69 backlog += ret;
70
71 if (browser__last_msg[backlog - 1] == '\n') {
72 newtPopHelpLine();
73 newtPushHelpLine(browser__last_msg);
74 newtRefresh();
75 backlog = 0;
76 }
77
78 return ret;
79}
80
81static void newt_form__set_exit_keys(newtComponent self)
82{
83 newtFormAddHotKey(self, NEWT_KEY_ESCAPE);
84 newtFormAddHotKey(self, 'Q');
85 newtFormAddHotKey(self, 'q');
86 newtFormAddHotKey(self, CTRL('c'));
87}
88
89static newtComponent newt_form__new(void)
90{
91 newtComponent self = newtForm(NULL, NULL, 0);
92 if (self)
93 newt_form__set_exit_keys(self);
94 return self;
95}
96
97static int popup_menu(int argc, char * const argv[])
98{
99 struct newtExitStruct es;
100 int i, rc = -1, max_len = 5;
101 newtComponent listbox, form = newt_form__new();
102
103 if (form == NULL)
104 return -1;
105
106 listbox = newtListbox(0, 0, argc, NEWT_FLAG_RETURNEXIT);
107 if (listbox == NULL)
108 goto out_destroy_form;
109
110 newtFormAddComponents(form, listbox, NULL);
111
112 for (i = 0; i < argc; ++i) {
113 int len = strlen(argv[i]);
114 if (len > max_len)
115 max_len = len;
116 if (newtListboxAddEntry(listbox, argv[i], (void *)(long)i))
117 goto out_destroy_form;
118 }
119
120 newtCenteredWindow(max_len, argc, NULL);
121 newtFormRun(form, &es);
122 rc = newtListboxGetCurrent(listbox) - NULL;
123 if (es.reason == NEWT_EXIT_HOTKEY)
124 rc = -1;
125 newtPopWindow();
126out_destroy_form:
127 newtFormDestroy(form);
128 return rc;
129}
130
131static bool dialog_yesno(const char *msg)
132{
133 /* newtWinChoice should really be accepting const char pointers... */
134 char yes[] = "Yes", no[] = "No";
135 return newtWinChoice(NULL, yes, no, (char *)msg) == 1;
136}
137
138/*
139 * When debugging newt problems it was useful to be able to "unroll"
140 * the calls to newtCheckBoxTreeAdd{Array,Item}, so that we can generate
141 * a source file with the sequence of calls to these methods, to then
142 * tweak the arrays to get the intended results, so I'm keeping this code
143 * here, may be useful again in the future.
144 */
145#undef NEWT_DEBUG
146
147static void newt_checkbox_tree__add(newtComponent tree, const char *str,
148 void *priv, int *indexes)
149{
150#ifdef NEWT_DEBUG
151 /* Print the newtCheckboxTreeAddArray to tinker with its index arrays */
152 int i = 0, len = 40 - strlen(str);
153
154 fprintf(stderr,
155 "\tnewtCheckboxTreeAddItem(tree, %*.*s\"%s\", (void *)%p, 0, ",
156 len, len, " ", str, priv);
157 while (indexes[i] != NEWT_ARG_LAST) {
158 if (indexes[i] != NEWT_ARG_APPEND)
159 fprintf(stderr, " %d,", indexes[i]);
160 else
161 fprintf(stderr, " %s,", "NEWT_ARG_APPEND");
162 ++i;
163 }
164 fprintf(stderr, " %s", " NEWT_ARG_LAST);\n");
165 fflush(stderr);
166#endif
167 newtCheckboxTreeAddArray(tree, str, priv, 0, indexes);
168}
169
170static char *callchain_list__sym_name(struct callchain_list *self,
171 char *bf, size_t bfsize)
172{
173 if (self->ms.sym)
174 return self->ms.sym->name;
175
176 snprintf(bf, bfsize, "%#Lx", self->ip);
177 return bf;
178}
179
180static void __callchain__append_graph_browser(struct callchain_node *self,
181 newtComponent tree, u64 total,
182 int *indexes, int depth)
183{
184 struct rb_node *node;
185 u64 new_total, remaining;
186 int idx = 0;
187
188 if (callchain_param.mode == CHAIN_GRAPH_REL)
189 new_total = self->children_hit;
190 else
191 new_total = total;
192
193 remaining = new_total;
194 node = rb_first(&self->rb_root);
195 while (node) {
196 struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
197 struct rb_node *next = rb_next(node);
198 u64 cumul = cumul_hits(child);
199 struct callchain_list *chain;
200 int first = true, printed = 0;
201 int chain_idx = -1;
202 remaining -= cumul;
203
204 indexes[depth] = NEWT_ARG_APPEND;
205 indexes[depth + 1] = NEWT_ARG_LAST;
206
207 list_for_each_entry(chain, &child->val, list) {
208 char ipstr[BITS_PER_LONG / 4 + 1],
209 *alloc_str = NULL;
210 const char *str = callchain_list__sym_name(chain, ipstr, sizeof(ipstr));
211
212 if (first) {
213 double percent = cumul * 100.0 / new_total;
214
215 first = false;
216 if (asprintf(&alloc_str, "%2.2f%% %s", percent, str) < 0)
217 str = "Not enough memory!";
218 else
219 str = alloc_str;
220 } else {
221 indexes[depth] = idx;
222 indexes[depth + 1] = NEWT_ARG_APPEND;
223 indexes[depth + 2] = NEWT_ARG_LAST;
224 ++chain_idx;
225 }
226 newt_checkbox_tree__add(tree, str, &chain->ms, indexes);
227 free(alloc_str);
228 ++printed;
229 }
230
231 indexes[depth] = idx;
232 if (chain_idx != -1)
233 indexes[depth + 1] = chain_idx;
234 if (printed != 0)
235 ++idx;
236 __callchain__append_graph_browser(child, tree, new_total, indexes,
237 depth + (chain_idx != -1 ? 2 : 1));
238 node = next;
239 }
240}
241
242static void callchain__append_graph_browser(struct callchain_node *self,
243 newtComponent tree, u64 total,
244 int *indexes, int parent_idx)
245{
246 struct callchain_list *chain;
247 int i = 0;
248
249 indexes[1] = NEWT_ARG_APPEND;
250 indexes[2] = NEWT_ARG_LAST;
251
252 list_for_each_entry(chain, &self->val, list) {
253 char ipstr[BITS_PER_LONG / 4 + 1], *str;
254
255 if (chain->ip >= PERF_CONTEXT_MAX)
256 continue;
257
258 if (!i++ && sort__first_dimension == SORT_SYM)
259 continue;
260
261 str = callchain_list__sym_name(chain, ipstr, sizeof(ipstr));
262 newt_checkbox_tree__add(tree, str, &chain->ms, indexes);
263 }
264
265 indexes[1] = parent_idx;
266 indexes[2] = NEWT_ARG_APPEND;
267 indexes[3] = NEWT_ARG_LAST;
268 __callchain__append_graph_browser(self, tree, total, indexes, 2);
269}
270
271static void hist_entry__append_callchain_browser(struct hist_entry *self,
272 newtComponent tree, u64 total, int parent_idx)
273{
274 struct rb_node *rb_node;
275 int indexes[1024] = { [0] = parent_idx, };
276 int idx = 0;
277 struct callchain_node *chain;
278
279 rb_node = rb_first(&self->sorted_chain);
280 while (rb_node) {
281 chain = rb_entry(rb_node, struct callchain_node, rb_node);
282 switch (callchain_param.mode) {
283 case CHAIN_FLAT:
284 break;
285 case CHAIN_GRAPH_ABS: /* falldown */
286 case CHAIN_GRAPH_REL:
287 callchain__append_graph_browser(chain, tree, total, indexes, idx++);
288 break;
289 case CHAIN_NONE:
290 default:
291 break;
292 }
293 rb_node = rb_next(rb_node);
294 }
295}
296
297static size_t hist_entry__append_browser(struct hist_entry *self,
298 newtComponent tree, u64 total)
299{
300 char s[256];
301 size_t ret;
302
303 if (symbol_conf.exclude_other && !self->parent)
304 return 0;
305
306 ret = hist_entry__snprintf(self, s, sizeof(s), NULL,
307 false, 0, false, total);
308 if (symbol_conf.use_callchain) {
309 int indexes[2];
310
311 indexes[0] = NEWT_ARG_APPEND;
312 indexes[1] = NEWT_ARG_LAST;
313 newt_checkbox_tree__add(tree, s, &self->ms, indexes);
314 } else
315 newtListboxAppendEntry(tree, s, &self->ms);
316
317 return ret;
318}
319
320static void map_symbol__annotate_browser(const struct map_symbol *self,
321 const char *input_name)
322{
323 FILE *fp;
324 int cols, rows;
325 newtComponent form, tree;
326 struct newtExitStruct es;
327 char *str;
328 size_t line_len, max_line_len = 0;
329 size_t max_usable_width;
330 char *line = NULL;
331
332 if (self->sym == NULL)
333 return;
334
335 if (asprintf(&str, "perf annotate -i \"%s\" -d \"%s\" %s 2>&1 | expand",
336 input_name, self->map->dso->name, self->sym->name) < 0)
337 return;
338
339 fp = popen(str, "r");
340 if (fp == NULL)
341 goto out_free_str;
342
343 newtPushHelpLine("Press ESC to exit");
344 newtGetScreenSize(&cols, &rows);
345 tree = newtListbox(0, 0, rows - 5, NEWT_FLAG_SCROLL);
346
347 while (!feof(fp)) {
348 if (getline(&line, &line_len, fp) < 0 || !line_len)
349 break;
350 while (line_len != 0 && isspace(line[line_len - 1]))
351 line[--line_len] = '\0';
352
353 if (line_len > max_line_len)
354 max_line_len = line_len;
355 newtListboxAppendEntry(tree, line, NULL);
356 }
357 fclose(fp);
358 free(line);
359
360 max_usable_width = cols - 22;
361 if (max_line_len > max_usable_width)
362 max_line_len = max_usable_width;
363
364 newtListboxSetWidth(tree, max_line_len);
365
366 newtCenteredWindow(max_line_len + 2, rows - 5, self->sym->name);
367 form = newt_form__new();
368 newtFormAddComponents(form, tree, NULL);
369
370 newtFormRun(form, &es);
371 newtFormDestroy(form);
372 newtPopWindow();
373 newtPopHelpLine();
374out_free_str:
375 free(str);
376}
377
378static const void *newt__symbol_tree_get_current(newtComponent self)
379{
380 if (symbol_conf.use_callchain)
381 return newtCheckboxTreeGetCurrent(self);
382 return newtListboxGetCurrent(self);
383}
384
385static void hist_browser__selection(newtComponent self, void *data)
386{
387 const struct map_symbol **symbol_ptr = data;
388 *symbol_ptr = newt__symbol_tree_get_current(self);
389}
390
391struct hist_browser {
392 newtComponent form, tree;
393 const struct map_symbol *selection;
394};
395
396static struct hist_browser *hist_browser__new(void)
397{
398 struct hist_browser *self = malloc(sizeof(*self));
399
400 if (self != NULL)
401 self->form = NULL;
402
403 return self;
404}
405
406static void hist_browser__delete(struct hist_browser *self)
407{
408 newtFormDestroy(self->form);
409 newtPopWindow();
410 free(self);
411}
412
413static int hist_browser__populate(struct hist_browser *self, struct rb_root *hists,
414 u64 nr_hists, u64 session_total, const char *title)
415{
416 int max_len = 0, idx, cols, rows;
417 struct ui_progress *progress;
418 struct rb_node *nd;
419 u64 curr_hist = 0;
420 char seq[] = ".";
421 char str[256];
422
423 if (self->form) {
424 newtFormDestroy(self->form);
425 newtPopWindow();
426 }
427
428 snprintf(str, sizeof(str), "Samples: %Ld ",
429 session_total);
430 newtDrawRootText(0, 0, str);
431
432 newtGetScreenSize(NULL, &rows);
433
434 if (symbol_conf.use_callchain)
435 self->tree = newtCheckboxTreeMulti(0, 0, rows - 5, seq,
436 NEWT_FLAG_SCROLL);
437 else
438 self->tree = newtListbox(0, 0, rows - 5,
439 (NEWT_FLAG_SCROLL |
440 NEWT_FLAG_RETURNEXIT));
441
442 newtComponentAddCallback(self->tree, hist_browser__selection,
443 &self->selection);
444
445 progress = ui_progress__new("Adding entries to the browser...", nr_hists);
446 if (progress == NULL)
447 return -1;
448
449 idx = 0;
450 for (nd = rb_first(hists); nd; nd = rb_next(nd)) {
451 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
452 int len;
453
454 if (h->filtered)
455 continue;
456
457 len = hist_entry__append_browser(h, self->tree, session_total);
458 if (len > max_len)
459 max_len = len;
460 if (symbol_conf.use_callchain)
461 hist_entry__append_callchain_browser(h, self->tree,
462 session_total, idx++);
463 ++curr_hist;
464 if (curr_hist % 5)
465 ui_progress__update(progress, curr_hist);
466 }
467
468 ui_progress__delete(progress);
469
470 newtGetScreenSize(&cols, &rows);
471
472 if (max_len > cols)
473 max_len = cols - 3;
474
475 if (!symbol_conf.use_callchain)
476 newtListboxSetWidth(self->tree, max_len);
477
478 newtCenteredWindow(max_len + (symbol_conf.use_callchain ? 5 : 0),
479 rows - 5, title);
480 self->form = newt_form__new();
481 if (self->form == NULL)
482 return -1;
483
484 newtFormAddHotKey(self->form, 'A');
485 newtFormAddHotKey(self->form, 'a');
486 newtFormAddHotKey(self->form, NEWT_KEY_RIGHT);
487 newtFormAddComponents(self->form, self->tree, NULL);
488 self->selection = newt__symbol_tree_get_current(self->tree);
489
490 return 0;
491}
492
493enum hist_filter {
494 HIST_FILTER__DSO,
495 HIST_FILTER__THREAD,
496};
497
498static u64 hists__filter_by_dso(struct rb_root *hists, const struct dso *dso,
499 u64 *session_total)
500{
501 struct rb_node *nd;
502 u64 nr_hists = 0;
503
504 *session_total = 0;
505
506 for (nd = rb_first(hists); nd; nd = rb_next(nd)) {
507 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
508
509 if (dso != NULL && (h->ms.map == NULL || h->ms.map->dso != dso)) {
510 h->filtered |= (1 << HIST_FILTER__DSO);
511 continue;
512 }
513 h->filtered &= ~(1 << HIST_FILTER__DSO);
514 ++nr_hists;
515 *session_total += h->count;
516 }
517
518 return nr_hists;
519}
520
521static u64 hists__filter_by_thread(struct rb_root *hists, const struct thread *thread,
522 u64 *session_total)
523{
524 struct rb_node *nd;
525 u64 nr_hists = 0;
526
527 *session_total = 0;
528
529 for (nd = rb_first(hists); nd; nd = rb_next(nd)) {
530 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
531
532 if (thread != NULL && h->thread != thread) {
533 h->filtered |= (1 << HIST_FILTER__THREAD);
534 continue;
535 }
536 h->filtered &= ~(1 << HIST_FILTER__THREAD);
537 ++nr_hists;
538 *session_total += h->count;
539 }
540
541 return nr_hists;
542}
543
544static struct thread *hist_browser__selected_thread(struct hist_browser *self)
545{
546 int *indexes;
547
548 if (!symbol_conf.use_callchain)
549 goto out;
550
551 indexes = newtCheckboxTreeFindItem(self->tree, (void *)self->selection);
552 if (indexes) {
553 bool is_hist_entry = indexes[1] == NEWT_ARG_LAST;
554 free(indexes);
555 if (is_hist_entry)
556 goto out;
557 }
558 return NULL;
559out:
560 return *(struct thread **)(self->selection + 1);
561}
562
563static int hist_browser__title(char *bf, size_t size, const char *input_name,
564 const struct dso *dso, const struct thread *thread)
565{
566 int printed = 0;
567
568 if (thread)
569 printed += snprintf(bf + printed, size - printed,
570 "Thread: %s(%d)",
571 (thread->comm_set ? thread->comm : ""),
572 thread->pid);
573 if (dso)
574 printed += snprintf(bf + printed, size - printed,
575 "%sDSO: %s", thread ? " " : "",
576 dso->short_name);
577 return printed ?: snprintf(bf, size, "Report: %s", input_name);
578}
579
580int perf_session__browse_hists(struct rb_root *hists, u64 nr_hists,
581 u64 session_total, const char *helpline,
582 const char *input_name)
583{
584 struct hist_browser *browser = hist_browser__new();
585 const struct thread *thread_filter = NULL;
586 const struct dso *dso_filter = NULL;
587 struct newtExitStruct es;
588 char msg[160];
589 int err = -1;
590
591 if (browser == NULL)
592 return -1;
593
594 newtPushHelpLine(helpline);
595
596 hist_browser__title(msg, sizeof(msg), input_name,
597 dso_filter, thread_filter);
598 if (hist_browser__populate(browser, hists, nr_hists, session_total, msg) < 0)
599 goto out;
600
601 while (1) {
602 const struct thread *thread;
603 const struct dso *dso;
604 char *options[16];
605 int nr_options = 0, choice = 0, i,
606 annotate = -2, zoom_dso = -2, zoom_thread = -2;
607
608 newtFormRun(browser->form, &es);
609 if (es.reason == NEWT_EXIT_HOTKEY) {
610 if (toupper(es.u.key) == 'A')
611 goto do_annotate;
612 if (es.u.key == NEWT_KEY_ESCAPE ||
613 toupper(es.u.key) == 'Q' ||
614 es.u.key == CTRL('c')) {
615 if (dialog_yesno("Do you really want to exit?"))
616 break;
617 else
618 continue;
619 }
620 }
621
622 if (browser->selection->sym != NULL &&
623 asprintf(&options[nr_options], "Annotate %s",
624 browser->selection->sym->name) > 0)
625 annotate = nr_options++;
626
627 thread = hist_browser__selected_thread(browser);
628 if (thread != NULL &&
629 asprintf(&options[nr_options], "Zoom %s %s(%d) thread",
630 (thread_filter ? "out of" : "into"),
631 (thread->comm_set ? thread->comm : ""),
632 thread->pid) > 0)
633 zoom_thread = nr_options++;
634
635 dso = browser->selection->map ? browser->selection->map->dso : NULL;
636 if (dso != NULL &&
637 asprintf(&options[nr_options], "Zoom %s %s DSO",
638 (dso_filter ? "out of" : "into"),
639 (dso->kernel ? "the Kernel" : dso->short_name)) > 0)
640 zoom_dso = nr_options++;
641
642 options[nr_options++] = (char *)"Exit";
643
644 choice = popup_menu(nr_options, options);
645
646 for (i = 0; i < nr_options - 1; ++i)
647 free(options[i]);
648
649 if (choice == nr_options - 1)
650 break;
651
652 if (choice == -1)
653 continue;
654do_annotate:
655 if (choice == annotate) {
656 if (browser->selection->map->dso->origin == DSO__ORIG_KERNEL) {
657 newtPopHelpLine();
658 newtPushHelpLine("No vmlinux file found, can't "
659 "annotate with just a "
660 "kallsyms file");
661 continue;
662 }
663 map_symbol__annotate_browser(browser->selection, input_name);
664 } else if (choice == zoom_dso) {
665 if (dso_filter) {
666 newtPopHelpLine();
667 dso_filter = NULL;
668 } else {
669 snprintf(msg, sizeof(msg),
670 "To zoom out press -> + \"Zoom out of %s DSO\"",
671 dso->kernel ? "the Kernel" : dso->short_name);
672 newtPushHelpLine(msg);
673 dso_filter = dso;
674 }
675 nr_hists = hists__filter_by_dso(hists, dso_filter, &session_total);
676 hist_browser__title(msg, sizeof(msg), input_name,
677 dso_filter, thread_filter);
678 if (hist_browser__populate(browser, hists, nr_hists, session_total, msg) < 0)
679 goto out;
680 } else if (choice == zoom_thread) {
681 if (thread_filter) {
682 newtPopHelpLine();
683 thread_filter = NULL;
684 } else {
685 snprintf(msg, sizeof(msg),
686 "To zoom out press -> + \"Zoom out of %s(%d) thread\"",
687 (thread->comm_set ? thread->comm : ""),
688 thread->pid);
689 newtPushHelpLine(msg);
690 thread_filter = thread;
691 }
692 nr_hists = hists__filter_by_thread(hists, thread_filter, &session_total);
693 hist_browser__title(msg, sizeof(msg), input_name,
694 dso_filter, thread_filter);
695 if (hist_browser__populate(browser, hists, nr_hists, session_total, msg) < 0)
696 goto out;
697 }
698 }
699 err = 0;
700out:
701 hist_browser__delete(browser);
702 return err;
703}
704
705void setup_browser(void)
706{
707 if (!isatty(1))
708 return;
709
710 use_browser = true;
711 newtInit();
712 newtCls();
713 newtPushHelpLine(" ");
714}
715
716void exit_browser(bool wait_for_ok)
717{
718 if (use_browser) {
719 if (wait_for_ok) {
720 char title[] = "Fatal Error", ok[] = "Ok";
721 newtWinMessage(title, ok, browser__last_msg);
722 }
723 newtFinished();
724 }
725}
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 05d0c5c2030c..3b4ec6797565 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -5,6 +5,7 @@
5#include "parse-events.h" 5#include "parse-events.h"
6#include "exec_cmd.h" 6#include "exec_cmd.h"
7#include "string.h" 7#include "string.h"
8#include "symbol.h"
8#include "cache.h" 9#include "cache.h"
9#include "header.h" 10#include "header.h"
10#include "debugfs.h" 11#include "debugfs.h"
@@ -409,7 +410,6 @@ static enum event_result
409parse_single_tracepoint_event(char *sys_name, 410parse_single_tracepoint_event(char *sys_name,
410 const char *evt_name, 411 const char *evt_name,
411 unsigned int evt_length, 412 unsigned int evt_length,
412 char *flags,
413 struct perf_event_attr *attr, 413 struct perf_event_attr *attr,
414 const char **strp) 414 const char **strp)
415{ 415{
@@ -418,13 +418,11 @@ parse_single_tracepoint_event(char *sys_name,
418 u64 id; 418 u64 id;
419 int fd; 419 int fd;
420 420
421 if (flags) { 421 attr->sample_type |= PERF_SAMPLE_RAW;
422 if (!strncmp(flags, "record", strlen(flags))) { 422 attr->sample_type |= PERF_SAMPLE_TIME;
423 attr->sample_type |= PERF_SAMPLE_RAW; 423 attr->sample_type |= PERF_SAMPLE_CPU;
424 attr->sample_type |= PERF_SAMPLE_TIME; 424
425 attr->sample_type |= PERF_SAMPLE_CPU; 425 attr->sample_period = 1;
426 }
427 }
428 426
429 snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", debugfs_path, 427 snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", debugfs_path,
430 sys_name, evt_name); 428 sys_name, evt_name);
@@ -532,8 +530,7 @@ static enum event_result parse_tracepoint_event(const char **strp,
532 flags); 530 flags);
533 } else 531 } else
534 return parse_single_tracepoint_event(sys_name, evt_name, 532 return parse_single_tracepoint_event(sys_name, evt_name,
535 evt_length, flags, 533 evt_length, attr, strp);
536 attr, strp);
537} 534}
538 535
539static enum event_result 536static enum event_result
@@ -656,6 +653,10 @@ parse_raw_event(const char **strp, struct perf_event_attr *attr)
656 return EVT_FAILED; 653 return EVT_FAILED;
657 n = hex2u64(str + 1, &config); 654 n = hex2u64(str + 1, &config);
658 if (n > 0) { 655 if (n > 0) {
656 if (str[n+1] == 'p') {
657 attr->precise = 1;
658 n++;
659 }
659 *strp = str + n + 1; 660 *strp = str + n + 1;
660 attr->type = PERF_TYPE_RAW; 661 attr->type = PERF_TYPE_RAW;
661 attr->config = config; 662 attr->config = config;
diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c
index efebd5b476b3..ed887642460c 100644
--- a/tools/perf/util/parse-options.c
+++ b/tools/perf/util/parse-options.c
@@ -49,6 +49,7 @@ static int get_value(struct parse_opt_ctx_t *p,
49 break; 49 break;
50 /* FALLTHROUGH */ 50 /* FALLTHROUGH */
51 case OPTION_BOOLEAN: 51 case OPTION_BOOLEAN:
52 case OPTION_INCR:
52 case OPTION_BIT: 53 case OPTION_BIT:
53 case OPTION_SET_INT: 54 case OPTION_SET_INT:
54 case OPTION_SET_PTR: 55 case OPTION_SET_PTR:
@@ -73,6 +74,10 @@ static int get_value(struct parse_opt_ctx_t *p,
73 return 0; 74 return 0;
74 75
75 case OPTION_BOOLEAN: 76 case OPTION_BOOLEAN:
77 *(bool *)opt->value = unset ? false : true;
78 return 0;
79
80 case OPTION_INCR:
76 *(int *)opt->value = unset ? 0 : *(int *)opt->value + 1; 81 *(int *)opt->value = unset ? 0 : *(int *)opt->value + 1;
77 return 0; 82 return 0;
78 83
@@ -478,6 +483,7 @@ int usage_with_options_internal(const char * const *usagestr,
478 case OPTION_GROUP: 483 case OPTION_GROUP:
479 case OPTION_BIT: 484 case OPTION_BIT:
480 case OPTION_BOOLEAN: 485 case OPTION_BOOLEAN:
486 case OPTION_INCR:
481 case OPTION_SET_INT: 487 case OPTION_SET_INT:
482 case OPTION_SET_PTR: 488 case OPTION_SET_PTR:
483 case OPTION_LONG: 489 case OPTION_LONG:
@@ -500,6 +506,7 @@ int usage_with_options_internal(const char * const *usagestr,
500void usage_with_options(const char * const *usagestr, 506void usage_with_options(const char * const *usagestr,
501 const struct option *opts) 507 const struct option *opts)
502{ 508{
509 exit_browser(false);
503 usage_with_options_internal(usagestr, opts, 0); 510 usage_with_options_internal(usagestr, opts, 0);
504 exit(129); 511 exit(129);
505} 512}
diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h
index 948805af43c2..b2da725f102a 100644
--- a/tools/perf/util/parse-options.h
+++ b/tools/perf/util/parse-options.h
@@ -8,7 +8,8 @@ enum parse_opt_type {
8 OPTION_GROUP, 8 OPTION_GROUP,
9 /* options with no arguments */ 9 /* options with no arguments */
10 OPTION_BIT, 10 OPTION_BIT,
11 OPTION_BOOLEAN, /* _INCR would have been a better name */ 11 OPTION_BOOLEAN,
12 OPTION_INCR,
12 OPTION_SET_INT, 13 OPTION_SET_INT,
13 OPTION_SET_PTR, 14 OPTION_SET_PTR,
14 /* options with arguments (usually) */ 15 /* options with arguments (usually) */
@@ -95,6 +96,7 @@ struct option {
95#define OPT_GROUP(h) { .type = OPTION_GROUP, .help = (h) } 96#define OPT_GROUP(h) { .type = OPTION_GROUP, .help = (h) }
96#define OPT_BIT(s, l, v, h, b) { .type = OPTION_BIT, .short_name = (s), .long_name = (l), .value = (v), .help = (h), .defval = (b) } 97#define OPT_BIT(s, l, v, h, b) { .type = OPTION_BIT, .short_name = (s), .long_name = (l), .value = (v), .help = (h), .defval = (b) }
97#define OPT_BOOLEAN(s, l, v, h) { .type = OPTION_BOOLEAN, .short_name = (s), .long_name = (l), .value = (v), .help = (h) } 98#define OPT_BOOLEAN(s, l, v, h) { .type = OPTION_BOOLEAN, .short_name = (s), .long_name = (l), .value = (v), .help = (h) }
99#define OPT_INCR(s, l, v, h) { .type = OPTION_INCR, .short_name = (s), .long_name = (l), .value = (v), .help = (h) }
98#define OPT_SET_INT(s, l, v, h, i) { .type = OPTION_SET_INT, .short_name = (s), .long_name = (l), .value = (v), .help = (h), .defval = (i) } 100#define OPT_SET_INT(s, l, v, h, i) { .type = OPTION_SET_INT, .short_name = (s), .long_name = (l), .value = (v), .help = (h), .defval = (i) }
99#define OPT_SET_PTR(s, l, v, h, p) { .type = OPTION_SET_PTR, .short_name = (s), .long_name = (l), .value = (v), .help = (h), .defval = (p) } 101#define OPT_SET_PTR(s, l, v, h, p) { .type = OPTION_SET_PTR, .short_name = (s), .long_name = (l), .value = (v), .help = (h), .defval = (p) }
100#define OPT_INTEGER(s, l, v, h) { .type = OPTION_INTEGER, .short_name = (s), .long_name = (l), .value = (v), .help = (h) } 102#define OPT_INTEGER(s, l, v, h) { .type = OPTION_INTEGER, .short_name = (s), .long_name = (l), .value = (v), .help = (h) }
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 7c004b6ef24f..3967f8f63d0d 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -33,20 +33,27 @@
33#include <limits.h> 33#include <limits.h>
34 34
35#undef _GNU_SOURCE 35#undef _GNU_SOURCE
36#include "util.h"
36#include "event.h" 37#include "event.h"
37#include "string.h" 38#include "string.h"
38#include "strlist.h" 39#include "strlist.h"
39#include "debug.h" 40#include "debug.h"
40#include "cache.h" 41#include "cache.h"
41#include "color.h" 42#include "color.h"
42#include "parse-events.h" /* For debugfs_path */ 43#include "symbol.h"
44#include "thread.h"
45#include "debugfs.h"
46#include "trace-event.h" /* For __unused */
43#include "probe-event.h" 47#include "probe-event.h"
48#include "probe-finder.h"
44 49
45#define MAX_CMDLEN 256 50#define MAX_CMDLEN 256
46#define MAX_PROBE_ARGS 128 51#define MAX_PROBE_ARGS 128
47#define PERFPROBE_GROUP "probe" 52#define PERFPROBE_GROUP "probe"
48 53
49#define semantic_error(msg ...) die("Semantic error :" msg) 54bool probe_event_dry_run; /* Dry run flag */
55
56#define semantic_error(msg ...) pr_err("Semantic error :" msg)
50 57
51/* If there is no space to write, returns -E2BIG. */ 58/* If there is no space to write, returns -E2BIG. */
52static int e_snprintf(char *str, size_t size, const char *format, ...) 59static int e_snprintf(char *str, size_t size, const char *format, ...)
@@ -64,7 +71,270 @@ static int e_snprintf(char *str, size_t size, const char *format, ...)
64 return ret; 71 return ret;
65} 72}
66 73
67void parse_line_range_desc(const char *arg, struct line_range *lr) 74static char *synthesize_perf_probe_point(struct perf_probe_point *pp);
75static struct map_groups kmap_groups;
76static struct map *kmaps[MAP__NR_TYPES];
77
78/* Initialize symbol maps and path of vmlinux */
79static int init_vmlinux(void)
80{
81 struct dso *kernel;
82 int ret;
83
84 symbol_conf.sort_by_name = true;
85 if (symbol_conf.vmlinux_name == NULL)
86 symbol_conf.try_vmlinux_path = true;
87 else
88 pr_debug("Use vmlinux: %s\n", symbol_conf.vmlinux_name);
89 ret = symbol__init();
90 if (ret < 0) {
91 pr_debug("Failed to init symbol map.\n");
92 goto out;
93 }
94
95 kernel = dso__new_kernel(symbol_conf.vmlinux_name);
96 if (kernel == NULL)
97 die("Failed to create kernel dso.");
98
99 map_groups__init(&kmap_groups);
100 ret = __map_groups__create_kernel_maps(&kmap_groups, kmaps, kernel);
101 if (ret < 0)
102 pr_debug("Failed to create kernel maps.\n");
103
104out:
105 if (ret < 0)
106 pr_warning("Failed to init vmlinux path.\n");
107 return ret;
108}
109
110#ifdef DWARF_SUPPORT
111static int open_vmlinux(void)
112{
113 if (map__load(kmaps[MAP__FUNCTION], NULL) < 0) {
114 pr_debug("Failed to load kernel map.\n");
115 return -EINVAL;
116 }
117 pr_debug("Try to open %s\n", kmaps[MAP__FUNCTION]->dso->long_name);
118 return open(kmaps[MAP__FUNCTION]->dso->long_name, O_RDONLY);
119}
120
121/* Convert trace point to probe point with debuginfo */
122static int convert_to_perf_probe_point(struct kprobe_trace_point *tp,
123 struct perf_probe_point *pp)
124{
125 struct symbol *sym;
126 int fd, ret = -ENOENT;
127
128 sym = map__find_symbol_by_name(kmaps[MAP__FUNCTION],
129 tp->symbol, NULL);
130 if (sym) {
131 fd = open_vmlinux();
132 if (fd >= 0) {
133 ret = find_perf_probe_point(fd,
134 sym->start + tp->offset, pp);
135 close(fd);
136 }
137 }
138 if (ret <= 0) {
139 pr_debug("Failed to find corresponding probes from "
140 "debuginfo. Use kprobe event information.\n");
141 pp->function = strdup(tp->symbol);
142 if (pp->function == NULL)
143 return -ENOMEM;
144 pp->offset = tp->offset;
145 }
146 pp->retprobe = tp->retprobe;
147
148 return 0;
149}
150
151/* Try to find perf_probe_event with debuginfo */
152static int try_to_find_kprobe_trace_events(struct perf_probe_event *pev,
153 struct kprobe_trace_event **tevs)
154{
155 bool need_dwarf = perf_probe_event_need_dwarf(pev);
156 int fd, ntevs;
157
158 fd = open_vmlinux();
159 if (fd < 0) {
160 if (need_dwarf) {
161 pr_warning("Failed to open debuginfo file.\n");
162 return fd;
163 }
164 pr_debug("Could not open vmlinux. Try to use symbols.\n");
165 return 0;
166 }
167
168 /* Searching trace events corresponding to probe event */
169 ntevs = find_kprobe_trace_events(fd, pev, tevs);
170 close(fd);
171
172 if (ntevs > 0) { /* Succeeded to find trace events */
173 pr_debug("find %d kprobe_trace_events.\n", ntevs);
174 return ntevs;
175 }
176
177 if (ntevs == 0) { /* No error but failed to find probe point. */
178 pr_warning("Probe point '%s' not found.\n",
179 synthesize_perf_probe_point(&pev->point));
180 return -ENOENT;
181 }
182 /* Error path : ntevs < 0 */
183 if (need_dwarf) {
184 if (ntevs == -EBADF)
185 pr_warning("No dwarf info found in the vmlinux - "
186 "please rebuild with CONFIG_DEBUG_INFO=y.\n");
187 return ntevs;
188 }
189 pr_debug("An error occurred in debuginfo analysis."
190 " Try to use symbols.\n");
191 return 0;
192}
193
194#define LINEBUF_SIZE 256
195#define NR_ADDITIONAL_LINES 2
196
197static int show_one_line(FILE *fp, int l, bool skip, bool show_num)
198{
199 char buf[LINEBUF_SIZE];
200 const char *color = PERF_COLOR_BLUE;
201
202 if (fgets(buf, LINEBUF_SIZE, fp) == NULL)
203 goto error;
204 if (!skip) {
205 if (show_num)
206 fprintf(stdout, "%7d %s", l, buf);
207 else
208 color_fprintf(stdout, color, " %s", buf);
209 }
210
211 while (strlen(buf) == LINEBUF_SIZE - 1 &&
212 buf[LINEBUF_SIZE - 2] != '\n') {
213 if (fgets(buf, LINEBUF_SIZE, fp) == NULL)
214 goto error;
215 if (!skip) {
216 if (show_num)
217 fprintf(stdout, "%s", buf);
218 else
219 color_fprintf(stdout, color, "%s", buf);
220 }
221 }
222
223 return 0;
224error:
225 if (feof(fp))
226 pr_warning("Source file is shorter than expected.\n");
227 else
228 pr_warning("File read error: %s\n", strerror(errno));
229
230 return -1;
231}
232
233/*
234 * Show line-range always requires debuginfo to find source file and
235 * line number.
236 */
237int show_line_range(struct line_range *lr)
238{
239 int l = 1;
240 struct line_node *ln;
241 FILE *fp;
242 int fd, ret;
243
244 /* Search a line range */
245 ret = init_vmlinux();
246 if (ret < 0)
247 return ret;
248
249 fd = open_vmlinux();
250 if (fd < 0) {
251 pr_warning("Failed to open debuginfo file.\n");
252 return fd;
253 }
254
255 ret = find_line_range(fd, lr);
256 close(fd);
257 if (ret == 0) {
258 pr_warning("Specified source line is not found.\n");
259 return -ENOENT;
260 } else if (ret < 0) {
261 pr_warning("Debuginfo analysis failed. (%d)\n", ret);
262 return ret;
263 }
264
265 setup_pager();
266
267 if (lr->function)
268 fprintf(stdout, "<%s:%d>\n", lr->function,
269 lr->start - lr->offset);
270 else
271 fprintf(stdout, "<%s:%d>\n", lr->file, lr->start);
272
273 fp = fopen(lr->path, "r");
274 if (fp == NULL) {
275 pr_warning("Failed to open %s: %s\n", lr->path,
276 strerror(errno));
277 return -errno;
278 }
279 /* Skip to starting line number */
280 while (l < lr->start && ret >= 0)
281 ret = show_one_line(fp, l++, true, false);
282 if (ret < 0)
283 goto end;
284
285 list_for_each_entry(ln, &lr->line_list, list) {
286 while (ln->line > l && ret >= 0)
287 ret = show_one_line(fp, (l++) - lr->offset,
288 false, false);
289 if (ret >= 0)
290 ret = show_one_line(fp, (l++) - lr->offset,
291 false, true);
292 if (ret < 0)
293 goto end;
294 }
295
296 if (lr->end == INT_MAX)
297 lr->end = l + NR_ADDITIONAL_LINES;
298 while (l <= lr->end && !feof(fp) && ret >= 0)
299 ret = show_one_line(fp, (l++) - lr->offset, false, false);
300end:
301 fclose(fp);
302 return ret;
303}
304
305#else /* !DWARF_SUPPORT */
306
307static int convert_to_perf_probe_point(struct kprobe_trace_point *tp,
308 struct perf_probe_point *pp)
309{
310 pp->function = strdup(tp->symbol);
311 if (pp->function == NULL)
312 return -ENOMEM;
313 pp->offset = tp->offset;
314 pp->retprobe = tp->retprobe;
315
316 return 0;
317}
318
319static int try_to_find_kprobe_trace_events(struct perf_probe_event *pev,
320 struct kprobe_trace_event **tevs __unused)
321{
322 if (perf_probe_event_need_dwarf(pev)) {
323 pr_warning("Debuginfo-analysis is not supported.\n");
324 return -ENOSYS;
325 }
326 return 0;
327}
328
329int show_line_range(struct line_range *lr __unused)
330{
331 pr_warning("Debuginfo-analysis is not supported.\n");
332 return -ENOSYS;
333}
334
335#endif
336
337int parse_line_range_desc(const char *arg, struct line_range *lr)
68{ 338{
69 const char *ptr; 339 const char *ptr;
70 char *tmp; 340 char *tmp;
@@ -75,29 +345,45 @@ void parse_line_range_desc(const char *arg, struct line_range *lr)
75 */ 345 */
76 ptr = strchr(arg, ':'); 346 ptr = strchr(arg, ':');
77 if (ptr) { 347 if (ptr) {
78 lr->start = (unsigned int)strtoul(ptr + 1, &tmp, 0); 348 lr->start = (int)strtoul(ptr + 1, &tmp, 0);
79 if (*tmp == '+') 349 if (*tmp == '+') {
80 lr->end = lr->start + (unsigned int)strtoul(tmp + 1, 350 lr->end = lr->start + (int)strtoul(tmp + 1, &tmp, 0);
81 &tmp, 0); 351 lr->end--; /*
82 else if (*tmp == '-') 352 * Adjust the number of lines here.
83 lr->end = (unsigned int)strtoul(tmp + 1, &tmp, 0); 353 * If the number of lines == 1, the
354 * the end of line should be equal to
355 * the start of line.
356 */
357 } else if (*tmp == '-')
358 lr->end = (int)strtoul(tmp + 1, &tmp, 0);
84 else 359 else
85 lr->end = 0; 360 lr->end = INT_MAX;
86 pr_debug("Line range is %u to %u\n", lr->start, lr->end); 361 pr_debug("Line range is %d to %d\n", lr->start, lr->end);
87 if (lr->end && lr->start > lr->end) 362 if (lr->start > lr->end) {
88 semantic_error("Start line must be smaller" 363 semantic_error("Start line must be smaller"
89 " than end line."); 364 " than end line.\n");
90 if (*tmp != '\0') 365 return -EINVAL;
91 semantic_error("Tailing with invalid character '%d'.", 366 }
367 if (*tmp != '\0') {
368 semantic_error("Tailing with invalid character '%d'.\n",
92 *tmp); 369 *tmp);
370 return -EINVAL;
371 }
93 tmp = strndup(arg, (ptr - arg)); 372 tmp = strndup(arg, (ptr - arg));
94 } else 373 } else {
95 tmp = strdup(arg); 374 tmp = strdup(arg);
375 lr->end = INT_MAX;
376 }
377
378 if (tmp == NULL)
379 return -ENOMEM;
96 380
97 if (strchr(tmp, '.')) 381 if (strchr(tmp, '.'))
98 lr->file = tmp; 382 lr->file = tmp;
99 else 383 else
100 lr->function = tmp; 384 lr->function = tmp;
385
386 return 0;
101} 387}
102 388
103/* Check the name is good for event/group */ 389/* Check the name is good for event/group */
@@ -113,8 +399,9 @@ static bool check_event_name(const char *name)
113} 399}
114 400
115/* Parse probepoint definition. */ 401/* Parse probepoint definition. */
116static void parse_perf_probe_probepoint(char *arg, struct probe_point *pp) 402static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
117{ 403{
404 struct perf_probe_point *pp = &pev->point;
118 char *ptr, *tmp; 405 char *ptr, *tmp;
119 char c, nc = 0; 406 char c, nc = 0;
120 /* 407 /*
@@ -129,13 +416,19 @@ static void parse_perf_probe_probepoint(char *arg, struct probe_point *pp)
129 if (ptr && *ptr == '=') { /* Event name */ 416 if (ptr && *ptr == '=') { /* Event name */
130 *ptr = '\0'; 417 *ptr = '\0';
131 tmp = ptr + 1; 418 tmp = ptr + 1;
132 ptr = strchr(arg, ':'); 419 if (strchr(arg, ':')) {
133 if (ptr) /* Group name is not supported yet. */ 420 semantic_error("Group name is not supported yet.\n");
134 semantic_error("Group name is not supported yet."); 421 return -ENOTSUP;
135 if (!check_event_name(arg)) 422 }
423 if (!check_event_name(arg)) {
136 semantic_error("%s is bad for event name -it must " 424 semantic_error("%s is bad for event name -it must "
137 "follow C symbol-naming rule.", arg); 425 "follow C symbol-naming rule.\n", arg);
138 pp->event = strdup(arg); 426 return -EINVAL;
427 }
428 pev->event = strdup(arg);
429 if (pev->event == NULL)
430 return -ENOMEM;
431 pev->group = NULL;
139 arg = tmp; 432 arg = tmp;
140 } 433 }
141 434
@@ -145,12 +438,15 @@ static void parse_perf_probe_probepoint(char *arg, struct probe_point *pp)
145 *ptr++ = '\0'; 438 *ptr++ = '\0';
146 } 439 }
147 440
441 tmp = strdup(arg);
442 if (tmp == NULL)
443 return -ENOMEM;
444
148 /* Check arg is function or file and copy it */ 445 /* Check arg is function or file and copy it */
149 if (strchr(arg, '.')) /* File */ 446 if (strchr(tmp, '.')) /* File */
150 pp->file = strdup(arg); 447 pp->file = tmp;
151 else /* Function */ 448 else /* Function */
152 pp->function = strdup(arg); 449 pp->function = tmp;
153 DIE_IF(pp->file == NULL && pp->function == NULL);
154 450
155 /* Parse other options */ 451 /* Parse other options */
156 while (ptr) { 452 while (ptr) {
@@ -158,6 +454,8 @@ static void parse_perf_probe_probepoint(char *arg, struct probe_point *pp)
158 c = nc; 454 c = nc;
159 if (c == ';') { /* Lazy pattern must be the last part */ 455 if (c == ';') { /* Lazy pattern must be the last part */
160 pp->lazy_line = strdup(arg); 456 pp->lazy_line = strdup(arg);
457 if (pp->lazy_line == NULL)
458 return -ENOMEM;
161 break; 459 break;
162 } 460 }
163 ptr = strpbrk(arg, ";:+@%"); 461 ptr = strpbrk(arg, ";:+@%");
@@ -168,266 +466,658 @@ static void parse_perf_probe_probepoint(char *arg, struct probe_point *pp)
168 switch (c) { 466 switch (c) {
169 case ':': /* Line number */ 467 case ':': /* Line number */
170 pp->line = strtoul(arg, &tmp, 0); 468 pp->line = strtoul(arg, &tmp, 0);
171 if (*tmp != '\0') 469 if (*tmp != '\0') {
172 semantic_error("There is non-digit char" 470 semantic_error("There is non-digit char"
173 " in line number."); 471 " in line number.\n");
472 return -EINVAL;
473 }
174 break; 474 break;
175 case '+': /* Byte offset from a symbol */ 475 case '+': /* Byte offset from a symbol */
176 pp->offset = strtoul(arg, &tmp, 0); 476 pp->offset = strtoul(arg, &tmp, 0);
177 if (*tmp != '\0') 477 if (*tmp != '\0') {
178 semantic_error("There is non-digit character" 478 semantic_error("There is non-digit character"
179 " in offset."); 479 " in offset.\n");
480 return -EINVAL;
481 }
180 break; 482 break;
181 case '@': /* File name */ 483 case '@': /* File name */
182 if (pp->file) 484 if (pp->file) {
183 semantic_error("SRC@SRC is not allowed."); 485 semantic_error("SRC@SRC is not allowed.\n");
486 return -EINVAL;
487 }
184 pp->file = strdup(arg); 488 pp->file = strdup(arg);
185 DIE_IF(pp->file == NULL); 489 if (pp->file == NULL)
490 return -ENOMEM;
186 break; 491 break;
187 case '%': /* Probe places */ 492 case '%': /* Probe places */
188 if (strcmp(arg, "return") == 0) { 493 if (strcmp(arg, "return") == 0) {
189 pp->retprobe = 1; 494 pp->retprobe = 1;
190 } else /* Others not supported yet */ 495 } else { /* Others not supported yet */
191 semantic_error("%%%s is not supported.", arg); 496 semantic_error("%%%s is not supported.\n", arg);
497 return -ENOTSUP;
498 }
192 break; 499 break;
193 default: 500 default: /* Buggy case */
194 DIE_IF("Program has a bug."); 501 pr_err("This program has a bug at %s:%d.\n",
502 __FILE__, __LINE__);
503 return -ENOTSUP;
195 break; 504 break;
196 } 505 }
197 } 506 }
198 507
199 /* Exclusion check */ 508 /* Exclusion check */
200 if (pp->lazy_line && pp->line) 509 if (pp->lazy_line && pp->line) {
201 semantic_error("Lazy pattern can't be used with line number."); 510 semantic_error("Lazy pattern can't be used with line number.");
511 return -EINVAL;
512 }
202 513
203 if (pp->lazy_line && pp->offset) 514 if (pp->lazy_line && pp->offset) {
204 semantic_error("Lazy pattern can't be used with offset."); 515 semantic_error("Lazy pattern can't be used with offset.");
516 return -EINVAL;
517 }
205 518
206 if (pp->line && pp->offset) 519 if (pp->line && pp->offset) {
207 semantic_error("Offset can't be used with line number."); 520 semantic_error("Offset can't be used with line number.");
521 return -EINVAL;
522 }
208 523
209 if (!pp->line && !pp->lazy_line && pp->file && !pp->function) 524 if (!pp->line && !pp->lazy_line && pp->file && !pp->function) {
210 semantic_error("File always requires line number or " 525 semantic_error("File always requires line number or "
211 "lazy pattern."); 526 "lazy pattern.");
527 return -EINVAL;
528 }
212 529
213 if (pp->offset && !pp->function) 530 if (pp->offset && !pp->function) {
214 semantic_error("Offset requires an entry function."); 531 semantic_error("Offset requires an entry function.");
532 return -EINVAL;
533 }
215 534
216 if (pp->retprobe && !pp->function) 535 if (pp->retprobe && !pp->function) {
217 semantic_error("Return probe requires an entry function."); 536 semantic_error("Return probe requires an entry function.");
537 return -EINVAL;
538 }
218 539
219 if ((pp->offset || pp->line || pp->lazy_line) && pp->retprobe) 540 if ((pp->offset || pp->line || pp->lazy_line) && pp->retprobe) {
220 semantic_error("Offset/Line/Lazy pattern can't be used with " 541 semantic_error("Offset/Line/Lazy pattern can't be used with "
221 "return probe."); 542 "return probe.");
543 return -EINVAL;
544 }
222 545
223 pr_debug("symbol:%s file:%s line:%d offset:%d return:%d lazy:%s\n", 546 pr_debug("symbol:%s file:%s line:%d offset:%lu return:%d lazy:%s\n",
224 pp->function, pp->file, pp->line, pp->offset, pp->retprobe, 547 pp->function, pp->file, pp->line, pp->offset, pp->retprobe,
225 pp->lazy_line); 548 pp->lazy_line);
549 return 0;
226} 550}
227 551
228/* Parse perf-probe event definition */ 552/* Parse perf-probe event argument */
229void parse_perf_probe_event(const char *str, struct probe_point *pp, 553static int parse_perf_probe_arg(char *str, struct perf_probe_arg *arg)
230 bool *need_dwarf)
231{ 554{
232 char **argv; 555 char *tmp;
233 int argc, i; 556 struct perf_probe_arg_field **fieldp;
557
558 pr_debug("parsing arg: %s into ", str);
559
560 tmp = strchr(str, '=');
561 if (tmp) {
562 arg->name = strndup(str, tmp - str);
563 if (arg->name == NULL)
564 return -ENOMEM;
565 pr_debug("name:%s ", arg->name);
566 str = tmp + 1;
567 }
568
569 tmp = strchr(str, ':');
570 if (tmp) { /* Type setting */
571 *tmp = '\0';
572 arg->type = strdup(tmp + 1);
573 if (arg->type == NULL)
574 return -ENOMEM;
575 pr_debug("type:%s ", arg->type);
576 }
577
578 tmp = strpbrk(str, "-.");
579 if (!is_c_varname(str) || !tmp) {
580 /* A variable, register, symbol or special value */
581 arg->var = strdup(str);
582 if (arg->var == NULL)
583 return -ENOMEM;
584 pr_debug("%s\n", arg->var);
585 return 0;
586 }
234 587
235 *need_dwarf = false; 588 /* Structure fields */
589 arg->var = strndup(str, tmp - str);
590 if (arg->var == NULL)
591 return -ENOMEM;
592 pr_debug("%s, ", arg->var);
593 fieldp = &arg->field;
594
595 do {
596 *fieldp = zalloc(sizeof(struct perf_probe_arg_field));
597 if (*fieldp == NULL)
598 return -ENOMEM;
599 if (*tmp == '.') {
600 str = tmp + 1;
601 (*fieldp)->ref = false;
602 } else if (tmp[1] == '>') {
603 str = tmp + 2;
604 (*fieldp)->ref = true;
605 } else {
606 semantic_error("Argument parse error: %s\n", str);
607 return -EINVAL;
608 }
609
610 tmp = strpbrk(str, "-.");
611 if (tmp) {
612 (*fieldp)->name = strndup(str, tmp - str);
613 if ((*fieldp)->name == NULL)
614 return -ENOMEM;
615 pr_debug("%s(%d), ", (*fieldp)->name, (*fieldp)->ref);
616 fieldp = &(*fieldp)->next;
617 }
618 } while (tmp);
619 (*fieldp)->name = strdup(str);
620 if ((*fieldp)->name == NULL)
621 return -ENOMEM;
622 pr_debug("%s(%d)\n", (*fieldp)->name, (*fieldp)->ref);
623
624 /* If no name is specified, set the last field name */
625 if (!arg->name) {
626 arg->name = strdup((*fieldp)->name);
627 if (arg->name == NULL)
628 return -ENOMEM;
629 }
630 return 0;
631}
236 632
237 argv = argv_split(str, &argc); 633/* Parse perf-probe event command */
238 if (!argv) 634int parse_perf_probe_command(const char *cmd, struct perf_probe_event *pev)
239 die("argv_split failed."); 635{
240 if (argc > MAX_PROBE_ARGS + 1) 636 char **argv;
241 semantic_error("Too many arguments"); 637 int argc, i, ret = 0;
242 638
639 argv = argv_split(cmd, &argc);
640 if (!argv) {
641 pr_debug("Failed to split arguments.\n");
642 return -ENOMEM;
643 }
644 if (argc - 1 > MAX_PROBE_ARGS) {
645 semantic_error("Too many probe arguments (%d).\n", argc - 1);
646 ret = -ERANGE;
647 goto out;
648 }
243 /* Parse probe point */ 649 /* Parse probe point */
244 parse_perf_probe_probepoint(argv[0], pp); 650 ret = parse_perf_probe_point(argv[0], pev);
245 if (pp->file || pp->line || pp->lazy_line) 651 if (ret < 0)
246 *need_dwarf = true; 652 goto out;
247 653
248 /* Copy arguments and ensure return probe has no C argument */ 654 /* Copy arguments and ensure return probe has no C argument */
249 pp->nr_args = argc - 1; 655 pev->nargs = argc - 1;
250 pp->args = zalloc(sizeof(char *) * pp->nr_args); 656 pev->args = zalloc(sizeof(struct perf_probe_arg) * pev->nargs);
251 for (i = 0; i < pp->nr_args; i++) { 657 if (pev->args == NULL) {
252 pp->args[i] = strdup(argv[i + 1]); 658 ret = -ENOMEM;
253 if (!pp->args[i]) 659 goto out;
254 die("Failed to copy argument."); 660 }
255 if (is_c_varname(pp->args[i])) { 661 for (i = 0; i < pev->nargs && ret >= 0; i++) {
256 if (pp->retprobe) 662 ret = parse_perf_probe_arg(argv[i + 1], &pev->args[i]);
257 semantic_error("You can't specify local" 663 if (ret >= 0 &&
258 " variable for kretprobe"); 664 is_c_varname(pev->args[i].var) && pev->point.retprobe) {
259 *need_dwarf = true; 665 semantic_error("You can't specify local variable for"
666 " kretprobe.\n");
667 ret = -EINVAL;
260 } 668 }
261 } 669 }
262 670out:
263 argv_free(argv); 671 argv_free(argv);
672
673 return ret;
674}
675
676/* Return true if this perf_probe_event requires debuginfo */
677bool perf_probe_event_need_dwarf(struct perf_probe_event *pev)
678{
679 int i;
680
681 if (pev->point.file || pev->point.line || pev->point.lazy_line)
682 return true;
683
684 for (i = 0; i < pev->nargs; i++)
685 if (is_c_varname(pev->args[i].var))
686 return true;
687
688 return false;
264} 689}
265 690
266/* Parse kprobe_events event into struct probe_point */ 691/* Parse kprobe_events event into struct probe_point */
267void parse_trace_kprobe_event(const char *str, struct probe_point *pp) 692int parse_kprobe_trace_command(const char *cmd, struct kprobe_trace_event *tev)
268{ 693{
694 struct kprobe_trace_point *tp = &tev->point;
269 char pr; 695 char pr;
270 char *p; 696 char *p;
271 int ret, i, argc; 697 int ret, i, argc;
272 char **argv; 698 char **argv;
273 699
274 pr_debug("Parsing kprobe_events: %s\n", str); 700 pr_debug("Parsing kprobe_events: %s\n", cmd);
275 argv = argv_split(str, &argc); 701 argv = argv_split(cmd, &argc);
276 if (!argv) 702 if (!argv) {
277 die("argv_split failed."); 703 pr_debug("Failed to split arguments.\n");
278 if (argc < 2) 704 return -ENOMEM;
279 semantic_error("Too less arguments."); 705 }
706 if (argc < 2) {
707 semantic_error("Too few probe arguments.\n");
708 ret = -ERANGE;
709 goto out;
710 }
280 711
281 /* Scan event and group name. */ 712 /* Scan event and group name. */
282 ret = sscanf(argv[0], "%c:%a[^/ \t]/%a[^ \t]", 713 ret = sscanf(argv[0], "%c:%a[^/ \t]/%a[^ \t]",
283 &pr, (float *)(void *)&pp->group, 714 &pr, (float *)(void *)&tev->group,
284 (float *)(void *)&pp->event); 715 (float *)(void *)&tev->event);
285 if (ret != 3) 716 if (ret != 3) {
286 semantic_error("Failed to parse event name: %s", argv[0]); 717 semantic_error("Failed to parse event name: %s\n", argv[0]);
287 pr_debug("Group:%s Event:%s probe:%c\n", pp->group, pp->event, pr); 718 ret = -EINVAL;
719 goto out;
720 }
721 pr_debug("Group:%s Event:%s probe:%c\n", tev->group, tev->event, pr);
288 722
289 pp->retprobe = (pr == 'r'); 723 tp->retprobe = (pr == 'r');
290 724
291 /* Scan function name and offset */ 725 /* Scan function name and offset */
292 ret = sscanf(argv[1], "%a[^+]+%d", (float *)(void *)&pp->function, 726 ret = sscanf(argv[1], "%a[^+]+%lu", (float *)(void *)&tp->symbol,
293 &pp->offset); 727 &tp->offset);
294 if (ret == 1) 728 if (ret == 1)
295 pp->offset = 0; 729 tp->offset = 0;
296 730
297 /* kprobe_events doesn't have this information */ 731 tev->nargs = argc - 2;
298 pp->line = 0; 732 tev->args = zalloc(sizeof(struct kprobe_trace_arg) * tev->nargs);
299 pp->file = NULL; 733 if (tev->args == NULL) {
300 734 ret = -ENOMEM;
301 pp->nr_args = argc - 2; 735 goto out;
302 pp->args = zalloc(sizeof(char *) * pp->nr_args); 736 }
303 for (i = 0; i < pp->nr_args; i++) { 737 for (i = 0; i < tev->nargs; i++) {
304 p = strchr(argv[i + 2], '='); 738 p = strchr(argv[i + 2], '=');
305 if (p) /* We don't need which register is assigned. */ 739 if (p) /* We don't need which register is assigned. */
306 *p = '\0'; 740 *p++ = '\0';
307 pp->args[i] = strdup(argv[i + 2]); 741 else
308 if (!pp->args[i]) 742 p = argv[i + 2];
309 die("Failed to copy argument."); 743 tev->args[i].name = strdup(argv[i + 2]);
744 /* TODO: parse regs and offset */
745 tev->args[i].value = strdup(p);
746 if (tev->args[i].name == NULL || tev->args[i].value == NULL) {
747 ret = -ENOMEM;
748 goto out;
749 }
310 } 750 }
311 751 ret = 0;
752out:
312 argv_free(argv); 753 argv_free(argv);
754 return ret;
313} 755}
314 756
315/* Synthesize only probe point (not argument) */ 757/* Compose only probe arg */
316int synthesize_perf_probe_point(struct probe_point *pp) 758int synthesize_perf_probe_arg(struct perf_probe_arg *pa, char *buf, size_t len)
317{ 759{
318 char *buf; 760 struct perf_probe_arg_field *field = pa->field;
319 char offs[64] = "", line[64] = "";
320 int ret; 761 int ret;
762 char *tmp = buf;
321 763
322 pp->probes[0] = buf = zalloc(MAX_CMDLEN); 764 if (pa->name && pa->var)
323 pp->found = 1; 765 ret = e_snprintf(tmp, len, "%s=%s", pa->name, pa->var);
324 if (!buf) 766 else
325 die("Failed to allocate memory by zalloc."); 767 ret = e_snprintf(tmp, len, "%s", pa->name ? pa->name : pa->var);
768 if (ret <= 0)
769 goto error;
770 tmp += ret;
771 len -= ret;
772
773 while (field) {
774 ret = e_snprintf(tmp, len, "%s%s", field->ref ? "->" : ".",
775 field->name);
776 if (ret <= 0)
777 goto error;
778 tmp += ret;
779 len -= ret;
780 field = field->next;
781 }
782
783 if (pa->type) {
784 ret = e_snprintf(tmp, len, ":%s", pa->type);
785 if (ret <= 0)
786 goto error;
787 tmp += ret;
788 len -= ret;
789 }
790
791 return tmp - buf;
792error:
793 pr_debug("Failed to synthesize perf probe argument: %s",
794 strerror(-ret));
795 return ret;
796}
797
798/* Compose only probe point (not argument) */
799static char *synthesize_perf_probe_point(struct perf_probe_point *pp)
800{
801 char *buf, *tmp;
802 char offs[32] = "", line[32] = "", file[32] = "";
803 int ret, len;
804
805 buf = zalloc(MAX_CMDLEN);
806 if (buf == NULL) {
807 ret = -ENOMEM;
808 goto error;
809 }
326 if (pp->offset) { 810 if (pp->offset) {
327 ret = e_snprintf(offs, 64, "+%d", pp->offset); 811 ret = e_snprintf(offs, 32, "+%lu", pp->offset);
328 if (ret <= 0) 812 if (ret <= 0)
329 goto error; 813 goto error;
330 } 814 }
331 if (pp->line) { 815 if (pp->line) {
332 ret = e_snprintf(line, 64, ":%d", pp->line); 816 ret = e_snprintf(line, 32, ":%d", pp->line);
817 if (ret <= 0)
818 goto error;
819 }
820 if (pp->file) {
821 len = strlen(pp->file) - 31;
822 if (len < 0)
823 len = 0;
824 tmp = strchr(pp->file + len, '/');
825 if (!tmp)
826 tmp = pp->file + len;
827 ret = e_snprintf(file, 32, "@%s", tmp + 1);
333 if (ret <= 0) 828 if (ret <= 0)
334 goto error; 829 goto error;
335 } 830 }
336 831
337 if (pp->function) 832 if (pp->function)
338 ret = e_snprintf(buf, MAX_CMDLEN, "%s%s%s%s", pp->function, 833 ret = e_snprintf(buf, MAX_CMDLEN, "%s%s%s%s%s", pp->function,
339 offs, pp->retprobe ? "%return" : "", line); 834 offs, pp->retprobe ? "%return" : "", line,
835 file);
340 else 836 else
341 ret = e_snprintf(buf, MAX_CMDLEN, "%s%s", pp->file, line); 837 ret = e_snprintf(buf, MAX_CMDLEN, "%s%s", file, line);
342 if (ret <= 0) { 838 if (ret <= 0)
839 goto error;
840
841 return buf;
343error: 842error:
344 free(pp->probes[0]); 843 pr_debug("Failed to synthesize perf probe point: %s",
345 pp->probes[0] = NULL; 844 strerror(-ret));
346 pp->found = 0; 845 if (buf)
347 } 846 free(buf);
348 return ret; 847 return NULL;
349} 848}
350 849
351int synthesize_perf_probe_event(struct probe_point *pp) 850#if 0
851char *synthesize_perf_probe_command(struct perf_probe_event *pev)
352{ 852{
353 char *buf; 853 char *buf;
354 int i, len, ret; 854 int i, len, ret;
355 855
356 len = synthesize_perf_probe_point(pp); 856 buf = synthesize_perf_probe_point(&pev->point);
357 if (len < 0) 857 if (!buf)
358 return 0; 858 return NULL;
359 859
360 buf = pp->probes[0]; 860 len = strlen(buf);
361 for (i = 0; i < pp->nr_args; i++) { 861 for (i = 0; i < pev->nargs; i++) {
362 ret = e_snprintf(&buf[len], MAX_CMDLEN - len, " %s", 862 ret = e_snprintf(&buf[len], MAX_CMDLEN - len, " %s",
363 pp->args[i]); 863 pev->args[i].name);
364 if (ret <= 0) 864 if (ret <= 0) {
365 goto error; 865 free(buf);
866 return NULL;
867 }
366 len += ret; 868 len += ret;
367 } 869 }
368 pp->found = 1;
369 870
370 return pp->found; 871 return buf;
371error: 872}
372 free(pp->probes[0]); 873#endif
373 pp->probes[0] = NULL; 874
875static int __synthesize_kprobe_trace_arg_ref(struct kprobe_trace_arg_ref *ref,
876 char **buf, size_t *buflen,
877 int depth)
878{
879 int ret;
880 if (ref->next) {
881 depth = __synthesize_kprobe_trace_arg_ref(ref->next, buf,
882 buflen, depth + 1);
883 if (depth < 0)
884 goto out;
885 }
886
887 ret = e_snprintf(*buf, *buflen, "%+ld(", ref->offset);
888 if (ret < 0)
889 depth = ret;
890 else {
891 *buf += ret;
892 *buflen -= ret;
893 }
894out:
895 return depth;
374 896
375 return ret;
376} 897}
377 898
378int synthesize_trace_kprobe_event(struct probe_point *pp) 899static int synthesize_kprobe_trace_arg(struct kprobe_trace_arg *arg,
900 char *buf, size_t buflen)
379{ 901{
902 int ret, depth = 0;
903 char *tmp = buf;
904
905 /* Argument name or separator */
906 if (arg->name)
907 ret = e_snprintf(buf, buflen, " %s=", arg->name);
908 else
909 ret = e_snprintf(buf, buflen, " ");
910 if (ret < 0)
911 return ret;
912 buf += ret;
913 buflen -= ret;
914
915 /* Dereferencing arguments */
916 if (arg->ref) {
917 depth = __synthesize_kprobe_trace_arg_ref(arg->ref, &buf,
918 &buflen, 1);
919 if (depth < 0)
920 return depth;
921 }
922
923 /* Print argument value */
924 ret = e_snprintf(buf, buflen, "%s", arg->value);
925 if (ret < 0)
926 return ret;
927 buf += ret;
928 buflen -= ret;
929
930 /* Closing */
931 while (depth--) {
932 ret = e_snprintf(buf, buflen, ")");
933 if (ret < 0)
934 return ret;
935 buf += ret;
936 buflen -= ret;
937 }
938 /* Print argument type */
939 if (arg->type) {
940 ret = e_snprintf(buf, buflen, ":%s", arg->type);
941 if (ret <= 0)
942 return ret;
943 buf += ret;
944 }
945
946 return buf - tmp;
947}
948
949char *synthesize_kprobe_trace_command(struct kprobe_trace_event *tev)
950{
951 struct kprobe_trace_point *tp = &tev->point;
380 char *buf; 952 char *buf;
381 int i, len, ret; 953 int i, len, ret;
382 954
383 pp->probes[0] = buf = zalloc(MAX_CMDLEN); 955 buf = zalloc(MAX_CMDLEN);
384 if (!buf) 956 if (buf == NULL)
385 die("Failed to allocate memory by zalloc."); 957 return NULL;
386 ret = e_snprintf(buf, MAX_CMDLEN, "%s+%d", pp->function, pp->offset); 958
387 if (ret <= 0) 959 len = e_snprintf(buf, MAX_CMDLEN, "%c:%s/%s %s+%lu",
960 tp->retprobe ? 'r' : 'p',
961 tev->group, tev->event,
962 tp->symbol, tp->offset);
963 if (len <= 0)
388 goto error; 964 goto error;
389 len = ret;
390 965
391 for (i = 0; i < pp->nr_args; i++) { 966 for (i = 0; i < tev->nargs; i++) {
392 ret = e_snprintf(&buf[len], MAX_CMDLEN - len, " %s", 967 ret = synthesize_kprobe_trace_arg(&tev->args[i], buf + len,
393 pp->args[i]); 968 MAX_CMDLEN - len);
394 if (ret <= 0) 969 if (ret <= 0)
395 goto error; 970 goto error;
396 len += ret; 971 len += ret;
397 } 972 }
398 pp->found = 1;
399 973
400 return pp->found; 974 return buf;
401error: 975error:
402 free(pp->probes[0]); 976 free(buf);
403 pp->probes[0] = NULL; 977 return NULL;
978}
979
980int convert_to_perf_probe_event(struct kprobe_trace_event *tev,
981 struct perf_probe_event *pev)
982{
983 char buf[64] = "";
984 int i, ret;
985
986 /* Convert event/group name */
987 pev->event = strdup(tev->event);
988 pev->group = strdup(tev->group);
989 if (pev->event == NULL || pev->group == NULL)
990 return -ENOMEM;
991
992 /* Convert trace_point to probe_point */
993 ret = convert_to_perf_probe_point(&tev->point, &pev->point);
994 if (ret < 0)
995 return ret;
996
997 /* Convert trace_arg to probe_arg */
998 pev->nargs = tev->nargs;
999 pev->args = zalloc(sizeof(struct perf_probe_arg) * pev->nargs);
1000 if (pev->args == NULL)
1001 return -ENOMEM;
1002 for (i = 0; i < tev->nargs && ret >= 0; i++) {
1003 if (tev->args[i].name)
1004 pev->args[i].name = strdup(tev->args[i].name);
1005 else {
1006 ret = synthesize_kprobe_trace_arg(&tev->args[i],
1007 buf, 64);
1008 pev->args[i].name = strdup(buf);
1009 }
1010 if (pev->args[i].name == NULL && ret >= 0)
1011 ret = -ENOMEM;
1012 }
1013
1014 if (ret < 0)
1015 clear_perf_probe_event(pev);
404 1016
405 return ret; 1017 return ret;
406} 1018}
407 1019
408static int open_kprobe_events(int flags, int mode) 1020void clear_perf_probe_event(struct perf_probe_event *pev)
1021{
1022 struct perf_probe_point *pp = &pev->point;
1023 struct perf_probe_arg_field *field, *next;
1024 int i;
1025
1026 if (pev->event)
1027 free(pev->event);
1028 if (pev->group)
1029 free(pev->group);
1030 if (pp->file)
1031 free(pp->file);
1032 if (pp->function)
1033 free(pp->function);
1034 if (pp->lazy_line)
1035 free(pp->lazy_line);
1036 for (i = 0; i < pev->nargs; i++) {
1037 if (pev->args[i].name)
1038 free(pev->args[i].name);
1039 if (pev->args[i].var)
1040 free(pev->args[i].var);
1041 if (pev->args[i].type)
1042 free(pev->args[i].type);
1043 field = pev->args[i].field;
1044 while (field) {
1045 next = field->next;
1046 if (field->name)
1047 free(field->name);
1048 free(field);
1049 field = next;
1050 }
1051 }
1052 if (pev->args)
1053 free(pev->args);
1054 memset(pev, 0, sizeof(*pev));
1055}
1056
1057void clear_kprobe_trace_event(struct kprobe_trace_event *tev)
1058{
1059 struct kprobe_trace_arg_ref *ref, *next;
1060 int i;
1061
1062 if (tev->event)
1063 free(tev->event);
1064 if (tev->group)
1065 free(tev->group);
1066 if (tev->point.symbol)
1067 free(tev->point.symbol);
1068 for (i = 0; i < tev->nargs; i++) {
1069 if (tev->args[i].name)
1070 free(tev->args[i].name);
1071 if (tev->args[i].value)
1072 free(tev->args[i].value);
1073 if (tev->args[i].type)
1074 free(tev->args[i].type);
1075 ref = tev->args[i].ref;
1076 while (ref) {
1077 next = ref->next;
1078 free(ref);
1079 ref = next;
1080 }
1081 }
1082 if (tev->args)
1083 free(tev->args);
1084 memset(tev, 0, sizeof(*tev));
1085}
1086
1087static int open_kprobe_events(bool readwrite)
409{ 1088{
410 char buf[PATH_MAX]; 1089 char buf[PATH_MAX];
1090 const char *__debugfs;
411 int ret; 1091 int ret;
412 1092
413 ret = e_snprintf(buf, PATH_MAX, "%s/../kprobe_events", debugfs_path); 1093 __debugfs = debugfs_find_mountpoint();
414 if (ret < 0) 1094 if (__debugfs == NULL) {
415 die("Failed to make kprobe_events path."); 1095 pr_warning("Debugfs is not mounted.\n");
1096 return -ENOENT;
1097 }
1098
1099 ret = e_snprintf(buf, PATH_MAX, "%stracing/kprobe_events", __debugfs);
1100 if (ret >= 0) {
1101 pr_debug("Opening %s write=%d\n", buf, readwrite);
1102 if (readwrite && !probe_event_dry_run)
1103 ret = open(buf, O_RDWR, O_APPEND);
1104 else
1105 ret = open(buf, O_RDONLY, 0);
1106 }
416 1107
417 ret = open(buf, flags, mode);
418 if (ret < 0) { 1108 if (ret < 0) {
419 if (errno == ENOENT) 1109 if (errno == ENOENT)
420 die("kprobe_events file does not exist -" 1110 pr_warning("kprobe_events file does not exist - please"
421 " please rebuild with CONFIG_KPROBE_EVENT."); 1111 " rebuild kernel with CONFIG_KPROBE_EVENT.\n");
422 else 1112 else
423 die("Could not open kprobe_events file: %s", 1113 pr_warning("Failed to open kprobe_events file: %s\n",
424 strerror(errno)); 1114 strerror(errno));
425 } 1115 }
426 return ret; 1116 return ret;
427} 1117}
428 1118
429/* Get raw string list of current kprobe_events */ 1119/* Get raw string list of current kprobe_events */
430static struct strlist *get_trace_kprobe_event_rawlist(int fd) 1120static struct strlist *get_kprobe_trace_command_rawlist(int fd)
431{ 1121{
432 int ret, idx; 1122 int ret, idx;
433 FILE *fp; 1123 FILE *fp;
@@ -447,271 +1137,485 @@ static struct strlist *get_trace_kprobe_event_rawlist(int fd)
447 if (p[idx] == '\n') 1137 if (p[idx] == '\n')
448 p[idx] = '\0'; 1138 p[idx] = '\0';
449 ret = strlist__add(sl, buf); 1139 ret = strlist__add(sl, buf);
450 if (ret < 0) 1140 if (ret < 0) {
451 die("strlist__add failed: %s", strerror(-ret)); 1141 pr_debug("strlist__add failed: %s\n", strerror(-ret));
1142 strlist__delete(sl);
1143 return NULL;
1144 }
452 } 1145 }
453 fclose(fp); 1146 fclose(fp);
454 1147
455 return sl; 1148 return sl;
456} 1149}
457 1150
458/* Free and zero clear probe_point */
459static void clear_probe_point(struct probe_point *pp)
460{
461 int i;
462
463 if (pp->event)
464 free(pp->event);
465 if (pp->group)
466 free(pp->group);
467 if (pp->function)
468 free(pp->function);
469 if (pp->file)
470 free(pp->file);
471 if (pp->lazy_line)
472 free(pp->lazy_line);
473 for (i = 0; i < pp->nr_args; i++)
474 free(pp->args[i]);
475 if (pp->args)
476 free(pp->args);
477 for (i = 0; i < pp->found; i++)
478 free(pp->probes[i]);
479 memset(pp, 0, sizeof(*pp));
480}
481
482/* Show an event */ 1151/* Show an event */
483static void show_perf_probe_event(const char *event, const char *place, 1152static int show_perf_probe_event(struct perf_probe_event *pev)
484 struct probe_point *pp)
485{ 1153{
486 int i, ret; 1154 int i, ret;
487 char buf[128]; 1155 char buf[128];
1156 char *place;
488 1157
489 ret = e_snprintf(buf, 128, "%s:%s", pp->group, event); 1158 /* Synthesize only event probe point */
1159 place = synthesize_perf_probe_point(&pev->point);
1160 if (!place)
1161 return -EINVAL;
1162
1163 ret = e_snprintf(buf, 128, "%s:%s", pev->group, pev->event);
490 if (ret < 0) 1164 if (ret < 0)
491 die("Failed to copy event: %s", strerror(-ret)); 1165 return ret;
492 printf(" %-40s (on %s", buf, place); 1166
1167 printf(" %-20s (on %s", buf, place);
493 1168
494 if (pp->nr_args > 0) { 1169 if (pev->nargs > 0) {
495 printf(" with"); 1170 printf(" with");
496 for (i = 0; i < pp->nr_args; i++) 1171 for (i = 0; i < pev->nargs; i++) {
497 printf(" %s", pp->args[i]); 1172 ret = synthesize_perf_probe_arg(&pev->args[i],
1173 buf, 128);
1174 if (ret < 0)
1175 break;
1176 printf(" %s", buf);
1177 }
498 } 1178 }
499 printf(")\n"); 1179 printf(")\n");
1180 free(place);
1181 return ret;
500} 1182}
501 1183
502/* List up current perf-probe events */ 1184/* List up current perf-probe events */
503void show_perf_probe_events(void) 1185int show_perf_probe_events(void)
504{ 1186{
505 int fd; 1187 int fd, ret;
506 struct probe_point pp; 1188 struct kprobe_trace_event tev;
1189 struct perf_probe_event pev;
507 struct strlist *rawlist; 1190 struct strlist *rawlist;
508 struct str_node *ent; 1191 struct str_node *ent;
509 1192
510 setup_pager(); 1193 setup_pager();
511 memset(&pp, 0, sizeof(pp)); 1194 ret = init_vmlinux();
1195 if (ret < 0)
1196 return ret;
1197
1198 memset(&tev, 0, sizeof(tev));
1199 memset(&pev, 0, sizeof(pev));
1200
1201 fd = open_kprobe_events(false);
1202 if (fd < 0)
1203 return fd;
512 1204
513 fd = open_kprobe_events(O_RDONLY, 0); 1205 rawlist = get_kprobe_trace_command_rawlist(fd);
514 rawlist = get_trace_kprobe_event_rawlist(fd);
515 close(fd); 1206 close(fd);
1207 if (!rawlist)
1208 return -ENOENT;
516 1209
517 strlist__for_each(ent, rawlist) { 1210 strlist__for_each(ent, rawlist) {
518 parse_trace_kprobe_event(ent->s, &pp); 1211 ret = parse_kprobe_trace_command(ent->s, &tev);
519 /* Synthesize only event probe point */ 1212 if (ret >= 0) {
520 synthesize_perf_probe_point(&pp); 1213 ret = convert_to_perf_probe_event(&tev, &pev);
521 /* Show an event */ 1214 if (ret >= 0)
522 show_perf_probe_event(pp.event, pp.probes[0], &pp); 1215 ret = show_perf_probe_event(&pev);
523 clear_probe_point(&pp); 1216 }
1217 clear_perf_probe_event(&pev);
1218 clear_kprobe_trace_event(&tev);
1219 if (ret < 0)
1220 break;
524 } 1221 }
525
526 strlist__delete(rawlist); 1222 strlist__delete(rawlist);
1223
1224 return ret;
527} 1225}
528 1226
529/* Get current perf-probe event names */ 1227/* Get current perf-probe event names */
530static struct strlist *get_perf_event_names(int fd, bool include_group) 1228static struct strlist *get_kprobe_trace_event_names(int fd, bool include_group)
531{ 1229{
532 char buf[128]; 1230 char buf[128];
533 struct strlist *sl, *rawlist; 1231 struct strlist *sl, *rawlist;
534 struct str_node *ent; 1232 struct str_node *ent;
535 struct probe_point pp; 1233 struct kprobe_trace_event tev;
1234 int ret = 0;
536 1235
537 memset(&pp, 0, sizeof(pp)); 1236 memset(&tev, 0, sizeof(tev));
538 rawlist = get_trace_kprobe_event_rawlist(fd);
539 1237
1238 rawlist = get_kprobe_trace_command_rawlist(fd);
540 sl = strlist__new(true, NULL); 1239 sl = strlist__new(true, NULL);
541 strlist__for_each(ent, rawlist) { 1240 strlist__for_each(ent, rawlist) {
542 parse_trace_kprobe_event(ent->s, &pp); 1241 ret = parse_kprobe_trace_command(ent->s, &tev);
1242 if (ret < 0)
1243 break;
543 if (include_group) { 1244 if (include_group) {
544 if (e_snprintf(buf, 128, "%s:%s", pp.group, 1245 ret = e_snprintf(buf, 128, "%s:%s", tev.group,
545 pp.event) < 0) 1246 tev.event);
546 die("Failed to copy group:event name."); 1247 if (ret >= 0)
547 strlist__add(sl, buf); 1248 ret = strlist__add(sl, buf);
548 } else 1249 } else
549 strlist__add(sl, pp.event); 1250 ret = strlist__add(sl, tev.event);
550 clear_probe_point(&pp); 1251 clear_kprobe_trace_event(&tev);
1252 if (ret < 0)
1253 break;
551 } 1254 }
552
553 strlist__delete(rawlist); 1255 strlist__delete(rawlist);
554 1256
1257 if (ret < 0) {
1258 strlist__delete(sl);
1259 return NULL;
1260 }
555 return sl; 1261 return sl;
556} 1262}
557 1263
558static void write_trace_kprobe_event(int fd, const char *buf) 1264static int write_kprobe_trace_event(int fd, struct kprobe_trace_event *tev)
559{ 1265{
560 int ret; 1266 int ret;
1267 char *buf = synthesize_kprobe_trace_command(tev);
1268
1269 if (!buf) {
1270 pr_debug("Failed to synthesize kprobe trace event.\n");
1271 return -EINVAL;
1272 }
561 1273
562 pr_debug("Writing event: %s\n", buf); 1274 pr_debug("Writing event: %s\n", buf);
563 ret = write(fd, buf, strlen(buf)); 1275 if (!probe_event_dry_run) {
564 if (ret <= 0) 1276 ret = write(fd, buf, strlen(buf));
565 die("Failed to write event: %s", strerror(errno)); 1277 if (ret <= 0)
1278 pr_warning("Failed to write event: %s\n",
1279 strerror(errno));
1280 }
1281 free(buf);
1282 return ret;
566} 1283}
567 1284
568static void get_new_event_name(char *buf, size_t len, const char *base, 1285static int get_new_event_name(char *buf, size_t len, const char *base,
569 struct strlist *namelist, bool allow_suffix) 1286 struct strlist *namelist, bool allow_suffix)
570{ 1287{
571 int i, ret; 1288 int i, ret;
572 1289
573 /* Try no suffix */ 1290 /* Try no suffix */
574 ret = e_snprintf(buf, len, "%s", base); 1291 ret = e_snprintf(buf, len, "%s", base);
575 if (ret < 0) 1292 if (ret < 0) {
576 die("snprintf() failed: %s", strerror(-ret)); 1293 pr_debug("snprintf() failed: %s\n", strerror(-ret));
1294 return ret;
1295 }
577 if (!strlist__has_entry(namelist, buf)) 1296 if (!strlist__has_entry(namelist, buf))
578 return; 1297 return 0;
579 1298
580 if (!allow_suffix) { 1299 if (!allow_suffix) {
581 pr_warning("Error: event \"%s\" already exists. " 1300 pr_warning("Error: event \"%s\" already exists. "
582 "(Use -f to force duplicates.)\n", base); 1301 "(Use -f to force duplicates.)\n", base);
583 die("Can't add new event."); 1302 return -EEXIST;
584 } 1303 }
585 1304
586 /* Try to add suffix */ 1305 /* Try to add suffix */
587 for (i = 1; i < MAX_EVENT_INDEX; i++) { 1306 for (i = 1; i < MAX_EVENT_INDEX; i++) {
588 ret = e_snprintf(buf, len, "%s_%d", base, i); 1307 ret = e_snprintf(buf, len, "%s_%d", base, i);
589 if (ret < 0) 1308 if (ret < 0) {
590 die("snprintf() failed: %s", strerror(-ret)); 1309 pr_debug("snprintf() failed: %s\n", strerror(-ret));
1310 return ret;
1311 }
591 if (!strlist__has_entry(namelist, buf)) 1312 if (!strlist__has_entry(namelist, buf))
592 break; 1313 break;
593 } 1314 }
594 if (i == MAX_EVENT_INDEX) 1315 if (i == MAX_EVENT_INDEX) {
595 die("Too many events are on the same function."); 1316 pr_warning("Too many events are on the same function.\n");
1317 ret = -ERANGE;
1318 }
1319
1320 return ret;
596} 1321}
597 1322
598void add_trace_kprobe_events(struct probe_point *probes, int nr_probes, 1323static int __add_kprobe_trace_events(struct perf_probe_event *pev,
599 bool force_add) 1324 struct kprobe_trace_event *tevs,
1325 int ntevs, bool allow_suffix)
600{ 1326{
601 int i, j, fd; 1327 int i, fd, ret;
602 struct probe_point *pp; 1328 struct kprobe_trace_event *tev = NULL;
603 char buf[MAX_CMDLEN]; 1329 char buf[64];
604 char event[64]; 1330 const char *event, *group;
605 struct strlist *namelist; 1331 struct strlist *namelist;
606 bool allow_suffix;
607 1332
608 fd = open_kprobe_events(O_RDWR, O_APPEND); 1333 fd = open_kprobe_events(true);
1334 if (fd < 0)
1335 return fd;
609 /* Get current event names */ 1336 /* Get current event names */
610 namelist = get_perf_event_names(fd, false); 1337 namelist = get_kprobe_trace_event_names(fd, false);
611 1338 if (!namelist) {
612 for (j = 0; j < nr_probes; j++) { 1339 pr_debug("Failed to get current event list.\n");
613 pp = probes + j; 1340 return -EIO;
614 if (!pp->event) 1341 }
615 pp->event = strdup(pp->function); 1342
616 if (!pp->group) 1343 ret = 0;
617 pp->group = strdup(PERFPROBE_GROUP); 1344 printf("Add new event%s\n", (ntevs > 1) ? "s:" : ":");
618 DIE_IF(!pp->event || !pp->group); 1345 for (i = 0; i < ntevs; i++) {
619 /* If force_add is true, suffix search is allowed */ 1346 tev = &tevs[i];
620 allow_suffix = force_add; 1347 if (pev->event)
621 for (i = 0; i < pp->found; i++) { 1348 event = pev->event;
622 /* Get an unused new event name */ 1349 else
623 get_new_event_name(event, 64, pp->event, namelist, 1350 if (pev->point.function)
624 allow_suffix); 1351 event = pev->point.function;
625 snprintf(buf, MAX_CMDLEN, "%c:%s/%s %s\n", 1352 else
626 pp->retprobe ? 'r' : 'p', 1353 event = tev->point.symbol;
627 pp->group, event, 1354 if (pev->group)
628 pp->probes[i]); 1355 group = pev->group;
629 write_trace_kprobe_event(fd, buf); 1356 else
630 printf("Added new event:\n"); 1357 group = PERFPROBE_GROUP;
631 /* Get the first parameter (probe-point) */ 1358
632 sscanf(pp->probes[i], "%s", buf); 1359 /* Get an unused new event name */
633 show_perf_probe_event(event, buf, pp); 1360 ret = get_new_event_name(buf, 64, event,
634 /* Add added event name to namelist */ 1361 namelist, allow_suffix);
635 strlist__add(namelist, event); 1362 if (ret < 0)
636 /* 1363 break;
637 * Probes after the first probe which comes from same 1364 event = buf;
638 * user input are always allowed to add suffix, because 1365
639 * there might be several addresses corresponding to 1366 tev->event = strdup(event);
640 * one code line. 1367 tev->group = strdup(group);
641 */ 1368 if (tev->event == NULL || tev->group == NULL) {
642 allow_suffix = true; 1369 ret = -ENOMEM;
1370 break;
643 } 1371 }
1372 ret = write_kprobe_trace_event(fd, tev);
1373 if (ret < 0)
1374 break;
1375 /* Add added event name to namelist */
1376 strlist__add(namelist, event);
1377
1378 /* Trick here - save current event/group */
1379 event = pev->event;
1380 group = pev->group;
1381 pev->event = tev->event;
1382 pev->group = tev->group;
1383 show_perf_probe_event(pev);
1384 /* Trick here - restore current event/group */
1385 pev->event = (char *)event;
1386 pev->group = (char *)group;
1387
1388 /*
1389 * Probes after the first probe which comes from same
1390 * user input are always allowed to add suffix, because
1391 * there might be several addresses corresponding to
1392 * one code line.
1393 */
1394 allow_suffix = true;
1395 }
1396
1397 if (ret >= 0) {
1398 /* Show how to use the event. */
1399 printf("\nYou can now use it on all perf tools, such as:\n\n");
1400 printf("\tperf record -e %s:%s -aR sleep 1\n\n", tev->group,
1401 tev->event);
644 } 1402 }
645 /* Show how to use the event. */
646 printf("\nYou can now use it on all perf tools, such as:\n\n");
647 printf("\tperf record -e %s:%s -a sleep 1\n\n", PERFPROBE_GROUP, event);
648 1403
649 strlist__delete(namelist); 1404 strlist__delete(namelist);
650 close(fd); 1405 close(fd);
1406 return ret;
651} 1407}
652 1408
653static void __del_trace_kprobe_event(int fd, struct str_node *ent) 1409static int convert_to_kprobe_trace_events(struct perf_probe_event *pev,
1410 struct kprobe_trace_event **tevs)
1411{
1412 struct symbol *sym;
1413 int ret = 0, i;
1414 struct kprobe_trace_event *tev;
1415
1416 /* Convert perf_probe_event with debuginfo */
1417 ret = try_to_find_kprobe_trace_events(pev, tevs);
1418 if (ret != 0)
1419 return ret;
1420
1421 /* Allocate trace event buffer */
1422 tev = *tevs = zalloc(sizeof(struct kprobe_trace_event));
1423 if (tev == NULL)
1424 return -ENOMEM;
1425
1426 /* Copy parameters */
1427 tev->point.symbol = strdup(pev->point.function);
1428 if (tev->point.symbol == NULL) {
1429 ret = -ENOMEM;
1430 goto error;
1431 }
1432 tev->point.offset = pev->point.offset;
1433 tev->nargs = pev->nargs;
1434 if (tev->nargs) {
1435 tev->args = zalloc(sizeof(struct kprobe_trace_arg)
1436 * tev->nargs);
1437 if (tev->args == NULL) {
1438 ret = -ENOMEM;
1439 goto error;
1440 }
1441 for (i = 0; i < tev->nargs; i++) {
1442 if (pev->args[i].name) {
1443 tev->args[i].name = strdup(pev->args[i].name);
1444 if (tev->args[i].name == NULL) {
1445 ret = -ENOMEM;
1446 goto error;
1447 }
1448 }
1449 tev->args[i].value = strdup(pev->args[i].var);
1450 if (tev->args[i].value == NULL) {
1451 ret = -ENOMEM;
1452 goto error;
1453 }
1454 if (pev->args[i].type) {
1455 tev->args[i].type = strdup(pev->args[i].type);
1456 if (tev->args[i].type == NULL) {
1457 ret = -ENOMEM;
1458 goto error;
1459 }
1460 }
1461 }
1462 }
1463
1464 /* Currently just checking function name from symbol map */
1465 sym = map__find_symbol_by_name(kmaps[MAP__FUNCTION],
1466 tev->point.symbol, NULL);
1467 if (!sym) {
1468 pr_warning("Kernel symbol \'%s\' not found.\n",
1469 tev->point.symbol);
1470 ret = -ENOENT;
1471 goto error;
1472 }
1473
1474 return 1;
1475error:
1476 clear_kprobe_trace_event(tev);
1477 free(tev);
1478 *tevs = NULL;
1479 return ret;
1480}
1481
1482struct __event_package {
1483 struct perf_probe_event *pev;
1484 struct kprobe_trace_event *tevs;
1485 int ntevs;
1486};
1487
1488int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
1489 bool force_add)
1490{
1491 int i, j, ret;
1492 struct __event_package *pkgs;
1493
1494 pkgs = zalloc(sizeof(struct __event_package) * npevs);
1495 if (pkgs == NULL)
1496 return -ENOMEM;
1497
1498 /* Init vmlinux path */
1499 ret = init_vmlinux();
1500 if (ret < 0)
1501 return ret;
1502
1503 /* Loop 1: convert all events */
1504 for (i = 0; i < npevs; i++) {
1505 pkgs[i].pev = &pevs[i];
1506 /* Convert with or without debuginfo */
1507 ret = convert_to_kprobe_trace_events(pkgs[i].pev,
1508 &pkgs[i].tevs);
1509 if (ret < 0)
1510 goto end;
1511 pkgs[i].ntevs = ret;
1512 }
1513
1514 /* Loop 2: add all events */
1515 for (i = 0; i < npevs && ret >= 0; i++)
1516 ret = __add_kprobe_trace_events(pkgs[i].pev, pkgs[i].tevs,
1517 pkgs[i].ntevs, force_add);
1518end:
1519 /* Loop 3: cleanup trace events */
1520 for (i = 0; i < npevs; i++)
1521 for (j = 0; j < pkgs[i].ntevs; j++)
1522 clear_kprobe_trace_event(&pkgs[i].tevs[j]);
1523
1524 return ret;
1525}
1526
1527static int __del_trace_kprobe_event(int fd, struct str_node *ent)
654{ 1528{
655 char *p; 1529 char *p;
656 char buf[128]; 1530 char buf[128];
1531 int ret;
657 1532
658 /* Convert from perf-probe event to trace-kprobe event */ 1533 /* Convert from perf-probe event to trace-kprobe event */
659 if (e_snprintf(buf, 128, "-:%s", ent->s) < 0) 1534 ret = e_snprintf(buf, 128, "-:%s", ent->s);
660 die("Failed to copy event."); 1535 if (ret < 0)
1536 goto error;
1537
661 p = strchr(buf + 2, ':'); 1538 p = strchr(buf + 2, ':');
662 if (!p) 1539 if (!p) {
663 die("Internal error: %s should have ':' but not.", ent->s); 1540 pr_debug("Internal error: %s should have ':' but not.\n",
1541 ent->s);
1542 ret = -ENOTSUP;
1543 goto error;
1544 }
664 *p = '/'; 1545 *p = '/';
665 1546
666 write_trace_kprobe_event(fd, buf); 1547 pr_debug("Writing event: %s\n", buf);
1548 ret = write(fd, buf, strlen(buf));
1549 if (ret < 0)
1550 goto error;
1551
667 printf("Remove event: %s\n", ent->s); 1552 printf("Remove event: %s\n", ent->s);
1553 return 0;
1554error:
1555 pr_warning("Failed to delete event: %s\n", strerror(-ret));
1556 return ret;
668} 1557}
669 1558
670static void del_trace_kprobe_event(int fd, const char *group, 1559static int del_trace_kprobe_event(int fd, const char *group,
671 const char *event, struct strlist *namelist) 1560 const char *event, struct strlist *namelist)
672{ 1561{
673 char buf[128]; 1562 char buf[128];
674 struct str_node *ent, *n; 1563 struct str_node *ent, *n;
675 int found = 0; 1564 int found = 0, ret = 0;
676 1565
677 if (e_snprintf(buf, 128, "%s:%s", group, event) < 0) 1566 ret = e_snprintf(buf, 128, "%s:%s", group, event);
678 die("Failed to copy event."); 1567 if (ret < 0) {
1568 pr_err("Failed to copy event.");
1569 return ret;
1570 }
679 1571
680 if (strpbrk(buf, "*?")) { /* Glob-exp */ 1572 if (strpbrk(buf, "*?")) { /* Glob-exp */
681 strlist__for_each_safe(ent, n, namelist) 1573 strlist__for_each_safe(ent, n, namelist)
682 if (strglobmatch(ent->s, buf)) { 1574 if (strglobmatch(ent->s, buf)) {
683 found++; 1575 found++;
684 __del_trace_kprobe_event(fd, ent); 1576 ret = __del_trace_kprobe_event(fd, ent);
1577 if (ret < 0)
1578 break;
685 strlist__remove(namelist, ent); 1579 strlist__remove(namelist, ent);
686 } 1580 }
687 } else { 1581 } else {
688 ent = strlist__find(namelist, buf); 1582 ent = strlist__find(namelist, buf);
689 if (ent) { 1583 if (ent) {
690 found++; 1584 found++;
691 __del_trace_kprobe_event(fd, ent); 1585 ret = __del_trace_kprobe_event(fd, ent);
692 strlist__remove(namelist, ent); 1586 if (ret >= 0)
1587 strlist__remove(namelist, ent);
693 } 1588 }
694 } 1589 }
695 if (found == 0) 1590 if (found == 0 && ret >= 0)
696 pr_info("Info: event \"%s\" does not exist, could not remove it.\n", buf); 1591 pr_info("Info: Event \"%s\" does not exist.\n", buf);
1592
1593 return ret;
697} 1594}
698 1595
699void del_trace_kprobe_events(struct strlist *dellist) 1596int del_perf_probe_events(struct strlist *dellist)
700{ 1597{
701 int fd; 1598 int fd, ret = 0;
702 const char *group, *event; 1599 const char *group, *event;
703 char *p, *str; 1600 char *p, *str;
704 struct str_node *ent; 1601 struct str_node *ent;
705 struct strlist *namelist; 1602 struct strlist *namelist;
706 1603
707 fd = open_kprobe_events(O_RDWR, O_APPEND); 1604 fd = open_kprobe_events(true);
1605 if (fd < 0)
1606 return fd;
1607
708 /* Get current event names */ 1608 /* Get current event names */
709 namelist = get_perf_event_names(fd, true); 1609 namelist = get_kprobe_trace_event_names(fd, true);
1610 if (namelist == NULL)
1611 return -EINVAL;
710 1612
711 strlist__for_each(ent, dellist) { 1613 strlist__for_each(ent, dellist) {
712 str = strdup(ent->s); 1614 str = strdup(ent->s);
713 if (!str) 1615 if (str == NULL) {
714 die("Failed to copy event."); 1616 ret = -ENOMEM;
1617 break;
1618 }
715 pr_debug("Parsing: %s\n", str); 1619 pr_debug("Parsing: %s\n", str);
716 p = strchr(str, ':'); 1620 p = strchr(str, ':');
717 if (p) { 1621 if (p) {
@@ -723,80 +1627,14 @@ void del_trace_kprobe_events(struct strlist *dellist)
723 event = str; 1627 event = str;
724 } 1628 }
725 pr_debug("Group: %s, Event: %s\n", group, event); 1629 pr_debug("Group: %s, Event: %s\n", group, event);
726 del_trace_kprobe_event(fd, group, event, namelist); 1630 ret = del_trace_kprobe_event(fd, group, event, namelist);
727 free(str); 1631 free(str);
1632 if (ret < 0)
1633 break;
728 } 1634 }
729 strlist__delete(namelist); 1635 strlist__delete(namelist);
730 close(fd); 1636 close(fd);
731}
732 1637
733#define LINEBUF_SIZE 256 1638 return ret;
734#define NR_ADDITIONAL_LINES 2
735
736static void show_one_line(FILE *fp, unsigned int l, bool skip, bool show_num)
737{
738 char buf[LINEBUF_SIZE];
739 const char *color = PERF_COLOR_BLUE;
740
741 if (fgets(buf, LINEBUF_SIZE, fp) == NULL)
742 goto error;
743 if (!skip) {
744 if (show_num)
745 fprintf(stdout, "%7u %s", l, buf);
746 else
747 color_fprintf(stdout, color, " %s", buf);
748 }
749
750 while (strlen(buf) == LINEBUF_SIZE - 1 &&
751 buf[LINEBUF_SIZE - 2] != '\n') {
752 if (fgets(buf, LINEBUF_SIZE, fp) == NULL)
753 goto error;
754 if (!skip) {
755 if (show_num)
756 fprintf(stdout, "%s", buf);
757 else
758 color_fprintf(stdout, color, "%s", buf);
759 }
760 }
761 return;
762error:
763 if (feof(fp))
764 die("Source file is shorter than expected.");
765 else
766 die("File read error: %s", strerror(errno));
767} 1639}
768 1640
769void show_line_range(struct line_range *lr)
770{
771 unsigned int l = 1;
772 struct line_node *ln;
773 FILE *fp;
774
775 setup_pager();
776
777 if (lr->function)
778 fprintf(stdout, "<%s:%d>\n", lr->function,
779 lr->start - lr->offset);
780 else
781 fprintf(stdout, "<%s:%d>\n", lr->file, lr->start);
782
783 fp = fopen(lr->path, "r");
784 if (fp == NULL)
785 die("Failed to open %s: %s", lr->path, strerror(errno));
786 /* Skip to starting line number */
787 while (l < lr->start)
788 show_one_line(fp, l++, true, false);
789
790 list_for_each_entry(ln, &lr->line_list, list) {
791 while (ln->line > l)
792 show_one_line(fp, (l++) - lr->offset, false, false);
793 show_one_line(fp, (l++) - lr->offset, false, true);
794 }
795
796 if (lr->end == INT_MAX)
797 lr->end = l + NR_ADDITIONAL_LINES;
798 while (l < lr->end && !feof(fp))
799 show_one_line(fp, (l++) - lr->offset, false, false);
800
801 fclose(fp);
802}
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 711287d4baea..e7ff0d02c0d4 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -2,21 +2,125 @@
2#define _PROBE_EVENT_H 2#define _PROBE_EVENT_H
3 3
4#include <stdbool.h> 4#include <stdbool.h>
5#include "probe-finder.h"
6#include "strlist.h" 5#include "strlist.h"
7 6
8extern void parse_line_range_desc(const char *arg, struct line_range *lr); 7extern bool probe_event_dry_run;
9extern void parse_perf_probe_event(const char *str, struct probe_point *pp, 8
10 bool *need_dwarf); 9/* kprobe-tracer tracing point */
11extern int synthesize_perf_probe_point(struct probe_point *pp); 10struct kprobe_trace_point {
12extern int synthesize_perf_probe_event(struct probe_point *pp); 11 char *symbol; /* Base symbol */
13extern void parse_trace_kprobe_event(const char *str, struct probe_point *pp); 12 unsigned long offset; /* Offset from symbol */
14extern int synthesize_trace_kprobe_event(struct probe_point *pp); 13 bool retprobe; /* Return probe flag */
15extern void add_trace_kprobe_events(struct probe_point *probes, int nr_probes, 14};
16 bool force_add); 15
17extern void del_trace_kprobe_events(struct strlist *dellist); 16/* kprobe-tracer tracing argument referencing offset */
18extern void show_perf_probe_events(void); 17struct kprobe_trace_arg_ref {
19extern void show_line_range(struct line_range *lr); 18 struct kprobe_trace_arg_ref *next; /* Next reference */
19 long offset; /* Offset value */
20};
21
22/* kprobe-tracer tracing argument */
23struct kprobe_trace_arg {
24 char *name; /* Argument name */
25 char *value; /* Base value */
26 char *type; /* Type name */
27 struct kprobe_trace_arg_ref *ref; /* Referencing offset */
28};
29
30/* kprobe-tracer tracing event (point + arg) */
31struct kprobe_trace_event {
32 char *event; /* Event name */
33 char *group; /* Group name */
34 struct kprobe_trace_point point; /* Trace point */
35 int nargs; /* Number of args */
36 struct kprobe_trace_arg *args; /* Arguments */
37};
38
39/* Perf probe probing point */
40struct perf_probe_point {
41 char *file; /* File path */
42 char *function; /* Function name */
43 int line; /* Line number */
44 bool retprobe; /* Return probe flag */
45 char *lazy_line; /* Lazy matching pattern */
46 unsigned long offset; /* Offset from function entry */
47};
48
49/* Perf probe probing argument field chain */
50struct perf_probe_arg_field {
51 struct perf_probe_arg_field *next; /* Next field */
52 char *name; /* Name of the field */
53 bool ref; /* Referencing flag */
54};
55
56/* Perf probe probing argument */
57struct perf_probe_arg {
58 char *name; /* Argument name */
59 char *var; /* Variable name */
60 char *type; /* Type name */
61 struct perf_probe_arg_field *field; /* Structure fields */
62};
63
64/* Perf probe probing event (point + arg) */
65struct perf_probe_event {
66 char *event; /* Event name */
67 char *group; /* Group name */
68 struct perf_probe_point point; /* Probe point */
69 int nargs; /* Number of arguments */
70 struct perf_probe_arg *args; /* Arguments */
71};
72
73
74/* Line number container */
75struct line_node {
76 struct list_head list;
77 int line;
78};
79
80/* Line range */
81struct line_range {
82 char *file; /* File name */
83 char *function; /* Function name */
84 int start; /* Start line number */
85 int end; /* End line number */
86 int offset; /* Start line offset */
87 char *path; /* Real path name */
88 struct list_head line_list; /* Visible lines */
89};
90
91/* Command string to events */
92extern int parse_perf_probe_command(const char *cmd,
93 struct perf_probe_event *pev);
94extern int parse_kprobe_trace_command(const char *cmd,
95 struct kprobe_trace_event *tev);
96
97/* Events to command string */
98extern char *synthesize_perf_probe_command(struct perf_probe_event *pev);
99extern char *synthesize_kprobe_trace_command(struct kprobe_trace_event *tev);
100extern int synthesize_perf_probe_arg(struct perf_probe_arg *pa, char *buf,
101 size_t len);
102
103/* Check the perf_probe_event needs debuginfo */
104extern bool perf_probe_event_need_dwarf(struct perf_probe_event *pev);
105
106/* Convert from kprobe_trace_event to perf_probe_event */
107extern int convert_to_perf_probe_event(struct kprobe_trace_event *tev,
108 struct perf_probe_event *pev);
109
110/* Release event contents */
111extern void clear_perf_probe_event(struct perf_probe_event *pev);
112extern void clear_kprobe_trace_event(struct kprobe_trace_event *tev);
113
114/* Command string to line-range */
115extern int parse_line_range_desc(const char *cmd, struct line_range *lr);
116
117
118extern int add_perf_probe_events(struct perf_probe_event *pevs, int ntevs,
119 bool force_add);
120extern int del_perf_probe_events(struct strlist *dellist);
121extern int show_perf_probe_events(void);
122extern int show_line_range(struct line_range *lr);
123
20 124
21/* Maximum index number of event-name postfix */ 125/* Maximum index number of event-name postfix */
22#define MAX_EVENT_INDEX 1024 126#define MAX_EVENT_INDEX 1024
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index c171a243d05b..3e7977560be5 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -84,6 +84,9 @@ const char *x86_64_regs_table[X86_64_MAX_REGS] = {
84#define arch_regs_table x86_32_regs_table 84#define arch_regs_table x86_32_regs_table
85#endif 85#endif
86 86
87/* Kprobe tracer basic type is up to u64 */
88#define MAX_BASIC_TYPE_BITS 64
89
87/* Return architecture dependent register string (for kprobe-tracer) */ 90/* Return architecture dependent register string (for kprobe-tracer) */
88static const char *get_arch_regstr(unsigned int n) 91static const char *get_arch_regstr(unsigned int n)
89{ 92{
@@ -108,7 +111,7 @@ static int strtailcmp(const char *s1, const char *s2)
108/* Line number list operations */ 111/* Line number list operations */
109 112
110/* Add a line to line number list */ 113/* Add a line to line number list */
111static void line_list__add_line(struct list_head *head, unsigned int line) 114static int line_list__add_line(struct list_head *head, int line)
112{ 115{
113 struct line_node *ln; 116 struct line_node *ln;
114 struct list_head *p; 117 struct list_head *p;
@@ -119,21 +122,23 @@ static void line_list__add_line(struct list_head *head, unsigned int line)
119 p = &ln->list; 122 p = &ln->list;
120 goto found; 123 goto found;
121 } else if (ln->line == line) /* Already exist */ 124 } else if (ln->line == line) /* Already exist */
122 return ; 125 return 1;
123 } 126 }
124 /* List is empty, or the smallest entry */ 127 /* List is empty, or the smallest entry */
125 p = head; 128 p = head;
126found: 129found:
127 pr_debug("line list: add a line %u\n", line); 130 pr_debug("line list: add a line %u\n", line);
128 ln = zalloc(sizeof(struct line_node)); 131 ln = zalloc(sizeof(struct line_node));
129 DIE_IF(ln == NULL); 132 if (ln == NULL)
133 return -ENOMEM;
130 ln->line = line; 134 ln->line = line;
131 INIT_LIST_HEAD(&ln->list); 135 INIT_LIST_HEAD(&ln->list);
132 list_add(&ln->list, p); 136 list_add(&ln->list, p);
137 return 0;
133} 138}
134 139
135/* Check if the line in line number list */ 140/* Check if the line in line number list */
136static int line_list__has_line(struct list_head *head, unsigned int line) 141static int line_list__has_line(struct list_head *head, int line)
137{ 142{
138 struct line_node *ln; 143 struct line_node *ln;
139 144
@@ -184,9 +189,129 @@ static const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname)
184 if (strtailcmp(src, fname) == 0) 189 if (strtailcmp(src, fname) == 0)
185 break; 190 break;
186 } 191 }
192 if (i == nfiles)
193 return NULL;
187 return src; 194 return src;
188} 195}
189 196
197/* Compare diename and tname */
198static bool die_compare_name(Dwarf_Die *dw_die, const char *tname)
199{
200 const char *name;
201 name = dwarf_diename(dw_die);
202 return name ? strcmp(tname, name) : -1;
203}
204
205/* Get type die, but skip qualifiers and typedef */
206static Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
207{
208 Dwarf_Attribute attr;
209 int tag;
210
211 do {
212 if (dwarf_attr(vr_die, DW_AT_type, &attr) == NULL ||
213 dwarf_formref_die(&attr, die_mem) == NULL)
214 return NULL;
215
216 tag = dwarf_tag(die_mem);
217 vr_die = die_mem;
218 } while (tag == DW_TAG_const_type ||
219 tag == DW_TAG_restrict_type ||
220 tag == DW_TAG_volatile_type ||
221 tag == DW_TAG_shared_type ||
222 tag == DW_TAG_typedef);
223
224 return die_mem;
225}
226
227static bool die_is_signed_type(Dwarf_Die *tp_die)
228{
229 Dwarf_Attribute attr;
230 Dwarf_Word ret;
231
232 if (dwarf_attr(tp_die, DW_AT_encoding, &attr) == NULL ||
233 dwarf_formudata(&attr, &ret) != 0)
234 return false;
235
236 return (ret == DW_ATE_signed_char || ret == DW_ATE_signed ||
237 ret == DW_ATE_signed_fixed);
238}
239
240static int die_get_byte_size(Dwarf_Die *tp_die)
241{
242 Dwarf_Attribute attr;
243 Dwarf_Word ret;
244
245 if (dwarf_attr(tp_die, DW_AT_byte_size, &attr) == NULL ||
246 dwarf_formudata(&attr, &ret) != 0)
247 return 0;
248
249 return (int)ret;
250}
251
252/* Get data_member_location offset */
253static int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs)
254{
255 Dwarf_Attribute attr;
256 Dwarf_Op *expr;
257 size_t nexpr;
258 int ret;
259
260 if (dwarf_attr(mb_die, DW_AT_data_member_location, &attr) == NULL)
261 return -ENOENT;
262
263 if (dwarf_formudata(&attr, offs) != 0) {
264 /* DW_AT_data_member_location should be DW_OP_plus_uconst */
265 ret = dwarf_getlocation(&attr, &expr, &nexpr);
266 if (ret < 0 || nexpr == 0)
267 return -ENOENT;
268
269 if (expr[0].atom != DW_OP_plus_uconst || nexpr != 1) {
270 pr_debug("Unable to get offset:Unexpected OP %x (%zd)\n",
271 expr[0].atom, nexpr);
272 return -ENOTSUP;
273 }
274 *offs = (Dwarf_Word)expr[0].number;
275 }
276 return 0;
277}
278
279/* Return values for die_find callbacks */
280enum {
281 DIE_FIND_CB_FOUND = 0, /* End of Search */
282 DIE_FIND_CB_CHILD = 1, /* Search only children */
283 DIE_FIND_CB_SIBLING = 2, /* Search only siblings */
284 DIE_FIND_CB_CONTINUE = 3, /* Search children and siblings */
285};
286
287/* Search a child die */
288static Dwarf_Die *die_find_child(Dwarf_Die *rt_die,
289 int (*callback)(Dwarf_Die *, void *),
290 void *data, Dwarf_Die *die_mem)
291{
292 Dwarf_Die child_die;
293 int ret;
294
295 ret = dwarf_child(rt_die, die_mem);
296 if (ret != 0)
297 return NULL;
298
299 do {
300 ret = callback(die_mem, data);
301 if (ret == DIE_FIND_CB_FOUND)
302 return die_mem;
303
304 if ((ret & DIE_FIND_CB_CHILD) &&
305 die_find_child(die_mem, callback, data, &child_die)) {
306 memcpy(die_mem, &child_die, sizeof(Dwarf_Die));
307 return die_mem;
308 }
309 } while ((ret & DIE_FIND_CB_SIBLING) &&
310 dwarf_siblingof(die_mem, die_mem) == 0);
311
312 return NULL;
313}
314
190struct __addr_die_search_param { 315struct __addr_die_search_param {
191 Dwarf_Addr addr; 316 Dwarf_Addr addr;
192 Dwarf_Die *die_mem; 317 Dwarf_Die *die_mem;
@@ -205,8 +330,8 @@ static int __die_search_func_cb(Dwarf_Die *fn_die, void *data)
205} 330}
206 331
207/* Search a real subprogram including this line, */ 332/* Search a real subprogram including this line, */
208static Dwarf_Die *die_get_real_subprogram(Dwarf_Die *cu_die, Dwarf_Addr addr, 333static Dwarf_Die *die_find_real_subprogram(Dwarf_Die *cu_die, Dwarf_Addr addr,
209 Dwarf_Die *die_mem) 334 Dwarf_Die *die_mem)
210{ 335{
211 struct __addr_die_search_param ad; 336 struct __addr_die_search_param ad;
212 ad.addr = addr; 337 ad.addr = addr;
@@ -218,77 +343,64 @@ static Dwarf_Die *die_get_real_subprogram(Dwarf_Die *cu_die, Dwarf_Addr addr,
218 return die_mem; 343 return die_mem;
219} 344}
220 345
221/* Similar to dwarf_getfuncs, but returns inlined_subroutine if exists. */ 346/* die_find callback for inline function search */
222static Dwarf_Die *die_get_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr, 347static int __die_find_inline_cb(Dwarf_Die *die_mem, void *data)
223 Dwarf_Die *die_mem)
224{ 348{
225 Dwarf_Die child_die; 349 Dwarf_Addr *addr = data;
226 int ret;
227 350
228 ret = dwarf_child(sp_die, die_mem); 351 if (dwarf_tag(die_mem) == DW_TAG_inlined_subroutine &&
229 if (ret != 0) 352 dwarf_haspc(die_mem, *addr))
230 return NULL; 353 return DIE_FIND_CB_FOUND;
231 354
232 do { 355 return DIE_FIND_CB_CONTINUE;
233 if (dwarf_tag(die_mem) == DW_TAG_inlined_subroutine &&
234 dwarf_haspc(die_mem, addr))
235 return die_mem;
236
237 if (die_get_inlinefunc(die_mem, addr, &child_die)) {
238 memcpy(die_mem, &child_die, sizeof(Dwarf_Die));
239 return die_mem;
240 }
241 } while (dwarf_siblingof(die_mem, die_mem) == 0);
242
243 return NULL;
244} 356}
245 357
246/* Compare diename and tname */ 358/* Similar to dwarf_getfuncs, but returns inlined_subroutine if exists. */
247static bool die_compare_name(Dwarf_Die *dw_die, const char *tname) 359static Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
360 Dwarf_Die *die_mem)
248{ 361{
249 const char *name; 362 return die_find_child(sp_die, __die_find_inline_cb, &addr, die_mem);
250 name = dwarf_diename(dw_die);
251 DIE_IF(name == NULL);
252 return strcmp(tname, name);
253} 363}
254 364
255/* Get entry pc(or low pc, 1st entry of ranges) of the die */ 365static int __die_find_variable_cb(Dwarf_Die *die_mem, void *data)
256static Dwarf_Addr die_get_entrypc(Dwarf_Die *dw_die)
257{ 366{
258 Dwarf_Addr epc; 367 const char *name = data;
259 int ret; 368 int tag;
260 369
261 ret = dwarf_entrypc(dw_die, &epc); 370 tag = dwarf_tag(die_mem);
262 DIE_IF(ret == -1); 371 if ((tag == DW_TAG_formal_parameter ||
263 return epc; 372 tag == DW_TAG_variable) &&
373 (die_compare_name(die_mem, name) == 0))
374 return DIE_FIND_CB_FOUND;
375
376 return DIE_FIND_CB_CONTINUE;
264} 377}
265 378
266/* Get a variable die */ 379/* Find a variable called 'name' */
267static Dwarf_Die *die_find_variable(Dwarf_Die *sp_die, const char *name, 380static Dwarf_Die *die_find_variable(Dwarf_Die *sp_die, const char *name,
268 Dwarf_Die *die_mem) 381 Dwarf_Die *die_mem)
269{ 382{
270 Dwarf_Die child_die; 383 return die_find_child(sp_die, __die_find_variable_cb, (void *)name,
271 int tag; 384 die_mem);
272 int ret; 385}
273 386
274 ret = dwarf_child(sp_die, die_mem); 387static int __die_find_member_cb(Dwarf_Die *die_mem, void *data)
275 if (ret != 0) 388{
276 return NULL; 389 const char *name = data;
277 390
278 do { 391 if ((dwarf_tag(die_mem) == DW_TAG_member) &&
279 tag = dwarf_tag(die_mem); 392 (die_compare_name(die_mem, name) == 0))
280 if ((tag == DW_TAG_formal_parameter || 393 return DIE_FIND_CB_FOUND;
281 tag == DW_TAG_variable) &&
282 (die_compare_name(die_mem, name) == 0))
283 return die_mem;
284 394
285 if (die_find_variable(die_mem, name, &child_die)) { 395 return DIE_FIND_CB_SIBLING;
286 memcpy(die_mem, &child_die, sizeof(Dwarf_Die)); 396}
287 return die_mem;
288 }
289 } while (dwarf_siblingof(die_mem, die_mem) == 0);
290 397
291 return NULL; 398/* Find a member called 'name' */
399static Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
400 Dwarf_Die *die_mem)
401{
402 return die_find_child(st_die, __die_find_member_cb, (void *)name,
403 die_mem);
292} 404}
293 405
294/* 406/*
@@ -296,19 +408,22 @@ static Dwarf_Die *die_find_variable(Dwarf_Die *sp_die, const char *name,
296 */ 408 */
297 409
298/* Show a location */ 410/* Show a location */
299static void show_location(Dwarf_Op *op, struct probe_finder *pf) 411static int convert_location(Dwarf_Op *op, struct probe_finder *pf)
300{ 412{
301 unsigned int regn; 413 unsigned int regn;
302 Dwarf_Word offs = 0; 414 Dwarf_Word offs = 0;
303 int deref = 0, ret; 415 bool ref = false;
304 const char *regs; 416 const char *regs;
417 struct kprobe_trace_arg *tvar = pf->tvar;
305 418
306 /* TODO: support CFA */
307 /* If this is based on frame buffer, set the offset */ 419 /* If this is based on frame buffer, set the offset */
308 if (op->atom == DW_OP_fbreg) { 420 if (op->atom == DW_OP_fbreg) {
309 if (pf->fb_ops == NULL) 421 if (pf->fb_ops == NULL) {
310 die("The attribute of frame base is not supported.\n"); 422 pr_warning("The attribute of frame base is not "
311 deref = 1; 423 "supported.\n");
424 return -ENOTSUP;
425 }
426 ref = true;
312 offs = op->number; 427 offs = op->number;
313 op = &pf->fb_ops[0]; 428 op = &pf->fb_ops[0];
314 } 429 }
@@ -316,35 +431,164 @@ static void show_location(Dwarf_Op *op, struct probe_finder *pf)
316 if (op->atom >= DW_OP_breg0 && op->atom <= DW_OP_breg31) { 431 if (op->atom >= DW_OP_breg0 && op->atom <= DW_OP_breg31) {
317 regn = op->atom - DW_OP_breg0; 432 regn = op->atom - DW_OP_breg0;
318 offs += op->number; 433 offs += op->number;
319 deref = 1; 434 ref = true;
320 } else if (op->atom >= DW_OP_reg0 && op->atom <= DW_OP_reg31) { 435 } else if (op->atom >= DW_OP_reg0 && op->atom <= DW_OP_reg31) {
321 regn = op->atom - DW_OP_reg0; 436 regn = op->atom - DW_OP_reg0;
322 } else if (op->atom == DW_OP_bregx) { 437 } else if (op->atom == DW_OP_bregx) {
323 regn = op->number; 438 regn = op->number;
324 offs += op->number2; 439 offs += op->number2;
325 deref = 1; 440 ref = true;
326 } else if (op->atom == DW_OP_regx) { 441 } else if (op->atom == DW_OP_regx) {
327 regn = op->number; 442 regn = op->number;
328 } else 443 } else {
329 die("DW_OP %d is not supported.", op->atom); 444 pr_warning("DW_OP %x is not supported.\n", op->atom);
445 return -ENOTSUP;
446 }
330 447
331 regs = get_arch_regstr(regn); 448 regs = get_arch_regstr(regn);
332 if (!regs) 449 if (!regs) {
333 die("%u exceeds max register number.", regn); 450 pr_warning("%u exceeds max register number.\n", regn);
451 return -ERANGE;
452 }
453
454 tvar->value = strdup(regs);
455 if (tvar->value == NULL)
456 return -ENOMEM;
457
458 if (ref) {
459 tvar->ref = zalloc(sizeof(struct kprobe_trace_arg_ref));
460 if (tvar->ref == NULL)
461 return -ENOMEM;
462 tvar->ref->offset = (long)offs;
463 }
464 return 0;
465}
466
467static int convert_variable_type(Dwarf_Die *vr_die,
468 struct kprobe_trace_arg *targ)
469{
470 Dwarf_Die type;
471 char buf[16];
472 int ret;
473
474 if (die_get_real_type(vr_die, &type) == NULL) {
475 pr_warning("Failed to get a type information of %s.\n",
476 dwarf_diename(vr_die));
477 return -ENOENT;
478 }
479
480 ret = die_get_byte_size(&type) * 8;
481 if (ret) {
482 /* Check the bitwidth */
483 if (ret > MAX_BASIC_TYPE_BITS) {
484 pr_info("%s exceeds max-bitwidth."
485 " Cut down to %d bits.\n",
486 dwarf_diename(&type), MAX_BASIC_TYPE_BITS);
487 ret = MAX_BASIC_TYPE_BITS;
488 }
489
490 ret = snprintf(buf, 16, "%c%d",
491 die_is_signed_type(&type) ? 's' : 'u', ret);
492 if (ret < 0 || ret >= 16) {
493 if (ret >= 16)
494 ret = -E2BIG;
495 pr_warning("Failed to convert variable type: %s\n",
496 strerror(-ret));
497 return ret;
498 }
499 targ->type = strdup(buf);
500 if (targ->type == NULL)
501 return -ENOMEM;
502 }
503 return 0;
504}
505
506static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname,
507 struct perf_probe_arg_field *field,
508 struct kprobe_trace_arg_ref **ref_ptr,
509 Dwarf_Die *die_mem)
510{
511 struct kprobe_trace_arg_ref *ref = *ref_ptr;
512 Dwarf_Die type;
513 Dwarf_Word offs;
514 int ret;
515
516 pr_debug("converting %s in %s\n", field->name, varname);
517 if (die_get_real_type(vr_die, &type) == NULL) {
518 pr_warning("Failed to get the type of %s.\n", varname);
519 return -ENOENT;
520 }
334 521
335 if (deref) 522 /* Check the pointer and dereference */
336 ret = snprintf(pf->buf, pf->len, " %s=%+jd(%s)", 523 if (dwarf_tag(&type) == DW_TAG_pointer_type) {
337 pf->var, (intmax_t)offs, regs); 524 if (!field->ref) {
525 pr_err("Semantic error: %s must be referred by '->'\n",
526 field->name);
527 return -EINVAL;
528 }
529 /* Get the type pointed by this pointer */
530 if (die_get_real_type(&type, &type) == NULL) {
531 pr_warning("Failed to get the type of %s.\n", varname);
532 return -ENOENT;
533 }
534 /* Verify it is a data structure */
535 if (dwarf_tag(&type) != DW_TAG_structure_type) {
536 pr_warning("%s is not a data structure.\n", varname);
537 return -EINVAL;
538 }
539
540 ref = zalloc(sizeof(struct kprobe_trace_arg_ref));
541 if (ref == NULL)
542 return -ENOMEM;
543 if (*ref_ptr)
544 (*ref_ptr)->next = ref;
545 else
546 *ref_ptr = ref;
547 } else {
548 /* Verify it is a data structure */
549 if (dwarf_tag(&type) != DW_TAG_structure_type) {
550 pr_warning("%s is not a data structure.\n", varname);
551 return -EINVAL;
552 }
553 if (field->ref) {
554 pr_err("Semantic error: %s must be referred by '.'\n",
555 field->name);
556 return -EINVAL;
557 }
558 if (!ref) {
559 pr_warning("Structure on a register is not "
560 "supported yet.\n");
561 return -ENOTSUP;
562 }
563 }
564
565 if (die_find_member(&type, field->name, die_mem) == NULL) {
566 pr_warning("%s(tyep:%s) has no member %s.\n", varname,
567 dwarf_diename(&type), field->name);
568 return -EINVAL;
569 }
570
571 /* Get the offset of the field */
572 ret = die_get_data_member_location(die_mem, &offs);
573 if (ret < 0) {
574 pr_warning("Failed to get the offset of %s.\n", field->name);
575 return ret;
576 }
577 ref->offset += (long)offs;
578
579 /* Converting next field */
580 if (field->next)
581 return convert_variable_fields(die_mem, field->name,
582 field->next, &ref, die_mem);
338 else 583 else
339 ret = snprintf(pf->buf, pf->len, " %s=%s", pf->var, regs); 584 return 0;
340 DIE_IF(ret < 0);
341 DIE_IF(ret >= pf->len);
342} 585}
343 586
344/* Show a variables in kprobe event format */ 587/* Show a variables in kprobe event format */
345static void show_variable(Dwarf_Die *vr_die, struct probe_finder *pf) 588static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf)
346{ 589{
347 Dwarf_Attribute attr; 590 Dwarf_Attribute attr;
591 Dwarf_Die die_mem;
348 Dwarf_Op *expr; 592 Dwarf_Op *expr;
349 size_t nexpr; 593 size_t nexpr;
350 int ret; 594 int ret;
@@ -356,142 +600,190 @@ static void show_variable(Dwarf_Die *vr_die, struct probe_finder *pf)
356 if (ret <= 0 || nexpr == 0) 600 if (ret <= 0 || nexpr == 0)
357 goto error; 601 goto error;
358 602
359 show_location(expr, pf); 603 ret = convert_location(expr, pf);
604 if (ret == 0 && pf->pvar->field) {
605 ret = convert_variable_fields(vr_die, pf->pvar->var,
606 pf->pvar->field, &pf->tvar->ref,
607 &die_mem);
608 vr_die = &die_mem;
609 }
610 if (ret == 0) {
611 if (pf->pvar->type) {
612 pf->tvar->type = strdup(pf->pvar->type);
613 if (pf->tvar->type == NULL)
614 ret = -ENOMEM;
615 } else
616 ret = convert_variable_type(vr_die, pf->tvar);
617 }
360 /* *expr will be cached in libdw. Don't free it. */ 618 /* *expr will be cached in libdw. Don't free it. */
361 return ; 619 return ret;
362error: 620error:
363 /* TODO: Support const_value */ 621 /* TODO: Support const_value */
364 die("Failed to find the location of %s at this address.\n" 622 pr_err("Failed to find the location of %s at this address.\n"
365 " Perhaps, it has been optimized out.", pf->var); 623 " Perhaps, it has been optimized out.\n", pf->pvar->var);
624 return -ENOENT;
366} 625}
367 626
368/* Find a variable in a subprogram die */ 627/* Find a variable in a subprogram die */
369static void find_variable(Dwarf_Die *sp_die, struct probe_finder *pf) 628static int find_variable(Dwarf_Die *sp_die, struct probe_finder *pf)
370{ 629{
371 int ret;
372 Dwarf_Die vr_die; 630 Dwarf_Die vr_die;
631 char buf[32], *ptr;
632 int ret;
373 633
374 /* TODO: Support struct members and arrays */ 634 /* TODO: Support arrays */
375 if (!is_c_varname(pf->var)) { 635 if (pf->pvar->name)
376 /* Output raw parameters */ 636 pf->tvar->name = strdup(pf->pvar->name);
377 ret = snprintf(pf->buf, pf->len, " %s", pf->var); 637 else {
378 DIE_IF(ret < 0); 638 ret = synthesize_perf_probe_arg(pf->pvar, buf, 32);
379 DIE_IF(ret >= pf->len); 639 if (ret < 0)
380 return ; 640 return ret;
641 ptr = strchr(buf, ':'); /* Change type separator to _ */
642 if (ptr)
643 *ptr = '_';
644 pf->tvar->name = strdup(buf);
645 }
646 if (pf->tvar->name == NULL)
647 return -ENOMEM;
648
649 if (!is_c_varname(pf->pvar->var)) {
650 /* Copy raw parameters */
651 pf->tvar->value = strdup(pf->pvar->var);
652 if (pf->tvar->value == NULL)
653 return -ENOMEM;
654 else
655 return 0;
381 } 656 }
382 657
383 pr_debug("Searching '%s' variable in context.\n", pf->var); 658 pr_debug("Searching '%s' variable in context.\n",
659 pf->pvar->var);
384 /* Search child die for local variables and parameters. */ 660 /* Search child die for local variables and parameters. */
385 if (!die_find_variable(sp_die, pf->var, &vr_die)) 661 if (!die_find_variable(sp_die, pf->pvar->var, &vr_die)) {
386 die("Failed to find '%s' in this function.", pf->var); 662 pr_warning("Failed to find '%s' in this function.\n",
387 663 pf->pvar->var);
388 show_variable(&vr_die, pf); 664 return -ENOENT;
665 }
666 return convert_variable(&vr_die, pf);
389} 667}
390 668
391/* Show a probe point to output buffer */ 669/* Show a probe point to output buffer */
392static void show_probe_point(Dwarf_Die *sp_die, struct probe_finder *pf) 670static int convert_probe_point(Dwarf_Die *sp_die, struct probe_finder *pf)
393{ 671{
394 struct probe_point *pp = pf->pp; 672 struct kprobe_trace_event *tev;
395 Dwarf_Addr eaddr; 673 Dwarf_Addr eaddr;
396 Dwarf_Die die_mem; 674 Dwarf_Die die_mem;
397 const char *name; 675 const char *name;
398 char tmp[MAX_PROBE_BUFFER]; 676 int ret, i;
399 int ret, i, len;
400 Dwarf_Attribute fb_attr; 677 Dwarf_Attribute fb_attr;
401 size_t nops; 678 size_t nops;
402 679
680 if (pf->ntevs == MAX_PROBES) {
681 pr_warning("Too many( > %d) probe point found.\n", MAX_PROBES);
682 return -ERANGE;
683 }
684 tev = &pf->tevs[pf->ntevs++];
685
403 /* If no real subprogram, find a real one */ 686 /* If no real subprogram, find a real one */
404 if (!sp_die || dwarf_tag(sp_die) != DW_TAG_subprogram) { 687 if (!sp_die || dwarf_tag(sp_die) != DW_TAG_subprogram) {
405 sp_die = die_get_real_subprogram(&pf->cu_die, 688 sp_die = die_find_real_subprogram(&pf->cu_die,
406 pf->addr, &die_mem); 689 pf->addr, &die_mem);
407 if (!sp_die) 690 if (!sp_die) {
408 die("Probe point is not found in subprograms."); 691 pr_warning("Failed to find probe point in any "
692 "functions.\n");
693 return -ENOENT;
694 }
409 } 695 }
410 696
411 /* Output name of probe point */ 697 /* Copy the name of probe point */
412 name = dwarf_diename(sp_die); 698 name = dwarf_diename(sp_die);
413 if (name) { 699 if (name) {
414 dwarf_entrypc(sp_die, &eaddr); 700 if (dwarf_entrypc(sp_die, &eaddr) != 0) {
415 ret = snprintf(tmp, MAX_PROBE_BUFFER, "%s+%lu", name, 701 pr_warning("Failed to get entry pc of %s\n",
416 (unsigned long)(pf->addr - eaddr)); 702 dwarf_diename(sp_die));
417 /* Copy the function name if possible */ 703 return -ENOENT;
418 if (!pp->function) {
419 pp->function = strdup(name);
420 pp->offset = (size_t)(pf->addr - eaddr);
421 } 704 }
422 } else { 705 tev->point.symbol = strdup(name);
706 if (tev->point.symbol == NULL)
707 return -ENOMEM;
708 tev->point.offset = (unsigned long)(pf->addr - eaddr);
709 } else
423 /* This function has no name. */ 710 /* This function has no name. */
424 ret = snprintf(tmp, MAX_PROBE_BUFFER, "0x%jx", 711 tev->point.offset = (unsigned long)pf->addr;
425 (uintmax_t)pf->addr); 712
426 if (!pp->function) { 713 pr_debug("Probe point found: %s+%lu\n", tev->point.symbol,
427 /* TODO: Use _stext */ 714 tev->point.offset);
428 pp->function = strdup("");
429 pp->offset = (size_t)pf->addr;
430 }
431 }
432 DIE_IF(ret < 0);
433 DIE_IF(ret >= MAX_PROBE_BUFFER);
434 len = ret;
435 pr_debug("Probe point found: %s\n", tmp);
436 715
437 /* Get the frame base attribute/ops */ 716 /* Get the frame base attribute/ops */
438 dwarf_attr(sp_die, DW_AT_frame_base, &fb_attr); 717 dwarf_attr(sp_die, DW_AT_frame_base, &fb_attr);
439 ret = dwarf_getlocation_addr(&fb_attr, pf->addr, &pf->fb_ops, &nops, 1); 718 ret = dwarf_getlocation_addr(&fb_attr, pf->addr, &pf->fb_ops, &nops, 1);
440 if (ret <= 0 || nops == 0) 719 if (ret <= 0 || nops == 0) {
441 pf->fb_ops = NULL; 720 pf->fb_ops = NULL;
721 } else if (nops == 1 && pf->fb_ops[0].atom == DW_OP_call_frame_cfa &&
722 pf->cfi != NULL) {
723 Dwarf_Frame *frame;
724 if (dwarf_cfi_addrframe(pf->cfi, pf->addr, &frame) != 0 ||
725 dwarf_frame_cfa(frame, &pf->fb_ops, &nops) != 0) {
726 pr_warning("Failed to get CFA on 0x%jx\n",
727 (uintmax_t)pf->addr);
728 return -ENOENT;
729 }
730 }
442 731
443 /* Find each argument */ 732 /* Find each argument */
444 /* TODO: use dwarf_cfi_addrframe */ 733 tev->nargs = pf->pev->nargs;
445 for (i = 0; i < pp->nr_args; i++) { 734 tev->args = zalloc(sizeof(struct kprobe_trace_arg) * tev->nargs);
446 pf->var = pp->args[i]; 735 if (tev->args == NULL)
447 pf->buf = &tmp[len]; 736 return -ENOMEM;
448 pf->len = MAX_PROBE_BUFFER - len; 737 for (i = 0; i < pf->pev->nargs; i++) {
449 find_variable(sp_die, pf); 738 pf->pvar = &pf->pev->args[i];
450 len += strlen(pf->buf); 739 pf->tvar = &tev->args[i];
740 ret = find_variable(sp_die, pf);
741 if (ret != 0)
742 return ret;
451 } 743 }
452 744
453 /* *pf->fb_ops will be cached in libdw. Don't free it. */ 745 /* *pf->fb_ops will be cached in libdw. Don't free it. */
454 pf->fb_ops = NULL; 746 pf->fb_ops = NULL;
455 747 return 0;
456 if (pp->found == MAX_PROBES)
457 die("Too many( > %d) probe point found.\n", MAX_PROBES);
458
459 pp->probes[pp->found] = strdup(tmp);
460 pp->found++;
461} 748}
462 749
463/* Find probe point from its line number */ 750/* Find probe point from its line number */
464static void find_probe_point_by_line(struct probe_finder *pf) 751static int find_probe_point_by_line(struct probe_finder *pf)
465{ 752{
466 Dwarf_Lines *lines; 753 Dwarf_Lines *lines;
467 Dwarf_Line *line; 754 Dwarf_Line *line;
468 size_t nlines, i; 755 size_t nlines, i;
469 Dwarf_Addr addr; 756 Dwarf_Addr addr;
470 int lineno; 757 int lineno;
471 int ret; 758 int ret = 0;
472 759
473 ret = dwarf_getsrclines(&pf->cu_die, &lines, &nlines); 760 if (dwarf_getsrclines(&pf->cu_die, &lines, &nlines) != 0) {
474 DIE_IF(ret != 0); 761 pr_warning("No source lines found in this CU.\n");
762 return -ENOENT;
763 }
475 764
476 for (i = 0; i < nlines; i++) { 765 for (i = 0; i < nlines && ret == 0; i++) {
477 line = dwarf_onesrcline(lines, i); 766 line = dwarf_onesrcline(lines, i);
478 dwarf_lineno(line, &lineno); 767 if (dwarf_lineno(line, &lineno) != 0 ||
479 if (lineno != pf->lno) 768 lineno != pf->lno)
480 continue; 769 continue;
481 770
482 /* TODO: Get fileno from line, but how? */ 771 /* TODO: Get fileno from line, but how? */
483 if (strtailcmp(dwarf_linesrc(line, NULL, NULL), pf->fname) != 0) 772 if (strtailcmp(dwarf_linesrc(line, NULL, NULL), pf->fname) != 0)
484 continue; 773 continue;
485 774
486 ret = dwarf_lineaddr(line, &addr); 775 if (dwarf_lineaddr(line, &addr) != 0) {
487 DIE_IF(ret != 0); 776 pr_warning("Failed to get the address of the line.\n");
777 return -ENOENT;
778 }
488 pr_debug("Probe line found: line[%d]:%d addr:0x%jx\n", 779 pr_debug("Probe line found: line[%d]:%d addr:0x%jx\n",
489 (int)i, lineno, (uintmax_t)addr); 780 (int)i, lineno, (uintmax_t)addr);
490 pf->addr = addr; 781 pf->addr = addr;
491 782
492 show_probe_point(NULL, pf); 783 ret = convert_probe_point(NULL, pf);
493 /* Continuing, because target line might be inlined. */ 784 /* Continuing, because target line might be inlined. */
494 } 785 }
786 return ret;
495} 787}
496 788
497/* Find lines which match lazy pattern */ 789/* Find lines which match lazy pattern */
@@ -499,16 +791,27 @@ static int find_lazy_match_lines(struct list_head *head,
499 const char *fname, const char *pat) 791 const char *fname, const char *pat)
500{ 792{
501 char *fbuf, *p1, *p2; 793 char *fbuf, *p1, *p2;
502 int fd, line, nlines = 0; 794 int fd, ret, line, nlines = 0;
503 struct stat st; 795 struct stat st;
504 796
505 fd = open(fname, O_RDONLY); 797 fd = open(fname, O_RDONLY);
506 if (fd < 0) 798 if (fd < 0) {
507 die("failed to open %s", fname); 799 pr_warning("Failed to open %s: %s\n", fname, strerror(-fd));
508 DIE_IF(fstat(fd, &st) < 0); 800 return fd;
509 fbuf = malloc(st.st_size + 2); 801 }
510 DIE_IF(fbuf == NULL); 802
511 DIE_IF(read(fd, fbuf, st.st_size) < 0); 803 ret = fstat(fd, &st);
804 if (ret < 0) {
805 pr_warning("Failed to get the size of %s: %s\n",
806 fname, strerror(errno));
807 return ret;
808 }
809 fbuf = xmalloc(st.st_size + 2);
810 ret = read(fd, fbuf, st.st_size);
811 if (ret < 0) {
812 pr_warning("Failed to read %s: %s\n", fname, strerror(errno));
813 return ret;
814 }
512 close(fd); 815 close(fd);
513 fbuf[st.st_size] = '\n'; /* Dummy line */ 816 fbuf[st.st_size] = '\n'; /* Dummy line */
514 fbuf[st.st_size + 1] = '\0'; 817 fbuf[st.st_size + 1] = '\0';
@@ -528,7 +831,7 @@ static int find_lazy_match_lines(struct list_head *head,
528} 831}
529 832
530/* Find probe points from lazy pattern */ 833/* Find probe points from lazy pattern */
531static void find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) 834static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
532{ 835{
533 Dwarf_Lines *lines; 836 Dwarf_Lines *lines;
534 Dwarf_Line *line; 837 Dwarf_Line *line;
@@ -536,37 +839,46 @@ static void find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
536 Dwarf_Addr addr; 839 Dwarf_Addr addr;
537 Dwarf_Die die_mem; 840 Dwarf_Die die_mem;
538 int lineno; 841 int lineno;
539 int ret; 842 int ret = 0;
540 843
541 if (list_empty(&pf->lcache)) { 844 if (list_empty(&pf->lcache)) {
542 /* Matching lazy line pattern */ 845 /* Matching lazy line pattern */
543 ret = find_lazy_match_lines(&pf->lcache, pf->fname, 846 ret = find_lazy_match_lines(&pf->lcache, pf->fname,
544 pf->pp->lazy_line); 847 pf->pev->point.lazy_line);
545 if (ret <= 0) 848 if (ret == 0) {
546 die("No matched lines found in %s.", pf->fname); 849 pr_debug("No matched lines found in %s.\n", pf->fname);
850 return 0;
851 } else if (ret < 0)
852 return ret;
547 } 853 }
548 854
549 ret = dwarf_getsrclines(&pf->cu_die, &lines, &nlines); 855 if (dwarf_getsrclines(&pf->cu_die, &lines, &nlines) != 0) {
550 DIE_IF(ret != 0); 856 pr_warning("No source lines found in this CU.\n");
551 for (i = 0; i < nlines; i++) { 857 return -ENOENT;
858 }
859
860 for (i = 0; i < nlines && ret >= 0; i++) {
552 line = dwarf_onesrcline(lines, i); 861 line = dwarf_onesrcline(lines, i);
553 862
554 dwarf_lineno(line, &lineno); 863 if (dwarf_lineno(line, &lineno) != 0 ||
555 if (!line_list__has_line(&pf->lcache, lineno)) 864 !line_list__has_line(&pf->lcache, lineno))
556 continue; 865 continue;
557 866
558 /* TODO: Get fileno from line, but how? */ 867 /* TODO: Get fileno from line, but how? */
559 if (strtailcmp(dwarf_linesrc(line, NULL, NULL), pf->fname) != 0) 868 if (strtailcmp(dwarf_linesrc(line, NULL, NULL), pf->fname) != 0)
560 continue; 869 continue;
561 870
562 ret = dwarf_lineaddr(line, &addr); 871 if (dwarf_lineaddr(line, &addr) != 0) {
563 DIE_IF(ret != 0); 872 pr_debug("Failed to get the address of line %d.\n",
873 lineno);
874 continue;
875 }
564 if (sp_die) { 876 if (sp_die) {
565 /* Address filtering 1: does sp_die include addr? */ 877 /* Address filtering 1: does sp_die include addr? */
566 if (!dwarf_haspc(sp_die, addr)) 878 if (!dwarf_haspc(sp_die, addr))
567 continue; 879 continue;
568 /* Address filtering 2: No child include addr? */ 880 /* Address filtering 2: No child include addr? */
569 if (die_get_inlinefunc(sp_die, addr, &die_mem)) 881 if (die_find_inlinefunc(sp_die, addr, &die_mem))
570 continue; 882 continue;
571 } 883 }
572 884
@@ -574,27 +886,42 @@ static void find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
574 (int)i, lineno, (unsigned long long)addr); 886 (int)i, lineno, (unsigned long long)addr);
575 pf->addr = addr; 887 pf->addr = addr;
576 888
577 show_probe_point(sp_die, pf); 889 ret = convert_probe_point(sp_die, pf);
578 /* Continuing, because target line might be inlined. */ 890 /* Continuing, because target line might be inlined. */
579 } 891 }
580 /* TODO: deallocate lines, but how? */ 892 /* TODO: deallocate lines, but how? */
893 return ret;
581} 894}
582 895
896/* Callback parameter with return value */
897struct dwarf_callback_param {
898 void *data;
899 int retval;
900};
901
583static int probe_point_inline_cb(Dwarf_Die *in_die, void *data) 902static int probe_point_inline_cb(Dwarf_Die *in_die, void *data)
584{ 903{
585 struct probe_finder *pf = (struct probe_finder *)data; 904 struct dwarf_callback_param *param = data;
586 struct probe_point *pp = pf->pp; 905 struct probe_finder *pf = param->data;
906 struct perf_probe_point *pp = &pf->pev->point;
907 Dwarf_Addr addr;
587 908
588 if (pp->lazy_line) 909 if (pp->lazy_line)
589 find_probe_point_lazy(in_die, pf); 910 param->retval = find_probe_point_lazy(in_die, pf);
590 else { 911 else {
591 /* Get probe address */ 912 /* Get probe address */
592 pf->addr = die_get_entrypc(in_die); 913 if (dwarf_entrypc(in_die, &addr) != 0) {
914 pr_warning("Failed to get entry pc of %s.\n",
915 dwarf_diename(in_die));
916 param->retval = -ENOENT;
917 return DWARF_CB_ABORT;
918 }
919 pf->addr = addr;
593 pf->addr += pp->offset; 920 pf->addr += pp->offset;
594 pr_debug("found inline addr: 0x%jx\n", 921 pr_debug("found inline addr: 0x%jx\n",
595 (uintmax_t)pf->addr); 922 (uintmax_t)pf->addr);
596 923
597 show_probe_point(in_die, pf); 924 param->retval = convert_probe_point(in_die, pf);
598 } 925 }
599 926
600 return DWARF_CB_OK; 927 return DWARF_CB_OK;
@@ -603,59 +930,88 @@ static int probe_point_inline_cb(Dwarf_Die *in_die, void *data)
603/* Search function from function name */ 930/* Search function from function name */
604static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) 931static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
605{ 932{
606 struct probe_finder *pf = (struct probe_finder *)data; 933 struct dwarf_callback_param *param = data;
607 struct probe_point *pp = pf->pp; 934 struct probe_finder *pf = param->data;
935 struct perf_probe_point *pp = &pf->pev->point;
608 936
609 /* Check tag and diename */ 937 /* Check tag and diename */
610 if (dwarf_tag(sp_die) != DW_TAG_subprogram || 938 if (dwarf_tag(sp_die) != DW_TAG_subprogram ||
611 die_compare_name(sp_die, pp->function) != 0) 939 die_compare_name(sp_die, pp->function) != 0)
612 return 0; 940 return DWARF_CB_OK;
613 941
614 pf->fname = dwarf_decl_file(sp_die); 942 pf->fname = dwarf_decl_file(sp_die);
615 if (pp->line) { /* Function relative line */ 943 if (pp->line) { /* Function relative line */
616 dwarf_decl_line(sp_die, &pf->lno); 944 dwarf_decl_line(sp_die, &pf->lno);
617 pf->lno += pp->line; 945 pf->lno += pp->line;
618 find_probe_point_by_line(pf); 946 param->retval = find_probe_point_by_line(pf);
619 } else if (!dwarf_func_inline(sp_die)) { 947 } else if (!dwarf_func_inline(sp_die)) {
620 /* Real function */ 948 /* Real function */
621 if (pp->lazy_line) 949 if (pp->lazy_line)
622 find_probe_point_lazy(sp_die, pf); 950 param->retval = find_probe_point_lazy(sp_die, pf);
623 else { 951 else {
624 pf->addr = die_get_entrypc(sp_die); 952 if (dwarf_entrypc(sp_die, &pf->addr) != 0) {
953 pr_warning("Failed to get entry pc of %s.\n",
954 dwarf_diename(sp_die));
955 param->retval = -ENOENT;
956 return DWARF_CB_ABORT;
957 }
625 pf->addr += pp->offset; 958 pf->addr += pp->offset;
626 /* TODO: Check the address in this function */ 959 /* TODO: Check the address in this function */
627 show_probe_point(sp_die, pf); 960 param->retval = convert_probe_point(sp_die, pf);
628 } 961 }
629 } else 962 } else {
963 struct dwarf_callback_param _param = {.data = (void *)pf,
964 .retval = 0};
630 /* Inlined function: search instances */ 965 /* Inlined function: search instances */
631 dwarf_func_inline_instances(sp_die, probe_point_inline_cb, pf); 966 dwarf_func_inline_instances(sp_die, probe_point_inline_cb,
967 &_param);
968 param->retval = _param.retval;
969 }
632 970
633 return 1; /* Exit; no same symbol in this CU. */ 971 return DWARF_CB_ABORT; /* Exit; no same symbol in this CU. */
634} 972}
635 973
636static void find_probe_point_by_func(struct probe_finder *pf) 974static int find_probe_point_by_func(struct probe_finder *pf)
637{ 975{
638 dwarf_getfuncs(&pf->cu_die, probe_point_search_cb, pf, 0); 976 struct dwarf_callback_param _param = {.data = (void *)pf,
977 .retval = 0};
978 dwarf_getfuncs(&pf->cu_die, probe_point_search_cb, &_param, 0);
979 return _param.retval;
639} 980}
640 981
641/* Find a probe point */ 982/* Find kprobe_trace_events specified by perf_probe_event from debuginfo */
642int find_probe_point(int fd, struct probe_point *pp) 983int find_kprobe_trace_events(int fd, struct perf_probe_event *pev,
984 struct kprobe_trace_event **tevs)
643{ 985{
644 struct probe_finder pf = {.pp = pp}; 986 struct probe_finder pf = {.pev = pev};
987 struct perf_probe_point *pp = &pev->point;
645 Dwarf_Off off, noff; 988 Dwarf_Off off, noff;
646 size_t cuhl; 989 size_t cuhl;
647 Dwarf_Die *diep; 990 Dwarf_Die *diep;
648 Dwarf *dbg; 991 Dwarf *dbg;
992 int ret = 0;
993
994 pf.tevs = zalloc(sizeof(struct kprobe_trace_event) * MAX_PROBES);
995 if (pf.tevs == NULL)
996 return -ENOMEM;
997 *tevs = pf.tevs;
998 pf.ntevs = 0;
649 999
650 dbg = dwarf_begin(fd, DWARF_C_READ); 1000 dbg = dwarf_begin(fd, DWARF_C_READ);
651 if (!dbg) 1001 if (!dbg) {
652 return -ENOENT; 1002 pr_warning("No dwarf info found in the vmlinux - "
1003 "please rebuild with CONFIG_DEBUG_INFO=y.\n");
1004 return -EBADF;
1005 }
1006
1007 /* Get the call frame information from this dwarf */
1008 pf.cfi = dwarf_getcfi(dbg);
653 1009
654 pp->found = 0;
655 off = 0; 1010 off = 0;
656 line_list__init(&pf.lcache); 1011 line_list__init(&pf.lcache);
657 /* Loop on CUs (Compilation Unit) */ 1012 /* Loop on CUs (Compilation Unit) */
658 while (!dwarf_nextcu(dbg, off, &noff, &cuhl, NULL, NULL, NULL)) { 1013 while (!dwarf_nextcu(dbg, off, &noff, &cuhl, NULL, NULL, NULL) &&
1014 ret >= 0) {
659 /* Get the DIE(Debugging Information Entry) of this CU */ 1015 /* Get the DIE(Debugging Information Entry) of this CU */
660 diep = dwarf_offdie(dbg, off + cuhl, &pf.cu_die); 1016 diep = dwarf_offdie(dbg, off + cuhl, &pf.cu_die);
661 if (!diep) 1017 if (!diep)
@@ -669,12 +1025,12 @@ int find_probe_point(int fd, struct probe_point *pp)
669 1025
670 if (!pp->file || pf.fname) { 1026 if (!pp->file || pf.fname) {
671 if (pp->function) 1027 if (pp->function)
672 find_probe_point_by_func(&pf); 1028 ret = find_probe_point_by_func(&pf);
673 else if (pp->lazy_line) 1029 else if (pp->lazy_line)
674 find_probe_point_lazy(NULL, &pf); 1030 ret = find_probe_point_lazy(NULL, &pf);
675 else { 1031 else {
676 pf.lno = pp->line; 1032 pf.lno = pp->line;
677 find_probe_point_by_line(&pf); 1033 ret = find_probe_point_by_line(&pf);
678 } 1034 }
679 } 1035 }
680 off = noff; 1036 off = noff;
@@ -682,41 +1038,167 @@ int find_probe_point(int fd, struct probe_point *pp)
682 line_list__free(&pf.lcache); 1038 line_list__free(&pf.lcache);
683 dwarf_end(dbg); 1039 dwarf_end(dbg);
684 1040
685 return pp->found; 1041 return (ret < 0) ? ret : pf.ntevs;
1042}
1043
1044/* Reverse search */
1045int find_perf_probe_point(int fd, unsigned long addr,
1046 struct perf_probe_point *ppt)
1047{
1048 Dwarf_Die cudie, spdie, indie;
1049 Dwarf *dbg;
1050 Dwarf_Line *line;
1051 Dwarf_Addr laddr, eaddr;
1052 const char *tmp;
1053 int lineno, ret = 0;
1054 bool found = false;
1055
1056 dbg = dwarf_begin(fd, DWARF_C_READ);
1057 if (!dbg)
1058 return -EBADF;
1059
1060 /* Find cu die */
1061 if (!dwarf_addrdie(dbg, (Dwarf_Addr)addr, &cudie)) {
1062 ret = -EINVAL;
1063 goto end;
1064 }
1065
1066 /* Find a corresponding line */
1067 line = dwarf_getsrc_die(&cudie, (Dwarf_Addr)addr);
1068 if (line) {
1069 if (dwarf_lineaddr(line, &laddr) == 0 &&
1070 (Dwarf_Addr)addr == laddr &&
1071 dwarf_lineno(line, &lineno) == 0) {
1072 tmp = dwarf_linesrc(line, NULL, NULL);
1073 if (tmp) {
1074 ppt->line = lineno;
1075 ppt->file = strdup(tmp);
1076 if (ppt->file == NULL) {
1077 ret = -ENOMEM;
1078 goto end;
1079 }
1080 found = true;
1081 }
1082 }
1083 }
1084
1085 /* Find a corresponding function */
1086 if (die_find_real_subprogram(&cudie, (Dwarf_Addr)addr, &spdie)) {
1087 tmp = dwarf_diename(&spdie);
1088 if (!tmp || dwarf_entrypc(&spdie, &eaddr) != 0)
1089 goto end;
1090
1091 if (ppt->line) {
1092 if (die_find_inlinefunc(&spdie, (Dwarf_Addr)addr,
1093 &indie)) {
1094 /* addr in an inline function */
1095 tmp = dwarf_diename(&indie);
1096 if (!tmp)
1097 goto end;
1098 ret = dwarf_decl_line(&indie, &lineno);
1099 } else {
1100 if (eaddr == addr) { /* Function entry */
1101 lineno = ppt->line;
1102 ret = 0;
1103 } else
1104 ret = dwarf_decl_line(&spdie, &lineno);
1105 }
1106 if (ret == 0) {
1107 /* Make a relative line number */
1108 ppt->line -= lineno;
1109 goto found;
1110 }
1111 }
1112 /* We don't have a line number, let's use offset */
1113 ppt->offset = addr - (unsigned long)eaddr;
1114found:
1115 ppt->function = strdup(tmp);
1116 if (ppt->function == NULL) {
1117 ret = -ENOMEM;
1118 goto end;
1119 }
1120 found = true;
1121 }
1122
1123end:
1124 dwarf_end(dbg);
1125 if (ret >= 0)
1126 ret = found ? 1 : 0;
1127 return ret;
1128}
1129
1130/* Add a line and store the src path */
1131static int line_range_add_line(const char *src, unsigned int lineno,
1132 struct line_range *lr)
1133{
1134 /* Copy real path */
1135 if (!lr->path) {
1136 lr->path = strdup(src);
1137 if (lr->path == NULL)
1138 return -ENOMEM;
1139 }
1140 return line_list__add_line(&lr->line_list, lineno);
1141}
1142
1143/* Search function declaration lines */
1144static int line_range_funcdecl_cb(Dwarf_Die *sp_die, void *data)
1145{
1146 struct dwarf_callback_param *param = data;
1147 struct line_finder *lf = param->data;
1148 const char *src;
1149 int lineno;
1150
1151 src = dwarf_decl_file(sp_die);
1152 if (src && strtailcmp(src, lf->fname) != 0)
1153 return DWARF_CB_OK;
1154
1155 if (dwarf_decl_line(sp_die, &lineno) != 0 ||
1156 (lf->lno_s > lineno || lf->lno_e < lineno))
1157 return DWARF_CB_OK;
1158
1159 param->retval = line_range_add_line(src, lineno, lf->lr);
1160 return DWARF_CB_OK;
1161}
1162
1163static int find_line_range_func_decl_lines(struct line_finder *lf)
1164{
1165 struct dwarf_callback_param param = {.data = (void *)lf, .retval = 0};
1166 dwarf_getfuncs(&lf->cu_die, line_range_funcdecl_cb, &param, 0);
1167 return param.retval;
686} 1168}
687 1169
688/* Find line range from its line number */ 1170/* Find line range from its line number */
689static void find_line_range_by_line(Dwarf_Die *sp_die, struct line_finder *lf) 1171static int find_line_range_by_line(Dwarf_Die *sp_die, struct line_finder *lf)
690{ 1172{
691 Dwarf_Lines *lines; 1173 Dwarf_Lines *lines;
692 Dwarf_Line *line; 1174 Dwarf_Line *line;
693 size_t nlines, i; 1175 size_t nlines, i;
694 Dwarf_Addr addr; 1176 Dwarf_Addr addr;
695 int lineno; 1177 int lineno, ret = 0;
696 int ret;
697 const char *src; 1178 const char *src;
698 Dwarf_Die die_mem; 1179 Dwarf_Die die_mem;
699 1180
700 line_list__init(&lf->lr->line_list); 1181 line_list__init(&lf->lr->line_list);
701 ret = dwarf_getsrclines(&lf->cu_die, &lines, &nlines); 1182 if (dwarf_getsrclines(&lf->cu_die, &lines, &nlines) != 0) {
702 DIE_IF(ret != 0); 1183 pr_warning("No source lines found in this CU.\n");
1184 return -ENOENT;
1185 }
703 1186
1187 /* Search probable lines on lines list */
704 for (i = 0; i < nlines; i++) { 1188 for (i = 0; i < nlines; i++) {
705 line = dwarf_onesrcline(lines, i); 1189 line = dwarf_onesrcline(lines, i);
706 ret = dwarf_lineno(line, &lineno); 1190 if (dwarf_lineno(line, &lineno) != 0 ||
707 DIE_IF(ret != 0); 1191 (lf->lno_s > lineno || lf->lno_e < lineno))
708 if (lf->lno_s > lineno || lf->lno_e < lineno)
709 continue; 1192 continue;
710 1193
711 if (sp_die) { 1194 if (sp_die) {
712 /* Address filtering 1: does sp_die include addr? */ 1195 /* Address filtering 1: does sp_die include addr? */
713 ret = dwarf_lineaddr(line, &addr); 1196 if (dwarf_lineaddr(line, &addr) != 0 ||
714 DIE_IF(ret != 0); 1197 !dwarf_haspc(sp_die, addr))
715 if (!dwarf_haspc(sp_die, addr))
716 continue; 1198 continue;
717 1199
718 /* Address filtering 2: No child include addr? */ 1200 /* Address filtering 2: No child include addr? */
719 if (die_get_inlinefunc(sp_die, addr, &die_mem)) 1201 if (die_find_inlinefunc(sp_die, addr, &die_mem))
720 continue; 1202 continue;
721 } 1203 }
722 1204
@@ -725,30 +1207,49 @@ static void find_line_range_by_line(Dwarf_Die *sp_die, struct line_finder *lf)
725 if (strtailcmp(src, lf->fname) != 0) 1207 if (strtailcmp(src, lf->fname) != 0)
726 continue; 1208 continue;
727 1209
728 /* Copy real path */ 1210 ret = line_range_add_line(src, lineno, lf->lr);
729 if (!lf->lr->path) 1211 if (ret < 0)
730 lf->lr->path = strdup(src); 1212 return ret;
731 line_list__add_line(&lf->lr->line_list, (unsigned int)lineno);
732 } 1213 }
1214
1215 /*
1216 * Dwarf lines doesn't include function declarations. We have to
1217 * check functions list or given function.
1218 */
1219 if (sp_die) {
1220 src = dwarf_decl_file(sp_die);
1221 if (src && dwarf_decl_line(sp_die, &lineno) == 0 &&
1222 (lf->lno_s <= lineno && lf->lno_e >= lineno))
1223 ret = line_range_add_line(src, lineno, lf->lr);
1224 } else
1225 ret = find_line_range_func_decl_lines(lf);
1226
733 /* Update status */ 1227 /* Update status */
734 if (!list_empty(&lf->lr->line_list)) 1228 if (ret >= 0)
735 lf->found = 1; 1229 if (!list_empty(&lf->lr->line_list))
1230 ret = lf->found = 1;
1231 else
1232 ret = 0; /* Lines are not found */
736 else { 1233 else {
737 free(lf->lr->path); 1234 free(lf->lr->path);
738 lf->lr->path = NULL; 1235 lf->lr->path = NULL;
739 } 1236 }
1237 return ret;
740} 1238}
741 1239
742static int line_range_inline_cb(Dwarf_Die *in_die, void *data) 1240static int line_range_inline_cb(Dwarf_Die *in_die, void *data)
743{ 1241{
744 find_line_range_by_line(in_die, (struct line_finder *)data); 1242 struct dwarf_callback_param *param = data;
1243
1244 param->retval = find_line_range_by_line(in_die, param->data);
745 return DWARF_CB_ABORT; /* No need to find other instances */ 1245 return DWARF_CB_ABORT; /* No need to find other instances */
746} 1246}
747 1247
748/* Search function from function name */ 1248/* Search function from function name */
749static int line_range_search_cb(Dwarf_Die *sp_die, void *data) 1249static int line_range_search_cb(Dwarf_Die *sp_die, void *data)
750{ 1250{
751 struct line_finder *lf = (struct line_finder *)data; 1251 struct dwarf_callback_param *param = data;
1252 struct line_finder *lf = param->data;
752 struct line_range *lr = lf->lr; 1253 struct line_range *lr = lf->lr;
753 1254
754 if (dwarf_tag(sp_die) == DW_TAG_subprogram && 1255 if (dwarf_tag(sp_die) == DW_TAG_subprogram &&
@@ -757,44 +1258,55 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data)
757 dwarf_decl_line(sp_die, &lr->offset); 1258 dwarf_decl_line(sp_die, &lr->offset);
758 pr_debug("fname: %s, lineno:%d\n", lf->fname, lr->offset); 1259 pr_debug("fname: %s, lineno:%d\n", lf->fname, lr->offset);
759 lf->lno_s = lr->offset + lr->start; 1260 lf->lno_s = lr->offset + lr->start;
760 if (!lr->end) 1261 if (lf->lno_s < 0) /* Overflow */
1262 lf->lno_s = INT_MAX;
1263 lf->lno_e = lr->offset + lr->end;
1264 if (lf->lno_e < 0) /* Overflow */
761 lf->lno_e = INT_MAX; 1265 lf->lno_e = INT_MAX;
762 else 1266 pr_debug("New line range: %d to %d\n", lf->lno_s, lf->lno_e);
763 lf->lno_e = lr->offset + lr->end;
764 lr->start = lf->lno_s; 1267 lr->start = lf->lno_s;
765 lr->end = lf->lno_e; 1268 lr->end = lf->lno_e;
766 if (dwarf_func_inline(sp_die)) 1269 if (dwarf_func_inline(sp_die)) {
1270 struct dwarf_callback_param _param;
1271 _param.data = (void *)lf;
1272 _param.retval = 0;
767 dwarf_func_inline_instances(sp_die, 1273 dwarf_func_inline_instances(sp_die,
768 line_range_inline_cb, lf); 1274 line_range_inline_cb,
769 else 1275 &_param);
770 find_line_range_by_line(sp_die, lf); 1276 param->retval = _param.retval;
771 return 1; 1277 } else
1278 param->retval = find_line_range_by_line(sp_die, lf);
1279 return DWARF_CB_ABORT;
772 } 1280 }
773 return 0; 1281 return DWARF_CB_OK;
774} 1282}
775 1283
776static void find_line_range_by_func(struct line_finder *lf) 1284static int find_line_range_by_func(struct line_finder *lf)
777{ 1285{
778 dwarf_getfuncs(&lf->cu_die, line_range_search_cb, lf, 0); 1286 struct dwarf_callback_param param = {.data = (void *)lf, .retval = 0};
1287 dwarf_getfuncs(&lf->cu_die, line_range_search_cb, &param, 0);
1288 return param.retval;
779} 1289}
780 1290
781int find_line_range(int fd, struct line_range *lr) 1291int find_line_range(int fd, struct line_range *lr)
782{ 1292{
783 struct line_finder lf = {.lr = lr, .found = 0}; 1293 struct line_finder lf = {.lr = lr, .found = 0};
784 int ret; 1294 int ret = 0;
785 Dwarf_Off off = 0, noff; 1295 Dwarf_Off off = 0, noff;
786 size_t cuhl; 1296 size_t cuhl;
787 Dwarf_Die *diep; 1297 Dwarf_Die *diep;
788 Dwarf *dbg; 1298 Dwarf *dbg;
789 1299
790 dbg = dwarf_begin(fd, DWARF_C_READ); 1300 dbg = dwarf_begin(fd, DWARF_C_READ);
791 if (!dbg) 1301 if (!dbg) {
792 return -ENOENT; 1302 pr_warning("No dwarf info found in the vmlinux - "
1303 "please rebuild with CONFIG_DEBUG_INFO=y.\n");
1304 return -EBADF;
1305 }
793 1306
794 /* Loop on CUs (Compilation Unit) */ 1307 /* Loop on CUs (Compilation Unit) */
795 while (!lf.found) { 1308 while (!lf.found && ret >= 0) {
796 ret = dwarf_nextcu(dbg, off, &noff, &cuhl, NULL, NULL, NULL); 1309 if (dwarf_nextcu(dbg, off, &noff, &cuhl, NULL, NULL, NULL) != 0)
797 if (ret != 0)
798 break; 1310 break;
799 1311
800 /* Get the DIE(Debugging Information Entry) of this CU */ 1312 /* Get the DIE(Debugging Information Entry) of this CU */
@@ -810,20 +1322,18 @@ int find_line_range(int fd, struct line_range *lr)
810 1322
811 if (!lr->file || lf.fname) { 1323 if (!lr->file || lf.fname) {
812 if (lr->function) 1324 if (lr->function)
813 find_line_range_by_func(&lf); 1325 ret = find_line_range_by_func(&lf);
814 else { 1326 else {
815 lf.lno_s = lr->start; 1327 lf.lno_s = lr->start;
816 if (!lr->end) 1328 lf.lno_e = lr->end;
817 lf.lno_e = INT_MAX; 1329 ret = find_line_range_by_line(NULL, &lf);
818 else
819 lf.lno_e = lr->end;
820 find_line_range_by_line(NULL, &lf);
821 } 1330 }
822 } 1331 }
823 off = noff; 1332 off = noff;
824 } 1333 }
825 pr_debug("path: %lx\n", (unsigned long)lr->path); 1334 pr_debug("path: %lx\n", (unsigned long)lr->path);
826 dwarf_end(dbg); 1335 dwarf_end(dbg);
827 return lf.found; 1336
1337 return (ret < 0) ? ret : lf.found;
828} 1338}
829 1339
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index 21f7354397b4..310ce897229c 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -3,6 +3,7 @@
3 3
4#include <stdbool.h> 4#include <stdbool.h>
5#include "util.h" 5#include "util.h"
6#include "probe-event.h"
6 7
7#define MAX_PATH_LEN 256 8#define MAX_PATH_LEN 256
8#define MAX_PROBE_BUFFER 1024 9#define MAX_PROBE_BUFFER 1024
@@ -14,67 +15,37 @@ static inline int is_c_varname(const char *name)
14 return isalpha(name[0]) || name[0] == '_'; 15 return isalpha(name[0]) || name[0] == '_';
15} 16}
16 17
17struct probe_point { 18#ifdef DWARF_SUPPORT
18 char *event; /* Event name */ 19/* Find kprobe_trace_events specified by perf_probe_event from debuginfo */
19 char *group; /* Event group */ 20extern int find_kprobe_trace_events(int fd, struct perf_probe_event *pev,
21 struct kprobe_trace_event **tevs);
20 22
21 /* Inputs */ 23/* Find a perf_probe_point from debuginfo */
22 char *file; /* File name */ 24extern int find_perf_probe_point(int fd, unsigned long addr,
23 int line; /* Line number */ 25 struct perf_probe_point *ppt);
24 char *lazy_line; /* Lazy line pattern */
25 26
26 char *function; /* Function name */
27 int offset; /* Offset bytes */
28
29 int nr_args; /* Number of arguments */
30 char **args; /* Arguments */
31
32 int retprobe; /* Return probe */
33
34 /* Output */
35 int found; /* Number of found probe points */
36 char *probes[MAX_PROBES]; /* Output buffers (will be allocated)*/
37};
38
39/* Line number container */
40struct line_node {
41 struct list_head list;
42 unsigned int line;
43};
44
45/* Line range */
46struct line_range {
47 char *file; /* File name */
48 char *function; /* Function name */
49 unsigned int start; /* Start line number */
50 unsigned int end; /* End line number */
51 int offset; /* Start line offset */
52 char *path; /* Real path name */
53 struct list_head line_list; /* Visible lines */
54};
55
56#ifndef NO_DWARF_SUPPORT
57extern int find_probe_point(int fd, struct probe_point *pp);
58extern int find_line_range(int fd, struct line_range *lr); 27extern int find_line_range(int fd, struct line_range *lr);
59 28
60#include <dwarf.h> 29#include <dwarf.h>
61#include <libdw.h> 30#include <libdw.h>
62 31
63struct probe_finder { 32struct probe_finder {
64 struct probe_point *pp; /* Target probe point */ 33 struct perf_probe_event *pev; /* Target probe event */
34 struct kprobe_trace_event *tevs; /* Result trace events */
35 int ntevs; /* number of trace events */
65 36
66 /* For function searching */ 37 /* For function searching */
67 Dwarf_Addr addr; /* Address */
68 const char *fname; /* File name */
69 int lno; /* Line number */ 38 int lno; /* Line number */
39 Dwarf_Addr addr; /* Address */
40 const char *fname; /* Real file name */
70 Dwarf_Die cu_die; /* Current CU */ 41 Dwarf_Die cu_die; /* Current CU */
42 struct list_head lcache; /* Line cache for lazy match */
71 43
72 /* For variable searching */ 44 /* For variable searching */
45 Dwarf_CFI *cfi; /* Call Frame Information */
73 Dwarf_Op *fb_ops; /* Frame base attribute */ 46 Dwarf_Op *fb_ops; /* Frame base attribute */
74 const char *var; /* Current variable name */ 47 struct perf_probe_arg *pvar; /* Current target variable */
75 char *buf; /* Current output buffer */ 48 struct kprobe_trace_arg *tvar; /* Current result variable */
76 int len; /* Length of output buffer */
77 struct list_head lcache; /* Line cache for lazy match */
78}; 49};
79 50
80struct line_finder { 51struct line_finder {
@@ -87,6 +58,6 @@ struct line_finder {
87 int found; 58 int found;
88}; 59};
89 60
90#endif /* NO_DWARF_SUPPORT */ 61#endif /* DWARF_SUPPORT */
91 62
92#endif /*_PROBE_FINDER_H */ 63#endif /*_PROBE_FINDER_H */
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index eed1cb889008..7d88ae5c270f 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -14,6 +14,16 @@ static int perf_session__open(struct perf_session *self, bool force)
14{ 14{
15 struct stat input_stat; 15 struct stat input_stat;
16 16
17 if (!strcmp(self->filename, "-")) {
18 self->fd_pipe = true;
19 self->fd = STDIN_FILENO;
20
21 if (perf_header__read(self, self->fd) < 0)
22 pr_err("incompatible file format");
23
24 return 0;
25 }
26
17 self->fd = open(self->filename, O_RDONLY); 27 self->fd = open(self->filename, O_RDONLY);
18 if (self->fd < 0) { 28 if (self->fd < 0) {
19 pr_err("failed to open file: %s", self->filename); 29 pr_err("failed to open file: %s", self->filename);
@@ -38,7 +48,7 @@ static int perf_session__open(struct perf_session *self, bool force)
38 goto out_close; 48 goto out_close;
39 } 49 }
40 50
41 if (perf_header__read(&self->header, self->fd) < 0) { 51 if (perf_header__read(self, self->fd) < 0) {
42 pr_err("incompatible file format"); 52 pr_err("incompatible file format");
43 goto out_close; 53 goto out_close;
44 } 54 }
@@ -52,9 +62,20 @@ out_close:
52 return -1; 62 return -1;
53} 63}
54 64
55static inline int perf_session__create_kernel_maps(struct perf_session *self) 65void perf_session__update_sample_type(struct perf_session *self)
56{ 66{
57 return map_groups__create_kernel_maps(&self->kmaps, self->vmlinux_maps); 67 self->sample_type = perf_header__sample_type(&self->header);
68}
69
70int perf_session__create_kernel_maps(struct perf_session *self)
71{
72 int ret;
73 struct rb_root *root = &self->kerninfo_root;
74
75 ret = map_groups__create_kernel_maps(root, HOST_KERNEL_ID);
76 if (ret >= 0)
77 ret = map_groups__create_guest_kernel_maps(root);
78 return ret;
58} 79}
59 80
60struct perf_session *perf_session__new(const char *filename, int mode, bool force) 81struct perf_session *perf_session__new(const char *filename, int mode, bool force)
@@ -76,7 +97,7 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc
76 self->cwd = NULL; 97 self->cwd = NULL;
77 self->cwdlen = 0; 98 self->cwdlen = 0;
78 self->unknown_events = 0; 99 self->unknown_events = 0;
79 map_groups__init(&self->kmaps); 100 self->kerninfo_root = RB_ROOT;
80 101
81 if (mode == O_RDONLY) { 102 if (mode == O_RDONLY) {
82 if (perf_session__open(self, force) < 0) 103 if (perf_session__open(self, force) < 0)
@@ -90,7 +111,7 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc
90 goto out_delete; 111 goto out_delete;
91 } 112 }
92 113
93 self->sample_type = perf_header__sample_type(&self->header); 114 perf_session__update_sample_type(self);
94out: 115out:
95 return self; 116 return self;
96out_free: 117out_free:
@@ -117,22 +138,17 @@ static bool symbol__match_parent_regex(struct symbol *sym)
117 return 0; 138 return 0;
118} 139}
119 140
120struct symbol **perf_session__resolve_callchain(struct perf_session *self, 141struct map_symbol *perf_session__resolve_callchain(struct perf_session *self,
121 struct thread *thread, 142 struct thread *thread,
122 struct ip_callchain *chain, 143 struct ip_callchain *chain,
123 struct symbol **parent) 144 struct symbol **parent)
124{ 145{
125 u8 cpumode = PERF_RECORD_MISC_USER; 146 u8 cpumode = PERF_RECORD_MISC_USER;
126 struct symbol **syms = NULL;
127 unsigned int i; 147 unsigned int i;
148 struct map_symbol *syms = calloc(chain->nr, sizeof(*syms));
128 149
129 if (symbol_conf.use_callchain) { 150 if (!syms)
130 syms = calloc(chain->nr, sizeof(*syms)); 151 return NULL;
131 if (!syms) {
132 fprintf(stderr, "Can't allocate memory for symbols\n");
133 exit(-1);
134 }
135 }
136 152
137 for (i = 0; i < chain->nr; i++) { 153 for (i = 0; i < chain->nr; i++) {
138 u64 ip = chain->ips[i]; 154 u64 ip = chain->ips[i];
@@ -152,15 +168,17 @@ struct symbol **perf_session__resolve_callchain(struct perf_session *self,
152 continue; 168 continue;
153 } 169 }
154 170
171 al.filtered = false;
155 thread__find_addr_location(thread, self, cpumode, 172 thread__find_addr_location(thread, self, cpumode,
156 MAP__FUNCTION, ip, &al, NULL); 173 MAP__FUNCTION, thread->pid, ip, &al, NULL);
157 if (al.sym != NULL) { 174 if (al.sym != NULL) {
158 if (sort__has_parent && !*parent && 175 if (sort__has_parent && !*parent &&
159 symbol__match_parent_regex(al.sym)) 176 symbol__match_parent_regex(al.sym))
160 *parent = al.sym; 177 *parent = al.sym;
161 if (!symbol_conf.use_callchain) 178 if (!symbol_conf.use_callchain)
162 break; 179 break;
163 syms[i] = al.sym; 180 syms[i].map = al.map;
181 syms[i].sym = al.sym;
164 } 182 }
165 } 183 }
166 184
@@ -194,6 +212,14 @@ static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
194 handler->throttle = process_event_stub; 212 handler->throttle = process_event_stub;
195 if (handler->unthrottle == NULL) 213 if (handler->unthrottle == NULL)
196 handler->unthrottle = process_event_stub; 214 handler->unthrottle = process_event_stub;
215 if (handler->attr == NULL)
216 handler->attr = process_event_stub;
217 if (handler->event_type == NULL)
218 handler->event_type = process_event_stub;
219 if (handler->tracing_data == NULL)
220 handler->tracing_data = process_event_stub;
221 if (handler->build_id == NULL)
222 handler->build_id = process_event_stub;
197} 223}
198 224
199static const char *event__name[] = { 225static const char *event__name[] = {
@@ -207,16 +233,23 @@ static const char *event__name[] = {
207 [PERF_RECORD_FORK] = "FORK", 233 [PERF_RECORD_FORK] = "FORK",
208 [PERF_RECORD_READ] = "READ", 234 [PERF_RECORD_READ] = "READ",
209 [PERF_RECORD_SAMPLE] = "SAMPLE", 235 [PERF_RECORD_SAMPLE] = "SAMPLE",
236 [PERF_RECORD_HEADER_ATTR] = "ATTR",
237 [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE",
238 [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA",
239 [PERF_RECORD_HEADER_BUILD_ID] = "BUILD_ID",
210}; 240};
211 241
212unsigned long event__total[PERF_RECORD_MAX]; 242unsigned long event__total[PERF_RECORD_HEADER_MAX];
213 243
214void event__print_totals(void) 244void event__print_totals(void)
215{ 245{
216 int i; 246 int i;
217 for (i = 0; i < PERF_RECORD_MAX; ++i) 247 for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) {
248 if (!event__name[i])
249 continue;
218 pr_info("%10s events: %10ld\n", 250 pr_info("%10s events: %10ld\n",
219 event__name[i], event__total[i]); 251 event__name[i], event__total[i]);
252 }
220} 253}
221 254
222void mem_bswap_64(void *src, int byte_size) 255void mem_bswap_64(void *src, int byte_size)
@@ -270,6 +303,37 @@ static void event__read_swap(event_t *self)
270 self->read.id = bswap_64(self->read.id); 303 self->read.id = bswap_64(self->read.id);
271} 304}
272 305
306static void event__attr_swap(event_t *self)
307{
308 size_t size;
309
310 self->attr.attr.type = bswap_32(self->attr.attr.type);
311 self->attr.attr.size = bswap_32(self->attr.attr.size);
312 self->attr.attr.config = bswap_64(self->attr.attr.config);
313 self->attr.attr.sample_period = bswap_64(self->attr.attr.sample_period);
314 self->attr.attr.sample_type = bswap_64(self->attr.attr.sample_type);
315 self->attr.attr.read_format = bswap_64(self->attr.attr.read_format);
316 self->attr.attr.wakeup_events = bswap_32(self->attr.attr.wakeup_events);
317 self->attr.attr.bp_type = bswap_32(self->attr.attr.bp_type);
318 self->attr.attr.bp_addr = bswap_64(self->attr.attr.bp_addr);
319 self->attr.attr.bp_len = bswap_64(self->attr.attr.bp_len);
320
321 size = self->header.size;
322 size -= (void *)&self->attr.id - (void *)self;
323 mem_bswap_64(self->attr.id, size);
324}
325
326static void event__event_type_swap(event_t *self)
327{
328 self->event_type.event_type.event_id =
329 bswap_64(self->event_type.event_type.event_id);
330}
331
332static void event__tracing_data_swap(event_t *self)
333{
334 self->tracing_data.size = bswap_32(self->tracing_data.size);
335}
336
273typedef void (*event__swap_op)(event_t *self); 337typedef void (*event__swap_op)(event_t *self);
274 338
275static event__swap_op event__swap_ops[] = { 339static event__swap_op event__swap_ops[] = {
@@ -280,7 +344,11 @@ static event__swap_op event__swap_ops[] = {
280 [PERF_RECORD_LOST] = event__all64_swap, 344 [PERF_RECORD_LOST] = event__all64_swap,
281 [PERF_RECORD_READ] = event__read_swap, 345 [PERF_RECORD_READ] = event__read_swap,
282 [PERF_RECORD_SAMPLE] = event__all64_swap, 346 [PERF_RECORD_SAMPLE] = event__all64_swap,
283 [PERF_RECORD_MAX] = NULL, 347 [PERF_RECORD_HEADER_ATTR] = event__attr_swap,
348 [PERF_RECORD_HEADER_EVENT_TYPE] = event__event_type_swap,
349 [PERF_RECORD_HEADER_TRACING_DATA] = event__tracing_data_swap,
350 [PERF_RECORD_HEADER_BUILD_ID] = NULL,
351 [PERF_RECORD_HEADER_MAX] = NULL,
284}; 352};
285 353
286static int perf_session__process_event(struct perf_session *self, 354static int perf_session__process_event(struct perf_session *self,
@@ -290,7 +358,7 @@ static int perf_session__process_event(struct perf_session *self,
290{ 358{
291 trace_event(event); 359 trace_event(event);
292 360
293 if (event->header.type < PERF_RECORD_MAX) { 361 if (event->header.type < PERF_RECORD_HEADER_MAX) {
294 dump_printf("%#Lx [%#x]: PERF_RECORD_%s", 362 dump_printf("%#Lx [%#x]: PERF_RECORD_%s",
295 offset + head, event->header.size, 363 offset + head, event->header.size,
296 event__name[event->header.type]); 364 event__name[event->header.type]);
@@ -320,6 +388,16 @@ static int perf_session__process_event(struct perf_session *self,
320 return ops->throttle(event, self); 388 return ops->throttle(event, self);
321 case PERF_RECORD_UNTHROTTLE: 389 case PERF_RECORD_UNTHROTTLE:
322 return ops->unthrottle(event, self); 390 return ops->unthrottle(event, self);
391 case PERF_RECORD_HEADER_ATTR:
392 return ops->attr(event, self);
393 case PERF_RECORD_HEADER_EVENT_TYPE:
394 return ops->event_type(event, self);
395 case PERF_RECORD_HEADER_TRACING_DATA:
396 /* setup for reading amidst mmap */
397 lseek(self->fd, offset + head, SEEK_SET);
398 return ops->tracing_data(event, self);
399 case PERF_RECORD_HEADER_BUILD_ID:
400 return ops->build_id(event, self);
323 default: 401 default:
324 self->unknown_events++; 402 self->unknown_events++;
325 return -1; 403 return -1;
@@ -333,46 +411,6 @@ void perf_event_header__bswap(struct perf_event_header *self)
333 self->size = bswap_16(self->size); 411 self->size = bswap_16(self->size);
334} 412}
335 413
336int perf_header__read_build_ids(struct perf_header *self,
337 int input, u64 offset, u64 size)
338{
339 struct build_id_event bev;
340 char filename[PATH_MAX];
341 u64 limit = offset + size;
342 int err = -1;
343
344 while (offset < limit) {
345 struct dso *dso;
346 ssize_t len;
347 struct list_head *head = &dsos__user;
348
349 if (read(input, &bev, sizeof(bev)) != sizeof(bev))
350 goto out;
351
352 if (self->needs_swap)
353 perf_event_header__bswap(&bev.header);
354
355 len = bev.header.size - sizeof(bev);
356 if (read(input, filename, len) != len)
357 goto out;
358
359 if (bev.header.misc & PERF_RECORD_MISC_KERNEL)
360 head = &dsos__kernel;
361
362 dso = __dsos__findnew(head, filename);
363 if (dso != NULL) {
364 dso__set_build_id(dso, &bev.build_id);
365 if (head == &dsos__kernel && filename[0] == '[')
366 dso->kernel = 1;
367 }
368
369 offset += bev.header.size;
370 }
371 err = 0;
372out:
373 return err;
374}
375
376static struct thread *perf_session__register_idle_thread(struct perf_session *self) 414static struct thread *perf_session__register_idle_thread(struct perf_session *self)
377{ 415{
378 struct thread *thread = perf_session__findnew(self, 0); 416 struct thread *thread = perf_session__findnew(self, 0);
@@ -385,6 +423,101 @@ static struct thread *perf_session__register_idle_thread(struct perf_session *se
385 return thread; 423 return thread;
386} 424}
387 425
426int do_read(int fd, void *buf, size_t size)
427{
428 void *buf_start = buf;
429
430 while (size) {
431 int ret = read(fd, buf, size);
432
433 if (ret <= 0)
434 return ret;
435
436 size -= ret;
437 buf += ret;
438 }
439
440 return buf - buf_start;
441}
442
443#define session_done() (*(volatile int *)(&session_done))
444volatile int session_done;
445
446static int __perf_session__process_pipe_events(struct perf_session *self,
447 struct perf_event_ops *ops)
448{
449 event_t event;
450 uint32_t size;
451 int skip = 0;
452 u64 head;
453 int err;
454 void *p;
455
456 perf_event_ops__fill_defaults(ops);
457
458 head = 0;
459more:
460 err = do_read(self->fd, &event, sizeof(struct perf_event_header));
461 if (err <= 0) {
462 if (err == 0)
463 goto done;
464
465 pr_err("failed to read event header\n");
466 goto out_err;
467 }
468
469 if (self->header.needs_swap)
470 perf_event_header__bswap(&event.header);
471
472 size = event.header.size;
473 if (size == 0)
474 size = 8;
475
476 p = &event;
477 p += sizeof(struct perf_event_header);
478
479 err = do_read(self->fd, p, size - sizeof(struct perf_event_header));
480 if (err <= 0) {
481 if (err == 0) {
482 pr_err("unexpected end of event stream\n");
483 goto done;
484 }
485
486 pr_err("failed to read event data\n");
487 goto out_err;
488 }
489
490 if (size == 0 ||
491 (skip = perf_session__process_event(self, &event, ops,
492 0, head)) < 0) {
493 dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
494 head, event.header.size, event.header.type);
495 /*
496 * assume we lost track of the stream, check alignment, and
497 * increment a single u64 in the hope to catch on again 'soon'.
498 */
499 if (unlikely(head & 7))
500 head &= ~7ULL;
501
502 size = 8;
503 }
504
505 head += size;
506
507 dump_printf("\n%#Lx [%#x]: event: %d\n",
508 head, event.header.size, event.header.type);
509
510 if (skip > 0)
511 head += skip;
512
513 if (!session_done())
514 goto more;
515done:
516 err = 0;
517out_err:
518 return err;
519}
520
388int __perf_session__process_events(struct perf_session *self, 521int __perf_session__process_events(struct perf_session *self,
389 u64 data_offset, u64 data_size, 522 u64 data_offset, u64 data_size,
390 u64 file_size, struct perf_event_ops *ops) 523 u64 file_size, struct perf_event_ops *ops)
@@ -396,6 +529,10 @@ int __perf_session__process_events(struct perf_session *self,
396 event_t *event; 529 event_t *event;
397 uint32_t size; 530 uint32_t size;
398 char *buf; 531 char *buf;
532 struct ui_progress *progress = ui_progress__new("Processing events...",
533 self->size);
534 if (progress == NULL)
535 return -1;
399 536
400 perf_event_ops__fill_defaults(ops); 537 perf_event_ops__fill_defaults(ops);
401 538
@@ -424,6 +561,7 @@ remap:
424 561
425more: 562more:
426 event = (event_t *)(buf + head); 563 event = (event_t *)(buf + head);
564 ui_progress__update(progress, offset);
427 565
428 if (self->header.needs_swap) 566 if (self->header.needs_swap)
429 perf_event_header__bswap(&event->header); 567 perf_event_header__bswap(&event->header);
@@ -474,6 +612,7 @@ more:
474done: 612done:
475 err = 0; 613 err = 0;
476out_err: 614out_err:
615 ui_progress__delete(progress);
477 return err; 616 return err;
478} 617}
479 618
@@ -502,9 +641,13 @@ out_getcwd_err:
502 self->cwdlen = strlen(self->cwd); 641 self->cwdlen = strlen(self->cwd);
503 } 642 }
504 643
505 err = __perf_session__process_events(self, self->header.data_offset, 644 if (!self->fd_pipe)
506 self->header.data_size, 645 err = __perf_session__process_events(self,
507 self->size, ops); 646 self->header.data_offset,
647 self->header.data_size,
648 self->size, ops);
649 else
650 err = __perf_session__process_pipe_events(self, ops);
508out_err: 651out_err:
509 return err; 652 return err;
510} 653}
@@ -519,56 +662,34 @@ bool perf_session__has_traces(struct perf_session *self, const char *msg)
519 return true; 662 return true;
520} 663}
521 664
522int perf_session__set_kallsyms_ref_reloc_sym(struct perf_session *self, 665int perf_session__set_kallsyms_ref_reloc_sym(struct map **maps,
523 const char *symbol_name, 666 const char *symbol_name,
524 u64 addr) 667 u64 addr)
525{ 668{
526 char *bracket; 669 char *bracket;
527 enum map_type i; 670 enum map_type i;
671 struct ref_reloc_sym *ref;
528 672
529 self->ref_reloc_sym.name = strdup(symbol_name); 673 ref = zalloc(sizeof(struct ref_reloc_sym));
530 if (self->ref_reloc_sym.name == NULL) 674 if (ref == NULL)
531 return -ENOMEM; 675 return -ENOMEM;
532 676
533 bracket = strchr(self->ref_reloc_sym.name, ']'); 677 ref->name = strdup(symbol_name);
678 if (ref->name == NULL) {
679 free(ref);
680 return -ENOMEM;
681 }
682
683 bracket = strchr(ref->name, ']');
534 if (bracket) 684 if (bracket)
535 *bracket = '\0'; 685 *bracket = '\0';
536 686
537 self->ref_reloc_sym.addr = addr; 687 ref->addr = addr;
538 688
539 for (i = 0; i < MAP__NR_TYPES; ++i) { 689 for (i = 0; i < MAP__NR_TYPES; ++i) {
540 struct kmap *kmap = map__kmap(self->vmlinux_maps[i]); 690 struct kmap *kmap = map__kmap(maps[i]);
541 kmap->ref_reloc_sym = &self->ref_reloc_sym; 691 kmap->ref_reloc_sym = ref;
542 } 692 }
543 693
544 return 0; 694 return 0;
545} 695}
546
547static u64 map__reloc_map_ip(struct map *map, u64 ip)
548{
549 return ip + (s64)map->pgoff;
550}
551
552static u64 map__reloc_unmap_ip(struct map *map, u64 ip)
553{
554 return ip - (s64)map->pgoff;
555}
556
557void map__reloc_vmlinux(struct map *self)
558{
559 struct kmap *kmap = map__kmap(self);
560 s64 reloc;
561
562 if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->unrelocated_addr)
563 return;
564
565 reloc = (kmap->ref_reloc_sym->unrelocated_addr -
566 kmap->ref_reloc_sym->addr);
567
568 if (!reloc)
569 return;
570
571 self->map_ip = map__reloc_map_ip;
572 self->unmap_ip = map__reloc_unmap_ip;
573 self->pgoff = reloc;
574}
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 5c33417eebb3..5e47c87b9266 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -15,18 +15,17 @@ struct perf_session {
15 struct perf_header header; 15 struct perf_header header;
16 unsigned long size; 16 unsigned long size;
17 unsigned long mmap_window; 17 unsigned long mmap_window;
18 struct map_groups kmaps;
19 struct rb_root threads; 18 struct rb_root threads;
20 struct thread *last_match; 19 struct thread *last_match;
21 struct map *vmlinux_maps[MAP__NR_TYPES]; 20 struct rb_root kerninfo_root;
22 struct events_stats events_stats; 21 struct events_stats events_stats;
23 struct rb_root stats_by_id; 22 struct rb_root stats_by_id;
24 unsigned long event_total[PERF_RECORD_MAX]; 23 unsigned long event_total[PERF_RECORD_MAX];
25 unsigned long unknown_events; 24 unsigned long unknown_events;
26 struct rb_root hists; 25 struct rb_root hists;
27 u64 sample_type; 26 u64 sample_type;
28 struct ref_reloc_sym ref_reloc_sym;
29 int fd; 27 int fd;
28 bool fd_pipe;
30 int cwdlen; 29 int cwdlen;
31 char *cwd; 30 char *cwd;
32 char filename[0]; 31 char filename[0];
@@ -43,7 +42,11 @@ struct perf_event_ops {
43 lost, 42 lost,
44 read, 43 read,
45 throttle, 44 throttle,
46 unthrottle; 45 unthrottle,
46 attr,
47 event_type,
48 tracing_data,
49 build_id;
47}; 50};
48 51
49struct perf_session *perf_session__new(const char *filename, int mode, bool force); 52struct perf_session *perf_session__new(const char *filename, int mode, bool force);
@@ -57,33 +60,36 @@ int __perf_session__process_events(struct perf_session *self,
57int perf_session__process_events(struct perf_session *self, 60int perf_session__process_events(struct perf_session *self,
58 struct perf_event_ops *event_ops); 61 struct perf_event_ops *event_ops);
59 62
60struct symbol **perf_session__resolve_callchain(struct perf_session *self, 63struct map_symbol *perf_session__resolve_callchain(struct perf_session *self,
61 struct thread *thread, 64 struct thread *thread,
62 struct ip_callchain *chain, 65 struct ip_callchain *chain,
63 struct symbol **parent); 66 struct symbol **parent);
64 67
65bool perf_session__has_traces(struct perf_session *self, const char *msg); 68bool perf_session__has_traces(struct perf_session *self, const char *msg);
66 69
67int perf_header__read_build_ids(struct perf_header *self, int input, 70int perf_session__set_kallsyms_ref_reloc_sym(struct map **maps,
68 u64 offset, u64 file_size);
69
70int perf_session__set_kallsyms_ref_reloc_sym(struct perf_session *self,
71 const char *symbol_name, 71 const char *symbol_name,
72 u64 addr); 72 u64 addr);
73 73
74void mem_bswap_64(void *src, int byte_size); 74void mem_bswap_64(void *src, int byte_size);
75 75
76static inline int __perf_session__create_kernel_maps(struct perf_session *self, 76int perf_session__create_kernel_maps(struct perf_session *self);
77 struct dso *kernel) 77
78{ 78int do_read(int fd, void *buf, size_t size);
79 return __map_groups__create_kernel_maps(&self->kmaps, 79void perf_session__update_sample_type(struct perf_session *self);
80 self->vmlinux_maps, kernel);
81}
82 80
83static inline struct map * 81#ifdef NO_NEWT_SUPPORT
84 perf_session__new_module_map(struct perf_session *self, 82static inline int perf_session__browse_hists(struct rb_root *hists __used,
85 u64 start, const char *filename) 83 u64 nr_hists __used,
84 u64 session_total __used,
85 const char *helpline __used,
86 const char *input_name __used)
86{ 87{
87 return map_groups__new_module(&self->kmaps, start, filename); 88 return 0;
88} 89}
90#else
91int perf_session__browse_hists(struct rb_root *hists, u64 nr_hists,
92 u64 session_total, const char *helpline,
93 const char *input_name);
94#endif
89#endif /* __PERF_SESSION_H */ 95#endif /* __PERF_SESSION_H */
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index cb0f327de9e8..da30b305fba0 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -18,39 +18,50 @@ char * field_sep;
18 18
19LIST_HEAD(hist_entry__sort_list); 19LIST_HEAD(hist_entry__sort_list);
20 20
21static int hist_entry__thread_snprintf(struct hist_entry *self, char *bf,
22 size_t size, unsigned int width);
23static int hist_entry__comm_snprintf(struct hist_entry *self, char *bf,
24 size_t size, unsigned int width);
25static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf,
26 size_t size, unsigned int width);
27static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
28 size_t size, unsigned int width);
29static int hist_entry__parent_snprintf(struct hist_entry *self, char *bf,
30 size_t size, unsigned int width);
31
21struct sort_entry sort_thread = { 32struct sort_entry sort_thread = {
22 .header = "Command: Pid", 33 .se_header = "Command: Pid",
23 .cmp = sort__thread_cmp, 34 .se_cmp = sort__thread_cmp,
24 .print = sort__thread_print, 35 .se_snprintf = hist_entry__thread_snprintf,
25 .width = &threads__col_width, 36 .se_width = &threads__col_width,
26}; 37};
27 38
28struct sort_entry sort_comm = { 39struct sort_entry sort_comm = {
29 .header = "Command", 40 .se_header = "Command",
30 .cmp = sort__comm_cmp, 41 .se_cmp = sort__comm_cmp,
31 .collapse = sort__comm_collapse, 42 .se_collapse = sort__comm_collapse,
32 .print = sort__comm_print, 43 .se_snprintf = hist_entry__comm_snprintf,
33 .width = &comms__col_width, 44 .se_width = &comms__col_width,
34}; 45};
35 46
36struct sort_entry sort_dso = { 47struct sort_entry sort_dso = {
37 .header = "Shared Object", 48 .se_header = "Shared Object",
38 .cmp = sort__dso_cmp, 49 .se_cmp = sort__dso_cmp,
39 .print = sort__dso_print, 50 .se_snprintf = hist_entry__dso_snprintf,
40 .width = &dsos__col_width, 51 .se_width = &dsos__col_width,
41}; 52};
42 53
43struct sort_entry sort_sym = { 54struct sort_entry sort_sym = {
44 .header = "Symbol", 55 .se_header = "Symbol",
45 .cmp = sort__sym_cmp, 56 .se_cmp = sort__sym_cmp,
46 .print = sort__sym_print, 57 .se_snprintf = hist_entry__sym_snprintf,
47}; 58};
48 59
49struct sort_entry sort_parent = { 60struct sort_entry sort_parent = {
50 .header = "Parent symbol", 61 .se_header = "Parent symbol",
51 .cmp = sort__parent_cmp, 62 .se_cmp = sort__parent_cmp,
52 .print = sort__parent_print, 63 .se_snprintf = hist_entry__parent_snprintf,
53 .width = &parent_symbol__col_width, 64 .se_width = &parent_symbol__col_width,
54}; 65};
55 66
56struct sort_dimension { 67struct sort_dimension {
@@ -85,45 +96,38 @@ sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
85 return right->thread->pid - left->thread->pid; 96 return right->thread->pid - left->thread->pid;
86} 97}
87 98
88int repsep_fprintf(FILE *fp, const char *fmt, ...) 99static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...)
89{ 100{
90 int n; 101 int n;
91 va_list ap; 102 va_list ap;
92 103
93 va_start(ap, fmt); 104 va_start(ap, fmt);
94 if (!field_sep) 105 n = vsnprintf(bf, size, fmt, ap);
95 n = vfprintf(fp, fmt, ap); 106 if (field_sep && n > 0) {
96 else { 107 char *sep = bf;
97 char *bf = NULL; 108
98 n = vasprintf(&bf, fmt, ap); 109 while (1) {
99 if (n > 0) { 110 sep = strchr(sep, *field_sep);
100 char *sep = bf; 111 if (sep == NULL)
101 112 break;
102 while (1) { 113 *sep = '.';
103 sep = strchr(sep, *field_sep);
104 if (sep == NULL)
105 break;
106 *sep = '.';
107 }
108 } 114 }
109 fputs(bf, fp);
110 free(bf);
111 } 115 }
112 va_end(ap); 116 va_end(ap);
113 return n; 117 return n;
114} 118}
115 119
116size_t 120static int hist_entry__thread_snprintf(struct hist_entry *self, char *bf,
117sort__thread_print(FILE *fp, struct hist_entry *self, unsigned int width) 121 size_t size, unsigned int width)
118{ 122{
119 return repsep_fprintf(fp, "%*s:%5d", width - 6, 123 return repsep_snprintf(bf, size, "%*s:%5d", width,
120 self->thread->comm ?: "", self->thread->pid); 124 self->thread->comm ?: "", self->thread->pid);
121} 125}
122 126
123size_t 127static int hist_entry__comm_snprintf(struct hist_entry *self, char *bf,
124sort__comm_print(FILE *fp, struct hist_entry *self, unsigned int width) 128 size_t size, unsigned int width)
125{ 129{
126 return repsep_fprintf(fp, "%*s", width, self->thread->comm); 130 return repsep_snprintf(bf, size, "%*s", width, self->thread->comm);
127} 131}
128 132
129/* --sort dso */ 133/* --sort dso */
@@ -131,8 +135,8 @@ sort__comm_print(FILE *fp, struct hist_entry *self, unsigned int width)
131int64_t 135int64_t
132sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) 136sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
133{ 137{
134 struct dso *dso_l = left->map ? left->map->dso : NULL; 138 struct dso *dso_l = left->ms.map ? left->ms.map->dso : NULL;
135 struct dso *dso_r = right->map ? right->map->dso : NULL; 139 struct dso *dso_r = right->ms.map ? right->ms.map->dso : NULL;
136 const char *dso_name_l, *dso_name_r; 140 const char *dso_name_l, *dso_name_r;
137 141
138 if (!dso_l || !dso_r) 142 if (!dso_l || !dso_r)
@@ -149,16 +153,16 @@ sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
149 return strcmp(dso_name_l, dso_name_r); 153 return strcmp(dso_name_l, dso_name_r);
150} 154}
151 155
152size_t 156static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf,
153sort__dso_print(FILE *fp, struct hist_entry *self, unsigned int width) 157 size_t size, unsigned int width)
154{ 158{
155 if (self->map && self->map->dso) { 159 if (self->ms.map && self->ms.map->dso) {
156 const char *dso_name = !verbose ? self->map->dso->short_name : 160 const char *dso_name = !verbose ? self->ms.map->dso->short_name :
157 self->map->dso->long_name; 161 self->ms.map->dso->long_name;
158 return repsep_fprintf(fp, "%-*s", width, dso_name); 162 return repsep_snprintf(bf, size, "%-*s", width, dso_name);
159 } 163 }
160 164
161 return repsep_fprintf(fp, "%*llx", width, (u64)self->ip); 165 return repsep_snprintf(bf, size, "%*Lx", width, self->ip);
162} 166}
163 167
164/* --sort symbol */ 168/* --sort symbol */
@@ -168,31 +172,31 @@ sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
168{ 172{
169 u64 ip_l, ip_r; 173 u64 ip_l, ip_r;
170 174
171 if (left->sym == right->sym) 175 if (left->ms.sym == right->ms.sym)
172 return 0; 176 return 0;
173 177
174 ip_l = left->sym ? left->sym->start : left->ip; 178 ip_l = left->ms.sym ? left->ms.sym->start : left->ip;
175 ip_r = right->sym ? right->sym->start : right->ip; 179 ip_r = right->ms.sym ? right->ms.sym->start : right->ip;
176 180
177 return (int64_t)(ip_r - ip_l); 181 return (int64_t)(ip_r - ip_l);
178} 182}
179 183
180 184static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf,
181size_t 185 size_t size, unsigned int width __used)
182sort__sym_print(FILE *fp, struct hist_entry *self, unsigned int width __used)
183{ 186{
184 size_t ret = 0; 187 size_t ret = 0;
185 188
186 if (verbose) { 189 if (verbose) {
187 char o = self->map ? dso__symtab_origin(self->map->dso) : '!'; 190 char o = self->ms.map ? dso__symtab_origin(self->ms.map->dso) : '!';
188 ret += repsep_fprintf(fp, "%#018llx %c ", (u64)self->ip, o); 191 ret += repsep_snprintf(bf, size, "%#018llx %c ", self->ip, o);
189 } 192 }
190 193
191 ret += repsep_fprintf(fp, "[%c] ", self->level); 194 ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", self->level);
192 if (self->sym) 195 if (self->ms.sym)
193 ret += repsep_fprintf(fp, "%s", self->sym->name); 196 ret += repsep_snprintf(bf + ret, size - ret, "%s",
197 self->ms.sym->name);
194 else 198 else
195 ret += repsep_fprintf(fp, "%#016llx", (u64)self->ip); 199 ret += repsep_snprintf(bf + ret, size - ret, "%#016llx", self->ip);
196 200
197 return ret; 201 return ret;
198} 202}
@@ -231,10 +235,10 @@ sort__parent_cmp(struct hist_entry *left, struct hist_entry *right)
231 return strcmp(sym_l->name, sym_r->name); 235 return strcmp(sym_l->name, sym_r->name);
232} 236}
233 237
234size_t 238static int hist_entry__parent_snprintf(struct hist_entry *self, char *bf,
235sort__parent_print(FILE *fp, struct hist_entry *self, unsigned int width) 239 size_t size, unsigned int width)
236{ 240{
237 return repsep_fprintf(fp, "%-*s", width, 241 return repsep_snprintf(bf, size, "%-*s", width,
238 self->parent ? self->parent->name : "[other]"); 242 self->parent ? self->parent->name : "[other]");
239} 243}
240 244
@@ -251,7 +255,7 @@ int sort_dimension__add(const char *tok)
251 if (strncasecmp(tok, sd->name, strlen(tok))) 255 if (strncasecmp(tok, sd->name, strlen(tok)))
252 continue; 256 continue;
253 257
254 if (sd->entry->collapse) 258 if (sd->entry->se_collapse)
255 sort__need_collapse = 1; 259 sort__need_collapse = 1;
256 260
257 if (sd->entry == &sort_parent) { 261 if (sd->entry == &sort_parent) {
@@ -260,9 +264,8 @@ int sort_dimension__add(const char *tok)
260 char err[BUFSIZ]; 264 char err[BUFSIZ];
261 265
262 regerror(ret, &parent_regex, err, sizeof(err)); 266 regerror(ret, &parent_regex, err, sizeof(err));
263 fprintf(stderr, "Invalid regex: %s\n%s", 267 pr_err("Invalid regex: %s\n%s", parent_pattern, err);
264 parent_pattern, err); 268 return -EINVAL;
265 exit(-1);
266 } 269 }
267 sort__has_parent = 1; 270 sort__has_parent = 1;
268 } 271 }
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 753f9ea99fb0..b7c54eeed9c9 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -44,18 +44,28 @@ extern enum sort_type sort__first_dimension;
44struct hist_entry { 44struct hist_entry {
45 struct rb_node rb_node; 45 struct rb_node rb_node;
46 u64 count; 46 u64 count;
47 u64 count_sys;
48 u64 count_us;
49 u64 count_guest_sys;
50 u64 count_guest_us;
51
52 /*
53 * XXX WARNING!
54 * thread _has_ to come after ms, see
55 * hist_browser__selected_thread in util/newt.c
56 */
57 struct map_symbol ms;
47 struct thread *thread; 58 struct thread *thread;
48 struct map *map;
49 struct symbol *sym;
50 u64 ip; 59 u64 ip;
51 char level; 60 char level;
52 struct symbol *parent; 61 u8 filtered;
53 struct callchain_node callchain; 62 struct symbol *parent;
54 union { 63 union {
55 unsigned long position; 64 unsigned long position;
56 struct hist_entry *pair; 65 struct hist_entry *pair;
57 struct rb_root sorted_chain; 66 struct rb_root sorted_chain;
58 }; 67 };
68 struct callchain_node callchain[0];
59}; 69};
60 70
61enum sort_type { 71enum sort_type {
@@ -73,12 +83,13 @@ enum sort_type {
73struct sort_entry { 83struct sort_entry {
74 struct list_head list; 84 struct list_head list;
75 85
76 const char *header; 86 const char *se_header;
77 87
78 int64_t (*cmp)(struct hist_entry *, struct hist_entry *); 88 int64_t (*se_cmp)(struct hist_entry *, struct hist_entry *);
79 int64_t (*collapse)(struct hist_entry *, struct hist_entry *); 89 int64_t (*se_collapse)(struct hist_entry *, struct hist_entry *);
80 size_t (*print)(FILE *fp, struct hist_entry *, unsigned int width); 90 int (*se_snprintf)(struct hist_entry *self, char *bf, size_t size,
81 unsigned int *width; 91 unsigned int width);
92 unsigned int *se_width;
82 bool elide; 93 bool elide;
83}; 94};
84 95
@@ -87,7 +98,6 @@ extern struct list_head hist_entry__sort_list;
87 98
88void setup_sorting(const char * const usagestr[], const struct option *opts); 99void setup_sorting(const char * const usagestr[], const struct option *opts);
89 100
90extern int repsep_fprintf(FILE *fp, const char *fmt, ...);
91extern size_t sort__thread_print(FILE *, struct hist_entry *, unsigned int); 101extern size_t sort__thread_print(FILE *, struct hist_entry *, unsigned int);
92extern size_t sort__comm_print(FILE *, struct hist_entry *, unsigned int); 102extern size_t sort__comm_print(FILE *, struct hist_entry *, unsigned int);
93extern size_t sort__dso_print(FILE *, struct hist_entry *, unsigned int); 103extern size_t sort__dso_print(FILE *, struct hist_entry *, unsigned int);
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
index a175949ed216..0409fc7c0058 100644
--- a/tools/perf/util/string.c
+++ b/tools/perf/util/string.c
@@ -1,48 +1,5 @@
1#include "string.h"
2#include "util.h" 1#include "util.h"
3 2#include "string.h"
4static int hex(char ch)
5{
6 if ((ch >= '0') && (ch <= '9'))
7 return ch - '0';
8 if ((ch >= 'a') && (ch <= 'f'))
9 return ch - 'a' + 10;
10 if ((ch >= 'A') && (ch <= 'F'))
11 return ch - 'A' + 10;
12 return -1;
13}
14
15/*
16 * While we find nice hex chars, build a long_val.
17 * Return number of chars processed.
18 */
19int hex2u64(const char *ptr, u64 *long_val)
20{
21 const char *p = ptr;
22 *long_val = 0;
23
24 while (*p) {
25 const int hex_val = hex(*p);
26
27 if (hex_val < 0)
28 break;
29
30 *long_val = (*long_val << 4) | hex_val;
31 p++;
32 }
33
34 return p - ptr;
35}
36
37char *strxfrchar(char *s, char from, char to)
38{
39 char *p = s;
40
41 while ((p = strchr(p, from)) != NULL)
42 *p++ = to;
43
44 return s;
45}
46 3
47#define K 1024LL 4#define K 1024LL
48/* 5/*
diff --git a/tools/perf/util/string.h b/tools/perf/util/string.h
deleted file mode 100644
index 542e44de3719..000000000000
--- a/tools/perf/util/string.h
+++ /dev/null
@@ -1,18 +0,0 @@
1#ifndef __PERF_STRING_H_
2#define __PERF_STRING_H_
3
4#include <stdbool.h>
5#include "types.h"
6
7int hex2u64(const char *ptr, u64 *val);
8char *strxfrchar(char *s, char from, char to);
9s64 perf_atoll(const char *str);
10char **argv_split(const char *str, int *argcp);
11void argv_free(char **argv);
12bool strglobmatch(const char *str, const char *pat);
13bool strlazymatch(const char *str, const char *pat);
14
15#define _STR(x) #x
16#define STR(x) _STR(x)
17
18#endif /* __PERF_STRING_H */
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index c458c4a371d1..e782e7db16c5 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1,13 +1,19 @@
1#include "util.h" 1#define _GNU_SOURCE
2#include "../perf.h" 2#include <ctype.h>
3#include "sort.h" 3#include <dirent.h>
4#include "string.h" 4#include <errno.h>
5#include <libgen.h>
6#include <stdlib.h>
7#include <stdio.h>
8#include <string.h>
9#include <sys/types.h>
10#include <sys/stat.h>
11#include <sys/param.h>
12#include <fcntl.h>
13#include <unistd.h>
5#include "symbol.h" 14#include "symbol.h"
6#include "thread.h" 15#include "strlist.h"
7 16
8#include "debug.h"
9
10#include <asm/bug.h>
11#include <libelf.h> 17#include <libelf.h>
12#include <gelf.h> 18#include <gelf.h>
13#include <elf.h> 19#include <elf.h>
@@ -18,22 +24,12 @@
18#define NT_GNU_BUILD_ID 3 24#define NT_GNU_BUILD_ID 3
19#endif 25#endif
20 26
21enum dso_origin {
22 DSO__ORIG_KERNEL = 0,
23 DSO__ORIG_JAVA_JIT,
24 DSO__ORIG_BUILD_ID_CACHE,
25 DSO__ORIG_FEDORA,
26 DSO__ORIG_UBUNTU,
27 DSO__ORIG_BUILDID,
28 DSO__ORIG_DSO,
29 DSO__ORIG_KMODULE,
30 DSO__ORIG_NOT_FOUND,
31};
32
33static void dsos__add(struct list_head *head, struct dso *dso); 27static void dsos__add(struct list_head *head, struct dso *dso);
34static struct map *map__new2(u64 start, struct dso *dso, enum map_type type); 28static struct map *map__new2(u64 start, struct dso *dso, enum map_type type);
35static int dso__load_kernel_sym(struct dso *self, struct map *map, 29static int dso__load_kernel_sym(struct dso *self, struct map *map,
36 symbol_filter_t filter); 30 symbol_filter_t filter);
31static int dso__load_guest_kernel_sym(struct dso *self, struct map *map,
32 symbol_filter_t filter);
37static int vmlinux_path__nr_entries; 33static int vmlinux_path__nr_entries;
38static char **vmlinux_path; 34static char **vmlinux_path;
39 35
@@ -126,8 +122,8 @@ static void map_groups__fixup_end(struct map_groups *self)
126static struct symbol *symbol__new(u64 start, u64 len, const char *name) 122static struct symbol *symbol__new(u64 start, u64 len, const char *name)
127{ 123{
128 size_t namelen = strlen(name) + 1; 124 size_t namelen = strlen(name) + 1;
129 struct symbol *self = zalloc(symbol_conf.priv_size + 125 struct symbol *self = calloc(1, (symbol_conf.priv_size +
130 sizeof(*self) + namelen); 126 sizeof(*self) + namelen));
131 if (self == NULL) 127 if (self == NULL)
132 return NULL; 128 return NULL;
133 129
@@ -178,7 +174,7 @@ static void dso__set_basename(struct dso *self)
178 174
179struct dso *dso__new(const char *name) 175struct dso *dso__new(const char *name)
180{ 176{
181 struct dso *self = zalloc(sizeof(*self) + strlen(name) + 1); 177 struct dso *self = calloc(1, sizeof(*self) + strlen(name) + 1);
182 178
183 if (self != NULL) { 179 if (self != NULL) {
184 int i; 180 int i;
@@ -192,6 +188,7 @@ struct dso *dso__new(const char *name)
192 self->loaded = 0; 188 self->loaded = 0;
193 self->sorted_by_name = 0; 189 self->sorted_by_name = 0;
194 self->has_build_id = 0; 190 self->has_build_id = 0;
191 self->kernel = DSO_TYPE_USER;
195 } 192 }
196 193
197 return self; 194 return self;
@@ -408,12 +405,9 @@ int kallsyms__parse(const char *filename, void *arg,
408 char *symbol_name; 405 char *symbol_name;
409 406
410 line_len = getline(&line, &n, file); 407 line_len = getline(&line, &n, file);
411 if (line_len < 0) 408 if (line_len < 0 || !line)
412 break; 409 break;
413 410
414 if (!line)
415 goto out_failure;
416
417 line[--line_len] = '\0'; /* \n */ 411 line[--line_len] = '\0'; /* \n */
418 412
419 len = hex2u64(line, &start); 413 len = hex2u64(line, &start);
@@ -465,6 +459,7 @@ static int map__process_kallsym_symbol(void *arg, const char *name,
465 * map__split_kallsyms, when we have split the maps per module 459 * map__split_kallsyms, when we have split the maps per module
466 */ 460 */
467 symbols__insert(root, sym); 461 symbols__insert(root, sym);
462
468 return 0; 463 return 0;
469} 464}
470 465
@@ -489,6 +484,7 @@ static int dso__split_kallsyms(struct dso *self, struct map *map,
489 symbol_filter_t filter) 484 symbol_filter_t filter)
490{ 485{
491 struct map_groups *kmaps = map__kmap(map)->kmaps; 486 struct map_groups *kmaps = map__kmap(map)->kmaps;
487 struct kernel_info *kerninfo = kmaps->this_kerninfo;
492 struct map *curr_map = map; 488 struct map *curr_map = map;
493 struct symbol *pos; 489 struct symbol *pos;
494 int count = 0; 490 int count = 0;
@@ -510,15 +506,33 @@ static int dso__split_kallsyms(struct dso *self, struct map *map,
510 *module++ = '\0'; 506 *module++ = '\0';
511 507
512 if (strcmp(curr_map->dso->short_name, module)) { 508 if (strcmp(curr_map->dso->short_name, module)) {
513 curr_map = map_groups__find_by_name(kmaps, map->type, module); 509 if (curr_map != map &&
510 self->kernel == DSO_TYPE_GUEST_KERNEL &&
511 is_default_guest(kerninfo)) {
512 /*
513 * We assume all symbols of a module are
514 * continuous in * kallsyms, so curr_map
515 * points to a module and all its
516 * symbols are in its kmap. Mark it as
517 * loaded.
518 */
519 dso__set_loaded(curr_map->dso,
520 curr_map->type);
521 }
522
523 curr_map = map_groups__find_by_name(kmaps,
524 map->type, module);
514 if (curr_map == NULL) { 525 if (curr_map == NULL) {
515 pr_debug("/proc/{kallsyms,modules} " 526 pr_err("%s/proc/{kallsyms,modules} "
516 "inconsistency while looking " 527 "inconsistency while looking "
517 "for \"%s\" module!\n", module); 528 "for \"%s\" module!\n",
518 return -1; 529 kerninfo->root_dir, module);
530 curr_map = map;
531 goto discard_symbol;
519 } 532 }
520 533
521 if (curr_map->dso->loaded) 534 if (curr_map->dso->loaded &&
535 !is_default_guest(kmaps->this_kerninfo))
522 goto discard_symbol; 536 goto discard_symbol;
523 } 537 }
524 /* 538 /*
@@ -531,13 +545,21 @@ static int dso__split_kallsyms(struct dso *self, struct map *map,
531 char dso_name[PATH_MAX]; 545 char dso_name[PATH_MAX];
532 struct dso *dso; 546 struct dso *dso;
533 547
534 snprintf(dso_name, sizeof(dso_name), "[kernel].%d", 548 if (self->kernel == DSO_TYPE_GUEST_KERNEL)
535 kernel_range++); 549 snprintf(dso_name, sizeof(dso_name),
550 "[guest.kernel].%d",
551 kernel_range++);
552 else
553 snprintf(dso_name, sizeof(dso_name),
554 "[kernel].%d",
555 kernel_range++);
536 556
537 dso = dso__new(dso_name); 557 dso = dso__new(dso_name);
538 if (dso == NULL) 558 if (dso == NULL)
539 return -1; 559 return -1;
540 560
561 dso->kernel = self->kernel;
562
541 curr_map = map__new2(pos->start, dso, map->type); 563 curr_map = map__new2(pos->start, dso, map->type);
542 if (curr_map == NULL) { 564 if (curr_map == NULL) {
543 dso__delete(dso); 565 dso__delete(dso);
@@ -561,6 +583,12 @@ discard_symbol: rb_erase(&pos->rb_node, root);
561 } 583 }
562 } 584 }
563 585
586 if (curr_map != map &&
587 self->kernel == DSO_TYPE_GUEST_KERNEL &&
588 is_default_guest(kmaps->this_kerninfo)) {
589 dso__set_loaded(curr_map->dso, curr_map->type);
590 }
591
564 return count; 592 return count;
565} 593}
566 594
@@ -571,7 +599,10 @@ int dso__load_kallsyms(struct dso *self, const char *filename,
571 return -1; 599 return -1;
572 600
573 symbols__fixup_end(&self->symbols[map->type]); 601 symbols__fixup_end(&self->symbols[map->type]);
574 self->origin = DSO__ORIG_KERNEL; 602 if (self->kernel == DSO_TYPE_GUEST_KERNEL)
603 self->origin = DSO__ORIG_GUEST_KERNEL;
604 else
605 self->origin = DSO__ORIG_KERNEL;
575 606
576 return dso__split_kallsyms(self, map, filter); 607 return dso__split_kallsyms(self, map, filter);
577} 608}
@@ -870,8 +901,8 @@ out_close:
870 if (err == 0) 901 if (err == 0)
871 return nr; 902 return nr;
872out: 903out:
873 pr_warning("%s: problems reading %s PLT info.\n", 904 pr_debug("%s: problems reading %s PLT info.\n",
874 __func__, self->long_name); 905 __func__, self->long_name);
875 return 0; 906 return 0;
876} 907}
877 908
@@ -958,7 +989,7 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name,
958 nr_syms = shdr.sh_size / shdr.sh_entsize; 989 nr_syms = shdr.sh_size / shdr.sh_entsize;
959 990
960 memset(&sym, 0, sizeof(sym)); 991 memset(&sym, 0, sizeof(sym));
961 if (!self->kernel) { 992 if (self->kernel == DSO_TYPE_USER) {
962 self->adjust_symbols = (ehdr.e_type == ET_EXEC || 993 self->adjust_symbols = (ehdr.e_type == ET_EXEC ||
963 elf_section_by_name(elf, &ehdr, &shdr, 994 elf_section_by_name(elf, &ehdr, &shdr,
964 ".gnu.prelink_undo", 995 ".gnu.prelink_undo",
@@ -990,7 +1021,7 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name,
990 1021
991 section_name = elf_sec__name(&shdr, secstrs); 1022 section_name = elf_sec__name(&shdr, secstrs);
992 1023
993 if (self->kernel || kmodule) { 1024 if (self->kernel != DSO_TYPE_USER || kmodule) {
994 char dso_name[PATH_MAX]; 1025 char dso_name[PATH_MAX];
995 1026
996 if (strcmp(section_name, 1027 if (strcmp(section_name,
@@ -1017,6 +1048,7 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name,
1017 curr_dso = dso__new(dso_name); 1048 curr_dso = dso__new(dso_name);
1018 if (curr_dso == NULL) 1049 if (curr_dso == NULL)
1019 goto out_elf_end; 1050 goto out_elf_end;
1051 curr_dso->kernel = self->kernel;
1020 curr_map = map__new2(start, curr_dso, 1052 curr_map = map__new2(start, curr_dso,
1021 map->type); 1053 map->type);
1022 if (curr_map == NULL) { 1054 if (curr_map == NULL) {
@@ -1025,9 +1057,9 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name,
1025 } 1057 }
1026 curr_map->map_ip = identity__map_ip; 1058 curr_map->map_ip = identity__map_ip;
1027 curr_map->unmap_ip = identity__map_ip; 1059 curr_map->unmap_ip = identity__map_ip;
1028 curr_dso->origin = DSO__ORIG_KERNEL; 1060 curr_dso->origin = self->origin;
1029 map_groups__insert(kmap->kmaps, curr_map); 1061 map_groups__insert(kmap->kmaps, curr_map);
1030 dsos__add(&dsos__kernel, curr_dso); 1062 dsos__add(&self->node, curr_dso);
1031 dso__set_loaded(curr_dso, map->type); 1063 dso__set_loaded(curr_dso, map->type);
1032 } else 1064 } else
1033 curr_dso = curr_map->dso; 1065 curr_dso = curr_map->dso;
@@ -1089,7 +1121,7 @@ static bool dso__build_id_equal(const struct dso *self, u8 *build_id)
1089 return memcmp(self->build_id, build_id, sizeof(self->build_id)) == 0; 1121 return memcmp(self->build_id, build_id, sizeof(self->build_id)) == 0;
1090} 1122}
1091 1123
1092static bool __dsos__read_build_ids(struct list_head *head, bool with_hits) 1124bool __dsos__read_build_ids(struct list_head *head, bool with_hits)
1093{ 1125{
1094 bool have_build_id = false; 1126 bool have_build_id = false;
1095 struct dso *pos; 1127 struct dso *pos;
@@ -1107,13 +1139,6 @@ static bool __dsos__read_build_ids(struct list_head *head, bool with_hits)
1107 return have_build_id; 1139 return have_build_id;
1108} 1140}
1109 1141
1110bool dsos__read_build_ids(bool with_hits)
1111{
1112 bool kbuildids = __dsos__read_build_ids(&dsos__kernel, with_hits),
1113 ubuildids = __dsos__read_build_ids(&dsos__user, with_hits);
1114 return kbuildids || ubuildids;
1115}
1116
1117/* 1142/*
1118 * Align offset to 4 bytes as needed for note name and descriptor data. 1143 * Align offset to 4 bytes as needed for note name and descriptor data.
1119 */ 1144 */
@@ -1248,6 +1273,8 @@ char dso__symtab_origin(const struct dso *self)
1248 [DSO__ORIG_BUILDID] = 'b', 1273 [DSO__ORIG_BUILDID] = 'b',
1249 [DSO__ORIG_DSO] = 'd', 1274 [DSO__ORIG_DSO] = 'd',
1250 [DSO__ORIG_KMODULE] = 'K', 1275 [DSO__ORIG_KMODULE] = 'K',
1276 [DSO__ORIG_GUEST_KERNEL] = 'g',
1277 [DSO__ORIG_GUEST_KMODULE] = 'G',
1251 }; 1278 };
1252 1279
1253 if (self == NULL || self->origin == DSO__ORIG_NOT_FOUND) 1280 if (self == NULL || self->origin == DSO__ORIG_NOT_FOUND)
@@ -1263,11 +1290,20 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter)
1263 char build_id_hex[BUILD_ID_SIZE * 2 + 1]; 1290 char build_id_hex[BUILD_ID_SIZE * 2 + 1];
1264 int ret = -1; 1291 int ret = -1;
1265 int fd; 1292 int fd;
1293 struct kernel_info *kerninfo;
1294 const char *root_dir;
1266 1295
1267 dso__set_loaded(self, map->type); 1296 dso__set_loaded(self, map->type);
1268 1297
1269 if (self->kernel) 1298 if (self->kernel == DSO_TYPE_KERNEL)
1270 return dso__load_kernel_sym(self, map, filter); 1299 return dso__load_kernel_sym(self, map, filter);
1300 else if (self->kernel == DSO_TYPE_GUEST_KERNEL)
1301 return dso__load_guest_kernel_sym(self, map, filter);
1302
1303 if (map->groups && map->groups->this_kerninfo)
1304 kerninfo = map->groups->this_kerninfo;
1305 else
1306 kerninfo = NULL;
1271 1307
1272 name = malloc(size); 1308 name = malloc(size);
1273 if (!name) 1309 if (!name)
@@ -1321,6 +1357,13 @@ more:
1321 case DSO__ORIG_DSO: 1357 case DSO__ORIG_DSO:
1322 snprintf(name, size, "%s", self->long_name); 1358 snprintf(name, size, "%s", self->long_name);
1323 break; 1359 break;
1360 case DSO__ORIG_GUEST_KMODULE:
1361 if (map->groups && map->groups->this_kerninfo)
1362 root_dir = map->groups->this_kerninfo->root_dir;
1363 else
1364 root_dir = "";
1365 snprintf(name, size, "%s%s", root_dir, self->long_name);
1366 break;
1324 1367
1325 default: 1368 default:
1326 goto out; 1369 goto out;
@@ -1374,7 +1417,8 @@ struct map *map_groups__find_by_name(struct map_groups *self,
1374 return NULL; 1417 return NULL;
1375} 1418}
1376 1419
1377static int dso__kernel_module_get_build_id(struct dso *self) 1420static int dso__kernel_module_get_build_id(struct dso *self,
1421 const char *root_dir)
1378{ 1422{
1379 char filename[PATH_MAX]; 1423 char filename[PATH_MAX];
1380 /* 1424 /*
@@ -1384,8 +1428,8 @@ static int dso__kernel_module_get_build_id(struct dso *self)
1384 const char *name = self->short_name + 1; 1428 const char *name = self->short_name + 1;
1385 1429
1386 snprintf(filename, sizeof(filename), 1430 snprintf(filename, sizeof(filename),
1387 "/sys/module/%.*s/notes/.note.gnu.build-id", 1431 "%s/sys/module/%.*s/notes/.note.gnu.build-id",
1388 (int)strlen(name - 1), name); 1432 root_dir, (int)strlen(name) - 1, name);
1389 1433
1390 if (sysfs__read_build_id(filename, self->build_id, 1434 if (sysfs__read_build_id(filename, self->build_id,
1391 sizeof(self->build_id)) == 0) 1435 sizeof(self->build_id)) == 0)
@@ -1394,26 +1438,33 @@ static int dso__kernel_module_get_build_id(struct dso *self)
1394 return 0; 1438 return 0;
1395} 1439}
1396 1440
1397static int map_groups__set_modules_path_dir(struct map_groups *self, char *dirname) 1441static int map_groups__set_modules_path_dir(struct map_groups *self,
1442 const char *dir_name)
1398{ 1443{
1399 struct dirent *dent; 1444 struct dirent *dent;
1400 DIR *dir = opendir(dirname); 1445 DIR *dir = opendir(dir_name);
1401 1446
1402 if (!dir) { 1447 if (!dir) {
1403 pr_debug("%s: cannot open %s dir\n", __func__, dirname); 1448 pr_debug("%s: cannot open %s dir\n", __func__, dir_name);
1404 return -1; 1449 return -1;
1405 } 1450 }
1406 1451
1407 while ((dent = readdir(dir)) != NULL) { 1452 while ((dent = readdir(dir)) != NULL) {
1408 char path[PATH_MAX]; 1453 char path[PATH_MAX];
1454 struct stat st;
1409 1455
1410 if (dent->d_type == DT_DIR) { 1456 /*sshfs might return bad dent->d_type, so we have to stat*/
1457 sprintf(path, "%s/%s", dir_name, dent->d_name);
1458 if (stat(path, &st))
1459 continue;
1460
1461 if (S_ISDIR(st.st_mode)) {
1411 if (!strcmp(dent->d_name, ".") || 1462 if (!strcmp(dent->d_name, ".") ||
1412 !strcmp(dent->d_name, "..")) 1463 !strcmp(dent->d_name, ".."))
1413 continue; 1464 continue;
1414 1465
1415 snprintf(path, sizeof(path), "%s/%s", 1466 snprintf(path, sizeof(path), "%s/%s",
1416 dirname, dent->d_name); 1467 dir_name, dent->d_name);
1417 if (map_groups__set_modules_path_dir(self, path) < 0) 1468 if (map_groups__set_modules_path_dir(self, path) < 0)
1418 goto failure; 1469 goto failure;
1419 } else { 1470 } else {
@@ -1433,13 +1484,13 @@ static int map_groups__set_modules_path_dir(struct map_groups *self, char *dirna
1433 continue; 1484 continue;
1434 1485
1435 snprintf(path, sizeof(path), "%s/%s", 1486 snprintf(path, sizeof(path), "%s/%s",
1436 dirname, dent->d_name); 1487 dir_name, dent->d_name);
1437 1488
1438 long_name = strdup(path); 1489 long_name = strdup(path);
1439 if (long_name == NULL) 1490 if (long_name == NULL)
1440 goto failure; 1491 goto failure;
1441 dso__set_long_name(map->dso, long_name); 1492 dso__set_long_name(map->dso, long_name);
1442 dso__kernel_module_get_build_id(map->dso); 1493 dso__kernel_module_get_build_id(map->dso, "");
1443 } 1494 }
1444 } 1495 }
1445 1496
@@ -1449,16 +1500,46 @@ failure:
1449 return -1; 1500 return -1;
1450} 1501}
1451 1502
1452static int map_groups__set_modules_path(struct map_groups *self) 1503static char *get_kernel_version(const char *root_dir)
1453{ 1504{
1454 struct utsname uts; 1505 char version[PATH_MAX];
1506 FILE *file;
1507 char *name, *tmp;
1508 const char *prefix = "Linux version ";
1509
1510 sprintf(version, "%s/proc/version", root_dir);
1511 file = fopen(version, "r");
1512 if (!file)
1513 return NULL;
1514
1515 version[0] = '\0';
1516 tmp = fgets(version, sizeof(version), file);
1517 fclose(file);
1518
1519 name = strstr(version, prefix);
1520 if (!name)
1521 return NULL;
1522 name += strlen(prefix);
1523 tmp = strchr(name, ' ');
1524 if (tmp)
1525 *tmp = '\0';
1526
1527 return strdup(name);
1528}
1529
/*
 * Build the path to the kernel module tree for the given root ("" for the
 * host, or a guest mount point), using the release string parsed from
 * <root_dir>/proc/version, and hand it to map_groups__set_modules_path_dir()
 * to fill in module dso paths.
 *
 * Returns -1 when the kernel version cannot be determined, otherwise the
 * result of the directory walk.
 */
static int map_groups__set_modules_path(struct map_groups *self,
				const char *root_dir)
{
	char *version;
	char modules_path[PATH_MAX];

	version = get_kernel_version(root_dir);
	if (!version)
		return -1;

	snprintf(modules_path, sizeof(modules_path), "%s/lib/modules/%s/kernel",
		 root_dir, version);
	free(version);	/* get_kernel_version() strdup()s its result */

	return map_groups__set_modules_path_dir(self, modules_path);
}
@@ -1470,8 +1551,8 @@ static int map_groups__set_modules_path(struct map_groups *self)
1470 */ 1551 */
1471static struct map *map__new2(u64 start, struct dso *dso, enum map_type type) 1552static struct map *map__new2(u64 start, struct dso *dso, enum map_type type)
1472{ 1553{
1473 struct map *self = zalloc(sizeof(*self) + 1554 struct map *self = calloc(1, (sizeof(*self) +
1474 (dso->kernel ? sizeof(struct kmap) : 0)); 1555 (dso->kernel ? sizeof(struct kmap) : 0)));
1475 if (self != NULL) { 1556 if (self != NULL) {
1476 /* 1557 /*
1477 * ->end will be filled after we load all the symbols 1558 * ->end will be filled after we load all the symbols
@@ -1483,11 +1564,13 @@ static struct map *map__new2(u64 start, struct dso *dso, enum map_type type)
1483} 1564}
1484 1565
1485struct map *map_groups__new_module(struct map_groups *self, u64 start, 1566struct map *map_groups__new_module(struct map_groups *self, u64 start,
1486 const char *filename) 1567 const char *filename,
1568 struct kernel_info *kerninfo)
1487{ 1569{
1488 struct map *map; 1570 struct map *map;
1489 struct dso *dso = __dsos__findnew(&dsos__kernel, filename); 1571 struct dso *dso;
1490 1572
1573 dso = __dsos__findnew(&kerninfo->dsos__kernel, filename);
1491 if (dso == NULL) 1574 if (dso == NULL)
1492 return NULL; 1575 return NULL;
1493 1576
@@ -1495,21 +1578,37 @@ struct map *map_groups__new_module(struct map_groups *self, u64 start,
1495 if (map == NULL) 1578 if (map == NULL)
1496 return NULL; 1579 return NULL;
1497 1580
1498 dso->origin = DSO__ORIG_KMODULE; 1581 if (is_host_kernel(kerninfo))
1582 dso->origin = DSO__ORIG_KMODULE;
1583 else
1584 dso->origin = DSO__ORIG_GUEST_KMODULE;
1499 map_groups__insert(self, map); 1585 map_groups__insert(self, map);
1500 return map; 1586 return map;
1501} 1587}
1502 1588
1503static int map_groups__create_modules(struct map_groups *self) 1589static int map_groups__create_modules(struct kernel_info *kerninfo)
1504{ 1590{
1505 char *line = NULL; 1591 char *line = NULL;
1506 size_t n; 1592 size_t n;
1507 FILE *file = fopen("/proc/modules", "r"); 1593 FILE *file;
1508 struct map *map; 1594 struct map *map;
1595 const char *root_dir;
1596 const char *modules;
1597 char path[PATH_MAX];
1598
1599 if (is_default_guest(kerninfo))
1600 modules = symbol_conf.default_guest_modules;
1601 else {
1602 sprintf(path, "%s/proc/modules", kerninfo->root_dir);
1603 modules = path;
1604 }
1509 1605
1606 file = fopen(modules, "r");
1510 if (file == NULL) 1607 if (file == NULL)
1511 return -1; 1608 return -1;
1512 1609
1610 root_dir = kerninfo->root_dir;
1611
1513 while (!feof(file)) { 1612 while (!feof(file)) {
1514 char name[PATH_MAX]; 1613 char name[PATH_MAX];
1515 u64 start; 1614 u64 start;
@@ -1538,16 +1637,17 @@ static int map_groups__create_modules(struct map_groups *self)
1538 *sep = '\0'; 1637 *sep = '\0';
1539 1638
1540 snprintf(name, sizeof(name), "[%s]", line); 1639 snprintf(name, sizeof(name), "[%s]", line);
1541 map = map_groups__new_module(self, start, name); 1640 map = map_groups__new_module(&kerninfo->kmaps,
1641 start, name, kerninfo);
1542 if (map == NULL) 1642 if (map == NULL)
1543 goto out_delete_line; 1643 goto out_delete_line;
1544 dso__kernel_module_get_build_id(map->dso); 1644 dso__kernel_module_get_build_id(map->dso, root_dir);
1545 } 1645 }
1546 1646
1547 free(line); 1647 free(line);
1548 fclose(file); 1648 fclose(file);
1549 1649
1550 return map_groups__set_modules_path(self); 1650 return map_groups__set_modules_path(&kerninfo->kmaps, root_dir);
1551 1651
1552out_delete_line: 1652out_delete_line:
1553 free(line); 1653 free(line);
@@ -1714,8 +1814,57 @@ out_fixup:
1714 return err; 1814 return err;
1715} 1815}
1716 1816
/*
 * Load symbols for a guest machine's kernel DSO.
 *
 * For the "default guest" (no per-pid guest mount), the symbols come from
 * the vmlinux or kallsyms file the user supplied on the command line; for
 * a mounted guest, its own /proc/kallsyms is read relative to root_dir.
 *
 * Returns the number of symbols loaded (> 0 on success) or a negative
 * value on error, mirroring dso__load_kernel_sym().
 */
static int dso__load_guest_kernel_sym(struct dso *self, struct map *map,
				symbol_filter_t filter)
{
	int err;
	const char *kallsyms_filename = NULL;
	struct kernel_info *kerninfo;
	char path[PATH_MAX];

	if (!map->groups) {
		pr_debug("Guest kernel map hasn't the point to groups\n");
		return -1;
	}
	kerninfo = map->groups->this_kerninfo;

	if (is_default_guest(kerninfo)) {
		/*
		 * if the user specified a vmlinux filename, use it and only
		 * it, reporting errors to the user if it cannot be used.
		 * Or use file guest_kallsyms inputted by user on commandline
		 */
		if (symbol_conf.default_guest_vmlinux_name != NULL) {
			err = dso__load_vmlinux(self, map,
				symbol_conf.default_guest_vmlinux_name, filter);
			goto out_try_fixup;
		}

		kallsyms_filename = symbol_conf.default_guest_kallsyms;
		if (!kallsyms_filename)
			return -1;	/* nothing to load symbols from */
	} else {
		/* Mounted guest: read its kallsyms under its root_dir. */
		sprintf(path, "%s/proc/kallsyms", kerninfo->root_dir);
		kallsyms_filename = path;
	}

	err = dso__load_kallsyms(self, kallsyms_filename, map, filter);
	if (err > 0)
		pr_debug("Using %s for symbols\n", kallsyms_filename);

out_try_fixup:
	if (err > 0) {
		if (kallsyms_filename != NULL) {
			/*
			 * Rename the dso to the guest kernel mmap name so it
			 * matches the PERF_RECORD_MMAP events for this guest.
			 */
			kern_mmap_name(kerninfo, path);
			dso__set_long_name(self,
				strdup(path));
		}
		map__fixup_start(map);
		map__fixup_end(map);
	}

	return err;
}
1719 1868
1720static void dsos__add(struct list_head *head, struct dso *dso) 1869static void dsos__add(struct list_head *head, struct dso *dso)
1721{ 1870{
@@ -1758,10 +1907,16 @@ static void __dsos__fprintf(struct list_head *head, FILE *fp)
1758 } 1907 }
1759} 1908}
1760 1909
1761void dsos__fprintf(FILE *fp) 1910void dsos__fprintf(struct rb_root *kerninfo_root, FILE *fp)
1762{ 1911{
1763 __dsos__fprintf(&dsos__kernel, fp); 1912 struct rb_node *nd;
1764 __dsos__fprintf(&dsos__user, fp); 1913
1914 for (nd = rb_first(kerninfo_root); nd; nd = rb_next(nd)) {
1915 struct kernel_info *pos = rb_entry(nd, struct kernel_info,
1916 rb_node);
1917 __dsos__fprintf(&pos->dsos__kernel, fp);
1918 __dsos__fprintf(&pos->dsos__user, fp);
1919 }
1765} 1920}
1766 1921
1767static size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp, 1922static size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
@@ -1779,10 +1934,21 @@ static size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
1779 return ret; 1934 return ret;
1780} 1935}
1781 1936
1782size_t dsos__fprintf_buildid(FILE *fp, bool with_hits) 1937size_t dsos__fprintf_buildid(struct rb_root *kerninfo_root,
1938 FILE *fp, bool with_hits)
1783{ 1939{
1784 return (__dsos__fprintf_buildid(&dsos__kernel, fp, with_hits) + 1940 struct rb_node *nd;
1785 __dsos__fprintf_buildid(&dsos__user, fp, with_hits)); 1941 size_t ret = 0;
1942
1943 for (nd = rb_first(kerninfo_root); nd; nd = rb_next(nd)) {
1944 struct kernel_info *pos = rb_entry(nd, struct kernel_info,
1945 rb_node);
1946 ret += __dsos__fprintf_buildid(&pos->dsos__kernel,
1947 fp, with_hits);
1948 ret += __dsos__fprintf_buildid(&pos->dsos__user,
1949 fp, with_hits);
1950 }
1951 return ret;
1786} 1952}
1787 1953
1788struct dso *dso__new_kernel(const char *name) 1954struct dso *dso__new_kernel(const char *name)
@@ -1791,28 +1957,59 @@ struct dso *dso__new_kernel(const char *name)
1791 1957
1792 if (self != NULL) { 1958 if (self != NULL) {
1793 dso__set_short_name(self, "[kernel]"); 1959 dso__set_short_name(self, "[kernel]");
1794 self->kernel = 1; 1960 self->kernel = DSO_TYPE_KERNEL;
1961 }
1962
1963 return self;
1964}
1965
1966static struct dso *dso__new_guest_kernel(struct kernel_info *kerninfo,
1967 const char *name)
1968{
1969 char buff[PATH_MAX];
1970 struct dso *self;
1971
1972 kern_mmap_name(kerninfo, buff);
1973 self = dso__new(name ?: buff);
1974 if (self != NULL) {
1975 dso__set_short_name(self, "[guest.kernel]");
1976 self->kernel = DSO_TYPE_GUEST_KERNEL;
1795 } 1977 }
1796 1978
1797 return self; 1979 return self;
1798} 1980}
1799 1981
1800void dso__read_running_kernel_build_id(struct dso *self) 1982void dso__read_running_kernel_build_id(struct dso *self,
1983 struct kernel_info *kerninfo)
1801{ 1984{
1802 if (sysfs__read_build_id("/sys/kernel/notes", self->build_id, 1985 char path[PATH_MAX];
1986
1987 if (is_default_guest(kerninfo))
1988 return;
1989 sprintf(path, "%s/sys/kernel/notes", kerninfo->root_dir);
1990 if (sysfs__read_build_id(path, self->build_id,
1803 sizeof(self->build_id)) == 0) 1991 sizeof(self->build_id)) == 0)
1804 self->has_build_id = true; 1992 self->has_build_id = true;
1805} 1993}
1806 1994
1807static struct dso *dsos__create_kernel(const char *vmlinux) 1995static struct dso *dsos__create_kernel(struct kernel_info *kerninfo)
1808{ 1996{
1809 struct dso *kernel = dso__new_kernel(vmlinux); 1997 const char *vmlinux_name = NULL;
1998 struct dso *kernel;
1810 1999
1811 if (kernel != NULL) { 2000 if (is_host_kernel(kerninfo)) {
1812 dso__read_running_kernel_build_id(kernel); 2001 vmlinux_name = symbol_conf.vmlinux_name;
1813 dsos__add(&dsos__kernel, kernel); 2002 kernel = dso__new_kernel(vmlinux_name);
2003 } else {
2004 if (is_default_guest(kerninfo))
2005 vmlinux_name = symbol_conf.default_guest_vmlinux_name;
2006 kernel = dso__new_guest_kernel(kerninfo, vmlinux_name);
1814 } 2007 }
1815 2008
2009 if (kernel != NULL) {
2010 dso__read_running_kernel_build_id(kernel, kerninfo);
2011 dsos__add(&kerninfo->dsos__kernel, kernel);
2012 }
1816 return kernel; 2013 return kernel;
1817} 2014}
1818 2015
@@ -1895,6 +2092,17 @@ out_fail:
1895 return -1; 2092 return -1;
1896} 2093}
1897 2094
2095size_t vmlinux_path__fprintf(FILE *fp)
2096{
2097 int i;
2098 size_t printed = 0;
2099
2100 for (i = 0; i < vmlinux_path__nr_entries; ++i)
2101 printed += fprintf(fp, "[%d] %s\n", i, vmlinux_path[i]);
2102
2103 return printed;
2104}
2105
1898static int setup_list(struct strlist **list, const char *list_str, 2106static int setup_list(struct strlist **list, const char *list_str,
1899 const char *list_name) 2107 const char *list_name)
1900{ 2108{
@@ -1945,22 +2153,114 @@ out_free_comm_list:
1945 return -1; 2153 return -1;
1946} 2154}
1947 2155
/*
 * Create the kernel dso and maps for the machine identified by pid in the
 * kerninfo tree (the host, or one guest machine), and optionally its
 * module maps when symbol_conf.use_modules is set.
 *
 * Returns 0 on success, -1 on failure.  A module-map failure is not
 * fatal: it is reported and the kernel maps are kept.
 */
int map_groups__create_kernel_maps(struct rb_root *kerninfo_root, pid_t pid)
{
	struct kernel_info *kerninfo;
	struct dso *kernel;

	kerninfo = kerninfo__findnew(kerninfo_root, pid);
	if (kerninfo == NULL)
		return -1;
	kernel = dsos__create_kernel(kerninfo);
	if (kernel == NULL)
		return -1;

	if (__map_groups__create_kernel_maps(&kerninfo->kmaps,
			kerninfo->vmlinux_maps, kernel) < 0)
		return -1;

	if (symbol_conf.use_modules &&
		map_groups__create_modules(kerninfo) < 0)
		pr_debug("Problems creating module maps, continuing anyway...\n");
	/*
	 * Now that we have all the maps created, just set the ->end of them:
	 */
	map_groups__fixup_end(&kerninfo->kmaps);
	return 0;
}
2181
/* Map one ASCII hex digit to its value 0..15, or -1 if ch is not hex. */
static int hex(char ch)
{
	if ('0' <= ch && ch <= '9')
		return ch - '0';
	if ('a' <= ch && ch <= 'f')
		return 10 + (ch - 'a');
	if ('A' <= ch && ch <= 'F')
		return 10 + (ch - 'A');
	return -1;
}
2192
2193/*
2194 * While we find nice hex chars, build a long_val.
2195 * Return number of chars processed.
2196 */
2197int hex2u64(const char *ptr, u64 *long_val)
2198{
2199 const char *p = ptr;
2200 *long_val = 0;
2201
2202 while (*p) {
2203 const int hex_val = hex(*p);
2204
2205 if (hex_val < 0)
2206 break;
2207
2208 *long_val = (*long_val << 4) | hex_val;
2209 p++;
2210 }
2211
2212 return p - ptr;
2213}
2214
/* Replace every occurrence of 'from' in s with 'to'; returns s itself. */
char *strxfrchar(char *s, char from, char to)
{
	char *cursor;

	for (cursor = strchr(s, from); cursor != NULL;
	     cursor = strchr(cursor, from))
		*cursor++ = to;

	return s;
}
2224
2225int map_groups__create_guest_kernel_maps(struct rb_root *kerninfo_root)
2226{
2227 int ret = 0;
2228 struct dirent **namelist = NULL;
2229 int i, items = 0;
2230 char path[PATH_MAX];
2231 pid_t pid;
2232
2233 if (symbol_conf.default_guest_vmlinux_name ||
2234 symbol_conf.default_guest_modules ||
2235 symbol_conf.default_guest_kallsyms) {
2236 map_groups__create_kernel_maps(kerninfo_root,
2237 DEFAULT_GUEST_KERNEL_ID);
2238 }
2239
2240 if (symbol_conf.guestmount) {
2241 items = scandir(symbol_conf.guestmount, &namelist, NULL, NULL);
2242 if (items <= 0)
2243 return -ENOENT;
2244 for (i = 0; i < items; i++) {
2245 if (!isdigit(namelist[i]->d_name[0])) {
2246 /* Filter out . and .. */
2247 continue;
2248 }
2249 pid = atoi(namelist[i]->d_name);
2250 sprintf(path, "%s/%s/proc/kallsyms",
2251 symbol_conf.guestmount,
2252 namelist[i]->d_name);
2253 ret = access(path, R_OK);
2254 if (ret) {
2255 pr_debug("Can't access file %s\n", path);
2256 goto failure;
2257 }
2258 map_groups__create_kernel_maps(kerninfo_root,
2259 pid);
2260 }
2261failure:
2262 free(namelist);
2263 }
2264
2265 return ret;
2266}
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index f30a37428919..478f5ab37787 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -3,10 +3,11 @@
3 3
4#include <linux/types.h> 4#include <linux/types.h>
5#include <stdbool.h> 5#include <stdbool.h>
6#include "types.h" 6#include <stdint.h>
7#include "map.h"
7#include <linux/list.h> 8#include <linux/list.h>
8#include <linux/rbtree.h> 9#include <linux/rbtree.h>
9#include "event.h" 10#include <stdio.h>
10 11
11#define DEBUG_CACHE_DIR ".debug" 12#define DEBUG_CACHE_DIR ".debug"
12 13
@@ -29,6 +30,9 @@ static inline char *bfd_demangle(void __used *v, const char __used *c,
29#endif 30#endif
30#endif 31#endif
31 32
33int hex2u64(const char *ptr, u64 *val);
34char *strxfrchar(char *s, char from, char to);
35
32/* 36/*
33 * libelf 0.8.x and earlier do not support ELF_C_READ_MMAP; 37 * libelf 0.8.x and earlier do not support ELF_C_READ_MMAP;
34 * for newer versions we can use mmap to reduce memory usage: 38 * for newer versions we can use mmap to reduce memory usage:
@@ -44,6 +48,8 @@ static inline char *bfd_demangle(void __used *v, const char __used *c,
44#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */ 48#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */
45#endif 49#endif
46 50
51#define BUILD_ID_SIZE 20
52
47struct symbol { 53struct symbol {
48 struct rb_node rb_node; 54 struct rb_node rb_node;
49 u64 start; 55 u64 start;
@@ -63,10 +69,15 @@ struct symbol_conf {
63 show_nr_samples, 69 show_nr_samples,
64 use_callchain, 70 use_callchain,
65 exclude_other, 71 exclude_other,
66 full_paths; 72 full_paths,
73 show_cpu_utilization;
67 const char *vmlinux_name, 74 const char *vmlinux_name,
68 *field_sep; 75 *field_sep;
69 char *dso_list_str, 76 const char *default_guest_vmlinux_name,
77 *default_guest_kallsyms,
78 *default_guest_modules;
79 const char *guestmount;
80 char *dso_list_str,
70 *comm_list_str, 81 *comm_list_str,
71 *sym_list_str, 82 *sym_list_str,
72 *col_width_list_str; 83 *col_width_list_str;
@@ -88,6 +99,11 @@ struct ref_reloc_sym {
88 u64 unrelocated_addr; 99 u64 unrelocated_addr;
89}; 100};
90 101
102struct map_symbol {
103 struct map *map;
104 struct symbol *sym;
105};
106
91struct addr_location { 107struct addr_location {
92 struct thread *thread; 108 struct thread *thread;
93 struct map *map; 109 struct map *map;
@@ -95,6 +111,13 @@ struct addr_location {
95 u64 addr; 111 u64 addr;
96 char level; 112 char level;
97 bool filtered; 113 bool filtered;
114 unsigned int cpumode;
115};
116
117enum dso_kernel_type {
118 DSO_TYPE_USER = 0,
119 DSO_TYPE_KERNEL,
120 DSO_TYPE_GUEST_KERNEL
98}; 121};
99 122
100struct dso { 123struct dso {
@@ -104,8 +127,9 @@ struct dso {
104 u8 adjust_symbols:1; 127 u8 adjust_symbols:1;
105 u8 slen_calculated:1; 128 u8 slen_calculated:1;
106 u8 has_build_id:1; 129 u8 has_build_id:1;
107 u8 kernel:1; 130 enum dso_kernel_type kernel;
108 u8 hit:1; 131 u8 hit:1;
132 u8 annotate_warned:1;
109 unsigned char origin; 133 unsigned char origin;
110 u8 sorted_by_name; 134 u8 sorted_by_name;
111 u8 loaded; 135 u8 loaded;
@@ -131,42 +155,60 @@ static inline void dso__set_loaded(struct dso *self, enum map_type type)
131 155
132void dso__sort_by_name(struct dso *self, enum map_type type); 156void dso__sort_by_name(struct dso *self, enum map_type type);
133 157
134extern struct list_head dsos__user, dsos__kernel;
135
136struct dso *__dsos__findnew(struct list_head *head, const char *name); 158struct dso *__dsos__findnew(struct list_head *head, const char *name);
137 159
138static inline struct dso *dsos__findnew(const char *name)
139{
140 return __dsos__findnew(&dsos__user, name);
141}
142
143int dso__load(struct dso *self, struct map *map, symbol_filter_t filter); 160int dso__load(struct dso *self, struct map *map, symbol_filter_t filter);
144int dso__load_vmlinux_path(struct dso *self, struct map *map, 161int dso__load_vmlinux_path(struct dso *self, struct map *map,
145 symbol_filter_t filter); 162 symbol_filter_t filter);
146int dso__load_kallsyms(struct dso *self, const char *filename, struct map *map, 163int dso__load_kallsyms(struct dso *self, const char *filename, struct map *map,
147 symbol_filter_t filter); 164 symbol_filter_t filter);
148void dsos__fprintf(FILE *fp); 165void dsos__fprintf(struct rb_root *kerninfo_root, FILE *fp);
149size_t dsos__fprintf_buildid(FILE *fp, bool with_hits); 166size_t dsos__fprintf_buildid(struct rb_root *kerninfo_root,
167 FILE *fp, bool with_hits);
150 168
151size_t dso__fprintf_buildid(struct dso *self, FILE *fp); 169size_t dso__fprintf_buildid(struct dso *self, FILE *fp);
152size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp); 170size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp);
171
172enum dso_origin {
173 DSO__ORIG_KERNEL = 0,
174 DSO__ORIG_GUEST_KERNEL,
175 DSO__ORIG_JAVA_JIT,
176 DSO__ORIG_BUILD_ID_CACHE,
177 DSO__ORIG_FEDORA,
178 DSO__ORIG_UBUNTU,
179 DSO__ORIG_BUILDID,
180 DSO__ORIG_DSO,
181 DSO__ORIG_GUEST_KMODULE,
182 DSO__ORIG_KMODULE,
183 DSO__ORIG_NOT_FOUND,
184};
185
153char dso__symtab_origin(const struct dso *self); 186char dso__symtab_origin(const struct dso *self);
154void dso__set_long_name(struct dso *self, char *name); 187void dso__set_long_name(struct dso *self, char *name);
155void dso__set_build_id(struct dso *self, void *build_id); 188void dso__set_build_id(struct dso *self, void *build_id);
156void dso__read_running_kernel_build_id(struct dso *self); 189void dso__read_running_kernel_build_id(struct dso *self,
190 struct kernel_info *kerninfo);
157struct symbol *dso__find_symbol(struct dso *self, enum map_type type, u64 addr); 191struct symbol *dso__find_symbol(struct dso *self, enum map_type type, u64 addr);
158struct symbol *dso__find_symbol_by_name(struct dso *self, enum map_type type, 192struct symbol *dso__find_symbol_by_name(struct dso *self, enum map_type type,
159 const char *name); 193 const char *name);
160 194
161int filename__read_build_id(const char *filename, void *bf, size_t size); 195int filename__read_build_id(const char *filename, void *bf, size_t size);
162int sysfs__read_build_id(const char *filename, void *bf, size_t size); 196int sysfs__read_build_id(const char *filename, void *bf, size_t size);
163bool dsos__read_build_ids(bool with_hits); 197bool __dsos__read_build_ids(struct list_head *head, bool with_hits);
164int build_id__sprintf(const u8 *self, int len, char *bf); 198int build_id__sprintf(const u8 *self, int len, char *bf);
165int kallsyms__parse(const char *filename, void *arg, 199int kallsyms__parse(const char *filename, void *arg,
166 int (*process_symbol)(void *arg, const char *name, 200 int (*process_symbol)(void *arg, const char *name,
167 char type, u64 start)); 201 char type, u64 start));
168 202
203int __map_groups__create_kernel_maps(struct map_groups *self,
204 struct map *vmlinux_maps[MAP__NR_TYPES],
205 struct dso *kernel);
206int map_groups__create_kernel_maps(struct rb_root *kerninfo_root, pid_t pid);
207int map_groups__create_guest_kernel_maps(struct rb_root *kerninfo_root);
208
169int symbol__init(void); 209int symbol__init(void);
170bool symbol_type__is_a(char symbol_type, enum map_type map_type); 210bool symbol_type__is_a(char symbol_type, enum map_type map_type);
171 211
212size_t vmlinux_path__fprintf(FILE *fp);
213
172#endif /* __PERF_SYMBOL */ 214#endif /* __PERF_SYMBOL */
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index fa968312ee7d..1f7ecd47f499 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -7,13 +7,35 @@
7#include "util.h" 7#include "util.h"
8#include "debug.h" 8#include "debug.h"
9 9
10void map_groups__init(struct map_groups *self) 10int find_all_tid(int pid, pid_t ** all_tid)
11{ 11{
12 char name[256];
13 int items;
14 struct dirent **namelist = NULL;
15 int ret = 0;
12 int i; 16 int i;
13 for (i = 0; i < MAP__NR_TYPES; ++i) { 17
14 self->maps[i] = RB_ROOT; 18 sprintf(name, "/proc/%d/task", pid);
15 INIT_LIST_HEAD(&self->removed_maps[i]); 19 items = scandir(name, &namelist, NULL, NULL);
20 if (items <= 0)
21 return -ENOENT;
22 *all_tid = malloc(sizeof(pid_t) * items);
23 if (!*all_tid) {
24 ret = -ENOMEM;
25 goto failure;
16 } 26 }
27
28 for (i = 0; i < items; i++)
29 (*all_tid)[i] = atoi(namelist[i]->d_name);
30
31 ret = items;
32
33failure:
34 for (i=0; i<items; i++)
35 free(namelist[i]);
36 free(namelist);
37
38 return ret;
17} 39}
18 40
19static struct thread *thread__new(pid_t pid) 41static struct thread *thread__new(pid_t pid)
@@ -31,28 +53,6 @@ static struct thread *thread__new(pid_t pid)
31 return self; 53 return self;
32} 54}
33 55
34static void map_groups__flush(struct map_groups *self)
35{
36 int type;
37
38 for (type = 0; type < MAP__NR_TYPES; type++) {
39 struct rb_root *root = &self->maps[type];
40 struct rb_node *next = rb_first(root);
41
42 while (next) {
43 struct map *pos = rb_entry(next, struct map, rb_node);
44 next = rb_next(&pos->rb_node);
45 rb_erase(&pos->rb_node, root);
46 /*
47 * We may have references to this map, for
48 * instance in some hist_entry instances, so
49 * just move them to a separate list.
50 */
51 list_add_tail(&pos->node, &self->removed_maps[pos->type]);
52 }
53 }
54}
55
56int thread__set_comm(struct thread *self, const char *comm) 56int thread__set_comm(struct thread *self, const char *comm)
57{ 57{
58 int err; 58 int err;
@@ -79,69 +79,10 @@ int thread__comm_len(struct thread *self)
79 return self->comm_len; 79 return self->comm_len;
80} 80}
81 81
82size_t __map_groups__fprintf_maps(struct map_groups *self,
83 enum map_type type, FILE *fp)
84{
85 size_t printed = fprintf(fp, "%s:\n", map_type__name[type]);
86 struct rb_node *nd;
87
88 for (nd = rb_first(&self->maps[type]); nd; nd = rb_next(nd)) {
89 struct map *pos = rb_entry(nd, struct map, rb_node);
90 printed += fprintf(fp, "Map:");
91 printed += map__fprintf(pos, fp);
92 if (verbose > 2) {
93 printed += dso__fprintf(pos->dso, type, fp);
94 printed += fprintf(fp, "--\n");
95 }
96 }
97
98 return printed;
99}
100
101size_t map_groups__fprintf_maps(struct map_groups *self, FILE *fp)
102{
103 size_t printed = 0, i;
104 for (i = 0; i < MAP__NR_TYPES; ++i)
105 printed += __map_groups__fprintf_maps(self, i, fp);
106 return printed;
107}
108
109static size_t __map_groups__fprintf_removed_maps(struct map_groups *self,
110 enum map_type type, FILE *fp)
111{
112 struct map *pos;
113 size_t printed = 0;
114
115 list_for_each_entry(pos, &self->removed_maps[type], node) {
116 printed += fprintf(fp, "Map:");
117 printed += map__fprintf(pos, fp);
118 if (verbose > 1) {
119 printed += dso__fprintf(pos->dso, type, fp);
120 printed += fprintf(fp, "--\n");
121 }
122 }
123 return printed;
124}
125
126static size_t map_groups__fprintf_removed_maps(struct map_groups *self, FILE *fp)
127{
128 size_t printed = 0, i;
129 for (i = 0; i < MAP__NR_TYPES; ++i)
130 printed += __map_groups__fprintf_removed_maps(self, i, fp);
131 return printed;
132}
133
134static size_t map_groups__fprintf(struct map_groups *self, FILE *fp)
135{
136 size_t printed = map_groups__fprintf_maps(self, fp);
137 printed += fprintf(fp, "Removed maps:\n");
138 return printed + map_groups__fprintf_removed_maps(self, fp);
139}
140
141static size_t thread__fprintf(struct thread *self, FILE *fp) 82static size_t thread__fprintf(struct thread *self, FILE *fp)
142{ 83{
143 return fprintf(fp, "Thread %d %s\n", self->pid, self->comm) + 84 return fprintf(fp, "Thread %d %s\n", self->pid, self->comm) +
144 map_groups__fprintf(&self->mg, fp); 85 map_groups__fprintf(&self->mg, verbose, fp);
145} 86}
146 87
147struct thread *perf_session__findnew(struct perf_session *self, pid_t pid) 88struct thread *perf_session__findnew(struct perf_session *self, pid_t pid)
@@ -183,127 +124,12 @@ struct thread *perf_session__findnew(struct perf_session *self, pid_t pid)
183 return th; 124 return th;
184} 125}
185 126
186static int map_groups__fixup_overlappings(struct map_groups *self,
187 struct map *map)
188{
189 struct rb_root *root = &self->maps[map->type];
190 struct rb_node *next = rb_first(root);
191
192 while (next) {
193 struct map *pos = rb_entry(next, struct map, rb_node);
194 next = rb_next(&pos->rb_node);
195
196 if (!map__overlap(pos, map))
197 continue;
198
199 if (verbose >= 2) {
200 fputs("overlapping maps:\n", stderr);
201 map__fprintf(map, stderr);
202 map__fprintf(pos, stderr);
203 }
204
205 rb_erase(&pos->rb_node, root);
206 /*
207 * We may have references to this map, for instance in some
208 * hist_entry instances, so just move them to a separate
209 * list.
210 */
211 list_add_tail(&pos->node, &self->removed_maps[map->type]);
212 /*
213 * Now check if we need to create new maps for areas not
214 * overlapped by the new map:
215 */
216 if (map->start > pos->start) {
217 struct map *before = map__clone(pos);
218
219 if (before == NULL)
220 return -ENOMEM;
221
222 before->end = map->start - 1;
223 map_groups__insert(self, before);
224 if (verbose >= 2)
225 map__fprintf(before, stderr);
226 }
227
228 if (map->end < pos->end) {
229 struct map *after = map__clone(pos);
230
231 if (after == NULL)
232 return -ENOMEM;
233
234 after->start = map->end + 1;
235 map_groups__insert(self, after);
236 if (verbose >= 2)
237 map__fprintf(after, stderr);
238 }
239 }
240
241 return 0;
242}
243
244void maps__insert(struct rb_root *maps, struct map *map)
245{
246 struct rb_node **p = &maps->rb_node;
247 struct rb_node *parent = NULL;
248 const u64 ip = map->start;
249 struct map *m;
250
251 while (*p != NULL) {
252 parent = *p;
253 m = rb_entry(parent, struct map, rb_node);
254 if (ip < m->start)
255 p = &(*p)->rb_left;
256 else
257 p = &(*p)->rb_right;
258 }
259
260 rb_link_node(&map->rb_node, parent, p);
261 rb_insert_color(&map->rb_node, maps);
262}
263
264struct map *maps__find(struct rb_root *maps, u64 ip)
265{
266 struct rb_node **p = &maps->rb_node;
267 struct rb_node *parent = NULL;
268 struct map *m;
269
270 while (*p != NULL) {
271 parent = *p;
272 m = rb_entry(parent, struct map, rb_node);
273 if (ip < m->start)
274 p = &(*p)->rb_left;
275 else if (ip > m->end)
276 p = &(*p)->rb_right;
277 else
278 return m;
279 }
280
281 return NULL;
282}
283
284void thread__insert_map(struct thread *self, struct map *map) 127void thread__insert_map(struct thread *self, struct map *map)
285{ 128{
286 map_groups__fixup_overlappings(&self->mg, map); 129 map_groups__fixup_overlappings(&self->mg, map, verbose, stderr);
287 map_groups__insert(&self->mg, map); 130 map_groups__insert(&self->mg, map);
288} 131}
289 132
290/*
291 * XXX This should not really _copy_ te maps, but refcount them.
292 */
293static int map_groups__clone(struct map_groups *self,
294 struct map_groups *parent, enum map_type type)
295{
296 struct rb_node *nd;
297 for (nd = rb_first(&parent->maps[type]); nd; nd = rb_next(nd)) {
298 struct map *map = rb_entry(nd, struct map, rb_node);
299 struct map *new = map__clone(map);
300 if (new == NULL)
301 return -ENOMEM;
302 map_groups__insert(self, new);
303 }
304 return 0;
305}
306
307int thread__fork(struct thread *self, struct thread *parent) 133int thread__fork(struct thread *self, struct thread *parent)
308{ 134{
309 int i; 135 int i;
@@ -336,15 +162,3 @@ size_t perf_session__fprintf(struct perf_session *self, FILE *fp)
336 162
337 return ret; 163 return ret;
338} 164}
339
340struct symbol *map_groups__find_symbol(struct map_groups *self,
341 enum map_type type, u64 addr,
342 symbol_filter_t filter)
343{
344 struct map *map = map_groups__find(self, type, addr);
345
346 if (map != NULL)
347 return map__find_symbol(map, map->map_ip(map, addr), filter);
348
349 return NULL;
350}
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index dcf70303e58e..1dfd9ff8bdcd 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -5,14 +5,6 @@
5#include <unistd.h> 5#include <unistd.h>
6#include "symbol.h" 6#include "symbol.h"
7 7
8struct map_groups {
9 struct rb_root maps[MAP__NR_TYPES];
10 struct list_head removed_maps[MAP__NR_TYPES];
11};
12
13size_t __map_groups__fprintf_maps(struct map_groups *self,
14 enum map_type type, FILE *fp);
15
16struct thread { 8struct thread {
17 struct rb_node rb_node; 9 struct rb_node rb_node;
18 struct map_groups mg; 10 struct map_groups mg;
@@ -23,29 +15,16 @@ struct thread {
23 int comm_len; 15 int comm_len;
24}; 16};
25 17
26void map_groups__init(struct map_groups *self); 18struct perf_session;
19
20int find_all_tid(int pid, pid_t ** all_tid);
27int thread__set_comm(struct thread *self, const char *comm); 21int thread__set_comm(struct thread *self, const char *comm);
28int thread__comm_len(struct thread *self); 22int thread__comm_len(struct thread *self);
29struct thread *perf_session__findnew(struct perf_session *self, pid_t pid); 23struct thread *perf_session__findnew(struct perf_session *self, pid_t pid);
30void thread__insert_map(struct thread *self, struct map *map); 24void thread__insert_map(struct thread *self, struct map *map);
31int thread__fork(struct thread *self, struct thread *parent); 25int thread__fork(struct thread *self, struct thread *parent);
32size_t map_groups__fprintf_maps(struct map_groups *self, FILE *fp);
33size_t perf_session__fprintf(struct perf_session *self, FILE *fp); 26size_t perf_session__fprintf(struct perf_session *self, FILE *fp);
34 27
35void maps__insert(struct rb_root *maps, struct map *map);
36struct map *maps__find(struct rb_root *maps, u64 addr);
37
38static inline void map_groups__insert(struct map_groups *self, struct map *map)
39{
40 maps__insert(&self->maps[map->type], map);
41}
42
43static inline struct map *map_groups__find(struct map_groups *self,
44 enum map_type type, u64 addr)
45{
46 return maps__find(&self->maps[type], addr);
47}
48
49static inline struct map *thread__find_map(struct thread *self, 28static inline struct map *thread__find_map(struct thread *self,
50 enum map_type type, u64 addr) 29 enum map_type type, u64 addr)
51{ 30{
@@ -54,34 +33,12 @@ static inline struct map *thread__find_map(struct thread *self,
54 33
55void thread__find_addr_map(struct thread *self, 34void thread__find_addr_map(struct thread *self,
56 struct perf_session *session, u8 cpumode, 35 struct perf_session *session, u8 cpumode,
57 enum map_type type, u64 addr, 36 enum map_type type, pid_t pid, u64 addr,
58 struct addr_location *al); 37 struct addr_location *al);
59 38
60void thread__find_addr_location(struct thread *self, 39void thread__find_addr_location(struct thread *self,
61 struct perf_session *session, u8 cpumode, 40 struct perf_session *session, u8 cpumode,
62 enum map_type type, u64 addr, 41 enum map_type type, pid_t pid, u64 addr,
63 struct addr_location *al, 42 struct addr_location *al,
64 symbol_filter_t filter); 43 symbol_filter_t filter);
65struct symbol *map_groups__find_symbol(struct map_groups *self,
66 enum map_type type, u64 addr,
67 symbol_filter_t filter);
68
69static inline struct symbol *map_groups__find_function(struct map_groups *self,
70 u64 addr,
71 symbol_filter_t filter)
72{
73 return map_groups__find_symbol(self, MAP__FUNCTION, addr, filter);
74}
75
76struct map *map_groups__find_by_name(struct map_groups *self,
77 enum map_type type, const char *name);
78
79int __map_groups__create_kernel_maps(struct map_groups *self,
80 struct map *vmlinux_maps[MAP__NR_TYPES],
81 struct dso *kernel);
82int map_groups__create_kernel_maps(struct map_groups *self,
83 struct map *vmlinux_maps[MAP__NR_TYPES]);
84
85struct map *map_groups__new_module(struct map_groups *self, u64 start,
86 const char *filename);
87#endif /* __PERF_THREAD_H */ 44#endif /* __PERF_THREAD_H */
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index 5ea8973ad331..30cd9b575953 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -154,10 +154,17 @@ static void put_tracing_file(char *file)
154 free(file); 154 free(file);
155} 155}
156 156
157static ssize_t calc_data_size;
158
157static ssize_t write_or_die(const void *buf, size_t len) 159static ssize_t write_or_die(const void *buf, size_t len)
158{ 160{
159 int ret; 161 int ret;
160 162
163 if (calc_data_size) {
164 calc_data_size += len;
165 return len;
166 }
167
161 ret = write(output_fd, buf, len); 168 ret = write(output_fd, buf, len);
162 if (ret < 0) 169 if (ret < 0)
163 die("writing to '%s'", output_file); 170 die("writing to '%s'", output_file);
@@ -526,3 +533,20 @@ int read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events)
526 533
527 return 0; 534 return 0;
528} 535}
536
537ssize_t read_tracing_data_size(int fd, struct perf_event_attr *pattrs,
538 int nb_events)
539{
540 ssize_t size;
541 int err = 0;
542
543 calc_data_size = 1;
544 err = read_tracing_data(fd, pattrs, nb_events);
545 size = calc_data_size - 1;
546 calc_data_size = 0;
547
548 if (err < 0)
549 return err;
550
551 return size;
552}
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 9b3c20f42f98..d6ef414075a6 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -37,10 +37,12 @@ int header_page_ts_offset;
37int header_page_ts_size; 37int header_page_ts_size;
38int header_page_size_offset; 38int header_page_size_offset;
39int header_page_size_size; 39int header_page_size_size;
40int header_page_overwrite_offset;
41int header_page_overwrite_size;
40int header_page_data_offset; 42int header_page_data_offset;
41int header_page_data_size; 43int header_page_data_size;
42 44
43int latency_format; 45bool latency_format;
44 46
45static char *input_buf; 47static char *input_buf;
46static unsigned long long input_buf_ptr; 48static unsigned long long input_buf_ptr;
@@ -628,23 +630,32 @@ static int test_type(enum event_type type, enum event_type expect)
628 return 0; 630 return 0;
629} 631}
630 632
631static int test_type_token(enum event_type type, char *token, 633static int __test_type_token(enum event_type type, char *token,
632 enum event_type expect, const char *expect_tok) 634 enum event_type expect, const char *expect_tok,
635 bool warn)
633{ 636{
634 if (type != expect) { 637 if (type != expect) {
635 warning("Error: expected type %d but read %d", 638 if (warn)
636 expect, type); 639 warning("Error: expected type %d but read %d",
640 expect, type);
637 return -1; 641 return -1;
638 } 642 }
639 643
640 if (strcmp(token, expect_tok) != 0) { 644 if (strcmp(token, expect_tok) != 0) {
641 warning("Error: expected '%s' but read '%s'", 645 if (warn)
642 expect_tok, token); 646 warning("Error: expected '%s' but read '%s'",
647 expect_tok, token);
643 return -1; 648 return -1;
644 } 649 }
645 return 0; 650 return 0;
646} 651}
647 652
653static int test_type_token(enum event_type type, char *token,
654 enum event_type expect, const char *expect_tok)
655{
656 return __test_type_token(type, token, expect, expect_tok, true);
657}
658
648static int __read_expect_type(enum event_type expect, char **tok, int newline_ok) 659static int __read_expect_type(enum event_type expect, char **tok, int newline_ok)
649{ 660{
650 enum event_type type; 661 enum event_type type;
@@ -661,7 +672,8 @@ static int read_expect_type(enum event_type expect, char **tok)
661 return __read_expect_type(expect, tok, 1); 672 return __read_expect_type(expect, tok, 1);
662} 673}
663 674
664static int __read_expected(enum event_type expect, const char *str, int newline_ok) 675static int __read_expected(enum event_type expect, const char *str,
676 int newline_ok, bool warn)
665{ 677{
666 enum event_type type; 678 enum event_type type;
667 char *token; 679 char *token;
@@ -672,21 +684,26 @@ static int __read_expected(enum event_type expect, const char *str, int newline_
672 else 684 else
673 type = read_token_item(&token); 685 type = read_token_item(&token);
674 686
675 ret = test_type_token(type, token, expect, str); 687 ret = __test_type_token(type, token, expect, str, warn);
676 688
677 free_token(token); 689 free_token(token);
678 690
679 return ret; 691 return ret;
680} 692}
681 693
694static int read_expected_warn(enum event_type expect, const char *str, bool warn)
695{
696 return __read_expected(expect, str, 1, warn);
697}
698
682static int read_expected(enum event_type expect, const char *str) 699static int read_expected(enum event_type expect, const char *str)
683{ 700{
684 return __read_expected(expect, str, 1); 701 return __read_expected(expect, str, 1, true);
685} 702}
686 703
687static int read_expected_item(enum event_type expect, const char *str) 704static int read_expected_item(enum event_type expect, const char *str)
688{ 705{
689 return __read_expected(expect, str, 0); 706 return __read_expected(expect, str, 0, true);
690} 707}
691 708
692static char *event_read_name(void) 709static char *event_read_name(void)
@@ -744,7 +761,7 @@ static int field_is_string(struct format_field *field)
744 761
745static int field_is_dynamic(struct format_field *field) 762static int field_is_dynamic(struct format_field *field)
746{ 763{
747 if (!strcmp(field->type, "__data_loc")) 764 if (!strncmp(field->type, "__data_loc", 10))
748 return 1; 765 return 1;
749 766
750 return 0; 767 return 0;
@@ -3088,7 +3105,7 @@ static void print_args(struct print_arg *args)
3088} 3105}
3089 3106
3090static void parse_header_field(const char *field, 3107static void parse_header_field(const char *field,
3091 int *offset, int *size) 3108 int *offset, int *size, bool warn)
3092{ 3109{
3093 char *token; 3110 char *token;
3094 int type; 3111 int type;
@@ -3103,7 +3120,7 @@ static void parse_header_field(const char *field,
3103 goto fail; 3120 goto fail;
3104 free_token(token); 3121 free_token(token);
3105 3122
3106 if (read_expected(EVENT_ITEM, field) < 0) 3123 if (read_expected_warn(EVENT_ITEM, field, warn) < 0)
3107 return; 3124 return;
3108 if (read_expected(EVENT_OP, ";") < 0) 3125 if (read_expected(EVENT_OP, ";") < 0)
3109 return; 3126 return;
@@ -3160,11 +3177,13 @@ int parse_header_page(char *buf, unsigned long size)
3160 init_input_buf(buf, size); 3177 init_input_buf(buf, size);
3161 3178
3162 parse_header_field("timestamp", &header_page_ts_offset, 3179 parse_header_field("timestamp", &header_page_ts_offset,
3163 &header_page_ts_size); 3180 &header_page_ts_size, true);
3164 parse_header_field("commit", &header_page_size_offset, 3181 parse_header_field("commit", &header_page_size_offset,
3165 &header_page_size_size); 3182 &header_page_size_size, true);
3183 parse_header_field("overwrite", &header_page_overwrite_offset,
3184 &header_page_overwrite_size, false);
3166 parse_header_field("data", &header_page_data_offset, 3185 parse_header_field("data", &header_page_data_offset,
3167 &header_page_data_size); 3186 &header_page_data_size, true);
3168 3187
3169 return 0; 3188 return 0;
3170} 3189}
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index 7cd1193918c7..44889c9b5630 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -50,14 +50,37 @@ static int long_size;
50 50
51static unsigned long page_size; 51static unsigned long page_size;
52 52
53static ssize_t calc_data_size;
54
55static int do_read(int fd, void *buf, int size)
56{
57 int rsize = size;
58
59 while (size) {
60 int ret = read(fd, buf, size);
61
62 if (ret <= 0)
63 return -1;
64
65 size -= ret;
66 buf += ret;
67 }
68
69 return rsize;
70}
71
53static int read_or_die(void *data, int size) 72static int read_or_die(void *data, int size)
54{ 73{
55 int r; 74 int r;
56 75
57 r = read(input_fd, data, size); 76 r = do_read(input_fd, data, size);
58 if (r != size) 77 if (r <= 0)
59 die("reading input file (size expected=%d received=%d)", 78 die("reading input file (size expected=%d received=%d)",
60 size, r); 79 size, r);
80
81 if (calc_data_size)
82 calc_data_size += r;
83
61 return r; 84 return r;
62} 85}
63 86
@@ -82,56 +105,28 @@ static char *read_string(void)
82 char buf[BUFSIZ]; 105 char buf[BUFSIZ];
83 char *str = NULL; 106 char *str = NULL;
84 int size = 0; 107 int size = 0;
85 int i;
86 off_t r; 108 off_t r;
109 char c;
87 110
88 for (;;) { 111 for (;;) {
89 r = read(input_fd, buf, BUFSIZ); 112 r = read(input_fd, &c, 1);
90 if (r < 0) 113 if (r < 0)
91 die("reading input file"); 114 die("reading input file");
92 115
93 if (!r) 116 if (!r)
94 die("no data"); 117 die("no data");
95 118
96 for (i = 0; i < r; i++) { 119 buf[size++] = c;
97 if (!buf[i])
98 break;
99 }
100 if (i < r)
101 break;
102 120
103 if (str) { 121 if (!c)
104 size += BUFSIZ; 122 break;
105 str = realloc(str, size);
106 if (!str)
107 die("malloc of size %d", size);
108 memcpy(str + (size - BUFSIZ), buf, BUFSIZ);
109 } else {
110 size = BUFSIZ;
111 str = malloc_or_die(size);
112 memcpy(str, buf, size);
113 }
114 } 123 }
115 124
116 /* trailing \0: */ 125 if (calc_data_size)
117 i++; 126 calc_data_size += size;
118 127
119 /* move the file descriptor to the end of the string */ 128 str = malloc_or_die(size);
120 r = lseek(input_fd, -(r - i), SEEK_CUR); 129 memcpy(str, buf, size);
121 if (r == (off_t)-1)
122 die("lseek");
123
124 if (str) {
125 size += i;
126 str = realloc(str, size);
127 if (!str)
128 die("malloc of size %d", size);
129 memcpy(str + (size - i), buf, i);
130 } else {
131 size = i;
132 str = malloc_or_die(i);
133 memcpy(str, buf, i);
134 }
135 130
136 return str; 131 return str;
137} 132}
@@ -459,7 +454,7 @@ struct record *trace_read_data(int cpu)
459 return data; 454 return data;
460} 455}
461 456
462void trace_report(int fd) 457ssize_t trace_report(int fd)
463{ 458{
464 char buf[BUFSIZ]; 459 char buf[BUFSIZ];
465 char test[] = { 23, 8, 68 }; 460 char test[] = { 23, 8, 68 };
@@ -467,6 +462,9 @@ void trace_report(int fd)
467 int show_version = 0; 462 int show_version = 0;
468 int show_funcs = 0; 463 int show_funcs = 0;
469 int show_printk = 0; 464 int show_printk = 0;
465 ssize_t size;
466
467 calc_data_size = 1;
470 468
471 input_fd = fd; 469 input_fd = fd;
472 470
@@ -499,14 +497,17 @@ void trace_report(int fd)
499 read_proc_kallsyms(); 497 read_proc_kallsyms();
500 read_ftrace_printk(); 498 read_ftrace_printk();
501 499
500 size = calc_data_size - 1;
501 calc_data_size = 0;
502
502 if (show_funcs) { 503 if (show_funcs) {
503 print_funcs(); 504 print_funcs();
504 return; 505 return size;
505 } 506 }
506 if (show_printk) { 507 if (show_printk) {
507 print_printk(); 508 print_printk();
508 return; 509 return size;
509 } 510 }
510 511
511 return; 512 return size;
512} 513}
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index c3269b937db4..1f45d468fd9a 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -1,6 +1,7 @@
1#ifndef __PERF_TRACE_EVENTS_H 1#ifndef __PERF_TRACE_EVENTS_H
2#define __PERF_TRACE_EVENTS_H 2#define __PERF_TRACE_EVENTS_H
3 3
4#include <stdbool.h>
4#include "parse-events.h" 5#include "parse-events.h"
5 6
6#define __unused __attribute__((unused)) 7#define __unused __attribute__((unused))
@@ -162,7 +163,7 @@ struct record *trace_read_data(int cpu);
162 163
163void parse_set_info(int nr_cpus, int long_sz); 164void parse_set_info(int nr_cpus, int long_sz);
164 165
165void trace_report(int fd); 166ssize_t trace_report(int fd);
166 167
167void *malloc_or_die(unsigned int size); 168void *malloc_or_die(unsigned int size);
168 169
@@ -241,7 +242,7 @@ extern int header_page_size_size;
241extern int header_page_data_offset; 242extern int header_page_data_offset;
242extern int header_page_data_size; 243extern int header_page_data_size;
243 244
244extern int latency_format; 245extern bool latency_format;
245 246
246int parse_header_page(char *buf, unsigned long size); 247int parse_header_page(char *buf, unsigned long size);
247int trace_parse_common_type(void *data); 248int trace_parse_common_type(void *data);
@@ -258,6 +259,8 @@ void *raw_field_ptr(struct event *event, const char *name, void *data);
258unsigned long long eval_flag(const char *flag); 259unsigned long long eval_flag(const char *flag);
259 260
260int read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events); 261int read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events);
262ssize_t read_tracing_data_size(int fd, struct perf_event_attr *pattrs,
263 int nb_events);
261 264
262/* taken from kernel/trace/trace.h */ 265/* taken from kernel/trace/trace.h */
263enum trace_flag_type { 266enum trace_flag_type {
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 0f5b2a6f1080..fbf45d1b26f7 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -42,12 +42,14 @@
42#define _ALL_SOURCE 1 42#define _ALL_SOURCE 1
43#define _GNU_SOURCE 1 43#define _GNU_SOURCE 1
44#define _BSD_SOURCE 1 44#define _BSD_SOURCE 1
45#define HAS_BOOL
45 46
46#include <unistd.h> 47#include <unistd.h>
47#include <stdio.h> 48#include <stdio.h>
48#include <sys/stat.h> 49#include <sys/stat.h>
49#include <sys/statfs.h> 50#include <sys/statfs.h>
50#include <fcntl.h> 51#include <fcntl.h>
52#include <stdbool.h>
51#include <stddef.h> 53#include <stddef.h>
52#include <stdlib.h> 54#include <stdlib.h>
53#include <stdarg.h> 55#include <stdarg.h>
@@ -78,6 +80,7 @@
78#include <pwd.h> 80#include <pwd.h>
79#include <inttypes.h> 81#include <inttypes.h>
80#include "../../../include/linux/magic.h" 82#include "../../../include/linux/magic.h"
83#include "types.h"
81 84
82 85
83#ifndef NO_ICONV 86#ifndef NO_ICONV
@@ -295,6 +298,13 @@ extern void *xmemdupz(const void *data, size_t len);
295extern char *xstrndup(const char *str, size_t len); 298extern char *xstrndup(const char *str, size_t len);
296extern void *xrealloc(void *ptr, size_t size) __attribute__((weak)); 299extern void *xrealloc(void *ptr, size_t size) __attribute__((weak));
297 300
301static inline void *xzalloc(size_t size)
302{
303 void *buf = xmalloc(size);
304
305 return memset(buf, 0, size);
306}
307
298static inline void *zalloc(size_t size) 308static inline void *zalloc(size_t size)
299{ 309{
300 return calloc(1, size); 310 return calloc(1, size);
@@ -309,6 +319,7 @@ static inline int has_extension(const char *filename, const char *ext)
309{ 319{
310 size_t len = strlen(filename); 320 size_t len = strlen(filename);
311 size_t extlen = strlen(ext); 321 size_t extlen = strlen(ext);
322
312 return len > extlen && !memcmp(filename + len - extlen, ext, extlen); 323 return len > extlen && !memcmp(filename + len - extlen, ext, extlen);
313} 324}
314 325
@@ -322,6 +333,7 @@ static inline int has_extension(const char *filename, const char *ext)
322#undef isalnum 333#undef isalnum
323#undef tolower 334#undef tolower
324#undef toupper 335#undef toupper
336
325extern unsigned char sane_ctype[256]; 337extern unsigned char sane_ctype[256];
326#define GIT_SPACE 0x01 338#define GIT_SPACE 0x01
327#define GIT_DIGIT 0x02 339#define GIT_DIGIT 0x02
@@ -406,4 +418,13 @@ void git_qsort(void *base, size_t nmemb, size_t size,
406int mkdir_p(char *path, mode_t mode); 418int mkdir_p(char *path, mode_t mode);
407int copyfile(const char *from, const char *to); 419int copyfile(const char *from, const char *to);
408 420
421s64 perf_atoll(const char *str);
422char **argv_split(const char *str, int *argcp);
423void argv_free(char **argv);
424bool strglobmatch(const char *str, const char *pat);
425bool strlazymatch(const char *str, const char *pat);
426
427#define _STR(x) #x
428#define STR(x) _STR(x)
429
409#endif 430#endif