aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/lockup-watchdogs.txt63
-rw-r--r--Documentation/nmi_watchdog.txt83
-rw-r--r--arch/Kconfig17
-rw-r--r--arch/arm/include/asm/perf_event.h4
-rw-r--r--arch/frv/include/asm/perf_event.h2
-rw-r--r--arch/hexagon/include/asm/perf_event.h2
-rw-r--r--arch/powerpc/include/asm/perf_event_server.h2
-rw-r--r--arch/powerpc/kernel/perf_event.c6
-rw-r--r--arch/s390/include/asm/perf_event.h1
-rw-r--r--arch/x86/Kconfig5
-rw-r--r--arch/x86/include/asm/inat.h5
-rw-r--r--arch/x86/include/asm/insn.h18
-rw-r--r--arch/x86/include/asm/perf_event.h2
-rw-r--r--arch/x86/include/asm/uprobes.h43
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/cpu/amd.c3
-rw-r--r--arch/x86/kernel/cpu/perf_event.c82
-rw-r--r--arch/x86/kernel/cpu/perf_event.h8
-rw-r--r--arch/x86/kernel/uprobes.c423
-rw-r--r--arch/x86/lib/inat.c36
-rw-r--r--arch/x86/lib/insn.c13
-rw-r--r--include/linux/jump_label.h32
-rw-r--r--include/linux/perf_event.h11
-rw-r--r--include/linux/uprobes.h76
-rw-r--r--include/trace/events/signal.h85
-rw-r--r--kernel/events/Makefile3
-rw-r--r--kernel/events/core.c53
-rw-r--r--kernel/events/hw_breakpoint.c7
-rw-r--r--kernel/events/uprobes.c1029
-rw-r--r--kernel/signal.c28
-rw-r--r--kernel/watchdog.c24
-rw-r--r--lib/Kconfig.debug18
-rw-r--r--mm/mmap.c23
-rw-r--r--tools/perf/Documentation/Makefile86
-rw-r--r--tools/perf/Documentation/perf-lock.txt20
-rw-r--r--tools/perf/Documentation/perf-record.txt8
-rw-r--r--tools/perf/Documentation/perf-script.txt5
-rw-r--r--tools/perf/Documentation/perf-stat.txt4
-rw-r--r--tools/perf/Documentation/perf-top.txt8
-rw-r--r--tools/perf/MANIFEST1
-rw-r--r--tools/perf/Makefile12
-rw-r--r--tools/perf/bench/bench.h1
-rw-r--r--tools/perf/bench/mem-memcpy-x86-64-asm-def.h8
-rw-r--r--tools/perf/bench/mem-memcpy-x86-64-asm.S6
-rw-r--r--tools/perf/bench/mem-memcpy.c12
-rw-r--r--tools/perf/bench/mem-memset-arch.h12
-rw-r--r--tools/perf/bench/mem-memset-x86-64-asm-def.h12
-rw-r--r--tools/perf/bench/mem-memset-x86-64-asm.S13
-rw-r--r--tools/perf/bench/mem-memset.c297
-rw-r--r--tools/perf/builtin-bench.c3
-rw-r--r--tools/perf/builtin-lock.c4
-rw-r--r--tools/perf/builtin-probe.c12
-rw-r--r--tools/perf/builtin-record.c86
-rw-r--r--tools/perf/builtin-script.c80
-rw-r--r--tools/perf/builtin-stat.c41
-rw-r--r--tools/perf/builtin-test.c188
-rw-r--r--tools/perf/builtin-top.c64
-rw-r--r--tools/perf/perf.h9
-rwxr-xr-xtools/perf/python/twatch.py2
-rw-r--r--tools/perf/util/bitmap.c10
-rw-r--r--tools/perf/util/cpumap.c11
-rw-r--r--tools/perf/util/cpumap.h4
-rw-r--r--tools/perf/util/ctype.c2
-rw-r--r--tools/perf/util/debugfs.c141
-rw-r--r--tools/perf/util/debugfs.h6
-rw-r--r--tools/perf/util/evlist.c16
-rw-r--r--tools/perf/util/evlist.h9
-rw-r--r--tools/perf/util/evsel.c8
-rw-r--r--tools/perf/util/header.c421
-rw-r--r--tools/perf/util/header.h1
-rw-r--r--tools/perf/util/hist.h2
-rw-r--r--tools/perf/util/include/asm/dwarf2.h4
-rw-r--r--tools/perf/util/include/linux/bitmap.h11
-rw-r--r--tools/perf/util/map.c15
-rw-r--r--tools/perf/util/map.h1
-rw-r--r--tools/perf/util/probe-event.c32
-rw-r--r--tools/perf/util/probe-finder.c1
-rw-r--r--tools/perf/util/python-ext-sources19
-rw-r--r--tools/perf/util/python.c10
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c1
-rw-r--r--tools/perf/util/session.c49
-rw-r--r--tools/perf/util/session.h2
-rw-r--r--tools/perf/util/setup.py8
-rw-r--r--tools/perf/util/symbol.c24
-rw-r--r--tools/perf/util/symbol.h4
-rw-r--r--tools/perf/util/sysfs.c60
-rw-r--r--tools/perf/util/sysfs.h6
-rw-r--r--tools/perf/util/thread_map.c237
-rw-r--r--tools/perf/util/thread_map.h11
-rw-r--r--tools/perf/util/top.c13
-rw-r--r--tools/perf/util/top.h7
-rw-r--r--tools/perf/util/trace-event-parse.c1
-rw-r--r--tools/perf/util/trace-event-read.c1
-rw-r--r--tools/perf/util/trace-event-scripting.c1
-rw-r--r--tools/perf/util/ui/browsers/hists.c3
-rw-r--r--tools/perf/util/ui/browsers/map.c2
-rw-r--r--tools/perf/util/usage.c39
-rw-r--r--tools/perf/util/util.c4
-rw-r--r--tools/perf/util/util.h6
99 files changed, 3601 insertions, 804 deletions
diff --git a/Documentation/lockup-watchdogs.txt b/Documentation/lockup-watchdogs.txt
new file mode 100644
index 00000000000..d2a36602ca8
--- /dev/null
+++ b/Documentation/lockup-watchdogs.txt
@@ -0,0 +1,63 @@
1===============================================================
2Softlockup detector and hardlockup detector (aka nmi_watchdog)
3===============================================================
4
5The Linux kernel can act as a watchdog to detect both soft and hard
6lockups.
7
8A 'softlockup' is defined as a bug that causes the kernel to loop in
9kernel mode for more than 20 seconds (see "Implementation" below for
10details), without giving other tasks a chance to run. The current
11stack trace is displayed upon detection and, by default, the system
12will stay locked up. Alternatively, the kernel can be configured to
13panic; a sysctl, "kernel.softlockup_panic", a kernel parameter,
14"softlockup_panic" (see "Documentation/kernel-parameters.txt" for
15details), and a compile option, "BOOTPARAM_HARDLOCKUP_PANIC", are
16provided for this.
17
18A 'hardlockup' is defined as a bug that causes the CPU to loop in
19kernel mode for more than 10 seconds (see "Implementation" below for
20details), without letting other interrupts have a chance to run.
21Similarly to the softlockup case, the current stack trace is displayed
22upon detection and the system will stay locked up unless the default
23behavior is changed, which can be done through a compile time knob,
24"BOOTPARAM_HARDLOCKUP_PANIC", and a kernel parameter, "nmi_watchdog"
25(see "Documentation/kernel-parameters.txt" for details).
26
27The panic option can be used in combination with panic_timeout (this
28timeout is set through the confusingly named "kernel.panic" sysctl),
29to cause the system to reboot automatically after a specified amount
30of time.
31
32=== Implementation ===
33
34The soft and hard lockup detectors are built on top of the hrtimer and
35perf subsystems, respectively. A direct consequence of this is that,
36in principle, they should work in any architecture where these
37subsystems are present.
38
39A periodic hrtimer runs to generate interrupts and kick the watchdog
40task. An NMI perf event is generated every "watchdog_thresh"
41(compile-time initialized to 10 and configurable through sysctl of the
42same name) seconds to check for hardlockups. If any CPU in the system
43does not receive any hrtimer interrupt during that time the
44'hardlockup detector' (the handler for the NMI perf event) will
45generate a kernel warning or call panic, depending on the
46configuration.
47
48The watchdog task is a high priority kernel thread that updates a
49timestamp every time it is scheduled. If that timestamp is not updated
50for 2*watchdog_thresh seconds (the softlockup threshold) the
51'softlockup detector' (coded inside the hrtimer callback function)
52will dump useful debug information to the system log, after which it
53will call panic if it was instructed to do so or resume execution of
54other kernel code.
55
56The period of the hrtimer is 2*watchdog_thresh/5, which means it has
57two or three chances to generate an interrupt before the hardlockup
58detector kicks in.
59
60As explained above, a kernel knob is provided that allows
61administrators to configure the period of the hrtimer and the perf
62event. The right value for a particular environment is a trade-off
63between fast response to lockups and detection overhead.
diff --git a/Documentation/nmi_watchdog.txt b/Documentation/nmi_watchdog.txt
deleted file mode 100644
index bf9f80a9828..00000000000
--- a/Documentation/nmi_watchdog.txt
+++ /dev/null
@@ -1,83 +0,0 @@
1
2[NMI watchdog is available for x86 and x86-64 architectures]
3
4Is your system locking up unpredictably? No keyboard activity, just
5a frustrating complete hard lockup? Do you want to help us debugging
6such lockups? If all yes then this document is definitely for you.
7
8On many x86/x86-64 type hardware there is a feature that enables
9us to generate 'watchdog NMI interrupts'. (NMI: Non Maskable Interrupt
10which get executed even if the system is otherwise locked up hard).
11This can be used to debug hard kernel lockups. By executing periodic
12NMI interrupts, the kernel can monitor whether any CPU has locked up,
13and print out debugging messages if so.
14
15In order to use the NMI watchdog, you need to have APIC support in your
16kernel. For SMP kernels, APIC support gets compiled in automatically. For
17UP, enable either CONFIG_X86_UP_APIC (Processor type and features -> Local
18APIC support on uniprocessors) or CONFIG_X86_UP_IOAPIC (Processor type and
19features -> IO-APIC support on uniprocessors) in your kernel config.
20CONFIG_X86_UP_APIC is for uniprocessor machines without an IO-APIC.
21CONFIG_X86_UP_IOAPIC is for uniprocessor with an IO-APIC. [Note: certain
22kernel debugging options, such as Kernel Stack Meter or Kernel Tracer,
23may implicitly disable the NMI watchdog.]
24
25For x86-64, the needed APIC is always compiled in.
26
27Using local APIC (nmi_watchdog=2) needs the first performance register, so
28you can't use it for other purposes (such as high precision performance
29profiling.) However, at least oprofile and the perfctr driver disable the
30local APIC NMI watchdog automatically.
31
32To actually enable the NMI watchdog, use the 'nmi_watchdog=N' boot
33parameter. Eg. the relevant lilo.conf entry:
34
35 append="nmi_watchdog=1"
36
37For SMP machines and UP machines with an IO-APIC use nmi_watchdog=1.
38For UP machines without an IO-APIC use nmi_watchdog=2, this only works
39for some processor types. If in doubt, boot with nmi_watchdog=1 and
40check the NMI count in /proc/interrupts; if the count is zero then
41reboot with nmi_watchdog=2 and check the NMI count. If it is still
42zero then log a problem, you probably have a processor that needs to be
43added to the nmi code.
44
45A 'lockup' is the following scenario: if any CPU in the system does not
46execute the period local timer interrupt for more than 5 seconds, then
47the NMI handler generates an oops and kills the process. This
48'controlled crash' (and the resulting kernel messages) can be used to
49debug the lockup. Thus whenever the lockup happens, wait 5 seconds and
50the oops will show up automatically. If the kernel produces no messages
51then the system has crashed so hard (eg. hardware-wise) that either it
52cannot even accept NMI interrupts, or the crash has made the kernel
53unable to print messages.
54
55Be aware that when using local APIC, the frequency of NMI interrupts
56it generates, depends on the system load. The local APIC NMI watchdog,
57lacking a better source, uses the "cycles unhalted" event. As you may
58guess it doesn't tick when the CPU is in the halted state (which happens
59when the system is idle), but if your system locks up on anything but the
60"hlt" processor instruction, the watchdog will trigger very soon as the
61"cycles unhalted" event will happen every clock tick. If it locks up on
62"hlt", then you are out of luck -- the event will not happen at all and the
63watchdog won't trigger. This is a shortcoming of the local APIC watchdog
64-- unfortunately there is no "clock ticks" event that would work all the
65time. The I/O APIC watchdog is driven externally and has no such shortcoming.
66But its NMI frequency is much higher, resulting in a more significant hit
67to the overall system performance.
68
69On x86 nmi_watchdog is disabled by default so you have to enable it with
70a boot time parameter.
71
72It's possible to disable the NMI watchdog in run-time by writing "0" to
73/proc/sys/kernel/nmi_watchdog. Writing "1" to the same file will re-enable
74the NMI watchdog. Notice that you still need to use "nmi_watchdog=" parameter
75at boot time.
76
77NOTE: In kernels prior to 2.4.2-ac18 the NMI-oopser is enabled unconditionally
78on x86 SMP boxes.
79
80[ feel free to send bug reports, suggestions and patches to
81 Ingo Molnar <mingo@redhat.com> or the Linux SMP mailing
82 list at <linux-smp@vger.kernel.org> ]
83
diff --git a/arch/Kconfig b/arch/Kconfig
index 4f55c736be1..d0e37c9d5f6 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -65,6 +65,23 @@ config OPTPROBES
65 depends on KPROBES && HAVE_OPTPROBES 65 depends on KPROBES && HAVE_OPTPROBES
66 depends on !PREEMPT 66 depends on !PREEMPT
67 67
68config UPROBES
69 bool "Transparent user-space probes (EXPERIMENTAL)"
70 depends on ARCH_SUPPORTS_UPROBES && PERF_EVENTS
71 default n
72 help
73 Uprobes is the user-space counterpart to kprobes: they
74 enable instrumentation applications (such as 'perf probe')
75 to establish unintrusive probes in user-space binaries and
76 libraries, by executing handler functions when the probes
77 are hit by user-space applications.
78
79 ( These probes come in the form of single-byte breakpoints,
80 managed by the kernel and kept transparent to the probed
81 application. )
82
83 If in doubt, say "N".
84
68config HAVE_EFFICIENT_UNALIGNED_ACCESS 85config HAVE_EFFICIENT_UNALIGNED_ACCESS
69 bool 86 bool
70 help 87 help
diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h
index 99cfe360798..7523340afb8 100644
--- a/arch/arm/include/asm/perf_event.h
+++ b/arch/arm/include/asm/perf_event.h
@@ -12,10 +12,6 @@
12#ifndef __ARM_PERF_EVENT_H__ 12#ifndef __ARM_PERF_EVENT_H__
13#define __ARM_PERF_EVENT_H__ 13#define __ARM_PERF_EVENT_H__
14 14
15/* ARM performance counters start from 1 (in the cp15 accesses) so use the
16 * same indexes here for consistency. */
17#define PERF_EVENT_INDEX_OFFSET 1
18
19/* ARM perf PMU IDs for use by internal perf clients. */ 15/* ARM perf PMU IDs for use by internal perf clients. */
20enum arm_perf_pmu_ids { 16enum arm_perf_pmu_ids {
21 ARM_PERF_PMU_ID_XSCALE1 = 0, 17 ARM_PERF_PMU_ID_XSCALE1 = 0,
diff --git a/arch/frv/include/asm/perf_event.h b/arch/frv/include/asm/perf_event.h
index a69e0155d14..c52ea5546b5 100644
--- a/arch/frv/include/asm/perf_event.h
+++ b/arch/frv/include/asm/perf_event.h
@@ -12,6 +12,4 @@
12#ifndef _ASM_PERF_EVENT_H 12#ifndef _ASM_PERF_EVENT_H
13#define _ASM_PERF_EVENT_H 13#define _ASM_PERF_EVENT_H
14 14
15#define PERF_EVENT_INDEX_OFFSET 0
16
17#endif /* _ASM_PERF_EVENT_H */ 15#endif /* _ASM_PERF_EVENT_H */
diff --git a/arch/hexagon/include/asm/perf_event.h b/arch/hexagon/include/asm/perf_event.h
index 6c2910f9118..8b8526b491c 100644
--- a/arch/hexagon/include/asm/perf_event.h
+++ b/arch/hexagon/include/asm/perf_event.h
@@ -19,6 +19,4 @@
19#ifndef _ASM_PERF_EVENT_H 19#ifndef _ASM_PERF_EVENT_H
20#define _ASM_PERF_EVENT_H 20#define _ASM_PERF_EVENT_H
21 21
22#define PERF_EVENT_INDEX_OFFSET 0
23
24#endif /* _ASM_PERF_EVENT_H */ 22#endif /* _ASM_PERF_EVENT_H */
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index 8f1df1208d2..1a8093fa8f7 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -61,8 +61,6 @@ struct pt_regs;
61extern unsigned long perf_misc_flags(struct pt_regs *regs); 61extern unsigned long perf_misc_flags(struct pt_regs *regs);
62extern unsigned long perf_instruction_pointer(struct pt_regs *regs); 62extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
63 63
64#define PERF_EVENT_INDEX_OFFSET 1
65
66/* 64/*
67 * Only override the default definitions in include/linux/perf_event.h 65 * Only override the default definitions in include/linux/perf_event.h
68 * if we have hardware PMU support. 66 * if we have hardware PMU support.
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 64483fde95c..f04c2301725 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -1193,6 +1193,11 @@ static int power_pmu_event_init(struct perf_event *event)
1193 return err; 1193 return err;
1194} 1194}
1195 1195
1196static int power_pmu_event_idx(struct perf_event *event)
1197{
1198 return event->hw.idx;
1199}
1200
1196struct pmu power_pmu = { 1201struct pmu power_pmu = {
1197 .pmu_enable = power_pmu_enable, 1202 .pmu_enable = power_pmu_enable,
1198 .pmu_disable = power_pmu_disable, 1203 .pmu_disable = power_pmu_disable,
@@ -1205,6 +1210,7 @@ struct pmu power_pmu = {
1205 .start_txn = power_pmu_start_txn, 1210 .start_txn = power_pmu_start_txn,
1206 .cancel_txn = power_pmu_cancel_txn, 1211 .cancel_txn = power_pmu_cancel_txn,
1207 .commit_txn = power_pmu_commit_txn, 1212 .commit_txn = power_pmu_commit_txn,
1213 .event_idx = power_pmu_event_idx,
1208}; 1214};
1209 1215
1210/* 1216/*
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index a75f168d271..4eb444edbe4 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -6,4 +6,3 @@
6 6
7/* Empty, just to avoid compiling error */ 7/* Empty, just to avoid compiling error */
8 8
9#define PERF_EVENT_INDEX_OFFSET 0
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e2b38b4bffd..d2a540f7d6c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -84,7 +84,7 @@ config X86
84 select GENERIC_IOMAP 84 select GENERIC_IOMAP
85 85
86config INSTRUCTION_DECODER 86config INSTRUCTION_DECODER
87 def_bool (KPROBES || PERF_EVENTS) 87 def_bool (KPROBES || PERF_EVENTS || UPROBES)
88 88
89config OUTPUT_FORMAT 89config OUTPUT_FORMAT
90 string 90 string
@@ -240,6 +240,9 @@ config ARCH_CPU_PROBE_RELEASE
240 def_bool y 240 def_bool y
241 depends on HOTPLUG_CPU 241 depends on HOTPLUG_CPU
242 242
243config ARCH_SUPPORTS_UPROBES
244 def_bool y
245
243source "init/Kconfig" 246source "init/Kconfig"
244source "kernel/Kconfig.freezer" 247source "kernel/Kconfig.freezer"
245 248
diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h
index 205b063e3e3..74a2e312e8a 100644
--- a/arch/x86/include/asm/inat.h
+++ b/arch/x86/include/asm/inat.h
@@ -97,11 +97,12 @@
97 97
98/* Attribute search APIs */ 98/* Attribute search APIs */
99extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); 99extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);
100extern int inat_get_last_prefix_id(insn_byte_t last_pfx);
100extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, 101extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode,
101 insn_byte_t last_pfx, 102 int lpfx_id,
102 insn_attr_t esc_attr); 103 insn_attr_t esc_attr);
103extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, 104extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm,
104 insn_byte_t last_pfx, 105 int lpfx_id,
105 insn_attr_t esc_attr); 106 insn_attr_t esc_attr);
106extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, 107extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode,
107 insn_byte_t vex_m, 108 insn_byte_t vex_m,
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 74df3f1eddf..48eb30a8606 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -96,12 +96,6 @@ struct insn {
96#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ 96#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */
97#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ 97#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */
98 98
99/* The last prefix is needed for two-byte and three-byte opcodes */
100static inline insn_byte_t insn_last_prefix(struct insn *insn)
101{
102 return insn->prefixes.bytes[3];
103}
104
105extern void insn_init(struct insn *insn, const void *kaddr, int x86_64); 99extern void insn_init(struct insn *insn, const void *kaddr, int x86_64);
106extern void insn_get_prefixes(struct insn *insn); 100extern void insn_get_prefixes(struct insn *insn);
107extern void insn_get_opcode(struct insn *insn); 101extern void insn_get_opcode(struct insn *insn);
@@ -160,6 +154,18 @@ static inline insn_byte_t insn_vex_p_bits(struct insn *insn)
160 return X86_VEX_P(insn->vex_prefix.bytes[2]); 154 return X86_VEX_P(insn->vex_prefix.bytes[2]);
161} 155}
162 156
157/* Get the last prefix id from last prefix or VEX prefix */
158static inline int insn_last_prefix_id(struct insn *insn)
159{
160 if (insn_is_avx(insn))
161 return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */
162
163 if (insn->prefixes.bytes[3])
164 return inat_get_last_prefix_id(insn->prefixes.bytes[3]);
165
166 return 0;
167}
168
163/* Offset of each field from kaddr */ 169/* Offset of each field from kaddr */
164static inline int insn_offset_rex_prefix(struct insn *insn) 170static inline int insn_offset_rex_prefix(struct insn *insn)
165{ 171{
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 461ce432b1c..e8fb2c7a5f4 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -188,8 +188,6 @@ extern u32 get_ibs_caps(void);
188#ifdef CONFIG_PERF_EVENTS 188#ifdef CONFIG_PERF_EVENTS
189extern void perf_events_lapic_init(void); 189extern void perf_events_lapic_init(void);
190 190
191#define PERF_EVENT_INDEX_OFFSET 0
192
193/* 191/*
194 * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups. 192 * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups.
195 * This flag is otherwise unused and ABI specified to be 0, so nobody should 193 * This flag is otherwise unused and ABI specified to be 0, so nobody should
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
new file mode 100644
index 00000000000..0500391f57d
--- /dev/null
+++ b/arch/x86/include/asm/uprobes.h
@@ -0,0 +1,43 @@
1#ifndef _ASM_UPROBES_H
2#define _ASM_UPROBES_H
3/*
4 * User-space Probes (UProbes) for x86
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 *
20 * Copyright (C) IBM Corporation, 2008-2011
21 * Authors:
22 * Srikar Dronamraju
23 * Jim Keniston
24 */
25
26typedef u8 uprobe_opcode_t;
27
28#define MAX_UINSN_BYTES 16
29#define UPROBE_XOL_SLOT_BYTES 128 /* to keep it cache aligned */
30
31#define UPROBE_SWBP_INSN 0xcc
32#define UPROBE_SWBP_INSN_SIZE 1
33
34struct arch_uprobe {
35 u16 fixups;
36 u8 insn[MAX_UINSN_BYTES];
37#ifdef CONFIG_X86_64
38 unsigned long rip_rela_target_address;
39#endif
40};
41
42extern int arch_uprobes_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm);
43#endif /* _ASM_UPROBES_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5369059c07a..8c8c365a3bc 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -100,6 +100,7 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
100 100
101obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o 101obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
102obj-$(CONFIG_OF) += devicetree.o 102obj-$(CONFIG_OF) += devicetree.o
103obj-$(CONFIG_UPROBES) += uprobes.o
103 104
104### 105###
105# 64 bit specific files 106# 64 bit specific files
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index f4773f4aae3..0a44b90602b 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -5,6 +5,7 @@
5#include <linux/mm.h> 5#include <linux/mm.h>
6 6
7#include <linux/io.h> 7#include <linux/io.h>
8#include <linux/sched.h>
8#include <asm/processor.h> 9#include <asm/processor.h>
9#include <asm/apic.h> 10#include <asm/apic.h>
10#include <asm/cpu.h> 11#include <asm/cpu.h>
@@ -456,6 +457,8 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
456 if (c->x86_power & (1 << 8)) { 457 if (c->x86_power & (1 << 8)) {
457 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); 458 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
458 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); 459 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
460 if (!check_tsc_unstable())
461 sched_clock_stable = 1;
459 } 462 }
460 463
461#ifdef CONFIG_X86_64 464#ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 63c0e058a40..1c52bdbb9b8 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -24,12 +24,14 @@
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/cpu.h> 25#include <linux/cpu.h>
26#include <linux/bitops.h> 26#include <linux/bitops.h>
27#include <linux/device.h>
27 28
28#include <asm/apic.h> 29#include <asm/apic.h>
29#include <asm/stacktrace.h> 30#include <asm/stacktrace.h>
30#include <asm/nmi.h> 31#include <asm/nmi.h>
31#include <asm/smp.h> 32#include <asm/smp.h>
32#include <asm/alternative.h> 33#include <asm/alternative.h>
34#include <asm/timer.h>
33 35
34#include "perf_event.h" 36#include "perf_event.h"
35 37
@@ -1209,6 +1211,8 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
1209 break; 1211 break;
1210 1212
1211 case CPU_STARTING: 1213 case CPU_STARTING:
1214 if (x86_pmu.attr_rdpmc)
1215 set_in_cr4(X86_CR4_PCE);
1212 if (x86_pmu.cpu_starting) 1216 if (x86_pmu.cpu_starting)
1213 x86_pmu.cpu_starting(cpu); 1217 x86_pmu.cpu_starting(cpu);
1214 break; 1218 break;
@@ -1318,6 +1322,8 @@ static int __init init_hw_perf_events(void)
1318 } 1322 }
1319 } 1323 }
1320 1324
1325 x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
1326
1321 pr_info("... version: %d\n", x86_pmu.version); 1327 pr_info("... version: %d\n", x86_pmu.version);
1322 pr_info("... bit width: %d\n", x86_pmu.cntval_bits); 1328 pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
1323 pr_info("... generic registers: %d\n", x86_pmu.num_counters); 1329 pr_info("... generic registers: %d\n", x86_pmu.num_counters);
@@ -1541,10 +1547,71 @@ static int x86_pmu_event_init(struct perf_event *event)
1541 return err; 1547 return err;
1542} 1548}
1543 1549
1550static int x86_pmu_event_idx(struct perf_event *event)
1551{
1552 int idx = event->hw.idx;
1553
1554 if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) {
1555 idx -= X86_PMC_IDX_FIXED;
1556 idx |= 1 << 30;
1557 }
1558
1559 return idx + 1;
1560}
1561
1562static ssize_t get_attr_rdpmc(struct device *cdev,
1563 struct device_attribute *attr,
1564 char *buf)
1565{
1566 return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc);
1567}
1568
1569static void change_rdpmc(void *info)
1570{
1571 bool enable = !!(unsigned long)info;
1572
1573 if (enable)
1574 set_in_cr4(X86_CR4_PCE);
1575 else
1576 clear_in_cr4(X86_CR4_PCE);
1577}
1578
1579static ssize_t set_attr_rdpmc(struct device *cdev,
1580 struct device_attribute *attr,
1581 const char *buf, size_t count)
1582{
1583 unsigned long val = simple_strtoul(buf, NULL, 0);
1584
1585 if (!!val != !!x86_pmu.attr_rdpmc) {
1586 x86_pmu.attr_rdpmc = !!val;
1587 smp_call_function(change_rdpmc, (void *)val, 1);
1588 }
1589
1590 return count;
1591}
1592
1593static DEVICE_ATTR(rdpmc, S_IRUSR | S_IWUSR, get_attr_rdpmc, set_attr_rdpmc);
1594
1595static struct attribute *x86_pmu_attrs[] = {
1596 &dev_attr_rdpmc.attr,
1597 NULL,
1598};
1599
1600static struct attribute_group x86_pmu_attr_group = {
1601 .attrs = x86_pmu_attrs,
1602};
1603
1604static const struct attribute_group *x86_pmu_attr_groups[] = {
1605 &x86_pmu_attr_group,
1606 NULL,
1607};
1608
1544static struct pmu pmu = { 1609static struct pmu pmu = {
1545 .pmu_enable = x86_pmu_enable, 1610 .pmu_enable = x86_pmu_enable,
1546 .pmu_disable = x86_pmu_disable, 1611 .pmu_disable = x86_pmu_disable,
1547 1612
1613 .attr_groups = x86_pmu_attr_groups,
1614
1548 .event_init = x86_pmu_event_init, 1615 .event_init = x86_pmu_event_init,
1549 1616
1550 .add = x86_pmu_add, 1617 .add = x86_pmu_add,
@@ -1556,8 +1623,23 @@ static struct pmu pmu = {
1556 .start_txn = x86_pmu_start_txn, 1623 .start_txn = x86_pmu_start_txn,
1557 .cancel_txn = x86_pmu_cancel_txn, 1624 .cancel_txn = x86_pmu_cancel_txn,
1558 .commit_txn = x86_pmu_commit_txn, 1625 .commit_txn = x86_pmu_commit_txn,
1626
1627 .event_idx = x86_pmu_event_idx,
1559}; 1628};
1560 1629
1630void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
1631{
1632 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
1633 return;
1634
1635 if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
1636 return;
1637
1638 userpg->time_mult = this_cpu_read(cyc2ns);
1639 userpg->time_shift = CYC2NS_SCALE_FACTOR;
1640 userpg->time_offset = this_cpu_read(cyc2ns_offset) - now;
1641}
1642
1561/* 1643/*
1562 * callchain support 1644 * callchain support
1563 */ 1645 */
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index c30c807ddc7..82db83b5c3b 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -309,6 +309,14 @@ struct x86_pmu {
309 struct x86_pmu_quirk *quirks; 309 struct x86_pmu_quirk *quirks;
310 int perfctr_second_write; 310 int perfctr_second_write;
311 311
312 /*
313 * sysfs attrs
314 */
315 int attr_rdpmc;
316
317 /*
318 * CPU Hotplug hooks
319 */
312 int (*cpu_prepare)(int cpu); 320 int (*cpu_prepare)(int cpu);
313 void (*cpu_starting)(int cpu); 321 void (*cpu_starting)(int cpu);
314 void (*cpu_dying)(int cpu); 322 void (*cpu_dying)(int cpu);
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
new file mode 100644
index 00000000000..851a11b0d38
--- /dev/null
+++ b/arch/x86/kernel/uprobes.c
@@ -0,0 +1,423 @@
1/*
2 * User-space Probes (UProbes) for x86
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) IBM Corporation, 2008-2011
19 * Authors:
20 * Srikar Dronamraju
21 * Jim Keniston
22 */
23#include <linux/kernel.h>
24#include <linux/sched.h>
25#include <linux/ptrace.h>
26#include <linux/uprobes.h>
27
28#include <linux/kdebug.h>
29#include <asm/insn.h>
30
31/* Post-execution fixups. */
32
33/* No fixup needed */
34#define UPROBE_FIX_NONE 0x0
35/* Adjust IP back to vicinity of actual insn */
36#define UPROBE_FIX_IP 0x1
37/* Adjust the return address of a call insn */
38#define UPROBE_FIX_CALL 0x2
39
40#define UPROBE_FIX_RIP_AX 0x8000
41#define UPROBE_FIX_RIP_CX 0x4000
42
43/* Adaptations for mhiramat x86 decoder v14. */
44#define OPCODE1(insn) ((insn)->opcode.bytes[0])
45#define OPCODE2(insn) ((insn)->opcode.bytes[1])
46#define OPCODE3(insn) ((insn)->opcode.bytes[2])
47#define MODRM_REG(insn) X86_MODRM_REG(insn->modrm.value)
48
49#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
50 (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
51 (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \
52 (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \
53 (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \
54 << (row % 32))
55
56/*
57 * Good-instruction tables for 32-bit apps. This is non-const and volatile
58 * to keep gcc from statically optimizing it out, as variable_test_bit makes
59 * some versions of gcc to think only *(unsigned long*) is used.
60 */
61static volatile u32 good_insns_32[256 / 32] = {
62 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
63 /* ---------------------------------------------- */
64 W(0x00, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) | /* 00 */
65 W(0x10, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) , /* 10 */
66 W(0x20, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) | /* 20 */
67 W(0x30, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) , /* 30 */
68 W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
69 W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
70 W(0x60, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
71 W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
72 W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
73 W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
74 W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */
75 W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
76 W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */
77 W(0xd0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
78 W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */
79 W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */
80 /* ---------------------------------------------- */
81 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
82};
83
84/* Using this for both 64-bit and 32-bit apps */
85static volatile u32 good_2byte_insns[256 / 32] = {
86 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
87 /* ---------------------------------------------- */
88 W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */
89 W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */
90 W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
91 W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
92 W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
93 W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
94 W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */
95 W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
96 W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
97 W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
98 W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */
99 W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
100 W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
101 W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
102 W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */
103 W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* f0 */
104 /* ---------------------------------------------- */
105 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
106};
107
108#ifdef CONFIG_X86_64
109/* Good-instruction tables for 64-bit apps */
110static volatile u32 good_insns_64[256 / 32] = {
111 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
112 /* ---------------------------------------------- */
113 W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 00 */
114 W(0x10, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 10 */
115 W(0x20, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 20 */
116 W(0x30, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 30 */
117 W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */
118 W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
119 W(0x60, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
120 W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
121 W(0x80, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
122 W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
123 W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */
124 W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
125 W(0xc0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */
126 W(0xd0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
127 W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */
128 W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */
129 /* ---------------------------------------------- */
130 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
131};
132#endif
133#undef W
134
135/*
136 * opcodes we'll probably never support:
137 *
138 * 6c-6d, e4-e5, ec-ed - in
139 * 6e-6f, e6-e7, ee-ef - out
140 * cc, cd - int3, int
141 * cf - iret
142 * d6 - illegal instruction
143 * f1 - int1/icebp
144 * f4 - hlt
145 * fa, fb - cli, sti
146 * 0f - lar, lsl, syscall, clts, sysret, sysenter, sysexit, invd, wbinvd, ud2
147 *
148 * invalid opcodes in 64-bit mode:
149 *
150 * 06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, 82, c4-c5, d4-d5
151 * 63 - we support this opcode in x86_64 but not in i386.
152 *
153 * opcodes we may need to refine support for:
154 *
155 * 0f - 2-byte instructions: For many of these instructions, the validity
156 * depends on the prefix and/or the reg field. On such instructions, we
157 * just consider the opcode combination valid if it corresponds to any
158 * valid instruction.
159 *
160 * 8f - Group 1 - only reg = 0 is OK
161 * c6-c7 - Group 11 - only reg = 0 is OK
162 * d9-df - fpu insns with some illegal encodings
163 * f2, f3 - repnz, repz prefixes. These are also the first byte for
164 * certain floating-point instructions, such as addsd.
165 *
166 * fe - Group 4 - only reg = 0 or 1 is OK
167 * ff - Group 5 - only reg = 0-6 is OK
168 *
169 * others -- Do we need to support these?
170 *
171 * 0f - (floating-point?) prefetch instructions
172 * 07, 17, 1f - pop es, pop ss, pop ds
173 * 26, 2e, 36, 3e - es:, cs:, ss:, ds: segment prefixes --
174 * but 64 and 65 (fs: and gs:) seem to be used, so we support them
175 * 67 - addr16 prefix
176 * ce - into
177 * f0 - lock prefix
178 */
179
180/*
181 * TODO:
182 * - Where necessary, examine the modrm byte and allow only valid instructions
183 * in the different Groups and fpu instructions.
184 */
185
186static bool is_prefix_bad(struct insn *insn)
187{
188 int i;
189
190 for (i = 0; i < insn->prefixes.nbytes; i++) {
191 switch (insn->prefixes.bytes[i]) {
192 case 0x26: /* INAT_PFX_ES */
193 case 0x2E: /* INAT_PFX_CS */
194 case 0x36: /* INAT_PFX_DS */
195 case 0x3E: /* INAT_PFX_SS */
196 case 0xF0: /* INAT_PFX_LOCK */
197 return true;
198 }
199 }
200 return false;
201}
202
203static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn)
204{
205 insn_init(insn, auprobe->insn, false);
206
207 /* Skip good instruction prefixes; reject "bad" ones. */
208 insn_get_opcode(insn);
209 if (is_prefix_bad(insn))
210 return -ENOTSUPP;
211
212 if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_32))
213 return 0;
214
215 if (insn->opcode.nbytes == 2) {
216 if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns))
217 return 0;
218 }
219
220 return -ENOTSUPP;
221}
222
223/*
224 * Figure out which fixups post_xol() will need to perform, and annotate
225 * arch_uprobe->fixups accordingly. To start with,
226 * arch_uprobe->fixups is either zero or it reflects rip-related
227 * fixups.
228 */
229static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn)
230{
231 bool fix_ip = true, fix_call = false; /* defaults */
232 int reg;
233
234 insn_get_opcode(insn); /* should be a nop */
235
236 switch (OPCODE1(insn)) {
237 case 0xc3: /* ret/lret */
238 case 0xcb:
239 case 0xc2:
240 case 0xca:
241 /* ip is correct */
242 fix_ip = false;
243 break;
244 case 0xe8: /* call relative - Fix return addr */
245 fix_call = true;
246 break;
247 case 0x9a: /* call absolute - Fix return addr, not ip */
248 fix_call = true;
249 fix_ip = false;
250 break;
251 case 0xff:
252 insn_get_modrm(insn);
253 reg = MODRM_REG(insn);
254 if (reg == 2 || reg == 3) {
255 /* call or lcall, indirect */
256 /* Fix return addr; ip is correct. */
257 fix_call = true;
258 fix_ip = false;
259 } else if (reg == 4 || reg == 5) {
260 /* jmp or ljmp, indirect */
261 /* ip is correct. */
262 fix_ip = false;
263 }
264 break;
265 case 0xea: /* jmp absolute -- ip is correct */
266 fix_ip = false;
267 break;
268 default:
269 break;
270 }
271 if (fix_ip)
272 auprobe->fixups |= UPROBE_FIX_IP;
273 if (fix_call)
274 auprobe->fixups |= UPROBE_FIX_CALL;
275}
276
277#ifdef CONFIG_X86_64
278/*
279 * If arch_uprobe->insn doesn't use rip-relative addressing, return
280 * immediately. Otherwise, rewrite the instruction so that it accesses
281 * its memory operand indirectly through a scratch register. Set
282 * arch_uprobe->fixups and arch_uprobe->rip_rela_target_address
283 * accordingly. (The contents of the scratch register will be saved
284 * before we single-step the modified instruction, and restored
285 * afterward.)
286 *
287 * We do this because a rip-relative instruction can access only a
288 * relatively small area (+/- 2 GB from the instruction), and the XOL
289 * area typically lies beyond that area. At least for instructions
290 * that store to memory, we can't execute the original instruction
291 * and "fix things up" later, because the misdirected store could be
292 * disastrous.
293 *
294 * Some useful facts about rip-relative instructions:
295 *
296 * - There's always a modrm byte.
297 * - There's never a SIB byte.
298 * - The displacement is always 4 bytes.
299 */
300static void
301handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
302{
303 u8 *cursor;
304 u8 reg;
305
306 if (mm->context.ia32_compat)
307 return;
308
309 auprobe->rip_rela_target_address = 0x0;
310 if (!insn_rip_relative(insn))
311 return;
312
313 /*
314 * insn_rip_relative() would have decoded rex_prefix, modrm.
315 * Clear REX.b bit (extension of MODRM.rm field):
316 * we want to encode rax/rcx, not r8/r9.
317 */
318 if (insn->rex_prefix.nbytes) {
319 cursor = auprobe->insn + insn_offset_rex_prefix(insn);
320 *cursor &= 0xfe; /* Clearing REX.B bit */
321 }
322
323 /*
324 * Point cursor at the modrm byte. The next 4 bytes are the
325 * displacement. Beyond the displacement, for some instructions,
326 * is the immediate operand.
327 */
328 cursor = auprobe->insn + insn_offset_modrm(insn);
329 insn_get_length(insn);
330
331 /*
332 * Convert from rip-relative addressing to indirect addressing
333 * via a scratch register. Change the r/m field from 0x5 (%rip)
334 * to 0x0 (%rax) or 0x1 (%rcx), and squeeze out the offset field.
335 */
336 reg = MODRM_REG(insn);
337 if (reg == 0) {
338 /*
339 * The register operand (if any) is either the A register
340 * (%rax, %eax, etc.) or (if the 0x4 bit is set in the
341 * REX prefix) %r8. In any case, we know the C register
342 * is NOT the register operand, so we use %rcx (register
343 * #1) for the scratch register.
344 */
345 auprobe->fixups = UPROBE_FIX_RIP_CX;
346 /* Change modrm from 00 000 101 to 00 000 001. */
347 *cursor = 0x1;
348 } else {
349 /* Use %rax (register #0) for the scratch register. */
350 auprobe->fixups = UPROBE_FIX_RIP_AX;
351 /* Change modrm from 00 xxx 101 to 00 xxx 000 */
352 *cursor = (reg << 3);
353 }
354
355 /* Target address = address of next instruction + (signed) offset */
356 auprobe->rip_rela_target_address = (long)insn->length + insn->displacement.value;
357
358 /* Displacement field is gone; slide immediate field (if any) over. */
359 if (insn->immediate.nbytes) {
360 cursor++;
361 memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes);
362 }
363 return;
364}
365
366static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn)
367{
368 insn_init(insn, auprobe->insn, true);
369
370 /* Skip good instruction prefixes; reject "bad" ones. */
371 insn_get_opcode(insn);
372 if (is_prefix_bad(insn))
373 return -ENOTSUPP;
374
375 if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_64))
376 return 0;
377
378 if (insn->opcode.nbytes == 2) {
379 if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns))
380 return 0;
381 }
382 return -ENOTSUPP;
383}
384
385static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
386{
387 if (mm->context.ia32_compat)
388 return validate_insn_32bits(auprobe, insn);
389 return validate_insn_64bits(auprobe, insn);
390}
391#else /* 32-bit: */
392static void handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
393{
394 /* No RIP-relative addressing on 32-bit */
395}
396
397static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
398{
399 return validate_insn_32bits(auprobe, insn);
400}
401#endif /* CONFIG_X86_64 */
402
403/**
404 * arch_uprobes_analyze_insn - instruction analysis including validity and fixups.
405 * @mm: the probed address space.
406 * @arch_uprobe: the probepoint information.
407 * Return 0 on success or a -ve number on error.
408 */
409int arch_uprobes_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm)
410{
411 int ret;
412 struct insn insn;
413
414 auprobe->fixups = 0;
415 ret = validate_insn_bits(auprobe, mm, &insn);
416 if (ret != 0)
417 return ret;
418
419 handle_riprel_insn(auprobe, mm, &insn);
420 prepare_fixups(auprobe, &insn);
421
422 return 0;
423}
diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c
index 88ad5fbda6e..c1f01a8e9f6 100644
--- a/arch/x86/lib/inat.c
+++ b/arch/x86/lib/inat.c
@@ -29,46 +29,46 @@ insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode)
29 return inat_primary_table[opcode]; 29 return inat_primary_table[opcode];
30} 30}
31 31
32insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, insn_byte_t last_pfx, 32int inat_get_last_prefix_id(insn_byte_t last_pfx)
33{
34 insn_attr_t lpfx_attr;
35
36 lpfx_attr = inat_get_opcode_attribute(last_pfx);
37 return inat_last_prefix_id(lpfx_attr);
38}
39
40insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id,
33 insn_attr_t esc_attr) 41 insn_attr_t esc_attr)
34{ 42{
35 const insn_attr_t *table; 43 const insn_attr_t *table;
36 insn_attr_t lpfx_attr; 44 int n;
37 int n, m = 0;
38 45
39 n = inat_escape_id(esc_attr); 46 n = inat_escape_id(esc_attr);
40 if (last_pfx) { 47
41 lpfx_attr = inat_get_opcode_attribute(last_pfx);
42 m = inat_last_prefix_id(lpfx_attr);
43 }
44 table = inat_escape_tables[n][0]; 48 table = inat_escape_tables[n][0];
45 if (!table) 49 if (!table)
46 return 0; 50 return 0;
47 if (inat_has_variant(table[opcode]) && m) { 51 if (inat_has_variant(table[opcode]) && lpfx_id) {
48 table = inat_escape_tables[n][m]; 52 table = inat_escape_tables[n][lpfx_id];
49 if (!table) 53 if (!table)
50 return 0; 54 return 0;
51 } 55 }
52 return table[opcode]; 56 return table[opcode];
53} 57}
54 58
55insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx, 59insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id,
56 insn_attr_t grp_attr) 60 insn_attr_t grp_attr)
57{ 61{
58 const insn_attr_t *table; 62 const insn_attr_t *table;
59 insn_attr_t lpfx_attr; 63 int n;
60 int n, m = 0;
61 64
62 n = inat_group_id(grp_attr); 65 n = inat_group_id(grp_attr);
63 if (last_pfx) { 66
64 lpfx_attr = inat_get_opcode_attribute(last_pfx);
65 m = inat_last_prefix_id(lpfx_attr);
66 }
67 table = inat_group_tables[n][0]; 67 table = inat_group_tables[n][0];
68 if (!table) 68 if (!table)
69 return inat_group_common_attribute(grp_attr); 69 return inat_group_common_attribute(grp_attr);
70 if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && m) { 70 if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && lpfx_id) {
71 table = inat_group_tables[n][m]; 71 table = inat_group_tables[n][lpfx_id];
72 if (!table) 72 if (!table)
73 return inat_group_common_attribute(grp_attr); 73 return inat_group_common_attribute(grp_attr);
74 } 74 }
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 5a1f9f3e3fb..25feb1ae71c 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -185,7 +185,8 @@ err_out:
185void insn_get_opcode(struct insn *insn) 185void insn_get_opcode(struct insn *insn)
186{ 186{
187 struct insn_field *opcode = &insn->opcode; 187 struct insn_field *opcode = &insn->opcode;
188 insn_byte_t op, pfx; 188 insn_byte_t op;
189 int pfx_id;
189 if (opcode->got) 190 if (opcode->got)
190 return; 191 return;
191 if (!insn->prefixes.got) 192 if (!insn->prefixes.got)
@@ -212,8 +213,8 @@ void insn_get_opcode(struct insn *insn)
212 /* Get escaped opcode */ 213 /* Get escaped opcode */
213 op = get_next(insn_byte_t, insn); 214 op = get_next(insn_byte_t, insn);
214 opcode->bytes[opcode->nbytes++] = op; 215 opcode->bytes[opcode->nbytes++] = op;
215 pfx = insn_last_prefix(insn); 216 pfx_id = insn_last_prefix_id(insn);
216 insn->attr = inat_get_escape_attribute(op, pfx, insn->attr); 217 insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr);
217 } 218 }
218 if (inat_must_vex(insn->attr)) 219 if (inat_must_vex(insn->attr))
219 insn->attr = 0; /* This instruction is bad */ 220 insn->attr = 0; /* This instruction is bad */
@@ -235,7 +236,7 @@ err_out:
235void insn_get_modrm(struct insn *insn) 236void insn_get_modrm(struct insn *insn)
236{ 237{
237 struct insn_field *modrm = &insn->modrm; 238 struct insn_field *modrm = &insn->modrm;
238 insn_byte_t pfx, mod; 239 insn_byte_t pfx_id, mod;
239 if (modrm->got) 240 if (modrm->got)
240 return; 241 return;
241 if (!insn->opcode.got) 242 if (!insn->opcode.got)
@@ -246,8 +247,8 @@ void insn_get_modrm(struct insn *insn)
246 modrm->value = mod; 247 modrm->value = mod;
247 modrm->nbytes = 1; 248 modrm->nbytes = 1;
248 if (inat_is_group(insn->attr)) { 249 if (inat_is_group(insn->attr)) {
249 pfx = insn_last_prefix(insn); 250 pfx_id = insn_last_prefix_id(insn);
250 insn->attr = inat_get_group_attribute(mod, pfx, 251 insn->attr = inat_get_group_attribute(mod, pfx_id,
251 insn->attr); 252 insn->attr);
252 if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) 253 if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
253 insn->attr = 0; /* This is bad */ 254 insn->attr = 0; /* This is bad */
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 5ce8b140428..f7c69580fea 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -1,6 +1,38 @@
1#ifndef _LINUX_JUMP_LABEL_H 1#ifndef _LINUX_JUMP_LABEL_H
2#define _LINUX_JUMP_LABEL_H 2#define _LINUX_JUMP_LABEL_H
3 3
4/*
5 * Jump label support
6 *
7 * Copyright (C) 2009-2012 Jason Baron <jbaron@redhat.com>
8 * Copyright (C) 2011-2012 Peter Zijlstra <pzijlstr@redhat.com>
9 *
10 * Jump labels provide an interface to generate dynamic branches using
11 * self-modifying code. Assuming toolchain and architecture support the result
12 * of a "if (static_branch(&key))" statement is a unconditional branch (which
13 * defaults to false - and the true block is placed out of line).
14 *
15 * However at runtime we can change the 'static' branch target using
16 * jump_label_{inc,dec}(). These function as a 'reference' count on the key
17 * object and for as long as there are references all branches referring to
18 * that particular key will point to the (out of line) true block.
19 *
20 * Since this relies on modifying code the jump_label_{inc,dec}() functions
21 * must be considered absolute slow paths (machine wide synchronization etc.).
22 * OTOH, since the affected branches are unconditional their runtime overhead
23 * will be absolutely minimal, esp. in the default (off) case where the total
24 * effect is a single NOP of appropriate size. The on case will patch in a jump
25 * to the out-of-line block.
26 *
27 * When the control is directly exposed to userspace it is prudent to delay the
28 * decrement to avoid high frequency code modifications which can (and do)
29 * cause significant performance degradation. Struct jump_label_key_deferred and
30 * jump_label_dec_deferred() provide for this.
31 *
32 * Lacking toolchain and or architecture support, it falls back to a simple
33 * conditional branch.
34 */
35
4#include <linux/types.h> 36#include <linux/types.h>
5#include <linux/compiler.h> 37#include <linux/compiler.h>
6#include <linux/workqueue.h> 38#include <linux/workqueue.h>
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index abb2776be1b..412b790f5da 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -291,12 +291,14 @@ struct perf_event_mmap_page {
291 __s64 offset; /* add to hardware event value */ 291 __s64 offset; /* add to hardware event value */
292 __u64 time_enabled; /* time event active */ 292 __u64 time_enabled; /* time event active */
293 __u64 time_running; /* time event on cpu */ 293 __u64 time_running; /* time event on cpu */
294 __u32 time_mult, time_shift;
295 __u64 time_offset;
294 296
295 /* 297 /*
296 * Hole for extension of the self monitor capabilities 298 * Hole for extension of the self monitor capabilities
297 */ 299 */
298 300
299 __u64 __reserved[123]; /* align to 1k */ 301 __u64 __reserved[121]; /* align to 1k */
300 302
301 /* 303 /*
302 * Control data for the mmap() data buffer. 304 * Control data for the mmap() data buffer.
@@ -616,6 +618,7 @@ struct pmu {
616 struct list_head entry; 618 struct list_head entry;
617 619
618 struct device *dev; 620 struct device *dev;
621 const struct attribute_group **attr_groups;
619 char *name; 622 char *name;
620 int type; 623 int type;
621 624
@@ -681,6 +684,12 @@ struct pmu {
681 * for each successful ->add() during the transaction. 684 * for each successful ->add() during the transaction.
682 */ 685 */
683 void (*cancel_txn) (struct pmu *pmu); /* optional */ 686 void (*cancel_txn) (struct pmu *pmu); /* optional */
687
688 /*
689 * Will return the value for perf_event_mmap_page::index for this event,
690 * if no implementation is provided it will default to: event->hw.idx + 1.
691 */
692 int (*event_idx) (struct perf_event *event); /*optional */
684}; 693};
685 694
686/** 695/**
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
new file mode 100644
index 00000000000..eac525f41b9
--- /dev/null
+++ b/include/linux/uprobes.h
@@ -0,0 +1,76 @@
1#ifndef _LINUX_UPROBES_H
2#define _LINUX_UPROBES_H
3/*
4 * User-space Probes (UProbes)
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 *
20 * Copyright (C) IBM Corporation, 2008-2012
21 * Authors:
22 * Srikar Dronamraju
23 * Jim Keniston
24 * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
25 */
26
27#include <linux/errno.h>
28#include <linux/rbtree.h>
29
30struct vm_area_struct;
31#ifdef CONFIG_ARCH_SUPPORTS_UPROBES
32#include <asm/uprobes.h>
33#endif
34
35/* flags that denote/change uprobes behaviour */
36
37/* Have a copy of original instruction */
38#define UPROBE_COPY_INSN 0x1
39
40/* Dont run handlers when first register/ last unregister in progress*/
41#define UPROBE_RUN_HANDLER 0x2
42
43struct uprobe_consumer {
44 int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs);
45 /*
46 * filter is optional; If a filter exists, handler is run
47 * if and only if filter returns true.
48 */
49 bool (*filter)(struct uprobe_consumer *self, struct task_struct *task);
50
51 struct uprobe_consumer *next;
52};
53
54#ifdef CONFIG_UPROBES
55extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
56extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr, bool verify);
57extern bool __weak is_swbp_insn(uprobe_opcode_t *insn);
58extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
59extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
60extern int uprobe_mmap(struct vm_area_struct *vma);
61#else /* CONFIG_UPROBES is not defined */
62static inline int
63uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
64{
65 return -ENOSYS;
66}
67static inline void
68uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
69{
70}
71static inline int uprobe_mmap(struct vm_area_struct *vma)
72{
73 return 0;
74}
75#endif /* CONFIG_UPROBES */
76#endif /* _LINUX_UPROBES_H */
diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h
index 17df43464df..39a8a430d90 100644
--- a/include/trace/events/signal.h
+++ b/include/trace/events/signal.h
@@ -23,11 +23,23 @@
23 } \ 23 } \
24 } while (0) 24 } while (0)
25 25
26#ifndef TRACE_HEADER_MULTI_READ
27enum {
28 TRACE_SIGNAL_DELIVERED,
29 TRACE_SIGNAL_IGNORED,
30 TRACE_SIGNAL_ALREADY_PENDING,
31 TRACE_SIGNAL_OVERFLOW_FAIL,
32 TRACE_SIGNAL_LOSE_INFO,
33};
34#endif
35
26/** 36/**
27 * signal_generate - called when a signal is generated 37 * signal_generate - called when a signal is generated
28 * @sig: signal number 38 * @sig: signal number
29 * @info: pointer to struct siginfo 39 * @info: pointer to struct siginfo
30 * @task: pointer to struct task_struct 40 * @task: pointer to struct task_struct
41 * @group: shared or private
42 * @result: TRACE_SIGNAL_*
31 * 43 *
32 * Current process sends a 'sig' signal to 'task' process with 44 * Current process sends a 'sig' signal to 'task' process with
33 * 'info' siginfo. If 'info' is SEND_SIG_NOINFO or SEND_SIG_PRIV, 45 * 'info' siginfo. If 'info' is SEND_SIG_NOINFO or SEND_SIG_PRIV,
@@ -37,9 +49,10 @@
37 */ 49 */
38TRACE_EVENT(signal_generate, 50TRACE_EVENT(signal_generate,
39 51
40 TP_PROTO(int sig, struct siginfo *info, struct task_struct *task), 52 TP_PROTO(int sig, struct siginfo *info, struct task_struct *task,
53 int group, int result),
41 54
42 TP_ARGS(sig, info, task), 55 TP_ARGS(sig, info, task, group, result),
43 56
44 TP_STRUCT__entry( 57 TP_STRUCT__entry(
45 __field( int, sig ) 58 __field( int, sig )
@@ -47,6 +60,8 @@ TRACE_EVENT(signal_generate,
47 __field( int, code ) 60 __field( int, code )
48 __array( char, comm, TASK_COMM_LEN ) 61 __array( char, comm, TASK_COMM_LEN )
49 __field( pid_t, pid ) 62 __field( pid_t, pid )
63 __field( int, group )
64 __field( int, result )
50 ), 65 ),
51 66
52 TP_fast_assign( 67 TP_fast_assign(
@@ -54,11 +69,14 @@ TRACE_EVENT(signal_generate,
54 TP_STORE_SIGINFO(__entry, info); 69 TP_STORE_SIGINFO(__entry, info);
55 memcpy(__entry->comm, task->comm, TASK_COMM_LEN); 70 memcpy(__entry->comm, task->comm, TASK_COMM_LEN);
56 __entry->pid = task->pid; 71 __entry->pid = task->pid;
72 __entry->group = group;
73 __entry->result = result;
57 ), 74 ),
58 75
59 TP_printk("sig=%d errno=%d code=%d comm=%s pid=%d", 76 TP_printk("sig=%d errno=%d code=%d comm=%s pid=%d grp=%d res=%d",
60 __entry->sig, __entry->errno, __entry->code, 77 __entry->sig, __entry->errno, __entry->code,
61 __entry->comm, __entry->pid) 78 __entry->comm, __entry->pid, __entry->group,
79 __entry->result)
62); 80);
63 81
64/** 82/**
@@ -101,65 +119,6 @@ TRACE_EVENT(signal_deliver,
101 __entry->sa_handler, __entry->sa_flags) 119 __entry->sa_handler, __entry->sa_flags)
102); 120);
103 121
104DECLARE_EVENT_CLASS(signal_queue_overflow,
105
106 TP_PROTO(int sig, int group, struct siginfo *info),
107
108 TP_ARGS(sig, group, info),
109
110 TP_STRUCT__entry(
111 __field( int, sig )
112 __field( int, group )
113 __field( int, errno )
114 __field( int, code )
115 ),
116
117 TP_fast_assign(
118 __entry->sig = sig;
119 __entry->group = group;
120 TP_STORE_SIGINFO(__entry, info);
121 ),
122
123 TP_printk("sig=%d group=%d errno=%d code=%d",
124 __entry->sig, __entry->group, __entry->errno, __entry->code)
125);
126
127/**
128 * signal_overflow_fail - called when signal queue is overflow
129 * @sig: signal number
130 * @group: signal to process group or not (bool)
131 * @info: pointer to struct siginfo
132 *
133 * Kernel fails to generate 'sig' signal with 'info' siginfo, because
134 * siginfo queue is overflow, and the signal is dropped.
135 * 'group' is not 0 if the signal will be sent to a process group.
136 * 'sig' is always one of RT signals.
137 */
138DEFINE_EVENT(signal_queue_overflow, signal_overflow_fail,
139
140 TP_PROTO(int sig, int group, struct siginfo *info),
141
142 TP_ARGS(sig, group, info)
143);
144
145/**
146 * signal_lose_info - called when siginfo is lost
147 * @sig: signal number
148 * @group: signal to process group or not (bool)
149 * @info: pointer to struct siginfo
150 *
151 * Kernel generates 'sig' signal but loses 'info' siginfo, because siginfo
152 * queue is overflow.
153 * 'group' is not 0 if the signal will be sent to a process group.
154 * 'sig' is always one of non-RT signals.
155 */
156DEFINE_EVENT(signal_queue_overflow, signal_lose_info,
157
158 TP_PROTO(int sig, int group, struct siginfo *info),
159
160 TP_ARGS(sig, group, info)
161);
162
163#endif /* _TRACE_SIGNAL_H */ 122#endif /* _TRACE_SIGNAL_H */
164 123
165/* This part must be outside protection */ 124/* This part must be outside protection */
diff --git a/kernel/events/Makefile b/kernel/events/Makefile
index 22d901f9caf..103f5d147b2 100644
--- a/kernel/events/Makefile
+++ b/kernel/events/Makefile
@@ -3,4 +3,7 @@ CFLAGS_REMOVE_core.o = -pg
3endif 3endif
4 4
5obj-y := core.o ring_buffer.o callchain.o 5obj-y := core.o ring_buffer.o callchain.o
6
6obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o 7obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
8obj-$(CONFIG_UPROBES) += uprobes.o
9
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1b5c081d8b9..94afe5b91c6 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3238,10 +3238,6 @@ int perf_event_task_disable(void)
3238 return 0; 3238 return 0;
3239} 3239}
3240 3240
3241#ifndef PERF_EVENT_INDEX_OFFSET
3242# define PERF_EVENT_INDEX_OFFSET 0
3243#endif
3244
3245static int perf_event_index(struct perf_event *event) 3241static int perf_event_index(struct perf_event *event)
3246{ 3242{
3247 if (event->hw.state & PERF_HES_STOPPED) 3243 if (event->hw.state & PERF_HES_STOPPED)
@@ -3250,21 +3246,26 @@ static int perf_event_index(struct perf_event *event)
3250 if (event->state != PERF_EVENT_STATE_ACTIVE) 3246 if (event->state != PERF_EVENT_STATE_ACTIVE)
3251 return 0; 3247 return 0;
3252 3248
3253 return event->hw.idx + 1 - PERF_EVENT_INDEX_OFFSET; 3249 return event->pmu->event_idx(event);
3254} 3250}
3255 3251
3256static void calc_timer_values(struct perf_event *event, 3252static void calc_timer_values(struct perf_event *event,
3253 u64 *now,
3257 u64 *enabled, 3254 u64 *enabled,
3258 u64 *running) 3255 u64 *running)
3259{ 3256{
3260 u64 now, ctx_time; 3257 u64 ctx_time;
3261 3258
3262 now = perf_clock(); 3259 *now = perf_clock();
3263 ctx_time = event->shadow_ctx_time + now; 3260 ctx_time = event->shadow_ctx_time + *now;
3264 *enabled = ctx_time - event->tstamp_enabled; 3261 *enabled = ctx_time - event->tstamp_enabled;
3265 *running = ctx_time - event->tstamp_running; 3262 *running = ctx_time - event->tstamp_running;
3266} 3263}
3267 3264
/*
 * Weak no-op default; an architecture/pmu may override this to publish
 * extra clock data in the mmap'ed user page (called with the page's
 * seqlock held from perf_event_update_userpage()).
 */
void __weak perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
{
}
3268
3268/* 3269/*
3269 * Callers need to ensure there can be no nesting of this function, otherwise 3270 * Callers need to ensure there can be no nesting of this function, otherwise
3270 * the seqlock logic goes bad. We can not serialize this because the arch 3271 * the seqlock logic goes bad. We can not serialize this because the arch
@@ -3274,7 +3275,7 @@ void perf_event_update_userpage(struct perf_event *event)
3274{ 3275{
3275 struct perf_event_mmap_page *userpg; 3276 struct perf_event_mmap_page *userpg;
3276 struct ring_buffer *rb; 3277 struct ring_buffer *rb;
3277 u64 enabled, running; 3278 u64 enabled, running, now;
3278 3279
3279 rcu_read_lock(); 3280 rcu_read_lock();
3280 /* 3281 /*
@@ -3286,7 +3287,7 @@ void perf_event_update_userpage(struct perf_event *event)
3286 * because of locking issue as we can be called in 3287 * because of locking issue as we can be called in
3287 * NMI context 3288 * NMI context
3288 */ 3289 */
3289 calc_timer_values(event, &enabled, &running); 3290 calc_timer_values(event, &now, &enabled, &running);
3290 rb = rcu_dereference(event->rb); 3291 rb = rcu_dereference(event->rb);
3291 if (!rb) 3292 if (!rb)
3292 goto unlock; 3293 goto unlock;
@@ -3302,7 +3303,7 @@ void perf_event_update_userpage(struct perf_event *event)
3302 barrier(); 3303 barrier();
3303 userpg->index = perf_event_index(event); 3304 userpg->index = perf_event_index(event);
3304 userpg->offset = perf_event_count(event); 3305 userpg->offset = perf_event_count(event);
3305 if (event->state == PERF_EVENT_STATE_ACTIVE) 3306 if (userpg->index)
3306 userpg->offset -= local64_read(&event->hw.prev_count); 3307 userpg->offset -= local64_read(&event->hw.prev_count);
3307 3308
3308 userpg->time_enabled = enabled + 3309 userpg->time_enabled = enabled +
@@ -3311,6 +3312,8 @@ void perf_event_update_userpage(struct perf_event *event)
3311 userpg->time_running = running + 3312 userpg->time_running = running +
3312 atomic64_read(&event->child_total_time_running); 3313 atomic64_read(&event->child_total_time_running);
3313 3314
3315 perf_update_user_clock(userpg, now);
3316
3314 barrier(); 3317 barrier();
3315 ++userpg->lock; 3318 ++userpg->lock;
3316 preempt_enable(); 3319 preempt_enable();
@@ -3568,6 +3571,8 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
3568 event->mmap_user = get_current_user(); 3571 event->mmap_user = get_current_user();
3569 vma->vm_mm->pinned_vm += event->mmap_locked; 3572 vma->vm_mm->pinned_vm += event->mmap_locked;
3570 3573
3574 perf_event_update_userpage(event);
3575
3571unlock: 3576unlock:
3572 if (!ret) 3577 if (!ret)
3573 atomic_inc(&event->mmap_count); 3578 atomic_inc(&event->mmap_count);
@@ -3799,7 +3804,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
3799static void perf_output_read(struct perf_output_handle *handle, 3804static void perf_output_read(struct perf_output_handle *handle,
3800 struct perf_event *event) 3805 struct perf_event *event)
3801{ 3806{
3802 u64 enabled = 0, running = 0; 3807 u64 enabled = 0, running = 0, now;
3803 u64 read_format = event->attr.read_format; 3808 u64 read_format = event->attr.read_format;
3804 3809
3805 /* 3810 /*
@@ -3812,7 +3817,7 @@ static void perf_output_read(struct perf_output_handle *handle,
3812 * NMI context 3817 * NMI context
3813 */ 3818 */
3814 if (read_format & PERF_FORMAT_TOTAL_TIMES) 3819 if (read_format & PERF_FORMAT_TOTAL_TIMES)
3815 calc_timer_values(event, &enabled, &running); 3820 calc_timer_values(event, &now, &enabled, &running);
3816 3821
3817 if (event->attr.read_format & PERF_FORMAT_GROUP) 3822 if (event->attr.read_format & PERF_FORMAT_GROUP)
3818 perf_output_read_group(handle, event, enabled, running); 3823 perf_output_read_group(handle, event, enabled, running);
@@ -5031,6 +5036,11 @@ static int perf_swevent_init(struct perf_event *event)
5031 return 0; 5036 return 0;
5032} 5037}
5033 5038
/*
 * Software events expose no counter index to userspace; index 0 makes
 * perf_event_update_userpage() skip the self-monitoring offset adjustment.
 */
static int perf_swevent_event_idx(struct perf_event *event)
{
	return 0;
}
5043
5034static struct pmu perf_swevent = { 5044static struct pmu perf_swevent = {
5035 .task_ctx_nr = perf_sw_context, 5045 .task_ctx_nr = perf_sw_context,
5036 5046
@@ -5040,6 +5050,8 @@ static struct pmu perf_swevent = {
5040 .start = perf_swevent_start, 5050 .start = perf_swevent_start,
5041 .stop = perf_swevent_stop, 5051 .stop = perf_swevent_stop,
5042 .read = perf_swevent_read, 5052 .read = perf_swevent_read,
5053
5054 .event_idx = perf_swevent_event_idx,
5043}; 5055};
5044 5056
5045#ifdef CONFIG_EVENT_TRACING 5057#ifdef CONFIG_EVENT_TRACING
@@ -5126,6 +5138,8 @@ static struct pmu perf_tracepoint = {
5126 .start = perf_swevent_start, 5138 .start = perf_swevent_start,
5127 .stop = perf_swevent_stop, 5139 .stop = perf_swevent_stop,
5128 .read = perf_swevent_read, 5140 .read = perf_swevent_read,
5141
5142 .event_idx = perf_swevent_event_idx,
5129}; 5143};
5130 5144
5131static inline void perf_tp_register(void) 5145static inline void perf_tp_register(void)
@@ -5345,6 +5359,8 @@ static struct pmu perf_cpu_clock = {
5345 .start = cpu_clock_event_start, 5359 .start = cpu_clock_event_start,
5346 .stop = cpu_clock_event_stop, 5360 .stop = cpu_clock_event_stop,
5347 .read = cpu_clock_event_read, 5361 .read = cpu_clock_event_read,
5362
5363 .event_idx = perf_swevent_event_idx,
5348}; 5364};
5349 5365
5350/* 5366/*
@@ -5417,6 +5433,8 @@ static struct pmu perf_task_clock = {
5417 .start = task_clock_event_start, 5433 .start = task_clock_event_start,
5418 .stop = task_clock_event_stop, 5434 .stop = task_clock_event_stop,
5419 .read = task_clock_event_read, 5435 .read = task_clock_event_read,
5436
5437 .event_idx = perf_swevent_event_idx,
5420}; 5438};
5421 5439
5422static void perf_pmu_nop_void(struct pmu *pmu) 5440static void perf_pmu_nop_void(struct pmu *pmu)
@@ -5444,6 +5462,11 @@ static void perf_pmu_cancel_txn(struct pmu *pmu)
5444 perf_pmu_enable(pmu); 5462 perf_pmu_enable(pmu);
5445} 5463}
5446 5464
/* Default pmu::event_idx: report the hardware index 1-based (0 = none). */
static int perf_event_idx_default(struct perf_event *event)
{
	return event->hw.idx + 1;
}
5469
5447/* 5470/*
5448 * Ensures all contexts with the same task_ctx_nr have the same 5471 * Ensures all contexts with the same task_ctx_nr have the same
5449 * pmu_cpu_context too. 5472 * pmu_cpu_context too.
@@ -5530,6 +5553,7 @@ static int pmu_dev_alloc(struct pmu *pmu)
5530 if (!pmu->dev) 5553 if (!pmu->dev)
5531 goto out; 5554 goto out;
5532 5555
5556 pmu->dev->groups = pmu->attr_groups;
5533 device_initialize(pmu->dev); 5557 device_initialize(pmu->dev);
5534 ret = dev_set_name(pmu->dev, "%s", pmu->name); 5558 ret = dev_set_name(pmu->dev, "%s", pmu->name);
5535 if (ret) 5559 if (ret)
@@ -5633,6 +5657,9 @@ got_cpu_context:
5633 pmu->pmu_disable = perf_pmu_nop_void; 5657 pmu->pmu_disable = perf_pmu_nop_void;
5634 } 5658 }
5635 5659
5660 if (!pmu->event_idx)
5661 pmu->event_idx = perf_event_idx_default;
5662
5636 list_add_rcu(&pmu->entry, &pmus); 5663 list_add_rcu(&pmu->entry, &pmus);
5637 ret = 0; 5664 ret = 0;
5638unlock: 5665unlock:
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index ee706ce44aa..3330022a7ac 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -613,6 +613,11 @@ static void hw_breakpoint_stop(struct perf_event *bp, int flags)
613 bp->hw.state = PERF_HES_STOPPED; 613 bp->hw.state = PERF_HES_STOPPED;
614} 614}
615 615
/* Breakpoints have no userspace-readable counter index; always 0. */
static int hw_breakpoint_event_idx(struct perf_event *bp)
{
	return 0;
}
620
616static struct pmu perf_breakpoint = { 621static struct pmu perf_breakpoint = {
617 .task_ctx_nr = perf_sw_context, /* could eventually get its own */ 622 .task_ctx_nr = perf_sw_context, /* could eventually get its own */
618 623
@@ -622,6 +627,8 @@ static struct pmu perf_breakpoint = {
622 .start = hw_breakpoint_start, 627 .start = hw_breakpoint_start,
623 .stop = hw_breakpoint_stop, 628 .stop = hw_breakpoint_stop,
624 .read = hw_breakpoint_pmu_read, 629 .read = hw_breakpoint_pmu_read,
630
631 .event_idx = hw_breakpoint_event_idx,
625}; 632};
626 633
627int __init init_hw_breakpoint(void) 634int __init init_hw_breakpoint(void)
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
new file mode 100644
index 00000000000..e56e56aa753
--- /dev/null
+++ b/kernel/events/uprobes.c
@@ -0,0 +1,1029 @@
1/*
2 * User-space Probes (UProbes)
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright (C) IBM Corporation, 2008-2012
19 * Authors:
20 * Srikar Dronamraju
21 * Jim Keniston
22 * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
23 */
24
25#include <linux/kernel.h>
26#include <linux/highmem.h>
27#include <linux/pagemap.h> /* read_mapping_page */
28#include <linux/slab.h>
29#include <linux/sched.h>
30#include <linux/rmap.h> /* anon_vma_prepare */
31#include <linux/mmu_notifier.h> /* set_pte_at_notify */
32#include <linux/swap.h> /* try_to_free_swap */
33
34#include <linux/uprobes.h>
35
36static struct rb_root uprobes_tree = RB_ROOT;
37
38static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */
39
40#define UPROBES_HASH_SZ 13
41
42/* serialize (un)register */
43static struct mutex uprobes_mutex[UPROBES_HASH_SZ];
44
45#define uprobes_hash(v) (&uprobes_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ])
46
47/* serialize uprobe->pending_list */
48static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
49#define uprobes_mmap_hash(v) (&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ])
50
51/*
52 * uprobe_events allows us to skip the uprobe_mmap if there are no uprobe
53 * events active at this time. Probably a fine grained per inode count is
54 * better?
55 */
56static atomic_t uprobe_events = ATOMIC_INIT(0);
57
/*
 * Maintain a temporary per vma info that can be used to search if a vma
 * has already been handled. This structure is introduced since extending
 * vm_area_struct wasn't recommended.
 */
struct vma_info {
	struct list_head probe_list;	/* link on the caller's "handled" list */
	struct mm_struct *mm;		/* mm owning the matched vma */
	loff_t vaddr;			/* probe virtual address within that mm */
};
68
struct uprobe {
	struct rb_node		rb_node;	/* node in the rb tree */
	atomic_t		ref;		/* access + creation refcount */
	struct rw_semaphore	consumer_rwsem;	/* protects the consumers chain */
	struct list_head	pending_list;
	struct uprobe_consumer	*consumers;	/* singly linked handler chain */
	struct inode		*inode;		/* Also hold a ref to inode */
	loff_t			offset;		/* probe offset within the file */
	int			flags;		/* UPROBE_* bits */
	struct arch_uprobe	arch;		/* arch copy of original insn */
};
80
81/*
82 * valid_vma: Verify if the specified vma is an executable vma
83 * Relax restrictions while unregistering: vm_flags might have
84 * changed after breakpoint was inserted.
85 * - is_register: indicates if we are in register context.
86 * - Return 1 if the specified virtual address is in an
87 * executable vma.
88 */
89static bool valid_vma(struct vm_area_struct *vma, bool is_register)
90{
91 if (!vma->vm_file)
92 return false;
93
94 if (!is_register)
95 return true;
96
97 if ((vma->vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)) == (VM_READ|VM_EXEC))
98 return true;
99
100 return false;
101}
102
103static loff_t vma_address(struct vm_area_struct *vma, loff_t offset)
104{
105 loff_t vaddr;
106
107 vaddr = vma->vm_start + offset;
108 vaddr -= vma->vm_pgoff << PAGE_SHIFT;
109
110 return vaddr;
111}
112
/**
 * __replace_page - replace page in vma by new page.
 * based on replace_page in mm/ksm.c
 *
 * @vma:      vma that holds the pte pointing to page
 * @page:     the cowed page we are replacing by kpage
 * @kpage:    the modified page we replace page by
 *
 * Returns 0 on success, -EFAULT on failure.
 */
static int __replace_page(struct vm_area_struct *vma, struct page *page, struct page *kpage)
{
	struct mm_struct *mm = vma->vm_mm;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep;
	spinlock_t *ptl;
	unsigned long addr;
	int err = -EFAULT;

	/* Find the user virtual address @page is mapped at within @vma. */
	addr = page_address_in_vma(page, vma);
	if (addr == -EFAULT)
		goto out;

	/* Walk the page tables; bail out if any level is not present. */
	pgd = pgd_offset(mm, addr);
	if (!pgd_present(*pgd))
		goto out;

	pud = pud_offset(pgd, addr);
	if (!pud_present(*pud))
		goto out;

	pmd = pmd_offset(pud, addr);
	if (!pmd_present(*pmd))
		goto out;

	/* Map and lock the pte so the entry cannot change underneath us. */
	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
	if (!ptep)
		goto out;

	/* Reference kpage for the new mapping and add it to the anon rmap. */
	get_page(kpage);
	page_add_new_anon_rmap(kpage, vma, addr);

	/* Swap the pte over to kpage, flushing the stale translation. */
	flush_cache_page(vma, addr, pte_pfn(*ptep));
	ptep_clear_flush(vma, addr, ptep);
	set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));

	/* Drop the old page's rmap entry and our mapping reference. */
	page_remove_rmap(page);
	if (!page_mapped(page))
		try_to_free_swap(page);
	put_page(page);
	pte_unmap_unlock(ptep, ptl);
	err = 0;

out:
	return err;
}
171
172/**
173 * is_swbp_insn - check if instruction is breakpoint instruction.
174 * @insn: instruction to be checked.
175 * Default implementation of is_swbp_insn
176 * Returns true if @insn is a breakpoint instruction.
177 */
178bool __weak is_swbp_insn(uprobe_opcode_t *insn)
179{
180 return *insn == UPROBE_SWBP_INSN;
181}
182
183/*
184 * NOTE:
185 * Expect the breakpoint instruction to be the smallest size instruction for
186 * the architecture. If an arch has variable length instruction and the
187 * breakpoint instruction is not of the smallest length instruction
188 * supported by that architecture then we need to modify read_opcode /
189 * write_opcode accordingly. This would never be a problem for archs that
190 * have fixed length instructions.
191 */
192
/*
 * write_opcode - write the opcode at a given virtual address.
 * @auprobe: arch breakpointing information.
 * @mm: the probed process address space.
 * @vaddr: the virtual address to store the opcode.
 * @opcode: opcode to be written at @vaddr.
 *
 * Called with mm->mmap_sem held (for read and with a reference to
 * mm).
 *
 * For mm @mm, write the opcode at @vaddr.
 * Return 0 (success) or a negative errno.
 */
static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
			unsigned long vaddr, uprobe_opcode_t opcode)
{
	struct page *old_page, *new_page;
	struct address_space *mapping;
	void *vaddr_old, *vaddr_new;
	struct vm_area_struct *vma;
	struct uprobe *uprobe;
	loff_t addr;
	int ret;

	/* Read the page with vaddr into memory */
	ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &old_page, &vma);
	if (ret <= 0)
		return ret;

	ret = -EINVAL;

	/*
	 * We are interested in text pages only. Our pages of interest
	 * should be mapped for read and execute only. We desist from
	 * adding probes in write mapped pages since the breakpoints
	 * might end up in the file copy.
	 */
	if (!valid_vma(vma, is_swbp_insn(&opcode)))
		goto put_out;

	/* Sanity check: the vma must really be a mapping of @uprobe's inode. */
	uprobe = container_of(auprobe, struct uprobe, arch);
	mapping = uprobe->inode->i_mapping;
	if (mapping != vma->vm_file->f_mapping)
		goto put_out;

	/* ... and @vaddr must match @uprobe's file offset in this vma. */
	addr = vma_address(vma, uprobe->offset);
	if (vaddr != (unsigned long)addr)
		goto put_out;

	ret = -ENOMEM;
	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr);
	if (!new_page)
		goto put_out;

	__SetPageUptodate(new_page);

	/*
	 * lock page will serialize against do_wp_page()'s
	 * PageAnon() handling
	 */
	lock_page(old_page);
	/* copy the page now that we've got it stable */
	vaddr_old = kmap_atomic(old_page);
	vaddr_new = kmap_atomic(new_page);

	memcpy(vaddr_new, vaddr_old, PAGE_SIZE);

	/* poke the new insn in, ASSUMES we don't cross page boundary */
	vaddr &= ~PAGE_MASK;
	BUG_ON(vaddr + UPROBE_SWBP_INSN_SIZE > PAGE_SIZE);
	memcpy(vaddr_new + vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);

	kunmap_atomic(vaddr_new);
	kunmap_atomic(vaddr_old);

	/* The patched copy is anonymous; make sure the vma has an anon_vma. */
	ret = anon_vma_prepare(vma);
	if (ret)
		goto unlock_out;

	/* Atomically switch the pte from old_page to the patched new_page. */
	lock_page(new_page);
	ret = __replace_page(vma, old_page, new_page);
	unlock_page(new_page);

unlock_out:
	unlock_page(old_page);
	page_cache_release(new_page);

put_out:
	put_page(old_page);

	return ret;
}
285
/**
 * read_opcode - read the opcode at a given virtual address.
 * @mm: the probed process address space.
 * @vaddr: the virtual address to read the opcode.
 * @opcode: location to store the read opcode.
 *
 * Called with mm->mmap_sem held (for read and with a reference to
 * mm.
 *
 * For mm @mm, read the opcode at @vaddr and store it in @opcode.
 * Return 0 (success) or a negative errno.
 */
static int read_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t *opcode)
{
	struct page *page;
	void *vaddr_new;
	int ret;

	/* Pin the page backing @vaddr (read-only access). */
	ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &page, NULL);
	if (ret <= 0)
		return ret;

	/* Lock the page so we copy a stable view of the instruction. */
	lock_page(page);
	vaddr_new = kmap_atomic(page);
	vaddr &= ~PAGE_MASK;	/* reduce to the offset within the page */
	memcpy(opcode, vaddr_new + vaddr, UPROBE_SWBP_INSN_SIZE);
	kunmap_atomic(vaddr_new);
	unlock_page(page);

	put_page(page);

	return 0;
}
319
320static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr)
321{
322 uprobe_opcode_t opcode;
323 int result;
324
325 result = read_opcode(mm, vaddr, &opcode);
326 if (result)
327 return result;
328
329 if (is_swbp_insn(&opcode))
330 return 1;
331
332 return 0;
333}
334
335/**
336 * set_swbp - store breakpoint at a given address.
337 * @auprobe: arch specific probepoint information.
338 * @mm: the probed process address space.
339 * @vaddr: the virtual address to insert the opcode.
340 *
341 * For mm @mm, store the breakpoint instruction at @vaddr.
342 * Return 0 (success) or a negative errno.
343 */
344int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
345{
346 int result;
347
348 result = is_swbp_at_addr(mm, vaddr);
349 if (result == 1)
350 return -EEXIST;
351
352 if (result)
353 return result;
354
355 return write_opcode(auprobe, mm, vaddr, UPROBE_SWBP_INSN);
356}
357
358/**
359 * set_orig_insn - Restore the original instruction.
360 * @mm: the probed process address space.
361 * @auprobe: arch specific probepoint information.
362 * @vaddr: the virtual address to insert the opcode.
363 * @verify: if true, verify existance of breakpoint instruction.
364 *
365 * For mm @mm, restore the original opcode (opcode) at @vaddr.
366 * Return 0 (success) or a negative errno.
367 */
368int __weak
369set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, bool verify)
370{
371 if (verify) {
372 int result;
373
374 result = is_swbp_at_addr(mm, vaddr);
375 if (!result)
376 return -EINVAL;
377
378 if (result != 1)
379 return result;
380 }
381 return write_opcode(auprobe, mm, vaddr, *(uprobe_opcode_t *)auprobe->insn);
382}
383
384static int match_uprobe(struct uprobe *l, struct uprobe *r)
385{
386 if (l->inode < r->inode)
387 return -1;
388
389 if (l->inode > r->inode)
390 return 1;
391
392 if (l->offset < r->offset)
393 return -1;
394
395 if (l->offset > r->offset)
396 return 1;
397
398 return 0;
399}
400
401static struct uprobe *__find_uprobe(struct inode *inode, loff_t offset)
402{
403 struct uprobe u = { .inode = inode, .offset = offset };
404 struct rb_node *n = uprobes_tree.rb_node;
405 struct uprobe *uprobe;
406 int match;
407
408 while (n) {
409 uprobe = rb_entry(n, struct uprobe, rb_node);
410 match = match_uprobe(&u, uprobe);
411 if (!match) {
412 atomic_inc(&uprobe->ref);
413 return uprobe;
414 }
415
416 if (match < 0)
417 n = n->rb_left;
418 else
419 n = n->rb_right;
420 }
421 return NULL;
422}
423
424/*
425 * Find a uprobe corresponding to a given inode:offset
426 * Acquires uprobes_treelock
427 */
428static struct uprobe *find_uprobe(struct inode *inode, loff_t offset)
429{
430 struct uprobe *uprobe;
431 unsigned long flags;
432
433 spin_lock_irqsave(&uprobes_treelock, flags);
434 uprobe = __find_uprobe(inode, offset);
435 spin_unlock_irqrestore(&uprobes_treelock, flags);
436
437 return uprobe;
438}
439
440static struct uprobe *__insert_uprobe(struct uprobe *uprobe)
441{
442 struct rb_node **p = &uprobes_tree.rb_node;
443 struct rb_node *parent = NULL;
444 struct uprobe *u;
445 int match;
446
447 while (*p) {
448 parent = *p;
449 u = rb_entry(parent, struct uprobe, rb_node);
450 match = match_uprobe(uprobe, u);
451 if (!match) {
452 atomic_inc(&u->ref);
453 return u;
454 }
455
456 if (match < 0)
457 p = &parent->rb_left;
458 else
459 p = &parent->rb_right;
460
461 }
462
463 u = NULL;
464 rb_link_node(&uprobe->rb_node, parent, p);
465 rb_insert_color(&uprobe->rb_node, &uprobes_tree);
466 /* get access + creation ref */
467 atomic_set(&uprobe->ref, 2);
468
469 return u;
470}
471
472/*
473 * Acquire uprobes_treelock.
474 * Matching uprobe already exists in rbtree;
475 * increment (access refcount) and return the matching uprobe.
476 *
477 * No matching uprobe; insert the uprobe in rb_tree;
478 * get a double refcount (access + creation) and return NULL.
479 */
480static struct uprobe *insert_uprobe(struct uprobe *uprobe)
481{
482 unsigned long flags;
483 struct uprobe *u;
484
485 spin_lock_irqsave(&uprobes_treelock, flags);
486 u = __insert_uprobe(uprobe);
487 spin_unlock_irqrestore(&uprobes_treelock, flags);
488
489 return u;
490}
491
492static void put_uprobe(struct uprobe *uprobe)
493{
494 if (atomic_dec_and_test(&uprobe->ref))
495 kfree(uprobe);
496}
497
/*
 * Allocate (or find) the uprobe for inode:offset. Returns NULL on
 * allocation failure; otherwise a uprobe holding an access ref.
 */
static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
{
	struct uprobe *uprobe, *cur_uprobe;

	uprobe = kzalloc(sizeof(struct uprobe), GFP_KERNEL);
	if (!uprobe)
		return NULL;

	/* Take our own inode ref; released via iput() in delete_uprobe(). */
	uprobe->inode = igrab(inode);
	/* NOTE(review): igrab() can return NULL for a dying inode and is not
	 * checked here — confirm callers guarantee a live inode. */
	uprobe->offset = offset;
	init_rwsem(&uprobe->consumer_rwsem);
	INIT_LIST_HEAD(&uprobe->pending_list);

	/* add to uprobes_tree, sorted on inode:offset */
	cur_uprobe = insert_uprobe(uprobe);

	/* a uprobe exists for this inode:offset combination */
	if (cur_uprobe) {
		/* Lost the race: use the existing node, undo our allocations. */
		kfree(uprobe);
		uprobe = cur_uprobe;
		iput(inode);
	} else {
		atomic_inc(&uprobe_events);
	}

	return uprobe;
}
525
526/* Returns the previous consumer */
527static struct uprobe_consumer *
528consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
529{
530 down_write(&uprobe->consumer_rwsem);
531 uc->next = uprobe->consumers;
532 uprobe->consumers = uc;
533 up_write(&uprobe->consumer_rwsem);
534
535 return uc->next;
536}
537
538/*
539 * For uprobe @uprobe, delete the consumer @uc.
540 * Return true if the @uc is deleted successfully
541 * or return false.
542 */
543static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc)
544{
545 struct uprobe_consumer **con;
546 bool ret = false;
547
548 down_write(&uprobe->consumer_rwsem);
549 for (con = &uprobe->consumers; *con; con = &(*con)->next) {
550 if (*con == uc) {
551 *con = uc->next;
552 ret = true;
553 break;
554 }
555 }
556 up_write(&uprobe->consumer_rwsem);
557
558 return ret;
559}
560
561static int
562__copy_insn(struct address_space *mapping, struct vm_area_struct *vma, char *insn,
563 unsigned long nbytes, unsigned long offset)
564{
565 struct file *filp = vma->vm_file;
566 struct page *page;
567 void *vaddr;
568 unsigned long off1;
569 unsigned long idx;
570
571 if (!filp)
572 return -EINVAL;
573
574 idx = (unsigned long)(offset >> PAGE_CACHE_SHIFT);
575 off1 = offset &= ~PAGE_MASK;
576
577 /*
578 * Ensure that the page that has the original instruction is
579 * populated and in page-cache.
580 */
581 page = read_mapping_page(mapping, idx, filp);
582 if (IS_ERR(page))
583 return PTR_ERR(page);
584
585 vaddr = kmap_atomic(page);
586 memcpy(insn, vaddr + off1, nbytes);
587 kunmap_atomic(vaddr);
588 page_cache_release(page);
589
590 return 0;
591}
592
593static int
594copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr)
595{
596 struct address_space *mapping;
597 unsigned long nbytes;
598 int bytes;
599
600 addr &= ~PAGE_MASK;
601 nbytes = PAGE_SIZE - addr;
602 mapping = uprobe->inode->i_mapping;
603
604 /* Instruction at end of binary; copy only available bytes */
605 if (uprobe->offset + MAX_UINSN_BYTES > uprobe->inode->i_size)
606 bytes = uprobe->inode->i_size - uprobe->offset;
607 else
608 bytes = MAX_UINSN_BYTES;
609
610 /* Instruction at the page-boundary; copy bytes in second page */
611 if (nbytes < bytes) {
612 if (__copy_insn(mapping, vma, uprobe->arch.insn + nbytes,
613 bytes - nbytes, uprobe->offset + nbytes))
614 return -ENOMEM;
615
616 bytes = nbytes;
617 }
618 return __copy_insn(mapping, vma, uprobe->arch.insn, bytes, uprobe->offset);
619}
620
/*
 * Arm @uprobe in @mm at the vma-relative address @vaddr.
 * Returns 0, -EEXIST if the probe is (or must be treated as) already
 * present, or a negative errno.
 */
static int
install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
			struct vm_area_struct *vma, loff_t vaddr)
{
	unsigned long addr;
	int ret;

	/*
	 * If probe is being deleted, unregister thread could be done with
	 * the vma-rmap-walk through. Adding a probe now can be fatal since
	 * nobody will be able to cleanup. Also we could be from fork or
	 * mremap path, where the probe might have already been inserted.
	 * Hence behave as if probe already existed.
	 */
	if (!uprobe->consumers)
		return -EEXIST;

	addr = (unsigned long)vaddr;

	/* First arming of this uprobe: copy and validate the original insn. */
	if (!(uprobe->flags & UPROBE_COPY_INSN)) {
		ret = copy_insn(uprobe, vma, addr);
		if (ret)
			return ret;

		/* Refuse to probe a location already holding a breakpoint. */
		if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn))
			return -EEXIST;

		ret = arch_uprobes_analyze_insn(&uprobe->arch, mm);
		if (ret)
			return ret;

		uprobe->flags |= UPROBE_COPY_INSN;
	}
	ret = set_swbp(&uprobe->arch, mm, addr);

	return ret;
}
658
659static void
660remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, loff_t vaddr)
661{
662 set_orig_insn(&uprobe->arch, mm, (unsigned long)vaddr, true);
663}
664
/*
 * Unlink @uprobe from the rbtree, then drop the inode reference taken
 * in alloc_uprobe() and the creation reference.
 */
static void delete_uprobe(struct uprobe *uprobe)
{
	unsigned long flags;

	spin_lock_irqsave(&uprobes_treelock, flags);
	rb_erase(&uprobe->rb_node, &uprobes_tree);
	spin_unlock_irqrestore(&uprobes_treelock, flags);
	iput(uprobe->inode);	/* matches igrab() in alloc_uprobe() */
	put_uprobe(uprobe);	/* drop the creation ref */
	atomic_dec(&uprobe_events);
}
676
/*
 * Scan the rmap prio tree of @mapping for a vma covering @offset that
 * is not already recorded on @head. On success, fill @vi with the vma's
 * (mm, vaddr), link it on @head, and return it with a reference held on
 * mm->mm_users. Returns NULL when every matching vma has been handled.
 */
static struct vma_info *
__find_next_vma_info(struct address_space *mapping, struct list_head *head,
			struct vma_info *vi, loff_t offset, bool is_register)
{
	struct prio_tree_iter iter;
	struct vm_area_struct *vma;
	struct vma_info *tmpvi;
	unsigned long pgoff;
	int existing_vma;
	loff_t vaddr;

	pgoff = offset >> PAGE_SHIFT;

	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
		if (!valid_vma(vma, is_register))
			continue;

		/* Was this (mm, vaddr) pair already collected on @head? */
		existing_vma = 0;
		vaddr = vma_address(vma, offset);

		list_for_each_entry(tmpvi, head, probe_list) {
			if (tmpvi->mm == vma->vm_mm && tmpvi->vaddr == vaddr) {
				existing_vma = 1;
				break;
			}
		}

		/*
		 * Another vma needs a probe to be installed. However skip
		 * installing the probe if the vma is about to be unlinked.
		 */
		if (!existing_vma && atomic_inc_not_zero(&vma->vm_mm->mm_users)) {
			vi->mm = vma->vm_mm;
			vi->vaddr = vaddr;
			list_add(&vi->probe_list, head);

			return vi;
		}
	}

	return NULL;
}
719
720/*
721 * Iterate in the rmap prio tree and find a vma where a probe has not
722 * yet been inserted.
723 */
724static struct vma_info *
725find_next_vma_info(struct address_space *mapping, struct list_head *head,
726 loff_t offset, bool is_register)
727{
728 struct vma_info *vi, *retvi;
729
730 vi = kzalloc(sizeof(struct vma_info), GFP_KERNEL);
731 if (!vi)
732 return ERR_PTR(-ENOMEM);
733
734 mutex_lock(&mapping->i_mmap_mutex);
735 retvi = __find_next_vma_info(mapping, head, vi, offset, is_register);
736 mutex_unlock(&mapping->i_mmap_mutex);
737
738 if (!retvi)
739 kfree(vi);
740
741 return retvi;
742}
743
/*
 * Walk every mm that maps @uprobe's inode:offset and install
 * (@is_register) or remove the breakpoint there. Returns 0 on success
 * or the first fatal errno when registering.
 */
static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
{
	struct list_head try_list;
	struct vm_area_struct *vma;
	struct address_space *mapping;
	struct vma_info *vi, *tmpvi;
	struct mm_struct *mm;
	loff_t vaddr;
	int ret;

	mapping = uprobe->inode->i_mapping;
	INIT_LIST_HEAD(&try_list);

	ret = 0;

	for (;;) {
		/* Next unhandled vma; holds an mm_users ref on vi->mm. */
		vi = find_next_vma_info(mapping, &try_list, uprobe->offset, is_register);
		if (!vi)
			break;

		if (IS_ERR(vi)) {
			ret = PTR_ERR(vi);
			break;
		}

		/*
		 * Revalidate under mmap_sem: the vma can have changed or
		 * disappeared since the i_mmap walk released its lock.
		 */
		mm = vi->mm;
		down_read(&mm->mmap_sem);
		vma = find_vma(mm, (unsigned long)vi->vaddr);
		if (!vma || !valid_vma(vma, is_register)) {
			list_del(&vi->probe_list);
			kfree(vi);
			up_read(&mm->mmap_sem);
			mmput(mm);
			continue;
		}
		vaddr = vma_address(vma, uprobe->offset);
		if (vma->vm_file->f_mapping->host != uprobe->inode ||
						vaddr != vi->vaddr) {
			list_del(&vi->probe_list);
			kfree(vi);
			up_read(&mm->mmap_sem);
			mmput(mm);
			continue;
		}

		if (is_register)
			ret = install_breakpoint(uprobe, mm, vma, vi->vaddr);
		else
			remove_breakpoint(uprobe, mm, vi->vaddr);

		up_read(&mm->mmap_sem);
		mmput(mm);
		if (is_register) {
			/* An already-present breakpoint is not a failure. */
			if (ret && ret == -EEXIST)
				ret = 0;
			if (ret)
				break;
		}
	}

	/* Release the vma_info entries accumulated on the try list. */
	list_for_each_entry_safe(vi, tmpvi, &try_list, probe_list) {
		list_del(&vi->probe_list);
		kfree(vi);
	}

	return ret;
}
811
812static int __uprobe_register(struct uprobe *uprobe)
813{
814 return register_for_each_vma(uprobe, true);
815}
816
817static void __uprobe_unregister(struct uprobe *uprobe)
818{
819 if (!register_for_each_vma(uprobe, false))
820 delete_uprobe(uprobe);
821
822 /* TODO : cant unregister? schedule a worker thread */
823}
824
825/*
826 * uprobe_register - register a probe
827 * @inode: the file in which the probe has to be placed.
828 * @offset: offset from the start of the file.
829 * @uc: information on howto handle the probe..
830 *
831 * Apart from the access refcount, uprobe_register() takes a creation
832 * refcount (thro alloc_uprobe) if and only if this @uprobe is getting
833 * inserted into the rbtree (i.e first consumer for a @inode:@offset
834 * tuple). Creation refcount stops uprobe_unregister from freeing the
835 * @uprobe even before the register operation is complete. Creation
836 * refcount is released when the last @uc for the @uprobe
837 * unregisters.
838 *
839 * Return errno if it cannot successully install probes
840 * else return 0 (success)
841 */
842int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
843{
844 struct uprobe *uprobe;
845 int ret;
846
847 if (!inode || !uc || uc->next)
848 return -EINVAL;
849
850 if (offset > i_size_read(inode))
851 return -EINVAL;
852
853 ret = 0;
854 mutex_lock(uprobes_hash(inode));
855 uprobe = alloc_uprobe(inode, offset);
856
857 if (uprobe && !consumer_add(uprobe, uc)) {
858 ret = __uprobe_register(uprobe);
859 if (ret) {
860 uprobe->consumers = NULL;
861 __uprobe_unregister(uprobe);
862 } else {
863 uprobe->flags |= UPROBE_RUN_HANDLER;
864 }
865 }
866
867 mutex_unlock(uprobes_hash(inode));
868 put_uprobe(uprobe);
869
870 return ret;
871}
872
873/*
874 * uprobe_unregister - unregister a already registered probe.
875 * @inode: the file in which the probe has to be removed.
876 * @offset: offset from the start of the file.
877 * @uc: identify which probe if multiple probes are colocated.
878 */
879void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
880{
881 struct uprobe *uprobe;
882
883 if (!inode || !uc)
884 return;
885
886 uprobe = find_uprobe(inode, offset);
887 if (!uprobe)
888 return;
889
890 mutex_lock(uprobes_hash(inode));
891
892 if (consumer_del(uprobe, uc)) {
893 if (!uprobe->consumers) {
894 __uprobe_unregister(uprobe);
895 uprobe->flags &= ~UPROBE_RUN_HANDLER;
896 }
897 }
898
899 mutex_unlock(uprobes_hash(inode));
900 if (uprobe)
901 put_uprobe(uprobe);
902}
903
904/*
905 * Of all the nodes that correspond to the given inode, return the node
906 * with the least offset.
907 */
908static struct rb_node *find_least_offset_node(struct inode *inode)
909{
910 struct uprobe u = { .inode = inode, .offset = 0};
911 struct rb_node *n = uprobes_tree.rb_node;
912 struct rb_node *close_node = NULL;
913 struct uprobe *uprobe;
914 int match;
915
916 while (n) {
917 uprobe = rb_entry(n, struct uprobe, rb_node);
918 match = match_uprobe(&u, uprobe);
919
920 if (uprobe->inode == inode)
921 close_node = n;
922
923 if (!match)
924 return close_node;
925
926 if (match < 0)
927 n = n->rb_left;
928 else
929 n = n->rb_right;
930 }
931
932 return close_node;
933}
934
935/*
936 * For a given inode, build a list of probes that need to be inserted.
937 */
938static void build_probe_list(struct inode *inode, struct list_head *head)
939{
940 struct uprobe *uprobe;
941 unsigned long flags;
942 struct rb_node *n;
943
944 spin_lock_irqsave(&uprobes_treelock, flags);
945
946 n = find_least_offset_node(inode);
947
948 for (; n; n = rb_next(n)) {
949 uprobe = rb_entry(n, struct uprobe, rb_node);
950 if (uprobe->inode != inode)
951 break;
952
953 list_add(&uprobe->pending_list, head);
954 atomic_inc(&uprobe->ref);
955 }
956
957 spin_unlock_irqrestore(&uprobes_treelock, flags);
958}
959
960/*
961 * Called from mmap_region.
962 * called with mm->mmap_sem acquired.
963 *
964 * Return -ve no if we fail to insert probes and we cannot
965 * bail-out.
966 * Return 0 otherwise. i.e:
967 *
968 * - successful insertion of probes
969 * - (or) no possible probes to be inserted.
970 * - (or) insertion of probes failed but we can bail-out.
971 */
972int uprobe_mmap(struct vm_area_struct *vma)
973{
974 struct list_head tmp_list;
975 struct uprobe *uprobe, *u;
976 struct inode *inode;
977 int ret;
978
979 if (!atomic_read(&uprobe_events) || !valid_vma(vma, true))
980 return 0;
981
982 inode = vma->vm_file->f_mapping->host;
983 if (!inode)
984 return 0;
985
986 INIT_LIST_HEAD(&tmp_list);
987 mutex_lock(uprobes_mmap_hash(inode));
988 build_probe_list(inode, &tmp_list);
989
990 ret = 0;
991
992 list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
993 loff_t vaddr;
994
995 list_del(&uprobe->pending_list);
996 if (!ret) {
997 vaddr = vma_address(vma, uprobe->offset);
998 if (vaddr >= vma->vm_start && vaddr < vma->vm_end) {
999 ret = install_breakpoint(uprobe, vma->vm_mm, vma, vaddr);
1000 /* Ignore double add: */
1001 if (ret == -EEXIST)
1002 ret = 0;
1003 }
1004 }
1005 put_uprobe(uprobe);
1006 }
1007
1008 mutex_unlock(uprobes_mmap_hash(inode));
1009
1010 return ret;
1011}
1012
1013static int __init init_uprobes(void)
1014{
1015 int i;
1016
1017 for (i = 0; i < UPROBES_HASH_SZ; i++) {
1018 mutex_init(&uprobes_mutex[i]);
1019 mutex_init(&uprobes_mmap_mutex[i]);
1020 }
1021 return 0;
1022}
1023
1024static void __exit exit_uprobes(void)
1025{
1026}
1027
1028module_init(init_uprobes);
1029module_exit(exit_uprobes);
diff --git a/kernel/signal.c b/kernel/signal.c
index c73c4284160..8511e39813c 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1054,13 +1054,13 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
1054 struct sigpending *pending; 1054 struct sigpending *pending;
1055 struct sigqueue *q; 1055 struct sigqueue *q;
1056 int override_rlimit; 1056 int override_rlimit;
1057 1057 int ret = 0, result;
1058 trace_signal_generate(sig, info, t);
1059 1058
1060 assert_spin_locked(&t->sighand->siglock); 1059 assert_spin_locked(&t->sighand->siglock);
1061 1060
1061 result = TRACE_SIGNAL_IGNORED;
1062 if (!prepare_signal(sig, t, from_ancestor_ns)) 1062 if (!prepare_signal(sig, t, from_ancestor_ns))
1063 return 0; 1063 goto ret;
1064 1064
1065 pending = group ? &t->signal->shared_pending : &t->pending; 1065 pending = group ? &t->signal->shared_pending : &t->pending;
1066 /* 1066 /*
@@ -1068,8 +1068,11 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
1068 * exactly one non-rt signal, so that we can get more 1068 * exactly one non-rt signal, so that we can get more
1069 * detailed information about the cause of the signal. 1069 * detailed information about the cause of the signal.
1070 */ 1070 */
1071 result = TRACE_SIGNAL_ALREADY_PENDING;
1071 if (legacy_queue(pending, sig)) 1072 if (legacy_queue(pending, sig))
1072 return 0; 1073 goto ret;
1074
1075 result = TRACE_SIGNAL_DELIVERED;
1073 /* 1076 /*
1074 * fast-pathed signals for kernel-internal things like SIGSTOP 1077 * fast-pathed signals for kernel-internal things like SIGSTOP
1075 * or SIGKILL. 1078 * or SIGKILL.
@@ -1127,14 +1130,15 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
1127 * signal was rt and sent by user using something 1130 * signal was rt and sent by user using something
1128 * other than kill(). 1131 * other than kill().
1129 */ 1132 */
1130 trace_signal_overflow_fail(sig, group, info); 1133 result = TRACE_SIGNAL_OVERFLOW_FAIL;
1131 return -EAGAIN; 1134 ret = -EAGAIN;
1135 goto ret;
1132 } else { 1136 } else {
1133 /* 1137 /*
1134 * This is a silent loss of information. We still 1138 * This is a silent loss of information. We still
1135 * send the signal, but the *info bits are lost. 1139 * send the signal, but the *info bits are lost.
1136 */ 1140 */
1137 trace_signal_lose_info(sig, group, info); 1141 result = TRACE_SIGNAL_LOSE_INFO;
1138 } 1142 }
1139 } 1143 }
1140 1144
@@ -1142,7 +1146,9 @@ out_set:
1142 signalfd_notify(t, sig); 1146 signalfd_notify(t, sig);
1143 sigaddset(&pending->signal, sig); 1147 sigaddset(&pending->signal, sig);
1144 complete_signal(sig, t, group); 1148 complete_signal(sig, t, group);
1145 return 0; 1149ret:
1150 trace_signal_generate(sig, info, t, group, result);
1151 return ret;
1146} 1152}
1147 1153
1148static int send_signal(int sig, struct siginfo *info, struct task_struct *t, 1154static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
@@ -1585,7 +1591,7 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
1585 int sig = q->info.si_signo; 1591 int sig = q->info.si_signo;
1586 struct sigpending *pending; 1592 struct sigpending *pending;
1587 unsigned long flags; 1593 unsigned long flags;
1588 int ret; 1594 int ret, result;
1589 1595
1590 BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); 1596 BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
1591 1597
@@ -1594,6 +1600,7 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
1594 goto ret; 1600 goto ret;
1595 1601
1596 ret = 1; /* the signal is ignored */ 1602 ret = 1; /* the signal is ignored */
1603 result = TRACE_SIGNAL_IGNORED;
1597 if (!prepare_signal(sig, t, 0)) 1604 if (!prepare_signal(sig, t, 0))
1598 goto out; 1605 goto out;
1599 1606
@@ -1605,6 +1612,7 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
1605 */ 1612 */
1606 BUG_ON(q->info.si_code != SI_TIMER); 1613 BUG_ON(q->info.si_code != SI_TIMER);
1607 q->info.si_overrun++; 1614 q->info.si_overrun++;
1615 result = TRACE_SIGNAL_ALREADY_PENDING;
1608 goto out; 1616 goto out;
1609 } 1617 }
1610 q->info.si_overrun = 0; 1618 q->info.si_overrun = 0;
@@ -1614,7 +1622,9 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
1614 list_add_tail(&q->list, &pending->list); 1622 list_add_tail(&q->list, &pending->list);
1615 sigaddset(&pending->signal, sig); 1623 sigaddset(&pending->signal, sig);
1616 complete_signal(sig, t, group); 1624 complete_signal(sig, t, group);
1625 result = TRACE_SIGNAL_DELIVERED;
1617out: 1626out:
1627 trace_signal_generate(sig, &q->info, t, group, result);
1618 unlock_task_sighand(t, &flags); 1628 unlock_task_sighand(t, &flags);
1619ret: 1629ret:
1620 return ret; 1630 return ret;
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index d117262deba..14bc092fb12 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -3,12 +3,9 @@
3 * 3 *
4 * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc. 4 * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
5 * 5 *
6 * this code detects hard lockups: incidents in where on a CPU 6 * Note: Most of this code is borrowed heavily from the original softlockup
7 * the kernel does not respond to anything except NMI. 7 * detector, so thanks to Ingo for the initial implementation.
8 * 8 * Some chunks also taken from the old x86-specific nmi watchdog code, thanks
9 * Note: Most of this code is borrowed heavily from softlockup.c,
10 * so thanks to Ingo for the initial implementation.
11 * Some chunks also taken from arch/x86/kernel/apic/nmi.c, thanks
12 * to those contributors as well. 9 * to those contributors as well.
13 */ 10 */
14 11
@@ -117,9 +114,10 @@ static unsigned long get_sample_period(void)
117{ 114{
118 /* 115 /*
119 * convert watchdog_thresh from seconds to ns 116 * convert watchdog_thresh from seconds to ns
120 * the divide by 5 is to give hrtimer 5 chances to 117 * the divide by 5 is to give hrtimer several chances (two
121 * increment before the hardlockup detector generates 118 * or three with the current relation between the soft
122 * a warning 119 * and hard thresholds) to increment before the
120 * hardlockup detector generates a warning
123 */ 121 */
124 return get_softlockup_thresh() * (NSEC_PER_SEC / 5); 122 return get_softlockup_thresh() * (NSEC_PER_SEC / 5);
125} 123}
@@ -336,9 +334,11 @@ static int watchdog(void *unused)
336 334
337 set_current_state(TASK_INTERRUPTIBLE); 335 set_current_state(TASK_INTERRUPTIBLE);
338 /* 336 /*
339 * Run briefly once per second to reset the softlockup timestamp. 337 * Run briefly (kicked by the hrtimer callback function) once every
340 * If this gets delayed for more than 60 seconds then the 338 * get_sample_period() seconds (4 seconds by default) to reset the
341 * debug-printout triggers in watchdog_timer_fn(). 339 * softlockup timestamp. If this gets delayed for more than
340 * 2*watchdog_thresh seconds then the debug-printout triggers in
341 * watchdog_timer_fn().
342 */ 342 */
343 while (!kthread_should_stop()) { 343 while (!kthread_should_stop()) {
344 __touch_watchdog(); 344 __touch_watchdog();
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8745ac7d1f7..9739c0b45e9 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -166,18 +166,21 @@ config LOCKUP_DETECTOR
166 hard and soft lockups. 166 hard and soft lockups.
167 167
168 Softlockups are bugs that cause the kernel to loop in kernel 168 Softlockups are bugs that cause the kernel to loop in kernel
169 mode for more than 60 seconds, without giving other tasks a 169 mode for more than 20 seconds, without giving other tasks a
170 chance to run. The current stack trace is displayed upon 170 chance to run. The current stack trace is displayed upon
171 detection and the system will stay locked up. 171 detection and the system will stay locked up.
172 172
173 Hardlockups are bugs that cause the CPU to loop in kernel mode 173 Hardlockups are bugs that cause the CPU to loop in kernel mode
174 for more than 60 seconds, without letting other interrupts have a 174 for more than 10 seconds, without letting other interrupts have a
175 chance to run. The current stack trace is displayed upon detection 175 chance to run. The current stack trace is displayed upon detection
176 and the system will stay locked up. 176 and the system will stay locked up.
177 177
178 The overhead should be minimal. A periodic hrtimer runs to 178 The overhead should be minimal. A periodic hrtimer runs to
179 generate interrupts and kick the watchdog task every 10-12 seconds. 179 generate interrupts and kick the watchdog task every 4 seconds.
180 An NMI is generated every 60 seconds or so to check for hardlockups. 180 An NMI is generated every 10 seconds or so to check for hardlockups.
181
182 The frequency of hrtimer and NMI events and the soft and hard lockup
183 thresholds can be controlled through the sysctl watchdog_thresh.
181 184
182config HARDLOCKUP_DETECTOR 185config HARDLOCKUP_DETECTOR
183 def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \ 186 def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \
@@ -189,7 +192,8 @@ config BOOTPARAM_HARDLOCKUP_PANIC
189 help 192 help
190 Say Y here to enable the kernel to panic on "hard lockups", 193 Say Y here to enable the kernel to panic on "hard lockups",
191 which are bugs that cause the kernel to loop in kernel 194 which are bugs that cause the kernel to loop in kernel
192 mode with interrupts disabled for more than 60 seconds. 195 mode with interrupts disabled for more than 10 seconds (configurable
196 using the watchdog_thresh sysctl).
193 197
194 Say N if unsure. 198 Say N if unsure.
195 199
@@ -206,8 +210,8 @@ config BOOTPARAM_SOFTLOCKUP_PANIC
206 help 210 help
207 Say Y here to enable the kernel to panic on "soft lockups", 211 Say Y here to enable the kernel to panic on "soft lockups",
208 which are bugs that cause the kernel to loop in kernel 212 which are bugs that cause the kernel to loop in kernel
209 mode for more than 60 seconds, without giving other tasks a 213 mode for more than 20 seconds (configurable using the watchdog_thresh
210 chance to run. 214 sysctl), without giving other tasks a chance to run.
211 215
212 The panic can be used in combination with panic_timeout, 216 The panic can be used in combination with panic_timeout,
213 to cause the system to reboot automatically after a 217 to cause the system to reboot automatically after a
diff --git a/mm/mmap.c b/mm/mmap.c
index 3f758c7f4c8..5a863d328a4 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -30,6 +30,7 @@
30#include <linux/perf_event.h> 30#include <linux/perf_event.h>
31#include <linux/audit.h> 31#include <linux/audit.h>
32#include <linux/khugepaged.h> 32#include <linux/khugepaged.h>
33#include <linux/uprobes.h>
33 34
34#include <asm/uaccess.h> 35#include <asm/uaccess.h>
35#include <asm/cacheflush.h> 36#include <asm/cacheflush.h>
@@ -616,6 +617,13 @@ again: remove_next = 1 + (end > next->vm_end);
616 if (mapping) 617 if (mapping)
617 mutex_unlock(&mapping->i_mmap_mutex); 618 mutex_unlock(&mapping->i_mmap_mutex);
618 619
620 if (root) {
621 uprobe_mmap(vma);
622
623 if (adjust_next)
624 uprobe_mmap(next);
625 }
626
619 if (remove_next) { 627 if (remove_next) {
620 if (file) { 628 if (file) {
621 fput(file); 629 fput(file);
@@ -637,6 +645,8 @@ again: remove_next = 1 + (end > next->vm_end);
637 goto again; 645 goto again;
638 } 646 }
639 } 647 }
648 if (insert && file)
649 uprobe_mmap(insert);
640 650
641 validate_mm(mm); 651 validate_mm(mm);
642 652
@@ -1329,6 +1339,11 @@ out:
1329 mm->locked_vm += (len >> PAGE_SHIFT); 1339 mm->locked_vm += (len >> PAGE_SHIFT);
1330 } else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK)) 1340 } else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
1331 make_pages_present(addr, addr + len); 1341 make_pages_present(addr, addr + len);
1342
1343 if (file && uprobe_mmap(vma))
1344 /* matching probes but cannot insert */
1345 goto unmap_and_free_vma;
1346
1332 return addr; 1347 return addr;
1333 1348
1334unmap_and_free_vma: 1349unmap_and_free_vma:
@@ -2285,6 +2300,10 @@ int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
2285 if ((vma->vm_flags & VM_ACCOUNT) && 2300 if ((vma->vm_flags & VM_ACCOUNT) &&
2286 security_vm_enough_memory_mm(mm, vma_pages(vma))) 2301 security_vm_enough_memory_mm(mm, vma_pages(vma)))
2287 return -ENOMEM; 2302 return -ENOMEM;
2303
2304 if (vma->vm_file && uprobe_mmap(vma))
2305 return -EINVAL;
2306
2288 vma_link(mm, vma, prev, rb_link, rb_parent); 2307 vma_link(mm, vma, prev, rb_link, rb_parent);
2289 return 0; 2308 return 0;
2290} 2309}
@@ -2354,6 +2373,10 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
2354 new_vma->vm_pgoff = pgoff; 2373 new_vma->vm_pgoff = pgoff;
2355 if (new_vma->vm_file) { 2374 if (new_vma->vm_file) {
2356 get_file(new_vma->vm_file); 2375 get_file(new_vma->vm_file);
2376
2377 if (uprobe_mmap(new_vma))
2378 goto out_free_mempol;
2379
2357 if (vma->vm_flags & VM_EXECUTABLE) 2380 if (vma->vm_flags & VM_EXECUTABLE)
2358 added_exe_file_vma(mm); 2381 added_exe_file_vma(mm);
2359 } 2382 }
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
index 4626a398836..ca600e09c8d 100644
--- a/tools/perf/Documentation/Makefile
+++ b/tools/perf/Documentation/Makefile
@@ -1,3 +1,10 @@
1OUTPUT := ./
2ifeq ("$(origin O)", "command line")
3 ifneq ($(O),)
4 OUTPUT := $(O)/
5 endif
6endif
7
1MAN1_TXT= \ 8MAN1_TXT= \
2 $(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \ 9 $(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \
3 $(wildcard perf-*.txt)) \ 10 $(wildcard perf-*.txt)) \
@@ -6,10 +13,11 @@ MAN5_TXT=
6MAN7_TXT= 13MAN7_TXT=
7 14
8MAN_TXT = $(MAN1_TXT) $(MAN5_TXT) $(MAN7_TXT) 15MAN_TXT = $(MAN1_TXT) $(MAN5_TXT) $(MAN7_TXT)
9MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT)) 16_MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT))
10MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT)) 17_MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT))
11 18
12DOC_HTML=$(MAN_HTML) 19MAN_XML=$(addprefix $(OUTPUT),$(_MAN_XML))
20MAN_HTML=$(addprefix $(OUTPUT),$(_MAN_HTML))
13 21
14ARTICLES = 22ARTICLES =
15# with their own formatting rules. 23# with their own formatting rules.
@@ -18,11 +26,17 @@ API_DOCS = $(patsubst %.txt,%,$(filter-out technical/api-index-skel.txt technica
18SP_ARTICLES += $(API_DOCS) 26SP_ARTICLES += $(API_DOCS)
19SP_ARTICLES += technical/api-index 27SP_ARTICLES += technical/api-index
20 28
21DOC_HTML += $(patsubst %,%.html,$(ARTICLES) $(SP_ARTICLES)) 29_DOC_HTML = $(_MAN_HTML)
30_DOC_HTML+=$(patsubst %,%.html,$(ARTICLES) $(SP_ARTICLES))
31DOC_HTML=$(addprefix $(OUTPUT),$(_DOC_HTML))
22 32
23DOC_MAN1=$(patsubst %.txt,%.1,$(MAN1_TXT)) 33_DOC_MAN1=$(patsubst %.txt,%.1,$(MAN1_TXT))
24DOC_MAN5=$(patsubst %.txt,%.5,$(MAN5_TXT)) 34_DOC_MAN5=$(patsubst %.txt,%.5,$(MAN5_TXT))
25DOC_MAN7=$(patsubst %.txt,%.7,$(MAN7_TXT)) 35_DOC_MAN7=$(patsubst %.txt,%.7,$(MAN7_TXT))
36
37DOC_MAN1=$(addprefix $(OUTPUT),$(_DOC_MAN1))
38DOC_MAN5=$(addprefix $(OUTPUT),$(_DOC_MAN5))
39DOC_MAN7=$(addprefix $(OUTPUT),$(_DOC_MAN7))
26 40
27# Make the path relative to DESTDIR, not prefix 41# Make the path relative to DESTDIR, not prefix
28ifndef DESTDIR 42ifndef DESTDIR
@@ -150,9 +164,9 @@ man1: $(DOC_MAN1)
150man5: $(DOC_MAN5) 164man5: $(DOC_MAN5)
151man7: $(DOC_MAN7) 165man7: $(DOC_MAN7)
152 166
153info: perf.info perfman.info 167info: $(OUTPUT)perf.info $(OUTPUT)perfman.info
154 168
155pdf: user-manual.pdf 169pdf: $(OUTPUT)user-manual.pdf
156 170
157install: install-man 171install: install-man
158 172
@@ -166,7 +180,7 @@ install-man: man
166 180
167install-info: info 181install-info: info
168 $(INSTALL) -d -m 755 $(DESTDIR)$(infodir) 182 $(INSTALL) -d -m 755 $(DESTDIR)$(infodir)
169 $(INSTALL) -m 644 perf.info perfman.info $(DESTDIR)$(infodir) 183 $(INSTALL) -m 644 $(OUTPUT)perf.info $(OUTPUT)perfman.info $(DESTDIR)$(infodir)
170 if test -r $(DESTDIR)$(infodir)/dir; then \ 184 if test -r $(DESTDIR)$(infodir)/dir; then \
171 $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perf.info ;\ 185 $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perf.info ;\
172 $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perfman.info ;\ 186 $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perfman.info ;\
@@ -176,7 +190,7 @@ install-info: info
176 190
177install-pdf: pdf 191install-pdf: pdf
178 $(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir) 192 $(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir)
179 $(INSTALL) -m 644 user-manual.pdf $(DESTDIR)$(pdfdir) 193 $(INSTALL) -m 644 $(OUTPUT)user-manual.pdf $(DESTDIR)$(pdfdir)
180 194
181#install-html: html 195#install-html: html
182# '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir) 196# '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir)
@@ -189,14 +203,14 @@ install-pdf: pdf
189# 203#
190# Determine "include::" file references in asciidoc files. 204# Determine "include::" file references in asciidoc files.
191# 205#
192doc.dep : $(wildcard *.txt) build-docdep.perl 206$(OUTPUT)doc.dep : $(wildcard *.txt) build-docdep.perl
193 $(QUIET_GEN)$(RM) $@+ $@ && \ 207 $(QUIET_GEN)$(RM) $@+ $@ && \
194 $(PERL_PATH) ./build-docdep.perl >$@+ $(QUIET_STDERR) && \ 208 $(PERL_PATH) ./build-docdep.perl >$@+ $(QUIET_STDERR) && \
195 mv $@+ $@ 209 mv $@+ $@
196 210
197-include doc.dep 211-include $(OUPTUT)doc.dep
198 212
199cmds_txt = cmds-ancillaryinterrogators.txt \ 213_cmds_txt = cmds-ancillaryinterrogators.txt \
200 cmds-ancillarymanipulators.txt \ 214 cmds-ancillarymanipulators.txt \
201 cmds-mainporcelain.txt \ 215 cmds-mainporcelain.txt \
202 cmds-plumbinginterrogators.txt \ 216 cmds-plumbinginterrogators.txt \
@@ -205,32 +219,36 @@ cmds_txt = cmds-ancillaryinterrogators.txt \
205 cmds-synchelpers.txt \ 219 cmds-synchelpers.txt \
206 cmds-purehelpers.txt \ 220 cmds-purehelpers.txt \
207 cmds-foreignscminterface.txt 221 cmds-foreignscminterface.txt
222cmds_txt=$(addprefix $(OUTPUT),$(_cmds_txt))
208 223
209$(cmds_txt): cmd-list.made 224$(cmds_txt): $(OUTPUT)cmd-list.made
210 225
211cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT) 226$(OUTPUT)cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT)
212 $(QUIET_GEN)$(RM) $@ && \ 227 $(QUIET_GEN)$(RM) $@ && \
213 $(PERL_PATH) ./cmd-list.perl ../command-list.txt $(QUIET_STDERR) && \ 228 $(PERL_PATH) ./cmd-list.perl ../command-list.txt $(QUIET_STDERR) && \
214 date >$@ 229 date >$@
215 230
216clean: 231clean:
217 $(RM) *.xml *.xml+ *.html *.html+ *.1 *.5 *.7 232 $(RM) $(MAN_XML) $(addsuffix +,$(MAN_XML))
218 $(RM) *.texi *.texi+ *.texi++ perf.info perfman.info 233 $(RM) $(MAN_HTML) $(addsuffix +,$(MAN_HTML))
219 $(RM) howto-index.txt howto/*.html doc.dep 234 $(RM) $(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7)
220 $(RM) technical/api-*.html technical/api-index.txt 235 $(RM) $(OUTPUT)*.texi $(OUTPUT)*.texi+ $(OUTPUT)*.texi++
221 $(RM) $(cmds_txt) *.made 236 $(RM) $(OUTPUT)perf.info $(OUTPUT)perfman.info
222 237 $(RM) $(OUTPUT)howto-index.txt $(OUTPUT)howto/*.html $(OUTPUT)doc.dep
223$(MAN_HTML): %.html : %.txt 238 $(RM) $(OUTPUT)technical/api-*.html $(OUTPUT)technical/api-index.txt
239 $(RM) $(cmds_txt) $(OUTPUT)*.made
240
241$(MAN_HTML): $(OUTPUT)%.html : %.txt
224 $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ 242 $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
225 $(ASCIIDOC) -b xhtml11 -d manpage -f asciidoc.conf \ 243 $(ASCIIDOC) -b xhtml11 -d manpage -f asciidoc.conf \
226 $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ 244 $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
227 mv $@+ $@ 245 mv $@+ $@
228 246
229%.1 %.5 %.7 : %.xml 247$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml
230 $(QUIET_XMLTO)$(RM) $@ && \ 248 $(QUIET_XMLTO)$(RM) $@ && \
231 xmlto -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $< 249 xmlto -o $(OUTPUT) -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
232 250
233%.xml : %.txt 251$(OUTPUT)%.xml : %.txt
234 $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ 252 $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
235 $(ASCIIDOC) -b docbook -d manpage -f asciidoc.conf \ 253 $(ASCIIDOC) -b docbook -d manpage -f asciidoc.conf \
236 $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ 254 $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
@@ -239,25 +257,25 @@ $(MAN_HTML): %.html : %.txt
239XSLT = docbook.xsl 257XSLT = docbook.xsl
240XSLTOPTS = --xinclude --stringparam html.stylesheet docbook-xsl.css 258XSLTOPTS = --xinclude --stringparam html.stylesheet docbook-xsl.css
241 259
242user-manual.html: user-manual.xml 260$(OUTPUT)user-manual.html: $(OUTPUT)user-manual.xml
243 $(QUIET_XSLTPROC)xsltproc $(XSLTOPTS) -o $@ $(XSLT) $< 261 $(QUIET_XSLTPROC)xsltproc $(XSLTOPTS) -o $@ $(XSLT) $<
244 262
245perf.info: user-manual.texi 263$(OUTPUT)perf.info: $(OUTPUT)user-manual.texi
246 $(QUIET_MAKEINFO)$(MAKEINFO) --no-split -o $@ user-manual.texi 264 $(QUIET_MAKEINFO)$(MAKEINFO) --no-split -o $@ $(OUTPUT)user-manual.texi
247 265
248user-manual.texi: user-manual.xml 266$(OUTPUT)user-manual.texi: $(OUTPUT)user-manual.xml
249 $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ 267 $(QUIET_DB2TEXI)$(RM) $@+ $@ && \
250 $(DOCBOOK2X_TEXI) user-manual.xml --encoding=UTF-8 --to-stdout >$@++ && \ 268 $(DOCBOOK2X_TEXI) $(OUTPUT)user-manual.xml --encoding=UTF-8 --to-stdout >$@++ && \
251 $(PERL_PATH) fix-texi.perl <$@++ >$@+ && \ 269 $(PERL_PATH) fix-texi.perl <$@++ >$@+ && \
252 rm $@++ && \ 270 rm $@++ && \
253 mv $@+ $@ 271 mv $@+ $@
254 272
255user-manual.pdf: user-manual.xml 273$(OUTPUT)user-manual.pdf: $(OUTPUT)user-manual.xml
256 $(QUIET_DBLATEX)$(RM) $@+ $@ && \ 274 $(QUIET_DBLATEX)$(RM) $@+ $@ && \
257 $(DBLATEX) -o $@+ -p /etc/asciidoc/dblatex/asciidoc-dblatex.xsl -s /etc/asciidoc/dblatex/asciidoc-dblatex.sty $< && \ 275 $(DBLATEX) -o $@+ -p /etc/asciidoc/dblatex/asciidoc-dblatex.xsl -s /etc/asciidoc/dblatex/asciidoc-dblatex.sty $< && \
258 mv $@+ $@ 276 mv $@+ $@
259 277
260perfman.texi: $(MAN_XML) cat-texi.perl 278$(OUTPUT)perfman.texi: $(MAN_XML) cat-texi.perl
261 $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ 279 $(QUIET_DB2TEXI)$(RM) $@+ $@ && \
262 ($(foreach xml,$(MAN_XML),$(DOCBOOK2X_TEXI) --encoding=UTF-8 \ 280 ($(foreach xml,$(MAN_XML),$(DOCBOOK2X_TEXI) --encoding=UTF-8 \
263 --to-stdout $(xml) &&) true) > $@++ && \ 281 --to-stdout $(xml) &&) true) > $@++ && \
@@ -265,7 +283,7 @@ perfman.texi: $(MAN_XML) cat-texi.perl
265 rm $@++ && \ 283 rm $@++ && \
266 mv $@+ $@ 284 mv $@+ $@
267 285
268perfman.info: perfman.texi 286$(OUTPUT)perfman.info: $(OUTPUT)perfman.texi
269 $(QUIET_MAKEINFO)$(MAKEINFO) --no-split --no-validate $*.texi 287 $(QUIET_MAKEINFO)$(MAKEINFO) --no-split --no-validate $*.texi
270 288
271$(patsubst %.txt,%.texi,$(MAN_TXT)): %.texi : %.xml 289$(patsubst %.txt,%.texi,$(MAN_TXT)): %.texi : %.xml
diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt
index d6b2a4f2108..c7f5f55634a 100644
--- a/tools/perf/Documentation/perf-lock.txt
+++ b/tools/perf/Documentation/perf-lock.txt
@@ -8,7 +8,7 @@ perf-lock - Analyze lock events
8SYNOPSIS 8SYNOPSIS
9-------- 9--------
10[verse] 10[verse]
11'perf lock' {record|report|trace} 11'perf lock' {record|report|script|info}
12 12
13DESCRIPTION 13DESCRIPTION
14----------- 14-----------
@@ -20,10 +20,13 @@ and statistics with this 'perf lock' command.
20 produces the file "perf.data" which contains tracing 20 produces the file "perf.data" which contains tracing
21 results of lock events. 21 results of lock events.
22 22
23 'perf lock trace' shows raw lock events.
24
25 'perf lock report' reports statistical data. 23 'perf lock report' reports statistical data.
26 24
25 'perf lock script' shows raw lock events.
26
27 'perf lock info' shows metadata like threads or addresses
28 of lock instances.
29
27COMMON OPTIONS 30COMMON OPTIONS
28-------------- 31--------------
29 32
@@ -47,6 +50,17 @@ REPORT OPTIONS
47 Sorting key. Possible values: acquired (default), contended, 50 Sorting key. Possible values: acquired (default), contended,
48 wait_total, wait_max, wait_min. 51 wait_total, wait_max, wait_min.
49 52
53INFO OPTIONS
54------------
55
56-t::
57--threads::
58 dump thread list in perf.data
59
60-m::
61--map::
62 dump map of lock instances (address:name table)
63
50SEE ALSO 64SEE ALSO
51-------- 65--------
52linkperf:perf[1] 66linkperf:perf[1]
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 2937f7e14bb..a5766b4b012 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -52,11 +52,15 @@ OPTIONS
52 52
53-p:: 53-p::
54--pid=:: 54--pid=::
55 Record events on existing process ID. 55 Record events on existing process ID (comma separated list).
56 56
57-t:: 57-t::
58--tid=:: 58--tid=::
59 Record events on existing thread ID. 59 Record events on existing thread ID (comma separated list).
60
61-u::
62--uid=::
63 Record events in threads owned by uid. Name or number.
60 64
61-r:: 65-r::
62--realtime=:: 66--realtime=::
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 2f6cef43da2..e9cbfcddfa3 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -115,7 +115,7 @@ OPTIONS
115-f:: 115-f::
116--fields:: 116--fields::
117 Comma separated list of fields to print. Options are: 117 Comma separated list of fields to print. Options are:
118 comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr. 118 comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff.
119 Field list can be prepended with the type, trace, sw or hw, 119 Field list can be prepended with the type, trace, sw or hw,
120 to indicate to which event type the field list applies. 120 to indicate to which event type the field list applies.
121 e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace 121 e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace
@@ -200,6 +200,9 @@ OPTIONS
200 It currently includes: cpu and numa topology of the host system. 200 It currently includes: cpu and numa topology of the host system.
201 It can only be used with the perf script report mode. 201 It can only be used with the perf script report mode.
202 202
203--show-kernel-path::
204 Try to resolve the path of [kernel.kallsyms]
205
203SEE ALSO 206SEE ALSO
204-------- 207--------
205linkperf:perf-record[1], linkperf:perf-script-perl[1], 208linkperf:perf-record[1], linkperf:perf-script-perl[1],
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 8966b9ab201..2fa173b5197 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -35,11 +35,11 @@ OPTIONS
35 child tasks do not inherit counters 35 child tasks do not inherit counters
36-p:: 36-p::
37--pid=<pid>:: 37--pid=<pid>::
38 stat events on existing process id 38 stat events on existing process id (comma separated list)
39 39
40-t:: 40-t::
41--tid=<tid>:: 41--tid=<tid>::
42 stat events on existing thread id 42 stat events on existing thread id (comma separated list)
43 43
44 44
45-a:: 45-a::
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index b1a5bbbfebe..4a5680cb242 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -72,11 +72,15 @@ Default is to monitor all CPUS.
72 72
73-p <pid>:: 73-p <pid>::
74--pid=<pid>:: 74--pid=<pid>::
75 Profile events on existing Process ID. 75 Profile events on existing Process ID (comma separated list).
76 76
77-t <tid>:: 77-t <tid>::
78--tid=<tid>:: 78--tid=<tid>::
79 Profile events on existing thread ID. 79 Profile events on existing thread ID (comma separated list).
80
81-u::
82--uid=::
83 Record events in threads owned by uid. Name or number.
80 84
81-r <priority>:: 85-r <priority>::
82--realtime=<priority>:: 86--realtime=<priority>::
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 1078c5fadd5..5476bc0a1ea 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -9,6 +9,7 @@ lib/rbtree.c
9include/linux/swab.h 9include/linux/swab.h
10arch/*/include/asm/unistd*.h 10arch/*/include/asm/unistd*.h
11arch/*/lib/memcpy*.S 11arch/*/lib/memcpy*.S
12arch/*/lib/memset*.S
12include/linux/poison.h 13include/linux/poison.h
13include/linux/magic.h 14include/linux/magic.h
14include/linux/hw_breakpoint.h 15include/linux/hw_breakpoint.h
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 7c12650165a..e011b5060f9 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -61,7 +61,7 @@ ifeq ($(ARCH),x86_64)
61 ifeq (${IS_X86_64}, 1) 61 ifeq (${IS_X86_64}, 1)
62 RAW_ARCH := x86_64 62 RAW_ARCH := x86_64
63 ARCH_CFLAGS := -DARCH_X86_64 63 ARCH_CFLAGS := -DARCH_X86_64
64 ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S 64 ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S
65 endif 65 endif
66endif 66endif
67 67
@@ -183,7 +183,10 @@ SCRIPT_SH += perf-archive.sh
183grep-libs = $(filter -l%,$(1)) 183grep-libs = $(filter -l%,$(1))
184strip-libs = $(filter-out -l%,$(1)) 184strip-libs = $(filter-out -l%,$(1))
185 185
186$(OUTPUT)python/perf.so: $(PYRF_OBJS) 186PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
187PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py
188
189$(OUTPUT)python/perf.so: $(PYRF_OBJS) $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS)
187 $(QUIET_GEN)CFLAGS='$(BASIC_CFLAGS)' $(PYTHON_WORD) util/setup.py \ 190 $(QUIET_GEN)CFLAGS='$(BASIC_CFLAGS)' $(PYTHON_WORD) util/setup.py \
188 --quiet build_ext; \ 191 --quiet build_ext; \
189 mkdir -p $(OUTPUT)python && \ 192 mkdir -p $(OUTPUT)python && \
@@ -256,6 +259,7 @@ LIB_H += util/callchain.h
256LIB_H += util/build-id.h 259LIB_H += util/build-id.h
257LIB_H += util/debug.h 260LIB_H += util/debug.h
258LIB_H += util/debugfs.h 261LIB_H += util/debugfs.h
262LIB_H += util/sysfs.h
259LIB_H += util/event.h 263LIB_H += util/event.h
260LIB_H += util/evsel.h 264LIB_H += util/evsel.h
261LIB_H += util/evlist.h 265LIB_H += util/evlist.h
@@ -302,6 +306,7 @@ LIB_OBJS += $(OUTPUT)util/build-id.o
302LIB_OBJS += $(OUTPUT)util/config.o 306LIB_OBJS += $(OUTPUT)util/config.o
303LIB_OBJS += $(OUTPUT)util/ctype.o 307LIB_OBJS += $(OUTPUT)util/ctype.o
304LIB_OBJS += $(OUTPUT)util/debugfs.o 308LIB_OBJS += $(OUTPUT)util/debugfs.o
309LIB_OBJS += $(OUTPUT)util/sysfs.o
305LIB_OBJS += $(OUTPUT)util/environment.o 310LIB_OBJS += $(OUTPUT)util/environment.o
306LIB_OBJS += $(OUTPUT)util/event.o 311LIB_OBJS += $(OUTPUT)util/event.o
307LIB_OBJS += $(OUTPUT)util/evlist.o 312LIB_OBJS += $(OUTPUT)util/evlist.o
@@ -359,8 +364,10 @@ BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o
359BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o 364BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o
360ifeq ($(RAW_ARCH),x86_64) 365ifeq ($(RAW_ARCH),x86_64)
361BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o 366BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o
367BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o
362endif 368endif
363BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o 369BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o
370BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o
364 371
365BUILTIN_OBJS += $(OUTPUT)builtin-diff.o 372BUILTIN_OBJS += $(OUTPUT)builtin-diff.o
366BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o 373BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o
@@ -792,7 +799,6 @@ help:
792 @echo ' quick-install-html - install the html documentation quickly' 799 @echo ' quick-install-html - install the html documentation quickly'
793 @echo '' 800 @echo ''
794 @echo 'Perf maintainer targets:' 801 @echo 'Perf maintainer targets:'
795 @echo ' distclean - alias to clean'
796 @echo ' clean - clean all binary objects and build output' 802 @echo ' clean - clean all binary objects and build output'
797 803
798doc: 804doc:
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index f7781c6267c..a09bece6dad 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -4,6 +4,7 @@
4extern int bench_sched_messaging(int argc, const char **argv, const char *prefix); 4extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
5extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); 5extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
6extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used); 6extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used);
7extern int bench_mem_memset(int argc, const char **argv, const char *prefix);
7 8
8#define BENCH_FORMAT_DEFAULT_STR "default" 9#define BENCH_FORMAT_DEFAULT_STR "default"
9#define BENCH_FORMAT_DEFAULT 0 10#define BENCH_FORMAT_DEFAULT 0
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
index d588b87696f..d66ab799b35 100644
--- a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
@@ -2,3 +2,11 @@
2MEMCPY_FN(__memcpy, 2MEMCPY_FN(__memcpy,
3 "x86-64-unrolled", 3 "x86-64-unrolled",
4 "unrolled memcpy() in arch/x86/lib/memcpy_64.S") 4 "unrolled memcpy() in arch/x86/lib/memcpy_64.S")
5
6MEMCPY_FN(memcpy_c,
7 "x86-64-movsq",
8 "movsq-based memcpy() in arch/x86/lib/memcpy_64.S")
9
10MEMCPY_FN(memcpy_c_e,
11 "x86-64-movsb",
12 "movsb-based memcpy() in arch/x86/lib/memcpy_64.S")
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S
index 185a96d66dd..fcd9cf00600 100644
--- a/tools/perf/bench/mem-memcpy-x86-64-asm.S
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S
@@ -1,4 +1,8 @@
1 1#define memcpy MEMCPY /* don't hide glibc's memcpy() */
2#define altinstr_replacement text
3#define globl p2align 4; .globl
4#define Lmemcpy_c globl memcpy_c; memcpy_c
5#define Lmemcpy_c_e globl memcpy_c_e; memcpy_c_e
2#include "../../../arch/x86/lib/memcpy_64.S" 6#include "../../../arch/x86/lib/memcpy_64.S"
3/* 7/*
4 * We need to provide note.GNU-stack section, saying that we want 8 * We need to provide note.GNU-stack section, saying that we want
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
index db82021f4b9..71557225bf9 100644
--- a/tools/perf/bench/mem-memcpy.c
+++ b/tools/perf/bench/mem-memcpy.c
@@ -5,7 +5,6 @@
5 * 5 *
6 * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> 6 * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
7 */ 7 */
8#include <ctype.h>
9 8
10#include "../perf.h" 9#include "../perf.h"
11#include "../util/util.h" 10#include "../util/util.h"
@@ -24,6 +23,7 @@
24 23
25static const char *length_str = "1MB"; 24static const char *length_str = "1MB";
26static const char *routine = "default"; 25static const char *routine = "default";
26static int iterations = 1;
27static bool use_clock; 27static bool use_clock;
28static int clock_fd; 28static int clock_fd;
29static bool only_prefault; 29static bool only_prefault;
@@ -35,6 +35,8 @@ static const struct option options[] = {
35 "available unit: B, MB, GB (upper and lower)"), 35 "available unit: B, MB, GB (upper and lower)"),
36 OPT_STRING('r', "routine", &routine, "default", 36 OPT_STRING('r', "routine", &routine, "default",
37 "Specify routine to copy"), 37 "Specify routine to copy"),
38 OPT_INTEGER('i', "iterations", &iterations,
39 "repeat memcpy() invocation this number of times"),
38 OPT_BOOLEAN('c', "clock", &use_clock, 40 OPT_BOOLEAN('c', "clock", &use_clock,
39 "Use CPU clock for measuring"), 41 "Use CPU clock for measuring"),
40 OPT_BOOLEAN('o', "only-prefault", &only_prefault, 42 OPT_BOOLEAN('o', "only-prefault", &only_prefault,
@@ -121,6 +123,7 @@ static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault)
121{ 123{
122 u64 clock_start = 0ULL, clock_end = 0ULL; 124 u64 clock_start = 0ULL, clock_end = 0ULL;
123 void *src = NULL, *dst = NULL; 125 void *src = NULL, *dst = NULL;
126 int i;
124 127
125 alloc_mem(&src, &dst, len); 128 alloc_mem(&src, &dst, len);
126 129
@@ -128,7 +131,8 @@ static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault)
128 fn(dst, src, len); 131 fn(dst, src, len);
129 132
130 clock_start = get_clock(); 133 clock_start = get_clock();
131 fn(dst, src, len); 134 for (i = 0; i < iterations; ++i)
135 fn(dst, src, len);
132 clock_end = get_clock(); 136 clock_end = get_clock();
133 137
134 free(src); 138 free(src);
@@ -140,6 +144,7 @@ static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault)
140{ 144{
141 struct timeval tv_start, tv_end, tv_diff; 145 struct timeval tv_start, tv_end, tv_diff;
142 void *src = NULL, *dst = NULL; 146 void *src = NULL, *dst = NULL;
147 int i;
143 148
144 alloc_mem(&src, &dst, len); 149 alloc_mem(&src, &dst, len);
145 150
@@ -147,7 +152,8 @@ static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault)
147 fn(dst, src, len); 152 fn(dst, src, len);
148 153
149 BUG_ON(gettimeofday(&tv_start, NULL)); 154 BUG_ON(gettimeofday(&tv_start, NULL));
150 fn(dst, src, len); 155 for (i = 0; i < iterations; ++i)
156 fn(dst, src, len);
151 BUG_ON(gettimeofday(&tv_end, NULL)); 157 BUG_ON(gettimeofday(&tv_end, NULL));
152 158
153 timersub(&tv_end, &tv_start, &tv_diff); 159 timersub(&tv_end, &tv_start, &tv_diff);
diff --git a/tools/perf/bench/mem-memset-arch.h b/tools/perf/bench/mem-memset-arch.h
new file mode 100644
index 00000000000..a040fa77665
--- /dev/null
+++ b/tools/perf/bench/mem-memset-arch.h
@@ -0,0 +1,12 @@
1
2#ifdef ARCH_X86_64
3
4#define MEMSET_FN(fn, name, desc) \
5 extern void *fn(void *, int, size_t);
6
7#include "mem-memset-x86-64-asm-def.h"
8
9#undef MEMSET_FN
10
11#endif
12
diff --git a/tools/perf/bench/mem-memset-x86-64-asm-def.h b/tools/perf/bench/mem-memset-x86-64-asm-def.h
new file mode 100644
index 00000000000..a71dff97c1f
--- /dev/null
+++ b/tools/perf/bench/mem-memset-x86-64-asm-def.h
@@ -0,0 +1,12 @@
1
2MEMSET_FN(__memset,
3 "x86-64-unrolled",
4 "unrolled memset() in arch/x86/lib/memset_64.S")
5
6MEMSET_FN(memset_c,
7 "x86-64-stosq",
8 "movsq-based memset() in arch/x86/lib/memset_64.S")
9
10MEMSET_FN(memset_c_e,
11 "x86-64-stosb",
12 "movsb-based memset() in arch/x86/lib/memset_64.S")
diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S
new file mode 100644
index 00000000000..9e5af89ed13
--- /dev/null
+++ b/tools/perf/bench/mem-memset-x86-64-asm.S
@@ -0,0 +1,13 @@
1#define memset MEMSET /* don't hide glibc's memset() */
2#define altinstr_replacement text
3#define globl p2align 4; .globl
4#define Lmemset_c globl memset_c; memset_c
5#define Lmemset_c_e globl memset_c_e; memset_c_e
6#include "../../../arch/x86/lib/memset_64.S"
7
8/*
9 * We need to provide note.GNU-stack section, saying that we want
10 * NOT executable stack. Otherwise the final linking will assume that
11 * the ELF stack should not be restricted at all and set it RWX.
12 */
13.section .note.GNU-stack,"",@progbits
diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c
new file mode 100644
index 00000000000..e9079185bd7
--- /dev/null
+++ b/tools/perf/bench/mem-memset.c
@@ -0,0 +1,297 @@
1/*
2 * mem-memset.c
3 *
4 * memset: Simple memory set in various ways
5 *
6 * Trivial clone of mem-memcpy.c.
7 */
8
9#include "../perf.h"
10#include "../util/util.h"
11#include "../util/parse-options.h"
12#include "../util/header.h"
13#include "bench.h"
14#include "mem-memset-arch.h"
15
16#include <stdio.h>
17#include <stdlib.h>
18#include <string.h>
19#include <sys/time.h>
20#include <errno.h>
21
22#define K 1024
23
24static const char *length_str = "1MB";
25static const char *routine = "default";
26static int iterations = 1;
27static bool use_clock;
28static int clock_fd;
29static bool only_prefault;
30static bool no_prefault;
31
32static const struct option options[] = {
33 OPT_STRING('l', "length", &length_str, "1MB",
34 "Specify length of memory to copy. "
35 "available unit: B, MB, GB (upper and lower)"),
36 OPT_STRING('r', "routine", &routine, "default",
37 "Specify routine to copy"),
38 OPT_INTEGER('i', "iterations", &iterations,
39 "repeat memset() invocation this number of times"),
40 OPT_BOOLEAN('c', "clock", &use_clock,
41 "Use CPU clock for measuring"),
42 OPT_BOOLEAN('o', "only-prefault", &only_prefault,
43 "Show only the result with page faults before memset()"),
44 OPT_BOOLEAN('n', "no-prefault", &no_prefault,
45 "Show only the result without page faults before memset()"),
46 OPT_END()
47};
48
49typedef void *(*memset_t)(void *, int, size_t);
50
51struct routine {
52 const char *name;
53 const char *desc;
54 memset_t fn;
55};
56
57static const struct routine routines[] = {
58 { "default",
59 "Default memset() provided by glibc",
60 memset },
61#ifdef ARCH_X86_64
62
63#define MEMSET_FN(fn, name, desc) { name, desc, fn },
64#include "mem-memset-x86-64-asm-def.h"
65#undef MEMSET_FN
66
67#endif
68
69 { NULL,
70 NULL,
71 NULL }
72};
73
74static const char * const bench_mem_memset_usage[] = {
75 "perf bench mem memset <options>",
76 NULL
77};
78
79static struct perf_event_attr clock_attr = {
80 .type = PERF_TYPE_HARDWARE,
81 .config = PERF_COUNT_HW_CPU_CYCLES
82};
83
84static void init_clock(void)
85{
86 clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0);
87
88 if (clock_fd < 0 && errno == ENOSYS)
89 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
90 else
91 BUG_ON(clock_fd < 0);
92}
93
94static u64 get_clock(void)
95{
96 int ret;
97 u64 clk;
98
99 ret = read(clock_fd, &clk, sizeof(u64));
100 BUG_ON(ret != sizeof(u64));
101
102 return clk;
103}
104
105static double timeval2double(struct timeval *ts)
106{
107 return (double)ts->tv_sec +
108 (double)ts->tv_usec / (double)1000000;
109}
110
111static void alloc_mem(void **dst, size_t length)
112{
113 *dst = zalloc(length);
114 if (!dst)
115 die("memory allocation failed - maybe length is too large?\n");
116}
117
118static u64 do_memset_clock(memset_t fn, size_t len, bool prefault)
119{
120 u64 clock_start = 0ULL, clock_end = 0ULL;
121 void *dst = NULL;
122 int i;
123
124 alloc_mem(&dst, len);
125
126 if (prefault)
127 fn(dst, -1, len);
128
129 clock_start = get_clock();
130 for (i = 0; i < iterations; ++i)
131 fn(dst, i, len);
132 clock_end = get_clock();
133
134 free(dst);
135 return clock_end - clock_start;
136}
137
138static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault)
139{
140 struct timeval tv_start, tv_end, tv_diff;
141 void *dst = NULL;
142 int i;
143
144 alloc_mem(&dst, len);
145
146 if (prefault)
147 fn(dst, -1, len);
148
149 BUG_ON(gettimeofday(&tv_start, NULL));
150 for (i = 0; i < iterations; ++i)
151 fn(dst, i, len);
152 BUG_ON(gettimeofday(&tv_end, NULL));
153
154 timersub(&tv_end, &tv_start, &tv_diff);
155
156 free(dst);
157 return (double)((double)len / timeval2double(&tv_diff));
158}
159
160#define pf (no_prefault ? 0 : 1)
161
162#define print_bps(x) do { \
163 if (x < K) \
164 printf(" %14lf B/Sec", x); \
165 else if (x < K * K) \
166 printf(" %14lfd KB/Sec", x / K); \
167 else if (x < K * K * K) \
168 printf(" %14lf MB/Sec", x / K / K); \
169 else \
170 printf(" %14lf GB/Sec", x / K / K / K); \
171 } while (0)
172
173int bench_mem_memset(int argc, const char **argv,
174 const char *prefix __used)
175{
176 int i;
177 size_t len;
178 double result_bps[2];
179 u64 result_clock[2];
180
181 argc = parse_options(argc, argv, options,
182 bench_mem_memset_usage, 0);
183
184 if (use_clock)
185 init_clock();
186
187 len = (size_t)perf_atoll((char *)length_str);
188
189 result_clock[0] = result_clock[1] = 0ULL;
190 result_bps[0] = result_bps[1] = 0.0;
191
192 if ((s64)len <= 0) {
193 fprintf(stderr, "Invalid length:%s\n", length_str);
194 return 1;
195 }
196
197 /* same to without specifying either of prefault and no-prefault */
198 if (only_prefault && no_prefault)
199 only_prefault = no_prefault = false;
200
201 for (i = 0; routines[i].name; i++) {
202 if (!strcmp(routines[i].name, routine))
203 break;
204 }
205 if (!routines[i].name) {
206 printf("Unknown routine:%s\n", routine);
207 printf("Available routines...\n");
208 for (i = 0; routines[i].name; i++) {
209 printf("\t%s ... %s\n",
210 routines[i].name, routines[i].desc);
211 }
212 return 1;
213 }
214
215 if (bench_format == BENCH_FORMAT_DEFAULT)
216 printf("# Copying %s Bytes ...\n\n", length_str);
217
218 if (!only_prefault && !no_prefault) {
219 /* show both of results */
220 if (use_clock) {
221 result_clock[0] =
222 do_memset_clock(routines[i].fn, len, false);
223 result_clock[1] =
224 do_memset_clock(routines[i].fn, len, true);
225 } else {
226 result_bps[0] =
227 do_memset_gettimeofday(routines[i].fn,
228 len, false);
229 result_bps[1] =
230 do_memset_gettimeofday(routines[i].fn,
231 len, true);
232 }
233 } else {
234 if (use_clock) {
235 result_clock[pf] =
236 do_memset_clock(routines[i].fn,
237 len, only_prefault);
238 } else {
239 result_bps[pf] =
240 do_memset_gettimeofday(routines[i].fn,
241 len, only_prefault);
242 }
243 }
244
245 switch (bench_format) {
246 case BENCH_FORMAT_DEFAULT:
247 if (!only_prefault && !no_prefault) {
248 if (use_clock) {
249 printf(" %14lf Clock/Byte\n",
250 (double)result_clock[0]
251 / (double)len);
252 printf(" %14lf Clock/Byte (with prefault)\n ",
253 (double)result_clock[1]
254 / (double)len);
255 } else {
256 print_bps(result_bps[0]);
257 printf("\n");
258 print_bps(result_bps[1]);
259 printf(" (with prefault)\n");
260 }
261 } else {
262 if (use_clock) {
263 printf(" %14lf Clock/Byte",
264 (double)result_clock[pf]
265 / (double)len);
266 } else
267 print_bps(result_bps[pf]);
268
269 printf("%s\n", only_prefault ? " (with prefault)" : "");
270 }
271 break;
272 case BENCH_FORMAT_SIMPLE:
273 if (!only_prefault && !no_prefault) {
274 if (use_clock) {
275 printf("%lf %lf\n",
276 (double)result_clock[0] / (double)len,
277 (double)result_clock[1] / (double)len);
278 } else {
279 printf("%lf %lf\n",
280 result_bps[0], result_bps[1]);
281 }
282 } else {
283 if (use_clock) {
284 printf("%lf\n", (double)result_clock[pf]
285 / (double)len);
286 } else
287 printf("%lf\n", result_bps[pf]);
288 }
289 break;
290 default:
291 /* reaching this means there's some disaster: */
292 die("unknown format: %d\n", bench_format);
293 break;
294 }
295
296 return 0;
297}
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index fcb96269852..b0e74ab2d7a 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -52,6 +52,9 @@ static struct bench_suite mem_suites[] = {
52 { "memcpy", 52 { "memcpy",
53 "Simple memory copy in various ways", 53 "Simple memory copy in various ways",
54 bench_mem_memcpy }, 54 bench_mem_memcpy },
55 { "memset",
56 "Simple memory set in various ways",
57 bench_mem_memset },
55 suite_all, 58 suite_all,
56 { NULL, 59 { NULL,
57 NULL, 60 NULL,
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 2296c391d0f..12c81483899 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -922,12 +922,12 @@ static const struct option info_options[] = {
922 OPT_BOOLEAN('t', "threads", &info_threads, 922 OPT_BOOLEAN('t', "threads", &info_threads,
923 "dump thread list in perf.data"), 923 "dump thread list in perf.data"),
924 OPT_BOOLEAN('m', "map", &info_map, 924 OPT_BOOLEAN('m', "map", &info_map,
925 "map of lock instances (name:address table)"), 925 "map of lock instances (address:name table)"),
926 OPT_END() 926 OPT_END()
927}; 927};
928 928
929static const char * const lock_usage[] = { 929static const char * const lock_usage[] = {
930 "perf lock [<options>] {record|trace|report}", 930 "perf lock [<options>] {record|report|script|info}",
931 NULL 931 NULL
932}; 932};
933 933
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index fb8566181f2..4935c09dd5b 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -58,7 +58,7 @@ static struct {
58 struct perf_probe_event events[MAX_PROBES]; 58 struct perf_probe_event events[MAX_PROBES];
59 struct strlist *dellist; 59 struct strlist *dellist;
60 struct line_range line_range; 60 struct line_range line_range;
61 const char *target_module; 61 const char *target;
62 int max_probe_points; 62 int max_probe_points;
63 struct strfilter *filter; 63 struct strfilter *filter;
64} params; 64} params;
@@ -246,7 +246,7 @@ static const struct option options[] = {
246 "file", "vmlinux pathname"), 246 "file", "vmlinux pathname"),
247 OPT_STRING('s', "source", &symbol_conf.source_prefix, 247 OPT_STRING('s', "source", &symbol_conf.source_prefix,
248 "directory", "path to kernel source"), 248 "directory", "path to kernel source"),
249 OPT_STRING('m', "module", &params.target_module, 249 OPT_STRING('m', "module", &params.target,
250 "modname|path", 250 "modname|path",
251 "target module name (for online) or path (for offline)"), 251 "target module name (for online) or path (for offline)"),
252#endif 252#endif
@@ -333,7 +333,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
333 if (!params.filter) 333 if (!params.filter)
334 params.filter = strfilter__new(DEFAULT_FUNC_FILTER, 334 params.filter = strfilter__new(DEFAULT_FUNC_FILTER,
335 NULL); 335 NULL);
336 ret = show_available_funcs(params.target_module, 336 ret = show_available_funcs(params.target,
337 params.filter); 337 params.filter);
338 strfilter__delete(params.filter); 338 strfilter__delete(params.filter);
339 if (ret < 0) 339 if (ret < 0)
@@ -354,7 +354,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
354 usage_with_options(probe_usage, options); 354 usage_with_options(probe_usage, options);
355 } 355 }
356 356
357 ret = show_line_range(&params.line_range, params.target_module); 357 ret = show_line_range(&params.line_range, params.target);
358 if (ret < 0) 358 if (ret < 0)
359 pr_err(" Error: Failed to show lines. (%d)\n", ret); 359 pr_err(" Error: Failed to show lines. (%d)\n", ret);
360 return ret; 360 return ret;
@@ -371,7 +371,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
371 371
372 ret = show_available_vars(params.events, params.nevents, 372 ret = show_available_vars(params.events, params.nevents,
373 params.max_probe_points, 373 params.max_probe_points,
374 params.target_module, 374 params.target,
375 params.filter, 375 params.filter,
376 params.show_ext_vars); 376 params.show_ext_vars);
377 strfilter__delete(params.filter); 377 strfilter__delete(params.filter);
@@ -393,7 +393,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
393 if (params.nevents) { 393 if (params.nevents) {
394 ret = add_perf_probe_events(params.events, params.nevents, 394 ret = add_perf_probe_events(params.events, params.nevents,
395 params.max_probe_points, 395 params.max_probe_points,
396 params.target_module, 396 params.target,
397 params.force_add); 397 params.force_add);
398 if (ret < 0) { 398 if (ret < 0) {
399 pr_err(" Error: Failed to add events. (%d)\n", ret); 399 pr_err(" Error: Failed to add events. (%d)\n", ret);
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 0abfb18b911..75d230fef20 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -44,6 +44,7 @@ struct perf_record {
44 struct perf_evlist *evlist; 44 struct perf_evlist *evlist;
45 struct perf_session *session; 45 struct perf_session *session;
46 const char *progname; 46 const char *progname;
47 const char *uid_str;
47 int output; 48 int output;
48 unsigned int page_size; 49 unsigned int page_size;
49 int realtime_prio; 50 int realtime_prio;
@@ -204,8 +205,11 @@ static void perf_record__open(struct perf_record *rec)
204 205
205 if (opts->group && pos != first) 206 if (opts->group && pos != first)
206 group_fd = first->fd; 207 group_fd = first->fd;
208fallback_missing_features:
209 if (opts->exclude_guest_missing)
210 attr->exclude_guest = attr->exclude_host = 0;
207retry_sample_id: 211retry_sample_id:
208 attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0; 212 attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
209try_again: 213try_again:
210 if (perf_evsel__open(pos, evlist->cpus, evlist->threads, 214 if (perf_evsel__open(pos, evlist->cpus, evlist->threads,
211 opts->group, group_fd) < 0) { 215 opts->group, group_fd) < 0) {
@@ -217,15 +221,23 @@ try_again:
217 } else if (err == ENODEV && opts->cpu_list) { 221 } else if (err == ENODEV && opts->cpu_list) {
218 die("No such device - did you specify" 222 die("No such device - did you specify"
219 " an out-of-range profile CPU?\n"); 223 " an out-of-range profile CPU?\n");
220 } else if (err == EINVAL && opts->sample_id_all_avail) { 224 } else if (err == EINVAL) {
221 /* 225 if (!opts->exclude_guest_missing &&
222 * Old kernel, no attr->sample_id_type_all field 226 (attr->exclude_guest || attr->exclude_host)) {
223 */ 227 pr_debug("Old kernel, cannot exclude "
224 opts->sample_id_all_avail = false; 228 "guest or host samples.\n");
225 if (!opts->sample_time && !opts->raw_samples && !time_needed) 229 opts->exclude_guest_missing = true;
226 attr->sample_type &= ~PERF_SAMPLE_TIME; 230 goto fallback_missing_features;
227 231 } else if (!opts->sample_id_all_missing) {
228 goto retry_sample_id; 232 /*
233 * Old kernel, no attr->sample_id_type_all field
234 */
235 opts->sample_id_all_missing = true;
236 if (!opts->sample_time && !opts->raw_samples && !time_needed)
237 attr->sample_type &= ~PERF_SAMPLE_TIME;
238
239 goto retry_sample_id;
240 }
229 } 241 }
230 242
231 /* 243 /*
@@ -385,7 +397,7 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
385{ 397{
386 struct stat st; 398 struct stat st;
387 int flags; 399 int flags;
388 int err, output; 400 int err, output, feat;
389 unsigned long waking = 0; 401 unsigned long waking = 0;
390 const bool forks = argc > 0; 402 const bool forks = argc > 0;
391 struct machine *machine; 403 struct machine *machine;
@@ -452,8 +464,14 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
452 464
453 rec->session = session; 465 rec->session = session;
454 466
455 if (!rec->no_buildid) 467 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
456 perf_header__set_feat(&session->header, HEADER_BUILD_ID); 468 perf_header__set_feat(&session->header, feat);
469
470 if (rec->no_buildid)
471 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
472
473 if (!have_tracepoints(&evsel_list->entries))
474 perf_header__clear_feat(&session->header, HEADER_TRACE_INFO);
457 475
458 if (!rec->file_new) { 476 if (!rec->file_new) {
459 err = perf_session__read_header(session, output); 477 err = perf_session__read_header(session, output);
@@ -461,22 +479,6 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
461 goto out_delete_session; 479 goto out_delete_session;
462 } 480 }
463 481
464 if (have_tracepoints(&evsel_list->entries))
465 perf_header__set_feat(&session->header, HEADER_TRACE_INFO);
466
467 perf_header__set_feat(&session->header, HEADER_HOSTNAME);
468 perf_header__set_feat(&session->header, HEADER_OSRELEASE);
469 perf_header__set_feat(&session->header, HEADER_ARCH);
470 perf_header__set_feat(&session->header, HEADER_CPUDESC);
471 perf_header__set_feat(&session->header, HEADER_NRCPUS);
472 perf_header__set_feat(&session->header, HEADER_EVENT_DESC);
473 perf_header__set_feat(&session->header, HEADER_CMDLINE);
474 perf_header__set_feat(&session->header, HEADER_VERSION);
475 perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY);
476 perf_header__set_feat(&session->header, HEADER_TOTAL_MEM);
477 perf_header__set_feat(&session->header, HEADER_NUMA_TOPOLOGY);
478 perf_header__set_feat(&session->header, HEADER_CPUID);
479
480 if (forks) { 482 if (forks) {
481 err = perf_evlist__prepare_workload(evsel_list, opts, argv); 483 err = perf_evlist__prepare_workload(evsel_list, opts, argv);
482 if (err < 0) { 484 if (err < 0) {
@@ -503,9 +505,9 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
503 return err; 505 return err;
504 } 506 }
505 507
506 if (!!rec->no_buildid 508 if (!rec->no_buildid
507 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) { 509 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
508 pr_err("Couldn't generating buildids. " 510 pr_err("Couldn't generate buildids. "
509 "Use --no-buildid to profile anyway.\n"); 511 "Use --no-buildid to profile anyway.\n");
510 return -1; 512 return -1;
511 } 513 }
@@ -654,13 +656,10 @@ static const char * const record_usage[] = {
654 */ 656 */
655static struct perf_record record = { 657static struct perf_record record = {
656 .opts = { 658 .opts = {
657 .target_pid = -1,
658 .target_tid = -1,
659 .mmap_pages = UINT_MAX, 659 .mmap_pages = UINT_MAX,
660 .user_freq = UINT_MAX, 660 .user_freq = UINT_MAX,
661 .user_interval = ULLONG_MAX, 661 .user_interval = ULLONG_MAX,
662 .freq = 1000, 662 .freq = 1000,
663 .sample_id_all_avail = true,
664 }, 663 },
665 .write_mode = WRITE_FORCE, 664 .write_mode = WRITE_FORCE,
666 .file_new = true, 665 .file_new = true,
@@ -679,9 +678,9 @@ const struct option record_options[] = {
679 parse_events_option), 678 parse_events_option),
680 OPT_CALLBACK(0, "filter", &record.evlist, "filter", 679 OPT_CALLBACK(0, "filter", &record.evlist, "filter",
681 "event filter", parse_filter), 680 "event filter", parse_filter),
682 OPT_INTEGER('p', "pid", &record.opts.target_pid, 681 OPT_STRING('p', "pid", &record.opts.target_pid, "pid",
683 "record events on existing process id"), 682 "record events on existing process id"),
684 OPT_INTEGER('t', "tid", &record.opts.target_tid, 683 OPT_STRING('t', "tid", &record.opts.target_tid, "tid",
685 "record events on existing thread id"), 684 "record events on existing thread id"),
686 OPT_INTEGER('r', "realtime", &record.realtime_prio, 685 OPT_INTEGER('r', "realtime", &record.realtime_prio,
687 "collect data with this RT SCHED_FIFO priority"), 686 "collect data with this RT SCHED_FIFO priority"),
@@ -727,6 +726,7 @@ const struct option record_options[] = {
727 OPT_CALLBACK('G', "cgroup", &record.evlist, "name", 726 OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
728 "monitor event in cgroup name only", 727 "monitor event in cgroup name only",
729 parse_cgroups), 728 parse_cgroups),
729 OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"),
730 OPT_END() 730 OPT_END()
731}; 731};
732 732
@@ -747,8 +747,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
747 747
748 argc = parse_options(argc, argv, record_options, record_usage, 748 argc = parse_options(argc, argv, record_options, record_usage,
749 PARSE_OPT_STOP_AT_NON_OPTION); 749 PARSE_OPT_STOP_AT_NON_OPTION);
750 if (!argc && rec->opts.target_pid == -1 && rec->opts.target_tid == -1 && 750 if (!argc && !rec->opts.target_pid && !rec->opts.target_tid &&
751 !rec->opts.system_wide && !rec->opts.cpu_list) 751 !rec->opts.system_wide && !rec->opts.cpu_list && !rec->uid_str)
752 usage_with_options(record_usage, record_options); 752 usage_with_options(record_usage, record_options);
753 753
754 if (rec->force && rec->append_file) { 754 if (rec->force && rec->append_file) {
@@ -788,11 +788,17 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
788 goto out_symbol_exit; 788 goto out_symbol_exit;
789 } 789 }
790 790
791 if (rec->opts.target_pid != -1) 791 rec->opts.uid = parse_target_uid(rec->uid_str, rec->opts.target_tid,
792 rec->opts.target_pid);
793 if (rec->uid_str != NULL && rec->opts.uid == UINT_MAX - 1)
794 goto out_free_fd;
795
796 if (rec->opts.target_pid)
792 rec->opts.target_tid = rec->opts.target_pid; 797 rec->opts.target_tid = rec->opts.target_pid;
793 798
794 if (perf_evlist__create_maps(evsel_list, rec->opts.target_pid, 799 if (perf_evlist__create_maps(evsel_list, rec->opts.target_pid,
795 rec->opts.target_tid, rec->opts.cpu_list) < 0) 800 rec->opts.target_tid, rec->opts.uid,
801 rec->opts.cpu_list) < 0)
796 usage_with_options(record_usage, record_options); 802 usage_with_options(record_usage, record_options);
797 803
798 list_for_each_entry(pos, &evsel_list->entries, node) { 804 list_for_each_entry(pos, &evsel_list->entries, node) {
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index bb68ddf257b..d4ce733b9eb 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -40,6 +40,7 @@ enum perf_output_field {
40 PERF_OUTPUT_SYM = 1U << 8, 40 PERF_OUTPUT_SYM = 1U << 8,
41 PERF_OUTPUT_DSO = 1U << 9, 41 PERF_OUTPUT_DSO = 1U << 9,
42 PERF_OUTPUT_ADDR = 1U << 10, 42 PERF_OUTPUT_ADDR = 1U << 10,
43 PERF_OUTPUT_SYMOFFSET = 1U << 11,
43}; 44};
44 45
45struct output_option { 46struct output_option {
@@ -57,6 +58,7 @@ struct output_option {
57 {.str = "sym", .field = PERF_OUTPUT_SYM}, 58 {.str = "sym", .field = PERF_OUTPUT_SYM},
58 {.str = "dso", .field = PERF_OUTPUT_DSO}, 59 {.str = "dso", .field = PERF_OUTPUT_DSO},
59 {.str = "addr", .field = PERF_OUTPUT_ADDR}, 60 {.str = "addr", .field = PERF_OUTPUT_ADDR},
61 {.str = "symoff", .field = PERF_OUTPUT_SYMOFFSET},
60}; 62};
61 63
62/* default set to maintain compatibility with current format */ 64/* default set to maintain compatibility with current format */
@@ -193,6 +195,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
193 "to symbols.\n"); 195 "to symbols.\n");
194 return -EINVAL; 196 return -EINVAL;
195 } 197 }
198 if (PRINT_FIELD(SYMOFFSET) && !PRINT_FIELD(SYM)) {
199 pr_err("Display of offsets requested but symbol is not"
200 "selected.\n");
201 return -EINVAL;
202 }
196 if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) { 203 if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) {
197 pr_err("Display of DSO requested but neither sample IP nor " 204 pr_err("Display of DSO requested but neither sample IP nor "
198 "sample address\nis selected. Hence, no addresses to convert " 205 "sample address\nis selected. Hence, no addresses to convert "
@@ -300,10 +307,17 @@ static void print_sample_start(struct perf_sample *sample,
300 } else 307 } else
301 evname = __event_name(attr->type, attr->config); 308 evname = __event_name(attr->type, attr->config);
302 309
303 printf("%s: ", evname ? evname : "(unknown)"); 310 printf("%s: ", evname ? evname : "[unknown]");
304 } 311 }
305} 312}
306 313
314static bool is_bts_event(struct perf_event_attr *attr)
315{
316 return ((attr->type == PERF_TYPE_HARDWARE) &&
317 (attr->config & PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
318 (attr->sample_period == 1));
319}
320
307static bool sample_addr_correlates_sym(struct perf_event_attr *attr) 321static bool sample_addr_correlates_sym(struct perf_event_attr *attr)
308{ 322{
309 if ((attr->type == PERF_TYPE_SOFTWARE) && 323 if ((attr->type == PERF_TYPE_SOFTWARE) &&
@@ -312,6 +326,9 @@ static bool sample_addr_correlates_sym(struct perf_event_attr *attr)
312 (attr->config == PERF_COUNT_SW_PAGE_FAULTS_MAJ))) 326 (attr->config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)))
313 return true; 327 return true;
314 328
329 if (is_bts_event(attr))
330 return true;
331
315 return false; 332 return false;
316} 333}
317 334
@@ -323,7 +340,6 @@ static void print_sample_addr(union perf_event *event,
323{ 340{
324 struct addr_location al; 341 struct addr_location al;
325 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 342 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
326 const char *symname, *dsoname;
327 343
328 printf("%16" PRIx64, sample->addr); 344 printf("%16" PRIx64, sample->addr);
329 345
@@ -343,22 +359,46 @@ static void print_sample_addr(union perf_event *event,
343 al.sym = map__find_symbol(al.map, al.addr, NULL); 359 al.sym = map__find_symbol(al.map, al.addr, NULL);
344 360
345 if (PRINT_FIELD(SYM)) { 361 if (PRINT_FIELD(SYM)) {
346 if (al.sym && al.sym->name) 362 printf(" ");
347 symname = al.sym->name; 363 if (PRINT_FIELD(SYMOFFSET))
364 symbol__fprintf_symname_offs(al.sym, &al, stdout);
348 else 365 else
349 symname = ""; 366 symbol__fprintf_symname(al.sym, stdout);
350
351 printf(" %16s", symname);
352 } 367 }
353 368
354 if (PRINT_FIELD(DSO)) { 369 if (PRINT_FIELD(DSO)) {
355 if (al.map && al.map->dso && al.map->dso->name) 370 printf(" (");
356 dsoname = al.map->dso->name; 371 map__fprintf_dsoname(al.map, stdout);
357 else 372 printf(")");
358 dsoname = ""; 373 }
374}
359 375
360 printf(" (%s)", dsoname); 376static void print_sample_bts(union perf_event *event,
377 struct perf_sample *sample,
378 struct perf_evsel *evsel,
379 struct machine *machine,
380 struct thread *thread)
381{
382 struct perf_event_attr *attr = &evsel->attr;
383
384 /* print branch_from information */
385 if (PRINT_FIELD(IP)) {
386 if (!symbol_conf.use_callchain)
387 printf(" ");
388 else
389 printf("\n");
390 perf_event__print_ip(event, sample, machine, evsel,
391 PRINT_FIELD(SYM), PRINT_FIELD(DSO),
392 PRINT_FIELD(SYMOFFSET));
361 } 393 }
394
395 printf(" => ");
396
397 /* print branch_to information */
398 if (PRINT_FIELD(ADDR))
399 print_sample_addr(event, sample, machine, thread, attr);
400
401 printf("\n");
362} 402}
363 403
364static void process_event(union perf_event *event __unused, 404static void process_event(union perf_event *event __unused,
@@ -374,6 +414,11 @@ static void process_event(union perf_event *event __unused,
374 414
375 print_sample_start(sample, thread, attr); 415 print_sample_start(sample, thread, attr);
376 416
417 if (is_bts_event(attr)) {
418 print_sample_bts(event, sample, evsel, machine, thread);
419 return;
420 }
421
377 if (PRINT_FIELD(TRACE)) 422 if (PRINT_FIELD(TRACE))
378 print_trace_event(sample->cpu, sample->raw_data, 423 print_trace_event(sample->cpu, sample->raw_data,
379 sample->raw_size); 424 sample->raw_size);
@@ -387,7 +432,8 @@ static void process_event(union perf_event *event __unused,
387 else 432 else
388 printf("\n"); 433 printf("\n");
389 perf_event__print_ip(event, sample, machine, evsel, 434 perf_event__print_ip(event, sample, machine, evsel,
390 PRINT_FIELD(SYM), PRINT_FIELD(DSO)); 435 PRINT_FIELD(SYM), PRINT_FIELD(DSO),
436 PRINT_FIELD(SYMOFFSET));
391 } 437 }
392 438
393 printf("\n"); 439 printf("\n");
@@ -1097,7 +1143,10 @@ static const struct option options[] = {
1097 OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", 1143 OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
1098 "Look for files with symbols relative to this directory"), 1144 "Look for files with symbols relative to this directory"),
1099 OPT_CALLBACK('f', "fields", NULL, "str", 1145 OPT_CALLBACK('f', "fields", NULL, "str",
1100 "comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace,raw. Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr", 1146 "comma separated output fields prepend with 'type:'. "
1147 "Valid types: hw,sw,trace,raw. "
1148 "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
1149 "addr,symoff",
1101 parse_output_fields), 1150 parse_output_fields),
1102 OPT_BOOLEAN('a', "all-cpus", &system_wide, 1151 OPT_BOOLEAN('a', "all-cpus", &system_wide,
1103 "system-wide collection from all CPUs"), 1152 "system-wide collection from all CPUs"),
@@ -1106,6 +1155,9 @@ static const struct option options[] = {
1106 "only display events for these comms"), 1155 "only display events for these comms"),
1107 OPT_BOOLEAN('I', "show-info", &show_full_info, 1156 OPT_BOOLEAN('I', "show-info", &show_full_info,
1108 "display extended information from perf.data file"), 1157 "display extended information from perf.data file"),
1158 OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path,
1159 "Show the path of [kernel.kallsyms]"),
1160
1109 OPT_END() 1161 OPT_END()
1110}; 1162};
1111 1163
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index f5d2a63eba6..ea40e4e8b22 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -182,8 +182,8 @@ static int run_count = 1;
182static bool no_inherit = false; 182static bool no_inherit = false;
183static bool scale = true; 183static bool scale = true;
184static bool no_aggr = false; 184static bool no_aggr = false;
185static pid_t target_pid = -1; 185static const char *target_pid;
186static pid_t target_tid = -1; 186static const char *target_tid;
187static pid_t child_pid = -1; 187static pid_t child_pid = -1;
188static bool null_run = false; 188static bool null_run = false;
189static int detailed_run = 0; 189static int detailed_run = 0;
@@ -296,7 +296,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel,
296 if (system_wide) 296 if (system_wide)
297 return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, 297 return perf_evsel__open_per_cpu(evsel, evsel_list->cpus,
298 group, group_fd); 298 group, group_fd);
299 if (target_pid == -1 && target_tid == -1) { 299 if (!target_pid && !target_tid) {
300 attr->disabled = 1; 300 attr->disabled = 1;
301 attr->enable_on_exec = 1; 301 attr->enable_on_exec = 1;
302 } 302 }
@@ -446,7 +446,7 @@ static int run_perf_stat(int argc __used, const char **argv)
446 exit(-1); 446 exit(-1);
447 } 447 }
448 448
449 if (target_tid == -1 && target_pid == -1 && !system_wide) 449 if (!target_tid && !target_pid && !system_wide)
450 evsel_list->threads->map[0] = child_pid; 450 evsel_list->threads->map[0] = child_pid;
451 451
452 /* 452 /*
@@ -576,6 +576,8 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
576 if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) 576 if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
577 fprintf(output, " # %8.3f CPUs utilized ", 577 fprintf(output, " # %8.3f CPUs utilized ",
578 avg / avg_stats(&walltime_nsecs_stats)); 578 avg / avg_stats(&walltime_nsecs_stats));
579 else
580 fprintf(output, " ");
579} 581}
580 582
581/* used for get_ratio_color() */ 583/* used for get_ratio_color() */
@@ -844,12 +846,18 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
844 846
845 fprintf(output, " # %8.3f GHz ", ratio); 847 fprintf(output, " # %8.3f GHz ", ratio);
846 } else if (runtime_nsecs_stats[cpu].n != 0) { 848 } else if (runtime_nsecs_stats[cpu].n != 0) {
849 char unit = 'M';
850
847 total = avg_stats(&runtime_nsecs_stats[cpu]); 851 total = avg_stats(&runtime_nsecs_stats[cpu]);
848 852
849 if (total) 853 if (total)
850 ratio = 1000.0 * avg / total; 854 ratio = 1000.0 * avg / total;
855 if (ratio < 0.001) {
856 ratio *= 1000;
857 unit = 'K';
858 }
851 859
852 fprintf(output, " # %8.3f M/sec ", ratio); 860 fprintf(output, " # %8.3f %c/sec ", ratio, unit);
853 } else { 861 } else {
854 fprintf(output, " "); 862 fprintf(output, " ");
855 } 863 }
@@ -960,14 +968,14 @@ static void print_stat(int argc, const char **argv)
960 if (!csv_output) { 968 if (!csv_output) {
961 fprintf(output, "\n"); 969 fprintf(output, "\n");
962 fprintf(output, " Performance counter stats for "); 970 fprintf(output, " Performance counter stats for ");
963 if(target_pid == -1 && target_tid == -1) { 971 if (!target_pid && !target_tid) {
964 fprintf(output, "\'%s", argv[0]); 972 fprintf(output, "\'%s", argv[0]);
965 for (i = 1; i < argc; i++) 973 for (i = 1; i < argc; i++)
966 fprintf(output, " %s", argv[i]); 974 fprintf(output, " %s", argv[i]);
967 } else if (target_pid != -1) 975 } else if (target_pid)
968 fprintf(output, "process id \'%d", target_pid); 976 fprintf(output, "process id \'%s", target_pid);
969 else 977 else
970 fprintf(output, "thread id \'%d", target_tid); 978 fprintf(output, "thread id \'%s", target_tid);
971 979
972 fprintf(output, "\'"); 980 fprintf(output, "\'");
973 if (run_count > 1) 981 if (run_count > 1)
@@ -1041,10 +1049,10 @@ static const struct option options[] = {
1041 "event filter", parse_filter), 1049 "event filter", parse_filter),
1042 OPT_BOOLEAN('i', "no-inherit", &no_inherit, 1050 OPT_BOOLEAN('i', "no-inherit", &no_inherit,
1043 "child tasks do not inherit counters"), 1051 "child tasks do not inherit counters"),
1044 OPT_INTEGER('p', "pid", &target_pid, 1052 OPT_STRING('p', "pid", &target_pid, "pid",
1045 "stat events on existing process id"), 1053 "stat events on existing process id"),
1046 OPT_INTEGER('t', "tid", &target_tid, 1054 OPT_STRING('t', "tid", &target_tid, "tid",
1047 "stat events on existing thread id"), 1055 "stat events on existing thread id"),
1048 OPT_BOOLEAN('a', "all-cpus", &system_wide, 1056 OPT_BOOLEAN('a', "all-cpus", &system_wide,
1049 "system-wide collection from all CPUs"), 1057 "system-wide collection from all CPUs"),
1050 OPT_BOOLEAN('g', "group", &group, 1058 OPT_BOOLEAN('g', "group", &group,
@@ -1182,7 +1190,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
1182 } else if (big_num_opt == 0) /* User passed --no-big-num */ 1190 } else if (big_num_opt == 0) /* User passed --no-big-num */
1183 big_num = false; 1191 big_num = false;
1184 1192
1185 if (!argc && target_pid == -1 && target_tid == -1) 1193 if (!argc && !target_pid && !target_tid)
1186 usage_with_options(stat_usage, options); 1194 usage_with_options(stat_usage, options);
1187 if (run_count <= 0) 1195 if (run_count <= 0)
1188 usage_with_options(stat_usage, options); 1196 usage_with_options(stat_usage, options);
@@ -1198,10 +1206,11 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
1198 if (add_default_attributes()) 1206 if (add_default_attributes())
1199 goto out; 1207 goto out;
1200 1208
1201 if (target_pid != -1) 1209 if (target_pid)
1202 target_tid = target_pid; 1210 target_tid = target_pid;
1203 1211
1204 evsel_list->threads = thread_map__new(target_pid, target_tid); 1212 evsel_list->threads = thread_map__new_str(target_pid,
1213 target_tid, UINT_MAX);
1205 if (evsel_list->threads == NULL) { 1214 if (evsel_list->threads == NULL) {
1206 pr_err("Problems finding threads of monitor\n"); 1215 pr_err("Problems finding threads of monitor\n");
1207 usage_with_options(stat_usage, options); 1216 usage_with_options(stat_usage, options);
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c
index 3854e869dce..3e087ce8daa 100644
--- a/tools/perf/builtin-test.c
+++ b/tools/perf/builtin-test.c
@@ -15,6 +15,8 @@
15#include "util/thread_map.h" 15#include "util/thread_map.h"
16#include "../../include/linux/hw_breakpoint.h" 16#include "../../include/linux/hw_breakpoint.h"
17 17
18#include <sys/mman.h>
19
18static int vmlinux_matches_kallsyms_filter(struct map *map __used, struct symbol *sym) 20static int vmlinux_matches_kallsyms_filter(struct map *map __used, struct symbol *sym)
19{ 21{
20 bool *visited = symbol__priv(sym); 22 bool *visited = symbol__priv(sym);
@@ -276,7 +278,7 @@ static int test__open_syscall_event(void)
276 return -1; 278 return -1;
277 } 279 }
278 280
279 threads = thread_map__new(-1, getpid()); 281 threads = thread_map__new(-1, getpid(), UINT_MAX);
280 if (threads == NULL) { 282 if (threads == NULL) {
281 pr_debug("thread_map__new\n"); 283 pr_debug("thread_map__new\n");
282 return -1; 284 return -1;
@@ -342,7 +344,7 @@ static int test__open_syscall_event_on_all_cpus(void)
342 return -1; 344 return -1;
343 } 345 }
344 346
345 threads = thread_map__new(-1, getpid()); 347 threads = thread_map__new(-1, getpid(), UINT_MAX);
346 if (threads == NULL) { 348 if (threads == NULL) {
347 pr_debug("thread_map__new\n"); 349 pr_debug("thread_map__new\n");
348 return -1; 350 return -1;
@@ -490,7 +492,7 @@ static int test__basic_mmap(void)
490 expected_nr_events[i] = random() % 257; 492 expected_nr_events[i] = random() % 257;
491 } 493 }
492 494
493 threads = thread_map__new(-1, getpid()); 495 threads = thread_map__new(-1, getpid(), UINT_MAX);
494 if (threads == NULL) { 496 if (threads == NULL) {
495 pr_debug("thread_map__new\n"); 497 pr_debug("thread_map__new\n");
496 return -1; 498 return -1;
@@ -1008,12 +1010,9 @@ realloc:
1008static int test__PERF_RECORD(void) 1010static int test__PERF_RECORD(void)
1009{ 1011{
1010 struct perf_record_opts opts = { 1012 struct perf_record_opts opts = {
1011 .target_pid = -1,
1012 .target_tid = -1,
1013 .no_delay = true, 1013 .no_delay = true,
1014 .freq = 10, 1014 .freq = 10,
1015 .mmap_pages = 256, 1015 .mmap_pages = 256,
1016 .sample_id_all_avail = true,
1017 }; 1016 };
1018 cpu_set_t *cpu_mask = NULL; 1017 cpu_set_t *cpu_mask = NULL;
1019 size_t cpu_mask_size = 0; 1018 size_t cpu_mask_size = 0;
@@ -1054,7 +1053,7 @@ static int test__PERF_RECORD(void)
1054 * we're monitoring, the one forked there. 1053 * we're monitoring, the one forked there.
1055 */ 1054 */
1056 err = perf_evlist__create_maps(evlist, opts.target_pid, 1055 err = perf_evlist__create_maps(evlist, opts.target_pid,
1057 opts.target_tid, opts.cpu_list); 1056 opts.target_tid, UINT_MAX, opts.cpu_list);
1058 if (err < 0) { 1057 if (err < 0) {
1059 pr_debug("Not enough memory to create thread/cpu maps\n"); 1058 pr_debug("Not enough memory to create thread/cpu maps\n");
1060 goto out_delete_evlist; 1059 goto out_delete_evlist;
@@ -1296,6 +1295,173 @@ out:
1296 return (err < 0 || errs > 0) ? -1 : 0; 1295 return (err < 0 || errs > 0) ? -1 : 0;
1297} 1296}
1298 1297
1298
1299#if defined(__x86_64__) || defined(__i386__)
1300
1301#define barrier() asm volatile("" ::: "memory")
1302
1303static u64 rdpmc(unsigned int counter)
1304{
1305 unsigned int low, high;
1306
1307 asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));
1308
1309 return low | ((u64)high) << 32;
1310}
1311
1312static u64 rdtsc(void)
1313{
1314 unsigned int low, high;
1315
1316 asm volatile("rdtsc" : "=a" (low), "=d" (high));
1317
1318 return low | ((u64)high) << 32;
1319}
1320
1321static u64 mmap_read_self(void *addr)
1322{
1323 struct perf_event_mmap_page *pc = addr;
1324 u32 seq, idx, time_mult = 0, time_shift = 0;
1325 u64 count, cyc = 0, time_offset = 0, enabled, running, delta;
1326
1327 do {
1328 seq = pc->lock;
1329 barrier();
1330
1331 enabled = pc->time_enabled;
1332 running = pc->time_running;
1333
1334 if (enabled != running) {
1335 cyc = rdtsc();
1336 time_mult = pc->time_mult;
1337 time_shift = pc->time_shift;
1338 time_offset = pc->time_offset;
1339 }
1340
1341 idx = pc->index;
1342 count = pc->offset;
1343 if (idx)
1344 count += rdpmc(idx - 1);
1345
1346 barrier();
1347 } while (pc->lock != seq);
1348
1349 if (enabled != running) {
1350 u64 quot, rem;
1351
1352 quot = (cyc >> time_shift);
1353 rem = cyc & ((1 << time_shift) - 1);
1354 delta = time_offset + quot * time_mult +
1355 ((rem * time_mult) >> time_shift);
1356
1357 enabled += delta;
1358 if (idx)
1359 running += delta;
1360
1361 quot = count / running;
1362 rem = count % running;
1363 count = quot * enabled + (rem * enabled) / running;
1364 }
1365
1366 return count;
1367}
1368
1369/*
1370 * If the RDPMC instruction faults then signal this back to the test parent task:
1371 */
1372static void segfault_handler(int sig __used, siginfo_t *info __used, void *uc __used)
1373{
1374 exit(-1);
1375}
1376
1377static int __test__rdpmc(void)
1378{
1379 long page_size = sysconf(_SC_PAGE_SIZE);
1380 volatile int tmp = 0;
1381 u64 i, loops = 1000;
1382 int n;
1383 int fd;
1384 void *addr;
1385 struct perf_event_attr attr = {
1386 .type = PERF_TYPE_HARDWARE,
1387 .config = PERF_COUNT_HW_INSTRUCTIONS,
1388 .exclude_kernel = 1,
1389 };
1390 u64 delta_sum = 0;
1391 struct sigaction sa;
1392
1393 sigfillset(&sa.sa_mask);
1394 sa.sa_sigaction = segfault_handler;
1395 sigaction(SIGSEGV, &sa, NULL);
1396
1397 fprintf(stderr, "\n\n");
1398
1399 fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
1400 if (fd < 0) {
1401 die("Error: sys_perf_event_open() syscall returned "
1402 "with %d (%s)\n", fd, strerror(errno));
1403 }
1404
1405 addr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0);
1406 if (addr == (void *)(-1)) {
1407 die("Error: mmap() syscall returned "
1408 "with (%s)\n", strerror(errno));
1409 }
1410
1411 for (n = 0; n < 6; n++) {
1412 u64 stamp, now, delta;
1413
1414 stamp = mmap_read_self(addr);
1415
1416 for (i = 0; i < loops; i++)
1417 tmp++;
1418
1419 now = mmap_read_self(addr);
1420 loops *= 10;
1421
1422 delta = now - stamp;
1423 fprintf(stderr, "%14d: %14Lu\n", n, (long long)delta);
1424
1425 delta_sum += delta;
1426 }
1427
1428 munmap(addr, page_size);
1429 close(fd);
1430
1431 fprintf(stderr, " ");
1432
1433 if (!delta_sum)
1434 return -1;
1435
1436 return 0;
1437}
1438
1439static int test__rdpmc(void)
1440{
1441 int status = 0;
1442 int wret = 0;
1443 int ret;
1444 int pid;
1445
1446 pid = fork();
1447 if (pid < 0)
1448 return -1;
1449
1450 if (!pid) {
1451 ret = __test__rdpmc();
1452
1453 exit(ret);
1454 }
1455
1456 wret = waitpid(pid, &status, 0);
1457 if (wret < 0 || status)
1458 return -1;
1459
1460 return 0;
1461}
1462
1463#endif
1464
1299static struct test { 1465static struct test {
1300 const char *desc; 1466 const char *desc;
1301 int (*func)(void); 1467 int (*func)(void);
@@ -1320,6 +1486,12 @@ static struct test {
1320 .desc = "parse events tests", 1486 .desc = "parse events tests",
1321 .func = test__parse_events, 1487 .func = test__parse_events,
1322 }, 1488 },
1489#if defined(__x86_64__) || defined(__i386__)
1490 {
1491 .desc = "x86 rdpmc test",
1492 .func = test__rdpmc,
1493 },
1494#endif
1323 { 1495 {
1324 .desc = "Validate PERF_RECORD_* events & perf_sample fields", 1496 .desc = "Validate PERF_RECORD_* events & perf_sample fields",
1325 .func = test__PERF_RECORD, 1497 .func = test__PERF_RECORD,
@@ -1412,7 +1584,5 @@ int cmd_test(int argc, const char **argv, const char *prefix __used)
1412 if (symbol__init() < 0) 1584 if (symbol__init() < 0)
1413 return -1; 1585 return -1;
1414 1586
1415 setup_pager();
1416
1417 return __cmd_test(argc, argv); 1587 return __cmd_test(argc, argv);
1418} 1588}
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index dd162aa24ba..e3c63aef8ef 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -64,7 +64,6 @@
64#include <linux/unistd.h> 64#include <linux/unistd.h>
65#include <linux/types.h> 65#include <linux/types.h>
66 66
67
68void get_term_dimensions(struct winsize *ws) 67void get_term_dimensions(struct winsize *ws)
69{ 68{
70 char *s = getenv("LINES"); 69 char *s = getenv("LINES");
@@ -544,10 +543,20 @@ static void perf_top__sort_new_samples(void *arg)
544 543
545static void *display_thread_tui(void *arg) 544static void *display_thread_tui(void *arg)
546{ 545{
546 struct perf_evsel *pos;
547 struct perf_top *top = arg; 547 struct perf_top *top = arg;
548 const char *help = "For a higher level overview, try: perf top --sort comm,dso"; 548 const char *help = "For a higher level overview, try: perf top --sort comm,dso";
549 549
550 perf_top__sort_new_samples(top); 550 perf_top__sort_new_samples(top);
551
552 /*
553 * Initialize the uid_filter_str, in the future the TUI will allow
554 * Zooming in/out UIDs. For now juse use whatever the user passed
555 * via --uid.
556 */
557 list_for_each_entry(pos, &top->evlist->entries, node)
558 pos->hists.uid_filter_str = top->uid_str;
559
551 perf_evlist__tui_browse_hists(top->evlist, help, 560 perf_evlist__tui_browse_hists(top->evlist, help,
552 perf_top__sort_new_samples, 561 perf_top__sort_new_samples,
553 top, top->delay_secs); 562 top, top->delay_secs);
@@ -668,6 +677,12 @@ static void perf_event__process_sample(struct perf_tool *tool,
668 return; 677 return;
669 } 678 }
670 679
680 if (!machine) {
681 pr_err("%u unprocessable samples recorded.",
682 top->session->hists.stats.nr_unprocessable_samples++);
683 return;
684 }
685
671 if (event->header.misc & PERF_RECORD_MISC_EXACT_IP) 686 if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
672 top->exact_samples++; 687 top->exact_samples++;
673 688
@@ -857,8 +872,11 @@ static void perf_top__start_counters(struct perf_top *top)
857 attr->mmap = 1; 872 attr->mmap = 1;
858 attr->comm = 1; 873 attr->comm = 1;
859 attr->inherit = top->inherit; 874 attr->inherit = top->inherit;
875fallback_missing_features:
876 if (top->exclude_guest_missing)
877 attr->exclude_guest = attr->exclude_host = 0;
860retry_sample_id: 878retry_sample_id:
861 attr->sample_id_all = top->sample_id_all_avail ? 1 : 0; 879 attr->sample_id_all = top->sample_id_all_missing ? 0 : 1;
862try_again: 880try_again:
863 if (perf_evsel__open(counter, top->evlist->cpus, 881 if (perf_evsel__open(counter, top->evlist->cpus,
864 top->evlist->threads, top->group, 882 top->evlist->threads, top->group,
@@ -868,12 +886,20 @@ try_again:
868 if (err == EPERM || err == EACCES) { 886 if (err == EPERM || err == EACCES) {
869 ui__error_paranoid(); 887 ui__error_paranoid();
870 goto out_err; 888 goto out_err;
871 } else if (err == EINVAL && top->sample_id_all_avail) { 889 } else if (err == EINVAL) {
872 /* 890 if (!top->exclude_guest_missing &&
873 * Old kernel, no attr->sample_id_type_all field 891 (attr->exclude_guest || attr->exclude_host)) {
874 */ 892 pr_debug("Old kernel, cannot exclude "
875 top->sample_id_all_avail = false; 893 "guest or host samples.\n");
876 goto retry_sample_id; 894 top->exclude_guest_missing = true;
895 goto fallback_missing_features;
896 } else if (!top->sample_id_all_missing) {
897 /*
898 * Old kernel, no attr->sample_id_type_all field
899 */
900 top->sample_id_all_missing = true;
901 goto retry_sample_id;
902 }
877 } 903 }
878 /* 904 /*
879 * If it's cycles then fall back to hrtimer 905 * If it's cycles then fall back to hrtimer
@@ -956,7 +982,7 @@ static int __cmd_top(struct perf_top *top)
956 if (ret) 982 if (ret)
957 goto out_delete; 983 goto out_delete;
958 984
959 if (top->target_tid != -1) 985 if (top->target_tid || top->uid != UINT_MAX)
960 perf_event__synthesize_thread_map(&top->tool, top->evlist->threads, 986 perf_event__synthesize_thread_map(&top->tool, top->evlist->threads,
961 perf_event__process, 987 perf_event__process,
962 &top->session->host_machine); 988 &top->session->host_machine);
@@ -1094,10 +1120,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
1094 struct perf_top top = { 1120 struct perf_top top = {
1095 .count_filter = 5, 1121 .count_filter = 5,
1096 .delay_secs = 2, 1122 .delay_secs = 2,
1097 .target_pid = -1, 1123 .uid = UINT_MAX,
1098 .target_tid = -1,
1099 .freq = 1000, /* 1 KHz */ 1124 .freq = 1000, /* 1 KHz */
1100 .sample_id_all_avail = true,
1101 .mmap_pages = 128, 1125 .mmap_pages = 128,
1102 .sym_pcnt_filter = 5, 1126 .sym_pcnt_filter = 5,
1103 }; 1127 };
@@ -1108,9 +1132,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
1108 parse_events_option), 1132 parse_events_option),
1109 OPT_INTEGER('c', "count", &top.default_interval, 1133 OPT_INTEGER('c', "count", &top.default_interval,
1110 "event period to sample"), 1134 "event period to sample"),
1111 OPT_INTEGER('p', "pid", &top.target_pid, 1135 OPT_STRING('p', "pid", &top.target_pid, "pid",
1112 "profile events on existing process id"), 1136 "profile events on existing process id"),
1113 OPT_INTEGER('t', "tid", &top.target_tid, 1137 OPT_STRING('t', "tid", &top.target_tid, "tid",
1114 "profile events on existing thread id"), 1138 "profile events on existing thread id"),
1115 OPT_BOOLEAN('a', "all-cpus", &top.system_wide, 1139 OPT_BOOLEAN('a', "all-cpus", &top.system_wide,
1116 "system-wide collection from all CPUs"), 1140 "system-wide collection from all CPUs"),
@@ -1169,6 +1193,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
1169 "Display raw encoding of assembly instructions (default)"), 1193 "Display raw encoding of assembly instructions (default)"),
1170 OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", 1194 OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
1171 "Specify disassembler style (e.g. -M intel for intel syntax)"), 1195 "Specify disassembler style (e.g. -M intel for intel syntax)"),
1196 OPT_STRING('u', "uid", &top.uid_str, "user", "user to profile"),
1172 OPT_END() 1197 OPT_END()
1173 }; 1198 };
1174 1199
@@ -1194,18 +1219,22 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
1194 1219
1195 setup_browser(false); 1220 setup_browser(false);
1196 1221
1222 top.uid = parse_target_uid(top.uid_str, top.target_tid, top.target_pid);
1223 if (top.uid_str != NULL && top.uid == UINT_MAX - 1)
1224 goto out_delete_evlist;
1225
1197 /* CPU and PID are mutually exclusive */ 1226 /* CPU and PID are mutually exclusive */
1198 if (top.target_tid > 0 && top.cpu_list) { 1227 if (top.target_tid && top.cpu_list) {
1199 printf("WARNING: PID switch overriding CPU\n"); 1228 printf("WARNING: PID switch overriding CPU\n");
1200 sleep(1); 1229 sleep(1);
1201 top.cpu_list = NULL; 1230 top.cpu_list = NULL;
1202 } 1231 }
1203 1232
1204 if (top.target_pid != -1) 1233 if (top.target_pid)
1205 top.target_tid = top.target_pid; 1234 top.target_tid = top.target_pid;
1206 1235
1207 if (perf_evlist__create_maps(top.evlist, top.target_pid, 1236 if (perf_evlist__create_maps(top.evlist, top.target_pid,
1208 top.target_tid, top.cpu_list) < 0) 1237 top.target_tid, top.uid, top.cpu_list) < 0)
1209 usage_with_options(top_usage, options); 1238 usage_with_options(top_usage, options);
1210 1239
1211 if (!top.evlist->nr_entries && 1240 if (!top.evlist->nr_entries &&
@@ -1269,6 +1298,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
1269 1298
1270 status = __cmd_top(&top); 1299 status = __cmd_top(&top);
1271 1300
1301out_delete_evlist:
1272 perf_evlist__delete(top.evlist); 1302 perf_evlist__delete(top.evlist);
1273 1303
1274 return status; 1304 return status;
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 64f8bee31ce..f0227e93665 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -167,7 +167,6 @@ sys_perf_event_open(struct perf_event_attr *attr,
167 pid_t pid, int cpu, int group_fd, 167 pid_t pid, int cpu, int group_fd,
168 unsigned long flags) 168 unsigned long flags)
169{ 169{
170 attr->size = sizeof(*attr);
171 return syscall(__NR_perf_event_open, attr, pid, cpu, 170 return syscall(__NR_perf_event_open, attr, pid, cpu,
172 group_fd, flags); 171 group_fd, flags);
173} 172}
@@ -186,8 +185,9 @@ extern const char perf_version_string[];
186void pthread__unblock_sigwinch(void); 185void pthread__unblock_sigwinch(void);
187 186
188struct perf_record_opts { 187struct perf_record_opts {
189 pid_t target_pid; 188 const char *target_pid;
190 pid_t target_tid; 189 const char *target_tid;
190 uid_t uid;
191 bool call_graph; 191 bool call_graph;
192 bool group; 192 bool group;
193 bool inherit_stat; 193 bool inherit_stat;
@@ -198,7 +198,8 @@ struct perf_record_opts {
198 bool raw_samples; 198 bool raw_samples;
199 bool sample_address; 199 bool sample_address;
200 bool sample_time; 200 bool sample_time;
201 bool sample_id_all_avail; 201 bool sample_id_all_missing;
202 bool exclude_guest_missing;
202 bool system_wide; 203 bool system_wide;
203 bool period; 204 bool period;
204 unsigned int freq; 205 unsigned int freq;
diff --git a/tools/perf/python/twatch.py b/tools/perf/python/twatch.py
index df638c438a9..b11cca58423 100755
--- a/tools/perf/python/twatch.py
+++ b/tools/perf/python/twatch.py
@@ -19,7 +19,7 @@ def main():
19 cpus = perf.cpu_map() 19 cpus = perf.cpu_map()
20 threads = perf.thread_map() 20 threads = perf.thread_map()
21 evsel = perf.evsel(task = 1, comm = 1, mmap = 0, 21 evsel = perf.evsel(task = 1, comm = 1, mmap = 0,
22 wakeup_events = 1, sample_period = 1, 22 wakeup_events = 1, watermark = 1,
23 sample_id_all = 1, 23 sample_id_all = 1,
24 sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU | perf.SAMPLE_TID) 24 sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU | perf.SAMPLE_TID)
25 evsel.open(cpus = cpus, threads = threads); 25 evsel.open(cpus = cpus, threads = threads);
diff --git a/tools/perf/util/bitmap.c b/tools/perf/util/bitmap.c
index 5e230acae1e..0a1adc1111f 100644
--- a/tools/perf/util/bitmap.c
+++ b/tools/perf/util/bitmap.c
@@ -19,3 +19,13 @@ int __bitmap_weight(const unsigned long *bitmap, int bits)
19 19
20 return w; 20 return w;
21} 21}
22
23void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
24 const unsigned long *bitmap2, int bits)
25{
26 int k;
27 int nr = BITS_TO_LONGS(bits);
28
29 for (k = 0; k < nr; k++)
30 dst[k] = bitmap1[k] | bitmap2[k];
31}
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 6893eec693a..adc72f09914 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -166,6 +166,17 @@ out:
166 return cpus; 166 return cpus;
167} 167}
168 168
169size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp)
170{
171 int i;
172 size_t printed = fprintf(fp, "%d cpu%s: ",
173 map->nr, map->nr > 1 ? "s" : "");
174 for (i = 0; i < map->nr; ++i)
175 printed += fprintf(fp, "%s%d", i ? ", " : "", map->map[i]);
176
177 return printed + fprintf(fp, "\n");
178}
179
169struct cpu_map *cpu_map__dummy_new(void) 180struct cpu_map *cpu_map__dummy_new(void)
170{ 181{
171 struct cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int)); 182 struct cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int));
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 072c0a37479..c41518573c6 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -1,6 +1,8 @@
1#ifndef __PERF_CPUMAP_H 1#ifndef __PERF_CPUMAP_H
2#define __PERF_CPUMAP_H 2#define __PERF_CPUMAP_H
3 3
4#include <stdio.h>
5
4struct cpu_map { 6struct cpu_map {
5 int nr; 7 int nr;
6 int map[]; 8 int map[];
@@ -10,4 +12,6 @@ struct cpu_map *cpu_map__new(const char *cpu_list);
10struct cpu_map *cpu_map__dummy_new(void); 12struct cpu_map *cpu_map__dummy_new(void);
11void cpu_map__delete(struct cpu_map *map); 13void cpu_map__delete(struct cpu_map *map);
12 14
15size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp);
16
13#endif /* __PERF_CPUMAP_H */ 17#endif /* __PERF_CPUMAP_H */
diff --git a/tools/perf/util/ctype.c b/tools/perf/util/ctype.c
index 35073621e5d..aada3ac5e89 100644
--- a/tools/perf/util/ctype.c
+++ b/tools/perf/util/ctype.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * No surprises, and works with signed and unsigned chars. 4 * No surprises, and works with signed and unsigned chars.
5 */ 5 */
6#include "cache.h" 6#include "util.h"
7 7
8enum { 8enum {
9 S = GIT_SPACE, 9 S = GIT_SPACE,
diff --git a/tools/perf/util/debugfs.c b/tools/perf/util/debugfs.c
index ffc35e748e8..dd8b19319c0 100644
--- a/tools/perf/util/debugfs.c
+++ b/tools/perf/util/debugfs.c
@@ -15,32 +15,6 @@ static const char *debugfs_known_mountpoints[] = {
15 0, 15 0,
16}; 16};
17 17
18/* use this to force a umount */
19void debugfs_force_cleanup(void)
20{
21 debugfs_find_mountpoint();
22 debugfs_premounted = 0;
23 debugfs_umount();
24}
25
26/* construct a full path to a debugfs element */
27int debugfs_make_path(const char *element, char *buffer, int size)
28{
29 int len;
30
31 if (strlen(debugfs_mountpoint) == 0) {
32 buffer[0] = '\0';
33 return -1;
34 }
35
36 len = strlen(debugfs_mountpoint) + strlen(element) + 1;
37 if (len >= size)
38 return len+1;
39
40 snprintf(buffer, size-1, "%s/%s", debugfs_mountpoint, element);
41 return 0;
42}
43
44static int debugfs_found; 18static int debugfs_found;
45 19
46/* find the path to the mounted debugfs */ 20/* find the path to the mounted debugfs */
@@ -97,17 +71,6 @@ int debugfs_valid_mountpoint(const char *debugfs)
97 return 0; 71 return 0;
98} 72}
99 73
100
101int debugfs_valid_entry(const char *path)
102{
103 struct stat st;
104
105 if (stat(path, &st))
106 return -errno;
107
108 return 0;
109}
110
111static void debugfs_set_tracing_events_path(const char *mountpoint) 74static void debugfs_set_tracing_events_path(const char *mountpoint)
112{ 75{
113 snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s", 76 snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s",
@@ -149,107 +112,3 @@ void debugfs_set_path(const char *mountpoint)
149 snprintf(debugfs_mountpoint, sizeof(debugfs_mountpoint), "%s", mountpoint); 112 snprintf(debugfs_mountpoint, sizeof(debugfs_mountpoint), "%s", mountpoint);
150 debugfs_set_tracing_events_path(mountpoint); 113 debugfs_set_tracing_events_path(mountpoint);
151} 114}
152
153/* umount the debugfs */
154
155int debugfs_umount(void)
156{
157 char umountcmd[128];
158 int ret;
159
160 /* if it was already mounted, leave it */
161 if (debugfs_premounted)
162 return 0;
163
164 /* make sure it's a valid mount point */
165 ret = debugfs_valid_mountpoint(debugfs_mountpoint);
166 if (ret)
167 return ret;
168
169 snprintf(umountcmd, sizeof(umountcmd),
170 "/bin/umount %s", debugfs_mountpoint);
171 return system(umountcmd);
172}
173
174int debugfs_write(const char *entry, const char *value)
175{
176 char path[PATH_MAX + 1];
177 int ret, count;
178 int fd;
179
180 /* construct the path */
181 snprintf(path, sizeof(path), "%s/%s", debugfs_mountpoint, entry);
182
183 /* verify that it exists */
184 ret = debugfs_valid_entry(path);
185 if (ret)
186 return ret;
187
188 /* get how many chars we're going to write */
189 count = strlen(value);
190
191 /* open the debugfs entry */
192 fd = open(path, O_RDWR);
193 if (fd < 0)
194 return -errno;
195
196 while (count > 0) {
197 /* write it */
198 ret = write(fd, value, count);
199 if (ret <= 0) {
200 if (ret == EAGAIN)
201 continue;
202 close(fd);
203 return -errno;
204 }
205 count -= ret;
206 }
207
208 /* close it */
209 close(fd);
210
211 /* return success */
212 return 0;
213}
214
215/*
216 * read a debugfs entry
217 * returns the number of chars read or a negative errno
218 */
219int debugfs_read(const char *entry, char *buffer, size_t size)
220{
221 char path[PATH_MAX + 1];
222 int ret;
223 int fd;
224
225 /* construct the path */
226 snprintf(path, sizeof(path), "%s/%s", debugfs_mountpoint, entry);
227
228 /* verify that it exists */
229 ret = debugfs_valid_entry(path);
230 if (ret)
231 return ret;
232
233 /* open the debugfs entry */
234 fd = open(path, O_RDONLY);
235 if (fd < 0)
236 return -errno;
237
238 do {
239 /* read it */
240 ret = read(fd, buffer, size);
241 if (ret == 0) {
242 close(fd);
243 return EOF;
244 }
245 } while (ret < 0 && errno == EAGAIN);
246
247 /* close it */
248 close(fd);
249
250 /* make *sure* there's a null character at the end */
251 buffer[ret] = '\0';
252
253 /* return the number of chars read */
254 return ret;
255}
diff --git a/tools/perf/util/debugfs.h b/tools/perf/util/debugfs.h
index 4a878f735eb..68f3e87ec57 100644
--- a/tools/perf/util/debugfs.h
+++ b/tools/perf/util/debugfs.h
@@ -3,14 +3,8 @@
3 3
4const char *debugfs_find_mountpoint(void); 4const char *debugfs_find_mountpoint(void);
5int debugfs_valid_mountpoint(const char *debugfs); 5int debugfs_valid_mountpoint(const char *debugfs);
6int debugfs_valid_entry(const char *path);
7char *debugfs_mount(const char *mountpoint); 6char *debugfs_mount(const char *mountpoint);
8int debugfs_umount(void);
9void debugfs_set_path(const char *mountpoint); 7void debugfs_set_path(const char *mountpoint);
10int debugfs_write(const char *entry, const char *value);
11int debugfs_read(const char *entry, char *buffer, size_t size);
12void debugfs_force_cleanup(void);
13int debugfs_make_path(const char *element, char *buffer, int size);
14 8
15extern char debugfs_mountpoint[]; 9extern char debugfs_mountpoint[];
16extern char tracing_events_path[]; 10extern char tracing_events_path[];
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index ea32a061f1c..f8da9fada00 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -97,9 +97,9 @@ void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
97 ++evlist->nr_entries; 97 ++evlist->nr_entries;
98} 98}
99 99
100static void perf_evlist__splice_list_tail(struct perf_evlist *evlist, 100void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
101 struct list_head *list, 101 struct list_head *list,
102 int nr_entries) 102 int nr_entries)
103{ 103{
104 list_splice_tail(list, &evlist->entries); 104 list_splice_tail(list, &evlist->entries);
105 evlist->nr_entries += nr_entries; 105 evlist->nr_entries += nr_entries;
@@ -597,15 +597,15 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
597 return perf_evlist__mmap_per_cpu(evlist, prot, mask); 597 return perf_evlist__mmap_per_cpu(evlist, prot, mask);
598} 598}
599 599
600int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid, 600int perf_evlist__create_maps(struct perf_evlist *evlist, const char *target_pid,
601 pid_t target_tid, const char *cpu_list) 601 const char *target_tid, uid_t uid, const char *cpu_list)
602{ 602{
603 evlist->threads = thread_map__new(target_pid, target_tid); 603 evlist->threads = thread_map__new_str(target_pid, target_tid, uid);
604 604
605 if (evlist->threads == NULL) 605 if (evlist->threads == NULL)
606 return -1; 606 return -1;
607 607
608 if (cpu_list == NULL && target_tid != -1) 608 if (uid != UINT_MAX || (cpu_list == NULL && target_tid))
609 evlist->cpus = cpu_map__dummy_new(); 609 evlist->cpus = cpu_map__dummy_new();
610 else 610 else
611 evlist->cpus = cpu_map__new(cpu_list); 611 evlist->cpus = cpu_map__new(cpu_list);
@@ -824,7 +824,7 @@ int perf_evlist__prepare_workload(struct perf_evlist *evlist,
824 exit(-1); 824 exit(-1);
825 } 825 }
826 826
827 if (!opts->system_wide && opts->target_tid == -1 && opts->target_pid == -1) 827 if (!opts->system_wide && !opts->target_tid && !opts->target_pid)
828 evlist->threads->map[0] = evlist->workload.pid; 828 evlist->threads->map[0] = evlist->workload.pid;
829 829
830 close(child_ready_pipe[1]); 830 close(child_ready_pipe[1]);
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 8922aeed046..21f1c9e57f1 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -106,8 +106,8 @@ static inline void perf_evlist__set_maps(struct perf_evlist *evlist,
106 evlist->threads = threads; 106 evlist->threads = threads;
107} 107}
108 108
109int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid, 109int perf_evlist__create_maps(struct perf_evlist *evlist, const char *target_pid,
110 pid_t target_tid, const char *cpu_list); 110 const char *tid, uid_t uid, const char *cpu_list);
111void perf_evlist__delete_maps(struct perf_evlist *evlist); 111void perf_evlist__delete_maps(struct perf_evlist *evlist);
112int perf_evlist__set_filters(struct perf_evlist *evlist); 112int perf_evlist__set_filters(struct perf_evlist *evlist);
113 113
@@ -117,4 +117,9 @@ u16 perf_evlist__id_hdr_size(const struct perf_evlist *evlist);
117 117
118bool perf_evlist__valid_sample_type(const struct perf_evlist *evlist); 118bool perf_evlist__valid_sample_type(const struct perf_evlist *evlist);
119bool perf_evlist__valid_sample_id_all(const struct perf_evlist *evlist); 119bool perf_evlist__valid_sample_id_all(const struct perf_evlist *evlist);
120
121void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
122 struct list_head *list,
123 int nr_entries);
124
120#endif /* __PERF_EVLIST_H */ 125#endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 7132ee834e0..302d49a9f98 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -68,7 +68,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts)
68 struct perf_event_attr *attr = &evsel->attr; 68 struct perf_event_attr *attr = &evsel->attr;
69 int track = !evsel->idx; /* only the first counter needs these */ 69 int track = !evsel->idx; /* only the first counter needs these */
70 70
71 attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0; 71 attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
72 attr->inherit = !opts->no_inherit; 72 attr->inherit = !opts->no_inherit;
73 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | 73 attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
74 PERF_FORMAT_TOTAL_TIME_RUNNING | 74 PERF_FORMAT_TOTAL_TIME_RUNNING |
@@ -111,7 +111,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts)
111 if (opts->period) 111 if (opts->period)
112 attr->sample_type |= PERF_SAMPLE_PERIOD; 112 attr->sample_type |= PERF_SAMPLE_PERIOD;
113 113
114 if (opts->sample_id_all_avail && 114 if (!opts->sample_id_all_missing &&
115 (opts->sample_time || opts->system_wide || 115 (opts->sample_time || opts->system_wide ||
116 !opts->no_inherit || opts->cpu_list)) 116 !opts->no_inherit || opts->cpu_list))
117 attr->sample_type |= PERF_SAMPLE_TIME; 117 attr->sample_type |= PERF_SAMPLE_TIME;
@@ -130,7 +130,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts)
130 attr->mmap = track; 130 attr->mmap = track;
131 attr->comm = track; 131 attr->comm = track;
132 132
133 if (opts->target_pid == -1 && opts->target_tid == -1 && !opts->system_wide) { 133 if (!opts->target_pid && !opts->target_tid && !opts->system_wide) {
134 attr->disabled = 1; 134 attr->disabled = 1;
135 attr->enable_on_exec = 1; 135 attr->enable_on_exec = 1;
136 } 136 }
@@ -536,7 +536,7 @@ int perf_event__parse_sample(const union perf_event *event, u64 type,
536 } 536 }
537 537
538 if (type & PERF_SAMPLE_READ) { 538 if (type & PERF_SAMPLE_READ) {
539 fprintf(stderr, "PERF_SAMPLE_READ is unsuported for now\n"); 539 fprintf(stderr, "PERF_SAMPLE_READ is unsupported for now\n");
540 return -1; 540 return -1;
541 } 541 }
542 542
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index ecd7f4dd7ee..9f867d96c6a 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -63,9 +63,20 @@ char *perf_header__find_event(u64 id)
63 return NULL; 63 return NULL;
64} 64}
65 65
66static const char *__perf_magic = "PERFFILE"; 66/*
67 * magic2 = "PERFILE2"
68 * must be a numerical value to let the endianness
69 * determine the memory layout. That way we are able
70 * to detect endianness when reading the perf.data file
71 * back.
72 *
73 * we check for legacy (PERFFILE) format.
74 */
75static const char *__perf_magic1 = "PERFFILE";
76static const u64 __perf_magic2 = 0x32454c4946524550ULL;
77static const u64 __perf_magic2_sw = 0x50455246494c4532ULL;
67 78
68#define PERF_MAGIC (*(u64 *)__perf_magic) 79#define PERF_MAGIC __perf_magic2
69 80
70struct perf_file_attr { 81struct perf_file_attr {
71 struct perf_event_attr attr; 82 struct perf_event_attr attr;
@@ -1305,25 +1316,198 @@ static void print_cpuid(struct perf_header *ph, int fd, FILE *fp)
1305 free(str); 1316 free(str);
1306} 1317}
1307 1318
1319static int __event_process_build_id(struct build_id_event *bev,
1320 char *filename,
1321 struct perf_session *session)
1322{
1323 int err = -1;
1324 struct list_head *head;
1325 struct machine *machine;
1326 u16 misc;
1327 struct dso *dso;
1328 enum dso_kernel_type dso_type;
1329
1330 machine = perf_session__findnew_machine(session, bev->pid);
1331 if (!machine)
1332 goto out;
1333
1334 misc = bev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
1335
1336 switch (misc) {
1337 case PERF_RECORD_MISC_KERNEL:
1338 dso_type = DSO_TYPE_KERNEL;
1339 head = &machine->kernel_dsos;
1340 break;
1341 case PERF_RECORD_MISC_GUEST_KERNEL:
1342 dso_type = DSO_TYPE_GUEST_KERNEL;
1343 head = &machine->kernel_dsos;
1344 break;
1345 case PERF_RECORD_MISC_USER:
1346 case PERF_RECORD_MISC_GUEST_USER:
1347 dso_type = DSO_TYPE_USER;
1348 head = &machine->user_dsos;
1349 break;
1350 default:
1351 goto out;
1352 }
1353
1354 dso = __dsos__findnew(head, filename);
1355 if (dso != NULL) {
1356 char sbuild_id[BUILD_ID_SIZE * 2 + 1];
1357
1358 dso__set_build_id(dso, &bev->build_id);
1359
1360 if (filename[0] == '[')
1361 dso->kernel = dso_type;
1362
1363 build_id__sprintf(dso->build_id, sizeof(dso->build_id),
1364 sbuild_id);
1365 pr_debug("build id event received for %s: %s\n",
1366 dso->long_name, sbuild_id);
1367 }
1368
1369 err = 0;
1370out:
1371 return err;
1372}
1373
1374static int perf_header__read_build_ids_abi_quirk(struct perf_header *header,
1375 int input, u64 offset, u64 size)
1376{
1377 struct perf_session *session = container_of(header, struct perf_session, header);
1378 struct {
1379 struct perf_event_header header;
1380 u8 build_id[ALIGN(BUILD_ID_SIZE, sizeof(u64))];
1381 char filename[0];
1382 } old_bev;
1383 struct build_id_event bev;
1384 char filename[PATH_MAX];
1385 u64 limit = offset + size;
1386
1387 while (offset < limit) {
1388 ssize_t len;
1389
1390 if (read(input, &old_bev, sizeof(old_bev)) != sizeof(old_bev))
1391 return -1;
1392
1393 if (header->needs_swap)
1394 perf_event_header__bswap(&old_bev.header);
1395
1396 len = old_bev.header.size - sizeof(old_bev);
1397 if (read(input, filename, len) != len)
1398 return -1;
1399
1400 bev.header = old_bev.header;
1401
1402 /*
1403 * As the pid is the missing value, we need to fill
1404 * it properly. The header.misc value give us nice hint.
1405 */
1406 bev.pid = HOST_KERNEL_ID;
1407 if (bev.header.misc == PERF_RECORD_MISC_GUEST_USER ||
1408 bev.header.misc == PERF_RECORD_MISC_GUEST_KERNEL)
1409 bev.pid = DEFAULT_GUEST_KERNEL_ID;
1410
1411 memcpy(bev.build_id, old_bev.build_id, sizeof(bev.build_id));
1412 __event_process_build_id(&bev, filename, session);
1413
1414 offset += bev.header.size;
1415 }
1416
1417 return 0;
1418}
1419
1420static int perf_header__read_build_ids(struct perf_header *header,
1421 int input, u64 offset, u64 size)
1422{
1423 struct perf_session *session = container_of(header, struct perf_session, header);
1424 struct build_id_event bev;
1425 char filename[PATH_MAX];
1426 u64 limit = offset + size, orig_offset = offset;
1427 int err = -1;
1428
1429 while (offset < limit) {
1430 ssize_t len;
1431
1432 if (read(input, &bev, sizeof(bev)) != sizeof(bev))
1433 goto out;
1434
1435 if (header->needs_swap)
1436 perf_event_header__bswap(&bev.header);
1437
1438 len = bev.header.size - sizeof(bev);
1439 if (read(input, filename, len) != len)
1440 goto out;
1441 /*
1442 * The a1645ce1 changeset:
1443 *
1444 * "perf: 'perf kvm' tool for monitoring guest performance from host"
1445 *
1446 * Added a field to struct build_id_event that broke the file
1447 * format.
1448 *
1449 * Since the kernel build-id is the first entry, process the
1450 * table using the old format if the well known
1451 * '[kernel.kallsyms]' string for the kernel build-id has the
1452 * first 4 characters chopped off (where the pid_t sits).
1453 */
1454 if (memcmp(filename, "nel.kallsyms]", 13) == 0) {
1455 if (lseek(input, orig_offset, SEEK_SET) == (off_t)-1)
1456 return -1;
1457 return perf_header__read_build_ids_abi_quirk(header, input, offset, size);
1458 }
1459
1460 __event_process_build_id(&bev, filename, session);
1461
1462 offset += bev.header.size;
1463 }
1464 err = 0;
1465out:
1466 return err;
1467}
1468
1469static int process_trace_info(struct perf_file_section *section __unused,
1470 struct perf_header *ph __unused,
1471 int feat __unused, int fd)
1472{
1473 trace_report(fd, false);
1474 return 0;
1475}
1476
1477static int process_build_id(struct perf_file_section *section,
1478 struct perf_header *ph,
1479 int feat __unused, int fd)
1480{
1481 if (perf_header__read_build_ids(ph, fd, section->offset, section->size))
1482 pr_debug("Failed to read buildids, continuing...\n");
1483 return 0;
1484}
1485
1308struct feature_ops { 1486struct feature_ops {
1309 int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist); 1487 int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist);
1310 void (*print)(struct perf_header *h, int fd, FILE *fp); 1488 void (*print)(struct perf_header *h, int fd, FILE *fp);
1489 int (*process)(struct perf_file_section *section,
1490 struct perf_header *h, int feat, int fd);
1311 const char *name; 1491 const char *name;
1312 bool full_only; 1492 bool full_only;
1313}; 1493};
1314 1494
1315#define FEAT_OPA(n, func) \ 1495#define FEAT_OPA(n, func) \
1316 [n] = { .name = #n, .write = write_##func, .print = print_##func } 1496 [n] = { .name = #n, .write = write_##func, .print = print_##func }
1497#define FEAT_OPP(n, func) \
1498 [n] = { .name = #n, .write = write_##func, .print = print_##func, \
1499 .process = process_##func }
1317#define FEAT_OPF(n, func) \ 1500#define FEAT_OPF(n, func) \
1318 [n] = { .name = #n, .write = write_##func, .print = print_##func, .full_only = true } 1501 [n] = { .name = #n, .write = write_##func, .print = print_##func, \
1502 .full_only = true }
1319 1503
1320/* feature_ops not implemented: */ 1504/* feature_ops not implemented: */
1321#define print_trace_info NULL 1505#define print_trace_info NULL
1322#define print_build_id NULL 1506#define print_build_id NULL
1323 1507
1324static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { 1508static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
1325 FEAT_OPA(HEADER_TRACE_INFO, trace_info), 1509 FEAT_OPP(HEADER_TRACE_INFO, trace_info),
1326 FEAT_OPA(HEADER_BUILD_ID, build_id), 1510 FEAT_OPP(HEADER_BUILD_ID, build_id),
1327 FEAT_OPA(HEADER_HOSTNAME, hostname), 1511 FEAT_OPA(HEADER_HOSTNAME, hostname),
1328 FEAT_OPA(HEADER_OSRELEASE, osrelease), 1512 FEAT_OPA(HEADER_OSRELEASE, osrelease),
1329 FEAT_OPA(HEADER_VERSION, version), 1513 FEAT_OPA(HEADER_VERSION, version),
@@ -1620,24 +1804,59 @@ out_free:
1620 return err; 1804 return err;
1621} 1805}
1622 1806
1807static int check_magic_endian(u64 *magic, struct perf_file_header *header,
1808 struct perf_header *ph)
1809{
1810 int ret;
1811
1812 /* check for legacy format */
1813 ret = memcmp(magic, __perf_magic1, sizeof(*magic));
1814 if (ret == 0) {
1815 pr_debug("legacy perf.data format\n");
1816 if (!header)
1817 return -1;
1818
1819 if (header->attr_size != sizeof(struct perf_file_attr)) {
1820 u64 attr_size = bswap_64(header->attr_size);
1821
1822 if (attr_size != sizeof(struct perf_file_attr))
1823 return -1;
1824
1825 ph->needs_swap = true;
1826 }
1827 return 0;
1828 }
1829
1830 /* check magic number with same endianness */
1831 if (*magic == __perf_magic2)
1832 return 0;
1833
1834 /* check magic number but opposite endianness */
1835 if (*magic != __perf_magic2_sw)
1836 return -1;
1837
1838 ph->needs_swap = true;
1839
1840 return 0;
1841}
1842
1623int perf_file_header__read(struct perf_file_header *header, 1843int perf_file_header__read(struct perf_file_header *header,
1624 struct perf_header *ph, int fd) 1844 struct perf_header *ph, int fd)
1625{ 1845{
1846 int ret;
1847
1626 lseek(fd, 0, SEEK_SET); 1848 lseek(fd, 0, SEEK_SET);
1627 1849
1628 if (readn(fd, header, sizeof(*header)) <= 0 || 1850 ret = readn(fd, header, sizeof(*header));
1629 memcmp(&header->magic, __perf_magic, sizeof(header->magic))) 1851 if (ret <= 0)
1630 return -1; 1852 return -1;
1631 1853
1632 if (header->attr_size != sizeof(struct perf_file_attr)) { 1854 if (check_magic_endian(&header->magic, header, ph) < 0)
1633 u64 attr_size = bswap_64(header->attr_size); 1855 return -1;
1634
1635 if (attr_size != sizeof(struct perf_file_attr))
1636 return -1;
1637 1856
1857 if (ph->needs_swap) {
1638 mem_bswap_64(header, offsetof(struct perf_file_header, 1858 mem_bswap_64(header, offsetof(struct perf_file_header,
1639 adds_features)); 1859 adds_features));
1640 ph->needs_swap = true;
1641 } 1860 }
1642 1861
1643 if (header->size != sizeof(*header)) { 1862 if (header->size != sizeof(*header)) {
@@ -1689,156 +1908,6 @@ int perf_file_header__read(struct perf_file_header *header,
1689 return 0; 1908 return 0;
1690} 1909}
1691 1910
1692static int __event_process_build_id(struct build_id_event *bev,
1693 char *filename,
1694 struct perf_session *session)
1695{
1696 int err = -1;
1697 struct list_head *head;
1698 struct machine *machine;
1699 u16 misc;
1700 struct dso *dso;
1701 enum dso_kernel_type dso_type;
1702
1703 machine = perf_session__findnew_machine(session, bev->pid);
1704 if (!machine)
1705 goto out;
1706
1707 misc = bev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
1708
1709 switch (misc) {
1710 case PERF_RECORD_MISC_KERNEL:
1711 dso_type = DSO_TYPE_KERNEL;
1712 head = &machine->kernel_dsos;
1713 break;
1714 case PERF_RECORD_MISC_GUEST_KERNEL:
1715 dso_type = DSO_TYPE_GUEST_KERNEL;
1716 head = &machine->kernel_dsos;
1717 break;
1718 case PERF_RECORD_MISC_USER:
1719 case PERF_RECORD_MISC_GUEST_USER:
1720 dso_type = DSO_TYPE_USER;
1721 head = &machine->user_dsos;
1722 break;
1723 default:
1724 goto out;
1725 }
1726
1727 dso = __dsos__findnew(head, filename);
1728 if (dso != NULL) {
1729 char sbuild_id[BUILD_ID_SIZE * 2 + 1];
1730
1731 dso__set_build_id(dso, &bev->build_id);
1732
1733 if (filename[0] == '[')
1734 dso->kernel = dso_type;
1735
1736 build_id__sprintf(dso->build_id, sizeof(dso->build_id),
1737 sbuild_id);
1738 pr_debug("build id event received for %s: %s\n",
1739 dso->long_name, sbuild_id);
1740 }
1741
1742 err = 0;
1743out:
1744 return err;
1745}
1746
1747static int perf_header__read_build_ids_abi_quirk(struct perf_header *header,
1748 int input, u64 offset, u64 size)
1749{
1750 struct perf_session *session = container_of(header, struct perf_session, header);
1751 struct {
1752 struct perf_event_header header;
1753 u8 build_id[ALIGN(BUILD_ID_SIZE, sizeof(u64))];
1754 char filename[0];
1755 } old_bev;
1756 struct build_id_event bev;
1757 char filename[PATH_MAX];
1758 u64 limit = offset + size;
1759
1760 while (offset < limit) {
1761 ssize_t len;
1762
1763 if (read(input, &old_bev, sizeof(old_bev)) != sizeof(old_bev))
1764 return -1;
1765
1766 if (header->needs_swap)
1767 perf_event_header__bswap(&old_bev.header);
1768
1769 len = old_bev.header.size - sizeof(old_bev);
1770 if (read(input, filename, len) != len)
1771 return -1;
1772
1773 bev.header = old_bev.header;
1774
1775 /*
1776 * As the pid is the missing value, we need to fill
1777 * it properly. The header.misc value give us nice hint.
1778 */
1779 bev.pid = HOST_KERNEL_ID;
1780 if (bev.header.misc == PERF_RECORD_MISC_GUEST_USER ||
1781 bev.header.misc == PERF_RECORD_MISC_GUEST_KERNEL)
1782 bev.pid = DEFAULT_GUEST_KERNEL_ID;
1783
1784 memcpy(bev.build_id, old_bev.build_id, sizeof(bev.build_id));
1785 __event_process_build_id(&bev, filename, session);
1786
1787 offset += bev.header.size;
1788 }
1789
1790 return 0;
1791}
1792
1793static int perf_header__read_build_ids(struct perf_header *header,
1794 int input, u64 offset, u64 size)
1795{
1796 struct perf_session *session = container_of(header, struct perf_session, header);
1797 struct build_id_event bev;
1798 char filename[PATH_MAX];
1799 u64 limit = offset + size, orig_offset = offset;
1800 int err = -1;
1801
1802 while (offset < limit) {
1803 ssize_t len;
1804
1805 if (read(input, &bev, sizeof(bev)) != sizeof(bev))
1806 goto out;
1807
1808 if (header->needs_swap)
1809 perf_event_header__bswap(&bev.header);
1810
1811 len = bev.header.size - sizeof(bev);
1812 if (read(input, filename, len) != len)
1813 goto out;
1814 /*
1815 * The a1645ce1 changeset:
1816 *
1817 * "perf: 'perf kvm' tool for monitoring guest performance from host"
1818 *
1819 * Added a field to struct build_id_event that broke the file
1820 * format.
1821 *
1822 * Since the kernel build-id is the first entry, process the
1823 * table using the old format if the well known
1824 * '[kernel.kallsyms]' string for the kernel build-id has the
1825 * first 4 characters chopped off (where the pid_t sits).
1826 */
1827 if (memcmp(filename, "nel.kallsyms]", 13) == 0) {
1828 if (lseek(input, orig_offset, SEEK_SET) == (off_t)-1)
1829 return -1;
1830 return perf_header__read_build_ids_abi_quirk(header, input, offset, size);
1831 }
1832
1833 __event_process_build_id(&bev, filename, session);
1834
1835 offset += bev.header.size;
1836 }
1837 err = 0;
1838out:
1839 return err;
1840}
1841
1842static int perf_file_section__process(struct perf_file_section *section, 1911static int perf_file_section__process(struct perf_file_section *section,
1843 struct perf_header *ph, 1912 struct perf_header *ph,
1844 int feat, int fd, void *data __used) 1913 int feat, int fd, void *data __used)
@@ -1854,27 +1923,23 @@ static int perf_file_section__process(struct perf_file_section *section,
1854 return 0; 1923 return 0;
1855 } 1924 }
1856 1925
1857 switch (feat) { 1926 if (!feat_ops[feat].process)
1858 case HEADER_TRACE_INFO: 1927 return 0;
1859 trace_report(fd, false);
1860 break;
1861 case HEADER_BUILD_ID:
1862 if (perf_header__read_build_ids(ph, fd, section->offset, section->size))
1863 pr_debug("Failed to read buildids, continuing...\n");
1864 break;
1865 default:
1866 break;
1867 }
1868 1928
1869 return 0; 1929 return feat_ops[feat].process(section, ph, feat, fd);
1870} 1930}
1871 1931
1872static int perf_file_header__read_pipe(struct perf_pipe_file_header *header, 1932static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
1873 struct perf_header *ph, int fd, 1933 struct perf_header *ph, int fd,
1874 bool repipe) 1934 bool repipe)
1875{ 1935{
1876 if (readn(fd, header, sizeof(*header)) <= 0 || 1936 int ret;
1877 memcmp(&header->magic, __perf_magic, sizeof(header->magic))) 1937
1938 ret = readn(fd, header, sizeof(*header));
1939 if (ret <= 0)
1940 return -1;
1941
1942 if (check_magic_endian(&header->magic, NULL, ph) < 0)
1878 return -1; 1943 return -1;
1879 1944
1880 if (repipe && do_write(STDOUT_FILENO, header, sizeof(*header)) < 0) 1945 if (repipe && do_write(STDOUT_FILENO, header, sizeof(*header)) < 0)
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index ac4ec956024..e68f617d082 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -11,6 +11,7 @@
11 11
12enum { 12enum {
13 HEADER_RESERVED = 0, /* always cleared */ 13 HEADER_RESERVED = 0, /* always cleared */
14 HEADER_FIRST_FEATURE = 1,
14 HEADER_TRACE_INFO = 1, 15 HEADER_TRACE_INFO = 1,
15 HEADER_BUILD_ID, 16 HEADER_BUILD_ID,
16 17
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index f55f0a8d1f8..48e5acd1e86 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -32,6 +32,7 @@ struct events_stats {
32 u32 nr_unknown_events; 32 u32 nr_unknown_events;
33 u32 nr_invalid_chains; 33 u32 nr_invalid_chains;
34 u32 nr_unknown_id; 34 u32 nr_unknown_id;
35 u32 nr_unprocessable_samples;
35}; 36};
36 37
37enum hist_column { 38enum hist_column {
@@ -55,6 +56,7 @@ struct hists {
55 u64 nr_entries; 56 u64 nr_entries;
56 const struct thread *thread_filter; 57 const struct thread *thread_filter;
57 const struct dso *dso_filter; 58 const struct dso *dso_filter;
59 const char *uid_filter_str;
58 pthread_mutex_t lock; 60 pthread_mutex_t lock;
59 struct events_stats stats; 61 struct events_stats stats;
60 u64 event_stream; 62 u64 event_stream;
diff --git a/tools/perf/util/include/asm/dwarf2.h b/tools/perf/util/include/asm/dwarf2.h
index bb4198e7837..afe38199e92 100644
--- a/tools/perf/util/include/asm/dwarf2.h
+++ b/tools/perf/util/include/asm/dwarf2.h
@@ -2,10 +2,12 @@
2#ifndef PERF_DWARF2_H 2#ifndef PERF_DWARF2_H
3#define PERF_DWARF2_H 3#define PERF_DWARF2_H
4 4
5/* dwarf2.h ... dummy header file for including arch/x86/lib/memcpy_64.S */ 5/* dwarf2.h ... dummy header file for including arch/x86/lib/mem{cpy,set}_64.S */
6 6
7#define CFI_STARTPROC 7#define CFI_STARTPROC
8#define CFI_ENDPROC 8#define CFI_ENDPROC
9#define CFI_REMEMBER_STATE
10#define CFI_RESTORE_STATE
9 11
10#endif /* PERF_DWARF2_H */ 12#endif /* PERF_DWARF2_H */
11 13
diff --git a/tools/perf/util/include/linux/bitmap.h b/tools/perf/util/include/linux/bitmap.h
index eda4416efa0..bb162e40c76 100644
--- a/tools/perf/util/include/linux/bitmap.h
+++ b/tools/perf/util/include/linux/bitmap.h
@@ -5,6 +5,8 @@
5#include <linux/bitops.h> 5#include <linux/bitops.h>
6 6
7int __bitmap_weight(const unsigned long *bitmap, int bits); 7int __bitmap_weight(const unsigned long *bitmap, int bits);
8void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
9 const unsigned long *bitmap2, int bits);
8 10
9#define BITMAP_LAST_WORD_MASK(nbits) \ 11#define BITMAP_LAST_WORD_MASK(nbits) \
10( \ 12( \
@@ -32,4 +34,13 @@ static inline int bitmap_weight(const unsigned long *src, int nbits)
32 return __bitmap_weight(src, nbits); 34 return __bitmap_weight(src, nbits);
33} 35}
34 36
37static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
38 const unsigned long *src2, int nbits)
39{
40 if (small_const_nbits(nbits))
41 *dst = *src1 | *src2;
42 else
43 __bitmap_or(dst, src1, src2, nbits);
44}
45
35#endif /* _PERF_BITOPS_H */ 46#endif /* _PERF_BITOPS_H */
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 316aa0ab712..dea6d1c1a95 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -212,6 +212,21 @@ size_t map__fprintf(struct map *self, FILE *fp)
212 self->start, self->end, self->pgoff, self->dso->name); 212 self->start, self->end, self->pgoff, self->dso->name);
213} 213}
214 214
215size_t map__fprintf_dsoname(struct map *map, FILE *fp)
216{
217 const char *dsoname;
218
219 if (map && map->dso && (map->dso->name || map->dso->long_name)) {
220 if (symbol_conf.show_kernel_path && map->dso->long_name)
221 dsoname = map->dso->long_name;
222 else if (map->dso->name)
223 dsoname = map->dso->name;
224 } else
225 dsoname = "[unknown]";
226
227 return fprintf(fp, "%s", dsoname);
228}
229
215/* 230/*
216 * objdump wants/reports absolute IPs for ET_EXEC, and RIPs for ET_DYN. 231 * objdump wants/reports absolute IPs for ET_EXEC, and RIPs for ET_DYN.
217 * map->dso->adjust_symbols==1 for ET_EXEC-like cases. 232 * map->dso->adjust_symbols==1 for ET_EXEC-like cases.
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 2b8017f8a93..b100c20b7f9 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -118,6 +118,7 @@ void map__delete(struct map *self);
118struct map *map__clone(struct map *self); 118struct map *map__clone(struct map *self);
119int map__overlap(struct map *l, struct map *r); 119int map__overlap(struct map *l, struct map *r);
120size_t map__fprintf(struct map *self, FILE *fp); 120size_t map__fprintf(struct map *self, FILE *fp);
121size_t map__fprintf_dsoname(struct map *map, FILE *fp);
121 122
122int map__load(struct map *self, symbol_filter_t filter); 123int map__load(struct map *self, symbol_filter_t filter);
123struct symbol *map__find_symbol(struct map *self, 124struct symbol *map__find_symbol(struct map *self,
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index e33554a562b..15f9bb1b5f0 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -273,10 +273,10 @@ static int add_module_to_probe_trace_events(struct probe_trace_event *tevs,
273/* Try to find perf_probe_event with debuginfo */ 273/* Try to find perf_probe_event with debuginfo */
274static int try_to_find_probe_trace_events(struct perf_probe_event *pev, 274static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
275 struct probe_trace_event **tevs, 275 struct probe_trace_event **tevs,
276 int max_tevs, const char *module) 276 int max_tevs, const char *target)
277{ 277{
278 bool need_dwarf = perf_probe_event_need_dwarf(pev); 278 bool need_dwarf = perf_probe_event_need_dwarf(pev);
279 struct debuginfo *dinfo = open_debuginfo(module); 279 struct debuginfo *dinfo = open_debuginfo(target);
280 int ntevs, ret = 0; 280 int ntevs, ret = 0;
281 281
282 if (!dinfo) { 282 if (!dinfo) {
@@ -295,9 +295,9 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
295 295
296 if (ntevs > 0) { /* Succeeded to find trace events */ 296 if (ntevs > 0) { /* Succeeded to find trace events */
297 pr_debug("find %d probe_trace_events.\n", ntevs); 297 pr_debug("find %d probe_trace_events.\n", ntevs);
298 if (module) 298 if (target)
299 ret = add_module_to_probe_trace_events(*tevs, ntevs, 299 ret = add_module_to_probe_trace_events(*tevs, ntevs,
300 module); 300 target);
301 return ret < 0 ? ret : ntevs; 301 return ret < 0 ? ret : ntevs;
302 } 302 }
303 303
@@ -1729,7 +1729,7 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
1729 } 1729 }
1730 1730
1731 ret = 0; 1731 ret = 0;
1732 printf("Add new event%s\n", (ntevs > 1) ? "s:" : ":"); 1732 printf("Added new event%s\n", (ntevs > 1) ? "s:" : ":");
1733 for (i = 0; i < ntevs; i++) { 1733 for (i = 0; i < ntevs; i++) {
1734 tev = &tevs[i]; 1734 tev = &tevs[i];
1735 if (pev->event) 1735 if (pev->event)
@@ -1784,7 +1784,7 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
1784 1784
1785 if (ret >= 0) { 1785 if (ret >= 0) {
1786 /* Show how to use the event. */ 1786 /* Show how to use the event. */
1787 printf("\nYou can now use it on all perf tools, such as:\n\n"); 1787 printf("\nYou can now use it in all perf tools, such as:\n\n");
1788 printf("\tperf record -e %s:%s -aR sleep 1\n\n", tev->group, 1788 printf("\tperf record -e %s:%s -aR sleep 1\n\n", tev->group,
1789 tev->event); 1789 tev->event);
1790 } 1790 }
@@ -1796,14 +1796,14 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
1796 1796
1797static int convert_to_probe_trace_events(struct perf_probe_event *pev, 1797static int convert_to_probe_trace_events(struct perf_probe_event *pev,
1798 struct probe_trace_event **tevs, 1798 struct probe_trace_event **tevs,
1799 int max_tevs, const char *module) 1799 int max_tevs, const char *target)
1800{ 1800{
1801 struct symbol *sym; 1801 struct symbol *sym;
1802 int ret = 0, i; 1802 int ret = 0, i;
1803 struct probe_trace_event *tev; 1803 struct probe_trace_event *tev;
1804 1804
1805 /* Convert perf_probe_event with debuginfo */ 1805 /* Convert perf_probe_event with debuginfo */
1806 ret = try_to_find_probe_trace_events(pev, tevs, max_tevs, module); 1806 ret = try_to_find_probe_trace_events(pev, tevs, max_tevs, target);
1807 if (ret != 0) 1807 if (ret != 0)
1808 return ret; /* Found in debuginfo or got an error */ 1808 return ret; /* Found in debuginfo or got an error */
1809 1809
@@ -1819,8 +1819,8 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev,
1819 goto error; 1819 goto error;
1820 } 1820 }
1821 1821
1822 if (module) { 1822 if (target) {
1823 tev->point.module = strdup(module); 1823 tev->point.module = strdup(target);
1824 if (tev->point.module == NULL) { 1824 if (tev->point.module == NULL) {
1825 ret = -ENOMEM; 1825 ret = -ENOMEM;
1826 goto error; 1826 goto error;
@@ -1890,7 +1890,7 @@ struct __event_package {
1890}; 1890};
1891 1891
1892int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, 1892int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
1893 int max_tevs, const char *module, bool force_add) 1893 int max_tevs, const char *target, bool force_add)
1894{ 1894{
1895 int i, j, ret; 1895 int i, j, ret;
1896 struct __event_package *pkgs; 1896 struct __event_package *pkgs;
@@ -1913,7 +1913,7 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
1913 ret = convert_to_probe_trace_events(pkgs[i].pev, 1913 ret = convert_to_probe_trace_events(pkgs[i].pev,
1914 &pkgs[i].tevs, 1914 &pkgs[i].tevs,
1915 max_tevs, 1915 max_tevs,
1916 module); 1916 target);
1917 if (ret < 0) 1917 if (ret < 0)
1918 goto end; 1918 goto end;
1919 pkgs[i].ntevs = ret; 1919 pkgs[i].ntevs = ret;
@@ -1965,7 +1965,7 @@ static int __del_trace_probe_event(int fd, struct str_node *ent)
1965 goto error; 1965 goto error;
1966 } 1966 }
1967 1967
1968 printf("Remove event: %s\n", ent->s); 1968 printf("Removed event: %s\n", ent->s);
1969 return 0; 1969 return 0;
1970error: 1970error:
1971 pr_warning("Failed to delete event: %s\n", strerror(-ret)); 1971 pr_warning("Failed to delete event: %s\n", strerror(-ret));
@@ -2069,7 +2069,7 @@ static int filter_available_functions(struct map *map __unused,
2069 return 1; 2069 return 1;
2070} 2070}
2071 2071
2072int show_available_funcs(const char *module, struct strfilter *_filter) 2072int show_available_funcs(const char *target, struct strfilter *_filter)
2073{ 2073{
2074 struct map *map; 2074 struct map *map;
2075 int ret; 2075 int ret;
@@ -2080,9 +2080,9 @@ int show_available_funcs(const char *module, struct strfilter *_filter)
2080 if (ret < 0) 2080 if (ret < 0)
2081 return ret; 2081 return ret;
2082 2082
2083 map = kernel_get_module_map(module); 2083 map = kernel_get_module_map(target);
2084 if (!map) { 2084 if (!map) {
2085 pr_err("Failed to find %s map.\n", (module) ? : "kernel"); 2085 pr_err("Failed to find %s map.\n", (target) ? : "kernel");
2086 return -EINVAL; 2086 return -EINVAL;
2087 } 2087 }
2088 available_func_filter = _filter; 2088 available_func_filter = _filter;
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 74bd2e63c4b..2cc162d3b78 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -30,7 +30,6 @@
30#include <stdlib.h> 30#include <stdlib.h>
31#include <string.h> 31#include <string.h>
32#include <stdarg.h> 32#include <stdarg.h>
33#include <ctype.h>
34#include <dwarf-regs.h> 33#include <dwarf-regs.h>
35 34
36#include <linux/bitops.h> 35#include <linux/bitops.h>
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources
new file mode 100644
index 00000000000..2884e67ee62
--- /dev/null
+++ b/tools/perf/util/python-ext-sources
@@ -0,0 +1,19 @@
1#
2# List of files needed by perf python extention
3#
4# Each source file must be placed on its own line so that it can be
5# processed by Makefile and util/setup.py accordingly.
6#
7
8util/python.c
9util/ctype.c
10util/evlist.c
11util/evsel.c
12util/cpumap.c
13util/thread_map.c
14util/util.c
15util/xyarray.c
16util/cgroup.c
17util/debugfs.c
18util/strlist.c
19../../lib/rbtree.c
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 9dd47a4f259..e03b58a4842 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -425,14 +425,14 @@ struct pyrf_thread_map {
425static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads, 425static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads,
426 PyObject *args, PyObject *kwargs) 426 PyObject *args, PyObject *kwargs)
427{ 427{
428 static char *kwlist[] = { "pid", "tid", NULL }; 428 static char *kwlist[] = { "pid", "tid", "uid", NULL };
429 int pid = -1, tid = -1; 429 int pid = -1, tid = -1, uid = UINT_MAX;
430 430
431 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii", 431 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iii",
432 kwlist, &pid, &tid)) 432 kwlist, &pid, &tid, &uid))
433 return -1; 433 return -1;
434 434
435 pthreads->threads = thread_map__new(pid, tid); 435 pthreads->threads = thread_map__new(pid, tid, uid);
436 if (pthreads->threads == NULL) 436 if (pthreads->threads == NULL)
437 return -1; 437 return -1;
438 return 0; 438 return 0;
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 0b2a4878317..c2623c6f9b5 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -24,7 +24,6 @@
24#include <stdio.h> 24#include <stdio.h>
25#include <stdlib.h> 25#include <stdlib.h>
26#include <string.h> 26#include <string.h>
27#include <ctype.h>
28#include <errno.h> 27#include <errno.h>
29 28
30#include "../../perf.h" 29#include "../../perf.h"
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index b5ca2558c7b..9f833cf9c6a 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -796,6 +796,10 @@ static int perf_session_deliver_event(struct perf_session *session,
796 ++session->hists.stats.nr_unknown_id; 796 ++session->hists.stats.nr_unknown_id;
797 return -1; 797 return -1;
798 } 798 }
799 if (machine == NULL) {
800 ++session->hists.stats.nr_unprocessable_samples;
801 return -1;
802 }
799 return tool->sample(tool, event, sample, evsel, machine); 803 return tool->sample(tool, event, sample, evsel, machine);
800 case PERF_RECORD_MMAP: 804 case PERF_RECORD_MMAP:
801 return tool->mmap(tool, event, sample, machine); 805 return tool->mmap(tool, event, sample, machine);
@@ -964,6 +968,12 @@ static void perf_session__warn_about_errors(const struct perf_session *session,
964 session->hists.stats.nr_invalid_chains, 968 session->hists.stats.nr_invalid_chains,
965 session->hists.stats.nr_events[PERF_RECORD_SAMPLE]); 969 session->hists.stats.nr_events[PERF_RECORD_SAMPLE]);
966 } 970 }
971
972 if (session->hists.stats.nr_unprocessable_samples != 0) {
973 ui__warning("%u unprocessable samples recorded.\n"
974 "Do you have a KVM guest running and not using 'perf kvm'?\n",
975 session->hists.stats.nr_unprocessable_samples);
976 }
967} 977}
968 978
969#define session_done() (*(volatile int *)(&session_done)) 979#define session_done() (*(volatile int *)(&session_done))
@@ -1293,10 +1303,9 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
1293 1303
1294void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, 1304void perf_event__print_ip(union perf_event *event, struct perf_sample *sample,
1295 struct machine *machine, struct perf_evsel *evsel, 1305 struct machine *machine, struct perf_evsel *evsel,
1296 int print_sym, int print_dso) 1306 int print_sym, int print_dso, int print_symoffset)
1297{ 1307{
1298 struct addr_location al; 1308 struct addr_location al;
1299 const char *symname, *dsoname;
1300 struct callchain_cursor *cursor = &evsel->hists.callchain_cursor; 1309 struct callchain_cursor *cursor = &evsel->hists.callchain_cursor;
1301 struct callchain_cursor_node *node; 1310 struct callchain_cursor_node *node;
1302 1311
@@ -1324,20 +1333,13 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample,
1324 1333
1325 printf("\t%16" PRIx64, node->ip); 1334 printf("\t%16" PRIx64, node->ip);
1326 if (print_sym) { 1335 if (print_sym) {
1327 if (node->sym && node->sym->name) 1336 printf(" ");
1328 symname = node->sym->name; 1337 symbol__fprintf_symname(node->sym, stdout);
1329 else
1330 symname = "";
1331
1332 printf(" %s", symname);
1333 } 1338 }
1334 if (print_dso) { 1339 if (print_dso) {
1335 if (node->map && node->map->dso && node->map->dso->name) 1340 printf(" (");
1336 dsoname = node->map->dso->name; 1341 map__fprintf_dsoname(al.map, stdout);
1337 else 1342 printf(")");
1338 dsoname = "";
1339
1340 printf(" (%s)", dsoname);
1341 } 1343 }
1342 printf("\n"); 1344 printf("\n");
1343 1345
@@ -1347,21 +1349,18 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample,
1347 } else { 1349 } else {
1348 printf("%16" PRIx64, sample->ip); 1350 printf("%16" PRIx64, sample->ip);
1349 if (print_sym) { 1351 if (print_sym) {
1350 if (al.sym && al.sym->name) 1352 printf(" ");
1351 symname = al.sym->name; 1353 if (print_symoffset)
1354 symbol__fprintf_symname_offs(al.sym, &al,
1355 stdout);
1352 else 1356 else
1353 symname = ""; 1357 symbol__fprintf_symname(al.sym, stdout);
1354
1355 printf(" %s", symname);
1356 } 1358 }
1357 1359
1358 if (print_dso) { 1360 if (print_dso) {
1359 if (al.map && al.map->dso && al.map->dso->name) 1361 printf(" (");
1360 dsoname = al.map->dso->name; 1362 map__fprintf_dsoname(al.map, stdout);
1361 else 1363 printf(")");
1362 dsoname = "";
1363
1364 printf(" (%s)", dsoname);
1365 } 1364 }
1366 } 1365 }
1367} 1366}
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 37bc38381fb..c8d90178e7d 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -147,7 +147,7 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
147 147
148void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, 148void perf_event__print_ip(union perf_event *event, struct perf_sample *sample,
149 struct machine *machine, struct perf_evsel *evsel, 149 struct machine *machine, struct perf_evsel *evsel,
150 int print_sym, int print_dso); 150 int print_sym, int print_dso, int print_symoffset);
151 151
152int perf_session__cpu_bitmap(struct perf_session *session, 152int perf_session__cpu_bitmap(struct perf_session *session,
153 const char *cpu_list, unsigned long *cpu_bitmap); 153 const char *cpu_list, unsigned long *cpu_bitmap);
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index 36d4c561957..d0f9f29cf18 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -24,11 +24,11 @@ cflags += getenv('CFLAGS', '').split()
24build_lib = getenv('PYTHON_EXTBUILD_LIB') 24build_lib = getenv('PYTHON_EXTBUILD_LIB')
25build_tmp = getenv('PYTHON_EXTBUILD_TMP') 25build_tmp = getenv('PYTHON_EXTBUILD_TMP')
26 26
27ext_sources = [f.strip() for f in file('util/python-ext-sources')
28 if len(f.strip()) > 0 and f[0] != '#']
29
27perf = Extension('perf', 30perf = Extension('perf',
28 sources = ['util/python.c', 'util/ctype.c', 'util/evlist.c', 31 sources = ext_sources,
29 'util/evsel.c', 'util/cpumap.c', 'util/thread_map.c',
30 'util/util.c', 'util/xyarray.c', 'util/cgroup.c',
31 'util/debugfs.c'],
32 include_dirs = ['util/include'], 32 include_dirs = ['util/include'],
33 extra_compile_args = cflags, 33 extra_compile_args = cflags,
34 ) 34 )
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 0975438c3e7..5dd83c3e2c0 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1,4 +1,3 @@
1#include <ctype.h>
2#include <dirent.h> 1#include <dirent.h>
3#include <errno.h> 2#include <errno.h>
4#include <libgen.h> 3#include <libgen.h>
@@ -12,6 +11,7 @@
12#include <unistd.h> 11#include <unistd.h>
13#include <inttypes.h> 12#include <inttypes.h>
14#include "build-id.h" 13#include "build-id.h"
14#include "util.h"
15#include "debug.h" 15#include "debug.h"
16#include "symbol.h" 16#include "symbol.h"
17#include "strlist.h" 17#include "strlist.h"
@@ -263,6 +263,28 @@ static size_t symbol__fprintf(struct symbol *sym, FILE *fp)
263 sym->name); 263 sym->name);
264} 264}
265 265
266size_t symbol__fprintf_symname_offs(const struct symbol *sym,
267 const struct addr_location *al, FILE *fp)
268{
269 unsigned long offset;
270 size_t length;
271
272 if (sym && sym->name) {
273 length = fprintf(fp, "%s", sym->name);
274 if (al) {
275 offset = al->addr - sym->start;
276 length += fprintf(fp, "+0x%lx", offset);
277 }
278 return length;
279 } else
280 return fprintf(fp, "[unknown]");
281}
282
283size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp)
284{
285 return symbol__fprintf_symname_offs(sym, NULL, fp);
286}
287
266void dso__set_long_name(struct dso *dso, char *name) 288void dso__set_long_name(struct dso *dso, char *name)
267{ 289{
268 if (name == NULL) 290 if (name == NULL)
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 123c2e14353..2a683d4fc91 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -70,6 +70,7 @@ struct symbol_conf {
70 unsigned short priv_size; 70 unsigned short priv_size;
71 unsigned short nr_events; 71 unsigned short nr_events;
72 bool try_vmlinux_path, 72 bool try_vmlinux_path,
73 show_kernel_path,
73 use_modules, 74 use_modules,
74 sort_by_name, 75 sort_by_name,
75 show_nr_samples, 76 show_nr_samples,
@@ -241,6 +242,9 @@ void machines__destroy_guest_kernel_maps(struct rb_root *machines);
241 242
242int symbol__init(void); 243int symbol__init(void);
243void symbol__exit(void); 244void symbol__exit(void);
245size_t symbol__fprintf_symname_offs(const struct symbol *sym,
246 const struct addr_location *al, FILE *fp);
247size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp);
244bool symbol_type__is_a(char symbol_type, enum map_type map_type); 248bool symbol_type__is_a(char symbol_type, enum map_type map_type);
245 249
246size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp); 250size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp);
diff --git a/tools/perf/util/sysfs.c b/tools/perf/util/sysfs.c
new file mode 100644
index 00000000000..48c6902e749
--- /dev/null
+++ b/tools/perf/util/sysfs.c
@@ -0,0 +1,60 @@
1
2#include "util.h"
3#include "sysfs.h"
4
5static const char * const sysfs_known_mountpoints[] = {
6 "/sys",
7 0,
8};
9
10static int sysfs_found;
11char sysfs_mountpoint[PATH_MAX];
12
13static int sysfs_valid_mountpoint(const char *sysfs)
14{
15 struct statfs st_fs;
16
17 if (statfs(sysfs, &st_fs) < 0)
18 return -ENOENT;
19 else if (st_fs.f_type != (long) SYSFS_MAGIC)
20 return -ENOENT;
21
22 return 0;
23}
24
25const char *sysfs_find_mountpoint(void)
26{
27 const char * const *ptr;
28 char type[100];
29 FILE *fp;
30
31 if (sysfs_found)
32 return (const char *) sysfs_mountpoint;
33
34 ptr = sysfs_known_mountpoints;
35 while (*ptr) {
36 if (sysfs_valid_mountpoint(*ptr) == 0) {
37 sysfs_found = 1;
38 strcpy(sysfs_mountpoint, *ptr);
39 return sysfs_mountpoint;
40 }
41 ptr++;
42 }
43
44 /* give up and parse /proc/mounts */
45 fp = fopen("/proc/mounts", "r");
46 if (fp == NULL)
47 return NULL;
48
49 while (!sysfs_found &&
50 fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n",
51 sysfs_mountpoint, type) == 2) {
52
53 if (strcmp(type, "sysfs") == 0)
54 sysfs_found = 1;
55 }
56
57 fclose(fp);
58
59 return sysfs_found ? sysfs_mountpoint : NULL;
60}
diff --git a/tools/perf/util/sysfs.h b/tools/perf/util/sysfs.h
new file mode 100644
index 00000000000..a813b720393
--- /dev/null
+++ b/tools/perf/util/sysfs.h
@@ -0,0 +1,6 @@
1#ifndef __SYSFS_H__
2#define __SYSFS_H__
3
4const char *sysfs_find_mountpoint(void);
5
6#endif /* __DEBUGFS_H__ */
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
index a5df131b77c..e15983cf077 100644
--- a/tools/perf/util/thread_map.c
+++ b/tools/perf/util/thread_map.c
@@ -1,6 +1,13 @@
1#include <dirent.h> 1#include <dirent.h>
2#include <limits.h>
3#include <stdbool.h>
2#include <stdlib.h> 4#include <stdlib.h>
3#include <stdio.h> 5#include <stdio.h>
6#include <sys/types.h>
7#include <sys/stat.h>
8#include <unistd.h>
9#include "strlist.h"
10#include <string.h>
4#include "thread_map.h" 11#include "thread_map.h"
5 12
6/* Skip "." and ".." directories */ 13/* Skip "." and ".." directories */
@@ -23,7 +30,7 @@ struct thread_map *thread_map__new_by_pid(pid_t pid)
23 sprintf(name, "/proc/%d/task", pid); 30 sprintf(name, "/proc/%d/task", pid);
24 items = scandir(name, &namelist, filter, NULL); 31 items = scandir(name, &namelist, filter, NULL);
25 if (items <= 0) 32 if (items <= 0)
26 return NULL; 33 return NULL;
27 34
28 threads = malloc(sizeof(*threads) + sizeof(pid_t) * items); 35 threads = malloc(sizeof(*threads) + sizeof(pid_t) * items);
29 if (threads != NULL) { 36 if (threads != NULL) {
@@ -51,14 +58,240 @@ struct thread_map *thread_map__new_by_tid(pid_t tid)
51 return threads; 58 return threads;
52} 59}
53 60
54struct thread_map *thread_map__new(pid_t pid, pid_t tid) 61struct thread_map *thread_map__new_by_uid(uid_t uid)
62{
63 DIR *proc;
64 int max_threads = 32, items, i;
65 char path[256];
66 struct dirent dirent, *next, **namelist = NULL;
67 struct thread_map *threads = malloc(sizeof(*threads) +
68 max_threads * sizeof(pid_t));
69 if (threads == NULL)
70 goto out;
71
72 proc = opendir("/proc");
73 if (proc == NULL)
74 goto out_free_threads;
75
76 threads->nr = 0;
77
78 while (!readdir_r(proc, &dirent, &next) && next) {
79 char *end;
80 bool grow = false;
81 struct stat st;
82 pid_t pid = strtol(dirent.d_name, &end, 10);
83
84 if (*end) /* only interested in proper numerical dirents */
85 continue;
86
87 snprintf(path, sizeof(path), "/proc/%s", dirent.d_name);
88
89 if (stat(path, &st) != 0)
90 continue;
91
92 if (st.st_uid != uid)
93 continue;
94
95 snprintf(path, sizeof(path), "/proc/%d/task", pid);
96 items = scandir(path, &namelist, filter, NULL);
97 if (items <= 0)
98 goto out_free_closedir;
99
100 while (threads->nr + items >= max_threads) {
101 max_threads *= 2;
102 grow = true;
103 }
104
105 if (grow) {
106 struct thread_map *tmp;
107
108 tmp = realloc(threads, (sizeof(*threads) +
109 max_threads * sizeof(pid_t)));
110 if (tmp == NULL)
111 goto out_free_namelist;
112
113 threads = tmp;
114 }
115
116 for (i = 0; i < items; i++)
117 threads->map[threads->nr + i] = atoi(namelist[i]->d_name);
118
119 for (i = 0; i < items; i++)
120 free(namelist[i]);
121 free(namelist);
122
123 threads->nr += items;
124 }
125
126out_closedir:
127 closedir(proc);
128out:
129 return threads;
130
131out_free_threads:
132 free(threads);
133 return NULL;
134
135out_free_namelist:
136 for (i = 0; i < items; i++)
137 free(namelist[i]);
138 free(namelist);
139
140out_free_closedir:
141 free(threads);
142 threads = NULL;
143 goto out_closedir;
144}
145
146struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid)
55{ 147{
56 if (pid != -1) 148 if (pid != -1)
57 return thread_map__new_by_pid(pid); 149 return thread_map__new_by_pid(pid);
150
151 if (tid == -1 && uid != UINT_MAX)
152 return thread_map__new_by_uid(uid);
153
58 return thread_map__new_by_tid(tid); 154 return thread_map__new_by_tid(tid);
59} 155}
60 156
157static struct thread_map *thread_map__new_by_pid_str(const char *pid_str)
158{
159 struct thread_map *threads = NULL, *nt;
160 char name[256];
161 int items, total_tasks = 0;
162 struct dirent **namelist = NULL;
163 int i, j = 0;
164 pid_t pid, prev_pid = INT_MAX;
165 char *end_ptr;
166 struct str_node *pos;
167 struct strlist *slist = strlist__new(false, pid_str);
168
169 if (!slist)
170 return NULL;
171
172 strlist__for_each(pos, slist) {
173 pid = strtol(pos->s, &end_ptr, 10);
174
175 if (pid == INT_MIN || pid == INT_MAX ||
176 (*end_ptr != '\0' && *end_ptr != ','))
177 goto out_free_threads;
178
179 if (pid == prev_pid)
180 continue;
181
182 sprintf(name, "/proc/%d/task", pid);
183 items = scandir(name, &namelist, filter, NULL);
184 if (items <= 0)
185 goto out_free_threads;
186
187 total_tasks += items;
188 nt = realloc(threads, (sizeof(*threads) +
189 sizeof(pid_t) * total_tasks));
190 if (nt == NULL)
191 goto out_free_threads;
192
193 threads = nt;
194
195 if (threads) {
196 for (i = 0; i < items; i++)
197 threads->map[j++] = atoi(namelist[i]->d_name);
198 threads->nr = total_tasks;
199 }
200
201 for (i = 0; i < items; i++)
202 free(namelist[i]);
203 free(namelist);
204
205 if (!threads)
206 break;
207 }
208
209out:
210 strlist__delete(slist);
211 return threads;
212
213out_free_threads:
214 free(threads);
215 threads = NULL;
216 goto out;
217}
218
219static struct thread_map *thread_map__new_by_tid_str(const char *tid_str)
220{
221 struct thread_map *threads = NULL, *nt;
222 int ntasks = 0;
223 pid_t tid, prev_tid = INT_MAX;
224 char *end_ptr;
225 struct str_node *pos;
226 struct strlist *slist;
227
228 /* perf-stat expects threads to be generated even if tid not given */
229 if (!tid_str) {
230 threads = malloc(sizeof(*threads) + sizeof(pid_t));
231 if (threads != NULL) {
232 threads->map[1] = -1;
233 threads->nr = 1;
234 }
235 return threads;
236 }
237
238 slist = strlist__new(false, tid_str);
239 if (!slist)
240 return NULL;
241
242 strlist__for_each(pos, slist) {
243 tid = strtol(pos->s, &end_ptr, 10);
244
245 if (tid == INT_MIN || tid == INT_MAX ||
246 (*end_ptr != '\0' && *end_ptr != ','))
247 goto out_free_threads;
248
249 if (tid == prev_tid)
250 continue;
251
252 ntasks++;
253 nt = realloc(threads, sizeof(*threads) + sizeof(pid_t) * ntasks);
254
255 if (nt == NULL)
256 goto out_free_threads;
257
258 threads = nt;
259 threads->map[ntasks - 1] = tid;
260 threads->nr = ntasks;
261 }
262out:
263 return threads;
264
265out_free_threads:
266 free(threads);
267 threads = NULL;
268 goto out;
269}
270
271struct thread_map *thread_map__new_str(const char *pid, const char *tid,
272 uid_t uid)
273{
274 if (pid)
275 return thread_map__new_by_pid_str(pid);
276
277 if (!tid && uid != UINT_MAX)
278 return thread_map__new_by_uid(uid);
279
280 return thread_map__new_by_tid_str(tid);
281}
282
61void thread_map__delete(struct thread_map *threads) 283void thread_map__delete(struct thread_map *threads)
62{ 284{
63 free(threads); 285 free(threads);
64} 286}
287
288size_t thread_map__fprintf(struct thread_map *threads, FILE *fp)
289{
290 int i;
291 size_t printed = fprintf(fp, "%d thread%s: ",
292 threads->nr, threads->nr > 1 ? "s" : "");
293 for (i = 0; i < threads->nr; ++i)
294 printed += fprintf(fp, "%s%d", i ? ", " : "", threads->map[i]);
295
296 return printed + fprintf(fp, "\n");
297}
diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h
index 3cb90731140..7da80f14418 100644
--- a/tools/perf/util/thread_map.h
+++ b/tools/perf/util/thread_map.h
@@ -2,6 +2,7 @@
2#define __PERF_THREAD_MAP_H 2#define __PERF_THREAD_MAP_H
3 3
4#include <sys/types.h> 4#include <sys/types.h>
5#include <stdio.h>
5 6
6struct thread_map { 7struct thread_map {
7 int nr; 8 int nr;
@@ -10,6 +11,14 @@ struct thread_map {
10 11
11struct thread_map *thread_map__new_by_pid(pid_t pid); 12struct thread_map *thread_map__new_by_pid(pid_t pid);
12struct thread_map *thread_map__new_by_tid(pid_t tid); 13struct thread_map *thread_map__new_by_tid(pid_t tid);
13struct thread_map *thread_map__new(pid_t pid, pid_t tid); 14struct thread_map *thread_map__new_by_uid(uid_t uid);
15struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid);
16
17struct thread_map *thread_map__new_str(const char *pid,
18 const char *tid, uid_t uid);
19
14void thread_map__delete(struct thread_map *threads); 20void thread_map__delete(struct thread_map *threads);
21
22size_t thread_map__fprintf(struct thread_map *threads, FILE *fp);
23
15#endif /* __PERF_THREAD_MAP_H */ 24#endif /* __PERF_THREAD_MAP_H */
diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c
index 500471dffa4..09fe579ccaf 100644
--- a/tools/perf/util/top.c
+++ b/tools/perf/util/top.c
@@ -69,12 +69,15 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
69 69
70 ret += SNPRINTF(bf + ret, size - ret, "], "); 70 ret += SNPRINTF(bf + ret, size - ret, "], ");
71 71
72 if (top->target_pid != -1) 72 if (top->target_pid)
73 ret += SNPRINTF(bf + ret, size - ret, " (target_pid: %d", 73 ret += SNPRINTF(bf + ret, size - ret, " (target_pid: %s",
74 top->target_pid); 74 top->target_pid);
75 else if (top->target_tid != -1) 75 else if (top->target_tid)
76 ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %d", 76 ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %s",
77 top->target_tid); 77 top->target_tid);
78 else if (top->uid_str != NULL)
79 ret += SNPRINTF(bf + ret, size - ret, " (uid: %s",
80 top->uid_str);
78 else 81 else
79 ret += SNPRINTF(bf + ret, size - ret, " (all"); 82 ret += SNPRINTF(bf + ret, size - ret, " (all");
80 83
@@ -82,7 +85,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
82 ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)", 85 ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)",
83 top->evlist->cpus->nr > 1 ? "s" : "", top->cpu_list); 86 top->evlist->cpus->nr > 1 ? "s" : "", top->cpu_list);
84 else { 87 else {
85 if (top->target_tid != -1) 88 if (top->target_tid)
86 ret += SNPRINTF(bf + ret, size - ret, ")"); 89 ret += SNPRINTF(bf + ret, size - ret, ")");
87 else 90 else
88 ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)", 91 ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)",
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
index a248f3c2c60..ce61cb2d1ac 100644
--- a/tools/perf/util/top.h
+++ b/tools/perf/util/top.h
@@ -23,7 +23,8 @@ struct perf_top {
23 u64 guest_us_samples, guest_kernel_samples; 23 u64 guest_us_samples, guest_kernel_samples;
24 int print_entries, count_filter, delay_secs; 24 int print_entries, count_filter, delay_secs;
25 int freq; 25 int freq;
26 pid_t target_pid, target_tid; 26 const char *target_pid, *target_tid;
27 uid_t uid;
27 bool hide_kernel_symbols, hide_user_symbols, zero; 28 bool hide_kernel_symbols, hide_user_symbols, zero;
28 bool system_wide; 29 bool system_wide;
29 bool use_tui, use_stdio; 30 bool use_tui, use_stdio;
@@ -33,7 +34,8 @@ struct perf_top {
33 bool vmlinux_warned; 34 bool vmlinux_warned;
34 bool inherit; 35 bool inherit;
35 bool group; 36 bool group;
36 bool sample_id_all_avail; 37 bool sample_id_all_missing;
38 bool exclude_guest_missing;
37 bool dump_symtab; 39 bool dump_symtab;
38 const char *cpu_list; 40 const char *cpu_list;
39 struct hist_entry *sym_filter_entry; 41 struct hist_entry *sym_filter_entry;
@@ -45,6 +47,7 @@ struct perf_top {
45 int realtime_prio; 47 int realtime_prio;
46 int sym_pcnt_filter; 48 int sym_pcnt_filter;
47 const char *sym_filter; 49 const char *sym_filter;
50 const char *uid_str;
48}; 51};
49 52
50size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size); 53size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size);
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 1a8d4dc4f38..e0a4f652f28 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -25,7 +25,6 @@
25#include <stdio.h> 25#include <stdio.h>
26#include <stdlib.h> 26#include <stdlib.h>
27#include <string.h> 27#include <string.h>
28#include <ctype.h>
29#include <errno.h> 28#include <errno.h>
30 29
31#include "../perf.h" 30#include "../perf.h"
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index f55cc3a765a..b9592e0de8d 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -33,7 +33,6 @@
33#include <pthread.h> 33#include <pthread.h>
34#include <fcntl.h> 34#include <fcntl.h>
35#include <unistd.h> 35#include <unistd.h>
36#include <ctype.h>
37#include <errno.h> 36#include <errno.h>
38 37
39#include "../perf.h" 38#include "../perf.h"
diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c
index a3fdf55f317..18ae6c1831d 100644
--- a/tools/perf/util/trace-event-scripting.c
+++ b/tools/perf/util/trace-event-scripting.c
@@ -22,7 +22,6 @@
22#include <stdio.h> 22#include <stdio.h>
23#include <stdlib.h> 23#include <stdlib.h>
24#include <string.h> 24#include <string.h>
25#include <ctype.h>
26#include <errno.h> 25#include <errno.h>
27 26
28#include "../perf.h" 27#include "../perf.h"
diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c
index e81aef1f256..bfba0490c09 100644
--- a/tools/perf/util/ui/browsers/hists.c
+++ b/tools/perf/util/ui/browsers/hists.c
@@ -839,6 +839,9 @@ static int hists__browser_title(struct hists *self, char *bf, size_t size,
839 nr_events = convert_unit(nr_events, &unit); 839 nr_events = convert_unit(nr_events, &unit);
840 printed = snprintf(bf, size, "Events: %lu%c %s", nr_events, unit, ev_name); 840 printed = snprintf(bf, size, "Events: %lu%c %s", nr_events, unit, ev_name);
841 841
842 if (self->uid_filter_str)
843 printed += snprintf(bf + printed, size - printed,
844 ", UID: %s", self->uid_filter_str);
842 if (thread) 845 if (thread)
843 printed += snprintf(bf + printed, size - printed, 846 printed += snprintf(bf + printed, size - printed,
844 ", Thread: %s(%d)", 847 ", Thread: %s(%d)",
diff --git a/tools/perf/util/ui/browsers/map.c b/tools/perf/util/ui/browsers/map.c
index 6905bcc8be2..eca6575abfd 100644
--- a/tools/perf/util/ui/browsers/map.c
+++ b/tools/perf/util/ui/browsers/map.c
@@ -3,9 +3,9 @@
3#include <newt.h> 3#include <newt.h>
4#include <inttypes.h> 4#include <inttypes.h>
5#include <sys/ttydefaults.h> 5#include <sys/ttydefaults.h>
6#include <ctype.h>
7#include <string.h> 6#include <string.h>
8#include <linux/bitops.h> 7#include <linux/bitops.h>
8#include "../../util.h"
9#include "../../debug.h" 9#include "../../debug.h"
10#include "../../symbol.h" 10#include "../../symbol.h"
11#include "../browser.h" 11#include "../browser.h"
diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c
index d76d1c0ff98..52bb07c6442 100644
--- a/tools/perf/util/usage.c
+++ b/tools/perf/util/usage.c
@@ -7,6 +7,7 @@
7 * Copyright (C) Linus Torvalds, 2005 7 * Copyright (C) Linus Torvalds, 2005
8 */ 8 */
9#include "util.h" 9#include "util.h"
10#include "debug.h"
10 11
11static void report(const char *prefix, const char *err, va_list params) 12static void report(const char *prefix, const char *err, va_list params)
12{ 13{
@@ -81,3 +82,41 @@ void warning(const char *warn, ...)
81 warn_routine(warn, params); 82 warn_routine(warn, params);
82 va_end(params); 83 va_end(params);
83} 84}
85
86uid_t parse_target_uid(const char *str, const char *tid, const char *pid)
87{
88 struct passwd pwd, *result;
89 char buf[1024];
90
91 if (str == NULL)
92 return UINT_MAX;
93
94 /* UID and PID are mutually exclusive */
95 if (tid || pid) {
96 ui__warning("PID/TID switch overriding UID\n");
97 sleep(1);
98 return UINT_MAX;
99 }
100
101 getpwnam_r(str, &pwd, buf, sizeof(buf), &result);
102
103 if (result == NULL) {
104 char *endptr;
105 int uid = strtol(str, &endptr, 10);
106
107 if (*endptr != '\0') {
108 ui__error("Invalid user %s\n", str);
109 return UINT_MAX - 1;
110 }
111
112 getpwuid_r(uid, &pwd, buf, sizeof(buf), &result);
113
114 if (result == NULL) {
115 ui__error("Problems obtaining information for user %s\n",
116 str);
117 return UINT_MAX - 1;
118 }
119 }
120
121 return result->pw_uid;
122}
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 813141047fc..8109a907841 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -6,7 +6,7 @@
6 * XXX We need to find a better place for these things... 6 * XXX We need to find a better place for these things...
7 */ 7 */
8bool perf_host = true; 8bool perf_host = true;
9bool perf_guest = true; 9bool perf_guest = false;
10 10
11void event_attr_init(struct perf_event_attr *attr) 11void event_attr_init(struct perf_event_attr *attr)
12{ 12{
@@ -14,6 +14,8 @@ void event_attr_init(struct perf_event_attr *attr)
14 attr->exclude_host = 1; 14 attr->exclude_host = 1;
15 if (!perf_guest) 15 if (!perf_guest)
16 attr->exclude_guest = 1; 16 attr->exclude_guest = 1;
17 /* to capture ABI version */
18 attr->size = sizeof(*attr);
17} 19}
18 20
19int mkdir_p(char *path, mode_t mode) 21int mkdir_p(char *path, mode_t mode)
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index ecf9898169c..0f99f394d8e 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -199,6 +199,8 @@ static inline int has_extension(const char *filename, const char *ext)
199#undef isalpha 199#undef isalpha
200#undef isprint 200#undef isprint
201#undef isalnum 201#undef isalnum
202#undef islower
203#undef isupper
202#undef tolower 204#undef tolower
203#undef toupper 205#undef toupper
204 206
@@ -219,6 +221,8 @@ extern unsigned char sane_ctype[256];
219#define isalpha(x) sane_istest(x,GIT_ALPHA) 221#define isalpha(x) sane_istest(x,GIT_ALPHA)
220#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) 222#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
221#define isprint(x) sane_istest(x,GIT_PRINT) 223#define isprint(x) sane_istest(x,GIT_PRINT)
224#define islower(x) (sane_istest(x,GIT_ALPHA) && sane_istest(x,0x20))
225#define isupper(x) (sane_istest(x,GIT_ALPHA) && !sane_istest(x,0x20))
222#define tolower(x) sane_case((unsigned char)(x), 0x20) 226#define tolower(x) sane_case((unsigned char)(x), 0x20)
223#define toupper(x) sane_case((unsigned char)(x), 0) 227#define toupper(x) sane_case((unsigned char)(x), 0)
224 228
@@ -245,6 +249,8 @@ struct perf_event_attr;
245 249
246void event_attr_init(struct perf_event_attr *attr); 250void event_attr_init(struct perf_event_attr *attr);
247 251
252uid_t parse_target_uid(const char *str, const char *tid, const char *pid);
253
248#define _STR(x) #x 254#define _STR(x) #x
249#define STR(x) _STR(x) 255#define STR(x) _STR(x)
250 256