diff options
99 files changed, 3601 insertions, 804 deletions
diff --git a/Documentation/lockup-watchdogs.txt b/Documentation/lockup-watchdogs.txt new file mode 100644 index 00000000000..d2a36602ca8 --- /dev/null +++ b/Documentation/lockup-watchdogs.txt | |||
@@ -0,0 +1,63 @@ | |||
1 | =============================================================== | ||
2 | Softlockup detector and hardlockup detector (aka nmi_watchdog) | ||
3 | =============================================================== | ||
4 | |||
5 | The Linux kernel can act as a watchdog to detect both soft and hard | ||
6 | lockups. | ||
7 | |||
8 | A 'softlockup' is defined as a bug that causes the kernel to loop in | ||
9 | kernel mode for more than 20 seconds (see "Implementation" below for | ||
10 | details), without giving other tasks a chance to run. The current | ||
11 | stack trace is displayed upon detection and, by default, the system | ||
12 | will stay locked up. Alternatively, the kernel can be configured to | ||
13 | panic; a sysctl, "kernel.softlockup_panic", a kernel parameter, | ||
14 | "softlockup_panic" (see "Documentation/kernel-parameters.txt" for | ||
15 | details), and a compile option, "BOOTPARAM_SOFTLOCKUP_PANIC", are | ||
16 | provided for this. | ||
17 | |||
18 | A 'hardlockup' is defined as a bug that causes the CPU to loop in | ||
19 | kernel mode for more than 10 seconds (see "Implementation" below for | ||
20 | details), without letting other interrupts have a chance to run. | ||
21 | Similarly to the softlockup case, the current stack trace is displayed | ||
22 | upon detection and the system will stay locked up unless the default | ||
23 | behavior is changed, which can be done through a compile time knob, | ||
24 | "BOOTPARAM_HARDLOCKUP_PANIC", and a kernel parameter, "nmi_watchdog" | ||
25 | (see "Documentation/kernel-parameters.txt" for details). | ||
26 | |||
27 | The panic option can be used in combination with panic_timeout (this | ||
28 | timeout is set through the confusingly named "kernel.panic" sysctl), | ||
29 | to cause the system to reboot automatically after a specified amount | ||
30 | of time. | ||
31 | |||
32 | === Implementation === | ||
33 | |||
34 | The soft and hard lockup detectors are built on top of the hrtimer and | ||
35 | perf subsystems, respectively. A direct consequence of this is that, | ||
36 | in principle, they should work in any architecture where these | ||
37 | subsystems are present. | ||
38 | |||
39 | A periodic hrtimer runs to generate interrupts and kick the watchdog | ||
40 | task. An NMI perf event is generated every "watchdog_thresh" | ||
41 | (compile-time initialized to 10 and configurable through sysctl of the | ||
42 | same name) seconds to check for hardlockups. If any CPU in the system | ||
43 | does not receive any hrtimer interrupt during that time the | ||
44 | 'hardlockup detector' (the handler for the NMI perf event) will | ||
45 | generate a kernel warning or call panic, depending on the | ||
46 | configuration. | ||
47 | |||
48 | The watchdog task is a high priority kernel thread that updates a | ||
49 | timestamp every time it is scheduled. If that timestamp is not updated | ||
50 | for 2*watchdog_thresh seconds (the softlockup threshold) the | ||
51 | 'softlockup detector' (coded inside the hrtimer callback function) | ||
52 | will dump useful debug information to the system log, after which it | ||
53 | will call panic if it was instructed to do so or resume execution of | ||
54 | other kernel code. | ||
55 | |||
56 | The period of the hrtimer is 2*watchdog_thresh/5, which means it has | ||
57 | two or three chances to generate an interrupt before the hardlockup | ||
58 | detector kicks in. | ||
59 | |||
60 | As explained above, a kernel knob is provided that allows | ||
61 | administrators to configure the period of the hrtimer and the perf | ||
62 | event. The right value for a particular environment is a trade-off | ||
63 | between fast response to lockups and detection overhead. | ||
diff --git a/Documentation/nmi_watchdog.txt b/Documentation/nmi_watchdog.txt deleted file mode 100644 index bf9f80a9828..00000000000 --- a/Documentation/nmi_watchdog.txt +++ /dev/null | |||
@@ -1,83 +0,0 @@ | |||
1 | |||
2 | [NMI watchdog is available for x86 and x86-64 architectures] | ||
3 | |||
4 | Is your system locking up unpredictably? No keyboard activity, just | ||
5 | a frustrating complete hard lockup? Do you want to help us debugging | ||
6 | such lockups? If all yes then this document is definitely for you. | ||
7 | |||
8 | On many x86/x86-64 type hardware there is a feature that enables | ||
9 | us to generate 'watchdog NMI interrupts'. (NMI: Non Maskable Interrupt | ||
10 | which get executed even if the system is otherwise locked up hard). | ||
11 | This can be used to debug hard kernel lockups. By executing periodic | ||
12 | NMI interrupts, the kernel can monitor whether any CPU has locked up, | ||
13 | and print out debugging messages if so. | ||
14 | |||
15 | In order to use the NMI watchdog, you need to have APIC support in your | ||
16 | kernel. For SMP kernels, APIC support gets compiled in automatically. For | ||
17 | UP, enable either CONFIG_X86_UP_APIC (Processor type and features -> Local | ||
18 | APIC support on uniprocessors) or CONFIG_X86_UP_IOAPIC (Processor type and | ||
19 | features -> IO-APIC support on uniprocessors) in your kernel config. | ||
20 | CONFIG_X86_UP_APIC is for uniprocessor machines without an IO-APIC. | ||
21 | CONFIG_X86_UP_IOAPIC is for uniprocessor with an IO-APIC. [Note: certain | ||
22 | kernel debugging options, such as Kernel Stack Meter or Kernel Tracer, | ||
23 | may implicitly disable the NMI watchdog.] | ||
24 | |||
25 | For x86-64, the needed APIC is always compiled in. | ||
26 | |||
27 | Using local APIC (nmi_watchdog=2) needs the first performance register, so | ||
28 | you can't use it for other purposes (such as high precision performance | ||
29 | profiling.) However, at least oprofile and the perfctr driver disable the | ||
30 | local APIC NMI watchdog automatically. | ||
31 | |||
32 | To actually enable the NMI watchdog, use the 'nmi_watchdog=N' boot | ||
33 | parameter. Eg. the relevant lilo.conf entry: | ||
34 | |||
35 | append="nmi_watchdog=1" | ||
36 | |||
37 | For SMP machines and UP machines with an IO-APIC use nmi_watchdog=1. | ||
38 | For UP machines without an IO-APIC use nmi_watchdog=2, this only works | ||
39 | for some processor types. If in doubt, boot with nmi_watchdog=1 and | ||
40 | check the NMI count in /proc/interrupts; if the count is zero then | ||
41 | reboot with nmi_watchdog=2 and check the NMI count. If it is still | ||
42 | zero then log a problem, you probably have a processor that needs to be | ||
43 | added to the nmi code. | ||
44 | |||
45 | A 'lockup' is the following scenario: if any CPU in the system does not | ||
46 | execute the period local timer interrupt for more than 5 seconds, then | ||
47 | the NMI handler generates an oops and kills the process. This | ||
48 | 'controlled crash' (and the resulting kernel messages) can be used to | ||
49 | debug the lockup. Thus whenever the lockup happens, wait 5 seconds and | ||
50 | the oops will show up automatically. If the kernel produces no messages | ||
51 | then the system has crashed so hard (eg. hardware-wise) that either it | ||
52 | cannot even accept NMI interrupts, or the crash has made the kernel | ||
53 | unable to print messages. | ||
54 | |||
55 | Be aware that when using local APIC, the frequency of NMI interrupts | ||
56 | it generates, depends on the system load. The local APIC NMI watchdog, | ||
57 | lacking a better source, uses the "cycles unhalted" event. As you may | ||
58 | guess it doesn't tick when the CPU is in the halted state (which happens | ||
59 | when the system is idle), but if your system locks up on anything but the | ||
60 | "hlt" processor instruction, the watchdog will trigger very soon as the | ||
61 | "cycles unhalted" event will happen every clock tick. If it locks up on | ||
62 | "hlt", then you are out of luck -- the event will not happen at all and the | ||
63 | watchdog won't trigger. This is a shortcoming of the local APIC watchdog | ||
64 | -- unfortunately there is no "clock ticks" event that would work all the | ||
65 | time. The I/O APIC watchdog is driven externally and has no such shortcoming. | ||
66 | But its NMI frequency is much higher, resulting in a more significant hit | ||
67 | to the overall system performance. | ||
68 | |||
69 | On x86 nmi_watchdog is disabled by default so you have to enable it with | ||
70 | a boot time parameter. | ||
71 | |||
72 | It's possible to disable the NMI watchdog in run-time by writing "0" to | ||
73 | /proc/sys/kernel/nmi_watchdog. Writing "1" to the same file will re-enable | ||
74 | the NMI watchdog. Notice that you still need to use "nmi_watchdog=" parameter | ||
75 | at boot time. | ||
76 | |||
77 | NOTE: In kernels prior to 2.4.2-ac18 the NMI-oopser is enabled unconditionally | ||
78 | on x86 SMP boxes. | ||
79 | |||
80 | [ feel free to send bug reports, suggestions and patches to | ||
81 | Ingo Molnar <mingo@redhat.com> or the Linux SMP mailing | ||
82 | list at <linux-smp@vger.kernel.org> ] | ||
83 | |||
diff --git a/arch/Kconfig b/arch/Kconfig index 4f55c736be1..d0e37c9d5f6 100644 --- a/arch/Kconfig +++ b/arch/Kconfig | |||
@@ -65,6 +65,23 @@ config OPTPROBES | |||
65 | depends on KPROBES && HAVE_OPTPROBES | 65 | depends on KPROBES && HAVE_OPTPROBES |
66 | depends on !PREEMPT | 66 | depends on !PREEMPT |
67 | 67 | ||
68 | config UPROBES | ||
69 | bool "Transparent user-space probes (EXPERIMENTAL)" | ||
70 | depends on ARCH_SUPPORTS_UPROBES && PERF_EVENTS | ||
71 | default n | ||
72 | help | ||
73 | Uprobes is the user-space counterpart to kprobes: they | ||
74 | enable instrumentation applications (such as 'perf probe') | ||
75 | to establish unintrusive probes in user-space binaries and | ||
76 | libraries, by executing handler functions when the probes | ||
77 | are hit by user-space applications. | ||
78 | |||
79 | ( These probes come in the form of single-byte breakpoints, | ||
80 | managed by the kernel and kept transparent to the probed | ||
81 | application. ) | ||
82 | |||
83 | If in doubt, say "N". | ||
84 | |||
68 | config HAVE_EFFICIENT_UNALIGNED_ACCESS | 85 | config HAVE_EFFICIENT_UNALIGNED_ACCESS |
69 | bool | 86 | bool |
70 | help | 87 | help |
diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h index 99cfe360798..7523340afb8 100644 --- a/arch/arm/include/asm/perf_event.h +++ b/arch/arm/include/asm/perf_event.h | |||
@@ -12,10 +12,6 @@ | |||
12 | #ifndef __ARM_PERF_EVENT_H__ | 12 | #ifndef __ARM_PERF_EVENT_H__ |
13 | #define __ARM_PERF_EVENT_H__ | 13 | #define __ARM_PERF_EVENT_H__ |
14 | 14 | ||
15 | /* ARM performance counters start from 1 (in the cp15 accesses) so use the | ||
16 | * same indexes here for consistency. */ | ||
17 | #define PERF_EVENT_INDEX_OFFSET 1 | ||
18 | |||
19 | /* ARM perf PMU IDs for use by internal perf clients. */ | 15 | /* ARM perf PMU IDs for use by internal perf clients. */ |
20 | enum arm_perf_pmu_ids { | 16 | enum arm_perf_pmu_ids { |
21 | ARM_PERF_PMU_ID_XSCALE1 = 0, | 17 | ARM_PERF_PMU_ID_XSCALE1 = 0, |
diff --git a/arch/frv/include/asm/perf_event.h b/arch/frv/include/asm/perf_event.h index a69e0155d14..c52ea5546b5 100644 --- a/arch/frv/include/asm/perf_event.h +++ b/arch/frv/include/asm/perf_event.h | |||
@@ -12,6 +12,4 @@ | |||
12 | #ifndef _ASM_PERF_EVENT_H | 12 | #ifndef _ASM_PERF_EVENT_H |
13 | #define _ASM_PERF_EVENT_H | 13 | #define _ASM_PERF_EVENT_H |
14 | 14 | ||
15 | #define PERF_EVENT_INDEX_OFFSET 0 | ||
16 | |||
17 | #endif /* _ASM_PERF_EVENT_H */ | 15 | #endif /* _ASM_PERF_EVENT_H */ |
diff --git a/arch/hexagon/include/asm/perf_event.h b/arch/hexagon/include/asm/perf_event.h index 6c2910f9118..8b8526b491c 100644 --- a/arch/hexagon/include/asm/perf_event.h +++ b/arch/hexagon/include/asm/perf_event.h | |||
@@ -19,6 +19,4 @@ | |||
19 | #ifndef _ASM_PERF_EVENT_H | 19 | #ifndef _ASM_PERF_EVENT_H |
20 | #define _ASM_PERF_EVENT_H | 20 | #define _ASM_PERF_EVENT_H |
21 | 21 | ||
22 | #define PERF_EVENT_INDEX_OFFSET 0 | ||
23 | |||
24 | #endif /* _ASM_PERF_EVENT_H */ | 22 | #endif /* _ASM_PERF_EVENT_H */ |
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index 8f1df1208d2..1a8093fa8f7 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h | |||
@@ -61,8 +61,6 @@ struct pt_regs; | |||
61 | extern unsigned long perf_misc_flags(struct pt_regs *regs); | 61 | extern unsigned long perf_misc_flags(struct pt_regs *regs); |
62 | extern unsigned long perf_instruction_pointer(struct pt_regs *regs); | 62 | extern unsigned long perf_instruction_pointer(struct pt_regs *regs); |
63 | 63 | ||
64 | #define PERF_EVENT_INDEX_OFFSET 1 | ||
65 | |||
66 | /* | 64 | /* |
67 | * Only override the default definitions in include/linux/perf_event.h | 65 | * Only override the default definitions in include/linux/perf_event.h |
68 | * if we have hardware PMU support. | 66 | * if we have hardware PMU support. |
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 64483fde95c..f04c2301725 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c | |||
@@ -1193,6 +1193,11 @@ static int power_pmu_event_init(struct perf_event *event) | |||
1193 | return err; | 1193 | return err; |
1194 | } | 1194 | } |
1195 | 1195 | ||
1196 | static int power_pmu_event_idx(struct perf_event *event) | ||
1197 | { | ||
1198 | return event->hw.idx; | ||
1199 | } | ||
1200 | |||
1196 | struct pmu power_pmu = { | 1201 | struct pmu power_pmu = { |
1197 | .pmu_enable = power_pmu_enable, | 1202 | .pmu_enable = power_pmu_enable, |
1198 | .pmu_disable = power_pmu_disable, | 1203 | .pmu_disable = power_pmu_disable, |
@@ -1205,6 +1210,7 @@ struct pmu power_pmu = { | |||
1205 | .start_txn = power_pmu_start_txn, | 1210 | .start_txn = power_pmu_start_txn, |
1206 | .cancel_txn = power_pmu_cancel_txn, | 1211 | .cancel_txn = power_pmu_cancel_txn, |
1207 | .commit_txn = power_pmu_commit_txn, | 1212 | .commit_txn = power_pmu_commit_txn, |
1213 | .event_idx = power_pmu_event_idx, | ||
1208 | }; | 1214 | }; |
1209 | 1215 | ||
1210 | /* | 1216 | /* |
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index a75f168d271..4eb444edbe4 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h | |||
@@ -6,4 +6,3 @@ | |||
6 | 6 | ||
7 | /* Empty, just to avoid compiling error */ | 7 | /* Empty, just to avoid compiling error */ |
8 | 8 | ||
9 | #define PERF_EVENT_INDEX_OFFSET 0 | ||
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e2b38b4bffd..d2a540f7d6c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -84,7 +84,7 @@ config X86 | |||
84 | select GENERIC_IOMAP | 84 | select GENERIC_IOMAP |
85 | 85 | ||
86 | config INSTRUCTION_DECODER | 86 | config INSTRUCTION_DECODER |
87 | def_bool (KPROBES || PERF_EVENTS) | 87 | def_bool (KPROBES || PERF_EVENTS || UPROBES) |
88 | 88 | ||
89 | config OUTPUT_FORMAT | 89 | config OUTPUT_FORMAT |
90 | string | 90 | string |
@@ -240,6 +240,9 @@ config ARCH_CPU_PROBE_RELEASE | |||
240 | def_bool y | 240 | def_bool y |
241 | depends on HOTPLUG_CPU | 241 | depends on HOTPLUG_CPU |
242 | 242 | ||
243 | config ARCH_SUPPORTS_UPROBES | ||
244 | def_bool y | ||
245 | |||
243 | source "init/Kconfig" | 246 | source "init/Kconfig" |
244 | source "kernel/Kconfig.freezer" | 247 | source "kernel/Kconfig.freezer" |
245 | 248 | ||
diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h index 205b063e3e3..74a2e312e8a 100644 --- a/arch/x86/include/asm/inat.h +++ b/arch/x86/include/asm/inat.h | |||
@@ -97,11 +97,12 @@ | |||
97 | 97 | ||
98 | /* Attribute search APIs */ | 98 | /* Attribute search APIs */ |
99 | extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); | 99 | extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); |
100 | extern int inat_get_last_prefix_id(insn_byte_t last_pfx); | ||
100 | extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, | 101 | extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, |
101 | insn_byte_t last_pfx, | 102 | int lpfx_id, |
102 | insn_attr_t esc_attr); | 103 | insn_attr_t esc_attr); |
103 | extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, | 104 | extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, |
104 | insn_byte_t last_pfx, | 105 | int lpfx_id, |
105 | insn_attr_t esc_attr); | 106 | insn_attr_t esc_attr); |
106 | extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, | 107 | extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, |
107 | insn_byte_t vex_m, | 108 | insn_byte_t vex_m, |
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 74df3f1eddf..48eb30a8606 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h | |||
@@ -96,12 +96,6 @@ struct insn { | |||
96 | #define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ | 96 | #define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ |
97 | #define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ | 97 | #define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ |
98 | 98 | ||
99 | /* The last prefix is needed for two-byte and three-byte opcodes */ | ||
100 | static inline insn_byte_t insn_last_prefix(struct insn *insn) | ||
101 | { | ||
102 | return insn->prefixes.bytes[3]; | ||
103 | } | ||
104 | |||
105 | extern void insn_init(struct insn *insn, const void *kaddr, int x86_64); | 99 | extern void insn_init(struct insn *insn, const void *kaddr, int x86_64); |
106 | extern void insn_get_prefixes(struct insn *insn); | 100 | extern void insn_get_prefixes(struct insn *insn); |
107 | extern void insn_get_opcode(struct insn *insn); | 101 | extern void insn_get_opcode(struct insn *insn); |
@@ -160,6 +154,18 @@ static inline insn_byte_t insn_vex_p_bits(struct insn *insn) | |||
160 | return X86_VEX_P(insn->vex_prefix.bytes[2]); | 154 | return X86_VEX_P(insn->vex_prefix.bytes[2]); |
161 | } | 155 | } |
162 | 156 | ||
157 | /* Get the last prefix id from last prefix or VEX prefix */ | ||
158 | static inline int insn_last_prefix_id(struct insn *insn) | ||
159 | { | ||
160 | if (insn_is_avx(insn)) | ||
161 | return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */ | ||
162 | |||
163 | if (insn->prefixes.bytes[3]) | ||
164 | return inat_get_last_prefix_id(insn->prefixes.bytes[3]); | ||
165 | |||
166 | return 0; | ||
167 | } | ||
168 | |||
163 | /* Offset of each field from kaddr */ | 169 | /* Offset of each field from kaddr */ |
164 | static inline int insn_offset_rex_prefix(struct insn *insn) | 170 | static inline int insn_offset_rex_prefix(struct insn *insn) |
165 | { | 171 | { |
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 461ce432b1c..e8fb2c7a5f4 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h | |||
@@ -188,8 +188,6 @@ extern u32 get_ibs_caps(void); | |||
188 | #ifdef CONFIG_PERF_EVENTS | 188 | #ifdef CONFIG_PERF_EVENTS |
189 | extern void perf_events_lapic_init(void); | 189 | extern void perf_events_lapic_init(void); |
190 | 190 | ||
191 | #define PERF_EVENT_INDEX_OFFSET 0 | ||
192 | |||
193 | /* | 191 | /* |
194 | * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups. | 192 | * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups. |
195 | * This flag is otherwise unused and ABI specified to be 0, so nobody should | 193 | * This flag is otherwise unused and ABI specified to be 0, so nobody should |
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h new file mode 100644 index 00000000000..0500391f57d --- /dev/null +++ b/arch/x86/include/asm/uprobes.h | |||
@@ -0,0 +1,43 @@ | |||
1 | #ifndef _ASM_UPROBES_H | ||
2 | #define _ASM_UPROBES_H | ||
3 | /* | ||
4 | * User-space Probes (UProbes) for x86 | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
19 | * | ||
20 | * Copyright (C) IBM Corporation, 2008-2011 | ||
21 | * Authors: | ||
22 | * Srikar Dronamraju | ||
23 | * Jim Keniston | ||
24 | */ | ||
25 | |||
26 | typedef u8 uprobe_opcode_t; | ||
27 | |||
28 | #define MAX_UINSN_BYTES 16 | ||
29 | #define UPROBE_XOL_SLOT_BYTES 128 /* to keep it cache aligned */ | ||
30 | |||
31 | #define UPROBE_SWBP_INSN 0xcc | ||
32 | #define UPROBE_SWBP_INSN_SIZE 1 | ||
33 | |||
34 | struct arch_uprobe { | ||
35 | u16 fixups; | ||
36 | u8 insn[MAX_UINSN_BYTES]; | ||
37 | #ifdef CONFIG_X86_64 | ||
38 | unsigned long rip_rela_target_address; | ||
39 | #endif | ||
40 | }; | ||
41 | |||
42 | extern int arch_uprobes_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm); | ||
43 | #endif /* _ASM_UPROBES_H */ | ||
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 5369059c07a..8c8c365a3bc 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -100,6 +100,7 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o | |||
100 | 100 | ||
101 | obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o | 101 | obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o |
102 | obj-$(CONFIG_OF) += devicetree.o | 102 | obj-$(CONFIG_OF) += devicetree.o |
103 | obj-$(CONFIG_UPROBES) += uprobes.o | ||
103 | 104 | ||
104 | ### | 105 | ### |
105 | # 64 bit specific files | 106 | # 64 bit specific files |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index f4773f4aae3..0a44b90602b 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -5,6 +5,7 @@ | |||
5 | #include <linux/mm.h> | 5 | #include <linux/mm.h> |
6 | 6 | ||
7 | #include <linux/io.h> | 7 | #include <linux/io.h> |
8 | #include <linux/sched.h> | ||
8 | #include <asm/processor.h> | 9 | #include <asm/processor.h> |
9 | #include <asm/apic.h> | 10 | #include <asm/apic.h> |
10 | #include <asm/cpu.h> | 11 | #include <asm/cpu.h> |
@@ -456,6 +457,8 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) | |||
456 | if (c->x86_power & (1 << 8)) { | 457 | if (c->x86_power & (1 << 8)) { |
457 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 458 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
458 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); | 459 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); |
460 | if (!check_tsc_unstable()) | ||
461 | sched_clock_stable = 1; | ||
459 | } | 462 | } |
460 | 463 | ||
461 | #ifdef CONFIG_X86_64 | 464 | #ifdef CONFIG_X86_64 |
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 63c0e058a40..1c52bdbb9b8 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -24,12 +24,14 @@ | |||
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/cpu.h> | 25 | #include <linux/cpu.h> |
26 | #include <linux/bitops.h> | 26 | #include <linux/bitops.h> |
27 | #include <linux/device.h> | ||
27 | 28 | ||
28 | #include <asm/apic.h> | 29 | #include <asm/apic.h> |
29 | #include <asm/stacktrace.h> | 30 | #include <asm/stacktrace.h> |
30 | #include <asm/nmi.h> | 31 | #include <asm/nmi.h> |
31 | #include <asm/smp.h> | 32 | #include <asm/smp.h> |
32 | #include <asm/alternative.h> | 33 | #include <asm/alternative.h> |
34 | #include <asm/timer.h> | ||
33 | 35 | ||
34 | #include "perf_event.h" | 36 | #include "perf_event.h" |
35 | 37 | ||
@@ -1209,6 +1211,8 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) | |||
1209 | break; | 1211 | break; |
1210 | 1212 | ||
1211 | case CPU_STARTING: | 1213 | case CPU_STARTING: |
1214 | if (x86_pmu.attr_rdpmc) | ||
1215 | set_in_cr4(X86_CR4_PCE); | ||
1212 | if (x86_pmu.cpu_starting) | 1216 | if (x86_pmu.cpu_starting) |
1213 | x86_pmu.cpu_starting(cpu); | 1217 | x86_pmu.cpu_starting(cpu); |
1214 | break; | 1218 | break; |
@@ -1318,6 +1322,8 @@ static int __init init_hw_perf_events(void) | |||
1318 | } | 1322 | } |
1319 | } | 1323 | } |
1320 | 1324 | ||
1325 | x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ | ||
1326 | |||
1321 | pr_info("... version: %d\n", x86_pmu.version); | 1327 | pr_info("... version: %d\n", x86_pmu.version); |
1322 | pr_info("... bit width: %d\n", x86_pmu.cntval_bits); | 1328 | pr_info("... bit width: %d\n", x86_pmu.cntval_bits); |
1323 | pr_info("... generic registers: %d\n", x86_pmu.num_counters); | 1329 | pr_info("... generic registers: %d\n", x86_pmu.num_counters); |
@@ -1541,10 +1547,71 @@ static int x86_pmu_event_init(struct perf_event *event) | |||
1541 | return err; | 1547 | return err; |
1542 | } | 1548 | } |
1543 | 1549 | ||
1550 | static int x86_pmu_event_idx(struct perf_event *event) | ||
1551 | { | ||
1552 | int idx = event->hw.idx; | ||
1553 | |||
1554 | if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) { | ||
1555 | idx -= X86_PMC_IDX_FIXED; | ||
1556 | idx |= 1 << 30; | ||
1557 | } | ||
1558 | |||
1559 | return idx + 1; | ||
1560 | } | ||
1561 | |||
1562 | static ssize_t get_attr_rdpmc(struct device *cdev, | ||
1563 | struct device_attribute *attr, | ||
1564 | char *buf) | ||
1565 | { | ||
1566 | return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc); | ||
1567 | } | ||
1568 | |||
1569 | static void change_rdpmc(void *info) | ||
1570 | { | ||
1571 | bool enable = !!(unsigned long)info; | ||
1572 | |||
1573 | if (enable) | ||
1574 | set_in_cr4(X86_CR4_PCE); | ||
1575 | else | ||
1576 | clear_in_cr4(X86_CR4_PCE); | ||
1577 | } | ||
1578 | |||
1579 | static ssize_t set_attr_rdpmc(struct device *cdev, | ||
1580 | struct device_attribute *attr, | ||
1581 | const char *buf, size_t count) | ||
1582 | { | ||
1583 | unsigned long val = simple_strtoul(buf, NULL, 0); | ||
1584 | |||
1585 | if (!!val != !!x86_pmu.attr_rdpmc) { | ||
1586 | x86_pmu.attr_rdpmc = !!val; | ||
1587 | smp_call_function(change_rdpmc, (void *)val, 1); | ||
1588 | } | ||
1589 | |||
1590 | return count; | ||
1591 | } | ||
1592 | |||
1593 | static DEVICE_ATTR(rdpmc, S_IRUSR | S_IWUSR, get_attr_rdpmc, set_attr_rdpmc); | ||
1594 | |||
1595 | static struct attribute *x86_pmu_attrs[] = { | ||
1596 | &dev_attr_rdpmc.attr, | ||
1597 | NULL, | ||
1598 | }; | ||
1599 | |||
1600 | static struct attribute_group x86_pmu_attr_group = { | ||
1601 | .attrs = x86_pmu_attrs, | ||
1602 | }; | ||
1603 | |||
1604 | static const struct attribute_group *x86_pmu_attr_groups[] = { | ||
1605 | &x86_pmu_attr_group, | ||
1606 | NULL, | ||
1607 | }; | ||
1608 | |||
1544 | static struct pmu pmu = { | 1609 | static struct pmu pmu = { |
1545 | .pmu_enable = x86_pmu_enable, | 1610 | .pmu_enable = x86_pmu_enable, |
1546 | .pmu_disable = x86_pmu_disable, | 1611 | .pmu_disable = x86_pmu_disable, |
1547 | 1612 | ||
1613 | .attr_groups = x86_pmu_attr_groups, | ||
1614 | |||
1548 | .event_init = x86_pmu_event_init, | 1615 | .event_init = x86_pmu_event_init, |
1549 | 1616 | ||
1550 | .add = x86_pmu_add, | 1617 | .add = x86_pmu_add, |
@@ -1556,8 +1623,23 @@ static struct pmu pmu = { | |||
1556 | .start_txn = x86_pmu_start_txn, | 1623 | .start_txn = x86_pmu_start_txn, |
1557 | .cancel_txn = x86_pmu_cancel_txn, | 1624 | .cancel_txn = x86_pmu_cancel_txn, |
1558 | .commit_txn = x86_pmu_commit_txn, | 1625 | .commit_txn = x86_pmu_commit_txn, |
1626 | |||
1627 | .event_idx = x86_pmu_event_idx, | ||
1559 | }; | 1628 | }; |
1560 | 1629 | ||
1630 | void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now) | ||
1631 | { | ||
1632 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) | ||
1633 | return; | ||
1634 | |||
1635 | if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) | ||
1636 | return; | ||
1637 | |||
1638 | userpg->time_mult = this_cpu_read(cyc2ns); | ||
1639 | userpg->time_shift = CYC2NS_SCALE_FACTOR; | ||
1640 | userpg->time_offset = this_cpu_read(cyc2ns_offset) - now; | ||
1641 | } | ||
1642 | |||
1561 | /* | 1643 | /* |
1562 | * callchain support | 1644 | * callchain support |
1563 | */ | 1645 | */ |
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index c30c807ddc7..82db83b5c3b 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h | |||
@@ -309,6 +309,14 @@ struct x86_pmu { | |||
309 | struct x86_pmu_quirk *quirks; | 309 | struct x86_pmu_quirk *quirks; |
310 | int perfctr_second_write; | 310 | int perfctr_second_write; |
311 | 311 | ||
312 | /* | ||
313 | * sysfs attrs | ||
314 | */ | ||
315 | int attr_rdpmc; | ||
316 | |||
317 | /* | ||
318 | * CPU Hotplug hooks | ||
319 | */ | ||
312 | int (*cpu_prepare)(int cpu); | 320 | int (*cpu_prepare)(int cpu); |
313 | void (*cpu_starting)(int cpu); | 321 | void (*cpu_starting)(int cpu); |
314 | void (*cpu_dying)(int cpu); | 322 | void (*cpu_dying)(int cpu); |
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c new file mode 100644 index 00000000000..851a11b0d38 --- /dev/null +++ b/arch/x86/kernel/uprobes.c | |||
@@ -0,0 +1,423 @@ | |||
1 | /* | ||
2 | * User-space Probes (UProbes) for x86 | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * Copyright (C) IBM Corporation, 2008-2011 | ||
19 | * Authors: | ||
20 | * Srikar Dronamraju | ||
21 | * Jim Keniston | ||
22 | */ | ||
23 | #include <linux/kernel.h> | ||
24 | #include <linux/sched.h> | ||
25 | #include <linux/ptrace.h> | ||
26 | #include <linux/uprobes.h> | ||
27 | |||
28 | #include <linux/kdebug.h> | ||
29 | #include <asm/insn.h> | ||
30 | |||
31 | /* Post-execution fixups. */ | ||
32 | |||
33 | /* No fixup needed */ | ||
34 | #define UPROBE_FIX_NONE 0x0 | ||
35 | /* Adjust IP back to vicinity of actual insn */ | ||
36 | #define UPROBE_FIX_IP 0x1 | ||
37 | /* Adjust the return address of a call insn */ | ||
38 | #define UPROBE_FIX_CALL 0x2 | ||
39 | |||
40 | #define UPROBE_FIX_RIP_AX 0x8000 | ||
41 | #define UPROBE_FIX_RIP_CX 0x4000 | ||
42 | |||
43 | /* Adaptations for mhiramat x86 decoder v14. */ | ||
44 | #define OPCODE1(insn) ((insn)->opcode.bytes[0]) | ||
45 | #define OPCODE2(insn) ((insn)->opcode.bytes[1]) | ||
46 | #define OPCODE3(insn) ((insn)->opcode.bytes[2]) | ||
47 | #define MODRM_REG(insn) X86_MODRM_REG(insn->modrm.value) | ||
48 | |||
49 | #define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\ | ||
50 | (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ | ||
51 | (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \ | ||
52 | (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \ | ||
53 | (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \ | ||
54 | << (row % 32)) | ||
55 | |||
56 | /* | ||
57 | * Good-instruction tables for 32-bit apps. This is non-const and volatile | ||
58 | * to keep gcc from statically optimizing it out, as variable_test_bit makes | ||
59 | * some versions of gcc to think only *(unsigned long*) is used. | ||
60 | */ | ||
61 | static volatile u32 good_insns_32[256 / 32] = { | ||
62 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
63 | /* ---------------------------------------------- */ | ||
64 | W(0x00, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) | /* 00 */ | ||
65 | W(0x10, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) , /* 10 */ | ||
66 | W(0x20, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) | /* 20 */ | ||
67 | W(0x30, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) , /* 30 */ | ||
68 | W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ | ||
69 | W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ | ||
70 | W(0x60, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */ | ||
71 | W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */ | ||
72 | W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ | ||
73 | W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ | ||
74 | W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */ | ||
75 | W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ | ||
76 | W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */ | ||
77 | W(0xd0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ | ||
78 | W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */ | ||
79 | W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */ | ||
80 | /* ---------------------------------------------- */ | ||
81 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
82 | }; | ||
83 | |||
84 | /* Using this for both 64-bit and 32-bit apps */ | ||
85 | static volatile u32 good_2byte_insns[256 / 32] = { | ||
86 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
87 | /* ---------------------------------------------- */ | ||
88 | W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */ | ||
89 | W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */ | ||
90 | W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */ | ||
91 | W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */ | ||
92 | W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ | ||
93 | W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ | ||
94 | W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */ | ||
95 | W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */ | ||
96 | W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ | ||
97 | W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ | ||
98 | W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */ | ||
99 | W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ | ||
100 | W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */ | ||
101 | W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ | ||
102 | W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */ | ||
103 | W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* f0 */ | ||
104 | /* ---------------------------------------------- */ | ||
105 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
106 | }; | ||
107 | |||
108 | #ifdef CONFIG_X86_64 | ||
109 | /* Good-instruction tables for 64-bit apps */ | ||
110 | static volatile u32 good_insns_64[256 / 32] = { | ||
111 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
112 | /* ---------------------------------------------- */ | ||
113 | W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 00 */ | ||
114 | W(0x10, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 10 */ | ||
115 | W(0x20, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 20 */ | ||
116 | W(0x30, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 30 */ | ||
117 | W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */ | ||
118 | W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ | ||
119 | W(0x60, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */ | ||
120 | W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */ | ||
121 | W(0x80, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ | ||
122 | W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ | ||
123 | W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */ | ||
124 | W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ | ||
125 | W(0xc0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */ | ||
126 | W(0xd0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ | ||
127 | W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */ | ||
128 | W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */ | ||
129 | /* ---------------------------------------------- */ | ||
130 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
131 | }; | ||
132 | #endif | ||
133 | #undef W | ||
134 | |||
135 | /* | ||
136 | * opcodes we'll probably never support: | ||
137 | * | ||
138 | * 6c-6d, e4-e5, ec-ed - in | ||
139 | * 6e-6f, e6-e7, ee-ef - out | ||
140 | * cc, cd - int3, int | ||
141 | * cf - iret | ||
142 | * d6 - illegal instruction | ||
143 | * f1 - int1/icebp | ||
144 | * f4 - hlt | ||
145 | * fa, fb - cli, sti | ||
146 | * 0f - lar, lsl, syscall, clts, sysret, sysenter, sysexit, invd, wbinvd, ud2 | ||
147 | * | ||
148 | * invalid opcodes in 64-bit mode: | ||
149 | * | ||
150 | * 06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, 82, c4-c5, d4-d5 | ||
151 | * 63 - we support this opcode in x86_64 but not in i386. | ||
152 | * | ||
153 | * opcodes we may need to refine support for: | ||
154 | * | ||
155 | * 0f - 2-byte instructions: For many of these instructions, the validity | ||
156 | * depends on the prefix and/or the reg field. On such instructions, we | ||
157 | * just consider the opcode combination valid if it corresponds to any | ||
158 | * valid instruction. | ||
159 | * | ||
160 | * 8f - Group 1 - only reg = 0 is OK | ||
161 | * c6-c7 - Group 11 - only reg = 0 is OK | ||
162 | * d9-df - fpu insns with some illegal encodings | ||
163 | * f2, f3 - repnz, repz prefixes. These are also the first byte for | ||
164 | * certain floating-point instructions, such as addsd. | ||
165 | * | ||
166 | * fe - Group 4 - only reg = 0 or 1 is OK | ||
167 | * ff - Group 5 - only reg = 0-6 is OK | ||
168 | * | ||
169 | * others -- Do we need to support these? | ||
170 | * | ||
171 | * 0f - (floating-point?) prefetch instructions | ||
172 | * 07, 17, 1f - pop es, pop ss, pop ds | ||
173 | * 26, 2e, 36, 3e - es:, cs:, ss:, ds: segment prefixes -- | ||
174 | * but 64 and 65 (fs: and gs:) seem to be used, so we support them | ||
175 | * 67 - addr16 prefix | ||
176 | * ce - into | ||
177 | * f0 - lock prefix | ||
178 | */ | ||
179 | |||
180 | /* | ||
181 | * TODO: | ||
182 | * - Where necessary, examine the modrm byte and allow only valid instructions | ||
183 | * in the different Groups and fpu instructions. | ||
184 | */ | ||
185 | |||
186 | static bool is_prefix_bad(struct insn *insn) | ||
187 | { | ||
188 | int i; | ||
189 | |||
190 | for (i = 0; i < insn->prefixes.nbytes; i++) { | ||
191 | switch (insn->prefixes.bytes[i]) { | ||
192 | case 0x26: /* INAT_PFX_ES */ | ||
193 | case 0x2E: /* INAT_PFX_CS */ | ||
194 | case 0x36: /* INAT_PFX_DS */ | ||
195 | case 0x3E: /* INAT_PFX_SS */ | ||
196 | case 0xF0: /* INAT_PFX_LOCK */ | ||
197 | return true; | ||
198 | } | ||
199 | } | ||
200 | return false; | ||
201 | } | ||
202 | |||
203 | static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn) | ||
204 | { | ||
205 | insn_init(insn, auprobe->insn, false); | ||
206 | |||
207 | /* Skip good instruction prefixes; reject "bad" ones. */ | ||
208 | insn_get_opcode(insn); | ||
209 | if (is_prefix_bad(insn)) | ||
210 | return -ENOTSUPP; | ||
211 | |||
212 | if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_32)) | ||
213 | return 0; | ||
214 | |||
215 | if (insn->opcode.nbytes == 2) { | ||
216 | if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns)) | ||
217 | return 0; | ||
218 | } | ||
219 | |||
220 | return -ENOTSUPP; | ||
221 | } | ||
222 | |||
223 | /* | ||
224 | * Figure out which fixups post_xol() will need to perform, and annotate | ||
225 | * arch_uprobe->fixups accordingly. To start with, | ||
226 | * arch_uprobe->fixups is either zero or it reflects rip-related | ||
227 | * fixups. | ||
228 | */ | ||
229 | static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn) | ||
230 | { | ||
231 | bool fix_ip = true, fix_call = false; /* defaults */ | ||
232 | int reg; | ||
233 | |||
234 | insn_get_opcode(insn); /* should be a nop */ | ||
235 | |||
236 | switch (OPCODE1(insn)) { | ||
237 | case 0xc3: /* ret/lret */ | ||
238 | case 0xcb: | ||
239 | case 0xc2: | ||
240 | case 0xca: | ||
241 | /* ip is correct */ | ||
242 | fix_ip = false; | ||
243 | break; | ||
244 | case 0xe8: /* call relative - Fix return addr */ | ||
245 | fix_call = true; | ||
246 | break; | ||
247 | case 0x9a: /* call absolute - Fix return addr, not ip */ | ||
248 | fix_call = true; | ||
249 | fix_ip = false; | ||
250 | break; | ||
251 | case 0xff: | ||
252 | insn_get_modrm(insn); | ||
253 | reg = MODRM_REG(insn); | ||
254 | if (reg == 2 || reg == 3) { | ||
255 | /* call or lcall, indirect */ | ||
256 | /* Fix return addr; ip is correct. */ | ||
257 | fix_call = true; | ||
258 | fix_ip = false; | ||
259 | } else if (reg == 4 || reg == 5) { | ||
260 | /* jmp or ljmp, indirect */ | ||
261 | /* ip is correct. */ | ||
262 | fix_ip = false; | ||
263 | } | ||
264 | break; | ||
265 | case 0xea: /* jmp absolute -- ip is correct */ | ||
266 | fix_ip = false; | ||
267 | break; | ||
268 | default: | ||
269 | break; | ||
270 | } | ||
271 | if (fix_ip) | ||
272 | auprobe->fixups |= UPROBE_FIX_IP; | ||
273 | if (fix_call) | ||
274 | auprobe->fixups |= UPROBE_FIX_CALL; | ||
275 | } | ||
276 | |||
277 | #ifdef CONFIG_X86_64 | ||
278 | /* | ||
279 | * If arch_uprobe->insn doesn't use rip-relative addressing, return | ||
280 | * immediately. Otherwise, rewrite the instruction so that it accesses | ||
281 | * its memory operand indirectly through a scratch register. Set | ||
282 | * arch_uprobe->fixups and arch_uprobe->rip_rela_target_address | ||
283 | * accordingly. (The contents of the scratch register will be saved | ||
284 | * before we single-step the modified instruction, and restored | ||
285 | * afterward.) | ||
286 | * | ||
287 | * We do this because a rip-relative instruction can access only a | ||
288 | * relatively small area (+/- 2 GB from the instruction), and the XOL | ||
289 | * area typically lies beyond that area. At least for instructions | ||
290 | * that store to memory, we can't execute the original instruction | ||
291 | * and "fix things up" later, because the misdirected store could be | ||
292 | * disastrous. | ||
293 | * | ||
294 | * Some useful facts about rip-relative instructions: | ||
295 | * | ||
296 | * - There's always a modrm byte. | ||
297 | * - There's never a SIB byte. | ||
298 | * - The displacement is always 4 bytes. | ||
299 | */ | ||
300 | static void | ||
301 | handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) | ||
302 | { | ||
303 | u8 *cursor; | ||
304 | u8 reg; | ||
305 | |||
306 | if (mm->context.ia32_compat) | ||
307 | return; | ||
308 | |||
309 | auprobe->rip_rela_target_address = 0x0; | ||
310 | if (!insn_rip_relative(insn)) | ||
311 | return; | ||
312 | |||
313 | /* | ||
314 | * insn_rip_relative() would have decoded rex_prefix, modrm. | ||
315 | * Clear REX.b bit (extension of MODRM.rm field): | ||
316 | * we want to encode rax/rcx, not r8/r9. | ||
317 | */ | ||
318 | if (insn->rex_prefix.nbytes) { | ||
319 | cursor = auprobe->insn + insn_offset_rex_prefix(insn); | ||
320 | *cursor &= 0xfe; /* Clearing REX.B bit */ | ||
321 | } | ||
322 | |||
323 | /* | ||
324 | * Point cursor at the modrm byte. The next 4 bytes are the | ||
325 | * displacement. Beyond the displacement, for some instructions, | ||
326 | * is the immediate operand. | ||
327 | */ | ||
328 | cursor = auprobe->insn + insn_offset_modrm(insn); | ||
329 | insn_get_length(insn); | ||
330 | |||
331 | /* | ||
332 | * Convert from rip-relative addressing to indirect addressing | ||
333 | * via a scratch register. Change the r/m field from 0x5 (%rip) | ||
334 | * to 0x0 (%rax) or 0x1 (%rcx), and squeeze out the offset field. | ||
335 | */ | ||
336 | reg = MODRM_REG(insn); | ||
337 | if (reg == 0) { | ||
338 | /* | ||
339 | * The register operand (if any) is either the A register | ||
340 | * (%rax, %eax, etc.) or (if the 0x4 bit is set in the | ||
341 | * REX prefix) %r8. In any case, we know the C register | ||
342 | * is NOT the register operand, so we use %rcx (register | ||
343 | * #1) for the scratch register. | ||
344 | */ | ||
345 | auprobe->fixups = UPROBE_FIX_RIP_CX; | ||
346 | /* Change modrm from 00 000 101 to 00 000 001. */ | ||
347 | *cursor = 0x1; | ||
348 | } else { | ||
349 | /* Use %rax (register #0) for the scratch register. */ | ||
350 | auprobe->fixups = UPROBE_FIX_RIP_AX; | ||
351 | /* Change modrm from 00 xxx 101 to 00 xxx 000 */ | ||
352 | *cursor = (reg << 3); | ||
353 | } | ||
354 | |||
355 | /* Target address = address of next instruction + (signed) offset */ | ||
356 | auprobe->rip_rela_target_address = (long)insn->length + insn->displacement.value; | ||
357 | |||
358 | /* Displacement field is gone; slide immediate field (if any) over. */ | ||
359 | if (insn->immediate.nbytes) { | ||
360 | cursor++; | ||
361 | memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes); | ||
362 | } | ||
363 | return; | ||
364 | } | ||
365 | |||
366 | static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn) | ||
367 | { | ||
368 | insn_init(insn, auprobe->insn, true); | ||
369 | |||
370 | /* Skip good instruction prefixes; reject "bad" ones. */ | ||
371 | insn_get_opcode(insn); | ||
372 | if (is_prefix_bad(insn)) | ||
373 | return -ENOTSUPP; | ||
374 | |||
375 | if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_64)) | ||
376 | return 0; | ||
377 | |||
378 | if (insn->opcode.nbytes == 2) { | ||
379 | if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns)) | ||
380 | return 0; | ||
381 | } | ||
382 | return -ENOTSUPP; | ||
383 | } | ||
384 | |||
385 | static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) | ||
386 | { | ||
387 | if (mm->context.ia32_compat) | ||
388 | return validate_insn_32bits(auprobe, insn); | ||
389 | return validate_insn_64bits(auprobe, insn); | ||
390 | } | ||
391 | #else /* 32-bit: */ | ||
392 | static void handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) | ||
393 | { | ||
394 | /* No RIP-relative addressing on 32-bit */ | ||
395 | } | ||
396 | |||
397 | static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) | ||
398 | { | ||
399 | return validate_insn_32bits(auprobe, insn); | ||
400 | } | ||
401 | #endif /* CONFIG_X86_64 */ | ||
402 | |||
403 | /** | ||
404 | * arch_uprobes_analyze_insn - instruction analysis including validity and fixups. | ||
405 | * @mm: the probed address space. | ||
406 | * @arch_uprobe: the probepoint information. | ||
407 | * Return 0 on success or a -ve number on error. | ||
408 | */ | ||
409 | int arch_uprobes_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm) | ||
410 | { | ||
411 | int ret; | ||
412 | struct insn insn; | ||
413 | |||
414 | auprobe->fixups = 0; | ||
415 | ret = validate_insn_bits(auprobe, mm, &insn); | ||
416 | if (ret != 0) | ||
417 | return ret; | ||
418 | |||
419 | handle_riprel_insn(auprobe, mm, &insn); | ||
420 | prepare_fixups(auprobe, &insn); | ||
421 | |||
422 | return 0; | ||
423 | } | ||
diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c index 88ad5fbda6e..c1f01a8e9f6 100644 --- a/arch/x86/lib/inat.c +++ b/arch/x86/lib/inat.c | |||
@@ -29,46 +29,46 @@ insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode) | |||
29 | return inat_primary_table[opcode]; | 29 | return inat_primary_table[opcode]; |
30 | } | 30 | } |
31 | 31 | ||
32 | insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, insn_byte_t last_pfx, | 32 | int inat_get_last_prefix_id(insn_byte_t last_pfx) |
33 | { | ||
34 | insn_attr_t lpfx_attr; | ||
35 | |||
36 | lpfx_attr = inat_get_opcode_attribute(last_pfx); | ||
37 | return inat_last_prefix_id(lpfx_attr); | ||
38 | } | ||
39 | |||
40 | insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id, | ||
33 | insn_attr_t esc_attr) | 41 | insn_attr_t esc_attr) |
34 | { | 42 | { |
35 | const insn_attr_t *table; | 43 | const insn_attr_t *table; |
36 | insn_attr_t lpfx_attr; | 44 | int n; |
37 | int n, m = 0; | ||
38 | 45 | ||
39 | n = inat_escape_id(esc_attr); | 46 | n = inat_escape_id(esc_attr); |
40 | if (last_pfx) { | 47 | |
41 | lpfx_attr = inat_get_opcode_attribute(last_pfx); | ||
42 | m = inat_last_prefix_id(lpfx_attr); | ||
43 | } | ||
44 | table = inat_escape_tables[n][0]; | 48 | table = inat_escape_tables[n][0]; |
45 | if (!table) | 49 | if (!table) |
46 | return 0; | 50 | return 0; |
47 | if (inat_has_variant(table[opcode]) && m) { | 51 | if (inat_has_variant(table[opcode]) && lpfx_id) { |
48 | table = inat_escape_tables[n][m]; | 52 | table = inat_escape_tables[n][lpfx_id]; |
49 | if (!table) | 53 | if (!table) |
50 | return 0; | 54 | return 0; |
51 | } | 55 | } |
52 | return table[opcode]; | 56 | return table[opcode]; |
53 | } | 57 | } |
54 | 58 | ||
55 | insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx, | 59 | insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id, |
56 | insn_attr_t grp_attr) | 60 | insn_attr_t grp_attr) |
57 | { | 61 | { |
58 | const insn_attr_t *table; | 62 | const insn_attr_t *table; |
59 | insn_attr_t lpfx_attr; | 63 | int n; |
60 | int n, m = 0; | ||
61 | 64 | ||
62 | n = inat_group_id(grp_attr); | 65 | n = inat_group_id(grp_attr); |
63 | if (last_pfx) { | 66 | |
64 | lpfx_attr = inat_get_opcode_attribute(last_pfx); | ||
65 | m = inat_last_prefix_id(lpfx_attr); | ||
66 | } | ||
67 | table = inat_group_tables[n][0]; | 67 | table = inat_group_tables[n][0]; |
68 | if (!table) | 68 | if (!table) |
69 | return inat_group_common_attribute(grp_attr); | 69 | return inat_group_common_attribute(grp_attr); |
70 | if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && m) { | 70 | if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && lpfx_id) { |
71 | table = inat_group_tables[n][m]; | 71 | table = inat_group_tables[n][lpfx_id]; |
72 | if (!table) | 72 | if (!table) |
73 | return inat_group_common_attribute(grp_attr); | 73 | return inat_group_common_attribute(grp_attr); |
74 | } | 74 | } |
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 5a1f9f3e3fb..25feb1ae71c 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c | |||
@@ -185,7 +185,8 @@ err_out: | |||
185 | void insn_get_opcode(struct insn *insn) | 185 | void insn_get_opcode(struct insn *insn) |
186 | { | 186 | { |
187 | struct insn_field *opcode = &insn->opcode; | 187 | struct insn_field *opcode = &insn->opcode; |
188 | insn_byte_t op, pfx; | 188 | insn_byte_t op; |
189 | int pfx_id; | ||
189 | if (opcode->got) | 190 | if (opcode->got) |
190 | return; | 191 | return; |
191 | if (!insn->prefixes.got) | 192 | if (!insn->prefixes.got) |
@@ -212,8 +213,8 @@ void insn_get_opcode(struct insn *insn) | |||
212 | /* Get escaped opcode */ | 213 | /* Get escaped opcode */ |
213 | op = get_next(insn_byte_t, insn); | 214 | op = get_next(insn_byte_t, insn); |
214 | opcode->bytes[opcode->nbytes++] = op; | 215 | opcode->bytes[opcode->nbytes++] = op; |
215 | pfx = insn_last_prefix(insn); | 216 | pfx_id = insn_last_prefix_id(insn); |
216 | insn->attr = inat_get_escape_attribute(op, pfx, insn->attr); | 217 | insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr); |
217 | } | 218 | } |
218 | if (inat_must_vex(insn->attr)) | 219 | if (inat_must_vex(insn->attr)) |
219 | insn->attr = 0; /* This instruction is bad */ | 220 | insn->attr = 0; /* This instruction is bad */ |
@@ -235,7 +236,7 @@ err_out: | |||
235 | void insn_get_modrm(struct insn *insn) | 236 | void insn_get_modrm(struct insn *insn) |
236 | { | 237 | { |
237 | struct insn_field *modrm = &insn->modrm; | 238 | struct insn_field *modrm = &insn->modrm; |
238 | insn_byte_t pfx, mod; | 239 | insn_byte_t pfx_id, mod; |
239 | if (modrm->got) | 240 | if (modrm->got) |
240 | return; | 241 | return; |
241 | if (!insn->opcode.got) | 242 | if (!insn->opcode.got) |
@@ -246,8 +247,8 @@ void insn_get_modrm(struct insn *insn) | |||
246 | modrm->value = mod; | 247 | modrm->value = mod; |
247 | modrm->nbytes = 1; | 248 | modrm->nbytes = 1; |
248 | if (inat_is_group(insn->attr)) { | 249 | if (inat_is_group(insn->attr)) { |
249 | pfx = insn_last_prefix(insn); | 250 | pfx_id = insn_last_prefix_id(insn); |
250 | insn->attr = inat_get_group_attribute(mod, pfx, | 251 | insn->attr = inat_get_group_attribute(mod, pfx_id, |
251 | insn->attr); | 252 | insn->attr); |
252 | if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) | 253 | if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) |
253 | insn->attr = 0; /* This is bad */ | 254 | insn->attr = 0; /* This is bad */ |
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 5ce8b140428..f7c69580fea 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h | |||
@@ -1,6 +1,38 @@ | |||
1 | #ifndef _LINUX_JUMP_LABEL_H | 1 | #ifndef _LINUX_JUMP_LABEL_H |
2 | #define _LINUX_JUMP_LABEL_H | 2 | #define _LINUX_JUMP_LABEL_H |
3 | 3 | ||
4 | /* | ||
5 | * Jump label support | ||
6 | * | ||
7 | * Copyright (C) 2009-2012 Jason Baron <jbaron@redhat.com> | ||
8 | * Copyright (C) 2011-2012 Peter Zijlstra <pzijlstr@redhat.com> | ||
9 | * | ||
10 | * Jump labels provide an interface to generate dynamic branches using | ||
11 | * self-modifying code. Assuming toolchain and architecture support the result | ||
12 | * of a "if (static_branch(&key))" statement is a unconditional branch (which | ||
13 | * defaults to false - and the true block is placed out of line). | ||
14 | * | ||
15 | * However at runtime we can change the 'static' branch target using | ||
16 | * jump_label_{inc,dec}(). These function as a 'reference' count on the key | ||
17 | * object and for as long as there are references all branches referring to | ||
18 | * that particular key will point to the (out of line) true block. | ||
19 | * | ||
20 | * Since this relies on modifying code the jump_label_{inc,dec}() functions | ||
21 | * must be considered absolute slow paths (machine wide synchronization etc.). | ||
22 | * OTOH, since the affected branches are unconditional their runtime overhead | ||
23 | * will be absolutely minimal, esp. in the default (off) case where the total | ||
24 | * effect is a single NOP of appropriate size. The on case will patch in a jump | ||
25 | * to the out-of-line block. | ||
26 | * | ||
27 | * When the control is directly exposed to userspace it is prudent to delay the | ||
28 | * decrement to avoid high frequency code modifications which can (and do) | ||
29 | * cause significant performance degradation. Struct jump_label_key_deferred and | ||
30 | * jump_label_dec_deferred() provide for this. | ||
31 | * | ||
32 | * Lacking toolchain and or architecture support, it falls back to a simple | ||
33 | * conditional branch. | ||
34 | */ | ||
35 | |||
4 | #include <linux/types.h> | 36 | #include <linux/types.h> |
5 | #include <linux/compiler.h> | 37 | #include <linux/compiler.h> |
6 | #include <linux/workqueue.h> | 38 | #include <linux/workqueue.h> |
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index abb2776be1b..412b790f5da 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h | |||
@@ -291,12 +291,14 @@ struct perf_event_mmap_page { | |||
291 | __s64 offset; /* add to hardware event value */ | 291 | __s64 offset; /* add to hardware event value */ |
292 | __u64 time_enabled; /* time event active */ | 292 | __u64 time_enabled; /* time event active */ |
293 | __u64 time_running; /* time event on cpu */ | 293 | __u64 time_running; /* time event on cpu */ |
294 | __u32 time_mult, time_shift; | ||
295 | __u64 time_offset; | ||
294 | 296 | ||
295 | /* | 297 | /* |
296 | * Hole for extension of the self monitor capabilities | 298 | * Hole for extension of the self monitor capabilities |
297 | */ | 299 | */ |
298 | 300 | ||
299 | __u64 __reserved[123]; /* align to 1k */ | 301 | __u64 __reserved[121]; /* align to 1k */ |
300 | 302 | ||
301 | /* | 303 | /* |
302 | * Control data for the mmap() data buffer. | 304 | * Control data for the mmap() data buffer. |
@@ -616,6 +618,7 @@ struct pmu { | |||
616 | struct list_head entry; | 618 | struct list_head entry; |
617 | 619 | ||
618 | struct device *dev; | 620 | struct device *dev; |
621 | const struct attribute_group **attr_groups; | ||
619 | char *name; | 622 | char *name; |
620 | int type; | 623 | int type; |
621 | 624 | ||
@@ -681,6 +684,12 @@ struct pmu { | |||
681 | * for each successful ->add() during the transaction. | 684 | * for each successful ->add() during the transaction. |
682 | */ | 685 | */ |
683 | void (*cancel_txn) (struct pmu *pmu); /* optional */ | 686 | void (*cancel_txn) (struct pmu *pmu); /* optional */ |
687 | |||
688 | /* | ||
689 | * Will return the value for perf_event_mmap_page::index for this event, | ||
690 | * if no implementation is provided it will default to: event->hw.idx + 1. | ||
691 | */ | ||
692 | int (*event_idx) (struct perf_event *event); /*optional */ | ||
684 | }; | 693 | }; |
685 | 694 | ||
686 | /** | 695 | /** |
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h new file mode 100644 index 00000000000..eac525f41b9 --- /dev/null +++ b/include/linux/uprobes.h | |||
@@ -0,0 +1,76 @@ | |||
1 | #ifndef _LINUX_UPROBES_H | ||
2 | #define _LINUX_UPROBES_H | ||
3 | /* | ||
4 | * User-space Probes (UProbes) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
19 | * | ||
20 | * Copyright (C) IBM Corporation, 2008-2012 | ||
21 | * Authors: | ||
22 | * Srikar Dronamraju | ||
23 | * Jim Keniston | ||
24 | * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | ||
25 | */ | ||
26 | |||
27 | #include <linux/errno.h> | ||
28 | #include <linux/rbtree.h> | ||
29 | |||
30 | struct vm_area_struct; | ||
31 | #ifdef CONFIG_ARCH_SUPPORTS_UPROBES | ||
32 | #include <asm/uprobes.h> | ||
33 | #endif | ||
34 | |||
35 | /* flags that denote/change uprobes behaviour */ | ||
36 | |||
37 | /* Have a copy of original instruction */ | ||
38 | #define UPROBE_COPY_INSN 0x1 | ||
39 | |||
40 | /* Dont run handlers when first register/ last unregister in progress*/ | ||
41 | #define UPROBE_RUN_HANDLER 0x2 | ||
42 | |||
43 | struct uprobe_consumer { | ||
44 | int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs); | ||
45 | /* | ||
46 | * filter is optional; If a filter exists, handler is run | ||
47 | * if and only if filter returns true. | ||
48 | */ | ||
49 | bool (*filter)(struct uprobe_consumer *self, struct task_struct *task); | ||
50 | |||
51 | struct uprobe_consumer *next; | ||
52 | }; | ||
53 | |||
54 | #ifdef CONFIG_UPROBES | ||
55 | extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); | ||
56 | extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr, bool verify); | ||
57 | extern bool __weak is_swbp_insn(uprobe_opcode_t *insn); | ||
58 | extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); | ||
59 | extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); | ||
60 | extern int uprobe_mmap(struct vm_area_struct *vma); | ||
61 | #else /* CONFIG_UPROBES is not defined */ | ||
62 | static inline int | ||
63 | uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) | ||
64 | { | ||
65 | return -ENOSYS; | ||
66 | } | ||
67 | static inline void | ||
68 | uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) | ||
69 | { | ||
70 | } | ||
71 | static inline int uprobe_mmap(struct vm_area_struct *vma) | ||
72 | { | ||
73 | return 0; | ||
74 | } | ||
75 | #endif /* CONFIG_UPROBES */ | ||
76 | #endif /* _LINUX_UPROBES_H */ | ||
diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h index 17df43464df..39a8a430d90 100644 --- a/include/trace/events/signal.h +++ b/include/trace/events/signal.h | |||
@@ -23,11 +23,23 @@ | |||
23 | } \ | 23 | } \ |
24 | } while (0) | 24 | } while (0) |
25 | 25 | ||
26 | #ifndef TRACE_HEADER_MULTI_READ | ||
27 | enum { | ||
28 | TRACE_SIGNAL_DELIVERED, | ||
29 | TRACE_SIGNAL_IGNORED, | ||
30 | TRACE_SIGNAL_ALREADY_PENDING, | ||
31 | TRACE_SIGNAL_OVERFLOW_FAIL, | ||
32 | TRACE_SIGNAL_LOSE_INFO, | ||
33 | }; | ||
34 | #endif | ||
35 | |||
26 | /** | 36 | /** |
27 | * signal_generate - called when a signal is generated | 37 | * signal_generate - called when a signal is generated |
28 | * @sig: signal number | 38 | * @sig: signal number |
29 | * @info: pointer to struct siginfo | 39 | * @info: pointer to struct siginfo |
30 | * @task: pointer to struct task_struct | 40 | * @task: pointer to struct task_struct |
41 | * @group: shared or private | ||
42 | * @result: TRACE_SIGNAL_* | ||
31 | * | 43 | * |
32 | * Current process sends a 'sig' signal to 'task' process with | 44 | * Current process sends a 'sig' signal to 'task' process with |
33 | * 'info' siginfo. If 'info' is SEND_SIG_NOINFO or SEND_SIG_PRIV, | 45 | * 'info' siginfo. If 'info' is SEND_SIG_NOINFO or SEND_SIG_PRIV, |
@@ -37,9 +49,10 @@ | |||
37 | */ | 49 | */ |
38 | TRACE_EVENT(signal_generate, | 50 | TRACE_EVENT(signal_generate, |
39 | 51 | ||
40 | TP_PROTO(int sig, struct siginfo *info, struct task_struct *task), | 52 | TP_PROTO(int sig, struct siginfo *info, struct task_struct *task, |
53 | int group, int result), | ||
41 | 54 | ||
42 | TP_ARGS(sig, info, task), | 55 | TP_ARGS(sig, info, task, group, result), |
43 | 56 | ||
44 | TP_STRUCT__entry( | 57 | TP_STRUCT__entry( |
45 | __field( int, sig ) | 58 | __field( int, sig ) |
@@ -47,6 +60,8 @@ TRACE_EVENT(signal_generate, | |||
47 | __field( int, code ) | 60 | __field( int, code ) |
48 | __array( char, comm, TASK_COMM_LEN ) | 61 | __array( char, comm, TASK_COMM_LEN ) |
49 | __field( pid_t, pid ) | 62 | __field( pid_t, pid ) |
63 | __field( int, group ) | ||
64 | __field( int, result ) | ||
50 | ), | 65 | ), |
51 | 66 | ||
52 | TP_fast_assign( | 67 | TP_fast_assign( |
@@ -54,11 +69,14 @@ TRACE_EVENT(signal_generate, | |||
54 | TP_STORE_SIGINFO(__entry, info); | 69 | TP_STORE_SIGINFO(__entry, info); |
55 | memcpy(__entry->comm, task->comm, TASK_COMM_LEN); | 70 | memcpy(__entry->comm, task->comm, TASK_COMM_LEN); |
56 | __entry->pid = task->pid; | 71 | __entry->pid = task->pid; |
72 | __entry->group = group; | ||
73 | __entry->result = result; | ||
57 | ), | 74 | ), |
58 | 75 | ||
59 | TP_printk("sig=%d errno=%d code=%d comm=%s pid=%d", | 76 | TP_printk("sig=%d errno=%d code=%d comm=%s pid=%d grp=%d res=%d", |
60 | __entry->sig, __entry->errno, __entry->code, | 77 | __entry->sig, __entry->errno, __entry->code, |
61 | __entry->comm, __entry->pid) | 78 | __entry->comm, __entry->pid, __entry->group, |
79 | __entry->result) | ||
62 | ); | 80 | ); |
63 | 81 | ||
64 | /** | 82 | /** |
@@ -101,65 +119,6 @@ TRACE_EVENT(signal_deliver, | |||
101 | __entry->sa_handler, __entry->sa_flags) | 119 | __entry->sa_handler, __entry->sa_flags) |
102 | ); | 120 | ); |
103 | 121 | ||
104 | DECLARE_EVENT_CLASS(signal_queue_overflow, | ||
105 | |||
106 | TP_PROTO(int sig, int group, struct siginfo *info), | ||
107 | |||
108 | TP_ARGS(sig, group, info), | ||
109 | |||
110 | TP_STRUCT__entry( | ||
111 | __field( int, sig ) | ||
112 | __field( int, group ) | ||
113 | __field( int, errno ) | ||
114 | __field( int, code ) | ||
115 | ), | ||
116 | |||
117 | TP_fast_assign( | ||
118 | __entry->sig = sig; | ||
119 | __entry->group = group; | ||
120 | TP_STORE_SIGINFO(__entry, info); | ||
121 | ), | ||
122 | |||
123 | TP_printk("sig=%d group=%d errno=%d code=%d", | ||
124 | __entry->sig, __entry->group, __entry->errno, __entry->code) | ||
125 | ); | ||
126 | |||
127 | /** | ||
128 | * signal_overflow_fail - called when signal queue is overflow | ||
129 | * @sig: signal number | ||
130 | * @group: signal to process group or not (bool) | ||
131 | * @info: pointer to struct siginfo | ||
132 | * | ||
133 | * Kernel fails to generate 'sig' signal with 'info' siginfo, because | ||
134 | * siginfo queue is overflow, and the signal is dropped. | ||
135 | * 'group' is not 0 if the signal will be sent to a process group. | ||
136 | * 'sig' is always one of RT signals. | ||
137 | */ | ||
138 | DEFINE_EVENT(signal_queue_overflow, signal_overflow_fail, | ||
139 | |||
140 | TP_PROTO(int sig, int group, struct siginfo *info), | ||
141 | |||
142 | TP_ARGS(sig, group, info) | ||
143 | ); | ||
144 | |||
145 | /** | ||
146 | * signal_lose_info - called when siginfo is lost | ||
147 | * @sig: signal number | ||
148 | * @group: signal to process group or not (bool) | ||
149 | * @info: pointer to struct siginfo | ||
150 | * | ||
151 | * Kernel generates 'sig' signal but loses 'info' siginfo, because siginfo | ||
152 | * queue is overflow. | ||
153 | * 'group' is not 0 if the signal will be sent to a process group. | ||
154 | * 'sig' is always one of non-RT signals. | ||
155 | */ | ||
156 | DEFINE_EVENT(signal_queue_overflow, signal_lose_info, | ||
157 | |||
158 | TP_PROTO(int sig, int group, struct siginfo *info), | ||
159 | |||
160 | TP_ARGS(sig, group, info) | ||
161 | ); | ||
162 | |||
163 | #endif /* _TRACE_SIGNAL_H */ | 122 | #endif /* _TRACE_SIGNAL_H */ |
164 | 123 | ||
165 | /* This part must be outside protection */ | 124 | /* This part must be outside protection */ |
diff --git a/kernel/events/Makefile b/kernel/events/Makefile index 22d901f9caf..103f5d147b2 100644 --- a/kernel/events/Makefile +++ b/kernel/events/Makefile | |||
@@ -3,4 +3,7 @@ CFLAGS_REMOVE_core.o = -pg | |||
3 | endif | 3 | endif |
4 | 4 | ||
5 | obj-y := core.o ring_buffer.o callchain.o | 5 | obj-y := core.o ring_buffer.o callchain.o |
6 | |||
6 | obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o | 7 | obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o |
8 | obj-$(CONFIG_UPROBES) += uprobes.o | ||
9 | |||
diff --git a/kernel/events/core.c b/kernel/events/core.c index 1b5c081d8b9..94afe5b91c6 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -3238,10 +3238,6 @@ int perf_event_task_disable(void) | |||
3238 | return 0; | 3238 | return 0; |
3239 | } | 3239 | } |
3240 | 3240 | ||
3241 | #ifndef PERF_EVENT_INDEX_OFFSET | ||
3242 | # define PERF_EVENT_INDEX_OFFSET 0 | ||
3243 | #endif | ||
3244 | |||
3245 | static int perf_event_index(struct perf_event *event) | 3241 | static int perf_event_index(struct perf_event *event) |
3246 | { | 3242 | { |
3247 | if (event->hw.state & PERF_HES_STOPPED) | 3243 | if (event->hw.state & PERF_HES_STOPPED) |
@@ -3250,21 +3246,26 @@ static int perf_event_index(struct perf_event *event) | |||
3250 | if (event->state != PERF_EVENT_STATE_ACTIVE) | 3246 | if (event->state != PERF_EVENT_STATE_ACTIVE) |
3251 | return 0; | 3247 | return 0; |
3252 | 3248 | ||
3253 | return event->hw.idx + 1 - PERF_EVENT_INDEX_OFFSET; | 3249 | return event->pmu->event_idx(event); |
3254 | } | 3250 | } |
3255 | 3251 | ||
3256 | static void calc_timer_values(struct perf_event *event, | 3252 | static void calc_timer_values(struct perf_event *event, |
3253 | u64 *now, | ||
3257 | u64 *enabled, | 3254 | u64 *enabled, |
3258 | u64 *running) | 3255 | u64 *running) |
3259 | { | 3256 | { |
3260 | u64 now, ctx_time; | 3257 | u64 ctx_time; |
3261 | 3258 | ||
3262 | now = perf_clock(); | 3259 | *now = perf_clock(); |
3263 | ctx_time = event->shadow_ctx_time + now; | 3260 | ctx_time = event->shadow_ctx_time + *now; |
3264 | *enabled = ctx_time - event->tstamp_enabled; | 3261 | *enabled = ctx_time - event->tstamp_enabled; |
3265 | *running = ctx_time - event->tstamp_running; | 3262 | *running = ctx_time - event->tstamp_running; |
3266 | } | 3263 | } |
3267 | 3264 | ||
3265 | void __weak perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now) | ||
3266 | { | ||
3267 | } | ||
3268 | |||
3268 | /* | 3269 | /* |
3269 | * Callers need to ensure there can be no nesting of this function, otherwise | 3270 | * Callers need to ensure there can be no nesting of this function, otherwise |
3270 | * the seqlock logic goes bad. We can not serialize this because the arch | 3271 | * the seqlock logic goes bad. We can not serialize this because the arch |
@@ -3274,7 +3275,7 @@ void perf_event_update_userpage(struct perf_event *event) | |||
3274 | { | 3275 | { |
3275 | struct perf_event_mmap_page *userpg; | 3276 | struct perf_event_mmap_page *userpg; |
3276 | struct ring_buffer *rb; | 3277 | struct ring_buffer *rb; |
3277 | u64 enabled, running; | 3278 | u64 enabled, running, now; |
3278 | 3279 | ||
3279 | rcu_read_lock(); | 3280 | rcu_read_lock(); |
3280 | /* | 3281 | /* |
@@ -3286,7 +3287,7 @@ void perf_event_update_userpage(struct perf_event *event) | |||
3286 | * because of locking issue as we can be called in | 3287 | * because of locking issue as we can be called in |
3287 | * NMI context | 3288 | * NMI context |
3288 | */ | 3289 | */ |
3289 | calc_timer_values(event, &enabled, &running); | 3290 | calc_timer_values(event, &now, &enabled, &running); |
3290 | rb = rcu_dereference(event->rb); | 3291 | rb = rcu_dereference(event->rb); |
3291 | if (!rb) | 3292 | if (!rb) |
3292 | goto unlock; | 3293 | goto unlock; |
@@ -3302,7 +3303,7 @@ void perf_event_update_userpage(struct perf_event *event) | |||
3302 | barrier(); | 3303 | barrier(); |
3303 | userpg->index = perf_event_index(event); | 3304 | userpg->index = perf_event_index(event); |
3304 | userpg->offset = perf_event_count(event); | 3305 | userpg->offset = perf_event_count(event); |
3305 | if (event->state == PERF_EVENT_STATE_ACTIVE) | 3306 | if (userpg->index) |
3306 | userpg->offset -= local64_read(&event->hw.prev_count); | 3307 | userpg->offset -= local64_read(&event->hw.prev_count); |
3307 | 3308 | ||
3308 | userpg->time_enabled = enabled + | 3309 | userpg->time_enabled = enabled + |
@@ -3311,6 +3312,8 @@ void perf_event_update_userpage(struct perf_event *event) | |||
3311 | userpg->time_running = running + | 3312 | userpg->time_running = running + |
3312 | atomic64_read(&event->child_total_time_running); | 3313 | atomic64_read(&event->child_total_time_running); |
3313 | 3314 | ||
3315 | perf_update_user_clock(userpg, now); | ||
3316 | |||
3314 | barrier(); | 3317 | barrier(); |
3315 | ++userpg->lock; | 3318 | ++userpg->lock; |
3316 | preempt_enable(); | 3319 | preempt_enable(); |
@@ -3568,6 +3571,8 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) | |||
3568 | event->mmap_user = get_current_user(); | 3571 | event->mmap_user = get_current_user(); |
3569 | vma->vm_mm->pinned_vm += event->mmap_locked; | 3572 | vma->vm_mm->pinned_vm += event->mmap_locked; |
3570 | 3573 | ||
3574 | perf_event_update_userpage(event); | ||
3575 | |||
3571 | unlock: | 3576 | unlock: |
3572 | if (!ret) | 3577 | if (!ret) |
3573 | atomic_inc(&event->mmap_count); | 3578 | atomic_inc(&event->mmap_count); |
@@ -3799,7 +3804,7 @@ static void perf_output_read_group(struct perf_output_handle *handle, | |||
3799 | static void perf_output_read(struct perf_output_handle *handle, | 3804 | static void perf_output_read(struct perf_output_handle *handle, |
3800 | struct perf_event *event) | 3805 | struct perf_event *event) |
3801 | { | 3806 | { |
3802 | u64 enabled = 0, running = 0; | 3807 | u64 enabled = 0, running = 0, now; |
3803 | u64 read_format = event->attr.read_format; | 3808 | u64 read_format = event->attr.read_format; |
3804 | 3809 | ||
3805 | /* | 3810 | /* |
@@ -3812,7 +3817,7 @@ static void perf_output_read(struct perf_output_handle *handle, | |||
3812 | * NMI context | 3817 | * NMI context |
3813 | */ | 3818 | */ |
3814 | if (read_format & PERF_FORMAT_TOTAL_TIMES) | 3819 | if (read_format & PERF_FORMAT_TOTAL_TIMES) |
3815 | calc_timer_values(event, &enabled, &running); | 3820 | calc_timer_values(event, &now, &enabled, &running); |
3816 | 3821 | ||
3817 | if (event->attr.read_format & PERF_FORMAT_GROUP) | 3822 | if (event->attr.read_format & PERF_FORMAT_GROUP) |
3818 | perf_output_read_group(handle, event, enabled, running); | 3823 | perf_output_read_group(handle, event, enabled, running); |
@@ -5031,6 +5036,11 @@ static int perf_swevent_init(struct perf_event *event) | |||
5031 | return 0; | 5036 | return 0; |
5032 | } | 5037 | } |
5033 | 5038 | ||
5039 | static int perf_swevent_event_idx(struct perf_event *event) | ||
5040 | { | ||
5041 | return 0; | ||
5042 | } | ||
5043 | |||
5034 | static struct pmu perf_swevent = { | 5044 | static struct pmu perf_swevent = { |
5035 | .task_ctx_nr = perf_sw_context, | 5045 | .task_ctx_nr = perf_sw_context, |
5036 | 5046 | ||
@@ -5040,6 +5050,8 @@ static struct pmu perf_swevent = { | |||
5040 | .start = perf_swevent_start, | 5050 | .start = perf_swevent_start, |
5041 | .stop = perf_swevent_stop, | 5051 | .stop = perf_swevent_stop, |
5042 | .read = perf_swevent_read, | 5052 | .read = perf_swevent_read, |
5053 | |||
5054 | .event_idx = perf_swevent_event_idx, | ||
5043 | }; | 5055 | }; |
5044 | 5056 | ||
5045 | #ifdef CONFIG_EVENT_TRACING | 5057 | #ifdef CONFIG_EVENT_TRACING |
@@ -5126,6 +5138,8 @@ static struct pmu perf_tracepoint = { | |||
5126 | .start = perf_swevent_start, | 5138 | .start = perf_swevent_start, |
5127 | .stop = perf_swevent_stop, | 5139 | .stop = perf_swevent_stop, |
5128 | .read = perf_swevent_read, | 5140 | .read = perf_swevent_read, |
5141 | |||
5142 | .event_idx = perf_swevent_event_idx, | ||
5129 | }; | 5143 | }; |
5130 | 5144 | ||
5131 | static inline void perf_tp_register(void) | 5145 | static inline void perf_tp_register(void) |
@@ -5345,6 +5359,8 @@ static struct pmu perf_cpu_clock = { | |||
5345 | .start = cpu_clock_event_start, | 5359 | .start = cpu_clock_event_start, |
5346 | .stop = cpu_clock_event_stop, | 5360 | .stop = cpu_clock_event_stop, |
5347 | .read = cpu_clock_event_read, | 5361 | .read = cpu_clock_event_read, |
5362 | |||
5363 | .event_idx = perf_swevent_event_idx, | ||
5348 | }; | 5364 | }; |
5349 | 5365 | ||
5350 | /* | 5366 | /* |
@@ -5417,6 +5433,8 @@ static struct pmu perf_task_clock = { | |||
5417 | .start = task_clock_event_start, | 5433 | .start = task_clock_event_start, |
5418 | .stop = task_clock_event_stop, | 5434 | .stop = task_clock_event_stop, |
5419 | .read = task_clock_event_read, | 5435 | .read = task_clock_event_read, |
5436 | |||
5437 | .event_idx = perf_swevent_event_idx, | ||
5420 | }; | 5438 | }; |
5421 | 5439 | ||
5422 | static void perf_pmu_nop_void(struct pmu *pmu) | 5440 | static void perf_pmu_nop_void(struct pmu *pmu) |
@@ -5444,6 +5462,11 @@ static void perf_pmu_cancel_txn(struct pmu *pmu) | |||
5444 | perf_pmu_enable(pmu); | 5462 | perf_pmu_enable(pmu); |
5445 | } | 5463 | } |
5446 | 5464 | ||
5465 | static int perf_event_idx_default(struct perf_event *event) | ||
5466 | { | ||
5467 | return event->hw.idx + 1; | ||
5468 | } | ||
5469 | |||
5447 | /* | 5470 | /* |
5448 | * Ensures all contexts with the same task_ctx_nr have the same | 5471 | * Ensures all contexts with the same task_ctx_nr have the same |
5449 | * pmu_cpu_context too. | 5472 | * pmu_cpu_context too. |
@@ -5530,6 +5553,7 @@ static int pmu_dev_alloc(struct pmu *pmu) | |||
5530 | if (!pmu->dev) | 5553 | if (!pmu->dev) |
5531 | goto out; | 5554 | goto out; |
5532 | 5555 | ||
5556 | pmu->dev->groups = pmu->attr_groups; | ||
5533 | device_initialize(pmu->dev); | 5557 | device_initialize(pmu->dev); |
5534 | ret = dev_set_name(pmu->dev, "%s", pmu->name); | 5558 | ret = dev_set_name(pmu->dev, "%s", pmu->name); |
5535 | if (ret) | 5559 | if (ret) |
@@ -5633,6 +5657,9 @@ got_cpu_context: | |||
5633 | pmu->pmu_disable = perf_pmu_nop_void; | 5657 | pmu->pmu_disable = perf_pmu_nop_void; |
5634 | } | 5658 | } |
5635 | 5659 | ||
5660 | if (!pmu->event_idx) | ||
5661 | pmu->event_idx = perf_event_idx_default; | ||
5662 | |||
5636 | list_add_rcu(&pmu->entry, &pmus); | 5663 | list_add_rcu(&pmu->entry, &pmus); |
5637 | ret = 0; | 5664 | ret = 0; |
5638 | unlock: | 5665 | unlock: |
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index ee706ce44aa..3330022a7ac 100644 --- a/kernel/events/hw_breakpoint.c +++ b/kernel/events/hw_breakpoint.c | |||
@@ -613,6 +613,11 @@ static void hw_breakpoint_stop(struct perf_event *bp, int flags) | |||
613 | bp->hw.state = PERF_HES_STOPPED; | 613 | bp->hw.state = PERF_HES_STOPPED; |
614 | } | 614 | } |
615 | 615 | ||
616 | static int hw_breakpoint_event_idx(struct perf_event *bp) | ||
617 | { | ||
618 | return 0; | ||
619 | } | ||
620 | |||
616 | static struct pmu perf_breakpoint = { | 621 | static struct pmu perf_breakpoint = { |
617 | .task_ctx_nr = perf_sw_context, /* could eventually get its own */ | 622 | .task_ctx_nr = perf_sw_context, /* could eventually get its own */ |
618 | 623 | ||
@@ -622,6 +627,8 @@ static struct pmu perf_breakpoint = { | |||
622 | .start = hw_breakpoint_start, | 627 | .start = hw_breakpoint_start, |
623 | .stop = hw_breakpoint_stop, | 628 | .stop = hw_breakpoint_stop, |
624 | .read = hw_breakpoint_pmu_read, | 629 | .read = hw_breakpoint_pmu_read, |
630 | |||
631 | .event_idx = hw_breakpoint_event_idx, | ||
625 | }; | 632 | }; |
626 | 633 | ||
627 | int __init init_hw_breakpoint(void) | 634 | int __init init_hw_breakpoint(void) |
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c new file mode 100644 index 00000000000..e56e56aa753 --- /dev/null +++ b/kernel/events/uprobes.c | |||
@@ -0,0 +1,1029 @@ | |||
1 | /* | ||
2 | * User-space Probes (UProbes) | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * Copyright (C) IBM Corporation, 2008-2012 | ||
19 | * Authors: | ||
20 | * Srikar Dronamraju | ||
21 | * Jim Keniston | ||
22 | * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | ||
23 | */ | ||
24 | |||
25 | #include <linux/kernel.h> | ||
26 | #include <linux/highmem.h> | ||
27 | #include <linux/pagemap.h> /* read_mapping_page */ | ||
28 | #include <linux/slab.h> | ||
29 | #include <linux/sched.h> | ||
30 | #include <linux/rmap.h> /* anon_vma_prepare */ | ||
31 | #include <linux/mmu_notifier.h> /* set_pte_at_notify */ | ||
32 | #include <linux/swap.h> /* try_to_free_swap */ | ||
33 | |||
34 | #include <linux/uprobes.h> | ||
35 | |||
36 | static struct rb_root uprobes_tree = RB_ROOT; | ||
37 | |||
38 | static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */ | ||
39 | |||
40 | #define UPROBES_HASH_SZ 13 | ||
41 | |||
42 | /* serialize (un)register */ | ||
43 | static struct mutex uprobes_mutex[UPROBES_HASH_SZ]; | ||
44 | |||
45 | #define uprobes_hash(v) (&uprobes_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ]) | ||
46 | |||
47 | /* serialize uprobe->pending_list */ | ||
48 | static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ]; | ||
49 | #define uprobes_mmap_hash(v) (&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ]) | ||
50 | |||
51 | /* | ||
52 | * uprobe_events allows us to skip the uprobe_mmap if there are no uprobe | ||
53 | * events active at this time. Probably a fine grained per inode count is | ||
54 | * better? | ||
55 | */ | ||
56 | static atomic_t uprobe_events = ATOMIC_INIT(0); | ||
57 | |||
58 | /* | ||
59 | * Maintain a temporary per vma info that can be used to search if a vma | ||
60 | * has already been handled. This structure is introduced since extending | ||
61 | * vm_area_struct wasnt recommended. | ||
62 | */ | ||
63 | struct vma_info { | ||
64 | struct list_head probe_list; | ||
65 | struct mm_struct *mm; | ||
66 | loff_t vaddr; | ||
67 | }; | ||
68 | |||
69 | struct uprobe { | ||
70 | struct rb_node rb_node; /* node in the rb tree */ | ||
71 | atomic_t ref; | ||
72 | struct rw_semaphore consumer_rwsem; | ||
73 | struct list_head pending_list; | ||
74 | struct uprobe_consumer *consumers; | ||
75 | struct inode *inode; /* Also hold a ref to inode */ | ||
76 | loff_t offset; | ||
77 | int flags; | ||
78 | struct arch_uprobe arch; | ||
79 | }; | ||
80 | |||
81 | /* | ||
82 | * valid_vma: Verify if the specified vma is an executable vma | ||
83 | * Relax restrictions while unregistering: vm_flags might have | ||
84 | * changed after breakpoint was inserted. | ||
85 | * - is_register: indicates if we are in register context. | ||
86 | * - Return 1 if the specified virtual address is in an | ||
87 | * executable vma. | ||
88 | */ | ||
89 | static bool valid_vma(struct vm_area_struct *vma, bool is_register) | ||
90 | { | ||
91 | if (!vma->vm_file) | ||
92 | return false; | ||
93 | |||
94 | if (!is_register) | ||
95 | return true; | ||
96 | |||
97 | if ((vma->vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)) == (VM_READ|VM_EXEC)) | ||
98 | return true; | ||
99 | |||
100 | return false; | ||
101 | } | ||
102 | |||
103 | static loff_t vma_address(struct vm_area_struct *vma, loff_t offset) | ||
104 | { | ||
105 | loff_t vaddr; | ||
106 | |||
107 | vaddr = vma->vm_start + offset; | ||
108 | vaddr -= vma->vm_pgoff << PAGE_SHIFT; | ||
109 | |||
110 | return vaddr; | ||
111 | } | ||
112 | |||
113 | /** | ||
114 | * __replace_page - replace page in vma by new page. | ||
115 | * based on replace_page in mm/ksm.c | ||
116 | * | ||
117 | * @vma: vma that holds the pte pointing to page | ||
118 | * @page: the cowed page we are replacing by kpage | ||
119 | * @kpage: the modified page we replace page by | ||
120 | * | ||
121 | * Returns 0 on success, -EFAULT on failure. | ||
122 | */ | ||
123 | static int __replace_page(struct vm_area_struct *vma, struct page *page, struct page *kpage) | ||
124 | { | ||
125 | struct mm_struct *mm = vma->vm_mm; | ||
126 | pgd_t *pgd; | ||
127 | pud_t *pud; | ||
128 | pmd_t *pmd; | ||
129 | pte_t *ptep; | ||
130 | spinlock_t *ptl; | ||
131 | unsigned long addr; | ||
132 | int err = -EFAULT; | ||
133 | |||
134 | addr = page_address_in_vma(page, vma); | ||
135 | if (addr == -EFAULT) | ||
136 | goto out; | ||
137 | |||
138 | pgd = pgd_offset(mm, addr); | ||
139 | if (!pgd_present(*pgd)) | ||
140 | goto out; | ||
141 | |||
142 | pud = pud_offset(pgd, addr); | ||
143 | if (!pud_present(*pud)) | ||
144 | goto out; | ||
145 | |||
146 | pmd = pmd_offset(pud, addr); | ||
147 | if (!pmd_present(*pmd)) | ||
148 | goto out; | ||
149 | |||
150 | ptep = pte_offset_map_lock(mm, pmd, addr, &ptl); | ||
151 | if (!ptep) | ||
152 | goto out; | ||
153 | |||
154 | get_page(kpage); | ||
155 | page_add_new_anon_rmap(kpage, vma, addr); | ||
156 | |||
157 | flush_cache_page(vma, addr, pte_pfn(*ptep)); | ||
158 | ptep_clear_flush(vma, addr, ptep); | ||
159 | set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot)); | ||
160 | |||
161 | page_remove_rmap(page); | ||
162 | if (!page_mapped(page)) | ||
163 | try_to_free_swap(page); | ||
164 | put_page(page); | ||
165 | pte_unmap_unlock(ptep, ptl); | ||
166 | err = 0; | ||
167 | |||
168 | out: | ||
169 | return err; | ||
170 | } | ||
171 | |||
172 | /** | ||
173 | * is_swbp_insn - check if instruction is breakpoint instruction. | ||
174 | * @insn: instruction to be checked. | ||
175 | * Default implementation of is_swbp_insn | ||
176 | * Returns true if @insn is a breakpoint instruction. | ||
177 | */ | ||
178 | bool __weak is_swbp_insn(uprobe_opcode_t *insn) | ||
179 | { | ||
180 | return *insn == UPROBE_SWBP_INSN; | ||
181 | } | ||
182 | |||
183 | /* | ||
184 | * NOTE: | ||
185 | * Expect the breakpoint instruction to be the smallest size instruction for | ||
186 | * the architecture. If an arch has variable length instruction and the | ||
187 | * breakpoint instruction is not of the smallest length instruction | ||
188 | * supported by that architecture then we need to modify read_opcode / | ||
189 | * write_opcode accordingly. This would never be a problem for archs that | ||
190 | * have fixed length instructions. | ||
191 | */ | ||
192 | |||
193 | /* | ||
194 | * write_opcode - write the opcode at a given virtual address. | ||
195 | * @auprobe: arch breakpointing information. | ||
196 | * @mm: the probed process address space. | ||
197 | * @vaddr: the virtual address to store the opcode. | ||
198 | * @opcode: opcode to be written at @vaddr. | ||
199 | * | ||
200 | * Called with mm->mmap_sem held (for read and with a reference to | ||
201 | * mm). | ||
202 | * | ||
203 | * For mm @mm, write the opcode at @vaddr. | ||
204 | * Return 0 (success) or a negative errno. | ||
205 | */ | ||
206 | static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, | ||
207 | unsigned long vaddr, uprobe_opcode_t opcode) | ||
208 | { | ||
209 | struct page *old_page, *new_page; | ||
210 | struct address_space *mapping; | ||
211 | void *vaddr_old, *vaddr_new; | ||
212 | struct vm_area_struct *vma; | ||
213 | struct uprobe *uprobe; | ||
214 | loff_t addr; | ||
215 | int ret; | ||
216 | |||
217 | /* Read the page with vaddr into memory */ | ||
218 | ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &old_page, &vma); | ||
219 | if (ret <= 0) | ||
220 | return ret; | ||
221 | |||
222 | ret = -EINVAL; | ||
223 | |||
224 | /* | ||
225 | * We are interested in text pages only. Our pages of interest | ||
226 | * should be mapped for read and execute only. We desist from | ||
227 | * adding probes in write mapped pages since the breakpoints | ||
228 | * might end up in the file copy. | ||
229 | */ | ||
230 | if (!valid_vma(vma, is_swbp_insn(&opcode))) | ||
231 | goto put_out; | ||
232 | |||
233 | uprobe = container_of(auprobe, struct uprobe, arch); | ||
234 | mapping = uprobe->inode->i_mapping; | ||
235 | if (mapping != vma->vm_file->f_mapping) | ||
236 | goto put_out; | ||
237 | |||
238 | addr = vma_address(vma, uprobe->offset); | ||
239 | if (vaddr != (unsigned long)addr) | ||
240 | goto put_out; | ||
241 | |||
242 | ret = -ENOMEM; | ||
243 | new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr); | ||
244 | if (!new_page) | ||
245 | goto put_out; | ||
246 | |||
247 | __SetPageUptodate(new_page); | ||
248 | |||
249 | /* | ||
250 | * lock page will serialize against do_wp_page()'s | ||
251 | * PageAnon() handling | ||
252 | */ | ||
253 | lock_page(old_page); | ||
254 | /* copy the page now that we've got it stable */ | ||
255 | vaddr_old = kmap_atomic(old_page); | ||
256 | vaddr_new = kmap_atomic(new_page); | ||
257 | |||
258 | memcpy(vaddr_new, vaddr_old, PAGE_SIZE); | ||
259 | |||
260 | /* poke the new insn in, ASSUMES we don't cross page boundary */ | ||
261 | vaddr &= ~PAGE_MASK; | ||
262 | BUG_ON(vaddr + UPROBE_SWBP_INSN_SIZE > PAGE_SIZE); | ||
263 | memcpy(vaddr_new + vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); | ||
264 | |||
265 | kunmap_atomic(vaddr_new); | ||
266 | kunmap_atomic(vaddr_old); | ||
267 | |||
268 | ret = anon_vma_prepare(vma); | ||
269 | if (ret) | ||
270 | goto unlock_out; | ||
271 | |||
272 | lock_page(new_page); | ||
273 | ret = __replace_page(vma, old_page, new_page); | ||
274 | unlock_page(new_page); | ||
275 | |||
276 | unlock_out: | ||
277 | unlock_page(old_page); | ||
278 | page_cache_release(new_page); | ||
279 | |||
280 | put_out: | ||
281 | put_page(old_page); | ||
282 | |||
283 | return ret; | ||
284 | } | ||
285 | |||
286 | /** | ||
287 | * read_opcode - read the opcode at a given virtual address. | ||
288 | * @mm: the probed process address space. | ||
289 | * @vaddr: the virtual address to read the opcode. | ||
290 | * @opcode: location to store the read opcode. | ||
291 | * | ||
292 | * Called with mm->mmap_sem held (for read and with a reference to | ||
293 | * mm. | ||
294 | * | ||
295 | * For mm @mm, read the opcode at @vaddr and store it in @opcode. | ||
296 | * Return 0 (success) or a negative errno. | ||
297 | */ | ||
298 | static int read_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t *opcode) | ||
299 | { | ||
300 | struct page *page; | ||
301 | void *vaddr_new; | ||
302 | int ret; | ||
303 | |||
304 | ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &page, NULL); | ||
305 | if (ret <= 0) | ||
306 | return ret; | ||
307 | |||
308 | lock_page(page); | ||
309 | vaddr_new = kmap_atomic(page); | ||
310 | vaddr &= ~PAGE_MASK; | ||
311 | memcpy(opcode, vaddr_new + vaddr, UPROBE_SWBP_INSN_SIZE); | ||
312 | kunmap_atomic(vaddr_new); | ||
313 | unlock_page(page); | ||
314 | |||
315 | put_page(page); | ||
316 | |||
317 | return 0; | ||
318 | } | ||
319 | |||
320 | static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr) | ||
321 | { | ||
322 | uprobe_opcode_t opcode; | ||
323 | int result; | ||
324 | |||
325 | result = read_opcode(mm, vaddr, &opcode); | ||
326 | if (result) | ||
327 | return result; | ||
328 | |||
329 | if (is_swbp_insn(&opcode)) | ||
330 | return 1; | ||
331 | |||
332 | return 0; | ||
333 | } | ||
334 | |||
335 | /** | ||
336 | * set_swbp - store breakpoint at a given address. | ||
337 | * @auprobe: arch specific probepoint information. | ||
338 | * @mm: the probed process address space. | ||
339 | * @vaddr: the virtual address to insert the opcode. | ||
340 | * | ||
341 | * For mm @mm, store the breakpoint instruction at @vaddr. | ||
342 | * Return 0 (success) or a negative errno. | ||
343 | */ | ||
344 | int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) | ||
345 | { | ||
346 | int result; | ||
347 | |||
348 | result = is_swbp_at_addr(mm, vaddr); | ||
349 | if (result == 1) | ||
350 | return -EEXIST; | ||
351 | |||
352 | if (result) | ||
353 | return result; | ||
354 | |||
355 | return write_opcode(auprobe, mm, vaddr, UPROBE_SWBP_INSN); | ||
356 | } | ||
357 | |||
358 | /** | ||
359 | * set_orig_insn - Restore the original instruction. | ||
360 | * @mm: the probed process address space. | ||
361 | * @auprobe: arch specific probepoint information. | ||
362 | * @vaddr: the virtual address to insert the opcode. | ||
363 | * @verify: if true, verify existance of breakpoint instruction. | ||
364 | * | ||
365 | * For mm @mm, restore the original opcode (opcode) at @vaddr. | ||
366 | * Return 0 (success) or a negative errno. | ||
367 | */ | ||
368 | int __weak | ||
369 | set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, bool verify) | ||
370 | { | ||
371 | if (verify) { | ||
372 | int result; | ||
373 | |||
374 | result = is_swbp_at_addr(mm, vaddr); | ||
375 | if (!result) | ||
376 | return -EINVAL; | ||
377 | |||
378 | if (result != 1) | ||
379 | return result; | ||
380 | } | ||
381 | return write_opcode(auprobe, mm, vaddr, *(uprobe_opcode_t *)auprobe->insn); | ||
382 | } | ||
383 | |||
384 | static int match_uprobe(struct uprobe *l, struct uprobe *r) | ||
385 | { | ||
386 | if (l->inode < r->inode) | ||
387 | return -1; | ||
388 | |||
389 | if (l->inode > r->inode) | ||
390 | return 1; | ||
391 | |||
392 | if (l->offset < r->offset) | ||
393 | return -1; | ||
394 | |||
395 | if (l->offset > r->offset) | ||
396 | return 1; | ||
397 | |||
398 | return 0; | ||
399 | } | ||
400 | |||
401 | static struct uprobe *__find_uprobe(struct inode *inode, loff_t offset) | ||
402 | { | ||
403 | struct uprobe u = { .inode = inode, .offset = offset }; | ||
404 | struct rb_node *n = uprobes_tree.rb_node; | ||
405 | struct uprobe *uprobe; | ||
406 | int match; | ||
407 | |||
408 | while (n) { | ||
409 | uprobe = rb_entry(n, struct uprobe, rb_node); | ||
410 | match = match_uprobe(&u, uprobe); | ||
411 | if (!match) { | ||
412 | atomic_inc(&uprobe->ref); | ||
413 | return uprobe; | ||
414 | } | ||
415 | |||
416 | if (match < 0) | ||
417 | n = n->rb_left; | ||
418 | else | ||
419 | n = n->rb_right; | ||
420 | } | ||
421 | return NULL; | ||
422 | } | ||
423 | |||
424 | /* | ||
425 | * Find a uprobe corresponding to a given inode:offset | ||
426 | * Acquires uprobes_treelock | ||
427 | */ | ||
428 | static struct uprobe *find_uprobe(struct inode *inode, loff_t offset) | ||
429 | { | ||
430 | struct uprobe *uprobe; | ||
431 | unsigned long flags; | ||
432 | |||
433 | spin_lock_irqsave(&uprobes_treelock, flags); | ||
434 | uprobe = __find_uprobe(inode, offset); | ||
435 | spin_unlock_irqrestore(&uprobes_treelock, flags); | ||
436 | |||
437 | return uprobe; | ||
438 | } | ||
439 | |||
/*
 * Insert @uprobe into uprobes_tree, or return the already-present
 * match with an extra access reference.
 * Caller must hold uprobes_treelock.
 */
static struct uprobe *__insert_uprobe(struct uprobe *uprobe)
{
	struct rb_node **p = &uprobes_tree.rb_node;
	struct rb_node *parent = NULL;
	struct uprobe *u;
	int match;

	while (*p) {
		parent = *p;
		u = rb_entry(parent, struct uprobe, rb_node);
		match = match_uprobe(uprobe, u);
		if (!match) {
			/* duplicate: hand back the existing node instead */
			atomic_inc(&u->ref);
			return u;
		}

		if (match < 0)
			p = &parent->rb_left;
		else
			p = &parent->rb_right;

	}

	u = NULL;
	rb_link_node(&uprobe->rb_node, parent, p);
	rb_insert_color(&uprobe->rb_node, &uprobes_tree);
	/* get access + creation ref */
	atomic_set(&uprobe->ref, 2);

	return u;
}
471 | |||
472 | /* | ||
473 | * Acquire uprobes_treelock. | ||
474 | * Matching uprobe already exists in rbtree; | ||
475 | * increment (access refcount) and return the matching uprobe. | ||
476 | * | ||
477 | * No matching uprobe; insert the uprobe in rb_tree; | ||
478 | * get a double refcount (access + creation) and return NULL. | ||
479 | */ | ||
480 | static struct uprobe *insert_uprobe(struct uprobe *uprobe) | ||
481 | { | ||
482 | unsigned long flags; | ||
483 | struct uprobe *u; | ||
484 | |||
485 | spin_lock_irqsave(&uprobes_treelock, flags); | ||
486 | u = __insert_uprobe(uprobe); | ||
487 | spin_unlock_irqrestore(&uprobes_treelock, flags); | ||
488 | |||
489 | return u; | ||
490 | } | ||
491 | |||
492 | static void put_uprobe(struct uprobe *uprobe) | ||
493 | { | ||
494 | if (atomic_dec_and_test(&uprobe->ref)) | ||
495 | kfree(uprobe); | ||
496 | } | ||
497 | |||
498 | static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset) | ||
499 | { | ||
500 | struct uprobe *uprobe, *cur_uprobe; | ||
501 | |||
502 | uprobe = kzalloc(sizeof(struct uprobe), GFP_KERNEL); | ||
503 | if (!uprobe) | ||
504 | return NULL; | ||
505 | |||
506 | uprobe->inode = igrab(inode); | ||
507 | uprobe->offset = offset; | ||
508 | init_rwsem(&uprobe->consumer_rwsem); | ||
509 | INIT_LIST_HEAD(&uprobe->pending_list); | ||
510 | |||
511 | /* add to uprobes_tree, sorted on inode:offset */ | ||
512 | cur_uprobe = insert_uprobe(uprobe); | ||
513 | |||
514 | /* a uprobe exists for this inode:offset combination */ | ||
515 | if (cur_uprobe) { | ||
516 | kfree(uprobe); | ||
517 | uprobe = cur_uprobe; | ||
518 | iput(inode); | ||
519 | } else { | ||
520 | atomic_inc(&uprobe_events); | ||
521 | } | ||
522 | |||
523 | return uprobe; | ||
524 | } | ||
525 | |||
526 | /* Returns the previous consumer */ | ||
527 | static struct uprobe_consumer * | ||
528 | consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc) | ||
529 | { | ||
530 | down_write(&uprobe->consumer_rwsem); | ||
531 | uc->next = uprobe->consumers; | ||
532 | uprobe->consumers = uc; | ||
533 | up_write(&uprobe->consumer_rwsem); | ||
534 | |||
535 | return uc->next; | ||
536 | } | ||
537 | |||
538 | /* | ||
539 | * For uprobe @uprobe, delete the consumer @uc. | ||
540 | * Return true if the @uc is deleted successfully | ||
541 | * or return false. | ||
542 | */ | ||
543 | static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc) | ||
544 | { | ||
545 | struct uprobe_consumer **con; | ||
546 | bool ret = false; | ||
547 | |||
548 | down_write(&uprobe->consumer_rwsem); | ||
549 | for (con = &uprobe->consumers; *con; con = &(*con)->next) { | ||
550 | if (*con == uc) { | ||
551 | *con = uc->next; | ||
552 | ret = true; | ||
553 | break; | ||
554 | } | ||
555 | } | ||
556 | up_write(&uprobe->consumer_rwsem); | ||
557 | |||
558 | return ret; | ||
559 | } | ||
560 | |||
561 | static int | ||
562 | __copy_insn(struct address_space *mapping, struct vm_area_struct *vma, char *insn, | ||
563 | unsigned long nbytes, unsigned long offset) | ||
564 | { | ||
565 | struct file *filp = vma->vm_file; | ||
566 | struct page *page; | ||
567 | void *vaddr; | ||
568 | unsigned long off1; | ||
569 | unsigned long idx; | ||
570 | |||
571 | if (!filp) | ||
572 | return -EINVAL; | ||
573 | |||
574 | idx = (unsigned long)(offset >> PAGE_CACHE_SHIFT); | ||
575 | off1 = offset &= ~PAGE_MASK; | ||
576 | |||
577 | /* | ||
578 | * Ensure that the page that has the original instruction is | ||
579 | * populated and in page-cache. | ||
580 | */ | ||
581 | page = read_mapping_page(mapping, idx, filp); | ||
582 | if (IS_ERR(page)) | ||
583 | return PTR_ERR(page); | ||
584 | |||
585 | vaddr = kmap_atomic(page); | ||
586 | memcpy(insn, vaddr + off1, nbytes); | ||
587 | kunmap_atomic(vaddr); | ||
588 | page_cache_release(page); | ||
589 | |||
590 | return 0; | ||
591 | } | ||
592 | |||
593 | static int | ||
594 | copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr) | ||
595 | { | ||
596 | struct address_space *mapping; | ||
597 | unsigned long nbytes; | ||
598 | int bytes; | ||
599 | |||
600 | addr &= ~PAGE_MASK; | ||
601 | nbytes = PAGE_SIZE - addr; | ||
602 | mapping = uprobe->inode->i_mapping; | ||
603 | |||
604 | /* Instruction at end of binary; copy only available bytes */ | ||
605 | if (uprobe->offset + MAX_UINSN_BYTES > uprobe->inode->i_size) | ||
606 | bytes = uprobe->inode->i_size - uprobe->offset; | ||
607 | else | ||
608 | bytes = MAX_UINSN_BYTES; | ||
609 | |||
610 | /* Instruction at the page-boundary; copy bytes in second page */ | ||
611 | if (nbytes < bytes) { | ||
612 | if (__copy_insn(mapping, vma, uprobe->arch.insn + nbytes, | ||
613 | bytes - nbytes, uprobe->offset + nbytes)) | ||
614 | return -ENOMEM; | ||
615 | |||
616 | bytes = nbytes; | ||
617 | } | ||
618 | return __copy_insn(mapping, vma, uprobe->arch.insn, bytes, uprobe->offset); | ||
619 | } | ||
620 | |||
/*
 * Copy and validate the original instruction (first call only), then
 * replace it with a breakpoint in @mm at @vaddr.
 * Returns 0, -EEXIST when behaving as if the probe already exists,
 * or a negative errno from copy/analysis/poke.
 */
static int
install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
			struct vm_area_struct *vma, loff_t vaddr)
{
	unsigned long addr;
	int ret;

	/*
	 * If probe is being deleted, unregister thread could be done with
	 * the vma-rmap-walk through. Adding a probe now can be fatal since
	 * nobody will be able to cleanup. Also we could be from fork or
	 * mremap path, where the probe might have already been inserted.
	 * Hence behave as if probe already existed.
	 */
	if (!uprobe->consumers)
		return -EEXIST;

	addr = (unsigned long)vaddr;

	/* copy and analyze the original instruction only once per uprobe */
	if (!(uprobe->flags & UPROBE_COPY_INSN)) {
		ret = copy_insn(uprobe, vma, addr);
		if (ret)
			return ret;

		/* refuse to probe an address that already holds a breakpoint */
		if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn))
			return -EEXIST;

		ret = arch_uprobes_analyze_insn(&uprobe->arch, mm);
		if (ret)
			return ret;

		uprobe->flags |= UPROBE_COPY_INSN;
	}
	ret = set_swbp(&uprobe->arch, mm, addr);

	return ret;
}
658 | |||
/*
 * Put the original instruction back at @vaddr in @mm, verifying that a
 * breakpoint is actually present there first (verify == true).
 */
static void
remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, loff_t vaddr)
{
	set_orig_insn(&uprobe->arch, mm, (unsigned long)vaddr, true);
}
664 | |||
/*
 * Unlink @uprobe from uprobes_tree, then drop the inode reference
 * taken in alloc_uprobe() and the creation reference on the uprobe.
 */
static void delete_uprobe(struct uprobe *uprobe)
{
	unsigned long flags;

	spin_lock_irqsave(&uprobes_treelock, flags);
	rb_erase(&uprobe->rb_node, &uprobes_tree);
	spin_unlock_irqrestore(&uprobes_treelock, flags);
	iput(uprobe->inode);
	put_uprobe(uprobe);
	atomic_dec(&uprobe_events);
}
676 | |||
/*
 * Scan the rmap prio tree of @mapping for a vma covering @offset that
 * is not already recorded on @head. On success, pin the vma's mm,
 * fill in and enqueue @vi and return it; otherwise return NULL.
 * Caller must hold mapping->i_mmap_mutex.
 */
static struct vma_info *
__find_next_vma_info(struct address_space *mapping, struct list_head *head,
			struct vma_info *vi, loff_t offset, bool is_register)
{
	struct prio_tree_iter iter;
	struct vm_area_struct *vma;
	struct vma_info *tmpvi;
	unsigned long pgoff;
	int existing_vma;
	loff_t vaddr;

	pgoff = offset >> PAGE_SHIFT;

	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
		if (!valid_vma(vma, is_register))
			continue;

		existing_vma = 0;
		vaddr = vma_address(vma, offset);

		/* skip mm:vaddr pairs that were already handed out */
		list_for_each_entry(tmpvi, head, probe_list) {
			if (tmpvi->mm == vma->vm_mm && tmpvi->vaddr == vaddr) {
				existing_vma = 1;
				break;
			}
		}

		/*
		 * Another vma needs a probe to be installed. However skip
		 * installing the probe if the vma is about to be unlinked.
		 */
		if (!existing_vma && atomic_inc_not_zero(&vma->vm_mm->mm_users)) {
			vi->mm = vma->vm_mm;
			vi->vaddr = vaddr;
			list_add(&vi->probe_list, head);

			return vi;
		}
	}

	return NULL;
}
719 | |||
720 | /* | ||
721 | * Iterate in the rmap prio tree and find a vma where a probe has not | ||
722 | * yet been inserted. | ||
723 | */ | ||
724 | static struct vma_info * | ||
725 | find_next_vma_info(struct address_space *mapping, struct list_head *head, | ||
726 | loff_t offset, bool is_register) | ||
727 | { | ||
728 | struct vma_info *vi, *retvi; | ||
729 | |||
730 | vi = kzalloc(sizeof(struct vma_info), GFP_KERNEL); | ||
731 | if (!vi) | ||
732 | return ERR_PTR(-ENOMEM); | ||
733 | |||
734 | mutex_lock(&mapping->i_mmap_mutex); | ||
735 | retvi = __find_next_vma_info(mapping, head, vi, offset, is_register); | ||
736 | mutex_unlock(&mapping->i_mmap_mutex); | ||
737 | |||
738 | if (!retvi) | ||
739 | kfree(vi); | ||
740 | |||
741 | return retvi; | ||
742 | } | ||
743 | |||
/*
 * For every vma that maps uprobe->inode at uprobe->offset, install
 * (@is_register == true) or remove the breakpoint.
 * Returns 0 on success, or the first fatal errno while registering;
 * removal failures are not reported.
 */
static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
{
	struct list_head try_list;
	struct vm_area_struct *vma;
	struct address_space *mapping;
	struct vma_info *vi, *tmpvi;
	struct mm_struct *mm;
	loff_t vaddr;
	int ret;

	mapping = uprobe->inode->i_mapping;
	INIT_LIST_HEAD(&try_list);

	ret = 0;

	for (;;) {
		vi = find_next_vma_info(mapping, &try_list, uprobe->offset, is_register);
		if (!vi)
			break;

		if (IS_ERR(vi)) {
			ret = PTR_ERR(vi);
			break;
		}

		mm = vi->mm;
		down_read(&mm->mmap_sem);
		/* revalidate under mmap_sem: the vma may have changed meanwhile */
		vma = find_vma(mm, (unsigned long)vi->vaddr);
		if (!vma || !valid_vma(vma, is_register)) {
			list_del(&vi->probe_list);
			kfree(vi);
			up_read(&mm->mmap_sem);
			mmput(mm);	/* undo the pin taken in __find_next_vma_info() */
			continue;
		}
		vaddr = vma_address(vma, uprobe->offset);
		/* make sure the vma still maps our inode at the expected address */
		if (vma->vm_file->f_mapping->host != uprobe->inode ||
						vaddr != vi->vaddr) {
			list_del(&vi->probe_list);
			kfree(vi);
			up_read(&mm->mmap_sem);
			mmput(mm);
			continue;
		}

		if (is_register)
			ret = install_breakpoint(uprobe, mm, vma, vi->vaddr);
		else
			remove_breakpoint(uprobe, mm, vi->vaddr);

		up_read(&mm->mmap_sem);
		mmput(mm);
		if (is_register) {
			/* "already installed" is not an error */
			if (ret && ret == -EEXIST)
				ret = 0;
			if (ret)
				break;
		}
	}

	/* free all vma_info entries accumulated on the try list */
	list_for_each_entry_safe(vi, tmpvi, &try_list, probe_list) {
		list_del(&vi->probe_list);
		kfree(vi);
	}

	return ret;
}
811 | |||
/* Install the breakpoint in every mm that maps uprobe->inode. */
static int __uprobe_register(struct uprobe *uprobe)
{
	return register_for_each_vma(uprobe, true);
}
816 | |||
/*
 * Remove the breakpoint from every mm that maps uprobe->inode;
 * when every removal succeeds, drop the uprobe from the tree.
 */
static void __uprobe_unregister(struct uprobe *uprobe)
{
	if (!register_for_each_vma(uprobe, false))
		delete_uprobe(uprobe);

	/* TODO : cant unregister? schedule a worker thread */
}
824 | |||
825 | /* | ||
826 | * uprobe_register - register a probe | ||
827 | * @inode: the file in which the probe has to be placed. | ||
828 | * @offset: offset from the start of the file. | ||
829 | * @uc: information on howto handle the probe.. | ||
830 | * | ||
831 | * Apart from the access refcount, uprobe_register() takes a creation | ||
832 | * refcount (thro alloc_uprobe) if and only if this @uprobe is getting | ||
833 | * inserted into the rbtree (i.e first consumer for a @inode:@offset | ||
834 | * tuple). Creation refcount stops uprobe_unregister from freeing the | ||
835 | * @uprobe even before the register operation is complete. Creation | ||
836 | * refcount is released when the last @uc for the @uprobe | ||
837 | * unregisters. | ||
838 | * | ||
839 | * Return errno if it cannot successully install probes | ||
840 | * else return 0 (success) | ||
841 | */ | ||
842 | int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) | ||
843 | { | ||
844 | struct uprobe *uprobe; | ||
845 | int ret; | ||
846 | |||
847 | if (!inode || !uc || uc->next) | ||
848 | return -EINVAL; | ||
849 | |||
850 | if (offset > i_size_read(inode)) | ||
851 | return -EINVAL; | ||
852 | |||
853 | ret = 0; | ||
854 | mutex_lock(uprobes_hash(inode)); | ||
855 | uprobe = alloc_uprobe(inode, offset); | ||
856 | |||
857 | if (uprobe && !consumer_add(uprobe, uc)) { | ||
858 | ret = __uprobe_register(uprobe); | ||
859 | if (ret) { | ||
860 | uprobe->consumers = NULL; | ||
861 | __uprobe_unregister(uprobe); | ||
862 | } else { | ||
863 | uprobe->flags |= UPROBE_RUN_HANDLER; | ||
864 | } | ||
865 | } | ||
866 | |||
867 | mutex_unlock(uprobes_hash(inode)); | ||
868 | put_uprobe(uprobe); | ||
869 | |||
870 | return ret; | ||
871 | } | ||
872 | |||
873 | /* | ||
874 | * uprobe_unregister - unregister a already registered probe. | ||
875 | * @inode: the file in which the probe has to be removed. | ||
876 | * @offset: offset from the start of the file. | ||
877 | * @uc: identify which probe if multiple probes are colocated. | ||
878 | */ | ||
879 | void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) | ||
880 | { | ||
881 | struct uprobe *uprobe; | ||
882 | |||
883 | if (!inode || !uc) | ||
884 | return; | ||
885 | |||
886 | uprobe = find_uprobe(inode, offset); | ||
887 | if (!uprobe) | ||
888 | return; | ||
889 | |||
890 | mutex_lock(uprobes_hash(inode)); | ||
891 | |||
892 | if (consumer_del(uprobe, uc)) { | ||
893 | if (!uprobe->consumers) { | ||
894 | __uprobe_unregister(uprobe); | ||
895 | uprobe->flags &= ~UPROBE_RUN_HANDLER; | ||
896 | } | ||
897 | } | ||
898 | |||
899 | mutex_unlock(uprobes_hash(inode)); | ||
900 | if (uprobe) | ||
901 | put_uprobe(uprobe); | ||
902 | } | ||
903 | |||
/*
 * Of all the nodes that correspond to the given inode, return the node
 * with the least offset.
 */
static struct rb_node *find_least_offset_node(struct inode *inode)
{
	/* offset 0 sorts at-or-before every real probe of this inode */
	struct uprobe u = { .inode = inode, .offset = 0};
	struct rb_node *n = uprobes_tree.rb_node;
	struct rb_node *close_node = NULL;
	struct uprobe *uprobe;
	int match;

	while (n) {
		uprobe = rb_entry(n, struct uprobe, rb_node);
		match = match_uprobe(&u, uprobe);

		/* remember the lowest-offset node of @inode seen so far */
		if (uprobe->inode == inode)
			close_node = n;

		if (!match)
			return close_node;

		if (match < 0)
			n = n->rb_left;
		else
			n = n->rb_right;
	}

	/* NULL when no probe exists for @inode */
	return close_node;
}
934 | |||
/*
 * For a given inode, build a list of probes that need to be inserted.
 * Each uprobe added to @head carries an extra reference that the
 * consumer of the list must drop with put_uprobe().
 */
static void build_probe_list(struct inode *inode, struct list_head *head)
{
	struct uprobe *uprobe;
	unsigned long flags;
	struct rb_node *n;

	spin_lock_irqsave(&uprobes_treelock, flags);

	/* start at the inode's lowest-offset probe and walk in tree order */
	n = find_least_offset_node(inode);

	for (; n; n = rb_next(n)) {
		uprobe = rb_entry(n, struct uprobe, rb_node);
		/* probes are sorted by (inode, offset): stop at the next inode */
		if (uprobe->inode != inode)
			break;

		list_add(&uprobe->pending_list, head);
		atomic_inc(&uprobe->ref);
	}

	spin_unlock_irqrestore(&uprobes_treelock, flags);
}
959 | |||
/*
 * Called from mmap_region.
 * called with mm->mmap_sem acquired.
 *
 * Return -ve no if we fail to insert probes and we cannot
 * bail-out.
 * Return 0 otherwise. i.e:
 *
 *	- successful insertion of probes
 *	- (or) no possible probes to be inserted.
 *	- (or) insertion of probes failed but we can bail-out.
 */
int uprobe_mmap(struct vm_area_struct *vma)
{
	struct list_head tmp_list;
	struct uprobe *uprobe, *u;
	struct inode *inode;
	int ret;

	/* fast path: no uprobes registered, or vma cannot hold probes */
	if (!atomic_read(&uprobe_events) || !valid_vma(vma, true))
		return 0;

	inode = vma->vm_file->f_mapping->host;
	if (!inode)
		return 0;

	INIT_LIST_HEAD(&tmp_list);
	mutex_lock(uprobes_mmap_hash(inode));
	/* collect (with refs) every uprobe registered against this inode */
	build_probe_list(inode, &tmp_list);

	ret = 0;

	list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
		loff_t vaddr;

		list_del(&uprobe->pending_list);
		/* after the first failure, just keep dropping references */
		if (!ret) {
			vaddr = vma_address(vma, uprobe->offset);
			if (vaddr >= vma->vm_start && vaddr < vma->vm_end) {
				ret = install_breakpoint(uprobe, vma->vm_mm, vma, vaddr);
				/* Ignore double add: */
				if (ret == -EEXIST)
					ret = 0;
			}
		}
		put_uprobe(uprobe);	/* ref taken in build_probe_list() */
	}

	mutex_unlock(uprobes_mmap_hash(inode));

	return ret;
}
1012 | |||
1013 | static int __init init_uprobes(void) | ||
1014 | { | ||
1015 | int i; | ||
1016 | |||
1017 | for (i = 0; i < UPROBES_HASH_SZ; i++) { | ||
1018 | mutex_init(&uprobes_mutex[i]); | ||
1019 | mutex_init(&uprobes_mmap_mutex[i]); | ||
1020 | } | ||
1021 | return 0; | ||
1022 | } | ||
1023 | |||
/* Nothing to tear down: the hash-bucket mutexes need no cleanup. */
static void __exit exit_uprobes(void)
{
}
1027 | |||
1028 | module_init(init_uprobes); | ||
1029 | module_exit(exit_uprobes); | ||
diff --git a/kernel/signal.c b/kernel/signal.c index c73c4284160..8511e39813c 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -1054,13 +1054,13 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, | |||
1054 | struct sigpending *pending; | 1054 | struct sigpending *pending; |
1055 | struct sigqueue *q; | 1055 | struct sigqueue *q; |
1056 | int override_rlimit; | 1056 | int override_rlimit; |
1057 | 1057 | int ret = 0, result; | |
1058 | trace_signal_generate(sig, info, t); | ||
1059 | 1058 | ||
1060 | assert_spin_locked(&t->sighand->siglock); | 1059 | assert_spin_locked(&t->sighand->siglock); |
1061 | 1060 | ||
1061 | result = TRACE_SIGNAL_IGNORED; | ||
1062 | if (!prepare_signal(sig, t, from_ancestor_ns)) | 1062 | if (!prepare_signal(sig, t, from_ancestor_ns)) |
1063 | return 0; | 1063 | goto ret; |
1064 | 1064 | ||
1065 | pending = group ? &t->signal->shared_pending : &t->pending; | 1065 | pending = group ? &t->signal->shared_pending : &t->pending; |
1066 | /* | 1066 | /* |
@@ -1068,8 +1068,11 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, | |||
1068 | * exactly one non-rt signal, so that we can get more | 1068 | * exactly one non-rt signal, so that we can get more |
1069 | * detailed information about the cause of the signal. | 1069 | * detailed information about the cause of the signal. |
1070 | */ | 1070 | */ |
1071 | result = TRACE_SIGNAL_ALREADY_PENDING; | ||
1071 | if (legacy_queue(pending, sig)) | 1072 | if (legacy_queue(pending, sig)) |
1072 | return 0; | 1073 | goto ret; |
1074 | |||
1075 | result = TRACE_SIGNAL_DELIVERED; | ||
1073 | /* | 1076 | /* |
1074 | * fast-pathed signals for kernel-internal things like SIGSTOP | 1077 | * fast-pathed signals for kernel-internal things like SIGSTOP |
1075 | * or SIGKILL. | 1078 | * or SIGKILL. |
@@ -1127,14 +1130,15 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, | |||
1127 | * signal was rt and sent by user using something | 1130 | * signal was rt and sent by user using something |
1128 | * other than kill(). | 1131 | * other than kill(). |
1129 | */ | 1132 | */ |
1130 | trace_signal_overflow_fail(sig, group, info); | 1133 | result = TRACE_SIGNAL_OVERFLOW_FAIL; |
1131 | return -EAGAIN; | 1134 | ret = -EAGAIN; |
1135 | goto ret; | ||
1132 | } else { | 1136 | } else { |
1133 | /* | 1137 | /* |
1134 | * This is a silent loss of information. We still | 1138 | * This is a silent loss of information. We still |
1135 | * send the signal, but the *info bits are lost. | 1139 | * send the signal, but the *info bits are lost. |
1136 | */ | 1140 | */ |
1137 | trace_signal_lose_info(sig, group, info); | 1141 | result = TRACE_SIGNAL_LOSE_INFO; |
1138 | } | 1142 | } |
1139 | } | 1143 | } |
1140 | 1144 | ||
@@ -1142,7 +1146,9 @@ out_set: | |||
1142 | signalfd_notify(t, sig); | 1146 | signalfd_notify(t, sig); |
1143 | sigaddset(&pending->signal, sig); | 1147 | sigaddset(&pending->signal, sig); |
1144 | complete_signal(sig, t, group); | 1148 | complete_signal(sig, t, group); |
1145 | return 0; | 1149 | ret: |
1150 | trace_signal_generate(sig, info, t, group, result); | ||
1151 | return ret; | ||
1146 | } | 1152 | } |
1147 | 1153 | ||
1148 | static int send_signal(int sig, struct siginfo *info, struct task_struct *t, | 1154 | static int send_signal(int sig, struct siginfo *info, struct task_struct *t, |
@@ -1585,7 +1591,7 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group) | |||
1585 | int sig = q->info.si_signo; | 1591 | int sig = q->info.si_signo; |
1586 | struct sigpending *pending; | 1592 | struct sigpending *pending; |
1587 | unsigned long flags; | 1593 | unsigned long flags; |
1588 | int ret; | 1594 | int ret, result; |
1589 | 1595 | ||
1590 | BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); | 1596 | BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); |
1591 | 1597 | ||
@@ -1594,6 +1600,7 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group) | |||
1594 | goto ret; | 1600 | goto ret; |
1595 | 1601 | ||
1596 | ret = 1; /* the signal is ignored */ | 1602 | ret = 1; /* the signal is ignored */ |
1603 | result = TRACE_SIGNAL_IGNORED; | ||
1597 | if (!prepare_signal(sig, t, 0)) | 1604 | if (!prepare_signal(sig, t, 0)) |
1598 | goto out; | 1605 | goto out; |
1599 | 1606 | ||
@@ -1605,6 +1612,7 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group) | |||
1605 | */ | 1612 | */ |
1606 | BUG_ON(q->info.si_code != SI_TIMER); | 1613 | BUG_ON(q->info.si_code != SI_TIMER); |
1607 | q->info.si_overrun++; | 1614 | q->info.si_overrun++; |
1615 | result = TRACE_SIGNAL_ALREADY_PENDING; | ||
1608 | goto out; | 1616 | goto out; |
1609 | } | 1617 | } |
1610 | q->info.si_overrun = 0; | 1618 | q->info.si_overrun = 0; |
@@ -1614,7 +1622,9 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group) | |||
1614 | list_add_tail(&q->list, &pending->list); | 1622 | list_add_tail(&q->list, &pending->list); |
1615 | sigaddset(&pending->signal, sig); | 1623 | sigaddset(&pending->signal, sig); |
1616 | complete_signal(sig, t, group); | 1624 | complete_signal(sig, t, group); |
1625 | result = TRACE_SIGNAL_DELIVERED; | ||
1617 | out: | 1626 | out: |
1627 | trace_signal_generate(sig, &q->info, t, group, result); | ||
1618 | unlock_task_sighand(t, &flags); | 1628 | unlock_task_sighand(t, &flags); |
1619 | ret: | 1629 | ret: |
1620 | return ret; | 1630 | return ret; |
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index d117262deba..14bc092fb12 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
@@ -3,12 +3,9 @@ | |||
3 | * | 3 | * |
4 | * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc. | 4 | * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc. |
5 | * | 5 | * |
6 | * this code detects hard lockups: incidents in where on a CPU | 6 | * Note: Most of this code is borrowed heavily from the original softlockup |
7 | * the kernel does not respond to anything except NMI. | 7 | * detector, so thanks to Ingo for the initial implementation. |
8 | * | 8 | * Some chunks also taken from the old x86-specific nmi watchdog code, thanks |
9 | * Note: Most of this code is borrowed heavily from softlockup.c, | ||
10 | * so thanks to Ingo for the initial implementation. | ||
11 | * Some chunks also taken from arch/x86/kernel/apic/nmi.c, thanks | ||
12 | * to those contributors as well. | 9 | * to those contributors as well. |
13 | */ | 10 | */ |
14 | 11 | ||
@@ -117,9 +114,10 @@ static unsigned long get_sample_period(void) | |||
117 | { | 114 | { |
118 | /* | 115 | /* |
119 | * convert watchdog_thresh from seconds to ns | 116 | * convert watchdog_thresh from seconds to ns |
120 | * the divide by 5 is to give hrtimer 5 chances to | 117 | * the divide by 5 is to give hrtimer several chances (two |
121 | * increment before the hardlockup detector generates | 118 | * or three with the current relation between the soft |
122 | * a warning | 119 | * and hard thresholds) to increment before the |
120 | * hardlockup detector generates a warning | ||
123 | */ | 121 | */ |
124 | return get_softlockup_thresh() * (NSEC_PER_SEC / 5); | 122 | return get_softlockup_thresh() * (NSEC_PER_SEC / 5); |
125 | } | 123 | } |
@@ -336,9 +334,11 @@ static int watchdog(void *unused) | |||
336 | 334 | ||
337 | set_current_state(TASK_INTERRUPTIBLE); | 335 | set_current_state(TASK_INTERRUPTIBLE); |
338 | /* | 336 | /* |
339 | * Run briefly once per second to reset the softlockup timestamp. | 337 | * Run briefly (kicked by the hrtimer callback function) once every |
340 | * If this gets delayed for more than 60 seconds then the | 338 | * get_sample_period() seconds (4 seconds by default) to reset the |
341 | * debug-printout triggers in watchdog_timer_fn(). | 339 | * softlockup timestamp. If this gets delayed for more than |
340 | * 2*watchdog_thresh seconds then the debug-printout triggers in | ||
341 | * watchdog_timer_fn(). | ||
342 | */ | 342 | */ |
343 | while (!kthread_should_stop()) { | 343 | while (!kthread_should_stop()) { |
344 | __touch_watchdog(); | 344 | __touch_watchdog(); |
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 8745ac7d1f7..9739c0b45e9 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug | |||
@@ -166,18 +166,21 @@ config LOCKUP_DETECTOR | |||
166 | hard and soft lockups. | 166 | hard and soft lockups. |
167 | 167 | ||
168 | Softlockups are bugs that cause the kernel to loop in kernel | 168 | Softlockups are bugs that cause the kernel to loop in kernel |
169 | mode for more than 60 seconds, without giving other tasks a | 169 | mode for more than 20 seconds, without giving other tasks a |
170 | chance to run. The current stack trace is displayed upon | 170 | chance to run. The current stack trace is displayed upon |
171 | detection and the system will stay locked up. | 171 | detection and the system will stay locked up. |
172 | 172 | ||
173 | Hardlockups are bugs that cause the CPU to loop in kernel mode | 173 | Hardlockups are bugs that cause the CPU to loop in kernel mode |
174 | for more than 60 seconds, without letting other interrupts have a | 174 | for more than 10 seconds, without letting other interrupts have a |
175 | chance to run. The current stack trace is displayed upon detection | 175 | chance to run. The current stack trace is displayed upon detection |
176 | and the system will stay locked up. | 176 | and the system will stay locked up. |
177 | 177 | ||
178 | The overhead should be minimal. A periodic hrtimer runs to | 178 | The overhead should be minimal. A periodic hrtimer runs to |
179 | generate interrupts and kick the watchdog task every 10-12 seconds. | 179 | generate interrupts and kick the watchdog task every 4 seconds. |
180 | An NMI is generated every 60 seconds or so to check for hardlockups. | 180 | An NMI is generated every 10 seconds or so to check for hardlockups. |
181 | |||
182 | The frequency of hrtimer and NMI events and the soft and hard lockup | ||
183 | thresholds can be controlled through the sysctl watchdog_thresh. | ||
181 | 184 | ||
182 | config HARDLOCKUP_DETECTOR | 185 | config HARDLOCKUP_DETECTOR |
183 | def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \ | 186 | def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \ |
@@ -189,7 +192,8 @@ config BOOTPARAM_HARDLOCKUP_PANIC | |||
189 | help | 192 | help |
190 | Say Y here to enable the kernel to panic on "hard lockups", | 193 | Say Y here to enable the kernel to panic on "hard lockups", |
191 | which are bugs that cause the kernel to loop in kernel | 194 | which are bugs that cause the kernel to loop in kernel |
192 | mode with interrupts disabled for more than 60 seconds. | 195 | mode with interrupts disabled for more than 10 seconds (configurable |
196 | using the watchdog_thresh sysctl). | ||
193 | 197 | ||
194 | Say N if unsure. | 198 | Say N if unsure. |
195 | 199 | ||
@@ -206,8 +210,8 @@ config BOOTPARAM_SOFTLOCKUP_PANIC | |||
206 | help | 210 | help |
207 | Say Y here to enable the kernel to panic on "soft lockups", | 211 | Say Y here to enable the kernel to panic on "soft lockups", |
208 | which are bugs that cause the kernel to loop in kernel | 212 | which are bugs that cause the kernel to loop in kernel |
209 | mode for more than 60 seconds, without giving other tasks a | 213 | mode for more than 20 seconds (configurable using the watchdog_thresh |
210 | chance to run. | 214 | sysctl), without giving other tasks a chance to run. |
211 | 215 | ||
212 | The panic can be used in combination with panic_timeout, | 216 | The panic can be used in combination with panic_timeout, |
213 | to cause the system to reboot automatically after a | 217 | to cause the system to reboot automatically after a |
@@ -30,6 +30,7 @@ | |||
30 | #include <linux/perf_event.h> | 30 | #include <linux/perf_event.h> |
31 | #include <linux/audit.h> | 31 | #include <linux/audit.h> |
32 | #include <linux/khugepaged.h> | 32 | #include <linux/khugepaged.h> |
33 | #include <linux/uprobes.h> | ||
33 | 34 | ||
34 | #include <asm/uaccess.h> | 35 | #include <asm/uaccess.h> |
35 | #include <asm/cacheflush.h> | 36 | #include <asm/cacheflush.h> |
@@ -616,6 +617,13 @@ again: remove_next = 1 + (end > next->vm_end); | |||
616 | if (mapping) | 617 | if (mapping) |
617 | mutex_unlock(&mapping->i_mmap_mutex); | 618 | mutex_unlock(&mapping->i_mmap_mutex); |
618 | 619 | ||
620 | if (root) { | ||
621 | uprobe_mmap(vma); | ||
622 | |||
623 | if (adjust_next) | ||
624 | uprobe_mmap(next); | ||
625 | } | ||
626 | |||
619 | if (remove_next) { | 627 | if (remove_next) { |
620 | if (file) { | 628 | if (file) { |
621 | fput(file); | 629 | fput(file); |
@@ -637,6 +645,8 @@ again: remove_next = 1 + (end > next->vm_end); | |||
637 | goto again; | 645 | goto again; |
638 | } | 646 | } |
639 | } | 647 | } |
648 | if (insert && file) | ||
649 | uprobe_mmap(insert); | ||
640 | 650 | ||
641 | validate_mm(mm); | 651 | validate_mm(mm); |
642 | 652 | ||
@@ -1329,6 +1339,11 @@ out: | |||
1329 | mm->locked_vm += (len >> PAGE_SHIFT); | 1339 | mm->locked_vm += (len >> PAGE_SHIFT); |
1330 | } else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK)) | 1340 | } else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK)) |
1331 | make_pages_present(addr, addr + len); | 1341 | make_pages_present(addr, addr + len); |
1342 | |||
1343 | if (file && uprobe_mmap(vma)) | ||
1344 | /* matching probes but cannot insert */ | ||
1345 | goto unmap_and_free_vma; | ||
1346 | |||
1332 | return addr; | 1347 | return addr; |
1333 | 1348 | ||
1334 | unmap_and_free_vma: | 1349 | unmap_and_free_vma: |
@@ -2285,6 +2300,10 @@ int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma) | |||
2285 | if ((vma->vm_flags & VM_ACCOUNT) && | 2300 | if ((vma->vm_flags & VM_ACCOUNT) && |
2286 | security_vm_enough_memory_mm(mm, vma_pages(vma))) | 2301 | security_vm_enough_memory_mm(mm, vma_pages(vma))) |
2287 | return -ENOMEM; | 2302 | return -ENOMEM; |
2303 | |||
2304 | if (vma->vm_file && uprobe_mmap(vma)) | ||
2305 | return -EINVAL; | ||
2306 | |||
2288 | vma_link(mm, vma, prev, rb_link, rb_parent); | 2307 | vma_link(mm, vma, prev, rb_link, rb_parent); |
2289 | return 0; | 2308 | return 0; |
2290 | } | 2309 | } |
@@ -2354,6 +2373,10 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, | |||
2354 | new_vma->vm_pgoff = pgoff; | 2373 | new_vma->vm_pgoff = pgoff; |
2355 | if (new_vma->vm_file) { | 2374 | if (new_vma->vm_file) { |
2356 | get_file(new_vma->vm_file); | 2375 | get_file(new_vma->vm_file); |
2376 | |||
2377 | if (uprobe_mmap(new_vma)) | ||
2378 | goto out_free_mempol; | ||
2379 | |||
2357 | if (vma->vm_flags & VM_EXECUTABLE) | 2380 | if (vma->vm_flags & VM_EXECUTABLE) |
2358 | added_exe_file_vma(mm); | 2381 | added_exe_file_vma(mm); |
2359 | } | 2382 | } |
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index 4626a398836..ca600e09c8d 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile | |||
@@ -1,3 +1,10 @@ | |||
1 | OUTPUT := ./ | ||
2 | ifeq ("$(origin O)", "command line") | ||
3 | ifneq ($(O),) | ||
4 | OUTPUT := $(O)/ | ||
5 | endif | ||
6 | endif | ||
7 | |||
1 | MAN1_TXT= \ | 8 | MAN1_TXT= \ |
2 | $(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \ | 9 | $(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \ |
3 | $(wildcard perf-*.txt)) \ | 10 | $(wildcard perf-*.txt)) \ |
@@ -6,10 +13,11 @@ MAN5_TXT= | |||
6 | MAN7_TXT= | 13 | MAN7_TXT= |
7 | 14 | ||
8 | MAN_TXT = $(MAN1_TXT) $(MAN5_TXT) $(MAN7_TXT) | 15 | MAN_TXT = $(MAN1_TXT) $(MAN5_TXT) $(MAN7_TXT) |
9 | MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT)) | 16 | _MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT)) |
10 | MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT)) | 17 | _MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT)) |
11 | 18 | ||
12 | DOC_HTML=$(MAN_HTML) | 19 | MAN_XML=$(addprefix $(OUTPUT),$(_MAN_XML)) |
20 | MAN_HTML=$(addprefix $(OUTPUT),$(_MAN_HTML)) | ||
13 | 21 | ||
14 | ARTICLES = | 22 | ARTICLES = |
15 | # with their own formatting rules. | 23 | # with their own formatting rules. |
@@ -18,11 +26,17 @@ API_DOCS = $(patsubst %.txt,%,$(filter-out technical/api-index-skel.txt technica | |||
18 | SP_ARTICLES += $(API_DOCS) | 26 | SP_ARTICLES += $(API_DOCS) |
19 | SP_ARTICLES += technical/api-index | 27 | SP_ARTICLES += technical/api-index |
20 | 28 | ||
21 | DOC_HTML += $(patsubst %,%.html,$(ARTICLES) $(SP_ARTICLES)) | 29 | _DOC_HTML = $(_MAN_HTML) |
30 | _DOC_HTML+=$(patsubst %,%.html,$(ARTICLES) $(SP_ARTICLES)) | ||
31 | DOC_HTML=$(addprefix $(OUTPUT),$(_DOC_HTML)) | ||
22 | 32 | ||
23 | DOC_MAN1=$(patsubst %.txt,%.1,$(MAN1_TXT)) | 33 | _DOC_MAN1=$(patsubst %.txt,%.1,$(MAN1_TXT)) |
24 | DOC_MAN5=$(patsubst %.txt,%.5,$(MAN5_TXT)) | 34 | _DOC_MAN5=$(patsubst %.txt,%.5,$(MAN5_TXT)) |
25 | DOC_MAN7=$(patsubst %.txt,%.7,$(MAN7_TXT)) | 35 | _DOC_MAN7=$(patsubst %.txt,%.7,$(MAN7_TXT)) |
36 | |||
37 | DOC_MAN1=$(addprefix $(OUTPUT),$(_DOC_MAN1)) | ||
38 | DOC_MAN5=$(addprefix $(OUTPUT),$(_DOC_MAN5)) | ||
39 | DOC_MAN7=$(addprefix $(OUTPUT),$(_DOC_MAN7)) | ||
26 | 40 | ||
27 | # Make the path relative to DESTDIR, not prefix | 41 | # Make the path relative to DESTDIR, not prefix |
28 | ifndef DESTDIR | 42 | ifndef DESTDIR |
@@ -150,9 +164,9 @@ man1: $(DOC_MAN1) | |||
150 | man5: $(DOC_MAN5) | 164 | man5: $(DOC_MAN5) |
151 | man7: $(DOC_MAN7) | 165 | man7: $(DOC_MAN7) |
152 | 166 | ||
153 | info: perf.info perfman.info | 167 | info: $(OUTPUT)perf.info $(OUTPUT)perfman.info |
154 | 168 | ||
155 | pdf: user-manual.pdf | 169 | pdf: $(OUTPUT)user-manual.pdf |
156 | 170 | ||
157 | install: install-man | 171 | install: install-man |
158 | 172 | ||
@@ -166,7 +180,7 @@ install-man: man | |||
166 | 180 | ||
167 | install-info: info | 181 | install-info: info |
168 | $(INSTALL) -d -m 755 $(DESTDIR)$(infodir) | 182 | $(INSTALL) -d -m 755 $(DESTDIR)$(infodir) |
169 | $(INSTALL) -m 644 perf.info perfman.info $(DESTDIR)$(infodir) | 183 | $(INSTALL) -m 644 $(OUTPUT)perf.info $(OUTPUT)perfman.info $(DESTDIR)$(infodir) |
170 | if test -r $(DESTDIR)$(infodir)/dir; then \ | 184 | if test -r $(DESTDIR)$(infodir)/dir; then \ |
171 | $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perf.info ;\ | 185 | $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perf.info ;\ |
172 | $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perfman.info ;\ | 186 | $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perfman.info ;\ |
@@ -176,7 +190,7 @@ install-info: info | |||
176 | 190 | ||
177 | install-pdf: pdf | 191 | install-pdf: pdf |
178 | $(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir) | 192 | $(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir) |
179 | $(INSTALL) -m 644 user-manual.pdf $(DESTDIR)$(pdfdir) | 193 | $(INSTALL) -m 644 $(OUTPUT)user-manual.pdf $(DESTDIR)$(pdfdir) |
180 | 194 | ||
181 | #install-html: html | 195 | #install-html: html |
182 | # '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir) | 196 | # '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir) |
@@ -189,14 +203,14 @@ install-pdf: pdf | |||
189 | # | 203 | # |
190 | # Determine "include::" file references in asciidoc files. | 204 | # Determine "include::" file references in asciidoc files. |
191 | # | 205 | # |
192 | doc.dep : $(wildcard *.txt) build-docdep.perl | 206 | $(OUTPUT)doc.dep : $(wildcard *.txt) build-docdep.perl |
193 | $(QUIET_GEN)$(RM) $@+ $@ && \ | 207 | $(QUIET_GEN)$(RM) $@+ $@ && \ |
194 | $(PERL_PATH) ./build-docdep.perl >$@+ $(QUIET_STDERR) && \ | 208 | $(PERL_PATH) ./build-docdep.perl >$@+ $(QUIET_STDERR) && \ |
195 | mv $@+ $@ | 209 | mv $@+ $@ |
196 | 210 | ||
197 | -include doc.dep | 211 | -include $(OUTPUT)doc.dep |
198 | 212 | ||
199 | cmds_txt = cmds-ancillaryinterrogators.txt \ | 213 | _cmds_txt = cmds-ancillaryinterrogators.txt \ |
200 | cmds-ancillarymanipulators.txt \ | 214 | cmds-ancillarymanipulators.txt \ |
201 | cmds-mainporcelain.txt \ | 215 | cmds-mainporcelain.txt \ |
202 | cmds-plumbinginterrogators.txt \ | 216 | cmds-plumbinginterrogators.txt \ |
@@ -205,32 +219,36 @@ cmds_txt = cmds-ancillaryinterrogators.txt \ | |||
205 | cmds-synchelpers.txt \ | 219 | cmds-synchelpers.txt \ |
206 | cmds-purehelpers.txt \ | 220 | cmds-purehelpers.txt \ |
207 | cmds-foreignscminterface.txt | 221 | cmds-foreignscminterface.txt |
222 | cmds_txt=$(addprefix $(OUTPUT),$(_cmds_txt)) | ||
208 | 223 | ||
209 | $(cmds_txt): cmd-list.made | 224 | $(cmds_txt): $(OUTPUT)cmd-list.made |
210 | 225 | ||
211 | cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT) | 226 | $(OUTPUT)cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT) |
212 | $(QUIET_GEN)$(RM) $@ && \ | 227 | $(QUIET_GEN)$(RM) $@ && \ |
213 | $(PERL_PATH) ./cmd-list.perl ../command-list.txt $(QUIET_STDERR) && \ | 228 | $(PERL_PATH) ./cmd-list.perl ../command-list.txt $(QUIET_STDERR) && \ |
214 | date >$@ | 229 | date >$@ |
215 | 230 | ||
216 | clean: | 231 | clean: |
217 | $(RM) *.xml *.xml+ *.html *.html+ *.1 *.5 *.7 | 232 | $(RM) $(MAN_XML) $(addsuffix +,$(MAN_XML)) |
218 | $(RM) *.texi *.texi+ *.texi++ perf.info perfman.info | 233 | $(RM) $(MAN_HTML) $(addsuffix +,$(MAN_HTML)) |
219 | $(RM) howto-index.txt howto/*.html doc.dep | 234 | $(RM) $(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7) |
220 | $(RM) technical/api-*.html technical/api-index.txt | 235 | $(RM) $(OUTPUT)*.texi $(OUTPUT)*.texi+ $(OUTPUT)*.texi++ |
221 | $(RM) $(cmds_txt) *.made | 236 | $(RM) $(OUTPUT)perf.info $(OUTPUT)perfman.info |
222 | 237 | $(RM) $(OUTPUT)howto-index.txt $(OUTPUT)howto/*.html $(OUTPUT)doc.dep | |
223 | $(MAN_HTML): %.html : %.txt | 238 | $(RM) $(OUTPUT)technical/api-*.html $(OUTPUT)technical/api-index.txt |
239 | $(RM) $(cmds_txt) $(OUTPUT)*.made | ||
240 | |||
241 | $(MAN_HTML): $(OUTPUT)%.html : %.txt | ||
224 | $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ | 242 | $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ |
225 | $(ASCIIDOC) -b xhtml11 -d manpage -f asciidoc.conf \ | 243 | $(ASCIIDOC) -b xhtml11 -d manpage -f asciidoc.conf \ |
226 | $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ | 244 | $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ |
227 | mv $@+ $@ | 245 | mv $@+ $@ |
228 | 246 | ||
229 | %.1 %.5 %.7 : %.xml | 247 | $(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml |
230 | $(QUIET_XMLTO)$(RM) $@ && \ | 248 | $(QUIET_XMLTO)$(RM) $@ && \ |
231 | xmlto -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $< | 249 | xmlto -o $(OUTPUT) -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $< |
232 | 250 | ||
233 | %.xml : %.txt | 251 | $(OUTPUT)%.xml : %.txt |
234 | $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ | 252 | $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ |
235 | $(ASCIIDOC) -b docbook -d manpage -f asciidoc.conf \ | 253 | $(ASCIIDOC) -b docbook -d manpage -f asciidoc.conf \ |
236 | $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ | 254 | $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ |
@@ -239,25 +257,25 @@ $(MAN_HTML): %.html : %.txt | |||
239 | XSLT = docbook.xsl | 257 | XSLT = docbook.xsl |
240 | XSLTOPTS = --xinclude --stringparam html.stylesheet docbook-xsl.css | 258 | XSLTOPTS = --xinclude --stringparam html.stylesheet docbook-xsl.css |
241 | 259 | ||
242 | user-manual.html: user-manual.xml | 260 | $(OUTPUT)user-manual.html: $(OUTPUT)user-manual.xml |
243 | $(QUIET_XSLTPROC)xsltproc $(XSLTOPTS) -o $@ $(XSLT) $< | 261 | $(QUIET_XSLTPROC)xsltproc $(XSLTOPTS) -o $@ $(XSLT) $< |
244 | 262 | ||
245 | perf.info: user-manual.texi | 263 | $(OUTPUT)perf.info: $(OUTPUT)user-manual.texi |
246 | $(QUIET_MAKEINFO)$(MAKEINFO) --no-split -o $@ user-manual.texi | 264 | $(QUIET_MAKEINFO)$(MAKEINFO) --no-split -o $@ $(OUTPUT)user-manual.texi |
247 | 265 | ||
248 | user-manual.texi: user-manual.xml | 266 | $(OUTPUT)user-manual.texi: $(OUTPUT)user-manual.xml |
249 | $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ | 267 | $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ |
250 | $(DOCBOOK2X_TEXI) user-manual.xml --encoding=UTF-8 --to-stdout >$@++ && \ | 268 | $(DOCBOOK2X_TEXI) $(OUTPUT)user-manual.xml --encoding=UTF-8 --to-stdout >$@++ && \ |
251 | $(PERL_PATH) fix-texi.perl <$@++ >$@+ && \ | 269 | $(PERL_PATH) fix-texi.perl <$@++ >$@+ && \ |
252 | rm $@++ && \ | 270 | rm $@++ && \ |
253 | mv $@+ $@ | 271 | mv $@+ $@ |
254 | 272 | ||
255 | user-manual.pdf: user-manual.xml | 273 | $(OUTPUT)user-manual.pdf: $(OUTPUT)user-manual.xml |
256 | $(QUIET_DBLATEX)$(RM) $@+ $@ && \ | 274 | $(QUIET_DBLATEX)$(RM) $@+ $@ && \ |
257 | $(DBLATEX) -o $@+ -p /etc/asciidoc/dblatex/asciidoc-dblatex.xsl -s /etc/asciidoc/dblatex/asciidoc-dblatex.sty $< && \ | 275 | $(DBLATEX) -o $@+ -p /etc/asciidoc/dblatex/asciidoc-dblatex.xsl -s /etc/asciidoc/dblatex/asciidoc-dblatex.sty $< && \ |
258 | mv $@+ $@ | 276 | mv $@+ $@ |
259 | 277 | ||
260 | perfman.texi: $(MAN_XML) cat-texi.perl | 278 | $(OUTPUT)perfman.texi: $(MAN_XML) cat-texi.perl |
261 | $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ | 279 | $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ |
262 | ($(foreach xml,$(MAN_XML),$(DOCBOOK2X_TEXI) --encoding=UTF-8 \ | 280 | ($(foreach xml,$(MAN_XML),$(DOCBOOK2X_TEXI) --encoding=UTF-8 \ |
263 | --to-stdout $(xml) &&) true) > $@++ && \ | 281 | --to-stdout $(xml) &&) true) > $@++ && \ |
@@ -265,7 +283,7 @@ perfman.texi: $(MAN_XML) cat-texi.perl | |||
265 | rm $@++ && \ | 283 | rm $@++ && \ |
266 | mv $@+ $@ | 284 | mv $@+ $@ |
267 | 285 | ||
268 | perfman.info: perfman.texi | 286 | $(OUTPUT)perfman.info: $(OUTPUT)perfman.texi |
269 | $(QUIET_MAKEINFO)$(MAKEINFO) --no-split --no-validate $*.texi | 287 | $(QUIET_MAKEINFO)$(MAKEINFO) --no-split --no-validate $*.texi |
270 | 288 | ||
271 | $(patsubst %.txt,%.texi,$(MAN_TXT)): %.texi : %.xml | 289 | $(patsubst %.txt,%.texi,$(MAN_TXT)): %.texi : %.xml |
diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt index d6b2a4f2108..c7f5f55634a 100644 --- a/tools/perf/Documentation/perf-lock.txt +++ b/tools/perf/Documentation/perf-lock.txt | |||
@@ -8,7 +8,7 @@ perf-lock - Analyze lock events | |||
8 | SYNOPSIS | 8 | SYNOPSIS |
9 | -------- | 9 | -------- |
10 | [verse] | 10 | [verse] |
11 | 'perf lock' {record|report|trace} | 11 | 'perf lock' {record|report|script|info} |
12 | 12 | ||
13 | DESCRIPTION | 13 | DESCRIPTION |
14 | ----------- | 14 | ----------- |
@@ -20,10 +20,13 @@ and statistics with this 'perf lock' command. | |||
20 | produces the file "perf.data" which contains tracing | 20 | produces the file "perf.data" which contains tracing |
21 | results of lock events. | 21 | results of lock events. |
22 | 22 | ||
23 | 'perf lock trace' shows raw lock events. | ||
24 | |||
25 | 'perf lock report' reports statistical data. | 23 | 'perf lock report' reports statistical data. |
26 | 24 | ||
25 | 'perf lock script' shows raw lock events. | ||
26 | |||
27 | 'perf lock info' shows metadata like threads or addresses | ||
28 | of lock instances. | ||
29 | |||
27 | COMMON OPTIONS | 30 | COMMON OPTIONS |
28 | -------------- | 31 | -------------- |
29 | 32 | ||
@@ -47,6 +50,17 @@ REPORT OPTIONS | |||
47 | Sorting key. Possible values: acquired (default), contended, | 50 | Sorting key. Possible values: acquired (default), contended, |
48 | wait_total, wait_max, wait_min. | 51 | wait_total, wait_max, wait_min. |
49 | 52 | ||
53 | INFO OPTIONS | ||
54 | ------------ | ||
55 | |||
56 | -t:: | ||
57 | --threads:: | ||
58 | dump thread list in perf.data | ||
59 | |||
60 | -m:: | ||
61 | --map:: | ||
62 | dump map of lock instances (address:name table) | ||
63 | |||
50 | SEE ALSO | 64 | SEE ALSO |
51 | -------- | 65 | -------- |
52 | linkperf:perf[1] | 66 | linkperf:perf[1] |
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 2937f7e14bb..a5766b4b012 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt | |||
@@ -52,11 +52,15 @@ OPTIONS | |||
52 | 52 | ||
53 | -p:: | 53 | -p:: |
54 | --pid=:: | 54 | --pid=:: |
55 | Record events on existing process ID. | 55 | Record events on existing process ID (comma separated list). |
56 | 56 | ||
57 | -t:: | 57 | -t:: |
58 | --tid=:: | 58 | --tid=:: |
59 | Record events on existing thread ID. | 59 | Record events on existing thread ID (comma separated list). |
60 | |||
61 | -u:: | ||
62 | --uid=:: | ||
63 | Record events in threads owned by uid. Name or number. | ||
60 | 64 | ||
61 | -r:: | 65 | -r:: |
62 | --realtime=:: | 66 | --realtime=:: |
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 2f6cef43da2..e9cbfcddfa3 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt | |||
@@ -115,7 +115,7 @@ OPTIONS | |||
115 | -f:: | 115 | -f:: |
116 | --fields:: | 116 | --fields:: |
117 | Comma separated list of fields to print. Options are: | 117 | Comma separated list of fields to print. Options are: |
118 | comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr. | 118 | comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff. |
119 | Field list can be prepended with the type, trace, sw or hw, | 119 | Field list can be prepended with the type, trace, sw or hw, |
120 | to indicate to which event type the field list applies. | 120 | to indicate to which event type the field list applies. |
121 | e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace | 121 | e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace |
@@ -200,6 +200,9 @@ OPTIONS | |||
200 | It currently includes: cpu and numa topology of the host system. | 200 | It currently includes: cpu and numa topology of the host system. |
201 | It can only be used with the perf script report mode. | 201 | It can only be used with the perf script report mode. |
202 | 202 | ||
203 | --show-kernel-path:: | ||
204 | Try to resolve the path of [kernel.kallsyms] | ||
205 | |||
203 | SEE ALSO | 206 | SEE ALSO |
204 | -------- | 207 | -------- |
205 | linkperf:perf-record[1], linkperf:perf-script-perl[1], | 208 | linkperf:perf-record[1], linkperf:perf-script-perl[1], |
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 8966b9ab201..2fa173b5197 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt | |||
@@ -35,11 +35,11 @@ OPTIONS | |||
35 | child tasks do not inherit counters | 35 | child tasks do not inherit counters |
36 | -p:: | 36 | -p:: |
37 | --pid=<pid>:: | 37 | --pid=<pid>:: |
38 | stat events on existing process id | 38 | stat events on existing process id (comma separated list) |
39 | 39 | ||
40 | -t:: | 40 | -t:: |
41 | --tid=<tid>:: | 41 | --tid=<tid>:: |
42 | stat events on existing thread id | 42 | stat events on existing thread id (comma separated list) |
43 | 43 | ||
44 | 44 | ||
45 | -a:: | 45 | -a:: |
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index b1a5bbbfebe..4a5680cb242 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt | |||
@@ -72,11 +72,15 @@ Default is to monitor all CPUS. | |||
72 | 72 | ||
73 | -p <pid>:: | 73 | -p <pid>:: |
74 | --pid=<pid>:: | 74 | --pid=<pid>:: |
75 | Profile events on existing Process ID. | 75 | Profile events on existing Process ID (comma separated list). |
76 | 76 | ||
77 | -t <tid>:: | 77 | -t <tid>:: |
78 | --tid=<tid>:: | 78 | --tid=<tid>:: |
79 | Profile events on existing thread ID. | 79 | Profile events on existing thread ID (comma separated list). |
80 | |||
81 | -u:: | ||
82 | --uid=:: | ||
83 | Record events in threads owned by uid. Name or number. | ||
80 | 84 | ||
81 | -r <priority>:: | 85 | -r <priority>:: |
82 | --realtime=<priority>:: | 86 | --realtime=<priority>:: |
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 1078c5fadd5..5476bc0a1ea 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST | |||
@@ -9,6 +9,7 @@ lib/rbtree.c | |||
9 | include/linux/swab.h | 9 | include/linux/swab.h |
10 | arch/*/include/asm/unistd*.h | 10 | arch/*/include/asm/unistd*.h |
11 | arch/*/lib/memcpy*.S | 11 | arch/*/lib/memcpy*.S |
12 | arch/*/lib/memset*.S | ||
12 | include/linux/poison.h | 13 | include/linux/poison.h |
13 | include/linux/magic.h | 14 | include/linux/magic.h |
14 | include/linux/hw_breakpoint.h | 15 | include/linux/hw_breakpoint.h |
diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 7c12650165a..e011b5060f9 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile | |||
@@ -61,7 +61,7 @@ ifeq ($(ARCH),x86_64) | |||
61 | ifeq (${IS_X86_64}, 1) | 61 | ifeq (${IS_X86_64}, 1) |
62 | RAW_ARCH := x86_64 | 62 | RAW_ARCH := x86_64 |
63 | ARCH_CFLAGS := -DARCH_X86_64 | 63 | ARCH_CFLAGS := -DARCH_X86_64 |
64 | ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S | 64 | ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S |
65 | endif | 65 | endif |
66 | endif | 66 | endif |
67 | 67 | ||
@@ -183,7 +183,10 @@ SCRIPT_SH += perf-archive.sh | |||
183 | grep-libs = $(filter -l%,$(1)) | 183 | grep-libs = $(filter -l%,$(1)) |
184 | strip-libs = $(filter-out -l%,$(1)) | 184 | strip-libs = $(filter-out -l%,$(1)) |
185 | 185 | ||
186 | $(OUTPUT)python/perf.so: $(PYRF_OBJS) | 186 | PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources) |
187 | PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py | ||
188 | |||
189 | $(OUTPUT)python/perf.so: $(PYRF_OBJS) $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) | ||
187 | $(QUIET_GEN)CFLAGS='$(BASIC_CFLAGS)' $(PYTHON_WORD) util/setup.py \ | 190 | $(QUIET_GEN)CFLAGS='$(BASIC_CFLAGS)' $(PYTHON_WORD) util/setup.py \ |
188 | --quiet build_ext; \ | 191 | --quiet build_ext; \ |
189 | mkdir -p $(OUTPUT)python && \ | 192 | mkdir -p $(OUTPUT)python && \ |
@@ -256,6 +259,7 @@ LIB_H += util/callchain.h | |||
256 | LIB_H += util/build-id.h | 259 | LIB_H += util/build-id.h |
257 | LIB_H += util/debug.h | 260 | LIB_H += util/debug.h |
258 | LIB_H += util/debugfs.h | 261 | LIB_H += util/debugfs.h |
262 | LIB_H += util/sysfs.h | ||
259 | LIB_H += util/event.h | 263 | LIB_H += util/event.h |
260 | LIB_H += util/evsel.h | 264 | LIB_H += util/evsel.h |
261 | LIB_H += util/evlist.h | 265 | LIB_H += util/evlist.h |
@@ -302,6 +306,7 @@ LIB_OBJS += $(OUTPUT)util/build-id.o | |||
302 | LIB_OBJS += $(OUTPUT)util/config.o | 306 | LIB_OBJS += $(OUTPUT)util/config.o |
303 | LIB_OBJS += $(OUTPUT)util/ctype.o | 307 | LIB_OBJS += $(OUTPUT)util/ctype.o |
304 | LIB_OBJS += $(OUTPUT)util/debugfs.o | 308 | LIB_OBJS += $(OUTPUT)util/debugfs.o |
309 | LIB_OBJS += $(OUTPUT)util/sysfs.o | ||
305 | LIB_OBJS += $(OUTPUT)util/environment.o | 310 | LIB_OBJS += $(OUTPUT)util/environment.o |
306 | LIB_OBJS += $(OUTPUT)util/event.o | 311 | LIB_OBJS += $(OUTPUT)util/event.o |
307 | LIB_OBJS += $(OUTPUT)util/evlist.o | 312 | LIB_OBJS += $(OUTPUT)util/evlist.o |
@@ -359,8 +364,10 @@ BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o | |||
359 | BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o | 364 | BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o |
360 | ifeq ($(RAW_ARCH),x86_64) | 365 | ifeq ($(RAW_ARCH),x86_64) |
361 | BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o | 366 | BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o |
367 | BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o | ||
362 | endif | 368 | endif |
363 | BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o | 369 | BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o |
370 | BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o | ||
364 | 371 | ||
365 | BUILTIN_OBJS += $(OUTPUT)builtin-diff.o | 372 | BUILTIN_OBJS += $(OUTPUT)builtin-diff.o |
366 | BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o | 373 | BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o |
@@ -792,7 +799,6 @@ help: | |||
792 | @echo ' quick-install-html - install the html documentation quickly' | 799 | @echo ' quick-install-html - install the html documentation quickly' |
793 | @echo '' | 800 | @echo '' |
794 | @echo 'Perf maintainer targets:' | 801 | @echo 'Perf maintainer targets:' |
795 | @echo ' distclean - alias to clean' | ||
796 | @echo ' clean - clean all binary objects and build output' | 802 | @echo ' clean - clean all binary objects and build output' |
797 | 803 | ||
798 | doc: | 804 | doc: |
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index f7781c6267c..a09bece6dad 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h | |||
@@ -4,6 +4,7 @@ | |||
4 | extern int bench_sched_messaging(int argc, const char **argv, const char *prefix); | 4 | extern int bench_sched_messaging(int argc, const char **argv, const char *prefix); |
5 | extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); | 5 | extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); |
6 | extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used); | 6 | extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used); |
7 | extern int bench_mem_memset(int argc, const char **argv, const char *prefix); | ||
7 | 8 | ||
8 | #define BENCH_FORMAT_DEFAULT_STR "default" | 9 | #define BENCH_FORMAT_DEFAULT_STR "default" |
9 | #define BENCH_FORMAT_DEFAULT 0 | 10 | #define BENCH_FORMAT_DEFAULT 0 |
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h index d588b87696f..d66ab799b35 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h +++ b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h | |||
@@ -2,3 +2,11 @@ | |||
2 | MEMCPY_FN(__memcpy, | 2 | MEMCPY_FN(__memcpy, |
3 | "x86-64-unrolled", | 3 | "x86-64-unrolled", |
4 | "unrolled memcpy() in arch/x86/lib/memcpy_64.S") | 4 | "unrolled memcpy() in arch/x86/lib/memcpy_64.S") |
5 | |||
6 | MEMCPY_FN(memcpy_c, | ||
7 | "x86-64-movsq", | ||
8 | "movsq-based memcpy() in arch/x86/lib/memcpy_64.S") | ||
9 | |||
10 | MEMCPY_FN(memcpy_c_e, | ||
11 | "x86-64-movsb", | ||
12 | "movsb-based memcpy() in arch/x86/lib/memcpy_64.S") | ||
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S index 185a96d66dd..fcd9cf00600 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm.S +++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S | |||
@@ -1,4 +1,8 @@ | |||
1 | 1 | #define memcpy MEMCPY /* don't hide glibc's memcpy() */ | |
2 | #define altinstr_replacement text | ||
3 | #define globl p2align 4; .globl | ||
4 | #define Lmemcpy_c globl memcpy_c; memcpy_c | ||
5 | #define Lmemcpy_c_e globl memcpy_c_e; memcpy_c_e | ||
2 | #include "../../../arch/x86/lib/memcpy_64.S" | 6 | #include "../../../arch/x86/lib/memcpy_64.S" |
3 | /* | 7 | /* |
4 | * We need to provide note.GNU-stack section, saying that we want | 8 | * We need to provide note.GNU-stack section, saying that we want |
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c index db82021f4b9..71557225bf9 100644 --- a/tools/perf/bench/mem-memcpy.c +++ b/tools/perf/bench/mem-memcpy.c | |||
@@ -5,7 +5,6 @@ | |||
5 | * | 5 | * |
6 | * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> | 6 | * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> |
7 | */ | 7 | */ |
8 | #include <ctype.h> | ||
9 | 8 | ||
10 | #include "../perf.h" | 9 | #include "../perf.h" |
11 | #include "../util/util.h" | 10 | #include "../util/util.h" |
@@ -24,6 +23,7 @@ | |||
24 | 23 | ||
25 | static const char *length_str = "1MB"; | 24 | static const char *length_str = "1MB"; |
26 | static const char *routine = "default"; | 25 | static const char *routine = "default"; |
26 | static int iterations = 1; | ||
27 | static bool use_clock; | 27 | static bool use_clock; |
28 | static int clock_fd; | 28 | static int clock_fd; |
29 | static bool only_prefault; | 29 | static bool only_prefault; |
@@ -35,6 +35,8 @@ static const struct option options[] = { | |||
35 | "available unit: B, MB, GB (upper and lower)"), | 35 | "available unit: B, MB, GB (upper and lower)"), |
36 | OPT_STRING('r', "routine", &routine, "default", | 36 | OPT_STRING('r', "routine", &routine, "default", |
37 | "Specify routine to copy"), | 37 | "Specify routine to copy"), |
38 | OPT_INTEGER('i', "iterations", &iterations, | ||
39 | "repeat memcpy() invocation this number of times"), | ||
38 | OPT_BOOLEAN('c', "clock", &use_clock, | 40 | OPT_BOOLEAN('c', "clock", &use_clock, |
39 | "Use CPU clock for measuring"), | 41 | "Use CPU clock for measuring"), |
40 | OPT_BOOLEAN('o', "only-prefault", &only_prefault, | 42 | OPT_BOOLEAN('o', "only-prefault", &only_prefault, |
@@ -121,6 +123,7 @@ static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault) | |||
121 | { | 123 | { |
122 | u64 clock_start = 0ULL, clock_end = 0ULL; | 124 | u64 clock_start = 0ULL, clock_end = 0ULL; |
123 | void *src = NULL, *dst = NULL; | 125 | void *src = NULL, *dst = NULL; |
126 | int i; | ||
124 | 127 | ||
125 | alloc_mem(&src, &dst, len); | 128 | alloc_mem(&src, &dst, len); |
126 | 129 | ||
@@ -128,7 +131,8 @@ static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault) | |||
128 | fn(dst, src, len); | 131 | fn(dst, src, len); |
129 | 132 | ||
130 | clock_start = get_clock(); | 133 | clock_start = get_clock(); |
131 | fn(dst, src, len); | 134 | for (i = 0; i < iterations; ++i) |
135 | fn(dst, src, len); | ||
132 | clock_end = get_clock(); | 136 | clock_end = get_clock(); |
133 | 137 | ||
134 | free(src); | 138 | free(src); |
@@ -140,6 +144,7 @@ static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault) | |||
140 | { | 144 | { |
141 | struct timeval tv_start, tv_end, tv_diff; | 145 | struct timeval tv_start, tv_end, tv_diff; |
142 | void *src = NULL, *dst = NULL; | 146 | void *src = NULL, *dst = NULL; |
147 | int i; | ||
143 | 148 | ||
144 | alloc_mem(&src, &dst, len); | 149 | alloc_mem(&src, &dst, len); |
145 | 150 | ||
@@ -147,7 +152,8 @@ static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault) | |||
147 | fn(dst, src, len); | 152 | fn(dst, src, len); |
148 | 153 | ||
149 | BUG_ON(gettimeofday(&tv_start, NULL)); | 154 | BUG_ON(gettimeofday(&tv_start, NULL)); |
150 | fn(dst, src, len); | 155 | for (i = 0; i < iterations; ++i) |
156 | fn(dst, src, len); | ||
151 | BUG_ON(gettimeofday(&tv_end, NULL)); | 157 | BUG_ON(gettimeofday(&tv_end, NULL)); |
152 | 158 | ||
153 | timersub(&tv_end, &tv_start, &tv_diff); | 159 | timersub(&tv_end, &tv_start, &tv_diff); |
diff --git a/tools/perf/bench/mem-memset-arch.h b/tools/perf/bench/mem-memset-arch.h new file mode 100644 index 00000000000..a040fa77665 --- /dev/null +++ b/tools/perf/bench/mem-memset-arch.h | |||
@@ -0,0 +1,12 @@ | |||
1 | |||
2 | #ifdef ARCH_X86_64 | ||
3 | |||
4 | #define MEMSET_FN(fn, name, desc) \ | ||
5 | extern void *fn(void *, int, size_t); | ||
6 | |||
7 | #include "mem-memset-x86-64-asm-def.h" | ||
8 | |||
9 | #undef MEMSET_FN | ||
10 | |||
11 | #endif | ||
12 | |||
diff --git a/tools/perf/bench/mem-memset-x86-64-asm-def.h b/tools/perf/bench/mem-memset-x86-64-asm-def.h new file mode 100644 index 00000000000..a71dff97c1f --- /dev/null +++ b/tools/perf/bench/mem-memset-x86-64-asm-def.h | |||
@@ -0,0 +1,12 @@ | |||
1 | |||
2 | MEMSET_FN(__memset, | ||
3 | "x86-64-unrolled", | ||
4 | "unrolled memset() in arch/x86/lib/memset_64.S") | ||
5 | |||
6 | MEMSET_FN(memset_c, | ||
7 | "x86-64-stosq", | ||
8 | "movsq-based memset() in arch/x86/lib/memset_64.S") | ||
9 | |||
10 | MEMSET_FN(memset_c_e, | ||
11 | "x86-64-stosb", | ||
12 | "movsb-based memset() in arch/x86/lib/memset_64.S") | ||
diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S new file mode 100644 index 00000000000..9e5af89ed13 --- /dev/null +++ b/tools/perf/bench/mem-memset-x86-64-asm.S | |||
@@ -0,0 +1,13 @@ | |||
1 | #define memset MEMSET /* don't hide glibc's memset() */ | ||
2 | #define altinstr_replacement text | ||
3 | #define globl p2align 4; .globl | ||
4 | #define Lmemset_c globl memset_c; memset_c | ||
5 | #define Lmemset_c_e globl memset_c_e; memset_c_e | ||
6 | #include "../../../arch/x86/lib/memset_64.S" | ||
7 | |||
8 | /* | ||
9 | * We need to provide note.GNU-stack section, saying that we want | ||
10 | * NOT executable stack. Otherwise the final linking will assume that | ||
11 | * the ELF stack should not be restricted at all and set it RWX. | ||
12 | */ | ||
13 | .section .note.GNU-stack,"",@progbits | ||
diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c new file mode 100644 index 00000000000..e9079185bd7 --- /dev/null +++ b/tools/perf/bench/mem-memset.c | |||
@@ -0,0 +1,297 @@ | |||
1 | /* | ||
2 | * mem-memset.c | ||
3 | * | ||
4 | * memset: Simple memory set in various ways | ||
5 | * | ||
6 | * Trivial clone of mem-memcpy.c. | ||
7 | */ | ||
8 | |||
9 | #include "../perf.h" | ||
10 | #include "../util/util.h" | ||
11 | #include "../util/parse-options.h" | ||
12 | #include "../util/header.h" | ||
13 | #include "bench.h" | ||
14 | #include "mem-memset-arch.h" | ||
15 | |||
16 | #include <stdio.h> | ||
17 | #include <stdlib.h> | ||
18 | #include <string.h> | ||
19 | #include <sys/time.h> | ||
20 | #include <errno.h> | ||
21 | |||
22 | #define K 1024 | ||
23 | |||
24 | static const char *length_str = "1MB"; | ||
25 | static const char *routine = "default"; | ||
26 | static int iterations = 1; | ||
27 | static bool use_clock; | ||
28 | static int clock_fd; | ||
29 | static bool only_prefault; | ||
30 | static bool no_prefault; | ||
31 | |||
32 | static const struct option options[] = { | ||
33 | OPT_STRING('l', "length", &length_str, "1MB", | ||
34 | "Specify length of memory to copy. " | ||
35 | "available unit: B, MB, GB (upper and lower)"), | ||
36 | OPT_STRING('r', "routine", &routine, "default", | ||
37 | "Specify routine to copy"), | ||
38 | OPT_INTEGER('i', "iterations", &iterations, | ||
39 | "repeat memset() invocation this number of times"), | ||
40 | OPT_BOOLEAN('c', "clock", &use_clock, | ||
41 | "Use CPU clock for measuring"), | ||
42 | OPT_BOOLEAN('o', "only-prefault", &only_prefault, | ||
43 | "Show only the result with page faults before memset()"), | ||
44 | OPT_BOOLEAN('n', "no-prefault", &no_prefault, | ||
45 | "Show only the result without page faults before memset()"), | ||
46 | OPT_END() | ||
47 | }; | ||
48 | |||
49 | typedef void *(*memset_t)(void *, int, size_t); | ||
50 | |||
51 | struct routine { | ||
52 | const char *name; | ||
53 | const char *desc; | ||
54 | memset_t fn; | ||
55 | }; | ||
56 | |||
57 | static const struct routine routines[] = { | ||
58 | { "default", | ||
59 | "Default memset() provided by glibc", | ||
60 | memset }, | ||
61 | #ifdef ARCH_X86_64 | ||
62 | |||
63 | #define MEMSET_FN(fn, name, desc) { name, desc, fn }, | ||
64 | #include "mem-memset-x86-64-asm-def.h" | ||
65 | #undef MEMSET_FN | ||
66 | |||
67 | #endif | ||
68 | |||
69 | { NULL, | ||
70 | NULL, | ||
71 | NULL } | ||
72 | }; | ||
73 | |||
74 | static const char * const bench_mem_memset_usage[] = { | ||
75 | "perf bench mem memset <options>", | ||
76 | NULL | ||
77 | }; | ||
78 | |||
79 | static struct perf_event_attr clock_attr = { | ||
80 | .type = PERF_TYPE_HARDWARE, | ||
81 | .config = PERF_COUNT_HW_CPU_CYCLES | ||
82 | }; | ||
83 | |||
84 | static void init_clock(void) | ||
85 | { | ||
86 | clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0); | ||
87 | |||
88 | if (clock_fd < 0 && errno == ENOSYS) | ||
89 | die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); | ||
90 | else | ||
91 | BUG_ON(clock_fd < 0); | ||
92 | } | ||
93 | |||
94 | static u64 get_clock(void) | ||
95 | { | ||
96 | int ret; | ||
97 | u64 clk; | ||
98 | |||
99 | ret = read(clock_fd, &clk, sizeof(u64)); | ||
100 | BUG_ON(ret != sizeof(u64)); | ||
101 | |||
102 | return clk; | ||
103 | } | ||
104 | |||
105 | static double timeval2double(struct timeval *ts) | ||
106 | { | ||
107 | return (double)ts->tv_sec + | ||
108 | (double)ts->tv_usec / (double)1000000; | ||
109 | } | ||
110 | |||
111 | static void alloc_mem(void **dst, size_t length) | ||
112 | { | ||
113 | *dst = zalloc(length); | ||
114 | if (!dst) | ||
115 | die("memory allocation failed - maybe length is too large?\n"); | ||
116 | } | ||
117 | |||
118 | static u64 do_memset_clock(memset_t fn, size_t len, bool prefault) | ||
119 | { | ||
120 | u64 clock_start = 0ULL, clock_end = 0ULL; | ||
121 | void *dst = NULL; | ||
122 | int i; | ||
123 | |||
124 | alloc_mem(&dst, len); | ||
125 | |||
126 | if (prefault) | ||
127 | fn(dst, -1, len); | ||
128 | |||
129 | clock_start = get_clock(); | ||
130 | for (i = 0; i < iterations; ++i) | ||
131 | fn(dst, i, len); | ||
132 | clock_end = get_clock(); | ||
133 | |||
134 | free(dst); | ||
135 | return clock_end - clock_start; | ||
136 | } | ||
137 | |||
138 | static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault) | ||
139 | { | ||
140 | struct timeval tv_start, tv_end, tv_diff; | ||
141 | void *dst = NULL; | ||
142 | int i; | ||
143 | |||
144 | alloc_mem(&dst, len); | ||
145 | |||
146 | if (prefault) | ||
147 | fn(dst, -1, len); | ||
148 | |||
149 | BUG_ON(gettimeofday(&tv_start, NULL)); | ||
150 | for (i = 0; i < iterations; ++i) | ||
151 | fn(dst, i, len); | ||
152 | BUG_ON(gettimeofday(&tv_end, NULL)); | ||
153 | |||
154 | timersub(&tv_end, &tv_start, &tv_diff); | ||
155 | |||
156 | free(dst); | ||
157 | return (double)((double)len / timeval2double(&tv_diff)); | ||
158 | } | ||
159 | |||
160 | #define pf (no_prefault ? 0 : 1) | ||
161 | |||
162 | #define print_bps(x) do { \ | ||
163 | if (x < K) \ | ||
164 | printf(" %14lf B/Sec", x); \ | ||
165 | else if (x < K * K) \ | ||
166 | printf(" %14lfd KB/Sec", x / K); \ | ||
167 | else if (x < K * K * K) \ | ||
168 | printf(" %14lf MB/Sec", x / K / K); \ | ||
169 | else \ | ||
170 | printf(" %14lf GB/Sec", x / K / K / K); \ | ||
171 | } while (0) | ||
172 | |||
173 | int bench_mem_memset(int argc, const char **argv, | ||
174 | const char *prefix __used) | ||
175 | { | ||
176 | int i; | ||
177 | size_t len; | ||
178 | double result_bps[2]; | ||
179 | u64 result_clock[2]; | ||
180 | |||
181 | argc = parse_options(argc, argv, options, | ||
182 | bench_mem_memset_usage, 0); | ||
183 | |||
184 | if (use_clock) | ||
185 | init_clock(); | ||
186 | |||
187 | len = (size_t)perf_atoll((char *)length_str); | ||
188 | |||
189 | result_clock[0] = result_clock[1] = 0ULL; | ||
190 | result_bps[0] = result_bps[1] = 0.0; | ||
191 | |||
192 | if ((s64)len <= 0) { | ||
193 | fprintf(stderr, "Invalid length:%s\n", length_str); | ||
194 | return 1; | ||
195 | } | ||
196 | |||
197 | /* same to without specifying either of prefault and no-prefault */ | ||
198 | if (only_prefault && no_prefault) | ||
199 | only_prefault = no_prefault = false; | ||
200 | |||
201 | for (i = 0; routines[i].name; i++) { | ||
202 | if (!strcmp(routines[i].name, routine)) | ||
203 | break; | ||
204 | } | ||
205 | if (!routines[i].name) { | ||
206 | printf("Unknown routine:%s\n", routine); | ||
207 | printf("Available routines...\n"); | ||
208 | for (i = 0; routines[i].name; i++) { | ||
209 | printf("\t%s ... %s\n", | ||
210 | routines[i].name, routines[i].desc); | ||
211 | } | ||
212 | return 1; | ||
213 | } | ||
214 | |||
215 | if (bench_format == BENCH_FORMAT_DEFAULT) | ||
216 | printf("# Copying %s Bytes ...\n\n", length_str); | ||
217 | |||
218 | if (!only_prefault && !no_prefault) { | ||
219 | /* show both of results */ | ||
220 | if (use_clock) { | ||
221 | result_clock[0] = | ||
222 | do_memset_clock(routines[i].fn, len, false); | ||
223 | result_clock[1] = | ||
224 | do_memset_clock(routines[i].fn, len, true); | ||
225 | } else { | ||
226 | result_bps[0] = | ||
227 | do_memset_gettimeofday(routines[i].fn, | ||
228 | len, false); | ||
229 | result_bps[1] = | ||
230 | do_memset_gettimeofday(routines[i].fn, | ||
231 | len, true); | ||
232 | } | ||
233 | } else { | ||
234 | if (use_clock) { | ||
235 | result_clock[pf] = | ||
236 | do_memset_clock(routines[i].fn, | ||
237 | len, only_prefault); | ||
238 | } else { | ||
239 | result_bps[pf] = | ||
240 | do_memset_gettimeofday(routines[i].fn, | ||
241 | len, only_prefault); | ||
242 | } | ||
243 | } | ||
244 | |||
245 | switch (bench_format) { | ||
246 | case BENCH_FORMAT_DEFAULT: | ||
247 | if (!only_prefault && !no_prefault) { | ||
248 | if (use_clock) { | ||
249 | printf(" %14lf Clock/Byte\n", | ||
250 | (double)result_clock[0] | ||
251 | / (double)len); | ||
252 | printf(" %14lf Clock/Byte (with prefault)\n ", | ||
253 | (double)result_clock[1] | ||
254 | / (double)len); | ||
255 | } else { | ||
256 | print_bps(result_bps[0]); | ||
257 | printf("\n"); | ||
258 | print_bps(result_bps[1]); | ||
259 | printf(" (with prefault)\n"); | ||
260 | } | ||
261 | } else { | ||
262 | if (use_clock) { | ||
263 | printf(" %14lf Clock/Byte", | ||
264 | (double)result_clock[pf] | ||
265 | / (double)len); | ||
266 | } else | ||
267 | print_bps(result_bps[pf]); | ||
268 | |||
269 | printf("%s\n", only_prefault ? " (with prefault)" : ""); | ||
270 | } | ||
271 | break; | ||
272 | case BENCH_FORMAT_SIMPLE: | ||
273 | if (!only_prefault && !no_prefault) { | ||
274 | if (use_clock) { | ||
275 | printf("%lf %lf\n", | ||
276 | (double)result_clock[0] / (double)len, | ||
277 | (double)result_clock[1] / (double)len); | ||
278 | } else { | ||
279 | printf("%lf %lf\n", | ||
280 | result_bps[0], result_bps[1]); | ||
281 | } | ||
282 | } else { | ||
283 | if (use_clock) { | ||
284 | printf("%lf\n", (double)result_clock[pf] | ||
285 | / (double)len); | ||
286 | } else | ||
287 | printf("%lf\n", result_bps[pf]); | ||
288 | } | ||
289 | break; | ||
290 | default: | ||
291 | /* reaching this means there's some disaster: */ | ||
292 | die("unknown format: %d\n", bench_format); | ||
293 | break; | ||
294 | } | ||
295 | |||
296 | return 0; | ||
297 | } | ||
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index fcb96269852..b0e74ab2d7a 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c | |||
@@ -52,6 +52,9 @@ static struct bench_suite mem_suites[] = { | |||
52 | { "memcpy", | 52 | { "memcpy", |
53 | "Simple memory copy in various ways", | 53 | "Simple memory copy in various ways", |
54 | bench_mem_memcpy }, | 54 | bench_mem_memcpy }, |
55 | { "memset", | ||
56 | "Simple memory set in various ways", | ||
57 | bench_mem_memset }, | ||
55 | suite_all, | 58 | suite_all, |
56 | { NULL, | 59 | { NULL, |
57 | NULL, | 60 | NULL, |
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 2296c391d0f..12c81483899 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c | |||
@@ -922,12 +922,12 @@ static const struct option info_options[] = { | |||
922 | OPT_BOOLEAN('t', "threads", &info_threads, | 922 | OPT_BOOLEAN('t', "threads", &info_threads, |
923 | "dump thread list in perf.data"), | 923 | "dump thread list in perf.data"), |
924 | OPT_BOOLEAN('m', "map", &info_map, | 924 | OPT_BOOLEAN('m', "map", &info_map, |
925 | "map of lock instances (name:address table)"), | 925 | "map of lock instances (address:name table)"), |
926 | OPT_END() | 926 | OPT_END() |
927 | }; | 927 | }; |
928 | 928 | ||
929 | static const char * const lock_usage[] = { | 929 | static const char * const lock_usage[] = { |
930 | "perf lock [<options>] {record|trace|report}", | 930 | "perf lock [<options>] {record|report|script|info}", |
931 | NULL | 931 | NULL |
932 | }; | 932 | }; |
933 | 933 | ||
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index fb8566181f2..4935c09dd5b 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c | |||
@@ -58,7 +58,7 @@ static struct { | |||
58 | struct perf_probe_event events[MAX_PROBES]; | 58 | struct perf_probe_event events[MAX_PROBES]; |
59 | struct strlist *dellist; | 59 | struct strlist *dellist; |
60 | struct line_range line_range; | 60 | struct line_range line_range; |
61 | const char *target_module; | 61 | const char *target; |
62 | int max_probe_points; | 62 | int max_probe_points; |
63 | struct strfilter *filter; | 63 | struct strfilter *filter; |
64 | } params; | 64 | } params; |
@@ -246,7 +246,7 @@ static const struct option options[] = { | |||
246 | "file", "vmlinux pathname"), | 246 | "file", "vmlinux pathname"), |
247 | OPT_STRING('s', "source", &symbol_conf.source_prefix, | 247 | OPT_STRING('s', "source", &symbol_conf.source_prefix, |
248 | "directory", "path to kernel source"), | 248 | "directory", "path to kernel source"), |
249 | OPT_STRING('m', "module", ¶ms.target_module, | 249 | OPT_STRING('m', "module", ¶ms.target, |
250 | "modname|path", | 250 | "modname|path", |
251 | "target module name (for online) or path (for offline)"), | 251 | "target module name (for online) or path (for offline)"), |
252 | #endif | 252 | #endif |
@@ -333,7 +333,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) | |||
333 | if (!params.filter) | 333 | if (!params.filter) |
334 | params.filter = strfilter__new(DEFAULT_FUNC_FILTER, | 334 | params.filter = strfilter__new(DEFAULT_FUNC_FILTER, |
335 | NULL); | 335 | NULL); |
336 | ret = show_available_funcs(params.target_module, | 336 | ret = show_available_funcs(params.target, |
337 | params.filter); | 337 | params.filter); |
338 | strfilter__delete(params.filter); | 338 | strfilter__delete(params.filter); |
339 | if (ret < 0) | 339 | if (ret < 0) |
@@ -354,7 +354,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) | |||
354 | usage_with_options(probe_usage, options); | 354 | usage_with_options(probe_usage, options); |
355 | } | 355 | } |
356 | 356 | ||
357 | ret = show_line_range(¶ms.line_range, params.target_module); | 357 | ret = show_line_range(¶ms.line_range, params.target); |
358 | if (ret < 0) | 358 | if (ret < 0) |
359 | pr_err(" Error: Failed to show lines. (%d)\n", ret); | 359 | pr_err(" Error: Failed to show lines. (%d)\n", ret); |
360 | return ret; | 360 | return ret; |
@@ -371,7 +371,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) | |||
371 | 371 | ||
372 | ret = show_available_vars(params.events, params.nevents, | 372 | ret = show_available_vars(params.events, params.nevents, |
373 | params.max_probe_points, | 373 | params.max_probe_points, |
374 | params.target_module, | 374 | params.target, |
375 | params.filter, | 375 | params.filter, |
376 | params.show_ext_vars); | 376 | params.show_ext_vars); |
377 | strfilter__delete(params.filter); | 377 | strfilter__delete(params.filter); |
@@ -393,7 +393,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) | |||
393 | if (params.nevents) { | 393 | if (params.nevents) { |
394 | ret = add_perf_probe_events(params.events, params.nevents, | 394 | ret = add_perf_probe_events(params.events, params.nevents, |
395 | params.max_probe_points, | 395 | params.max_probe_points, |
396 | params.target_module, | 396 | params.target, |
397 | params.force_add); | 397 | params.force_add); |
398 | if (ret < 0) { | 398 | if (ret < 0) { |
399 | pr_err(" Error: Failed to add events. (%d)\n", ret); | 399 | pr_err(" Error: Failed to add events. (%d)\n", ret); |
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 0abfb18b911..75d230fef20 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c | |||
@@ -44,6 +44,7 @@ struct perf_record { | |||
44 | struct perf_evlist *evlist; | 44 | struct perf_evlist *evlist; |
45 | struct perf_session *session; | 45 | struct perf_session *session; |
46 | const char *progname; | 46 | const char *progname; |
47 | const char *uid_str; | ||
47 | int output; | 48 | int output; |
48 | unsigned int page_size; | 49 | unsigned int page_size; |
49 | int realtime_prio; | 50 | int realtime_prio; |
@@ -204,8 +205,11 @@ static void perf_record__open(struct perf_record *rec) | |||
204 | 205 | ||
205 | if (opts->group && pos != first) | 206 | if (opts->group && pos != first) |
206 | group_fd = first->fd; | 207 | group_fd = first->fd; |
208 | fallback_missing_features: | ||
209 | if (opts->exclude_guest_missing) | ||
210 | attr->exclude_guest = attr->exclude_host = 0; | ||
207 | retry_sample_id: | 211 | retry_sample_id: |
208 | attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0; | 212 | attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1; |
209 | try_again: | 213 | try_again: |
210 | if (perf_evsel__open(pos, evlist->cpus, evlist->threads, | 214 | if (perf_evsel__open(pos, evlist->cpus, evlist->threads, |
211 | opts->group, group_fd) < 0) { | 215 | opts->group, group_fd) < 0) { |
@@ -217,15 +221,23 @@ try_again: | |||
217 | } else if (err == ENODEV && opts->cpu_list) { | 221 | } else if (err == ENODEV && opts->cpu_list) { |
218 | die("No such device - did you specify" | 222 | die("No such device - did you specify" |
219 | " an out-of-range profile CPU?\n"); | 223 | " an out-of-range profile CPU?\n"); |
220 | } else if (err == EINVAL && opts->sample_id_all_avail) { | 224 | } else if (err == EINVAL) { |
221 | /* | 225 | if (!opts->exclude_guest_missing && |
222 | * Old kernel, no attr->sample_id_type_all field | 226 | (attr->exclude_guest || attr->exclude_host)) { |
223 | */ | 227 | pr_debug("Old kernel, cannot exclude " |
224 | opts->sample_id_all_avail = false; | 228 | "guest or host samples.\n"); |
225 | if (!opts->sample_time && !opts->raw_samples && !time_needed) | 229 | opts->exclude_guest_missing = true; |
226 | attr->sample_type &= ~PERF_SAMPLE_TIME; | 230 | goto fallback_missing_features; |
227 | 231 | } else if (!opts->sample_id_all_missing) { | |
228 | goto retry_sample_id; | 232 | /* |
233 | * Old kernel, no attr->sample_id_type_all field | ||
234 | */ | ||
235 | opts->sample_id_all_missing = true; | ||
236 | if (!opts->sample_time && !opts->raw_samples && !time_needed) | ||
237 | attr->sample_type &= ~PERF_SAMPLE_TIME; | ||
238 | |||
239 | goto retry_sample_id; | ||
240 | } | ||
229 | } | 241 | } |
230 | 242 | ||
231 | /* | 243 | /* |
@@ -385,7 +397,7 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) | |||
385 | { | 397 | { |
386 | struct stat st; | 398 | struct stat st; |
387 | int flags; | 399 | int flags; |
388 | int err, output; | 400 | int err, output, feat; |
389 | unsigned long waking = 0; | 401 | unsigned long waking = 0; |
390 | const bool forks = argc > 0; | 402 | const bool forks = argc > 0; |
391 | struct machine *machine; | 403 | struct machine *machine; |
@@ -452,8 +464,14 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) | |||
452 | 464 | ||
453 | rec->session = session; | 465 | rec->session = session; |
454 | 466 | ||
455 | if (!rec->no_buildid) | 467 | for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) |
456 | perf_header__set_feat(&session->header, HEADER_BUILD_ID); | 468 | perf_header__set_feat(&session->header, feat); |
469 | |||
470 | if (rec->no_buildid) | ||
471 | perf_header__clear_feat(&session->header, HEADER_BUILD_ID); | ||
472 | |||
473 | if (!have_tracepoints(&evsel_list->entries)) | ||
474 | perf_header__clear_feat(&session->header, HEADER_TRACE_INFO); | ||
457 | 475 | ||
458 | if (!rec->file_new) { | 476 | if (!rec->file_new) { |
459 | err = perf_session__read_header(session, output); | 477 | err = perf_session__read_header(session, output); |
@@ -461,22 +479,6 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) | |||
461 | goto out_delete_session; | 479 | goto out_delete_session; |
462 | } | 480 | } |
463 | 481 | ||
464 | if (have_tracepoints(&evsel_list->entries)) | ||
465 | perf_header__set_feat(&session->header, HEADER_TRACE_INFO); | ||
466 | |||
467 | perf_header__set_feat(&session->header, HEADER_HOSTNAME); | ||
468 | perf_header__set_feat(&session->header, HEADER_OSRELEASE); | ||
469 | perf_header__set_feat(&session->header, HEADER_ARCH); | ||
470 | perf_header__set_feat(&session->header, HEADER_CPUDESC); | ||
471 | perf_header__set_feat(&session->header, HEADER_NRCPUS); | ||
472 | perf_header__set_feat(&session->header, HEADER_EVENT_DESC); | ||
473 | perf_header__set_feat(&session->header, HEADER_CMDLINE); | ||
474 | perf_header__set_feat(&session->header, HEADER_VERSION); | ||
475 | perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY); | ||
476 | perf_header__set_feat(&session->header, HEADER_TOTAL_MEM); | ||
477 | perf_header__set_feat(&session->header, HEADER_NUMA_TOPOLOGY); | ||
478 | perf_header__set_feat(&session->header, HEADER_CPUID); | ||
479 | |||
480 | if (forks) { | 482 | if (forks) { |
481 | err = perf_evlist__prepare_workload(evsel_list, opts, argv); | 483 | err = perf_evlist__prepare_workload(evsel_list, opts, argv); |
482 | if (err < 0) { | 484 | if (err < 0) { |
@@ -503,9 +505,9 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) | |||
503 | return err; | 505 | return err; |
504 | } | 506 | } |
505 | 507 | ||
506 | if (!!rec->no_buildid | 508 | if (!rec->no_buildid |
507 | && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) { | 509 | && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) { |
508 | pr_err("Couldn't generating buildids. " | 510 | pr_err("Couldn't generate buildids. " |
509 | "Use --no-buildid to profile anyway.\n"); | 511 | "Use --no-buildid to profile anyway.\n"); |
510 | return -1; | 512 | return -1; |
511 | } | 513 | } |
@@ -654,13 +656,10 @@ static const char * const record_usage[] = { | |||
654 | */ | 656 | */ |
655 | static struct perf_record record = { | 657 | static struct perf_record record = { |
656 | .opts = { | 658 | .opts = { |
657 | .target_pid = -1, | ||
658 | .target_tid = -1, | ||
659 | .mmap_pages = UINT_MAX, | 659 | .mmap_pages = UINT_MAX, |
660 | .user_freq = UINT_MAX, | 660 | .user_freq = UINT_MAX, |
661 | .user_interval = ULLONG_MAX, | 661 | .user_interval = ULLONG_MAX, |
662 | .freq = 1000, | 662 | .freq = 1000, |
663 | .sample_id_all_avail = true, | ||
664 | }, | 663 | }, |
665 | .write_mode = WRITE_FORCE, | 664 | .write_mode = WRITE_FORCE, |
666 | .file_new = true, | 665 | .file_new = true, |
@@ -679,9 +678,9 @@ const struct option record_options[] = { | |||
679 | parse_events_option), | 678 | parse_events_option), |
680 | OPT_CALLBACK(0, "filter", &record.evlist, "filter", | 679 | OPT_CALLBACK(0, "filter", &record.evlist, "filter", |
681 | "event filter", parse_filter), | 680 | "event filter", parse_filter), |
682 | OPT_INTEGER('p', "pid", &record.opts.target_pid, | 681 | OPT_STRING('p', "pid", &record.opts.target_pid, "pid", |
683 | "record events on existing process id"), | 682 | "record events on existing process id"), |
684 | OPT_INTEGER('t', "tid", &record.opts.target_tid, | 683 | OPT_STRING('t', "tid", &record.opts.target_tid, "tid", |
685 | "record events on existing thread id"), | 684 | "record events on existing thread id"), |
686 | OPT_INTEGER('r', "realtime", &record.realtime_prio, | 685 | OPT_INTEGER('r', "realtime", &record.realtime_prio, |
687 | "collect data with this RT SCHED_FIFO priority"), | 686 | "collect data with this RT SCHED_FIFO priority"), |
@@ -727,6 +726,7 @@ const struct option record_options[] = { | |||
727 | OPT_CALLBACK('G', "cgroup", &record.evlist, "name", | 726 | OPT_CALLBACK('G', "cgroup", &record.evlist, "name", |
728 | "monitor event in cgroup name only", | 727 | "monitor event in cgroup name only", |
729 | parse_cgroups), | 728 | parse_cgroups), |
729 | OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"), | ||
730 | OPT_END() | 730 | OPT_END() |
731 | }; | 731 | }; |
732 | 732 | ||
@@ -747,8 +747,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) | |||
747 | 747 | ||
748 | argc = parse_options(argc, argv, record_options, record_usage, | 748 | argc = parse_options(argc, argv, record_options, record_usage, |
749 | PARSE_OPT_STOP_AT_NON_OPTION); | 749 | PARSE_OPT_STOP_AT_NON_OPTION); |
750 | if (!argc && rec->opts.target_pid == -1 && rec->opts.target_tid == -1 && | 750 | if (!argc && !rec->opts.target_pid && !rec->opts.target_tid && |
751 | !rec->opts.system_wide && !rec->opts.cpu_list) | 751 | !rec->opts.system_wide && !rec->opts.cpu_list && !rec->uid_str) |
752 | usage_with_options(record_usage, record_options); | 752 | usage_with_options(record_usage, record_options); |
753 | 753 | ||
754 | if (rec->force && rec->append_file) { | 754 | if (rec->force && rec->append_file) { |
@@ -788,11 +788,17 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) | |||
788 | goto out_symbol_exit; | 788 | goto out_symbol_exit; |
789 | } | 789 | } |
790 | 790 | ||
791 | if (rec->opts.target_pid != -1) | 791 | rec->opts.uid = parse_target_uid(rec->uid_str, rec->opts.target_tid, |
792 | rec->opts.target_pid); | ||
793 | if (rec->uid_str != NULL && rec->opts.uid == UINT_MAX - 1) | ||
794 | goto out_free_fd; | ||
795 | |||
796 | if (rec->opts.target_pid) | ||
792 | rec->opts.target_tid = rec->opts.target_pid; | 797 | rec->opts.target_tid = rec->opts.target_pid; |
793 | 798 | ||
794 | if (perf_evlist__create_maps(evsel_list, rec->opts.target_pid, | 799 | if (perf_evlist__create_maps(evsel_list, rec->opts.target_pid, |
795 | rec->opts.target_tid, rec->opts.cpu_list) < 0) | 800 | rec->opts.target_tid, rec->opts.uid, |
801 | rec->opts.cpu_list) < 0) | ||
796 | usage_with_options(record_usage, record_options); | 802 | usage_with_options(record_usage, record_options); |
797 | 803 | ||
798 | list_for_each_entry(pos, &evsel_list->entries, node) { | 804 | list_for_each_entry(pos, &evsel_list->entries, node) { |
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index bb68ddf257b..d4ce733b9eb 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c | |||
@@ -40,6 +40,7 @@ enum perf_output_field { | |||
40 | PERF_OUTPUT_SYM = 1U << 8, | 40 | PERF_OUTPUT_SYM = 1U << 8, |
41 | PERF_OUTPUT_DSO = 1U << 9, | 41 | PERF_OUTPUT_DSO = 1U << 9, |
42 | PERF_OUTPUT_ADDR = 1U << 10, | 42 | PERF_OUTPUT_ADDR = 1U << 10, |
43 | PERF_OUTPUT_SYMOFFSET = 1U << 11, | ||
43 | }; | 44 | }; |
44 | 45 | ||
45 | struct output_option { | 46 | struct output_option { |
@@ -57,6 +58,7 @@ struct output_option { | |||
57 | {.str = "sym", .field = PERF_OUTPUT_SYM}, | 58 | {.str = "sym", .field = PERF_OUTPUT_SYM}, |
58 | {.str = "dso", .field = PERF_OUTPUT_DSO}, | 59 | {.str = "dso", .field = PERF_OUTPUT_DSO}, |
59 | {.str = "addr", .field = PERF_OUTPUT_ADDR}, | 60 | {.str = "addr", .field = PERF_OUTPUT_ADDR}, |
61 | {.str = "symoff", .field = PERF_OUTPUT_SYMOFFSET}, | ||
60 | }; | 62 | }; |
61 | 63 | ||
62 | /* default set to maintain compatibility with current format */ | 64 | /* default set to maintain compatibility with current format */ |
@@ -193,6 +195,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, | |||
193 | "to symbols.\n"); | 195 | "to symbols.\n"); |
194 | return -EINVAL; | 196 | return -EINVAL; |
195 | } | 197 | } |
198 | if (PRINT_FIELD(SYMOFFSET) && !PRINT_FIELD(SYM)) { | ||
199 | pr_err("Display of offsets requested but symbol is not" | ||
200 | "selected.\n"); | ||
201 | return -EINVAL; | ||
202 | } | ||
196 | if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) { | 203 | if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) { |
197 | pr_err("Display of DSO requested but neither sample IP nor " | 204 | pr_err("Display of DSO requested but neither sample IP nor " |
198 | "sample address\nis selected. Hence, no addresses to convert " | 205 | "sample address\nis selected. Hence, no addresses to convert " |
@@ -300,10 +307,17 @@ static void print_sample_start(struct perf_sample *sample, | |||
300 | } else | 307 | } else |
301 | evname = __event_name(attr->type, attr->config); | 308 | evname = __event_name(attr->type, attr->config); |
302 | 309 | ||
303 | printf("%s: ", evname ? evname : "(unknown)"); | 310 | printf("%s: ", evname ? evname : "[unknown]"); |
304 | } | 311 | } |
305 | } | 312 | } |
306 | 313 | ||
314 | static bool is_bts_event(struct perf_event_attr *attr) | ||
315 | { | ||
316 | return ((attr->type == PERF_TYPE_HARDWARE) && | ||
317 | (attr->config & PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && | ||
318 | (attr->sample_period == 1)); | ||
319 | } | ||
320 | |||
307 | static bool sample_addr_correlates_sym(struct perf_event_attr *attr) | 321 | static bool sample_addr_correlates_sym(struct perf_event_attr *attr) |
308 | { | 322 | { |
309 | if ((attr->type == PERF_TYPE_SOFTWARE) && | 323 | if ((attr->type == PERF_TYPE_SOFTWARE) && |
@@ -312,6 +326,9 @@ static bool sample_addr_correlates_sym(struct perf_event_attr *attr) | |||
312 | (attr->config == PERF_COUNT_SW_PAGE_FAULTS_MAJ))) | 326 | (attr->config == PERF_COUNT_SW_PAGE_FAULTS_MAJ))) |
313 | return true; | 327 | return true; |
314 | 328 | ||
329 | if (is_bts_event(attr)) | ||
330 | return true; | ||
331 | |||
315 | return false; | 332 | return false; |
316 | } | 333 | } |
317 | 334 | ||
@@ -323,7 +340,6 @@ static void print_sample_addr(union perf_event *event, | |||
323 | { | 340 | { |
324 | struct addr_location al; | 341 | struct addr_location al; |
325 | u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; | 342 | u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; |
326 | const char *symname, *dsoname; | ||
327 | 343 | ||
328 | printf("%16" PRIx64, sample->addr); | 344 | printf("%16" PRIx64, sample->addr); |
329 | 345 | ||
@@ -343,22 +359,46 @@ static void print_sample_addr(union perf_event *event, | |||
343 | al.sym = map__find_symbol(al.map, al.addr, NULL); | 359 | al.sym = map__find_symbol(al.map, al.addr, NULL); |
344 | 360 | ||
345 | if (PRINT_FIELD(SYM)) { | 361 | if (PRINT_FIELD(SYM)) { |
346 | if (al.sym && al.sym->name) | 362 | printf(" "); |
347 | symname = al.sym->name; | 363 | if (PRINT_FIELD(SYMOFFSET)) |
364 | symbol__fprintf_symname_offs(al.sym, &al, stdout); | ||
348 | else | 365 | else |
349 | symname = ""; | 366 | symbol__fprintf_symname(al.sym, stdout); |
350 | |||
351 | printf(" %16s", symname); | ||
352 | } | 367 | } |
353 | 368 | ||
354 | if (PRINT_FIELD(DSO)) { | 369 | if (PRINT_FIELD(DSO)) { |
355 | if (al.map && al.map->dso && al.map->dso->name) | 370 | printf(" ("); |
356 | dsoname = al.map->dso->name; | 371 | map__fprintf_dsoname(al.map, stdout); |
357 | else | 372 | printf(")"); |
358 | dsoname = ""; | 373 | } |
374 | } | ||
359 | 375 | ||
360 | printf(" (%s)", dsoname); | 376 | static void print_sample_bts(union perf_event *event, |
377 | struct perf_sample *sample, | ||
378 | struct perf_evsel *evsel, | ||
379 | struct machine *machine, | ||
380 | struct thread *thread) | ||
381 | { | ||
382 | struct perf_event_attr *attr = &evsel->attr; | ||
383 | |||
384 | /* print branch_from information */ | ||
385 | if (PRINT_FIELD(IP)) { | ||
386 | if (!symbol_conf.use_callchain) | ||
387 | printf(" "); | ||
388 | else | ||
389 | printf("\n"); | ||
390 | perf_event__print_ip(event, sample, machine, evsel, | ||
391 | PRINT_FIELD(SYM), PRINT_FIELD(DSO), | ||
392 | PRINT_FIELD(SYMOFFSET)); | ||
361 | } | 393 | } |
394 | |||
395 | printf(" => "); | ||
396 | |||
397 | /* print branch_to information */ | ||
398 | if (PRINT_FIELD(ADDR)) | ||
399 | print_sample_addr(event, sample, machine, thread, attr); | ||
400 | |||
401 | printf("\n"); | ||
362 | } | 402 | } |
363 | 403 | ||
364 | static void process_event(union perf_event *event __unused, | 404 | static void process_event(union perf_event *event __unused, |
@@ -374,6 +414,11 @@ static void process_event(union perf_event *event __unused, | |||
374 | 414 | ||
375 | print_sample_start(sample, thread, attr); | 415 | print_sample_start(sample, thread, attr); |
376 | 416 | ||
417 | if (is_bts_event(attr)) { | ||
418 | print_sample_bts(event, sample, evsel, machine, thread); | ||
419 | return; | ||
420 | } | ||
421 | |||
377 | if (PRINT_FIELD(TRACE)) | 422 | if (PRINT_FIELD(TRACE)) |
378 | print_trace_event(sample->cpu, sample->raw_data, | 423 | print_trace_event(sample->cpu, sample->raw_data, |
379 | sample->raw_size); | 424 | sample->raw_size); |
@@ -387,7 +432,8 @@ static void process_event(union perf_event *event __unused, | |||
387 | else | 432 | else |
388 | printf("\n"); | 433 | printf("\n"); |
389 | perf_event__print_ip(event, sample, machine, evsel, | 434 | perf_event__print_ip(event, sample, machine, evsel, |
390 | PRINT_FIELD(SYM), PRINT_FIELD(DSO)); | 435 | PRINT_FIELD(SYM), PRINT_FIELD(DSO), |
436 | PRINT_FIELD(SYMOFFSET)); | ||
391 | } | 437 | } |
392 | 438 | ||
393 | printf("\n"); | 439 | printf("\n"); |
@@ -1097,7 +1143,10 @@ static const struct option options[] = { | |||
1097 | OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", | 1143 | OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", |
1098 | "Look for files with symbols relative to this directory"), | 1144 | "Look for files with symbols relative to this directory"), |
1099 | OPT_CALLBACK('f', "fields", NULL, "str", | 1145 | OPT_CALLBACK('f', "fields", NULL, "str", |
1100 | "comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace,raw. Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr", | 1146 | "comma separated output fields prepend with 'type:'. " |
1147 | "Valid types: hw,sw,trace,raw. " | ||
1148 | "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," | ||
1149 | "addr,symoff", | ||
1101 | parse_output_fields), | 1150 | parse_output_fields), |
1102 | OPT_BOOLEAN('a', "all-cpus", &system_wide, | 1151 | OPT_BOOLEAN('a', "all-cpus", &system_wide, |
1103 | "system-wide collection from all CPUs"), | 1152 | "system-wide collection from all CPUs"), |
@@ -1106,6 +1155,9 @@ static const struct option options[] = { | |||
1106 | "only display events for these comms"), | 1155 | "only display events for these comms"), |
1107 | OPT_BOOLEAN('I', "show-info", &show_full_info, | 1156 | OPT_BOOLEAN('I', "show-info", &show_full_info, |
1108 | "display extended information from perf.data file"), | 1157 | "display extended information from perf.data file"), |
1158 | OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path, | ||
1159 | "Show the path of [kernel.kallsyms]"), | ||
1160 | |||
1109 | OPT_END() | 1161 | OPT_END() |
1110 | }; | 1162 | }; |
1111 | 1163 | ||
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f5d2a63eba6..ea40e4e8b22 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -182,8 +182,8 @@ static int run_count = 1; | |||
182 | static bool no_inherit = false; | 182 | static bool no_inherit = false; |
183 | static bool scale = true; | 183 | static bool scale = true; |
184 | static bool no_aggr = false; | 184 | static bool no_aggr = false; |
185 | static pid_t target_pid = -1; | 185 | static const char *target_pid; |
186 | static pid_t target_tid = -1; | 186 | static const char *target_tid; |
187 | static pid_t child_pid = -1; | 187 | static pid_t child_pid = -1; |
188 | static bool null_run = false; | 188 | static bool null_run = false; |
189 | static int detailed_run = 0; | 189 | static int detailed_run = 0; |
@@ -296,7 +296,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel, | |||
296 | if (system_wide) | 296 | if (system_wide) |
297 | return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, | 297 | return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, |
298 | group, group_fd); | 298 | group, group_fd); |
299 | if (target_pid == -1 && target_tid == -1) { | 299 | if (!target_pid && !target_tid) { |
300 | attr->disabled = 1; | 300 | attr->disabled = 1; |
301 | attr->enable_on_exec = 1; | 301 | attr->enable_on_exec = 1; |
302 | } | 302 | } |
@@ -446,7 +446,7 @@ static int run_perf_stat(int argc __used, const char **argv) | |||
446 | exit(-1); | 446 | exit(-1); |
447 | } | 447 | } |
448 | 448 | ||
449 | if (target_tid == -1 && target_pid == -1 && !system_wide) | 449 | if (!target_tid && !target_pid && !system_wide) |
450 | evsel_list->threads->map[0] = child_pid; | 450 | evsel_list->threads->map[0] = child_pid; |
451 | 451 | ||
452 | /* | 452 | /* |
@@ -576,6 +576,8 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg) | |||
576 | if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) | 576 | if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) |
577 | fprintf(output, " # %8.3f CPUs utilized ", | 577 | fprintf(output, " # %8.3f CPUs utilized ", |
578 | avg / avg_stats(&walltime_nsecs_stats)); | 578 | avg / avg_stats(&walltime_nsecs_stats)); |
579 | else | ||
580 | fprintf(output, " "); | ||
579 | } | 581 | } |
580 | 582 | ||
581 | /* used for get_ratio_color() */ | 583 | /* used for get_ratio_color() */ |
@@ -844,12 +846,18 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) | |||
844 | 846 | ||
845 | fprintf(output, " # %8.3f GHz ", ratio); | 847 | fprintf(output, " # %8.3f GHz ", ratio); |
846 | } else if (runtime_nsecs_stats[cpu].n != 0) { | 848 | } else if (runtime_nsecs_stats[cpu].n != 0) { |
849 | char unit = 'M'; | ||
850 | |||
847 | total = avg_stats(&runtime_nsecs_stats[cpu]); | 851 | total = avg_stats(&runtime_nsecs_stats[cpu]); |
848 | 852 | ||
849 | if (total) | 853 | if (total) |
850 | ratio = 1000.0 * avg / total; | 854 | ratio = 1000.0 * avg / total; |
855 | if (ratio < 0.001) { | ||
856 | ratio *= 1000; | ||
857 | unit = 'K'; | ||
858 | } | ||
851 | 859 | ||
852 | fprintf(output, " # %8.3f M/sec ", ratio); | 860 | fprintf(output, " # %8.3f %c/sec ", ratio, unit); |
853 | } else { | 861 | } else { |
854 | fprintf(output, " "); | 862 | fprintf(output, " "); |
855 | } | 863 | } |
@@ -960,14 +968,14 @@ static void print_stat(int argc, const char **argv) | |||
960 | if (!csv_output) { | 968 | if (!csv_output) { |
961 | fprintf(output, "\n"); | 969 | fprintf(output, "\n"); |
962 | fprintf(output, " Performance counter stats for "); | 970 | fprintf(output, " Performance counter stats for "); |
963 | if(target_pid == -1 && target_tid == -1) { | 971 | if (!target_pid && !target_tid) { |
964 | fprintf(output, "\'%s", argv[0]); | 972 | fprintf(output, "\'%s", argv[0]); |
965 | for (i = 1; i < argc; i++) | 973 | for (i = 1; i < argc; i++) |
966 | fprintf(output, " %s", argv[i]); | 974 | fprintf(output, " %s", argv[i]); |
967 | } else if (target_pid != -1) | 975 | } else if (target_pid) |
968 | fprintf(output, "process id \'%d", target_pid); | 976 | fprintf(output, "process id \'%s", target_pid); |
969 | else | 977 | else |
970 | fprintf(output, "thread id \'%d", target_tid); | 978 | fprintf(output, "thread id \'%s", target_tid); |
971 | 979 | ||
972 | fprintf(output, "\'"); | 980 | fprintf(output, "\'"); |
973 | if (run_count > 1) | 981 | if (run_count > 1) |
@@ -1041,10 +1049,10 @@ static const struct option options[] = { | |||
1041 | "event filter", parse_filter), | 1049 | "event filter", parse_filter), |
1042 | OPT_BOOLEAN('i', "no-inherit", &no_inherit, | 1050 | OPT_BOOLEAN('i', "no-inherit", &no_inherit, |
1043 | "child tasks do not inherit counters"), | 1051 | "child tasks do not inherit counters"), |
1044 | OPT_INTEGER('p', "pid", &target_pid, | 1052 | OPT_STRING('p', "pid", &target_pid, "pid", |
1045 | "stat events on existing process id"), | 1053 | "stat events on existing process id"), |
1046 | OPT_INTEGER('t', "tid", &target_tid, | 1054 | OPT_STRING('t', "tid", &target_tid, "tid", |
1047 | "stat events on existing thread id"), | 1055 | "stat events on existing thread id"), |
1048 | OPT_BOOLEAN('a', "all-cpus", &system_wide, | 1056 | OPT_BOOLEAN('a', "all-cpus", &system_wide, |
1049 | "system-wide collection from all CPUs"), | 1057 | "system-wide collection from all CPUs"), |
1050 | OPT_BOOLEAN('g', "group", &group, | 1058 | OPT_BOOLEAN('g', "group", &group, |
@@ -1182,7 +1190,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) | |||
1182 | } else if (big_num_opt == 0) /* User passed --no-big-num */ | 1190 | } else if (big_num_opt == 0) /* User passed --no-big-num */ |
1183 | big_num = false; | 1191 | big_num = false; |
1184 | 1192 | ||
1185 | if (!argc && target_pid == -1 && target_tid == -1) | 1193 | if (!argc && !target_pid && !target_tid) |
1186 | usage_with_options(stat_usage, options); | 1194 | usage_with_options(stat_usage, options); |
1187 | if (run_count <= 0) | 1195 | if (run_count <= 0) |
1188 | usage_with_options(stat_usage, options); | 1196 | usage_with_options(stat_usage, options); |
@@ -1198,10 +1206,11 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) | |||
1198 | if (add_default_attributes()) | 1206 | if (add_default_attributes()) |
1199 | goto out; | 1207 | goto out; |
1200 | 1208 | ||
1201 | if (target_pid != -1) | 1209 | if (target_pid) |
1202 | target_tid = target_pid; | 1210 | target_tid = target_pid; |
1203 | 1211 | ||
1204 | evsel_list->threads = thread_map__new(target_pid, target_tid); | 1212 | evsel_list->threads = thread_map__new_str(target_pid, |
1213 | target_tid, UINT_MAX); | ||
1205 | if (evsel_list->threads == NULL) { | 1214 | if (evsel_list->threads == NULL) { |
1206 | pr_err("Problems finding threads of monitor\n"); | 1215 | pr_err("Problems finding threads of monitor\n"); |
1207 | usage_with_options(stat_usage, options); | 1216 | usage_with_options(stat_usage, options); |
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 3854e869dce..3e087ce8daa 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c | |||
@@ -15,6 +15,8 @@ | |||
15 | #include "util/thread_map.h" | 15 | #include "util/thread_map.h" |
16 | #include "../../include/linux/hw_breakpoint.h" | 16 | #include "../../include/linux/hw_breakpoint.h" |
17 | 17 | ||
18 | #include <sys/mman.h> | ||
19 | |||
18 | static int vmlinux_matches_kallsyms_filter(struct map *map __used, struct symbol *sym) | 20 | static int vmlinux_matches_kallsyms_filter(struct map *map __used, struct symbol *sym) |
19 | { | 21 | { |
20 | bool *visited = symbol__priv(sym); | 22 | bool *visited = symbol__priv(sym); |
@@ -276,7 +278,7 @@ static int test__open_syscall_event(void) | |||
276 | return -1; | 278 | return -1; |
277 | } | 279 | } |
278 | 280 | ||
279 | threads = thread_map__new(-1, getpid()); | 281 | threads = thread_map__new(-1, getpid(), UINT_MAX); |
280 | if (threads == NULL) { | 282 | if (threads == NULL) { |
281 | pr_debug("thread_map__new\n"); | 283 | pr_debug("thread_map__new\n"); |
282 | return -1; | 284 | return -1; |
@@ -342,7 +344,7 @@ static int test__open_syscall_event_on_all_cpus(void) | |||
342 | return -1; | 344 | return -1; |
343 | } | 345 | } |
344 | 346 | ||
345 | threads = thread_map__new(-1, getpid()); | 347 | threads = thread_map__new(-1, getpid(), UINT_MAX); |
346 | if (threads == NULL) { | 348 | if (threads == NULL) { |
347 | pr_debug("thread_map__new\n"); | 349 | pr_debug("thread_map__new\n"); |
348 | return -1; | 350 | return -1; |
@@ -490,7 +492,7 @@ static int test__basic_mmap(void) | |||
490 | expected_nr_events[i] = random() % 257; | 492 | expected_nr_events[i] = random() % 257; |
491 | } | 493 | } |
492 | 494 | ||
493 | threads = thread_map__new(-1, getpid()); | 495 | threads = thread_map__new(-1, getpid(), UINT_MAX); |
494 | if (threads == NULL) { | 496 | if (threads == NULL) { |
495 | pr_debug("thread_map__new\n"); | 497 | pr_debug("thread_map__new\n"); |
496 | return -1; | 498 | return -1; |
@@ -1008,12 +1010,9 @@ realloc: | |||
1008 | static int test__PERF_RECORD(void) | 1010 | static int test__PERF_RECORD(void) |
1009 | { | 1011 | { |
1010 | struct perf_record_opts opts = { | 1012 | struct perf_record_opts opts = { |
1011 | .target_pid = -1, | ||
1012 | .target_tid = -1, | ||
1013 | .no_delay = true, | 1013 | .no_delay = true, |
1014 | .freq = 10, | 1014 | .freq = 10, |
1015 | .mmap_pages = 256, | 1015 | .mmap_pages = 256, |
1016 | .sample_id_all_avail = true, | ||
1017 | }; | 1016 | }; |
1018 | cpu_set_t *cpu_mask = NULL; | 1017 | cpu_set_t *cpu_mask = NULL; |
1019 | size_t cpu_mask_size = 0; | 1018 | size_t cpu_mask_size = 0; |
@@ -1054,7 +1053,7 @@ static int test__PERF_RECORD(void) | |||
1054 | * we're monitoring, the one forked there. | 1053 | * we're monitoring, the one forked there. |
1055 | */ | 1054 | */ |
1056 | err = perf_evlist__create_maps(evlist, opts.target_pid, | 1055 | err = perf_evlist__create_maps(evlist, opts.target_pid, |
1057 | opts.target_tid, opts.cpu_list); | 1056 | opts.target_tid, UINT_MAX, opts.cpu_list); |
1058 | if (err < 0) { | 1057 | if (err < 0) { |
1059 | pr_debug("Not enough memory to create thread/cpu maps\n"); | 1058 | pr_debug("Not enough memory to create thread/cpu maps\n"); |
1060 | goto out_delete_evlist; | 1059 | goto out_delete_evlist; |
@@ -1296,6 +1295,173 @@ out: | |||
1296 | return (err < 0 || errs > 0) ? -1 : 0; | 1295 | return (err < 0 || errs > 0) ? -1 : 0; |
1297 | } | 1296 | } |
1298 | 1297 | ||
1298 | |||
1299 | #if defined(__x86_64__) || defined(__i386__) | ||
1300 | |||
1301 | #define barrier() asm volatile("" ::: "memory") | ||
1302 | |||
1303 | static u64 rdpmc(unsigned int counter) | ||
1304 | { | ||
1305 | unsigned int low, high; | ||
1306 | |||
1307 | asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter)); | ||
1308 | |||
1309 | return low | ((u64)high) << 32; | ||
1310 | } | ||
1311 | |||
1312 | static u64 rdtsc(void) | ||
1313 | { | ||
1314 | unsigned int low, high; | ||
1315 | |||
1316 | asm volatile("rdtsc" : "=a" (low), "=d" (high)); | ||
1317 | |||
1318 | return low | ((u64)high) << 32; | ||
1319 | } | ||
1320 | |||
1321 | static u64 mmap_read_self(void *addr) | ||
1322 | { | ||
1323 | struct perf_event_mmap_page *pc = addr; | ||
1324 | u32 seq, idx, time_mult = 0, time_shift = 0; | ||
1325 | u64 count, cyc = 0, time_offset = 0, enabled, running, delta; | ||
1326 | |||
1327 | do { | ||
1328 | seq = pc->lock; | ||
1329 | barrier(); | ||
1330 | |||
1331 | enabled = pc->time_enabled; | ||
1332 | running = pc->time_running; | ||
1333 | |||
1334 | if (enabled != running) { | ||
1335 | cyc = rdtsc(); | ||
1336 | time_mult = pc->time_mult; | ||
1337 | time_shift = pc->time_shift; | ||
1338 | time_offset = pc->time_offset; | ||
1339 | } | ||
1340 | |||
1341 | idx = pc->index; | ||
1342 | count = pc->offset; | ||
1343 | if (idx) | ||
1344 | count += rdpmc(idx - 1); | ||
1345 | |||
1346 | barrier(); | ||
1347 | } while (pc->lock != seq); | ||
1348 | |||
1349 | if (enabled != running) { | ||
1350 | u64 quot, rem; | ||
1351 | |||
1352 | quot = (cyc >> time_shift); | ||
1353 | rem = cyc & ((1 << time_shift) - 1); | ||
1354 | delta = time_offset + quot * time_mult + | ||
1355 | ((rem * time_mult) >> time_shift); | ||
1356 | |||
1357 | enabled += delta; | ||
1358 | if (idx) | ||
1359 | running += delta; | ||
1360 | |||
1361 | quot = count / running; | ||
1362 | rem = count % running; | ||
1363 | count = quot * enabled + (rem * enabled) / running; | ||
1364 | } | ||
1365 | |||
1366 | return count; | ||
1367 | } | ||
1368 | |||
1369 | /* | ||
1370 | * If the RDPMC instruction faults then signal this back to the test parent task: | ||
1371 | */ | ||
1372 | static void segfault_handler(int sig __used, siginfo_t *info __used, void *uc __used) | ||
1373 | { | ||
1374 | exit(-1); | ||
1375 | } | ||
1376 | |||
1377 | static int __test__rdpmc(void) | ||
1378 | { | ||
1379 | long page_size = sysconf(_SC_PAGE_SIZE); | ||
1380 | volatile int tmp = 0; | ||
1381 | u64 i, loops = 1000; | ||
1382 | int n; | ||
1383 | int fd; | ||
1384 | void *addr; | ||
1385 | struct perf_event_attr attr = { | ||
1386 | .type = PERF_TYPE_HARDWARE, | ||
1387 | .config = PERF_COUNT_HW_INSTRUCTIONS, | ||
1388 | .exclude_kernel = 1, | ||
1389 | }; | ||
1390 | u64 delta_sum = 0; | ||
1391 | struct sigaction sa; | ||
1392 | |||
1393 | sigfillset(&sa.sa_mask); | ||
1394 | sa.sa_sigaction = segfault_handler; | ||
1395 | sigaction(SIGSEGV, &sa, NULL); | ||
1396 | |||
1397 | fprintf(stderr, "\n\n"); | ||
1398 | |||
1399 | fd = sys_perf_event_open(&attr, 0, -1, -1, 0); | ||
1400 | if (fd < 0) { | ||
1401 | die("Error: sys_perf_event_open() syscall returned " | ||
1402 | "with %d (%s)\n", fd, strerror(errno)); | ||
1403 | } | ||
1404 | |||
1405 | addr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0); | ||
1406 | if (addr == (void *)(-1)) { | ||
1407 | die("Error: mmap() syscall returned " | ||
1408 | "with (%s)\n", strerror(errno)); | ||
1409 | } | ||
1410 | |||
1411 | for (n = 0; n < 6; n++) { | ||
1412 | u64 stamp, now, delta; | ||
1413 | |||
1414 | stamp = mmap_read_self(addr); | ||
1415 | |||
1416 | for (i = 0; i < loops; i++) | ||
1417 | tmp++; | ||
1418 | |||
1419 | now = mmap_read_self(addr); | ||
1420 | loops *= 10; | ||
1421 | |||
1422 | delta = now - stamp; | ||
1423 | fprintf(stderr, "%14d: %14Lu\n", n, (long long)delta); | ||
1424 | |||
1425 | delta_sum += delta; | ||
1426 | } | ||
1427 | |||
1428 | munmap(addr, page_size); | ||
1429 | close(fd); | ||
1430 | |||
1431 | fprintf(stderr, " "); | ||
1432 | |||
1433 | if (!delta_sum) | ||
1434 | return -1; | ||
1435 | |||
1436 | return 0; | ||
1437 | } | ||
1438 | |||
1439 | static int test__rdpmc(void) | ||
1440 | { | ||
1441 | int status = 0; | ||
1442 | int wret = 0; | ||
1443 | int ret; | ||
1444 | int pid; | ||
1445 | |||
1446 | pid = fork(); | ||
1447 | if (pid < 0) | ||
1448 | return -1; | ||
1449 | |||
1450 | if (!pid) { | ||
1451 | ret = __test__rdpmc(); | ||
1452 | |||
1453 | exit(ret); | ||
1454 | } | ||
1455 | |||
1456 | wret = waitpid(pid, &status, 0); | ||
1457 | if (wret < 0 || status) | ||
1458 | return -1; | ||
1459 | |||
1460 | return 0; | ||
1461 | } | ||
1462 | |||
1463 | #endif | ||
1464 | |||
1299 | static struct test { | 1465 | static struct test { |
1300 | const char *desc; | 1466 | const char *desc; |
1301 | int (*func)(void); | 1467 | int (*func)(void); |
@@ -1320,6 +1486,12 @@ static struct test { | |||
1320 | .desc = "parse events tests", | 1486 | .desc = "parse events tests", |
1321 | .func = test__parse_events, | 1487 | .func = test__parse_events, |
1322 | }, | 1488 | }, |
1489 | #if defined(__x86_64__) || defined(__i386__) | ||
1490 | { | ||
1491 | .desc = "x86 rdpmc test", | ||
1492 | .func = test__rdpmc, | ||
1493 | }, | ||
1494 | #endif | ||
1323 | { | 1495 | { |
1324 | .desc = "Validate PERF_RECORD_* events & perf_sample fields", | 1496 | .desc = "Validate PERF_RECORD_* events & perf_sample fields", |
1325 | .func = test__PERF_RECORD, | 1497 | .func = test__PERF_RECORD, |
@@ -1412,7 +1584,5 @@ int cmd_test(int argc, const char **argv, const char *prefix __used) | |||
1412 | if (symbol__init() < 0) | 1584 | if (symbol__init() < 0) |
1413 | return -1; | 1585 | return -1; |
1414 | 1586 | ||
1415 | setup_pager(); | ||
1416 | |||
1417 | return __cmd_test(argc, argv); | 1587 | return __cmd_test(argc, argv); |
1418 | } | 1588 | } |
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index dd162aa24ba..e3c63aef8ef 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c | |||
@@ -64,7 +64,6 @@ | |||
64 | #include <linux/unistd.h> | 64 | #include <linux/unistd.h> |
65 | #include <linux/types.h> | 65 | #include <linux/types.h> |
66 | 66 | ||
67 | |||
68 | void get_term_dimensions(struct winsize *ws) | 67 | void get_term_dimensions(struct winsize *ws) |
69 | { | 68 | { |
70 | char *s = getenv("LINES"); | 69 | char *s = getenv("LINES"); |
@@ -544,10 +543,20 @@ static void perf_top__sort_new_samples(void *arg) | |||
544 | 543 | ||
545 | static void *display_thread_tui(void *arg) | 544 | static void *display_thread_tui(void *arg) |
546 | { | 545 | { |
546 | struct perf_evsel *pos; | ||
547 | struct perf_top *top = arg; | 547 | struct perf_top *top = arg; |
548 | const char *help = "For a higher level overview, try: perf top --sort comm,dso"; | 548 | const char *help = "For a higher level overview, try: perf top --sort comm,dso"; |
549 | 549 | ||
550 | perf_top__sort_new_samples(top); | 550 | perf_top__sort_new_samples(top); |
551 | |||
552 | /* | ||
553 | * Initialize the uid_filter_str, in the future the TUI will allow | ||
554 | * Zooming in/out UIDs. For now juse use whatever the user passed | ||
555 | * via --uid. | ||
556 | */ | ||
557 | list_for_each_entry(pos, &top->evlist->entries, node) | ||
558 | pos->hists.uid_filter_str = top->uid_str; | ||
559 | |||
551 | perf_evlist__tui_browse_hists(top->evlist, help, | 560 | perf_evlist__tui_browse_hists(top->evlist, help, |
552 | perf_top__sort_new_samples, | 561 | perf_top__sort_new_samples, |
553 | top, top->delay_secs); | 562 | top, top->delay_secs); |
@@ -668,6 +677,12 @@ static void perf_event__process_sample(struct perf_tool *tool, | |||
668 | return; | 677 | return; |
669 | } | 678 | } |
670 | 679 | ||
680 | if (!machine) { | ||
681 | pr_err("%u unprocessable samples recorded.", | ||
682 | top->session->hists.stats.nr_unprocessable_samples++); | ||
683 | return; | ||
684 | } | ||
685 | |||
671 | if (event->header.misc & PERF_RECORD_MISC_EXACT_IP) | 686 | if (event->header.misc & PERF_RECORD_MISC_EXACT_IP) |
672 | top->exact_samples++; | 687 | top->exact_samples++; |
673 | 688 | ||
@@ -857,8 +872,11 @@ static void perf_top__start_counters(struct perf_top *top) | |||
857 | attr->mmap = 1; | 872 | attr->mmap = 1; |
858 | attr->comm = 1; | 873 | attr->comm = 1; |
859 | attr->inherit = top->inherit; | 874 | attr->inherit = top->inherit; |
875 | fallback_missing_features: | ||
876 | if (top->exclude_guest_missing) | ||
877 | attr->exclude_guest = attr->exclude_host = 0; | ||
860 | retry_sample_id: | 878 | retry_sample_id: |
861 | attr->sample_id_all = top->sample_id_all_avail ? 1 : 0; | 879 | attr->sample_id_all = top->sample_id_all_missing ? 0 : 1; |
862 | try_again: | 880 | try_again: |
863 | if (perf_evsel__open(counter, top->evlist->cpus, | 881 | if (perf_evsel__open(counter, top->evlist->cpus, |
864 | top->evlist->threads, top->group, | 882 | top->evlist->threads, top->group, |
@@ -868,12 +886,20 @@ try_again: | |||
868 | if (err == EPERM || err == EACCES) { | 886 | if (err == EPERM || err == EACCES) { |
869 | ui__error_paranoid(); | 887 | ui__error_paranoid(); |
870 | goto out_err; | 888 | goto out_err; |
871 | } else if (err == EINVAL && top->sample_id_all_avail) { | 889 | } else if (err == EINVAL) { |
872 | /* | 890 | if (!top->exclude_guest_missing && |
873 | * Old kernel, no attr->sample_id_type_all field | 891 | (attr->exclude_guest || attr->exclude_host)) { |
874 | */ | 892 | pr_debug("Old kernel, cannot exclude " |
875 | top->sample_id_all_avail = false; | 893 | "guest or host samples.\n"); |
876 | goto retry_sample_id; | 894 | top->exclude_guest_missing = true; |
895 | goto fallback_missing_features; | ||
896 | } else if (!top->sample_id_all_missing) { | ||
897 | /* | ||
898 | * Old kernel, no attr->sample_id_type_all field | ||
899 | */ | ||
900 | top->sample_id_all_missing = true; | ||
901 | goto retry_sample_id; | ||
902 | } | ||
877 | } | 903 | } |
878 | /* | 904 | /* |
879 | * If it's cycles then fall back to hrtimer | 905 | * If it's cycles then fall back to hrtimer |
@@ -956,7 +982,7 @@ static int __cmd_top(struct perf_top *top) | |||
956 | if (ret) | 982 | if (ret) |
957 | goto out_delete; | 983 | goto out_delete; |
958 | 984 | ||
959 | if (top->target_tid != -1) | 985 | if (top->target_tid || top->uid != UINT_MAX) |
960 | perf_event__synthesize_thread_map(&top->tool, top->evlist->threads, | 986 | perf_event__synthesize_thread_map(&top->tool, top->evlist->threads, |
961 | perf_event__process, | 987 | perf_event__process, |
962 | &top->session->host_machine); | 988 | &top->session->host_machine); |
@@ -1094,10 +1120,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) | |||
1094 | struct perf_top top = { | 1120 | struct perf_top top = { |
1095 | .count_filter = 5, | 1121 | .count_filter = 5, |
1096 | .delay_secs = 2, | 1122 | .delay_secs = 2, |
1097 | .target_pid = -1, | 1123 | .uid = UINT_MAX, |
1098 | .target_tid = -1, | ||
1099 | .freq = 1000, /* 1 KHz */ | 1124 | .freq = 1000, /* 1 KHz */ |
1100 | .sample_id_all_avail = true, | ||
1101 | .mmap_pages = 128, | 1125 | .mmap_pages = 128, |
1102 | .sym_pcnt_filter = 5, | 1126 | .sym_pcnt_filter = 5, |
1103 | }; | 1127 | }; |
@@ -1108,9 +1132,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) | |||
1108 | parse_events_option), | 1132 | parse_events_option), |
1109 | OPT_INTEGER('c', "count", &top.default_interval, | 1133 | OPT_INTEGER('c', "count", &top.default_interval, |
1110 | "event period to sample"), | 1134 | "event period to sample"), |
1111 | OPT_INTEGER('p', "pid", &top.target_pid, | 1135 | OPT_STRING('p', "pid", &top.target_pid, "pid", |
1112 | "profile events on existing process id"), | 1136 | "profile events on existing process id"), |
1113 | OPT_INTEGER('t', "tid", &top.target_tid, | 1137 | OPT_STRING('t', "tid", &top.target_tid, "tid", |
1114 | "profile events on existing thread id"), | 1138 | "profile events on existing thread id"), |
1115 | OPT_BOOLEAN('a', "all-cpus", &top.system_wide, | 1139 | OPT_BOOLEAN('a', "all-cpus", &top.system_wide, |
1116 | "system-wide collection from all CPUs"), | 1140 | "system-wide collection from all CPUs"), |
@@ -1169,6 +1193,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) | |||
1169 | "Display raw encoding of assembly instructions (default)"), | 1193 | "Display raw encoding of assembly instructions (default)"), |
1170 | OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", | 1194 | OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", |
1171 | "Specify disassembler style (e.g. -M intel for intel syntax)"), | 1195 | "Specify disassembler style (e.g. -M intel for intel syntax)"), |
1196 | OPT_STRING('u', "uid", &top.uid_str, "user", "user to profile"), | ||
1172 | OPT_END() | 1197 | OPT_END() |
1173 | }; | 1198 | }; |
1174 | 1199 | ||
@@ -1194,18 +1219,22 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) | |||
1194 | 1219 | ||
1195 | setup_browser(false); | 1220 | setup_browser(false); |
1196 | 1221 | ||
1222 | top.uid = parse_target_uid(top.uid_str, top.target_tid, top.target_pid); | ||
1223 | if (top.uid_str != NULL && top.uid == UINT_MAX - 1) | ||
1224 | goto out_delete_evlist; | ||
1225 | |||
1197 | /* CPU and PID are mutually exclusive */ | 1226 | /* CPU and PID are mutually exclusive */ |
1198 | if (top.target_tid > 0 && top.cpu_list) { | 1227 | if (top.target_tid && top.cpu_list) { |
1199 | printf("WARNING: PID switch overriding CPU\n"); | 1228 | printf("WARNING: PID switch overriding CPU\n"); |
1200 | sleep(1); | 1229 | sleep(1); |
1201 | top.cpu_list = NULL; | 1230 | top.cpu_list = NULL; |
1202 | } | 1231 | } |
1203 | 1232 | ||
1204 | if (top.target_pid != -1) | 1233 | if (top.target_pid) |
1205 | top.target_tid = top.target_pid; | 1234 | top.target_tid = top.target_pid; |
1206 | 1235 | ||
1207 | if (perf_evlist__create_maps(top.evlist, top.target_pid, | 1236 | if (perf_evlist__create_maps(top.evlist, top.target_pid, |
1208 | top.target_tid, top.cpu_list) < 0) | 1237 | top.target_tid, top.uid, top.cpu_list) < 0) |
1209 | usage_with_options(top_usage, options); | 1238 | usage_with_options(top_usage, options); |
1210 | 1239 | ||
1211 | if (!top.evlist->nr_entries && | 1240 | if (!top.evlist->nr_entries && |
@@ -1269,6 +1298,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) | |||
1269 | 1298 | ||
1270 | status = __cmd_top(&top); | 1299 | status = __cmd_top(&top); |
1271 | 1300 | ||
1301 | out_delete_evlist: | ||
1272 | perf_evlist__delete(top.evlist); | 1302 | perf_evlist__delete(top.evlist); |
1273 | 1303 | ||
1274 | return status; | 1304 | return status; |
diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 64f8bee31ce..f0227e93665 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h | |||
@@ -167,7 +167,6 @@ sys_perf_event_open(struct perf_event_attr *attr, | |||
167 | pid_t pid, int cpu, int group_fd, | 167 | pid_t pid, int cpu, int group_fd, |
168 | unsigned long flags) | 168 | unsigned long flags) |
169 | { | 169 | { |
170 | attr->size = sizeof(*attr); | ||
171 | return syscall(__NR_perf_event_open, attr, pid, cpu, | 170 | return syscall(__NR_perf_event_open, attr, pid, cpu, |
172 | group_fd, flags); | 171 | group_fd, flags); |
173 | } | 172 | } |
@@ -186,8 +185,9 @@ extern const char perf_version_string[]; | |||
186 | void pthread__unblock_sigwinch(void); | 185 | void pthread__unblock_sigwinch(void); |
187 | 186 | ||
188 | struct perf_record_opts { | 187 | struct perf_record_opts { |
189 | pid_t target_pid; | 188 | const char *target_pid; |
190 | pid_t target_tid; | 189 | const char *target_tid; |
190 | uid_t uid; | ||
191 | bool call_graph; | 191 | bool call_graph; |
192 | bool group; | 192 | bool group; |
193 | bool inherit_stat; | 193 | bool inherit_stat; |
@@ -198,7 +198,8 @@ struct perf_record_opts { | |||
198 | bool raw_samples; | 198 | bool raw_samples; |
199 | bool sample_address; | 199 | bool sample_address; |
200 | bool sample_time; | 200 | bool sample_time; |
201 | bool sample_id_all_avail; | 201 | bool sample_id_all_missing; |
202 | bool exclude_guest_missing; | ||
202 | bool system_wide; | 203 | bool system_wide; |
203 | bool period; | 204 | bool period; |
204 | unsigned int freq; | 205 | unsigned int freq; |
diff --git a/tools/perf/python/twatch.py b/tools/perf/python/twatch.py index df638c438a9..b11cca58423 100755 --- a/tools/perf/python/twatch.py +++ b/tools/perf/python/twatch.py | |||
@@ -19,7 +19,7 @@ def main(): | |||
19 | cpus = perf.cpu_map() | 19 | cpus = perf.cpu_map() |
20 | threads = perf.thread_map() | 20 | threads = perf.thread_map() |
21 | evsel = perf.evsel(task = 1, comm = 1, mmap = 0, | 21 | evsel = perf.evsel(task = 1, comm = 1, mmap = 0, |
22 | wakeup_events = 1, sample_period = 1, | 22 | wakeup_events = 1, watermark = 1, |
23 | sample_id_all = 1, | 23 | sample_id_all = 1, |
24 | sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU | perf.SAMPLE_TID) | 24 | sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU | perf.SAMPLE_TID) |
25 | evsel.open(cpus = cpus, threads = threads); | 25 | evsel.open(cpus = cpus, threads = threads); |
diff --git a/tools/perf/util/bitmap.c b/tools/perf/util/bitmap.c index 5e230acae1e..0a1adc1111f 100644 --- a/tools/perf/util/bitmap.c +++ b/tools/perf/util/bitmap.c | |||
@@ -19,3 +19,13 @@ int __bitmap_weight(const unsigned long *bitmap, int bits) | |||
19 | 19 | ||
20 | return w; | 20 | return w; |
21 | } | 21 | } |
22 | |||
23 | void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, | ||
24 | const unsigned long *bitmap2, int bits) | ||
25 | { | ||
26 | int k; | ||
27 | int nr = BITS_TO_LONGS(bits); | ||
28 | |||
29 | for (k = 0; k < nr; k++) | ||
30 | dst[k] = bitmap1[k] | bitmap2[k]; | ||
31 | } | ||
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 6893eec693a..adc72f09914 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c | |||
@@ -166,6 +166,17 @@ out: | |||
166 | return cpus; | 166 | return cpus; |
167 | } | 167 | } |
168 | 168 | ||
169 | size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp) | ||
170 | { | ||
171 | int i; | ||
172 | size_t printed = fprintf(fp, "%d cpu%s: ", | ||
173 | map->nr, map->nr > 1 ? "s" : ""); | ||
174 | for (i = 0; i < map->nr; ++i) | ||
175 | printed += fprintf(fp, "%s%d", i ? ", " : "", map->map[i]); | ||
176 | |||
177 | return printed + fprintf(fp, "\n"); | ||
178 | } | ||
179 | |||
169 | struct cpu_map *cpu_map__dummy_new(void) | 180 | struct cpu_map *cpu_map__dummy_new(void) |
170 | { | 181 | { |
171 | struct cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int)); | 182 | struct cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int)); |
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 072c0a37479..c41518573c6 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef __PERF_CPUMAP_H | 1 | #ifndef __PERF_CPUMAP_H |
2 | #define __PERF_CPUMAP_H | 2 | #define __PERF_CPUMAP_H |
3 | 3 | ||
4 | #include <stdio.h> | ||
5 | |||
4 | struct cpu_map { | 6 | struct cpu_map { |
5 | int nr; | 7 | int nr; |
6 | int map[]; | 8 | int map[]; |
@@ -10,4 +12,6 @@ struct cpu_map *cpu_map__new(const char *cpu_list); | |||
10 | struct cpu_map *cpu_map__dummy_new(void); | 12 | struct cpu_map *cpu_map__dummy_new(void); |
11 | void cpu_map__delete(struct cpu_map *map); | 13 | void cpu_map__delete(struct cpu_map *map); |
12 | 14 | ||
15 | size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp); | ||
16 | |||
13 | #endif /* __PERF_CPUMAP_H */ | 17 | #endif /* __PERF_CPUMAP_H */ |
diff --git a/tools/perf/util/ctype.c b/tools/perf/util/ctype.c index 35073621e5d..aada3ac5e89 100644 --- a/tools/perf/util/ctype.c +++ b/tools/perf/util/ctype.c | |||
@@ -3,7 +3,7 @@ | |||
3 | * | 3 | * |
4 | * No surprises, and works with signed and unsigned chars. | 4 | * No surprises, and works with signed and unsigned chars. |
5 | */ | 5 | */ |
6 | #include "cache.h" | 6 | #include "util.h" |
7 | 7 | ||
8 | enum { | 8 | enum { |
9 | S = GIT_SPACE, | 9 | S = GIT_SPACE, |
diff --git a/tools/perf/util/debugfs.c b/tools/perf/util/debugfs.c index ffc35e748e8..dd8b19319c0 100644 --- a/tools/perf/util/debugfs.c +++ b/tools/perf/util/debugfs.c | |||
@@ -15,32 +15,6 @@ static const char *debugfs_known_mountpoints[] = { | |||
15 | 0, | 15 | 0, |
16 | }; | 16 | }; |
17 | 17 | ||
18 | /* use this to force a umount */ | ||
19 | void debugfs_force_cleanup(void) | ||
20 | { | ||
21 | debugfs_find_mountpoint(); | ||
22 | debugfs_premounted = 0; | ||
23 | debugfs_umount(); | ||
24 | } | ||
25 | |||
26 | /* construct a full path to a debugfs element */ | ||
27 | int debugfs_make_path(const char *element, char *buffer, int size) | ||
28 | { | ||
29 | int len; | ||
30 | |||
31 | if (strlen(debugfs_mountpoint) == 0) { | ||
32 | buffer[0] = '\0'; | ||
33 | return -1; | ||
34 | } | ||
35 | |||
36 | len = strlen(debugfs_mountpoint) + strlen(element) + 1; | ||
37 | if (len >= size) | ||
38 | return len+1; | ||
39 | |||
40 | snprintf(buffer, size-1, "%s/%s", debugfs_mountpoint, element); | ||
41 | return 0; | ||
42 | } | ||
43 | |||
44 | static int debugfs_found; | 18 | static int debugfs_found; |
45 | 19 | ||
46 | /* find the path to the mounted debugfs */ | 20 | /* find the path to the mounted debugfs */ |
@@ -97,17 +71,6 @@ int debugfs_valid_mountpoint(const char *debugfs) | |||
97 | return 0; | 71 | return 0; |
98 | } | 72 | } |
99 | 73 | ||
100 | |||
101 | int debugfs_valid_entry(const char *path) | ||
102 | { | ||
103 | struct stat st; | ||
104 | |||
105 | if (stat(path, &st)) | ||
106 | return -errno; | ||
107 | |||
108 | return 0; | ||
109 | } | ||
110 | |||
111 | static void debugfs_set_tracing_events_path(const char *mountpoint) | 74 | static void debugfs_set_tracing_events_path(const char *mountpoint) |
112 | { | 75 | { |
113 | snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s", | 76 | snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s", |
@@ -149,107 +112,3 @@ void debugfs_set_path(const char *mountpoint) | |||
149 | snprintf(debugfs_mountpoint, sizeof(debugfs_mountpoint), "%s", mountpoint); | 112 | snprintf(debugfs_mountpoint, sizeof(debugfs_mountpoint), "%s", mountpoint); |
150 | debugfs_set_tracing_events_path(mountpoint); | 113 | debugfs_set_tracing_events_path(mountpoint); |
151 | } | 114 | } |
152 | |||
153 | /* umount the debugfs */ | ||
154 | |||
155 | int debugfs_umount(void) | ||
156 | { | ||
157 | char umountcmd[128]; | ||
158 | int ret; | ||
159 | |||
160 | /* if it was already mounted, leave it */ | ||
161 | if (debugfs_premounted) | ||
162 | return 0; | ||
163 | |||
164 | /* make sure it's a valid mount point */ | ||
165 | ret = debugfs_valid_mountpoint(debugfs_mountpoint); | ||
166 | if (ret) | ||
167 | return ret; | ||
168 | |||
169 | snprintf(umountcmd, sizeof(umountcmd), | ||
170 | "/bin/umount %s", debugfs_mountpoint); | ||
171 | return system(umountcmd); | ||
172 | } | ||
173 | |||
174 | int debugfs_write(const char *entry, const char *value) | ||
175 | { | ||
176 | char path[PATH_MAX + 1]; | ||
177 | int ret, count; | ||
178 | int fd; | ||
179 | |||
180 | /* construct the path */ | ||
181 | snprintf(path, sizeof(path), "%s/%s", debugfs_mountpoint, entry); | ||
182 | |||
183 | /* verify that it exists */ | ||
184 | ret = debugfs_valid_entry(path); | ||
185 | if (ret) | ||
186 | return ret; | ||
187 | |||
188 | /* get how many chars we're going to write */ | ||
189 | count = strlen(value); | ||
190 | |||
191 | /* open the debugfs entry */ | ||
192 | fd = open(path, O_RDWR); | ||
193 | if (fd < 0) | ||
194 | return -errno; | ||
195 | |||
196 | while (count > 0) { | ||
197 | /* write it */ | ||
198 | ret = write(fd, value, count); | ||
199 | if (ret <= 0) { | ||
200 | if (ret == EAGAIN) | ||
201 | continue; | ||
202 | close(fd); | ||
203 | return -errno; | ||
204 | } | ||
205 | count -= ret; | ||
206 | } | ||
207 | |||
208 | /* close it */ | ||
209 | close(fd); | ||
210 | |||
211 | /* return success */ | ||
212 | return 0; | ||
213 | } | ||
214 | |||
215 | /* | ||
216 | * read a debugfs entry | ||
217 | * returns the number of chars read or a negative errno | ||
218 | */ | ||
219 | int debugfs_read(const char *entry, char *buffer, size_t size) | ||
220 | { | ||
221 | char path[PATH_MAX + 1]; | ||
222 | int ret; | ||
223 | int fd; | ||
224 | |||
225 | /* construct the path */ | ||
226 | snprintf(path, sizeof(path), "%s/%s", debugfs_mountpoint, entry); | ||
227 | |||
228 | /* verify that it exists */ | ||
229 | ret = debugfs_valid_entry(path); | ||
230 | if (ret) | ||
231 | return ret; | ||
232 | |||
233 | /* open the debugfs entry */ | ||
234 | fd = open(path, O_RDONLY); | ||
235 | if (fd < 0) | ||
236 | return -errno; | ||
237 | |||
238 | do { | ||
239 | /* read it */ | ||
240 | ret = read(fd, buffer, size); | ||
241 | if (ret == 0) { | ||
242 | close(fd); | ||
243 | return EOF; | ||
244 | } | ||
245 | } while (ret < 0 && errno == EAGAIN); | ||
246 | |||
247 | /* close it */ | ||
248 | close(fd); | ||
249 | |||
250 | /* make *sure* there's a null character at the end */ | ||
251 | buffer[ret] = '\0'; | ||
252 | |||
253 | /* return the number of chars read */ | ||
254 | return ret; | ||
255 | } | ||
diff --git a/tools/perf/util/debugfs.h b/tools/perf/util/debugfs.h index 4a878f735eb..68f3e87ec57 100644 --- a/tools/perf/util/debugfs.h +++ b/tools/perf/util/debugfs.h | |||
@@ -3,14 +3,8 @@ | |||
3 | 3 | ||
4 | const char *debugfs_find_mountpoint(void); | 4 | const char *debugfs_find_mountpoint(void); |
5 | int debugfs_valid_mountpoint(const char *debugfs); | 5 | int debugfs_valid_mountpoint(const char *debugfs); |
6 | int debugfs_valid_entry(const char *path); | ||
7 | char *debugfs_mount(const char *mountpoint); | 6 | char *debugfs_mount(const char *mountpoint); |
8 | int debugfs_umount(void); | ||
9 | void debugfs_set_path(const char *mountpoint); | 7 | void debugfs_set_path(const char *mountpoint); |
10 | int debugfs_write(const char *entry, const char *value); | ||
11 | int debugfs_read(const char *entry, char *buffer, size_t size); | ||
12 | void debugfs_force_cleanup(void); | ||
13 | int debugfs_make_path(const char *element, char *buffer, int size); | ||
14 | 8 | ||
15 | extern char debugfs_mountpoint[]; | 9 | extern char debugfs_mountpoint[]; |
16 | extern char tracing_events_path[]; | 10 | extern char tracing_events_path[]; |
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index ea32a061f1c..f8da9fada00 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c | |||
@@ -97,9 +97,9 @@ void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry) | |||
97 | ++evlist->nr_entries; | 97 | ++evlist->nr_entries; |
98 | } | 98 | } |
99 | 99 | ||
100 | static void perf_evlist__splice_list_tail(struct perf_evlist *evlist, | 100 | void perf_evlist__splice_list_tail(struct perf_evlist *evlist, |
101 | struct list_head *list, | 101 | struct list_head *list, |
102 | int nr_entries) | 102 | int nr_entries) |
103 | { | 103 | { |
104 | list_splice_tail(list, &evlist->entries); | 104 | list_splice_tail(list, &evlist->entries); |
105 | evlist->nr_entries += nr_entries; | 105 | evlist->nr_entries += nr_entries; |
@@ -597,15 +597,15 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, | |||
597 | return perf_evlist__mmap_per_cpu(evlist, prot, mask); | 597 | return perf_evlist__mmap_per_cpu(evlist, prot, mask); |
598 | } | 598 | } |
599 | 599 | ||
600 | int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid, | 600 | int perf_evlist__create_maps(struct perf_evlist *evlist, const char *target_pid, |
601 | pid_t target_tid, const char *cpu_list) | 601 | const char *target_tid, uid_t uid, const char *cpu_list) |
602 | { | 602 | { |
603 | evlist->threads = thread_map__new(target_pid, target_tid); | 603 | evlist->threads = thread_map__new_str(target_pid, target_tid, uid); |
604 | 604 | ||
605 | if (evlist->threads == NULL) | 605 | if (evlist->threads == NULL) |
606 | return -1; | 606 | return -1; |
607 | 607 | ||
608 | if (cpu_list == NULL && target_tid != -1) | 608 | if (uid != UINT_MAX || (cpu_list == NULL && target_tid)) |
609 | evlist->cpus = cpu_map__dummy_new(); | 609 | evlist->cpus = cpu_map__dummy_new(); |
610 | else | 610 | else |
611 | evlist->cpus = cpu_map__new(cpu_list); | 611 | evlist->cpus = cpu_map__new(cpu_list); |
@@ -824,7 +824,7 @@ int perf_evlist__prepare_workload(struct perf_evlist *evlist, | |||
824 | exit(-1); | 824 | exit(-1); |
825 | } | 825 | } |
826 | 826 | ||
827 | if (!opts->system_wide && opts->target_tid == -1 && opts->target_pid == -1) | 827 | if (!opts->system_wide && !opts->target_tid && !opts->target_pid) |
828 | evlist->threads->map[0] = evlist->workload.pid; | 828 | evlist->threads->map[0] = evlist->workload.pid; |
829 | 829 | ||
830 | close(child_ready_pipe[1]); | 830 | close(child_ready_pipe[1]); |
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 8922aeed046..21f1c9e57f1 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h | |||
@@ -106,8 +106,8 @@ static inline void perf_evlist__set_maps(struct perf_evlist *evlist, | |||
106 | evlist->threads = threads; | 106 | evlist->threads = threads; |
107 | } | 107 | } |
108 | 108 | ||
109 | int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid, | 109 | int perf_evlist__create_maps(struct perf_evlist *evlist, const char *target_pid, |
110 | pid_t target_tid, const char *cpu_list); | 110 | const char *tid, uid_t uid, const char *cpu_list); |
111 | void perf_evlist__delete_maps(struct perf_evlist *evlist); | 111 | void perf_evlist__delete_maps(struct perf_evlist *evlist); |
112 | int perf_evlist__set_filters(struct perf_evlist *evlist); | 112 | int perf_evlist__set_filters(struct perf_evlist *evlist); |
113 | 113 | ||
@@ -117,4 +117,9 @@ u16 perf_evlist__id_hdr_size(const struct perf_evlist *evlist); | |||
117 | 117 | ||
118 | bool perf_evlist__valid_sample_type(const struct perf_evlist *evlist); | 118 | bool perf_evlist__valid_sample_type(const struct perf_evlist *evlist); |
119 | bool perf_evlist__valid_sample_id_all(const struct perf_evlist *evlist); | 119 | bool perf_evlist__valid_sample_id_all(const struct perf_evlist *evlist); |
120 | |||
121 | void perf_evlist__splice_list_tail(struct perf_evlist *evlist, | ||
122 | struct list_head *list, | ||
123 | int nr_entries); | ||
124 | |||
120 | #endif /* __PERF_EVLIST_H */ | 125 | #endif /* __PERF_EVLIST_H */ |
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 7132ee834e0..302d49a9f98 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c | |||
@@ -68,7 +68,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts) | |||
68 | struct perf_event_attr *attr = &evsel->attr; | 68 | struct perf_event_attr *attr = &evsel->attr; |
69 | int track = !evsel->idx; /* only the first counter needs these */ | 69 | int track = !evsel->idx; /* only the first counter needs these */ |
70 | 70 | ||
71 | attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0; | 71 | attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1; |
72 | attr->inherit = !opts->no_inherit; | 72 | attr->inherit = !opts->no_inherit; |
73 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | | 73 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | |
74 | PERF_FORMAT_TOTAL_TIME_RUNNING | | 74 | PERF_FORMAT_TOTAL_TIME_RUNNING | |
@@ -111,7 +111,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts) | |||
111 | if (opts->period) | 111 | if (opts->period) |
112 | attr->sample_type |= PERF_SAMPLE_PERIOD; | 112 | attr->sample_type |= PERF_SAMPLE_PERIOD; |
113 | 113 | ||
114 | if (opts->sample_id_all_avail && | 114 | if (!opts->sample_id_all_missing && |
115 | (opts->sample_time || opts->system_wide || | 115 | (opts->sample_time || opts->system_wide || |
116 | !opts->no_inherit || opts->cpu_list)) | 116 | !opts->no_inherit || opts->cpu_list)) |
117 | attr->sample_type |= PERF_SAMPLE_TIME; | 117 | attr->sample_type |= PERF_SAMPLE_TIME; |
@@ -130,7 +130,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts) | |||
130 | attr->mmap = track; | 130 | attr->mmap = track; |
131 | attr->comm = track; | 131 | attr->comm = track; |
132 | 132 | ||
133 | if (opts->target_pid == -1 && opts->target_tid == -1 && !opts->system_wide) { | 133 | if (!opts->target_pid && !opts->target_tid && !opts->system_wide) { |
134 | attr->disabled = 1; | 134 | attr->disabled = 1; |
135 | attr->enable_on_exec = 1; | 135 | attr->enable_on_exec = 1; |
136 | } | 136 | } |
@@ -536,7 +536,7 @@ int perf_event__parse_sample(const union perf_event *event, u64 type, | |||
536 | } | 536 | } |
537 | 537 | ||
538 | if (type & PERF_SAMPLE_READ) { | 538 | if (type & PERF_SAMPLE_READ) { |
539 | fprintf(stderr, "PERF_SAMPLE_READ is unsuported for now\n"); | 539 | fprintf(stderr, "PERF_SAMPLE_READ is unsupported for now\n"); |
540 | return -1; | 540 | return -1; |
541 | } | 541 | } |
542 | 542 | ||
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index ecd7f4dd7ee..9f867d96c6a 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c | |||
@@ -63,9 +63,20 @@ char *perf_header__find_event(u64 id) | |||
63 | return NULL; | 63 | return NULL; |
64 | } | 64 | } |
65 | 65 | ||
66 | static const char *__perf_magic = "PERFFILE"; | 66 | /* |
67 | * magic2 = "PERFILE2" | ||
68 | * must be a numerical value to let the endianness | ||
69 | * determine the memory layout. That way we are able | ||
70 | * to detect endianness when reading the perf.data file | ||
71 | * back. | ||
72 | * | ||
73 | * we check for legacy (PERFFILE) format. | ||
74 | */ | ||
75 | static const char *__perf_magic1 = "PERFFILE"; | ||
76 | static const u64 __perf_magic2 = 0x32454c4946524550ULL; | ||
77 | static const u64 __perf_magic2_sw = 0x50455246494c4532ULL; | ||
67 | 78 | ||
68 | #define PERF_MAGIC (*(u64 *)__perf_magic) | 79 | #define PERF_MAGIC __perf_magic2 |
69 | 80 | ||
70 | struct perf_file_attr { | 81 | struct perf_file_attr { |
71 | struct perf_event_attr attr; | 82 | struct perf_event_attr attr; |
@@ -1305,25 +1316,198 @@ static void print_cpuid(struct perf_header *ph, int fd, FILE *fp) | |||
1305 | free(str); | 1316 | free(str); |
1306 | } | 1317 | } |
1307 | 1318 | ||
1319 | static int __event_process_build_id(struct build_id_event *bev, | ||
1320 | char *filename, | ||
1321 | struct perf_session *session) | ||
1322 | { | ||
1323 | int err = -1; | ||
1324 | struct list_head *head; | ||
1325 | struct machine *machine; | ||
1326 | u16 misc; | ||
1327 | struct dso *dso; | ||
1328 | enum dso_kernel_type dso_type; | ||
1329 | |||
1330 | machine = perf_session__findnew_machine(session, bev->pid); | ||
1331 | if (!machine) | ||
1332 | goto out; | ||
1333 | |||
1334 | misc = bev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; | ||
1335 | |||
1336 | switch (misc) { | ||
1337 | case PERF_RECORD_MISC_KERNEL: | ||
1338 | dso_type = DSO_TYPE_KERNEL; | ||
1339 | head = &machine->kernel_dsos; | ||
1340 | break; | ||
1341 | case PERF_RECORD_MISC_GUEST_KERNEL: | ||
1342 | dso_type = DSO_TYPE_GUEST_KERNEL; | ||
1343 | head = &machine->kernel_dsos; | ||
1344 | break; | ||
1345 | case PERF_RECORD_MISC_USER: | ||
1346 | case PERF_RECORD_MISC_GUEST_USER: | ||
1347 | dso_type = DSO_TYPE_USER; | ||
1348 | head = &machine->user_dsos; | ||
1349 | break; | ||
1350 | default: | ||
1351 | goto out; | ||
1352 | } | ||
1353 | |||
1354 | dso = __dsos__findnew(head, filename); | ||
1355 | if (dso != NULL) { | ||
1356 | char sbuild_id[BUILD_ID_SIZE * 2 + 1]; | ||
1357 | |||
1358 | dso__set_build_id(dso, &bev->build_id); | ||
1359 | |||
1360 | if (filename[0] == '[') | ||
1361 | dso->kernel = dso_type; | ||
1362 | |||
1363 | build_id__sprintf(dso->build_id, sizeof(dso->build_id), | ||
1364 | sbuild_id); | ||
1365 | pr_debug("build id event received for %s: %s\n", | ||
1366 | dso->long_name, sbuild_id); | ||
1367 | } | ||
1368 | |||
1369 | err = 0; | ||
1370 | out: | ||
1371 | return err; | ||
1372 | } | ||
1373 | |||
1374 | static int perf_header__read_build_ids_abi_quirk(struct perf_header *header, | ||
1375 | int input, u64 offset, u64 size) | ||
1376 | { | ||
1377 | struct perf_session *session = container_of(header, struct perf_session, header); | ||
1378 | struct { | ||
1379 | struct perf_event_header header; | ||
1380 | u8 build_id[ALIGN(BUILD_ID_SIZE, sizeof(u64))]; | ||
1381 | char filename[0]; | ||
1382 | } old_bev; | ||
1383 | struct build_id_event bev; | ||
1384 | char filename[PATH_MAX]; | ||
1385 | u64 limit = offset + size; | ||
1386 | |||
1387 | while (offset < limit) { | ||
1388 | ssize_t len; | ||
1389 | |||
1390 | if (read(input, &old_bev, sizeof(old_bev)) != sizeof(old_bev)) | ||
1391 | return -1; | ||
1392 | |||
1393 | if (header->needs_swap) | ||
1394 | perf_event_header__bswap(&old_bev.header); | ||
1395 | |||
1396 | len = old_bev.header.size - sizeof(old_bev); | ||
1397 | if (read(input, filename, len) != len) | ||
1398 | return -1; | ||
1399 | |||
1400 | bev.header = old_bev.header; | ||
1401 | |||
1402 | /* | ||
1403 | * As the pid is the missing value, we need to fill | ||
1404 | * it properly. The header.misc value give us nice hint. | ||
1405 | */ | ||
1406 | bev.pid = HOST_KERNEL_ID; | ||
1407 | if (bev.header.misc == PERF_RECORD_MISC_GUEST_USER || | ||
1408 | bev.header.misc == PERF_RECORD_MISC_GUEST_KERNEL) | ||
1409 | bev.pid = DEFAULT_GUEST_KERNEL_ID; | ||
1410 | |||
1411 | memcpy(bev.build_id, old_bev.build_id, sizeof(bev.build_id)); | ||
1412 | __event_process_build_id(&bev, filename, session); | ||
1413 | |||
1414 | offset += bev.header.size; | ||
1415 | } | ||
1416 | |||
1417 | return 0; | ||
1418 | } | ||
1419 | |||
1420 | static int perf_header__read_build_ids(struct perf_header *header, | ||
1421 | int input, u64 offset, u64 size) | ||
1422 | { | ||
1423 | struct perf_session *session = container_of(header, struct perf_session, header); | ||
1424 | struct build_id_event bev; | ||
1425 | char filename[PATH_MAX]; | ||
1426 | u64 limit = offset + size, orig_offset = offset; | ||
1427 | int err = -1; | ||
1428 | |||
1429 | while (offset < limit) { | ||
1430 | ssize_t len; | ||
1431 | |||
1432 | if (read(input, &bev, sizeof(bev)) != sizeof(bev)) | ||
1433 | goto out; | ||
1434 | |||
1435 | if (header->needs_swap) | ||
1436 | perf_event_header__bswap(&bev.header); | ||
1437 | |||
1438 | len = bev.header.size - sizeof(bev); | ||
1439 | if (read(input, filename, len) != len) | ||
1440 | goto out; | ||
1441 | /* | ||
1442 | * The a1645ce1 changeset: | ||
1443 | * | ||
1444 | * "perf: 'perf kvm' tool for monitoring guest performance from host" | ||
1445 | * | ||
1446 | * Added a field to struct build_id_event that broke the file | ||
1447 | * format. | ||
1448 | * | ||
1449 | * Since the kernel build-id is the first entry, process the | ||
1450 | * table using the old format if the well known | ||
1451 | * '[kernel.kallsyms]' string for the kernel build-id has the | ||
1452 | * first 4 characters chopped off (where the pid_t sits). | ||
1453 | */ | ||
1454 | if (memcmp(filename, "nel.kallsyms]", 13) == 0) { | ||
1455 | if (lseek(input, orig_offset, SEEK_SET) == (off_t)-1) | ||
1456 | return -1; | ||
1457 | return perf_header__read_build_ids_abi_quirk(header, input, offset, size); | ||
1458 | } | ||
1459 | |||
1460 | __event_process_build_id(&bev, filename, session); | ||
1461 | |||
1462 | offset += bev.header.size; | ||
1463 | } | ||
1464 | err = 0; | ||
1465 | out: | ||
1466 | return err; | ||
1467 | } | ||
1468 | |||
1469 | static int process_trace_info(struct perf_file_section *section __unused, | ||
1470 | struct perf_header *ph __unused, | ||
1471 | int feat __unused, int fd) | ||
1472 | { | ||
1473 | trace_report(fd, false); | ||
1474 | return 0; | ||
1475 | } | ||
1476 | |||
1477 | static int process_build_id(struct perf_file_section *section, | ||
1478 | struct perf_header *ph, | ||
1479 | int feat __unused, int fd) | ||
1480 | { | ||
1481 | if (perf_header__read_build_ids(ph, fd, section->offset, section->size)) | ||
1482 | pr_debug("Failed to read buildids, continuing...\n"); | ||
1483 | return 0; | ||
1484 | } | ||
1485 | |||
1308 | struct feature_ops { | 1486 | struct feature_ops { |
1309 | int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist); | 1487 | int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist); |
1310 | void (*print)(struct perf_header *h, int fd, FILE *fp); | 1488 | void (*print)(struct perf_header *h, int fd, FILE *fp); |
1489 | int (*process)(struct perf_file_section *section, | ||
1490 | struct perf_header *h, int feat, int fd); | ||
1311 | const char *name; | 1491 | const char *name; |
1312 | bool full_only; | 1492 | bool full_only; |
1313 | }; | 1493 | }; |
1314 | 1494 | ||
1315 | #define FEAT_OPA(n, func) \ | 1495 | #define FEAT_OPA(n, func) \ |
1316 | [n] = { .name = #n, .write = write_##func, .print = print_##func } | 1496 | [n] = { .name = #n, .write = write_##func, .print = print_##func } |
1497 | #define FEAT_OPP(n, func) \ | ||
1498 | [n] = { .name = #n, .write = write_##func, .print = print_##func, \ | ||
1499 | .process = process_##func } | ||
1317 | #define FEAT_OPF(n, func) \ | 1500 | #define FEAT_OPF(n, func) \ |
1318 | [n] = { .name = #n, .write = write_##func, .print = print_##func, .full_only = true } | 1501 | [n] = { .name = #n, .write = write_##func, .print = print_##func, \ |
1502 | .full_only = true } | ||
1319 | 1503 | ||
1320 | /* feature_ops not implemented: */ | 1504 | /* feature_ops not implemented: */ |
1321 | #define print_trace_info NULL | 1505 | #define print_trace_info NULL |
1322 | #define print_build_id NULL | 1506 | #define print_build_id NULL |
1323 | 1507 | ||
1324 | static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { | 1508 | static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { |
1325 | FEAT_OPA(HEADER_TRACE_INFO, trace_info), | 1509 | FEAT_OPP(HEADER_TRACE_INFO, trace_info), |
1326 | FEAT_OPA(HEADER_BUILD_ID, build_id), | 1510 | FEAT_OPP(HEADER_BUILD_ID, build_id), |
1327 | FEAT_OPA(HEADER_HOSTNAME, hostname), | 1511 | FEAT_OPA(HEADER_HOSTNAME, hostname), |
1328 | FEAT_OPA(HEADER_OSRELEASE, osrelease), | 1512 | FEAT_OPA(HEADER_OSRELEASE, osrelease), |
1329 | FEAT_OPA(HEADER_VERSION, version), | 1513 | FEAT_OPA(HEADER_VERSION, version), |
@@ -1620,24 +1804,59 @@ out_free: | |||
1620 | return err; | 1804 | return err; |
1621 | } | 1805 | } |
1622 | 1806 | ||
1807 | static int check_magic_endian(u64 *magic, struct perf_file_header *header, | ||
1808 | struct perf_header *ph) | ||
1809 | { | ||
1810 | int ret; | ||
1811 | |||
1812 | /* check for legacy format */ | ||
1813 | ret = memcmp(magic, __perf_magic1, sizeof(*magic)); | ||
1814 | if (ret == 0) { | ||
1815 | pr_debug("legacy perf.data format\n"); | ||
1816 | if (!header) | ||
1817 | return -1; | ||
1818 | |||
1819 | if (header->attr_size != sizeof(struct perf_file_attr)) { | ||
1820 | u64 attr_size = bswap_64(header->attr_size); | ||
1821 | |||
1822 | if (attr_size != sizeof(struct perf_file_attr)) | ||
1823 | return -1; | ||
1824 | |||
1825 | ph->needs_swap = true; | ||
1826 | } | ||
1827 | return 0; | ||
1828 | } | ||
1829 | |||
1830 | /* check magic number with same endianness */ | ||
1831 | if (*magic == __perf_magic2) | ||
1832 | return 0; | ||
1833 | |||
1834 | /* check magic number but opposite endianness */ | ||
1835 | if (*magic != __perf_magic2_sw) | ||
1836 | return -1; | ||
1837 | |||
1838 | ph->needs_swap = true; | ||
1839 | |||
1840 | return 0; | ||
1841 | } | ||
1842 | |||
1623 | int perf_file_header__read(struct perf_file_header *header, | 1843 | int perf_file_header__read(struct perf_file_header *header, |
1624 | struct perf_header *ph, int fd) | 1844 | struct perf_header *ph, int fd) |
1625 | { | 1845 | { |
1846 | int ret; | ||
1847 | |||
1626 | lseek(fd, 0, SEEK_SET); | 1848 | lseek(fd, 0, SEEK_SET); |
1627 | 1849 | ||
1628 | if (readn(fd, header, sizeof(*header)) <= 0 || | 1850 | ret = readn(fd, header, sizeof(*header)); |
1629 | memcmp(&header->magic, __perf_magic, sizeof(header->magic))) | 1851 | if (ret <= 0) |
1630 | return -1; | 1852 | return -1; |
1631 | 1853 | ||
1632 | if (header->attr_size != sizeof(struct perf_file_attr)) { | 1854 | if (check_magic_endian(&header->magic, header, ph) < 0) |
1633 | u64 attr_size = bswap_64(header->attr_size); | 1855 | return -1; |
1634 | |||
1635 | if (attr_size != sizeof(struct perf_file_attr)) | ||
1636 | return -1; | ||
1637 | 1856 | ||
1857 | if (ph->needs_swap) { | ||
1638 | mem_bswap_64(header, offsetof(struct perf_file_header, | 1858 | mem_bswap_64(header, offsetof(struct perf_file_header, |
1639 | adds_features)); | 1859 | adds_features)); |
1640 | ph->needs_swap = true; | ||
1641 | } | 1860 | } |
1642 | 1861 | ||
1643 | if (header->size != sizeof(*header)) { | 1862 | if (header->size != sizeof(*header)) { |
@@ -1689,156 +1908,6 @@ int perf_file_header__read(struct perf_file_header *header, | |||
1689 | return 0; | 1908 | return 0; |
1690 | } | 1909 | } |
1691 | 1910 | ||
1692 | static int __event_process_build_id(struct build_id_event *bev, | ||
1693 | char *filename, | ||
1694 | struct perf_session *session) | ||
1695 | { | ||
1696 | int err = -1; | ||
1697 | struct list_head *head; | ||
1698 | struct machine *machine; | ||
1699 | u16 misc; | ||
1700 | struct dso *dso; | ||
1701 | enum dso_kernel_type dso_type; | ||
1702 | |||
1703 | machine = perf_session__findnew_machine(session, bev->pid); | ||
1704 | if (!machine) | ||
1705 | goto out; | ||
1706 | |||
1707 | misc = bev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; | ||
1708 | |||
1709 | switch (misc) { | ||
1710 | case PERF_RECORD_MISC_KERNEL: | ||
1711 | dso_type = DSO_TYPE_KERNEL; | ||
1712 | head = &machine->kernel_dsos; | ||
1713 | break; | ||
1714 | case PERF_RECORD_MISC_GUEST_KERNEL: | ||
1715 | dso_type = DSO_TYPE_GUEST_KERNEL; | ||
1716 | head = &machine->kernel_dsos; | ||
1717 | break; | ||
1718 | case PERF_RECORD_MISC_USER: | ||
1719 | case PERF_RECORD_MISC_GUEST_USER: | ||
1720 | dso_type = DSO_TYPE_USER; | ||
1721 | head = &machine->user_dsos; | ||
1722 | break; | ||
1723 | default: | ||
1724 | goto out; | ||
1725 | } | ||
1726 | |||
1727 | dso = __dsos__findnew(head, filename); | ||
1728 | if (dso != NULL) { | ||
1729 | char sbuild_id[BUILD_ID_SIZE * 2 + 1]; | ||
1730 | |||
1731 | dso__set_build_id(dso, &bev->build_id); | ||
1732 | |||
1733 | if (filename[0] == '[') | ||
1734 | dso->kernel = dso_type; | ||
1735 | |||
1736 | build_id__sprintf(dso->build_id, sizeof(dso->build_id), | ||
1737 | sbuild_id); | ||
1738 | pr_debug("build id event received for %s: %s\n", | ||
1739 | dso->long_name, sbuild_id); | ||
1740 | } | ||
1741 | |||
1742 | err = 0; | ||
1743 | out: | ||
1744 | return err; | ||
1745 | } | ||
1746 | |||
1747 | static int perf_header__read_build_ids_abi_quirk(struct perf_header *header, | ||
1748 | int input, u64 offset, u64 size) | ||
1749 | { | ||
1750 | struct perf_session *session = container_of(header, struct perf_session, header); | ||
1751 | struct { | ||
1752 | struct perf_event_header header; | ||
1753 | u8 build_id[ALIGN(BUILD_ID_SIZE, sizeof(u64))]; | ||
1754 | char filename[0]; | ||
1755 | } old_bev; | ||
1756 | struct build_id_event bev; | ||
1757 | char filename[PATH_MAX]; | ||
1758 | u64 limit = offset + size; | ||
1759 | |||
1760 | while (offset < limit) { | ||
1761 | ssize_t len; | ||
1762 | |||
1763 | if (read(input, &old_bev, sizeof(old_bev)) != sizeof(old_bev)) | ||
1764 | return -1; | ||
1765 | |||
1766 | if (header->needs_swap) | ||
1767 | perf_event_header__bswap(&old_bev.header); | ||
1768 | |||
1769 | len = old_bev.header.size - sizeof(old_bev); | ||
1770 | if (read(input, filename, len) != len) | ||
1771 | return -1; | ||
1772 | |||
1773 | bev.header = old_bev.header; | ||
1774 | |||
1775 | /* | ||
1776 | * As the pid is the missing value, we need to fill | ||
1777 | * it properly. The header.misc value give us nice hint. | ||
1778 | */ | ||
1779 | bev.pid = HOST_KERNEL_ID; | ||
1780 | if (bev.header.misc == PERF_RECORD_MISC_GUEST_USER || | ||
1781 | bev.header.misc == PERF_RECORD_MISC_GUEST_KERNEL) | ||
1782 | bev.pid = DEFAULT_GUEST_KERNEL_ID; | ||
1783 | |||
1784 | memcpy(bev.build_id, old_bev.build_id, sizeof(bev.build_id)); | ||
1785 | __event_process_build_id(&bev, filename, session); | ||
1786 | |||
1787 | offset += bev.header.size; | ||
1788 | } | ||
1789 | |||
1790 | return 0; | ||
1791 | } | ||
1792 | |||
1793 | static int perf_header__read_build_ids(struct perf_header *header, | ||
1794 | int input, u64 offset, u64 size) | ||
1795 | { | ||
1796 | struct perf_session *session = container_of(header, struct perf_session, header); | ||
1797 | struct build_id_event bev; | ||
1798 | char filename[PATH_MAX]; | ||
1799 | u64 limit = offset + size, orig_offset = offset; | ||
1800 | int err = -1; | ||
1801 | |||
1802 | while (offset < limit) { | ||
1803 | ssize_t len; | ||
1804 | |||
1805 | if (read(input, &bev, sizeof(bev)) != sizeof(bev)) | ||
1806 | goto out; | ||
1807 | |||
1808 | if (header->needs_swap) | ||
1809 | perf_event_header__bswap(&bev.header); | ||
1810 | |||
1811 | len = bev.header.size - sizeof(bev); | ||
1812 | if (read(input, filename, len) != len) | ||
1813 | goto out; | ||
1814 | /* | ||
1815 | * The a1645ce1 changeset: | ||
1816 | * | ||
1817 | * "perf: 'perf kvm' tool for monitoring guest performance from host" | ||
1818 | * | ||
1819 | * Added a field to struct build_id_event that broke the file | ||
1820 | * format. | ||
1821 | * | ||
1822 | * Since the kernel build-id is the first entry, process the | ||
1823 | * table using the old format if the well known | ||
1824 | * '[kernel.kallsyms]' string for the kernel build-id has the | ||
1825 | * first 4 characters chopped off (where the pid_t sits). | ||
1826 | */ | ||
1827 | if (memcmp(filename, "nel.kallsyms]", 13) == 0) { | ||
1828 | if (lseek(input, orig_offset, SEEK_SET) == (off_t)-1) | ||
1829 | return -1; | ||
1830 | return perf_header__read_build_ids_abi_quirk(header, input, offset, size); | ||
1831 | } | ||
1832 | |||
1833 | __event_process_build_id(&bev, filename, session); | ||
1834 | |||
1835 | offset += bev.header.size; | ||
1836 | } | ||
1837 | err = 0; | ||
1838 | out: | ||
1839 | return err; | ||
1840 | } | ||
1841 | |||
1842 | static int perf_file_section__process(struct perf_file_section *section, | 1911 | static int perf_file_section__process(struct perf_file_section *section, |
1843 | struct perf_header *ph, | 1912 | struct perf_header *ph, |
1844 | int feat, int fd, void *data __used) | 1913 | int feat, int fd, void *data __used) |
@@ -1854,27 +1923,23 @@ static int perf_file_section__process(struct perf_file_section *section, | |||
1854 | return 0; | 1923 | return 0; |
1855 | } | 1924 | } |
1856 | 1925 | ||
1857 | switch (feat) { | 1926 | if (!feat_ops[feat].process) |
1858 | case HEADER_TRACE_INFO: | 1927 | return 0; |
1859 | trace_report(fd, false); | ||
1860 | break; | ||
1861 | case HEADER_BUILD_ID: | ||
1862 | if (perf_header__read_build_ids(ph, fd, section->offset, section->size)) | ||
1863 | pr_debug("Failed to read buildids, continuing...\n"); | ||
1864 | break; | ||
1865 | default: | ||
1866 | break; | ||
1867 | } | ||
1868 | 1928 | ||
1869 | return 0; | 1929 | return feat_ops[feat].process(section, ph, feat, fd); |
1870 | } | 1930 | } |
1871 | 1931 | ||
1872 | static int perf_file_header__read_pipe(struct perf_pipe_file_header *header, | 1932 | static int perf_file_header__read_pipe(struct perf_pipe_file_header *header, |
1873 | struct perf_header *ph, int fd, | 1933 | struct perf_header *ph, int fd, |
1874 | bool repipe) | 1934 | bool repipe) |
1875 | { | 1935 | { |
1876 | if (readn(fd, header, sizeof(*header)) <= 0 || | 1936 | int ret; |
1877 | memcmp(&header->magic, __perf_magic, sizeof(header->magic))) | 1937 | |
1938 | ret = readn(fd, header, sizeof(*header)); | ||
1939 | if (ret <= 0) | ||
1940 | return -1; | ||
1941 | |||
1942 | if (check_magic_endian(&header->magic, NULL, ph) < 0) | ||
1878 | return -1; | 1943 | return -1; |
1879 | 1944 | ||
1880 | if (repipe && do_write(STDOUT_FILENO, header, sizeof(*header)) < 0) | 1945 | if (repipe && do_write(STDOUT_FILENO, header, sizeof(*header)) < 0) |
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index ac4ec956024..e68f617d082 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h | |||
@@ -11,6 +11,7 @@ | |||
11 | 11 | ||
12 | enum { | 12 | enum { |
13 | HEADER_RESERVED = 0, /* always cleared */ | 13 | HEADER_RESERVED = 0, /* always cleared */ |
14 | HEADER_FIRST_FEATURE = 1, | ||
14 | HEADER_TRACE_INFO = 1, | 15 | HEADER_TRACE_INFO = 1, |
15 | HEADER_BUILD_ID, | 16 | HEADER_BUILD_ID, |
16 | 17 | ||
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index f55f0a8d1f8..48e5acd1e86 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h | |||
@@ -32,6 +32,7 @@ struct events_stats { | |||
32 | u32 nr_unknown_events; | 32 | u32 nr_unknown_events; |
33 | u32 nr_invalid_chains; | 33 | u32 nr_invalid_chains; |
34 | u32 nr_unknown_id; | 34 | u32 nr_unknown_id; |
35 | u32 nr_unprocessable_samples; | ||
35 | }; | 36 | }; |
36 | 37 | ||
37 | enum hist_column { | 38 | enum hist_column { |
@@ -55,6 +56,7 @@ struct hists { | |||
55 | u64 nr_entries; | 56 | u64 nr_entries; |
56 | const struct thread *thread_filter; | 57 | const struct thread *thread_filter; |
57 | const struct dso *dso_filter; | 58 | const struct dso *dso_filter; |
59 | const char *uid_filter_str; | ||
58 | pthread_mutex_t lock; | 60 | pthread_mutex_t lock; |
59 | struct events_stats stats; | 61 | struct events_stats stats; |
60 | u64 event_stream; | 62 | u64 event_stream; |
diff --git a/tools/perf/util/include/asm/dwarf2.h b/tools/perf/util/include/asm/dwarf2.h index bb4198e7837..afe38199e92 100644 --- a/tools/perf/util/include/asm/dwarf2.h +++ b/tools/perf/util/include/asm/dwarf2.h | |||
@@ -2,10 +2,12 @@ | |||
2 | #ifndef PERF_DWARF2_H | 2 | #ifndef PERF_DWARF2_H |
3 | #define PERF_DWARF2_H | 3 | #define PERF_DWARF2_H |
4 | 4 | ||
5 | /* dwarf2.h ... dummy header file for including arch/x86/lib/memcpy_64.S */ | 5 | /* dwarf2.h ... dummy header file for including arch/x86/lib/mem{cpy,set}_64.S */ |
6 | 6 | ||
7 | #define CFI_STARTPROC | 7 | #define CFI_STARTPROC |
8 | #define CFI_ENDPROC | 8 | #define CFI_ENDPROC |
9 | #define CFI_REMEMBER_STATE | ||
10 | #define CFI_RESTORE_STATE | ||
9 | 11 | ||
10 | #endif /* PERF_DWARF2_H */ | 12 | #endif /* PERF_DWARF2_H */ |
11 | 13 | ||
diff --git a/tools/perf/util/include/linux/bitmap.h b/tools/perf/util/include/linux/bitmap.h index eda4416efa0..bb162e40c76 100644 --- a/tools/perf/util/include/linux/bitmap.h +++ b/tools/perf/util/include/linux/bitmap.h | |||
@@ -5,6 +5,8 @@ | |||
5 | #include <linux/bitops.h> | 5 | #include <linux/bitops.h> |
6 | 6 | ||
7 | int __bitmap_weight(const unsigned long *bitmap, int bits); | 7 | int __bitmap_weight(const unsigned long *bitmap, int bits); |
8 | void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, | ||
9 | const unsigned long *bitmap2, int bits); | ||
8 | 10 | ||
9 | #define BITMAP_LAST_WORD_MASK(nbits) \ | 11 | #define BITMAP_LAST_WORD_MASK(nbits) \ |
10 | ( \ | 12 | ( \ |
@@ -32,4 +34,13 @@ static inline int bitmap_weight(const unsigned long *src, int nbits) | |||
32 | return __bitmap_weight(src, nbits); | 34 | return __bitmap_weight(src, nbits); |
33 | } | 35 | } |
34 | 36 | ||
37 | static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, | ||
38 | const unsigned long *src2, int nbits) | ||
39 | { | ||
40 | if (small_const_nbits(nbits)) | ||
41 | *dst = *src1 | *src2; | ||
42 | else | ||
43 | __bitmap_or(dst, src1, src2, nbits); | ||
44 | } | ||
45 | |||
35 | #endif /* _PERF_BITOPS_H */ | 46 | #endif /* _PERF_BITOPS_H */ |
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 316aa0ab712..dea6d1c1a95 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c | |||
@@ -212,6 +212,21 @@ size_t map__fprintf(struct map *self, FILE *fp) | |||
212 | self->start, self->end, self->pgoff, self->dso->name); | 212 | self->start, self->end, self->pgoff, self->dso->name); |
213 | } | 213 | } |
214 | 214 | ||
215 | size_t map__fprintf_dsoname(struct map *map, FILE *fp) | ||
216 | { | ||
217 | const char *dsoname; | ||
218 | |||
219 | if (map && map->dso && (map->dso->name || map->dso->long_name)) { | ||
220 | if (symbol_conf.show_kernel_path && map->dso->long_name) | ||
221 | dsoname = map->dso->long_name; | ||
222 | else if (map->dso->name) | ||
223 | dsoname = map->dso->name; | ||
224 | } else | ||
225 | dsoname = "[unknown]"; | ||
226 | |||
227 | return fprintf(fp, "%s", dsoname); | ||
228 | } | ||
229 | |||
215 | /* | 230 | /* |
216 | * objdump wants/reports absolute IPs for ET_EXEC, and RIPs for ET_DYN. | 231 | * objdump wants/reports absolute IPs for ET_EXEC, and RIPs for ET_DYN. |
217 | * map->dso->adjust_symbols==1 for ET_EXEC-like cases. | 232 | * map->dso->adjust_symbols==1 for ET_EXEC-like cases. |
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 2b8017f8a93..b100c20b7f9 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h | |||
@@ -118,6 +118,7 @@ void map__delete(struct map *self); | |||
118 | struct map *map__clone(struct map *self); | 118 | struct map *map__clone(struct map *self); |
119 | int map__overlap(struct map *l, struct map *r); | 119 | int map__overlap(struct map *l, struct map *r); |
120 | size_t map__fprintf(struct map *self, FILE *fp); | 120 | size_t map__fprintf(struct map *self, FILE *fp); |
121 | size_t map__fprintf_dsoname(struct map *map, FILE *fp); | ||
121 | 122 | ||
122 | int map__load(struct map *self, symbol_filter_t filter); | 123 | int map__load(struct map *self, symbol_filter_t filter); |
123 | struct symbol *map__find_symbol(struct map *self, | 124 | struct symbol *map__find_symbol(struct map *self, |
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index e33554a562b..15f9bb1b5f0 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c | |||
@@ -273,10 +273,10 @@ static int add_module_to_probe_trace_events(struct probe_trace_event *tevs, | |||
273 | /* Try to find perf_probe_event with debuginfo */ | 273 | /* Try to find perf_probe_event with debuginfo */ |
274 | static int try_to_find_probe_trace_events(struct perf_probe_event *pev, | 274 | static int try_to_find_probe_trace_events(struct perf_probe_event *pev, |
275 | struct probe_trace_event **tevs, | 275 | struct probe_trace_event **tevs, |
276 | int max_tevs, const char *module) | 276 | int max_tevs, const char *target) |
277 | { | 277 | { |
278 | bool need_dwarf = perf_probe_event_need_dwarf(pev); | 278 | bool need_dwarf = perf_probe_event_need_dwarf(pev); |
279 | struct debuginfo *dinfo = open_debuginfo(module); | 279 | struct debuginfo *dinfo = open_debuginfo(target); |
280 | int ntevs, ret = 0; | 280 | int ntevs, ret = 0; |
281 | 281 | ||
282 | if (!dinfo) { | 282 | if (!dinfo) { |
@@ -295,9 +295,9 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, | |||
295 | 295 | ||
296 | if (ntevs > 0) { /* Succeeded to find trace events */ | 296 | if (ntevs > 0) { /* Succeeded to find trace events */ |
297 | pr_debug("find %d probe_trace_events.\n", ntevs); | 297 | pr_debug("find %d probe_trace_events.\n", ntevs); |
298 | if (module) | 298 | if (target) |
299 | ret = add_module_to_probe_trace_events(*tevs, ntevs, | 299 | ret = add_module_to_probe_trace_events(*tevs, ntevs, |
300 | module); | 300 | target); |
301 | return ret < 0 ? ret : ntevs; | 301 | return ret < 0 ? ret : ntevs; |
302 | } | 302 | } |
303 | 303 | ||
@@ -1729,7 +1729,7 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, | |||
1729 | } | 1729 | } |
1730 | 1730 | ||
1731 | ret = 0; | 1731 | ret = 0; |
1732 | printf("Add new event%s\n", (ntevs > 1) ? "s:" : ":"); | 1732 | printf("Added new event%s\n", (ntevs > 1) ? "s:" : ":"); |
1733 | for (i = 0; i < ntevs; i++) { | 1733 | for (i = 0; i < ntevs; i++) { |
1734 | tev = &tevs[i]; | 1734 | tev = &tevs[i]; |
1735 | if (pev->event) | 1735 | if (pev->event) |
@@ -1784,7 +1784,7 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, | |||
1784 | 1784 | ||
1785 | if (ret >= 0) { | 1785 | if (ret >= 0) { |
1786 | /* Show how to use the event. */ | 1786 | /* Show how to use the event. */ |
1787 | printf("\nYou can now use it on all perf tools, such as:\n\n"); | 1787 | printf("\nYou can now use it in all perf tools, such as:\n\n"); |
1788 | printf("\tperf record -e %s:%s -aR sleep 1\n\n", tev->group, | 1788 | printf("\tperf record -e %s:%s -aR sleep 1\n\n", tev->group, |
1789 | tev->event); | 1789 | tev->event); |
1790 | } | 1790 | } |
@@ -1796,14 +1796,14 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, | |||
1796 | 1796 | ||
1797 | static int convert_to_probe_trace_events(struct perf_probe_event *pev, | 1797 | static int convert_to_probe_trace_events(struct perf_probe_event *pev, |
1798 | struct probe_trace_event **tevs, | 1798 | struct probe_trace_event **tevs, |
1799 | int max_tevs, const char *module) | 1799 | int max_tevs, const char *target) |
1800 | { | 1800 | { |
1801 | struct symbol *sym; | 1801 | struct symbol *sym; |
1802 | int ret = 0, i; | 1802 | int ret = 0, i; |
1803 | struct probe_trace_event *tev; | 1803 | struct probe_trace_event *tev; |
1804 | 1804 | ||
1805 | /* Convert perf_probe_event with debuginfo */ | 1805 | /* Convert perf_probe_event with debuginfo */ |
1806 | ret = try_to_find_probe_trace_events(pev, tevs, max_tevs, module); | 1806 | ret = try_to_find_probe_trace_events(pev, tevs, max_tevs, target); |
1807 | if (ret != 0) | 1807 | if (ret != 0) |
1808 | return ret; /* Found in debuginfo or got an error */ | 1808 | return ret; /* Found in debuginfo or got an error */ |
1809 | 1809 | ||
@@ -1819,8 +1819,8 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev, | |||
1819 | goto error; | 1819 | goto error; |
1820 | } | 1820 | } |
1821 | 1821 | ||
1822 | if (module) { | 1822 | if (target) { |
1823 | tev->point.module = strdup(module); | 1823 | tev->point.module = strdup(target); |
1824 | if (tev->point.module == NULL) { | 1824 | if (tev->point.module == NULL) { |
1825 | ret = -ENOMEM; | 1825 | ret = -ENOMEM; |
1826 | goto error; | 1826 | goto error; |
@@ -1890,7 +1890,7 @@ struct __event_package { | |||
1890 | }; | 1890 | }; |
1891 | 1891 | ||
1892 | int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, | 1892 | int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, |
1893 | int max_tevs, const char *module, bool force_add) | 1893 | int max_tevs, const char *target, bool force_add) |
1894 | { | 1894 | { |
1895 | int i, j, ret; | 1895 | int i, j, ret; |
1896 | struct __event_package *pkgs; | 1896 | struct __event_package *pkgs; |
@@ -1913,7 +1913,7 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, | |||
1913 | ret = convert_to_probe_trace_events(pkgs[i].pev, | 1913 | ret = convert_to_probe_trace_events(pkgs[i].pev, |
1914 | &pkgs[i].tevs, | 1914 | &pkgs[i].tevs, |
1915 | max_tevs, | 1915 | max_tevs, |
1916 | module); | 1916 | target); |
1917 | if (ret < 0) | 1917 | if (ret < 0) |
1918 | goto end; | 1918 | goto end; |
1919 | pkgs[i].ntevs = ret; | 1919 | pkgs[i].ntevs = ret; |
@@ -1965,7 +1965,7 @@ static int __del_trace_probe_event(int fd, struct str_node *ent) | |||
1965 | goto error; | 1965 | goto error; |
1966 | } | 1966 | } |
1967 | 1967 | ||
1968 | printf("Remove event: %s\n", ent->s); | 1968 | printf("Removed event: %s\n", ent->s); |
1969 | return 0; | 1969 | return 0; |
1970 | error: | 1970 | error: |
1971 | pr_warning("Failed to delete event: %s\n", strerror(-ret)); | 1971 | pr_warning("Failed to delete event: %s\n", strerror(-ret)); |
@@ -2069,7 +2069,7 @@ static int filter_available_functions(struct map *map __unused, | |||
2069 | return 1; | 2069 | return 1; |
2070 | } | 2070 | } |
2071 | 2071 | ||
2072 | int show_available_funcs(const char *module, struct strfilter *_filter) | 2072 | int show_available_funcs(const char *target, struct strfilter *_filter) |
2073 | { | 2073 | { |
2074 | struct map *map; | 2074 | struct map *map; |
2075 | int ret; | 2075 | int ret; |
@@ -2080,9 +2080,9 @@ int show_available_funcs(const char *module, struct strfilter *_filter) | |||
2080 | if (ret < 0) | 2080 | if (ret < 0) |
2081 | return ret; | 2081 | return ret; |
2082 | 2082 | ||
2083 | map = kernel_get_module_map(module); | 2083 | map = kernel_get_module_map(target); |
2084 | if (!map) { | 2084 | if (!map) { |
2085 | pr_err("Failed to find %s map.\n", (module) ? : "kernel"); | 2085 | pr_err("Failed to find %s map.\n", (target) ? : "kernel"); |
2086 | return -EINVAL; | 2086 | return -EINVAL; |
2087 | } | 2087 | } |
2088 | available_func_filter = _filter; | 2088 | available_func_filter = _filter; |
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 74bd2e63c4b..2cc162d3b78 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c | |||
@@ -30,7 +30,6 @@ | |||
30 | #include <stdlib.h> | 30 | #include <stdlib.h> |
31 | #include <string.h> | 31 | #include <string.h> |
32 | #include <stdarg.h> | 32 | #include <stdarg.h> |
33 | #include <ctype.h> | ||
34 | #include <dwarf-regs.h> | 33 | #include <dwarf-regs.h> |
35 | 34 | ||
36 | #include <linux/bitops.h> | 35 | #include <linux/bitops.h> |
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources new file mode 100644 index 00000000000..2884e67ee62 --- /dev/null +++ b/tools/perf/util/python-ext-sources | |||
@@ -0,0 +1,19 @@ | |||
1 | # | ||
2 | # List of files needed by perf python extention | ||
3 | # | ||
4 | # Each source file must be placed on its own line so that it can be | ||
5 | # processed by Makefile and util/setup.py accordingly. | ||
6 | # | ||
7 | |||
8 | util/python.c | ||
9 | util/ctype.c | ||
10 | util/evlist.c | ||
11 | util/evsel.c | ||
12 | util/cpumap.c | ||
13 | util/thread_map.c | ||
14 | util/util.c | ||
15 | util/xyarray.c | ||
16 | util/cgroup.c | ||
17 | util/debugfs.c | ||
18 | util/strlist.c | ||
19 | ../../lib/rbtree.c | ||
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 9dd47a4f259..e03b58a4842 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c | |||
@@ -425,14 +425,14 @@ struct pyrf_thread_map { | |||
425 | static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads, | 425 | static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads, |
426 | PyObject *args, PyObject *kwargs) | 426 | PyObject *args, PyObject *kwargs) |
427 | { | 427 | { |
428 | static char *kwlist[] = { "pid", "tid", NULL }; | 428 | static char *kwlist[] = { "pid", "tid", "uid", NULL }; |
429 | int pid = -1, tid = -1; | 429 | int pid = -1, tid = -1, uid = UINT_MAX; |
430 | 430 | ||
431 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii", | 431 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iii", |
432 | kwlist, &pid, &tid)) | 432 | kwlist, &pid, &tid, &uid)) |
433 | return -1; | 433 | return -1; |
434 | 434 | ||
435 | pthreads->threads = thread_map__new(pid, tid); | 435 | pthreads->threads = thread_map__new(pid, tid, uid); |
436 | if (pthreads->threads == NULL) | 436 | if (pthreads->threads == NULL) |
437 | return -1; | 437 | return -1; |
438 | return 0; | 438 | return 0; |
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 0b2a4878317..c2623c6f9b5 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c | |||
@@ -24,7 +24,6 @@ | |||
24 | #include <stdio.h> | 24 | #include <stdio.h> |
25 | #include <stdlib.h> | 25 | #include <stdlib.h> |
26 | #include <string.h> | 26 | #include <string.h> |
27 | #include <ctype.h> | ||
28 | #include <errno.h> | 27 | #include <errno.h> |
29 | 28 | ||
30 | #include "../../perf.h" | 29 | #include "../../perf.h" |
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index b5ca2558c7b..9f833cf9c6a 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c | |||
@@ -796,6 +796,10 @@ static int perf_session_deliver_event(struct perf_session *session, | |||
796 | ++session->hists.stats.nr_unknown_id; | 796 | ++session->hists.stats.nr_unknown_id; |
797 | return -1; | 797 | return -1; |
798 | } | 798 | } |
799 | if (machine == NULL) { | ||
800 | ++session->hists.stats.nr_unprocessable_samples; | ||
801 | return -1; | ||
802 | } | ||
799 | return tool->sample(tool, event, sample, evsel, machine); | 803 | return tool->sample(tool, event, sample, evsel, machine); |
800 | case PERF_RECORD_MMAP: | 804 | case PERF_RECORD_MMAP: |
801 | return tool->mmap(tool, event, sample, machine); | 805 | return tool->mmap(tool, event, sample, machine); |
@@ -964,6 +968,12 @@ static void perf_session__warn_about_errors(const struct perf_session *session, | |||
964 | session->hists.stats.nr_invalid_chains, | 968 | session->hists.stats.nr_invalid_chains, |
965 | session->hists.stats.nr_events[PERF_RECORD_SAMPLE]); | 969 | session->hists.stats.nr_events[PERF_RECORD_SAMPLE]); |
966 | } | 970 | } |
971 | |||
972 | if (session->hists.stats.nr_unprocessable_samples != 0) { | ||
973 | ui__warning("%u unprocessable samples recorded.\n" | ||
974 | "Do you have a KVM guest running and not using 'perf kvm'?\n", | ||
975 | session->hists.stats.nr_unprocessable_samples); | ||
976 | } | ||
967 | } | 977 | } |
968 | 978 | ||
969 | #define session_done() (*(volatile int *)(&session_done)) | 979 | #define session_done() (*(volatile int *)(&session_done)) |
@@ -1293,10 +1303,9 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session, | |||
1293 | 1303 | ||
1294 | void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, | 1304 | void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, |
1295 | struct machine *machine, struct perf_evsel *evsel, | 1305 | struct machine *machine, struct perf_evsel *evsel, |
1296 | int print_sym, int print_dso) | 1306 | int print_sym, int print_dso, int print_symoffset) |
1297 | { | 1307 | { |
1298 | struct addr_location al; | 1308 | struct addr_location al; |
1299 | const char *symname, *dsoname; | ||
1300 | struct callchain_cursor *cursor = &evsel->hists.callchain_cursor; | 1309 | struct callchain_cursor *cursor = &evsel->hists.callchain_cursor; |
1301 | struct callchain_cursor_node *node; | 1310 | struct callchain_cursor_node *node; |
1302 | 1311 | ||
@@ -1324,20 +1333,13 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, | |||
1324 | 1333 | ||
1325 | printf("\t%16" PRIx64, node->ip); | 1334 | printf("\t%16" PRIx64, node->ip); |
1326 | if (print_sym) { | 1335 | if (print_sym) { |
1327 | if (node->sym && node->sym->name) | 1336 | printf(" "); |
1328 | symname = node->sym->name; | 1337 | symbol__fprintf_symname(node->sym, stdout); |
1329 | else | ||
1330 | symname = ""; | ||
1331 | |||
1332 | printf(" %s", symname); | ||
1333 | } | 1338 | } |
1334 | if (print_dso) { | 1339 | if (print_dso) { |
1335 | if (node->map && node->map->dso && node->map->dso->name) | 1340 | printf(" ("); |
1336 | dsoname = node->map->dso->name; | 1341 | map__fprintf_dsoname(al.map, stdout); |
1337 | else | 1342 | printf(")"); |
1338 | dsoname = ""; | ||
1339 | |||
1340 | printf(" (%s)", dsoname); | ||
1341 | } | 1343 | } |
1342 | printf("\n"); | 1344 | printf("\n"); |
1343 | 1345 | ||
@@ -1347,21 +1349,18 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, | |||
1347 | } else { | 1349 | } else { |
1348 | printf("%16" PRIx64, sample->ip); | 1350 | printf("%16" PRIx64, sample->ip); |
1349 | if (print_sym) { | 1351 | if (print_sym) { |
1350 | if (al.sym && al.sym->name) | 1352 | printf(" "); |
1351 | symname = al.sym->name; | 1353 | if (print_symoffset) |
1354 | symbol__fprintf_symname_offs(al.sym, &al, | ||
1355 | stdout); | ||
1352 | else | 1356 | else |
1353 | symname = ""; | 1357 | symbol__fprintf_symname(al.sym, stdout); |
1354 | |||
1355 | printf(" %s", symname); | ||
1356 | } | 1358 | } |
1357 | 1359 | ||
1358 | if (print_dso) { | 1360 | if (print_dso) { |
1359 | if (al.map && al.map->dso && al.map->dso->name) | 1361 | printf(" ("); |
1360 | dsoname = al.map->dso->name; | 1362 | map__fprintf_dsoname(al.map, stdout); |
1361 | else | 1363 | printf(")"); |
1362 | dsoname = ""; | ||
1363 | |||
1364 | printf(" (%s)", dsoname); | ||
1365 | } | 1364 | } |
1366 | } | 1365 | } |
1367 | } | 1366 | } |
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 37bc38381fb..c8d90178e7d 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h | |||
@@ -147,7 +147,7 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session, | |||
147 | 147 | ||
148 | void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, | 148 | void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, |
149 | struct machine *machine, struct perf_evsel *evsel, | 149 | struct machine *machine, struct perf_evsel *evsel, |
150 | int print_sym, int print_dso); | 150 | int print_sym, int print_dso, int print_symoffset); |
151 | 151 | ||
152 | int perf_session__cpu_bitmap(struct perf_session *session, | 152 | int perf_session__cpu_bitmap(struct perf_session *session, |
153 | const char *cpu_list, unsigned long *cpu_bitmap); | 153 | const char *cpu_list, unsigned long *cpu_bitmap); |
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 36d4c561957..d0f9f29cf18 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py | |||
@@ -24,11 +24,11 @@ cflags += getenv('CFLAGS', '').split() | |||
24 | build_lib = getenv('PYTHON_EXTBUILD_LIB') | 24 | build_lib = getenv('PYTHON_EXTBUILD_LIB') |
25 | build_tmp = getenv('PYTHON_EXTBUILD_TMP') | 25 | build_tmp = getenv('PYTHON_EXTBUILD_TMP') |
26 | 26 | ||
27 | ext_sources = [f.strip() for f in file('util/python-ext-sources') | ||
28 | if len(f.strip()) > 0 and f[0] != '#'] | ||
29 | |||
27 | perf = Extension('perf', | 30 | perf = Extension('perf', |
28 | sources = ['util/python.c', 'util/ctype.c', 'util/evlist.c', | 31 | sources = ext_sources, |
29 | 'util/evsel.c', 'util/cpumap.c', 'util/thread_map.c', | ||
30 | 'util/util.c', 'util/xyarray.c', 'util/cgroup.c', | ||
31 | 'util/debugfs.c'], | ||
32 | include_dirs = ['util/include'], | 32 | include_dirs = ['util/include'], |
33 | extra_compile_args = cflags, | 33 | extra_compile_args = cflags, |
34 | ) | 34 | ) |
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 0975438c3e7..5dd83c3e2c0 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c | |||
@@ -1,4 +1,3 @@ | |||
1 | #include <ctype.h> | ||
2 | #include <dirent.h> | 1 | #include <dirent.h> |
3 | #include <errno.h> | 2 | #include <errno.h> |
4 | #include <libgen.h> | 3 | #include <libgen.h> |
@@ -12,6 +11,7 @@ | |||
12 | #include <unistd.h> | 11 | #include <unistd.h> |
13 | #include <inttypes.h> | 12 | #include <inttypes.h> |
14 | #include "build-id.h" | 13 | #include "build-id.h" |
14 | #include "util.h" | ||
15 | #include "debug.h" | 15 | #include "debug.h" |
16 | #include "symbol.h" | 16 | #include "symbol.h" |
17 | #include "strlist.h" | 17 | #include "strlist.h" |
@@ -263,6 +263,28 @@ static size_t symbol__fprintf(struct symbol *sym, FILE *fp) | |||
263 | sym->name); | 263 | sym->name); |
264 | } | 264 | } |
265 | 265 | ||
266 | size_t symbol__fprintf_symname_offs(const struct symbol *sym, | ||
267 | const struct addr_location *al, FILE *fp) | ||
268 | { | ||
269 | unsigned long offset; | ||
270 | size_t length; | ||
271 | |||
272 | if (sym && sym->name) { | ||
273 | length = fprintf(fp, "%s", sym->name); | ||
274 | if (al) { | ||
275 | offset = al->addr - sym->start; | ||
276 | length += fprintf(fp, "+0x%lx", offset); | ||
277 | } | ||
278 | return length; | ||
279 | } else | ||
280 | return fprintf(fp, "[unknown]"); | ||
281 | } | ||
282 | |||
283 | size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp) | ||
284 | { | ||
285 | return symbol__fprintf_symname_offs(sym, NULL, fp); | ||
286 | } | ||
287 | |||
266 | void dso__set_long_name(struct dso *dso, char *name) | 288 | void dso__set_long_name(struct dso *dso, char *name) |
267 | { | 289 | { |
268 | if (name == NULL) | 290 | if (name == NULL) |
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 123c2e14353..2a683d4fc91 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h | |||
@@ -70,6 +70,7 @@ struct symbol_conf { | |||
70 | unsigned short priv_size; | 70 | unsigned short priv_size; |
71 | unsigned short nr_events; | 71 | unsigned short nr_events; |
72 | bool try_vmlinux_path, | 72 | bool try_vmlinux_path, |
73 | show_kernel_path, | ||
73 | use_modules, | 74 | use_modules, |
74 | sort_by_name, | 75 | sort_by_name, |
75 | show_nr_samples, | 76 | show_nr_samples, |
@@ -241,6 +242,9 @@ void machines__destroy_guest_kernel_maps(struct rb_root *machines); | |||
241 | 242 | ||
242 | int symbol__init(void); | 243 | int symbol__init(void); |
243 | void symbol__exit(void); | 244 | void symbol__exit(void); |
245 | size_t symbol__fprintf_symname_offs(const struct symbol *sym, | ||
246 | const struct addr_location *al, FILE *fp); | ||
247 | size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp); | ||
244 | bool symbol_type__is_a(char symbol_type, enum map_type map_type); | 248 | bool symbol_type__is_a(char symbol_type, enum map_type map_type); |
245 | 249 | ||
246 | size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp); | 250 | size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp); |
diff --git a/tools/perf/util/sysfs.c b/tools/perf/util/sysfs.c new file mode 100644 index 00000000000..48c6902e749 --- /dev/null +++ b/tools/perf/util/sysfs.c | |||
@@ -0,0 +1,60 @@ | |||
1 | |||
2 | #include "util.h" | ||
3 | #include "sysfs.h" | ||
4 | |||
5 | static const char * const sysfs_known_mountpoints[] = { | ||
6 | "/sys", | ||
7 | 0, | ||
8 | }; | ||
9 | |||
10 | static int sysfs_found; | ||
11 | char sysfs_mountpoint[PATH_MAX]; | ||
12 | |||
13 | static int sysfs_valid_mountpoint(const char *sysfs) | ||
14 | { | ||
15 | struct statfs st_fs; | ||
16 | |||
17 | if (statfs(sysfs, &st_fs) < 0) | ||
18 | return -ENOENT; | ||
19 | else if (st_fs.f_type != (long) SYSFS_MAGIC) | ||
20 | return -ENOENT; | ||
21 | |||
22 | return 0; | ||
23 | } | ||
24 | |||
25 | const char *sysfs_find_mountpoint(void) | ||
26 | { | ||
27 | const char * const *ptr; | ||
28 | char type[100]; | ||
29 | FILE *fp; | ||
30 | |||
31 | if (sysfs_found) | ||
32 | return (const char *) sysfs_mountpoint; | ||
33 | |||
34 | ptr = sysfs_known_mountpoints; | ||
35 | while (*ptr) { | ||
36 | if (sysfs_valid_mountpoint(*ptr) == 0) { | ||
37 | sysfs_found = 1; | ||
38 | strcpy(sysfs_mountpoint, *ptr); | ||
39 | return sysfs_mountpoint; | ||
40 | } | ||
41 | ptr++; | ||
42 | } | ||
43 | |||
44 | /* give up and parse /proc/mounts */ | ||
45 | fp = fopen("/proc/mounts", "r"); | ||
46 | if (fp == NULL) | ||
47 | return NULL; | ||
48 | |||
49 | while (!sysfs_found && | ||
50 | fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n", | ||
51 | sysfs_mountpoint, type) == 2) { | ||
52 | |||
53 | if (strcmp(type, "sysfs") == 0) | ||
54 | sysfs_found = 1; | ||
55 | } | ||
56 | |||
57 | fclose(fp); | ||
58 | |||
59 | return sysfs_found ? sysfs_mountpoint : NULL; | ||
60 | } | ||
diff --git a/tools/perf/util/sysfs.h b/tools/perf/util/sysfs.h new file mode 100644 index 00000000000..a813b720393 --- /dev/null +++ b/tools/perf/util/sysfs.h | |||
@@ -0,0 +1,6 @@ | |||
1 | #ifndef __SYSFS_H__ | ||
2 | #define __SYSFS_H__ | ||
3 | |||
4 | const char *sysfs_find_mountpoint(void); | ||
5 | |||
6 | #endif /* __DEBUGFS_H__ */ | ||
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index a5df131b77c..e15983cf077 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c | |||
@@ -1,6 +1,13 @@ | |||
1 | #include <dirent.h> | 1 | #include <dirent.h> |
2 | #include <limits.h> | ||
3 | #include <stdbool.h> | ||
2 | #include <stdlib.h> | 4 | #include <stdlib.h> |
3 | #include <stdio.h> | 5 | #include <stdio.h> |
6 | #include <sys/types.h> | ||
7 | #include <sys/stat.h> | ||
8 | #include <unistd.h> | ||
9 | #include "strlist.h" | ||
10 | #include <string.h> | ||
4 | #include "thread_map.h" | 11 | #include "thread_map.h" |
5 | 12 | ||
6 | /* Skip "." and ".." directories */ | 13 | /* Skip "." and ".." directories */ |
@@ -23,7 +30,7 @@ struct thread_map *thread_map__new_by_pid(pid_t pid) | |||
23 | sprintf(name, "/proc/%d/task", pid); | 30 | sprintf(name, "/proc/%d/task", pid); |
24 | items = scandir(name, &namelist, filter, NULL); | 31 | items = scandir(name, &namelist, filter, NULL); |
25 | if (items <= 0) | 32 | if (items <= 0) |
26 | return NULL; | 33 | return NULL; |
27 | 34 | ||
28 | threads = malloc(sizeof(*threads) + sizeof(pid_t) * items); | 35 | threads = malloc(sizeof(*threads) + sizeof(pid_t) * items); |
29 | if (threads != NULL) { | 36 | if (threads != NULL) { |
@@ -51,14 +58,240 @@ struct thread_map *thread_map__new_by_tid(pid_t tid) | |||
51 | return threads; | 58 | return threads; |
52 | } | 59 | } |
53 | 60 | ||
/*
 * Build a thread map containing every task of every /proc/<pid> entry
 * owned by @uid.  May return a map with nr == 0 when the uid owns no
 * processes; returns NULL on allocation/opendir failure.  The caller
 * releases the map with thread_map__delete().
 */
struct thread_map *thread_map__new_by_uid(uid_t uid)
{
	DIR *proc;
	int max_threads = 32, items, i;
	char path[256];
	struct dirent dirent, *next, **namelist = NULL;
	/* Start with room for 32 tids; capacity is doubled on demand below. */
	struct thread_map *threads = malloc(sizeof(*threads) +
					    max_threads * sizeof(pid_t));
	if (threads == NULL)
		goto out;

	proc = opendir("/proc");
	if (proc == NULL)
		goto out_free_threads;

	threads->nr = 0;

	while (!readdir_r(proc, &dirent, &next) && next) {
		char *end;
		bool grow = false;
		struct stat st;
		pid_t pid = strtol(dirent.d_name, &end, 10);

		if (*end) /* only interested in proper numerical dirents */
			continue;

		snprintf(path, sizeof(path), "/proc/%s", dirent.d_name);

		/* The owner of /proc/<pid> is the owner of the process. */
		if (stat(path, &st) != 0)
			continue;

		if (st.st_uid != uid)
			continue;

		/* Collect every tid of this process from /proc/<pid>/task. */
		snprintf(path, sizeof(path), "/proc/%d/task", pid);
		items = scandir(path, &namelist, filter, NULL);
		if (items <= 0)
			goto out_free_closedir;

		while (threads->nr + items >= max_threads) {
			max_threads *= 2;
			grow = true;
		}

		if (grow) {
			struct thread_map *tmp;

			tmp = realloc(threads, (sizeof(*threads) +
						max_threads * sizeof(pid_t)));
			if (tmp == NULL)
				goto out_free_namelist;

			threads = tmp;
		}

		for (i = 0; i < items; i++)
			threads->map[threads->nr + i] = atoi(namelist[i]->d_name);

		for (i = 0; i < items; i++)
			free(namelist[i]);
		free(namelist);

		threads->nr += items;
	}

	/* Success path: close /proc and hand the map to the caller. */
out_closedir:
	closedir(proc);
out:
	return threads;

out_free_threads:
	free(threads);
	return NULL;

	/*
	 * Error unwinding below the returns: realloc failed mid-scan, so
	 * drop the partially read task list, then fall into the common
	 * path that frees the map and closes /proc (returning NULL via
	 * threads == NULL).
	 */
out_free_namelist:
	for (i = 0; i < items; i++)
		free(namelist[i]);
	free(namelist);

out_free_closedir:
	free(threads);
	threads = NULL;
	goto out_closedir;
}
145 | |||
/*
 * Dispatch to the most specific constructor: an explicit pid wins,
 * then a uid (only when no tid was given), then the tid fallback.
 */
struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid)
{
	struct thread_map *map;

	if (pid != -1)
		map = thread_map__new_by_pid(pid);
	else if (tid == -1 && uid != UINT_MAX)
		map = thread_map__new_by_uid(uid);
	else
		map = thread_map__new_by_tid(tid);

	return map;
}
60 | 156 | ||
157 | static struct thread_map *thread_map__new_by_pid_str(const char *pid_str) | ||
158 | { | ||
159 | struct thread_map *threads = NULL, *nt; | ||
160 | char name[256]; | ||
161 | int items, total_tasks = 0; | ||
162 | struct dirent **namelist = NULL; | ||
163 | int i, j = 0; | ||
164 | pid_t pid, prev_pid = INT_MAX; | ||
165 | char *end_ptr; | ||
166 | struct str_node *pos; | ||
167 | struct strlist *slist = strlist__new(false, pid_str); | ||
168 | |||
169 | if (!slist) | ||
170 | return NULL; | ||
171 | |||
172 | strlist__for_each(pos, slist) { | ||
173 | pid = strtol(pos->s, &end_ptr, 10); | ||
174 | |||
175 | if (pid == INT_MIN || pid == INT_MAX || | ||
176 | (*end_ptr != '\0' && *end_ptr != ',')) | ||
177 | goto out_free_threads; | ||
178 | |||
179 | if (pid == prev_pid) | ||
180 | continue; | ||
181 | |||
182 | sprintf(name, "/proc/%d/task", pid); | ||
183 | items = scandir(name, &namelist, filter, NULL); | ||
184 | if (items <= 0) | ||
185 | goto out_free_threads; | ||
186 | |||
187 | total_tasks += items; | ||
188 | nt = realloc(threads, (sizeof(*threads) + | ||
189 | sizeof(pid_t) * total_tasks)); | ||
190 | if (nt == NULL) | ||
191 | goto out_free_threads; | ||
192 | |||
193 | threads = nt; | ||
194 | |||
195 | if (threads) { | ||
196 | for (i = 0; i < items; i++) | ||
197 | threads->map[j++] = atoi(namelist[i]->d_name); | ||
198 | threads->nr = total_tasks; | ||
199 | } | ||
200 | |||
201 | for (i = 0; i < items; i++) | ||
202 | free(namelist[i]); | ||
203 | free(namelist); | ||
204 | |||
205 | if (!threads) | ||
206 | break; | ||
207 | } | ||
208 | |||
209 | out: | ||
210 | strlist__delete(slist); | ||
211 | return threads; | ||
212 | |||
213 | out_free_threads: | ||
214 | free(threads); | ||
215 | threads = NULL; | ||
216 | goto out; | ||
217 | } | ||
218 | |||
219 | static struct thread_map *thread_map__new_by_tid_str(const char *tid_str) | ||
220 | { | ||
221 | struct thread_map *threads = NULL, *nt; | ||
222 | int ntasks = 0; | ||
223 | pid_t tid, prev_tid = INT_MAX; | ||
224 | char *end_ptr; | ||
225 | struct str_node *pos; | ||
226 | struct strlist *slist; | ||
227 | |||
228 | /* perf-stat expects threads to be generated even if tid not given */ | ||
229 | if (!tid_str) { | ||
230 | threads = malloc(sizeof(*threads) + sizeof(pid_t)); | ||
231 | if (threads != NULL) { | ||
232 | threads->map[1] = -1; | ||
233 | threads->nr = 1; | ||
234 | } | ||
235 | return threads; | ||
236 | } | ||
237 | |||
238 | slist = strlist__new(false, tid_str); | ||
239 | if (!slist) | ||
240 | return NULL; | ||
241 | |||
242 | strlist__for_each(pos, slist) { | ||
243 | tid = strtol(pos->s, &end_ptr, 10); | ||
244 | |||
245 | if (tid == INT_MIN || tid == INT_MAX || | ||
246 | (*end_ptr != '\0' && *end_ptr != ',')) | ||
247 | goto out_free_threads; | ||
248 | |||
249 | if (tid == prev_tid) | ||
250 | continue; | ||
251 | |||
252 | ntasks++; | ||
253 | nt = realloc(threads, sizeof(*threads) + sizeof(pid_t) * ntasks); | ||
254 | |||
255 | if (nt == NULL) | ||
256 | goto out_free_threads; | ||
257 | |||
258 | threads = nt; | ||
259 | threads->map[ntasks - 1] = tid; | ||
260 | threads->nr = ntasks; | ||
261 | } | ||
262 | out: | ||
263 | return threads; | ||
264 | |||
265 | out_free_threads: | ||
266 | free(threads); | ||
267 | threads = NULL; | ||
268 | goto out; | ||
269 | } | ||
270 | |||
271 | struct thread_map *thread_map__new_str(const char *pid, const char *tid, | ||
272 | uid_t uid) | ||
273 | { | ||
274 | if (pid) | ||
275 | return thread_map__new_by_pid_str(pid); | ||
276 | |||
277 | if (!tid && uid != UINT_MAX) | ||
278 | return thread_map__new_by_uid(uid); | ||
279 | |||
280 | return thread_map__new_by_tid_str(tid); | ||
281 | } | ||
282 | |||
/* Free a map returned by any thread_map__new*() constructor (NULL is ok). */
void thread_map__delete(struct thread_map *threads)
{
	free(threads);
}
287 | |||
288 | size_t thread_map__fprintf(struct thread_map *threads, FILE *fp) | ||
289 | { | ||
290 | int i; | ||
291 | size_t printed = fprintf(fp, "%d thread%s: ", | ||
292 | threads->nr, threads->nr > 1 ? "s" : ""); | ||
293 | for (i = 0; i < threads->nr; ++i) | ||
294 | printed += fprintf(fp, "%s%d", i ? ", " : "", threads->map[i]); | ||
295 | |||
296 | return printed + fprintf(fp, "\n"); | ||
297 | } | ||
diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index 3cb90731140..7da80f14418 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define __PERF_THREAD_MAP_H | 2 | #define __PERF_THREAD_MAP_H |
3 | 3 | ||
4 | #include <sys/types.h> | 4 | #include <sys/types.h> |
5 | #include <stdio.h> | ||
5 | 6 | ||
6 | struct thread_map { | 7 | struct thread_map { |
7 | int nr; | 8 | int nr; |
@@ -10,6 +11,14 @@ struct thread_map { | |||
10 | 11 | ||
11 | struct thread_map *thread_map__new_by_pid(pid_t pid); | 12 | struct thread_map *thread_map__new_by_pid(pid_t pid); |
12 | struct thread_map *thread_map__new_by_tid(pid_t tid); | 13 | struct thread_map *thread_map__new_by_tid(pid_t tid); |
13 | struct thread_map *thread_map__new(pid_t pid, pid_t tid); | 14 | struct thread_map *thread_map__new_by_uid(uid_t uid); |
15 | struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid); | ||
16 | |||
17 | struct thread_map *thread_map__new_str(const char *pid, | ||
18 | const char *tid, uid_t uid); | ||
19 | |||
14 | void thread_map__delete(struct thread_map *threads); | 20 | void thread_map__delete(struct thread_map *threads); |
21 | |||
22 | size_t thread_map__fprintf(struct thread_map *threads, FILE *fp); | ||
23 | |||
15 | #endif /* __PERF_THREAD_MAP_H */ | 24 | #endif /* __PERF_THREAD_MAP_H */ |
diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index 500471dffa4..09fe579ccaf 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c | |||
@@ -69,12 +69,15 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) | |||
69 | 69 | ||
70 | ret += SNPRINTF(bf + ret, size - ret, "], "); | 70 | ret += SNPRINTF(bf + ret, size - ret, "], "); |
71 | 71 | ||
72 | if (top->target_pid != -1) | 72 | if (top->target_pid) |
73 | ret += SNPRINTF(bf + ret, size - ret, " (target_pid: %d", | 73 | ret += SNPRINTF(bf + ret, size - ret, " (target_pid: %s", |
74 | top->target_pid); | 74 | top->target_pid); |
75 | else if (top->target_tid != -1) | 75 | else if (top->target_tid) |
76 | ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %d", | 76 | ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %s", |
77 | top->target_tid); | 77 | top->target_tid); |
78 | else if (top->uid_str != NULL) | ||
79 | ret += SNPRINTF(bf + ret, size - ret, " (uid: %s", | ||
80 | top->uid_str); | ||
78 | else | 81 | else |
79 | ret += SNPRINTF(bf + ret, size - ret, " (all"); | 82 | ret += SNPRINTF(bf + ret, size - ret, " (all"); |
80 | 83 | ||
@@ -82,7 +85,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) | |||
82 | ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)", | 85 | ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)", |
83 | top->evlist->cpus->nr > 1 ? "s" : "", top->cpu_list); | 86 | top->evlist->cpus->nr > 1 ? "s" : "", top->cpu_list); |
84 | else { | 87 | else { |
85 | if (top->target_tid != -1) | 88 | if (top->target_tid) |
86 | ret += SNPRINTF(bf + ret, size - ret, ")"); | 89 | ret += SNPRINTF(bf + ret, size - ret, ")"); |
87 | else | 90 | else |
88 | ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)", | 91 | ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)", |
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index a248f3c2c60..ce61cb2d1ac 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h | |||
@@ -23,7 +23,8 @@ struct perf_top { | |||
23 | u64 guest_us_samples, guest_kernel_samples; | 23 | u64 guest_us_samples, guest_kernel_samples; |
24 | int print_entries, count_filter, delay_secs; | 24 | int print_entries, count_filter, delay_secs; |
25 | int freq; | 25 | int freq; |
26 | pid_t target_pid, target_tid; | 26 | const char *target_pid, *target_tid; |
27 | uid_t uid; | ||
27 | bool hide_kernel_symbols, hide_user_symbols, zero; | 28 | bool hide_kernel_symbols, hide_user_symbols, zero; |
28 | bool system_wide; | 29 | bool system_wide; |
29 | bool use_tui, use_stdio; | 30 | bool use_tui, use_stdio; |
@@ -33,7 +34,8 @@ struct perf_top { | |||
33 | bool vmlinux_warned; | 34 | bool vmlinux_warned; |
34 | bool inherit; | 35 | bool inherit; |
35 | bool group; | 36 | bool group; |
36 | bool sample_id_all_avail; | 37 | bool sample_id_all_missing; |
38 | bool exclude_guest_missing; | ||
37 | bool dump_symtab; | 39 | bool dump_symtab; |
38 | const char *cpu_list; | 40 | const char *cpu_list; |
39 | struct hist_entry *sym_filter_entry; | 41 | struct hist_entry *sym_filter_entry; |
@@ -45,6 +47,7 @@ struct perf_top { | |||
45 | int realtime_prio; | 47 | int realtime_prio; |
46 | int sym_pcnt_filter; | 48 | int sym_pcnt_filter; |
47 | const char *sym_filter; | 49 | const char *sym_filter; |
50 | const char *uid_str; | ||
48 | }; | 51 | }; |
49 | 52 | ||
50 | size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size); | 53 | size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size); |
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 1a8d4dc4f38..e0a4f652f28 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c | |||
@@ -25,7 +25,6 @@ | |||
25 | #include <stdio.h> | 25 | #include <stdio.h> |
26 | #include <stdlib.h> | 26 | #include <stdlib.h> |
27 | #include <string.h> | 27 | #include <string.h> |
28 | #include <ctype.h> | ||
29 | #include <errno.h> | 28 | #include <errno.h> |
30 | 29 | ||
31 | #include "../perf.h" | 30 | #include "../perf.h" |
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index f55cc3a765a..b9592e0de8d 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c | |||
@@ -33,7 +33,6 @@ | |||
33 | #include <pthread.h> | 33 | #include <pthread.h> |
34 | #include <fcntl.h> | 34 | #include <fcntl.h> |
35 | #include <unistd.h> | 35 | #include <unistd.h> |
36 | #include <ctype.h> | ||
37 | #include <errno.h> | 36 | #include <errno.h> |
38 | 37 | ||
39 | #include "../perf.h" | 38 | #include "../perf.h" |
diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index a3fdf55f317..18ae6c1831d 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c | |||
@@ -22,7 +22,6 @@ | |||
22 | #include <stdio.h> | 22 | #include <stdio.h> |
23 | #include <stdlib.h> | 23 | #include <stdlib.h> |
24 | #include <string.h> | 24 | #include <string.h> |
25 | #include <ctype.h> | ||
26 | #include <errno.h> | 25 | #include <errno.h> |
27 | 26 | ||
28 | #include "../perf.h" | 27 | #include "../perf.h" |
diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c index e81aef1f256..bfba0490c09 100644 --- a/tools/perf/util/ui/browsers/hists.c +++ b/tools/perf/util/ui/browsers/hists.c | |||
@@ -839,6 +839,9 @@ static int hists__browser_title(struct hists *self, char *bf, size_t size, | |||
839 | nr_events = convert_unit(nr_events, &unit); | 839 | nr_events = convert_unit(nr_events, &unit); |
840 | printed = snprintf(bf, size, "Events: %lu%c %s", nr_events, unit, ev_name); | 840 | printed = snprintf(bf, size, "Events: %lu%c %s", nr_events, unit, ev_name); |
841 | 841 | ||
842 | if (self->uid_filter_str) | ||
843 | printed += snprintf(bf + printed, size - printed, | ||
844 | ", UID: %s", self->uid_filter_str); | ||
842 | if (thread) | 845 | if (thread) |
843 | printed += snprintf(bf + printed, size - printed, | 846 | printed += snprintf(bf + printed, size - printed, |
844 | ", Thread: %s(%d)", | 847 | ", Thread: %s(%d)", |
diff --git a/tools/perf/util/ui/browsers/map.c b/tools/perf/util/ui/browsers/map.c index 6905bcc8be2..eca6575abfd 100644 --- a/tools/perf/util/ui/browsers/map.c +++ b/tools/perf/util/ui/browsers/map.c | |||
@@ -3,9 +3,9 @@ | |||
3 | #include <newt.h> | 3 | #include <newt.h> |
4 | #include <inttypes.h> | 4 | #include <inttypes.h> |
5 | #include <sys/ttydefaults.h> | 5 | #include <sys/ttydefaults.h> |
6 | #include <ctype.h> | ||
7 | #include <string.h> | 6 | #include <string.h> |
8 | #include <linux/bitops.h> | 7 | #include <linux/bitops.h> |
8 | #include "../../util.h" | ||
9 | #include "../../debug.h" | 9 | #include "../../debug.h" |
10 | #include "../../symbol.h" | 10 | #include "../../symbol.h" |
11 | #include "../browser.h" | 11 | #include "../browser.h" |
diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c index d76d1c0ff98..52bb07c6442 100644 --- a/tools/perf/util/usage.c +++ b/tools/perf/util/usage.c | |||
@@ -7,6 +7,7 @@ | |||
7 | * Copyright (C) Linus Torvalds, 2005 | 7 | * Copyright (C) Linus Torvalds, 2005 |
8 | */ | 8 | */ |
9 | #include "util.h" | 9 | #include "util.h" |
10 | #include "debug.h" | ||
10 | 11 | ||
11 | static void report(const char *prefix, const char *err, va_list params) | 12 | static void report(const char *prefix, const char *err, va_list params) |
12 | { | 13 | { |
@@ -81,3 +82,41 @@ void warning(const char *warn, ...) | |||
81 | warn_routine(warn, params); | 82 | warn_routine(warn, params); |
82 | va_end(params); | 83 | va_end(params); |
83 | } | 84 | } |
85 | |||
/*
 * Resolve a user name or numeric uid string to a uid_t.
 *
 * Returns:
 *   the resolved uid   on success
 *   UINT_MAX           when @str is NULL, or when a pid/tid switch
 *                      overrides the uid (a warning is printed)
 *   UINT_MAX - 1       when @str names no known user (an error is printed)
 */
uid_t parse_target_uid(const char *str, const char *tid, const char *pid)
{
	struct passwd pwd, *result;
	char buf[1024];

	if (str == NULL)
		return UINT_MAX;

	/* UID and PID are mutually exclusive */
	if (tid || pid) {
		ui__warning("PID/TID switch overriding UID\n");
		sleep(1);
		return UINT_MAX;
	}

	getpwnam_r(str, &pwd, buf, sizeof(buf), &result);

	if (result == NULL) {
		/* Not a user name; try to parse it as a numeric uid. */
		char *endptr;
		long uid;

		errno = 0;
		uid = strtol(str, &endptr, 10);

		/*
		 * endptr == str catches the empty string, which strtol
		 * would otherwise silently turn into 0 (i.e. root);
		 * ERANGE catches values that overflow a long.
		 */
		if (endptr == str || *endptr != '\0' || errno == ERANGE) {
			ui__error("Invalid user %s\n", str);
			return UINT_MAX - 1;
		}

		getpwuid_r(uid, &pwd, buf, sizeof(buf), &result);

		if (result == NULL) {
			ui__error("Problems obtaining information for user %s\n",
				  str);
			return UINT_MAX - 1;
		}
	}

	return result->pw_uid;
}
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 813141047fc..8109a907841 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c | |||
@@ -6,7 +6,7 @@ | |||
6 | * XXX We need to find a better place for these things... | 6 | * XXX We need to find a better place for these things... |
7 | */ | 7 | */ |
8 | bool perf_host = true; | 8 | bool perf_host = true; |
9 | bool perf_guest = true; | 9 | bool perf_guest = false; |
10 | 10 | ||
11 | void event_attr_init(struct perf_event_attr *attr) | 11 | void event_attr_init(struct perf_event_attr *attr) |
12 | { | 12 | { |
@@ -14,6 +14,8 @@ void event_attr_init(struct perf_event_attr *attr) | |||
14 | attr->exclude_host = 1; | 14 | attr->exclude_host = 1; |
15 | if (!perf_guest) | 15 | if (!perf_guest) |
16 | attr->exclude_guest = 1; | 16 | attr->exclude_guest = 1; |
17 | /* to capture ABI version */ | ||
18 | attr->size = sizeof(*attr); | ||
17 | } | 19 | } |
18 | 20 | ||
19 | int mkdir_p(char *path, mode_t mode) | 21 | int mkdir_p(char *path, mode_t mode) |
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index ecf9898169c..0f99f394d8e 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h | |||
@@ -199,6 +199,8 @@ static inline int has_extension(const char *filename, const char *ext) | |||
199 | #undef isalpha | 199 | #undef isalpha |
200 | #undef isprint | 200 | #undef isprint |
201 | #undef isalnum | 201 | #undef isalnum |
202 | #undef islower | ||
203 | #undef isupper | ||
202 | #undef tolower | 204 | #undef tolower |
203 | #undef toupper | 205 | #undef toupper |
204 | 206 | ||
@@ -219,6 +221,8 @@ extern unsigned char sane_ctype[256]; | |||
219 | #define isalpha(x) sane_istest(x,GIT_ALPHA) | 221 | #define isalpha(x) sane_istest(x,GIT_ALPHA) |
220 | #define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) | 222 | #define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) |
221 | #define isprint(x) sane_istest(x,GIT_PRINT) | 223 | #define isprint(x) sane_istest(x,GIT_PRINT) |
224 | #define islower(x) (sane_istest(x,GIT_ALPHA) && sane_istest(x,0x20)) | ||
225 | #define isupper(x) (sane_istest(x,GIT_ALPHA) && !sane_istest(x,0x20)) | ||
222 | #define tolower(x) sane_case((unsigned char)(x), 0x20) | 226 | #define tolower(x) sane_case((unsigned char)(x), 0x20) |
223 | #define toupper(x) sane_case((unsigned char)(x), 0) | 227 | #define toupper(x) sane_case((unsigned char)(x), 0) |
224 | 228 | ||
@@ -245,6 +249,8 @@ struct perf_event_attr; | |||
245 | 249 | ||
246 | void event_attr_init(struct perf_event_attr *attr); | 250 | void event_attr_init(struct perf_event_attr *attr); |
247 | 251 | ||
252 | uid_t parse_target_uid(const char *str, const char *tid, const char *pid); | ||
253 | |||
248 | #define _STR(x) #x | 254 | #define _STR(x) #x |
249 | #define STR(x) _STR(x) | 255 | #define STR(x) _STR(x) |
250 | 256 | ||