diff options
Diffstat (limited to 'arch')
-rw-r--r-- | arch/Kconfig | 17 | ||||
-rw-r--r-- | arch/arm/include/asm/perf_event.h | 4 | ||||
-rw-r--r-- | arch/frv/include/asm/perf_event.h | 2 | ||||
-rw-r--r-- | arch/hexagon/include/asm/perf_event.h | 2 | ||||
-rw-r--r-- | arch/powerpc/include/asm/perf_event_server.h | 2 | ||||
-rw-r--r-- | arch/powerpc/kernel/perf_event.c | 6 | ||||
-rw-r--r-- | arch/s390/include/asm/perf_event.h | 1 | ||||
-rw-r--r-- | arch/x86/Kconfig | 5 | ||||
-rw-r--r-- | arch/x86/include/asm/inat.h | 5 | ||||
-rw-r--r-- | arch/x86/include/asm/insn.h | 18 | ||||
-rw-r--r-- | arch/x86/include/asm/perf_event.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/uprobes.h | 43 | ||||
-rw-r--r-- | arch/x86/kernel/Makefile | 1 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/amd.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.c | 82 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.h | 8 | ||||
-rw-r--r-- | arch/x86/kernel/uprobes.c | 423 | ||||
-rw-r--r-- | arch/x86/lib/inat.c | 36 | ||||
-rw-r--r-- | arch/x86/lib/insn.c | 13 |
19 files changed, 627 insertions, 46 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 4f55c736be1..d0e37c9d5f6 100644 --- a/arch/Kconfig +++ b/arch/Kconfig | |||
@@ -65,6 +65,23 @@ config OPTPROBES | |||
65 | depends on KPROBES && HAVE_OPTPROBES | 65 | depends on KPROBES && HAVE_OPTPROBES |
66 | depends on !PREEMPT | 66 | depends on !PREEMPT |
67 | 67 | ||
68 | config UPROBES | ||
69 | bool "Transparent user-space probes (EXPERIMENTAL)" | ||
70 | depends on ARCH_SUPPORTS_UPROBES && PERF_EVENTS | ||
71 | default n | ||
72 | help | ||
73 | Uprobes is the user-space counterpart to kprobes: they | ||
74 | enable instrumentation applications (such as 'perf probe') | ||
75 | to establish unintrusive probes in user-space binaries and | ||
76 | libraries, by executing handler functions when the probes | ||
77 | are hit by user-space applications. | ||
78 | |||
79 | ( These probes come in the form of single-byte breakpoints, | ||
80 | managed by the kernel and kept transparent to the probed | ||
81 | application. ) | ||
82 | |||
83 | If in doubt, say "N". | ||
84 | |||
68 | config HAVE_EFFICIENT_UNALIGNED_ACCESS | 85 | config HAVE_EFFICIENT_UNALIGNED_ACCESS |
69 | bool | 86 | bool |
70 | help | 87 | help |
diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h index 99cfe360798..7523340afb8 100644 --- a/arch/arm/include/asm/perf_event.h +++ b/arch/arm/include/asm/perf_event.h | |||
@@ -12,10 +12,6 @@ | |||
12 | #ifndef __ARM_PERF_EVENT_H__ | 12 | #ifndef __ARM_PERF_EVENT_H__ |
13 | #define __ARM_PERF_EVENT_H__ | 13 | #define __ARM_PERF_EVENT_H__ |
14 | 14 | ||
15 | /* ARM performance counters start from 1 (in the cp15 accesses) so use the | ||
16 | * same indexes here for consistency. */ | ||
17 | #define PERF_EVENT_INDEX_OFFSET 1 | ||
18 | |||
19 | /* ARM perf PMU IDs for use by internal perf clients. */ | 15 | /* ARM perf PMU IDs for use by internal perf clients. */ |
20 | enum arm_perf_pmu_ids { | 16 | enum arm_perf_pmu_ids { |
21 | ARM_PERF_PMU_ID_XSCALE1 = 0, | 17 | ARM_PERF_PMU_ID_XSCALE1 = 0, |
diff --git a/arch/frv/include/asm/perf_event.h b/arch/frv/include/asm/perf_event.h index a69e0155d14..c52ea5546b5 100644 --- a/arch/frv/include/asm/perf_event.h +++ b/arch/frv/include/asm/perf_event.h | |||
@@ -12,6 +12,4 @@ | |||
12 | #ifndef _ASM_PERF_EVENT_H | 12 | #ifndef _ASM_PERF_EVENT_H |
13 | #define _ASM_PERF_EVENT_H | 13 | #define _ASM_PERF_EVENT_H |
14 | 14 | ||
15 | #define PERF_EVENT_INDEX_OFFSET 0 | ||
16 | |||
17 | #endif /* _ASM_PERF_EVENT_H */ | 15 | #endif /* _ASM_PERF_EVENT_H */ |
diff --git a/arch/hexagon/include/asm/perf_event.h b/arch/hexagon/include/asm/perf_event.h index 6c2910f9118..8b8526b491c 100644 --- a/arch/hexagon/include/asm/perf_event.h +++ b/arch/hexagon/include/asm/perf_event.h | |||
@@ -19,6 +19,4 @@ | |||
19 | #ifndef _ASM_PERF_EVENT_H | 19 | #ifndef _ASM_PERF_EVENT_H |
20 | #define _ASM_PERF_EVENT_H | 20 | #define _ASM_PERF_EVENT_H |
21 | 21 | ||
22 | #define PERF_EVENT_INDEX_OFFSET 0 | ||
23 | |||
24 | #endif /* _ASM_PERF_EVENT_H */ | 22 | #endif /* _ASM_PERF_EVENT_H */ |
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index 8f1df1208d2..1a8093fa8f7 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h | |||
@@ -61,8 +61,6 @@ struct pt_regs; | |||
61 | extern unsigned long perf_misc_flags(struct pt_regs *regs); | 61 | extern unsigned long perf_misc_flags(struct pt_regs *regs); |
62 | extern unsigned long perf_instruction_pointer(struct pt_regs *regs); | 62 | extern unsigned long perf_instruction_pointer(struct pt_regs *regs); |
63 | 63 | ||
64 | #define PERF_EVENT_INDEX_OFFSET 1 | ||
65 | |||
66 | /* | 64 | /* |
67 | * Only override the default definitions in include/linux/perf_event.h | 65 | * Only override the default definitions in include/linux/perf_event.h |
68 | * if we have hardware PMU support. | 66 | * if we have hardware PMU support. |
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 64483fde95c..f04c2301725 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c | |||
@@ -1193,6 +1193,11 @@ static int power_pmu_event_init(struct perf_event *event) | |||
1193 | return err; | 1193 | return err; |
1194 | } | 1194 | } |
1195 | 1195 | ||
1196 | static int power_pmu_event_idx(struct perf_event *event) | ||
1197 | { | ||
1198 | return event->hw.idx; | ||
1199 | } | ||
1200 | |||
1196 | struct pmu power_pmu = { | 1201 | struct pmu power_pmu = { |
1197 | .pmu_enable = power_pmu_enable, | 1202 | .pmu_enable = power_pmu_enable, |
1198 | .pmu_disable = power_pmu_disable, | 1203 | .pmu_disable = power_pmu_disable, |
@@ -1205,6 +1210,7 @@ struct pmu power_pmu = { | |||
1205 | .start_txn = power_pmu_start_txn, | 1210 | .start_txn = power_pmu_start_txn, |
1206 | .cancel_txn = power_pmu_cancel_txn, | 1211 | .cancel_txn = power_pmu_cancel_txn, |
1207 | .commit_txn = power_pmu_commit_txn, | 1212 | .commit_txn = power_pmu_commit_txn, |
1213 | .event_idx = power_pmu_event_idx, | ||
1208 | }; | 1214 | }; |
1209 | 1215 | ||
1210 | /* | 1216 | /* |
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index a75f168d271..4eb444edbe4 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h | |||
@@ -6,4 +6,3 @@ | |||
6 | 6 | ||
7 | /* Empty, just to avoid compiling error */ | 7 | /* Empty, just to avoid compiling error */ |
8 | 8 | ||
9 | #define PERF_EVENT_INDEX_OFFSET 0 | ||
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e2b38b4bffd..d2a540f7d6c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -84,7 +84,7 @@ config X86 | |||
84 | select GENERIC_IOMAP | 84 | select GENERIC_IOMAP |
85 | 85 | ||
86 | config INSTRUCTION_DECODER | 86 | config INSTRUCTION_DECODER |
87 | def_bool (KPROBES || PERF_EVENTS) | 87 | def_bool (KPROBES || PERF_EVENTS || UPROBES) |
88 | 88 | ||
89 | config OUTPUT_FORMAT | 89 | config OUTPUT_FORMAT |
90 | string | 90 | string |
@@ -240,6 +240,9 @@ config ARCH_CPU_PROBE_RELEASE | |||
240 | def_bool y | 240 | def_bool y |
241 | depends on HOTPLUG_CPU | 241 | depends on HOTPLUG_CPU |
242 | 242 | ||
243 | config ARCH_SUPPORTS_UPROBES | ||
244 | def_bool y | ||
245 | |||
243 | source "init/Kconfig" | 246 | source "init/Kconfig" |
244 | source "kernel/Kconfig.freezer" | 247 | source "kernel/Kconfig.freezer" |
245 | 248 | ||
diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h index 205b063e3e3..74a2e312e8a 100644 --- a/arch/x86/include/asm/inat.h +++ b/arch/x86/include/asm/inat.h | |||
@@ -97,11 +97,12 @@ | |||
97 | 97 | ||
98 | /* Attribute search APIs */ | 98 | /* Attribute search APIs */ |
99 | extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); | 99 | extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); |
100 | extern int inat_get_last_prefix_id(insn_byte_t last_pfx); | ||
100 | extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, | 101 | extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, |
101 | insn_byte_t last_pfx, | 102 | int lpfx_id, |
102 | insn_attr_t esc_attr); | 103 | insn_attr_t esc_attr); |
103 | extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, | 104 | extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, |
104 | insn_byte_t last_pfx, | 105 | int lpfx_id, |
105 | insn_attr_t esc_attr); | 106 | insn_attr_t esc_attr); |
106 | extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, | 107 | extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, |
107 | insn_byte_t vex_m, | 108 | insn_byte_t vex_m, |
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 74df3f1eddf..48eb30a8606 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h | |||
@@ -96,12 +96,6 @@ struct insn { | |||
96 | #define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ | 96 | #define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ |
97 | #define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ | 97 | #define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ |
98 | 98 | ||
99 | /* The last prefix is needed for two-byte and three-byte opcodes */ | ||
100 | static inline insn_byte_t insn_last_prefix(struct insn *insn) | ||
101 | { | ||
102 | return insn->prefixes.bytes[3]; | ||
103 | } | ||
104 | |||
105 | extern void insn_init(struct insn *insn, const void *kaddr, int x86_64); | 99 | extern void insn_init(struct insn *insn, const void *kaddr, int x86_64); |
106 | extern void insn_get_prefixes(struct insn *insn); | 100 | extern void insn_get_prefixes(struct insn *insn); |
107 | extern void insn_get_opcode(struct insn *insn); | 101 | extern void insn_get_opcode(struct insn *insn); |
@@ -160,6 +154,18 @@ static inline insn_byte_t insn_vex_p_bits(struct insn *insn) | |||
160 | return X86_VEX_P(insn->vex_prefix.bytes[2]); | 154 | return X86_VEX_P(insn->vex_prefix.bytes[2]); |
161 | } | 155 | } |
162 | 156 | ||
157 | /* Get the last prefix id from last prefix or VEX prefix */ | ||
158 | static inline int insn_last_prefix_id(struct insn *insn) | ||
159 | { | ||
160 | if (insn_is_avx(insn)) | ||
161 | return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */ | ||
162 | |||
163 | if (insn->prefixes.bytes[3]) | ||
164 | return inat_get_last_prefix_id(insn->prefixes.bytes[3]); | ||
165 | |||
166 | return 0; | ||
167 | } | ||
168 | |||
163 | /* Offset of each field from kaddr */ | 169 | /* Offset of each field from kaddr */ |
164 | static inline int insn_offset_rex_prefix(struct insn *insn) | 170 | static inline int insn_offset_rex_prefix(struct insn *insn) |
165 | { | 171 | { |
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 461ce432b1c..e8fb2c7a5f4 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h | |||
@@ -188,8 +188,6 @@ extern u32 get_ibs_caps(void); | |||
188 | #ifdef CONFIG_PERF_EVENTS | 188 | #ifdef CONFIG_PERF_EVENTS |
189 | extern void perf_events_lapic_init(void); | 189 | extern void perf_events_lapic_init(void); |
190 | 190 | ||
191 | #define PERF_EVENT_INDEX_OFFSET 0 | ||
192 | |||
193 | /* | 191 | /* |
194 | * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups. | 192 | * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups. |
195 | * This flag is otherwise unused and ABI specified to be 0, so nobody should | 193 | * This flag is otherwise unused and ABI specified to be 0, so nobody should |
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h new file mode 100644 index 00000000000..0500391f57d --- /dev/null +++ b/arch/x86/include/asm/uprobes.h | |||
@@ -0,0 +1,43 @@ | |||
1 | #ifndef _ASM_UPROBES_H | ||
2 | #define _ASM_UPROBES_H | ||
3 | /* | ||
4 | * User-space Probes (UProbes) for x86 | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
19 | * | ||
20 | * Copyright (C) IBM Corporation, 2008-2011 | ||
21 | * Authors: | ||
22 | * Srikar Dronamraju | ||
23 | * Jim Keniston | ||
24 | */ | ||
25 | |||
26 | typedef u8 uprobe_opcode_t; | ||
27 | |||
28 | #define MAX_UINSN_BYTES 16 | ||
29 | #define UPROBE_XOL_SLOT_BYTES 128 /* to keep it cache aligned */ | ||
30 | |||
31 | #define UPROBE_SWBP_INSN 0xcc | ||
32 | #define UPROBE_SWBP_INSN_SIZE 1 | ||
33 | |||
34 | struct arch_uprobe { | ||
35 | u16 fixups; | ||
36 | u8 insn[MAX_UINSN_BYTES]; | ||
37 | #ifdef CONFIG_X86_64 | ||
38 | unsigned long rip_rela_target_address; | ||
39 | #endif | ||
40 | }; | ||
41 | |||
42 | extern int arch_uprobes_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm); | ||
43 | #endif /* _ASM_UPROBES_H */ | ||
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 5369059c07a..8c8c365a3bc 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -100,6 +100,7 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o | |||
100 | 100 | ||
101 | obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o | 101 | obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o |
102 | obj-$(CONFIG_OF) += devicetree.o | 102 | obj-$(CONFIG_OF) += devicetree.o |
103 | obj-$(CONFIG_UPROBES) += uprobes.o | ||
103 | 104 | ||
104 | ### | 105 | ### |
105 | # 64 bit specific files | 106 | # 64 bit specific files |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index f4773f4aae3..0a44b90602b 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -5,6 +5,7 @@ | |||
5 | #include <linux/mm.h> | 5 | #include <linux/mm.h> |
6 | 6 | ||
7 | #include <linux/io.h> | 7 | #include <linux/io.h> |
8 | #include <linux/sched.h> | ||
8 | #include <asm/processor.h> | 9 | #include <asm/processor.h> |
9 | #include <asm/apic.h> | 10 | #include <asm/apic.h> |
10 | #include <asm/cpu.h> | 11 | #include <asm/cpu.h> |
@@ -456,6 +457,8 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) | |||
456 | if (c->x86_power & (1 << 8)) { | 457 | if (c->x86_power & (1 << 8)) { |
457 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 458 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
458 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); | 459 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); |
460 | if (!check_tsc_unstable()) | ||
461 | sched_clock_stable = 1; | ||
459 | } | 462 | } |
460 | 463 | ||
461 | #ifdef CONFIG_X86_64 | 464 | #ifdef CONFIG_X86_64 |
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 63c0e058a40..1c52bdbb9b8 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -24,12 +24,14 @@ | |||
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/cpu.h> | 25 | #include <linux/cpu.h> |
26 | #include <linux/bitops.h> | 26 | #include <linux/bitops.h> |
27 | #include <linux/device.h> | ||
27 | 28 | ||
28 | #include <asm/apic.h> | 29 | #include <asm/apic.h> |
29 | #include <asm/stacktrace.h> | 30 | #include <asm/stacktrace.h> |
30 | #include <asm/nmi.h> | 31 | #include <asm/nmi.h> |
31 | #include <asm/smp.h> | 32 | #include <asm/smp.h> |
32 | #include <asm/alternative.h> | 33 | #include <asm/alternative.h> |
34 | #include <asm/timer.h> | ||
33 | 35 | ||
34 | #include "perf_event.h" | 36 | #include "perf_event.h" |
35 | 37 | ||
@@ -1209,6 +1211,8 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) | |||
1209 | break; | 1211 | break; |
1210 | 1212 | ||
1211 | case CPU_STARTING: | 1213 | case CPU_STARTING: |
1214 | if (x86_pmu.attr_rdpmc) | ||
1215 | set_in_cr4(X86_CR4_PCE); | ||
1212 | if (x86_pmu.cpu_starting) | 1216 | if (x86_pmu.cpu_starting) |
1213 | x86_pmu.cpu_starting(cpu); | 1217 | x86_pmu.cpu_starting(cpu); |
1214 | break; | 1218 | break; |
@@ -1318,6 +1322,8 @@ static int __init init_hw_perf_events(void) | |||
1318 | } | 1322 | } |
1319 | } | 1323 | } |
1320 | 1324 | ||
1325 | x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ | ||
1326 | |||
1321 | pr_info("... version: %d\n", x86_pmu.version); | 1327 | pr_info("... version: %d\n", x86_pmu.version); |
1322 | pr_info("... bit width: %d\n", x86_pmu.cntval_bits); | 1328 | pr_info("... bit width: %d\n", x86_pmu.cntval_bits); |
1323 | pr_info("... generic registers: %d\n", x86_pmu.num_counters); | 1329 | pr_info("... generic registers: %d\n", x86_pmu.num_counters); |
@@ -1541,10 +1547,71 @@ static int x86_pmu_event_init(struct perf_event *event) | |||
1541 | return err; | 1547 | return err; |
1542 | } | 1548 | } |
1543 | 1549 | ||
1550 | static int x86_pmu_event_idx(struct perf_event *event) | ||
1551 | { | ||
1552 | int idx = event->hw.idx; | ||
1553 | |||
1554 | if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) { | ||
1555 | idx -= X86_PMC_IDX_FIXED; | ||
1556 | idx |= 1 << 30; | ||
1557 | } | ||
1558 | |||
1559 | return idx + 1; | ||
1560 | } | ||
1561 | |||
1562 | static ssize_t get_attr_rdpmc(struct device *cdev, | ||
1563 | struct device_attribute *attr, | ||
1564 | char *buf) | ||
1565 | { | ||
1566 | return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc); | ||
1567 | } | ||
1568 | |||
1569 | static void change_rdpmc(void *info) | ||
1570 | { | ||
1571 | bool enable = !!(unsigned long)info; | ||
1572 | |||
1573 | if (enable) | ||
1574 | set_in_cr4(X86_CR4_PCE); | ||
1575 | else | ||
1576 | clear_in_cr4(X86_CR4_PCE); | ||
1577 | } | ||
1578 | |||
1579 | static ssize_t set_attr_rdpmc(struct device *cdev, | ||
1580 | struct device_attribute *attr, | ||
1581 | const char *buf, size_t count) | ||
1582 | { | ||
1583 | unsigned long val = simple_strtoul(buf, NULL, 0); | ||
1584 | |||
1585 | if (!!val != !!x86_pmu.attr_rdpmc) { | ||
1586 | x86_pmu.attr_rdpmc = !!val; | ||
1587 | smp_call_function(change_rdpmc, (void *)val, 1); | ||
1588 | } | ||
1589 | |||
1590 | return count; | ||
1591 | } | ||
1592 | |||
1593 | static DEVICE_ATTR(rdpmc, S_IRUSR | S_IWUSR, get_attr_rdpmc, set_attr_rdpmc); | ||
1594 | |||
1595 | static struct attribute *x86_pmu_attrs[] = { | ||
1596 | &dev_attr_rdpmc.attr, | ||
1597 | NULL, | ||
1598 | }; | ||
1599 | |||
1600 | static struct attribute_group x86_pmu_attr_group = { | ||
1601 | .attrs = x86_pmu_attrs, | ||
1602 | }; | ||
1603 | |||
1604 | static const struct attribute_group *x86_pmu_attr_groups[] = { | ||
1605 | &x86_pmu_attr_group, | ||
1606 | NULL, | ||
1607 | }; | ||
1608 | |||
1544 | static struct pmu pmu = { | 1609 | static struct pmu pmu = { |
1545 | .pmu_enable = x86_pmu_enable, | 1610 | .pmu_enable = x86_pmu_enable, |
1546 | .pmu_disable = x86_pmu_disable, | 1611 | .pmu_disable = x86_pmu_disable, |
1547 | 1612 | ||
1613 | .attr_groups = x86_pmu_attr_groups, | ||
1614 | |||
1548 | .event_init = x86_pmu_event_init, | 1615 | .event_init = x86_pmu_event_init, |
1549 | 1616 | ||
1550 | .add = x86_pmu_add, | 1617 | .add = x86_pmu_add, |
@@ -1556,8 +1623,23 @@ static struct pmu pmu = { | |||
1556 | .start_txn = x86_pmu_start_txn, | 1623 | .start_txn = x86_pmu_start_txn, |
1557 | .cancel_txn = x86_pmu_cancel_txn, | 1624 | .cancel_txn = x86_pmu_cancel_txn, |
1558 | .commit_txn = x86_pmu_commit_txn, | 1625 | .commit_txn = x86_pmu_commit_txn, |
1626 | |||
1627 | .event_idx = x86_pmu_event_idx, | ||
1559 | }; | 1628 | }; |
1560 | 1629 | ||
1630 | void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now) | ||
1631 | { | ||
1632 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) | ||
1633 | return; | ||
1634 | |||
1635 | if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) | ||
1636 | return; | ||
1637 | |||
1638 | userpg->time_mult = this_cpu_read(cyc2ns); | ||
1639 | userpg->time_shift = CYC2NS_SCALE_FACTOR; | ||
1640 | userpg->time_offset = this_cpu_read(cyc2ns_offset) - now; | ||
1641 | } | ||
1642 | |||
1561 | /* | 1643 | /* |
1562 | * callchain support | 1644 | * callchain support |
1563 | */ | 1645 | */ |
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index c30c807ddc7..82db83b5c3b 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h | |||
@@ -309,6 +309,14 @@ struct x86_pmu { | |||
309 | struct x86_pmu_quirk *quirks; | 309 | struct x86_pmu_quirk *quirks; |
310 | int perfctr_second_write; | 310 | int perfctr_second_write; |
311 | 311 | ||
312 | /* | ||
313 | * sysfs attrs | ||
314 | */ | ||
315 | int attr_rdpmc; | ||
316 | |||
317 | /* | ||
318 | * CPU Hotplug hooks | ||
319 | */ | ||
312 | int (*cpu_prepare)(int cpu); | 320 | int (*cpu_prepare)(int cpu); |
313 | void (*cpu_starting)(int cpu); | 321 | void (*cpu_starting)(int cpu); |
314 | void (*cpu_dying)(int cpu); | 322 | void (*cpu_dying)(int cpu); |
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c new file mode 100644 index 00000000000..851a11b0d38 --- /dev/null +++ b/arch/x86/kernel/uprobes.c | |||
@@ -0,0 +1,423 @@ | |||
1 | /* | ||
2 | * User-space Probes (UProbes) for x86 | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * Copyright (C) IBM Corporation, 2008-2011 | ||
19 | * Authors: | ||
20 | * Srikar Dronamraju | ||
21 | * Jim Keniston | ||
22 | */ | ||
23 | #include <linux/kernel.h> | ||
24 | #include <linux/sched.h> | ||
25 | #include <linux/ptrace.h> | ||
26 | #include <linux/uprobes.h> | ||
27 | |||
28 | #include <linux/kdebug.h> | ||
29 | #include <asm/insn.h> | ||
30 | |||
31 | /* Post-execution fixups. */ | ||
32 | |||
33 | /* No fixup needed */ | ||
34 | #define UPROBE_FIX_NONE 0x0 | ||
35 | /* Adjust IP back to vicinity of actual insn */ | ||
36 | #define UPROBE_FIX_IP 0x1 | ||
37 | /* Adjust the return address of a call insn */ | ||
38 | #define UPROBE_FIX_CALL 0x2 | ||
39 | |||
40 | #define UPROBE_FIX_RIP_AX 0x8000 | ||
41 | #define UPROBE_FIX_RIP_CX 0x4000 | ||
42 | |||
43 | /* Adaptations for mhiramat x86 decoder v14. */ | ||
44 | #define OPCODE1(insn) ((insn)->opcode.bytes[0]) | ||
45 | #define OPCODE2(insn) ((insn)->opcode.bytes[1]) | ||
46 | #define OPCODE3(insn) ((insn)->opcode.bytes[2]) | ||
47 | #define MODRM_REG(insn) X86_MODRM_REG(insn->modrm.value) | ||
48 | |||
49 | #define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\ | ||
50 | (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ | ||
51 | (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \ | ||
52 | (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \ | ||
53 | (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \ | ||
54 | << (row % 32)) | ||
55 | |||
56 | /* | ||
57 | * Good-instruction tables for 32-bit apps. This is non-const and volatile | ||
58 | * to keep gcc from statically optimizing it out, as variable_test_bit makes | ||
59 | * some versions of gcc to think only *(unsigned long*) is used. | ||
60 | */ | ||
61 | static volatile u32 good_insns_32[256 / 32] = { | ||
62 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
63 | /* ---------------------------------------------- */ | ||
64 | W(0x00, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) | /* 00 */ | ||
65 | W(0x10, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) , /* 10 */ | ||
66 | W(0x20, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) | /* 20 */ | ||
67 | W(0x30, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) , /* 30 */ | ||
68 | W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ | ||
69 | W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ | ||
70 | W(0x60, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */ | ||
71 | W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */ | ||
72 | W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ | ||
73 | W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ | ||
74 | W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */ | ||
75 | W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ | ||
76 | W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */ | ||
77 | W(0xd0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ | ||
78 | W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */ | ||
79 | W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */ | ||
80 | /* ---------------------------------------------- */ | ||
81 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
82 | }; | ||
83 | |||
84 | /* Using this for both 64-bit and 32-bit apps */ | ||
85 | static volatile u32 good_2byte_insns[256 / 32] = { | ||
86 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
87 | /* ---------------------------------------------- */ | ||
88 | W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */ | ||
89 | W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */ | ||
90 | W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */ | ||
91 | W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */ | ||
92 | W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ | ||
93 | W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ | ||
94 | W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */ | ||
95 | W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */ | ||
96 | W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ | ||
97 | W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ | ||
98 | W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */ | ||
99 | W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ | ||
100 | W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */ | ||
101 | W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ | ||
102 | W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */ | ||
103 | W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* f0 */ | ||
104 | /* ---------------------------------------------- */ | ||
105 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
106 | }; | ||
107 | |||
108 | #ifdef CONFIG_X86_64 | ||
109 | /* Good-instruction tables for 64-bit apps */ | ||
110 | static volatile u32 good_insns_64[256 / 32] = { | ||
111 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
112 | /* ---------------------------------------------- */ | ||
113 | W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 00 */ | ||
114 | W(0x10, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 10 */ | ||
115 | W(0x20, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 20 */ | ||
116 | W(0x30, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 30 */ | ||
117 | W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */ | ||
118 | W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ | ||
119 | W(0x60, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */ | ||
120 | W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */ | ||
121 | W(0x80, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ | ||
122 | W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ | ||
123 | W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */ | ||
124 | W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ | ||
125 | W(0xc0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */ | ||
126 | W(0xd0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ | ||
127 | W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */ | ||
128 | W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */ | ||
129 | /* ---------------------------------------------- */ | ||
130 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
131 | }; | ||
132 | #endif | ||
133 | #undef W | ||
134 | |||
135 | /* | ||
136 | * opcodes we'll probably never support: | ||
137 | * | ||
138 | * 6c-6d, e4-e5, ec-ed - in | ||
139 | * 6e-6f, e6-e7, ee-ef - out | ||
140 | * cc, cd - int3, int | ||
141 | * cf - iret | ||
142 | * d6 - illegal instruction | ||
143 | * f1 - int1/icebp | ||
144 | * f4 - hlt | ||
145 | * fa, fb - cli, sti | ||
146 | * 0f - lar, lsl, syscall, clts, sysret, sysenter, sysexit, invd, wbinvd, ud2 | ||
147 | * | ||
148 | * invalid opcodes in 64-bit mode: | ||
149 | * | ||
150 | * 06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, 82, c4-c5, d4-d5 | ||
151 | * 63 - we support this opcode in x86_64 but not in i386. | ||
152 | * | ||
153 | * opcodes we may need to refine support for: | ||
154 | * | ||
155 | * 0f - 2-byte instructions: For many of these instructions, the validity | ||
156 | * depends on the prefix and/or the reg field. On such instructions, we | ||
157 | * just consider the opcode combination valid if it corresponds to any | ||
158 | * valid instruction. | ||
159 | * | ||
160 | * 8f - Group 1 - only reg = 0 is OK | ||
161 | * c6-c7 - Group 11 - only reg = 0 is OK | ||
162 | * d9-df - fpu insns with some illegal encodings | ||
163 | * f2, f3 - repnz, repz prefixes. These are also the first byte for | ||
164 | * certain floating-point instructions, such as addsd. | ||
165 | * | ||
166 | * fe - Group 4 - only reg = 0 or 1 is OK | ||
167 | * ff - Group 5 - only reg = 0-6 is OK | ||
168 | * | ||
169 | * others -- Do we need to support these? | ||
170 | * | ||
171 | * 0f - (floating-point?) prefetch instructions | ||
172 | * 07, 17, 1f - pop es, pop ss, pop ds | ||
173 | * 26, 2e, 36, 3e - es:, cs:, ss:, ds: segment prefixes -- | ||
174 | * but 64 and 65 (fs: and gs:) seem to be used, so we support them | ||
175 | * 67 - addr16 prefix | ||
176 | * ce - into | ||
177 | * f0 - lock prefix | ||
178 | */ | ||
179 | |||
180 | /* | ||
181 | * TODO: | ||
182 | * - Where necessary, examine the modrm byte and allow only valid instructions | ||
183 | * in the different Groups and fpu instructions. | ||
184 | */ | ||
185 | |||
186 | static bool is_prefix_bad(struct insn *insn) | ||
187 | { | ||
188 | int i; | ||
189 | |||
190 | for (i = 0; i < insn->prefixes.nbytes; i++) { | ||
191 | switch (insn->prefixes.bytes[i]) { | ||
192 | case 0x26: /* INAT_PFX_ES */ | ||
193 | case 0x2E: /* INAT_PFX_CS */ | ||
194 | case 0x36: /* INAT_PFX_DS */ | ||
195 | case 0x3E: /* INAT_PFX_SS */ | ||
196 | case 0xF0: /* INAT_PFX_LOCK */ | ||
197 | return true; | ||
198 | } | ||
199 | } | ||
200 | return false; | ||
201 | } | ||
202 | |||
203 | static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn) | ||
204 | { | ||
205 | insn_init(insn, auprobe->insn, false); | ||
206 | |||
207 | /* Skip good instruction prefixes; reject "bad" ones. */ | ||
208 | insn_get_opcode(insn); | ||
209 | if (is_prefix_bad(insn)) | ||
210 | return -ENOTSUPP; | ||
211 | |||
212 | if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_32)) | ||
213 | return 0; | ||
214 | |||
215 | if (insn->opcode.nbytes == 2) { | ||
216 | if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns)) | ||
217 | return 0; | ||
218 | } | ||
219 | |||
220 | return -ENOTSUPP; | ||
221 | } | ||
222 | |||
223 | /* | ||
224 | * Figure out which fixups post_xol() will need to perform, and annotate | ||
225 | * arch_uprobe->fixups accordingly. To start with, | ||
226 | * arch_uprobe->fixups is either zero or it reflects rip-related | ||
227 | * fixups. | ||
228 | */ | ||
229 | static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn) | ||
230 | { | ||
231 | bool fix_ip = true, fix_call = false; /* defaults */ | ||
232 | int reg; | ||
233 | |||
234 | insn_get_opcode(insn); /* should be a nop */ | ||
235 | |||
236 | switch (OPCODE1(insn)) { | ||
237 | case 0xc3: /* ret/lret */ | ||
238 | case 0xcb: | ||
239 | case 0xc2: | ||
240 | case 0xca: | ||
241 | /* ip is correct */ | ||
242 | fix_ip = false; | ||
243 | break; | ||
244 | case 0xe8: /* call relative - Fix return addr */ | ||
245 | fix_call = true; | ||
246 | break; | ||
247 | case 0x9a: /* call absolute - Fix return addr, not ip */ | ||
248 | fix_call = true; | ||
249 | fix_ip = false; | ||
250 | break; | ||
251 | case 0xff: | ||
252 | insn_get_modrm(insn); | ||
253 | reg = MODRM_REG(insn); | ||
254 | if (reg == 2 || reg == 3) { | ||
255 | /* call or lcall, indirect */ | ||
256 | /* Fix return addr; ip is correct. */ | ||
257 | fix_call = true; | ||
258 | fix_ip = false; | ||
259 | } else if (reg == 4 || reg == 5) { | ||
260 | /* jmp or ljmp, indirect */ | ||
261 | /* ip is correct. */ | ||
262 | fix_ip = false; | ||
263 | } | ||
264 | break; | ||
265 | case 0xea: /* jmp absolute -- ip is correct */ | ||
266 | fix_ip = false; | ||
267 | break; | ||
268 | default: | ||
269 | break; | ||
270 | } | ||
271 | if (fix_ip) | ||
272 | auprobe->fixups |= UPROBE_FIX_IP; | ||
273 | if (fix_call) | ||
274 | auprobe->fixups |= UPROBE_FIX_CALL; | ||
275 | } | ||
276 | |||
277 | #ifdef CONFIG_X86_64 | ||
/*
 * If arch_uprobe->insn doesn't use rip-relative addressing, return
 * immediately.  Otherwise, rewrite the instruction so that it accesses
 * its memory operand indirectly through a scratch register.  Set
 * arch_uprobe->fixups and arch_uprobe->rip_rela_target_address
 * accordingly.  (The contents of the scratch register will be saved
 * before we single-step the modified instruction, and restored
 * afterward.)
 *
 * We do this because a rip-relative instruction can access only a
 * relatively small area (+/- 2 GB from the instruction), and the XOL
 * area typically lies beyond that area.  At least for instructions
 * that store to memory, we can't execute the original instruction
 * and "fix things up" later, because the misdirected store could be
 * disastrous.
 *
 * Some useful facts about rip-relative instructions:
 *
 * - There's always a modrm byte.
 * - There's never a SIB byte.
 * - The displacement is always 4 bytes.
 */
static void
handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
{
	u8 *cursor;
	u8 reg;

	/* rip-relative addressing only exists in 64-bit mode. */
	if (mm->context.ia32_compat)
		return;

	auprobe->rip_rela_target_address = 0x0;
	if (!insn_rip_relative(insn))
		return;

	/*
	 * insn_rip_relative() would have decoded rex_prefix, modrm.
	 * Clear REX.b bit (extension of MODRM.rm field):
	 * we want to encode rax/rcx, not r8/r9.
	 */
	if (insn->rex_prefix.nbytes) {
		cursor = auprobe->insn + insn_offset_rex_prefix(insn);
		*cursor &= 0xfe;	/* Clearing REX.B bit */
	}

	/*
	 * Point cursor at the modrm byte.  The next 4 bytes are the
	 * displacement.  Beyond the displacement, for some instructions,
	 * is the immediate operand.
	 */
	cursor = auprobe->insn + insn_offset_modrm(insn);
	insn_get_length(insn);	/* also decodes displacement and immediate */

	/*
	 * Convert from rip-relative addressing to indirect addressing
	 * via a scratch register.  Change the r/m field from 0x5 (%rip)
	 * to 0x0 (%rax) or 0x1 (%rcx), and squeeze out the offset field.
	 */
	reg = MODRM_REG(insn);
	if (reg == 0) {
		/*
		 * The register operand (if any) is either the A register
		 * (%rax, %eax, etc.) or (if the 0x4 bit is set in the
		 * REX prefix) %r8.  In any case, we know the C register
		 * is NOT the register operand, so we use %rcx (register
		 * #1) for the scratch register.
		 */
		auprobe->fixups = UPROBE_FIX_RIP_CX;
		/* Change modrm from 00 000 101 to 00 000 001. */
		*cursor = 0x1;
	} else {
		/* Use %rax (register #0) for the scratch register. */
		auprobe->fixups = UPROBE_FIX_RIP_AX;
		/* Change modrm from 00 xxx 101 to 00 xxx 000 */
		*cursor = (reg << 3);
	}

	/*
	 * Target address = address of next instruction + (signed) offset.
	 * NOTE(review): only length + displacement is stored here; the
	 * instruction's original address is presumably added at fixup
	 * time -- confirm against the pre/post_xol paths.
	 */
	auprobe->rip_rela_target_address = (long)insn->length + insn->displacement.value;

	/* Displacement field is gone; slide immediate field (if any) over. */
	if (insn->immediate.nbytes) {
		cursor++;
		memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes);
	}
	return;
}
365 | |||
366 | static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn) | ||
367 | { | ||
368 | insn_init(insn, auprobe->insn, true); | ||
369 | |||
370 | /* Skip good instruction prefixes; reject "bad" ones. */ | ||
371 | insn_get_opcode(insn); | ||
372 | if (is_prefix_bad(insn)) | ||
373 | return -ENOTSUPP; | ||
374 | |||
375 | if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_64)) | ||
376 | return 0; | ||
377 | |||
378 | if (insn->opcode.nbytes == 2) { | ||
379 | if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns)) | ||
380 | return 0; | ||
381 | } | ||
382 | return -ENOTSUPP; | ||
383 | } | ||
384 | |||
385 | static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) | ||
386 | { | ||
387 | if (mm->context.ia32_compat) | ||
388 | return validate_insn_32bits(auprobe, insn); | ||
389 | return validate_insn_64bits(auprobe, insn); | ||
390 | } | ||
391 | #else /* 32-bit: */ | ||
/*
 * 32-bit stub: rip-relative addressing exists only in 64-bit mode,
 * so there is nothing to rewrite here.
 */
static void handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
{
	/* No RIP-relative addressing on 32-bit */
}
396 | |||
/* On a 32-bit kernel every probed task is 32-bit; no mode dispatch needed. */
static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
{
	return validate_insn_32bits(auprobe, insn);
}
401 | #endif /* CONFIG_X86_64 */ | ||
402 | |||
/**
 * arch_uprobes_analyze_insn - instruction analysis including validity and fixups.
 * @auprobe: the probepoint information.
 * @mm: the probed address space.
 * Return 0 on success or a -ve number on error.
 */
int arch_uprobes_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm)
{
	int ret;
	struct insn insn;

	auprobe->fixups = 0;
	/* Reject instructions we cannot safely single-step out of line. */
	ret = validate_insn_bits(auprobe, mm, &insn);
	if (ret != 0)
		return ret;

	/* Rewrite rip-relative operands (64-bit only), then annotate fixups. */
	handle_riprel_insn(auprobe, mm, &insn);
	prepare_fixups(auprobe, &insn);

	return 0;
}
diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c index 88ad5fbda6e..c1f01a8e9f6 100644 --- a/arch/x86/lib/inat.c +++ b/arch/x86/lib/inat.c | |||
@@ -29,46 +29,46 @@ insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode) | |||
29 | return inat_primary_table[opcode]; | 29 | return inat_primary_table[opcode]; |
30 | } | 30 | } |
31 | 31 | ||
32 | insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, insn_byte_t last_pfx, | 32 | int inat_get_last_prefix_id(insn_byte_t last_pfx) |
33 | { | ||
34 | insn_attr_t lpfx_attr; | ||
35 | |||
36 | lpfx_attr = inat_get_opcode_attribute(last_pfx); | ||
37 | return inat_last_prefix_id(lpfx_attr); | ||
38 | } | ||
39 | |||
40 | insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id, | ||
33 | insn_attr_t esc_attr) | 41 | insn_attr_t esc_attr) |
34 | { | 42 | { |
35 | const insn_attr_t *table; | 43 | const insn_attr_t *table; |
36 | insn_attr_t lpfx_attr; | 44 | int n; |
37 | int n, m = 0; | ||
38 | 45 | ||
39 | n = inat_escape_id(esc_attr); | 46 | n = inat_escape_id(esc_attr); |
40 | if (last_pfx) { | 47 | |
41 | lpfx_attr = inat_get_opcode_attribute(last_pfx); | ||
42 | m = inat_last_prefix_id(lpfx_attr); | ||
43 | } | ||
44 | table = inat_escape_tables[n][0]; | 48 | table = inat_escape_tables[n][0]; |
45 | if (!table) | 49 | if (!table) |
46 | return 0; | 50 | return 0; |
47 | if (inat_has_variant(table[opcode]) && m) { | 51 | if (inat_has_variant(table[opcode]) && lpfx_id) { |
48 | table = inat_escape_tables[n][m]; | 52 | table = inat_escape_tables[n][lpfx_id]; |
49 | if (!table) | 53 | if (!table) |
50 | return 0; | 54 | return 0; |
51 | } | 55 | } |
52 | return table[opcode]; | 56 | return table[opcode]; |
53 | } | 57 | } |
54 | 58 | ||
55 | insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx, | 59 | insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id, |
56 | insn_attr_t grp_attr) | 60 | insn_attr_t grp_attr) |
57 | { | 61 | { |
58 | const insn_attr_t *table; | 62 | const insn_attr_t *table; |
59 | insn_attr_t lpfx_attr; | 63 | int n; |
60 | int n, m = 0; | ||
61 | 64 | ||
62 | n = inat_group_id(grp_attr); | 65 | n = inat_group_id(grp_attr); |
63 | if (last_pfx) { | 66 | |
64 | lpfx_attr = inat_get_opcode_attribute(last_pfx); | ||
65 | m = inat_last_prefix_id(lpfx_attr); | ||
66 | } | ||
67 | table = inat_group_tables[n][0]; | 67 | table = inat_group_tables[n][0]; |
68 | if (!table) | 68 | if (!table) |
69 | return inat_group_common_attribute(grp_attr); | 69 | return inat_group_common_attribute(grp_attr); |
70 | if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && m) { | 70 | if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && lpfx_id) { |
71 | table = inat_group_tables[n][m]; | 71 | table = inat_group_tables[n][lpfx_id]; |
72 | if (!table) | 72 | if (!table) |
73 | return inat_group_common_attribute(grp_attr); | 73 | return inat_group_common_attribute(grp_attr); |
74 | } | 74 | } |
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 5a1f9f3e3fb..25feb1ae71c 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c | |||
@@ -185,7 +185,8 @@ err_out: | |||
185 | void insn_get_opcode(struct insn *insn) | 185 | void insn_get_opcode(struct insn *insn) |
186 | { | 186 | { |
187 | struct insn_field *opcode = &insn->opcode; | 187 | struct insn_field *opcode = &insn->opcode; |
188 | insn_byte_t op, pfx; | 188 | insn_byte_t op; |
189 | int pfx_id; | ||
189 | if (opcode->got) | 190 | if (opcode->got) |
190 | return; | 191 | return; |
191 | if (!insn->prefixes.got) | 192 | if (!insn->prefixes.got) |
@@ -212,8 +213,8 @@ void insn_get_opcode(struct insn *insn) | |||
212 | /* Get escaped opcode */ | 213 | /* Get escaped opcode */ |
213 | op = get_next(insn_byte_t, insn); | 214 | op = get_next(insn_byte_t, insn); |
214 | opcode->bytes[opcode->nbytes++] = op; | 215 | opcode->bytes[opcode->nbytes++] = op; |
215 | pfx = insn_last_prefix(insn); | 216 | pfx_id = insn_last_prefix_id(insn); |
216 | insn->attr = inat_get_escape_attribute(op, pfx, insn->attr); | 217 | insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr); |
217 | } | 218 | } |
218 | if (inat_must_vex(insn->attr)) | 219 | if (inat_must_vex(insn->attr)) |
219 | insn->attr = 0; /* This instruction is bad */ | 220 | insn->attr = 0; /* This instruction is bad */ |
@@ -235,7 +236,7 @@ err_out: | |||
235 | void insn_get_modrm(struct insn *insn) | 236 | void insn_get_modrm(struct insn *insn) |
236 | { | 237 | { |
237 | struct insn_field *modrm = &insn->modrm; | 238 | struct insn_field *modrm = &insn->modrm; |
238 | insn_byte_t pfx, mod; | 239 | insn_byte_t pfx_id, mod; |
239 | if (modrm->got) | 240 | if (modrm->got) |
240 | return; | 241 | return; |
241 | if (!insn->opcode.got) | 242 | if (!insn->opcode.got) |
@@ -246,8 +247,8 @@ void insn_get_modrm(struct insn *insn) | |||
246 | modrm->value = mod; | 247 | modrm->value = mod; |
247 | modrm->nbytes = 1; | 248 | modrm->nbytes = 1; |
248 | if (inat_is_group(insn->attr)) { | 249 | if (inat_is_group(insn->attr)) { |
249 | pfx = insn_last_prefix(insn); | 250 | pfx_id = insn_last_prefix_id(insn); |
250 | insn->attr = inat_get_group_attribute(mod, pfx, | 251 | insn->attr = inat_get_group_attribute(mod, pfx_id, |
251 | insn->attr); | 252 | insn->attr); |
252 | if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) | 253 | if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) |
253 | insn->attr = 0; /* This is bad */ | 254 | insn->attr = 0; /* This is bad */ |