Diffstat (limited to 'arch')
41 files changed, 1656 insertions, 756 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 4f55c736be11..5b448a74d0f7 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -47,18 +47,29 @@ config KPROBES | |||
47 | If in doubt, say "N". | 47 | If in doubt, say "N". |
48 | 48 | ||
49 | config JUMP_LABEL | 49 | config JUMP_LABEL |
50 | bool "Optimize trace point call sites" | 50 | bool "Optimize very unlikely/likely branches" |
51 | depends on HAVE_ARCH_JUMP_LABEL | 51 | depends on HAVE_ARCH_JUMP_LABEL |
52 | help | 52 | help |
53 | This option enables a transparent branch optimization that | ||
54 | makes certain almost-always-true or almost-always-false branch | ||
55 | conditions even cheaper to execute within the kernel. | ||
56 | |||
57 | Certain performance-sensitive kernel code, such as trace points, | ||
58 | scheduler functionality, networking code and KVM have such | ||
59 | branches and include support for this optimization technique. | ||
60 | |||
53 | If it is detected that the compiler has support for "asm goto", | 61 | If it is detected that the compiler has support for "asm goto", |
54 | the kernel will compile trace point locations with just a | 62 | the kernel will compile such branches with just a nop |
55 | nop instruction. When trace points are enabled, the nop will | 63 | instruction. When the condition flag is toggled to true, the |
56 | be converted to a jump to the trace function. This technique | 64 | nop will be converted to a jump instruction to execute the |
57 | lowers overhead and stress on the branch prediction of the | 65 | conditional block of instructions. |
58 | processor. | 66 | |
59 | 67 | This technique lowers overhead and stress on the branch prediction | |
60 | On i386, options added to the compiler flags may increase | 68 | of the processor and generally makes the kernel faster. The update |
61 | the size of the kernel slightly. | 69 | of the condition is slower, but such updates are always very rare. |
70 | |||
71 | ( On 32-bit x86, the necessary options added to the compiler | ||
72 | flags may increase the size of the kernel slightly. ) | ||
62 | 73 | ||
63 | config OPTPROBES | 74 | config OPTPROBES |
64 | def_bool y | 75 | def_bool y |
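The help text above describes the mechanism the rest of this series renames from jump_label_key to static_key (see the ia64/x86 paravirt and per-arch jump_label.h hunks below). As a reference point, here is a minimal sketch of how such a key is typically consumed, assuming the generic static_key helpers this series introduces (static_key_false(), static_key_slow_inc()/_dec(), STATIC_KEY_INIT_FALSE); the key and the functions around it are made up for illustration:

    #include <linux/jump_label.h>

    static struct static_key my_feature_key = STATIC_KEY_INIT_FALSE;

    extern void my_rare_slow_path(void);        /* hypothetical */

    void my_hot_path(void)
    {
            /*
             * While the key is false this compiles to a single nop; enabling
             * the key patches the nop into a jump, so the unlikely block
             * costs nothing on the common path.
             */
            if (static_key_false(&my_feature_key))
                    my_rare_slow_path();
    }

    void my_feature_set(bool on)
    {
            if (on)
                    static_key_slow_inc(&my_feature_key);   /* patch branch sites */
            else
                    static_key_slow_dec(&my_feature_key);
    }
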
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c
index 8143cd7cdbfb..0dae252f7a33 100644
--- a/arch/alpha/kernel/perf_event.c
+++ b/arch/alpha/kernel/perf_event.c
@@ -685,6 +685,10 @@ static int alpha_pmu_event_init(struct perf_event *event) | |||
685 | { | 685 | { |
686 | int err; | 686 | int err; |
687 | 687 | ||
688 | /* does not support taken branch sampling */ | ||
689 | if (has_branch_stack(event)) | ||
690 | return -EOPNOTSUPP; | ||
691 | |||
688 | switch (event->attr.type) { | 692 | switch (event->attr.type) { |
689 | case PERF_TYPE_RAW: | 693 | case PERF_TYPE_RAW: |
690 | case PERF_TYPE_HARDWARE: | 694 | case PERF_TYPE_HARDWARE: |
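The has_branch_stack() test added here (and to the other PMUs below that cannot do taken-branch sampling) presumably just checks whether the user asked for branch-stack sampling in the event attributes. A sketch of the assumed core helper, based on the new PERF_SAMPLE_BRANCH_STACK sample flag this series adds on the generic side; not part of this arch hunk:

    #include <linux/perf_event.h>

    /* Assumed definition: true when attr.sample_type requests taken-branch
     * sampling, which the PMUs patched here cannot provide.
     */
    static inline bool has_branch_stack(struct perf_event *event)
    {
            return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
    }
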
diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h
index 99cfe3607989..7523340afb8a 100644
--- a/arch/arm/include/asm/perf_event.h
+++ b/arch/arm/include/asm/perf_event.h
@@ -12,10 +12,6 @@ | |||
12 | #ifndef __ARM_PERF_EVENT_H__ | 12 | #ifndef __ARM_PERF_EVENT_H__ |
13 | #define __ARM_PERF_EVENT_H__ | 13 | #define __ARM_PERF_EVENT_H__ |
14 | 14 | ||
15 | /* ARM performance counters start from 1 (in the cp15 accesses) so use the | ||
16 | * same indexes here for consistency. */ | ||
17 | #define PERF_EVENT_INDEX_OFFSET 1 | ||
18 | |||
19 | /* ARM perf PMU IDs for use by internal perf clients. */ | 15 | /* ARM perf PMU IDs for use by internal perf clients. */ |
20 | enum arm_perf_pmu_ids { | 16 | enum arm_perf_pmu_ids { |
21 | ARM_PERF_PMU_ID_XSCALE1 = 0, | 17 | ARM_PERF_PMU_ID_XSCALE1 = 0, |
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index b2abfa18f137..8a89d3b7626b 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -539,6 +539,10 @@ static int armpmu_event_init(struct perf_event *event) | |||
539 | int err = 0; | 539 | int err = 0; |
540 | atomic_t *active_events = &armpmu->active_events; | 540 | atomic_t *active_events = &armpmu->active_events; |
541 | 541 | ||
542 | /* does not support taken branch sampling */ | ||
543 | if (has_branch_stack(event)) | ||
544 | return -EOPNOTSUPP; | ||
545 | |||
542 | if (armpmu->map_event(event) == -ENOENT) | 546 | if (armpmu->map_event(event) == -ENOENT) |
543 | return -ENOENT; | 547 | return -ENOENT; |
544 | 548 | ||
diff --git a/arch/frv/include/asm/perf_event.h b/arch/frv/include/asm/perf_event.h
index a69e0155d146..c52ea5546b5b 100644
--- a/arch/frv/include/asm/perf_event.h
+++ b/arch/frv/include/asm/perf_event.h
@@ -12,6 +12,4 @@ | |||
12 | #ifndef _ASM_PERF_EVENT_H | 12 | #ifndef _ASM_PERF_EVENT_H |
13 | #define _ASM_PERF_EVENT_H | 13 | #define _ASM_PERF_EVENT_H |
14 | 14 | ||
15 | #define PERF_EVENT_INDEX_OFFSET 0 | ||
16 | |||
17 | #endif /* _ASM_PERF_EVENT_H */ | 15 | #endif /* _ASM_PERF_EVENT_H */ |
diff --git a/arch/hexagon/include/asm/perf_event.h b/arch/hexagon/include/asm/perf_event.h
index 6c2910f91180..8b8526b491c7 100644
--- a/arch/hexagon/include/asm/perf_event.h
+++ b/arch/hexagon/include/asm/perf_event.h
@@ -19,6 +19,4 @@ | |||
19 | #ifndef _ASM_PERF_EVENT_H | 19 | #ifndef _ASM_PERF_EVENT_H |
20 | #define _ASM_PERF_EVENT_H | 20 | #define _ASM_PERF_EVENT_H |
21 | 21 | ||
22 | #define PERF_EVENT_INDEX_OFFSET 0 | ||
23 | |||
24 | #endif /* _ASM_PERF_EVENT_H */ | 22 | #endif /* _ASM_PERF_EVENT_H */ |
diff --git a/arch/ia64/include/asm/paravirt.h b/arch/ia64/include/asm/paravirt.h
index 32551d304cd7..b149b88ea795 100644
--- a/arch/ia64/include/asm/paravirt.h
+++ b/arch/ia64/include/asm/paravirt.h
@@ -281,9 +281,9 @@ paravirt_init_missing_ticks_accounting(int cpu) | |||
281 | pv_time_ops.init_missing_ticks_accounting(cpu); | 281 | pv_time_ops.init_missing_ticks_accounting(cpu); |
282 | } | 282 | } |
283 | 283 | ||
284 | struct jump_label_key; | 284 | struct static_key; |
285 | extern struct jump_label_key paravirt_steal_enabled; | 285 | extern struct static_key paravirt_steal_enabled; |
286 | extern struct jump_label_key paravirt_steal_rq_enabled; | 286 | extern struct static_key paravirt_steal_rq_enabled; |
287 | 287 | ||
288 | static inline int | 288 | static inline int |
289 | paravirt_do_steal_accounting(unsigned long *new_itm) | 289 | paravirt_do_steal_accounting(unsigned long *new_itm) |
diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c
index 100868216c55..1b22f6de2932 100644
--- a/arch/ia64/kernel/paravirt.c
+++ b/arch/ia64/kernel/paravirt.c
@@ -634,8 +634,8 @@ struct pv_irq_ops pv_irq_ops = { | |||
634 | * pv_time_ops | 634 | * pv_time_ops |
635 | * time operations | 635 | * time operations |
636 | */ | 636 | */ |
637 | struct jump_label_key paravirt_steal_enabled; | 637 | struct static_key paravirt_steal_enabled; |
638 | struct jump_label_key paravirt_steal_rq_enabled; | 638 | struct static_key paravirt_steal_rq_enabled; |
639 | 639 | ||
640 | static int | 640 | static int |
641 | ia64_native_do_steal_accounting(unsigned long *new_itm) | 641 | ia64_native_do_steal_accounting(unsigned long *new_itm) |
diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h
index 1881b316ca45..4d6d77ed9b9d 100644
--- a/arch/mips/include/asm/jump_label.h
+++ b/arch/mips/include/asm/jump_label.h
@@ -20,7 +20,7 @@ | |||
20 | #define WORD_INSN ".word" | 20 | #define WORD_INSN ".word" |
21 | #endif | 21 | #endif |
22 | 22 | ||
23 | static __always_inline bool arch_static_branch(struct jump_label_key *key) | 23 | static __always_inline bool arch_static_branch(struct static_key *key) |
24 | { | 24 | { |
25 | asm goto("1:\tnop\n\t" | 25 | asm goto("1:\tnop\n\t" |
26 | "nop\n\t" | 26 | "nop\n\t" |
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index e3b897acfbc0..811084f4e422 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -606,6 +606,10 @@ static int mipspmu_event_init(struct perf_event *event) | |||
606 | { | 606 | { |
607 | int err = 0; | 607 | int err = 0; |
608 | 608 | ||
609 | /* does not support taken branch sampling */ | ||
610 | if (has_branch_stack(event)) | ||
611 | return -EOPNOTSUPP; | ||
612 | |||
609 | switch (event->attr.type) { | 613 | switch (event->attr.type) { |
610 | case PERF_TYPE_RAW: | 614 | case PERF_TYPE_RAW: |
611 | case PERF_TYPE_HARDWARE: | 615 | case PERF_TYPE_HARDWARE: |
diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h
index 938986e412f1..ae098c438f00 100644
--- a/arch/powerpc/include/asm/jump_label.h
+++ b/arch/powerpc/include/asm/jump_label.h
@@ -17,7 +17,7 @@ | |||
17 | #define JUMP_ENTRY_TYPE stringify_in_c(FTR_ENTRY_LONG) | 17 | #define JUMP_ENTRY_TYPE stringify_in_c(FTR_ENTRY_LONG) |
18 | #define JUMP_LABEL_NOP_SIZE 4 | 18 | #define JUMP_LABEL_NOP_SIZE 4 |
19 | 19 | ||
20 | static __always_inline bool arch_static_branch(struct jump_label_key *key) | 20 | static __always_inline bool arch_static_branch(struct static_key *key) |
21 | { | 21 | { |
22 | asm goto("1:\n\t" | 22 | asm goto("1:\n\t" |
23 | "nop\n\t" | 23 | "nop\n\t" |
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index 8f1df1208d23..1a8093fa8f71 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -61,8 +61,6 @@ struct pt_regs; | |||
61 | extern unsigned long perf_misc_flags(struct pt_regs *regs); | 61 | extern unsigned long perf_misc_flags(struct pt_regs *regs); |
62 | extern unsigned long perf_instruction_pointer(struct pt_regs *regs); | 62 | extern unsigned long perf_instruction_pointer(struct pt_regs *regs); |
63 | 63 | ||
64 | #define PERF_EVENT_INDEX_OFFSET 1 | ||
65 | |||
66 | /* | 64 | /* |
67 | * Only override the default definitions in include/linux/perf_event.h | 65 | * Only override the default definitions in include/linux/perf_event.h |
68 | * if we have hardware PMU support. | 66 | * if we have hardware PMU support. |
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 64483fde95c6..c2e27ede07ec 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -1084,6 +1084,10 @@ static int power_pmu_event_init(struct perf_event *event) | |||
1084 | if (!ppmu) | 1084 | if (!ppmu) |
1085 | return -ENOENT; | 1085 | return -ENOENT; |
1086 | 1086 | ||
1087 | /* does not support taken branch sampling */ | ||
1088 | if (has_branch_stack(event)) | ||
1089 | return -EOPNOTSUPP; | ||
1090 | |||
1087 | switch (event->attr.type) { | 1091 | switch (event->attr.type) { |
1088 | case PERF_TYPE_HARDWARE: | 1092 | case PERF_TYPE_HARDWARE: |
1089 | ev = event->attr.config; | 1093 | ev = event->attr.config; |
@@ -1193,6 +1197,11 @@ static int power_pmu_event_init(struct perf_event *event) | |||
1193 | return err; | 1197 | return err; |
1194 | } | 1198 | } |
1195 | 1199 | ||
1200 | static int power_pmu_event_idx(struct perf_event *event) | ||
1201 | { | ||
1202 | return event->hw.idx; | ||
1203 | } | ||
1204 | |||
1196 | struct pmu power_pmu = { | 1205 | struct pmu power_pmu = { |
1197 | .pmu_enable = power_pmu_enable, | 1206 | .pmu_enable = power_pmu_enable, |
1198 | .pmu_disable = power_pmu_disable, | 1207 | .pmu_disable = power_pmu_disable, |
@@ -1205,6 +1214,7 @@ struct pmu power_pmu = { | |||
1205 | .start_txn = power_pmu_start_txn, | 1214 | .start_txn = power_pmu_start_txn, |
1206 | .cancel_txn = power_pmu_cancel_txn, | 1215 | .cancel_txn = power_pmu_cancel_txn, |
1207 | .commit_txn = power_pmu_commit_txn, | 1216 | .commit_txn = power_pmu_commit_txn, |
1217 | .event_idx = power_pmu_event_idx, | ||
1208 | }; | 1218 | }; |
1209 | 1219 | ||
1210 | /* | 1220 | /* |
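power_pmu_event_idx() simply reports the hardware counter number, and the new ->event_idx callback is presumably how the core publishes that number in the event's mmap control page so a self-monitoring task knows which counter to read directly. A sketch of that assumed core-side consumer (kernel/events/core.c is not part of this diff):

    /* Assumed core-side use of ->event_idx: the result lands in
     * perf_event_mmap_page->index, with 0 meaning "no direct access".
     */
    static unsigned int perf_event_index(struct perf_event *event)
    {
            if (event->state != PERF_EVENT_STATE_ACTIVE)
                    return 0;

            return event->pmu->event_idx(event);
    }
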
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
index 95a6cf2b5b67..6c32190dc73e 100644
--- a/arch/s390/include/asm/jump_label.h
+++ b/arch/s390/include/asm/jump_label.h
@@ -13,7 +13,7 @@ | |||
13 | #define ASM_ALIGN ".balign 4" | 13 | #define ASM_ALIGN ".balign 4" |
14 | #endif | 14 | #endif |
15 | 15 | ||
16 | static __always_inline bool arch_static_branch(struct jump_label_key *key) | 16 | static __always_inline bool arch_static_branch(struct static_key *key) |
17 | { | 17 | { |
18 | asm goto("0: brcl 0,0\n" | 18 | asm goto("0: brcl 0,0\n" |
19 | ".pushsection __jump_table, \"aw\"\n" | 19 | ".pushsection __jump_table, \"aw\"\n" |
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index a75f168d2718..4eb444edbe49 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -6,4 +6,3 @@ | |||
6 | 6 | ||
7 | /* Empty, just to avoid compiling error */ | 7 | /* Empty, just to avoid compiling error */ |
8 | 8 | ||
9 | #define PERF_EVENT_INDEX_OFFSET 0 | ||
diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c
index 10b14e3a7eb8..068b8a2759b5 100644
--- a/arch/sh/kernel/perf_event.c
+++ b/arch/sh/kernel/perf_event.c
@@ -310,6 +310,10 @@ static int sh_pmu_event_init(struct perf_event *event) | |||
310 | { | 310 | { |
311 | int err; | 311 | int err; |
312 | 312 | ||
313 | /* does not support taken branch sampling */ | ||
314 | if (has_branch_stack(event)) | ||
315 | return -EOPNOTSUPP; | ||
316 | |||
313 | switch (event->attr.type) { | 317 | switch (event->attr.type) { |
314 | case PERF_TYPE_RAW: | 318 | case PERF_TYPE_RAW: |
315 | case PERF_TYPE_HW_CACHE: | 319 | case PERF_TYPE_HW_CACHE: |
diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h
index fc73a82366f8..5080d16a832f 100644
--- a/arch/sparc/include/asm/jump_label.h
+++ b/arch/sparc/include/asm/jump_label.h
@@ -7,7 +7,7 @@ | |||
7 | 7 | ||
8 | #define JUMP_LABEL_NOP_SIZE 4 | 8 | #define JUMP_LABEL_NOP_SIZE 4 |
9 | 9 | ||
10 | static __always_inline bool arch_static_branch(struct jump_label_key *key) | 10 | static __always_inline bool arch_static_branch(struct static_key *key) |
11 | { | 11 | { |
12 | asm goto("1:\n\t" | 12 | asm goto("1:\n\t" |
13 | "nop\n\t" | 13 | "nop\n\t" |
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 614da624330c..8e16a4a21582 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1105,6 +1105,10 @@ static int sparc_pmu_event_init(struct perf_event *event) | |||
1105 | if (atomic_read(&nmi_active) < 0) | 1105 | if (atomic_read(&nmi_active) < 0) |
1106 | return -ENODEV; | 1106 | return -ENODEV; |
1107 | 1107 | ||
1108 | /* does not support taken branch sampling */ | ||
1109 | if (has_branch_stack(event)) | ||
1110 | return -EOPNOTSUPP; | ||
1111 | |||
1108 | switch (attr->type) { | 1112 | switch (attr->type) { |
1109 | case PERF_TYPE_HARDWARE: | 1113 | case PERF_TYPE_HARDWARE: |
1110 | if (attr->config >= sparc_pmu->max_events) | 1114 | if (attr->config >= sparc_pmu->max_events) |
diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h
index 205b063e3e32..74a2e312e8a2 100644
--- a/arch/x86/include/asm/inat.h
+++ b/arch/x86/include/asm/inat.h
@@ -97,11 +97,12 @@ | |||
97 | 97 | ||
98 | /* Attribute search APIs */ | 98 | /* Attribute search APIs */ |
99 | extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); | 99 | extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); |
100 | extern int inat_get_last_prefix_id(insn_byte_t last_pfx); | ||
100 | extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, | 101 | extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, |
101 | insn_byte_t last_pfx, | 102 | int lpfx_id, |
102 | insn_attr_t esc_attr); | 103 | insn_attr_t esc_attr); |
103 | extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, | 104 | extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, |
104 | insn_byte_t last_pfx, | 105 | int lpfx_id, |
105 | insn_attr_t esc_attr); | 106 | insn_attr_t esc_attr); |
106 | extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, | 107 | extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, |
107 | insn_byte_t vex_m, | 108 | insn_byte_t vex_m, |
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 74df3f1eddfd..48eb30a86062 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -96,12 +96,6 @@ struct insn { | |||
96 | #define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ | 96 | #define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ |
97 | #define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ | 97 | #define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ |
98 | 98 | ||
99 | /* The last prefix is needed for two-byte and three-byte opcodes */ | ||
100 | static inline insn_byte_t insn_last_prefix(struct insn *insn) | ||
101 | { | ||
102 | return insn->prefixes.bytes[3]; | ||
103 | } | ||
104 | |||
105 | extern void insn_init(struct insn *insn, const void *kaddr, int x86_64); | 99 | extern void insn_init(struct insn *insn, const void *kaddr, int x86_64); |
106 | extern void insn_get_prefixes(struct insn *insn); | 100 | extern void insn_get_prefixes(struct insn *insn); |
107 | extern void insn_get_opcode(struct insn *insn); | 101 | extern void insn_get_opcode(struct insn *insn); |
@@ -160,6 +154,18 @@ static inline insn_byte_t insn_vex_p_bits(struct insn *insn) | |||
160 | return X86_VEX_P(insn->vex_prefix.bytes[2]); | 154 | return X86_VEX_P(insn->vex_prefix.bytes[2]); |
161 | } | 155 | } |
162 | 156 | ||
157 | /* Get the last prefix id from last prefix or VEX prefix */ | ||
158 | static inline int insn_last_prefix_id(struct insn *insn) | ||
159 | { | ||
160 | if (insn_is_avx(insn)) | ||
161 | return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */ | ||
162 | |||
163 | if (insn->prefixes.bytes[3]) | ||
164 | return inat_get_last_prefix_id(insn->prefixes.bytes[3]); | ||
165 | |||
166 | return 0; | ||
167 | } | ||
168 | |||
163 | /* Offset of each field from kaddr */ | 169 | /* Offset of each field from kaddr */ |
164 | static inline int insn_offset_rex_prefix(struct insn *insn) | 170 | static inline int insn_offset_rex_prefix(struct insn *insn) |
165 | { | 171 | { |
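insn_last_prefix_id() replaces the removed insn_last_prefix(): instead of handing the raw last prefix byte to the attribute tables, callers now pass a small prefix id, with the VEX.pp field standing in for a SIMD prefix on AVX instructions (hence the int lpfx_id parameters in the inat.h hunk above). A rough sketch of a caller, using only the decoder entry points declared above; the wrapper function itself is illustrative:

    #include <asm/insn.h>

    /* Illustrative: decode the instruction at kaddr and return the prefix id
     * that the opcode-attribute lookups now expect.
     */
    static int last_pfx_id_at(const void *kaddr)
    {
            struct insn insn;

            insn_init(&insn, kaddr, 1);     /* 1 = decode in 64-bit mode */
            insn_get_prefixes(&insn);       /* fills legacy and VEX prefix state */

            return insn_last_prefix_id(&insn);
    }
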
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index a32b18ce6ead..3a16c1483b45 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -9,12 +9,12 @@ | |||
9 | 9 | ||
10 | #define JUMP_LABEL_NOP_SIZE 5 | 10 | #define JUMP_LABEL_NOP_SIZE 5 |
11 | 11 | ||
12 | #define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t" | 12 | #define STATIC_KEY_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t" |
13 | 13 | ||
14 | static __always_inline bool arch_static_branch(struct jump_label_key *key) | 14 | static __always_inline bool arch_static_branch(struct static_key *key) |
15 | { | 15 | { |
16 | asm goto("1:" | 16 | asm goto("1:" |
17 | JUMP_LABEL_INITIAL_NOP | 17 | STATIC_KEY_INITIAL_NOP |
18 | ".pushsection __jump_table, \"aw\" \n\t" | 18 | ".pushsection __jump_table, \"aw\" \n\t" |
19 | _ASM_ALIGN "\n\t" | 19 | _ASM_ALIGN "\n\t" |
20 | _ASM_PTR "1b, %l[l_yes], %c0 \n\t" | 20 | _ASM_PTR "1b, %l[l_yes], %c0 \n\t" |
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index a6962d9161a0..ccb805966f68 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -56,6 +56,13 @@ | |||
56 | #define MSR_OFFCORE_RSP_0 0x000001a6 | 56 | #define MSR_OFFCORE_RSP_0 0x000001a6 |
57 | #define MSR_OFFCORE_RSP_1 0x000001a7 | 57 | #define MSR_OFFCORE_RSP_1 0x000001a7 |
58 | 58 | ||
59 | #define MSR_LBR_SELECT 0x000001c8 | ||
60 | #define MSR_LBR_TOS 0x000001c9 | ||
61 | #define MSR_LBR_NHM_FROM 0x00000680 | ||
62 | #define MSR_LBR_NHM_TO 0x000006c0 | ||
63 | #define MSR_LBR_CORE_FROM 0x00000040 | ||
64 | #define MSR_LBR_CORE_TO 0x00000060 | ||
65 | |||
59 | #define MSR_IA32_PEBS_ENABLE 0x000003f1 | 66 | #define MSR_IA32_PEBS_ENABLE 0x000003f1 |
60 | #define MSR_IA32_DS_AREA 0x00000600 | 67 | #define MSR_IA32_DS_AREA 0x00000600 |
61 | #define MSR_IA32_PERF_CAPABILITIES 0x00000345 | 68 | #define MSR_IA32_PERF_CAPABILITIES 0x00000345 |
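These constants name the last-branch-record stack that the LBR code below programs through x86_pmu.lbr_tos/lbr_from/lbr_to: MSR_LBR_TOS holds the top-of-stack index and the FROM/TO ranges hold branch source and destination addresses. A rough sketch of a walk driven by them, assuming a Nehalem-style part with 16 entries at consecutive MSR addresses (both assumptions, not taken from this hunk):

    #include <linux/kernel.h>
    #include <asm/msr.h>

    #define LBR_NHM_NR      16      /* assumed number of LBR entries */

    static void dump_lbr_nhm(void)
    {
            u64 tos, from, to;
            int i;

            rdmsrl(MSR_LBR_TOS, tos);

            for (i = 0; i < LBR_NHM_NR; i++) {
                    unsigned long idx = (tos - i) & (LBR_NHM_NR - 1);

                    rdmsrl(MSR_LBR_NHM_FROM + idx, from);
                    rdmsrl(MSR_LBR_NHM_TO + idx, to);

                    /* on these parts bit 63 of 'from' carries the mispredict flag */
                    pr_info("lbr%lu: %016llx -> %016llx\n", idx, from, to);
            }
    }
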
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index a7d2db9a74fb..c0180fd372d2 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -230,9 +230,9 @@ static inline unsigned long long paravirt_sched_clock(void) | |||
230 | return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock); | 230 | return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock); |
231 | } | 231 | } |
232 | 232 | ||
233 | struct jump_label_key; | 233 | struct static_key; |
234 | extern struct jump_label_key paravirt_steal_enabled; | 234 | extern struct static_key paravirt_steal_enabled; |
235 | extern struct jump_label_key paravirt_steal_rq_enabled; | 235 | extern struct static_key paravirt_steal_rq_enabled; |
236 | 236 | ||
237 | static inline u64 paravirt_steal_clock(int cpu) | 237 | static inline u64 paravirt_steal_clock(int cpu) |
238 | { | 238 | { |
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 461ce432b1c2..e8fb2c7a5f4f 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -188,8 +188,6 @@ extern u32 get_ibs_caps(void); | |||
188 | #ifdef CONFIG_PERF_EVENTS | 188 | #ifdef CONFIG_PERF_EVENTS |
189 | extern void perf_events_lapic_init(void); | 189 | extern void perf_events_lapic_init(void); |
190 | 190 | ||
191 | #define PERF_EVENT_INDEX_OFFSET 0 | ||
192 | |||
193 | /* | 191 | /* |
194 | * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups. | 192 | * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups. |
195 | * This flag is otherwise unused and ABI specified to be 0, so nobody should | 193 | * This flag is otherwise unused and ABI specified to be 0, so nobody should |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5369059c07a9..532d2e090e6f 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -69,6 +69,7 @@ obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o | |||
69 | obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o | 69 | obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o |
70 | obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o | 70 | obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o |
71 | obj-$(CONFIG_KPROBES) += kprobes.o | 71 | obj-$(CONFIG_KPROBES) += kprobes.o |
72 | obj-$(CONFIG_OPTPROBES) += kprobes-opt.o | ||
72 | obj-$(CONFIG_MODULES) += module.o | 73 | obj-$(CONFIG_MODULES) += module.o |
73 | obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o | 74 | obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o |
74 | obj-$(CONFIG_KGDB) += kgdb.o | 75 | obj-$(CONFIG_KGDB) += kgdb.o |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index f4773f4aae35..0a44b90602b0 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -5,6 +5,7 @@ | |||
5 | #include <linux/mm.h> | 5 | #include <linux/mm.h> |
6 | 6 | ||
7 | #include <linux/io.h> | 7 | #include <linux/io.h> |
8 | #include <linux/sched.h> | ||
8 | #include <asm/processor.h> | 9 | #include <asm/processor.h> |
9 | #include <asm/apic.h> | 10 | #include <asm/apic.h> |
10 | #include <asm/cpu.h> | 11 | #include <asm/cpu.h> |
@@ -456,6 +457,8 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) | |||
456 | if (c->x86_power & (1 << 8)) { | 457 | if (c->x86_power & (1 << 8)) { |
457 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 458 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
458 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); | 459 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); |
460 | if (!check_tsc_unstable()) | ||
461 | sched_clock_stable = 1; | ||
459 | } | 462 | } |
460 | 463 | ||
461 | #ifdef CONFIG_X86_64 | 464 | #ifdef CONFIG_X86_64 |
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 5adce1040b11..0a18d16cb58d 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/cpu.h> | 25 | #include <linux/cpu.h> |
26 | #include <linux/bitops.h> | 26 | #include <linux/bitops.h> |
27 | #include <linux/device.h> | ||
27 | 28 | ||
28 | #include <asm/apic.h> | 29 | #include <asm/apic.h> |
29 | #include <asm/stacktrace.h> | 30 | #include <asm/stacktrace.h> |
@@ -31,6 +32,7 @@ | |||
31 | #include <asm/compat.h> | 32 | #include <asm/compat.h> |
32 | #include <asm/smp.h> | 33 | #include <asm/smp.h> |
33 | #include <asm/alternative.h> | 34 | #include <asm/alternative.h> |
35 | #include <asm/timer.h> | ||
34 | 36 | ||
35 | #include "perf_event.h" | 37 | #include "perf_event.h" |
36 | 38 | ||
@@ -351,6 +353,36 @@ int x86_setup_perfctr(struct perf_event *event) | |||
351 | return 0; | 353 | return 0; |
352 | } | 354 | } |
353 | 355 | ||
356 | /* | ||
357 | * check that branch_sample_type is compatible with | ||
358 | * settings needed for precise_ip > 1 which implies | ||
359 | * using the LBR to capture ALL taken branches at the | ||
360 | * priv levels of the measurement | ||
361 | */ | ||
362 | static inline int precise_br_compat(struct perf_event *event) | ||
363 | { | ||
364 | u64 m = event->attr.branch_sample_type; | ||
365 | u64 b = 0; | ||
366 | |||
367 | /* must capture all branches */ | ||
368 | if (!(m & PERF_SAMPLE_BRANCH_ANY)) | ||
369 | return 0; | ||
370 | |||
371 | m &= PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER; | ||
372 | |||
373 | if (!event->attr.exclude_user) | ||
374 | b |= PERF_SAMPLE_BRANCH_USER; | ||
375 | |||
376 | if (!event->attr.exclude_kernel) | ||
377 | b |= PERF_SAMPLE_BRANCH_KERNEL; | ||
378 | |||
379 | /* | ||
380 | * ignore PERF_SAMPLE_BRANCH_HV, not supported on x86 | ||
381 | */ | ||
382 | |||
383 | return m == b; | ||
384 | } | ||
385 | |||
354 | int x86_pmu_hw_config(struct perf_event *event) | 386 | int x86_pmu_hw_config(struct perf_event *event) |
355 | { | 387 | { |
356 | if (event->attr.precise_ip) { | 388 | if (event->attr.precise_ip) { |
@@ -367,6 +399,36 @@ int x86_pmu_hw_config(struct perf_event *event) | |||
367 | 399 | ||
368 | if (event->attr.precise_ip > precise) | 400 | if (event->attr.precise_ip > precise) |
369 | return -EOPNOTSUPP; | 401 | return -EOPNOTSUPP; |
402 | /* | ||
403 | * check that PEBS LBR correction does not conflict with | ||
404 | * whatever the user is asking with attr->branch_sample_type | ||
405 | */ | ||
406 | if (event->attr.precise_ip > 1) { | ||
407 | u64 *br_type = &event->attr.branch_sample_type; | ||
408 | |||
409 | if (has_branch_stack(event)) { | ||
410 | if (!precise_br_compat(event)) | ||
411 | return -EOPNOTSUPP; | ||
412 | |||
413 | /* branch_sample_type is compatible */ | ||
414 | |||
415 | } else { | ||
416 | /* | ||
417 | * user did not specify branch_sample_type | ||
418 | * | ||
419 | * For PEBS fixups, we capture all | ||
420 | * the branches at the priv level of the | ||
421 | * event. | ||
422 | */ | ||
423 | *br_type = PERF_SAMPLE_BRANCH_ANY; | ||
424 | |||
425 | if (!event->attr.exclude_user) | ||
426 | *br_type |= PERF_SAMPLE_BRANCH_USER; | ||
427 | |||
428 | if (!event->attr.exclude_kernel) | ||
429 | *br_type |= PERF_SAMPLE_BRANCH_KERNEL; | ||
430 | } | ||
431 | } | ||
370 | } | 432 | } |
371 | 433 | ||
372 | /* | 434 | /* |
@@ -424,6 +486,10 @@ static int __x86_pmu_event_init(struct perf_event *event) | |||
424 | /* mark unused */ | 486 | /* mark unused */ |
425 | event->hw.extra_reg.idx = EXTRA_REG_NONE; | 487 | event->hw.extra_reg.idx = EXTRA_REG_NONE; |
426 | 488 | ||
489 | /* mark not used */ | ||
490 | event->hw.extra_reg.idx = EXTRA_REG_NONE; | ||
491 | event->hw.branch_reg.idx = EXTRA_REG_NONE; | ||
492 | |||
427 | return x86_pmu.hw_config(event); | 493 | return x86_pmu.hw_config(event); |
428 | } | 494 | } |
429 | 495 | ||
@@ -1210,6 +1276,8 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) | |||
1210 | break; | 1276 | break; |
1211 | 1277 | ||
1212 | case CPU_STARTING: | 1278 | case CPU_STARTING: |
1279 | if (x86_pmu.attr_rdpmc) | ||
1280 | set_in_cr4(X86_CR4_PCE); | ||
1213 | if (x86_pmu.cpu_starting) | 1281 | if (x86_pmu.cpu_starting) |
1214 | x86_pmu.cpu_starting(cpu); | 1282 | x86_pmu.cpu_starting(cpu); |
1215 | break; | 1283 | break; |
@@ -1319,6 +1387,8 @@ static int __init init_hw_perf_events(void) | |||
1319 | } | 1387 | } |
1320 | } | 1388 | } |
1321 | 1389 | ||
1390 | x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ | ||
1391 | |||
1322 | pr_info("... version: %d\n", x86_pmu.version); | 1392 | pr_info("... version: %d\n", x86_pmu.version); |
1323 | pr_info("... bit width: %d\n", x86_pmu.cntval_bits); | 1393 | pr_info("... bit width: %d\n", x86_pmu.cntval_bits); |
1324 | pr_info("... generic registers: %d\n", x86_pmu.num_counters); | 1394 | pr_info("... generic registers: %d\n", x86_pmu.num_counters); |
@@ -1542,23 +1612,106 @@ static int x86_pmu_event_init(struct perf_event *event) | |||
1542 | return err; | 1612 | return err; |
1543 | } | 1613 | } |
1544 | 1614 | ||
1615 | static int x86_pmu_event_idx(struct perf_event *event) | ||
1616 | { | ||
1617 | int idx = event->hw.idx; | ||
1618 | |||
1619 | if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) { | ||
1620 | idx -= X86_PMC_IDX_FIXED; | ||
1621 | idx |= 1 << 30; | ||
1622 | } | ||
1623 | |||
1624 | return idx + 1; | ||
1625 | } | ||
1626 | |||
1627 | static ssize_t get_attr_rdpmc(struct device *cdev, | ||
1628 | struct device_attribute *attr, | ||
1629 | char *buf) | ||
1630 | { | ||
1631 | return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc); | ||
1632 | } | ||
1633 | |||
1634 | static void change_rdpmc(void *info) | ||
1635 | { | ||
1636 | bool enable = !!(unsigned long)info; | ||
1637 | |||
1638 | if (enable) | ||
1639 | set_in_cr4(X86_CR4_PCE); | ||
1640 | else | ||
1641 | clear_in_cr4(X86_CR4_PCE); | ||
1642 | } | ||
1643 | |||
1644 | static ssize_t set_attr_rdpmc(struct device *cdev, | ||
1645 | struct device_attribute *attr, | ||
1646 | const char *buf, size_t count) | ||
1647 | { | ||
1648 | unsigned long val = simple_strtoul(buf, NULL, 0); | ||
1649 | |||
1650 | if (!!val != !!x86_pmu.attr_rdpmc) { | ||
1651 | x86_pmu.attr_rdpmc = !!val; | ||
1652 | smp_call_function(change_rdpmc, (void *)val, 1); | ||
1653 | } | ||
1654 | |||
1655 | return count; | ||
1656 | } | ||
1657 | |||
1658 | static DEVICE_ATTR(rdpmc, S_IRUSR | S_IWUSR, get_attr_rdpmc, set_attr_rdpmc); | ||
1659 | |||
1660 | static struct attribute *x86_pmu_attrs[] = { | ||
1661 | &dev_attr_rdpmc.attr, | ||
1662 | NULL, | ||
1663 | }; | ||
1664 | |||
1665 | static struct attribute_group x86_pmu_attr_group = { | ||
1666 | .attrs = x86_pmu_attrs, | ||
1667 | }; | ||
1668 | |||
1669 | static const struct attribute_group *x86_pmu_attr_groups[] = { | ||
1670 | &x86_pmu_attr_group, | ||
1671 | NULL, | ||
1672 | }; | ||
1673 | |||
1674 | static void x86_pmu_flush_branch_stack(void) | ||
1675 | { | ||
1676 | if (x86_pmu.flush_branch_stack) | ||
1677 | x86_pmu.flush_branch_stack(); | ||
1678 | } | ||
1679 | |||
1545 | static struct pmu pmu = { | 1680 | static struct pmu pmu = { |
1546 | .pmu_enable = x86_pmu_enable, | 1681 | .pmu_enable = x86_pmu_enable, |
1547 | .pmu_disable = x86_pmu_disable, | 1682 | .pmu_disable = x86_pmu_disable, |
1683 | |||
1684 | .attr_groups = x86_pmu_attr_groups, | ||
1548 | 1685 | ||
1549 | .event_init = x86_pmu_event_init, | 1686 | .event_init = x86_pmu_event_init, |
1550 | 1687 | ||
1551 | .add = x86_pmu_add, | 1688 | .add = x86_pmu_add, |
1552 | .del = x86_pmu_del, | 1689 | .del = x86_pmu_del, |
1553 | .start = x86_pmu_start, | 1690 | .start = x86_pmu_start, |
1554 | .stop = x86_pmu_stop, | 1691 | .stop = x86_pmu_stop, |
1555 | .read = x86_pmu_read, | 1692 | .read = x86_pmu_read, |
1556 | 1693 | ||
1557 | .start_txn = x86_pmu_start_txn, | 1694 | .start_txn = x86_pmu_start_txn, |
1558 | .cancel_txn = x86_pmu_cancel_txn, | 1695 | .cancel_txn = x86_pmu_cancel_txn, |
1559 | .commit_txn = x86_pmu_commit_txn, | 1696 | .commit_txn = x86_pmu_commit_txn, |
1697 | |||
1698 | .event_idx = x86_pmu_event_idx, | ||
1699 | .flush_branch_stack = x86_pmu_flush_branch_stack, | ||
1560 | }; | 1700 | }; |
1561 | 1701 | ||
1702 | void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now) | ||
1703 | { | ||
1704 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) | ||
1705 | return; | ||
1706 | |||
1707 | if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) | ||
1708 | return; | ||
1709 | |||
1710 | userpg->time_mult = this_cpu_read(cyc2ns); | ||
1711 | userpg->time_shift = CYC2NS_SCALE_FACTOR; | ||
1712 | userpg->time_offset = this_cpu_read(cyc2ns_offset) - now; | ||
1713 | } | ||
1714 | |||
1562 | /* | 1715 | /* |
1563 | * callchain support | 1716 | * callchain support |
1564 | */ | 1717 | */ |
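x86_pmu_event_idx(), the rdpmc sysfs attribute and perf_update_user_clock() together enable self-monitoring: the counter index (with bit 30 marking fixed counters) and the time parameters are exported through the event's mmap control page, and CR4.PCE is set so user mode may execute RDPMC. A sketch of the matching userspace read loop under the usual perf mmap-page protocol, with time scaling and error handling left out:

    #include <stdint.h>
    #include <linux/perf_event.h>   /* struct perf_event_mmap_page */

    static inline uint64_t rdpmc(uint32_t counter)
    {
            uint32_t lo, hi;

            __asm__ volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (counter));
            return (uint64_t)hi << 32 | lo;
    }

    /* 'pc' is the first page obtained by mmap()ing the perf event fd. */
    uint64_t mmap_read_self(volatile struct perf_event_mmap_page *pc)
    {
            uint32_t seq, idx;
            uint64_t count;

            do {
                    seq = pc->lock;
                    __asm__ volatile("" ::: "memory");

                    idx = pc->index;        /* x86_pmu_event_idx() result */
                    count = pc->offset;
                    if (idx)                /* 0 means: fall back to read() */
                            count += rdpmc(idx - 1);

                    __asm__ volatile("" ::: "memory");
            } while (pc->lock != seq);

            return count;
    }

The rdpmc attribute added above then acts as the global switch for this path: writing 0 clears CR4.PCE so user-mode RDPMC faults instead.
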
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index c30c807ddc72..8484e77c211e 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -33,6 +33,7 @@ enum extra_reg_type { | |||
33 | 33 | ||
34 | EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ | 34 | EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ |
35 | EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ | 35 | EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ |
36 | EXTRA_REG_LBR = 2, /* lbr_select */ | ||
36 | 37 | ||
37 | EXTRA_REG_MAX /* number of entries needed */ | 38 | EXTRA_REG_MAX /* number of entries needed */ |
38 | }; | 39 | }; |
@@ -130,6 +131,8 @@ struct cpu_hw_events { | |||
130 | void *lbr_context; | 131 | void *lbr_context; |
131 | struct perf_branch_stack lbr_stack; | 132 | struct perf_branch_stack lbr_stack; |
132 | struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; | 133 | struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; |
134 | struct er_account *lbr_sel; | ||
135 | u64 br_sel; | ||
133 | 136 | ||
134 | /* | 137 | /* |
135 | * Intel host/guest exclude bits | 138 | * Intel host/guest exclude bits |
@@ -268,6 +271,29 @@ struct x86_pmu_quirk { | |||
268 | void (*func)(void); | 271 | void (*func)(void); |
269 | }; | 272 | }; |
270 | 273 | ||
274 | union x86_pmu_config { | ||
275 | struct { | ||
276 | u64 event:8, | ||
277 | umask:8, | ||
278 | usr:1, | ||
279 | os:1, | ||
280 | edge:1, | ||
281 | pc:1, | ||
282 | interrupt:1, | ||
283 | __reserved1:1, | ||
284 | en:1, | ||
285 | inv:1, | ||
286 | cmask:8, | ||
287 | event2:4, | ||
288 | __reserved2:4, | ||
289 | go:1, | ||
290 | ho:1; | ||
291 | } bits; | ||
292 | u64 value; | ||
293 | }; | ||
294 | |||
295 | #define X86_CONFIG(args...) ((union x86_pmu_config){.bits = {args}}).value | ||
296 | |||
271 | /* | 297 | /* |
272 | * struct x86_pmu - generic x86 pmu | 298 | * struct x86_pmu - generic x86 pmu |
273 | */ | 299 | */ |
@@ -309,10 +335,19 @@ struct x86_pmu { | |||
309 | struct x86_pmu_quirk *quirks; | 335 | struct x86_pmu_quirk *quirks; |
310 | int perfctr_second_write; | 336 | int perfctr_second_write; |
311 | 337 | ||
338 | /* | ||
339 | * sysfs attrs | ||
340 | */ | ||
341 | int attr_rdpmc; | ||
342 | |||
343 | /* | ||
344 | * CPU Hotplug hooks | ||
345 | */ | ||
312 | int (*cpu_prepare)(int cpu); | 346 | int (*cpu_prepare)(int cpu); |
313 | void (*cpu_starting)(int cpu); | 347 | void (*cpu_starting)(int cpu); |
314 | void (*cpu_dying)(int cpu); | 348 | void (*cpu_dying)(int cpu); |
315 | void (*cpu_dead)(int cpu); | 349 | void (*cpu_dead)(int cpu); |
350 | void (*flush_branch_stack)(void); | ||
316 | 351 | ||
317 | /* | 352 | /* |
318 | * Intel Arch Perfmon v2+ | 353 | * Intel Arch Perfmon v2+ |
@@ -334,6 +369,8 @@ struct x86_pmu { | |||
334 | */ | 369 | */ |
335 | unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ | 370 | unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ |
336 | int lbr_nr; /* hardware stack size */ | 371 | int lbr_nr; /* hardware stack size */ |
372 | u64 lbr_sel_mask; /* LBR_SELECT valid bits */ | ||
373 | const int *lbr_sel_map; /* lbr_select mappings */ | ||
337 | 374 | ||
338 | /* | 375 | /* |
339 | * Extra registers for events | 376 | * Extra registers for events |
@@ -447,6 +484,15 @@ extern struct event_constraint emptyconstraint; | |||
447 | 484 | ||
448 | extern struct event_constraint unconstrained; | 485 | extern struct event_constraint unconstrained; |
449 | 486 | ||
487 | static inline bool kernel_ip(unsigned long ip) | ||
488 | { | ||
489 | #ifdef CONFIG_X86_32 | ||
490 | return ip > PAGE_OFFSET; | ||
491 | #else | ||
492 | return (long)ip < 0; | ||
493 | #endif | ||
494 | } | ||
495 | |||
450 | #ifdef CONFIG_CPU_SUP_AMD | 496 | #ifdef CONFIG_CPU_SUP_AMD |
451 | 497 | ||
452 | int amd_pmu_init(void); | 498 | int amd_pmu_init(void); |
@@ -527,6 +573,10 @@ void intel_pmu_lbr_init_nhm(void); | |||
527 | 573 | ||
528 | void intel_pmu_lbr_init_atom(void); | 574 | void intel_pmu_lbr_init_atom(void); |
529 | 575 | ||
576 | void intel_pmu_lbr_init_snb(void); | ||
577 | |||
578 | int intel_pmu_setup_lbr_filter(struct perf_event *event); | ||
579 | |||
530 | int p4_pmu_init(void); | 580 | int p4_pmu_init(void); |
531 | 581 | ||
532 | int p6_pmu_init(void); | 582 | int p6_pmu_init(void); |
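X86_CONFIG() packs named PERFEVTSEL fields into a raw config value, which is what lets the Intel hunks below replace magic numbers such as 0x180010e and 0x108000c0 with self-describing initializers. A quick worked check of those two encodings, written as ordinary user code against the bit layout shown in the union (event[7:0], umask[15:8], inv at bit 23, cmask[31:24]):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            /* X86_CONFIG(.event=0xc0, .inv=1, .cmask=16): INST_RETIRED.TOTAL_CYCLES */
            uint64_t cycles  = 0xc0 | (1ULL << 23) | (16ULL << 24);

            /* X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1): UOPS_ISSUED.STALLED_CYCLES */
            uint64_t stalled = 0x0e | (0x01ULL << 8) | (1ULL << 23) | (1ULL << 24);

            /* prints 0x108000c0 0x180010e, matching the literals being replaced */
            printf("%#llx %#llx\n", (unsigned long long)cycles,
                   (unsigned long long)stalled);
            return 0;
    }
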
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 67250a52430b..dd002faff7a6 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -139,6 +139,9 @@ static int amd_pmu_hw_config(struct perf_event *event) | |||
139 | if (ret) | 139 | if (ret) |
140 | return ret; | 140 | return ret; |
141 | 141 | ||
142 | if (has_branch_stack(event)) | ||
143 | return -EOPNOTSUPP; | ||
144 | |||
142 | if (event->attr.exclude_host && event->attr.exclude_guest) | 145 | if (event->attr.exclude_host && event->attr.exclude_guest) |
143 | /* | 146 | /* |
144 | * When HO == GO == 1 the hardware treats that as GO == HO == 0 | 147 | * When HO == GO == 1 the hardware treats that as GO == HO == 0 |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 61d4f79a550e..6a84e7f28f05 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -728,6 +728,19 @@ static __initconst const u64 atom_hw_cache_event_ids | |||
728 | }, | 728 | }, |
729 | }; | 729 | }; |
730 | 730 | ||
731 | static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event) | ||
732 | { | ||
733 | /* user explicitly requested branch sampling */ | ||
734 | if (has_branch_stack(event)) | ||
735 | return true; | ||
736 | |||
737 | /* implicit branch sampling to correct PEBS skid */ | ||
738 | if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) | ||
739 | return true; | ||
740 | |||
741 | return false; | ||
742 | } | ||
743 | |||
731 | static void intel_pmu_disable_all(void) | 744 | static void intel_pmu_disable_all(void) |
732 | { | 745 | { |
733 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 746 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
@@ -882,6 +895,13 @@ static void intel_pmu_disable_event(struct perf_event *event) | |||
882 | cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx); | 895 | cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx); |
883 | cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); | 896 | cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); |
884 | 897 | ||
898 | /* | ||
899 | * must be disabled before any actual event | ||
900 | * because any event may be combined with LBR | ||
901 | */ | ||
902 | if (intel_pmu_needs_lbr_smpl(event)) | ||
903 | intel_pmu_lbr_disable(event); | ||
904 | |||
885 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | 905 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { |
886 | intel_pmu_disable_fixed(hwc); | 906 | intel_pmu_disable_fixed(hwc); |
887 | return; | 907 | return; |
@@ -936,6 +956,12 @@ static void intel_pmu_enable_event(struct perf_event *event) | |||
936 | intel_pmu_enable_bts(hwc->config); | 956 | intel_pmu_enable_bts(hwc->config); |
937 | return; | 957 | return; |
938 | } | 958 | } |
959 | /* | ||
960 | * must be enabled before any actual event | ||
961 | * because any event may be combined with LBR | ||
962 | */ | ||
963 | if (intel_pmu_needs_lbr_smpl(event)) | ||
964 | intel_pmu_lbr_enable(event); | ||
939 | 965 | ||
940 | if (event->attr.exclude_host) | 966 | if (event->attr.exclude_host) |
941 | cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx); | 967 | cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx); |
@@ -1058,6 +1084,9 @@ again: | |||
1058 | 1084 | ||
1059 | data.period = event->hw.last_period; | 1085 | data.period = event->hw.last_period; |
1060 | 1086 | ||
1087 | if (has_branch_stack(event)) | ||
1088 | data.br_stack = &cpuc->lbr_stack; | ||
1089 | |||
1061 | if (perf_event_overflow(event, &data, regs)) | 1090 | if (perf_event_overflow(event, &data, regs)) |
1062 | x86_pmu_stop(event, 0); | 1091 | x86_pmu_stop(event, 0); |
1063 | } | 1092 | } |
@@ -1124,17 +1153,17 @@ static bool intel_try_alt_er(struct perf_event *event, int orig_idx) | |||
1124 | */ | 1153 | */ |
1125 | static struct event_constraint * | 1154 | static struct event_constraint * |
1126 | __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc, | 1155 | __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc, |
1127 | struct perf_event *event) | 1156 | struct perf_event *event, |
1157 | struct hw_perf_event_extra *reg) | ||
1128 | { | 1158 | { |
1129 | struct event_constraint *c = &emptyconstraint; | 1159 | struct event_constraint *c = &emptyconstraint; |
1130 | struct hw_perf_event_extra *reg = &event->hw.extra_reg; | ||
1131 | struct er_account *era; | 1160 | struct er_account *era; |
1132 | unsigned long flags; | 1161 | unsigned long flags; |
1133 | int orig_idx = reg->idx; | 1162 | int orig_idx = reg->idx; |
1134 | 1163 | ||
1135 | /* already allocated shared msr */ | 1164 | /* already allocated shared msr */ |
1136 | if (reg->alloc) | 1165 | if (reg->alloc) |
1137 | return &unconstrained; | 1166 | return NULL; /* call x86_get_event_constraint() */ |
1138 | 1167 | ||
1139 | again: | 1168 | again: |
1140 | era = &cpuc->shared_regs->regs[reg->idx]; | 1169 | era = &cpuc->shared_regs->regs[reg->idx]; |
@@ -1157,14 +1186,10 @@ again: | |||
1157 | reg->alloc = 1; | 1186 | reg->alloc = 1; |
1158 | 1187 | ||
1159 | /* | 1188 | /* |
1160 | * All events using extra_reg are unconstrained. | 1189 | * need to call x86_get_event_constraint() |
1161 | * Avoids calling x86_get_event_constraints() | 1190 | * to check if associated event has constraints |
1162 | * | ||
1163 | * Must revisit if extra_reg controlling events | ||
1164 | * ever have constraints. Worst case we go through | ||
1165 | * the regular event constraint table. | ||
1166 | */ | 1191 | */ |
1167 | c = &unconstrained; | 1192 | c = NULL; |
1168 | } else if (intel_try_alt_er(event, orig_idx)) { | 1193 | } else if (intel_try_alt_er(event, orig_idx)) { |
1169 | raw_spin_unlock_irqrestore(&era->lock, flags); | 1194 | raw_spin_unlock_irqrestore(&era->lock, flags); |
1170 | goto again; | 1195 | goto again; |
@@ -1201,11 +1226,23 @@ static struct event_constraint * | |||
1201 | intel_shared_regs_constraints(struct cpu_hw_events *cpuc, | 1226 | intel_shared_regs_constraints(struct cpu_hw_events *cpuc, |
1202 | struct perf_event *event) | 1227 | struct perf_event *event) |
1203 | { | 1228 | { |
1204 | struct event_constraint *c = NULL; | 1229 | struct event_constraint *c = NULL, *d; |
1205 | 1230 | struct hw_perf_event_extra *xreg, *breg; | |
1206 | if (event->hw.extra_reg.idx != EXTRA_REG_NONE) | 1231 | |
1207 | c = __intel_shared_reg_get_constraints(cpuc, event); | 1232 | xreg = &event->hw.extra_reg; |
1208 | 1233 | if (xreg->idx != EXTRA_REG_NONE) { | |
1234 | c = __intel_shared_reg_get_constraints(cpuc, event, xreg); | ||
1235 | if (c == &emptyconstraint) | ||
1236 | return c; | ||
1237 | } | ||
1238 | breg = &event->hw.branch_reg; | ||
1239 | if (breg->idx != EXTRA_REG_NONE) { | ||
1240 | d = __intel_shared_reg_get_constraints(cpuc, event, breg); | ||
1241 | if (d == &emptyconstraint) { | ||
1242 | __intel_shared_reg_put_constraints(cpuc, xreg); | ||
1243 | c = d; | ||
1244 | } | ||
1245 | } | ||
1209 | return c; | 1246 | return c; |
1210 | } | 1247 | } |
1211 | 1248 | ||
@@ -1253,6 +1290,10 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc, | |||
1253 | reg = &event->hw.extra_reg; | 1290 | reg = &event->hw.extra_reg; |
1254 | if (reg->idx != EXTRA_REG_NONE) | 1291 | if (reg->idx != EXTRA_REG_NONE) |
1255 | __intel_shared_reg_put_constraints(cpuc, reg); | 1292 | __intel_shared_reg_put_constraints(cpuc, reg); |
1293 | |||
1294 | reg = &event->hw.branch_reg; | ||
1295 | if (reg->idx != EXTRA_REG_NONE) | ||
1296 | __intel_shared_reg_put_constraints(cpuc, reg); | ||
1256 | } | 1297 | } |
1257 | 1298 | ||
1258 | static void intel_put_event_constraints(struct cpu_hw_events *cpuc, | 1299 | static void intel_put_event_constraints(struct cpu_hw_events *cpuc, |
@@ -1288,12 +1329,19 @@ static int intel_pmu_hw_config(struct perf_event *event) | |||
1288 | * | 1329 | * |
1289 | * Thereby we gain a PEBS capable cycle counter. | 1330 | * Thereby we gain a PEBS capable cycle counter. |
1290 | */ | 1331 | */ |
1291 | u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */ | 1332 | u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16); |
1333 | |||
1292 | 1334 | ||
1293 | alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); | 1335 | alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); |
1294 | event->hw.config = alt_config; | 1336 | event->hw.config = alt_config; |
1295 | } | 1337 | } |
1296 | 1338 | ||
1339 | if (intel_pmu_needs_lbr_smpl(event)) { | ||
1340 | ret = intel_pmu_setup_lbr_filter(event); | ||
1341 | if (ret) | ||
1342 | return ret; | ||
1343 | } | ||
1344 | |||
1297 | if (event->attr.type != PERF_TYPE_RAW) | 1345 | if (event->attr.type != PERF_TYPE_RAW) |
1298 | return 0; | 1346 | return 0; |
1299 | 1347 | ||
@@ -1432,7 +1480,7 @@ static int intel_pmu_cpu_prepare(int cpu) | |||
1432 | { | 1480 | { |
1433 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); | 1481 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); |
1434 | 1482 | ||
1435 | if (!x86_pmu.extra_regs) | 1483 | if (!(x86_pmu.extra_regs || x86_pmu.lbr_sel_map)) |
1436 | return NOTIFY_OK; | 1484 | return NOTIFY_OK; |
1437 | 1485 | ||
1438 | cpuc->shared_regs = allocate_shared_regs(cpu); | 1486 | cpuc->shared_regs = allocate_shared_regs(cpu); |
@@ -1454,22 +1502,28 @@ static void intel_pmu_cpu_starting(int cpu) | |||
1454 | */ | 1502 | */ |
1455 | intel_pmu_lbr_reset(); | 1503 | intel_pmu_lbr_reset(); |
1456 | 1504 | ||
1457 | if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING)) | 1505 | cpuc->lbr_sel = NULL; |
1506 | |||
1507 | if (!cpuc->shared_regs) | ||
1458 | return; | 1508 | return; |
1459 | 1509 | ||
1460 | for_each_cpu(i, topology_thread_cpumask(cpu)) { | 1510 | if (!(x86_pmu.er_flags & ERF_NO_HT_SHARING)) { |
1461 | struct intel_shared_regs *pc; | 1511 | for_each_cpu(i, topology_thread_cpumask(cpu)) { |
1512 | struct intel_shared_regs *pc; | ||
1462 | 1513 | ||
1463 | pc = per_cpu(cpu_hw_events, i).shared_regs; | 1514 | pc = per_cpu(cpu_hw_events, i).shared_regs; |
1464 | if (pc && pc->core_id == core_id) { | 1515 | if (pc && pc->core_id == core_id) { |
1465 | cpuc->kfree_on_online = cpuc->shared_regs; | 1516 | cpuc->kfree_on_online = cpuc->shared_regs; |
1466 | cpuc->shared_regs = pc; | 1517 | cpuc->shared_regs = pc; |
1467 | break; | 1518 | break; |
1519 | } | ||
1468 | } | 1520 | } |
1521 | cpuc->shared_regs->core_id = core_id; | ||
1522 | cpuc->shared_regs->refcnt++; | ||
1469 | } | 1523 | } |
1470 | 1524 | ||
1471 | cpuc->shared_regs->core_id = core_id; | 1525 | if (x86_pmu.lbr_sel_map) |
1472 | cpuc->shared_regs->refcnt++; | 1526 | cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR]; |
1473 | } | 1527 | } |
1474 | 1528 | ||
1475 | static void intel_pmu_cpu_dying(int cpu) | 1529 | static void intel_pmu_cpu_dying(int cpu) |
@@ -1487,6 +1541,18 @@ static void intel_pmu_cpu_dying(int cpu) | |||
1487 | fini_debug_store_on_cpu(cpu); | 1541 | fini_debug_store_on_cpu(cpu); |
1488 | } | 1542 | } |
1489 | 1543 | ||
1544 | static void intel_pmu_flush_branch_stack(void) | ||
1545 | { | ||
1546 | /* | ||
1547 | * Intel LBR does not tag entries with the | ||
1548 | * PID of the current task, so we need to | ||
1549 | * flush it on ctxsw | ||
1550 | * For now, we simply reset it | ||
1551 | */ | ||
1552 | if (x86_pmu.lbr_nr) | ||
1553 | intel_pmu_lbr_reset(); | ||
1554 | } | ||
1555 | |||
1490 | static __initconst const struct x86_pmu intel_pmu = { | 1556 | static __initconst const struct x86_pmu intel_pmu = { |
1491 | .name = "Intel", | 1557 | .name = "Intel", |
1492 | .handle_irq = intel_pmu_handle_irq, | 1558 | .handle_irq = intel_pmu_handle_irq, |
@@ -1514,6 +1580,7 @@ static __initconst const struct x86_pmu intel_pmu = { | |||
1514 | .cpu_starting = intel_pmu_cpu_starting, | 1580 | .cpu_starting = intel_pmu_cpu_starting, |
1515 | .cpu_dying = intel_pmu_cpu_dying, | 1581 | .cpu_dying = intel_pmu_cpu_dying, |
1516 | .guest_get_msrs = intel_guest_get_msrs, | 1582 | .guest_get_msrs = intel_guest_get_msrs, |
1583 | .flush_branch_stack = intel_pmu_flush_branch_stack, | ||
1517 | }; | 1584 | }; |
1518 | 1585 | ||
1519 | static __init void intel_clovertown_quirk(void) | 1586 | static __init void intel_clovertown_quirk(void) |
@@ -1690,9 +1757,11 @@ __init int intel_pmu_init(void) | |||
1690 | x86_pmu.extra_regs = intel_nehalem_extra_regs; | 1757 | x86_pmu.extra_regs = intel_nehalem_extra_regs; |
1691 | 1758 | ||
1692 | /* UOPS_ISSUED.STALLED_CYCLES */ | 1759 | /* UOPS_ISSUED.STALLED_CYCLES */ |
1693 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; | 1760 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = |
1761 | X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); | ||
1694 | /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ | 1762 | /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ |
1695 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; | 1763 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = |
1764 | X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); | ||
1696 | 1765 | ||
1697 | x86_add_quirk(intel_nehalem_quirk); | 1766 | x86_add_quirk(intel_nehalem_quirk); |
1698 | 1767 | ||
@@ -1727,9 +1796,11 @@ __init int intel_pmu_init(void) | |||
1727 | x86_pmu.er_flags |= ERF_HAS_RSP_1; | 1796 | x86_pmu.er_flags |= ERF_HAS_RSP_1; |
1728 | 1797 | ||
1729 | /* UOPS_ISSUED.STALLED_CYCLES */ | 1798 | /* UOPS_ISSUED.STALLED_CYCLES */ |
1730 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; | 1799 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = |
1800 | X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); | ||
1731 | /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ | 1801 | /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */ |
1732 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1; | 1802 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = |
1803 | X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1); | ||
1733 | 1804 | ||
1734 | pr_cont("Westmere events, "); | 1805 | pr_cont("Westmere events, "); |
1735 | break; | 1806 | break; |
@@ -1740,7 +1811,7 @@ __init int intel_pmu_init(void) | |||
1740 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, | 1811 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, |
1741 | sizeof(hw_cache_event_ids)); | 1812 | sizeof(hw_cache_event_ids)); |
1742 | 1813 | ||
1743 | intel_pmu_lbr_init_nhm(); | 1814 | intel_pmu_lbr_init_snb(); |
1744 | 1815 | ||
1745 | x86_pmu.event_constraints = intel_snb_event_constraints; | 1816 | x86_pmu.event_constraints = intel_snb_event_constraints; |
1746 | x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; | 1817 | x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; |
@@ -1750,9 +1821,11 @@ __init int intel_pmu_init(void) | |||
1750 | x86_pmu.er_flags |= ERF_NO_HT_SHARING; | 1821 | x86_pmu.er_flags |= ERF_NO_HT_SHARING; |
1751 | 1822 | ||
1752 | /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ | 1823 | /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ |
1753 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e; | 1824 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = |
1825 | X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1); | ||
1754 | /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/ | 1826 | /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/ |
1755 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x18001b1; | 1827 | intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = |
1828 | X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1); | ||
1756 | 1829 | ||
1757 | pr_cont("SandyBridge events, "); | 1830 | pr_cont("SandyBridge events, "); |
1758 | break; | 1831 | break; |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index d6bd49faa40c..7f64df19e7dd 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -3,6 +3,7 @@ | |||
3 | #include <linux/slab.h> | 3 | #include <linux/slab.h> |
4 | 4 | ||
5 | #include <asm/perf_event.h> | 5 | #include <asm/perf_event.h> |
6 | #include <asm/insn.h> | ||
6 | 7 | ||
7 | #include "perf_event.h" | 8 | #include "perf_event.h" |
8 | 9 | ||
@@ -439,9 +440,6 @@ void intel_pmu_pebs_enable(struct perf_event *event) | |||
439 | hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; | 440 | hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; |
440 | 441 | ||
441 | cpuc->pebs_enabled |= 1ULL << hwc->idx; | 442 | cpuc->pebs_enabled |= 1ULL << hwc->idx; |
442 | |||
443 | if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) | ||
444 | intel_pmu_lbr_enable(event); | ||
445 | } | 443 | } |
446 | 444 | ||
447 | void intel_pmu_pebs_disable(struct perf_event *event) | 445 | void intel_pmu_pebs_disable(struct perf_event *event) |
@@ -454,9 +452,6 @@ void intel_pmu_pebs_disable(struct perf_event *event) | |||
454 | wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); | 452 | wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); |
455 | 453 | ||
456 | hwc->config |= ARCH_PERFMON_EVENTSEL_INT; | 454 | hwc->config |= ARCH_PERFMON_EVENTSEL_INT; |
457 | |||
458 | if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) | ||
459 | intel_pmu_lbr_disable(event); | ||
460 | } | 455 | } |
461 | 456 | ||
462 | void intel_pmu_pebs_enable_all(void) | 457 | void intel_pmu_pebs_enable_all(void) |
@@ -475,17 +470,6 @@ void intel_pmu_pebs_disable_all(void) | |||
475 | wrmsrl(MSR_IA32_PEBS_ENABLE, 0); | 470 | wrmsrl(MSR_IA32_PEBS_ENABLE, 0); |
476 | } | 471 | } |
477 | 472 | ||
478 | #include <asm/insn.h> | ||
479 | |||
480 | static inline bool kernel_ip(unsigned long ip) | ||
481 | { | ||
482 | #ifdef CONFIG_X86_32 | ||
483 | return ip > PAGE_OFFSET; | ||
484 | #else | ||
485 | return (long)ip < 0; | ||
486 | #endif | ||
487 | } | ||
488 | |||
489 | static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) | 473 | static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) |
490 | { | 474 | { |
491 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 475 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
@@ -572,6 +556,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, | |||
572 | * both formats and we don't use the other fields in this | 556 | * both formats and we don't use the other fields in this |
573 | * routine. | 557 | * routine. |
574 | */ | 558 | */ |
559 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
575 | struct pebs_record_core *pebs = __pebs; | 560 | struct pebs_record_core *pebs = __pebs; |
576 | struct perf_sample_data data; | 561 | struct perf_sample_data data; |
577 | struct pt_regs regs; | 562 | struct pt_regs regs; |
@@ -602,6 +587,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event, | |||
602 | else | 587 | else |
603 | regs.flags &= ~PERF_EFLAGS_EXACT; | 588 | regs.flags &= ~PERF_EFLAGS_EXACT; |
604 | 589 | ||
590 | if (has_branch_stack(event)) | ||
591 | data.br_stack = &cpuc->lbr_stack; | ||
592 | |||
605 | if (perf_event_overflow(event, &data, ®s)) | 593 | if (perf_event_overflow(event, &data, ®s)) |
606 | x86_pmu_stop(event, 0); | 594 | x86_pmu_stop(event, 0); |
607 | } | 595 | } |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 47a7e63bfe54..520b4265fcd2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -3,6 +3,7 @@ | |||
3 | 3 | ||
4 | #include <asm/perf_event.h> | 4 | #include <asm/perf_event.h> |
5 | #include <asm/msr.h> | 5 | #include <asm/msr.h> |
6 | #include <asm/insn.h> | ||
6 | 7 | ||
7 | #include "perf_event.h" | 8 | #include "perf_event.h" |
8 | 9 | ||
@@ -14,6 +15,100 @@ enum { | |||
14 | }; | 15 | }; |
15 | 16 | ||
16 | /* | 17 | /* |
18 | * Intel LBR_SELECT bits | ||
19 | * Intel Vol3a, April 2011, Section 16.7 Table 16-10 | ||
20 | * | ||
21 | * Hardware branch filter (not available on all CPUs) | ||
22 | */ | ||
23 | #define LBR_KERNEL_BIT 0 /* do not capture at ring0 */ | ||
24 | #define LBR_USER_BIT 1 /* do not capture at ring > 0 */ | ||
25 | #define LBR_JCC_BIT 2 /* do not capture conditional branches */ | ||
26 | #define LBR_REL_CALL_BIT 3 /* do not capture relative calls */ | ||
27 | #define LBR_IND_CALL_BIT 4 /* do not capture indirect calls */ | ||
28 | #define LBR_RETURN_BIT 5 /* do not capture near returns */ | ||
29 | #define LBR_IND_JMP_BIT 6 /* do not capture indirect jumps */ | ||
30 | #define LBR_REL_JMP_BIT 7 /* do not capture relative jumps */ | ||
31 | #define LBR_FAR_BIT 8 /* do not capture far branches */ | ||
32 | |||
33 | #define LBR_KERNEL (1 << LBR_KERNEL_BIT) | ||
34 | #define LBR_USER (1 << LBR_USER_BIT) | ||
35 | #define LBR_JCC (1 << LBR_JCC_BIT) | ||
36 | #define LBR_REL_CALL (1 << LBR_REL_CALL_BIT) | ||
37 | #define LBR_IND_CALL (1 << LBR_IND_CALL_BIT) | ||
38 | #define LBR_RETURN (1 << LBR_RETURN_BIT) | ||
39 | #define LBR_REL_JMP (1 << LBR_REL_JMP_BIT) | ||
40 | #define LBR_IND_JMP (1 << LBR_IND_JMP_BIT) | ||
41 | #define LBR_FAR (1 << LBR_FAR_BIT) | ||
42 | |||
43 | #define LBR_PLM (LBR_KERNEL | LBR_USER) | ||
44 | |||
45 | #define LBR_SEL_MASK 0x1ff /* valid bits in LBR_SELECT */ | ||
46 | #define LBR_NOT_SUPP -1 /* LBR filter not supported */ | ||
47 | #define LBR_IGN 0 /* ignored */ | ||
48 | |||
49 | #define LBR_ANY \ | ||
50 | (LBR_JCC |\ | ||
51 | LBR_REL_CALL |\ | ||
52 | LBR_IND_CALL |\ | ||
53 | LBR_RETURN |\ | ||
54 | LBR_REL_JMP |\ | ||
55 | LBR_IND_JMP |\ | ||
56 | LBR_FAR) | ||
57 | |||
58 | #define LBR_FROM_FLAG_MISPRED (1ULL << 63) | ||
59 | |||
60 | #define for_each_branch_sample_type(x) \ | ||
61 | for ((x) = PERF_SAMPLE_BRANCH_USER; \ | ||
62 | (x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1) | ||
63 | |||
64 | /* | ||
65 | * x86 control flow change classification | ||
66 | * x86 control flow changes include branches, interrupts, traps, faults | ||
67 | */ | ||
68 | enum { | ||
69 | X86_BR_NONE = 0, /* unknown */ | ||
70 | |||
71 | X86_BR_USER = 1 << 0, /* branch target is user */ | ||
72 | X86_BR_KERNEL = 1 << 1, /* branch target is kernel */ | ||
73 | |||
74 | X86_BR_CALL = 1 << 2, /* call */ | ||
75 | X86_BR_RET = 1 << 3, /* return */ | ||
76 | X86_BR_SYSCALL = 1 << 4, /* syscall */ | ||
77 | X86_BR_SYSRET = 1 << 5, /* syscall return */ | ||
78 | X86_BR_INT = 1 << 6, /* sw interrupt */ | ||
79 | X86_BR_IRET = 1 << 7, /* return from interrupt */ | ||
80 | X86_BR_JCC = 1 << 8, /* conditional */ | ||
81 | X86_BR_JMP = 1 << 9, /* jump */ | ||
82 | X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */ | ||
83 | X86_BR_IND_CALL = 1 << 11,/* indirect calls */ | ||
84 | }; | ||
85 | |||
86 | #define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL) | ||
87 | |||
88 | #define X86_BR_ANY \ | ||
89 | (X86_BR_CALL |\ | ||
90 | X86_BR_RET |\ | ||
91 | X86_BR_SYSCALL |\ | ||
92 | X86_BR_SYSRET |\ | ||
93 | X86_BR_INT |\ | ||
94 | X86_BR_IRET |\ | ||
95 | X86_BR_JCC |\ | ||
96 | X86_BR_JMP |\ | ||
97 | X86_BR_IRQ |\ | ||
98 | X86_BR_IND_CALL) | ||
99 | |||
100 | #define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY) | ||
101 | |||
102 | #define X86_BR_ANY_CALL \ | ||
103 | (X86_BR_CALL |\ | ||
104 | X86_BR_IND_CALL |\ | ||
105 | X86_BR_SYSCALL |\ | ||
106 | X86_BR_IRQ |\ | ||
107 | X86_BR_INT) | ||
108 | |||
109 | static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc); | ||
110 | |||
111 | /* | ||
17 | * We only support LBR implementations that have FREEZE_LBRS_ON_PMI | 112 | * We only support LBR implementations that have FREEZE_LBRS_ON_PMI |
18 | * otherwise it becomes near impossible to get a reliable stack. | 113 | * otherwise it becomes near impossible to get a reliable stack. |
19 | */ | 114 | */ |
@@ -21,6 +116,10 @@ enum { | |||
21 | static void __intel_pmu_lbr_enable(void) | 116 | static void __intel_pmu_lbr_enable(void) |
22 | { | 117 | { |
23 | u64 debugctl; | 118 | u64 debugctl; |
119 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
120 | |||
121 | if (cpuc->lbr_sel) | ||
122 | wrmsrl(MSR_LBR_SELECT, cpuc->lbr_sel->config); | ||
24 | 123 | ||
25 | rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | 124 | rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); |
26 | debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); | 125 | debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); |
@@ -76,11 +175,11 @@ void intel_pmu_lbr_enable(struct perf_event *event) | |||
76 | * Reset the LBR stack if we changed task context to | 175 | * Reset the LBR stack if we changed task context to |
77 | * avoid data leaks. | 176 | * avoid data leaks. |
78 | */ | 177 | */ |
79 | |||
80 | if (event->ctx->task && cpuc->lbr_context != event->ctx) { | 178 | if (event->ctx->task && cpuc->lbr_context != event->ctx) { |
81 | intel_pmu_lbr_reset(); | 179 | intel_pmu_lbr_reset(); |
82 | cpuc->lbr_context = event->ctx; | 180 | cpuc->lbr_context = event->ctx; |
83 | } | 181 | } |
182 | cpuc->br_sel = event->hw.branch_reg.reg; | ||
84 | 183 | ||
85 | cpuc->lbr_users++; | 184 | cpuc->lbr_users++; |
86 | } | 185 | } |
@@ -95,8 +194,11 @@ void intel_pmu_lbr_disable(struct perf_event *event) | |||
95 | cpuc->lbr_users--; | 194 | cpuc->lbr_users--; |
96 | WARN_ON_ONCE(cpuc->lbr_users < 0); | 195 | WARN_ON_ONCE(cpuc->lbr_users < 0); |
97 | 196 | ||
98 | if (cpuc->enabled && !cpuc->lbr_users) | 197 | if (cpuc->enabled && !cpuc->lbr_users) { |
99 | __intel_pmu_lbr_disable(); | 198 | __intel_pmu_lbr_disable(); |
199 | /* avoid stale pointer */ | ||
200 | cpuc->lbr_context = NULL; | ||
201 | } | ||
100 | } | 202 | } |
101 | 203 | ||
102 | void intel_pmu_lbr_enable_all(void) | 204 | void intel_pmu_lbr_enable_all(void) |
@@ -115,6 +217,9 @@ void intel_pmu_lbr_disable_all(void) | |||
115 | __intel_pmu_lbr_disable(); | 217 | __intel_pmu_lbr_disable(); |
116 | } | 218 | } |
117 | 219 | ||
220 | /* | ||
221 | * TOS = most recently recorded branch | ||
222 | */ | ||
118 | static inline u64 intel_pmu_lbr_tos(void) | 223 | static inline u64 intel_pmu_lbr_tos(void) |
119 | { | 224 | { |
120 | u64 tos; | 225 | u64 tos; |
@@ -142,15 +247,15 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) | |||
142 | 247 | ||
143 | rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr); | 248 | rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr); |
144 | 249 | ||
145 | cpuc->lbr_entries[i].from = msr_lastbranch.from; | 250 | cpuc->lbr_entries[i].from = msr_lastbranch.from; |
146 | cpuc->lbr_entries[i].to = msr_lastbranch.to; | 251 | cpuc->lbr_entries[i].to = msr_lastbranch.to; |
147 | cpuc->lbr_entries[i].flags = 0; | 252 | cpuc->lbr_entries[i].mispred = 0; |
253 | cpuc->lbr_entries[i].predicted = 0; | ||
254 | cpuc->lbr_entries[i].reserved = 0; | ||
148 | } | 255 | } |
149 | cpuc->lbr_stack.nr = i; | 256 | cpuc->lbr_stack.nr = i; |
150 | } | 257 | } |
151 | 258 | ||
152 | #define LBR_FROM_FLAG_MISPRED (1ULL << 63) | ||
153 | |||
154 | /* | 259 | /* |
155 | * Due to lack of segmentation in Linux the effective address (offset) | 260 | * Due to lack of segmentation in Linux the effective address (offset) |
156 | * is the same as the linear address, allowing us to merge the LIP and EIP | 261 | * is the same as the linear address, allowing us to merge the LIP and EIP |
@@ -165,19 +270,22 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) | |||
165 | 270 | ||
166 | for (i = 0; i < x86_pmu.lbr_nr; i++) { | 271 | for (i = 0; i < x86_pmu.lbr_nr; i++) { |
167 | unsigned long lbr_idx = (tos - i) & mask; | 272 | unsigned long lbr_idx = (tos - i) & mask; |
168 | u64 from, to, flags = 0; | 273 | u64 from, to, mis = 0, pred = 0; |
169 | 274 | ||
170 | rdmsrl(x86_pmu.lbr_from + lbr_idx, from); | 275 | rdmsrl(x86_pmu.lbr_from + lbr_idx, from); |
171 | rdmsrl(x86_pmu.lbr_to + lbr_idx, to); | 276 | rdmsrl(x86_pmu.lbr_to + lbr_idx, to); |
172 | 277 | ||
173 | if (lbr_format == LBR_FORMAT_EIP_FLAGS) { | 278 | if (lbr_format == LBR_FORMAT_EIP_FLAGS) { |
174 | flags = !!(from & LBR_FROM_FLAG_MISPRED); | 279 | mis = !!(from & LBR_FROM_FLAG_MISPRED); |
280 | pred = !mis; | ||
175 | from = (u64)((((s64)from) << 1) >> 1); | 281 | from = (u64)((((s64)from) << 1) >> 1); |
176 | } | 282 | } |
177 | 283 | ||
178 | cpuc->lbr_entries[i].from = from; | 284 | cpuc->lbr_entries[i].from = from; |
179 | cpuc->lbr_entries[i].to = to; | 285 | cpuc->lbr_entries[i].to = to; |
180 | cpuc->lbr_entries[i].flags = flags; | 286 | cpuc->lbr_entries[i].mispred = mis; |
287 | cpuc->lbr_entries[i].predicted = pred; | ||
288 | cpuc->lbr_entries[i].reserved = 0; | ||
181 | } | 289 | } |
182 | cpuc->lbr_stack.nr = i; | 290 | cpuc->lbr_stack.nr = i; |
183 | } | 291 | } |
@@ -193,28 +301,404 @@ void intel_pmu_lbr_read(void) | |||
193 | intel_pmu_lbr_read_32(cpuc); | 301 | intel_pmu_lbr_read_32(cpuc); |
194 | else | 302 | else |
195 | intel_pmu_lbr_read_64(cpuc); | 303 | intel_pmu_lbr_read_64(cpuc); |
304 | |||
305 | intel_pmu_lbr_filter(cpuc); | ||
306 | } | ||
307 | |||
308 | /* | ||
309 | * SW filter is used: | ||
310 | * - in case there is no HW filter | ||
311 | * - in case the HW filter has errata or limitations | ||
312 | */ | ||
313 | static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event) | ||
314 | { | ||
315 | u64 br_type = event->attr.branch_sample_type; | ||
316 | int mask = 0; | ||
317 | |||
318 | if (br_type & PERF_SAMPLE_BRANCH_USER) | ||
319 | mask |= X86_BR_USER; | ||
320 | |||
321 | if (br_type & PERF_SAMPLE_BRANCH_KERNEL) | ||
322 | mask |= X86_BR_KERNEL; | ||
323 | |||
324 | /* we ignore BRANCH_HV here */ | ||
325 | |||
326 | if (br_type & PERF_SAMPLE_BRANCH_ANY) | ||
327 | mask |= X86_BR_ANY; | ||
328 | |||
329 | if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL) | ||
330 | mask |= X86_BR_ANY_CALL; | ||
331 | |||
332 | if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN) | ||
333 | mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET; | ||
334 | |||
335 | if (br_type & PERF_SAMPLE_BRANCH_IND_CALL) | ||
336 | mask |= X86_BR_IND_CALL; | ||
337 | /* | ||
338 | * stash the actual user request into reg; it may | ||
339 | * be used by fixup code on some CPUs | ||
340 | */ | ||
341 | event->hw.branch_reg.reg = mask; | ||
342 | } | ||
343 | |||
344 | /* | ||
345 | * setup the HW LBR filter | ||
346 | * Used only when available, may not be enough to disambiguate | ||
347 | * all branches, may need the help of the SW filter | ||
348 | */ | ||
349 | static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) | ||
350 | { | ||
351 | struct hw_perf_event_extra *reg; | ||
352 | u64 br_type = event->attr.branch_sample_type; | ||
353 | u64 mask = 0, m; | ||
354 | u64 v; | ||
355 | |||
356 | for_each_branch_sample_type(m) { | ||
357 | if (!(br_type & m)) | ||
358 | continue; | ||
359 | |||
360 | v = x86_pmu.lbr_sel_map[m]; | ||
361 | if (v == LBR_NOT_SUPP) | ||
362 | return -EOPNOTSUPP; | ||
363 | |||
364 | if (v != LBR_IGN) | ||
365 | mask |= v; | ||
366 | } | ||
367 | reg = &event->hw.branch_reg; | ||
368 | reg->idx = EXTRA_REG_LBR; | ||
369 | |||
370 | /* LBR_SELECT operates in suppress mode so invert mask */ | ||
371 | reg->config = ~mask & x86_pmu.lbr_sel_mask; | ||
372 | |||
373 | return 0; | ||
374 | } | ||
375 | |||
376 | int intel_pmu_setup_lbr_filter(struct perf_event *event) | ||
377 | { | ||
378 | int ret = 0; | ||
379 | |||
380 | /* | ||
381 | * no LBR on this PMU | ||
382 | */ | ||
383 | if (!x86_pmu.lbr_nr) | ||
384 | return -EOPNOTSUPP; | ||
385 | |||
386 | /* | ||
387 | * setup SW LBR filter | ||
388 | */ | ||
389 | intel_pmu_setup_sw_lbr_filter(event); | ||
390 | |||
391 | /* | ||
392 | * setup HW LBR filter, if any | ||
393 | */ | ||
394 | if (x86_pmu.lbr_sel_map) | ||
395 | ret = intel_pmu_setup_hw_lbr_filter(event); | ||
396 | |||
397 | return ret; | ||
196 | } | 398 | } |
197 | 399 | ||
400 | /* | ||
401 | * return the type of control flow change at address "from" | ||
402 | * the instruction is not necessarily a branch (in case of an interrupt). | ||
403 | * | ||
404 | * The branch type returned also includes the priv level of the | ||
405 | * target of the control flow change (X86_BR_USER, X86_BR_KERNEL). | ||
406 | * | ||
407 | * If a branch type is unknown OR the instruction cannot be | ||
408 | * decoded (e.g., text page not present), then X86_BR_NONE is | ||
409 | * returned. | ||
410 | */ | ||
411 | static int branch_type(unsigned long from, unsigned long to) | ||
412 | { | ||
413 | struct insn insn; | ||
414 | void *addr; | ||
415 | int bytes, size = MAX_INSN_SIZE; | ||
416 | int ret = X86_BR_NONE; | ||
417 | int ext, to_plm, from_plm; | ||
418 | u8 buf[MAX_INSN_SIZE]; | ||
419 | int is64 = 0; | ||
420 | |||
421 | to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER; | ||
422 | from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER; | ||
423 | |||
424 | /* | ||
425 | * maybe zero if lbr did not fill up after a reset by the time | ||
426 | * we get a PMU interrupt | ||
427 | */ | ||
428 | if (from == 0 || to == 0) | ||
429 | return X86_BR_NONE; | ||
430 | |||
431 | if (from_plm == X86_BR_USER) { | ||
432 | /* | ||
433 | * can happen if measuring at the user level only | ||
434 | * and we interrupt in a kernel thread, e.g., idle. | ||
435 | */ | ||
436 | if (!current->mm) | ||
437 | return X86_BR_NONE; | ||
438 | |||
439 | /* may fail if text not present */ | ||
440 | bytes = copy_from_user_nmi(buf, (void __user *)from, size); | ||
441 | if (bytes != size) | ||
442 | return X86_BR_NONE; | ||
443 | |||
444 | addr = buf; | ||
445 | } else | ||
446 | addr = (void *)from; | ||
447 | |||
448 | /* | ||
449 | * decoder needs to know the ABI especially | ||
450 | * on 64-bit systems running 32-bit apps | ||
451 | */ | ||
452 | #ifdef CONFIG_X86_64 | ||
453 | is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32); | ||
454 | #endif | ||
455 | insn_init(&insn, addr, is64); | ||
456 | insn_get_opcode(&insn); | ||
457 | |||
458 | switch (insn.opcode.bytes[0]) { | ||
459 | case 0xf: | ||
460 | switch (insn.opcode.bytes[1]) { | ||
461 | case 0x05: /* syscall */ | ||
462 | case 0x34: /* sysenter */ | ||
463 | ret = X86_BR_SYSCALL; | ||
464 | break; | ||
465 | case 0x07: /* sysret */ | ||
466 | case 0x35: /* sysexit */ | ||
467 | ret = X86_BR_SYSRET; | ||
468 | break; | ||
469 | case 0x80 ... 0x8f: /* conditional */ | ||
470 | ret = X86_BR_JCC; | ||
471 | break; | ||
472 | default: | ||
473 | ret = X86_BR_NONE; | ||
474 | } | ||
475 | break; | ||
476 | case 0x70 ... 0x7f: /* conditional */ | ||
477 | ret = X86_BR_JCC; | ||
478 | break; | ||
479 | case 0xc2: /* near ret */ | ||
480 | case 0xc3: /* near ret */ | ||
481 | case 0xca: /* far ret */ | ||
482 | case 0xcb: /* far ret */ | ||
483 | ret = X86_BR_RET; | ||
484 | break; | ||
485 | case 0xcf: /* iret */ | ||
486 | ret = X86_BR_IRET; | ||
487 | break; | ||
488 | case 0xcc ... 0xce: /* int */ | ||
489 | ret = X86_BR_INT; | ||
490 | break; | ||
491 | case 0xe8: /* call near rel */ | ||
492 | case 0x9a: /* call far absolute */ | ||
493 | ret = X86_BR_CALL; | ||
494 | break; | ||
495 | case 0xe0 ... 0xe3: /* loop jmp */ | ||
496 | ret = X86_BR_JCC; | ||
497 | break; | ||
498 | case 0xe9 ... 0xeb: /* jmp */ | ||
499 | ret = X86_BR_JMP; | ||
500 | break; | ||
501 | case 0xff: /* call near absolute, call far absolute ind */ | ||
502 | insn_get_modrm(&insn); | ||
503 | ext = (insn.modrm.bytes[0] >> 3) & 0x7; | ||
504 | switch (ext) { | ||
505 | case 2: /* near ind call */ | ||
506 | case 3: /* far ind call */ | ||
507 | ret = X86_BR_IND_CALL; | ||
508 | break; | ||
509 | case 4: | ||
510 | case 5: | ||
511 | ret = X86_BR_JMP; | ||
512 | break; | ||
513 | } | ||
514 | break; | ||
515 | default: | ||
516 | ret = X86_BR_NONE; | ||
517 | } | ||
518 | /* | ||
519 | * interrupts, traps, faults (and thus ring transitions) may | ||
520 | * occur on any instruction. Thus, to classify them correctly, | ||
521 | * we need to first look at the from and to priv levels. If they | ||
522 | * are different and to is in the kernel, then it indicates | ||
523 | * a ring transition. If the from instruction is not a ring | ||
524 | * transition instr (syscall, sysenter, int), then it means | ||
525 | * it was an irq, trap or fault. | ||
526 | * | ||
527 | * we have no way of detecting kernel to kernel faults. | ||
528 | */ | ||
529 | if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL | ||
530 | && ret != X86_BR_SYSCALL && ret != X86_BR_INT) | ||
531 | ret = X86_BR_IRQ; | ||
532 | |||
533 | /* | ||
534 | * the branch priv level is determined by the target, as | ||
535 | * is done by the HW when LBR_SELECT is implemented | ||
536 | */ | ||
537 | if (ret != X86_BR_NONE) | ||
538 | ret |= to_plm; | ||
539 | |||
540 | return ret; | ||
541 | } | ||
542 | |||
543 | /* | ||
544 | * implement actual branch filter based on user demand. | ||
545 | * Hardware may not exactly satisfy that request, thus | ||
546 | * we need to inspect opcodes. Mismatched branches are | ||
547 | * discarded. Therefore, the number of branches returned | ||
548 | * in a PERF_SAMPLE_BRANCH_STACK sample may vary. | ||
549 | */ | ||
550 | static void | ||
551 | intel_pmu_lbr_filter(struct cpu_hw_events *cpuc) | ||
552 | { | ||
553 | u64 from, to; | ||
554 | int br_sel = cpuc->br_sel; | ||
555 | int i, j, type; | ||
556 | bool compress = false; | ||
557 | |||
558 | /* if sampling all branches, then nothing to filter */ | ||
559 | if ((br_sel & X86_BR_ALL) == X86_BR_ALL) | ||
560 | return; | ||
561 | |||
562 | for (i = 0; i < cpuc->lbr_stack.nr; i++) { | ||
563 | |||
564 | from = cpuc->lbr_entries[i].from; | ||
565 | to = cpuc->lbr_entries[i].to; | ||
566 | |||
567 | type = branch_type(from, to); | ||
568 | |||
569 | /* if type does not correspond, then discard */ | ||
570 | if (type == X86_BR_NONE || (br_sel & type) != type) { | ||
571 | cpuc->lbr_entries[i].from = 0; | ||
572 | compress = true; | ||
573 | } | ||
574 | } | ||
575 | |||
576 | if (!compress) | ||
577 | return; | ||
578 | |||
579 | /* remove all entries with from=0 */ | ||
580 | for (i = 0; i < cpuc->lbr_stack.nr; ) { | ||
581 | if (!cpuc->lbr_entries[i].from) { | ||
582 | j = i; | ||
583 | while (++j < cpuc->lbr_stack.nr) | ||
584 | cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j]; | ||
585 | cpuc->lbr_stack.nr--; | ||
586 | if (!cpuc->lbr_entries[i].from) | ||
587 | continue; | ||
588 | } | ||
589 | i++; | ||
590 | } | ||
591 | } | ||
592 | |||
593 | /* | ||
594 | * Map interface branch filters onto LBR filters | ||
595 | */ | ||
596 | static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { | ||
597 | [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY, | ||
598 | [PERF_SAMPLE_BRANCH_USER] = LBR_USER, | ||
599 | [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL, | ||
600 | [PERF_SAMPLE_BRANCH_HV] = LBR_IGN, | ||
601 | [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_REL_JMP | ||
602 | | LBR_IND_JMP | LBR_FAR, | ||
603 | /* | ||
604 | * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches | ||
605 | */ | ||
606 | [PERF_SAMPLE_BRANCH_ANY_CALL] = | ||
607 | LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR, | ||
608 | /* | ||
609 | * NHM/WSM erratum: must include IND_JMP to capture IND_CALL | ||
610 | */ | ||
611 | [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP, | ||
612 | }; | ||
613 | |||
614 | static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { | ||
615 | [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY, | ||
616 | [PERF_SAMPLE_BRANCH_USER] = LBR_USER, | ||
617 | [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL, | ||
618 | [PERF_SAMPLE_BRANCH_HV] = LBR_IGN, | ||
619 | [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_FAR, | ||
620 | [PERF_SAMPLE_BRANCH_ANY_CALL] = LBR_REL_CALL | LBR_IND_CALL | ||
621 | | LBR_FAR, | ||
622 | [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL, | ||
623 | }; | ||
624 | |||
625 | /* core */ | ||
198 | void intel_pmu_lbr_init_core(void) | 626 | void intel_pmu_lbr_init_core(void) |
199 | { | 627 | { |
200 | x86_pmu.lbr_nr = 4; | 628 | x86_pmu.lbr_nr = 4; |
201 | x86_pmu.lbr_tos = 0x01c9; | 629 | x86_pmu.lbr_tos = MSR_LBR_TOS; |
202 | x86_pmu.lbr_from = 0x40; | 630 | x86_pmu.lbr_from = MSR_LBR_CORE_FROM; |
203 | x86_pmu.lbr_to = 0x60; | 631 | x86_pmu.lbr_to = MSR_LBR_CORE_TO; |
632 | |||
633 | /* | ||
634 | * SW branch filter usage: | ||
635 | * - compensate for lack of HW filter | ||
636 | */ | ||
637 | pr_cont("4-deep LBR, "); | ||
204 | } | 638 | } |
205 | 639 | ||
640 | /* nehalem/westmere */ | ||
206 | void intel_pmu_lbr_init_nhm(void) | 641 | void intel_pmu_lbr_init_nhm(void) |
207 | { | 642 | { |
208 | x86_pmu.lbr_nr = 16; | 643 | x86_pmu.lbr_nr = 16; |
209 | x86_pmu.lbr_tos = 0x01c9; | 644 | x86_pmu.lbr_tos = MSR_LBR_TOS; |
210 | x86_pmu.lbr_from = 0x680; | 645 | x86_pmu.lbr_from = MSR_LBR_NHM_FROM; |
211 | x86_pmu.lbr_to = 0x6c0; | 646 | x86_pmu.lbr_to = MSR_LBR_NHM_TO; |
647 | |||
648 | x86_pmu.lbr_sel_mask = LBR_SEL_MASK; | ||
649 | x86_pmu.lbr_sel_map = nhm_lbr_sel_map; | ||
650 | |||
651 | /* | ||
652 | * SW branch filter usage: | ||
653 | * - workaround LBR_SEL errata (see above) | ||
654 | * - support syscall, sysret capture. | ||
655 | * That requires LBR_FAR but that means far | ||
656 | * jmp need to be filtered out | ||
657 | */ | ||
658 | pr_cont("16-deep LBR, "); | ||
659 | } | ||
660 | |||
661 | /* sandy bridge */ | ||
662 | void intel_pmu_lbr_init_snb(void) | ||
663 | { | ||
664 | x86_pmu.lbr_nr = 16; | ||
665 | x86_pmu.lbr_tos = MSR_LBR_TOS; | ||
666 | x86_pmu.lbr_from = MSR_LBR_NHM_FROM; | ||
667 | x86_pmu.lbr_to = MSR_LBR_NHM_TO; | ||
668 | |||
669 | x86_pmu.lbr_sel_mask = LBR_SEL_MASK; | ||
670 | x86_pmu.lbr_sel_map = snb_lbr_sel_map; | ||
671 | |||
672 | /* | ||
673 | * SW branch filter usage: | ||
674 | * - support syscall, sysret capture. | ||
675 | * That requires LBR_FAR, but that means far | ||
676 | * jmps need to be filtered out | ||
677 | */ | ||
678 | pr_cont("16-deep LBR, "); | ||
212 | } | 679 | } |
213 | 680 | ||
681 | /* atom */ | ||
214 | void intel_pmu_lbr_init_atom(void) | 682 | void intel_pmu_lbr_init_atom(void) |
215 | { | 683 | { |
684 | /* | ||
685 | * only models starting at stepping 10 seem | ||
686 | * to have an operational LBR which can freeze | ||
687 | * on a PMU interrupt | ||
688 | */ | ||
689 | if (boot_cpu_data.x86_mask < 10) { | ||
690 | pr_cont("LBR disabled due to erratum"); | ||
691 | return; | ||
692 | } | ||
693 | |||
216 | x86_pmu.lbr_nr = 8; | 694 | x86_pmu.lbr_nr = 8; |
217 | x86_pmu.lbr_tos = 0x01c9; | 695 | x86_pmu.lbr_tos = MSR_LBR_TOS; |
218 | x86_pmu.lbr_from = 0x40; | 696 | x86_pmu.lbr_from = MSR_LBR_CORE_FROM; |
219 | x86_pmu.lbr_to = 0x60; | 697 | x86_pmu.lbr_to = MSR_LBR_CORE_TO; |
698 | |||
699 | /* | ||
700 | * SW branch filter usage: | ||
701 | * - compensate for lack of HW filter | ||
702 | */ | ||
703 | pr_cont("8-deep LBR, "); | ||
220 | } | 704 | } |
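
Taken together, the LBR rework means a user-space consumer only has to request a branch stack in perf_event_attr; the kernel applies the HW filter where the lbr_sel_map allows it and falls back to the opcode-based SW filter otherwise. A hedged user-space sketch (the event, period and filter choices are arbitrary examples, not mandated by this patch):

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 100000;
	/* request a branch stack with every sample */
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
	/* user-level calls only; mismatches are dropped by the SW filter */
	attr.branch_sample_type = PERF_SAMPLE_BRANCH_USER |
				  PERF_SAMPLE_BRANCH_ANY_CALL;

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	/* mmap the ring buffer and parse samples here (omitted) */
	close(fd);
	return 0;
}

Because of the SW filter, the number of entries returned per sample may be smaller than the LBR depth printed by the init functions above.
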
diff --git a/arch/x86/kernel/kprobes-common.h b/arch/x86/kernel/kprobes-common.h new file mode 100644 index 000000000000..3230b68ef29a --- /dev/null +++ b/arch/x86/kernel/kprobes-common.h | |||
@@ -0,0 +1,102 @@ | |||
1 | #ifndef __X86_KERNEL_KPROBES_COMMON_H | ||
2 | #define __X86_KERNEL_KPROBES_COMMON_H | ||
3 | |||
4 | /* Kprobes and Optprobes common header */ | ||
5 | |||
6 | #ifdef CONFIG_X86_64 | ||
7 | #define SAVE_REGS_STRING \ | ||
8 | /* Skip cs, ip, orig_ax. */ \ | ||
9 | " subq $24, %rsp\n" \ | ||
10 | " pushq %rdi\n" \ | ||
11 | " pushq %rsi\n" \ | ||
12 | " pushq %rdx\n" \ | ||
13 | " pushq %rcx\n" \ | ||
14 | " pushq %rax\n" \ | ||
15 | " pushq %r8\n" \ | ||
16 | " pushq %r9\n" \ | ||
17 | " pushq %r10\n" \ | ||
18 | " pushq %r11\n" \ | ||
19 | " pushq %rbx\n" \ | ||
20 | " pushq %rbp\n" \ | ||
21 | " pushq %r12\n" \ | ||
22 | " pushq %r13\n" \ | ||
23 | " pushq %r14\n" \ | ||
24 | " pushq %r15\n" | ||
25 | #define RESTORE_REGS_STRING \ | ||
26 | " popq %r15\n" \ | ||
27 | " popq %r14\n" \ | ||
28 | " popq %r13\n" \ | ||
29 | " popq %r12\n" \ | ||
30 | " popq %rbp\n" \ | ||
31 | " popq %rbx\n" \ | ||
32 | " popq %r11\n" \ | ||
33 | " popq %r10\n" \ | ||
34 | " popq %r9\n" \ | ||
35 | " popq %r8\n" \ | ||
36 | " popq %rax\n" \ | ||
37 | " popq %rcx\n" \ | ||
38 | " popq %rdx\n" \ | ||
39 | " popq %rsi\n" \ | ||
40 | " popq %rdi\n" \ | ||
41 | /* Skip orig_ax, ip, cs */ \ | ||
42 | " addq $24, %rsp\n" | ||
43 | #else | ||
44 | #define SAVE_REGS_STRING \ | ||
45 | /* Skip cs, ip, orig_ax and gs. */ \ | ||
46 | " subl $16, %esp\n" \ | ||
47 | " pushl %fs\n" \ | ||
48 | " pushl %es\n" \ | ||
49 | " pushl %ds\n" \ | ||
50 | " pushl %eax\n" \ | ||
51 | " pushl %ebp\n" \ | ||
52 | " pushl %edi\n" \ | ||
53 | " pushl %esi\n" \ | ||
54 | " pushl %edx\n" \ | ||
55 | " pushl %ecx\n" \ | ||
56 | " pushl %ebx\n" | ||
57 | #define RESTORE_REGS_STRING \ | ||
58 | " popl %ebx\n" \ | ||
59 | " popl %ecx\n" \ | ||
60 | " popl %edx\n" \ | ||
61 | " popl %esi\n" \ | ||
62 | " popl %edi\n" \ | ||
63 | " popl %ebp\n" \ | ||
64 | " popl %eax\n" \ | ||
65 | /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\ | ||
66 | " addl $24, %esp\n" | ||
67 | #endif | ||
68 | |||
69 | /* Check whether the instruction can be boosted */ | ||
70 | extern int can_boost(kprobe_opcode_t *instruction); | ||
71 | /* Recover the instruction if the given address is probed */ | ||
72 | extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf, | ||
73 | unsigned long addr); | ||
74 | /* | ||
75 | * Copy an instruction and adjust the displacement if the instruction | ||
76 | * uses the %rip-relative addressing mode. | ||
77 | */ | ||
78 | extern int __copy_instruction(u8 *dest, u8 *src); | ||
79 | |||
80 | /* Generate a relative-jump/call instruction */ | ||
81 | extern void synthesize_reljump(void *from, void *to); | ||
82 | extern void synthesize_relcall(void *from, void *to); | ||
83 | |||
84 | #ifdef CONFIG_OPTPROBES | ||
85 | extern int arch_init_optprobes(void); | ||
86 | extern int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter); | ||
87 | extern unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr); | ||
88 | #else /* !CONFIG_OPTPROBES */ | ||
89 | static inline int arch_init_optprobes(void) | ||
90 | { | ||
91 | return 0; | ||
92 | } | ||
93 | static inline int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) | ||
94 | { | ||
95 | return 0; | ||
96 | } | ||
97 | static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr) | ||
98 | { | ||
99 | return addr; | ||
100 | } | ||
101 | #endif | ||
102 | #endif | ||
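
The can_probe()/can_optimize() loops later in this series use these helpers in the same pattern; a condensed sketch of that usage, assuming the same context as the callers (asm/insn.h, linux/kprobes.h and this header):

/* Sketch: length of the original instruction at addr, 0 if it cannot be recovered. */
static int probed_insn_length(unsigned long addr)
{
	struct insn insn;
	kprobe_opcode_t buf[MAX_INSN_SIZE];

	/* get a view of the original bytes, even if addr already carries a kprobe */
	kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, addr));
	insn_get_length(&insn);

	/* a breakpoint placed by another subsystem cannot be recovered */
	if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
		return 0;

	return insn.length;
}

The caller must hold kprobe_mutex or have preemption disabled, for the reasons documented at recover_probed_instruction() in kprobes.c.
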
diff --git a/arch/x86/kernel/kprobes-opt.c b/arch/x86/kernel/kprobes-opt.c new file mode 100644 index 000000000000..c5e410eed403 --- /dev/null +++ b/arch/x86/kernel/kprobes-opt.c | |||
@@ -0,0 +1,512 @@ | |||
1 | /* | ||
2 | * Kernel Probes Jump Optimization (Optprobes) | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * Copyright (C) IBM Corporation, 2002, 2004 | ||
19 | * Copyright (C) Hitachi Ltd., 2012 | ||
20 | */ | ||
21 | #include <linux/kprobes.h> | ||
22 | #include <linux/ptrace.h> | ||
23 | #include <linux/string.h> | ||
24 | #include <linux/slab.h> | ||
25 | #include <linux/hardirq.h> | ||
26 | #include <linux/preempt.h> | ||
27 | #include <linux/module.h> | ||
28 | #include <linux/kdebug.h> | ||
29 | #include <linux/kallsyms.h> | ||
30 | #include <linux/ftrace.h> | ||
31 | |||
32 | #include <asm/cacheflush.h> | ||
33 | #include <asm/desc.h> | ||
34 | #include <asm/pgtable.h> | ||
35 | #include <asm/uaccess.h> | ||
36 | #include <asm/alternative.h> | ||
37 | #include <asm/insn.h> | ||
38 | #include <asm/debugreg.h> | ||
39 | |||
40 | #include "kprobes-common.h" | ||
41 | |||
42 | unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr) | ||
43 | { | ||
44 | struct optimized_kprobe *op; | ||
45 | struct kprobe *kp; | ||
46 | long offs; | ||
47 | int i; | ||
48 | |||
49 | for (i = 0; i < RELATIVEJUMP_SIZE; i++) { | ||
50 | kp = get_kprobe((void *)addr - i); | ||
51 | /* This function only handles jump-optimized kprobe */ | ||
52 | if (kp && kprobe_optimized(kp)) { | ||
53 | op = container_of(kp, struct optimized_kprobe, kp); | ||
54 | /* If op->list is not empty, op is under optimizing */ | ||
55 | if (list_empty(&op->list)) | ||
56 | goto found; | ||
57 | } | ||
58 | } | ||
59 | |||
60 | return addr; | ||
61 | found: | ||
62 | /* | ||
63 | * If the kprobe is optimized, the original bytes may have been | ||
64 | * overwritten by the jump destination address. In this case, the original | ||
65 | * bytes must be recovered from the op->optinsn.copied_insn buffer. | ||
66 | */ | ||
67 | memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); | ||
68 | if (addr == (unsigned long)kp->addr) { | ||
69 | buf[0] = kp->opcode; | ||
70 | memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); | ||
71 | } else { | ||
72 | offs = addr - (unsigned long)kp->addr - 1; | ||
73 | memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs); | ||
74 | } | ||
75 | |||
76 | return (unsigned long)buf; | ||
77 | } | ||
78 | |||
79 | /* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */ | ||
80 | static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val) | ||
81 | { | ||
82 | #ifdef CONFIG_X86_64 | ||
83 | *addr++ = 0x48; | ||
84 | *addr++ = 0xbf; | ||
85 | #else | ||
86 | *addr++ = 0xb8; | ||
87 | #endif | ||
88 | *(unsigned long *)addr = val; | ||
89 | } | ||
90 | |||
91 | static void __used __kprobes kprobes_optinsn_template_holder(void) | ||
92 | { | ||
93 | asm volatile ( | ||
94 | ".global optprobe_template_entry\n" | ||
95 | "optprobe_template_entry:\n" | ||
96 | #ifdef CONFIG_X86_64 | ||
97 | /* We don't bother saving the ss register */ | ||
98 | " pushq %rsp\n" | ||
99 | " pushfq\n" | ||
100 | SAVE_REGS_STRING | ||
101 | " movq %rsp, %rsi\n" | ||
102 | ".global optprobe_template_val\n" | ||
103 | "optprobe_template_val:\n" | ||
104 | ASM_NOP5 | ||
105 | ASM_NOP5 | ||
106 | ".global optprobe_template_call\n" | ||
107 | "optprobe_template_call:\n" | ||
108 | ASM_NOP5 | ||
109 | /* Move flags to rsp */ | ||
110 | " movq 144(%rsp), %rdx\n" | ||
111 | " movq %rdx, 152(%rsp)\n" | ||
112 | RESTORE_REGS_STRING | ||
113 | /* Skip flags entry */ | ||
114 | " addq $8, %rsp\n" | ||
115 | " popfq\n" | ||
116 | #else /* CONFIG_X86_32 */ | ||
117 | " pushf\n" | ||
118 | SAVE_REGS_STRING | ||
119 | " movl %esp, %edx\n" | ||
120 | ".global optprobe_template_val\n" | ||
121 | "optprobe_template_val:\n" | ||
122 | ASM_NOP5 | ||
123 | ".global optprobe_template_call\n" | ||
124 | "optprobe_template_call:\n" | ||
125 | ASM_NOP5 | ||
126 | RESTORE_REGS_STRING | ||
127 | " addl $4, %esp\n" /* skip cs */ | ||
128 | " popf\n" | ||
129 | #endif | ||
130 | ".global optprobe_template_end\n" | ||
131 | "optprobe_template_end:\n"); | ||
132 | } | ||
133 | |||
134 | #define TMPL_MOVE_IDX \ | ||
135 | ((long)&optprobe_template_val - (long)&optprobe_template_entry) | ||
136 | #define TMPL_CALL_IDX \ | ||
137 | ((long)&optprobe_template_call - (long)&optprobe_template_entry) | ||
138 | #define TMPL_END_IDX \ | ||
139 | ((long)&optprobe_template_end - (long)&optprobe_template_entry) | ||
140 | |||
141 | #define INT3_SIZE sizeof(kprobe_opcode_t) | ||
142 | |||
143 | /* Optimized kprobe callback function: called from optinsn */ | ||
144 | static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) | ||
145 | { | ||
146 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
147 | unsigned long flags; | ||
148 | |||
149 | /* This is possible if op is under delayed unoptimization */ | ||
150 | if (kprobe_disabled(&op->kp)) | ||
151 | return; | ||
152 | |||
153 | local_irq_save(flags); | ||
154 | if (kprobe_running()) { | ||
155 | kprobes_inc_nmissed_count(&op->kp); | ||
156 | } else { | ||
157 | /* Save skipped registers */ | ||
158 | #ifdef CONFIG_X86_64 | ||
159 | regs->cs = __KERNEL_CS; | ||
160 | #else | ||
161 | regs->cs = __KERNEL_CS | get_kernel_rpl(); | ||
162 | regs->gs = 0; | ||
163 | #endif | ||
164 | regs->ip = (unsigned long)op->kp.addr + INT3_SIZE; | ||
165 | regs->orig_ax = ~0UL; | ||
166 | |||
167 | __this_cpu_write(current_kprobe, &op->kp); | ||
168 | kcb->kprobe_status = KPROBE_HIT_ACTIVE; | ||
169 | opt_pre_handler(&op->kp, regs); | ||
170 | __this_cpu_write(current_kprobe, NULL); | ||
171 | } | ||
172 | local_irq_restore(flags); | ||
173 | } | ||
174 | |||
175 | static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) | ||
176 | { | ||
177 | int len = 0, ret; | ||
178 | |||
179 | while (len < RELATIVEJUMP_SIZE) { | ||
180 | ret = __copy_instruction(dest + len, src + len); | ||
181 | if (!ret || !can_boost(dest + len)) | ||
182 | return -EINVAL; | ||
183 | len += ret; | ||
184 | } | ||
185 | /* Check whether the address range is reserved */ | ||
186 | if (ftrace_text_reserved(src, src + len - 1) || | ||
187 | alternatives_text_reserved(src, src + len - 1) || | ||
188 | jump_label_text_reserved(src, src + len - 1)) | ||
189 | return -EBUSY; | ||
190 | |||
191 | return len; | ||
192 | } | ||
193 | |||
194 | /* Check whether the insn is an indirect jump */ | ||
195 | static int __kprobes insn_is_indirect_jump(struct insn *insn) | ||
196 | { | ||
197 | return ((insn->opcode.bytes[0] == 0xff && | ||
198 | (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ | ||
199 | insn->opcode.bytes[0] == 0xea); /* Segment based jump */ | ||
200 | } | ||
201 | |||
202 | /* Check whether insn jumps into specified address range */ | ||
203 | static int insn_jump_into_range(struct insn *insn, unsigned long start, int len) | ||
204 | { | ||
205 | unsigned long target = 0; | ||
206 | |||
207 | switch (insn->opcode.bytes[0]) { | ||
208 | case 0xe0: /* loopne */ | ||
209 | case 0xe1: /* loope */ | ||
210 | case 0xe2: /* loop */ | ||
211 | case 0xe3: /* jcxz */ | ||
212 | case 0xe9: /* near relative jump */ | ||
213 | case 0xeb: /* short relative jump */ | ||
214 | break; | ||
215 | case 0x0f: | ||
216 | if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */ | ||
217 | break; | ||
218 | return 0; | ||
219 | default: | ||
220 | if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */ | ||
221 | break; | ||
222 | return 0; | ||
223 | } | ||
224 | target = (unsigned long)insn->next_byte + insn->immediate.value; | ||
225 | |||
226 | return (start <= target && target <= start + len); | ||
227 | } | ||
228 | |||
229 | /* Decode the whole function to ensure no instruction jumps into the target */ | ||
230 | static int __kprobes can_optimize(unsigned long paddr) | ||
231 | { | ||
232 | unsigned long addr, size = 0, offset = 0; | ||
233 | struct insn insn; | ||
234 | kprobe_opcode_t buf[MAX_INSN_SIZE]; | ||
235 | |||
236 | /* Lookup symbol including addr */ | ||
237 | if (!kallsyms_lookup_size_offset(paddr, &size, &offset)) | ||
238 | return 0; | ||
239 | |||
240 | /* | ||
241 | * Do not optimize in the entry code due to the unstable | ||
242 | * stack handling. | ||
243 | */ | ||
244 | if ((paddr >= (unsigned long)__entry_text_start) && | ||
245 | (paddr < (unsigned long)__entry_text_end)) | ||
246 | return 0; | ||
247 | |||
248 | /* Check there is enough space for a relative jump. */ | ||
249 | if (size - offset < RELATIVEJUMP_SIZE) | ||
250 | return 0; | ||
251 | |||
252 | /* Decode instructions */ | ||
253 | addr = paddr - offset; | ||
254 | while (addr < paddr - offset + size) { /* Decode until function end */ | ||
255 | if (search_exception_tables(addr)) | ||
256 | /* | ||
257 | * Since some fixup code will jump into this function, | ||
258 | * we can't optimize a kprobe in this function. | ||
259 | */ | ||
260 | return 0; | ||
261 | kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, addr)); | ||
262 | insn_get_length(&insn); | ||
263 | /* Another subsystem puts a breakpoint */ | ||
264 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) | ||
265 | return 0; | ||
266 | /* Recover address */ | ||
267 | insn.kaddr = (void *)addr; | ||
268 | insn.next_byte = (void *)(addr + insn.length); | ||
269 | /* Check that the instruction doesn't jump into the target */ | ||
270 | if (insn_is_indirect_jump(&insn) || | ||
271 | insn_jump_into_range(&insn, paddr + INT3_SIZE, | ||
272 | RELATIVE_ADDR_SIZE)) | ||
273 | return 0; | ||
274 | addr += insn.length; | ||
275 | } | ||
276 | |||
277 | return 1; | ||
278 | } | ||
279 | |||
280 | /* Check optimized_kprobe can actually be optimized. */ | ||
281 | int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op) | ||
282 | { | ||
283 | int i; | ||
284 | struct kprobe *p; | ||
285 | |||
286 | for (i = 1; i < op->optinsn.size; i++) { | ||
287 | p = get_kprobe(op->kp.addr + i); | ||
288 | if (p && !kprobe_disabled(p)) | ||
289 | return -EEXIST; | ||
290 | } | ||
291 | |||
292 | return 0; | ||
293 | } | ||
294 | |||
295 | /* Check the addr is within the optimized instructions. */ | ||
296 | int __kprobes | ||
297 | arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr) | ||
298 | { | ||
299 | return ((unsigned long)op->kp.addr <= addr && | ||
300 | (unsigned long)op->kp.addr + op->optinsn.size > addr); | ||
301 | } | ||
302 | |||
303 | /* Free optimized instruction slot */ | ||
304 | static __kprobes | ||
305 | void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) | ||
306 | { | ||
307 | if (op->optinsn.insn) { | ||
308 | free_optinsn_slot(op->optinsn.insn, dirty); | ||
309 | op->optinsn.insn = NULL; | ||
310 | op->optinsn.size = 0; | ||
311 | } | ||
312 | } | ||
313 | |||
314 | void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op) | ||
315 | { | ||
316 | __arch_remove_optimized_kprobe(op, 1); | ||
317 | } | ||
318 | |||
319 | /* | ||
320 | * Copy the target instructions that will be replaced. | ||
321 | * The target instructions MUST be relocatable (checked inside). | ||
322 | * This is called when a new aggr(opt)probe is allocated or reused. | ||
323 | */ | ||
324 | int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) | ||
325 | { | ||
326 | u8 *buf; | ||
327 | int ret; | ||
328 | long rel; | ||
329 | |||
330 | if (!can_optimize((unsigned long)op->kp.addr)) | ||
331 | return -EILSEQ; | ||
332 | |||
333 | op->optinsn.insn = get_optinsn_slot(); | ||
334 | if (!op->optinsn.insn) | ||
335 | return -ENOMEM; | ||
336 | |||
337 | /* | ||
338 | * Verify that the address gap is within the 2GB range, because this uses | ||
339 | * a relative jump. | ||
340 | */ | ||
341 | rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE; | ||
342 | if (abs(rel) > 0x7fffffff) | ||
343 | return -ERANGE; | ||
344 | |||
345 | buf = (u8 *)op->optinsn.insn; | ||
346 | |||
347 | /* Copy instructions into the out-of-line buffer */ | ||
348 | ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr); | ||
349 | if (ret < 0) { | ||
350 | __arch_remove_optimized_kprobe(op, 0); | ||
351 | return ret; | ||
352 | } | ||
353 | op->optinsn.size = ret; | ||
354 | |||
355 | /* Copy arch-dep-instance from template */ | ||
356 | memcpy(buf, &optprobe_template_entry, TMPL_END_IDX); | ||
357 | |||
358 | /* Set probe information */ | ||
359 | synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op); | ||
360 | |||
361 | /* Set probe function call */ | ||
362 | synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback); | ||
363 | |||
364 | /* Set returning jmp instruction at the tail of out-of-line buffer */ | ||
365 | synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size, | ||
366 | (u8 *)op->kp.addr + op->optinsn.size); | ||
367 | |||
368 | flush_icache_range((unsigned long) buf, | ||
369 | (unsigned long) buf + TMPL_END_IDX + | ||
370 | op->optinsn.size + RELATIVEJUMP_SIZE); | ||
371 | return 0; | ||
372 | } | ||
373 | |||
374 | #define MAX_OPTIMIZE_PROBES 256 | ||
375 | static struct text_poke_param *jump_poke_params; | ||
376 | static struct jump_poke_buffer { | ||
377 | u8 buf[RELATIVEJUMP_SIZE]; | ||
378 | } *jump_poke_bufs; | ||
379 | |||
380 | static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm, | ||
381 | u8 *insn_buf, | ||
382 | struct optimized_kprobe *op) | ||
383 | { | ||
384 | s32 rel = (s32)((long)op->optinsn.insn - | ||
385 | ((long)op->kp.addr + RELATIVEJUMP_SIZE)); | ||
386 | |||
387 | /* Back up the instructions that will be replaced by the jump address */ | ||
388 | memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, | ||
389 | RELATIVE_ADDR_SIZE); | ||
390 | |||
391 | insn_buf[0] = RELATIVEJUMP_OPCODE; | ||
392 | *(s32 *)(&insn_buf[1]) = rel; | ||
393 | |||
394 | tprm->addr = op->kp.addr; | ||
395 | tprm->opcode = insn_buf; | ||
396 | tprm->len = RELATIVEJUMP_SIZE; | ||
397 | } | ||
398 | |||
399 | /* | ||
400 | * Replace breakpoints (int3) with relative jumps. | ||
401 | * Caller must hold kprobe_mutex and text_mutex. | ||
402 | */ | ||
403 | void __kprobes arch_optimize_kprobes(struct list_head *oplist) | ||
404 | { | ||
405 | struct optimized_kprobe *op, *tmp; | ||
406 | int c = 0; | ||
407 | |||
408 | list_for_each_entry_safe(op, tmp, oplist, list) { | ||
409 | WARN_ON(kprobe_disabled(&op->kp)); | ||
410 | /* Setup param */ | ||
411 | setup_optimize_kprobe(&jump_poke_params[c], | ||
412 | jump_poke_bufs[c].buf, op); | ||
413 | list_del_init(&op->list); | ||
414 | if (++c >= MAX_OPTIMIZE_PROBES) | ||
415 | break; | ||
416 | } | ||
417 | |||
418 | /* | ||
419 | * text_poke_smp doesn't support modifying NMI/MCE code. | ||
420 | * However, since kprobes itself also doesn't support NMI/MCE | ||
421 | * code probing, it's not a problem. | ||
422 | */ | ||
423 | text_poke_smp_batch(jump_poke_params, c); | ||
424 | } | ||
425 | |||
426 | static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm, | ||
427 | u8 *insn_buf, | ||
428 | struct optimized_kprobe *op) | ||
429 | { | ||
430 | /* Set int3 to first byte for kprobes */ | ||
431 | insn_buf[0] = BREAKPOINT_INSTRUCTION; | ||
432 | memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); | ||
433 | |||
434 | tprm->addr = op->kp.addr; | ||
435 | tprm->opcode = insn_buf; | ||
436 | tprm->len = RELATIVEJUMP_SIZE; | ||
437 | } | ||
438 | |||
439 | /* | ||
440 | * Recover original instructions and breakpoints from relative jumps. | ||
441 | * Caller must hold kprobe_mutex. | ||
442 | */ | ||
443 | extern void arch_unoptimize_kprobes(struct list_head *oplist, | ||
444 | struct list_head *done_list) | ||
445 | { | ||
446 | struct optimized_kprobe *op, *tmp; | ||
447 | int c = 0; | ||
448 | |||
449 | list_for_each_entry_safe(op, tmp, oplist, list) { | ||
450 | /* Setup param */ | ||
451 | setup_unoptimize_kprobe(&jump_poke_params[c], | ||
452 | jump_poke_bufs[c].buf, op); | ||
453 | list_move(&op->list, done_list); | ||
454 | if (++c >= MAX_OPTIMIZE_PROBES) | ||
455 | break; | ||
456 | } | ||
457 | |||
458 | /* | ||
459 | * text_poke_smp doesn't support modifying NMI/MCE code. | ||
460 | * However, since kprobes itself also doesn't support NMI/MCE | ||
461 | * code probing, it's not a problem. | ||
462 | */ | ||
463 | text_poke_smp_batch(jump_poke_params, c); | ||
464 | } | ||
465 | |||
466 | /* Replace a relative jump with a breakpoint (int3). */ | ||
467 | void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op) | ||
468 | { | ||
469 | u8 buf[RELATIVEJUMP_SIZE]; | ||
470 | |||
471 | /* Set int3 to first byte for kprobes */ | ||
472 | buf[0] = BREAKPOINT_INSTRUCTION; | ||
473 | memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); | ||
474 | text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE); | ||
475 | } | ||
476 | |||
477 | int __kprobes | ||
478 | setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) | ||
479 | { | ||
480 | struct optimized_kprobe *op; | ||
481 | |||
482 | if (p->flags & KPROBE_FLAG_OPTIMIZED) { | ||
483 | /* This kprobe is really able to run optimized path. */ | ||
484 | op = container_of(p, struct optimized_kprobe, kp); | ||
485 | /* Detour through copied instructions */ | ||
486 | regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX; | ||
487 | if (!reenter) | ||
488 | reset_current_kprobe(); | ||
489 | preempt_enable_no_resched(); | ||
490 | return 1; | ||
491 | } | ||
492 | return 0; | ||
493 | } | ||
494 | |||
495 | int __kprobes arch_init_optprobes(void) | ||
496 | { | ||
497 | /* Allocate code buffer and parameter array */ | ||
498 | jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) * | ||
499 | MAX_OPTIMIZE_PROBES, GFP_KERNEL); | ||
500 | if (!jump_poke_bufs) | ||
501 | return -ENOMEM; | ||
502 | |||
503 | jump_poke_params = kmalloc(sizeof(struct text_poke_param) * | ||
504 | MAX_OPTIMIZE_PROBES, GFP_KERNEL); | ||
505 | if (!jump_poke_params) { | ||
506 | kfree(jump_poke_bufs); | ||
507 | jump_poke_bufs = NULL; | ||
508 | return -ENOMEM; | ||
509 | } | ||
510 | |||
511 | return 0; | ||
512 | } | ||
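
None of this machinery is visible to kprobe users: a plain registration is transparently upgraded from an int3 breakpoint to a relative jump whenever can_optimize() allows it. A hedged sketch of such a registration (the probed symbol is only an example, not part of this patch):

#include <linux/module.h>
#include <linux/kprobes.h>

static int my_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
	/* invoked via opt_pre_handler() from optimized_callback() once optimized */
	return 0;
}

static struct kprobe my_kp = {
	.symbol_name	= "do_fork",	/* example target */
	.pre_handler	= my_pre_handler,
};

static int __init my_probe_init(void)
{
	return register_kprobe(&my_kp);
}

static void __exit my_probe_exit(void)
{
	unregister_kprobe(&my_kp);
}

module_init(my_probe_init);
module_exit(my_probe_exit);
MODULE_LICENSE("GPL");

Optimization happens asynchronously from the kprobe optimizer work, so the probe may still fire through the int3 path for a short while after registration.
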
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 7da647d8b64c..e213fc8408d2 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
@@ -30,16 +30,15 @@ | |||
30 | * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi | 30 | * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi |
31 | * <prasanna@in.ibm.com> added function-return probes. | 31 | * <prasanna@in.ibm.com> added function-return probes. |
32 | * 2005-May Rusty Lynch <rusty.lynch@intel.com> | 32 | * 2005-May Rusty Lynch <rusty.lynch@intel.com> |
33 | * Added function return probes functionality | 33 | * Added function return probes functionality |
34 | * 2006-Feb Masami Hiramatsu <hiramatu@sdl.hitachi.co.jp> added | 34 | * 2006-Feb Masami Hiramatsu <hiramatu@sdl.hitachi.co.jp> added |
35 | * kprobe-booster and kretprobe-booster for i386. | 35 | * kprobe-booster and kretprobe-booster for i386. |
36 | * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com> added kprobe-booster | 36 | * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com> added kprobe-booster |
37 | * and kretprobe-booster for x86-64 | 37 | * and kretprobe-booster for x86-64 |
38 | * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com>, Arjan van de Ven | 38 | * 2007-Dec Masami Hiramatsu <mhiramat@redhat.com>, Arjan van de Ven |
39 | * <arjan@infradead.org> and Jim Keniston <jkenisto@us.ibm.com> | 39 | * <arjan@infradead.org> and Jim Keniston <jkenisto@us.ibm.com> |
40 | * unified x86 kprobes code. | 40 | * unified x86 kprobes code. |
41 | */ | 41 | */ |
42 | |||
43 | #include <linux/kprobes.h> | 42 | #include <linux/kprobes.h> |
44 | #include <linux/ptrace.h> | 43 | #include <linux/ptrace.h> |
45 | #include <linux/string.h> | 44 | #include <linux/string.h> |
@@ -59,6 +58,8 @@ | |||
59 | #include <asm/insn.h> | 58 | #include <asm/insn.h> |
60 | #include <asm/debugreg.h> | 59 | #include <asm/debugreg.h> |
61 | 60 | ||
61 | #include "kprobes-common.h" | ||
62 | |||
62 | void jprobe_return_end(void); | 63 | void jprobe_return_end(void); |
63 | 64 | ||
64 | DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; | 65 | DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; |
@@ -108,6 +109,7 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = { | |||
108 | doesn't switch kernel stack.*/ | 109 | doesn't switch kernel stack.*/ |
109 | {NULL, NULL} /* Terminator */ | 110 | {NULL, NULL} /* Terminator */ |
110 | }; | 111 | }; |
112 | |||
111 | const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); | 113 | const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); |
112 | 114 | ||
113 | static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) | 115 | static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) |
@@ -123,11 +125,17 @@ static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) | |||
123 | } | 125 | } |
124 | 126 | ||
125 | /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ | 127 | /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ |
126 | static void __kprobes synthesize_reljump(void *from, void *to) | 128 | void __kprobes synthesize_reljump(void *from, void *to) |
127 | { | 129 | { |
128 | __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE); | 130 | __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE); |
129 | } | 131 | } |
130 | 132 | ||
133 | /* Insert a call instruction at address 'from', which calls address 'to'.*/ | ||
134 | void __kprobes synthesize_relcall(void *from, void *to) | ||
135 | { | ||
136 | __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE); | ||
137 | } | ||
138 | |||
131 | /* | 139 | /* |
132 | * Skip the prefixes of the instruction. | 140 | * Skip the prefixes of the instruction. |
133 | */ | 141 | */ |
@@ -151,7 +159,7 @@ static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn) | |||
151 | * Returns non-zero if opcode is boostable. | 159 | * Returns non-zero if opcode is boostable. |
152 | * RIP relative instructions are adjusted at copying time in 64 bits mode | 160 | * RIP relative instructions are adjusted at copying time in 64 bits mode |
153 | */ | 161 | */ |
154 | static int __kprobes can_boost(kprobe_opcode_t *opcodes) | 162 | int __kprobes can_boost(kprobe_opcode_t *opcodes) |
155 | { | 163 | { |
156 | kprobe_opcode_t opcode; | 164 | kprobe_opcode_t opcode; |
157 | kprobe_opcode_t *orig_opcodes = opcodes; | 165 | kprobe_opcode_t *orig_opcodes = opcodes; |
@@ -207,13 +215,15 @@ retry: | |||
207 | } | 215 | } |
208 | } | 216 | } |
209 | 217 | ||
210 | /* Recover the probed instruction at addr for further analysis. */ | 218 | static unsigned long |
211 | static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) | 219 | __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr) |
212 | { | 220 | { |
213 | struct kprobe *kp; | 221 | struct kprobe *kp; |
222 | |||
214 | kp = get_kprobe((void *)addr); | 223 | kp = get_kprobe((void *)addr); |
224 | /* There is no probe, return original address */ | ||
215 | if (!kp) | 225 | if (!kp) |
216 | return -EINVAL; | 226 | return addr; |
217 | 227 | ||
218 | /* | 228 | /* |
219 | * Basically, kp->ainsn.insn has an original instruction. | 229 | * Basically, kp->ainsn.insn has an original instruction. |
@@ -230,14 +240,29 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) | |||
230 | */ | 240 | */ |
231 | memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); | 241 | memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); |
232 | buf[0] = kp->opcode; | 242 | buf[0] = kp->opcode; |
233 | return 0; | 243 | return (unsigned long)buf; |
244 | } | ||
245 | |||
246 | /* | ||
247 | * Recover the probed instruction at addr for further analysis. | ||
248 | * Caller must lock kprobes via kprobe_mutex, or disable preemption, | ||
249 | * to prevent the referenced kprobes from being released. | ||
250 | */ | ||
251 | unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) | ||
252 | { | ||
253 | unsigned long __addr; | ||
254 | |||
255 | __addr = __recover_optprobed_insn(buf, addr); | ||
256 | if (__addr != addr) | ||
257 | return __addr; | ||
258 | |||
259 | return __recover_probed_insn(buf, addr); | ||
234 | } | 260 | } |
235 | 261 | ||
236 | /* Check if paddr is at an instruction boundary */ | 262 | /* Check if paddr is at an instruction boundary */ |
237 | static int __kprobes can_probe(unsigned long paddr) | 263 | static int __kprobes can_probe(unsigned long paddr) |
238 | { | 264 | { |
239 | int ret; | 265 | unsigned long addr, __addr, offset = 0; |
240 | unsigned long addr, offset = 0; | ||
241 | struct insn insn; | 266 | struct insn insn; |
242 | kprobe_opcode_t buf[MAX_INSN_SIZE]; | 267 | kprobe_opcode_t buf[MAX_INSN_SIZE]; |
243 | 268 | ||
@@ -247,26 +272,24 @@ static int __kprobes can_probe(unsigned long paddr) | |||
247 | /* Decode instructions */ | 272 | /* Decode instructions */ |
248 | addr = paddr - offset; | 273 | addr = paddr - offset; |
249 | while (addr < paddr) { | 274 | while (addr < paddr) { |
250 | kernel_insn_init(&insn, (void *)addr); | ||
251 | insn_get_opcode(&insn); | ||
252 | |||
253 | /* | 275 | /* |
254 | * Check if the instruction has been modified by another | 276 | * Check if the instruction has been modified by another |
255 | * kprobe, in which case we replace the breakpoint by the | 277 | * kprobe, in which case we replace the breakpoint by the |
256 | * original instruction in our buffer. | 278 | * original instruction in our buffer. |
279 | * Also, jump optimization will change the breakpoint to | ||
280 | * relative-jump. Since the relative-jump itself is | ||
281 | * normally used, we just go through if there is no kprobe. | ||
257 | */ | 282 | */ |
258 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { | 283 | __addr = recover_probed_instruction(buf, addr); |
259 | ret = recover_probed_instruction(buf, addr); | 284 | kernel_insn_init(&insn, (void *)__addr); |
260 | if (ret) | ||
261 | /* | ||
262 | * Another debugging subsystem might insert | ||
263 | * this breakpoint. In that case, we can't | ||
264 | * recover it. | ||
265 | */ | ||
266 | return 0; | ||
267 | kernel_insn_init(&insn, buf); | ||
268 | } | ||
269 | insn_get_length(&insn); | 285 | insn_get_length(&insn); |
286 | |||
287 | /* | ||
288 | * Another debugging subsystem might insert this breakpoint. | ||
289 | * In that case, we can't recover it. | ||
290 | */ | ||
291 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) | ||
292 | return 0; | ||
270 | addr += insn.length; | 293 | addr += insn.length; |
271 | } | 294 | } |
272 | 295 | ||
@@ -299,24 +322,16 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) | |||
299 | * If not, return null. | 322 | * If not, return null. |
300 | * Only applicable to 64-bit x86. | 323 | * Only applicable to 64-bit x86. |
301 | */ | 324 | */ |
302 | static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover) | 325 | int __kprobes __copy_instruction(u8 *dest, u8 *src) |
303 | { | 326 | { |
304 | struct insn insn; | 327 | struct insn insn; |
305 | int ret; | ||
306 | kprobe_opcode_t buf[MAX_INSN_SIZE]; | 328 | kprobe_opcode_t buf[MAX_INSN_SIZE]; |
307 | 329 | ||
308 | kernel_insn_init(&insn, src); | 330 | kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, (unsigned long)src)); |
309 | if (recover) { | ||
310 | insn_get_opcode(&insn); | ||
311 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { | ||
312 | ret = recover_probed_instruction(buf, | ||
313 | (unsigned long)src); | ||
314 | if (ret) | ||
315 | return 0; | ||
316 | kernel_insn_init(&insn, buf); | ||
317 | } | ||
318 | } | ||
319 | insn_get_length(&insn); | 331 | insn_get_length(&insn); |
332 | /* Another subsystem has put a breakpoint here; we cannot recover it */ | ||
333 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) | ||
334 | return 0; | ||
320 | memcpy(dest, insn.kaddr, insn.length); | 335 | memcpy(dest, insn.kaddr, insn.length); |
321 | 336 | ||
322 | #ifdef CONFIG_X86_64 | 337 | #ifdef CONFIG_X86_64 |
@@ -337,8 +352,7 @@ static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover) | |||
337 | * extension of the original signed 32-bit displacement would | 352 | * extension of the original signed 32-bit displacement would |
338 | * have given. | 353 | * have given. |
339 | */ | 354 | */ |
340 | newdisp = (u8 *) src + (s64) insn.displacement.value - | 355 | newdisp = (u8 *) src + (s64) insn.displacement.value - (u8 *) dest; |
341 | (u8 *) dest; | ||
342 | BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ | 356 | BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ |
343 | disp = (u8 *) dest + insn_offset_displacement(&insn); | 357 | disp = (u8 *) dest + insn_offset_displacement(&insn); |
344 | *(s32 *) disp = (s32) newdisp; | 358 | *(s32 *) disp = (s32) newdisp; |
@@ -349,18 +363,20 @@ static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover) | |||
349 | 363 | ||
350 | static void __kprobes arch_copy_kprobe(struct kprobe *p) | 364 | static void __kprobes arch_copy_kprobe(struct kprobe *p) |
351 | { | 365 | { |
366 | /* Copy the instruction, recovering it if another optprobe has modified it. */ | ||
367 | __copy_instruction(p->ainsn.insn, p->addr); | ||
368 | |||
352 | /* | 369 | /* |
353 | * Copy an instruction without recovering int3, because it will be | 370 | * __copy_instruction can modify the displacement of the instruction, |
354 | * put by another subsystem. | 371 | * but it doesn't affect the boostable check. |
355 | */ | 372 | */ |
356 | __copy_instruction(p->ainsn.insn, p->addr, 0); | 373 | if (can_boost(p->ainsn.insn)) |
357 | |||
358 | if (can_boost(p->addr)) | ||
359 | p->ainsn.boostable = 0; | 374 | p->ainsn.boostable = 0; |
360 | else | 375 | else |
361 | p->ainsn.boostable = -1; | 376 | p->ainsn.boostable = -1; |
362 | 377 | ||
363 | p->opcode = *p->addr; | 378 | /* Also, displacement change doesn't affect the first byte */ |
379 | p->opcode = p->ainsn.insn[0]; | ||
364 | } | 380 | } |
365 | 381 | ||
366 | int __kprobes arch_prepare_kprobe(struct kprobe *p) | 382 | int __kprobes arch_prepare_kprobe(struct kprobe *p) |
@@ -442,8 +458,8 @@ static void __kprobes restore_btf(void) | |||
442 | } | 458 | } |
443 | } | 459 | } |
444 | 460 | ||
445 | void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, | 461 | void __kprobes |
446 | struct pt_regs *regs) | 462 | arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) |
447 | { | 463 | { |
448 | unsigned long *sara = stack_addr(regs); | 464 | unsigned long *sara = stack_addr(regs); |
449 | 465 | ||
@@ -453,16 +469,8 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, | |||
453 | *sara = (unsigned long) &kretprobe_trampoline; | 469 | *sara = (unsigned long) &kretprobe_trampoline; |
454 | } | 470 | } |
455 | 471 | ||
456 | #ifdef CONFIG_OPTPROBES | 472 | static void __kprobes |
457 | static int __kprobes setup_detour_execution(struct kprobe *p, | 473 | setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb, int reenter) |
458 | struct pt_regs *regs, | ||
459 | int reenter); | ||
460 | #else | ||
461 | #define setup_detour_execution(p, regs, reenter) (0) | ||
462 | #endif | ||
463 | |||
464 | static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, | ||
465 | struct kprobe_ctlblk *kcb, int reenter) | ||
466 | { | 474 | { |
467 | if (setup_detour_execution(p, regs, reenter)) | 475 | if (setup_detour_execution(p, regs, reenter)) |
468 | return; | 476 | return; |
@@ -504,8 +512,8 @@ static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, | |||
504 | * within the handler. We save the original kprobes variables and just single | 512 | * within the handler. We save the original kprobes variables and just single |
505 | * step on the instruction of the new probe without calling any user handlers. | 513 | * step on the instruction of the new probe without calling any user handlers. |
506 | */ | 514 | */ |
507 | static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, | 515 | static int __kprobes |
508 | struct kprobe_ctlblk *kcb) | 516 | reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) |
509 | { | 517 | { |
510 | switch (kcb->kprobe_status) { | 518 | switch (kcb->kprobe_status) { |
511 | case KPROBE_HIT_SSDONE: | 519 | case KPROBE_HIT_SSDONE: |
@@ -600,69 +608,6 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) | |||
600 | return 0; | 608 | return 0; |
601 | } | 609 | } |
602 | 610 | ||
603 | #ifdef CONFIG_X86_64 | ||
604 | #define SAVE_REGS_STRING \ | ||
605 | /* Skip cs, ip, orig_ax. */ \ | ||
606 | " subq $24, %rsp\n" \ | ||
607 | " pushq %rdi\n" \ | ||
608 | " pushq %rsi\n" \ | ||
609 | " pushq %rdx\n" \ | ||
610 | " pushq %rcx\n" \ | ||
611 | " pushq %rax\n" \ | ||
612 | " pushq %r8\n" \ | ||
613 | " pushq %r9\n" \ | ||
614 | " pushq %r10\n" \ | ||
615 | " pushq %r11\n" \ | ||
616 | " pushq %rbx\n" \ | ||
617 | " pushq %rbp\n" \ | ||
618 | " pushq %r12\n" \ | ||
619 | " pushq %r13\n" \ | ||
620 | " pushq %r14\n" \ | ||
621 | " pushq %r15\n" | ||
622 | #define RESTORE_REGS_STRING \ | ||
623 | " popq %r15\n" \ | ||
624 | " popq %r14\n" \ | ||
625 | " popq %r13\n" \ | ||
626 | " popq %r12\n" \ | ||
627 | " popq %rbp\n" \ | ||
628 | " popq %rbx\n" \ | ||
629 | " popq %r11\n" \ | ||
630 | " popq %r10\n" \ | ||
631 | " popq %r9\n" \ | ||
632 | " popq %r8\n" \ | ||
633 | " popq %rax\n" \ | ||
634 | " popq %rcx\n" \ | ||
635 | " popq %rdx\n" \ | ||
636 | " popq %rsi\n" \ | ||
637 | " popq %rdi\n" \ | ||
638 | /* Skip orig_ax, ip, cs */ \ | ||
639 | " addq $24, %rsp\n" | ||
640 | #else | ||
641 | #define SAVE_REGS_STRING \ | ||
642 | /* Skip cs, ip, orig_ax and gs. */ \ | ||
643 | " subl $16, %esp\n" \ | ||
644 | " pushl %fs\n" \ | ||
645 | " pushl %es\n" \ | ||
646 | " pushl %ds\n" \ | ||
647 | " pushl %eax\n" \ | ||
648 | " pushl %ebp\n" \ | ||
649 | " pushl %edi\n" \ | ||
650 | " pushl %esi\n" \ | ||
651 | " pushl %edx\n" \ | ||
652 | " pushl %ecx\n" \ | ||
653 | " pushl %ebx\n" | ||
654 | #define RESTORE_REGS_STRING \ | ||
655 | " popl %ebx\n" \ | ||
656 | " popl %ecx\n" \ | ||
657 | " popl %edx\n" \ | ||
658 | " popl %esi\n" \ | ||
659 | " popl %edi\n" \ | ||
660 | " popl %ebp\n" \ | ||
661 | " popl %eax\n" \ | ||
662 | /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\ | ||
663 | " addl $24, %esp\n" | ||
664 | #endif | ||
665 | |||
666 | /* | 611 | /* |
667 | * When a retprobed function returns, this code saves registers and | 612 | * When a retprobed function returns, this code saves registers and |
668 | * calls trampoline_handler() runs, which calls the kretprobe's handler. | 613 | * calls trampoline_handler() runs, which calls the kretprobe's handler. |
@@ -816,8 +761,8 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) | |||
816 | * jump instruction after the copied instruction, that jumps to the next | 761 | * jump instruction after the copied instruction, that jumps to the next |
817 | * instruction after the probepoint. | 762 | * instruction after the probepoint. |
818 | */ | 763 | */ |
819 | static void __kprobes resume_execution(struct kprobe *p, | 764 | static void __kprobes |
820 | struct pt_regs *regs, struct kprobe_ctlblk *kcb) | 765 | resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) |
821 | { | 766 | { |
822 | unsigned long *tos = stack_addr(regs); | 767 | unsigned long *tos = stack_addr(regs); |
823 | unsigned long copy_ip = (unsigned long)p->ainsn.insn; | 768 | unsigned long copy_ip = (unsigned long)p->ainsn.insn; |
@@ -996,8 +941,8 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) | |||
996 | /* | 941 | /* |
997 | * Wrapper routine for handling exceptions. | 942 | * Wrapper routine for handling exceptions. |
998 | */ | 943 | */ |
999 | int __kprobes kprobe_exceptions_notify(struct notifier_block *self, | 944 | int __kprobes |
1000 | unsigned long val, void *data) | 945 | kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) |
1001 | { | 946 | { |
1002 | struct die_args *args = data; | 947 | struct die_args *args = data; |
1003 | int ret = NOTIFY_DONE; | 948 | int ret = NOTIFY_DONE; |
@@ -1107,466 +1052,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) | |||
1107 | return 0; | 1052 | return 0; |
1108 | } | 1053 | } |
1109 | 1054 | ||
1110 | |||
1111 | #ifdef CONFIG_OPTPROBES | ||
1112 | |||
1113 | /* Insert a call instruction at address 'from', which calls address 'to'.*/ | ||
1114 | static void __kprobes synthesize_relcall(void *from, void *to) | ||
1115 | { | ||
1116 | __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE); | ||
1117 | } | ||
1118 | |||
1119 | /* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */ | ||
1120 | static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, | ||
1121 | unsigned long val) | ||
1122 | { | ||
1123 | #ifdef CONFIG_X86_64 | ||
1124 | *addr++ = 0x48; | ||
1125 | *addr++ = 0xbf; | ||
1126 | #else | ||
1127 | *addr++ = 0xb8; | ||
1128 | #endif | ||
1129 | *(unsigned long *)addr = val; | ||
1130 | } | ||
1131 | |||
1132 | static void __used __kprobes kprobes_optinsn_template_holder(void) | ||
1133 | { | ||
1134 | asm volatile ( | ||
1135 | ".global optprobe_template_entry\n" | ||
1136 | "optprobe_template_entry: \n" | ||
1137 | #ifdef CONFIG_X86_64 | ||
1138 | /* We don't bother saving the ss register */ | ||
1139 | " pushq %rsp\n" | ||
1140 | " pushfq\n" | ||
1141 | SAVE_REGS_STRING | ||
1142 | " movq %rsp, %rsi\n" | ||
1143 | ".global optprobe_template_val\n" | ||
1144 | "optprobe_template_val: \n" | ||
1145 | ASM_NOP5 | ||
1146 | ASM_NOP5 | ||
1147 | ".global optprobe_template_call\n" | ||
1148 | "optprobe_template_call: \n" | ||
1149 | ASM_NOP5 | ||
1150 | /* Move flags to rsp */ | ||
1151 | " movq 144(%rsp), %rdx\n" | ||
1152 | " movq %rdx, 152(%rsp)\n" | ||
1153 | RESTORE_REGS_STRING | ||
1154 | /* Skip flags entry */ | ||
1155 | " addq $8, %rsp\n" | ||
1156 | " popfq\n" | ||
1157 | #else /* CONFIG_X86_32 */ | ||
1158 | " pushf\n" | ||
1159 | SAVE_REGS_STRING | ||
1160 | " movl %esp, %edx\n" | ||
1161 | ".global optprobe_template_val\n" | ||
1162 | "optprobe_template_val: \n" | ||
1163 | ASM_NOP5 | ||
1164 | ".global optprobe_template_call\n" | ||
1165 | "optprobe_template_call: \n" | ||
1166 | ASM_NOP5 | ||
1167 | RESTORE_REGS_STRING | ||
1168 | " addl $4, %esp\n" /* skip cs */ | ||
1169 | " popf\n" | ||
1170 | #endif | ||
1171 | ".global optprobe_template_end\n" | ||
1172 | "optprobe_template_end: \n"); | ||
1173 | } | ||
1174 | |||
1175 | #define TMPL_MOVE_IDX \ | ||
1176 | ((long)&optprobe_template_val - (long)&optprobe_template_entry) | ||
1177 | #define TMPL_CALL_IDX \ | ||
1178 | ((long)&optprobe_template_call - (long)&optprobe_template_entry) | ||
1179 | #define TMPL_END_IDX \ | ||
1180 | ((long)&optprobe_template_end - (long)&optprobe_template_entry) | ||
1181 | |||
1182 | #define INT3_SIZE sizeof(kprobe_opcode_t) | ||
1183 | |||
1184 | /* Optimized kprobe call back function: called from optinsn */ | ||
1185 | static void __kprobes optimized_callback(struct optimized_kprobe *op, | ||
1186 | struct pt_regs *regs) | ||
1187 | { | ||
1188 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
1189 | unsigned long flags; | ||
1190 | |||
1191 | /* This is possible if op is under delayed unoptimizing */ | ||
1192 | if (kprobe_disabled(&op->kp)) | ||
1193 | return; | ||
1194 | |||
1195 | local_irq_save(flags); | ||
1196 | if (kprobe_running()) { | ||
1197 | kprobes_inc_nmissed_count(&op->kp); | ||
1198 | } else { | ||
1199 | /* Save skipped registers */ | ||
1200 | #ifdef CONFIG_X86_64 | ||
1201 | regs->cs = __KERNEL_CS; | ||
1202 | #else | ||
1203 | regs->cs = __KERNEL_CS | get_kernel_rpl(); | ||
1204 | regs->gs = 0; | ||
1205 | #endif | ||
1206 | regs->ip = (unsigned long)op->kp.addr + INT3_SIZE; | ||
1207 | regs->orig_ax = ~0UL; | ||
1208 | |||
1209 | __this_cpu_write(current_kprobe, &op->kp); | ||
1210 | kcb->kprobe_status = KPROBE_HIT_ACTIVE; | ||
1211 | opt_pre_handler(&op->kp, regs); | ||
1212 | __this_cpu_write(current_kprobe, NULL); | ||
1213 | } | ||
1214 | local_irq_restore(flags); | ||
1215 | } | ||
1216 | |||
1217 | static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) | ||
1218 | { | ||
1219 | int len = 0, ret; | ||
1220 | |||
1221 | while (len < RELATIVEJUMP_SIZE) { | ||
1222 | ret = __copy_instruction(dest + len, src + len, 1); | ||
1223 | if (!ret || !can_boost(dest + len)) | ||
1224 | return -EINVAL; | ||
1225 | len += ret; | ||
1226 | } | ||
1227 | /* Check whether the address range is reserved */ | ||
1228 | if (ftrace_text_reserved(src, src + len - 1) || | ||
1229 | alternatives_text_reserved(src, src + len - 1) || | ||
1230 | jump_label_text_reserved(src, src + len - 1)) | ||
1231 | return -EBUSY; | ||
1232 | |||
1233 | return len; | ||
1234 | } | ||
1235 | |||
1236 | /* Check whether insn is indirect jump */ | ||
1237 | static int __kprobes insn_is_indirect_jump(struct insn *insn) | ||
1238 | { | ||
1239 | return ((insn->opcode.bytes[0] == 0xff && | ||
1240 | (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ | ||
1241 | insn->opcode.bytes[0] == 0xea); /* Segment based jump */ | ||
1242 | } | ||
1243 | |||
1244 | /* Check whether insn jumps into specified address range */ | ||
1245 | static int insn_jump_into_range(struct insn *insn, unsigned long start, int len) | ||
1246 | { | ||
1247 | unsigned long target = 0; | ||
1248 | |||
1249 | switch (insn->opcode.bytes[0]) { | ||
1250 | case 0xe0: /* loopne */ | ||
1251 | case 0xe1: /* loope */ | ||
1252 | case 0xe2: /* loop */ | ||
1253 | case 0xe3: /* jcxz */ | ||
1254 | case 0xe9: /* near relative jump */ | ||
1255 | case 0xeb: /* short relative jump */ | ||
1256 | break; | ||
1257 | case 0x0f: | ||
1258 | if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */ | ||
1259 | break; | ||
1260 | return 0; | ||
1261 | default: | ||
1262 | if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */ | ||
1263 | break; | ||
1264 | return 0; | ||
1265 | } | ||
1266 | target = (unsigned long)insn->next_byte + insn->immediate.value; | ||
1267 | |||
1268 | return (start <= target && target <= start + len); | ||
1269 | } | ||
1270 | |||
1271 | /* Decode whole function to ensure any instructions don't jump into target */ | ||
1272 | static int __kprobes can_optimize(unsigned long paddr) | ||
1273 | { | ||
1274 | int ret; | ||
1275 | unsigned long addr, size = 0, offset = 0; | ||
1276 | struct insn insn; | ||
1277 | kprobe_opcode_t buf[MAX_INSN_SIZE]; | ||
1278 | |||
1279 | /* Lookup symbol including addr */ | ||
1280 | if (!kallsyms_lookup_size_offset(paddr, &size, &offset)) | ||
1281 | return 0; | ||
1282 | |||
1283 | /* | ||
1284 | * Do not optimize in the entry code due to the unstable | ||
1285 | * stack handling. | ||
1286 | */ | ||
1287 | if ((paddr >= (unsigned long )__entry_text_start) && | ||
1288 | (paddr < (unsigned long )__entry_text_end)) | ||
1289 | return 0; | ||
1290 | |||
1291 | /* Check there is enough space for a relative jump. */ | ||
1292 | if (size - offset < RELATIVEJUMP_SIZE) | ||
1293 | return 0; | ||
1294 | |||
1295 | /* Decode instructions */ | ||
1296 | addr = paddr - offset; | ||
1297 | while (addr < paddr - offset + size) { /* Decode until function end */ | ||
1298 | if (search_exception_tables(addr)) | ||
1299 | /* | ||
1300 | * Since some fixup code will jump into this function, | ||
1301 | * we can't optimize a kprobe in this function. | ||
1302 | */ | ||
1303 | return 0; | ||
1304 | kernel_insn_init(&insn, (void *)addr); | ||
1305 | insn_get_opcode(&insn); | ||
1306 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { | ||
1307 | ret = recover_probed_instruction(buf, addr); | ||
1308 | if (ret) | ||
1309 | return 0; | ||
1310 | kernel_insn_init(&insn, buf); | ||
1311 | } | ||
1312 | insn_get_length(&insn); | ||
1313 | /* Recover address */ | ||
1314 | insn.kaddr = (void *)addr; | ||
1315 | insn.next_byte = (void *)(addr + insn.length); | ||
1316 | /* Check any instructions don't jump into target */ | ||
1317 | if (insn_is_indirect_jump(&insn) || | ||
1318 | insn_jump_into_range(&insn, paddr + INT3_SIZE, | ||
1319 | RELATIVE_ADDR_SIZE)) | ||
1320 | return 0; | ||
1321 | addr += insn.length; | ||
1322 | } | ||
1323 | |||
1324 | return 1; | ||
1325 | } | ||
1326 | |||
1327 | /* Check optimized_kprobe can actually be optimized. */ | ||
1328 | int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op) | ||
1329 | { | ||
1330 | int i; | ||
1331 | struct kprobe *p; | ||
1332 | |||
1333 | for (i = 1; i < op->optinsn.size; i++) { | ||
1334 | p = get_kprobe(op->kp.addr + i); | ||
1335 | if (p && !kprobe_disabled(p)) | ||
1336 | return -EEXIST; | ||
1337 | } | ||
1338 | |||
1339 | return 0; | ||
1340 | } | ||
1341 | |||
1342 | /* Check the addr is within the optimized instructions. */ | ||
1343 | int __kprobes arch_within_optimized_kprobe(struct optimized_kprobe *op, | ||
1344 | unsigned long addr) | ||
1345 | { | ||
1346 | return ((unsigned long)op->kp.addr <= addr && | ||
1347 | (unsigned long)op->kp.addr + op->optinsn.size > addr); | ||
1348 | } | ||
1349 | |||
1350 | /* Free optimized instruction slot */ | ||
1351 | static __kprobes | ||
1352 | void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) | ||
1353 | { | ||
1354 | if (op->optinsn.insn) { | ||
1355 | free_optinsn_slot(op->optinsn.insn, dirty); | ||
1356 | op->optinsn.insn = NULL; | ||
1357 | op->optinsn.size = 0; | ||
1358 | } | ||
1359 | } | ||
1360 | |||
1361 | void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op) | ||
1362 | { | ||
1363 | __arch_remove_optimized_kprobe(op, 1); | ||
1364 | } | ||
1365 | |||
1366 | /* | ||
1367 | * Copy replacing target instructions | ||
1368 | * Target instructions MUST be relocatable (checked inside) | ||
1369 | */ | ||
1370 | int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) | ||
1371 | { | ||
1372 | u8 *buf; | ||
1373 | int ret; | ||
1374 | long rel; | ||
1375 | |||
1376 | if (!can_optimize((unsigned long)op->kp.addr)) | ||
1377 | return -EILSEQ; | ||
1378 | |||
1379 | op->optinsn.insn = get_optinsn_slot(); | ||
1380 | if (!op->optinsn.insn) | ||
1381 | return -ENOMEM; | ||
1382 | |||
1383 | /* | ||
1384 | * Verify that the address gap is within the 2GB range, because this uses | ||
1385 | * a relative jump. | ||
1386 | */ | ||
1387 | rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE; | ||
1388 | if (abs(rel) > 0x7fffffff) | ||
1389 | return -ERANGE; | ||
1390 | |||
1391 | buf = (u8 *)op->optinsn.insn; | ||
1392 | |||
1393 | /* Copy instructions into the out-of-line buffer */ | ||
1394 | ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr); | ||
1395 | if (ret < 0) { | ||
1396 | __arch_remove_optimized_kprobe(op, 0); | ||
1397 | return ret; | ||
1398 | } | ||
1399 | op->optinsn.size = ret; | ||
1400 | |||
1401 | /* Copy arch-dep-instance from template */ | ||
1402 | memcpy(buf, &optprobe_template_entry, TMPL_END_IDX); | ||
1403 | |||
1404 | /* Set probe information */ | ||
1405 | synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op); | ||
1406 | |||
1407 | /* Set probe function call */ | ||
1408 | synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback); | ||
1409 | |||
1410 | /* Set returning jmp instruction at the tail of out-of-line buffer */ | ||
1411 | synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size, | ||
1412 | (u8 *)op->kp.addr + op->optinsn.size); | ||
1413 | |||
1414 | flush_icache_range((unsigned long) buf, | ||
1415 | (unsigned long) buf + TMPL_END_IDX + | ||
1416 | op->optinsn.size + RELATIVEJUMP_SIZE); | ||
1417 | return 0; | ||
1418 | } | ||
1419 | |||
1420 | #define MAX_OPTIMIZE_PROBES 256 | ||
1421 | static struct text_poke_param *jump_poke_params; | ||
1422 | static struct jump_poke_buffer { | ||
1423 | u8 buf[RELATIVEJUMP_SIZE]; | ||
1424 | } *jump_poke_bufs; | ||
1425 | |||
1426 | static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm, | ||
1427 | u8 *insn_buf, | ||
1428 | struct optimized_kprobe *op) | ||
1429 | { | ||
1430 | s32 rel = (s32)((long)op->optinsn.insn - | ||
1431 | ((long)op->kp.addr + RELATIVEJUMP_SIZE)); | ||
1432 | |||
1433 | /* Backup instructions which will be replaced by jump address */ | ||
1434 | memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, | ||
1435 | RELATIVE_ADDR_SIZE); | ||
1436 | |||
1437 | insn_buf[0] = RELATIVEJUMP_OPCODE; | ||
1438 | *(s32 *)(&insn_buf[1]) = rel; | ||
1439 | |||
1440 | tprm->addr = op->kp.addr; | ||
1441 | tprm->opcode = insn_buf; | ||
1442 | tprm->len = RELATIVEJUMP_SIZE; | ||
1443 | } | ||
1444 | |||
1445 | /* | ||
1446 | * Replace breakpoints (int3) with relative jumps. | ||
1447 | * Caller must hold kprobe_mutex and text_mutex. | ||
1448 | */ | ||
1449 | void __kprobes arch_optimize_kprobes(struct list_head *oplist) | ||
1450 | { | ||
1451 | struct optimized_kprobe *op, *tmp; | ||
1452 | int c = 0; | ||
1453 | |||
1454 | list_for_each_entry_safe(op, tmp, oplist, list) { | ||
1455 | WARN_ON(kprobe_disabled(&op->kp)); | ||
1456 | /* Setup param */ | ||
1457 | setup_optimize_kprobe(&jump_poke_params[c], | ||
1458 | jump_poke_bufs[c].buf, op); | ||
1459 | list_del_init(&op->list); | ||
1460 | if (++c >= MAX_OPTIMIZE_PROBES) | ||
1461 | break; | ||
1462 | } | ||
1463 | |||
1464 | /* | ||
1465 | * text_poke_smp doesn't support NMI/MCE code modifying. | ||
1466 | * However, since kprobes itself also doesn't support NMI/MCE | ||
1467 | * code probing, it's not a problem. | ||
1468 | */ | ||
1469 | text_poke_smp_batch(jump_poke_params, c); | ||
1470 | } | ||
1471 | |||
1472 | static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm, | ||
1473 | u8 *insn_buf, | ||
1474 | struct optimized_kprobe *op) | ||
1475 | { | ||
1476 | /* Set int3 to first byte for kprobes */ | ||
1477 | insn_buf[0] = BREAKPOINT_INSTRUCTION; | ||
1478 | memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); | ||
1479 | |||
1480 | tprm->addr = op->kp.addr; | ||
1481 | tprm->opcode = insn_buf; | ||
1482 | tprm->len = RELATIVEJUMP_SIZE; | ||
1483 | } | ||
1484 | |||
1485 | /* | ||
1486 | * Recover original instructions and breakpoints from relative jumps. | ||
1487 | * Caller must hold kprobe_mutex. | ||
1488 | */ | ||
1489 | extern void arch_unoptimize_kprobes(struct list_head *oplist, | ||
1490 | struct list_head *done_list) | ||
1491 | { | ||
1492 | struct optimized_kprobe *op, *tmp; | ||
1493 | int c = 0; | ||
1494 | |||
1495 | list_for_each_entry_safe(op, tmp, oplist, list) { | ||
1496 | /* Setup param */ | ||
1497 | setup_unoptimize_kprobe(&jump_poke_params[c], | ||
1498 | jump_poke_bufs[c].buf, op); | ||
1499 | list_move(&op->list, done_list); | ||
1500 | if (++c >= MAX_OPTIMIZE_PROBES) | ||
1501 | break; | ||
1502 | } | ||
1503 | |||
1504 | /* | ||
1505 | * text_poke_smp doesn't support NMI/MCE code modifying. | ||
1506 | * However, since kprobes itself also doesn't support NMI/MCE | ||
1507 | * code probing, it's not a problem. | ||
1508 | */ | ||
1509 | text_poke_smp_batch(jump_poke_params, c); | ||
1510 | } | ||
1511 | |||
1512 | /* Replace a relative jump with a breakpoint (int3). */ | ||
1513 | void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op) | ||
1514 | { | ||
1515 | u8 buf[RELATIVEJUMP_SIZE]; | ||
1516 | |||
1517 | /* Set int3 to first byte for kprobes */ | ||
1518 | buf[0] = BREAKPOINT_INSTRUCTION; | ||
1519 | memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); | ||
1520 | text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE); | ||
1521 | } | ||
1522 | |||
1523 | static int __kprobes setup_detour_execution(struct kprobe *p, | ||
1524 | struct pt_regs *regs, | ||
1525 | int reenter) | ||
1526 | { | ||
1527 | struct optimized_kprobe *op; | ||
1528 | |||
1529 | if (p->flags & KPROBE_FLAG_OPTIMIZED) { | ||
1530 | /* This kprobe is really able to run optimized path. */ | ||
1531 | op = container_of(p, struct optimized_kprobe, kp); | ||
1532 | /* Detour through copied instructions */ | ||
1533 | regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX; | ||
1534 | if (!reenter) | ||
1535 | reset_current_kprobe(); | ||
1536 | preempt_enable_no_resched(); | ||
1537 | return 1; | ||
1538 | } | ||
1539 | return 0; | ||
1540 | } | ||
1541 | |||
1542 | static int __kprobes init_poke_params(void) | ||
1543 | { | ||
1544 | /* Allocate code buffer and parameter array */ | ||
1545 | jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) * | ||
1546 | MAX_OPTIMIZE_PROBES, GFP_KERNEL); | ||
1547 | if (!jump_poke_bufs) | ||
1548 | return -ENOMEM; | ||
1549 | |||
1550 | jump_poke_params = kmalloc(sizeof(struct text_poke_param) * | ||
1551 | MAX_OPTIMIZE_PROBES, GFP_KERNEL); | ||
1552 | if (!jump_poke_params) { | ||
1553 | kfree(jump_poke_bufs); | ||
1554 | jump_poke_bufs = NULL; | ||
1555 | return -ENOMEM; | ||
1556 | } | ||
1557 | |||
1558 | return 0; | ||
1559 | } | ||
1560 | #else /* !CONFIG_OPTPROBES */ | ||
1561 | static int __kprobes init_poke_params(void) | ||
1562 | { | ||
1563 | return 0; | ||
1564 | } | ||
1565 | #endif | ||
1566 | |||
1567 | int __init arch_init_kprobes(void) | 1055 | int __init arch_init_kprobes(void) |
1568 | { | 1056 | { |
1569 | return init_poke_params(); | 1057 | return arch_init_optprobes(); |
1570 | } | 1058 | } |
1571 | 1059 | ||
1572 | int __kprobes arch_trampoline_kprobe(struct kprobe *p) | 1060 | int __kprobes arch_trampoline_kprobe(struct kprobe *p) |
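None of the code removed above is lost: arch_init_kprobes() now only calls arch_init_optprobes(), so the optprobe template, the jump/int3 poke batching and the detour setup presumably move to their own file elsewhere in this patch. The underlying trick stays the same: the probed int3 and the four bytes after it are overwritten with a five-byte relative jump into a detour buffer. A standalone sketch of how that jump is encoded, mirroring the rel computation in the removed setup_optimize_kprobe() (0xe9 is the architectural x86 "jmp rel32" opcode):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define RELATIVEJUMP_OPCODE 0xe9	/* x86 "jmp rel32" */
#define RELATIVEJUMP_SIZE   5

/* rel32 is measured from the end of the jump instruction to the detour. */
static void build_reljump(uint8_t buf[RELATIVEJUMP_SIZE],
			  uintptr_t probe_addr, uintptr_t detour)
{
	int32_t rel = (int32_t)(detour - (probe_addr + RELATIVEJUMP_SIZE));

	buf[0] = RELATIVEJUMP_OPCODE;
	memcpy(&buf[1], &rel, sizeof(rel));
}

int main(void)
{
	uint8_t buf[RELATIVEJUMP_SIZE];
	int32_t rel;

	build_reljump(buf, 0x1000, 0x2000);
	memcpy(&rel, &buf[1], sizeof(rel));
	printf("opcode %#x, rel32 %d\n", buf[0], (int)rel);
	return 0;
}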
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index f0c6fd6f176b..694d801bf606 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -438,9 +438,9 @@ void __init kvm_guest_init(void) | |||
438 | static __init int activate_jump_labels(void) | 438 | static __init int activate_jump_labels(void) |
439 | { | 439 | { |
440 | if (has_steal_clock) { | 440 | if (has_steal_clock) { |
441 | jump_label_inc(¶virt_steal_enabled); | 441 | static_key_slow_inc(¶virt_steal_enabled); |
442 | if (steal_acc) | 442 | if (steal_acc) |
443 | jump_label_inc(¶virt_steal_rq_enabled); | 443 | static_key_slow_inc(¶virt_steal_rq_enabled); |
444 | } | 444 | } |
445 | 445 | ||
446 | return 0; | 446 | return 0; |
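This hunk, like the paravirt and mmu_audit hunks below, only follows an API rename: struct jump_label_key becomes struct static_key, jump_label_inc()/jump_label_dec() become static_key_slow_inc()/static_key_slow_dec(), and static_branch() becomes static_key_false(). A minimal kernel-style sketch of the renamed API as these files use it; my_feature_key, my_feature_enable() and do_something_slow() are made-up names:

#include <linux/jump_label.h>

extern void do_something_slow(void);		/* hypothetical slow path */

static struct static_key my_feature_key;	/* zero-initialized: false */

static void my_feature_enable(void)
{
	static_key_slow_inc(&my_feature_key);	/* patches the branch site(s) */
}

static inline void my_feature_hook(void)
{
	if (static_key_false(&my_feature_key))	/* a nop while the key is false */
		do_something_slow();
}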
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index d90272e6bc40..ada2f99388dd 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -202,8 +202,8 @@ static void native_flush_tlb_single(unsigned long addr) | |||
202 | __native_flush_tlb_single(addr); | 202 | __native_flush_tlb_single(addr); |
203 | } | 203 | } |
204 | 204 | ||
205 | struct jump_label_key paravirt_steal_enabled; | 205 | struct static_key paravirt_steal_enabled; |
206 | struct jump_label_key paravirt_steal_rq_enabled; | 206 | struct static_key paravirt_steal_rq_enabled; |
207 | 207 | ||
208 | static u64 native_steal_clock(int cpu) | 208 | static u64 native_steal_clock(int cpu) |
209 | { | 209 | { |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 15763af7bfe3..44eefde92109 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -377,8 +377,8 @@ static inline int hlt_use_halt(void) | |||
377 | void default_idle(void) | 377 | void default_idle(void) |
378 | { | 378 | { |
379 | if (hlt_use_halt()) { | 379 | if (hlt_use_halt()) { |
380 | trace_power_start(POWER_CSTATE, 1, smp_processor_id()); | 380 | trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id()); |
381 | trace_cpu_idle(1, smp_processor_id()); | 381 | trace_cpu_idle_rcuidle(1, smp_processor_id()); |
382 | current_thread_info()->status &= ~TS_POLLING; | 382 | current_thread_info()->status &= ~TS_POLLING; |
383 | /* | 383 | /* |
384 | * TS_POLLING-cleared state must be visible before we | 384 | * TS_POLLING-cleared state must be visible before we |
@@ -391,8 +391,8 @@ void default_idle(void) | |||
391 | else | 391 | else |
392 | local_irq_enable(); | 392 | local_irq_enable(); |
393 | current_thread_info()->status |= TS_POLLING; | 393 | current_thread_info()->status |= TS_POLLING; |
394 | trace_power_end(smp_processor_id()); | 394 | trace_power_end_rcuidle(smp_processor_id()); |
395 | trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); | 395 | trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); |
396 | } else { | 396 | } else { |
397 | local_irq_enable(); | 397 | local_irq_enable(); |
398 | /* loop is done by the caller */ | 398 | /* loop is done by the caller */ |
@@ -450,8 +450,8 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); | |||
450 | static void mwait_idle(void) | 450 | static void mwait_idle(void) |
451 | { | 451 | { |
452 | if (!need_resched()) { | 452 | if (!need_resched()) { |
453 | trace_power_start(POWER_CSTATE, 1, smp_processor_id()); | 453 | trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id()); |
454 | trace_cpu_idle(1, smp_processor_id()); | 454 | trace_cpu_idle_rcuidle(1, smp_processor_id()); |
455 | if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) | 455 | if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) |
456 | clflush((void *)¤t_thread_info()->flags); | 456 | clflush((void *)¤t_thread_info()->flags); |
457 | 457 | ||
@@ -461,8 +461,8 @@ static void mwait_idle(void) | |||
461 | __sti_mwait(0, 0); | 461 | __sti_mwait(0, 0); |
462 | else | 462 | else |
463 | local_irq_enable(); | 463 | local_irq_enable(); |
464 | trace_power_end(smp_processor_id()); | 464 | trace_power_end_rcuidle(smp_processor_id()); |
465 | trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); | 465 | trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); |
466 | } else | 466 | } else |
467 | local_irq_enable(); | 467 | local_irq_enable(); |
468 | } | 468 | } |
@@ -474,13 +474,13 @@ static void mwait_idle(void) | |||
474 | */ | 474 | */ |
475 | static void poll_idle(void) | 475 | static void poll_idle(void) |
476 | { | 476 | { |
477 | trace_power_start(POWER_CSTATE, 0, smp_processor_id()); | 477 | trace_power_start_rcuidle(POWER_CSTATE, 0, smp_processor_id()); |
478 | trace_cpu_idle(0, smp_processor_id()); | 478 | trace_cpu_idle_rcuidle(0, smp_processor_id()); |
479 | local_irq_enable(); | 479 | local_irq_enable(); |
480 | while (!need_resched()) | 480 | while (!need_resched()) |
481 | cpu_relax(); | 481 | cpu_relax(); |
482 | trace_power_end(smp_processor_id()); | 482 | trace_power_end_rcuidle(smp_processor_id()); |
483 | trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); | 483 | trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); |
484 | } | 484 | } |
485 | 485 | ||
486 | /* | 486 | /* |
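The process.c hunks switch the idle-path tracepoints to their _rcuidle variants, which exist for call sites that run while RCU is not watching, the idle loop being the usual case. A hedged kernel-style sketch of the resulting pattern; my_idle() is a made-up example and the exact headers are an assumption:

#include <linux/smp.h>
#include <trace/events/power.h>

static void my_idle(void)
{
	trace_cpu_idle_rcuidle(1, smp_processor_id());			/* entering idle */
	/* ... halt or poll until need_resched() ... */
	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());	/* leaving idle */
}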
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index fe15dcc07a6b..ea7b4fd34676 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c | |||
@@ -234,7 +234,7 @@ static void audit_vcpu_spte(struct kvm_vcpu *vcpu) | |||
234 | } | 234 | } |
235 | 235 | ||
236 | static bool mmu_audit; | 236 | static bool mmu_audit; |
237 | static struct jump_label_key mmu_audit_key; | 237 | static struct static_key mmu_audit_key; |
238 | 238 | ||
239 | static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) | 239 | static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) |
240 | { | 240 | { |
@@ -250,7 +250,7 @@ static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) | |||
250 | 250 | ||
251 | static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) | 251 | static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) |
252 | { | 252 | { |
253 | if (static_branch((&mmu_audit_key))) | 253 | if (static_key_false((&mmu_audit_key))) |
254 | __kvm_mmu_audit(vcpu, point); | 254 | __kvm_mmu_audit(vcpu, point); |
255 | } | 255 | } |
256 | 256 | ||
@@ -259,7 +259,7 @@ static void mmu_audit_enable(void) | |||
259 | if (mmu_audit) | 259 | if (mmu_audit) |
260 | return; | 260 | return; |
261 | 261 | ||
262 | jump_label_inc(&mmu_audit_key); | 262 | static_key_slow_inc(&mmu_audit_key); |
263 | mmu_audit = true; | 263 | mmu_audit = true; |
264 | } | 264 | } |
265 | 265 | ||
@@ -268,7 +268,7 @@ static void mmu_audit_disable(void) | |||
268 | if (!mmu_audit) | 268 | if (!mmu_audit) |
269 | return; | 269 | return; |
270 | 270 | ||
271 | jump_label_dec(&mmu_audit_key); | 271 | static_key_slow_dec(&mmu_audit_key); |
272 | mmu_audit = false; | 272 | mmu_audit = false; |
273 | } | 273 | } |
274 | 274 | ||
diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c index 88ad5fbda6e1..c1f01a8e9f65 100644 --- a/arch/x86/lib/inat.c +++ b/arch/x86/lib/inat.c | |||
@@ -29,46 +29,46 @@ insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode) | |||
29 | return inat_primary_table[opcode]; | 29 | return inat_primary_table[opcode]; |
30 | } | 30 | } |
31 | 31 | ||
32 | insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, insn_byte_t last_pfx, | 32 | int inat_get_last_prefix_id(insn_byte_t last_pfx) |
33 | { | ||
34 | insn_attr_t lpfx_attr; | ||
35 | |||
36 | lpfx_attr = inat_get_opcode_attribute(last_pfx); | ||
37 | return inat_last_prefix_id(lpfx_attr); | ||
38 | } | ||
39 | |||
40 | insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id, | ||
33 | insn_attr_t esc_attr) | 41 | insn_attr_t esc_attr) |
34 | { | 42 | { |
35 | const insn_attr_t *table; | 43 | const insn_attr_t *table; |
36 | insn_attr_t lpfx_attr; | 44 | int n; |
37 | int n, m = 0; | ||
38 | 45 | ||
39 | n = inat_escape_id(esc_attr); | 46 | n = inat_escape_id(esc_attr); |
40 | if (last_pfx) { | 47 | |
41 | lpfx_attr = inat_get_opcode_attribute(last_pfx); | ||
42 | m = inat_last_prefix_id(lpfx_attr); | ||
43 | } | ||
44 | table = inat_escape_tables[n][0]; | 48 | table = inat_escape_tables[n][0]; |
45 | if (!table) | 49 | if (!table) |
46 | return 0; | 50 | return 0; |
47 | if (inat_has_variant(table[opcode]) && m) { | 51 | if (inat_has_variant(table[opcode]) && lpfx_id) { |
48 | table = inat_escape_tables[n][m]; | 52 | table = inat_escape_tables[n][lpfx_id]; |
49 | if (!table) | 53 | if (!table) |
50 | return 0; | 54 | return 0; |
51 | } | 55 | } |
52 | return table[opcode]; | 56 | return table[opcode]; |
53 | } | 57 | } |
54 | 58 | ||
55 | insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx, | 59 | insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id, |
56 | insn_attr_t grp_attr) | 60 | insn_attr_t grp_attr) |
57 | { | 61 | { |
58 | const insn_attr_t *table; | 62 | const insn_attr_t *table; |
59 | insn_attr_t lpfx_attr; | 63 | int n; |
60 | int n, m = 0; | ||
61 | 64 | ||
62 | n = inat_group_id(grp_attr); | 65 | n = inat_group_id(grp_attr); |
63 | if (last_pfx) { | 66 | |
64 | lpfx_attr = inat_get_opcode_attribute(last_pfx); | ||
65 | m = inat_last_prefix_id(lpfx_attr); | ||
66 | } | ||
67 | table = inat_group_tables[n][0]; | 67 | table = inat_group_tables[n][0]; |
68 | if (!table) | 68 | if (!table) |
69 | return inat_group_common_attribute(grp_attr); | 69 | return inat_group_common_attribute(grp_attr); |
70 | if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && m) { | 70 | if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && lpfx_id) { |
71 | table = inat_group_tables[n][m]; | 71 | table = inat_group_tables[n][lpfx_id]; |
72 | if (!table) | 72 | if (!table) |
73 | return inat_group_common_attribute(grp_attr); | 73 | return inat_group_common_attribute(grp_attr); |
74 | } | 74 | } |
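The inat.c change removes the duplicated "decode the last prefix again" logic from both table lookups: callers now compute a last-prefix id once, via the new inat_get_last_prefix_id() (wrapped by insn_last_prefix_id(), as the insn.c hunks below show), and pass that id to the escape and group lookups. A kernel-style sketch of the new calling convention, assuming <asm/insn.h>:

#include <asm/insn.h>

/* Look up the attribute of an escaped (two-byte) opcode with the new API:
 * the last-prefix id is computed once and handed to the table lookup. */
static insn_attr_t lookup_escape_attr(struct insn *insn, insn_byte_t op)
{
	int lpfx_id = insn_last_prefix_id(insn);	/* 0 means no last prefix */

	return inat_get_escape_attribute(op, lpfx_id, insn->attr);
}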
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 5a1f9f3e3fbb..25feb1ae71c5 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c | |||
@@ -185,7 +185,8 @@ err_out: | |||
185 | void insn_get_opcode(struct insn *insn) | 185 | void insn_get_opcode(struct insn *insn) |
186 | { | 186 | { |
187 | struct insn_field *opcode = &insn->opcode; | 187 | struct insn_field *opcode = &insn->opcode; |
188 | insn_byte_t op, pfx; | 188 | insn_byte_t op; |
189 | int pfx_id; | ||
189 | if (opcode->got) | 190 | if (opcode->got) |
190 | return; | 191 | return; |
191 | if (!insn->prefixes.got) | 192 | if (!insn->prefixes.got) |
@@ -212,8 +213,8 @@ void insn_get_opcode(struct insn *insn) | |||
212 | /* Get escaped opcode */ | 213 | /* Get escaped opcode */ |
213 | op = get_next(insn_byte_t, insn); | 214 | op = get_next(insn_byte_t, insn); |
214 | opcode->bytes[opcode->nbytes++] = op; | 215 | opcode->bytes[opcode->nbytes++] = op; |
215 | pfx = insn_last_prefix(insn); | 216 | pfx_id = insn_last_prefix_id(insn); |
216 | insn->attr = inat_get_escape_attribute(op, pfx, insn->attr); | 217 | insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr); |
217 | } | 218 | } |
218 | if (inat_must_vex(insn->attr)) | 219 | if (inat_must_vex(insn->attr)) |
219 | insn->attr = 0; /* This instruction is bad */ | 220 | insn->attr = 0; /* This instruction is bad */ |
@@ -235,7 +236,7 @@ err_out: | |||
235 | void insn_get_modrm(struct insn *insn) | 236 | void insn_get_modrm(struct insn *insn) |
236 | { | 237 | { |
237 | struct insn_field *modrm = &insn->modrm; | 238 | struct insn_field *modrm = &insn->modrm; |
238 | insn_byte_t pfx, mod; | 239 | insn_byte_t pfx_id, mod; |
239 | if (modrm->got) | 240 | if (modrm->got) |
240 | return; | 241 | return; |
241 | if (!insn->opcode.got) | 242 | if (!insn->opcode.got) |
@@ -246,8 +247,8 @@ void insn_get_modrm(struct insn *insn) | |||
246 | modrm->value = mod; | 247 | modrm->value = mod; |
247 | modrm->nbytes = 1; | 248 | modrm->nbytes = 1; |
248 | if (inat_is_group(insn->attr)) { | 249 | if (inat_is_group(insn->attr)) { |
249 | pfx = insn_last_prefix(insn); | 250 | pfx_id = insn_last_prefix_id(insn); |
250 | insn->attr = inat_get_group_attribute(mod, pfx, | 251 | insn->attr = inat_get_group_attribute(mod, pfx_id, |
251 | insn->attr); | 252 | insn->attr); |
252 | if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) | 253 | if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) |
253 | insn->attr = 0; /* This is bad */ | 254 | insn->attr = 0; /* This is bad */ |