diff options
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/Makefile | 1 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/amd.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.c | 82 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.h | 8 | ||||
-rw-r--r-- | arch/x86/kernel/uprobes.c | 423 |
5 files changed, 517 insertions, 0 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 5369059c07a..8c8c365a3bc 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -100,6 +100,7 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o | |||
100 | 100 | ||
101 | obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o | 101 | obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o |
102 | obj-$(CONFIG_OF) += devicetree.o | 102 | obj-$(CONFIG_OF) += devicetree.o |
103 | obj-$(CONFIG_UPROBES) += uprobes.o | ||
103 | 104 | ||
104 | ### | 105 | ### |
105 | # 64 bit specific files | 106 | # 64 bit specific files |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index f4773f4aae3..0a44b90602b 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -5,6 +5,7 @@ | |||
5 | #include <linux/mm.h> | 5 | #include <linux/mm.h> |
6 | 6 | ||
7 | #include <linux/io.h> | 7 | #include <linux/io.h> |
8 | #include <linux/sched.h> | ||
8 | #include <asm/processor.h> | 9 | #include <asm/processor.h> |
9 | #include <asm/apic.h> | 10 | #include <asm/apic.h> |
10 | #include <asm/cpu.h> | 11 | #include <asm/cpu.h> |
@@ -456,6 +457,8 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) | |||
456 | if (c->x86_power & (1 << 8)) { | 457 | if (c->x86_power & (1 << 8)) { |
457 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 458 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
458 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); | 459 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); |
460 | if (!check_tsc_unstable()) | ||
461 | sched_clock_stable = 1; | ||
459 | } | 462 | } |
460 | 463 | ||
461 | #ifdef CONFIG_X86_64 | 464 | #ifdef CONFIG_X86_64 |
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 63c0e058a40..1c52bdbb9b8 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -24,12 +24,14 @@ | |||
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/cpu.h> | 25 | #include <linux/cpu.h> |
26 | #include <linux/bitops.h> | 26 | #include <linux/bitops.h> |
27 | #include <linux/device.h> | ||
27 | 28 | ||
28 | #include <asm/apic.h> | 29 | #include <asm/apic.h> |
29 | #include <asm/stacktrace.h> | 30 | #include <asm/stacktrace.h> |
30 | #include <asm/nmi.h> | 31 | #include <asm/nmi.h> |
31 | #include <asm/smp.h> | 32 | #include <asm/smp.h> |
32 | #include <asm/alternative.h> | 33 | #include <asm/alternative.h> |
34 | #include <asm/timer.h> | ||
33 | 35 | ||
34 | #include "perf_event.h" | 36 | #include "perf_event.h" |
35 | 37 | ||
@@ -1209,6 +1211,8 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) | |||
1209 | break; | 1211 | break; |
1210 | 1212 | ||
1211 | case CPU_STARTING: | 1213 | case CPU_STARTING: |
1214 | if (x86_pmu.attr_rdpmc) | ||
1215 | set_in_cr4(X86_CR4_PCE); | ||
1212 | if (x86_pmu.cpu_starting) | 1216 | if (x86_pmu.cpu_starting) |
1213 | x86_pmu.cpu_starting(cpu); | 1217 | x86_pmu.cpu_starting(cpu); |
1214 | break; | 1218 | break; |
@@ -1318,6 +1322,8 @@ static int __init init_hw_perf_events(void) | |||
1318 | } | 1322 | } |
1319 | } | 1323 | } |
1320 | 1324 | ||
1325 | x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ | ||
1326 | |||
1321 | pr_info("... version: %d\n", x86_pmu.version); | 1327 | pr_info("... version: %d\n", x86_pmu.version); |
1322 | pr_info("... bit width: %d\n", x86_pmu.cntval_bits); | 1328 | pr_info("... bit width: %d\n", x86_pmu.cntval_bits); |
1323 | pr_info("... generic registers: %d\n", x86_pmu.num_counters); | 1329 | pr_info("... generic registers: %d\n", x86_pmu.num_counters); |
@@ -1541,10 +1547,71 @@ static int x86_pmu_event_init(struct perf_event *event) | |||
1541 | return err; | 1547 | return err; |
1542 | } | 1548 | } |
1543 | 1549 | ||
1550 | static int x86_pmu_event_idx(struct perf_event *event) /* map event->hw.idx to the 1-based index this PMU reports through .event_idx */ | ||
1551 | { | ||
1552 | int idx = event->hw.idx; | ||
1553 | |||
1554 | if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) { /* only when fixed counters exist and idx lies in the fixed range */ | ||
1555 | idx -= X86_PMC_IDX_FIXED; /* rebase so the first fixed counter becomes 0 */ | ||
1556 | idx |= 1 << 30; /* NOTE(review): presumably the RDPMC fixed-counter select bit -- confirm against the SDM */ | ||
1557 | } | ||
1558 | |||
1559 | return idx + 1; /* NOTE(review): +1 presumably keeps 0 free to mean "no counter" -- confirm with the consumer */ | ||
1560 | } | ||
1561 | |||
1562 | static ssize_t get_attr_rdpmc(struct device *cdev, /* show handler passed to DEVICE_ATTR(rdpmc, ...) */ | ||
1563 | struct device_attribute *attr, | ||
1564 | char *buf) | ||
1565 | { | ||
1566 | return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc); /* prints the current flag as a decimal line; output is capped at 40 bytes */ | ||
1567 | } | ||
1568 | |||
1569 | static void change_rdpmc(void *info) /* cross-call callback: info is not dereferenced, it carries the requested value as an integer */ | ||
1570 | { | ||
1571 | bool enable = !!(unsigned long)info; /* any non-zero value means enable */ | ||
1572 | |||
1573 | if (enable) | ||
1574 | set_in_cr4(X86_CR4_PCE); /* set the PCE bit in CR4 */ | ||
1575 | else | ||
1576 | clear_in_cr4(X86_CR4_PCE); /* clear the PCE bit in CR4 */ | ||
1577 | } | ||
1578 | |||
1579 | static ssize_t set_attr_rdpmc(struct device *cdev, /* store handler passed to DEVICE_ATTR(rdpmc, ...) */ | ||
1580 | struct device_attribute *attr, | ||
1581 | const char *buf, size_t count) | ||
1582 | { | ||
1583 | unsigned long val = simple_strtoul(buf, NULL, 0); /* base 0: the prefix of the input selects decimal, hex or octal */ | ||
1584 | |||
1585 | if (!!val != !!x86_pmu.attr_rdpmc) { /* act only when the boolean state actually flips */ | ||
1586 | x86_pmu.attr_rdpmc = !!val; | ||
1587 | on_each_cpu(change_rdpmc, (void *)val, 1); /* must include the calling CPU, which smp_call_function() would skip; final argument 1 waits for completion */ | ||
1588 | } | ||
1589 | |||
1590 | return count; /* always reports the whole write as consumed */ | ||
1591 | } | ||
1592 | |||
1593 | static DEVICE_ATTR(rdpmc, S_IRUSR | S_IWUSR, get_attr_rdpmc, set_attr_rdpmc); /* attribute "rdpmc", owner read/write only; referenced below as dev_attr_rdpmc */ | ||
1594 | |||
1595 | static struct attribute *x86_pmu_attrs[] = { /* NULL-terminated list of this PMU's attributes */ | ||
1596 | &dev_attr_rdpmc.attr, | ||
1597 | NULL, | ||
1598 | }; | ||
1599 | |||
1600 | static struct attribute_group x86_pmu_attr_group = { /* unnamed group wrapping the list above */ | ||
1601 | .attrs = x86_pmu_attrs, | ||
1602 | }; | ||
1603 | |||
1604 | static const struct attribute_group *x86_pmu_attr_groups[] = { /* NULL-terminated group list assigned to pmu.attr_groups */ | ||
1605 | &x86_pmu_attr_group, | ||
1606 | NULL, | ||
1607 | }; | ||
1608 | |||
1544 | static struct pmu pmu = { | 1609 | static struct pmu pmu = { |
1545 | .pmu_enable = x86_pmu_enable, | 1610 | .pmu_enable = x86_pmu_enable, |
1546 | .pmu_disable = x86_pmu_disable, | 1611 | .pmu_disable = x86_pmu_disable, |
1547 | 1612 | ||
1613 | .attr_groups = x86_pmu_attr_groups, | ||
1614 | |||
1548 | .event_init = x86_pmu_event_init, | 1615 | .event_init = x86_pmu_event_init, |
1549 | 1616 | ||
1550 | .add = x86_pmu_add, | 1617 | .add = x86_pmu_add, |
@@ -1556,8 +1623,23 @@ static struct pmu pmu = { | |||
1556 | .start_txn = x86_pmu_start_txn, | 1623 | .start_txn = x86_pmu_start_txn, |
1557 | .cancel_txn = x86_pmu_cancel_txn, | 1624 | .cancel_txn = x86_pmu_cancel_txn, |
1558 | .commit_txn = x86_pmu_commit_txn, | 1625 | .commit_txn = x86_pmu_commit_txn, |
1626 | |||
1627 | .event_idx = x86_pmu_event_idx, | ||
1559 | }; | 1628 | }; |
1560 | 1629 | ||
1630 | void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now) /* publish this CPU's cycles-to-ns conversion parameters in userpg */ | ||
1631 | { | ||
1632 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) /* userpg is left untouched unless the boot CPU reports both TSC features */ | ||
1633 | return; | ||
1634 | |||
1635 | if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) | ||
1636 | return; | ||
1637 | |||
1638 | userpg->time_mult = this_cpu_read(cyc2ns); /* per-CPU multiplier */ | ||
1639 | userpg->time_shift = CYC2NS_SCALE_FACTOR; /* shift that pairs with the multiplier */ | ||
1640 | userpg->time_offset = this_cpu_read(cyc2ns_offset) - now; /* per-CPU offset made relative to the caller-supplied 'now' */ | ||
1641 | } | ||
1642 | |||
1561 | /* | 1643 | /* |
1562 | * callchain support | 1644 | * callchain support |
1563 | */ | 1645 | */ |
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index c30c807ddc7..82db83b5c3b 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h | |||
@@ -309,6 +309,14 @@ struct x86_pmu { | |||
309 | struct x86_pmu_quirk *quirks; | 309 | struct x86_pmu_quirk *quirks; |
310 | int perfctr_second_write; | 310 | int perfctr_second_write; |
311 | 311 | ||
312 | /* | ||
313 | * sysfs attrs | ||
314 | */ | ||
315 | int attr_rdpmc; | ||
316 | |||
317 | /* | ||
318 | * CPU Hotplug hooks | ||
319 | */ | ||
312 | int (*cpu_prepare)(int cpu); | 320 | int (*cpu_prepare)(int cpu); |
313 | void (*cpu_starting)(int cpu); | 321 | void (*cpu_starting)(int cpu); |
314 | void (*cpu_dying)(int cpu); | 322 | void (*cpu_dying)(int cpu); |
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c new file mode 100644 index 00000000000..851a11b0d38 --- /dev/null +++ b/arch/x86/kernel/uprobes.c | |||
@@ -0,0 +1,423 @@ | |||
1 | /* | ||
2 | * User-space Probes (UProbes) for x86 | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * Copyright (C) IBM Corporation, 2008-2011 | ||
19 | * Authors: | ||
20 | * Srikar Dronamraju | ||
21 | * Jim Keniston | ||
22 | */ | ||
23 | #include <linux/kernel.h> | ||
24 | #include <linux/sched.h> | ||
25 | #include <linux/ptrace.h> | ||
26 | #include <linux/uprobes.h> | ||
27 | |||
28 | #include <linux/kdebug.h> | ||
29 | #include <asm/insn.h> | ||
30 | |||
31 | /* Post-execution fixups. */ | ||
32 | |||
33 | /* No fixup needed */ | ||
34 | #define UPROBE_FIX_NONE 0x0 | ||
35 | /* Adjust IP back to vicinity of actual insn */ | ||
36 | #define UPROBE_FIX_IP 0x1 | ||
37 | /* Adjust the return address of a call insn */ | ||
38 | #define UPROBE_FIX_CALL 0x2 | ||
39 | |||
40 | #define UPROBE_FIX_RIP_AX 0x8000 /* rip-relative insn was rewritten to address memory through scratch register %rax */ | ||
41 | #define UPROBE_FIX_RIP_CX 0x4000 /* rip-relative insn was rewritten to address memory through scratch register %rcx */ | ||
42 | |||
43 | /* Adaptations for mhiramat x86 decoder v14. */ | ||
44 | #define OPCODE1(insn) ((insn)->opcode.bytes[0]) /* first decoded opcode byte */ | ||
45 | #define OPCODE2(insn) ((insn)->opcode.bytes[1]) /* second opcode byte; the validators consult it only when opcode.nbytes == 2 */ | ||
46 | #define OPCODE3(insn) ((insn)->opcode.bytes[2]) /* third opcode byte */ | ||
47 | #define MODRM_REG(insn) X86_MODRM_REG(insn->modrm.value) /* reg field of the ModRM byte; the argument is not parenthesized, so pass a plain identifier */ | ||
48 | |||
49 | #define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\ | ||
50 | (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ | ||
51 | (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \ | ||
52 | (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \ | ||
53 | (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \ | ||
54 | << (row % 32)) /* packs sixteen 0/1 flags into bits 0..15, then shifts by row % 32 so that two consecutive rows can be OR-ed into one 32-bit table word */ | ||
55 | |||
56 | /* | ||
57 | * Good-instruction tables for 32-bit apps. This is non-const and volatile | ||
58 | * to keep gcc from statically optimizing it out, as variable_test_bit makes | ||
59 | * some versions of gcc think that only *(unsigned long *) is used. | ||
60 | */ | ||
61 | static volatile u32 good_insns_32[256 / 32] = { /* one bit per one-byte opcode; a set bit makes validate_insn_32bits() accept the opcode */ | ||
62 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
63 | /* ---------------------------------------------- */ | ||
64 | W(0x00, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) | /* 00 */ | ||
65 | W(0x10, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) , /* 10 */ | ||
66 | W(0x20, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) | /* 20 */ | ||
67 | W(0x30, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) , /* 30 */ | ||
68 | W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ | ||
69 | W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ | ||
70 | W(0x60, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */ | ||
71 | W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */ | ||
72 | W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ | ||
73 | W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ | ||
74 | W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */ | ||
75 | W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ | ||
76 | W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */ | ||
77 | W(0xd0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ | ||
78 | W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */ | ||
79 | W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */ | ||
80 | /* ---------------------------------------------- */ | ||
81 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
82 | }; | ||
83 | |||
84 | /* Using this for both 64-bit and 32-bit apps */ | ||
85 | static volatile u32 good_2byte_insns[256 / 32] = { /* one bit per second opcode byte; the validators index it with OPCODE2() when opcode.nbytes == 2 */ | ||
86 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
87 | /* ---------------------------------------------- */ | ||
88 | W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */ | ||
89 | W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */ | ||
90 | W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */ | ||
91 | W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */ | ||
92 | W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ | ||
93 | W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ | ||
94 | W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */ | ||
95 | W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */ | ||
96 | W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ | ||
97 | W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ | ||
98 | W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */ | ||
99 | W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ | ||
100 | W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */ | ||
101 | W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ | ||
102 | W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */ | ||
103 | W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* f0 */ | ||
104 | /* ---------------------------------------------- */ | ||
105 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
106 | }; | ||
107 | |||
108 | #ifdef CONFIG_X86_64 | ||
109 | /* Good-instruction tables for 64-bit apps */ | ||
110 | static volatile u32 good_insns_64[256 / 32] = { /* one bit per one-byte opcode; a set bit makes validate_insn_64bits() accept the opcode */ | ||
111 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
112 | /* ---------------------------------------------- */ | ||
113 | W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 00 */ | ||
114 | W(0x10, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 10 */ | ||
115 | W(0x20, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 20 */ | ||
116 | W(0x30, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 30 */ | ||
117 | W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */ | ||
118 | W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ | ||
119 | W(0x60, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */ | ||
120 | W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */ | ||
121 | W(0x80, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ | ||
122 | W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ | ||
123 | W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */ | ||
124 | W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ | ||
125 | W(0xc0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */ | ||
126 | W(0xd0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ | ||
127 | W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */ | ||
128 | W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */ | ||
129 | /* ---------------------------------------------- */ | ||
130 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
131 | }; | ||
132 | #endif | ||
133 | #undef W | ||
134 | |||
135 | /* | ||
136 | * opcodes we'll probably never support: | ||
137 | * | ||
138 | * 6c-6d, e4-e5, ec-ed - in | ||
139 | * 6e-6f, e6-e7, ee-ef - out | ||
140 | * cc, cd - int3, int | ||
141 | * cf - iret | ||
142 | * d6 - illegal instruction | ||
143 | * f1 - int1/icebp | ||
144 | * f4 - hlt | ||
145 | * fa, fb - cli, sti | ||
146 | * 0f - lar, lsl, syscall, clts, sysret, sysenter, sysexit, invd, wbinvd, ud2 | ||
147 | * | ||
148 | * invalid opcodes in 64-bit mode: | ||
149 | * | ||
150 | * 06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, 82, c4-c5, d4-d5 | ||
151 | * 63 - we support this opcode in x86_64 but not in i386. | ||
152 | * | ||
153 | * opcodes we may need to refine support for: | ||
154 | * | ||
155 | * 0f - 2-byte instructions: For many of these instructions, the validity | ||
156 | * depends on the prefix and/or the reg field. On such instructions, we | ||
157 | * just consider the opcode combination valid if it corresponds to any | ||
158 | * valid instruction. | ||
159 | * | ||
160 | * 8f - Group 1 - only reg = 0 is OK | ||
161 | * c6-c7 - Group 11 - only reg = 0 is OK | ||
162 | * d9-df - fpu insns with some illegal encodings | ||
163 | * f2, f3 - repnz, repz prefixes. These are also the first byte for | ||
164 | * certain floating-point instructions, such as addsd. | ||
165 | * | ||
166 | * fe - Group 4 - only reg = 0 or 1 is OK | ||
167 | * ff - Group 5 - only reg = 0-6 is OK | ||
168 | * | ||
169 | * others -- Do we need to support these? | ||
170 | * | ||
171 | * 0f - (floating-point?) prefetch instructions | ||
172 | * 07, 17, 1f - pop es, pop ss, pop ds | ||
173 | * 26, 2e, 36, 3e - es:, cs:, ss:, ds: segment prefixes -- | ||
174 | * but 64 and 65 (fs: and gs:) seem to be used, so we support them | ||
175 | * 67 - addr16 prefix | ||
176 | * ce - into | ||
177 | * f0 - lock prefix | ||
178 | */ | ||
179 | |||
180 | /* | ||
181 | * TODO: | ||
182 | * - Where necessary, examine the modrm byte and allow only valid instructions | ||
183 | * in the different Groups and fpu instructions. | ||
184 | */ | ||
185 | |||
186 | static bool is_prefix_bad(struct insn *insn) /* true if any decoded prefix byte is one the probe code refuses to handle */ | ||
187 | { | ||
188 | int i; | ||
189 | |||
190 | for (i = 0; i < insn->prefixes.nbytes; i++) { /* scan every prefix byte the decoder collected */ | ||
191 | switch (insn->prefixes.bytes[i]) { | ||
192 | case 0x26: /* INAT_PFX_ES */ | ||
193 | case 0x2E: /* INAT_PFX_CS */ | ||
194 | case 0x36: /* INAT_PFX_SS */ | ||
195 | case 0x3E: /* INAT_PFX_DS */ | ||
196 | case 0xF0: /* INAT_PFX_LOCK */ | ||
197 | return true; | ||
198 | } | ||
199 | } | ||
200 | return false; | ||
201 | } | ||
202 | |||
203 | static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn) /* returns 0 if the copied instruction is accepted by the 32-bit tables, -ENOTSUPP otherwise */ | ||
204 | { | ||
205 | insn_init(insn, auprobe->insn, false); /* NOTE(review): third argument is presumably the decoder's x86_64 flag (the 64-bit variant passes true) -- confirm */ | ||
206 | |||
207 | /* Skip good instruction prefixes; reject "bad" ones. */ | ||
208 | insn_get_opcode(insn); | ||
209 | if (is_prefix_bad(insn)) | ||
210 | return -ENOTSUPP; | ||
211 | |||
212 | if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_32)) /* first opcode byte indexes the one-byte table */ | ||
213 | return 0; | ||
214 | |||
215 | if (insn->opcode.nbytes == 2) { /* otherwise a two-byte opcode is judged by its second byte */ | ||
216 | if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns)) | ||
217 | return 0; | ||
218 | } | ||
219 | |||
220 | return -ENOTSUPP; | ||
221 | } | ||
222 | |||
223 | /* | ||
224 | * Figure out which fixups post_xol() will need to perform, and annotate | ||
225 | * arch_uprobe->fixups accordingly. To start with, | ||
226 | * arch_uprobe->fixups is either zero or it reflects rip-related | ||
227 | * fixups. | ||
228 | */ | ||
229 | static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn) /* ORs UPROBE_FIX_IP / UPROBE_FIX_CALL into auprobe->fixups based on the first opcode byte */ | ||
230 | { | ||
231 | bool fix_ip = true, fix_call = false; /* defaults */ | ||
232 | int reg; | ||
233 | |||
234 | insn_get_opcode(insn); /* presumably a no-op here: the validators already called insn_get_opcode() on this insn */ | ||
235 | |||
236 | switch (OPCODE1(insn)) { | ||
237 | case 0xc3: /* ret/lret */ | ||
238 | case 0xcb: | ||
239 | case 0xc2: | ||
240 | case 0xca: | ||
241 | /* ip is correct */ | ||
242 | fix_ip = false; | ||
243 | break; | ||
244 | case 0xe8: /* call relative - Fix return addr */ | ||
245 | fix_call = true; /* fix_ip stays true as well, so both fixups get recorded */ | ||
246 | break; | ||
247 | case 0x9a: /* call absolute - Fix return addr, not ip */ | ||
248 | fix_call = true; | ||
249 | fix_ip = false; | ||
250 | break; | ||
251 | case 0xff: | ||
252 | insn_get_modrm(insn); /* the reg field of ModRM selects which operation 0xff encodes */ | ||
253 | reg = MODRM_REG(insn); | ||
254 | if (reg == 2 || reg == 3) { | ||
255 | /* call or lcall, indirect */ | ||
256 | /* Fix return addr; ip is correct. */ | ||
257 | fix_call = true; | ||
258 | fix_ip = false; | ||
259 | } else if (reg == 4 || reg == 5) { | ||
260 | /* jmp or ljmp, indirect */ | ||
261 | /* ip is correct. */ | ||
262 | fix_ip = false; | ||
263 | } | ||
264 | break; | ||
265 | case 0xea: /* jmp absolute -- ip is correct */ | ||
266 | fix_ip = false; | ||
267 | break; | ||
268 | default: | ||
269 | break; | ||
270 | } | ||
271 | if (fix_ip) | ||
272 | auprobe->fixups |= UPROBE_FIX_IP; /* OR-ed in, so any rip-related bits already stored are kept */ | ||
273 | if (fix_call) | ||
274 | auprobe->fixups |= UPROBE_FIX_CALL; | ||
275 | } | ||
276 | |||
277 | #ifdef CONFIG_X86_64 | ||
278 | /* | ||
279 | * If arch_uprobe->insn doesn't use rip-relative addressing, return | ||
280 | * immediately. Otherwise, rewrite the instruction so that it accesses | ||
281 | * its memory operand indirectly through a scratch register. Set | ||
282 | * arch_uprobe->fixups and arch_uprobe->rip_rela_target_address | ||
283 | * accordingly. (The contents of the scratch register will be saved | ||
284 | * before we single-step the modified instruction, and restored | ||
285 | * afterward.) | ||
286 | * | ||
287 | * We do this because a rip-relative instruction can access only a | ||
288 | * relatively small area (+/- 2 GB from the instruction), and the XOL | ||
289 | * area typically lies beyond that area. At least for instructions | ||
290 | * that store to memory, we can't execute the original instruction | ||
291 | * and "fix things up" later, because the misdirected store could be | ||
292 | * disastrous. | ||
293 | * | ||
294 | * Some useful facts about rip-relative instructions: | ||
295 | * | ||
296 | * - There's always a modrm byte. | ||
297 | * - There's never a SIB byte. | ||
298 | * - The displacement is always 4 bytes. | ||
299 | */ | ||
300 | static void | ||
301 | handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) /* rewrites auprobe->insn in place when it uses rip-relative addressing */ | ||
302 | { | ||
303 | u8 *cursor; | ||
304 | u8 reg; | ||
305 | |||
306 | if (mm->context.ia32_compat) /* NOTE(review): this early return happens before rip_rela_target_address is cleared below */ | ||
307 | return; | ||
308 | |||
309 | auprobe->rip_rela_target_address = 0x0; | ||
310 | if (!insn_rip_relative(insn)) | ||
311 | return; | ||
312 | |||
313 | /* | ||
314 | * insn_rip_relative() would have decoded rex_prefix, modrm. | ||
315 | * Clear REX.b bit (extension of MODRM.rm field): | ||
316 | * we want to encode rax/rcx, not r8/r9. | ||
317 | */ | ||
318 | if (insn->rex_prefix.nbytes) { | ||
319 | cursor = auprobe->insn + insn_offset_rex_prefix(insn); | ||
320 | *cursor &= 0xfe; /* Clearing REX.B bit */ | ||
321 | } | ||
322 | |||
323 | /* | ||
324 | * Point cursor at the modrm byte. The next 4 bytes are the | ||
325 | * displacement. Beyond the displacement, for some instructions, | ||
326 | * is the immediate operand. | ||
327 | */ | ||
328 | cursor = auprobe->insn + insn_offset_modrm(insn); | ||
329 | insn_get_length(insn); /* insn->length and the displacement/immediate fields are read further down */ | ||
330 | |||
331 | /* | ||
332 | * Convert from rip-relative addressing to indirect addressing | ||
333 | * via a scratch register. Change the r/m field from 0x5 (%rip) | ||
334 | * to 0x0 (%rax) or 0x1 (%rcx), and squeeze out the offset field. | ||
335 | */ | ||
336 | reg = MODRM_REG(insn); | ||
337 | if (reg == 0) { | ||
338 | /* | ||
339 | * The register operand (if any) is either the A register | ||
340 | * (%rax, %eax, etc.) or (if the 0x4 bit is set in the | ||
341 | * REX prefix) %r8. In any case, we know the C register | ||
342 | * is NOT the register operand, so we use %rcx (register | ||
343 | * #1) for the scratch register. | ||
344 | */ | ||
345 | auprobe->fixups = UPROBE_FIX_RIP_CX; /* plain assignment: replaces whatever fixups held before */ | ||
346 | /* Change modrm from 00 000 101 to 00 000 001. */ | ||
347 | *cursor = 0x1; | ||
348 | } else { | ||
349 | /* Use %rax (register #0) for the scratch register. */ | ||
350 | auprobe->fixups = UPROBE_FIX_RIP_AX; /* plain assignment here as well */ | ||
351 | /* Change modrm from 00 xxx 101 to 00 xxx 000 */ | ||
352 | *cursor = (reg << 3); | ||
353 | } | ||
354 | |||
355 | /* Target address = address of next instruction + (signed) offset */ | ||
356 | auprobe->rip_rela_target_address = (long)insn->length + insn->displacement.value; /* stored as length + displacement only; the instruction's own address is not added here */ | ||
357 | |||
358 | /* Displacement field is gone; slide immediate field (if any) over. */ | ||
359 | if (insn->immediate.nbytes) { | ||
360 | cursor++; /* step past the modrm byte to where the displacement started */ | ||
361 | memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes); | ||
362 | } | ||
363 | return; | ||
364 | } | ||
365 | |||
366 | static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn) /* returns 0 if the copied instruction is accepted by the 64-bit tables, -ENOTSUPP otherwise */ | ||
367 | { | ||
368 | insn_init(insn, auprobe->insn, true); /* NOTE(review): third argument is presumably the decoder's x86_64 flag (the 32-bit variant passes false) -- confirm */ | ||
369 | |||
370 | /* Skip good instruction prefixes; reject "bad" ones. */ | ||
371 | insn_get_opcode(insn); | ||
372 | if (is_prefix_bad(insn)) | ||
373 | return -ENOTSUPP; | ||
374 | |||
375 | if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_64)) /* first opcode byte indexes the 64-bit one-byte table */ | ||
376 | return 0; | ||
377 | |||
378 | if (insn->opcode.nbytes == 2) { /* two-byte opcodes share one table with the 32-bit validator */ | ||
379 | if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns)) | ||
380 | return 0; | ||
381 | } | ||
382 | return -ENOTSUPP; | ||
383 | } | ||
384 | |||
385 | static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) /* CONFIG_X86_64 build: choose the validator that matches the address space */ | ||
386 | { | ||
387 | if (mm->context.ia32_compat) /* mm flagged ia32_compat: validate with the 32-bit tables */ | ||
388 | return validate_insn_32bits(auprobe, insn); | ||
389 | return validate_insn_64bits(auprobe, insn); | ||
390 | } | ||
391 | #else /* 32-bit: */ | ||
392 | static void handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) /* empty stub for builds without CONFIG_X86_64; keeps the caller free of #ifdefs */ | ||
393 | { | ||
394 | /* No RIP-relative addressing on 32-bit */ | ||
395 | } | ||
396 | |||
397 | static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) /* build without CONFIG_X86_64: mm is unused, only the 32-bit validator applies */ | ||
398 | { | ||
399 | return validate_insn_32bits(auprobe, insn); | ||
400 | } | ||
401 | #endif /* CONFIG_X86_64 */ | ||
402 | |||
403 | /** | ||
404 | * arch_uprobes_analyze_insn - instruction analysis including validity and fixups. | ||
405 | * @auprobe: the probepoint information. | ||
406 | * @mm: the probed address space. | ||
407 | * Return 0 on success or a -ve number on error. | ||
408 | */ | ||
409 | int arch_uprobes_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm) | ||
410 | { | ||
411 | int ret; | ||
412 | struct insn insn; /* initialised by insn_init() inside validate_insn_bits() */ | ||
413 | |||
414 | auprobe->fixups = 0; /* start from a clean slate before the helpers record fixups */ | ||
415 | ret = validate_insn_bits(auprobe, mm, &insn); | ||
416 | if (ret != 0) | ||
417 | return ret; | ||
418 | |||
419 | handle_riprel_insn(auprobe, mm, &insn); /* runs first: it assigns fixups outright, whereas prepare_fixups() ORs into it */ | ||
420 | prepare_fixups(auprobe, &insn); | ||
421 | |||
422 | return 0; | ||
423 | } | ||