Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/Makefile          |   1
-rw-r--r--  arch/x86/kernel/cpu/amd.c         |   3
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c  |  82
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h  |   8
-rw-r--r--  arch/x86/kernel/uprobes.c         | 423
5 files changed, 517 insertions, 0 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5369059c07a..8c8c365a3bc 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -100,6 +100,7 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
 
 obj-$(CONFIG_SWIOTLB)		+= pci-swiotlb.o
 obj-$(CONFIG_OF)		+= devicetree.o
+obj-$(CONFIG_UPROBES)		+= uprobes.o
 
 ###
 # 64 bit specific files
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index f4773f4aae3..0a44b90602b 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -5,6 +5,7 @@
 #include <linux/mm.h>
 
 #include <linux/io.h>
+#include <linux/sched.h>
 #include <asm/processor.h>
 #include <asm/apic.h>
 #include <asm/cpu.h>
@@ -456,6 +457,8 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
         if (c->x86_power & (1 << 8)) {
                 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
                 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
+                if (!check_tsc_unstable())
+                        sched_clock_stable = 1;
         }
 
 #ifdef CONFIG_X86_64
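
The amd.c hunk above marks sched_clock as stable when the CPU advertises an invariant TSC (power bit 8, which sets CONSTANT_TSC and NONSTOP_TSC) and the TSC has not already been flagged unstable; the perf changes further down check exactly these two feature bits before exporting TSC-based time to userspace. The same bits are visible from userspace in the /proc/cpuinfo "flags" line as "constant_tsc" and "nonstop_tsc". The small stand-alone check below is purely illustrative and is not part of the patch.

    /* Illustrative only -- not part of the patch: report whether this CPU
     * advertises the TSC properties the kernel code above keys off of.
     */
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    int main(void)
    {
            char *line = NULL;
            size_t len = 0;
            FILE *f = fopen("/proc/cpuinfo", "r");

            if (!f)
                    return 1;

            while (getline(&line, &len, f) != -1) {
                    if (!strncmp(line, "flags", 5)) {
                            printf("constant_tsc: %s\n",
                                   strstr(line, "constant_tsc") ? "yes" : "no");
                            printf("nonstop_tsc:  %s\n",
                                   strstr(line, "nonstop_tsc") ? "yes" : "no");
                            break;
                    }
            }
            free(line);
            fclose(f);
            return 0;
    }
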
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 63c0e058a40..1c52bdbb9b8 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -24,12 +24,14 @@
 #include <linux/slab.h>
 #include <linux/cpu.h>
 #include <linux/bitops.h>
+#include <linux/device.h>
 
 #include <asm/apic.h>
 #include <asm/stacktrace.h>
 #include <asm/nmi.h>
 #include <asm/smp.h>
 #include <asm/alternative.h>
+#include <asm/timer.h>
 
 #include "perf_event.h"
 
@@ -1209,6 +1211,8 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
                 break;
 
         case CPU_STARTING:
+                if (x86_pmu.attr_rdpmc)
+                        set_in_cr4(X86_CR4_PCE);
                 if (x86_pmu.cpu_starting)
                         x86_pmu.cpu_starting(cpu);
                 break;
@@ -1318,6 +1322,8 @@ static int __init init_hw_perf_events(void)
                 }
         }
 
+        x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
+
         pr_info("... version:                %d\n", x86_pmu.version);
         pr_info("... bit width:              %d\n", x86_pmu.cntval_bits);
         pr_info("... generic registers:      %d\n", x86_pmu.num_counters);
@@ -1541,10 +1547,71 @@ static int x86_pmu_event_init(struct perf_event *event)
         return err;
 }
 
+static int x86_pmu_event_idx(struct perf_event *event)
+{
+        int idx = event->hw.idx;
+
+        if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) {
+                idx -= X86_PMC_IDX_FIXED;
+                idx |= 1 << 30;
+        }
+
+        return idx + 1;
+}
+
+static ssize_t get_attr_rdpmc(struct device *cdev,
+                              struct device_attribute *attr,
+                              char *buf)
+{
+        return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc);
+}
+
+static void change_rdpmc(void *info)
+{
+        bool enable = !!(unsigned long)info;
+
+        if (enable)
+                set_in_cr4(X86_CR4_PCE);
+        else
+                clear_in_cr4(X86_CR4_PCE);
+}
+
+static ssize_t set_attr_rdpmc(struct device *cdev,
+                              struct device_attribute *attr,
+                              const char *buf, size_t count)
+{
+        unsigned long val = simple_strtoul(buf, NULL, 0);
+
+        if (!!val != !!x86_pmu.attr_rdpmc) {
+                x86_pmu.attr_rdpmc = !!val;
+                smp_call_function(change_rdpmc, (void *)val, 1);
+        }
+
+        return count;
+}
+
+static DEVICE_ATTR(rdpmc, S_IRUSR | S_IWUSR, get_attr_rdpmc, set_attr_rdpmc);
+
+static struct attribute *x86_pmu_attrs[] = {
+        &dev_attr_rdpmc.attr,
+        NULL,
+};
+
+static struct attribute_group x86_pmu_attr_group = {
+        .attrs = x86_pmu_attrs,
+};
+
+static const struct attribute_group *x86_pmu_attr_groups[] = {
+        &x86_pmu_attr_group,
+        NULL,
+};
+
 static struct pmu pmu = {
         .pmu_enable     = x86_pmu_enable,
         .pmu_disable    = x86_pmu_disable,
 
+        .attr_groups    = x86_pmu_attr_groups,
+
         .event_init     = x86_pmu_event_init,
 
         .add            = x86_pmu_add,
@@ -1556,8 +1623,23 @@ static struct pmu pmu = {
         .start_txn      = x86_pmu_start_txn,
         .cancel_txn     = x86_pmu_cancel_txn,
         .commit_txn     = x86_pmu_commit_txn,
+
+        .event_idx      = x86_pmu_event_idx,
 };
 
+void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
+{
+        if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+                return;
+
+        if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
+                return;
+
+        userpg->time_mult = this_cpu_read(cyc2ns);
+        userpg->time_shift = CYC2NS_SCALE_FACTOR;
+        userpg->time_offset = this_cpu_read(cyc2ns_offset) - now;
+}
+
 /*
  * callchain support
  */
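
Two user-visible pieces land in perf_event.c above: the "rdpmc" device attribute flips CR4.PCE on every CPU so that unprivileged code may execute RDPMC directly (the attribute group hangs off the PMU's struct device; on mainline kernels it typically surfaces as /sys/bus/event_source/devices/cpu/rdpmc), and x86_pmu_event_idx() plus perf_update_user_clock() publish the hardware counter index and TSC scaling in the event's mmap()ed control page. The sketch below shows roughly how self-monitoring userspace would consume that index; it is not part of the patch, it assumes an event already opened with perf_event_open() and its control page mapped, and it omits the lock/seqcount retry loop a real reader needs.

    /* Rough self-monitoring sketch (not part of the patch).  read_event()
     * assumes "userpg" points at the mmap()ed perf_event control page and
     * that the rdpmc attribute is enabled; the retry loop around
     * userpg->lock is omitted for brevity.
     */
    #include <stdint.h>
    #include <linux/perf_event.h>

    static inline uint64_t read_pmc(uint32_t counter)
    {
            uint32_t lo, hi;

            /* RDPMC: ECX selects the counter, the result is returned in EDX:EAX */
            __asm__ volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (counter));
            return (uint64_t)hi << 32 | lo;
    }

    static uint64_t read_event(volatile struct perf_event_mmap_page *userpg)
    {
            uint32_t idx = userpg->index;   /* x86_pmu_event_idx() result */

            if (!idx)                       /* 0: event not currently on a counter */
                    return 0;

            /* index is published 1-based (fixed counters carry bit 30);
             * RDPMC wants the 0-based value.
             */
            return userpg->offset + read_pmc(idx - 1);
    }
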
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index c30c807ddc7..82db83b5c3b 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -309,6 +309,14 @@ struct x86_pmu {
         struct x86_pmu_quirk *quirks;
         int             perfctr_second_write;
 
+        /*
+         * sysfs attrs
+         */
+        int             attr_rdpmc;
+
+        /*
+         * CPU Hotplug hooks
+         */
         int             (*cpu_prepare)(int cpu);
         void            (*cpu_starting)(int cpu);
         void            (*cpu_dying)(int cpu);
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
new file mode 100644
index 00000000000..851a11b0d38
--- /dev/null
+++ b/arch/x86/kernel/uprobes.c
@@ -0,0 +1,423 @@
+/*
+ * User-space Probes (UProbes) for x86
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2008-2011
+ * Authors:
+ *      Srikar Dronamraju
+ *      Jim Keniston
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/ptrace.h>
+#include <linux/uprobes.h>
+
+#include <linux/kdebug.h>
+#include <asm/insn.h>
+
+/* Post-execution fixups. */
+
+/* No fixup needed */
+#define UPROBE_FIX_NONE         0x0
+/* Adjust IP back to vicinity of actual insn */
+#define UPROBE_FIX_IP           0x1
+/* Adjust the return address of a call insn */
+#define UPROBE_FIX_CALL         0x2
+
+#define UPROBE_FIX_RIP_AX       0x8000
+#define UPROBE_FIX_RIP_CX       0x4000
+
+/* Adaptations for mhiramat x86 decoder v14. */
+#define OPCODE1(insn)           ((insn)->opcode.bytes[0])
+#define OPCODE2(insn)           ((insn)->opcode.bytes[1])
+#define OPCODE3(insn)           ((insn)->opcode.bytes[2])
+#define MODRM_REG(insn)         X86_MODRM_REG(insn->modrm.value)
+
+#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
+        (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) |   \
+          (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) |   \
+          (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) |   \
+          (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf))    \
+         << (row % 32))
+
+/*
+ * Good-instruction tables for 32-bit apps.  This is non-const and volatile
+ * to keep gcc from statically optimizing it out, as variable_test_bit makes
+ * some versions of gcc to think only *(unsigned long*) is used.
+ */
+static volatile u32 good_insns_32[256 / 32] = {
+        /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f        */
+        /*      ----------------------------------------------        */
+        W(0x00, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) | /* 00 */
+        W(0x10, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) , /* 10 */
+        W(0x20, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) | /* 20 */
+        W(0x30, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) , /* 30 */
+        W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
+        W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
+        W(0x60, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
+        W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
+        W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
+        W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
+        W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */
+        W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
+        W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */
+        W(0xd0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
+        W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */
+        W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1)   /* f0 */
+        /*      ----------------------------------------------        */
+        /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f        */
+};
+
+/* Using this for both 64-bit and 32-bit apps */
+static volatile u32 good_2byte_insns[256 / 32] = {
+        /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f        */
+        /*      ----------------------------------------------        */
+        W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */
+        W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */
+        W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
+        W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
+        W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
+        W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
+        W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */
+        W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
+        W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
+        W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
+        W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */
+        W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
+        W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
+        W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
+        W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */
+        W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0)   /* f0 */
+        /*      ----------------------------------------------        */
+        /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f        */
+};
+
+#ifdef CONFIG_X86_64
+/* Good-instruction tables for 64-bit apps */
+static volatile u32 good_insns_64[256 / 32] = {
+        /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f        */
+        /*      ----------------------------------------------        */
+        W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 00 */
+        W(0x10, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 10 */
+        W(0x20, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 20 */
+        W(0x30, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 30 */
+        W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */
+        W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
+        W(0x60, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
+        W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
+        W(0x80, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
+        W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
+        W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */
+        W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
+        W(0xc0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */
+        W(0xd0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
+        W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */
+        W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1)   /* f0 */
+        /*      ----------------------------------------------        */
+        /*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f        */
+};
+#endif
+#undef W
+
+/*
+ * opcodes we'll probably never support:
+ *
+ *  6c-6d, e4-e5, ec-ed - in
+ *  6e-6f, e6-e7, ee-ef - out
+ *  cc, cd - int3, int
+ *  cf - iret
+ *  d6 - illegal instruction
+ *  f1 - int1/icebp
+ *  f4 - hlt
+ *  fa, fb - cli, sti
+ *  0f - lar, lsl, syscall, clts, sysret, sysenter, sysexit, invd, wbinvd, ud2
+ *
+ * invalid opcodes in 64-bit mode:
+ *
+ *  06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, 82, c4-c5, d4-d5
+ *  63 - we support this opcode in x86_64 but not in i386.
+ *
+ * opcodes we may need to refine support for:
+ *
+ *  0f - 2-byte instructions: For many of these instructions, the validity
+ *  depends on the prefix and/or the reg field.  On such instructions, we
+ *  just consider the opcode combination valid if it corresponds to any
+ *  valid instruction.
+ *
+ *  8f - Group 1 - only reg = 0 is OK
+ *  c6-c7 - Group 11 - only reg = 0 is OK
+ *  d9-df - fpu insns with some illegal encodings
+ *  f2, f3 - repnz, repz prefixes.  These are also the first byte for
+ *  certain floating-point instructions, such as addsd.
+ *
+ *  fe - Group 4 - only reg = 0 or 1 is OK
+ *  ff - Group 5 - only reg = 0-6 is OK
+ *
+ * others -- Do we need to support these?
+ *
+ *  0f - (floating-point?) prefetch instructions
+ *  07, 17, 1f - pop es, pop ss, pop ds
+ *  26, 2e, 36, 3e - es:, cs:, ss:, ds: segment prefixes --
+ *      but 64 and 65 (fs: and gs:) seem to be used, so we support them
+ *  67 - addr16 prefix
+ *  ce - into
+ *  f0 - lock prefix
+ */
+
+/*
+ * TODO:
+ *  - Where necessary, examine the modrm byte and allow only valid instructions
+ *    in the different Groups and fpu instructions.
+ */
+
+static bool is_prefix_bad(struct insn *insn)
+{
+        int i;
+
+        for (i = 0; i < insn->prefixes.nbytes; i++) {
+                switch (insn->prefixes.bytes[i]) {
+                case 0x26:      /* INAT_PFX_ES   */
+                case 0x2E:      /* INAT_PFX_CS   */
+                case 0x36:      /* INAT_PFX_DS   */
+                case 0x3E:      /* INAT_PFX_SS   */
+                case 0xF0:      /* INAT_PFX_LOCK */
+                        return true;
+                }
+        }
+        return false;
+}
+
+static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn)
+{
+        insn_init(insn, auprobe->insn, false);
+
+        /* Skip good instruction prefixes; reject "bad" ones. */
+        insn_get_opcode(insn);
+        if (is_prefix_bad(insn))
+                return -ENOTSUPP;
+
+        if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_32))
+                return 0;
+
+        if (insn->opcode.nbytes == 2) {
+                if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns))
+                        return 0;
+        }
+
+        return -ENOTSUPP;
+}
+
+/*
+ * Figure out which fixups post_xol() will need to perform, and annotate
+ * arch_uprobe->fixups accordingly.  To start with,
+ * arch_uprobe->fixups is either zero or it reflects rip-related
+ * fixups.
+ */
+static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn)
+{
+        bool fix_ip = true, fix_call = false;   /* defaults */
+        int reg;
+
+        insn_get_opcode(insn);  /* should be a nop */
+
+        switch (OPCODE1(insn)) {
+        case 0xc3:              /* ret/lret */
+        case 0xcb:
+        case 0xc2:
+        case 0xca:
+                /* ip is correct */
+                fix_ip = false;
+                break;
+        case 0xe8:              /* call relative - Fix return addr */
+                fix_call = true;
+                break;
+        case 0x9a:              /* call absolute - Fix return addr, not ip */
+                fix_call = true;
+                fix_ip = false;
+                break;
+        case 0xff:
+                insn_get_modrm(insn);
+                reg = MODRM_REG(insn);
+                if (reg == 2 || reg == 3) {
+                        /* call or lcall, indirect */
+                        /* Fix return addr; ip is correct. */
+                        fix_call = true;
+                        fix_ip = false;
+                } else if (reg == 4 || reg == 5) {
+                        /* jmp or ljmp, indirect */
+                        /* ip is correct. */
+                        fix_ip = false;
+                }
+                break;
+        case 0xea:              /* jmp absolute -- ip is correct */
+                fix_ip = false;
+                break;
+        default:
+                break;
+        }
+        if (fix_ip)
+                auprobe->fixups |= UPROBE_FIX_IP;
+        if (fix_call)
+                auprobe->fixups |= UPROBE_FIX_CALL;
+}
+
+#ifdef CONFIG_X86_64
+/*
+ * If arch_uprobe->insn doesn't use rip-relative addressing, return
+ * immediately.  Otherwise, rewrite the instruction so that it accesses
+ * its memory operand indirectly through a scratch register.  Set
+ * arch_uprobe->fixups and arch_uprobe->rip_rela_target_address
+ * accordingly.  (The contents of the scratch register will be saved
+ * before we single-step the modified instruction, and restored
+ * afterward.)
+ *
+ * We do this because a rip-relative instruction can access only a
+ * relatively small area (+/- 2 GB from the instruction), and the XOL
+ * area typically lies beyond that area.  At least for instructions
+ * that store to memory, we can't execute the original instruction
+ * and "fix things up" later, because the misdirected store could be
+ * disastrous.
+ *
+ * Some useful facts about rip-relative instructions:
+ *
+ *  - There's always a modrm byte.
+ *  - There's never a SIB byte.
+ *  - The displacement is always 4 bytes.
+ */
+static void
+handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
+{
+        u8 *cursor;
+        u8 reg;
+
+        if (mm->context.ia32_compat)
+                return;
+
+        auprobe->rip_rela_target_address = 0x0;
+        if (!insn_rip_relative(insn))
+                return;
+
+        /*
+         * insn_rip_relative() would have decoded rex_prefix, modrm.
+         * Clear REX.b bit (extension of MODRM.rm field):
+         * we want to encode rax/rcx, not r8/r9.
+         */
+        if (insn->rex_prefix.nbytes) {
+                cursor = auprobe->insn + insn_offset_rex_prefix(insn);
+                *cursor &= 0xfe;        /* Clearing REX.B bit */
+        }
+
+        /*
+         * Point cursor at the modrm byte.  The next 4 bytes are the
+         * displacement.  Beyond the displacement, for some instructions,
+         * is the immediate operand.
+         */
+        cursor = auprobe->insn + insn_offset_modrm(insn);
+        insn_get_length(insn);
+
+        /*
+         * Convert from rip-relative addressing to indirect addressing
+         * via a scratch register.  Change the r/m field from 0x5 (%rip)
+         * to 0x0 (%rax) or 0x1 (%rcx), and squeeze out the offset field.
+         */
+        reg = MODRM_REG(insn);
+        if (reg == 0) {
+                /*
+                 * The register operand (if any) is either the A register
+                 * (%rax, %eax, etc.) or (if the 0x4 bit is set in the
+                 * REX prefix) %r8.  In any case, we know the C register
+                 * is NOT the register operand, so we use %rcx (register
+                 * #1) for the scratch register.
+                 */
+                auprobe->fixups = UPROBE_FIX_RIP_CX;
+                /* Change modrm from 00 000 101 to 00 000 001. */
+                *cursor = 0x1;
+        } else {
+                /* Use %rax (register #0) for the scratch register. */
+                auprobe->fixups = UPROBE_FIX_RIP_AX;
+                /* Change modrm from 00 xxx 101 to 00 xxx 000 */
+                *cursor = (reg << 3);
+        }
+
+        /* Target address = address of next instruction + (signed) offset */
+        auprobe->rip_rela_target_address = (long)insn->length + insn->displacement.value;
+
+        /* Displacement field is gone; slide immediate field (if any) over. */
+        if (insn->immediate.nbytes) {
+                cursor++;
+                memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes);
+        }
+        return;
+}
+
+static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn)
+{
+        insn_init(insn, auprobe->insn, true);
+
+        /* Skip good instruction prefixes; reject "bad" ones. */
+        insn_get_opcode(insn);
+        if (is_prefix_bad(insn))
+                return -ENOTSUPP;
+
+        if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_64))
+                return 0;
+
+        if (insn->opcode.nbytes == 2) {
+                if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns))
+                        return 0;
+        }
+        return -ENOTSUPP;
+}
+
+static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
+{
+        if (mm->context.ia32_compat)
+                return validate_insn_32bits(auprobe, insn);
+        return validate_insn_64bits(auprobe, insn);
+}
+#else /* 32-bit: */
+static void handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
+{
+        /* No RIP-relative addressing on 32-bit */
+}
+
+static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
+{
+        return validate_insn_32bits(auprobe, insn);
+}
+#endif /* CONFIG_X86_64 */
+
+/**
+ * arch_uprobes_analyze_insn - instruction analysis including validity and fixups.
+ * @mm: the probed address space.
+ * @arch_uprobe: the probepoint information.
+ * Return 0 on success or a -ve number on error.
+ */
+int arch_uprobes_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm)
+{
+        int ret;
+        struct insn insn;
+
+        auprobe->fixups = 0;
+        ret = validate_insn_bits(auprobe, mm, &insn);
+        if (ret != 0)
+                return ret;
+
+        handle_riprel_insn(auprobe, mm, &insn);
+        prepare_fixups(auprobe, &insn);
+
+        return 0;
+}
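
For reference, the W() macro near the top of uprobes.c packs one "safe to probe" bit per opcode byte into an array of u32 words, and validate_insn_32bits()/validate_insn_64bits() consult the resulting good_insns_* tables through test_bit() on the first (or second) opcode byte. The snippet below is a stand-alone illustration of that lookup, using a hypothetical helper name that does not appear in the patch; on the little-endian x86 layout it tests the same bit that test_bit() does on these tables.

    /* Illustration only: the bit-per-opcode lookup the good_insns_* tables
     * implement.  Opcode N lives in word N/32 at bit position N%32 -- the
     * same layout the W() macro builds.
     */
    #include <stdbool.h>
    #include <stdint.h>

    static bool opcode_is_good(const uint32_t table[8], uint8_t opcode)
    {
            return (table[opcode / 32] >> (opcode % 32)) & 1;
    }
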