aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDave Hansen <dave.hansen@linux.intel.com>2014-11-14 10:39:57 -0500
committerThomas Gleixner <tglx@linutronix.de>2014-11-17 18:58:52 -0500
commit6ba48ff46f764414f979d2eacb23c4e6296bcc95 (patch)
treec1e1ea9fd3e1ef9c396e36c5d446fa6f74d97224
parent206c5f60a3d902bc4b56dab2de3e88de5eb06108 (diff)
x86: Remove arbitrary instruction size limit in instruction decoder
The current x86 instruction decoder steps along through the instruction stream but always ensures that it never steps farther than the largest possible instruction size (MAX_INSN_SIZE). The MPX code is now going to be doing some decoding of userspace instructions. We copy those from userspace in to the kernel and they're obviously completely untrusted coming from userspace. In addition to the constraint that instructions can only be so long, we also have to be aware of how long the buffer is that came in from userspace. This _looks_ to be similar to what the perf and kprobes is doing, but it's unclear to me whether they are affected. The whole reason we need this is that it is perfectly valid to be executing an instruction within MAX_INSN_SIZE bytes of an unreadable page. We should be able to gracefully handle short reads in those cases. This adds support to the decoder to record how long the buffer being decoded is and to refuse to "validate" the instruction if we would have gone over the end of the buffer to decode it. The kprobes code probably needs to be looked at here a bit more carefully. This patch still respects the MAX_INSN_SIZE limit there but the kprobes code does look like it might be able to be a bit more strict than it currently is. Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> Acked-by: Jim Keniston <jkenisto@us.ibm.com> Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: x86@kernel.org Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Paul Mackerras <paulus@samba.org> Cc: Arnaldo Carvalho de Melo <acme@kernel.org> Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com> Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com> Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> Cc: "David S. Miller" <davem@davemloft.net> Link: http://lkml.kernel.org/r/20141114153957.E6B01535@viggo.jf.intel.com Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r--arch/x86/include/asm/insn.h10
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c17
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_lbr.c25
-rw-r--r--arch/x86/kernel/kprobes/core.c8
-rw-r--r--arch/x86/kernel/kprobes/opt.c4
-rw-r--r--arch/x86/kernel/uprobes.c2
-rw-r--r--arch/x86/lib/insn.c5
-rw-r--r--arch/x86/tools/insn_sanity.c2
-rw-r--r--arch/x86/tools/test_get_len.c2
9 files changed, 53 insertions, 22 deletions
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 48eb30a86062..47f29b1d1846 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -65,6 +65,7 @@ struct insn {
65 unsigned char x86_64; 65 unsigned char x86_64;
66 66
67 const insn_byte_t *kaddr; /* kernel address of insn to analyze */ 67 const insn_byte_t *kaddr; /* kernel address of insn to analyze */
68 const insn_byte_t *end_kaddr; /* kernel address of last insn in buffer */
68 const insn_byte_t *next_byte; 69 const insn_byte_t *next_byte;
69}; 70};
70 71
@@ -96,7 +97,7 @@ struct insn {
96#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ 97#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */
97#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ 98#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */
98 99
99extern void insn_init(struct insn *insn, const void *kaddr, int x86_64); 100extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64);
100extern void insn_get_prefixes(struct insn *insn); 101extern void insn_get_prefixes(struct insn *insn);
101extern void insn_get_opcode(struct insn *insn); 102extern void insn_get_opcode(struct insn *insn);
102extern void insn_get_modrm(struct insn *insn); 103extern void insn_get_modrm(struct insn *insn);
@@ -115,12 +116,13 @@ static inline void insn_get_attribute(struct insn *insn)
115extern int insn_rip_relative(struct insn *insn); 116extern int insn_rip_relative(struct insn *insn);
116 117
117/* Init insn for kernel text */ 118/* Init insn for kernel text */
118static inline void kernel_insn_init(struct insn *insn, const void *kaddr) 119static inline void kernel_insn_init(struct insn *insn,
120 const void *kaddr, int buf_len)
119{ 121{
120#ifdef CONFIG_X86_64 122#ifdef CONFIG_X86_64
121 insn_init(insn, kaddr, 1); 123 insn_init(insn, kaddr, buf_len, 1);
122#else /* CONFIG_X86_32 */ 124#else /* CONFIG_X86_32 */
123 insn_init(insn, kaddr, 0); 125 insn_init(insn, kaddr, buf_len, 0);
124#endif 126#endif
125} 127}
126 128
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 46211bcc813e..6a94277525c9 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -724,6 +724,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
724 unsigned long ip = regs->ip; 724 unsigned long ip = regs->ip;
725 int is_64bit = 0; 725 int is_64bit = 0;
726 void *kaddr; 726 void *kaddr;
727 int size;
727 728
728 /* 729 /*
729 * We don't need to fixup if the PEBS assist is fault like 730 * We don't need to fixup if the PEBS assist is fault like
@@ -758,11 +759,12 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
758 return 1; 759 return 1;
759 } 760 }
760 761
762 size = ip - to;
761 if (!kernel_ip(ip)) { 763 if (!kernel_ip(ip)) {
762 int size, bytes; 764 int bytes;
763 u8 *buf = this_cpu_read(insn_buffer); 765 u8 *buf = this_cpu_read(insn_buffer);
764 766
765 size = ip - to; /* Must fit our buffer, see above */ 767 /* 'size' must fit our buffer, see above */
766 bytes = copy_from_user_nmi(buf, (void __user *)to, size); 768 bytes = copy_from_user_nmi(buf, (void __user *)to, size);
767 if (bytes != 0) 769 if (bytes != 0)
768 return 0; 770 return 0;
@@ -780,11 +782,20 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
780#ifdef CONFIG_X86_64 782#ifdef CONFIG_X86_64
781 is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32); 783 is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32);
782#endif 784#endif
783 insn_init(&insn, kaddr, is_64bit); 785 insn_init(&insn, kaddr, size, is_64bit);
784 insn_get_length(&insn); 786 insn_get_length(&insn);
787 /*
788 * Make sure there was not a problem decoding the
789 * instruction and getting the length. This is
790 * doubly important because we have an infinite
791 * loop if insn.length=0.
792 */
793 if (!insn.length)
794 break;
785 795
786 to += insn.length; 796 to += insn.length;
787 kaddr += insn.length; 797 kaddr += insn.length;
798 size -= insn.length;
788 } while (to < ip); 799 } while (to < ip);
789 800
790 if (to == ip) { 801 if (to == ip) {
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 45fa730a5283..58f1a94beaf0 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -465,7 +465,7 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
465{ 465{
466 struct insn insn; 466 struct insn insn;
467 void *addr; 467 void *addr;
468 int bytes, size = MAX_INSN_SIZE; 468 int bytes_read, bytes_left;
469 int ret = X86_BR_NONE; 469 int ret = X86_BR_NONE;
470 int ext, to_plm, from_plm; 470 int ext, to_plm, from_plm;
471 u8 buf[MAX_INSN_SIZE]; 471 u8 buf[MAX_INSN_SIZE];
@@ -493,8 +493,10 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
493 return X86_BR_NONE; 493 return X86_BR_NONE;
494 494
495 /* may fail if text not present */ 495 /* may fail if text not present */
496 bytes = copy_from_user_nmi(buf, (void __user *)from, size); 496 bytes_left = copy_from_user_nmi(buf, (void __user *)from,
497 if (bytes != 0) 497 MAX_INSN_SIZE);
498 bytes_read = MAX_INSN_SIZE - bytes_left;
499 if (!bytes_read)
498 return X86_BR_NONE; 500 return X86_BR_NONE;
499 501
500 addr = buf; 502 addr = buf;
@@ -505,10 +507,19 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
505 * Ensure we don't blindy read any address by validating it is 507 * Ensure we don't blindy read any address by validating it is
506 * a known text address. 508 * a known text address.
507 */ 509 */
508 if (kernel_text_address(from)) 510 if (kernel_text_address(from)) {
509 addr = (void *)from; 511 addr = (void *)from;
510 else 512 /*
513 * Assume we can get the maximum possible size
514 * when grabbing kernel data. This is not
515 * _strictly_ true since we could possibly be
516 * executing up next to a memory hole, but
517 * it is very unlikely to be a problem.
518 */
519 bytes_read = MAX_INSN_SIZE;
520 } else {
511 return X86_BR_NONE; 521 return X86_BR_NONE;
522 }
512 } 523 }
513 524
514 /* 525 /*
@@ -518,8 +529,10 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
518#ifdef CONFIG_X86_64 529#ifdef CONFIG_X86_64
519 is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32); 530 is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
520#endif 531#endif
521 insn_init(&insn, addr, is64); 532 insn_init(&insn, addr, bytes_read, is64);
522 insn_get_opcode(&insn); 533 insn_get_opcode(&insn);
534 if (!insn.opcode.got)
535 return X86_BR_ABORT;
523 536
524 switch (insn.opcode.bytes[0]) { 537 switch (insn.opcode.bytes[0]) {
525 case 0xf: 538 case 0xf:
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 67e6d19ef1be..f7e3cd50ece0 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -285,7 +285,7 @@ static int can_probe(unsigned long paddr)
285 * normally used, we just go through if there is no kprobe. 285 * normally used, we just go through if there is no kprobe.
286 */ 286 */
287 __addr = recover_probed_instruction(buf, addr); 287 __addr = recover_probed_instruction(buf, addr);
288 kernel_insn_init(&insn, (void *)__addr); 288 kernel_insn_init(&insn, (void *)__addr, MAX_INSN_SIZE);
289 insn_get_length(&insn); 289 insn_get_length(&insn);
290 290
291 /* 291 /*
@@ -330,8 +330,10 @@ int __copy_instruction(u8 *dest, u8 *src)
330{ 330{
331 struct insn insn; 331 struct insn insn;
332 kprobe_opcode_t buf[MAX_INSN_SIZE]; 332 kprobe_opcode_t buf[MAX_INSN_SIZE];
333 unsigned long recovered_insn =
334 recover_probed_instruction(buf, (unsigned long)src);
333 335
334 kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, (unsigned long)src)); 336 kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
335 insn_get_length(&insn); 337 insn_get_length(&insn);
336 /* Another subsystem puts a breakpoint, failed to recover */ 338 /* Another subsystem puts a breakpoint, failed to recover */
337 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) 339 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
@@ -342,7 +344,7 @@ int __copy_instruction(u8 *dest, u8 *src)
342 if (insn_rip_relative(&insn)) { 344 if (insn_rip_relative(&insn)) {
343 s64 newdisp; 345 s64 newdisp;
344 u8 *disp; 346 u8 *disp;
345 kernel_insn_init(&insn, dest); 347 kernel_insn_init(&insn, dest, insn.length);
346 insn_get_displacement(&insn); 348 insn_get_displacement(&insn);
347 /* 349 /*
348 * The copied instruction uses the %rip-relative addressing 350 * The copied instruction uses the %rip-relative addressing
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index f1314d0bcf0a..7c523bbf3dc8 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -251,13 +251,15 @@ static int can_optimize(unsigned long paddr)
251 /* Decode instructions */ 251 /* Decode instructions */
252 addr = paddr - offset; 252 addr = paddr - offset;
253 while (addr < paddr - offset + size) { /* Decode until function end */ 253 while (addr < paddr - offset + size) { /* Decode until function end */
254 unsigned long recovered_insn;
254 if (search_exception_tables(addr)) 255 if (search_exception_tables(addr))
255 /* 256 /*
256 * Since some fixup code will jumps into this function, 257 * Since some fixup code will jumps into this function,
257 * we can't optimize kprobe in this function. 258 * we can't optimize kprobe in this function.
258 */ 259 */
259 return 0; 260 return 0;
260 kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, addr)); 261 recovered_insn = recover_probed_instruction(buf, addr);
262 kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
261 insn_get_length(&insn); 263 insn_get_length(&insn);
262 /* Another subsystem puts a breakpoint */ 264 /* Another subsystem puts a breakpoint */
263 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) 265 if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 5d1cbfe4ae58..8b96a947021f 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -219,7 +219,7 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool
219{ 219{
220 u32 volatile *good_insns; 220 u32 volatile *good_insns;
221 221
222 insn_init(insn, auprobe->insn, x86_64); 222 insn_init(insn, auprobe->insn, sizeof(auprobe->insn), x86_64);
223 /* has the side-effect of processing the entire instruction */ 223 /* has the side-effect of processing the entire instruction */
224 insn_get_length(insn); 224 insn_get_length(insn);
225 if (WARN_ON_ONCE(!insn_complete(insn))) 225 if (WARN_ON_ONCE(!insn_complete(insn)))
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 54fcffed28ed..2480978b31cc 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -28,7 +28,7 @@
28 28
29/* Verify next sizeof(t) bytes can be on the same instruction */ 29/* Verify next sizeof(t) bytes can be on the same instruction */
30#define validate_next(t, insn, n) \ 30#define validate_next(t, insn, n) \
31 ((insn)->next_byte + sizeof(t) + n - (insn)->kaddr <= MAX_INSN_SIZE) 31 ((insn)->next_byte + sizeof(t) + n < (insn)->end_kaddr)
32 32
33#define __get_next(t, insn) \ 33#define __get_next(t, insn) \
34 ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) 34 ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; })
@@ -50,10 +50,11 @@
50 * @kaddr: address (in kernel memory) of instruction (or copy thereof) 50 * @kaddr: address (in kernel memory) of instruction (or copy thereof)
51 * @x86_64: !0 for 64-bit kernel or 64-bit app 51 * @x86_64: !0 for 64-bit kernel or 64-bit app
52 */ 52 */
53void insn_init(struct insn *insn, const void *kaddr, int x86_64) 53void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
54{ 54{
55 memset(insn, 0, sizeof(*insn)); 55 memset(insn, 0, sizeof(*insn));
56 insn->kaddr = kaddr; 56 insn->kaddr = kaddr;
57 insn->end_kaddr = kaddr + buf_len;
57 insn->next_byte = kaddr; 58 insn->next_byte = kaddr;
58 insn->x86_64 = x86_64 ? 1 : 0; 59 insn->x86_64 = x86_64 ? 1 : 0;
59 insn->opnd_bytes = 4; 60 insn->opnd_bytes = 4;
diff --git a/arch/x86/tools/insn_sanity.c b/arch/x86/tools/insn_sanity.c
index 872eb60e7806..ba70ff232917 100644
--- a/arch/x86/tools/insn_sanity.c
+++ b/arch/x86/tools/insn_sanity.c
@@ -254,7 +254,7 @@ int main(int argc, char **argv)
254 continue; 254 continue;
255 255
256 /* Decode an instruction */ 256 /* Decode an instruction */
257 insn_init(&insn, insn_buf, x86_64); 257 insn_init(&insn, insn_buf, sizeof(insn_buf), x86_64);
258 insn_get_length(&insn); 258 insn_get_length(&insn);
259 259
260 if (insn.next_byte <= insn.kaddr || 260 if (insn.next_byte <= insn.kaddr ||
diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c
index 13403fc95a96..56f04db0c9c0 100644
--- a/arch/x86/tools/test_get_len.c
+++ b/arch/x86/tools/test_get_len.c
@@ -149,7 +149,7 @@ int main(int argc, char **argv)
149 break; 149 break;
150 } 150 }
151 /* Decode an instruction */ 151 /* Decode an instruction */
152 insn_init(&insn, insn_buf, x86_64); 152 insn_init(&insn, insn_buf, sizeof(insn_buf), x86_64);
153 insn_get_length(&insn); 153 insn_get_length(&insn);
154 if (insn.length != nb) { 154 if (insn.length != nb) {
155 warnings++; 155 warnings++;