author	Dave Hansen <dave.hansen@linux.intel.com>	2014-11-14 10:39:57 -0500
committer	Thomas Gleixner <tglx@linutronix.de>	2014-11-17 18:58:52 -0500
commit	6ba48ff46f764414f979d2eacb23c4e6296bcc95 (patch)
tree	c1e1ea9fd3e1ef9c396e36c5d446fa6f74d97224 /arch/x86/kernel
parent	206c5f60a3d902bc4b56dab2de3e88de5eb06108 (diff)
x86: Remove arbitrary instruction size limit in instruction decoder
The current x86 instruction decoder steps along through the instruction stream but always ensures that it never steps farther than the largest possible instruction size (MAX_INSN_SIZE).

The MPX code is now going to be doing some decoding of userspace instructions. We copy those from userspace into the kernel, and they are obviously completely untrusted coming from userspace. In addition to the constraint that instructions can only be so long, we also have to be aware of how long the buffer that came in from userspace is. This _looks_ similar to what perf and kprobes are doing, but it is unclear to me whether they are affected.

The whole reason we need this is that it is perfectly valid to be executing an instruction within MAX_INSN_SIZE bytes of an unreadable page. We should be able to gracefully handle short reads in those cases.

This adds support to the decoder to record how long the buffer being decoded is and to refuse to "validate" the instruction if we would have gone over the end of the buffer to decode it.

The kprobes code probably needs to be looked at here a bit more carefully. This patch still respects the MAX_INSN_SIZE limit there, but the kprobes code does look like it might be able to be a bit more strict than it currently is.

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Jim Keniston <jkenisto@us.ibm.com>
Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: x86@kernel.org
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: "David S. Miller" <davem@davemloft.net>
Link: http://lkml.kernel.org/r/20141114153957.E6B01535@viggo.jf.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
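As an illustration of the new calling convention, this is roughly what a caller decoding untrusted user bytes looks like after this patch. It is a sketch assembled from the hunks below, not code from the patch itself; 'ip' and 'is_64bit' stand in for whatever the caller has, the error codes are illustrative, and copy_from_user_nmi() returns the number of bytes it could NOT copy:

	struct insn insn;
	u8 buf[MAX_INSN_SIZE];
	int bytes_left, bytes_read;

	/* A short read is no longer fatal: only 'bytes_read' bytes are valid. */
	bytes_left = copy_from_user_nmi(buf, (void __user *)ip, MAX_INSN_SIZE);
	bytes_read = MAX_INSN_SIZE - bytes_left;
	if (!bytes_read)
		return -EFAULT;		/* nothing readable at 'ip' at all */

	/* New: tell the decoder how many bytes are actually valid ... */
	insn_init(&insn, buf, bytes_read, is_64bit);
	insn_get_length(&insn);

	/* ... so it refuses to "validate" an insn that runs past the buffer. */
	if (!insn_complete(&insn))
		return -EILSEQ;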
Diffstat (limited to 'arch/x86/kernel')
 arch/x86/kernel/cpu/perf_event_intel_ds.c  | 17
 arch/x86/kernel/cpu/perf_event_intel_lbr.c | 25
 arch/x86/kernel/kprobes/core.c             |  8
 arch/x86/kernel/kprobes/opt.c              |  4
 arch/x86/kernel/uprobes.c                  |  2
 5 files changed, 42 insertions(+), 14 deletions(-)
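Note that the decoder-side change itself (recording the buffer length and bailing out before reading past it) lives in arch/x86/lib/insn.c and arch/x86/include/asm/insn.h, which the diffstat filter above hides. Conceptually it amounts to something like the following; the field name and check below are a hypothetical sketch of the idea, not the verbatim insn.c code:

	/* at insn_init() time: remember where the caller's buffer ends */
	insn->end_kaddr = kaddr + buf_len;

	/* before consuming the next 'n' bytes: refuse to run off the end */
	if (insn->next_byte + n > insn->end_kaddr)
		return;		/* leave the insn incomplete / unvalidated */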
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 46211bcc813e..6a94277525c9 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -724,6 +724,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
 	unsigned long ip = regs->ip;
 	int is_64bit = 0;
 	void *kaddr;
+	int size;
 
 	/*
 	 * We don't need to fixup if the PEBS assist is fault like
@@ -758,11 +759,12 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
 		return 1;
 	}
 
+	size = ip - to;
 	if (!kernel_ip(ip)) {
-		int size, bytes;
+		int bytes;
 		u8 *buf = this_cpu_read(insn_buffer);
 
-		size = ip - to;	/* Must fit our buffer, see above */
+		/* 'size' must fit our buffer, see above */
 		bytes = copy_from_user_nmi(buf, (void __user *)to, size);
 		if (bytes != 0)
 			return 0;
@@ -780,11 +782,20 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
 #ifdef CONFIG_X86_64
 		is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32);
 #endif
-		insn_init(&insn, kaddr, is_64bit);
+		insn_init(&insn, kaddr, size, is_64bit);
 		insn_get_length(&insn);
+		/*
+		 * Make sure there was not a problem decoding the
+		 * instruction and getting the length.  This is
+		 * doubly important because we have an infinite
+		 * loop if insn.length=0.
+		 */
+		if (!insn.length)
+			break;
 
 		to += insn.length;
 		kaddr += insn.length;
+		size -= insn.length;
 	} while (to < ip);
 
 	if (to == ip) {
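Pieced together, the three hunks above turn the PEBS fixup walk into a bounded decode loop; a sketch of the resulting shape (abridged, not the verbatim function):

	size = ip - to;		/* total bytes between 'to' and the sampled ip */
	do {
		insn_init(&insn, kaddr, size, is_64bit);
		insn_get_length(&insn);
		if (!insn.length)	/* decode failed; avoids the 0-length infinite loop */
			break;
		to    += insn.length;
		kaddr += insn.length;
		size  -= insn.length;	/* shrink the window the decoder may read */
	} while (to < ip);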
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 45fa730a5283..58f1a94beaf0 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -465,7 +465,7 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
 {
 	struct insn insn;
 	void *addr;
-	int bytes, size = MAX_INSN_SIZE;
+	int bytes_read, bytes_left;
 	int ret = X86_BR_NONE;
 	int ext, to_plm, from_plm;
 	u8 buf[MAX_INSN_SIZE];
@@ -493,8 +493,10 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
 		return X86_BR_NONE;
 
 	/* may fail if text not present */
-	bytes = copy_from_user_nmi(buf, (void __user *)from, size);
-	if (bytes != 0)
+	bytes_left = copy_from_user_nmi(buf, (void __user *)from,
+					MAX_INSN_SIZE);
+	bytes_read = MAX_INSN_SIZE - bytes_left;
+	if (!bytes_read)
 		return X86_BR_NONE;
 
 	addr = buf;
@@ -505,10 +507,19 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
 	 * Ensure we don't blindy read any address by validating it is
 	 * a known text address.
 	 */
-	if (kernel_text_address(from))
+	if (kernel_text_address(from)) {
 		addr = (void *)from;
-	else
+		/*
+		 * Assume we can get the maximum possible size
+		 * when grabbing kernel data.  This is not
+		 * _strictly_ true since we could possibly be
+		 * executing up next to a memory hole, but
+		 * it is very unlikely to be a problem.
+		 */
+		bytes_read = MAX_INSN_SIZE;
+	} else {
 		return X86_BR_NONE;
+	}
 	}
 
 	/*
@@ -518,8 +529,10 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
 #ifdef CONFIG_X86_64
 	is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
 #endif
-	insn_init(&insn, addr, is64);
+	insn_init(&insn, addr, bytes_read, is64);
 	insn_get_opcode(&insn);
+	if (!insn.opcode.got)
+		return X86_BR_ABORT;
 
 	switch (insn.opcode.bytes[0]) {
 	case 0xf:
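One easy-to-misread detail in the hunks above: copy_from_user_nmi() follows the copy_from_user() convention and returns the number of bytes it could NOT copy, hence the subtraction. A worked example with hypothetical numbers:

	/*
	 * Suppose 'from' lies 5 bytes before an unmapped page: the call
	 * copies 5 bytes and returns MAX_INSN_SIZE - 5, so bytes_read
	 * works out to exactly the 5 bytes that are valid in 'buf'.
	 */
	bytes_left = copy_from_user_nmi(buf, (void __user *)from, MAX_INSN_SIZE);
	bytes_read = MAX_INSN_SIZE - bytes_left;
	if (!bytes_read)		/* not even the first byte was readable */
		return X86_BR_NONE;
	insn_init(&insn, buf, bytes_read, is64);	/* decode only what we have */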
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 67e6d19ef1be..f7e3cd50ece0 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -285,7 +285,7 @@ static int can_probe(unsigned long paddr)
 	 * normally used, we just go through if there is no kprobe.
 	 */
 	__addr = recover_probed_instruction(buf, addr);
-	kernel_insn_init(&insn, (void *)__addr);
+	kernel_insn_init(&insn, (void *)__addr, MAX_INSN_SIZE);
 	insn_get_length(&insn);
 
 	/*
@@ -330,8 +330,10 @@ int __copy_instruction(u8 *dest, u8 *src)
 {
 	struct insn insn;
 	kprobe_opcode_t buf[MAX_INSN_SIZE];
+	unsigned long recovered_insn =
+		recover_probed_instruction(buf, (unsigned long)src);
 
-	kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, (unsigned long)src));
+	kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
 	insn_get_length(&insn);
 	/* Another subsystem puts a breakpoint, failed to recover */
 	if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
@@ -342,7 +344,7 @@ int __copy_instruction(u8 *dest, u8 *src)
 	if (insn_rip_relative(&insn)) {
 		s64 newdisp;
 		u8 *disp;
-		kernel_insn_init(&insn, dest);
+		kernel_insn_init(&insn, dest, insn.length);
 		insn_get_displacement(&insn);
 		/*
 		 * The copied instruction uses the %rip-relative addressing
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index f1314d0bcf0a..7c523bbf3dc8 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -251,13 +251,15 @@ static int can_optimize(unsigned long paddr)
 	/* Decode instructions */
 	addr = paddr - offset;
 	while (addr < paddr - offset + size) { /* Decode until function end */
+		unsigned long recovered_insn;
 		if (search_exception_tables(addr))
 			/*
 			 * Since some fixup code will jumps into this function,
 			 * we can't optimize kprobe in this function.
 			 */
 			return 0;
-		kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, addr));
+		recovered_insn = recover_probed_instruction(buf, addr);
+		kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
 		insn_get_length(&insn);
 		/* Another subsystem puts a breakpoint */
 		if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 5d1cbfe4ae58..8b96a947021f 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -219,7 +219,7 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool
 {
 	u32 volatile *good_insns;
 
-	insn_init(insn, auprobe->insn, x86_64);
+	insn_init(insn, auprobe->insn, sizeof(auprobe->insn), x86_64);
 	/* has the side-effect of processing the entire instruction */
 	insn_get_length(insn);
 	if (WARN_ON_ONCE(!insn_complete(insn)))