diff options
author | Dave Hansen <dave.hansen@linux.intel.com> | 2014-11-14 10:39:57 -0500 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2014-11-17 18:58:52 -0500 |
commit | 6ba48ff46f764414f979d2eacb23c4e6296bcc95 (patch) | |
tree | c1e1ea9fd3e1ef9c396e36c5d446fa6f74d97224 /arch/x86/kernel | |
parent | 206c5f60a3d902bc4b56dab2de3e88de5eb06108 (diff) |
x86: Remove arbitrary instruction size limit in instruction decoder
The current x86 instruction decoder steps along through the
instruction stream but always ensures that it never steps farther
than the largest possible instruction size (MAX_INSN_SIZE).
The MPX code is now going to be doing some decoding of userspace
instructions. We copy those from userspace in to the kernel and
they're obviously completely untrusted coming from userspace. In
addition to the constraint that instructions can only be so long,
we also have to be aware of how long the buffer is that came in
from userspace. This _looks_ to be similar to what the perf and
kprobes is doing, but it's unclear to me whether they are
affected.
The whole reason we need this is that it is perfectly valid to be
executing an instruction within MAX_INSN_SIZE bytes of an
unreadable page. We should be able to gracefully handle short
reads in those cases.
This adds support to the decoder to record how long the buffer
being decoded is and to refuse to "validate" the instruction if
we would have gone over the end of the buffer to decode it.
The kprobes code probably needs to be looked at here a bit more
carefully. This patch still respects the MAX_INSN_SIZE limit
there but the kprobes code does look like it might be able to
be a bit more strict than it currently is.
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Jim Keniston <jkenisto@us.ibm.com>
Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: x86@kernel.org
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: "David S. Miller" <davem@davemloft.net>
Link: http://lkml.kernel.org/r/20141114153957.E6B01535@viggo.jf.intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel_ds.c | 17 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel_lbr.c | 25 | ||||
-rw-r--r-- | arch/x86/kernel/kprobes/core.c | 8 | ||||
-rw-r--r-- | arch/x86/kernel/kprobes/opt.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/uprobes.c | 2 |
5 files changed, 42 insertions, 14 deletions
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 46211bcc813e..6a94277525c9 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c | |||
@@ -724,6 +724,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) | |||
724 | unsigned long ip = regs->ip; | 724 | unsigned long ip = regs->ip; |
725 | int is_64bit = 0; | 725 | int is_64bit = 0; |
726 | void *kaddr; | 726 | void *kaddr; |
727 | int size; | ||
727 | 728 | ||
728 | /* | 729 | /* |
729 | * We don't need to fixup if the PEBS assist is fault like | 730 | * We don't need to fixup if the PEBS assist is fault like |
@@ -758,11 +759,12 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) | |||
758 | return 1; | 759 | return 1; |
759 | } | 760 | } |
760 | 761 | ||
762 | size = ip - to; | ||
761 | if (!kernel_ip(ip)) { | 763 | if (!kernel_ip(ip)) { |
762 | int size, bytes; | 764 | int bytes; |
763 | u8 *buf = this_cpu_read(insn_buffer); | 765 | u8 *buf = this_cpu_read(insn_buffer); |
764 | 766 | ||
765 | size = ip - to; /* Must fit our buffer, see above */ | 767 | /* 'size' must fit our buffer, see above */ |
766 | bytes = copy_from_user_nmi(buf, (void __user *)to, size); | 768 | bytes = copy_from_user_nmi(buf, (void __user *)to, size); |
767 | if (bytes != 0) | 769 | if (bytes != 0) |
768 | return 0; | 770 | return 0; |
@@ -780,11 +782,20 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) | |||
780 | #ifdef CONFIG_X86_64 | 782 | #ifdef CONFIG_X86_64 |
781 | is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32); | 783 | is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32); |
782 | #endif | 784 | #endif |
783 | insn_init(&insn, kaddr, is_64bit); | 785 | insn_init(&insn, kaddr, size, is_64bit); |
784 | insn_get_length(&insn); | 786 | insn_get_length(&insn); |
787 | /* | ||
788 | * Make sure there was not a problem decoding the | ||
789 | * instruction and getting the length. This is | ||
790 | * doubly important because we have an infinite | ||
791 | * loop if insn.length=0. | ||
792 | */ | ||
793 | if (!insn.length) | ||
794 | break; | ||
785 | 795 | ||
786 | to += insn.length; | 796 | to += insn.length; |
787 | kaddr += insn.length; | 797 | kaddr += insn.length; |
798 | size -= insn.length; | ||
788 | } while (to < ip); | 799 | } while (to < ip); |
789 | 800 | ||
790 | if (to == ip) { | 801 | if (to == ip) { |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index 45fa730a5283..58f1a94beaf0 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c | |||
@@ -465,7 +465,7 @@ static int branch_type(unsigned long from, unsigned long to, int abort) | |||
465 | { | 465 | { |
466 | struct insn insn; | 466 | struct insn insn; |
467 | void *addr; | 467 | void *addr; |
468 | int bytes, size = MAX_INSN_SIZE; | 468 | int bytes_read, bytes_left; |
469 | int ret = X86_BR_NONE; | 469 | int ret = X86_BR_NONE; |
470 | int ext, to_plm, from_plm; | 470 | int ext, to_plm, from_plm; |
471 | u8 buf[MAX_INSN_SIZE]; | 471 | u8 buf[MAX_INSN_SIZE]; |
@@ -493,8 +493,10 @@ static int branch_type(unsigned long from, unsigned long to, int abort) | |||
493 | return X86_BR_NONE; | 493 | return X86_BR_NONE; |
494 | 494 | ||
495 | /* may fail if text not present */ | 495 | /* may fail if text not present */ |
496 | bytes = copy_from_user_nmi(buf, (void __user *)from, size); | 496 | bytes_left = copy_from_user_nmi(buf, (void __user *)from, |
497 | if (bytes != 0) | 497 | MAX_INSN_SIZE); |
498 | bytes_read = MAX_INSN_SIZE - bytes_left; | ||
499 | if (!bytes_read) | ||
498 | return X86_BR_NONE; | 500 | return X86_BR_NONE; |
499 | 501 | ||
500 | addr = buf; | 502 | addr = buf; |
@@ -505,10 +507,19 @@ static int branch_type(unsigned long from, unsigned long to, int abort) | |||
505 | * Ensure we don't blindy read any address by validating it is | 507 | * Ensure we don't blindy read any address by validating it is |
506 | * a known text address. | 508 | * a known text address. |
507 | */ | 509 | */ |
508 | if (kernel_text_address(from)) | 510 | if (kernel_text_address(from)) { |
509 | addr = (void *)from; | 511 | addr = (void *)from; |
510 | else | 512 | /* |
513 | * Assume we can get the maximum possible size | ||
514 | * when grabbing kernel data. This is not | ||
515 | * _strictly_ true since we could possibly be | ||
516 | * executing up next to a memory hole, but | ||
517 | * it is very unlikely to be a problem. | ||
518 | */ | ||
519 | bytes_read = MAX_INSN_SIZE; | ||
520 | } else { | ||
511 | return X86_BR_NONE; | 521 | return X86_BR_NONE; |
522 | } | ||
512 | } | 523 | } |
513 | 524 | ||
514 | /* | 525 | /* |
@@ -518,8 +529,10 @@ static int branch_type(unsigned long from, unsigned long to, int abort) | |||
518 | #ifdef CONFIG_X86_64 | 529 | #ifdef CONFIG_X86_64 |
519 | is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32); | 530 | is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32); |
520 | #endif | 531 | #endif |
521 | insn_init(&insn, addr, is64); | 532 | insn_init(&insn, addr, bytes_read, is64); |
522 | insn_get_opcode(&insn); | 533 | insn_get_opcode(&insn); |
534 | if (!insn.opcode.got) | ||
535 | return X86_BR_ABORT; | ||
523 | 536 | ||
524 | switch (insn.opcode.bytes[0]) { | 537 | switch (insn.opcode.bytes[0]) { |
525 | case 0xf: | 538 | case 0xf: |
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 67e6d19ef1be..f7e3cd50ece0 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c | |||
@@ -285,7 +285,7 @@ static int can_probe(unsigned long paddr) | |||
285 | * normally used, we just go through if there is no kprobe. | 285 | * normally used, we just go through if there is no kprobe. |
286 | */ | 286 | */ |
287 | __addr = recover_probed_instruction(buf, addr); | 287 | __addr = recover_probed_instruction(buf, addr); |
288 | kernel_insn_init(&insn, (void *)__addr); | 288 | kernel_insn_init(&insn, (void *)__addr, MAX_INSN_SIZE); |
289 | insn_get_length(&insn); | 289 | insn_get_length(&insn); |
290 | 290 | ||
291 | /* | 291 | /* |
@@ -330,8 +330,10 @@ int __copy_instruction(u8 *dest, u8 *src) | |||
330 | { | 330 | { |
331 | struct insn insn; | 331 | struct insn insn; |
332 | kprobe_opcode_t buf[MAX_INSN_SIZE]; | 332 | kprobe_opcode_t buf[MAX_INSN_SIZE]; |
333 | unsigned long recovered_insn = | ||
334 | recover_probed_instruction(buf, (unsigned long)src); | ||
333 | 335 | ||
334 | kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, (unsigned long)src)); | 336 | kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE); |
335 | insn_get_length(&insn); | 337 | insn_get_length(&insn); |
336 | /* Another subsystem puts a breakpoint, failed to recover */ | 338 | /* Another subsystem puts a breakpoint, failed to recover */ |
337 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) | 339 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) |
@@ -342,7 +344,7 @@ int __copy_instruction(u8 *dest, u8 *src) | |||
342 | if (insn_rip_relative(&insn)) { | 344 | if (insn_rip_relative(&insn)) { |
343 | s64 newdisp; | 345 | s64 newdisp; |
344 | u8 *disp; | 346 | u8 *disp; |
345 | kernel_insn_init(&insn, dest); | 347 | kernel_insn_init(&insn, dest, insn.length); |
346 | insn_get_displacement(&insn); | 348 | insn_get_displacement(&insn); |
347 | /* | 349 | /* |
348 | * The copied instruction uses the %rip-relative addressing | 350 | * The copied instruction uses the %rip-relative addressing |
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index f1314d0bcf0a..7c523bbf3dc8 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c | |||
@@ -251,13 +251,15 @@ static int can_optimize(unsigned long paddr) | |||
251 | /* Decode instructions */ | 251 | /* Decode instructions */ |
252 | addr = paddr - offset; | 252 | addr = paddr - offset; |
253 | while (addr < paddr - offset + size) { /* Decode until function end */ | 253 | while (addr < paddr - offset + size) { /* Decode until function end */ |
254 | unsigned long recovered_insn; | ||
254 | if (search_exception_tables(addr)) | 255 | if (search_exception_tables(addr)) |
255 | /* | 256 | /* |
256 | * Since some fixup code will jumps into this function, | 257 | * Since some fixup code will jumps into this function, |
257 | * we can't optimize kprobe in this function. | 258 | * we can't optimize kprobe in this function. |
258 | */ | 259 | */ |
259 | return 0; | 260 | return 0; |
260 | kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, addr)); | 261 | recovered_insn = recover_probed_instruction(buf, addr); |
262 | kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE); | ||
261 | insn_get_length(&insn); | 263 | insn_get_length(&insn); |
262 | /* Another subsystem puts a breakpoint */ | 264 | /* Another subsystem puts a breakpoint */ |
263 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) | 265 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) |
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 5d1cbfe4ae58..8b96a947021f 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c | |||
@@ -219,7 +219,7 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool | |||
219 | { | 219 | { |
220 | u32 volatile *good_insns; | 220 | u32 volatile *good_insns; |
221 | 221 | ||
222 | insn_init(insn, auprobe->insn, x86_64); | 222 | insn_init(insn, auprobe->insn, sizeof(auprobe->insn), x86_64); |
223 | /* has the side-effect of processing the entire instruction */ | 223 | /* has the side-effect of processing the entire instruction */ |
224 | insn_get_length(insn); | 224 | insn_get_length(insn); |
225 | if (WARN_ON_ONCE(!insn_complete(insn))) | 225 | if (WARN_ON_ONCE(!insn_complete(insn))) |