aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/alternative.c
diff options
context:
space:
mode:
authorJiri Kosina <jkosina@suse.cz>2013-07-12 05:21:48 -0400
committerH. Peter Anvin <hpa@linux.intel.com>2013-07-16 20:55:29 -0400
commitfd4363fff3d96795d3feb1b3fb48ce590f186bdd (patch)
treedf1cc6d557edc0f3088323cf43cd7630f0f5ec49 /arch/x86/kernel/alternative.c
parentad81f0545ef01ea651886dddac4bef6cec930092 (diff)
x86: Introduce int3 (breakpoint)-based instruction patching
Introduce a method for run-time instruction patching on a live SMP kernel based on int3 breakpoint, completely avoiding the need for stop_machine(). The way this is achieved: - add a int3 trap to the address that will be patched - sync cores - update all but the first byte of the patched range - sync cores - replace the first byte (int3) by the first byte of replacing opcode - sync cores According to http://lkml.indiana.edu/hypermail/linux/kernel/1001.1/01530.html synchronization after replacing "all but first" instructions should not be necessary (on Intel hardware), as the syncing after the subsequent patching of the first byte provides enough safety. But there's not only Intel HW out there, and we'd rather be on a safe side. If any CPU instruction execution would collide with the patching, it'd be trapped by the int3 breakpoint and redirected to the provided "handler" (which would typically mean just skipping over the patched region, acting as "nop" has been there, in case we are doing nop -> jump and jump -> nop transitions). Ftrace has been using this very technique since 08d636b ("ftrace/x86: Have arch x86_64 use breakpoints instead of stop machine") for ages already, and jump labels are another obvious potential user of this. Based on activities of Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> a few years ago. Reviewed-by: Steven Rostedt <rostedt@goodmis.org> Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Signed-off-by: Jiri Kosina <jkosina@suse.cz> Link: http://lkml.kernel.org/r/alpine.LNX.2.00.1307121102440.29788@pobox.suse.cz Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'arch/x86/kernel/alternative.c')
-rw-r--r--arch/x86/kernel/alternative.c106
1 file changed, 106 insertions, 0 deletions
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index c15cf9a25e27..0ab49366a7a6 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -11,6 +11,7 @@
11#include <linux/memory.h> 11#include <linux/memory.h>
12#include <linux/stop_machine.h> 12#include <linux/stop_machine.h>
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/kdebug.h>
14#include <asm/alternative.h> 15#include <asm/alternative.h>
15#include <asm/sections.h> 16#include <asm/sections.h>
16#include <asm/pgtable.h> 17#include <asm/pgtable.h>
@@ -596,6 +597,111 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
596 return addr; 597 return addr;
597} 598}
598 599
/* SMP cross-call callback: run a serializing instruction on this CPU so it
 * picks up the just-patched instruction bytes. */
static void do_sync_core(void *info)
{
	sync_core();
}
604
/* Set while text_poke_bp() has its temporary int3 planted; read (after an
 * smp_rmb()) by int3_notify() to decide whether a trapped int3 is ours. */
605static bool bp_patching_in_progress;
/* bp_int3_handler: address the trapped context is diverted to.
 * bp_int3_addr: address just past the planted int3 byte (what regs->ip
 * holds after the trap) — used to match the breakpoint. */
606static void *bp_int3_handler, *bp_int3_addr;
607
/*
 * Die-chain notifier backing text_poke_bp(): if the int3 that fired is the
 * temporary breakpoint planted at the patch site, divert the trapped
 * context to the caller-supplied handler by rewriting regs->ip and swallow
 * the exception (NOTIFY_STOP); otherwise pass it on (NOTIFY_DONE).
 */
608static int int3_notify(struct notifier_block *self, unsigned long val, void *data)
609{
610	struct die_args *args = data;
611
612	/* bp_patching_in_progress */
	/* Pairs with the smp_wmb() in text_poke_bp() so the flag is seen
	 * correctly ordered with respect to the text patching itself. */
613	smp_rmb();
614
615	if (likely(!bp_patching_in_progress))
616		return NOTIFY_DONE;
617
618	/* we are not interested in non-int3 faults and ring > 0 faults */
619	if (val != DIE_INT3 || !args->regs || user_mode_vm(args->regs)
620		|| args->regs->ip != (unsigned long)bp_int3_addr)
621		return NOTIFY_DONE;
622
623	/* set up the specified breakpoint handler */
624	args->regs->ip = (unsigned long) bp_int3_handler;
625
626	return NOTIFY_STOP;
627}
628/**
629 * text_poke_bp() -- update instructions on live kernel on SMP
630 * @addr: address to patch
631 * @opcode: opcode of new instruction
632 * @len: length to copy
633 * @handler: address to jump to when the temporary breakpoint is hit
634 *
635 * Modify multi-byte instruction by using int3 breakpoint on SMP.
636 * In contrary to text_poke_smp(), we completely avoid stop_machine() here,
637 * and achieve the synchronization using int3 breakpoint.
638 *
639 * The way it is done:
640 * - add a int3 trap to the address that will be patched
641 * - sync cores
642 * - update all but the first byte of the patched range
643 * - sync cores
644 * - replace the first byte (int3) by the first byte of
645 * replacing opcode
646 * - sync cores
647 *
648 * Note: must be called under text_mutex.
649 */
650void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
651{
652 unsigned char int3 = 0xcc;
653
654 bp_int3_handler = handler;
655 bp_int3_addr = (u8 *)addr + sizeof(int3);
656 bp_patching_in_progress = true;
657 /*
658 * Corresponding read barrier in int3 notifier for
659 * making sure the in_progress flags is correctly ordered wrt.
660 * patching
661 */
662 smp_wmb();
663
664 text_poke(addr, &int3, sizeof(int3));
665
666 on_each_cpu(do_sync_core, NULL, 1);
667
668 if (len - sizeof(int3) > 0) {
669 /* patch all but the first byte */
670 text_poke((char *)addr + sizeof(int3),
671 (const char *) opcode + sizeof(int3),
672 len - sizeof(int3));
673 /*
674 * According to Intel, this core syncing is very likely
675 * not necessary and we'd be safe even without it. But
676 * better safe than sorry (plus there's not only Intel).
677 */
678 on_each_cpu(do_sync_core, NULL, 1);
679 }
680
681 /* patch the first byte */
682 text_poke(addr, opcode, sizeof(int3));
683
684 on_each_cpu(do_sync_core, NULL, 1);
685
686 bp_patching_in_progress = false;
687 smp_wmb();
688
689 return addr;
690}
691
692/* this one needs to run before anything else handles it as a
693 * regular exception */
694static struct notifier_block int3_nb = {
695 .priority = 0x7fffffff,
696 .notifier_call = int3_notify
697};
698
699static int __init int3_init(void)
700{
701 return register_die_notifier(&int3_nb);
702}
703
704arch_initcall(int3_init);
599/* 705/*
600 * Cross-modifying kernel text with stop_machine(). 706 * Cross-modifying kernel text with stop_machine().
601 * This code originally comes from immediate value. 707 * This code originally comes from immediate value.