author		Nadav Amit <namit@vmware.com>	2019-04-25 20:11:27 -0400
committer	Ingo Molnar <mingo@kernel.org>	2019-04-30 06:37:52 -0400
commit		b3fd8e83ada0d51b71a84297480187e2d40e5ded (patch)
tree		f6edb618afdbc8b44052ebdb697c3b95e86c65bc
parent		4fc19708b165c1c152fa1f12f6600e66184b7786 (diff)
x86/alternatives: Use temporary mm for text poking
text_poke() can potentially compromise security as it sets temporary
PTEs in the fixmap. These PTEs might be used to rewrite the kernel code
from other cores accidentally or maliciously, if an attacker gains the
ability to write onto kernel memory.

Moreover, since remote TLBs are not flushed after the temporary PTEs
are removed, the time-window in which the code is writable is not
limited if the fixmap PTEs - maliciously or accidentally - are cached
in the TLB.

To address these potential security hazards, use a temporary mm for
patching the code.

Finally, text_poke() is also not conservative enough when mapping
pages, as it always tries to map 2 pages, even when a single one is
sufficient. So try to be more conservative, and do not map more than
needed.

Signed-off-by: Nadav Amit <namit@vmware.com>
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: <akpm@linux-foundation.org>
Cc: <ard.biesheuvel@linaro.org>
Cc: <deneen.t.dock@intel.com>
Cc: <kernel-hardening@lists.openwall.com>
Cc: <kristen@linux.intel.com>
Cc: <linux_dti@icloud.com>
Cc: <will.deacon@arm.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Rik van Riel <riel@surriel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/20190426001143.4983-8-namit@vmware.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--	arch/x86/include/asm/fixmap.h	  2
-rw-r--r--	arch/x86/kernel/alternative.c	108
-rw-r--r--	arch/x86/xen/mmu_pv.c		  2
3 files changed, 86 insertions(+), 26 deletions(-)
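
For context, the patching path below leans on the temporary-mm
primitives (temp_mm_state_t, use_temporary_mm(), unuse_temporary_mm())
that this series introduces in arch/x86/include/asm/mmu_context.h. A
minimal sketch of their shape, assuming the definitions from that
series; shown for reference only, not part of this patch:

	typedef struct {
		struct mm_struct *mm;
	} temp_mm_state_t;

	/*
	 * Using a temporary mm allows this CPU to set mappings that are
	 * not visible to other CPUs. IRQs must stay disabled for the
	 * entire lifetime of the temporary mm.
	 */
	static inline temp_mm_state_t use_temporary_mm(struct mm_struct *mm)
	{
		temp_mm_state_t temp_state;

		lockdep_assert_irqs_disabled();
		/* Remember the loaded mm so it can be restored later. */
		temp_state.mm = this_cpu_read(cpu_tlbstate.loaded_mm);
		switch_mm_irqs_off(NULL, mm, current);
		return temp_state;
	}

	static inline void unuse_temporary_mm(temp_mm_state_t prev_state)
	{
		lockdep_assert_irqs_disabled();
		switch_mm_irqs_off(NULL, prev_state.mm, current);
	}

Because the switch happens with IRQs disabled and only on the local
CPU, the poking mappings never become visible to other cores, which is
the security property the commit message describes.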
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 50ba74a34a37..9da8cccdf3fb 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -103,8 +103,6 @@ enum fixed_addresses {
 #ifdef CONFIG_PARAVIRT
 	FIX_PARAVIRT_BOOTMAP,
 #endif
-	FIX_TEXT_POKE1,	/* reserve 2 pages for text_poke() */
-	FIX_TEXT_POKE0, /* first page is last, because allocation is backward */
 #ifdef CONFIG_X86_INTEL_MID
 	FIX_LNW_VRTC,
 #endif
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 11d5c710a94f..599203876c32 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -12,6 +12,7 @@
 #include <linux/slab.h>
 #include <linux/kdebug.h>
 #include <linux/kprobes.h>
+#include <linux/mmu_context.h>
 #include <asm/text-patching.h>
 #include <asm/alternative.h>
 #include <asm/sections.h>
@@ -684,41 +685,104 @@ __ro_after_init unsigned long poking_addr;
 
 static void *__text_poke(void *addr, const void *opcode, size_t len)
 {
+	bool cross_page_boundary = offset_in_page(addr) + len > PAGE_SIZE;
+	struct page *pages[2] = {NULL};
+	temp_mm_state_t prev;
 	unsigned long flags;
-	char *vaddr;
-	struct page *pages[2];
-	int i;
+	pte_t pte, *ptep;
+	spinlock_t *ptl;
+	pgprot_t pgprot;
 
 	/*
-	 * While boot memory allocator is runnig we cannot use struct
-	 * pages as they are not yet initialized.
+	 * While boot memory allocator is running we cannot use struct pages as
+	 * they are not yet initialized. There is no way to recover.
 	 */
 	BUG_ON(!after_bootmem);
 
 	if (!core_kernel_text((unsigned long)addr)) {
 		pages[0] = vmalloc_to_page(addr);
-		pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
+		if (cross_page_boundary)
+			pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
 	} else {
 		pages[0] = virt_to_page(addr);
 		WARN_ON(!PageReserved(pages[0]));
-		pages[1] = virt_to_page(addr + PAGE_SIZE);
+		if (cross_page_boundary)
+			pages[1] = virt_to_page(addr + PAGE_SIZE);
 	}
-	BUG_ON(!pages[0]);
+	/*
+	 * If something went wrong, crash and burn since recovery paths are not
+	 * implemented.
+	 */
+	BUG_ON(!pages[0] || (cross_page_boundary && !pages[1]));
+
 	local_irq_save(flags);
-	set_fixmap(FIX_TEXT_POKE0, page_to_phys(pages[0]));
-	if (pages[1])
-		set_fixmap(FIX_TEXT_POKE1, page_to_phys(pages[1]));
-	vaddr = (char *)fix_to_virt(FIX_TEXT_POKE0);
-	memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
-	clear_fixmap(FIX_TEXT_POKE0);
-	if (pages[1])
-		clear_fixmap(FIX_TEXT_POKE1);
-	local_flush_tlb();
-	sync_core();
-	/* Could also do a CLFLUSH here to speed up CPU recovery; but
-	   that causes hangs on some VIA CPUs. */
-	for (i = 0; i < len; i++)
-		BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]);
+
+	/*
+	 * Map the page without the global bit, as TLB flushing is done with
+	 * flush_tlb_mm_range(), which is intended for non-global PTEs.
+	 */
+	pgprot = __pgprot(pgprot_val(PAGE_KERNEL) & ~_PAGE_GLOBAL);
+
+	/*
+	 * The lock is not really needed, but this allows to avoid open-coding.
+	 */
+	ptep = get_locked_pte(poking_mm, poking_addr, &ptl);
+
+	/*
+	 * This must not fail; preallocated in poking_init().
+	 */
+	VM_BUG_ON(!ptep);
+
+	pte = mk_pte(pages[0], pgprot);
+	set_pte_at(poking_mm, poking_addr, ptep, pte);
+
+	if (cross_page_boundary) {
+		pte = mk_pte(pages[1], pgprot);
+		set_pte_at(poking_mm, poking_addr + PAGE_SIZE, ptep + 1, pte);
+	}
+
+	/*
+	 * Loading the temporary mm behaves as a compiler barrier, which
+	 * guarantees that the PTE will be set at the time memcpy() is done.
+	 */
+	prev = use_temporary_mm(poking_mm);
+
+	kasan_disable_current();
+	memcpy((u8 *)poking_addr + offset_in_page(addr), opcode, len);
+	kasan_enable_current();
+
+	/*
+	 * Ensure that the PTE is only cleared after the instructions of memcpy
+	 * were issued by using a compiler barrier.
+	 */
+	barrier();
+
+	pte_clear(poking_mm, poking_addr, ptep);
+	if (cross_page_boundary)
+		pte_clear(poking_mm, poking_addr + PAGE_SIZE, ptep + 1);
+
+	/*
+	 * Loading the previous page-table hierarchy requires a serializing
+	 * instruction that already allows the core to see the updated version.
+	 * Xen-PV is assumed to serialize execution in a similar manner.
+	 */
+	unuse_temporary_mm(prev);
+
+	/*
+	 * Flushing the TLB might involve IPIs, which would require enabled
+	 * IRQs, but not if the mm is not used, as it is in this point.
+	 */
+	flush_tlb_mm_range(poking_mm, poking_addr, poking_addr +
+			   (cross_page_boundary ? 2 : 1) * PAGE_SIZE,
+			   PAGE_SHIFT, false);
+
+	/*
+	 * If the text does not match what we just wrote then something is
+	 * fundamentally screwy; there's nothing we can really do about that.
+	 */
+	BUG_ON(memcmp(addr, opcode, len));
+
+	pte_unmap_unlock(ptep, ptl);
 	local_irq_restore(flags);
 	return addr;
 }
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index a21e1734fc1f..beb44e22afdf 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -2318,8 +2318,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 #elif defined(CONFIG_X86_VSYSCALL_EMULATION)
 	case VSYSCALL_PAGE:
 #endif
-	case FIX_TEXT_POKE0:
-	case FIX_TEXT_POKE1:
 		/* All local page mappings */
 		pte = pfn_pte(phys, prot);
 		break;
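
For reference, the VM_BUG_ON(!ptep) in __text_poke() can assume the PTE
exists because the parent commit (4fc19708b165, "x86/alternatives:
Initialize temporary mm for patching") sets up poking_mm and
poking_addr at boot. A rough sketch of that initialization, assuming it
matches the parent commit; shown for context only:

	void __init poking_init(void)
	{
		spinlock_t *ptl;
		pte_t *ptep;

		/* A dedicated mm, never loaded outside of patching. */
		poking_mm = copy_init_mm();
		BUG_ON(!poking_mm);

		/*
		 * Randomize the poking address, but make sure that the
		 * following page will be mapped at the same PMD. We need 2
		 * pages, so find space for 3, and adjust the address if the
		 * PMD ends after the first one.
		 */
		poking_addr = TASK_UNMAPPED_BASE;
		if (IS_ENABLED(CONFIG_RANDOMIZE_BASE))
			poking_addr += (kaslr_get_random_long("Poking") & PAGE_MASK) %
				(TASK_SIZE - TASK_UNMAPPED_BASE - 3 * PAGE_SIZE);

		if (((poking_addr + PAGE_SIZE) & ~PMD_MASK) == 0)
			poking_addr += PAGE_SIZE;

		/*
		 * Trigger the allocation of the page-tables for the poking
		 * address now; later, poking may happen in atomic context,
		 * where allocation could fail.
		 */
		ptep = get_locked_pte(poking_mm, poking_addr, &ptl);
		BUG_ON(!ptep);
		pte_unmap_unlock(ptep, ptl);
	}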