aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Matt Fleming <matt.fleming@intel.com> 2015-09-25 18:02:18 -0400
committer Ingo Molnar <mingo@kernel.org> 2015-10-01 06:51:28 -0400
commit a5caa209ba9c29c6421292e7879d2387a2ef39c9 (patch)
tree 26ce6df7867c604e2d30b2f6d28867133c2edb49
parent dd36d7393d6310b0c1adefb22fba79c3cf8a577c (diff)
x86/efi: Fix boot crash by mapping EFI memmap entries bottom-up at runtime, instead of top-down
Beginning with UEFI v2.5 EFI_PROPERTIES_TABLE was introduced that signals that the firmware PE/COFF loader supports splitting code and data sections of PE/COFF images into separate EFI memory map entries. This allows the kernel to map those regions with strict memory protections, e.g. EFI_MEMORY_RO for code, EFI_MEMORY_XP for data, etc.

Unfortunately, an unwritten requirement of this new feature is that the regions need to be mapped with the same offsets relative to each other as observed in the EFI memory map. If this is not done crashes like this may occur,

  BUG: unable to handle kernel paging request at fffffffefe6086dd
  IP: [<fffffffefe6086dd>] 0xfffffffefe6086dd
  Call Trace:
   [<ffffffff8104c90e>] efi_call+0x7e/0x100
   [<ffffffff81602091>] ? virt_efi_set_variable+0x61/0x90
   [<ffffffff8104c583>] efi_delete_dummy_variable+0x63/0x70
   [<ffffffff81f4e4aa>] efi_enter_virtual_mode+0x383/0x392
   [<ffffffff81f37e1b>] start_kernel+0x38a/0x417
   [<ffffffff81f37495>] x86_64_start_reservations+0x2a/0x2c
   [<ffffffff81f37582>] x86_64_start_kernel+0xeb/0xef

Here 0xfffffffefe6086dd refers to an address the firmware expects to be mapped but which the OS never claimed was mapped. The issue is that included in these regions are relative addresses to other regions which were emitted by the firmware toolchain before the "splitting" of sections occurred at runtime.

Needless to say, we don't satisfy this unwritten requirement on x86_64 and instead map the EFI memory map entries in reverse order. The above crash is almost certainly triggerable with any kernel newer than v3.13 because that's when we rewrote the EFI runtime region mapping code, in commit d2f7cbe7b26a ("x86/efi: Runtime services virtual mapping"). For kernel versions before v3.13 things may work by pure luck depending on the fragmentation of the kernel virtual address space at the time we map the EFI regions.
Instead of mapping the EFI memory map entries in reverse order, where entry N has a higher virtual address than entry N+1, map them in the same order as they appear in the EFI memory map to preserve this relative offset between regions.

This patch has been kept as small as possible with the intention that it should be applied aggressively to stable and distribution kernels. It is very much a bugfix rather than support for a new feature, since when EFI_PROPERTIES_TABLE is enabled we must map things as outlined above to even boot - we have no way of asking the firmware not to split the code/data regions.

In fact, this patch doesn't even make use of the more strict memory protections available in UEFI v2.5. That will come later.

Suggested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reported-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
Cc: <stable@vger.kernel.org>
Cc: Borislav Petkov <bp@suse.de>
Cc: Chun-Yi <jlee@suse.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: James Bottomley <JBottomley@Odin.com>
Cc: Lee, Chun-Yi <jlee@suse.com>
Cc: Leif Lindholm <leif.lindholm@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matthew Garrett <mjg59@srcf.ucam.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Jones <pjones@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/1443218539-7610-2-git-send-email-matt@codeblueprint.co.uk
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- arch/x86/platform/efi/efi.c 67
1 files changed, 66 insertions, 1 deletions
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 1db84c0758b7..6a28ded74211 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -705,6 +705,70 @@ out:
705} 705}
706 706
707/* 707/*
708 * Iterate the EFI memory map in reverse order because the regions
709 * will be mapped top-down. The end result is the same as if we had
710 * mapped things forward, but doesn't require us to change the
711 * existing implementation of efi_map_region().
712 */
713static inline void *efi_map_next_entry_reverse(void *entry)
714{
715 /* Initial call */
716 if (!entry)
717 return memmap.map_end - memmap.desc_size;
718
719 entry -= memmap.desc_size;
720 if (entry < memmap.map)
721 return NULL;
722
723 return entry;
724}
725
726/*
727 * efi_map_next_entry - Return the next EFI memory map descriptor
728 * @entry: Previous EFI memory map descriptor
729 *
730 * This is a helper function to iterate over the EFI memory map, which
731 * we do in different orders depending on the current configuration.
732 *
733 * To begin traversing the memory map @entry must be %NULL.
734 *
735 * Returns %NULL when we reach the end of the memory map.
736 */
737static void *efi_map_next_entry(void *entry)
738{
739 if (!efi_enabled(EFI_OLD_MEMMAP) && efi_enabled(EFI_64BIT)) {
740 /*
741 * Starting in UEFI v2.5 the EFI_PROPERTIES_TABLE
742 * config table feature requires us to map all entries
743 * in the same order as they appear in the EFI memory
744 * map. That is to say, entry N must have a lower
745 * virtual address than entry N+1. This is because the
746 * firmware toolchain leaves relative references in
747 * the code/data sections, which are split and become
748 * separate EFI memory regions. Mapping things
749 * out-of-order leads to the firmware accessing
750 * unmapped addresses.
751 *
752 * Since we need to map things this way whether or not
753 * the kernel actually makes use of
754 * EFI_PROPERTIES_TABLE, let's just switch to this
755 * scheme by default for 64-bit.
756 */
757 return efi_map_next_entry_reverse(entry);
758 }
759
760 /* Initial call */
761 if (!entry)
762 return memmap.map;
763
764 entry += memmap.desc_size;
765 if (entry >= memmap.map_end)
766 return NULL;
767
768 return entry;
769}
770
771/*
708 * Map the efi memory ranges of the runtime services and update new_mmap with 772 * Map the efi memory ranges of the runtime services and update new_mmap with
709 * virtual addresses. 773 * virtual addresses.
710 */ 774 */
@@ -714,7 +778,8 @@ static void * __init efi_map_regions(int *count, int *pg_shift)
714 unsigned long left = 0; 778 unsigned long left = 0;
715 efi_memory_desc_t *md; 779 efi_memory_desc_t *md;
716 780
717 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { 781 p = NULL;
782 while ((p = efi_map_next_entry(p))) {
718 md = p; 783 md = p;
719 if (!(md->attribute & EFI_MEMORY_RUNTIME)) { 784 if (!(md->attribute & EFI_MEMORY_RUNTIME)) {
720#ifdef CONFIG_X86_64 785#ifdef CONFIG_X86_64