diff options
author | Borislav Petkov <bp@suse.de> | 2013-10-31 12:25:08 -0400 |
---|---|---|
committer | Matt Fleming <matt.fleming@intel.com> | 2013-11-02 07:09:36 -0400 |
commit | d2f7cbe7b26a74dbbbf8f325b2a6fd01bc34032c (patch) | |
tree | 69d2c1f92120fa532c77cf8634418b7622d55692 | |
parent | 82f0712ca0f947170e785300b5c39d9c25e2f6ff (diff) |
x86/efi: Runtime services virtual mapping
We map the EFI regions needed for runtime services non-contiguously,
with preserved alignment on virtual addresses starting from -4G down
for a total max space of 64G. This way, we provide for stable runtime
services addresses across kernels so that a kexec'd kernel can still use
them.
Thus, they're mapped in a separate pagetable so that we don't pollute
the kernel namespace.
Add an efi= kernel command line parameter for passing miscellaneous
options and chicken bits from the command line.
While at it, add a chicken bit called "efi=old_map" which can be used as
a fallback to the old runtime services mapping method in case there's
some b0rkage with a particular EFI implementation (haha, it is hard to
hold up the sarcasm here...).
Also, add the UEFI RT VA space to Documentation/x86/x86_64/mm.txt.
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
-rw-r--r-- | Documentation/kernel-parameters.txt | 6 | ||||
-rw-r--r-- | Documentation/x86/x86_64/mm.txt | 7 | ||||
-rw-r--r-- | arch/x86/include/asm/efi.h | 64 | ||||
-rw-r--r-- | arch/x86/include/asm/pgtable_types.h | 3 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi.c | 94 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi_32.c | 9 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi_64.c | 109 | ||||
-rw-r--r-- | arch/x86/platform/efi/efi_stub_64.S | 54 | ||||
-rw-r--r-- | include/linux/efi.h | 1 |
9 files changed, 300 insertions, 47 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 7a0f202d482e..ed43e92b0e7e 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -835,6 +835,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
835 | edd= [EDD] | 835 | edd= [EDD] |
836 | Format: {"off" | "on" | "skip[mbr]"} | 836 | Format: {"off" | "on" | "skip[mbr]"} |
837 | 837 | ||
838 | efi= [EFI] | ||
839 | Format: { "old_map" } | ||
840 | old_map [X86-64]: switch to the old ioremap-based EFI | ||
841 | runtime services mapping. 32-bit still uses this one by | ||
842 | default. | ||
843 | |||
838 | efi_no_storage_paranoia [EFI; X86] | 844 | efi_no_storage_paranoia [EFI; X86] |
839 | Using this parameter you can use more than 50% of | 845 | Using this parameter you can use more than 50% of |
840 | your efi variable storage. Use this parameter only if | 846 | your efi variable storage. Use this parameter only if |
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index 881582f75c9c..c584a51add15 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt | |||
@@ -28,4 +28,11 @@ reference. | |||
28 | Current X86-64 implementations only support 40 bits of address space, | 28 | Current X86-64 implementations only support 40 bits of address space, |
29 | but we support up to 46 bits. This expands into MBZ space in the page tables. | 29 | but we support up to 46 bits. This expands into MBZ space in the page tables. |
30 | 30 | ||
31 | ->trampoline_pgd: | ||
32 | |||
33 | We map EFI runtime services in the aforementioned PGD in the virtual | ||
34 | range of 64GB (arbitrarily set, can be raised if needed) | ||
35 | |||
36 | 0xffffffef00000000 - 0xffffffff00000000 | ||
37 | |||
31 | -Andi Kleen, Jul 2004 | 38 | -Andi Kleen, Jul 2004 |
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 65c6e6e3a552..89a05b0507b9 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h | |||
@@ -1,6 +1,24 @@ | |||
1 | #ifndef _ASM_X86_EFI_H | 1 | #ifndef _ASM_X86_EFI_H |
2 | #define _ASM_X86_EFI_H | 2 | #define _ASM_X86_EFI_H |
3 | 3 | ||
4 | /* | ||
5 | * We map the EFI regions needed for runtime services non-contiguously, | ||
6 | * with preserved alignment on virtual addresses starting from -4G down | ||
7 | * for a total max space of 64G. This way, we provide for stable runtime | ||
8 | * services addresses across kernels so that a kexec'd kernel can still | ||
9 | * use them. | ||
10 | * | ||
11 | * This is the main reason why we're doing stable VA mappings for RT | ||
12 | * services. | ||
13 | * | ||
14 | * This flag is used in conjunction with a chicken bit called | ||
15 | * "efi=old_map" which can be used as a fallback to the old runtime | ||
16 | * services mapping method in case there's some b0rkage with a | ||
17 | * particular EFI implementation (haha, it is hard to hold up the | ||
18 | * sarcasm here...). | ||
19 | */ | ||
20 | #define EFI_OLD_MEMMAP EFI_ARCH_1 | ||
21 | |||
4 | #ifdef CONFIG_X86_32 | 22 | #ifdef CONFIG_X86_32 |
5 | 23 | ||
6 | #define EFI_LOADER_SIGNATURE "EL32" | 24 | #define EFI_LOADER_SIGNATURE "EL32" |
@@ -69,24 +87,31 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3, | |||
69 | efi_call6((f), (u64)(a1), (u64)(a2), (u64)(a3), \ | 87 | efi_call6((f), (u64)(a1), (u64)(a2), (u64)(a3), \ |
70 | (u64)(a4), (u64)(a5), (u64)(a6)) | 88 | (u64)(a4), (u64)(a5), (u64)(a6)) |
71 | 89 | ||
90 | #define _efi_call_virtX(x, f, ...) \ | ||
91 | ({ \ | ||
92 | efi_status_t __s; \ | ||
93 | \ | ||
94 | efi_sync_low_kernel_mappings(); \ | ||
95 | preempt_disable(); \ | ||
96 | __s = efi_call##x((void *)efi.systab->runtime->f, __VA_ARGS__); \ | ||
97 | preempt_enable(); \ | ||
98 | __s; \ | ||
99 | }) | ||
100 | |||
72 | #define efi_call_virt0(f) \ | 101 | #define efi_call_virt0(f) \ |
73 | efi_call0((efi.systab->runtime->f)) | 102 | _efi_call_virtX(0, f) |
74 | #define efi_call_virt1(f, a1) \ | 103 | #define efi_call_virt1(f, a1) \ |
75 | efi_call1((efi.systab->runtime->f), (u64)(a1)) | 104 | _efi_call_virtX(1, f, (u64)(a1)) |
76 | #define efi_call_virt2(f, a1, a2) \ | 105 | #define efi_call_virt2(f, a1, a2) \ |
77 | efi_call2((efi.systab->runtime->f), (u64)(a1), (u64)(a2)) | 106 | _efi_call_virtX(2, f, (u64)(a1), (u64)(a2)) |
78 | #define efi_call_virt3(f, a1, a2, a3) \ | 107 | #define efi_call_virt3(f, a1, a2, a3) \ |
79 | efi_call3((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ | 108 | _efi_call_virtX(3, f, (u64)(a1), (u64)(a2), (u64)(a3)) |
80 | (u64)(a3)) | 109 | #define efi_call_virt4(f, a1, a2, a3, a4) \ |
81 | #define efi_call_virt4(f, a1, a2, a3, a4) \ | 110 | _efi_call_virtX(4, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4)) |
82 | efi_call4((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ | 111 | #define efi_call_virt5(f, a1, a2, a3, a4, a5) \ |
83 | (u64)(a3), (u64)(a4)) | 112 | _efi_call_virtX(5, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4), (u64)(a5)) |
84 | #define efi_call_virt5(f, a1, a2, a3, a4, a5) \ | 113 | #define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \ |
85 | efi_call5((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ | 114 | _efi_call_virtX(6, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6)) |
86 | (u64)(a3), (u64)(a4), (u64)(a5)) | ||
87 | #define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \ | ||
88 | efi_call6((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ | ||
89 | (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6)) | ||
90 | 115 | ||
91 | extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, | 116 | extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, |
92 | u32 type, u64 attribute); | 117 | u32 type, u64 attribute); |
@@ -95,12 +120,17 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, | |||
95 | 120 | ||
96 | extern int add_efi_memmap; | 121 | extern int add_efi_memmap; |
97 | extern unsigned long x86_efi_facility; | 122 | extern unsigned long x86_efi_facility; |
123 | extern struct efi_scratch efi_scratch; | ||
98 | extern void efi_set_executable(efi_memory_desc_t *md, bool executable); | 124 | extern void efi_set_executable(efi_memory_desc_t *md, bool executable); |
99 | extern int efi_memblock_x86_reserve_range(void); | 125 | extern int efi_memblock_x86_reserve_range(void); |
100 | extern void efi_call_phys_prelog(void); | 126 | extern void efi_call_phys_prelog(void); |
101 | extern void efi_call_phys_epilog(void); | 127 | extern void efi_call_phys_epilog(void); |
102 | extern void efi_unmap_memmap(void); | 128 | extern void efi_unmap_memmap(void); |
103 | extern void efi_memory_uc(u64 addr, unsigned long size); | 129 | extern void efi_memory_uc(u64 addr, unsigned long size); |
130 | extern void __init efi_map_region(efi_memory_desc_t *md); | ||
131 | extern void efi_sync_low_kernel_mappings(void); | ||
132 | extern void efi_setup_page_tables(void); | ||
133 | extern void __init old_map_region(efi_memory_desc_t *md); | ||
104 | 134 | ||
105 | #ifdef CONFIG_EFI | 135 | #ifdef CONFIG_EFI |
106 | 136 | ||
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index f4843e031131..028e28b6fc2c 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h | |||
@@ -379,7 +379,8 @@ static inline void update_page_count(int level, unsigned long pages) { } | |||
379 | */ | 379 | */ |
380 | extern pte_t *lookup_address(unsigned long address, unsigned int *level); | 380 | extern pte_t *lookup_address(unsigned long address, unsigned int *level); |
381 | extern phys_addr_t slow_virt_to_phys(void *__address); | 381 | extern phys_addr_t slow_virt_to_phys(void *__address); |
382 | 382 | extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, | |
383 | unsigned numpages, unsigned long page_flags); | ||
383 | #endif /* !__ASSEMBLY__ */ | 384 | #endif /* !__ASSEMBLY__ */ |
384 | 385 | ||
385 | #endif /* _ASM_X86_PGTABLE_DEFS_H */ | 386 | #endif /* _ASM_X86_PGTABLE_DEFS_H */ |
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index f396163b0402..b453069236fd 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c | |||
@@ -12,6 +12,8 @@ | |||
12 | * Bibo Mao <bibo.mao@intel.com> | 12 | * Bibo Mao <bibo.mao@intel.com> |
13 | * Chandramouli Narayanan <mouli@linux.intel.com> | 13 | * Chandramouli Narayanan <mouli@linux.intel.com> |
14 | * Huang Ying <ying.huang@intel.com> | 14 | * Huang Ying <ying.huang@intel.com> |
15 | * Copyright (C) 2013 SuSE Labs | ||
16 | * Borislav Petkov <bp@suse.de> - runtime services VA mapping | ||
15 | * | 17 | * |
16 | * Copied from efi_32.c to eliminate the duplicated code between EFI | 18 | * Copied from efi_32.c to eliminate the duplicated code between EFI |
17 | * 32/64 support code. --ying 2007-10-26 | 19 | * 32/64 support code. --ying 2007-10-26 |
@@ -745,21 +747,56 @@ void efi_memory_uc(u64 addr, unsigned long size) | |||
745 | set_memory_uc(addr, npages); | 747 | set_memory_uc(addr, npages); |
746 | } | 748 | } |
747 | 749 | ||
750 | void __init old_map_region(efi_memory_desc_t *md) | ||
751 | { | ||
752 | u64 start_pfn, end_pfn, end; | ||
753 | unsigned long size; | ||
754 | void *va; | ||
755 | |||
756 | start_pfn = PFN_DOWN(md->phys_addr); | ||
757 | size = md->num_pages << PAGE_SHIFT; | ||
758 | end = md->phys_addr + size; | ||
759 | end_pfn = PFN_UP(end); | ||
760 | |||
761 | if (pfn_range_is_mapped(start_pfn, end_pfn)) { | ||
762 | va = __va(md->phys_addr); | ||
763 | |||
764 | if (!(md->attribute & EFI_MEMORY_WB)) | ||
765 | efi_memory_uc((u64)(unsigned long)va, size); | ||
766 | } else | ||
767 | va = efi_ioremap(md->phys_addr, size, | ||
768 | md->type, md->attribute); | ||
769 | |||
770 | md->virt_addr = (u64) (unsigned long) va; | ||
771 | if (!va) | ||
772 | pr_err("ioremap of 0x%llX failed!\n", | ||
773 | (unsigned long long)md->phys_addr); | ||
774 | } | ||
775 | |||
748 | /* | 776 | /* |
749 | * This function will switch the EFI runtime services to virtual mode. | 777 | * This function will switch the EFI runtime services to virtual mode. |
750 | * Essentially, look through the EFI memmap and map every region that | 778 | * Essentially, we look through the EFI memmap and map every region that |
751 | * has the runtime attribute bit set in its memory descriptor and update | 779 | * has the runtime attribute bit set in its memory descriptor into the |
752 | * that memory descriptor with the virtual address obtained from ioremap(). | 780 | * ->trampoline_pgd page table using a top-down VA allocation scheme. |
753 | * This enables the runtime services to be called without having to | 781 | * |
782 | * The old method, which updated each memory descriptor with the | ||
783 | * virtual address obtained from ioremap(), is still supported when the | ||
784 | * kernel is booted with efi=old_map on its command line. That old | ||
785 | * method enabled the runtime services to be called without having to | ||
754 | * thunk back into physical mode for every invocation. | 786 | * thunk back into physical mode for every invocation. |
787 | * | ||
788 | * The new method does a pagetable switch in a preemption-safe manner | ||
789 | * so that we're in a different address space when calling a runtime | ||
790 | * function. To pass function arguments, we copy the PGD entries of the | ||
791 | * kernel page table into ->trampoline_pgd prior to each call. | ||
755 | */ | 792 | */ |
756 | void __init efi_enter_virtual_mode(void) | 793 | void __init efi_enter_virtual_mode(void) |
757 | { | 794 | { |
758 | efi_memory_desc_t *md, *prev_md = NULL; | 795 | efi_memory_desc_t *md, *prev_md = NULL; |
759 | efi_status_t status; | 796 | void *p, *new_memmap = NULL; |
760 | unsigned long size; | 797 | unsigned long size; |
761 | u64 end, systab, start_pfn, end_pfn; | 798 | efi_status_t status; |
762 | void *p, *va, *new_memmap = NULL; | 799 | u64 end, systab; |
763 | int count = 0; | 800 | int count = 0; |
764 | 801 | ||
765 | efi.systab = NULL; | 802 | efi.systab = NULL; |
@@ -768,7 +805,6 @@ void __init efi_enter_virtual_mode(void) | |||
768 | * We don't do virtual mode, since we don't do runtime services, on | 805 | * We don't do virtual mode, since we don't do runtime services, on |
769 | * non-native EFI | 806 | * non-native EFI |
770 | */ | 807 | */ |
771 | |||
772 | if (!efi_is_native()) { | 808 | if (!efi_is_native()) { |
773 | efi_unmap_memmap(); | 809 | efi_unmap_memmap(); |
774 | return; | 810 | return; |
@@ -799,6 +835,7 @@ void __init efi_enter_virtual_mode(void) | |||
799 | continue; | 835 | continue; |
800 | } | 836 | } |
801 | prev_md = md; | 837 | prev_md = md; |
838 | |||
802 | } | 839 | } |
803 | 840 | ||
804 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | 841 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { |
@@ -808,33 +845,18 @@ void __init efi_enter_virtual_mode(void) | |||
808 | md->type != EFI_BOOT_SERVICES_DATA) | 845 | md->type != EFI_BOOT_SERVICES_DATA) |
809 | continue; | 846 | continue; |
810 | 847 | ||
848 | efi_map_region(md); | ||
849 | |||
811 | size = md->num_pages << EFI_PAGE_SHIFT; | 850 | size = md->num_pages << EFI_PAGE_SHIFT; |
812 | end = md->phys_addr + size; | 851 | end = md->phys_addr + size; |
813 | 852 | ||
814 | start_pfn = PFN_DOWN(md->phys_addr); | ||
815 | end_pfn = PFN_UP(end); | ||
816 | if (pfn_range_is_mapped(start_pfn, end_pfn)) { | ||
817 | va = __va(md->phys_addr); | ||
818 | |||
819 | if (!(md->attribute & EFI_MEMORY_WB)) | ||
820 | efi_memory_uc((u64)(unsigned long)va, size); | ||
821 | } else | ||
822 | va = efi_ioremap(md->phys_addr, size, | ||
823 | md->type, md->attribute); | ||
824 | |||
825 | md->virt_addr = (u64) (unsigned long) va; | ||
826 | |||
827 | if (!va) { | ||
828 | pr_err("ioremap of 0x%llX failed!\n", | ||
829 | (unsigned long long)md->phys_addr); | ||
830 | continue; | ||
831 | } | ||
832 | |||
833 | systab = (u64) (unsigned long) efi_phys.systab; | 853 | systab = (u64) (unsigned long) efi_phys.systab; |
834 | if (md->phys_addr <= systab && systab < end) { | 854 | if (md->phys_addr <= systab && systab < end) { |
835 | systab += md->virt_addr - md->phys_addr; | 855 | systab += md->virt_addr - md->phys_addr; |
856 | |||
836 | efi.systab = (efi_system_table_t *) (unsigned long) systab; | 857 | efi.systab = (efi_system_table_t *) (unsigned long) systab; |
837 | } | 858 | } |
859 | |||
838 | new_memmap = krealloc(new_memmap, | 860 | new_memmap = krealloc(new_memmap, |
839 | (count + 1) * memmap.desc_size, | 861 | (count + 1) * memmap.desc_size, |
840 | GFP_KERNEL); | 862 | GFP_KERNEL); |
@@ -845,6 +867,9 @@ void __init efi_enter_virtual_mode(void) | |||
845 | 867 | ||
846 | BUG_ON(!efi.systab); | 868 | BUG_ON(!efi.systab); |
847 | 869 | ||
870 | efi_setup_page_tables(); | ||
871 | efi_sync_low_kernel_mappings(); | ||
872 | |||
848 | status = phys_efi_set_virtual_address_map( | 873 | status = phys_efi_set_virtual_address_map( |
849 | memmap.desc_size * count, | 874 | memmap.desc_size * count, |
850 | memmap.desc_size, | 875 | memmap.desc_size, |
@@ -877,7 +902,8 @@ void __init efi_enter_virtual_mode(void) | |||
877 | efi.query_variable_info = virt_efi_query_variable_info; | 902 | efi.query_variable_info = virt_efi_query_variable_info; |
878 | efi.update_capsule = virt_efi_update_capsule; | 903 | efi.update_capsule = virt_efi_update_capsule; |
879 | efi.query_capsule_caps = virt_efi_query_capsule_caps; | 904 | efi.query_capsule_caps = virt_efi_query_capsule_caps; |
880 | if (__supported_pte_mask & _PAGE_NX) | 905 | |
906 | if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX)) | ||
881 | runtime_code_page_mkexec(); | 907 | runtime_code_page_mkexec(); |
882 | 908 | ||
883 | kfree(new_memmap); | 909 | kfree(new_memmap); |
@@ -1007,3 +1033,15 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) | |||
1007 | return EFI_SUCCESS; | 1033 | return EFI_SUCCESS; |
1008 | } | 1034 | } |
1009 | EXPORT_SYMBOL_GPL(efi_query_variable_store); | 1035 | EXPORT_SYMBOL_GPL(efi_query_variable_store); |
1036 | |||
1037 | static int __init parse_efi_cmdline(char *str) | ||
1038 | { | ||
1039 | if (*str == '=') | ||
1040 | str++; | ||
1041 | |||
1042 | if (!strncmp(str, "old_map", 7)) | ||
1043 | set_bit(EFI_OLD_MEMMAP, &x86_efi_facility); | ||
1044 | |||
1045 | return 0; | ||
1046 | } | ||
1047 | early_param("efi", parse_efi_cmdline); | ||
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 40e446941dd7..e94557cf5487 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c | |||
@@ -37,9 +37,16 @@ | |||
37 | * claim EFI runtime service handler exclusively and to duplicate a memory in | 37 | * claim EFI runtime service handler exclusively and to duplicate a memory in |
38 | * low memory space say 0 - 3G. | 38 | * low memory space say 0 - 3G. |
39 | */ | 39 | */ |
40 | |||
41 | static unsigned long efi_rt_eflags; | 40 | static unsigned long efi_rt_eflags; |
42 | 41 | ||
42 | void efi_sync_low_kernel_mappings(void) {} | ||
43 | void efi_setup_page_tables(void) {} | ||
44 | |||
45 | void __init efi_map_region(efi_memory_desc_t *md) | ||
46 | { | ||
47 | old_map_region(md); | ||
48 | } | ||
49 | |||
43 | void efi_call_phys_prelog(void) | 50 | void efi_call_phys_prelog(void) |
44 | { | 51 | { |
45 | struct desc_ptr gdt_descr; | 52 | struct desc_ptr gdt_descr; |
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 39a0e7f1f0a3..bf286c386d33 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c | |||
@@ -38,10 +38,28 @@ | |||
38 | #include <asm/efi.h> | 38 | #include <asm/efi.h> |
39 | #include <asm/cacheflush.h> | 39 | #include <asm/cacheflush.h> |
40 | #include <asm/fixmap.h> | 40 | #include <asm/fixmap.h> |
41 | #include <asm/realmode.h> | ||
41 | 42 | ||
42 | static pgd_t *save_pgd __initdata; | 43 | static pgd_t *save_pgd __initdata; |
43 | static unsigned long efi_flags __initdata; | 44 | static unsigned long efi_flags __initdata; |
44 | 45 | ||
46 | /* | ||
47 | * We allocate runtime services regions top-down, starting from -4G, i.e. | ||
48 | * 0xffff_ffff_0000_0000 and limit EFI VA mapping space to 64G. | ||
49 | */ | ||
50 | static u64 efi_va = -4 * (1UL << 30); | ||
51 | #define EFI_VA_END (-68 * (1UL << 30)) | ||
52 | |||
53 | /* | ||
54 | * Scratch space used for switching the pagetable in the EFI stub | ||
55 | */ | ||
56 | struct efi_scratch { | ||
57 | u64 r15; | ||
58 | u64 prev_cr3; | ||
59 | pgd_t *efi_pgt; | ||
60 | bool use_pgd; | ||
61 | }; | ||
62 | |||
45 | static void __init early_code_mapping_set_exec(int executable) | 63 | static void __init early_code_mapping_set_exec(int executable) |
46 | { | 64 | { |
47 | efi_memory_desc_t *md; | 65 | efi_memory_desc_t *md; |
@@ -65,6 +83,9 @@ void __init efi_call_phys_prelog(void) | |||
65 | int pgd; | 83 | int pgd; |
66 | int n_pgds; | 84 | int n_pgds; |
67 | 85 | ||
86 | if (!efi_enabled(EFI_OLD_MEMMAP)) | ||
87 | return; | ||
88 | |||
68 | early_code_mapping_set_exec(1); | 89 | early_code_mapping_set_exec(1); |
69 | local_irq_save(efi_flags); | 90 | local_irq_save(efi_flags); |
70 | 91 | ||
@@ -86,6 +107,10 @@ void __init efi_call_phys_epilog(void) | |||
86 | */ | 107 | */ |
87 | int pgd; | 108 | int pgd; |
88 | int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); | 109 | int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); |
110 | |||
111 | if (!efi_enabled(EFI_OLD_MEMMAP)) | ||
112 | return; | ||
113 | |||
89 | for (pgd = 0; pgd < n_pgds; pgd++) | 114 | for (pgd = 0; pgd < n_pgds; pgd++) |
90 | set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]); | 115 | set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]); |
91 | kfree(save_pgd); | 116 | kfree(save_pgd); |
@@ -94,6 +119,90 @@ void __init efi_call_phys_epilog(void) | |||
94 | early_code_mapping_set_exec(0); | 119 | early_code_mapping_set_exec(0); |
95 | } | 120 | } |
96 | 121 | ||
122 | /* | ||
123 | * Add low kernel mappings for passing arguments to EFI functions. | ||
124 | */ | ||
125 | void efi_sync_low_kernel_mappings(void) | ||
126 | { | ||
127 | unsigned num_pgds; | ||
128 | pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); | ||
129 | |||
130 | if (efi_enabled(EFI_OLD_MEMMAP)) | ||
131 | return; | ||
132 | |||
133 | num_pgds = pgd_index(MODULES_END - 1) - pgd_index(PAGE_OFFSET); | ||
134 | |||
135 | memcpy(pgd + pgd_index(PAGE_OFFSET), | ||
136 | init_mm.pgd + pgd_index(PAGE_OFFSET), | ||
137 | sizeof(pgd_t) * num_pgds); | ||
138 | } | ||
139 | |||
140 | void efi_setup_page_tables(void) | ||
141 | { | ||
142 | efi_scratch.efi_pgt = (pgd_t *)(unsigned long)real_mode_header->trampoline_pgd; | ||
143 | |||
144 | if (!efi_enabled(EFI_OLD_MEMMAP)) | ||
145 | efi_scratch.use_pgd = true; | ||
146 | } | ||
147 | |||
148 | static void __init __map_region(efi_memory_desc_t *md, u64 va) | ||
149 | { | ||
150 | pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); | ||
151 | unsigned long pf = 0, size; | ||
152 | u64 end; | ||
153 | |||
154 | if (!(md->attribute & EFI_MEMORY_WB)) | ||
155 | pf |= _PAGE_PCD; | ||
156 | |||
157 | size = md->num_pages << PAGE_SHIFT; | ||
158 | end = va + size; | ||
159 | |||
160 | if (kernel_map_pages_in_pgd(pgd, md->phys_addr, va, md->num_pages, pf)) | ||
161 | pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n", | ||
162 | md->phys_addr, va); | ||
163 | } | ||
164 | |||
165 | void __init efi_map_region(efi_memory_desc_t *md) | ||
166 | { | ||
167 | unsigned long size = md->num_pages << PAGE_SHIFT; | ||
168 | u64 pa = md->phys_addr; | ||
169 | |||
170 | if (efi_enabled(EFI_OLD_MEMMAP)) | ||
171 | return old_map_region(md); | ||
172 | |||
173 | /* | ||
174 | * Make sure the 1:1 mappings are present as a catch-all for b0rked | ||
175 | * firmware which doesn't update all internal pointers after switching | ||
176 | * to virtual mode and would otherwise crap on us. | ||
177 | */ | ||
178 | __map_region(md, md->phys_addr); | ||
179 | |||
180 | efi_va -= size; | ||
181 | |||
182 | /* Is PA 2M-aligned? */ | ||
183 | if (!(pa & (PMD_SIZE - 1))) { | ||
184 | efi_va &= PMD_MASK; | ||
185 | } else { | ||
186 | u64 pa_offset = pa & (PMD_SIZE - 1); | ||
187 | u64 prev_va = efi_va; | ||
188 | |||
189 | /* get us the same offset within this 2M page */ | ||
190 | efi_va = (efi_va & PMD_MASK) + pa_offset; | ||
191 | |||
192 | if (efi_va > prev_va) | ||
193 | efi_va -= PMD_SIZE; | ||
194 | } | ||
195 | |||
196 | if (efi_va < EFI_VA_END) { | ||
197 | pr_warn(FW_WARN "VA address range overflow!\n"); | ||
198 | return; | ||
199 | } | ||
200 | |||
201 | /* Do the VA map */ | ||
202 | __map_region(md, efi_va); | ||
203 | md->virt_addr = efi_va; | ||
204 | } | ||
205 | |||
97 | void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, | 206 | void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, |
98 | u32 type, u64 attribute) | 207 | u32 type, u64 attribute) |
99 | { | 208 | { |
diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index 4c07ccab8146..88073b140298 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S | |||
@@ -34,10 +34,47 @@ | |||
34 | mov %rsi, %cr0; \ | 34 | mov %rsi, %cr0; \ |
35 | mov (%rsp), %rsp | 35 | mov (%rsp), %rsp |
36 | 36 | ||
37 | /* stolen from gcc */ | ||
38 | .macro FLUSH_TLB_ALL | ||
39 | movq %r15, efi_scratch(%rip) | ||
40 | movq %r14, efi_scratch+8(%rip) | ||
41 | movq %cr4, %r15 | ||
42 | movq %r15, %r14 | ||
43 | andb $0x7f, %r14b | ||
44 | movq %r14, %cr4 | ||
45 | movq %r15, %cr4 | ||
46 | movq efi_scratch+8(%rip), %r14 | ||
47 | movq efi_scratch(%rip), %r15 | ||
48 | .endm | ||
49 | |||
50 | .macro SWITCH_PGT | ||
51 | cmpb $0, efi_scratch+24(%rip) | ||
52 | je 1f | ||
53 | movq %r15, efi_scratch(%rip) # r15 | ||
54 | # save previous CR3 | ||
55 | movq %cr3, %r15 | ||
56 | movq %r15, efi_scratch+8(%rip) # prev_cr3 | ||
57 | movq efi_scratch+16(%rip), %r15 # EFI pgt | ||
58 | movq %r15, %cr3 | ||
59 | 1: | ||
60 | .endm | ||
61 | |||
62 | .macro RESTORE_PGT | ||
63 | cmpb $0, efi_scratch+24(%rip) | ||
64 | je 2f | ||
65 | movq efi_scratch+8(%rip), %r15 | ||
66 | movq %r15, %cr3 | ||
67 | movq efi_scratch(%rip), %r15 | ||
68 | FLUSH_TLB_ALL | ||
69 | 2: | ||
70 | .endm | ||
71 | |||
37 | ENTRY(efi_call0) | 72 | ENTRY(efi_call0) |
38 | SAVE_XMM | 73 | SAVE_XMM |
39 | subq $32, %rsp | 74 | subq $32, %rsp |
75 | SWITCH_PGT | ||
40 | call *%rdi | 76 | call *%rdi |
77 | RESTORE_PGT | ||
41 | addq $32, %rsp | 78 | addq $32, %rsp |
42 | RESTORE_XMM | 79 | RESTORE_XMM |
43 | ret | 80 | ret |
@@ -47,7 +84,9 @@ ENTRY(efi_call1) | |||
47 | SAVE_XMM | 84 | SAVE_XMM |
48 | subq $32, %rsp | 85 | subq $32, %rsp |
49 | mov %rsi, %rcx | 86 | mov %rsi, %rcx |
87 | SWITCH_PGT | ||
50 | call *%rdi | 88 | call *%rdi |
89 | RESTORE_PGT | ||
51 | addq $32, %rsp | 90 | addq $32, %rsp |
52 | RESTORE_XMM | 91 | RESTORE_XMM |
53 | ret | 92 | ret |
@@ -57,7 +96,9 @@ ENTRY(efi_call2) | |||
57 | SAVE_XMM | 96 | SAVE_XMM |
58 | subq $32, %rsp | 97 | subq $32, %rsp |
59 | mov %rsi, %rcx | 98 | mov %rsi, %rcx |
99 | SWITCH_PGT | ||
60 | call *%rdi | 100 | call *%rdi |
101 | RESTORE_PGT | ||
61 | addq $32, %rsp | 102 | addq $32, %rsp |
62 | RESTORE_XMM | 103 | RESTORE_XMM |
63 | ret | 104 | ret |
@@ -68,7 +109,9 @@ ENTRY(efi_call3) | |||
68 | subq $32, %rsp | 109 | subq $32, %rsp |
69 | mov %rcx, %r8 | 110 | mov %rcx, %r8 |
70 | mov %rsi, %rcx | 111 | mov %rsi, %rcx |
112 | SWITCH_PGT | ||
71 | call *%rdi | 113 | call *%rdi |
114 | RESTORE_PGT | ||
72 | addq $32, %rsp | 115 | addq $32, %rsp |
73 | RESTORE_XMM | 116 | RESTORE_XMM |
74 | ret | 117 | ret |
@@ -80,7 +123,9 @@ ENTRY(efi_call4) | |||
80 | mov %r8, %r9 | 123 | mov %r8, %r9 |
81 | mov %rcx, %r8 | 124 | mov %rcx, %r8 |
82 | mov %rsi, %rcx | 125 | mov %rsi, %rcx |
126 | SWITCH_PGT | ||
83 | call *%rdi | 127 | call *%rdi |
128 | RESTORE_PGT | ||
84 | addq $32, %rsp | 129 | addq $32, %rsp |
85 | RESTORE_XMM | 130 | RESTORE_XMM |
86 | ret | 131 | ret |
@@ -93,7 +138,9 @@ ENTRY(efi_call5) | |||
93 | mov %r8, %r9 | 138 | mov %r8, %r9 |
94 | mov %rcx, %r8 | 139 | mov %rcx, %r8 |
95 | mov %rsi, %rcx | 140 | mov %rsi, %rcx |
141 | SWITCH_PGT | ||
96 | call *%rdi | 142 | call *%rdi |
143 | RESTORE_PGT | ||
97 | addq $48, %rsp | 144 | addq $48, %rsp |
98 | RESTORE_XMM | 145 | RESTORE_XMM |
99 | ret | 146 | ret |
@@ -109,8 +156,15 @@ ENTRY(efi_call6) | |||
109 | mov %r8, %r9 | 156 | mov %r8, %r9 |
110 | mov %rcx, %r8 | 157 | mov %rcx, %r8 |
111 | mov %rsi, %rcx | 158 | mov %rsi, %rcx |
159 | SWITCH_PGT | ||
112 | call *%rdi | 160 | call *%rdi |
161 | RESTORE_PGT | ||
113 | addq $48, %rsp | 162 | addq $48, %rsp |
114 | RESTORE_XMM | 163 | RESTORE_XMM |
115 | ret | 164 | ret |
116 | ENDPROC(efi_call6) | 165 | ENDPROC(efi_call6) |
166 | |||
167 | .data | ||
168 | ENTRY(efi_scratch) | ||
169 | .fill 3,8,0 | ||
170 | .byte 0 | ||
diff --git a/include/linux/efi.h b/include/linux/efi.h index bc5687d0f315..6c0ca528300c 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h | |||
@@ -653,6 +653,7 @@ extern int __init efi_setup_pcdp_console(char *); | |||
653 | #define EFI_RUNTIME_SERVICES 3 /* Can we use runtime services? */ | 653 | #define EFI_RUNTIME_SERVICES 3 /* Can we use runtime services? */ |
654 | #define EFI_MEMMAP 4 /* Can we use EFI memory map? */ | 654 | #define EFI_MEMMAP 4 /* Can we use EFI memory map? */ |
655 | #define EFI_64BIT 5 /* Is the firmware 64-bit? */ | 655 | #define EFI_64BIT 5 /* Is the firmware 64-bit? */ |
656 | #define EFI_ARCH_1 6 /* First arch-specific bit */ | ||
656 | 657 | ||
657 | #ifdef CONFIG_EFI | 658 | #ifdef CONFIG_EFI |
658 | # ifdef CONFIG_X86 | 659 | # ifdef CONFIG_X86 |