author    Linus Torvalds <torvalds@linux-foundation.org>  2016-03-20 21:58:18 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2016-03-20 21:58:18 -0400
commit    24b5e20f11a75866bbffc46c30a22fa50612a769 (patch)
tree      2dab5fc6714a5ad1e31bdea1e954fbd69704ce72 /arch/x86/platform
parent    26660a4046b171a752e72a1dd32153230234fe3a (diff)
parent    d367cef0a7f0c6ee86e997c0cb455b21b3c6b9ba (diff)
Merge branch 'efi-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull EFI updates from Ingo Molnar:
 "The main changes are:

   - Use separate EFI page tables when executing EFI firmware code.
     This isolates the EFI context from the rest of the kernel, which
     has security and general robustness advantages. (Matt Fleming)

   - Run regular UEFI firmware with interrupts enabled. This is
     already the status quo under other OSs. (Ard Biesheuvel)

   - Various x86 EFI enhancements, such as the use of non-executable
     attributes for EFI memory mappings. (Sai Praneeth Prakhya)

   - Various arm64 UEFI enhancements. (Ard Biesheuvel)

   - ... various fixes and cleanups.

  The separate EFI page tables feature got delayed twice already,
  because it's an intrusive change and we didn't feel confident about
  it - third time's the charm we hope!"

* 'efi-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (37 commits)
  x86/mm/pat: Fix boot crash when 1GB pages are not supported by the CPU
  x86/efi: Only map kernel text for EFI mixed mode
  x86/efi: Map EFI_MEMORY_{XP,RO} memory region bits to EFI page tables
  x86/mm/pat: Don't implicitly allow _PAGE_RW in kernel_map_pages_in_pgd()
  efi/arm*: Perform hardware compatibility check
  efi/arm64: Check for h/w support before booting a >4 KB granular kernel
  efi/arm: Check for LPAE support before booting a LPAE kernel
  efi/arm-init: Use read-only early mappings
  efi/efistub: Prevent __init annotations from being used
  arm64/vmlinux.lds.S: Handle .init.rodata.xxx and .init.bss sections
  efi/arm64: Drop __init annotation from handle_kernel_image()
  x86/mm/pat: Use _PAGE_GLOBAL bit for EFI page table mappings
  efi/runtime-wrappers: Run UEFI Runtime Services with interrupts enabled
  efi: Reformat GUID tables to follow the format in UEFI spec
  efi: Add Persistent Memory type name
  efi: Add NV memory attribute
  x86/efi: Show actual ending addresses in efi_print_memmap
  x86/efi/bgrt: Don't ignore the BGRT if the 'valid' bit is 0
  efivars: Use to_efivar_entry
  efi: Runtime-wrapper: Get rid of the rtc_lock spinlock
  ...
Diffstat (limited to 'arch/x86/platform')
-rw-r--r--  arch/x86/platform/efi/efi-bgrt.c     |  52
-rw-r--r--  arch/x86/platform/efi/efi.c          |  67
-rw-r--r--  arch/x86/platform/efi/efi_32.c       |   7
-rw-r--r--  arch/x86/platform/efi/efi_64.c       | 206
-rw-r--r--  arch/x86/platform/efi/efi_stub_64.S  |  43
-rw-r--r--  arch/x86/platform/efi/quirks.c       |  37
6 files changed, 268 insertions(+), 144 deletions(-)
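As orientation for the diffs that follow, the separate-pagetable scheme described in the pull message boils down to a short call sequence, implemented across the efi.c and efi_64.c changes below. The function names are the real ones introduced by this series, but the body is an abridged outline of __efi_enter_virtual_mode(), not the literal kernel code:

    if (efi_alloc_page_tables()) {          /* dedicated efi_pgd (x86-64 only) */
            pr_err("Failed to allocate EFI page tables\n");
            clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
            return;
    }
    efi_merge_regions();
    new_memmap = efi_map_regions(&count, &pg_shift); /* runtime regions -> efi_pgd */
    efi_sync_low_kernel_mappings();         /* share kernel PGD/PUD entries */
    /* ... phys_efi_set_virtual_address_map() hands the new addresses to firmware ... */
    efi_runtime_update_mappings();          /* then tighten NX/RO page attributes */
    efi_dump_pagetable();                   /* CONFIG_EFI_PGT_DUMP debug aid */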
diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c
index ea48449b2e63..a2433817c987 100644
--- a/arch/x86/platform/efi/efi-bgrt.c
+++ b/arch/x86/platform/efi/efi-bgrt.c
@@ -10,6 +10,9 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/acpi.h>
@@ -28,8 +31,7 @@ struct bmp_header {
 void __init efi_bgrt_init(void)
 {
 	acpi_status status;
-	void __iomem *image;
-	bool ioremapped = false;
+	void *image;
 	struct bmp_header bmp_header;
 
 	if (acpi_disabled)
@@ -55,11 +57,6 @@ void __init efi_bgrt_init(void)
 		       bgrt_tab->status);
 		return;
 	}
-	if (bgrt_tab->status != 1) {
-		pr_debug("Ignoring BGRT: invalid status %u (expected 1)\n",
-			 bgrt_tab->status);
-		return;
-	}
 	if (bgrt_tab->image_type != 0) {
 		pr_err("Ignoring BGRT: invalid image type %u (expected 0)\n",
 		       bgrt_tab->image_type);
@@ -70,20 +67,19 @@ void __init efi_bgrt_init(void)
 		return;
 	}
 
-	image = efi_lookup_mapped_addr(bgrt_tab->image_address);
+	image = memremap(bgrt_tab->image_address, sizeof(bmp_header), MEMREMAP_WB);
 	if (!image) {
-		image = early_ioremap(bgrt_tab->image_address,
-				      sizeof(bmp_header));
-		ioremapped = true;
-		if (!image) {
-			pr_err("Ignoring BGRT: failed to map image header memory\n");
-			return;
-		}
+		pr_err("Ignoring BGRT: failed to map image header memory\n");
+		return;
 	}
 
-	memcpy_fromio(&bmp_header, image, sizeof(bmp_header));
-	if (ioremapped)
-		early_iounmap(image, sizeof(bmp_header));
+	memcpy(&bmp_header, image, sizeof(bmp_header));
+	memunmap(image);
+	if (bmp_header.id != 0x4d42) {
+		pr_err("Ignoring BGRT: Incorrect BMP magic number 0x%x (expected 0x4d42)\n",
+		       bmp_header.id);
+		return;
+	}
 	bgrt_image_size = bmp_header.size;
 
 	bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL | __GFP_NOWARN);
@@ -93,18 +89,14 @@ void __init efi_bgrt_init(void)
 		return;
 	}
 
-	if (ioremapped) {
-		image = early_ioremap(bgrt_tab->image_address,
-				      bmp_header.size);
-		if (!image) {
-			pr_err("Ignoring BGRT: failed to map image memory\n");
-			kfree(bgrt_image);
-			bgrt_image = NULL;
-			return;
-		}
+	image = memremap(bgrt_tab->image_address, bmp_header.size, MEMREMAP_WB);
+	if (!image) {
+		pr_err("Ignoring BGRT: failed to map image memory\n");
+		kfree(bgrt_image);
+		bgrt_image = NULL;
+		return;
 	}
 
-	memcpy_fromio(bgrt_image, image, bgrt_image_size);
-	if (ioremapped)
-		early_iounmap(image, bmp_header.size);
+	memcpy(bgrt_image, image, bgrt_image_size);
+	memunmap(image);
 }
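The new header validation above rejects anything that does not start with the BMP magic 0x4d42 ("BM" in little-endian byte order, so x86 is assumed). A minimal userspace model of that check, mirroring only the two leading fields of the kernel's struct bmp_header; the sample bytes are invented:

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    struct bmp_header {
            uint16_t id;    /* expected 0x4d42, i.e. "BM" little-endian */
            uint32_t size;  /* total image size in bytes */
    } __attribute__((packed));

    int main(void)
    {
            const unsigned char image[] = { 'B', 'M', 0x46, 0x00, 0x00, 0x00 };
            struct bmp_header hdr;

            /* same copy-then-check pattern as the memremap()+memcpy() code */
            memcpy(&hdr, image, sizeof(hdr));
            if (hdr.id != 0x4d42) {
                    fprintf(stderr, "bad BMP magic 0x%x (expected 0x4d42)\n", hdr.id);
                    return 1;
            }
            printf("BMP image, %u bytes\n", (unsigned)hdr.size); /* 70 bytes */
            return 0;
    }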
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index ad285404ea7f..994a7df84a7b 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -235,10 +235,10 @@ void __init efi_print_memmap(void)
 		char buf[64];
 
 		md = p;
-		pr_info("mem%02u: %s range=[0x%016llx-0x%016llx) (%lluMB)\n",
+		pr_info("mem%02u: %s range=[0x%016llx-0x%016llx] (%lluMB)\n",
 			i, efi_md_typeattr_format(buf, sizeof(buf), md),
 			md->phys_addr,
-			md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
+			md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1,
 			(md->num_pages >> (20 - EFI_PAGE_SHIFT)));
 	}
 #endif  /*  EFI_DEBUG  */
@@ -815,6 +815,7 @@ static void __init kexec_enter_virtual_mode(void)
 {
 #ifdef CONFIG_KEXEC_CORE
 	efi_memory_desc_t *md;
+	unsigned int num_pages;
 	void *p;
 
 	efi.systab = NULL;
@@ -829,6 +830,12 @@
 		return;
 	}
 
+	if (efi_alloc_page_tables()) {
+		pr_err("Failed to allocate EFI page tables\n");
+		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+		return;
+	}
+
 	/*
	 * Map efi regions which were passed via setup_data. The virt_addr is a
	 * fixed addr which was used in first kernel of a kexec boot.
@@ -843,6 +850,14 @@
 
 	BUG_ON(!efi.systab);
 
+	num_pages = ALIGN(memmap.nr_map * memmap.desc_size, PAGE_SIZE);
+	num_pages >>= PAGE_SHIFT;
+
+	if (efi_setup_page_tables(memmap.phys_map, num_pages)) {
+		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+		return;
+	}
+
 	efi_sync_low_kernel_mappings();
 
 	/*
@@ -869,7 +884,7 @@
  * This function will switch the EFI runtime services to virtual mode.
  * Essentially, we look through the EFI memmap and map every region that
  * has the runtime attribute bit set in its memory descriptor into the
- * ->trampoline_pgd page table using a top-down VA allocation scheme.
+ * efi_pgd page table.
  *
  * The old method which used to update that memory descriptor with the
  * virtual address obtained from ioremap() is still supported when the
@@ -879,8 +894,8 @@
  *
  * The new method does a pagetable switch in a preemption-safe manner
  * so that we're in a different address space when calling a runtime
- * function. For function arguments passing we do copy the PGDs of the
- * kernel page table into ->trampoline_pgd prior to each call.
+ * function. For function arguments passing we do copy the PUDs of the
+ * kernel page table into efi_pgd prior to each call.
  *
  * Specially for kexec boot, efi runtime maps in previous kernel should
  * be passed in via setup_data. In that case runtime ranges will be mapped
@@ -895,6 +910,12 @@ static void __init __efi_enter_virtual_mode(void)
 
 	efi.systab = NULL;
 
+	if (efi_alloc_page_tables()) {
+		pr_err("Failed to allocate EFI page tables\n");
+		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+		return;
+	}
+
 	efi_merge_regions();
 	new_memmap = efi_map_regions(&count, &pg_shift);
 	if (!new_memmap) {
@@ -913,7 +934,6 @@
 	}
 
 	efi_sync_low_kernel_mappings();
-	efi_dump_pagetable();
 
 	if (efi_is_native()) {
 		status = phys_efi_set_virtual_address_map(
@@ -951,31 +971,20 @@
 
 	efi.set_virtual_address_map = NULL;
 
-	efi_runtime_mkexec();
+	/*
+	 * Apply more restrictive page table mapping attributes now that
+	 * SVAM() has been called and the firmware has performed all
+	 * necessary relocation fixups for the new virtual addresses.
+	 */
+	efi_runtime_update_mappings();
+	efi_dump_pagetable();
 
 	/*
-	 * We mapped the descriptor array into the EFI pagetable above but we're
-	 * not unmapping it here. Here's why:
-	 *
-	 * We're copying select PGDs from the kernel page table to the EFI page
-	 * table and when we do so and make changes to those PGDs like unmapping
-	 * stuff from them, those changes appear in the kernel page table and we
-	 * go boom.
-	 *
-	 * From setup_real_mode():
-	 *
-	 * ...
-	 * trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd;
-	 *
-	 * In this particular case, our allocation is in PGD 0 of the EFI page
-	 * table but we've copied that PGD from PGD[272] of the EFI page table:
-	 *
-	 * pgd_index(__PAGE_OFFSET = 0xffff880000000000) = 272
-	 *
-	 * where the direct memory mapping in kernel space is.
-	 *
-	 * new_memmap's VA comes from that direct mapping and thus clearing it,
-	 * it would get cleared in the kernel page table too.
+	 * We mapped the descriptor array into the EFI pagetable above
+	 * but we're not unmapping it here because if we're running in
+	 * EFI mixed mode we need all of memory to be accessible when
+	 * we pass parameters to the EFI runtime services in the
+	 * thunking code.
 	 *
 	 * efi_cleanup_page_tables(__pa(new_memmap), 1 << pg_shift);
 	 */
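Two of the changes above are pure arithmetic and easy to sanity-check in isolation: the kexec path now sizes the memmap mapping in whole pages, and efi_print_memmap() now prints an inclusive end address. A standalone userspace sketch with invented sample values:

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SIZE       4096UL
    #define PAGE_SHIFT      12
    #define EFI_PAGE_SHIFT  12
    #define ALIGN(x, a)     (((x) + (a) - 1) & ~((a) - 1))

    int main(void)
    {
            unsigned long nr_map = 37, desc_size = 48;      /* hypothetical memmap */
            unsigned int num_pages;

            /* num_pages = ALIGN(memmap.nr_map * memmap.desc_size, PAGE_SIZE) >> PAGE_SHIFT */
            num_pages = ALIGN(nr_map * desc_size, PAGE_SIZE) >> PAGE_SHIFT;
            printf("descriptor array: %lu bytes -> %u page(s)\n",
                   nr_map * desc_size, num_pages);          /* 1776 bytes -> 1 page */

            /* end address is now printed inclusively: start + npages*4K - 1 */
            uint64_t phys_addr = 0x100000, num_md_pages = 256;
            printf("mem: range=[0x%016llx-0x%016llx]\n",
                   (unsigned long long)phys_addr,
                   (unsigned long long)(phys_addr + (num_md_pages << EFI_PAGE_SHIFT) - 1));
            return 0;
    }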
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index ed5b67338294..338402b91d2e 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -38,6 +38,11 @@
  * say 0 - 3G.
  */
 
+int __init efi_alloc_page_tables(void)
+{
+	return 0;
+}
+
 void efi_sync_low_kernel_mappings(void) {}
 void __init efi_dump_pagetable(void) {}
 int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
@@ -85,7 +90,7 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
 	__flush_tlb_all();
 }
 
-void __init efi_runtime_mkexec(void)
+void __init efi_runtime_update_mappings(void)
 {
 	if (__supported_pte_mask & _PAGE_NX)
 		runtime_code_page_mkexec();
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index a0ac0f9c307f..49e4dd4a1f58 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -15,6 +15,8 @@
  *
  */
 
+#define pr_fmt(fmt) "efi: " fmt
+
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/mm.h>
@@ -40,6 +42,7 @@
 #include <asm/fixmap.h>
 #include <asm/realmode.h>
 #include <asm/time.h>
+#include <asm/pgalloc.h>
 
 /*
  * We allocate runtime services regions bottom-up, starting from -4G, i.e.
@@ -47,16 +50,7 @@
  */
 static u64 efi_va = EFI_VA_START;
 
-/*
- * Scratch space used for switching the pagetable in the EFI stub
- */
-struct efi_scratch {
-	u64 r15;
-	u64 prev_cr3;
-	pgd_t *efi_pgt;
-	bool use_pgd;
-	u64 phys_stack;
-} __packed;
+struct efi_scratch efi_scratch;
 
 static void __init early_code_mapping_set_exec(int executable)
 {
@@ -83,8 +77,11 @@ pgd_t * __init efi_call_phys_prolog(void)
 	int pgd;
 	int n_pgds;
 
-	if (!efi_enabled(EFI_OLD_MEMMAP))
-		return NULL;
+	if (!efi_enabled(EFI_OLD_MEMMAP)) {
+		save_pgd = (pgd_t *)read_cr3();
+		write_cr3((unsigned long)efi_scratch.efi_pgt);
+		goto out;
+	}
 
 	early_code_mapping_set_exec(1);
 
@@ -96,6 +93,7 @@
 		vaddress = (unsigned long)__va(pgd * PGDIR_SIZE);
 		set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress));
 	}
+out:
 	__flush_tlb_all();
 
 	return save_pgd;
@@ -109,8 +107,11 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
 	int pgd_idx;
 	int nr_pgds;
 
-	if (!save_pgd)
+	if (!efi_enabled(EFI_OLD_MEMMAP)) {
+		write_cr3((unsigned long)save_pgd);
+		__flush_tlb_all();
 		return;
+	}
 
 	nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
 
@@ -123,27 +124,98 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
 	early_code_mapping_set_exec(0);
 }
 
+static pgd_t *efi_pgd;
+
+/*
+ * We need our own copy of the higher levels of the page tables
+ * because we want to avoid inserting EFI region mappings (EFI_VA_END
+ * to EFI_VA_START) into the standard kernel page tables. Everything
+ * else can be shared, see efi_sync_low_kernel_mappings().
+ */
+int __init efi_alloc_page_tables(void)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	gfp_t gfp_mask;
+
+	if (efi_enabled(EFI_OLD_MEMMAP))
+		return 0;
+
+	gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO;
+	efi_pgd = (pgd_t *)__get_free_page(gfp_mask);
+	if (!efi_pgd)
+		return -ENOMEM;
+
+	pgd = efi_pgd + pgd_index(EFI_VA_END);
+
+	pud = pud_alloc_one(NULL, 0);
+	if (!pud) {
+		free_page((unsigned long)efi_pgd);
+		return -ENOMEM;
+	}
+
+	pgd_populate(NULL, pgd, pud);
+
+	return 0;
+}
+
 /*
  * Add low kernel mappings for passing arguments to EFI functions.
  */
 void efi_sync_low_kernel_mappings(void)
 {
-	unsigned num_pgds;
-	pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
+	unsigned num_entries;
+	pgd_t *pgd_k, *pgd_efi;
+	pud_t *pud_k, *pud_efi;
 
 	if (efi_enabled(EFI_OLD_MEMMAP))
 		return;
 
-	num_pgds = pgd_index(MODULES_END - 1) - pgd_index(PAGE_OFFSET);
+	/*
+	 * We can share all PGD entries apart from the one entry that
+	 * covers the EFI runtime mapping space.
+	 *
+	 * Make sure the EFI runtime region mappings are guaranteed to
+	 * only span a single PGD entry and that the entry also maps
+	 * other important kernel regions.
+	 */
+	BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END));
+	BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) !=
+			(EFI_VA_END & PGDIR_MASK));
+
+	pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET);
+	pgd_k = pgd_offset_k(PAGE_OFFSET);
 
-	memcpy(pgd + pgd_index(PAGE_OFFSET),
-	       init_mm.pgd + pgd_index(PAGE_OFFSET),
-	       sizeof(pgd_t) * num_pgds);
+	num_entries = pgd_index(EFI_VA_END) - pgd_index(PAGE_OFFSET);
+	memcpy(pgd_efi, pgd_k, sizeof(pgd_t) * num_entries);
+
+	/*
+	 * We share all the PUD entries apart from those that map the
+	 * EFI regions. Copy around them.
+	 */
+	BUILD_BUG_ON((EFI_VA_START & ~PUD_MASK) != 0);
+	BUILD_BUG_ON((EFI_VA_END & ~PUD_MASK) != 0);
+
+	pgd_efi = efi_pgd + pgd_index(EFI_VA_END);
+	pud_efi = pud_offset(pgd_efi, 0);
+
+	pgd_k = pgd_offset_k(EFI_VA_END);
+	pud_k = pud_offset(pgd_k, 0);
+
+	num_entries = pud_index(EFI_VA_END);
+	memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries);
+
+	pud_efi = pud_offset(pgd_efi, EFI_VA_START);
+	pud_k = pud_offset(pgd_k, EFI_VA_START);
+
+	num_entries = PTRS_PER_PUD - pud_index(EFI_VA_START);
+	memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries);
 }
 
 int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
 {
-	unsigned long text;
+	unsigned long pfn, text;
+	efi_memory_desc_t *md;
 	struct page *page;
 	unsigned npages;
 	pgd_t *pgd;
@@ -151,8 +223,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
 	if (efi_enabled(EFI_OLD_MEMMAP))
 		return 0;
 
-	efi_scratch.efi_pgt = (pgd_t *)(unsigned long)real_mode_header->trampoline_pgd;
-	pgd = __va(efi_scratch.efi_pgt);
+	efi_scratch.efi_pgt = (pgd_t *)__pa(efi_pgd);
+	pgd = efi_pgd;
 
 	/*
@@ -160,7 +232,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
	 * and ident-map those pages containing the map before calling
	 * phys_efi_set_virtual_address_map().
	 */
-	if (kernel_map_pages_in_pgd(pgd, pa_memmap, pa_memmap, num_pages, _PAGE_NX)) {
+	pfn = pa_memmap >> PAGE_SHIFT;
+	if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, _PAGE_NX | _PAGE_RW)) {
 		pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap);
 		return 1;
 	}
@@ -176,6 +249,25 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
 	if (!IS_ENABLED(CONFIG_EFI_MIXED))
 		return 0;
 
+	/*
+	 * Map all of RAM so that we can access arguments in the 1:1
+	 * mapping when making EFI runtime calls.
+	 */
+	for_each_efi_memory_desc(&memmap, md) {
+		if (md->type != EFI_CONVENTIONAL_MEMORY &&
+		    md->type != EFI_LOADER_DATA &&
+		    md->type != EFI_LOADER_CODE)
+			continue;
+
+		pfn = md->phys_addr >> PAGE_SHIFT;
+		npages = md->num_pages;
+
+		if (kernel_map_pages_in_pgd(pgd, pfn, md->phys_addr, npages, _PAGE_RW)) {
+			pr_err("Failed to map 1:1 memory\n");
+			return 1;
+		}
+	}
+
 	page = alloc_page(GFP_KERNEL|__GFP_DMA32);
 	if (!page)
 		panic("Unable to allocate EFI runtime stack < 4GB\n");
@@ -183,10 +275,11 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
 	efi_scratch.phys_stack = virt_to_phys(page_address(page));
 	efi_scratch.phys_stack += PAGE_SIZE;	/* stack grows down */
 
-	npages = (_end - _text) >> PAGE_SHIFT;
+	npages = (_etext - _text) >> PAGE_SHIFT;
 	text = __pa(_text);
+	pfn = text >> PAGE_SHIFT;
 
-	if (kernel_map_pages_in_pgd(pgd, text >> PAGE_SHIFT, text, npages, 0)) {
+	if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, _PAGE_RW)) {
 		pr_err("Failed to map kernel text 1:1\n");
 		return 1;
 	}
@@ -196,20 +289,20 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
 
 void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages)
 {
-	pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
-
-	kernel_unmap_pages_in_pgd(pgd, pa_memmap, num_pages);
+	kernel_unmap_pages_in_pgd(efi_pgd, pa_memmap, num_pages);
 }
 
 static void __init __map_region(efi_memory_desc_t *md, u64 va)
 {
-	pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
-	unsigned long pf = 0;
+	unsigned long flags = _PAGE_RW;
+	unsigned long pfn;
+	pgd_t *pgd = efi_pgd;
 
 	if (!(md->attribute & EFI_MEMORY_WB))
-		pf |= _PAGE_PCD;
+		flags |= _PAGE_PCD;
 
-	if (kernel_map_pages_in_pgd(pgd, md->phys_addr, va, md->num_pages, pf))
+	pfn = md->phys_addr >> PAGE_SHIFT;
+	if (kernel_map_pages_in_pgd(pgd, pfn, va, md->num_pages, flags))
 		pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n",
 			md->phys_addr, va);
 }
@@ -300,21 +393,56 @@ void __init parse_efi_setup(u64 phys_addr, u32 data_len)
 	efi_setup = phys_addr + sizeof(struct setup_data);
 }
 
-void __init efi_runtime_mkexec(void)
+void __init efi_runtime_update_mappings(void)
 {
-	if (!efi_enabled(EFI_OLD_MEMMAP))
+	unsigned long pfn;
+	pgd_t *pgd = efi_pgd;
+	efi_memory_desc_t *md;
+	void *p;
+
+	if (efi_enabled(EFI_OLD_MEMMAP)) {
+		if (__supported_pte_mask & _PAGE_NX)
+			runtime_code_page_mkexec();
 		return;
+	}
+
+	if (!efi_enabled(EFI_NX_PE_DATA))
+		return;
+
+	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+		unsigned long pf = 0;
+		md = p;
+
+		if (!(md->attribute & EFI_MEMORY_RUNTIME))
+			continue;
 
-	if (__supported_pte_mask & _PAGE_NX)
-		runtime_code_page_mkexec();
+		if (!(md->attribute & EFI_MEMORY_WB))
+			pf |= _PAGE_PCD;
+
+		if ((md->attribute & EFI_MEMORY_XP) ||
+		    (md->type == EFI_RUNTIME_SERVICES_DATA))
+			pf |= _PAGE_NX;
+
+		if (!(md->attribute & EFI_MEMORY_RO) &&
+		    (md->type != EFI_RUNTIME_SERVICES_CODE))
+			pf |= _PAGE_RW;
+
+		/* Update the 1:1 mapping */
+		pfn = md->phys_addr >> PAGE_SHIFT;
+		if (kernel_map_pages_in_pgd(pgd, pfn, md->phys_addr, md->num_pages, pf))
+			pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n",
+				md->phys_addr, md->virt_addr);
+
+		if (kernel_map_pages_in_pgd(pgd, pfn, md->virt_addr, md->num_pages, pf))
+			pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n",
+				md->phys_addr, md->virt_addr);
+	}
 }
 
 void __init efi_dump_pagetable(void)
 {
 #ifdef CONFIG_EFI_PGT_DUMP
-	pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
-
-	ptdump_walk_pgd_level(NULL, pgd);
+	ptdump_walk_pgd_level(NULL, efi_pgd);
 #endif
 }
 
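The BUILD_BUG_ON()s in efi_sync_low_kernel_mappings() encode the layout assumption that makes the sharing scheme work: the whole EFI_VA_END..EFI_VA_START window lives inside a single PGD entry, so only one contiguous run of PUD entries has to be skipped while copying. A userspace check of those invariants, assuming an LP64 host, the 4-level x86-64 constants of this era, and the EFI_VA_* values from arch/x86/include/asm/efi.h:

    #include <stdio.h>

    #define PGDIR_SHIFT     39
    #define PUD_SHIFT       30
    #define PTRS_PER_PGD    512
    #define PTRS_PER_PUD    512
    #define EFI_VA_START    ((unsigned long)-(4UL << 30))   /* -4 GB  */
    #define EFI_VA_END      ((unsigned long)-(68UL << 30))  /* -68 GB */

    static unsigned long pgd_index(unsigned long va) { return (va >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1); }
    static unsigned long pud_index(unsigned long va) { return (va >> PUD_SHIFT) & (PTRS_PER_PUD - 1); }

    int main(void)
    {
            /* Both ends of the EFI window fall into the same (last) PGD entry... */
            printf("pgd_index(EFI_VA_END)   = %lu\n", pgd_index(EFI_VA_END));   /* 511 */
            printf("pgd_index(EFI_VA_START) = %lu\n", pgd_index(EFI_VA_START)); /* 511 */

            /* ...so only PUD entries 444-507 belong to EFI; the rest are shared. */
            printf("shared PUDs below: 0..%lu\n", pud_index(EFI_VA_END) - 1);   /* 0..443 */
            printf("shared PUDs above: %lu..%d\n",
                   pud_index(EFI_VA_START), PTRS_PER_PUD - 1);                  /* 508..511 */
            return 0;
    }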
diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S
index 0df2dcc18404..92723aeae0f9 100644
--- a/arch/x86/platform/efi/efi_stub_64.S
+++ b/arch/x86/platform/efi/efi_stub_64.S
@@ -39,41 +39,6 @@
 	mov %rsi, %cr0;			\
 	mov (%rsp), %rsp
 
-	/* stolen from gcc */
-	.macro FLUSH_TLB_ALL
-	movq %r15, efi_scratch(%rip)
-	movq %r14, efi_scratch+8(%rip)
-	movq %cr4, %r15
-	movq %r15, %r14
-	andb $0x7f, %r14b
-	movq %r14, %cr4
-	movq %r15, %cr4
-	movq efi_scratch+8(%rip), %r14
-	movq efi_scratch(%rip), %r15
-	.endm
-
-	.macro SWITCH_PGT
-	cmpb $0, efi_scratch+24(%rip)
-	je 1f
-	movq %r15, efi_scratch(%rip)		# r15
-	# save previous CR3
-	movq %cr3, %r15
-	movq %r15, efi_scratch+8(%rip)		# prev_cr3
-	movq efi_scratch+16(%rip), %r15		# EFI pgt
-	movq %r15, %cr3
-	1:
-	.endm
-
-	.macro RESTORE_PGT
-	cmpb $0, efi_scratch+24(%rip)
-	je 2f
-	movq efi_scratch+8(%rip), %r15
-	movq %r15, %cr3
-	movq efi_scratch(%rip), %r15
-	FLUSH_TLB_ALL
-	2:
-	.endm
-
 ENTRY(efi_call)
 	FRAME_BEGIN
 	SAVE_XMM
@@ -85,17 +50,9 @@ ENTRY(efi_call)
 	mov %r8, %r9
 	mov %rcx, %r8
 	mov %rsi, %rcx
-	SWITCH_PGT
 	call *%rdi
-	RESTORE_PGT
 	addq $48, %rsp
 	RESTORE_XMM
 	FRAME_END
 	ret
 ENDPROC(efi_call)
-
-	.data
-ENTRY(efi_scratch)
-	.fill 3,8,0
-	.byte 0
-	.quad 0
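With SWITCH_PGT/RESTORE_PGT gone, efi_call no longer touches CR3 at all; the page table switch happens in C around the call site instead. A condensed sketch of that replacement logic (in this series it lives in the efi_call_virt() setup/teardown wrappers in arch/x86/include/asm/efi.h, and is abridged here; the efi_scratch fields are the same ones the deleted macros addressed by offset, 8 = prev_cr3, 16 = efi_pgt, 24 = use_pgd):

    /* Before the firmware call: switch to the dedicated EFI page table. */
    if (efi_scratch.use_pgd) {
            efi_scratch.prev_cr3 = read_cr3();
            write_cr3((unsigned long)efi_scratch.efi_pgt);
            __flush_tlb_all();
    }

    /* status = efi_call(...);  -- the stub above now only marshals arguments */

    /* After the call: restore the kernel page table. */
    if (efi_scratch.use_pgd) {
            write_cr3(efi_scratch.prev_cr3);
            __flush_tlb_all();
    }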
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index ed30e79347e8..ab50ada1d56e 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -1,3 +1,5 @@
+#define pr_fmt(fmt) "efi: " fmt
+
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
@@ -55,13 +57,41 @@ void efi_delete_dummy_variable(void)
 }
 
 /*
+ * In the nonblocking case we do not attempt to perform garbage
+ * collection if we do not have enough free space. Rather, we do the
+ * bare minimum check and give up immediately if the available space
+ * is below EFI_MIN_RESERVE.
+ *
+ * This function is intended to be small and simple because it is
+ * invoked from crash handler paths.
+ */
+static efi_status_t
+query_variable_store_nonblocking(u32 attributes, unsigned long size)
+{
+	efi_status_t status;
+	u64 storage_size, remaining_size, max_size;
+
+	status = efi.query_variable_info_nonblocking(attributes, &storage_size,
+						     &remaining_size,
+						     &max_size);
+	if (status != EFI_SUCCESS)
+		return status;
+
+	if (remaining_size - size < EFI_MIN_RESERVE)
+		return EFI_OUT_OF_RESOURCES;
+
+	return EFI_SUCCESS;
+}
+
+/*
  * Some firmware implementations refuse to boot if there's insufficient space
  * in the variable store. Ensure that we never use more than a safe limit.
  *
  * Return EFI_SUCCESS if it is safe to write 'size' bytes to the variable
  * store.
  */
-efi_status_t efi_query_variable_store(u32 attributes, unsigned long size)
+efi_status_t efi_query_variable_store(u32 attributes, unsigned long size,
+				      bool nonblocking)
 {
 	efi_status_t status;
 	u64 storage_size, remaining_size, max_size;
@@ -69,6 +99,9 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size)
 	if (!(attributes & EFI_VARIABLE_NON_VOLATILE))
 		return 0;
 
+	if (nonblocking)
+		return query_variable_store_nonblocking(attributes, size);
+
 	status = efi.query_variable_info(attributes, &storage_size,
 					 &remaining_size, &max_size);
 	if (status != EFI_SUCCESS)
@@ -312,7 +345,7 @@ void __init efi_apply_memmap_quirks(void)
	 * services.
	 */
 	if (!efi_runtime_supported()) {
-		pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n");
+		pr_info("Setup done, disabling due to 32/64-bit mismatch\n");
 		efi_unmap_memmap();
 	}
 
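The nonblocking path above is deliberately a one-shot comparison: no garbage collection is attempted, the write is simply refused if it would eat into the reserve. A userspace model of that decision, with an assumed EFI_MIN_RESERVE of 5120 bytes and invented sample numbers:

    #include <stdio.h>
    #include <stdint.h>
    #include <stdbool.h>

    #define EFI_MIN_RESERVE 5120ULL         /* assumed reserve, in bytes */

    /* Mirrors the kernel's check: refuse if the write dips into the reserve. */
    static bool write_would_fit(uint64_t remaining_size, uint64_t size)
    {
            return remaining_size - size >= EFI_MIN_RESERVE;
    }

    int main(void)
    {
            printf("remaining 16384, write 1024: %s\n",
                   write_would_fit(16384, 1024) ? "EFI_SUCCESS" : "EFI_OUT_OF_RESOURCES");
            printf("remaining  5632, write 1024: %s\n",
                   write_would_fit(5632, 1024) ? "EFI_SUCCESS" : "EFI_OUT_OF_RESOURCES");
            return 0;
    }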