aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatt Fleming <matt@codeblueprint.co.uk>2016-03-11 06:19:23 -0500
committerIngo Molnar <mingo@kernel.org>2016-03-12 10:57:45 -0500
commit452308de61056a539352a9306c46716d7af8a1f1 (patch)
treee3220cd334c147a9eb9d4ee2512d45f7d9863587
parent6e6867093de35141f0a76b66ac13f9f2e2c8e77a (diff)
x86/efi: Fix boot crash by always mapping boot service regions into new EFI page tables
Some machines have EFI regions in page zero (physical address 0x00000000) and historically that region has been added to the e820 map via trim_bios_range(), and ultimately mapped into the kernel page tables. It was not mapped via efi_map_regions() as one would expect. Alexis reports that with the new separate EFI page tables some boot services regions, such as page zero, are not mapped. This triggers an oops during the SetVirtualAddressMap() runtime call. For the EFI boot services quirk on x86 we need to memblock_reserve() boot services regions until after SetVirtualAddressMap(). Doing that while respecting the ownership of regions that may have already been reserved by the kernel was the motivation behind this commit: 7d68dc3f1003 ("x86, efi: Do not reserve boot services regions within reserved areas") That patch was merged at a time when the EFI runtime virtual mappings were inserted into the kernel page tables as described above, and the trick of setting ->numpages (and hence the region size) to zero to track regions that should not be freed in efi_free_boot_services() meant that we never mapped those regions in efi_map_regions(). Instead we were relying solely on the existing kernel mappings. Now that we have separate page tables we need to make sure the EFI boot services regions are mapped correctly, even if someone else has already called memblock_reserve(). Instead of stashing a tag in ->numpages, set the EFI_MEMORY_RUNTIME bit of ->attribute. Since it generally makes no sense to mark a boot services region as required at runtime, it's pretty much guaranteed the firmware will not have already set this bit. For the record, the specific circumstances under which Alexis triggered this bug was that an EFI runtime driver on his machine was responding to the EVT_SIGNAL_VIRTUAL_ADDRESS_CHANGE event during SetVirtualAddressMap(). The event handler for this driver looks like this, sub rsp,0x28 lea rdx,[rip+0x2445] # 0xaa948720 mov ecx,0x4 call func_aa9447c0 ; call to ConvertPointer(4, & 0xaa948720) mov r11,QWORD PTR [rip+0x2434] # 0xaa948720 xor eax,eax mov BYTE PTR [r11+0x1],0x1 add rsp,0x28 ret Which is pretty typical code for an EVT_SIGNAL_VIRTUAL_ADDRESS_CHANGE handler. The "mov r11, QWORD PTR [rip+0x2424]" was the faulting instruction because ConvertPointer() was being called to convert the address 0x0000000000000000, which when converted is left unchanged and remains 0x0000000000000000. The output of the oops trace gave the impression of a standard NULL pointer dereference bug, but because we're accessing physical addresses during ConvertPointer(), it wasn't. EFI boot services code is stored at that address on Alexis' machine. Reported-by: Alexis Murzeau <amurzeau@gmail.com> Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org> Cc: Ben Hutchings <ben@decadent.org.uk> Cc: Borislav Petkov <bp@alien8.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Maarten Lankhorst <maarten.lankhorst@canonical.com> Cc: Matthew Garrett <mjg59@srcf.ucam.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Raphael Hertzog <hertzog@debian.org> Cc: Roger Shimizu <rogershimizu@gmail.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1457695163-29632-2-git-send-email-matt@codeblueprint.co.uk Link: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=815125 Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--arch/x86/platform/efi/quirks.c79
1 files changed, 62 insertions, 17 deletions
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 2d66db8f80f9..ed30e79347e8 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -131,6 +131,27 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size)
131EXPORT_SYMBOL_GPL(efi_query_variable_store); 131EXPORT_SYMBOL_GPL(efi_query_variable_store);
132 132
133/* 133/*
134 * Helper function for efi_reserve_boot_services() to figure out if we
135 * can free regions in efi_free_boot_services().
136 *
137 * Use this function to ensure we do not free regions owned by somebody
138 * else. We must only reserve (and then free) regions:
139 *
140 * - Not within any part of the kernel
141 * - Not the BIOS reserved area (E820_RESERVED, E820_NVS, etc)
142 */
143static bool can_free_region(u64 start, u64 size)
144{
145 if (start + size > __pa_symbol(_text) && start <= __pa_symbol(_end))
146 return false;
147
148 if (!e820_all_mapped(start, start+size, E820_RAM))
149 return false;
150
151 return true;
152}
153
154/*
134 * The UEFI specification makes it clear that the operating system is free to do 155 * The UEFI specification makes it clear that the operating system is free to do
135 * whatever it wants with boot services code after ExitBootServices() has been 156 * whatever it wants with boot services code after ExitBootServices() has been
136 * called. Ignoring this recommendation a significant bunch of EFI implementations 157 * called. Ignoring this recommendation a significant bunch of EFI implementations
@@ -147,26 +168,50 @@ void __init efi_reserve_boot_services(void)
147 efi_memory_desc_t *md = p; 168 efi_memory_desc_t *md = p;
148 u64 start = md->phys_addr; 169 u64 start = md->phys_addr;
149 u64 size = md->num_pages << EFI_PAGE_SHIFT; 170 u64 size = md->num_pages << EFI_PAGE_SHIFT;
171 bool already_reserved;
150 172
151 if (md->type != EFI_BOOT_SERVICES_CODE && 173 if (md->type != EFI_BOOT_SERVICES_CODE &&
152 md->type != EFI_BOOT_SERVICES_DATA) 174 md->type != EFI_BOOT_SERVICES_DATA)
153 continue; 175 continue;
154 /* Only reserve where possible: 176
155 * - Not within any already allocated areas 177 already_reserved = memblock_is_region_reserved(start, size);
156 * - Not over any memory area (really needed, if above?) 178
157 * - Not within any part of the kernel 179 /*
158 * - Not the bios reserved area 180 * Because the following memblock_reserve() is paired
159 */ 181 * with free_bootmem_late() for this region in
160 if ((start + size > __pa_symbol(_text) 182 * efi_free_boot_services(), we must be extremely
161 && start <= __pa_symbol(_end)) || 183 * careful not to reserve, and subsequently free,
162 !e820_all_mapped(start, start+size, E820_RAM) || 184 * critical regions of memory (like the kernel image) or
163 memblock_is_region_reserved(start, size)) { 185 * those regions that somebody else has already
164 /* Could not reserve, skip it */ 186 * reserved.
165 md->num_pages = 0; 187 *
166 memblock_dbg("Could not reserve boot range [0x%010llx-0x%010llx]\n", 188 * A good example of a critical region that must not be
167 start, start+size-1); 189 * freed is page zero (first 4Kb of memory), which may
168 } else 190 * contain boot services code/data but is marked
191 * E820_RESERVED by trim_bios_range().
192 */
193 if (!already_reserved) {
169 memblock_reserve(start, size); 194 memblock_reserve(start, size);
195
196 /*
197 * If we are the first to reserve the region, no
198 * one else cares about it. We own it and can
199 * free it later.
200 */
201 if (can_free_region(start, size))
202 continue;
203 }
204
205 /*
206 * We don't own the region. We must not free it.
207 *
208 * Setting this bit for a boot services region really
209 * doesn't make sense as far as the firmware is
210 * concerned, but it does provide us with a way to tag
211 * those regions that must not be paired with
212 * free_bootmem_late().
213 */
214 md->attribute |= EFI_MEMORY_RUNTIME;
170 } 215 }
171} 216}
172 217
@@ -183,8 +228,8 @@ void __init efi_free_boot_services(void)
183 md->type != EFI_BOOT_SERVICES_DATA) 228 md->type != EFI_BOOT_SERVICES_DATA)
184 continue; 229 continue;
185 230
186 /* Could not reserve boot area */ 231 /* Do not free, someone else owns it: */
187 if (!size) 232 if (md->attribute & EFI_MEMORY_RUNTIME)
188 continue; 233 continue;
189 234
190 free_bootmem_late(start, size); 235 free_bootmem_late(start, size);