aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Borislav Petkov <bp@suse.de> 2014-01-18 06:48:17 -0500
committer: Matt Fleming <matt.fleming@intel.com> 2014-03-04 11:17:18 -0500
commit: b7b898ae0c0a82489511a1ce1b35f26215e6beb5 (patch)
tree: 79167e1a9fe59bf3e4b55772ccd294bf9cad2b11
parent: 42a5477251f0e0f33ad5f6a95c48d685ec03191e (diff)
x86/efi: Make efi virtual runtime map passing more robust
Currently, running SetVirtualAddressMap() and passing the physical address of the virtual map array was working only by a lucky coincidence because the memory was present in the EFI page table too. Until Toshi went and booted this on a big HP box - the krealloc() manner of resizing the memmap we're doing did allocate from such physical addresses which were not mapped anymore and boom:

http://lkml.kernel.org/r/1386806463.1791.295.camel@misato.fc.hp.com

One way to take care of that issue is to reimplement the krealloc thing but with pages. We start with contiguous pages of order 1, i.e. 2 pages, and when we deplete that memory (shouldn't happen all that often but you know firmware) we realloc the next power-of-two pages.

Having the pages, it is much more handy and easy to map them into the EFI page table with the already existing mapping code which we're using for building the virtual mappings.

Thanks to Toshi Kani and Matt for the great debugging help.

Reported-by: Toshi Kani <toshi.kani@hp.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Tested-by: Toshi Kani <toshi.kani@hp.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
-rw-r--r-- arch/x86/include/asm/efi.h 3
-rw-r--r-- arch/x86/platform/efi/efi.c 99
-rw-r--r-- arch/x86/platform/efi/efi_32.c 6
-rw-r--r-- arch/x86/platform/efi/efi_64.c 32
4 files changed, 114 insertions, 26 deletions
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 4afd3b3b658f..e985d6bf7d3a 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -129,7 +129,8 @@ extern void efi_memory_uc(u64 addr, unsigned long size);
129extern void __init efi_map_region(efi_memory_desc_t *md); 129extern void __init efi_map_region(efi_memory_desc_t *md);
130extern void __init efi_map_region_fixed(efi_memory_desc_t *md); 130extern void __init efi_map_region_fixed(efi_memory_desc_t *md);
131extern void efi_sync_low_kernel_mappings(void); 131extern void efi_sync_low_kernel_mappings(void);
132extern void efi_setup_page_tables(void); 132extern int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages);
133extern void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages);
133extern void __init old_map_region(efi_memory_desc_t *md); 134extern void __init old_map_region(efi_memory_desc_t *md);
134extern void __init runtime_code_page_mkexec(void); 135extern void __init runtime_code_page_mkexec(void);
135extern void __init efi_runtime_mkexec(void); 136extern void __init efi_runtime_mkexec(void);
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 37f20d71ec4b..576bb126593a 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -926,14 +926,36 @@ static void __init efi_map_regions_fixed(void)
926 926
927} 927}
928 928
929static void *realloc_pages(void *old_memmap, int old_shift)
930{
931 void *ret;
932
933 ret = (void *)__get_free_pages(GFP_KERNEL, old_shift + 1);
934 if (!ret)
935 goto out;
936
937 /*
938 * A first-time allocation doesn't have anything to copy.
939 */
940 if (!old_memmap)
941 return ret;
942
943 memcpy(ret, old_memmap, PAGE_SIZE << old_shift);
944
945out:
946 free_pages((unsigned long)old_memmap, old_shift);
947 return ret;
948}
949
929/* 950/*
930 * Map efi memory ranges for runtime serivce and update new_memmap with virtual 951 * Map the efi memory ranges of the runtime services and update new_mmap with
931 * addresses. 952 * virtual addresses.
932 */ 953 */
933static void * __init efi_map_regions(int *count) 954static void * __init efi_map_regions(int *count, int *pg_shift)
934{ 955{
956 void *p, *new_memmap = NULL;
957 unsigned long left = 0;
935 efi_memory_desc_t *md; 958 efi_memory_desc_t *md;
936 void *p, *tmp, *new_memmap = NULL;
937 959
938 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { 960 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
939 md = p; 961 md = p;
@@ -948,20 +970,23 @@ static void * __init efi_map_regions(int *count)
948 efi_map_region(md); 970 efi_map_region(md);
949 get_systab_virt_addr(md); 971 get_systab_virt_addr(md);
950 972
951 tmp = krealloc(new_memmap, (*count + 1) * memmap.desc_size, 973 if (left < memmap.desc_size) {
952 GFP_KERNEL); 974 new_memmap = realloc_pages(new_memmap, *pg_shift);
953 if (!tmp) 975 if (!new_memmap)
954 goto out; 976 return NULL;
955 new_memmap = tmp; 977
978 left += PAGE_SIZE << *pg_shift;
979 (*pg_shift)++;
980 }
981
956 memcpy(new_memmap + (*count * memmap.desc_size), md, 982 memcpy(new_memmap + (*count * memmap.desc_size), md,
957 memmap.desc_size); 983 memmap.desc_size);
984
985 left -= memmap.desc_size;
958 (*count)++; 986 (*count)++;
959 } 987 }
960 988
961 return new_memmap; 989 return new_memmap;
962out:
963 kfree(new_memmap);
964 return NULL;
965} 990}
966 991
967/* 992/*
@@ -987,9 +1012,9 @@ out:
987 */ 1012 */
988void __init efi_enter_virtual_mode(void) 1013void __init efi_enter_virtual_mode(void)
989{ 1014{
990 efi_status_t status; 1015 int err, count = 0, pg_shift = 0;
991 void *new_memmap = NULL; 1016 void *new_memmap = NULL;
992 int err, count = 0; 1017 efi_status_t status;
993 1018
994 efi.systab = NULL; 1019 efi.systab = NULL;
995 1020
@@ -1006,20 +1031,24 @@ void __init efi_enter_virtual_mode(void)
1006 efi_map_regions_fixed(); 1031 efi_map_regions_fixed();
1007 } else { 1032 } else {
1008 efi_merge_regions(); 1033 efi_merge_regions();
1009 new_memmap = efi_map_regions(&count); 1034 new_memmap = efi_map_regions(&count, &pg_shift);
1010 if (!new_memmap) { 1035 if (!new_memmap) {
1011 pr_err("Error reallocating memory, EFI runtime non-functional!\n"); 1036 pr_err("Error reallocating memory, EFI runtime non-functional!\n");
1012 return; 1037 return;
1013 } 1038 }
1014 }
1015 1039
1016 err = save_runtime_map(); 1040 err = save_runtime_map();
1017 if (err) 1041 if (err)
1018 pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n"); 1042 pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n");
1043 }
1019 1044
1020 BUG_ON(!efi.systab); 1045 BUG_ON(!efi.systab);
1021 1046
1022 efi_setup_page_tables(); 1047 if (!efi_setup) {
1048 if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift))
1049 return;
1050 }
1051
1023 efi_sync_low_kernel_mappings(); 1052 efi_sync_low_kernel_mappings();
1024 efi_dump_pagetable(); 1053 efi_dump_pagetable();
1025 1054
@@ -1060,7 +1089,35 @@ void __init efi_enter_virtual_mode(void)
1060 1089
1061 efi_runtime_mkexec(); 1090 efi_runtime_mkexec();
1062 1091
1063 kfree(new_memmap); 1092
1093 /*
1094 * We mapped the descriptor array into the EFI pagetable above but we're
1095 * not unmapping it here. Here's why:
1096 *
1097 * We're copying select PGDs from the kernel page table to the EFI page
1098 * table and when we do so and make changes to those PGDs like unmapping
1099 * stuff from them, those changes appear in the kernel page table and we
1100 * go boom.
1101 *
1102 * From setup_real_mode():
1103 *
1104 * ...
1105 * trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd;
1106 *
1107 * In this particular case, our allocation is in PGD 0 of the EFI page
1108 * table but we've copied that PGD from PGD[272] of the kernel page table:
1109 *
1110 * pgd_index(__PAGE_OFFSET = 0xffff880000000000) = 272
1111 *
1112 * where the direct memory mapping in kernel space is.
1113 *
1114 * new_memmap's VA comes from that direct mapping and thus clearing it,
1115 * it would get cleared in the kernel page table too.
1116 *
1117 * efi_cleanup_page_tables(__pa(new_memmap), 1 << pg_shift);
1118 */
1119 if (!efi_setup)
1120 free_pages((unsigned long)new_memmap, pg_shift);
1064 1121
1065 /* clean DUMMY object */ 1122 /* clean DUMMY object */
1066 efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, 1123 efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID,
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 39496ae3928a..9ee3491e31fb 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -40,8 +40,12 @@
40static unsigned long efi_rt_eflags; 40static unsigned long efi_rt_eflags;
41 41
42void efi_sync_low_kernel_mappings(void) {} 42void efi_sync_low_kernel_mappings(void) {}
43void efi_setup_page_tables(void) {}
44void __init efi_dump_pagetable(void) {} 43void __init efi_dump_pagetable(void) {}
44int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
45{
46 return 0;
47}
48void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) {}
45 49
46void __init efi_map_region(efi_memory_desc_t *md) 50void __init efi_map_region(efi_memory_desc_t *md)
47{ 51{
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index e05c69b46f05..19280900ec25 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -137,12 +137,38 @@ void efi_sync_low_kernel_mappings(void)
137 sizeof(pgd_t) * num_pgds); 137 sizeof(pgd_t) * num_pgds);
138} 138}
139 139
140void efi_setup_page_tables(void) 140int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
141{ 141{
142 pgd_t *pgd;
143
144 if (efi_enabled(EFI_OLD_MEMMAP))
145 return 0;
146
142 efi_scratch.efi_pgt = (pgd_t *)(unsigned long)real_mode_header->trampoline_pgd; 147 efi_scratch.efi_pgt = (pgd_t *)(unsigned long)real_mode_header->trampoline_pgd;
148 pgd = __va(efi_scratch.efi_pgt);
143 149
144 if (!efi_enabled(EFI_OLD_MEMMAP)) 150 /*
145 efi_scratch.use_pgd = true; 151 * It can happen that the physical address of new_memmap lands in memory
152 * which is not mapped in the EFI page table. Therefore we need to go
153 * and ident-map those pages containing the map before calling
154 * phys_efi_set_virtual_address_map().
155 */
156 if (kernel_map_pages_in_pgd(pgd, pa_memmap, pa_memmap, num_pages, _PAGE_NX)) {
157 pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap);
158 return 1;
159 }
160
161 efi_scratch.use_pgd = true;
162
163
164 return 0;
165}
166
167void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages)
168{
169 pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd);
170
171 kernel_unmap_pages_in_pgd(pgd, pa_memmap, num_pages);
146} 172}
147 173
148static void __init __map_region(efi_memory_desc_t *md, u64 va) 174static void __init __map_region(efi_memory_desc_t *md, u64 va)