author		Baoquan He <bhe@redhat.com>		2017-05-26 07:36:50 -0400
committer	Ingo Molnar <mingo@kernel.org>		2017-05-28 05:06:16 -0400
commit		94133e46a0f5ca3f138479806104ab4a8cb0455e
tree		8159e0fd072d0b496f2dd5ebe825ceb44d449aa9
parent		4e52797d2efefac3271abdc54439a3435abd77b9
x86/efi: Correct EFI identity mapping under 'efi=old_map' when KASLR is enabled
For EFI with the 'efi=old_map' kernel option specified, the kernel will panic
when KASLR is enabled:
  BUG: unable to handle kernel paging request at 000000007febd57e
  IP: 0x7febd57e
  PGD 1025a067
  PUD 0
  Oops: 0010 [#1] SMP
  Call Trace:
   efi_enter_virtual_mode()
   start_kernel()
   x86_64_start_reservations()
   x86_64_start_kernel()
   start_cpu()
The root cause is that the identity mapping is not built correctly
in the 'efi=old_map' case.
On 'nokaslr' kernels, PAGE_OFFSET is 0xffff880000000000, which is PGDIR_SIZE
aligned, so we can safely borrow the PUD tables of the direct mapping: given a
physical address X, we have pud_index(X) == pud_index(__va(X)).
However, on KASLR kernels PAGE_OFFSET is only PUD_SIZE aligned, so for a given
physical address X, pud_index(X) != pud_index(__va(X)). We can't just copy a
PGD entry from the direct mapping to build the identity mapping; instead we
need to copy the PUD entries one by one from the direct mapping.
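To make the alignment argument concrete, here is a minimal user-space sketch
(not kernel code) of the pud_index() arithmetic. The paging constants mirror
x86-64 4-level paging; the physical address is the page-aligned faulting
address from the oops above, and the KASLR base is a made-up example that is
only PUD_SIZE aligned:

  #include <stdio.h>

  /* x86-64 4-level paging constants, as in the kernel headers */
  #define PUD_SHIFT       30                      /* PUD_SIZE = 1 GiB */
  #define PTRS_PER_PUD    512

  static unsigned long pud_index(unsigned long addr)
  {
          return (addr >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
  }

  int main(void)
  {
          unsigned long x = 0x7febd000UL;         /* sample physical address */

          /* nokaslr: PAGE_OFFSET = 0xffff880000000000 is a multiple of
             PGDIR_SIZE (2^39), so bits 38:30 of __va(x) equal those of x */
          unsigned long off_nokaslr = 0xffff880000000000UL;

          /* KASLR: a hypothetical randomized base that is only PUD_SIZE
             (2^30) aligned, so bits 38:30 of __va(x) differ from x's */
          unsigned long off_kaslr = 0xffff880ac0000000UL;

          printf("pud_index(x)                = %lu\n", pud_index(x));
          printf("pud_index(__va(x)), nokaslr = %lu\n", pud_index(x + off_nokaslr));
          printf("pud_index(__va(x)), KASLR   = %lu\n", pud_index(x + off_kaslr));
          return 0;
  }

With the PGDIR_SIZE-aligned base the two indices match (both 1 here), so a
whole PGD entry of the direct mapping can be reused; with the PUD_SIZE-aligned
base they differ (1 vs 44), which is why the PUD entries have to be copied
individually.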
Fix it.
Signed-off-by: Baoquan He <bhe@redhat.com>
Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Bhupesh Sharma <bhsharma@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Young <dyoung@redhat.com>
Cc: Frank Ramsay <frank.ramsay@hpe.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Russ Anderson <rja@sgi.com>
Cc: Thomas Garnier <thgarnie@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/20170526113652.21339-5-matt@codeblueprint.co.uk
[ Fixed and reworded the changelog and code comments to be more readable. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
 arch/x86/platform/efi/efi_64.c | 79
 1 file changed, 71 insertions(+), 8 deletions(-)
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index c488625c9712..eb8dff15a7f6 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -71,11 +71,13 @@ static void __init early_code_mapping_set_exec(int executable)
 
 pgd_t * __init efi_call_phys_prolog(void)
 {
-	unsigned long vaddress;
-	pgd_t *save_pgd;
+	unsigned long vaddr, addr_pgd, addr_p4d, addr_pud;
+	pgd_t *save_pgd, *pgd_k, *pgd_efi;
+	p4d_t *p4d, *p4d_k, *p4d_efi;
+	pud_t *pud;
 
 	int pgd;
-	int n_pgds;
+	int n_pgds, i, j;
 
 	if (!efi_enabled(EFI_OLD_MEMMAP)) {
 		save_pgd = (pgd_t *)read_cr3();
@@ -88,10 +90,49 @@ pgd_t * __init efi_call_phys_prolog(void)
 	n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE);
 	save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL);
 
+	/*
+	 * Build 1:1 identity mapping for efi=old_map usage. Note that
+	 * PAGE_OFFSET is PGDIR_SIZE aligned when KASLR is disabled, while it
+	 * is PUD_SIZE aligned with KASLR enabled. So for a given physical
+	 * address X, pud_index(X) != pud_index(__va(X)); we can only copy the
+	 * PUD entry of __va(X) into the PUD entry of X to build the 1:1 mapping.
+	 * This means here we can only reuse the PMD tables of the direct mapping.
+	 */
 	for (pgd = 0; pgd < n_pgds; pgd++) {
-		save_pgd[pgd] = *pgd_offset_k(pgd * PGDIR_SIZE);
-		vaddress = (unsigned long)__va(pgd * PGDIR_SIZE);
-		set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress));
+		addr_pgd = (unsigned long)(pgd * PGDIR_SIZE);
+		vaddr = (unsigned long)__va(pgd * PGDIR_SIZE);
+		pgd_efi = pgd_offset_k(addr_pgd);
+		save_pgd[pgd] = *pgd_efi;
+
+		p4d = p4d_alloc(&init_mm, pgd_efi, addr_pgd);
+		if (!p4d) {
+			pr_err("Failed to allocate p4d table!\n");
+			goto out;
+		}
+
+		for (i = 0; i < PTRS_PER_P4D; i++) {
+			addr_p4d = addr_pgd + i * P4D_SIZE;
+			p4d_efi = p4d + p4d_index(addr_p4d);
+
+			pud = pud_alloc(&init_mm, p4d_efi, addr_p4d);
+			if (!pud) {
+				pr_err("Failed to allocate pud table!\n");
+				goto out;
+			}
+
+			for (j = 0; j < PTRS_PER_PUD; j++) {
+				addr_pud = addr_p4d + j * PUD_SIZE;
+
+				if (addr_pud > (max_pfn << PAGE_SHIFT))
+					break;
+
+				vaddr = (unsigned long)__va(addr_pud);
+
+				pgd_k = pgd_offset_k(vaddr);
+				p4d_k = p4d_offset(pgd_k, vaddr);
+				pud[j] = *pud_offset(p4d_k, vaddr);
+			}
+		}
 	}
 out:
 	__flush_tlb_all();
@@ -104,8 +145,11 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
 	/*
 	 * After the lock is released, the original page table is restored.
 	 */
-	int pgd_idx;
+	int pgd_idx, i;
 	int nr_pgds;
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
 
 	if (!efi_enabled(EFI_OLD_MEMMAP)) {
 		write_cr3((unsigned long)save_pgd);
@@ -115,9 +159,28 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
 
 	nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
 
-	for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++)
+	for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) {
+		pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE);
 		set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]);
 
+		if (!(pgd_val(*pgd) & _PAGE_PRESENT))
+			continue;
+
+		for (i = 0; i < PTRS_PER_P4D; i++) {
+			p4d = p4d_offset(pgd,
+					 pgd_idx * PGDIR_SIZE + i * P4D_SIZE);
+
+			if (!(p4d_val(*p4d) & _PAGE_PRESENT))
+				continue;
+
+			pud = (pud_t *)p4d_page_vaddr(*p4d);
+			pud_free(&init_mm, pud);
+		}
+
+		p4d = (p4d_t *)pgd_page_vaddr(*pgd);
+		p4d_free(&init_mm, p4d);
+	}
+
 	kfree(save_pgd);
 
 	__flush_tlb_all();
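For a sense of the scale involved, here is a second user-space sketch of the
bounds the new prolog loops iterate over, assuming a hypothetical machine with
64 GiB of RAM standing in for max_pfn; the copy loop mirrors the
"addr_pud > (max_pfn << PAGE_SHIFT)" break above:

  #include <stdio.h>

  #define PAGE_SHIFT      12
  #define PUD_SIZE        (1UL << 30)             /* 1 GiB   */
  #define PGDIR_SIZE      (1UL << 39)             /* 512 GiB */
  #define DIV_ROUND_UP(n, d)      (((n) + (d) - 1) / (d))

  int main(void)
  {
          /* hypothetical machine with 64 GiB of RAM */
          unsigned long max_pfn = (64UL << 30) >> PAGE_SHIFT;
          unsigned long end = max_pfn << PAGE_SHIFT;
          unsigned long n_pgds = DIV_ROUND_UP(end, PGDIR_SIZE);
          unsigned long addr, copied = 0;

          /* one PUD entry per GiB, copied while addr <= end, exactly as
             the prolog's strict "addr_pud > end" break implies */
          for (addr = 0; addr <= end; addr += PUD_SIZE)
                  copied++;

          printf("n_pgds = %lu, PUD entries copied = %lu\n", n_pgds, copied);
          return 0;
  }

On such a machine a single PGD slot is saved and 65 one-GiB PUD entries are
copied (the strict '>' also copies the entry starting exactly at the end of
RAM), while the PMD tables stay shared with the direct mapping. The epilog
side then restores the saved PGD entries and frees the PUD and P4D tables the
prolog allocated.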
