author     Linus Torvalds <torvalds@linux-foundation.org>   2018-04-02 18:45:30 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2018-04-02 18:45:30 -0400
commit     d22fff81418edc92be534cad8d59da914049bf69 (patch)
tree       96b22b20bbc789a76e744bcfc11a7f0854b62ece /arch/x86/mm
parent     986b37c0ae4f0a3f93d8974d03a9cbc1502dd377 (diff)
parent     eaeb8e76cd5751e805f6e4a3fcec91d283e3b0c2 (diff)
Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 mm updates from Ingo Molnar:
- Extend the memmap= boot parameter syntax to allow the redeclaration
and dropping of existing ranges, and to support all e820 range types
(Jan H. Schönherr)
- Improve the W+X boot time security checks to remove false positive
warnings on Xen (Jan Beulich)
- Support booting as Xen PVH guest (Juergen Gross)
- Improved 5-level paging (LA57) support, in particular it is now possible
to have a single kernel image for both 4-level and 5-level hardware
(Kirill A. Shutemov); see the sketch right after this list
- AMD hardware RAM encryption support (SME/SEV) fixes (Tom Lendacky)
- Preparatory commits for hardware-encrypted RAM support on Intel CPUs
(Kirill A. Shutemov)
- Improved Intel-MID support (Andy Shevchenko)
- Show EFI page tables in page_tables debug files (Andy Lutomirski)
- ... plus misc fixes and smaller cleanups
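
To make the recurring pattern in the diffs below concrete: most of the 5-level paging changes replace compile-time checks such as IS_ENABLED(CONFIG_X86_5LEVEL) or CONFIG_PGTABLE_LEVELS comparisons with the boot-time pgtable_l5_enabled flag, so one kernel image serves both paging modes. Here is a minimal, self-contained sketch of that shape in plain user-space C (all names are illustrative stand-ins, not kernel code); sync_global_pgds() in the init_64.c hunk further down is a real instance of the same structure.

```c
/*
 * Minimal user-space sketch (not kernel code): a compile-time paging-level
 * check becomes a branch on a flag read once at boot, so a single binary
 * handles both 4-level and 5-level modes. Names are illustrative stand-ins.
 */
#include <stdbool.h>
#include <stdio.h>

static bool l5_paging_enabled; /* would be derived from the boot-time paging mode */

static void sync_pgds_l5(void) { puts("sync at the pgd level (5-level)"); }
static void sync_pgds_l4(void) { puts("sync at the p4d level (4-level)"); }

/* Single entry point branches on the boot-time flag instead of #ifdef. */
static void sync_pgds(void)
{
	if (l5_paging_enabled)
		sync_pgds_l5();
	else
		sync_pgds_l4();
}

int main(void)
{
	l5_paging_enabled = false; /* pretend the CPU booted in 4-level mode */
	sync_pgds();
	return 0;
}
```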
* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (56 commits)
x86/cpu/tme: Fix spelling: "configuation" -> "configuration"
x86/boot: Fix SEV boot failure from change to __PHYSICAL_MASK_SHIFT
x86/mm: Update comment in detect_tme() regarding x86_phys_bits
x86/mm/32: Remove unused node_memmap_size_bytes() & CONFIG_NEED_NODE_MEMMAP_SIZE logic
x86/mm: Remove pointless checks in vmalloc_fault
x86/platform/intel-mid: Add special handling for ACPI HW reduced platforms
ACPI, x86/boot: Introduce the ->reduced_hw_early_init() ACPI callback
ACPI, x86/boot: Split out acpi_generic_reduce_hw_init() and export
x86/pconfig: Provide defines and helper to run MKTME_KEY_PROG leaf
x86/pconfig: Detect PCONFIG targets
x86/tme: Detect if TME and MKTME is activated by BIOS
x86/boot/compressed/64: Handle 5-level paging boot if kernel is above 4G
x86/boot/compressed/64: Use page table in trampoline memory
x86/boot/compressed/64: Use stack from trampoline memory
x86/boot/compressed/64: Make sure we have a 32-bit code segment
x86/mm: Do not use paravirtualized calls in native_set_p4d()
kdump, vmcoreinfo: Export pgtable_l5_enabled value
x86/boot/compressed/64: Prepare new top-level page table for trampoline
x86/boot/compressed/64: Set up trampoline memory
x86/boot/compressed/64: Save and restore trampoline memory
...
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--   arch/x86/mm/Makefile                 |  15
-rw-r--r--   arch/x86/mm/debug_pagetables.c       |  32
-rw-r--r--   arch/x86/mm/dump_pagetables.c        | 125
-rw-r--r--   arch/x86/mm/fault.c                  |  60
-rw-r--r--   arch/x86/mm/ident_map.c              |   2
-rw-r--r--   arch/x86/mm/init_64.c                |  32
-rw-r--r--   arch/x86/mm/kasan_init_64.c          |  20
-rw-r--r--   arch/x86/mm/kaslr.c                  |  29
-rw-r--r--   arch/x86/mm/mem_encrypt.c            | 578
-rw-r--r--   arch/x86/mm/mem_encrypt_identity.c   | 564
-rw-r--r--   arch/x86/mm/numa_32.c                |  11
-rw-r--r--   arch/x86/mm/tlb.c                    |   4
12 files changed, 750 insertions(+), 722 deletions(-)
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 27e9e90a8d35..4b101dd6e52f 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,12 +1,15 @@ | |||
1 | # SPDX-License-Identifier: GPL-2.0 | 1 | # SPDX-License-Identifier: GPL-2.0 |
2 | # Kernel does not boot with instrumentation of tlb.c and mem_encrypt.c | 2 | # Kernel does not boot with instrumentation of tlb.c and mem_encrypt*.c |
3 | KCOV_INSTRUMENT_tlb.o := n | 3 | KCOV_INSTRUMENT_tlb.o := n |
4 | KCOV_INSTRUMENT_mem_encrypt.o := n | 4 | KCOV_INSTRUMENT_mem_encrypt.o := n |
5 | KCOV_INSTRUMENT_mem_encrypt_identity.o := n | ||
5 | 6 | ||
6 | KASAN_SANITIZE_mem_encrypt.o := n | 7 | KASAN_SANITIZE_mem_encrypt.o := n |
8 | KASAN_SANITIZE_mem_encrypt_identity.o := n | ||
7 | 9 | ||
8 | ifdef CONFIG_FUNCTION_TRACER | 10 | ifdef CONFIG_FUNCTION_TRACER |
9 | CFLAGS_REMOVE_mem_encrypt.o = -pg | 11 | CFLAGS_REMOVE_mem_encrypt.o = -pg |
12 | CFLAGS_REMOVE_mem_encrypt_identity.o = -pg | ||
10 | endif | 13 | endif |
11 | 14 | ||
12 | obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ | 15 | obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ |
@@ -16,6 +19,7 @@ obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ | |||
16 | nostackp := $(call cc-option, -fno-stack-protector) | 19 | nostackp := $(call cc-option, -fno-stack-protector) |
17 | CFLAGS_physaddr.o := $(nostackp) | 20 | CFLAGS_physaddr.o := $(nostackp) |
18 | CFLAGS_setup_nx.o := $(nostackp) | 21 | CFLAGS_setup_nx.o := $(nostackp) |
22 | CFLAGS_mem_encrypt_identity.o := $(nostackp) | ||
19 | 23 | ||
20 | CFLAGS_fault.o := -I$(src)/../include/asm/trace | 24 | CFLAGS_fault.o := -I$(src)/../include/asm/trace |
21 | 25 | ||
@@ -47,4 +51,5 @@ obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o | |||
47 | obj-$(CONFIG_PAGE_TABLE_ISOLATION) += pti.o | 51 | obj-$(CONFIG_PAGE_TABLE_ISOLATION) += pti.o |
48 | 52 | ||
49 | obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt.o | 53 | obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt.o |
54 | obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_identity.o | ||
50 | obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o | 55 | obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o |
diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c
index 421f2664ffa0..51a6f92da2bf 100644
--- a/arch/x86/mm/debug_pagetables.c
+++ b/arch/x86/mm/debug_pagetables.c
@@ -72,6 +72,31 @@ static const struct file_operations ptdump_curusr_fops = { | |||
72 | }; | 72 | }; |
73 | #endif | 73 | #endif |
74 | 74 | ||
75 | #if defined(CONFIG_EFI) && defined(CONFIG_X86_64) | ||
76 | extern pgd_t *efi_pgd; | ||
77 | static struct dentry *pe_efi; | ||
78 | |||
79 | static int ptdump_show_efi(struct seq_file *m, void *v) | ||
80 | { | ||
81 | if (efi_pgd) | ||
82 | ptdump_walk_pgd_level_debugfs(m, efi_pgd, false); | ||
83 | return 0; | ||
84 | } | ||
85 | |||
86 | static int ptdump_open_efi(struct inode *inode, struct file *filp) | ||
87 | { | ||
88 | return single_open(filp, ptdump_show_efi, NULL); | ||
89 | } | ||
90 | |||
91 | static const struct file_operations ptdump_efi_fops = { | ||
92 | .owner = THIS_MODULE, | ||
93 | .open = ptdump_open_efi, | ||
94 | .read = seq_read, | ||
95 | .llseek = seq_lseek, | ||
96 | .release = single_release, | ||
97 | }; | ||
98 | #endif | ||
99 | |||
75 | static struct dentry *dir, *pe_knl, *pe_curknl; | 100 | static struct dentry *dir, *pe_knl, *pe_curknl; |
76 | 101 | ||
77 | static int __init pt_dump_debug_init(void) | 102 | static int __init pt_dump_debug_init(void) |
@@ -96,6 +121,13 @@ static int __init pt_dump_debug_init(void) | |||
96 | if (!pe_curusr) | 121 | if (!pe_curusr) |
97 | goto err; | 122 | goto err; |
98 | #endif | 123 | #endif |
124 | |||
125 | #if defined(CONFIG_EFI) && defined(CONFIG_X86_64) | ||
126 | pe_efi = debugfs_create_file("efi", 0400, dir, NULL, &ptdump_efi_fops); | ||
127 | if (!pe_efi) | ||
128 | goto err; | ||
129 | #endif | ||
130 | |||
99 | return 0; | 131 | return 0; |
100 | err: | 132 | err: |
101 | debugfs_remove_recursive(dir); | 133 | debugfs_remove_recursive(dir); |
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 2a4849e92831..62a7e9f65dec 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -29,6 +29,7 @@ | |||
29 | struct pg_state { | 29 | struct pg_state { |
30 | int level; | 30 | int level; |
31 | pgprot_t current_prot; | 31 | pgprot_t current_prot; |
32 | pgprotval_t effective_prot; | ||
32 | unsigned long start_address; | 33 | unsigned long start_address; |
33 | unsigned long current_address; | 34 | unsigned long current_address; |
34 | const struct addr_marker *marker; | 35 | const struct addr_marker *marker; |
@@ -85,11 +86,15 @@ static struct addr_marker address_markers[] = { | |||
85 | [VMALLOC_START_NR] = { 0UL, "vmalloc() Area" }, | 86 | [VMALLOC_START_NR] = { 0UL, "vmalloc() Area" }, |
86 | [VMEMMAP_START_NR] = { 0UL, "Vmemmap" }, | 87 | [VMEMMAP_START_NR] = { 0UL, "Vmemmap" }, |
87 | #ifdef CONFIG_KASAN | 88 | #ifdef CONFIG_KASAN |
88 | [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" }, | 89 | /* |
89 | [KASAN_SHADOW_END_NR] = { KASAN_SHADOW_END, "KASAN shadow end" }, | 90 | * These fields get initialized with the (dynamic) |
91 | * KASAN_SHADOW_{START,END} values in pt_dump_init(). | ||
92 | */ | ||
93 | [KASAN_SHADOW_START_NR] = { 0UL, "KASAN shadow" }, | ||
94 | [KASAN_SHADOW_END_NR] = { 0UL, "KASAN shadow end" }, | ||
90 | #endif | 95 | #endif |
91 | #ifdef CONFIG_MODIFY_LDT_SYSCALL | 96 | #ifdef CONFIG_MODIFY_LDT_SYSCALL |
92 | [LDT_NR] = { LDT_BASE_ADDR, "LDT remap" }, | 97 | [LDT_NR] = { 0UL, "LDT remap" }, |
93 | #endif | 98 | #endif |
94 | [CPU_ENTRY_AREA_NR] = { CPU_ENTRY_AREA_BASE,"CPU entry Area" }, | 99 | [CPU_ENTRY_AREA_NR] = { CPU_ENTRY_AREA_BASE,"CPU entry Area" }, |
95 | #ifdef CONFIG_X86_ESPFIX64 | 100 | #ifdef CONFIG_X86_ESPFIX64 |
@@ -231,9 +236,9 @@ static unsigned long normalize_addr(unsigned long u) | |||
231 | * print what we collected so far. | 236 | * print what we collected so far. |
232 | */ | 237 | */ |
233 | static void note_page(struct seq_file *m, struct pg_state *st, | 238 | static void note_page(struct seq_file *m, struct pg_state *st, |
234 | pgprot_t new_prot, int level) | 239 | pgprot_t new_prot, pgprotval_t new_eff, int level) |
235 | { | 240 | { |
236 | pgprotval_t prot, cur; | 241 | pgprotval_t prot, cur, eff; |
237 | static const char units[] = "BKMGTPE"; | 242 | static const char units[] = "BKMGTPE"; |
238 | 243 | ||
239 | /* | 244 | /* |
@@ -243,23 +248,24 @@ static void note_page(struct seq_file *m, struct pg_state *st, | |||
243 | */ | 248 | */ |
244 | prot = pgprot_val(new_prot); | 249 | prot = pgprot_val(new_prot); |
245 | cur = pgprot_val(st->current_prot); | 250 | cur = pgprot_val(st->current_prot); |
251 | eff = st->effective_prot; | ||
246 | 252 | ||
247 | if (!st->level) { | 253 | if (!st->level) { |
248 | /* First entry */ | 254 | /* First entry */ |
249 | st->current_prot = new_prot; | 255 | st->current_prot = new_prot; |
256 | st->effective_prot = new_eff; | ||
250 | st->level = level; | 257 | st->level = level; |
251 | st->marker = address_markers; | 258 | st->marker = address_markers; |
252 | st->lines = 0; | 259 | st->lines = 0; |
253 | pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", | 260 | pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", |
254 | st->marker->name); | 261 | st->marker->name); |
255 | } else if (prot != cur || level != st->level || | 262 | } else if (prot != cur || new_eff != eff || level != st->level || |
256 | st->current_address >= st->marker[1].start_address) { | 263 | st->current_address >= st->marker[1].start_address) { |
257 | const char *unit = units; | 264 | const char *unit = units; |
258 | unsigned long delta; | 265 | unsigned long delta; |
259 | int width = sizeof(unsigned long) * 2; | 266 | int width = sizeof(unsigned long) * 2; |
260 | pgprotval_t pr = pgprot_val(st->current_prot); | ||
261 | 267 | ||
262 | if (st->check_wx && (pr & _PAGE_RW) && !(pr & _PAGE_NX)) { | 268 | if (st->check_wx && (eff & _PAGE_RW) && !(eff & _PAGE_NX)) { |
263 | WARN_ONCE(1, | 269 | WARN_ONCE(1, |
264 | "x86/mm: Found insecure W+X mapping at address %p/%pS\n", | 270 | "x86/mm: Found insecure W+X mapping at address %p/%pS\n", |
265 | (void *)st->start_address, | 271 | (void *)st->start_address, |
@@ -313,21 +319,30 @@ static void note_page(struct seq_file *m, struct pg_state *st, | |||
313 | 319 | ||
314 | st->start_address = st->current_address; | 320 | st->start_address = st->current_address; |
315 | st->current_prot = new_prot; | 321 | st->current_prot = new_prot; |
322 | st->effective_prot = new_eff; | ||
316 | st->level = level; | 323 | st->level = level; |
317 | } | 324 | } |
318 | } | 325 | } |
319 | 326 | ||
320 | static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr, unsigned long P) | 327 | static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2) |
328 | { | ||
329 | return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) | | ||
330 | ((prot1 | prot2) & _PAGE_NX); | ||
331 | } | ||
332 | |||
333 | static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr, | ||
334 | pgprotval_t eff_in, unsigned long P) | ||
321 | { | 335 | { |
322 | int i; | 336 | int i; |
323 | pte_t *start; | 337 | pte_t *start; |
324 | pgprotval_t prot; | 338 | pgprotval_t prot, eff; |
325 | 339 | ||
326 | start = (pte_t *)pmd_page_vaddr(addr); | 340 | start = (pte_t *)pmd_page_vaddr(addr); |
327 | for (i = 0; i < PTRS_PER_PTE; i++) { | 341 | for (i = 0; i < PTRS_PER_PTE; i++) { |
328 | prot = pte_flags(*start); | 342 | prot = pte_flags(*start); |
343 | eff = effective_prot(eff_in, prot); | ||
329 | st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT); | 344 | st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT); |
330 | note_page(m, st, __pgprot(prot), 5); | 345 | note_page(m, st, __pgprot(prot), eff, 5); |
331 | start++; | 346 | start++; |
332 | } | 347 | } |
333 | } | 348 | } |
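
The new effective_prot() helper in this hunk changes what the W+X check acts on: note_page() now warns only when the permissions that are effective across all levels are both writable and executable (_PAGE_USER and _PAGE_RW must be granted at every level, while _PAGE_NX at any level wins). That is what addresses the Xen false positives mentioned in the pull summary, since restrictions imposed by upper-level entries are now taken into account. A minimal sketch of that rule, using stand-in flag values rather than the real _PAGE_* constants:

```c
/*
 * Stand-in demonstration of the effective-permission rule: USER and RW must
 * be granted at every level to take effect, while NX at any level wins.
 * The flag values are arbitrary stand-ins, not the kernel's _PAGE_* bits.
 */
#include <stdint.h>
#include <stdio.h>

#define F_RW   0x1
#define F_USER 0x2
#define F_NX   0x4

static uint64_t effective(uint64_t upper, uint64_t lower)
{
	return (upper & lower & (F_USER | F_RW)) | ((upper | lower) & F_NX);
}

int main(void)
{
	uint64_t leaf  = F_USER | F_RW; /* leaf PTE looks W+X on its own */
	uint64_t upper = F_USER;        /* but the upper level does not grant RW */
	uint64_t eff   = effective(upper, leaf);

	/* A leaf-only check would warn here: */
	printf("leaf-only W+X: %s\n",
	       ((leaf & F_RW) && !(leaf & F_NX)) ? "yes (false positive)" : "no");
	/* The effective check stays quiet: */
	printf("effective W+X: %s\n",
	       ((eff & F_RW) && !(eff & F_NX)) ? "yes" : "no");
	return 0;
}
```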
@@ -344,12 +359,10 @@ static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st, | |||
344 | void *pt) | 359 | void *pt) |
345 | { | 360 | { |
346 | if (__pa(pt) == __pa(kasan_zero_pmd) || | 361 | if (__pa(pt) == __pa(kasan_zero_pmd) || |
347 | #ifdef CONFIG_X86_5LEVEL | 362 | (pgtable_l5_enabled && __pa(pt) == __pa(kasan_zero_p4d)) || |
348 | __pa(pt) == __pa(kasan_zero_p4d) || | ||
349 | #endif | ||
350 | __pa(pt) == __pa(kasan_zero_pud)) { | 363 | __pa(pt) == __pa(kasan_zero_pud)) { |
351 | pgprotval_t prot = pte_flags(kasan_zero_pte[0]); | 364 | pgprotval_t prot = pte_flags(kasan_zero_pte[0]); |
352 | note_page(m, st, __pgprot(prot), 5); | 365 | note_page(m, st, __pgprot(prot), 0, 5); |
353 | return true; | 366 | return true; |
354 | } | 367 | } |
355 | return false; | 368 | return false; |
@@ -364,42 +377,45 @@ static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st, | |||
364 | 377 | ||
365 | #if PTRS_PER_PMD > 1 | 378 | #if PTRS_PER_PMD > 1 |
366 | 379 | ||
367 | static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, unsigned long P) | 380 | static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, |
381 | pgprotval_t eff_in, unsigned long P) | ||
368 | { | 382 | { |
369 | int i; | 383 | int i; |
370 | pmd_t *start, *pmd_start; | 384 | pmd_t *start, *pmd_start; |
371 | pgprotval_t prot; | 385 | pgprotval_t prot, eff; |
372 | 386 | ||
373 | pmd_start = start = (pmd_t *)pud_page_vaddr(addr); | 387 | pmd_start = start = (pmd_t *)pud_page_vaddr(addr); |
374 | for (i = 0; i < PTRS_PER_PMD; i++) { | 388 | for (i = 0; i < PTRS_PER_PMD; i++) { |
375 | st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT); | 389 | st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT); |
376 | if (!pmd_none(*start)) { | 390 | if (!pmd_none(*start)) { |
391 | prot = pmd_flags(*start); | ||
392 | eff = effective_prot(eff_in, prot); | ||
377 | if (pmd_large(*start) || !pmd_present(*start)) { | 393 | if (pmd_large(*start) || !pmd_present(*start)) { |
378 | prot = pmd_flags(*start); | 394 | note_page(m, st, __pgprot(prot), eff, 4); |
379 | note_page(m, st, __pgprot(prot), 4); | ||
380 | } else if (!kasan_page_table(m, st, pmd_start)) { | 395 | } else if (!kasan_page_table(m, st, pmd_start)) { |
381 | walk_pte_level(m, st, *start, | 396 | walk_pte_level(m, st, *start, eff, |
382 | P + i * PMD_LEVEL_MULT); | 397 | P + i * PMD_LEVEL_MULT); |
383 | } | 398 | } |
384 | } else | 399 | } else |
385 | note_page(m, st, __pgprot(0), 4); | 400 | note_page(m, st, __pgprot(0), 0, 4); |
386 | start++; | 401 | start++; |
387 | } | 402 | } |
388 | } | 403 | } |
389 | 404 | ||
390 | #else | 405 | #else |
391 | #define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p) | 406 | #define walk_pmd_level(m,s,a,e,p) walk_pte_level(m,s,__pmd(pud_val(a)),e,p) |
392 | #define pud_large(a) pmd_large(__pmd(pud_val(a))) | 407 | #define pud_large(a) pmd_large(__pmd(pud_val(a))) |
393 | #define pud_none(a) pmd_none(__pmd(pud_val(a))) | 408 | #define pud_none(a) pmd_none(__pmd(pud_val(a))) |
394 | #endif | 409 | #endif |
395 | 410 | ||
396 | #if PTRS_PER_PUD > 1 | 411 | #if PTRS_PER_PUD > 1 |
397 | 412 | ||
398 | static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr, unsigned long P) | 413 | static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr, |
414 | pgprotval_t eff_in, unsigned long P) | ||
399 | { | 415 | { |
400 | int i; | 416 | int i; |
401 | pud_t *start, *pud_start; | 417 | pud_t *start, *pud_start; |
402 | pgprotval_t prot; | 418 | pgprotval_t prot, eff; |
403 | pud_t *prev_pud = NULL; | 419 | pud_t *prev_pud = NULL; |
404 | 420 | ||
405 | pud_start = start = (pud_t *)p4d_page_vaddr(addr); | 421 | pud_start = start = (pud_t *)p4d_page_vaddr(addr); |
@@ -407,15 +423,16 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr, | |||
407 | for (i = 0; i < PTRS_PER_PUD; i++) { | 423 | for (i = 0; i < PTRS_PER_PUD; i++) { |
408 | st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT); | 424 | st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT); |
409 | if (!pud_none(*start)) { | 425 | if (!pud_none(*start)) { |
426 | prot = pud_flags(*start); | ||
427 | eff = effective_prot(eff_in, prot); | ||
410 | if (pud_large(*start) || !pud_present(*start)) { | 428 | if (pud_large(*start) || !pud_present(*start)) { |
411 | prot = pud_flags(*start); | 429 | note_page(m, st, __pgprot(prot), eff, 3); |
412 | note_page(m, st, __pgprot(prot), 3); | ||
413 | } else if (!kasan_page_table(m, st, pud_start)) { | 430 | } else if (!kasan_page_table(m, st, pud_start)) { |
414 | walk_pmd_level(m, st, *start, | 431 | walk_pmd_level(m, st, *start, eff, |
415 | P + i * PUD_LEVEL_MULT); | 432 | P + i * PUD_LEVEL_MULT); |
416 | } | 433 | } |
417 | } else | 434 | } else |
418 | note_page(m, st, __pgprot(0), 3); | 435 | note_page(m, st, __pgprot(0), 0, 3); |
419 | 436 | ||
420 | prev_pud = start; | 437 | prev_pud = start; |
421 | start++; | 438 | start++; |
@@ -423,43 +440,43 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr, | |||
423 | } | 440 | } |
424 | 441 | ||
425 | #else | 442 | #else |
426 | #define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(p4d_val(a)),p) | 443 | #define walk_pud_level(m,s,a,e,p) walk_pmd_level(m,s,__pud(p4d_val(a)),e,p) |
427 | #define p4d_large(a) pud_large(__pud(p4d_val(a))) | 444 | #define p4d_large(a) pud_large(__pud(p4d_val(a))) |
428 | #define p4d_none(a) pud_none(__pud(p4d_val(a))) | 445 | #define p4d_none(a) pud_none(__pud(p4d_val(a))) |
429 | #endif | 446 | #endif |
430 | 447 | ||
431 | #if PTRS_PER_P4D > 1 | 448 | static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr, |
432 | 449 | pgprotval_t eff_in, unsigned long P) | |
433 | static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr, unsigned long P) | ||
434 | { | 450 | { |
435 | int i; | 451 | int i; |
436 | p4d_t *start, *p4d_start; | 452 | p4d_t *start, *p4d_start; |
437 | pgprotval_t prot; | 453 | pgprotval_t prot, eff; |
454 | |||
455 | if (PTRS_PER_P4D == 1) | ||
456 | return walk_pud_level(m, st, __p4d(pgd_val(addr)), eff_in, P); | ||
438 | 457 | ||
439 | p4d_start = start = (p4d_t *)pgd_page_vaddr(addr); | 458 | p4d_start = start = (p4d_t *)pgd_page_vaddr(addr); |
440 | 459 | ||
441 | for (i = 0; i < PTRS_PER_P4D; i++) { | 460 | for (i = 0; i < PTRS_PER_P4D; i++) { |
442 | st->current_address = normalize_addr(P + i * P4D_LEVEL_MULT); | 461 | st->current_address = normalize_addr(P + i * P4D_LEVEL_MULT); |
443 | if (!p4d_none(*start)) { | 462 | if (!p4d_none(*start)) { |
463 | prot = p4d_flags(*start); | ||
464 | eff = effective_prot(eff_in, prot); | ||
444 | if (p4d_large(*start) || !p4d_present(*start)) { | 465 | if (p4d_large(*start) || !p4d_present(*start)) { |
445 | prot = p4d_flags(*start); | 466 | note_page(m, st, __pgprot(prot), eff, 2); |
446 | note_page(m, st, __pgprot(prot), 2); | ||
447 | } else if (!kasan_page_table(m, st, p4d_start)) { | 467 | } else if (!kasan_page_table(m, st, p4d_start)) { |
448 | walk_pud_level(m, st, *start, | 468 | walk_pud_level(m, st, *start, eff, |
449 | P + i * P4D_LEVEL_MULT); | 469 | P + i * P4D_LEVEL_MULT); |
450 | } | 470 | } |
451 | } else | 471 | } else |
452 | note_page(m, st, __pgprot(0), 2); | 472 | note_page(m, st, __pgprot(0), 0, 2); |
453 | 473 | ||
454 | start++; | 474 | start++; |
455 | } | 475 | } |
456 | } | 476 | } |
457 | 477 | ||
458 | #else | 478 | #define pgd_large(a) (pgtable_l5_enabled ? pgd_large(a) : p4d_large(__p4d(pgd_val(a)))) |
459 | #define walk_p4d_level(m,s,a,p) walk_pud_level(m,s,__p4d(pgd_val(a)),p) | 479 | #define pgd_none(a) (pgtable_l5_enabled ? pgd_none(a) : p4d_none(__p4d(pgd_val(a)))) |
460 | #define pgd_large(a) p4d_large(__p4d(pgd_val(a))) | ||
461 | #define pgd_none(a) p4d_none(__p4d(pgd_val(a))) | ||
462 | #endif | ||
463 | 480 | ||
464 | static inline bool is_hypervisor_range(int idx) | 481 | static inline bool is_hypervisor_range(int idx) |
465 | { | 482 | { |
@@ -483,7 +500,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, | |||
483 | #else | 500 | #else |
484 | pgd_t *start = swapper_pg_dir; | 501 | pgd_t *start = swapper_pg_dir; |
485 | #endif | 502 | #endif |
486 | pgprotval_t prot; | 503 | pgprotval_t prot, eff; |
487 | int i; | 504 | int i; |
488 | struct pg_state st = {}; | 505 | struct pg_state st = {}; |
489 | 506 | ||
@@ -499,15 +516,20 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, | |||
499 | for (i = 0; i < PTRS_PER_PGD; i++) { | 516 | for (i = 0; i < PTRS_PER_PGD; i++) { |
500 | st.current_address = normalize_addr(i * PGD_LEVEL_MULT); | 517 | st.current_address = normalize_addr(i * PGD_LEVEL_MULT); |
501 | if (!pgd_none(*start) && !is_hypervisor_range(i)) { | 518 | if (!pgd_none(*start) && !is_hypervisor_range(i)) { |
519 | prot = pgd_flags(*start); | ||
520 | #ifdef CONFIG_X86_PAE | ||
521 | eff = _PAGE_USER | _PAGE_RW; | ||
522 | #else | ||
523 | eff = prot; | ||
524 | #endif | ||
502 | if (pgd_large(*start) || !pgd_present(*start)) { | 525 | if (pgd_large(*start) || !pgd_present(*start)) { |
503 | prot = pgd_flags(*start); | 526 | note_page(m, &st, __pgprot(prot), eff, 1); |
504 | note_page(m, &st, __pgprot(prot), 1); | ||
505 | } else { | 527 | } else { |
506 | walk_p4d_level(m, &st, *start, | 528 | walk_p4d_level(m, &st, *start, eff, |
507 | i * PGD_LEVEL_MULT); | 529 | i * PGD_LEVEL_MULT); |
508 | } | 530 | } |
509 | } else | 531 | } else |
510 | note_page(m, &st, __pgprot(0), 1); | 532 | note_page(m, &st, __pgprot(0), 0, 1); |
511 | 533 | ||
512 | cond_resched(); | 534 | cond_resched(); |
513 | start++; | 535 | start++; |
@@ -515,7 +537,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, | |||
515 | 537 | ||
516 | /* Flush out the last page */ | 538 | /* Flush out the last page */ |
517 | st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT); | 539 | st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT); |
518 | note_page(m, &st, __pgprot(0), 0); | 540 | note_page(m, &st, __pgprot(0), 0, 0); |
519 | if (!checkwx) | 541 | if (!checkwx) |
520 | return; | 542 | return; |
521 | if (st.wx_pages) | 543 | if (st.wx_pages) |
@@ -570,6 +592,13 @@ static int __init pt_dump_init(void) | |||
570 | address_markers[LOW_KERNEL_NR].start_address = PAGE_OFFSET; | 592 | address_markers[LOW_KERNEL_NR].start_address = PAGE_OFFSET; |
571 | address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; | 593 | address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; |
572 | address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START; | 594 | address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START; |
595 | #ifdef CONFIG_MODIFY_LDT_SYSCALL | ||
596 | address_markers[LDT_NR].start_address = LDT_BASE_ADDR; | ||
597 | #endif | ||
598 | #ifdef CONFIG_KASAN | ||
599 | address_markers[KASAN_SHADOW_START_NR].start_address = KASAN_SHADOW_START; | ||
600 | address_markers[KASAN_SHADOW_END_NR].start_address = KASAN_SHADOW_END; | ||
601 | #endif | ||
573 | #endif | 602 | #endif |
574 | #ifdef CONFIG_X86_32 | 603 | #ifdef CONFIG_X86_32 |
575 | address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; | 604 | address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; |
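
The dump_pagetables.c changes above also switch several address_markers[] entries (the KASAN shadow and LDT remap markers) from compile-time constants to 0UL placeholders that pt_dump_init() fills in at boot, because those base addresses now depend on whether 5-level paging is in use. A minimal sketch of that deferred-initialization pattern (names and addresses are illustrative examples only):

```c
/*
 * Sketch of the deferred-initialization pattern: marker entries whose base
 * addresses depend on the boot-time paging mode start as 0UL placeholders
 * and are filled in by an init function. Illustrative names only.
 */
#include <stdio.h>

struct addr_marker {
	unsigned long start_address;
	const char *name;
};

static struct addr_marker markers[] = {
	{ 0UL, "vmalloc() Area" },
	{ 0UL, "KASAN shadow"   },
};

static void markers_init(unsigned long vmalloc_start, unsigned long kasan_start)
{
	markers[0].start_address = vmalloc_start;
	markers[1].start_address = kasan_start;
}

int main(void)
{
	/* Example bases; the kernel would use VMALLOC_START/KASAN_SHADOW_START. */
	markers_init(0xffffc90000000000UL, 0xffffec0000000000UL);
	for (unsigned int i = 0; i < sizeof(markers) / sizeof(markers[0]); i++)
		printf("%016lx %s\n", markers[i].start_address, markers[i].name);
	return 0;
}
```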
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index f75ea0748b9f..73bd8c95ac71 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -417,11 +417,11 @@ void vmalloc_sync_all(void) | |||
417 | */ | 417 | */ |
418 | static noinline int vmalloc_fault(unsigned long address) | 418 | static noinline int vmalloc_fault(unsigned long address) |
419 | { | 419 | { |
420 | pgd_t *pgd, *pgd_ref; | 420 | pgd_t *pgd, *pgd_k; |
421 | p4d_t *p4d, *p4d_ref; | 421 | p4d_t *p4d, *p4d_k; |
422 | pud_t *pud, *pud_ref; | 422 | pud_t *pud; |
423 | pmd_t *pmd, *pmd_ref; | 423 | pmd_t *pmd; |
424 | pte_t *pte, *pte_ref; | 424 | pte_t *pte; |
425 | 425 | ||
426 | /* Make sure we are in vmalloc area: */ | 426 | /* Make sure we are in vmalloc area: */ |
427 | if (!(address >= VMALLOC_START && address < VMALLOC_END)) | 427 | if (!(address >= VMALLOC_START && address < VMALLOC_END)) |
@@ -435,73 +435,51 @@ static noinline int vmalloc_fault(unsigned long address) | |||
435 | * case just flush: | 435 | * case just flush: |
436 | */ | 436 | */ |
437 | pgd = (pgd_t *)__va(read_cr3_pa()) + pgd_index(address); | 437 | pgd = (pgd_t *)__va(read_cr3_pa()) + pgd_index(address); |
438 | pgd_ref = pgd_offset_k(address); | 438 | pgd_k = pgd_offset_k(address); |
439 | if (pgd_none(*pgd_ref)) | 439 | if (pgd_none(*pgd_k)) |
440 | return -1; | 440 | return -1; |
441 | 441 | ||
442 | if (CONFIG_PGTABLE_LEVELS > 4) { | 442 | if (pgtable_l5_enabled) { |
443 | if (pgd_none(*pgd)) { | 443 | if (pgd_none(*pgd)) { |
444 | set_pgd(pgd, *pgd_ref); | 444 | set_pgd(pgd, *pgd_k); |
445 | arch_flush_lazy_mmu_mode(); | 445 | arch_flush_lazy_mmu_mode(); |
446 | } else { | 446 | } else { |
447 | BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); | 447 | BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_k)); |
448 | } | 448 | } |
449 | } | 449 | } |
450 | 450 | ||
451 | /* With 4-level paging, copying happens on the p4d level. */ | 451 | /* With 4-level paging, copying happens on the p4d level. */ |
452 | p4d = p4d_offset(pgd, address); | 452 | p4d = p4d_offset(pgd, address); |
453 | p4d_ref = p4d_offset(pgd_ref, address); | 453 | p4d_k = p4d_offset(pgd_k, address); |
454 | if (p4d_none(*p4d_ref)) | 454 | if (p4d_none(*p4d_k)) |
455 | return -1; | 455 | return -1; |
456 | 456 | ||
457 | if (p4d_none(*p4d) && CONFIG_PGTABLE_LEVELS == 4) { | 457 | if (p4d_none(*p4d) && !pgtable_l5_enabled) { |
458 | set_p4d(p4d, *p4d_ref); | 458 | set_p4d(p4d, *p4d_k); |
459 | arch_flush_lazy_mmu_mode(); | 459 | arch_flush_lazy_mmu_mode(); |
460 | } else { | 460 | } else { |
461 | BUG_ON(p4d_pfn(*p4d) != p4d_pfn(*p4d_ref)); | 461 | BUG_ON(p4d_pfn(*p4d) != p4d_pfn(*p4d_k)); |
462 | } | 462 | } |
463 | 463 | ||
464 | /* | ||
465 | * Below here mismatches are bugs because these lower tables | ||
466 | * are shared: | ||
467 | */ | ||
468 | BUILD_BUG_ON(CONFIG_PGTABLE_LEVELS < 4); | 464 | BUILD_BUG_ON(CONFIG_PGTABLE_LEVELS < 4); |
469 | 465 | ||
470 | pud = pud_offset(p4d, address); | 466 | pud = pud_offset(p4d, address); |
471 | pud_ref = pud_offset(p4d_ref, address); | 467 | if (pud_none(*pud)) |
472 | if (pud_none(*pud_ref)) | ||
473 | return -1; | 468 | return -1; |
474 | 469 | ||
475 | if (pud_none(*pud) || pud_pfn(*pud) != pud_pfn(*pud_ref)) | ||
476 | BUG(); | ||
477 | |||
478 | if (pud_large(*pud)) | 470 | if (pud_large(*pud)) |
479 | return 0; | 471 | return 0; |
480 | 472 | ||
481 | pmd = pmd_offset(pud, address); | 473 | pmd = pmd_offset(pud, address); |
482 | pmd_ref = pmd_offset(pud_ref, address); | 474 | if (pmd_none(*pmd)) |
483 | if (pmd_none(*pmd_ref)) | ||
484 | return -1; | 475 | return -1; |
485 | 476 | ||
486 | if (pmd_none(*pmd) || pmd_pfn(*pmd) != pmd_pfn(*pmd_ref)) | ||
487 | BUG(); | ||
488 | |||
489 | if (pmd_large(*pmd)) | 477 | if (pmd_large(*pmd)) |
490 | return 0; | 478 | return 0; |
491 | 479 | ||
492 | pte_ref = pte_offset_kernel(pmd_ref, address); | ||
493 | if (!pte_present(*pte_ref)) | ||
494 | return -1; | ||
495 | |||
496 | pte = pte_offset_kernel(pmd, address); | 480 | pte = pte_offset_kernel(pmd, address); |
497 | 481 | if (!pte_present(*pte)) | |
498 | /* | 482 | return -1; |
499 | * Don't use pte_page here, because the mappings can point | ||
500 | * outside mem_map, and the NUMA hash lookup cannot handle | ||
501 | * that: | ||
502 | */ | ||
503 | if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref)) | ||
504 | BUG(); | ||
505 | 483 | ||
506 | return 0; | 484 | return 0; |
507 | } | 485 | } |
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index ab33a32df2a8..9aa22be8331e 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -120,7 +120,7 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, | |||
120 | result = ident_p4d_init(info, p4d, addr, next); | 120 | result = ident_p4d_init(info, p4d, addr, next); |
121 | if (result) | 121 | if (result) |
122 | return result; | 122 | return result; |
123 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) { | 123 | if (pgtable_l5_enabled) { |
124 | set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag)); | 124 | set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag)); |
125 | } else { | 125 | } else { |
126 | /* | 126 | /* |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index af11a2890235..45241de66785 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -88,12 +88,7 @@ static int __init nonx32_setup(char *str) | |||
88 | } | 88 | } |
89 | __setup("noexec32=", nonx32_setup); | 89 | __setup("noexec32=", nonx32_setup); |
90 | 90 | ||
91 | /* | 91 | static void sync_global_pgds_l5(unsigned long start, unsigned long end) |
92 | * When memory was added make sure all the processes MM have | ||
93 | * suitable PGD entries in the local PGD level page. | ||
94 | */ | ||
95 | #ifdef CONFIG_X86_5LEVEL | ||
96 | void sync_global_pgds(unsigned long start, unsigned long end) | ||
97 | { | 92 | { |
98 | unsigned long addr; | 93 | unsigned long addr; |
99 | 94 | ||
@@ -129,8 +124,8 @@ void sync_global_pgds(unsigned long start, unsigned long end) | |||
129 | spin_unlock(&pgd_lock); | 124 | spin_unlock(&pgd_lock); |
130 | } | 125 | } |
131 | } | 126 | } |
132 | #else | 127 | |
133 | void sync_global_pgds(unsigned long start, unsigned long end) | 128 | static void sync_global_pgds_l4(unsigned long start, unsigned long end) |
134 | { | 129 | { |
135 | unsigned long addr; | 130 | unsigned long addr; |
136 | 131 | ||
@@ -143,7 +138,7 @@ void sync_global_pgds(unsigned long start, unsigned long end) | |||
143 | * With folded p4d, pgd_none() is always false, we need to | 138 | * With folded p4d, pgd_none() is always false, we need to |
144 | * handle synchonization on p4d level. | 139 | * handle synchonization on p4d level. |
145 | */ | 140 | */ |
146 | BUILD_BUG_ON(pgd_none(*pgd_ref)); | 141 | MAYBE_BUILD_BUG_ON(pgd_none(*pgd_ref)); |
147 | p4d_ref = p4d_offset(pgd_ref, addr); | 142 | p4d_ref = p4d_offset(pgd_ref, addr); |
148 | 143 | ||
149 | if (p4d_none(*p4d_ref)) | 144 | if (p4d_none(*p4d_ref)) |
@@ -173,7 +168,18 @@ void sync_global_pgds(unsigned long start, unsigned long end) | |||
173 | spin_unlock(&pgd_lock); | 168 | spin_unlock(&pgd_lock); |
174 | } | 169 | } |
175 | } | 170 | } |
176 | #endif | 171 | |
172 | /* | ||
173 | * When memory was added make sure all the processes MM have | ||
174 | * suitable PGD entries in the local PGD level page. | ||
175 | */ | ||
176 | void sync_global_pgds(unsigned long start, unsigned long end) | ||
177 | { | ||
178 | if (pgtable_l5_enabled) | ||
179 | sync_global_pgds_l5(start, end); | ||
180 | else | ||
181 | sync_global_pgds_l4(start, end); | ||
182 | } | ||
177 | 183 | ||
178 | /* | 184 | /* |
179 | * NOTE: This function is marked __ref because it calls __init function | 185 | * NOTE: This function is marked __ref because it calls __init function |
@@ -632,7 +638,7 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, | |||
632 | unsigned long vaddr = (unsigned long)__va(paddr); | 638 | unsigned long vaddr = (unsigned long)__va(paddr); |
633 | int i = p4d_index(vaddr); | 639 | int i = p4d_index(vaddr); |
634 | 640 | ||
635 | if (!IS_ENABLED(CONFIG_X86_5LEVEL)) | 641 | if (!pgtable_l5_enabled) |
636 | return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, page_size_mask); | 642 | return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, page_size_mask); |
637 | 643 | ||
638 | for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) { | 644 | for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) { |
@@ -712,7 +718,7 @@ kernel_physical_mapping_init(unsigned long paddr_start, | |||
712 | page_size_mask); | 718 | page_size_mask); |
713 | 719 | ||
714 | spin_lock(&init_mm.page_table_lock); | 720 | spin_lock(&init_mm.page_table_lock); |
715 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) | 721 | if (pgtable_l5_enabled) |
716 | pgd_populate(&init_mm, pgd, p4d); | 722 | pgd_populate(&init_mm, pgd, p4d); |
717 | else | 723 | else |
718 | p4d_populate(&init_mm, p4d_offset(pgd, vaddr), (pud_t *) p4d); | 724 | p4d_populate(&init_mm, p4d_offset(pgd, vaddr), (pud_t *) p4d); |
@@ -1089,7 +1095,7 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end, | |||
1089 | * 5-level case we should free them. This code will have to change | 1095 | * 5-level case we should free them. This code will have to change |
1090 | * to adapt for boot-time switching between 4 and 5 level page tables. | 1096 | * to adapt for boot-time switching between 4 and 5 level page tables. |
1091 | */ | 1097 | */ |
1092 | if (CONFIG_PGTABLE_LEVELS == 5) | 1098 | if (pgtable_l5_enabled) |
1093 | free_pud_table(pud_base, p4d); | 1099 | free_pud_table(pud_base, p4d); |
1094 | } | 1100 | } |
1095 | 1101 | ||
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index af6f2f9c6a26..d8ff013ea9d0 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -1,6 +1,12 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | 1 | // SPDX-License-Identifier: GPL-2.0 |
2 | #define DISABLE_BRANCH_PROFILING | 2 | #define DISABLE_BRANCH_PROFILING |
3 | #define pr_fmt(fmt) "kasan: " fmt | 3 | #define pr_fmt(fmt) "kasan: " fmt |
4 | |||
5 | #ifdef CONFIG_X86_5LEVEL | ||
6 | /* Too early to use cpu_feature_enabled() */ | ||
7 | #define pgtable_l5_enabled __pgtable_l5_enabled | ||
8 | #endif | ||
9 | |||
4 | #include <linux/bootmem.h> | 10 | #include <linux/bootmem.h> |
5 | #include <linux/kasan.h> | 11 | #include <linux/kasan.h> |
6 | #include <linux/kdebug.h> | 12 | #include <linux/kdebug.h> |
@@ -19,7 +25,7 @@ | |||
19 | 25 | ||
20 | extern struct range pfn_mapped[E820_MAX_ENTRIES]; | 26 | extern struct range pfn_mapped[E820_MAX_ENTRIES]; |
21 | 27 | ||
22 | static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); | 28 | static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); |
23 | 29 | ||
24 | static __init void *early_alloc(size_t size, int nid, bool panic) | 30 | static __init void *early_alloc(size_t size, int nid, bool panic) |
25 | { | 31 | { |
@@ -176,10 +182,10 @@ static void __init clear_pgds(unsigned long start, | |||
176 | * With folded p4d, pgd_clear() is nop, use p4d_clear() | 182 | * With folded p4d, pgd_clear() is nop, use p4d_clear() |
177 | * instead. | 183 | * instead. |
178 | */ | 184 | */ |
179 | if (CONFIG_PGTABLE_LEVELS < 5) | 185 | if (pgtable_l5_enabled) |
180 | p4d_clear(p4d_offset(pgd, start)); | ||
181 | else | ||
182 | pgd_clear(pgd); | 186 | pgd_clear(pgd); |
187 | else | ||
188 | p4d_clear(p4d_offset(pgd, start)); | ||
183 | } | 189 | } |
184 | 190 | ||
185 | pgd = pgd_offset_k(start); | 191 | pgd = pgd_offset_k(start); |
@@ -191,7 +197,7 @@ static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr) | |||
191 | { | 197 | { |
192 | unsigned long p4d; | 198 | unsigned long p4d; |
193 | 199 | ||
194 | if (!IS_ENABLED(CONFIG_X86_5LEVEL)) | 200 | if (!pgtable_l5_enabled) |
195 | return (p4d_t *)pgd; | 201 | return (p4d_t *)pgd; |
196 | 202 | ||
197 | p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK; | 203 | p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK; |
@@ -272,7 +278,7 @@ void __init kasan_early_init(void) | |||
272 | for (i = 0; i < PTRS_PER_PUD; i++) | 278 | for (i = 0; i < PTRS_PER_PUD; i++) |
273 | kasan_zero_pud[i] = __pud(pud_val); | 279 | kasan_zero_pud[i] = __pud(pud_val); |
274 | 280 | ||
275 | for (i = 0; IS_ENABLED(CONFIG_X86_5LEVEL) && i < PTRS_PER_P4D; i++) | 281 | for (i = 0; pgtable_l5_enabled && i < PTRS_PER_P4D; i++) |
276 | kasan_zero_p4d[i] = __p4d(p4d_val); | 282 | kasan_zero_p4d[i] = __p4d(p4d_val); |
277 | 283 | ||
278 | kasan_map_early_shadow(early_top_pgt); | 284 | kasan_map_early_shadow(early_top_pgt); |
@@ -303,7 +309,7 @@ void __init kasan_init(void) | |||
303 | * bunch of things like kernel code, modules, EFI mapping, etc. | 309 | * bunch of things like kernel code, modules, EFI mapping, etc. |
304 | * We need to take extra steps to not overwrite them. | 310 | * We need to take extra steps to not overwrite them. |
305 | */ | 311 | */ |
306 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) { | 312 | if (pgtable_l5_enabled) { |
307 | void *ptr; | 313 | void *ptr; |
308 | 314 | ||
309 | ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END)); | 315 | ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END)); |
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index aedebd2ebf1e..615cc03ced84 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -34,23 +34,12 @@ | |||
34 | #define TB_SHIFT 40 | 34 | #define TB_SHIFT 40 |
35 | 35 | ||
36 | /* | 36 | /* |
37 | * Virtual address start and end range for randomization. | ||
38 | * | ||
39 | * The end address could depend on more configuration options to make the | 37 | * The end address could depend on more configuration options to make the |
40 | * highest amount of space for randomization available, but that's too hard | 38 | * highest amount of space for randomization available, but that's too hard |
41 | * to keep straight and caused issues already. | 39 | * to keep straight and caused issues already. |
42 | */ | 40 | */ |
43 | static const unsigned long vaddr_start = __PAGE_OFFSET_BASE; | ||
44 | static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE; | 41 | static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE; |
45 | 42 | ||
46 | /* Default values */ | ||
47 | unsigned long page_offset_base = __PAGE_OFFSET_BASE; | ||
48 | EXPORT_SYMBOL(page_offset_base); | ||
49 | unsigned long vmalloc_base = __VMALLOC_BASE; | ||
50 | EXPORT_SYMBOL(vmalloc_base); | ||
51 | unsigned long vmemmap_base = __VMEMMAP_BASE; | ||
52 | EXPORT_SYMBOL(vmemmap_base); | ||
53 | |||
54 | /* | 43 | /* |
55 | * Memory regions randomized by KASLR (except modules that use a separate logic | 44 | * Memory regions randomized by KASLR (except modules that use a separate logic |
56 | * earlier during boot). The list is ordered based on virtual addresses. This | 45 | * earlier during boot). The list is ordered based on virtual addresses. This |
@@ -60,8 +49,8 @@ static __initdata struct kaslr_memory_region { | |||
60 | unsigned long *base; | 49 | unsigned long *base; |
61 | unsigned long size_tb; | 50 | unsigned long size_tb; |
62 | } kaslr_regions[] = { | 51 | } kaslr_regions[] = { |
63 | { &page_offset_base, 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT) /* Maximum */ }, | 52 | { &page_offset_base, 0 }, |
64 | { &vmalloc_base, VMALLOC_SIZE_TB }, | 53 | { &vmalloc_base, 0 }, |
65 | { &vmemmap_base, 1 }, | 54 | { &vmemmap_base, 1 }, |
66 | }; | 55 | }; |
67 | 56 | ||
@@ -84,11 +73,14 @@ static inline bool kaslr_memory_enabled(void) | |||
84 | void __init kernel_randomize_memory(void) | 73 | void __init kernel_randomize_memory(void) |
85 | { | 74 | { |
86 | size_t i; | 75 | size_t i; |
87 | unsigned long vaddr = vaddr_start; | 76 | unsigned long vaddr_start, vaddr; |
88 | unsigned long rand, memory_tb; | 77 | unsigned long rand, memory_tb; |
89 | struct rnd_state rand_state; | 78 | struct rnd_state rand_state; |
90 | unsigned long remain_entropy; | 79 | unsigned long remain_entropy; |
91 | 80 | ||
81 | vaddr_start = pgtable_l5_enabled ? __PAGE_OFFSET_BASE_L5 : __PAGE_OFFSET_BASE_L4; | ||
82 | vaddr = vaddr_start; | ||
83 | |||
92 | /* | 84 | /* |
93 | * These BUILD_BUG_ON checks ensure the memory layout is consistent | 85 | * These BUILD_BUG_ON checks ensure the memory layout is consistent |
94 | * with the vaddr_start/vaddr_end variables. These checks are very | 86 | * with the vaddr_start/vaddr_end variables. These checks are very |
@@ -101,6 +93,9 @@ void __init kernel_randomize_memory(void) | |||
101 | if (!kaslr_memory_enabled()) | 93 | if (!kaslr_memory_enabled()) |
102 | return; | 94 | return; |
103 | 95 | ||
96 | kaslr_regions[0].size_tb = 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT); | ||
97 | kaslr_regions[1].size_tb = VMALLOC_SIZE_TB; | ||
98 | |||
104 | /* | 99 | /* |
105 | * Update Physical memory mapping to available and | 100 | * Update Physical memory mapping to available and |
106 | * add padding if needed (especially for memory hotplug support). | 101 | * add padding if needed (especially for memory hotplug support). |
@@ -129,7 +124,7 @@ void __init kernel_randomize_memory(void) | |||
129 | */ | 124 | */ |
130 | entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i); | 125 | entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i); |
131 | prandom_bytes_state(&rand_state, &rand, sizeof(rand)); | 126 | prandom_bytes_state(&rand_state, &rand, sizeof(rand)); |
132 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) | 127 | if (pgtable_l5_enabled) |
133 | entropy = (rand % (entropy + 1)) & P4D_MASK; | 128 | entropy = (rand % (entropy + 1)) & P4D_MASK; |
134 | else | 129 | else |
135 | entropy = (rand % (entropy + 1)) & PUD_MASK; | 130 | entropy = (rand % (entropy + 1)) & PUD_MASK; |
@@ -141,7 +136,7 @@ void __init kernel_randomize_memory(void) | |||
141 | * randomization alignment. | 136 | * randomization alignment. |
142 | */ | 137 | */ |
143 | vaddr += get_padding(&kaslr_regions[i]); | 138 | vaddr += get_padding(&kaslr_regions[i]); |
144 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) | 139 | if (pgtable_l5_enabled) |
145 | vaddr = round_up(vaddr + 1, P4D_SIZE); | 140 | vaddr = round_up(vaddr + 1, P4D_SIZE); |
146 | else | 141 | else |
147 | vaddr = round_up(vaddr + 1, PUD_SIZE); | 142 | vaddr = round_up(vaddr + 1, PUD_SIZE); |
@@ -217,7 +212,7 @@ void __meminit init_trampoline(void) | |||
217 | return; | 212 | return; |
218 | } | 213 | } |
219 | 214 | ||
220 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) | 215 | if (pgtable_l5_enabled) |
221 | init_trampoline_p4d(); | 216 | init_trampoline_p4d(); |
222 | else | 217 | else |
223 | init_trampoline_pud(); | 218 | init_trampoline_pud(); |
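
In the kaslr.c hunk above, the region sizes move from static initializers into kernel_randomize_memory(), and the randomization granularity is chosen at run time: region bases are rounded up to a PUD boundary with 4-level paging and to a P4D boundary with 5-level paging. A small sketch of that rounding, assuming the usual x86-64 page-table geometry (a PUD entry covers 1 GiB, a P4D entry covers 512 GiB); the helper name is illustrative:

```c
/*
 * Sketch of the run-time alignment choice in the KASLR code: the next
 * randomized region base is aligned to the page-table unit in use.
 */
#include <stdbool.h>
#include <stdio.h>

#define PUD_SIZE (1UL << 30) /* 1 GiB   */
#define P4D_SIZE (1UL << 39) /* 512 GiB */

/* Equivalent to the kernel's round_up(vaddr + 1, align) for power-of-two align. */
static unsigned long next_region_base(unsigned long vaddr, unsigned long align)
{
	return (vaddr + align) & ~(align - 1);
}

int main(void)
{
	bool l5 = false; /* pretend the CPU booted in 4-level mode */
	unsigned long vaddr = 0xffff888123456789UL;

	vaddr = next_region_base(vaddr, l5 ? P4D_SIZE : PUD_SIZE);
	printf("next region starts at %016lx\n", vaddr);
	return 0;
}
```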
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 1a53071e2e17..3a1b5fe4c2ca 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -25,17 +25,12 @@ | |||
25 | #include <asm/bootparam.h> | 25 | #include <asm/bootparam.h> |
26 | #include <asm/set_memory.h> | 26 | #include <asm/set_memory.h> |
27 | #include <asm/cacheflush.h> | 27 | #include <asm/cacheflush.h> |
28 | #include <asm/sections.h> | ||
29 | #include <asm/processor-flags.h> | 28 | #include <asm/processor-flags.h> |
30 | #include <asm/msr.h> | 29 | #include <asm/msr.h> |
31 | #include <asm/cmdline.h> | 30 | #include <asm/cmdline.h> |
32 | 31 | ||
33 | #include "mm_internal.h" | 32 | #include "mm_internal.h" |
34 | 33 | ||
35 | static char sme_cmdline_arg[] __initdata = "mem_encrypt"; | ||
36 | static char sme_cmdline_on[] __initdata = "on"; | ||
37 | static char sme_cmdline_off[] __initdata = "off"; | ||
38 | |||
39 | /* | 34 | /* |
40 | * Since SME related variables are set early in the boot process they must | 35 | * Since SME related variables are set early in the boot process they must |
41 | * reside in the .data section so as not to be zeroed out when the .bss | 36 | * reside in the .data section so as not to be zeroed out when the .bss |
@@ -46,7 +41,7 @@ EXPORT_SYMBOL(sme_me_mask); | |||
46 | DEFINE_STATIC_KEY_FALSE(sev_enable_key); | 41 | DEFINE_STATIC_KEY_FALSE(sev_enable_key); |
47 | EXPORT_SYMBOL_GPL(sev_enable_key); | 42 | EXPORT_SYMBOL_GPL(sev_enable_key); |
48 | 43 | ||
49 | static bool sev_enabled __section(.data); | 44 | bool sev_enabled __section(.data); |
50 | 45 | ||
51 | /* Buffer used for early in-place encryption by BSP, no locking needed */ | 46 | /* Buffer used for early in-place encryption by BSP, no locking needed */ |
52 | static char sme_early_buffer[PAGE_SIZE] __aligned(PAGE_SIZE); | 47 | static char sme_early_buffer[PAGE_SIZE] __aligned(PAGE_SIZE); |
@@ -463,574 +458,3 @@ void swiotlb_set_mem_attributes(void *vaddr, unsigned long size) | |||
463 | /* Make the SWIOTLB buffer area decrypted */ | 458 | /* Make the SWIOTLB buffer area decrypted */ |
464 | set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT); | 459 | set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT); |
465 | } | 460 | } |
466 | |||
467 | struct sme_populate_pgd_data { | ||
468 | void *pgtable_area; | ||
469 | pgd_t *pgd; | ||
470 | |||
471 | pmdval_t pmd_flags; | ||
472 | pteval_t pte_flags; | ||
473 | unsigned long paddr; | ||
474 | |||
475 | unsigned long vaddr; | ||
476 | unsigned long vaddr_end; | ||
477 | }; | ||
478 | |||
479 | static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd) | ||
480 | { | ||
481 | unsigned long pgd_start, pgd_end, pgd_size; | ||
482 | pgd_t *pgd_p; | ||
483 | |||
484 | pgd_start = ppd->vaddr & PGDIR_MASK; | ||
485 | pgd_end = ppd->vaddr_end & PGDIR_MASK; | ||
486 | |||
487 | pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1) * sizeof(pgd_t); | ||
488 | |||
489 | pgd_p = ppd->pgd + pgd_index(ppd->vaddr); | ||
490 | |||
491 | memset(pgd_p, 0, pgd_size); | ||
492 | } | ||
493 | |||
494 | #define PGD_FLAGS _KERNPG_TABLE_NOENC | ||
495 | #define P4D_FLAGS _KERNPG_TABLE_NOENC | ||
496 | #define PUD_FLAGS _KERNPG_TABLE_NOENC | ||
497 | #define PMD_FLAGS _KERNPG_TABLE_NOENC | ||
498 | |||
499 | #define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL) | ||
500 | |||
501 | #define PMD_FLAGS_DEC PMD_FLAGS_LARGE | ||
502 | #define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \ | ||
503 | (_PAGE_PAT | _PAGE_PWT)) | ||
504 | |||
505 | #define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC) | ||
506 | |||
507 | #define PTE_FLAGS (__PAGE_KERNEL_EXEC & ~_PAGE_GLOBAL) | ||
508 | |||
509 | #define PTE_FLAGS_DEC PTE_FLAGS | ||
510 | #define PTE_FLAGS_DEC_WP ((PTE_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \ | ||
511 | (_PAGE_PAT | _PAGE_PWT)) | ||
512 | |||
513 | #define PTE_FLAGS_ENC (PTE_FLAGS | _PAGE_ENC) | ||
514 | |||
515 | static pmd_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) | ||
516 | { | ||
517 | pgd_t *pgd_p; | ||
518 | p4d_t *p4d_p; | ||
519 | pud_t *pud_p; | ||
520 | pmd_t *pmd_p; | ||
521 | |||
522 | pgd_p = ppd->pgd + pgd_index(ppd->vaddr); | ||
523 | if (native_pgd_val(*pgd_p)) { | ||
524 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) | ||
525 | p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK); | ||
526 | else | ||
527 | pud_p = (pud_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK); | ||
528 | } else { | ||
529 | pgd_t pgd; | ||
530 | |||
531 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) { | ||
532 | p4d_p = ppd->pgtable_area; | ||
533 | memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D); | ||
534 | ppd->pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D; | ||
535 | |||
536 | pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS); | ||
537 | } else { | ||
538 | pud_p = ppd->pgtable_area; | ||
539 | memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD); | ||
540 | ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD; | ||
541 | |||
542 | pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS); | ||
543 | } | ||
544 | native_set_pgd(pgd_p, pgd); | ||
545 | } | ||
546 | |||
547 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) { | ||
548 | p4d_p += p4d_index(ppd->vaddr); | ||
549 | if (native_p4d_val(*p4d_p)) { | ||
550 | pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK); | ||
551 | } else { | ||
552 | p4d_t p4d; | ||
553 | |||
554 | pud_p = ppd->pgtable_area; | ||
555 | memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD); | ||
556 | ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD; | ||
557 | |||
558 | p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS); | ||
559 | native_set_p4d(p4d_p, p4d); | ||
560 | } | ||
561 | } | ||
562 | |||
563 | pud_p += pud_index(ppd->vaddr); | ||
564 | if (native_pud_val(*pud_p)) { | ||
565 | if (native_pud_val(*pud_p) & _PAGE_PSE) | ||
566 | return NULL; | ||
567 | |||
568 | pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK); | ||
569 | } else { | ||
570 | pud_t pud; | ||
571 | |||
572 | pmd_p = ppd->pgtable_area; | ||
573 | memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD); | ||
574 | ppd->pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD; | ||
575 | |||
576 | pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS); | ||
577 | native_set_pud(pud_p, pud); | ||
578 | } | ||
579 | |||
580 | return pmd_p; | ||
581 | } | ||
582 | |||
583 | static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) | ||
584 | { | ||
585 | pmd_t *pmd_p; | ||
586 | |||
587 | pmd_p = sme_prepare_pgd(ppd); | ||
588 | if (!pmd_p) | ||
589 | return; | ||
590 | |||
591 | pmd_p += pmd_index(ppd->vaddr); | ||
592 | if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE)) | ||
593 | native_set_pmd(pmd_p, native_make_pmd(ppd->paddr | ppd->pmd_flags)); | ||
594 | } | ||
595 | |||
596 | static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd) | ||
597 | { | ||
598 | pmd_t *pmd_p; | ||
599 | pte_t *pte_p; | ||
600 | |||
601 | pmd_p = sme_prepare_pgd(ppd); | ||
602 | if (!pmd_p) | ||
603 | return; | ||
604 | |||
605 | pmd_p += pmd_index(ppd->vaddr); | ||
606 | if (native_pmd_val(*pmd_p)) { | ||
607 | if (native_pmd_val(*pmd_p) & _PAGE_PSE) | ||
608 | return; | ||
609 | |||
610 | pte_p = (pte_t *)(native_pmd_val(*pmd_p) & ~PTE_FLAGS_MASK); | ||
611 | } else { | ||
612 | pmd_t pmd; | ||
613 | |||
614 | pte_p = ppd->pgtable_area; | ||
615 | memset(pte_p, 0, sizeof(*pte_p) * PTRS_PER_PTE); | ||
616 | ppd->pgtable_area += sizeof(*pte_p) * PTRS_PER_PTE; | ||
617 | |||
618 | pmd = native_make_pmd((pteval_t)pte_p + PMD_FLAGS); | ||
619 | native_set_pmd(pmd_p, pmd); | ||
620 | } | ||
621 | |||
622 | pte_p += pte_index(ppd->vaddr); | ||
623 | if (!native_pte_val(*pte_p)) | ||
624 | native_set_pte(pte_p, native_make_pte(ppd->paddr | ppd->pte_flags)); | ||
625 | } | ||
626 | |||
627 | static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) | ||
628 | { | ||
629 | while (ppd->vaddr < ppd->vaddr_end) { | ||
630 | sme_populate_pgd_large(ppd); | ||
631 | |||
632 | ppd->vaddr += PMD_PAGE_SIZE; | ||
633 | ppd->paddr += PMD_PAGE_SIZE; | ||
634 | } | ||
635 | } | ||
636 | |||
637 | static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd) | ||
638 | { | ||
639 | while (ppd->vaddr < ppd->vaddr_end) { | ||
640 | sme_populate_pgd(ppd); | ||
641 | |||
642 | ppd->vaddr += PAGE_SIZE; | ||
643 | ppd->paddr += PAGE_SIZE; | ||
644 | } | ||
645 | } | ||
646 | |||
647 | static void __init __sme_map_range(struct sme_populate_pgd_data *ppd, | ||
648 | pmdval_t pmd_flags, pteval_t pte_flags) | ||
649 | { | ||
650 | unsigned long vaddr_end; | ||
651 | |||
652 | ppd->pmd_flags = pmd_flags; | ||
653 | ppd->pte_flags = pte_flags; | ||
654 | |||
655 | /* Save original end value since we modify the struct value */ | ||
656 | vaddr_end = ppd->vaddr_end; | ||
657 | |||
658 | /* If start is not 2MB aligned, create PTE entries */ | ||
659 | ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE); | ||
660 | __sme_map_range_pte(ppd); | ||
661 | |||
662 | /* Create PMD entries */ | ||
663 | ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK; | ||
664 | __sme_map_range_pmd(ppd); | ||
665 | |||
666 | /* If end is not 2MB aligned, create PTE entries */ | ||
667 | ppd->vaddr_end = vaddr_end; | ||
668 | __sme_map_range_pte(ppd); | ||
669 | } | ||
670 | |||
671 | static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd) | ||
672 | { | ||
673 | __sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC); | ||
674 | } | ||
675 | |||
676 | static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd) | ||
677 | { | ||
678 | __sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC); | ||
679 | } | ||
680 | |||
681 | static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd) | ||
682 | { | ||
683 | __sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP); | ||
684 | } | ||
685 | |||
686 | static unsigned long __init sme_pgtable_calc(unsigned long len) | ||
687 | { | ||
688 | unsigned long p4d_size, pud_size, pmd_size, pte_size; | ||
689 | unsigned long total; | ||
690 | |||
691 | /* | ||
692 | * Perform a relatively simplistic calculation of the pagetable | ||
693 | * entries that are needed. Those mappings will be covered mostly | ||
694 | * by 2MB PMD entries so we can conservatively calculate the required | ||
695 | * number of P4D, PUD and PMD structures needed to perform the | ||
696 | * mappings. For mappings that are not 2MB aligned, PTE mappings | ||
697 | * would be needed for the start and end portion of the address range | ||
698 | * that fall outside of the 2MB alignment. This results in, at most, | ||
699 | * two extra pages to hold PTE entries for each range that is mapped. | ||
700 | * Incrementing the count for each covers the case where the addresses | ||
701 | * cross entries. | ||
702 | */ | ||
703 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) { | ||
704 | p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1; | ||
705 | p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D; | ||
706 | pud_size = (ALIGN(len, P4D_SIZE) / P4D_SIZE) + 1; | ||
707 | pud_size *= sizeof(pud_t) * PTRS_PER_PUD; | ||
708 | } else { | ||
709 | p4d_size = 0; | ||
710 | pud_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1; | ||
711 | pud_size *= sizeof(pud_t) * PTRS_PER_PUD; | ||
712 | } | ||
713 | pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1; | ||
714 | pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD; | ||
715 | pte_size = 2 * sizeof(pte_t) * PTRS_PER_PTE; | ||
716 | |||
717 | total = p4d_size + pud_size + pmd_size + pte_size; | ||
718 | |||
719 | /* | ||
720 | * Now calculate the added pagetable structures needed to populate | ||
721 | * the new pagetables. | ||
722 | */ | ||
723 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) { | ||
724 | p4d_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE; | ||
725 | p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D; | ||
726 | pud_size = ALIGN(total, P4D_SIZE) / P4D_SIZE; | ||
727 | pud_size *= sizeof(pud_t) * PTRS_PER_PUD; | ||
728 | } else { | ||
729 | p4d_size = 0; | ||
730 | pud_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE; | ||
731 | pud_size *= sizeof(pud_t) * PTRS_PER_PUD; | ||
732 | } | ||
733 | pmd_size = ALIGN(total, PUD_SIZE) / PUD_SIZE; | ||
734 | pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD; | ||
735 | |||
736 | total += p4d_size + pud_size + pmd_size; | ||
737 | |||
738 | return total; | ||
739 | } | ||
740 | |||
741 | void __init __nostackprotector sme_encrypt_kernel(struct boot_params *bp) | ||
742 | { | ||
743 | unsigned long workarea_start, workarea_end, workarea_len; | ||
744 | unsigned long execute_start, execute_end, execute_len; | ||
745 | unsigned long kernel_start, kernel_end, kernel_len; | ||
746 | unsigned long initrd_start, initrd_end, initrd_len; | ||
747 | struct sme_populate_pgd_data ppd; | ||
748 | unsigned long pgtable_area_len; | ||
749 | unsigned long decrypted_base; | ||
750 | |||
751 | if (!sme_active()) | ||
752 | return; | ||
753 | |||
754 | /* | ||
755 | * Prepare for encrypting the kernel and initrd by building new | ||
756 | * pagetables with the necessary attributes needed to encrypt the | ||
757 | * kernel in place. | ||
758 | * | ||
759 | * One range of virtual addresses will map the memory occupied | ||
760 | * by the kernel and initrd as encrypted. | ||
761 | * | ||
762 | * Another range of virtual addresses will map the memory occupied | ||
763 | * by the kernel and initrd as decrypted and write-protected. | ||
764 | * | ||
765 | * The use of write-protect attribute will prevent any of the | ||
766 | * memory from being cached. | ||
767 | */ | ||
768 | |||
769 | /* Physical addresses give us the identity-mapped virtual addresses */ | ||
770 | kernel_start = __pa_symbol(_text); | ||
771 | kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE); | ||
772 | kernel_len = kernel_end - kernel_start; | ||
773 | |||
774 | initrd_start = 0; | ||
775 | initrd_end = 0; | ||
776 | initrd_len = 0; | ||
777 | #ifdef CONFIG_BLK_DEV_INITRD | ||
778 | initrd_len = (unsigned long)bp->hdr.ramdisk_size | | ||
779 | ((unsigned long)bp->ext_ramdisk_size << 32); | ||
780 | if (initrd_len) { | ||
781 | initrd_start = (unsigned long)bp->hdr.ramdisk_image | | ||
782 | ((unsigned long)bp->ext_ramdisk_image << 32); | ||
783 | initrd_end = PAGE_ALIGN(initrd_start + initrd_len); | ||
784 | initrd_len = initrd_end - initrd_start; | ||
785 | } | ||
786 | #endif | ||
787 | |||
788 | /* Set the encryption workarea to be immediately after the kernel */ | ||
789 | workarea_start = kernel_end; | ||
790 | |||
791 | /* | ||
792 | * Calculate the number of workarea bytes needed: | ||
793 | * executable encryption area size: | ||
794 | * stack page (PAGE_SIZE) | ||
795 | * encryption routine page (PAGE_SIZE) | ||
796 | * intermediate copy buffer (PMD_PAGE_SIZE) | ||
797 | * pagetable structures for the encryption of the kernel | ||
798 | * pagetable structures for workarea (in case not currently mapped) | ||
799 | */ | ||
800 | execute_start = workarea_start; | ||
801 | execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE; | ||
802 | execute_len = execute_end - execute_start; | ||
803 | |||
804 | /* | ||
805 | * One PGD for both encrypted and decrypted mappings and a set of | ||
806 | * PUDs and PMDs for each of the encrypted and decrypted mappings. | ||
807 | */ | ||
808 | pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD; | ||
809 | pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2; | ||
810 | if (initrd_len) | ||
811 | pgtable_area_len += sme_pgtable_calc(initrd_len) * 2; | ||
812 | |||
813 | /* PUDs and PMDs needed in the current pagetables for the workarea */ | ||
814 | pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len); | ||
815 | |||
816 | /* | ||
817 | * The total workarea includes the executable encryption area and | ||
818 | * the pagetable area. The start of the workarea is already 2MB | ||
819 | * aligned, align the end of the workarea on a 2MB boundary so that | ||
820 | * we don't try to create/allocate PTE entries from the workarea | ||
821 | * before it is mapped. | ||
822 | */ | ||
823 | workarea_len = execute_len + pgtable_area_len; | ||
824 | workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE); | ||
825 | |||
826 | /* | ||
827 | * Set the address to the start of where newly created pagetable | ||
828 | * structures (PGDs, PUDs and PMDs) will be allocated. New pagetable | ||
829 | * structures are created when the workarea is added to the current | ||
830 | * pagetables and when the new encrypted and decrypted kernel | ||
831 | * mappings are populated. | ||
832 | */ | ||
833 | ppd.pgtable_area = (void *)execute_end; | ||
834 | |||
835 | /* | ||
836 | * Make sure the current pagetable structure has entries for | ||
837 | * addressing the workarea. | ||
838 | */ | ||
839 | ppd.pgd = (pgd_t *)native_read_cr3_pa(); | ||
840 | ppd.paddr = workarea_start; | ||
841 | ppd.vaddr = workarea_start; | ||
842 | ppd.vaddr_end = workarea_end; | ||
843 | sme_map_range_decrypted(&ppd); | ||
844 | |||
845 | /* Flush the TLB - no globals so cr3 is enough */ | ||
846 | native_write_cr3(__native_read_cr3()); | ||
847 | |||
848 | /* | ||
849 | * A new pagetable structure is being built to allow for the kernel | ||
850 | * and initrd to be encrypted. It starts with an empty PGD that will | ||
851 | * then be populated with new PUDs and PMDs as the encrypted and | ||
852 | * decrypted kernel mappings are created. | ||
853 | */ | ||
854 | ppd.pgd = ppd.pgtable_area; | ||
855 | memset(ppd.pgd, 0, sizeof(pgd_t) * PTRS_PER_PGD); | ||
856 | ppd.pgtable_area += sizeof(pgd_t) * PTRS_PER_PGD; | ||
857 | |||
858 | /* | ||
859 | * A different PGD index/entry must be used to get different | ||
860 | * pagetable entries for the decrypted mapping. Choose the next | ||
861 | * PGD index and convert it to a virtual address to be used as | ||
862 | * the base of the mapping. | ||
863 | */ | ||
864 | decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1); | ||
865 | if (initrd_len) { | ||
866 | unsigned long check_base; | ||
867 | |||
868 | check_base = (pgd_index(initrd_end) + 1) & (PTRS_PER_PGD - 1); | ||
869 | decrypted_base = max(decrypted_base, check_base); | ||
870 | } | ||
871 | decrypted_base <<= PGDIR_SHIFT; | ||
872 | |||
873 | /* Add encrypted kernel (identity) mappings */ | ||
874 | ppd.paddr = kernel_start; | ||
875 | ppd.vaddr = kernel_start; | ||
876 | ppd.vaddr_end = kernel_end; | ||
877 | sme_map_range_encrypted(&ppd); | ||
878 | |||
879 | /* Add decrypted, write-protected kernel (non-identity) mappings */ | ||
880 | ppd.paddr = kernel_start; | ||
881 | ppd.vaddr = kernel_start + decrypted_base; | ||
882 | ppd.vaddr_end = kernel_end + decrypted_base; | ||
883 | sme_map_range_decrypted_wp(&ppd); | ||
884 | |||
885 | if (initrd_len) { | ||
886 | /* Add encrypted initrd (identity) mappings */ | ||
887 | ppd.paddr = initrd_start; | ||
888 | ppd.vaddr = initrd_start; | ||
889 | ppd.vaddr_end = initrd_end; | ||
890 | sme_map_range_encrypted(&ppd); | ||
891 | /* | ||
892 | * Add decrypted, write-protected initrd (non-identity) mappings | ||
893 | */ | ||
894 | ppd.paddr = initrd_start; | ||
895 | ppd.vaddr = initrd_start + decrypted_base; | ||
896 | ppd.vaddr_end = initrd_end + decrypted_base; | ||
897 | sme_map_range_decrypted_wp(&ppd); | ||
898 | } | ||
899 | |||
900 | /* Add decrypted workarea mappings to both kernel mappings */ | ||
901 | ppd.paddr = workarea_start; | ||
902 | ppd.vaddr = workarea_start; | ||
903 | ppd.vaddr_end = workarea_end; | ||
904 | sme_map_range_decrypted(&ppd); | ||
905 | |||
906 | ppd.paddr = workarea_start; | ||
907 | ppd.vaddr = workarea_start + decrypted_base; | ||
908 | ppd.vaddr_end = workarea_end + decrypted_base; | ||
909 | sme_map_range_decrypted(&ppd); | ||
910 | |||
911 | /* Perform the encryption */ | ||
912 | sme_encrypt_execute(kernel_start, kernel_start + decrypted_base, | ||
913 | kernel_len, workarea_start, (unsigned long)ppd.pgd); | ||
914 | |||
915 | if (initrd_len) | ||
916 | sme_encrypt_execute(initrd_start, initrd_start + decrypted_base, | ||
917 | initrd_len, workarea_start, | ||
918 | (unsigned long)ppd.pgd); | ||
919 | |||
920 | /* | ||
921 | * At this point we are running encrypted. Remove the mappings for | ||
922 | * the decrypted areas - all that is needed for this is to remove | ||
923 | * the PGD entry/entries. | ||
924 | */ | ||
925 | ppd.vaddr = kernel_start + decrypted_base; | ||
926 | ppd.vaddr_end = kernel_end + decrypted_base; | ||
927 | sme_clear_pgd(&ppd); | ||
928 | |||
929 | if (initrd_len) { | ||
930 | ppd.vaddr = initrd_start + decrypted_base; | ||
931 | ppd.vaddr_end = initrd_end + decrypted_base; | ||
932 | sme_clear_pgd(&ppd); | ||
933 | } | ||
934 | |||
935 | ppd.vaddr = workarea_start + decrypted_base; | ||
936 | ppd.vaddr_end = workarea_end + decrypted_base; | ||
937 | sme_clear_pgd(&ppd); | ||
938 | |||
939 | /* Flush the TLB - no globals so cr3 is enough */ | ||
940 | native_write_cr3(__native_read_cr3()); | ||
941 | } | ||
942 | |||
943 | void __init __nostackprotector sme_enable(struct boot_params *bp) | ||
944 | { | ||
945 | const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off; | ||
946 | unsigned int eax, ebx, ecx, edx; | ||
947 | unsigned long feature_mask; | ||
948 | bool active_by_default; | ||
949 | unsigned long me_mask; | ||
950 | char buffer[16]; | ||
951 | u64 msr; | ||
952 | |||
953 | /* Check for the SME/SEV support leaf */ | ||
954 | eax = 0x80000000; | ||
955 | ecx = 0; | ||
956 | native_cpuid(&eax, &ebx, &ecx, &edx); | ||
957 | if (eax < 0x8000001f) | ||
958 | return; | ||
959 | |||
960 | #define AMD_SME_BIT BIT(0) | ||
961 | #define AMD_SEV_BIT BIT(1) | ||
962 | /* | ||
963 | * Set the feature mask (SME or SEV) based on whether we are | ||
964 | * running under a hypervisor. | ||
965 | */ | ||
966 | eax = 1; | ||
967 | ecx = 0; | ||
968 | native_cpuid(&eax, &ebx, &ecx, &edx); | ||
969 | feature_mask = (ecx & BIT(31)) ? AMD_SEV_BIT : AMD_SME_BIT; | ||
970 | |||
971 | /* | ||
972 | * Check for the SME/SEV feature: | ||
973 | * CPUID Fn8000_001F[EAX] | ||
974 | * - Bit 0 - Secure Memory Encryption support | ||
975 | * - Bit 1 - Secure Encrypted Virtualization support | ||
976 | * CPUID Fn8000_001F[EBX] | ||
977 | * - Bits 5:0 - Pagetable bit position used to indicate encryption | ||
978 | */ | ||
979 | eax = 0x8000001f; | ||
980 | ecx = 0; | ||
981 | native_cpuid(&eax, &ebx, &ecx, &edx); | ||
982 | if (!(eax & feature_mask)) | ||
983 | return; | ||
984 | |||
985 | me_mask = 1UL << (ebx & 0x3f); | ||
986 | |||
987 | /* Check if memory encryption is enabled */ | ||
988 | if (feature_mask == AMD_SME_BIT) { | ||
989 | /* For SME, check the SYSCFG MSR */ | ||
990 | msr = __rdmsr(MSR_K8_SYSCFG); | ||
991 | if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) | ||
992 | return; | ||
993 | } else { | ||
994 | /* For SEV, check the SEV MSR */ | ||
995 | msr = __rdmsr(MSR_AMD64_SEV); | ||
996 | if (!(msr & MSR_AMD64_SEV_ENABLED)) | ||
997 | return; | ||
998 | |||
999 | /* SEV state cannot be controlled by a command line option */ | ||
1000 | sme_me_mask = me_mask; | ||
1001 | sev_enabled = true; | ||
1002 | return; | ||
1003 | } | ||
1004 | |||
1005 | /* | ||
1006 | * Fixups have not been applied to phys_base yet and we're running | ||
1007 | * identity mapped, so we must obtain the address to the SME command | ||
1008 | * line argument data using rip-relative addressing. | ||
1009 | */ | ||
1010 | asm ("lea sme_cmdline_arg(%%rip), %0" | ||
1011 | : "=r" (cmdline_arg) | ||
1012 | : "p" (sme_cmdline_arg)); | ||
1013 | asm ("lea sme_cmdline_on(%%rip), %0" | ||
1014 | : "=r" (cmdline_on) | ||
1015 | : "p" (sme_cmdline_on)); | ||
1016 | asm ("lea sme_cmdline_off(%%rip), %0" | ||
1017 | : "=r" (cmdline_off) | ||
1018 | : "p" (sme_cmdline_off)); | ||
1019 | |||
1020 | if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT)) | ||
1021 | active_by_default = true; | ||
1022 | else | ||
1023 | active_by_default = false; | ||
1024 | |||
1025 | cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr | | ||
1026 | ((u64)bp->ext_cmd_line_ptr << 32)); | ||
1027 | |||
1028 | cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)); | ||
1029 | |||
1030 | if (!strncmp(buffer, cmdline_on, sizeof(buffer))) | ||
1031 | sme_me_mask = me_mask; | ||
1032 | else if (!strncmp(buffer, cmdline_off, sizeof(buffer))) | ||
1033 | sme_me_mask = 0; | ||
1034 | else | ||
1035 | sme_me_mask = active_by_default ? me_mask : 0; | ||
1036 | } | ||
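The mem_encrypt.c hunk above ends with sme_enable()'s command-line handling: mem_encrypt=on forces the encryption mask on, mem_encrypt=off clears it, and anything else falls back to CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT. Below is a minimal userspace sketch of that three-way decision; the C-bit position of 47 and the default flag values are illustrative assumptions, not taken from this diff.

```c
/* Hedged sketch of the mem_encrypt= decision in sme_enable().
 * The mask value and the compile-time default are illustrative only.
 */
#include <stdio.h>
#include <string.h>

#define ME_MASK_EXAMPLE (1ULL << 47)	/* assumed C-bit position 47 */

static unsigned long long decide_sme_mask(const char *arg, int active_by_default)
{
	if (arg && !strcmp(arg, "on"))
		return ME_MASK_EXAMPLE;
	if (arg && !strcmp(arg, "off"))
		return 0;
	return active_by_default ? ME_MASK_EXAMPLE : 0;
}

int main(void)
{
	printf("mem_encrypt=on  -> mask %#llx\n", decide_sme_mask("on", 0));
	printf("mem_encrypt=off -> mask %#llx\n", decide_sme_mask("off", 1));
	printf("(absent)        -> mask %#llx\n", decide_sme_mask(NULL, 1));
	return 0;
}
```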
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
new file mode 100644
index 000000000000..1b2197d13832
--- /dev/null
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -0,0 +1,564 @@ | |||
1 | /* | ||
2 | * AMD Memory Encryption Support | ||
3 | * | ||
4 | * Copyright (C) 2016 Advanced Micro Devices, Inc. | ||
5 | * | ||
6 | * Author: Tom Lendacky <thomas.lendacky@amd.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | */ | ||
12 | |||
13 | #define DISABLE_BRANCH_PROFILING | ||
14 | |||
15 | /* | ||
16 | * Since we're dealing with identity mappings, physical and virtual | ||
17 | * addresses are the same, so override these defines which are ultimately | ||
18 | * used by the headers in misc.h. | ||
19 | */ | ||
20 | #define __pa(x) ((unsigned long)(x)) | ||
21 | #define __va(x) ((void *)((unsigned long)(x))) | ||
22 | |||
23 | /* | ||
24 | * Special hack: we have to be careful, because no indirections are | ||
25 | * allowed here, and paravirt_ops is a kind of one. As it will only run in | ||
26 | * baremetal anyway, we just keep it from happening. (This list needs to | ||
27 | * be extended when new paravirt and debugging variants are added.) | ||
28 | */ | ||
29 | #undef CONFIG_PARAVIRT | ||
30 | #undef CONFIG_PARAVIRT_SPINLOCKS | ||
31 | |||
32 | #include <linux/kernel.h> | ||
33 | #include <linux/mm.h> | ||
34 | #include <linux/mem_encrypt.h> | ||
35 | |||
36 | #include <asm/setup.h> | ||
37 | #include <asm/sections.h> | ||
38 | #include <asm/cmdline.h> | ||
39 | |||
40 | #include "mm_internal.h" | ||
41 | |||
42 | #define PGD_FLAGS _KERNPG_TABLE_NOENC | ||
43 | #define P4D_FLAGS _KERNPG_TABLE_NOENC | ||
44 | #define PUD_FLAGS _KERNPG_TABLE_NOENC | ||
45 | #define PMD_FLAGS _KERNPG_TABLE_NOENC | ||
46 | |||
47 | #define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL) | ||
48 | |||
49 | #define PMD_FLAGS_DEC PMD_FLAGS_LARGE | ||
50 | #define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \ | ||
51 | (_PAGE_PAT | _PAGE_PWT)) | ||
52 | |||
53 | #define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC) | ||
54 | |||
55 | #define PTE_FLAGS (__PAGE_KERNEL_EXEC & ~_PAGE_GLOBAL) | ||
56 | |||
57 | #define PTE_FLAGS_DEC PTE_FLAGS | ||
58 | #define PTE_FLAGS_DEC_WP ((PTE_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \ | ||
59 | (_PAGE_PAT | _PAGE_PWT)) | ||
60 | |||
61 | #define PTE_FLAGS_ENC (PTE_FLAGS | _PAGE_ENC) | ||
62 | |||
63 | struct sme_populate_pgd_data { | ||
64 | void *pgtable_area; | ||
65 | pgd_t *pgd; | ||
66 | |||
67 | pmdval_t pmd_flags; | ||
68 | pteval_t pte_flags; | ||
69 | unsigned long paddr; | ||
70 | |||
71 | unsigned long vaddr; | ||
72 | unsigned long vaddr_end; | ||
73 | }; | ||
74 | |||
75 | static char sme_cmdline_arg[] __initdata = "mem_encrypt"; | ||
76 | static char sme_cmdline_on[] __initdata = "on"; | ||
77 | static char sme_cmdline_off[] __initdata = "off"; | ||
78 | |||
79 | static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd) | ||
80 | { | ||
81 | unsigned long pgd_start, pgd_end, pgd_size; | ||
82 | pgd_t *pgd_p; | ||
83 | |||
84 | pgd_start = ppd->vaddr & PGDIR_MASK; | ||
85 | pgd_end = ppd->vaddr_end & PGDIR_MASK; | ||
86 | |||
87 | pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1) * sizeof(pgd_t); | ||
88 | |||
89 | pgd_p = ppd->pgd + pgd_index(ppd->vaddr); | ||
90 | |||
91 | memset(pgd_p, 0, pgd_size); | ||
92 | } | ||
93 | |||
94 | static pud_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) | ||
95 | { | ||
96 | pgd_t *pgd; | ||
97 | p4d_t *p4d; | ||
98 | pud_t *pud; | ||
99 | pmd_t *pmd; | ||
100 | |||
101 | pgd = ppd->pgd + pgd_index(ppd->vaddr); | ||
102 | if (pgd_none(*pgd)) { | ||
103 | p4d = ppd->pgtable_area; | ||
104 | memset(p4d, 0, sizeof(*p4d) * PTRS_PER_P4D); | ||
105 | ppd->pgtable_area += sizeof(*p4d) * PTRS_PER_P4D; | ||
106 | set_pgd(pgd, __pgd(PGD_FLAGS | __pa(p4d))); | ||
107 | } | ||
108 | |||
109 | p4d = p4d_offset(pgd, ppd->vaddr); | ||
110 | if (p4d_none(*p4d)) { | ||
111 | pud = ppd->pgtable_area; | ||
112 | memset(pud, 0, sizeof(*pud) * PTRS_PER_PUD); | ||
113 | ppd->pgtable_area += sizeof(*pud) * PTRS_PER_PUD; | ||
114 | set_p4d(p4d, __p4d(P4D_FLAGS | __pa(pud))); | ||
115 | } | ||
116 | |||
117 | pud = pud_offset(p4d, ppd->vaddr); | ||
118 | if (pud_none(*pud)) { | ||
119 | pmd = ppd->pgtable_area; | ||
120 | memset(pmd, 0, sizeof(*pmd) * PTRS_PER_PMD); | ||
121 | ppd->pgtable_area += sizeof(*pmd) * PTRS_PER_PMD; | ||
122 | set_pud(pud, __pud(PUD_FLAGS | __pa(pmd))); | ||
123 | } | ||
124 | |||
125 | if (pud_large(*pud)) | ||
126 | return NULL; | ||
127 | |||
128 | return pud; | ||
129 | } | ||
130 | |||
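sme_prepare_pgd() never calls a real page allocator: each missing P4D, PUD or PMD page is carved out of ppd->pgtable_area, which sme_pgtable_calc() sized in advance. Here is a standalone sketch of that bump-allocation pattern; the backing buffer size and 4KB table size are assumptions for illustration.

```c
/* Hedged sketch: a bump allocator over a preallocated area, mirroring how
 * sme_prepare_pgd() carves new page-table pages out of ppd->pgtable_area.
 */
#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define TABLE_SIZE 4096				/* one page-table page */

static uint8_t pgtable_area[8 * TABLE_SIZE];	/* assumed workarea slice */
static uint8_t *next_table = pgtable_area;

static void *alloc_table(void)
{
	void *table = next_table;

	if (next_table + TABLE_SIZE > pgtable_area + sizeof(pgtable_area))
		return NULL;			/* workarea was sized too small */

	memset(table, 0, TABLE_SIZE);		/* new tables start empty */
	next_table += TABLE_SIZE;
	return table;
}

int main(void)
{
	void *p4d = alloc_table();
	void *pud = alloc_table();
	void *pmd = alloc_table();

	printf("p4d=%p pud=%p pmd=%p (each %d bytes apart)\n",
	       p4d, pud, pmd, TABLE_SIZE);
	return 0;
}
```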
131 | static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) | ||
132 | { | ||
133 | pud_t *pud; | ||
134 | pmd_t *pmd; | ||
135 | |||
136 | pud = sme_prepare_pgd(ppd); | ||
137 | if (!pud) | ||
138 | return; | ||
139 | |||
140 | pmd = pmd_offset(pud, ppd->vaddr); | ||
141 | if (pmd_large(*pmd)) | ||
142 | return; | ||
143 | |||
144 | set_pmd(pmd, __pmd(ppd->paddr | ppd->pmd_flags)); | ||
145 | } | ||
146 | |||
147 | static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd) | ||
148 | { | ||
149 | pud_t *pud; | ||
150 | pmd_t *pmd; | ||
151 | pte_t *pte; | ||
152 | |||
153 | pud = sme_prepare_pgd(ppd); | ||
154 | if (!pud) | ||
155 | return; | ||
156 | |||
157 | pmd = pmd_offset(pud, ppd->vaddr); | ||
158 | if (pmd_none(*pmd)) { | ||
159 | pte = ppd->pgtable_area; | ||
160 | memset(pte, 0, sizeof(pte) * PTRS_PER_PTE); | ||
161 | ppd->pgtable_area += sizeof(pte) * PTRS_PER_PTE; | ||
162 | set_pmd(pmd, __pmd(PMD_FLAGS | __pa(pte))); | ||
163 | } | ||
164 | |||
165 | if (pmd_large(*pmd)) | ||
166 | return; | ||
167 | |||
168 | pte = pte_offset_map(pmd, ppd->vaddr); | ||
169 | if (pte_none(*pte)) | ||
170 | set_pte(pte, __pte(ppd->paddr | ppd->pte_flags)); | ||
171 | } | ||
172 | |||
173 | static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) | ||
174 | { | ||
175 | while (ppd->vaddr < ppd->vaddr_end) { | ||
176 | sme_populate_pgd_large(ppd); | ||
177 | |||
178 | ppd->vaddr += PMD_PAGE_SIZE; | ||
179 | ppd->paddr += PMD_PAGE_SIZE; | ||
180 | } | ||
181 | } | ||
182 | |||
183 | static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd) | ||
184 | { | ||
185 | while (ppd->vaddr < ppd->vaddr_end) { | ||
186 | sme_populate_pgd(ppd); | ||
187 | |||
188 | ppd->vaddr += PAGE_SIZE; | ||
189 | ppd->paddr += PAGE_SIZE; | ||
190 | } | ||
191 | } | ||
192 | |||
193 | static void __init __sme_map_range(struct sme_populate_pgd_data *ppd, | ||
194 | pmdval_t pmd_flags, pteval_t pte_flags) | ||
195 | { | ||
196 | unsigned long vaddr_end; | ||
197 | |||
198 | ppd->pmd_flags = pmd_flags; | ||
199 | ppd->pte_flags = pte_flags; | ||
200 | |||
201 | /* Save original end value since we modify the struct value */ | ||
202 | vaddr_end = ppd->vaddr_end; | ||
203 | |||
204 | /* If start is not 2MB aligned, create PTE entries */ | ||
205 | ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE); | ||
206 | __sme_map_range_pte(ppd); | ||
207 | |||
208 | /* Create PMD entries */ | ||
209 | ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK; | ||
210 | __sme_map_range_pmd(ppd); | ||
211 | |||
212 | /* If end is not 2MB aligned, create PTE entries */ | ||
213 | ppd->vaddr_end = vaddr_end; | ||
214 | __sme_map_range_pte(ppd); | ||
215 | } | ||
216 | |||
217 | static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd) | ||
218 | { | ||
219 | __sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC); | ||
220 | } | ||
221 | |||
222 | static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd) | ||
223 | { | ||
224 | __sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC); | ||
225 | } | ||
226 | |||
227 | static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd) | ||
228 | { | ||
229 | __sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP); | ||
230 | } | ||
231 | |||
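__sme_map_range() deals with ranges that are not 2MB aligned by mapping the unaligned head and tail with 4KB PTEs and the aligned middle with 2MB PMDs. The sketch below shows only that address split; the example range is made up and assumed to span at least one full 2MB unit.

```c
/* Hedged sketch: how __sme_map_range() splits an arbitrary range into a
 * 4KB-mapped head, a 2MB-mapped body and a 4KB-mapped tail.
 */
#include <stdio.h>

#define PMD_SIZE (2UL << 20)
#define ALIGN_UP(x, a)   (((x) + (a) - 1) & ~((a) - 1))
#define ALIGN_DOWN(x, a) ((x) & ~((a) - 1))

int main(void)
{
	unsigned long start = 0x00201000UL;	/* not 2MB aligned, made up */
	unsigned long end   = 0x00a03000UL;	/* not 2MB aligned, made up */
	unsigned long head_end   = ALIGN_UP(start, PMD_SIZE);
	unsigned long tail_start = ALIGN_DOWN(end, PMD_SIZE);

	printf("PTE head : [%#lx, %#lx)\n", start, head_end);
	printf("PMD body : [%#lx, %#lx)\n", head_end, tail_start);
	printf("PTE tail : [%#lx, %#lx)\n", tail_start, end);
	return 0;
}
```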
232 | static unsigned long __init sme_pgtable_calc(unsigned long len) | ||
233 | { | ||
234 | unsigned long entries = 0, tables = 0; | ||
235 | |||
236 | /* | ||
237 | * Perform a relatively simplistic calculation of the pagetable | ||
238 | * entries that are needed. Those mappings will be covered mostly | ||
239 | * by 2MB PMD entries so we can conservatively calculate the required | ||
240 | * number of P4D, PUD and PMD structures needed to perform the | ||
241 | * mappings. For mappings that are not 2MB aligned, PTE mappings | ||
242 | * would be needed for the start and end portion of the address range | ||
243 | * that fall outside of the 2MB alignment. This results in, at most, | ||
244 | * two extra pages to hold PTE entries for each range that is mapped. | ||
245 | * Incrementing the count for each covers the case where the addresses | ||
246 | * cross entries. | ||
247 | */ | ||
248 | |||
249 | /* PGDIR_SIZE is equal to P4D_SIZE on 4-level machine. */ | ||
250 | if (PTRS_PER_P4D > 1) | ||
251 | entries += (DIV_ROUND_UP(len, PGDIR_SIZE) + 1) * sizeof(p4d_t) * PTRS_PER_P4D; | ||
252 | entries += (DIV_ROUND_UP(len, P4D_SIZE) + 1) * sizeof(pud_t) * PTRS_PER_PUD; | ||
253 | entries += (DIV_ROUND_UP(len, PUD_SIZE) + 1) * sizeof(pmd_t) * PTRS_PER_PMD; | ||
254 | entries += 2 * sizeof(pte_t) * PTRS_PER_PTE; | ||
255 | |||
256 | /* | ||
257 | * Now calculate the added pagetable structures needed to populate | ||
258 | * the new pagetables. | ||
259 | */ | ||
260 | |||
261 | if (PTRS_PER_P4D > 1) | ||
262 | tables += DIV_ROUND_UP(entries, PGDIR_SIZE) * sizeof(p4d_t) * PTRS_PER_P4D; | ||
263 | tables += DIV_ROUND_UP(entries, P4D_SIZE) * sizeof(pud_t) * PTRS_PER_PUD; | ||
264 | tables += DIV_ROUND_UP(entries, PUD_SIZE) * sizeof(pmd_t) * PTRS_PER_PMD; | ||
265 | |||
266 | return entries + tables; | ||
267 | } | ||
268 | |||
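To make the estimate concrete, the sketch below evaluates the same formula for an assumed 512MB span on 4-level hardware (PTRS_PER_P4D == 1), with the usual x86-64 sizes hard-coded; it works out to 32KB of page-table pages.

```c
/* Hedged sketch: sme_pgtable_calc()'s conservative estimate for a sample
 * 512MB range under 4-level paging. Constants are the usual x86-64 values,
 * hard-coded here for illustration only.
 */
#include <stdio.h>

#define SZ_4K      4096UL
#define PUD_SIZE   (1UL << 30)		/* 1GB */
#define PGDIR_SIZE (1UL << 39)		/* 512GB, == P4D_SIZE with 4 levels */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	unsigned long len = 512UL << 20;	/* assumed kernel+workarea span */
	unsigned long entries = 0, tables = 0;

	/* Tables needed to map "len" bytes: one extra of each for crossings. */
	entries += (DIV_ROUND_UP(len, PGDIR_SIZE) + 1) * SZ_4K;	/* PUD pages */
	entries += (DIV_ROUND_UP(len, PUD_SIZE) + 1) * SZ_4K;	/* PMD pages */
	entries += 2 * SZ_4K;					/* PTE pages */

	/* Tables needed to map the new tables themselves. */
	tables += DIV_ROUND_UP(entries, PGDIR_SIZE) * SZ_4K;
	tables += DIV_ROUND_UP(entries, PUD_SIZE) * SZ_4K;

	printf("entries=%lu bytes, tables=%lu bytes, total=%lu bytes\n",
	       entries, tables, entries + tables);
	return 0;
}
```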
269 | void __init sme_encrypt_kernel(struct boot_params *bp) | ||
270 | { | ||
271 | unsigned long workarea_start, workarea_end, workarea_len; | ||
272 | unsigned long execute_start, execute_end, execute_len; | ||
273 | unsigned long kernel_start, kernel_end, kernel_len; | ||
274 | unsigned long initrd_start, initrd_end, initrd_len; | ||
275 | struct sme_populate_pgd_data ppd; | ||
276 | unsigned long pgtable_area_len; | ||
277 | unsigned long decrypted_base; | ||
278 | |||
279 | if (!sme_active()) | ||
280 | return; | ||
281 | |||
282 | /* | ||
283 | * Prepare for encrypting the kernel and initrd by building new | ||
284 | * pagetables with the necessary attributes needed to encrypt the | ||
285 | * kernel in place. | ||
286 | * | ||
287 | * One range of virtual addresses will map the memory occupied | ||
288 | * by the kernel and initrd as encrypted. | ||
289 | * | ||
290 | * Another range of virtual addresses will map the memory occupied | ||
291 | * by the kernel and initrd as decrypted and write-protected. | ||
292 | * | ||
293 | * The use of write-protect attribute will prevent any of the | ||
294 | * memory from being cached. | ||
295 | */ | ||
296 | |||
297 | /* Physical addresses give us the identity-mapped virtual addresses */ | ||
298 | kernel_start = __pa_symbol(_text); | ||
299 | kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE); | ||
300 | kernel_len = kernel_end - kernel_start; | ||
301 | |||
302 | initrd_start = 0; | ||
303 | initrd_end = 0; | ||
304 | initrd_len = 0; | ||
305 | #ifdef CONFIG_BLK_DEV_INITRD | ||
306 | initrd_len = (unsigned long)bp->hdr.ramdisk_size | | ||
307 | ((unsigned long)bp->ext_ramdisk_size << 32); | ||
308 | if (initrd_len) { | ||
309 | initrd_start = (unsigned long)bp->hdr.ramdisk_image | | ||
310 | ((unsigned long)bp->ext_ramdisk_image << 32); | ||
311 | initrd_end = PAGE_ALIGN(initrd_start + initrd_len); | ||
312 | initrd_len = initrd_end - initrd_start; | ||
313 | } | ||
314 | #endif | ||
315 | |||
316 | /* Set the encryption workarea to be immediately after the kernel */ | ||
317 | workarea_start = kernel_end; | ||
318 | |||
319 | /* | ||
320 | * Calculate the number of workarea bytes needed: | ||
321 | * executable encryption area size: | ||
322 | * stack page (PAGE_SIZE) | ||
323 | * encryption routine page (PAGE_SIZE) | ||
324 | * intermediate copy buffer (PMD_PAGE_SIZE) | ||
325 | * pagetable structures for the encryption of the kernel | ||
326 | * pagetable structures for workarea (in case not currently mapped) | ||
327 | */ | ||
328 | execute_start = workarea_start; | ||
329 | execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE; | ||
330 | execute_len = execute_end - execute_start; | ||
331 | |||
332 | /* | ||
333 | * One PGD for both encrypted and decrypted mappings and a set of | ||
334 | * PUDs and PMDs for each of the encrypted and decrypted mappings. | ||
335 | */ | ||
336 | pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD; | ||
337 | pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2; | ||
338 | if (initrd_len) | ||
339 | pgtable_area_len += sme_pgtable_calc(initrd_len) * 2; | ||
340 | |||
341 | /* PUDs and PMDs needed in the current pagetables for the workarea */ | ||
342 | pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len); | ||
343 | |||
344 | /* | ||
345 | * The total workarea includes the executable encryption area and | ||
346 | * the pagetable area. The start of the workarea is already 2MB | ||
347 | * aligned, align the end of the workarea on a 2MB boundary so that | ||
348 | * we don't try to create/allocate PTE entries from the workarea | ||
349 | * before it is mapped. | ||
350 | */ | ||
351 | workarea_len = execute_len + pgtable_area_len; | ||
352 | workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE); | ||
353 | |||
354 | /* | ||
355 | * Set the address to the start of where newly created pagetable | ||
356 | * structures (PGDs, PUDs and PMDs) will be allocated. New pagetable | ||
357 | * structures are created when the workarea is added to the current | ||
358 | * pagetables and when the new encrypted and decrypted kernel | ||
359 | * mappings are populated. | ||
360 | */ | ||
361 | ppd.pgtable_area = (void *)execute_end; | ||
362 | |||
363 | /* | ||
364 | * Make sure the current pagetable structure has entries for | ||
365 | * addressing the workarea. | ||
366 | */ | ||
367 | ppd.pgd = (pgd_t *)native_read_cr3_pa(); | ||
368 | ppd.paddr = workarea_start; | ||
369 | ppd.vaddr = workarea_start; | ||
370 | ppd.vaddr_end = workarea_end; | ||
371 | sme_map_range_decrypted(&ppd); | ||
372 | |||
373 | /* Flush the TLB - no globals so cr3 is enough */ | ||
374 | native_write_cr3(__native_read_cr3()); | ||
375 | |||
376 | /* | ||
377 | * A new pagetable structure is being built to allow for the kernel | ||
378 | * and initrd to be encrypted. It starts with an empty PGD that will | ||
379 | * then be populated with new PUDs and PMDs as the encrypted and | ||
380 | * decrypted kernel mappings are created. | ||
381 | */ | ||
382 | ppd.pgd = ppd.pgtable_area; | ||
383 | memset(ppd.pgd, 0, sizeof(pgd_t) * PTRS_PER_PGD); | ||
384 | ppd.pgtable_area += sizeof(pgd_t) * PTRS_PER_PGD; | ||
385 | |||
386 | /* | ||
387 | * A different PGD index/entry must be used to get different | ||
388 | * pagetable entries for the decrypted mapping. Choose the next | ||
389 | * PGD index and convert it to a virtual address to be used as | ||
390 | * the base of the mapping. | ||
391 | */ | ||
392 | decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1); | ||
393 | if (initrd_len) { | ||
394 | unsigned long check_base; | ||
395 | |||
396 | check_base = (pgd_index(initrd_end) + 1) & (PTRS_PER_PGD - 1); | ||
397 | decrypted_base = max(decrypted_base, check_base); | ||
398 | } | ||
399 | decrypted_base <<= PGDIR_SHIFT; | ||
400 | |||
401 | /* Add encrypted kernel (identity) mappings */ | ||
402 | ppd.paddr = kernel_start; | ||
403 | ppd.vaddr = kernel_start; | ||
404 | ppd.vaddr_end = kernel_end; | ||
405 | sme_map_range_encrypted(&ppd); | ||
406 | |||
407 | /* Add decrypted, write-protected kernel (non-identity) mappings */ | ||
408 | ppd.paddr = kernel_start; | ||
409 | ppd.vaddr = kernel_start + decrypted_base; | ||
410 | ppd.vaddr_end = kernel_end + decrypted_base; | ||
411 | sme_map_range_decrypted_wp(&ppd); | ||
412 | |||
413 | if (initrd_len) { | ||
414 | /* Add encrypted initrd (identity) mappings */ | ||
415 | ppd.paddr = initrd_start; | ||
416 | ppd.vaddr = initrd_start; | ||
417 | ppd.vaddr_end = initrd_end; | ||
418 | sme_map_range_encrypted(&ppd); | ||
419 | /* | ||
420 | * Add decrypted, write-protected initrd (non-identity) mappings | ||
421 | */ | ||
422 | ppd.paddr = initrd_start; | ||
423 | ppd.vaddr = initrd_start + decrypted_base; | ||
424 | ppd.vaddr_end = initrd_end + decrypted_base; | ||
425 | sme_map_range_decrypted_wp(&ppd); | ||
426 | } | ||
427 | |||
428 | /* Add decrypted workarea mappings to both kernel mappings */ | ||
429 | ppd.paddr = workarea_start; | ||
430 | ppd.vaddr = workarea_start; | ||
431 | ppd.vaddr_end = workarea_end; | ||
432 | sme_map_range_decrypted(&ppd); | ||
433 | |||
434 | ppd.paddr = workarea_start; | ||
435 | ppd.vaddr = workarea_start + decrypted_base; | ||
436 | ppd.vaddr_end = workarea_end + decrypted_base; | ||
437 | sme_map_range_decrypted(&ppd); | ||
438 | |||
439 | /* Perform the encryption */ | ||
440 | sme_encrypt_execute(kernel_start, kernel_start + decrypted_base, | ||
441 | kernel_len, workarea_start, (unsigned long)ppd.pgd); | ||
442 | |||
443 | if (initrd_len) | ||
444 | sme_encrypt_execute(initrd_start, initrd_start + decrypted_base, | ||
445 | initrd_len, workarea_start, | ||
446 | (unsigned long)ppd.pgd); | ||
447 | |||
448 | /* | ||
449 | * At this point we are running encrypted. Remove the mappings for | ||
450 | * the decrypted areas - all that is needed for this is to remove | ||
451 | * the PGD entry/entries. | ||
452 | */ | ||
453 | ppd.vaddr = kernel_start + decrypted_base; | ||
454 | ppd.vaddr_end = kernel_end + decrypted_base; | ||
455 | sme_clear_pgd(&ppd); | ||
456 | |||
457 | if (initrd_len) { | ||
458 | ppd.vaddr = initrd_start + decrypted_base; | ||
459 | ppd.vaddr_end = initrd_end + decrypted_base; | ||
460 | sme_clear_pgd(&ppd); | ||
461 | } | ||
462 | |||
463 | ppd.vaddr = workarea_start + decrypted_base; | ||
464 | ppd.vaddr_end = workarea_end + decrypted_base; | ||
465 | sme_clear_pgd(&ppd); | ||
466 | |||
467 | /* Flush the TLB - no globals so cr3 is enough */ | ||
468 | native_write_cr3(__native_read_cr3()); | ||
469 | } | ||
470 | |||
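The trickiest step in sme_encrypt_kernel() is choosing decrypted_base: the decrypted alias is placed in the first PGD slot past everything already mapped, so tearing it down afterwards only requires clearing PGD entries. A standalone sketch of that index arithmetic follows; PGDIR_SHIFT and PTRS_PER_PGD are the usual 4-level values and the end addresses are made up.

```c
/* Hedged sketch: how sme_encrypt_kernel() derives decrypted_base from the
 * next free PGD slot. Example addresses are illustrative assumptions.
 */
#include <stdio.h>

#define PGDIR_SHIFT  39
#define PTRS_PER_PGD 512UL

static unsigned long pgd_index(unsigned long vaddr)
{
	return (vaddr >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1);
}

int main(void)
{
	unsigned long workarea_end = 0x12345678UL;	/* assumed, below 512GB */
	unsigned long initrd_end   = 0x8040000000UL;	/* assumed, above 512GB */
	unsigned long base, check;

	base  = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1);
	check = (pgd_index(initrd_end) + 1) & (PTRS_PER_PGD - 1);
	if (check > base)
		base = check;
	base <<= PGDIR_SHIFT;

	printf("decrypted mappings start at PGD slot %lu, vaddr %#lx\n",
	       base >> PGDIR_SHIFT, base);
	return 0;
}
```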
471 | void __init sme_enable(struct boot_params *bp) | ||
472 | { | ||
473 | const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off; | ||
474 | unsigned int eax, ebx, ecx, edx; | ||
475 | unsigned long feature_mask; | ||
476 | bool active_by_default; | ||
477 | unsigned long me_mask; | ||
478 | char buffer[16]; | ||
479 | u64 msr; | ||
480 | |||
481 | /* Check for the SME/SEV support leaf */ | ||
482 | eax = 0x80000000; | ||
483 | ecx = 0; | ||
484 | native_cpuid(&eax, &ebx, &ecx, &edx); | ||
485 | if (eax < 0x8000001f) | ||
486 | return; | ||
487 | |||
488 | #define AMD_SME_BIT BIT(0) | ||
489 | #define AMD_SEV_BIT BIT(1) | ||
490 | /* | ||
491 | * Set the feature mask (SME or SEV) based on whether we are | ||
492 | * running under a hypervisor. | ||
493 | */ | ||
494 | eax = 1; | ||
495 | ecx = 0; | ||
496 | native_cpuid(&eax, &ebx, &ecx, &edx); | ||
497 | feature_mask = (ecx & BIT(31)) ? AMD_SEV_BIT : AMD_SME_BIT; | ||
498 | |||
499 | /* | ||
500 | * Check for the SME/SEV feature: | ||
501 | * CPUID Fn8000_001F[EAX] | ||
502 | * - Bit 0 - Secure Memory Encryption support | ||
503 | * - Bit 1 - Secure Encrypted Virtualization support | ||
504 | * CPUID Fn8000_001F[EBX] | ||
505 | * - Bits 5:0 - Pagetable bit position used to indicate encryption | ||
506 | */ | ||
507 | eax = 0x8000001f; | ||
508 | ecx = 0; | ||
509 | native_cpuid(&eax, &ebx, &ecx, &edx); | ||
510 | if (!(eax & feature_mask)) | ||
511 | return; | ||
512 | |||
513 | me_mask = 1UL << (ebx & 0x3f); | ||
514 | |||
515 | /* Check if memory encryption is enabled */ | ||
516 | if (feature_mask == AMD_SME_BIT) { | ||
517 | /* For SME, check the SYSCFG MSR */ | ||
518 | msr = __rdmsr(MSR_K8_SYSCFG); | ||
519 | if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) | ||
520 | return; | ||
521 | } else { | ||
522 | /* For SEV, check the SEV MSR */ | ||
523 | msr = __rdmsr(MSR_AMD64_SEV); | ||
524 | if (!(msr & MSR_AMD64_SEV_ENABLED)) | ||
525 | return; | ||
526 | |||
527 | /* SEV state cannot be controlled by a command line option */ | ||
528 | sme_me_mask = me_mask; | ||
529 | sev_enabled = true; | ||
530 | return; | ||
531 | } | ||
532 | |||
533 | /* | ||
534 | * Fixups have not been applied to phys_base yet and we're running | ||
535 | * identity mapped, so we must obtain the address to the SME command | ||
536 | * line argument data using rip-relative addressing. | ||
537 | */ | ||
538 | asm ("lea sme_cmdline_arg(%%rip), %0" | ||
539 | : "=r" (cmdline_arg) | ||
540 | : "p" (sme_cmdline_arg)); | ||
541 | asm ("lea sme_cmdline_on(%%rip), %0" | ||
542 | : "=r" (cmdline_on) | ||
543 | : "p" (sme_cmdline_on)); | ||
544 | asm ("lea sme_cmdline_off(%%rip), %0" | ||
545 | : "=r" (cmdline_off) | ||
546 | : "p" (sme_cmdline_off)); | ||
547 | |||
548 | if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT)) | ||
549 | active_by_default = true; | ||
550 | else | ||
551 | active_by_default = false; | ||
552 | |||
553 | cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr | | ||
554 | ((u64)bp->ext_cmd_line_ptr << 32)); | ||
555 | |||
556 | cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)); | ||
557 | |||
558 | if (!strncmp(buffer, cmdline_on, sizeof(buffer))) | ||
559 | sme_me_mask = me_mask; | ||
560 | else if (!strncmp(buffer, cmdline_off, sizeof(buffer))) | ||
561 | sme_me_mask = 0; | ||
562 | else | ||
563 | sme_me_mask = active_by_default ? me_mask : 0; | ||
564 | } | ||
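sme_enable() keys everything off CPUID leaf 0x8000001F: EAX bits 0 and 1 advertise SME and SEV, and EBX[5:0] gives the position of the encryption bit that becomes sme_me_mask. The userspace probe below covers only the CPUID portion; the SYSCFG and SEV MSR checks need ring 0 and are not reproduced here.

```c
/* Hedged sketch: the CPUID side of sme_enable() as a userspace probe. */
#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(0x80000000, &eax, &ebx, &ecx, &edx) ||
	    eax < 0x8000001f) {
		printf("no SME/SEV leaf\n");
		return 0;
	}

	__get_cpuid(0x8000001f, &eax, &ebx, &ecx, &edx);
	printf("SME supported: %s\n", (eax & 1) ? "yes" : "no");
	printf("SEV supported: %s\n", (eax & 2) ? "yes" : "no");
	if (eax & 3) {
		unsigned int cbit = ebx & 0x3f;

		printf("C-bit position %u -> me_mask %#llx\n",
		       cbit, 1ULL << cbit);
	}
	return 0;
}
```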
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index aca6295350f3..e8a4a09e20f1 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -60,17 +60,6 @@ void memory_present(int nid, unsigned long start, unsigned long end) | |||
60 | } | 60 | } |
61 | printk(KERN_CONT "\n"); | 61 | printk(KERN_CONT "\n"); |
62 | } | 62 | } |
63 | |||
64 | unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, | ||
65 | unsigned long end_pfn) | ||
66 | { | ||
67 | unsigned long nr_pages = end_pfn - start_pfn; | ||
68 | |||
69 | if (!nr_pages) | ||
70 | return 0; | ||
71 | |||
72 | return (nr_pages + 1) * sizeof(struct page); | ||
73 | } | ||
74 | #endif | 63 | #endif |
75 | 64 | ||
76 | extern unsigned long highend_pfn, highstart_pfn; | 65 | extern unsigned long highend_pfn, highstart_pfn; |
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 7f1a51399674..e055d1a06699 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -157,7 +157,7 @@ static void sync_current_stack_to_mm(struct mm_struct *mm) | |||
157 | unsigned long sp = current_stack_pointer; | 157 | unsigned long sp = current_stack_pointer; |
158 | pgd_t *pgd = pgd_offset(mm, sp); | 158 | pgd_t *pgd = pgd_offset(mm, sp); |
159 | 159 | ||
160 | if (CONFIG_PGTABLE_LEVELS > 4) { | 160 | if (pgtable_l5_enabled) { |
161 | if (unlikely(pgd_none(*pgd))) { | 161 | if (unlikely(pgd_none(*pgd))) { |
162 | pgd_t *pgd_ref = pgd_offset_k(sp); | 162 | pgd_t *pgd_ref = pgd_offset_k(sp); |
163 | 163 | ||
@@ -613,7 +613,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, | |||
613 | { | 613 | { |
614 | int cpu; | 614 | int cpu; |
615 | 615 | ||
616 | struct flush_tlb_info info = { | 616 | struct flush_tlb_info info __aligned(SMP_CACHE_BYTES) = { |
617 | .mm = mm, | 617 | .mm = mm, |
618 | }; | 618 | }; |
619 | 619 | ||
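The tlb.c hunks make two small changes: the paging-depth test becomes the runtime pgtable_l5_enabled flag, so a single kernel image can run on both 4-level and 5-level hardware, and the on-stack flush_tlb_info gains cache-line alignment because remote CPUs read it during a flush. A minimal sketch of the alignment idea, with a 64-byte line size and a simplified struct as assumptions:

```c
/* Hedged sketch of the __aligned(SMP_CACHE_BYTES) idea: aligning an on-stack
 * structure that other CPUs will read keeps it from straddling a cache line.
 */
#include <stdio.h>
#include <stdalign.h>

#define SMP_CACHE_BYTES 64		/* assumed cache line size */

struct flush_info {			/* simplified stand-in */
	void *mm;
	unsigned long start;
	unsigned long end;
};

int main(void)
{
	alignas(SMP_CACHE_BYTES) struct flush_info info = { 0 };

	printf("info at %p, offset within cache line: %lu\n",
	       (void *)&info, (unsigned long)&info % SMP_CACHE_BYTES);
	printf("struct flush_info is %zu bytes\n", sizeof(info));
	return 0;
}
```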