author	Linus Torvalds <torvalds@linux-foundation.org>	2018-04-02 18:45:30 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-04-02 18:45:30 -0400
commit	d22fff81418edc92be534cad8d59da914049bf69 (patch)
tree	96b22b20bbc789a76e744bcfc11a7f0854b62ece /arch/x86/mm
parent	986b37c0ae4f0a3f93d8974d03a9cbc1502dd377 (diff)
parent	eaeb8e76cd5751e805f6e4a3fcec91d283e3b0c2 (diff)
Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 mm updates from Ingo Molnar:

 - Extend the memmap= boot parameter syntax to allow the redeclaration
   and dropping of existing ranges, and to support all e820 range types
   (Jan H. Schönherr)

 - Improve the W+X boot time security checks to remove false positive
   warnings on Xen (Jan Beulich)

 - Support booting as Xen PVH guest (Juergen Gross)

 - Improved 5-level paging (LA57) support, in particular it's possible
   now to have a single kernel image for both 4-level and 5-level
   hardware (Kirill A. Shutemov)

 - AMD hardware RAM encryption support (SME/SEV) fixes (Tom Lendacky)

 - Preparatory commits for hardware-encrypted RAM support on Intel
   CPUs. (Kirill A. Shutemov)

 - Improved Intel-MID support (Andy Shevchenko)

 - Show EFI page tables in page_tables debug files (Andy Lutomirski)

 - ... plus misc fixes and smaller cleanups

* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (56 commits)
  x86/cpu/tme: Fix spelling: "configuation" -> "configuration"
  x86/boot: Fix SEV boot failure from change to __PHYSICAL_MASK_SHIFT
  x86/mm: Update comment in detect_tme() regarding x86_phys_bits
  x86/mm/32: Remove unused node_memmap_size_bytes() & CONFIG_NEED_NODE_MEMMAP_SIZE logic
  x86/mm: Remove pointless checks in vmalloc_fault
  x86/platform/intel-mid: Add special handling for ACPI HW reduced platforms
  ACPI, x86/boot: Introduce the ->reduced_hw_early_init() ACPI callback
  ACPI, x86/boot: Split out acpi_generic_reduce_hw_init() and export
  x86/pconfig: Provide defines and helper to run MKTME_KEY_PROG leaf
  x86/pconfig: Detect PCONFIG targets
  x86/tme: Detect if TME and MKTME is activated by BIOS
  x86/boot/compressed/64: Handle 5-level paging boot if kernel is above 4G
  x86/boot/compressed/64: Use page table in trampoline memory
  x86/boot/compressed/64: Use stack from trampoline memory
  x86/boot/compressed/64: Make sure we have a 32-bit code segment
  x86/mm: Do not use paravirtualized calls in native_set_p4d()
  kdump, vmcoreinfo: Export pgtable_l5_enabled value
  x86/boot/compressed/64: Prepare new top-level page table for trampoline
  x86/boot/compressed/64: Set up trampoline memory
  x86/boot/compressed/64: Save and restore trampoline memory
  ...
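A recurring transformation across the arch/x86/mm hunks below is the replacement of compile-time paging-mode checks with a runtime flag, which is what makes a single kernel image work on both 4-level and 5-level hardware. A minimal sketch of the pattern (the identifiers are real and taken from the hunks below, e.g. arch/x86/mm/kaslr.c; the before/after juxtaposition is illustrative):

	/* Before: the paging mode is baked in at build time. */
	if (IS_ENABLED(CONFIG_X86_5LEVEL))
		init_trampoline_p4d();
	else
		init_trampoline_pud();

	/* After: one image consults pgtable_l5_enabled, which early boot
	 * code sets from hardware (LA57) detection. */
	if (pgtable_l5_enabled)
		init_trampoline_p4d();
	else
		init_trampoline_pud();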
Diffstat (limited to 'arch/x86/mm')
 -rw-r--r--  arch/x86/mm/Makefile               |  15
 -rw-r--r--  arch/x86/mm/debug_pagetables.c     |  32
 -rw-r--r--  arch/x86/mm/dump_pagetables.c      | 125
 -rw-r--r--  arch/x86/mm/fault.c                |  60
 -rw-r--r--  arch/x86/mm/ident_map.c            |   2
 -rw-r--r--  arch/x86/mm/init_64.c              |  32
 -rw-r--r--  arch/x86/mm/kasan_init_64.c        |  20
 -rw-r--r--  arch/x86/mm/kaslr.c                |  29
 -rw-r--r--  arch/x86/mm/mem_encrypt.c          | 578
 -rw-r--r--  arch/x86/mm/mem_encrypt_identity.c | 564
 -rw-r--r--  arch/x86/mm/numa_32.c              |  11
 -rw-r--r--  arch/x86/mm/tlb.c                  |   4
12 files changed, 750 insertions, 722 deletions
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 27e9e90a8d35..4b101dd6e52f 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,12 +1,15 @@
 # SPDX-License-Identifier: GPL-2.0
-# Kernel does not boot with instrumentation of tlb.c and mem_encrypt.c
+# Kernel does not boot with instrumentation of tlb.c and mem_encrypt*.c
 KCOV_INSTRUMENT_tlb.o := n
 KCOV_INSTRUMENT_mem_encrypt.o := n
+KCOV_INSTRUMENT_mem_encrypt_identity.o := n
 
 KASAN_SANITIZE_mem_encrypt.o := n
+KASAN_SANITIZE_mem_encrypt_identity.o := n
 
 ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_mem_encrypt.o = -pg
+CFLAGS_REMOVE_mem_encrypt_identity.o = -pg
 endif
 
 obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
@@ -16,6 +19,7 @@ obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
 nostackp := $(call cc-option, -fno-stack-protector)
 CFLAGS_physaddr.o := $(nostackp)
 CFLAGS_setup_nx.o := $(nostackp)
+CFLAGS_mem_encrypt_identity.o := $(nostackp)
 
 CFLAGS_fault.o := -I$(src)/../include/asm/trace
 
@@ -47,4 +51,5 @@ obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
 obj-$(CONFIG_PAGE_TABLE_ISOLATION) += pti.o
 
 obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt.o
+obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_identity.o
 obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o
diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c
index 421f2664ffa0..51a6f92da2bf 100644
--- a/arch/x86/mm/debug_pagetables.c
+++ b/arch/x86/mm/debug_pagetables.c
@@ -72,6 +72,31 @@ static const struct file_operations ptdump_curusr_fops = {
 };
 #endif
 
+#if defined(CONFIG_EFI) && defined(CONFIG_X86_64)
+extern pgd_t *efi_pgd;
+static struct dentry *pe_efi;
+
+static int ptdump_show_efi(struct seq_file *m, void *v)
+{
+	if (efi_pgd)
+		ptdump_walk_pgd_level_debugfs(m, efi_pgd, false);
+	return 0;
+}
+
+static int ptdump_open_efi(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, ptdump_show_efi, NULL);
+}
+
+static const struct file_operations ptdump_efi_fops = {
+	.owner		= THIS_MODULE,
+	.open		= ptdump_open_efi,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+#endif
+
 static struct dentry *dir, *pe_knl, *pe_curknl;
 
 static int __init pt_dump_debug_init(void)
@@ -96,6 +121,13 @@ static int __init pt_dump_debug_init(void)
 	if (!pe_curusr)
 		goto err;
 #endif
+
+#if defined(CONFIG_EFI) && defined(CONFIG_X86_64)
+	pe_efi = debugfs_create_file("efi", 0400, dir, NULL, &ptdump_efi_fops);
+	if (!pe_efi)
+		goto err;
+#endif
+
 	return 0;
 err:
 	debugfs_remove_recursive(dir);
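With this hunk, the EFI page tables become readable through the same debugfs directory the dumper already registers ("page_tables", per the commit message's "Show EFI page tables in page_tables debug files"). A hypothetical userspace sketch for dumping them; the path assumes debugfs is mounted at /sys/kernel/debug, and the file requires root plus a CONFIG_EFI/CONFIG_X86_64 kernel:

	#include <stdio.h>

	int main(void)
	{
		/* "efi" is the 0400 entry created in the hunk above */
		FILE *f = fopen("/sys/kernel/debug/page_tables/efi", "r");
		char line[256];

		if (!f) {
			perror("page_tables/efi");
			return 1;
		}
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);
		fclose(f);
		return 0;
	}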
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 2a4849e92831..62a7e9f65dec 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -29,6 +29,7 @@
 struct pg_state {
 	int level;
 	pgprot_t current_prot;
+	pgprotval_t effective_prot;
 	unsigned long start_address;
 	unsigned long current_address;
 	const struct addr_marker *marker;
@@ -85,11 +86,15 @@ static struct addr_marker address_markers[] = {
 	[VMALLOC_START_NR]	= { 0UL, "vmalloc() Area" },
 	[VMEMMAP_START_NR]	= { 0UL, "Vmemmap" },
 #ifdef CONFIG_KASAN
-	[KASAN_SHADOW_START_NR]	= { KASAN_SHADOW_START, "KASAN shadow" },
-	[KASAN_SHADOW_END_NR]	= { KASAN_SHADOW_END, "KASAN shadow end" },
+	/*
+	 * These fields get initialized with the (dynamic)
+	 * KASAN_SHADOW_{START,END} values in pt_dump_init().
+	 */
+	[KASAN_SHADOW_START_NR]	= { 0UL, "KASAN shadow" },
+	[KASAN_SHADOW_END_NR]	= { 0UL, "KASAN shadow end" },
 #endif
 #ifdef CONFIG_MODIFY_LDT_SYSCALL
-	[LDT_NR]		= { LDT_BASE_ADDR, "LDT remap" },
+	[LDT_NR]		= { 0UL, "LDT remap" },
 #endif
 	[CPU_ENTRY_AREA_NR]	= { CPU_ENTRY_AREA_BASE,"CPU entry Area" },
 #ifdef CONFIG_X86_ESPFIX64
@@ -231,9 +236,9 @@ static unsigned long normalize_addr(unsigned long u)
  * print what we collected so far.
  */
 static void note_page(struct seq_file *m, struct pg_state *st,
-		      pgprot_t new_prot, int level)
+		      pgprot_t new_prot, pgprotval_t new_eff, int level)
 {
-	pgprotval_t prot, cur;
+	pgprotval_t prot, cur, eff;
 	static const char units[] = "BKMGTPE";
 
 	/*
@@ -243,23 +248,24 @@ static void note_page(struct seq_file *m, struct pg_state *st,
 	 */
 	prot = pgprot_val(new_prot);
 	cur = pgprot_val(st->current_prot);
+	eff = st->effective_prot;
 
 	if (!st->level) {
 		/* First entry */
 		st->current_prot = new_prot;
+		st->effective_prot = new_eff;
 		st->level = level;
 		st->marker = address_markers;
 		st->lines = 0;
 		pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n",
 				   st->marker->name);
-	} else if (prot != cur || level != st->level ||
+	} else if (prot != cur || new_eff != eff || level != st->level ||
 		   st->current_address >= st->marker[1].start_address) {
 		const char *unit = units;
 		unsigned long delta;
 		int width = sizeof(unsigned long) * 2;
-		pgprotval_t pr = pgprot_val(st->current_prot);
 
-		if (st->check_wx && (pr & _PAGE_RW) && !(pr & _PAGE_NX)) {
+		if (st->check_wx && (eff & _PAGE_RW) && !(eff & _PAGE_NX)) {
 			WARN_ONCE(1,
 				  "x86/mm: Found insecure W+X mapping at address %p/%pS\n",
 				  (void *)st->start_address,
@@ -313,21 +319,30 @@ static void note_page(struct seq_file *m, struct pg_state *st,
 
 		st->start_address = st->current_address;
 		st->current_prot = new_prot;
+		st->effective_prot = new_eff;
 		st->level = level;
 	}
 }
 
-static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr, unsigned long P)
+static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2)
+{
+	return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) |
+	       ((prot1 | prot2) & _PAGE_NX);
+}
+
+static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
+			   pgprotval_t eff_in, unsigned long P)
 {
 	int i;
 	pte_t *start;
-	pgprotval_t prot;
+	pgprotval_t prot, eff;
 
 	start = (pte_t *)pmd_page_vaddr(addr);
 	for (i = 0; i < PTRS_PER_PTE; i++) {
 		prot = pte_flags(*start);
+		eff = effective_prot(eff_in, prot);
 		st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
-		note_page(m, st, __pgprot(prot), 5);
+		note_page(m, st, __pgprot(prot), eff, 5);
 		start++;
 	}
 }
@@ -344,12 +359,10 @@ static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st,
 			     void *pt)
 {
 	if (__pa(pt) == __pa(kasan_zero_pmd) ||
-#ifdef CONFIG_X86_5LEVEL
-	    __pa(pt) == __pa(kasan_zero_p4d) ||
-#endif
+	    (pgtable_l5_enabled && __pa(pt) == __pa(kasan_zero_p4d)) ||
 	    __pa(pt) == __pa(kasan_zero_pud)) {
 		pgprotval_t prot = pte_flags(kasan_zero_pte[0]);
-		note_page(m, st, __pgprot(prot), 5);
+		note_page(m, st, __pgprot(prot), 0, 5);
 		return true;
 	}
 	return false;
@@ -364,42 +377,45 @@ static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st,
 
 #if PTRS_PER_PMD > 1
 
-static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, unsigned long P)
+static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
+			   pgprotval_t eff_in, unsigned long P)
 {
 	int i;
 	pmd_t *start, *pmd_start;
-	pgprotval_t prot;
+	pgprotval_t prot, eff;
 
 	pmd_start = start = (pmd_t *)pud_page_vaddr(addr);
 	for (i = 0; i < PTRS_PER_PMD; i++) {
 		st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
 		if (!pmd_none(*start)) {
+			prot = pmd_flags(*start);
+			eff = effective_prot(eff_in, prot);
 			if (pmd_large(*start) || !pmd_present(*start)) {
-				prot = pmd_flags(*start);
-				note_page(m, st, __pgprot(prot), 4);
+				note_page(m, st, __pgprot(prot), eff, 4);
 			} else if (!kasan_page_table(m, st, pmd_start)) {
-				walk_pte_level(m, st, *start,
+				walk_pte_level(m, st, *start, eff,
 					       P + i * PMD_LEVEL_MULT);
 			}
 		} else
-			note_page(m, st, __pgprot(0), 4);
+			note_page(m, st, __pgprot(0), 0, 4);
 		start++;
 	}
 }
 
 #else
-#define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p)
+#define walk_pmd_level(m,s,a,e,p) walk_pte_level(m,s,__pmd(pud_val(a)),e,p)
 #define pud_large(a) pmd_large(__pmd(pud_val(a)))
 #define pud_none(a)  pmd_none(__pmd(pud_val(a)))
 #endif
 
 #if PTRS_PER_PUD > 1
 
-static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr, unsigned long P)
+static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr,
+			   pgprotval_t eff_in, unsigned long P)
 {
 	int i;
 	pud_t *start, *pud_start;
-	pgprotval_t prot;
+	pgprotval_t prot, eff;
 	pud_t *prev_pud = NULL;
 
 	pud_start = start = (pud_t *)p4d_page_vaddr(addr);
@@ -407,15 +423,16 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr,
 	for (i = 0; i < PTRS_PER_PUD; i++) {
 		st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
 		if (!pud_none(*start)) {
+			prot = pud_flags(*start);
+			eff = effective_prot(eff_in, prot);
 			if (pud_large(*start) || !pud_present(*start)) {
-				prot = pud_flags(*start);
-				note_page(m, st, __pgprot(prot), 3);
+				note_page(m, st, __pgprot(prot), eff, 3);
 			} else if (!kasan_page_table(m, st, pud_start)) {
-				walk_pmd_level(m, st, *start,
+				walk_pmd_level(m, st, *start, eff,
 					       P + i * PUD_LEVEL_MULT);
 			}
 		} else
-			note_page(m, st, __pgprot(0), 3);
+			note_page(m, st, __pgprot(0), 0, 3);
 
 		prev_pud = start;
 		start++;
@@ -423,43 +440,43 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr,
 }
 
 #else
-#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(p4d_val(a)),p)
+#define walk_pud_level(m,s,a,e,p) walk_pmd_level(m,s,__pud(p4d_val(a)),e,p)
 #define p4d_large(a) pud_large(__pud(p4d_val(a)))
 #define p4d_none(a)  pud_none(__pud(p4d_val(a)))
 #endif
 
-#if PTRS_PER_P4D > 1
-
-static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr, unsigned long P)
+static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
+			   pgprotval_t eff_in, unsigned long P)
 {
 	int i;
 	p4d_t *start, *p4d_start;
-	pgprotval_t prot;
+	pgprotval_t prot, eff;
+
+	if (PTRS_PER_P4D == 1)
+		return walk_pud_level(m, st, __p4d(pgd_val(addr)), eff_in, P);
 
 	p4d_start = start = (p4d_t *)pgd_page_vaddr(addr);
 
 	for (i = 0; i < PTRS_PER_P4D; i++) {
 		st->current_address = normalize_addr(P + i * P4D_LEVEL_MULT);
 		if (!p4d_none(*start)) {
+			prot = p4d_flags(*start);
+			eff = effective_prot(eff_in, prot);
 			if (p4d_large(*start) || !p4d_present(*start)) {
-				prot = p4d_flags(*start);
-				note_page(m, st, __pgprot(prot), 2);
+				note_page(m, st, __pgprot(prot), eff, 2);
 			} else if (!kasan_page_table(m, st, p4d_start)) {
-				walk_pud_level(m, st, *start,
+				walk_pud_level(m, st, *start, eff,
 					       P + i * P4D_LEVEL_MULT);
 			}
 		} else
-			note_page(m, st, __pgprot(0), 2);
+			note_page(m, st, __pgprot(0), 0, 2);
 
 		start++;
 	}
 }
 
-#else
-#define walk_p4d_level(m,s,a,p) walk_pud_level(m,s,__p4d(pgd_val(a)),p)
-#define pgd_large(a) p4d_large(__p4d(pgd_val(a)))
-#define pgd_none(a)  p4d_none(__p4d(pgd_val(a)))
-#endif
+#define pgd_large(a) (pgtable_l5_enabled ? pgd_large(a) : p4d_large(__p4d(pgd_val(a))))
+#define pgd_none(a)  (pgtable_l5_enabled ? pgd_none(a) : p4d_none(__p4d(pgd_val(a))))
 
 static inline bool is_hypervisor_range(int idx)
 {
@@ -483,7 +500,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
 #else
 	pgd_t *start = swapper_pg_dir;
 #endif
-	pgprotval_t prot;
+	pgprotval_t prot, eff;
 	int i;
 	struct pg_state st = {};
 
@@ -499,15 +516,20 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
 	for (i = 0; i < PTRS_PER_PGD; i++) {
 		st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
 		if (!pgd_none(*start) && !is_hypervisor_range(i)) {
+			prot = pgd_flags(*start);
+#ifdef CONFIG_X86_PAE
+			eff = _PAGE_USER | _PAGE_RW;
+#else
+			eff = prot;
+#endif
 			if (pgd_large(*start) || !pgd_present(*start)) {
-				prot = pgd_flags(*start);
-				note_page(m, &st, __pgprot(prot), 1);
+				note_page(m, &st, __pgprot(prot), eff, 1);
 			} else {
-				walk_p4d_level(m, &st, *start,
+				walk_p4d_level(m, &st, *start, eff,
 					       i * PGD_LEVEL_MULT);
 			}
 		} else
-			note_page(m, &st, __pgprot(0), 1);
+			note_page(m, &st, __pgprot(0), 0, 1);
 
 		cond_resched();
 		start++;
@@ -515,7 +537,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
 
 	/* Flush out the last page */
 	st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT);
-	note_page(m, &st, __pgprot(0), 0);
+	note_page(m, &st, __pgprot(0), 0, 0);
 	if (!checkwx)
 		return;
 	if (st.wx_pages)
@@ -570,6 +592,13 @@ static int __init pt_dump_init(void)
 	address_markers[LOW_KERNEL_NR].start_address = PAGE_OFFSET;
 	address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
 	address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START;
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
+	address_markers[LDT_NR].start_address = LDT_BASE_ADDR;
+#endif
+#ifdef CONFIG_KASAN
+	address_markers[KASAN_SHADOW_START_NR].start_address = KASAN_SHADOW_START;
+	address_markers[KASAN_SHADOW_END_NR].start_address = KASAN_SHADOW_END;
+#endif
 #endif
 #ifdef CONFIG_X86_32
 	address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
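The key to the W+X false-positive fix above is that note_page() now receives the effective protection accumulated across all levels of the walk, via effective_prot(): the grant bits (_PAGE_USER, _PAGE_RW) must be present at every level, while the restriction bit (_PAGE_NX) from any level sticks. A worked example using the function exactly as defined in this hunk (the flag values are illustrative):

	pgprotval_t pmd_prot = _PAGE_PRESENT;			/* upper level: read-only */
	pgprotval_t pte_prot = _PAGE_PRESENT | _PAGE_RW;	/* leaf: writable, no NX */
	pgprotval_t eff = effective_prot(pmd_prot, pte_prot);

	/* eff lacks _PAGE_RW (revoked by the PMD), so the test
	 * "(eff & _PAGE_RW) && !(eff & _PAGE_NX)" is false and no W+X
	 * warning fires, although the PTE alone would have triggered one
	 * under the old per-entry check. This is the Xen case the merge
	 * message refers to. */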
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index f75ea0748b9f..73bd8c95ac71 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -417,11 +417,11 @@ void vmalloc_sync_all(void)
  */
 static noinline int vmalloc_fault(unsigned long address)
 {
-	pgd_t *pgd, *pgd_ref;
-	p4d_t *p4d, *p4d_ref;
-	pud_t *pud, *pud_ref;
-	pmd_t *pmd, *pmd_ref;
-	pte_t *pte, *pte_ref;
+	pgd_t *pgd, *pgd_k;
+	p4d_t *p4d, *p4d_k;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
 
 	/* Make sure we are in vmalloc area: */
 	if (!(address >= VMALLOC_START && address < VMALLOC_END))
@@ -435,73 +435,51 @@ static noinline int vmalloc_fault(unsigned long address)
 	 * case just flush:
 	 */
 	pgd = (pgd_t *)__va(read_cr3_pa()) + pgd_index(address);
-	pgd_ref = pgd_offset_k(address);
-	if (pgd_none(*pgd_ref))
+	pgd_k = pgd_offset_k(address);
+	if (pgd_none(*pgd_k))
 		return -1;
 
-	if (CONFIG_PGTABLE_LEVELS > 4) {
+	if (pgtable_l5_enabled) {
 		if (pgd_none(*pgd)) {
-			set_pgd(pgd, *pgd_ref);
+			set_pgd(pgd, *pgd_k);
 			arch_flush_lazy_mmu_mode();
 		} else {
-			BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
+			BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_k));
 		}
 	}
 
 	/* With 4-level paging, copying happens on the p4d level. */
 	p4d = p4d_offset(pgd, address);
-	p4d_ref = p4d_offset(pgd_ref, address);
-	if (p4d_none(*p4d_ref))
+	p4d_k = p4d_offset(pgd_k, address);
+	if (p4d_none(*p4d_k))
 		return -1;
 
-	if (p4d_none(*p4d) && CONFIG_PGTABLE_LEVELS == 4) {
-		set_p4d(p4d, *p4d_ref);
+	if (p4d_none(*p4d) && !pgtable_l5_enabled) {
+		set_p4d(p4d, *p4d_k);
 		arch_flush_lazy_mmu_mode();
 	} else {
-		BUG_ON(p4d_pfn(*p4d) != p4d_pfn(*p4d_ref));
+		BUG_ON(p4d_pfn(*p4d) != p4d_pfn(*p4d_k));
 	}
 
-	/*
-	 * Below here mismatches are bugs because these lower tables
-	 * are shared:
-	 */
 	BUILD_BUG_ON(CONFIG_PGTABLE_LEVELS < 4);
 
 	pud = pud_offset(p4d, address);
-	pud_ref = pud_offset(p4d_ref, address);
-	if (pud_none(*pud_ref))
+	if (pud_none(*pud))
 		return -1;
 
-	if (pud_none(*pud) || pud_pfn(*pud) != pud_pfn(*pud_ref))
-		BUG();
-
 	if (pud_large(*pud))
 		return 0;
 
 	pmd = pmd_offset(pud, address);
-	pmd_ref = pmd_offset(pud_ref, address);
-	if (pmd_none(*pmd_ref))
+	if (pmd_none(*pmd))
 		return -1;
 
-	if (pmd_none(*pmd) || pmd_pfn(*pmd) != pmd_pfn(*pmd_ref))
-		BUG();
-
 	if (pmd_large(*pmd))
 		return 0;
 
-	pte_ref = pte_offset_kernel(pmd_ref, address);
-	if (!pte_present(*pte_ref))
-		return -1;
-
 	pte = pte_offset_kernel(pmd, address);
-
-	/*
-	 * Don't use pte_page here, because the mappings can point
-	 * outside mem_map, and the NUMA hash lookup cannot handle
-	 * that:
-	 */
-	if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
-		BUG();
+	if (!pte_present(*pte))
+		return -1;
 
 	return 0;
 }
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index ab33a32df2a8..9aa22be8331e 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -120,7 +120,7 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
 		result = ident_p4d_init(info, p4d, addr, next);
 		if (result)
 			return result;
-		if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+		if (pgtable_l5_enabled) {
 			set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag));
 		} else {
 			/*
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index af11a2890235..45241de66785 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -88,12 +88,7 @@ static int __init nonx32_setup(char *str)
 }
 __setup("noexec32=", nonx32_setup);
 
-/*
- * When memory was added make sure all the processes MM have
- * suitable PGD entries in the local PGD level page.
- */
-#ifdef CONFIG_X86_5LEVEL
-void sync_global_pgds(unsigned long start, unsigned long end)
+static void sync_global_pgds_l5(unsigned long start, unsigned long end)
 {
 	unsigned long addr;
 
@@ -129,8 +124,8 @@ void sync_global_pgds(unsigned long start, unsigned long end)
 		spin_unlock(&pgd_lock);
 	}
 }
-#else
-void sync_global_pgds(unsigned long start, unsigned long end)
+
+static void sync_global_pgds_l4(unsigned long start, unsigned long end)
 {
 	unsigned long addr;
 
@@ -143,7 +138,7 @@ void sync_global_pgds(unsigned long start, unsigned long end)
 		 * With folded p4d, pgd_none() is always false, we need to
 		 * handle synchonization on p4d level.
 		 */
-		BUILD_BUG_ON(pgd_none(*pgd_ref));
+		MAYBE_BUILD_BUG_ON(pgd_none(*pgd_ref));
 		p4d_ref = p4d_offset(pgd_ref, addr);
 
 		if (p4d_none(*p4d_ref))
@@ -173,7 +168,18 @@ void sync_global_pgds(unsigned long start, unsigned long end)
 		spin_unlock(&pgd_lock);
 	}
 }
-#endif
+
+/*
+ * When memory was added make sure all the processes MM have
+ * suitable PGD entries in the local PGD level page.
+ */
+void sync_global_pgds(unsigned long start, unsigned long end)
+{
+	if (pgtable_l5_enabled)
+		sync_global_pgds_l5(start, end);
+	else
+		sync_global_pgds_l4(start, end);
+}
 
 /*
  * NOTE: This function is marked __ref because it calls __init function
@@ -632,7 +638,7 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
 	unsigned long vaddr = (unsigned long)__va(paddr);
 	int i = p4d_index(vaddr);
 
-	if (!IS_ENABLED(CONFIG_X86_5LEVEL))
+	if (!pgtable_l5_enabled)
 		return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, page_size_mask);
 
 	for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) {
@@ -712,7 +718,7 @@ kernel_physical_mapping_init(unsigned long paddr_start,
 					   page_size_mask);
 
 		spin_lock(&init_mm.page_table_lock);
-		if (IS_ENABLED(CONFIG_X86_5LEVEL))
+		if (pgtable_l5_enabled)
 			pgd_populate(&init_mm, pgd, p4d);
 		else
 			p4d_populate(&init_mm, p4d_offset(pgd, vaddr), (pud_t *) p4d);
@@ -1089,7 +1095,7 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
 		 * 5-level case we should free them. This code will have to change
 		 * to adapt for boot-time switching between 4 and 5 level page tables.
 		 */
-		if (CONFIG_PGTABLE_LEVELS == 5)
+		if (pgtable_l5_enabled)
 			free_pud_table(pud_base, p4d);
 	}
 
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index af6f2f9c6a26..d8ff013ea9d0 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -1,6 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0
 #define DISABLE_BRANCH_PROFILING
 #define pr_fmt(fmt) "kasan: " fmt
+
+#ifdef CONFIG_X86_5LEVEL
+/* Too early to use cpu_feature_enabled() */
+#define pgtable_l5_enabled __pgtable_l5_enabled
+#endif
+
 #include <linux/bootmem.h>
 #include <linux/kasan.h>
 #include <linux/kdebug.h>
@@ -19,7 +25,7 @@
 
 extern struct range pfn_mapped[E820_MAX_ENTRIES];
 
-static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
+static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
 
 static __init void *early_alloc(size_t size, int nid, bool panic)
 {
@@ -176,10 +182,10 @@ static void __init clear_pgds(unsigned long start,
 		 * With folded p4d, pgd_clear() is nop, use p4d_clear()
 		 * instead.
 		 */
-		if (CONFIG_PGTABLE_LEVELS < 5)
-			p4d_clear(p4d_offset(pgd, start));
-		else
+		if (pgtable_l5_enabled)
 			pgd_clear(pgd);
+		else
+			p4d_clear(p4d_offset(pgd, start));
 	}
 
 	pgd = pgd_offset_k(start);
@@ -191,7 +197,7 @@ static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr)
 {
 	unsigned long p4d;
 
-	if (!IS_ENABLED(CONFIG_X86_5LEVEL))
+	if (!pgtable_l5_enabled)
 		return (p4d_t *)pgd;
 
 	p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK;
@@ -272,7 +278,7 @@ void __init kasan_early_init(void)
 	for (i = 0; i < PTRS_PER_PUD; i++)
 		kasan_zero_pud[i] = __pud(pud_val);
 
-	for (i = 0; IS_ENABLED(CONFIG_X86_5LEVEL) && i < PTRS_PER_P4D; i++)
+	for (i = 0; pgtable_l5_enabled && i < PTRS_PER_P4D; i++)
 		kasan_zero_p4d[i] = __p4d(p4d_val);
 
 	kasan_map_early_shadow(early_top_pgt);
@@ -303,7 +309,7 @@ void __init kasan_init(void)
 	 * bunch of things like kernel code, modules, EFI mapping, etc.
 	 * We need to take extra steps to not overwrite them.
 	 */
-	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+	if (pgtable_l5_enabled) {
 		void *ptr;
 
 		ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END));
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index aedebd2ebf1e..615cc03ced84 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -34,23 +34,12 @@
 #define TB_SHIFT 40
 
 /*
- * Virtual address start and end range for randomization.
- *
  * The end address could depend on more configuration options to make the
  * highest amount of space for randomization available, but that's too hard
  * to keep straight and caused issues already.
  */
-static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
 static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE;
 
-/* Default values */
-unsigned long page_offset_base = __PAGE_OFFSET_BASE;
-EXPORT_SYMBOL(page_offset_base);
-unsigned long vmalloc_base = __VMALLOC_BASE;
-EXPORT_SYMBOL(vmalloc_base);
-unsigned long vmemmap_base = __VMEMMAP_BASE;
-EXPORT_SYMBOL(vmemmap_base);
-
 /*
  * Memory regions randomized by KASLR (except modules that use a separate logic
  * earlier during boot). The list is ordered based on virtual addresses. This
@@ -60,8 +49,8 @@ static __initdata struct kaslr_memory_region {
 	unsigned long *base;
 	unsigned long size_tb;
 } kaslr_regions[] = {
-	{ &page_offset_base, 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT) /* Maximum */ },
-	{ &vmalloc_base, VMALLOC_SIZE_TB },
+	{ &page_offset_base, 0 },
+	{ &vmalloc_base, 0 },
 	{ &vmemmap_base, 1 },
 };
 
@@ -84,11 +73,14 @@ static inline bool kaslr_memory_enabled(void)
 void __init kernel_randomize_memory(void)
 {
 	size_t i;
-	unsigned long vaddr = vaddr_start;
+	unsigned long vaddr_start, vaddr;
 	unsigned long rand, memory_tb;
 	struct rnd_state rand_state;
 	unsigned long remain_entropy;
 
+	vaddr_start = pgtable_l5_enabled ? __PAGE_OFFSET_BASE_L5 : __PAGE_OFFSET_BASE_L4;
+	vaddr = vaddr_start;
+
 	/*
 	 * These BUILD_BUG_ON checks ensure the memory layout is consistent
 	 * with the vaddr_start/vaddr_end variables. These checks are very
@@ -101,6 +93,9 @@ void __init kernel_randomize_memory(void)
 	if (!kaslr_memory_enabled())
 		return;
 
+	kaslr_regions[0].size_tb = 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT);
+	kaslr_regions[1].size_tb = VMALLOC_SIZE_TB;
+
 	/*
 	 * Update Physical memory mapping to available and
 	 * add padding if needed (especially for memory hotplug support).
@@ -129,7 +124,7 @@ void __init kernel_randomize_memory(void)
 		 */
 		entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i);
 		prandom_bytes_state(&rand_state, &rand, sizeof(rand));
-		if (IS_ENABLED(CONFIG_X86_5LEVEL))
+		if (pgtable_l5_enabled)
 			entropy = (rand % (entropy + 1)) & P4D_MASK;
 		else
 			entropy = (rand % (entropy + 1)) & PUD_MASK;
@@ -141,7 +136,7 @@ void __init kernel_randomize_memory(void)
 		 * randomization alignment.
 		 */
 		vaddr += get_padding(&kaslr_regions[i]);
-		if (IS_ENABLED(CONFIG_X86_5LEVEL))
+		if (pgtable_l5_enabled)
 			vaddr = round_up(vaddr + 1, P4D_SIZE);
 		else
 			vaddr = round_up(vaddr + 1, PUD_SIZE);
@@ -217,7 +212,7 @@ void __meminit init_trampoline(void)
 		return;
 	}
 
-	if (IS_ENABLED(CONFIG_X86_5LEVEL))
+	if (pgtable_l5_enabled)
 		init_trampoline_p4d();
 	else
 		init_trampoline_pud();
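The region sizes zeroed out of the kaslr_regions[] initializer above cannot stay compile-time constants once the paging mode is chosen at boot, because the physical address width they derive from is no longer fixed; they are now filled in at the top of kernel_randomize_memory(). Illustrative arithmetic only, for scale (actual values depend on the mode and kernel version):

	#define TB_SHIFT 40
	/* 46-bit physical mask: 1UL << (46 - 40) =   64 TB of direct map */
	/* 52-bit physical mask: 1UL << (52 - 40) = 4096 TB of direct map */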
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 1a53071e2e17..3a1b5fe4c2ca 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -25,17 +25,12 @@
25#include <asm/bootparam.h> 25#include <asm/bootparam.h>
26#include <asm/set_memory.h> 26#include <asm/set_memory.h>
27#include <asm/cacheflush.h> 27#include <asm/cacheflush.h>
28#include <asm/sections.h>
29#include <asm/processor-flags.h> 28#include <asm/processor-flags.h>
30#include <asm/msr.h> 29#include <asm/msr.h>
31#include <asm/cmdline.h> 30#include <asm/cmdline.h>
32 31
33#include "mm_internal.h" 32#include "mm_internal.h"
34 33
35static char sme_cmdline_arg[] __initdata = "mem_encrypt";
36static char sme_cmdline_on[] __initdata = "on";
37static char sme_cmdline_off[] __initdata = "off";
38
39/* 34/*
40 * Since SME related variables are set early in the boot process they must 35 * Since SME related variables are set early in the boot process they must
41 * reside in the .data section so as not to be zeroed out when the .bss 36 * reside in the .data section so as not to be zeroed out when the .bss
@@ -46,7 +41,7 @@ EXPORT_SYMBOL(sme_me_mask);
46DEFINE_STATIC_KEY_FALSE(sev_enable_key); 41DEFINE_STATIC_KEY_FALSE(sev_enable_key);
47EXPORT_SYMBOL_GPL(sev_enable_key); 42EXPORT_SYMBOL_GPL(sev_enable_key);
48 43
49static bool sev_enabled __section(.data); 44bool sev_enabled __section(.data);
50 45
51/* Buffer used for early in-place encryption by BSP, no locking needed */ 46/* Buffer used for early in-place encryption by BSP, no locking needed */
52static char sme_early_buffer[PAGE_SIZE] __aligned(PAGE_SIZE); 47static char sme_early_buffer[PAGE_SIZE] __aligned(PAGE_SIZE);
@@ -463,574 +458,3 @@ void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
463 /* Make the SWIOTLB buffer area decrypted */ 458 /* Make the SWIOTLB buffer area decrypted */
464 set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT); 459 set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
465} 460}
466
467struct sme_populate_pgd_data {
468 void *pgtable_area;
469 pgd_t *pgd;
470
471 pmdval_t pmd_flags;
472 pteval_t pte_flags;
473 unsigned long paddr;
474
475 unsigned long vaddr;
476 unsigned long vaddr_end;
477};
478
479static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
480{
481 unsigned long pgd_start, pgd_end, pgd_size;
482 pgd_t *pgd_p;
483
484 pgd_start = ppd->vaddr & PGDIR_MASK;
485 pgd_end = ppd->vaddr_end & PGDIR_MASK;
486
487 pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1) * sizeof(pgd_t);
488
489 pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
490
491 memset(pgd_p, 0, pgd_size);
492}
493
494#define PGD_FLAGS _KERNPG_TABLE_NOENC
495#define P4D_FLAGS _KERNPG_TABLE_NOENC
496#define PUD_FLAGS _KERNPG_TABLE_NOENC
497#define PMD_FLAGS _KERNPG_TABLE_NOENC
498
499#define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
500
501#define PMD_FLAGS_DEC PMD_FLAGS_LARGE
502#define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
503 (_PAGE_PAT | _PAGE_PWT))
504
505#define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC)
506
507#define PTE_FLAGS (__PAGE_KERNEL_EXEC & ~_PAGE_GLOBAL)
508
509#define PTE_FLAGS_DEC PTE_FLAGS
510#define PTE_FLAGS_DEC_WP ((PTE_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
511 (_PAGE_PAT | _PAGE_PWT))
512
513#define PTE_FLAGS_ENC (PTE_FLAGS | _PAGE_ENC)
514
515static pmd_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
516{
517 pgd_t *pgd_p;
518 p4d_t *p4d_p;
519 pud_t *pud_p;
520 pmd_t *pmd_p;
521
522 pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
523 if (native_pgd_val(*pgd_p)) {
524 if (IS_ENABLED(CONFIG_X86_5LEVEL))
525 p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
526 else
527 pud_p = (pud_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
528 } else {
529 pgd_t pgd;
530
531 if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
532 p4d_p = ppd->pgtable_area;
533 memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
534 ppd->pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;
535
536 pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS);
537 } else {
538 pud_p = ppd->pgtable_area;
539 memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
540 ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
541
542 pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS);
543 }
544 native_set_pgd(pgd_p, pgd);
545 }
546
547 if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
548 p4d_p += p4d_index(ppd->vaddr);
549 if (native_p4d_val(*p4d_p)) {
550 pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK);
551 } else {
552 p4d_t p4d;
553
554 pud_p = ppd->pgtable_area;
555 memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
556 ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
557
558 p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS);
559 native_set_p4d(p4d_p, p4d);
560 }
561 }
562
563 pud_p += pud_index(ppd->vaddr);
564 if (native_pud_val(*pud_p)) {
565 if (native_pud_val(*pud_p) & _PAGE_PSE)
566 return NULL;
567
568 pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK);
569 } else {
570 pud_t pud;
571
572 pmd_p = ppd->pgtable_area;
573 memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
574 ppd->pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD;
575
576 pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS);
577 native_set_pud(pud_p, pud);
578 }
579
580 return pmd_p;
581}
582
583static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
584{
585 pmd_t *pmd_p;
586
587 pmd_p = sme_prepare_pgd(ppd);
588 if (!pmd_p)
589 return;
590
591 pmd_p += pmd_index(ppd->vaddr);
592 if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
593 native_set_pmd(pmd_p, native_make_pmd(ppd->paddr | ppd->pmd_flags));
594}
595
596static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
597{
598 pmd_t *pmd_p;
599 pte_t *pte_p;
600
601 pmd_p = sme_prepare_pgd(ppd);
602 if (!pmd_p)
603 return;
604
605 pmd_p += pmd_index(ppd->vaddr);
606 if (native_pmd_val(*pmd_p)) {
607 if (native_pmd_val(*pmd_p) & _PAGE_PSE)
608 return;
609
610 pte_p = (pte_t *)(native_pmd_val(*pmd_p) & ~PTE_FLAGS_MASK);
611 } else {
612 pmd_t pmd;
613
614 pte_p = ppd->pgtable_area;
615 memset(pte_p, 0, sizeof(*pte_p) * PTRS_PER_PTE);
616 ppd->pgtable_area += sizeof(*pte_p) * PTRS_PER_PTE;
617
618 pmd = native_make_pmd((pteval_t)pte_p + PMD_FLAGS);
619 native_set_pmd(pmd_p, pmd);
620 }
621
622 pte_p += pte_index(ppd->vaddr);
623 if (!native_pte_val(*pte_p))
624 native_set_pte(pte_p, native_make_pte(ppd->paddr | ppd->pte_flags));
625}
626
627static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
628{
629 while (ppd->vaddr < ppd->vaddr_end) {
630 sme_populate_pgd_large(ppd);
631
632 ppd->vaddr += PMD_PAGE_SIZE;
633 ppd->paddr += PMD_PAGE_SIZE;
634 }
635}
636
637static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
638{
639 while (ppd->vaddr < ppd->vaddr_end) {
640 sme_populate_pgd(ppd);
641
642 ppd->vaddr += PAGE_SIZE;
643 ppd->paddr += PAGE_SIZE;
644 }
645}
646
647static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
648 pmdval_t pmd_flags, pteval_t pte_flags)
649{
650 unsigned long vaddr_end;
651
652 ppd->pmd_flags = pmd_flags;
653 ppd->pte_flags = pte_flags;
654
655 /* Save original end value since we modify the struct value */
656 vaddr_end = ppd->vaddr_end;
657
658 /* If start is not 2MB aligned, create PTE entries */
659 ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE);
660 __sme_map_range_pte(ppd);
661
662 /* Create PMD entries */
663 ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK;
664 __sme_map_range_pmd(ppd);
665
666 /* If end is not 2MB aligned, create PTE entries */
667 ppd->vaddr_end = vaddr_end;
668 __sme_map_range_pte(ppd);
669}
670
671static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
672{
673 __sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC);
674}
675
676static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
677{
678 __sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC);
679}
680
681static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd)
682{
683 __sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP);
684}
685
686static unsigned long __init sme_pgtable_calc(unsigned long len)
687{
688 unsigned long p4d_size, pud_size, pmd_size, pte_size;
689 unsigned long total;
690
691 /*
692 * Perform a relatively simplistic calculation of the pagetable
693 * entries that are needed. Those mappings will be covered mostly
694 * by 2MB PMD entries so we can conservatively calculate the required
695 * number of P4D, PUD and PMD structures needed to perform the
696 * mappings. For mappings that are not 2MB aligned, PTE mappings
697 * would be needed for the start and end portion of the address range
698 * that fall outside of the 2MB alignment. This results in, at most,
699 * two extra pages to hold PTE entries for each range that is mapped.
700 * Incrementing the count for each covers the case where the addresses
701 * cross entries.
702 */
703 if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
704 p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
705 p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D;
706 pud_size = (ALIGN(len, P4D_SIZE) / P4D_SIZE) + 1;
707 pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
708 } else {
709 p4d_size = 0;
710 pud_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
711 pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
712 }
713 pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1;
714 pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;
715 pte_size = 2 * sizeof(pte_t) * PTRS_PER_PTE;
716
717 total = p4d_size + pud_size + pmd_size + pte_size;
718
719 /*
720 * Now calculate the added pagetable structures needed to populate
721 * the new pagetables.
722 */
723 if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
724 p4d_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE;
725 p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D;
726 pud_size = ALIGN(total, P4D_SIZE) / P4D_SIZE;
727 pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
728 } else {
729 p4d_size = 0;
730 pud_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE;
731 pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
732 }
733 pmd_size = ALIGN(total, PUD_SIZE) / PUD_SIZE;
734 pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;
735
736 total += p4d_size + pud_size + pmd_size;
737
738 return total;
739}
740
741void __init __nostackprotector sme_encrypt_kernel(struct boot_params *bp)
742{
743 unsigned long workarea_start, workarea_end, workarea_len;
744 unsigned long execute_start, execute_end, execute_len;
745 unsigned long kernel_start, kernel_end, kernel_len;
746 unsigned long initrd_start, initrd_end, initrd_len;
747 struct sme_populate_pgd_data ppd;
748 unsigned long pgtable_area_len;
749 unsigned long decrypted_base;
750
751 if (!sme_active())
752 return;
753
754 /*
755 * Prepare for encrypting the kernel and initrd by building new
756 * pagetables with the necessary attributes needed to encrypt the
757 * kernel in place.
758 *
759 * One range of virtual addresses will map the memory occupied
760 * by the kernel and initrd as encrypted.
761 *
762 * Another range of virtual addresses will map the memory occupied
763 * by the kernel and initrd as decrypted and write-protected.
764 *
765 * The use of write-protect attribute will prevent any of the
766 * memory from being cached.
767 */
768
769 /* Physical addresses gives us the identity mapped virtual addresses */
770 kernel_start = __pa_symbol(_text);
771 kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
772 kernel_len = kernel_end - kernel_start;
773
774 initrd_start = 0;
775 initrd_end = 0;
776 initrd_len = 0;
777#ifdef CONFIG_BLK_DEV_INITRD
778 initrd_len = (unsigned long)bp->hdr.ramdisk_size |
779 ((unsigned long)bp->ext_ramdisk_size << 32);
780 if (initrd_len) {
781 initrd_start = (unsigned long)bp->hdr.ramdisk_image |
782 ((unsigned long)bp->ext_ramdisk_image << 32);
783 initrd_end = PAGE_ALIGN(initrd_start + initrd_len);
784 initrd_len = initrd_end - initrd_start;
785 }
786#endif
787
788 /* Set the encryption workarea to be immediately after the kernel */
789 workarea_start = kernel_end;
790
791 /*
792 * Calculate required number of workarea bytes needed:
793 * executable encryption area size:
794 * stack page (PAGE_SIZE)
795 * encryption routine page (PAGE_SIZE)
796 * intermediate copy buffer (PMD_PAGE_SIZE)
797 * pagetable structures for the encryption of the kernel
798 * pagetable structures for workarea (in case not currently mapped)
799 */
800 execute_start = workarea_start;
801 execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE;
802 execute_len = execute_end - execute_start;
803
804 /*
805 * One PGD for both encrypted and decrypted mappings and a set of
806 * PUDs and PMDs for each of the encrypted and decrypted mappings.
807 */
808 pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD;
809 pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2;
810 if (initrd_len)
811 pgtable_area_len += sme_pgtable_calc(initrd_len) * 2;
812
813 /* PUDs and PMDs needed in the current pagetables for the workarea */
814 pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len);
815
816 /*
817 * The total workarea includes the executable encryption area and
818 * the pagetable area. The start of the workarea is already 2MB
819 * aligned, align the end of the workarea on a 2MB boundary so that
820 * we don't try to create/allocate PTE entries from the workarea
821 * before it is mapped.
822 */
823 workarea_len = execute_len + pgtable_area_len;
824 workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE);
825
826 /*
827 * Set the address to the start of where newly created pagetable
828 * structures (PGDs, PUDs and PMDs) will be allocated. New pagetable
829 * structures are created when the workarea is added to the current
830 * pagetables and when the new encrypted and decrypted kernel
831 * mappings are populated.
832 */
833 ppd.pgtable_area = (void *)execute_end;
834
835 /*
836 * Make sure the current pagetable structure has entries for
837 * addressing the workarea.
838 */
839 ppd.pgd = (pgd_t *)native_read_cr3_pa();
840 ppd.paddr = workarea_start;
841 ppd.vaddr = workarea_start;
842 ppd.vaddr_end = workarea_end;
843 sme_map_range_decrypted(&ppd);
844
845 /* Flush the TLB - no globals so cr3 is enough */
846 native_write_cr3(__native_read_cr3());
847
848 /*
849 * A new pagetable structure is being built to allow for the kernel
850 * and initrd to be encrypted. It starts with an empty PGD that will
851 * then be populated with new PUDs and PMDs as the encrypted and
852 * decrypted kernel mappings are created.
853 */
854 ppd.pgd = ppd.pgtable_area;
855 memset(ppd.pgd, 0, sizeof(pgd_t) * PTRS_PER_PGD);
856 ppd.pgtable_area += sizeof(pgd_t) * PTRS_PER_PGD;
857
858 /*
859 * A different PGD index/entry must be used to get different
860 * pagetable entries for the decrypted mapping. Choose the next
861 * PGD index and convert it to a virtual address to be used as
862 * the base of the mapping.
863 */
864 decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1);
865 if (initrd_len) {
866 unsigned long check_base;
867
868 check_base = (pgd_index(initrd_end) + 1) & (PTRS_PER_PGD - 1);
869 decrypted_base = max(decrypted_base, check_base);
870 }
871 decrypted_base <<= PGDIR_SHIFT;
872
873 /* Add encrypted kernel (identity) mappings */
874 ppd.paddr = kernel_start;
875 ppd.vaddr = kernel_start;
876 ppd.vaddr_end = kernel_end;
877 sme_map_range_encrypted(&ppd);
878
879 /* Add decrypted, write-protected kernel (non-identity) mappings */
880 ppd.paddr = kernel_start;
881 ppd.vaddr = kernel_start + decrypted_base;
882 ppd.vaddr_end = kernel_end + decrypted_base;
883 sme_map_range_decrypted_wp(&ppd);
884
885 if (initrd_len) {
886 /* Add encrypted initrd (identity) mappings */
887 ppd.paddr = initrd_start;
888 ppd.vaddr = initrd_start;
889 ppd.vaddr_end = initrd_end;
890 sme_map_range_encrypted(&ppd);
891 /*
892 * Add decrypted, write-protected initrd (non-identity) mappings
893 */
894 ppd.paddr = initrd_start;
895 ppd.vaddr = initrd_start + decrypted_base;
896 ppd.vaddr_end = initrd_end + decrypted_base;
897 sme_map_range_decrypted_wp(&ppd);
898 }
899
900 /* Add decrypted workarea mappings to both kernel mappings */
901 ppd.paddr = workarea_start;
902 ppd.vaddr = workarea_start;
903 ppd.vaddr_end = workarea_end;
904 sme_map_range_decrypted(&ppd);
905
906 ppd.paddr = workarea_start;
907 ppd.vaddr = workarea_start + decrypted_base;
908 ppd.vaddr_end = workarea_end + decrypted_base;
909 sme_map_range_decrypted(&ppd);
910
911 /* Perform the encryption */
912 sme_encrypt_execute(kernel_start, kernel_start + decrypted_base,
913 kernel_len, workarea_start, (unsigned long)ppd.pgd);
914
915 if (initrd_len)
916 sme_encrypt_execute(initrd_start, initrd_start + decrypted_base,
917 initrd_len, workarea_start,
918 (unsigned long)ppd.pgd);
919
920 /*
921 * At this point we are running encrypted. Remove the mappings for
922 * the decrypted areas - all that is needed for this is to remove
923 * the PGD entry/entries.
924 */
925 ppd.vaddr = kernel_start + decrypted_base;
926 ppd.vaddr_end = kernel_end + decrypted_base;
927 sme_clear_pgd(&ppd);
928
929 if (initrd_len) {
930 ppd.vaddr = initrd_start + decrypted_base;
931 ppd.vaddr_end = initrd_end + decrypted_base;
932 sme_clear_pgd(&ppd);
933 }
934
935 ppd.vaddr = workarea_start + decrypted_base;
936 ppd.vaddr_end = workarea_end + decrypted_base;
937 sme_clear_pgd(&ppd);
938
939 /* Flush the TLB - no globals so cr3 is enough */
940 native_write_cr3(__native_read_cr3());
941}
942
943void __init __nostackprotector sme_enable(struct boot_params *bp)
944{
945 const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off;
946 unsigned int eax, ebx, ecx, edx;
947 unsigned long feature_mask;
948 bool active_by_default;
949 unsigned long me_mask;
950 char buffer[16];
951 u64 msr;
952
953 /* Check for the SME/SEV support leaf */
954 eax = 0x80000000;
955 ecx = 0;
956 native_cpuid(&eax, &ebx, &ecx, &edx);
957 if (eax < 0x8000001f)
958 return;
959
960#define AMD_SME_BIT BIT(0)
961#define AMD_SEV_BIT BIT(1)
962 /*
963 * Set the feature mask (SME or SEV) based on whether we are
964 * running under a hypervisor.
965 */
966 eax = 1;
967 ecx = 0;
968 native_cpuid(&eax, &ebx, &ecx, &edx);
969 feature_mask = (ecx & BIT(31)) ? AMD_SEV_BIT : AMD_SME_BIT;
970
971 /*
972 * Check for the SME/SEV feature:
973 * CPUID Fn8000_001F[EAX]
974 * - Bit 0 - Secure Memory Encryption support
975 * - Bit 1 - Secure Encrypted Virtualization support
976 * CPUID Fn8000_001F[EBX]
977 * - Bits 5:0 - Pagetable bit position used to indicate encryption
978 */
979 eax = 0x8000001f;
980 ecx = 0;
981 native_cpuid(&eax, &ebx, &ecx, &edx);
982 if (!(eax & feature_mask))
983 return;
984
985 me_mask = 1UL << (ebx & 0x3f);
986
987 /* Check if memory encryption is enabled */
988 if (feature_mask == AMD_SME_BIT) {
989 /* For SME, check the SYSCFG MSR */
990 msr = __rdmsr(MSR_K8_SYSCFG);
991 if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
992 return;
993 } else {
994 /* For SEV, check the SEV MSR */
995 msr = __rdmsr(MSR_AMD64_SEV);
996 if (!(msr & MSR_AMD64_SEV_ENABLED))
997 return;
998
999 /* SEV state cannot be controlled by a command line option */
1000 sme_me_mask = me_mask;
1001 sev_enabled = true;
1002 return;
1003 }
1004
1005 /*
1006 * Fixups have not been applied to phys_base yet and we're running
1007 * identity mapped, so we must obtain the address of the SME command
1008 * line argument data using rip-relative addressing.
1009 */
1010 asm ("lea sme_cmdline_arg(%%rip), %0"
1011 : "=r" (cmdline_arg)
1012 : "p" (sme_cmdline_arg));
1013 asm ("lea sme_cmdline_on(%%rip), %0"
1014 : "=r" (cmdline_on)
1015 : "p" (sme_cmdline_on));
1016 asm ("lea sme_cmdline_off(%%rip), %0"
1017 : "=r" (cmdline_off)
1018 : "p" (sme_cmdline_off));
1019
1020 if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT))
1021 active_by_default = true;
1022 else
1023 active_by_default = false;
1024
1025 cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr |
1026 ((u64)bp->ext_cmd_line_ptr << 32));
1027
1028 cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer));
1029
1030 if (!strncmp(buffer, cmdline_on, sizeof(buffer)))
1031 sme_me_mask = me_mask;
1032 else if (!strncmp(buffer, cmdline_off, sizeof(buffer)))
1033 sme_me_mask = 0;
1034 else
1035 sme_me_mask = active_by_default ? me_mask : 0;
1036}
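
The sme_enable() routine above is straight CPUID and MSR probing. As a rough illustration, the CPUID half can be reproduced from userspace with GCC's <cpuid.h> (a minimal sketch; the SYSCFG/SEV MSR reads require ring 0 and are omitted):

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* Is the 0x8000001f leaf present at all? */
	if (!__get_cpuid(0x80000000, &eax, &ebx, &ecx, &edx) ||
	    eax < 0x8000001f)
		return 1;

	/* EAX bit 0 = SME, bit 1 = SEV; EBX[5:0] = C-bit position */
	__get_cpuid(0x8000001f, &eax, &ebx, &ecx, &edx);
	printf("SME: %u SEV: %u C-bit: %u\n",
	       eax & 1, (eax >> 1) & 1, ebx & 0x3f);
	return 0;
}
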
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
new file mode 100644
index 000000000000..1b2197d13832
--- /dev/null
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -0,0 +1,564 @@
1/*
2 * AMD Memory Encryption Support
3 *
4 * Copyright (C) 2016 Advanced Micro Devices, Inc.
5 *
6 * Author: Tom Lendacky <thomas.lendacky@amd.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#define DISABLE_BRANCH_PROFILING
14
15/*
16 * Since we're dealing with identity mappings, physical and virtual
17 * addresses are the same, so override these defines which are ultimately
18 * used by the headers in misc.h.
19 */
20#define __pa(x) ((unsigned long)(x))
21#define __va(x) ((void *)((unsigned long)(x)))
22
23/*
24 * Special hack: we have to be careful, because no indirections are
25 * allowed here, and paravirt_ops is a kind of indirection. As it will only
26 * run on bare metal anyway, we just keep it from happening. (This list needs
27 * to be extended when new paravirt and debugging variants are added.)
28 */
29#undef CONFIG_PARAVIRT
30#undef CONFIG_PARAVIRT_SPINLOCKS
31
32#include <linux/kernel.h>
33#include <linux/mm.h>
34#include <linux/mem_encrypt.h>
35
36#include <asm/setup.h>
37#include <asm/sections.h>
38#include <asm/cmdline.h>
39
40#include "mm_internal.h"
41
42#define PGD_FLAGS _KERNPG_TABLE_NOENC
43#define P4D_FLAGS _KERNPG_TABLE_NOENC
44#define PUD_FLAGS _KERNPG_TABLE_NOENC
45#define PMD_FLAGS _KERNPG_TABLE_NOENC
46
47#define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
48
49#define PMD_FLAGS_DEC PMD_FLAGS_LARGE
50#define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
51 (_PAGE_PAT | _PAGE_PWT))
52
53#define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC)
54
55#define PTE_FLAGS (__PAGE_KERNEL_EXEC & ~_PAGE_GLOBAL)
56
57#define PTE_FLAGS_DEC PTE_FLAGS
58#define PTE_FLAGS_DEC_WP ((PTE_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
59 (_PAGE_PAT | _PAGE_PWT))
60
61#define PTE_FLAGS_ENC (PTE_FLAGS | _PAGE_ENC)
62
63struct sme_populate_pgd_data {
64 void *pgtable_area;
65 pgd_t *pgd;
66
67 pmdval_t pmd_flags;
68 pteval_t pte_flags;
69 unsigned long paddr;
70
71 unsigned long vaddr;
72 unsigned long vaddr_end;
73};
74
75static char sme_cmdline_arg[] __initdata = "mem_encrypt";
76static char sme_cmdline_on[] __initdata = "on";
77static char sme_cmdline_off[] __initdata = "off";
78
79static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
80{
81 unsigned long pgd_start, pgd_end, pgd_size;
82 pgd_t *pgd_p;
83
84 pgd_start = ppd->vaddr & PGDIR_MASK;
85 pgd_end = ppd->vaddr_end & PGDIR_MASK;
86
87 pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1) * sizeof(pgd_t);
88
89 pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
90
91 memset(pgd_p, 0, pgd_size);
92}
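/*
 * Worked example (illustration only, assuming 4-level paging where
 * PGDIR_SIZE is 512GB): clearing vaddr = 0x7f0000000000 through
 * vaddr_end = 0x7f8000000000 gives pgd_start == vaddr and
 * pgd_end == vaddr_end (both already 512GB aligned), so
 * pgd_size = ((512GB / 512GB) + 1) * sizeof(pgd_t) = 16 bytes and the
 * memset() zeroes the two PGD entries (indices 254 and 255) that the
 * range can touch.
 */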
93
94static pud_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
95{
96 pgd_t *pgd;
97 p4d_t *p4d;
98 pud_t *pud;
99 pmd_t *pmd;
100
101 pgd = ppd->pgd + pgd_index(ppd->vaddr);
102 if (pgd_none(*pgd)) {
103 p4d = ppd->pgtable_area;
104 memset(p4d, 0, sizeof(*p4d) * PTRS_PER_P4D);
105 ppd->pgtable_area += sizeof(*p4d) * PTRS_PER_P4D;
106 set_pgd(pgd, __pgd(PGD_FLAGS | __pa(p4d)));
107 }
108
109 p4d = p4d_offset(pgd, ppd->vaddr);
110 if (p4d_none(*p4d)) {
111 pud = ppd->pgtable_area;
112 memset(pud, 0, sizeof(*pud) * PTRS_PER_PUD);
113 ppd->pgtable_area += sizeof(*pud) * PTRS_PER_PUD;
114 set_p4d(p4d, __p4d(P4D_FLAGS | __pa(pud)));
115 }
116
117 pud = pud_offset(p4d, ppd->vaddr);
118 if (pud_none(*pud)) {
119 pmd = ppd->pgtable_area;
120 memset(pmd, 0, sizeof(*pmd) * PTRS_PER_PMD);
121 ppd->pgtable_area += sizeof(*pmd) * PTRS_PER_PMD;
122 set_pud(pud, __pud(PUD_FLAGS | __pa(pmd)));
123 }
124
125 if (pud_large(*pud))
126 return NULL;
127
128 return pud;
129}
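/*
 * Illustration (not part of this file): pgtable_area acts as a simple
 * bump allocator - each branch above carves the next table out of it
 * and advances the pointer, e.g. a missing PUD costs
 * sizeof(pud_t) * PTRS_PER_PUD = 8 * 512 bytes = one 4KB page.
 * sme_pgtable_calc() below sizes the area up front so these
 * allocations cannot run out.
 */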
130
131static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
132{
133 pud_t *pud;
134 pmd_t *pmd;
135
136 pud = sme_prepare_pgd(ppd);
137 if (!pud)
138 return;
139
140 pmd = pmd_offset(pud, ppd->vaddr);
141 if (pmd_large(*pmd))
142 return;
143
144 set_pmd(pmd, __pmd(ppd->paddr | ppd->pmd_flags));
145}
146
147static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
148{
149 pud_t *pud;
150 pmd_t *pmd;
151 pte_t *pte;
152
153 pud = sme_prepare_pgd(ppd);
154 if (!pud)
155 return;
156
157 pmd = pmd_offset(pud, ppd->vaddr);
158 if (pmd_none(*pmd)) {
159 pte = ppd->pgtable_area;
160 memset(pte, 0, sizeof(*pte) * PTRS_PER_PTE);
161 ppd->pgtable_area += sizeof(*pte) * PTRS_PER_PTE;
162 set_pmd(pmd, __pmd(PMD_FLAGS | __pa(pte)));
163 }
164
165 if (pmd_large(*pmd))
166 return;
167
168 pte = pte_offset_map(pmd, ppd->vaddr);
169 if (pte_none(*pte))
170 set_pte(pte, __pte(ppd->paddr | ppd->pte_flags));
171}
172
173static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
174{
175 while (ppd->vaddr < ppd->vaddr_end) {
176 sme_populate_pgd_large(ppd);
177
178 ppd->vaddr += PMD_PAGE_SIZE;
179 ppd->paddr += PMD_PAGE_SIZE;
180 }
181}
182
183static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
184{
185 while (ppd->vaddr < ppd->vaddr_end) {
186 sme_populate_pgd(ppd);
187
188 ppd->vaddr += PAGE_SIZE;
189 ppd->paddr += PAGE_SIZE;
190 }
191}
192
193static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
194 pmdval_t pmd_flags, pteval_t pte_flags)
195{
196 unsigned long vaddr_end;
197
198 ppd->pmd_flags = pmd_flags;
199 ppd->pte_flags = pte_flags;
200
201 /* Save original end value since we modify the struct value */
202 vaddr_end = ppd->vaddr_end;
203
204 /* If start is not 2MB aligned, create PTE entries */
205 ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE);
206 __sme_map_range_pte(ppd);
207
208 /* Create PMD entries */
209 ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK;
210 __sme_map_range_pmd(ppd);
211
212 /* If end is not 2MB aligned, create PTE entries */
213 ppd->vaddr_end = vaddr_end;
214 __sme_map_range_pte(ppd);
215}
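/*
 * Worked example (illustration only): mapping vaddr = 0x601000 through
 * vaddr_end = 0xa00000 proceeds in three steps. The PTE head covers
 * 0x601000 - 0x800000 (511 4KB pages up to the next 2MB boundary),
 * the PMD body covers 0x800000 - 0xa00000 (one 2MB entry), and the
 * PTE tail is empty because vaddr_end is already 2MB aligned.
 */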
216
217static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
218{
219 __sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC);
220}
221
222static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
223{
224 __sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC);
225}
226
227static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd)
228{
229 __sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP);
230}
231
232static unsigned long __init sme_pgtable_calc(unsigned long len)
233{
234 unsigned long entries = 0, tables = 0;
235
236 /*
237 * Perform a relatively simplistic calculation of the pagetable
238 * entries that are needed. Those mappings will be covered mostly
239 * by 2MB PMD entries so we can conservatively calculate the required
240 * number of P4D, PUD and PMD structures needed to perform the
241 * mappings. For mappings that are not 2MB aligned, PTE mappings
242 * would be needed for the start and end portions of the address range
243 * that fall outside of the 2MB alignment. This results in, at most,
244 * two extra pages to hold PTE entries for each range that is mapped.
245 * Incrementing the count for each covers the case where the addresses
246 * cross entries.
247 */
248
249 /* PGDIR_SIZE is equal to P4D_SIZE on a 4-level machine. */
250 if (PTRS_PER_P4D > 1)
251 entries += (DIV_ROUND_UP(len, PGDIR_SIZE) + 1) * sizeof(p4d_t) * PTRS_PER_P4D;
252 entries += (DIV_ROUND_UP(len, P4D_SIZE) + 1) * sizeof(pud_t) * PTRS_PER_PUD;
253 entries += (DIV_ROUND_UP(len, PUD_SIZE) + 1) * sizeof(pmd_t) * PTRS_PER_PMD;
254 entries += 2 * sizeof(pte_t) * PTRS_PER_PTE;
255
256 /*
257 * Now calculate the added pagetable structures needed to populate
258 * the new pagetables.
259 */
260
261 if (PTRS_PER_P4D > 1)
262 tables += DIV_ROUND_UP(entries, PGDIR_SIZE) * sizeof(p4d_t) * PTRS_PER_P4D;
263 tables += DIV_ROUND_UP(entries, P4D_SIZE) * sizeof(pud_t) * PTRS_PER_PUD;
264 tables += DIV_ROUND_UP(entries, PUD_SIZE) * sizeof(pmd_t) * PTRS_PER_PMD;
265
266 return entries + tables;
267}
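/*
 * Worked example (illustration only, 4-level paging assumed so the
 * P4D terms drop out): for len = 16MB, DIV_ROUND_UP(len, P4D_SIZE) and
 * DIV_ROUND_UP(len, PUD_SIZE) are both 1, so entries comes to
 * 2 * 4KB (PUD pages) + 2 * 4KB (PMD pages) + 2 * 4KB (PTE pages) =
 * 24KB. The second pass adds one PUD page and one PMD page to map
 * those 24KB themselves, for a total of 32KB, i.e. eight 4KB
 * pagetable pages reserved for the range.
 */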
268
269void __init sme_encrypt_kernel(struct boot_params *bp)
270{
271 unsigned long workarea_start, workarea_end, workarea_len;
272 unsigned long execute_start, execute_end, execute_len;
273 unsigned long kernel_start, kernel_end, kernel_len;
274 unsigned long initrd_start, initrd_end, initrd_len;
275 struct sme_populate_pgd_data ppd;
276 unsigned long pgtable_area_len;
277 unsigned long decrypted_base;
278
279 if (!sme_active())
280 return;
281
282 /*
283 * Prepare for encrypting the kernel and initrd by building new
284 * pagetables with the necessary attributes needed to encrypt the
285 * kernel in place.
286 *
287 * One range of virtual addresses will map the memory occupied
288 * by the kernel and initrd as encrypted.
289 *
290 * Another range of virtual addresses will map the memory occupied
291 * by the kernel and initrd as decrypted and write-protected.
292 *
293 * The use of the write-protect attribute will prevent any of this
294 * memory from being cached.
295 */
296
297 /* Physical addresses give us the identity-mapped virtual addresses */
298 kernel_start = __pa_symbol(_text);
299 kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
300 kernel_len = kernel_end - kernel_start;
301
302 initrd_start = 0;
303 initrd_end = 0;
304 initrd_len = 0;
305#ifdef CONFIG_BLK_DEV_INITRD
306 initrd_len = (unsigned long)bp->hdr.ramdisk_size |
307 ((unsigned long)bp->ext_ramdisk_size << 32);
308 if (initrd_len) {
309 initrd_start = (unsigned long)bp->hdr.ramdisk_image |
310 ((unsigned long)bp->ext_ramdisk_image << 32);
311 initrd_end = PAGE_ALIGN(initrd_start + initrd_len);
312 initrd_len = initrd_end - initrd_start;
313 }
314#endif
315
316 /* Set the encryption workarea to be immediately after the kernel */
317 workarea_start = kernel_end;
318
319 /*
320 * Calculate the number of workarea bytes needed:
321 * executable encryption area size:
322 * stack page (PAGE_SIZE)
323 * encryption routine page (PAGE_SIZE)
324 * intermediate copy buffer (PMD_PAGE_SIZE)
325 * pagetable structures for the encryption of the kernel
326 * pagetable structures for workarea (in case not currently mapped)
327 */
328 execute_start = workarea_start;
329 execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE;
330 execute_len = execute_end - execute_start;
331
332 /*
333 * One PGD for both encrypted and decrypted mappings and a set of
334 * PUDs and PMDs for each of the encrypted and decrypted mappings.
335 */
336 pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD;
337 pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2;
338 if (initrd_len)
339 pgtable_area_len += sme_pgtable_calc(initrd_len) * 2;
340
341 /* PUDs and PMDs needed in the current pagetables for the workarea */
342 pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len);
343
344 /*
345 * The total workarea includes the executable encryption area and
346 * the pagetable area. The start of the workarea is already 2MB
347 * aligned, align the end of the workarea on a 2MB boundary so that
348 * we don't try to create/allocate PTE entries from the workarea
349 * before it is mapped.
350 */
351 workarea_len = execute_len + pgtable_area_len;
352 workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE);
353
354 /*
355 * Set the address to the start of where newly created pagetable
356 * structures (PGDs, PUDs and PMDs) will be allocated. New pagetable
357 * structures are created when the workarea is added to the current
358 * pagetables and when the new encrypted and decrypted kernel
359 * mappings are populated.
360 */
361 ppd.pgtable_area = (void *)execute_end;
362
363 /*
364 * Make sure the current pagetable structure has entries for
365 * addressing the workarea.
366 */
367 ppd.pgd = (pgd_t *)native_read_cr3_pa();
368 ppd.paddr = workarea_start;
369 ppd.vaddr = workarea_start;
370 ppd.vaddr_end = workarea_end;
371 sme_map_range_decrypted(&ppd);
372
373 /* Flush the TLB - no globals so cr3 is enough */
374 native_write_cr3(__native_read_cr3());
375
376 /*
377 * A new pagetable structure is being built to allow for the kernel
378 * and initrd to be encrypted. It starts with an empty PGD that will
379 * then be populated with new PUDs and PMDs as the encrypted and
380 * decrypted kernel mappings are created.
381 */
382 ppd.pgd = ppd.pgtable_area;
383 memset(ppd.pgd, 0, sizeof(pgd_t) * PTRS_PER_PGD);
384 ppd.pgtable_area += sizeof(pgd_t) * PTRS_PER_PGD;
385
386 /*
387 * A different PGD index/entry must be used to get different
388 * pagetable entries for the decrypted mapping. Choose the next
389 * PGD index and convert it to a virtual address to be used as
390 * the base of the mapping.
391 */
392 decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1);
393 if (initrd_len) {
394 unsigned long check_base;
395
396 check_base = (pgd_index(initrd_end) + 1) & (PTRS_PER_PGD - 1);
397 decrypted_base = max(decrypted_base, check_base);
398 }
399 decrypted_base <<= PGDIR_SHIFT;
400
401 /* Add encrypted kernel (identity) mappings */
402 ppd.paddr = kernel_start;
403 ppd.vaddr = kernel_start;
404 ppd.vaddr_end = kernel_end;
405 sme_map_range_encrypted(&ppd);
406
407 /* Add decrypted, write-protected kernel (non-identity) mappings */
408 ppd.paddr = kernel_start;
409 ppd.vaddr = kernel_start + decrypted_base;
410 ppd.vaddr_end = kernel_end + decrypted_base;
411 sme_map_range_decrypted_wp(&ppd);
412
413 if (initrd_len) {
414 /* Add encrypted initrd (identity) mappings */
415 ppd.paddr = initrd_start;
416 ppd.vaddr = initrd_start;
417 ppd.vaddr_end = initrd_end;
418 sme_map_range_encrypted(&ppd);
419 /*
420 * Add decrypted, write-protected initrd (non-identity) mappings
421 */
422 ppd.paddr = initrd_start;
423 ppd.vaddr = initrd_start + decrypted_base;
424 ppd.vaddr_end = initrd_end + decrypted_base;
425 sme_map_range_decrypted_wp(&ppd);
426 }
427
428 /* Add decrypted workarea mappings to both kernel mappings */
429 ppd.paddr = workarea_start;
430 ppd.vaddr = workarea_start;
431 ppd.vaddr_end = workarea_end;
432 sme_map_range_decrypted(&ppd);
433
434 ppd.paddr = workarea_start;
435 ppd.vaddr = workarea_start + decrypted_base;
436 ppd.vaddr_end = workarea_end + decrypted_base;
437 sme_map_range_decrypted(&ppd);
438
439 /* Perform the encryption */
440 sme_encrypt_execute(kernel_start, kernel_start + decrypted_base,
441 kernel_len, workarea_start, (unsigned long)ppd.pgd);
442
443 if (initrd_len)
444 sme_encrypt_execute(initrd_start, initrd_start + decrypted_base,
445 initrd_len, workarea_start,
446 (unsigned long)ppd.pgd);
447
448 /*
449 * At this point we are running encrypted. Remove the mappings for
450 * the decrypted areas - all that is needed for this is to remove
451 * the PGD entry/entries.
452 */
453 ppd.vaddr = kernel_start + decrypted_base;
454 ppd.vaddr_end = kernel_end + decrypted_base;
455 sme_clear_pgd(&ppd);
456
457 if (initrd_len) {
458 ppd.vaddr = initrd_start + decrypted_base;
459 ppd.vaddr_end = initrd_end + decrypted_base;
460 sme_clear_pgd(&ppd);
461 }
462
463 ppd.vaddr = workarea_start + decrypted_base;
464 ppd.vaddr_end = workarea_end + decrypted_base;
465 sme_clear_pgd(&ppd);
466
467 /* Flush the TLB - no globals so cr3 is enough */
468 native_write_cr3(__native_read_cr3());
469}
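/*
 * Worked example (illustration only, 4-level paging): a PGD entry
 * spans 512GB, so if workarea_end lies below 512GB then
 * pgd_index(workarea_end) is 0, decrypted_base becomes
 * 1 << PGDIR_SHIFT = 512GB, and the decrypted, write-protected alias
 * of the kernel is mapped at kernel_start + 512GB - guaranteed to use
 * a different PGD entry than the identity mapping being encrypted.
 */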
470
471void __init sme_enable(struct boot_params *bp)
472{
473 const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off;
474 unsigned int eax, ebx, ecx, edx;
475 unsigned long feature_mask;
476 bool active_by_default;
477 unsigned long me_mask;
478 char buffer[16];
479 u64 msr;
480
481 /* Check for the SME/SEV support leaf */
482 eax = 0x80000000;
483 ecx = 0;
484 native_cpuid(&eax, &ebx, &ecx, &edx);
485 if (eax < 0x8000001f)
486 return;
487
488#define AMD_SME_BIT BIT(0)
489#define AMD_SEV_BIT BIT(1)
490 /*
491 * Set the feature mask (SME or SEV) based on whether we are
492 * running under a hypervisor.
493 */
494 eax = 1;
495 ecx = 0;
496 native_cpuid(&eax, &ebx, &ecx, &edx);
497 feature_mask = (ecx & BIT(31)) ? AMD_SEV_BIT : AMD_SME_BIT;
498
499 /*
500 * Check for the SME/SEV feature:
501 * CPUID Fn8000_001F[EAX]
502 * - Bit 0 - Secure Memory Encryption support
503 * - Bit 1 - Secure Encrypted Virtualization support
504 * CPUID Fn8000_001F[EBX]
505 * - Bits 5:0 - Pagetable bit position used to indicate encryption
506 */
507 eax = 0x8000001f;
508 ecx = 0;
509 native_cpuid(&eax, &ebx, &ecx, &edx);
510 if (!(eax & feature_mask))
511 return;
512
513 me_mask = 1UL << (ebx & 0x3f);
514
515 /* Check if memory encryption is enabled */
516 if (feature_mask == AMD_SME_BIT) {
517 /* For SME, check the SYSCFG MSR */
518 msr = __rdmsr(MSR_K8_SYSCFG);
519 if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
520 return;
521 } else {
522 /* For SEV, check the SEV MSR */
523 msr = __rdmsr(MSR_AMD64_SEV);
524 if (!(msr & MSR_AMD64_SEV_ENABLED))
525 return;
526
527 /* SEV state cannot be controlled by a command line option */
528 sme_me_mask = me_mask;
529 sev_enabled = true;
530 return;
531 }
532
533 /*
534 * Fixups have not been applied to phys_base yet and we're running
535 * identity mapped, so we must obtain the address of the SME command
536 * line argument data using rip-relative addressing.
537 */
538 asm ("lea sme_cmdline_arg(%%rip), %0"
539 : "=r" (cmdline_arg)
540 : "p" (sme_cmdline_arg));
541 asm ("lea sme_cmdline_on(%%rip), %0"
542 : "=r" (cmdline_on)
543 : "p" (sme_cmdline_on));
544 asm ("lea sme_cmdline_off(%%rip), %0"
545 : "=r" (cmdline_off)
546 : "p" (sme_cmdline_off));
547
548 if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT))
549 active_by_default = true;
550 else
551 active_by_default = false;
552
553 cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr |
554 ((u64)bp->ext_cmd_line_ptr << 32));
555
556 cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer));
557
558 if (!strncmp(buffer, cmdline_on, sizeof(buffer)))
559 sme_me_mask = me_mask;
560 else if (!strncmp(buffer, cmdline_off, sizeof(buffer)))
561 sme_me_mask = 0;
562 else
563 sme_me_mask = active_by_default ? me_mask : 0;
564}
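
The tail of sme_enable() reduces to a three-way decision on the mem_encrypt= parameter. A minimal userspace sketch of just that decision (the helper name and the use of strcmp() in place of the kernel's cmdline_find_option()/strncmp() pair are stand-ins):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static unsigned long long sme_mask_for(const char *arg,
				       bool active_by_default,
				       unsigned long long me_mask)
{
	if (!strcmp(arg, "on"))
		return me_mask;
	if (!strcmp(arg, "off"))
		return 0;
	/* absent or unrecognized: fall back to the Kconfig default */
	return active_by_default ? me_mask : 0;
}

int main(void)
{
	unsigned long long me_mask = 1ULL << 47;	/* example C-bit */

	printf("%llx\n", sme_mask_for("on", false, me_mask));	/* me_mask */
	printf("%llx\n", sme_mask_for("off", true, me_mask));	/* 0 */
	printf("%llx\n", sme_mask_for("", true, me_mask));	/* me_mask */
	return 0;
}
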
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index aca6295350f3..e8a4a09e20f1 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -60,17 +60,6 @@ void memory_present(int nid, unsigned long start, unsigned long end)
60 } 60 }
61 printk(KERN_CONT "\n"); 61 printk(KERN_CONT "\n");
62} 62}
63
64unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
65 unsigned long end_pfn)
66{
67 unsigned long nr_pages = end_pfn - start_pfn;
68
69 if (!nr_pages)
70 return 0;
71
72 return (nr_pages + 1) * sizeof(struct page);
73}
74#endif 63#endif
75 64
76extern unsigned long highend_pfn, highstart_pfn; 65extern unsigned long highend_pfn, highstart_pfn;
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 7f1a51399674..e055d1a06699 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -157,7 +157,7 @@ static void sync_current_stack_to_mm(struct mm_struct *mm)
157 unsigned long sp = current_stack_pointer; 157 unsigned long sp = current_stack_pointer;
158 pgd_t *pgd = pgd_offset(mm, sp); 158 pgd_t *pgd = pgd_offset(mm, sp);
159 159
160 if (CONFIG_PGTABLE_LEVELS > 4) { 160 if (pgtable_l5_enabled) {
161 if (unlikely(pgd_none(*pgd))) { 161 if (unlikely(pgd_none(*pgd))) {
162 pgd_t *pgd_ref = pgd_offset_k(sp); 162 pgd_t *pgd_ref = pgd_offset_k(sp);
163 163
@@ -613,7 +613,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
613{ 613{
614 int cpu; 614 int cpu;
615 615
616 struct flush_tlb_info info = { 616 struct flush_tlb_info info __aligned(SMP_CACHE_BYTES) = {
617 .mm = mm, 617 .mm = mm,
618 }; 618 };
619 619
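
The tlb.c hunk above makes the on-stack flush_tlb_info cache-line aligned, so the remote CPUs that read it while servicing the flush IPI do not pull in a line shared with unrelated stack data. A minimal sketch of the same attribute outside the kernel (the 64-byte line size is an assumption; the kernel uses SMP_CACHE_BYTES):

#include <stdint.h>
#include <stdio.h>

#define CACHE_LINE 64	/* assumed cache-line size */

struct flush_info {
	void *mm;
	unsigned long start, end;
} __attribute__((aligned(CACHE_LINE)));

int main(void)
{
	struct flush_info info = { 0 };

	printf("aligned: %d\n", ((uintptr_t)&info % CACHE_LINE) == 0);
	return 0;
}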