68 files changed, 1657 insertions, 1016 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 50b9837e985b..b37c1c30c16f 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2248,6 +2248,15 @@ | |||
2248 | The memory region may be marked as e820 type 12 (0xc) | 2248 | The memory region may be marked as e820 type 12 (0xc) |
2249 | and is NVDIMM or ADR memory. | 2249 | and is NVDIMM or ADR memory. |
2250 | 2250 | ||
2251 | memmap=<size>%<offset>-<oldtype>+<newtype> | ||
2252 | [KNL,ACPI] Convert memory within the specified region | ||
2253 | from <oldtype> to <newtype>. If "-<oldtype>" is left | ||
2254 | out, the whole region will be marked as <newtype>, | ||
2255 | even if previously unavailable. If "+<newtype>" is left | ||
2256 | out, matching memory will be removed. Types are | ||
2257 | specified as e820 types, e.g., 1 = RAM, 2 = reserved, | ||
2258 | 3 = ACPI, 12 = PRAM. | ||
2259 | |||
2251 | memory_corruption_check=0/1 [X86] | 2260 | memory_corruption_check=0/1 [X86] |
2252 | Some BIOSes seem to corrupt the first 64k of | 2261 | Some BIOSes seem to corrupt the first 64k of |
2253 | memory when doing things like suspend/resume. | 2262 | memory when doing things like suspend/resume. |
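An illustration of the syntax documented above (the values are hypothetical, not part of the patch): booting with

	memmap=2G%4G-1+12

converts 2G of e820 type 1 (RAM) starting at the 4G boundary into type 12 (PRAM), while

	memmap=2G%4G+12

marks the whole 2G region as PRAM regardless of its previous type.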
diff --git a/Documentation/x86/x86_64/5level-paging.txt b/Documentation/x86/x86_64/5level-paging.txt
index 087251a0d99c..2432a5ef86d9 100644
--- a/Documentation/x86/x86_64/5level-paging.txt
+++ b/Documentation/x86/x86_64/5level-paging.txt
@@ -20,12 +20,9 @@ Documentation/x86/x86_64/mm.txt | |||
20 | 20 | ||
21 | CONFIG_X86_5LEVEL=y enables the feature. | 21 | CONFIG_X86_5LEVEL=y enables the feature. |
22 | 22 | ||
23 | So far, a kernel compiled with the option enabled will be able to boot | 23 | A kernel with CONFIG_X86_5LEVEL=y is still able to boot on 4-level hardware. |
24 | only on machines that supports the feature -- see for 'la57' flag in | 24 | In this case, the additional page table level -- p4d -- will be folded at |
25 | /proc/cpuinfo. | 25 | runtime. |
26 | |||
27 | The plan is to implement boot-time switching between 4- and 5-level paging | ||
28 | in the future. | ||
29 | 26 | ||
30 | == User-space and large virtual address space == | 27 | == User-space and large virtual address space == |
31 | 28 | ||
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cb5b5907dbd6..518b41b097dc 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1461,6 +1461,8 @@ config X86_PAE | |||
1461 | 1461 | ||
1462 | config X86_5LEVEL | 1462 | config X86_5LEVEL |
1463 | bool "Enable 5-level page tables support" | 1463 | bool "Enable 5-level page tables support" |
1464 | select DYNAMIC_MEMORY_LAYOUT | ||
1465 | select SPARSEMEM_VMEMMAP | ||
1464 | depends on X86_64 | 1466 | depends on X86_64 |
1465 | ---help--- | 1467 | ---help--- |
1466 | 5-level paging enables access to larger address space: | 1468 | 5-level paging enables access to larger address space: |
@@ -1469,8 +1471,8 @@ config X86_5LEVEL | |||
1469 | 1471 | ||
1470 | It will be supported by future Intel CPUs. | 1472 | It will be supported by future Intel CPUs. |
1471 | 1473 | ||
1472 | Note: a kernel with this option enabled can only be booted | 1474 | A kernel with the option enabled can be booted on machines that |
1473 | on machines that support the feature. | 1475 | support 4- or 5-level paging. |
1474 | 1476 | ||
1475 | See Documentation/x86/x86_64/5level-paging.txt for more | 1477 | See Documentation/x86/x86_64/5level-paging.txt for more |
1476 | information. | 1478 | information. |
@@ -1595,10 +1597,6 @@ config ARCH_HAVE_MEMORY_PRESENT | |||
1595 | def_bool y | 1597 | def_bool y |
1596 | depends on X86_32 && DISCONTIGMEM | 1598 | depends on X86_32 && DISCONTIGMEM |
1597 | 1599 | ||
1598 | config NEED_NODE_MEMMAP_SIZE | ||
1599 | def_bool y | ||
1600 | depends on X86_32 && (DISCONTIGMEM || SPARSEMEM) | ||
1601 | |||
1602 | config ARCH_FLATMEM_ENABLE | 1600 | config ARCH_FLATMEM_ENABLE |
1603 | def_bool y | 1601 | def_bool y |
1604 | depends on X86_32 && !NUMA | 1602 | depends on X86_32 && !NUMA |
@@ -2174,10 +2172,17 @@ config PHYSICAL_ALIGN | |||
2174 | 2172 | ||
2175 | Don't change this unless you know what you are doing. | 2173 | Don't change this unless you know what you are doing. |
2176 | 2174 | ||
2175 | config DYNAMIC_MEMORY_LAYOUT | ||
2176 | bool | ||
2177 | ---help--- | ||
2178 | This option makes the base addresses of vmalloc and vmemmap, as well | ||
2179 | as __PAGE_OFFSET, movable during boot. | ||
2180 | |||
2177 | config RANDOMIZE_MEMORY | 2181 | config RANDOMIZE_MEMORY |
2178 | bool "Randomize the kernel memory sections" | 2182 | bool "Randomize the kernel memory sections" |
2179 | depends on X86_64 | 2183 | depends on X86_64 |
2180 | depends on RANDOMIZE_BASE | 2184 | depends on RANDOMIZE_BASE |
2185 | select DYNAMIC_MEMORY_LAYOUT | ||
2181 | default RANDOMIZE_BASE | 2186 | default RANDOMIZE_BASE |
2182 | ---help--- | 2187 | ---help--- |
2183 | Randomizes the base virtual address of kernel memory sections | 2188 | Randomizes the base virtual address of kernel memory sections |
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index f484ae0ece93..fa42f895fdde 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -78,7 +78,7 @@ vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ | |||
78 | vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o | 78 | vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o |
79 | vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o | 79 | vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o |
80 | ifdef CONFIG_X86_64 | 80 | ifdef CONFIG_X86_64 |
81 | vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/pagetable.o | 81 | vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr_64.o |
82 | vmlinux-objs-y += $(obj)/mem_encrypt.o | 82 | vmlinux-objs-y += $(obj)/mem_encrypt.o |
83 | vmlinux-objs-y += $(obj)/pgtable_64.o | 83 | vmlinux-objs-y += $(obj)/pgtable_64.o |
84 | endif | 84 | endif |
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index fc313e29fe2c..fca012baba19 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -33,6 +33,7 @@ | |||
33 | #include <asm/processor-flags.h> | 33 | #include <asm/processor-flags.h> |
34 | #include <asm/asm-offsets.h> | 34 | #include <asm/asm-offsets.h> |
35 | #include <asm/bootparam.h> | 35 | #include <asm/bootparam.h> |
36 | #include "pgtable.h" | ||
36 | 37 | ||
37 | /* | 38 | /* |
38 | * Locally defined symbols should be marked hidden: | 39 | * Locally defined symbols should be marked hidden: |
@@ -304,55 +305,77 @@ ENTRY(startup_64) | |||
304 | /* Set up the stack */ | 305 | /* Set up the stack */ |
305 | leaq boot_stack_end(%rbx), %rsp | 306 | leaq boot_stack_end(%rbx), %rsp |
306 | 307 | ||
307 | #ifdef CONFIG_X86_5LEVEL | ||
308 | /* | 308 | /* |
309 | * Check if we need to enable 5-level paging. | 309 | * At this point we are in long mode with 4-level paging enabled, |
310 | * RSI holds real mode data and need to be preserved across | 310 | * but we might want to enable 5-level paging or vice versa. |
311 | * a function call. | 311 | * |
312 | * The problem is that we cannot do it directly. Setting or clearing | ||
313 | * CR4.LA57 in long mode would trigger #GP. So we need to switch off | ||
314 | * long mode and paging first. | ||
315 | * | ||
316 | * We also need a trampoline in lower memory to switch over from | ||
317 | * 4- to 5-level paging for cases when the bootloader puts the kernel | ||
318 | * above 4G, but didn't enable 5-level paging for us. | ||
319 | * | ||
320 | * The same trampoline can be used to switch from 5- to 4-level paging | ||
321 | * mode, like when starting a 4-level paging kernel via kexec() when the | ||
322 | * original kernel worked in 5-level paging mode. | ||
323 | * | ||
324 | * For the trampoline, we need the top page table to reside in lower | ||
325 | * memory as we don't have a way to load 64-bit values into CR3 in | ||
326 | * 32-bit mode. | ||
327 | * | ||
328 | * We go through the trampoline even if we don't have to: if we're | ||
329 | * already in a desired paging mode. This way the trampoline code gets | ||
330 | * tested on every boot. | ||
312 | */ | 331 | */ |
313 | pushq %rsi | ||
314 | call l5_paging_required | ||
315 | popq %rsi | ||
316 | 332 | ||
317 | /* If l5_paging_required() returned zero, we're done here. */ | 333 | /* Make sure we have GDT with 32-bit code segment */ |
318 | cmpq $0, %rax | 334 | leaq gdt(%rip), %rax |
319 | je lvl5 | 335 | movq %rax, gdt64+2(%rip) |
336 | lgdt gdt64(%rip) | ||
320 | 337 | ||
321 | /* | 338 | /* |
322 | * At this point we are in long mode with 4-level paging enabled, | 339 | * paging_prepare() sets up the trampoline and checks if we need to |
323 | * but we want to enable 5-level paging. | 340 | * enable 5-level paging. |
324 | * | 341 | * |
325 | * The problem is that we cannot do it directly. Setting LA57 in | 342 | * Address of the trampoline is returned in RAX. |
326 | * long mode would trigger #GP. So we need to switch off long mode | 343 | * Non zero RDX on return means we need to enable 5-level paging. |
327 | * first. | ||
328 | * | 344 | * |
329 | * NOTE: This is not going to work if bootloader put us above 4G | 345 | * RSI holds real mode data and needs to be preserved across |
330 | * limit. | 346 | * this function call. |
331 | * | ||
332 | * The first step is go into compatibility mode. | ||
333 | */ | 347 | */ |
348 | pushq %rsi | ||
349 | call paging_prepare | ||
350 | popq %rsi | ||
334 | 351 | ||
335 | /* Clear additional page table */ | 352 | /* Save the trampoline address in RCX */ |
336 | leaq lvl5_pgtable(%rbx), %rdi | 353 | movq %rax, %rcx |
337 | xorq %rax, %rax | ||
338 | movq $(PAGE_SIZE/8), %rcx | ||
339 | rep stosq | ||
340 | 354 | ||
341 | /* | 355 | /* |
342 | * Setup current CR3 as the first and only entry in a new top level | 356 | * Load the address of trampoline_return() into RDI. |
343 | * page table. | 357 | * It will be used by the trampoline to return to the main code. |
344 | */ | 358 | */ |
345 | movq %cr3, %rdi | 359 | leaq trampoline_return(%rip), %rdi |
346 | leaq 0x7 (%rdi), %rax | ||
347 | movq %rax, lvl5_pgtable(%rbx) | ||
348 | 360 | ||
349 | /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */ | 361 | /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */ |
350 | pushq $__KERNEL32_CS | 362 | pushq $__KERNEL32_CS |
351 | leaq compatible_mode(%rip), %rax | 363 | leaq TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax |
352 | pushq %rax | 364 | pushq %rax |
353 | lretq | 365 | lretq |
354 | lvl5: | 366 | trampoline_return: |
355 | #endif | 367 | /* Restore the stack, the 32-bit trampoline uses its own stack */ |
368 | leaq boot_stack_end(%rbx), %rsp | ||
369 | |||
370 | /* | ||
371 | * cleanup_trampoline() restores trampoline memory. | ||
372 | * | ||
373 | * RSI holds real mode data and needs to be preserved across | ||
374 | * this function call. | ||
375 | */ | ||
376 | pushq %rsi | ||
377 | call cleanup_trampoline | ||
378 | popq %rsi | ||
356 | 379 | ||
357 | /* Zero EFLAGS */ | 380 | /* Zero EFLAGS */ |
358 | pushq $0 | 381 | pushq $0 |
@@ -490,46 +513,82 @@ relocated: | |||
490 | jmp *%rax | 513 | jmp *%rax |
491 | 514 | ||
492 | .code32 | 515 | .code32 |
493 | #ifdef CONFIG_X86_5LEVEL | 516 | /* |
494 | compatible_mode: | 517 | * This is the 32-bit trampoline that will be copied over to low memory. |
495 | /* Setup data and stack segments */ | 518 | * |
519 | * RDI contains the return address (might be above 4G). | ||
520 | * ECX contains the base address of the trampoline memory. | ||
521 | * Non zero RDX on return means we need to enable 5-level paging. | ||
522 | */ | ||
523 | ENTRY(trampoline_32bit_src) | ||
524 | /* Set up data and stack segments */ | ||
496 | movl $__KERNEL_DS, %eax | 525 | movl $__KERNEL_DS, %eax |
497 | movl %eax, %ds | 526 | movl %eax, %ds |
498 | movl %eax, %ss | 527 | movl %eax, %ss |
499 | 528 | ||
529 | /* Set up new stack */ | ||
530 | leal TRAMPOLINE_32BIT_STACK_END(%ecx), %esp | ||
531 | |||
500 | /* Disable paging */ | 532 | /* Disable paging */ |
501 | movl %cr0, %eax | 533 | movl %cr0, %eax |
502 | btrl $X86_CR0_PG_BIT, %eax | 534 | btrl $X86_CR0_PG_BIT, %eax |
503 | movl %eax, %cr0 | 535 | movl %eax, %cr0 |
504 | 536 | ||
505 | /* Point CR3 to 5-level paging */ | 537 | /* Check what paging mode we want to be in after the trampoline */ |
506 | leal lvl5_pgtable(%ebx), %eax | 538 | cmpl $0, %edx |
507 | movl %eax, %cr3 | 539 | jz 1f |
508 | 540 | ||
509 | /* Enable PAE and LA57 mode */ | 541 | /* We want 5-level paging: don't touch CR3 if it already points to 5-level page tables */ |
542 | movl %cr4, %eax | ||
543 | testl $X86_CR4_LA57, %eax | ||
544 | jnz 3f | ||
545 | jmp 2f | ||
546 | 1: | ||
547 | /* We want 4-level paging: don't touch CR3 if it already points to 4-level page tables */ | ||
510 | movl %cr4, %eax | 548 | movl %cr4, %eax |
511 | orl $(X86_CR4_PAE | X86_CR4_LA57), %eax | 549 | testl $X86_CR4_LA57, %eax |
550 | jz 3f | ||
551 | 2: | ||
552 | /* Point CR3 to the trampoline's new top level page table */ | ||
553 | leal TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax | ||
554 | movl %eax, %cr3 | ||
555 | 3: | ||
556 | /* Enable PAE and LA57 (if required) paging modes */ | ||
557 | movl $X86_CR4_PAE, %eax | ||
558 | cmpl $0, %edx | ||
559 | jz 1f | ||
560 | orl $X86_CR4_LA57, %eax | ||
561 | 1: | ||
512 | movl %eax, %cr4 | 562 | movl %eax, %cr4 |
513 | 563 | ||
514 | /* Calculate address we are running at */ | 564 | /* Calculate address of paging_enabled() once we are executing in the trampoline */ |
515 | call 1f | 565 | leal paging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax |
516 | 1: popl %edi | ||
517 | subl $1b, %edi | ||
518 | 566 | ||
519 | /* Prepare stack for far return to Long Mode */ | 567 | /* Prepare the stack for far return to Long Mode */ |
520 | pushl $__KERNEL_CS | 568 | pushl $__KERNEL_CS |
521 | leal lvl5(%edi), %eax | 569 | pushl %eax |
522 | push %eax | ||
523 | 570 | ||
524 | /* Enable paging back */ | 571 | /* Enable paging again */ |
525 | movl $(X86_CR0_PG | X86_CR0_PE), %eax | 572 | movl $(X86_CR0_PG | X86_CR0_PE), %eax |
526 | movl %eax, %cr0 | 573 | movl %eax, %cr0 |
527 | 574 | ||
528 | lret | 575 | lret |
529 | #endif | ||
530 | 576 | ||
577 | .code64 | ||
578 | paging_enabled: | ||
579 | /* Return from the trampoline */ | ||
580 | jmp *%rdi | ||
581 | |||
582 | /* | ||
583 | * The trampoline code has a size limit. | ||
584 | * Make sure we fail to compile if the trampoline code grows | ||
585 | * beyond TRAMPOLINE_32BIT_CODE_SIZE bytes. | ||
586 | */ | ||
587 | .org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE | ||
588 | |||
589 | .code32 | ||
531 | no_longmode: | 590 | no_longmode: |
532 | /* This isn't an x86-64 CPU so hang */ | 591 | /* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */ |
533 | 1: | 592 | 1: |
534 | hlt | 593 | hlt |
535 | jmp 1b | 594 | jmp 1b |
@@ -537,6 +596,11 @@ no_longmode: | |||
537 | #include "../../kernel/verify_cpu.S" | 596 | #include "../../kernel/verify_cpu.S" |
538 | 597 | ||
539 | .data | 598 | .data |
599 | gdt64: | ||
600 | .word gdt_end - gdt | ||
601 | .long 0 | ||
602 | .word 0 | ||
603 | .quad 0 | ||
540 | gdt: | 604 | gdt: |
541 | .word gdt_end - gdt | 605 | .word gdt_end - gdt |
542 | .long gdt | 606 | .long gdt |
@@ -585,7 +649,3 @@ boot_stack_end: | |||
585 | .balign 4096 | 649 | .balign 4096 |
586 | pgtable: | 650 | pgtable: |
587 | .fill BOOT_PGT_SIZE, 1, 0 | 651 | .fill BOOT_PGT_SIZE, 1, 0 |
588 | #ifdef CONFIG_X86_5LEVEL | ||
589 | lvl5_pgtable: | ||
590 | .fill PAGE_SIZE, 1, 0 | ||
591 | #endif | ||
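Read together, the hunks above amount to the following boot flow (an editorial summary, not text from the patch):

 1. startup_64 installs a GDT that carries a 32-bit code segment, then calls paging_prepare(), which places the trampoline in low memory and decides whether 5-level paging is wanted (trampoline address in RAX, non-zero RDX if LA57 should be enabled).
 2. A far return enters the copied trampoline_32bit_src in compatibility mode; it switches to the trampoline stack, disables paging, loads the trampoline page table into CR3 only when the paging depth actually changes, sets PAE (plus LA57 if requested) in CR4, and re-enables paging.
 3. The final lret lands in the 64-bit paging_enabled stub, which jumps back through RDI to trampoline_return; there the boot stack is restored and cleanup_trampoline() moves the top-level page table out of trampoline memory (when CR3 points there) and restores the saved contents.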
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 8199a6187251..66e42a098d70 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -46,6 +46,12 @@ | |||
46 | #define STATIC | 46 | #define STATIC |
47 | #include <linux/decompress/mm.h> | 47 | #include <linux/decompress/mm.h> |
48 | 48 | ||
49 | #ifdef CONFIG_X86_5LEVEL | ||
50 | unsigned int pgtable_l5_enabled __ro_after_init; | ||
51 | unsigned int pgdir_shift __ro_after_init = 39; | ||
52 | unsigned int ptrs_per_p4d __ro_after_init = 1; | ||
53 | #endif | ||
54 | |||
49 | extern unsigned long get_cmd_line_ptr(void); | 55 | extern unsigned long get_cmd_line_ptr(void); |
50 | 56 | ||
51 | /* Simplified build-specific string for starting entropy. */ | 57 | /* Simplified build-specific string for starting entropy. */ |
@@ -723,6 +729,14 @@ void choose_random_location(unsigned long input, | |||
723 | return; | 729 | return; |
724 | } | 730 | } |
725 | 731 | ||
732 | #ifdef CONFIG_X86_5LEVEL | ||
733 | if (__read_cr4() & X86_CR4_LA57) { | ||
734 | pgtable_l5_enabled = 1; | ||
735 | pgdir_shift = 48; | ||
736 | ptrs_per_p4d = 512; | ||
737 | } | ||
738 | #endif | ||
739 | |||
726 | boot_params->hdr.loadflags |= KASLR_FLAG; | 740 | boot_params->hdr.loadflags |= KASLR_FLAG; |
727 | 741 | ||
728 | /* Prepare to add new identity pagetables on demand. */ | 742 | /* Prepare to add new identity pagetables on demand. */ |
diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/kaslr_64.c
index b5e5e02f8cde..522d11431433 100644
--- a/arch/x86/boot/compressed/pagetable.c
+++ b/arch/x86/boot/compressed/kaslr_64.c
@@ -16,13 +16,6 @@ | |||
16 | #define __pa(x) ((unsigned long)(x)) | 16 | #define __pa(x) ((unsigned long)(x)) |
17 | #define __va(x) ((void *)((unsigned long)(x))) | 17 | #define __va(x) ((void *)((unsigned long)(x))) |
18 | 18 | ||
19 | /* | ||
20 | * The pgtable.h and mm/ident_map.c includes make use of the SME related | ||
21 | * information which is not used in the compressed image support. Un-define | ||
22 | * the SME support to avoid any compile and link errors. | ||
23 | */ | ||
24 | #undef CONFIG_AMD_MEM_ENCRYPT | ||
25 | |||
26 | /* No PAGE_TABLE_ISOLATION support needed either: */ | 19 | /* No PAGE_TABLE_ISOLATION support needed either: */ |
27 | #undef CONFIG_PAGE_TABLE_ISOLATION | 20 | #undef CONFIG_PAGE_TABLE_ISOLATION |
28 | 21 | ||
@@ -85,13 +78,14 @@ static struct x86_mapping_info mapping_info; | |||
85 | /* Locates and clears a region for a new top level page table. */ | 78 | /* Locates and clears a region for a new top level page table. */ |
86 | void initialize_identity_maps(void) | 79 | void initialize_identity_maps(void) |
87 | { | 80 | { |
88 | unsigned long sev_me_mask = get_sev_encryption_mask(); | 81 | /* If running as an SEV guest, the encryption mask is required. */ |
82 | set_sev_encryption_mask(); | ||
89 | 83 | ||
90 | /* Init mapping_info with run-time function/buffer pointers. */ | 84 | /* Init mapping_info with run-time function/buffer pointers. */ |
91 | mapping_info.alloc_pgt_page = alloc_pgt_page; | 85 | mapping_info.alloc_pgt_page = alloc_pgt_page; |
92 | mapping_info.context = &pgt_data; | 86 | mapping_info.context = &pgt_data; |
93 | mapping_info.page_flag = __PAGE_KERNEL_LARGE_EXEC | sev_me_mask; | 87 | mapping_info.page_flag = __PAGE_KERNEL_LARGE_EXEC | sme_me_mask; |
94 | mapping_info.kernpg_flag = _KERNPG_TABLE | sev_me_mask; | 88 | mapping_info.kernpg_flag = _KERNPG_TABLE; |
95 | 89 | ||
96 | /* | 90 | /* |
97 | * It should be impossible for this not to already be true, | 91 | * It should be impossible for this not to already be true, |
diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S
index 54f5f6625a73..eaa843a52907 100644
--- a/arch/x86/boot/compressed/mem_encrypt.S
+++ b/arch/x86/boot/compressed/mem_encrypt.S
@@ -88,9 +88,7 @@ ENTRY(get_sev_encryption_bit) | |||
88 | ENDPROC(get_sev_encryption_bit) | 88 | ENDPROC(get_sev_encryption_bit) |
89 | 89 | ||
90 | .code64 | 90 | .code64 |
91 | ENTRY(get_sev_encryption_mask) | 91 | ENTRY(set_sev_encryption_mask) |
92 | xor %rax, %rax | ||
93 | |||
94 | #ifdef CONFIG_AMD_MEM_ENCRYPT | 92 | #ifdef CONFIG_AMD_MEM_ENCRYPT |
95 | push %rbp | 93 | push %rbp |
96 | push %rdx | 94 | push %rdx |
@@ -101,9 +99,7 @@ ENTRY(get_sev_encryption_mask) | |||
101 | testl %eax, %eax | 99 | testl %eax, %eax |
102 | jz .Lno_sev_mask | 100 | jz .Lno_sev_mask |
103 | 101 | ||
104 | xor %rdx, %rdx | 102 | bts %rax, sme_me_mask(%rip) /* Create the encryption mask */ |
105 | bts %rax, %rdx /* Create the encryption mask */ | ||
106 | mov %rdx, %rax /* ... and return it */ | ||
107 | 103 | ||
108 | .Lno_sev_mask: | 104 | .Lno_sev_mask: |
109 | movq %rbp, %rsp /* Restore original stack pointer */ | 105 | movq %rbp, %rsp /* Restore original stack pointer */ |
@@ -112,9 +108,16 @@ ENTRY(get_sev_encryption_mask) | |||
112 | pop %rbp | 108 | pop %rbp |
113 | #endif | 109 | #endif |
114 | 110 | ||
111 | xor %rax, %rax | ||
115 | ret | 112 | ret |
116 | ENDPROC(get_sev_encryption_mask) | 113 | ENDPROC(set_sev_encryption_mask) |
117 | 114 | ||
118 | .data | 115 | .data |
119 | enc_bit: | 116 | enc_bit: |
120 | .int 0xffffffff | 117 | .int 0xffffffff |
118 | |||
119 | #ifdef CONFIG_AMD_MEM_ENCRYPT | ||
120 | .balign 8 | ||
121 | GLOBAL(sme_me_mask) | ||
122 | .quad 0 | ||
123 | #endif | ||
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 252fee320816..8dd1d5ccae58 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -14,6 +14,7 @@ | |||
14 | 14 | ||
15 | #include "misc.h" | 15 | #include "misc.h" |
16 | #include "error.h" | 16 | #include "error.h" |
17 | #include "pgtable.h" | ||
17 | #include "../string.h" | 18 | #include "../string.h" |
18 | #include "../voffset.h" | 19 | #include "../voffset.h" |
19 | 20 | ||
@@ -169,16 +170,6 @@ void __puthex(unsigned long value) | |||
169 | } | 170 | } |
170 | } | 171 | } |
171 | 172 | ||
172 | static bool l5_supported(void) | ||
173 | { | ||
174 | /* Check if leaf 7 is supported. */ | ||
175 | if (native_cpuid_eax(0) < 7) | ||
176 | return 0; | ||
177 | |||
178 | /* Check if la57 is supported. */ | ||
179 | return native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)); | ||
180 | } | ||
181 | |||
182 | #if CONFIG_X86_NEED_RELOCS | 173 | #if CONFIG_X86_NEED_RELOCS |
183 | static void handle_relocations(void *output, unsigned long output_len, | 174 | static void handle_relocations(void *output, unsigned long output_len, |
184 | unsigned long virt_addr) | 175 | unsigned long virt_addr) |
@@ -376,12 +367,6 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, | |||
376 | console_init(); | 367 | console_init(); |
377 | debug_putstr("early console in extract_kernel\n"); | 368 | debug_putstr("early console in extract_kernel\n"); |
378 | 369 | ||
379 | if (IS_ENABLED(CONFIG_X86_5LEVEL) && !l5_supported()) { | ||
380 | error("This linux kernel as configured requires 5-level paging\n" | ||
381 | "This CPU does not support the required 'cr4.la57' feature\n" | ||
382 | "Unable to boot - please use a kernel appropriate for your CPU\n"); | ||
383 | } | ||
384 | |||
385 | free_mem_ptr = heap; /* Heap */ | 370 | free_mem_ptr = heap; /* Heap */ |
386 | free_mem_end_ptr = heap + BOOT_HEAP_SIZE; | 371 | free_mem_end_ptr = heap + BOOT_HEAP_SIZE; |
387 | 372 | ||
@@ -392,6 +377,11 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, | |||
392 | debug_putaddr(output_len); | 377 | debug_putaddr(output_len); |
393 | debug_putaddr(kernel_total_size); | 378 | debug_putaddr(kernel_total_size); |
394 | 379 | ||
380 | #ifdef CONFIG_X86_64 | ||
381 | /* Report address of 32-bit trampoline */ | ||
382 | debug_putaddr(trampoline_32bit); | ||
383 | #endif | ||
384 | |||
395 | /* | 385 | /* |
396 | * The memory hole needed for the kernel is the larger of either | 386 | * The memory hole needed for the kernel is the larger of either |
397 | * the entire decompressed kernel plus relocation table, or the | 387 | * the entire decompressed kernel plus relocation table, or the |
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 9d323dc6b159..9e11be4cae19 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -12,6 +12,11 @@ | |||
12 | #undef CONFIG_PARAVIRT_SPINLOCKS | 12 | #undef CONFIG_PARAVIRT_SPINLOCKS |
13 | #undef CONFIG_KASAN | 13 | #undef CONFIG_KASAN |
14 | 14 | ||
15 | #ifdef CONFIG_X86_5LEVEL | ||
16 | /* cpu_feature_enabled() cannot be used that early */ | ||
17 | #define pgtable_l5_enabled __pgtable_l5_enabled | ||
18 | #endif | ||
19 | |||
15 | #include <linux/linkage.h> | 20 | #include <linux/linkage.h> |
16 | #include <linux/screen_info.h> | 21 | #include <linux/screen_info.h> |
17 | #include <linux/elf.h> | 22 | #include <linux/elf.h> |
@@ -109,6 +114,6 @@ static inline void console_init(void) | |||
109 | { } | 114 | { } |
110 | #endif | 115 | #endif |
111 | 116 | ||
112 | unsigned long get_sev_encryption_mask(void); | 117 | void set_sev_encryption_mask(void); |
113 | 118 | ||
114 | #endif | 119 | #endif |
diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h
new file mode 100644
index 000000000000..91f75638f6e6
--- /dev/null
+++ b/arch/x86/boot/compressed/pgtable.h
@@ -0,0 +1,20 @@ | |||
1 | #ifndef BOOT_COMPRESSED_PAGETABLE_H | ||
2 | #define BOOT_COMPRESSED_PAGETABLE_H | ||
3 | |||
4 | #define TRAMPOLINE_32BIT_SIZE (2 * PAGE_SIZE) | ||
5 | |||
6 | #define TRAMPOLINE_32BIT_PGTABLE_OFFSET 0 | ||
7 | |||
8 | #define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE | ||
9 | #define TRAMPOLINE_32BIT_CODE_SIZE 0x60 | ||
10 | |||
11 | #define TRAMPOLINE_32BIT_STACK_END TRAMPOLINE_32BIT_SIZE | ||
12 | |||
13 | #ifndef __ASSEMBLER__ | ||
14 | |||
15 | extern unsigned long *trampoline_32bit; | ||
16 | |||
17 | extern void trampoline_32bit_src(void *return_ptr); | ||
18 | |||
19 | #endif /* __ASSEMBLER__ */ | ||
20 | #endif /* BOOT_COMPRESSED_PAGETABLE_H */ | ||
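The constants above imply the following two-page trampoline layout (an inferred sketch, not stated in the header):

	offset 0x0000  spare top-level page table (TRAMPOLINE_32BIT_PGTABLE_OFFSET)
	offset 0x1000  32-bit trampoline code, at most 0x60 bytes (TRAMPOLINE_32BIT_CODE_OFFSET/SIZE)
	offset 0x2000  end of the trampoline; the 32-bit stack grows down from here (TRAMPOLINE_32BIT_STACK_END)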
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index b4469a37e9a1..32af1cbcd903 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -1,4 +1,6 @@ | |||
1 | #include <asm/processor.h> | 1 | #include <asm/processor.h> |
2 | #include "pgtable.h" | ||
3 | #include "../string.h" | ||
2 | 4 | ||
3 | /* | 5 | /* |
4 | * __force_order is used by special_insns.h asm code to force instruction | 6 | * __force_order is used by special_insns.h asm code to force instruction |
@@ -9,20 +11,144 @@ | |||
9 | */ | 11 | */ |
10 | unsigned long __force_order; | 12 | unsigned long __force_order; |
11 | 13 | ||
12 | int l5_paging_required(void) | 14 | #define BIOS_START_MIN 0x20000U /* 128K, less than this is insane */ |
15 | #define BIOS_START_MAX 0x9f000U /* 640K, absolute maximum */ | ||
16 | |||
17 | struct paging_config { | ||
18 | unsigned long trampoline_start; | ||
19 | unsigned long l5_required; | ||
20 | }; | ||
21 | |||
22 | /* Buffer to preserve trampoline memory */ | ||
23 | static char trampoline_save[TRAMPOLINE_32BIT_SIZE]; | ||
24 | |||
25 | /* | ||
26 | * This page table is going to be used instead of the page table in the | ||
27 | * trampoline memory. | ||
28 | * | ||
29 | * It must not be in BSS as BSS is cleared after cleanup_trampoline(). | ||
30 | */ | ||
31 | static char top_pgtable[PAGE_SIZE] __aligned(PAGE_SIZE) __section(.data); | ||
32 | |||
33 | /* | ||
34 | * Trampoline address will be printed by extract_kernel() for debugging | ||
35 | * purposes. | ||
36 | * | ||
37 | * Avoid putting the pointer into .bss as it will be cleared between | ||
38 | * paging_prepare() and extract_kernel(). | ||
39 | */ | ||
40 | unsigned long *trampoline_32bit __section(.data); | ||
41 | |||
42 | struct paging_config paging_prepare(void) | ||
13 | { | 43 | { |
14 | /* Check if leaf 7 is supported. */ | 44 | struct paging_config paging_config = {}; |
45 | unsigned long bios_start, ebda_start; | ||
46 | |||
47 | /* | ||
48 | * Check if LA57 is desired and supported. | ||
49 | * | ||
50 | * There are two parts to the check: | ||
51 | * - if the kernel supports 5-level paging: CONFIG_X86_5LEVEL=y | ||
52 | * - if the machine supports 5-level paging: | ||
53 | * + CPUID leaf 7 is supported | ||
54 | * + the leaf has the feature bit set | ||
55 | * | ||
56 | * That's a substitute for boot_cpu_has() in early boot code. | ||
57 | */ | ||
58 | if (IS_ENABLED(CONFIG_X86_5LEVEL) && | ||
59 | native_cpuid_eax(0) >= 7 && | ||
60 | (native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) { | ||
61 | paging_config.l5_required = 1; | ||
62 | } | ||
63 | |||
64 | /* | ||
65 | * Find a suitable spot for the trampoline. | ||
66 | * This code is based on reserve_bios_regions(). | ||
67 | */ | ||
68 | |||
69 | ebda_start = *(unsigned short *)0x40e << 4; | ||
70 | bios_start = *(unsigned short *)0x413 << 10; | ||
15 | 71 | ||
16 | if (native_cpuid_eax(0) < 7) | 72 | if (bios_start < BIOS_START_MIN || bios_start > BIOS_START_MAX) |
17 | return 0; | 73 | bios_start = BIOS_START_MAX; |
74 | |||
75 | if (ebda_start > BIOS_START_MIN && ebda_start < bios_start) | ||
76 | bios_start = ebda_start; | ||
77 | |||
78 | /* Place the trampoline just below the end of low memory, aligned to 4k */ | ||
79 | paging_config.trampoline_start = bios_start - TRAMPOLINE_32BIT_SIZE; | ||
80 | paging_config.trampoline_start = round_down(paging_config.trampoline_start, PAGE_SIZE); | ||
81 | |||
82 | trampoline_32bit = (unsigned long *)paging_config.trampoline_start; | ||
83 | |||
84 | /* Preserve trampoline memory */ | ||
85 | memcpy(trampoline_save, trampoline_32bit, TRAMPOLINE_32BIT_SIZE); | ||
86 | |||
87 | /* Clear trampoline memory first */ | ||
88 | memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE); | ||
89 | |||
90 | /* Copy trampoline code in place */ | ||
91 | memcpy(trampoline_32bit + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long), | ||
92 | &trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE); | ||
93 | |||
94 | /* | ||
95 | * The code below prepares page table in trampoline memory. | ||
96 | * | ||
97 | * The new page table will be used by trampoline code for switching | ||
98 | * from 4- to 5-level paging or vice versa. | ||
99 | * | ||
100 | * If switching is not required, the page table is unused: trampoline | ||
101 | * code wouldn't touch CR3. | ||
102 | */ | ||
103 | |||
104 | /* | ||
105 | * We are not going to use the page table in trampoline memory if we | ||
106 | * are already in the desired paging mode. | ||
107 | */ | ||
108 | if (paging_config.l5_required == !!(native_read_cr4() & X86_CR4_LA57)) | ||
109 | goto out; | ||
110 | |||
111 | if (paging_config.l5_required) { | ||
112 | /* | ||
113 | * For 4- to 5-level paging transition, set up current CR3 as | ||
114 | * the first and the only entry in a new top-level page table. | ||
115 | */ | ||
116 | trampoline_32bit[TRAMPOLINE_32BIT_PGTABLE_OFFSET] = __native_read_cr3() | _PAGE_TABLE_NOENC; | ||
117 | } else { | ||
118 | unsigned long src; | ||
119 | |||
120 | /* | ||
121 | * For 5- to 4-level paging transition, copy page table pointed | ||
122 | * by first entry in the current top-level page table as our | ||
123 | * new top-level page table. | ||
124 | * | ||
125 | * We cannot just point to the page table from trampoline as it | ||
126 | * may be above 4G. | ||
127 | */ | ||
128 | src = *(unsigned long *)__native_read_cr3() & PAGE_MASK; | ||
129 | memcpy(trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long), | ||
130 | (void *)src, PAGE_SIZE); | ||
131 | } | ||
132 | |||
133 | out: | ||
134 | return paging_config; | ||
135 | } | ||
136 | |||
137 | void cleanup_trampoline(void) | ||
138 | { | ||
139 | void *trampoline_pgtable; | ||
18 | 140 | ||
19 | /* Check if la57 is supported. */ | 141 | trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET; |
20 | if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) | ||
21 | return 0; | ||
22 | 142 | ||
23 | /* Check if 5-level paging has already been enabled. */ | 143 | /* |
24 | if (native_read_cr4() & X86_CR4_LA57) | 144 | * Move the top level page table out of trampoline memory, |
25 | return 0; | 145 | * if it's there. |
146 | */ | ||
147 | if ((void *)__native_read_cr3() == trampoline_pgtable) { | ||
148 | memcpy(top_pgtable, trampoline_pgtable, PAGE_SIZE); | ||
149 | native_write_cr3((unsigned long)top_pgtable); | ||
150 | } | ||
26 | 151 | ||
27 | return 1; | 152 | /* Restore trampoline memory */ |
153 | memcpy(trampoline_32bit, trampoline_save, TRAMPOLINE_32BIT_SIZE); | ||
28 | } | 154 | } |
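A worked example of the placement logic above, with hypothetical BIOS values: if the base-memory word at 0x413 reports 636 KB, bios_start = 636 << 10 = 0x9f000, which passes the sanity checks; if the EBDA segment word at 0x40e is 0x9e80, ebda_start = 0x9e800, which lies above BIOS_START_MIN and below bios_start, so bios_start becomes 0x9e800. The two-page trampoline then lands at round_down(0x9e800 - 0x2000, 4K) = 0x9c000, just below the EBDA.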
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 18ed349b4f83..936e19642eab 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -260,8 +260,13 @@ GLOBAL(entry_SYSCALL_64_after_hwframe) | |||
260 | * Change top bits to match most significant bit (47th or 56th bit | 260 | * Change top bits to match most significant bit (47th or 56th bit |
261 | * depending on paging mode) in the address. | 261 | * depending on paging mode) in the address. |
262 | */ | 262 | */ |
263 | #ifdef CONFIG_X86_5LEVEL | ||
264 | ALTERNATIVE "shl $(64 - 48), %rcx; sar $(64 - 48), %rcx", \ | ||
265 | "shl $(64 - 57), %rcx; sar $(64 - 57), %rcx", X86_FEATURE_LA57 | ||
266 | #else | ||
263 | shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx | 267 | shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx |
264 | sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx | 268 | sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx |
269 | #endif | ||
265 | 270 | ||
266 | /* If this changed %rcx, it was not canonical */ | 271 | /* If this changed %rcx, it was not canonical */ |
267 | cmpq %rcx, %r11 | 272 | cmpq %rcx, %r11 |
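To make the shift counts concrete (an explanatory note, not from the patch): with 4-level paging the virtual address is 48 bits wide, so shifting left and then arithmetically right by 64 - 48 = 16 replicates bit 47 into the upper bits; with 5-level paging (X86_FEATURE_LA57) the address is 57 bits wide and the alternative uses 64 - 57 = 7 to sign-extend from bit 56. If the sign-extended value differs from the original RCX, the address was not canonical.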
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 11881726ed37..a303d7b7d763 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -31,6 +31,7 @@ | |||
31 | #include <asm/mmu.h> | 31 | #include <asm/mmu.h> |
32 | #include <asm/mpspec.h> | 32 | #include <asm/mpspec.h> |
33 | #include <asm/realmode.h> | 33 | #include <asm/realmode.h> |
34 | #include <asm/x86_init.h> | ||
34 | 35 | ||
35 | #ifdef CONFIG_ACPI_APEI | 36 | #ifdef CONFIG_ACPI_APEI |
36 | # include <asm/pgtable_types.h> | 37 | # include <asm/pgtable_types.h> |
@@ -133,6 +134,14 @@ static inline bool acpi_has_cpu_in_madt(void) | |||
133 | return !!acpi_lapic; | 134 | return !!acpi_lapic; |
134 | } | 135 | } |
135 | 136 | ||
137 | #define ACPI_HAVE_ARCH_GET_ROOT_POINTER | ||
138 | static inline u64 acpi_arch_get_root_pointer(void) | ||
139 | { | ||
140 | return x86_init.acpi.get_root_pointer(); | ||
141 | } | ||
142 | |||
143 | void acpi_generic_reduced_hw_init(void); | ||
144 | |||
136 | #else /* !CONFIG_ACPI */ | 145 | #else /* !CONFIG_ACPI */ |
137 | 146 | ||
138 | #define acpi_lapic 0 | 147 | #define acpi_lapic 0 |
@@ -142,6 +151,8 @@ static inline void acpi_noirq_set(void) { } | |||
142 | static inline void acpi_disable_pci(void) { } | 151 | static inline void acpi_disable_pci(void) { } |
143 | static inline void disable_acpi(void) { } | 152 | static inline void disable_acpi(void) { } |
144 | 153 | ||
154 | static inline void acpi_generic_reduced_hw_init(void) { } | ||
155 | |||
145 | #endif /* !CONFIG_ACPI */ | 156 | #endif /* !CONFIG_ACPI */ |
146 | 157 | ||
147 | #define ARCH_HAS_POWER_INIT 1 | 158 | #define ARCH_HAS_POWER_INIT 1 |
diff --git a/arch/x86/include/asm/intel_pconfig.h b/arch/x86/include/asm/intel_pconfig.h
new file mode 100644
index 000000000000..3cb002b1d0f9
--- /dev/null
+++ b/arch/x86/include/asm/intel_pconfig.h
@@ -0,0 +1,65 @@ | |||
1 | #ifndef _ASM_X86_INTEL_PCONFIG_H | ||
2 | #define _ASM_X86_INTEL_PCONFIG_H | ||
3 | |||
4 | #include <asm/asm.h> | ||
5 | #include <asm/processor.h> | ||
6 | |||
7 | enum pconfig_target { | ||
8 | INVALID_TARGET = 0, | ||
9 | MKTME_TARGET = 1, | ||
10 | PCONFIG_TARGET_NR | ||
11 | }; | ||
12 | |||
13 | int pconfig_target_supported(enum pconfig_target target); | ||
14 | |||
15 | enum pconfig_leaf { | ||
16 | MKTME_KEY_PROGRAM = 0, | ||
17 | PCONFIG_LEAF_INVALID, | ||
18 | }; | ||
19 | |||
20 | #define PCONFIG ".byte 0x0f, 0x01, 0xc5" | ||
21 | |||
22 | /* Defines and structure for MKTME_KEY_PROGRAM of PCONFIG instruction */ | ||
23 | |||
24 | /* mktme_key_program::keyid_ctrl COMMAND, bits [7:0] */ | ||
25 | #define MKTME_KEYID_SET_KEY_DIRECT 0 | ||
26 | #define MKTME_KEYID_SET_KEY_RANDOM 1 | ||
27 | #define MKTME_KEYID_CLEAR_KEY 2 | ||
28 | #define MKTME_KEYID_NO_ENCRYPT 3 | ||
29 | |||
30 | /* mktme_key_program::keyid_ctrl ENC_ALG, bits [23:8] */ | ||
31 | #define MKTME_AES_XTS_128 (1 << 8) | ||
32 | |||
33 | /* Return codes from the PCONFIG MKTME_KEY_PROGRAM */ | ||
34 | #define MKTME_PROG_SUCCESS 0 | ||
35 | #define MKTME_INVALID_PROG_CMD 1 | ||
36 | #define MKTME_ENTROPY_ERROR 2 | ||
37 | #define MKTME_INVALID_KEYID 3 | ||
38 | #define MKTME_INVALID_ENC_ALG 4 | ||
39 | #define MKTME_DEVICE_BUSY 5 | ||
40 | |||
41 | /* Hardware requires the structure to be 256-byte aligned. Otherwise #GP(0). */ | ||
42 | struct mktme_key_program { | ||
43 | u16 keyid; | ||
44 | u32 keyid_ctrl; | ||
45 | u8 __rsvd[58]; | ||
46 | u8 key_field_1[64]; | ||
47 | u8 key_field_2[64]; | ||
48 | } __packed __aligned(256); | ||
49 | |||
50 | static inline int mktme_key_program(struct mktme_key_program *key_program) | ||
51 | { | ||
52 | unsigned long rax = MKTME_KEY_PROGRAM; | ||
53 | |||
54 | if (!pconfig_target_supported(MKTME_TARGET)) | ||
55 | return -ENXIO; | ||
56 | |||
57 | asm volatile(PCONFIG | ||
58 | : "=a" (rax), "=b" (key_program) | ||
59 | : "0" (rax), "1" (key_program) | ||
60 | : "memory", "cc"); | ||
61 | |||
62 | return rax; | ||
63 | } | ||
64 | |||
65 | #endif /* _ASM_X86_INTEL_PCONFIG_H */ | ||
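A minimal sketch of how the new interface might be used from kernel code; example_program_random_key() is a hypothetical helper invented for illustration and is not part of this patch:

#include <linux/errno.h>
#include <linux/string.h>
#include <asm/intel_pconfig.h>

static int example_program_random_key(u16 keyid)
{
	/* Static so the 256-byte alignment demanded by hardware is honoured. */
	static struct mktme_key_program prog;
	int ret;

	memset(&prog, 0, sizeof(prog));
	prog.keyid = keyid;
	/* COMMAND in bits [7:0], ENC_ALG in bits [23:8] */
	prog.keyid_ctrl = MKTME_KEYID_SET_KEY_RANDOM | MKTME_AES_XTS_128;

	/* Returns -ENXIO when the MKTME PCONFIG target is unsupported. */
	ret = mktme_key_program(&prog);
	return ret == MKTME_PROG_SUCCESS ? 0 : -EIO;
}

The key fields stay zeroed here because MKTME_KEYID_SET_KEY_RANDOM asks the CPU to generate the key material itself; a real caller would also serialize access to the static buffer.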
diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h
index 460991e3b529..db7ba2feb947 100644
--- a/arch/x86/include/asm/kaslr.h
+++ b/arch/x86/include/asm/kaslr.h
@@ -5,10 +5,6 @@ | |||
5 | unsigned long kaslr_get_random_long(const char *purpose); | 5 | unsigned long kaslr_get_random_long(const char *purpose); |
6 | 6 | ||
7 | #ifdef CONFIG_RANDOMIZE_MEMORY | 7 | #ifdef CONFIG_RANDOMIZE_MEMORY |
8 | extern unsigned long page_offset_base; | ||
9 | extern unsigned long vmalloc_base; | ||
10 | extern unsigned long vmemmap_base; | ||
11 | |||
12 | void kernel_randomize_memory(void); | 8 | void kernel_randomize_memory(void); |
13 | #else | 9 | #else |
14 | static inline void kernel_randomize_memory(void) { } | 10 | static inline void kernel_randomize_memory(void) { } |
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 22c5f3e6f820..8fe61ad21047 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -22,6 +22,7 @@ | |||
22 | #ifdef CONFIG_AMD_MEM_ENCRYPT | 22 | #ifdef CONFIG_AMD_MEM_ENCRYPT |
23 | 23 | ||
24 | extern u64 sme_me_mask; | 24 | extern u64 sme_me_mask; |
25 | extern bool sev_enabled; | ||
25 | 26 | ||
26 | void sme_encrypt_execute(unsigned long encrypted_kernel_vaddr, | 27 | void sme_encrypt_execute(unsigned long encrypted_kernel_vaddr, |
27 | unsigned long decrypted_kernel_vaddr, | 28 | unsigned long decrypted_kernel_vaddr, |
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 9ca8dae9c716..939b1cff4a7b 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -11,6 +11,10 @@ | |||
11 | extern unsigned long max_pfn; | 11 | extern unsigned long max_pfn; |
12 | extern unsigned long phys_base; | 12 | extern unsigned long phys_base; |
13 | 13 | ||
14 | extern unsigned long page_offset_base; | ||
15 | extern unsigned long vmalloc_base; | ||
16 | extern unsigned long vmemmap_base; | ||
17 | |||
14 | static inline unsigned long __phys_addr_nodebug(unsigned long x) | 18 | static inline unsigned long __phys_addr_nodebug(unsigned long x) |
15 | { | 19 | { |
16 | unsigned long y = x - __START_KERNEL_map; | 20 | unsigned long y = x - __START_KERNEL_map; |
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index e1407312c412..2c5a966dc222 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -37,26 +37,24 @@ | |||
37 | * hypervisor to fit. Choosing 16 slots here is arbitrary, but it's | 37 | * hypervisor to fit. Choosing 16 slots here is arbitrary, but it's |
38 | * what Xen requires. | 38 | * what Xen requires. |
39 | */ | 39 | */ |
40 | #ifdef CONFIG_X86_5LEVEL | 40 | #define __PAGE_OFFSET_BASE_L5 _AC(0xff10000000000000, UL) |
41 | #define __PAGE_OFFSET_BASE _AC(0xff10000000000000, UL) | 41 | #define __PAGE_OFFSET_BASE_L4 _AC(0xffff880000000000, UL) |
42 | #else | ||
43 | #define __PAGE_OFFSET_BASE _AC(0xffff880000000000, UL) | ||
44 | #endif | ||
45 | 42 | ||
46 | #ifdef CONFIG_RANDOMIZE_MEMORY | 43 | #ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT |
47 | #define __PAGE_OFFSET page_offset_base | 44 | #define __PAGE_OFFSET page_offset_base |
48 | #else | 45 | #else |
49 | #define __PAGE_OFFSET __PAGE_OFFSET_BASE | 46 | #define __PAGE_OFFSET __PAGE_OFFSET_BASE_L4 |
50 | #endif /* CONFIG_RANDOMIZE_MEMORY */ | 47 | #endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */ |
51 | 48 | ||
52 | #define __START_KERNEL_map _AC(0xffffffff80000000, UL) | 49 | #define __START_KERNEL_map _AC(0xffffffff80000000, UL) |
53 | 50 | ||
54 | /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ | 51 | /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ |
55 | #ifdef CONFIG_X86_5LEVEL | 52 | |
56 | #define __PHYSICAL_MASK_SHIFT 52 | 53 | #define __PHYSICAL_MASK_SHIFT 52 |
57 | #define __VIRTUAL_MASK_SHIFT 56 | 54 | |
55 | #ifdef CONFIG_X86_5LEVEL | ||
56 | #define __VIRTUAL_MASK_SHIFT (pgtable_l5_enabled ? 56 : 47) | ||
58 | #else | 57 | #else |
59 | #define __PHYSICAL_MASK_SHIFT 46 | ||
60 | #define __VIRTUAL_MASK_SHIFT 47 | 58 | #define __VIRTUAL_MASK_SHIFT 47 |
61 | #endif | 59 | #endif |
62 | 60 | ||
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index c83a2f418cea..9be2bf13825b 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -568,17 +568,22 @@ static inline p4dval_t p4d_val(p4d_t p4d) | |||
568 | return PVOP_CALLEE1(p4dval_t, pv_mmu_ops.p4d_val, p4d.p4d); | 568 | return PVOP_CALLEE1(p4dval_t, pv_mmu_ops.p4d_val, p4d.p4d); |
569 | } | 569 | } |
570 | 570 | ||
571 | static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) | 571 | static inline void __set_pgd(pgd_t *pgdp, pgd_t pgd) |
572 | { | 572 | { |
573 | pgdval_t val = native_pgd_val(pgd); | 573 | PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp, native_pgd_val(pgd)); |
574 | |||
575 | PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp, val); | ||
576 | } | 574 | } |
577 | 575 | ||
578 | static inline void pgd_clear(pgd_t *pgdp) | 576 | #define set_pgd(pgdp, pgdval) do { \ |
579 | { | 577 | if (pgtable_l5_enabled) \ |
580 | set_pgd(pgdp, __pgd(0)); | 578 | __set_pgd(pgdp, pgdval); \ |
581 | } | 579 | else \ |
580 | set_p4d((p4d_t *)(pgdp), (p4d_t) { (pgdval).pgd }); \ | ||
581 | } while (0) | ||
582 | |||
583 | #define pgd_clear(pgdp) do { \ | ||
584 | if (pgtable_l5_enabled) \ | ||
585 | set_pgd(pgdp, __pgd(0)); \ | ||
586 | } while (0) | ||
582 | 587 | ||
583 | #endif /* CONFIG_PGTABLE_LEVELS == 5 */ | 588 | #endif /* CONFIG_PGTABLE_LEVELS == 5 */ |
584 | 589 | ||
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index aff42e1da6ee..263c142a6a6c 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -167,6 +167,8 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, | |||
167 | #if CONFIG_PGTABLE_LEVELS > 4 | 167 | #if CONFIG_PGTABLE_LEVELS > 4 |
168 | static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d) | 168 | static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d) |
169 | { | 169 | { |
170 | if (!pgtable_l5_enabled) | ||
171 | return; | ||
170 | paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT); | 172 | paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT); |
171 | set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(p4d))); | 173 | set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(p4d))); |
172 | } | 174 | } |
@@ -191,7 +193,8 @@ extern void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d); | |||
191 | static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, | 193 | static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, |
192 | unsigned long address) | 194 | unsigned long address) |
193 | { | 195 | { |
194 | ___p4d_free_tlb(tlb, p4d); | 196 | if (pgtable_l5_enabled) |
197 | ___p4d_free_tlb(tlb, p4d); | ||
195 | } | 198 | } |
196 | 199 | ||
197 | #endif /* CONFIG_PGTABLE_LEVELS > 4 */ | 200 | #endif /* CONFIG_PGTABLE_LEVELS > 4 */ |
diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h
index 876b4c77d983..6a59a6d0cc50 100644
--- a/arch/x86/include/asm/pgtable-3level_types.h
+++ b/arch/x86/include/asm/pgtable-3level_types.h
@@ -44,5 +44,6 @@ typedef union { | |||
44 | */ | 44 | */ |
45 | #define PTRS_PER_PTE 512 | 45 | #define PTRS_PER_PTE 512 |
46 | 46 | ||
47 | #define MAX_POSSIBLE_PHYSMEM_BITS 36 | ||
47 | 48 | ||
48 | #endif /* _ASM_X86_PGTABLE_3LEVEL_DEFS_H */ | 49 | #endif /* _ASM_X86_PGTABLE_3LEVEL_DEFS_H */ |
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index b444d83cfc95..89d5c8886c85 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -65,7 +65,7 @@ extern pmdval_t early_pmd_flags; | |||
65 | 65 | ||
66 | #ifndef __PAGETABLE_P4D_FOLDED | 66 | #ifndef __PAGETABLE_P4D_FOLDED |
67 | #define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd) | 67 | #define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd) |
68 | #define pgd_clear(pgd) native_pgd_clear(pgd) | 68 | #define pgd_clear(pgd) (pgtable_l5_enabled ? native_pgd_clear(pgd) : 0) |
69 | #endif | 69 | #endif |
70 | 70 | ||
71 | #ifndef set_p4d | 71 | #ifndef set_p4d |
@@ -859,6 +859,8 @@ static inline unsigned long p4d_index(unsigned long address) | |||
859 | #if CONFIG_PGTABLE_LEVELS > 4 | 859 | #if CONFIG_PGTABLE_LEVELS > 4 |
860 | static inline int pgd_present(pgd_t pgd) | 860 | static inline int pgd_present(pgd_t pgd) |
861 | { | 861 | { |
862 | if (!pgtable_l5_enabled) | ||
863 | return 1; | ||
862 | return pgd_flags(pgd) & _PAGE_PRESENT; | 864 | return pgd_flags(pgd) & _PAGE_PRESENT; |
863 | } | 865 | } |
864 | 866 | ||
@@ -876,6 +878,8 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd) | |||
876 | /* to find an entry in a page-table-directory. */ | 878 | /* to find an entry in a page-table-directory. */ |
877 | static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) | 879 | static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) |
878 | { | 880 | { |
881 | if (!pgtable_l5_enabled) | ||
882 | return (p4d_t *)pgd; | ||
879 | return (p4d_t *)pgd_page_vaddr(*pgd) + p4d_index(address); | 883 | return (p4d_t *)pgd_page_vaddr(*pgd) + p4d_index(address); |
880 | } | 884 | } |
881 | 885 | ||
@@ -883,6 +887,9 @@ static inline int pgd_bad(pgd_t pgd) | |||
883 | { | 887 | { |
884 | unsigned long ignore_flags = _PAGE_USER; | 888 | unsigned long ignore_flags = _PAGE_USER; |
885 | 889 | ||
890 | if (!pgtable_l5_enabled) | ||
891 | return 0; | ||
892 | |||
886 | if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) | 893 | if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) |
887 | ignore_flags |= _PAGE_NX; | 894 | ignore_flags |= _PAGE_NX; |
888 | 895 | ||
@@ -891,6 +898,8 @@ static inline int pgd_bad(pgd_t pgd) | |||
891 | 898 | ||
892 | static inline int pgd_none(pgd_t pgd) | 899 | static inline int pgd_none(pgd_t pgd) |
893 | { | 900 | { |
901 | if (!pgtable_l5_enabled) | ||
902 | return 0; | ||
894 | /* | 903 | /* |
895 | * There is no need to do a workaround for the KNL stray | 904 | * There is no need to do a workaround for the KNL stray |
896 | * A/D bit erratum here. PGDs only point to page tables | 905 | * A/D bit erratum here. PGDs only point to page tables |
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index b3ec519e3982..88a056b01db4 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -34,6 +34,8 @@ static inline void check_pgt_cache(void) { } | |||
34 | void paging_init(void); | 34 | void paging_init(void); |
35 | void sync_initial_page_table(void); | 35 | void sync_initial_page_table(void); |
36 | 36 | ||
37 | static inline int pgd_large(pgd_t pgd) { return 0; } | ||
38 | |||
37 | /* | 39 | /* |
38 | * Define this if things work differently on an i386 and an i486: | 40 | * Define this if things work differently on an i386 and an i486: |
39 | * it will (on an i486) warn about kernel memory accesses that are | 41 | * it will (on an i486) warn about kernel memory accesses that are |
diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
index 0777e18a1d23..e3225e83db7d 100644
--- a/arch/x86/include/asm/pgtable_32_types.h
+++ b/arch/x86/include/asm/pgtable_32_types.h
@@ -15,6 +15,8 @@ | |||
15 | # include <asm/pgtable-2level_types.h> | 15 | # include <asm/pgtable-2level_types.h> |
16 | #endif | 16 | #endif |
17 | 17 | ||
18 | #define pgtable_l5_enabled 0 | ||
19 | |||
18 | #define PGDIR_SIZE (1UL << PGDIR_SHIFT) | 20 | #define PGDIR_SIZE (1UL << PGDIR_SHIFT) |
19 | #define PGDIR_MASK (~(PGDIR_SIZE - 1)) | 21 | #define PGDIR_MASK (~(PGDIR_SIZE - 1)) |
20 | 22 | ||
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 1149d2112b2e..877bc27718ae 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -218,29 +218,26 @@ static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) | |||
218 | 218 | ||
219 | static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) | 219 | static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) |
220 | { | 220 | { |
221 | #if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL) | 221 | pgd_t pgd; |
222 | p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd); | 222 | |
223 | #else | 223 | if (pgtable_l5_enabled || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) { |
224 | *p4dp = p4d; | 224 | *p4dp = p4d; |
225 | #endif | 225 | return; |
226 | } | ||
227 | |||
228 | pgd = native_make_pgd(native_p4d_val(p4d)); | ||
229 | pgd = pti_set_user_pgd((pgd_t *)p4dp, pgd); | ||
230 | *p4dp = native_make_p4d(native_pgd_val(pgd)); | ||
226 | } | 231 | } |
227 | 232 | ||
228 | static inline void native_p4d_clear(p4d_t *p4d) | 233 | static inline void native_p4d_clear(p4d_t *p4d) |
229 | { | 234 | { |
230 | #ifdef CONFIG_X86_5LEVEL | ||
231 | native_set_p4d(p4d, native_make_p4d(0)); | 235 | native_set_p4d(p4d, native_make_p4d(0)); |
232 | #else | ||
233 | native_set_p4d(p4d, (p4d_t) { .pgd = native_make_pgd(0)}); | ||
234 | #endif | ||
235 | } | 236 | } |
236 | 237 | ||
237 | static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) | 238 | static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) |
238 | { | 239 | { |
239 | #ifdef CONFIG_PAGE_TABLE_ISOLATION | ||
240 | *pgdp = pti_set_user_pgd(pgdp, pgd); | 240 | *pgdp = pti_set_user_pgd(pgdp, pgd); |
241 | #else | ||
242 | *pgdp = pgd; | ||
243 | #endif | ||
244 | } | 241 | } |
245 | 242 | ||
246 | static inline void native_pgd_clear(pgd_t *pgd) | 243 | static inline void native_pgd_clear(pgd_t *pgd) |
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 6b8f73dcbc2c..d5c21a382475 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -20,6 +20,18 @@ typedef unsigned long pgprotval_t; | |||
20 | 20 | ||
21 | typedef struct { pteval_t pte; } pte_t; | 21 | typedef struct { pteval_t pte; } pte_t; |
22 | 22 | ||
23 | #ifdef CONFIG_X86_5LEVEL | ||
24 | extern unsigned int __pgtable_l5_enabled; | ||
25 | #ifndef pgtable_l5_enabled | ||
26 | #define pgtable_l5_enabled cpu_feature_enabled(X86_FEATURE_LA57) | ||
27 | #endif | ||
28 | #else | ||
29 | #define pgtable_l5_enabled 0 | ||
30 | #endif | ||
31 | |||
32 | extern unsigned int pgdir_shift; | ||
33 | extern unsigned int ptrs_per_p4d; | ||
34 | |||
23 | #endif /* !__ASSEMBLY__ */ | 35 | #endif /* !__ASSEMBLY__ */ |
24 | 36 | ||
25 | #define SHARED_KERNEL_PMD 0 | 37 | #define SHARED_KERNEL_PMD 0 |
@@ -29,24 +41,28 @@ typedef struct { pteval_t pte; } pte_t; | |||
29 | /* | 41 | /* |
30 | * PGDIR_SHIFT determines what a top-level page table entry can map | 42 | * PGDIR_SHIFT determines what a top-level page table entry can map |
31 | */ | 43 | */ |
32 | #define PGDIR_SHIFT 48 | 44 | #define PGDIR_SHIFT pgdir_shift |
33 | #define PTRS_PER_PGD 512 | 45 | #define PTRS_PER_PGD 512 |
34 | 46 | ||
35 | /* | 47 | /* |
36 | * 4th level page in 5-level paging case | 48 | * 4th level page in 5-level paging case |
37 | */ | 49 | */ |
38 | #define P4D_SHIFT 39 | 50 | #define P4D_SHIFT 39 |
39 | #define PTRS_PER_P4D 512 | 51 | #define MAX_PTRS_PER_P4D 512 |
40 | #define P4D_SIZE (_AC(1, UL) << P4D_SHIFT) | 52 | #define PTRS_PER_P4D ptrs_per_p4d |
41 | #define P4D_MASK (~(P4D_SIZE - 1)) | 53 | #define P4D_SIZE (_AC(1, UL) << P4D_SHIFT) |
54 | #define P4D_MASK (~(P4D_SIZE - 1)) | ||
55 | |||
56 | #define MAX_POSSIBLE_PHYSMEM_BITS 52 | ||
42 | 57 | ||
43 | #else /* CONFIG_X86_5LEVEL */ | 58 | #else /* CONFIG_X86_5LEVEL */ |
44 | 59 | ||
45 | /* | 60 | /* |
46 | * PGDIR_SHIFT determines what a top-level page table entry can map | 61 | * PGDIR_SHIFT determines what a top-level page table entry can map |
47 | */ | 62 | */ |
48 | #define PGDIR_SHIFT 39 | 63 | #define PGDIR_SHIFT 39 |
49 | #define PTRS_PER_PGD 512 | 64 | #define PTRS_PER_PGD 512 |
65 | #define MAX_PTRS_PER_P4D 1 | ||
50 | 66 | ||
51 | #endif /* CONFIG_X86_5LEVEL */ | 67 | #endif /* CONFIG_X86_5LEVEL */ |
52 | 68 | ||
@@ -82,31 +98,33 @@ typedef struct { pteval_t pte; } pte_t; | |||
82 | * range must not overlap with anything except the KASAN shadow area, which | 98 | * range must not overlap with anything except the KASAN shadow area, which |
83 | * is correct as KASAN disables KASLR. | 99 | * is correct as KASAN disables KASLR. |
84 | */ | 100 | */ |
85 | #define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) | 101 | #define MAXMEM (1UL << MAX_PHYSMEM_BITS) |
86 | 102 | ||
87 | #ifdef CONFIG_X86_5LEVEL | 103 | #define LDT_PGD_ENTRY_L4 -3UL |
88 | # define VMALLOC_SIZE_TB _AC(12800, UL) | 104 | #define LDT_PGD_ENTRY_L5 -112UL |
89 | # define __VMALLOC_BASE _AC(0xffa0000000000000, UL) | 105 | #define LDT_PGD_ENTRY (pgtable_l5_enabled ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4) |
90 | # define __VMEMMAP_BASE _AC(0xffd4000000000000, UL) | 106 | #define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) |
91 | # define LDT_PGD_ENTRY _AC(-112, UL) | 107 | |
92 | # define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) | 108 | #define __VMALLOC_BASE_L4 0xffffc90000000000 |
93 | #else | 109 | #define __VMALLOC_BASE_L5 0xffa0000000000000 |
94 | # define VMALLOC_SIZE_TB _AC(32, UL) | 110 | |
95 | # define __VMALLOC_BASE _AC(0xffffc90000000000, UL) | 111 | #define VMALLOC_SIZE_TB_L4 32UL |
96 | # define __VMEMMAP_BASE _AC(0xffffea0000000000, UL) | 112 | #define VMALLOC_SIZE_TB_L5 12800UL |
97 | # define LDT_PGD_ENTRY _AC(-3, UL) | 113 | |
98 | # define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) | 114 | #define __VMEMMAP_BASE_L4 0xffffea0000000000 |
99 | #endif | 115 | #define __VMEMMAP_BASE_L5 0xffd4000000000000 |
100 | 116 | ||
101 | #ifdef CONFIG_RANDOMIZE_MEMORY | 117 | #ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT |
102 | # define VMALLOC_START vmalloc_base | 118 | # define VMALLOC_START vmalloc_base |
119 | # define VMALLOC_SIZE_TB (pgtable_l5_enabled ? VMALLOC_SIZE_TB_L5 : VMALLOC_SIZE_TB_L4) | ||
103 | # define VMEMMAP_START vmemmap_base | 120 | # define VMEMMAP_START vmemmap_base |
104 | #else | 121 | #else |
105 | # define VMALLOC_START __VMALLOC_BASE | 122 | # define VMALLOC_START __VMALLOC_BASE_L4 |
106 | # define VMEMMAP_START __VMEMMAP_BASE | 123 | # define VMALLOC_SIZE_TB VMALLOC_SIZE_TB_L4 |
107 | #endif /* CONFIG_RANDOMIZE_MEMORY */ | 124 | # define VMEMMAP_START __VMEMMAP_BASE_L4 |
125 | #endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */ | ||
108 | 126 | ||
109 | #define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL)) | 127 | #define VMALLOC_END (VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1) |
110 | 128 | ||
111 | #define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) | 129 | #define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) |
112 | /* The module sections ends with the start of the fixmap */ | 130 | /* The module sections ends with the start of the fixmap */ |
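Plugging in the definitions above (an editorial check, not text from the patch): with 4-level paging, LDT_BASE_ADDR = -3UL << 39 = 0xfffffe8000000000; with 5-level paging it is -112UL << 48 = 0xff90000000000000. The runtime choice of pgdir_shift and LDT_PGD_ENTRY therefore places the PTI LDT remap in the slot documented for each paging mode in Documentation/x86/x86_64/mm.txt.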
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index fb3a6de7440b..6847d85400a8 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -53,12 +53,6 @@ | |||
53 | # define NEED_MOVBE 0 | 53 | # define NEED_MOVBE 0 |
54 | #endif | 54 | #endif |
55 | 55 | ||
56 | #ifdef CONFIG_X86_5LEVEL | ||
57 | # define NEED_LA57 (1<<(X86_FEATURE_LA57 & 31)) | ||
58 | #else | ||
59 | # define NEED_LA57 0 | ||
60 | #endif | ||
61 | |||
62 | #ifdef CONFIG_X86_64 | 56 | #ifdef CONFIG_X86_64 |
63 | #ifdef CONFIG_PARAVIRT | 57 | #ifdef CONFIG_PARAVIRT |
64 | /* Paravirtualized systems may not have PSE or PGE available */ | 58 | /* Paravirtualized systems may not have PSE or PGE available */ |
@@ -104,7 +98,7 @@ | |||
104 | #define REQUIRED_MASK13 0 | 98 | #define REQUIRED_MASK13 0 |
105 | #define REQUIRED_MASK14 0 | 99 | #define REQUIRED_MASK14 0 |
106 | #define REQUIRED_MASK15 0 | 100 | #define REQUIRED_MASK15 0 |
107 | #define REQUIRED_MASK16 (NEED_LA57) | 101 | #define REQUIRED_MASK16 0 |
108 | #define REQUIRED_MASK17 0 | 102 | #define REQUIRED_MASK17 0 |
109 | #define REQUIRED_MASK18 0 | 103 | #define REQUIRED_MASK18 0 |
110 | #define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) | 104 | #define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) |
diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h index 4fc1e9d3c43e..4617a2bf123c 100644 --- a/arch/x86/include/asm/sparsemem.h +++ b/arch/x86/include/asm/sparsemem.h | |||
@@ -27,13 +27,8 @@ | |||
27 | # endif | 27 | # endif |
28 | #else /* CONFIG_X86_32 */ | 28 | #else /* CONFIG_X86_32 */ |
29 | # define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */ | 29 | # define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */ |
30 | # ifdef CONFIG_X86_5LEVEL | 30 | # define MAX_PHYSADDR_BITS (pgtable_l5_enabled ? 52 : 44) |
31 | # define MAX_PHYSADDR_BITS 52 | 31 | # define MAX_PHYSMEM_BITS (pgtable_l5_enabled ? 52 : 46) |
32 | # define MAX_PHYSMEM_BITS 52 | ||
33 | # else | ||
34 | # define MAX_PHYSADDR_BITS 44 | ||
35 | # define MAX_PHYSMEM_BITS 46 | ||
36 | # endif | ||
37 | #endif | 32 | #endif |
38 | 33 | ||
39 | #endif /* CONFIG_SPARSEMEM */ | 34 | #endif /* CONFIG_SPARSEMEM */ |
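For scale: MAX_PHYSMEM_BITS of 46 bounds sparsemem at 2^46 = 64 TiB of physical address space in the 4-level case, while the 5-level value of 52 raises that to 2^52 = 4 PiB; both limits are now resolved through pgtable_l5_enabled at run time instead of at compile time.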
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 88306054bd98..199e15bd3ec5 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h | |||
@@ -131,6 +131,16 @@ struct x86_hyper_init { | |||
131 | }; | 131 | }; |
132 | 132 | ||
133 | /** | 133 | /** |
134 | * struct x86_init_acpi - x86 ACPI init functions | ||
135 | * @get_root_pointer: get RSDP address | ||
136 | * @reduced_hw_early_init: hardware reduced platform early init | ||
137 | */ | ||
138 | struct x86_init_acpi { | ||
139 | u64 (*get_root_pointer)(void); | ||
140 | void (*reduced_hw_early_init)(void); | ||
141 | }; | ||
142 | |||
143 | /** | ||
134 | * struct x86_init_ops - functions for platform specific setup | 144 | * struct x86_init_ops - functions for platform specific setup |
135 | * | 145 | * |
136 | */ | 146 | */ |
@@ -144,6 +154,7 @@ struct x86_init_ops { | |||
144 | struct x86_init_iommu iommu; | 154 | struct x86_init_iommu iommu; |
145 | struct x86_init_pci pci; | 155 | struct x86_init_pci pci; |
146 | struct x86_hyper_init hyper; | 156 | struct x86_hyper_init hyper; |
157 | struct x86_init_acpi acpi; | ||
147 | }; | 158 | }; |
148 | 159 | ||
149 | /** | 160 | /** |
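The new acpi ops give platforms a hook to supply the RSDP and reduced-hardware setup without #ifdefs. A sketch of how a platform might install its own root-pointer callback; my_platform_get_rsdp() and my_boot_info are hypothetical and not part of this series:

/* Hypothetical platform code, for illustration only. */
static u64 __init my_platform_get_rsdp(void)
{
	return my_boot_info.rsdp_paddr;		/* assumed boot-info field */
}

static void __init my_platform_setup(void)
{
	x86_init.acpi.get_root_pointer = my_platform_get_rsdp;
}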
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 2aa92094b59d..7a37d9357bc4 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
@@ -1376,17 +1376,21 @@ static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d) | |||
1376 | * | 1376 | * |
1377 | * We initialize the Hardware-reduced ACPI model here: | 1377 | * We initialize the Hardware-reduced ACPI model here: |
1378 | */ | 1378 | */ |
1379 | void __init acpi_generic_reduced_hw_init(void) | ||
1380 | { | ||
1381 | /* | ||
1382 | * Override x86_init functions and bypass legacy PIC in | ||
1383 | * hardware reduced ACPI mode. | ||
1384 | */ | ||
1385 | x86_init.timers.timer_init = x86_init_noop; | ||
1386 | x86_init.irqs.pre_vector_init = x86_init_noop; | ||
1387 | legacy_pic = &null_legacy_pic; | ||
1388 | } | ||
1389 | |||
1379 | static void __init acpi_reduced_hw_init(void) | 1390 | static void __init acpi_reduced_hw_init(void) |
1380 | { | 1391 | { |
1381 | if (acpi_gbl_reduced_hardware) { | 1392 | if (acpi_gbl_reduced_hardware) |
1382 | /* | 1393 | x86_init.acpi.reduced_hw_early_init(); |
1383 | * Override x86_init functions and bypass legacy pic | ||
1384 | * in Hardware-reduced ACPI mode | ||
1385 | */ | ||
1386 | x86_init.timers.timer_init = x86_init_noop; | ||
1387 | x86_init.irqs.pre_vector_init = x86_init_noop; | ||
1388 | legacy_pic = &null_legacy_pic; | ||
1389 | } | ||
1390 | } | 1394 | } |
1391 | 1395 | ||
1392 | /* | 1396 | /* |
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 570e8bb1f386..a66229f51b12 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -28,7 +28,7 @@ obj-y += cpuid-deps.o | |||
28 | obj-$(CONFIG_PROC_FS) += proc.o | 28 | obj-$(CONFIG_PROC_FS) += proc.o |
29 | obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o | 29 | obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o |
30 | 30 | ||
31 | obj-$(CONFIG_CPU_SUP_INTEL) += intel.o | 31 | obj-$(CONFIG_CPU_SUP_INTEL) += intel.o intel_pconfig.o |
32 | obj-$(CONFIG_CPU_SUP_AMD) += amd.o | 32 | obj-$(CONFIG_CPU_SUP_AMD) += amd.o |
33 | obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o | 33 | obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o |
34 | obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o | 34 | obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o |
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index c3af167d0a70..b9693b80fc21 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -509,6 +509,90 @@ static void detect_vmx_virtcap(struct cpuinfo_x86 *c) | |||
509 | } | 509 | } |
510 | } | 510 | } |
511 | 511 | ||
512 | #define MSR_IA32_TME_ACTIVATE 0x982 | ||
513 | |||
514 | /* Helpers to access TME_ACTIVATE MSR */ | ||
515 | #define TME_ACTIVATE_LOCKED(x) (x & 0x1) | ||
516 | #define TME_ACTIVATE_ENABLED(x) (x & 0x2) | ||
517 | |||
518 | #define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */ | ||
519 | #define TME_ACTIVATE_POLICY_AES_XTS_128 0 | ||
520 | |||
521 | #define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */ | ||
522 | |||
523 | #define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */ | ||
524 | #define TME_ACTIVATE_CRYPTO_AES_XTS_128 1 | ||
525 | |||
526 | /* Values for mktme_status (SW only construct) */ | ||
527 | #define MKTME_ENABLED 0 | ||
528 | #define MKTME_DISABLED 1 | ||
529 | #define MKTME_UNINITIALIZED 2 | ||
530 | static int mktme_status = MKTME_UNINITIALIZED; | ||
531 | |||
532 | static void detect_tme(struct cpuinfo_x86 *c) | ||
533 | { | ||
534 | u64 tme_activate, tme_policy, tme_crypto_algs; | ||
535 | int keyid_bits = 0, nr_keyids = 0; | ||
536 | static u64 tme_activate_cpu0 = 0; | ||
537 | |||
538 | rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate); | ||
539 | |||
540 | if (mktme_status != MKTME_UNINITIALIZED) { | ||
541 | if (tme_activate != tme_activate_cpu0) { | ||
542 | /* Broken BIOS? */ | ||
543 | pr_err_once("x86/tme: configuration is inconsistent between CPUs\n"); | ||
544 | pr_err_once("x86/tme: MKTME is not usable\n"); | ||
545 | mktme_status = MKTME_DISABLED; | ||
546 | |||
547 | /* Proceed. We may need to exclude bits from x86_phys_bits. */ | ||
548 | } | ||
549 | } else { | ||
550 | tme_activate_cpu0 = tme_activate; | ||
551 | } | ||
552 | |||
553 | if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) { | ||
554 | pr_info_once("x86/tme: not enabled by BIOS\n"); | ||
555 | mktme_status = MKTME_DISABLED; | ||
556 | return; | ||
557 | } | ||
558 | |||
559 | if (mktme_status != MKTME_UNINITIALIZED) | ||
560 | goto detect_keyid_bits; | ||
561 | |||
562 | pr_info("x86/tme: enabled by BIOS\n"); | ||
563 | |||
564 | tme_policy = TME_ACTIVATE_POLICY(tme_activate); | ||
565 | if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128) | ||
566 | pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy); | ||
567 | |||
568 | tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate); | ||
569 | if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) { | ||
570 | pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n", | ||
571 | tme_crypto_algs); | ||
572 | mktme_status = MKTME_DISABLED; | ||
573 | } | ||
574 | detect_keyid_bits: | ||
575 | keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate); | ||
576 | nr_keyids = (1UL << keyid_bits) - 1; | ||
577 | if (nr_keyids) { | ||
578 | pr_info_once("x86/mktme: enabled by BIOS\n"); | ||
579 | pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids); | ||
580 | } else { | ||
581 | pr_info_once("x86/mktme: disabled by BIOS\n"); | ||
582 | } | ||
583 | |||
584 | if (mktme_status == MKTME_UNINITIALIZED) { | ||
585 | /* MKTME is usable */ | ||
586 | mktme_status = MKTME_ENABLED; | ||
587 | } | ||
588 | |||
589 | /* | ||
590 | * KeyID bits effectively lower the number of physical address | ||
591 | * bits. Update cpuinfo_x86::x86_phys_bits accordingly. | ||
592 | */ | ||
593 | c->x86_phys_bits -= keyid_bits; | ||
594 | } | ||
595 | |||
512 | static void init_intel_energy_perf(struct cpuinfo_x86 *c) | 596 | static void init_intel_energy_perf(struct cpuinfo_x86 *c) |
513 | { | 597 | { |
514 | u64 epb; | 598 | u64 epb; |
@@ -679,6 +763,9 @@ static void init_intel(struct cpuinfo_x86 *c) | |||
679 | if (cpu_has(c, X86_FEATURE_VMX)) | 763 | if (cpu_has(c, X86_FEATURE_VMX)) |
680 | detect_vmx_virtcap(c); | 764 | detect_vmx_virtcap(c); |
681 | 765 | ||
766 | if (cpu_has(c, X86_FEATURE_TME)) | ||
767 | detect_tme(c); | ||
768 | |||
682 | init_intel_energy_perf(c); | 769 | init_intel_energy_perf(c); |
683 | 770 | ||
684 | init_intel_misc_features(c); | 771 | init_intel_misc_features(c); |
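To make the MSR bit layout concrete, here is a stand-alone sketch that decodes an invented MSR_IA32_TME_ACTIVATE value with the same helper macros (the sample value is made up, not read from hardware):

#include <stdio.h>

#define TME_ACTIVATE_LOCKED(x)		((x) & 0x1)
#define TME_ACTIVATE_ENABLED(x)		((x) & 0x2)
#define TME_ACTIVATE_POLICY(x)		(((x) >> 4) & 0xf)
#define TME_ACTIVATE_KEYID_BITS(x)	(((x) >> 32) & 0xf)
#define TME_ACTIVATE_CRYPTO_ALGS(x)	(((x) >> 48) & 0xffff)

int main(void)
{
	/* locked+enabled, policy 0 (AES-XTS-128), 6 KeyID bits, algs bit 0 set */
	unsigned long long act = (1ULL << 48) | (6ULL << 32) | 0x3;
	int keyid_bits = TME_ACTIVATE_KEYID_BITS(act);

	printf("locked=%llu enabled=%llu policy=%llu\n",
	       TME_ACTIVATE_LOCKED(act), TME_ACTIVATE_ENABLED(act) >> 1,
	       TME_ACTIVATE_POLICY(act));
	printf("keyid_bits=%d -> %d KeyIDs, x86_phys_bits drops by %d\n",
	       keyid_bits, (1 << keyid_bits) - 1, keyid_bits);
	return 0;
}

With 6 KeyID bits, a CPU that reports 46 physical address bits is left with 40 usable bits, which is exactly what the c->x86_phys_bits -= keyid_bits adjustment above expresses.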
diff --git a/arch/x86/kernel/cpu/intel_pconfig.c b/arch/x86/kernel/cpu/intel_pconfig.c new file mode 100644 index 000000000000..0771a905b286 --- /dev/null +++ b/arch/x86/kernel/cpu/intel_pconfig.c | |||
@@ -0,0 +1,82 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | /* | ||
3 | * Intel PCONFIG instruction support. | ||
4 | * | ||
5 | * Copyright (C) 2017 Intel Corporation | ||
6 | * | ||
7 | * Author: | ||
8 | * Kirill A. Shutemov <kirill.shutemov@linux.intel.com> | ||
9 | */ | ||
10 | |||
11 | #include <asm/cpufeature.h> | ||
12 | #include <asm/intel_pconfig.h> | ||
13 | |||
14 | #define PCONFIG_CPUID 0x1b | ||
15 | |||
16 | #define PCONFIG_CPUID_SUBLEAF_MASK ((1 << 12) - 1) | ||
17 | |||
18 | /* Subleaf type (EAX) for PCONFIG CPUID leaf (0x1B) */ | ||
19 | enum { | ||
20 | PCONFIG_CPUID_SUBLEAF_INVALID = 0, | ||
21 | PCONFIG_CPUID_SUBLEAF_TARGETID = 1, | ||
22 | }; | ||
23 | |||
24 | /* Bitmask of supported targets */ | ||
25 | static u64 targets_supported __read_mostly; | ||
26 | |||
27 | int pconfig_target_supported(enum pconfig_target target) | ||
28 | { | ||
29 | /* | ||
30 | * We would need to re-think the implementation once we get > 64 | ||
31 | * PCONFIG targets. Spec allows up to 2^32 targets. | ||
32 | */ | ||
33 | BUILD_BUG_ON(PCONFIG_TARGET_NR >= 64); | ||
34 | |||
35 | if (WARN_ON_ONCE(target >= 64)) | ||
36 | return 0; | ||
37 | return targets_supported & (1ULL << target); | ||
38 | } | ||
39 | |||
40 | static int __init intel_pconfig_init(void) | ||
41 | { | ||
42 | int subleaf; | ||
43 | |||
44 | if (!boot_cpu_has(X86_FEATURE_PCONFIG)) | ||
45 | return 0; | ||
46 | |||
47 | /* | ||
48 | * Scan subleafs of PCONFIG CPUID leaf. | ||
49 | * | ||
50 | * Subleafs of the same type need not be consecutive. | ||
51 | * | ||
52 | * Stop on the first invalid subleaf type. All subleafs after the first | ||
53 | * invalid are invalid too. | ||
54 | */ | ||
55 | for (subleaf = 0; subleaf < INT_MAX; subleaf++) { | ||
56 | struct cpuid_regs regs; | ||
57 | |||
58 | cpuid_count(PCONFIG_CPUID, subleaf, | ||
59 | ®s.eax, ®s.ebx, ®s.ecx, ®s.edx); | ||
60 | |||
61 | switch (regs.eax & PCONFIG_CPUID_SUBLEAF_MASK) { | ||
62 | case PCONFIG_CPUID_SUBLEAF_INVALID: | ||
63 | /* Stop on the first invalid subleaf */ | ||
64 | goto out; | ||
65 | case PCONFIG_CPUID_SUBLEAF_TARGETID: | ||
66 | /* Mark supported PCONFIG targets */ | ||
67 | if (regs.ebx < 64) | ||
68 | targets_supported |= (1ULL << regs.ebx); | ||
69 | if (regs.ecx < 64) | ||
70 | targets_supported |= (1ULL << regs.ecx); | ||
71 | if (regs.edx < 64) | ||
72 | targets_supported |= (1ULL << regs.edx); | ||
73 | break; | ||
74 | default: | ||
75 | /* Unknown CPUID.PCONFIG subleaf: ignore */ | ||
76 | break; | ||
77 | } | ||
78 | } | ||
79 | out: | ||
80 | return 0; | ||
81 | } | ||
82 | arch_initcall(intel_pconfig_init); | ||
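Consumers are expected to gate PCONFIG use on the bitmap built here. A minimal sketch of a caller, assuming the PCONFIG_TARGET_MKTME enumerator from asm/intel_pconfig.h (that header is not part of this hunk):

#include <linux/errno.h>
#include <asm/intel_pconfig.h>

/* Hypothetical consumer, for illustration only. */
static int __init my_mktme_probe(void)
{
	if (!pconfig_target_supported(PCONFIG_TARGET_MKTME))
		return -ENODEV;

	/* PCONFIG with the MKTME target can be issued from here on. */
	return 0;
}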
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 3c1eec17312b..42cf2880d0ed 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -1095,19 +1095,7 @@ static void mce_unmap_kpfn(unsigned long pfn) | |||
1095 | * a legal address. | 1095 | * a legal address. |
1096 | */ | 1096 | */ |
1097 | 1097 | ||
1098 | /* | ||
1099 | * Build time check to see if we have a spare virtual bit. Don't want | ||
1100 | * to leave this until run time because most developers don't have a | ||
1101 | * system that can exercise this code path. This will only become a | ||
1102 | * problem if/when we move beyond 5-level page tables. | ||
1103 | * | ||
1104 | * Hard code "9" here because cpp doesn't grok ilog2(PTRS_PER_PGD) | ||
1105 | */ | ||
1106 | #if PGDIR_SHIFT + 9 < 63 | ||
1107 | decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63)); | 1098 | decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63)); |
1108 | #else | ||
1109 | #error "no unused virtual bit available" | ||
1110 | #endif | ||
1111 | 1099 | ||
1112 | if (set_memory_np(decoy_addr, 1)) | 1100 | if (set_memory_np(decoy_addr, 1)) |
1113 | pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn); | 1101 | pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn); |
@@ -2357,6 +2345,12 @@ static __init int mcheck_init_device(void) | |||
2357 | { | 2345 | { |
2358 | int err; | 2346 | int err; |
2359 | 2347 | ||
2348 | /* | ||
2349 | * Check if we have a spare virtual bit. This will only become | ||
2350 | * a problem if/when we move beyond 5-level page tables. | ||
2351 | */ | ||
2352 | MAYBE_BUILD_BUG_ON(__VIRTUAL_MASK_SHIFT >= 63); | ||
2353 | |||
2360 | if (!mce_available(&boot_cpu_data)) { | 2354 | if (!mce_available(&boot_cpu_data)) { |
2361 | err = -EIO; | 2355 | err = -EIO; |
2362 | goto err_out; | 2356 | goto err_out; |
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 71c11ad5643e..6a2cb1442e05 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -924,6 +924,24 @@ static int __init parse_memmap_one(char *p) | |||
924 | } else if (*p == '!') { | 924 | } else if (*p == '!') { |
925 | start_at = memparse(p+1, &p); | 925 | start_at = memparse(p+1, &p); |
926 | e820__range_add(start_at, mem_size, E820_TYPE_PRAM); | 926 | e820__range_add(start_at, mem_size, E820_TYPE_PRAM); |
927 | } else if (*p == '%') { | ||
928 | enum e820_type from = 0, to = 0; | ||
929 | |||
930 | start_at = memparse(p + 1, &p); | ||
931 | if (*p == '-') | ||
932 | from = simple_strtoull(p + 1, &p, 0); | ||
933 | if (*p == '+') | ||
934 | to = simple_strtoull(p + 1, &p, 0); | ||
935 | if (*p != '\0') | ||
936 | return -EINVAL; | ||
937 | if (from && to) | ||
938 | e820__range_update(start_at, mem_size, from, to); | ||
939 | else if (to) | ||
940 | e820__range_add(start_at, mem_size, to); | ||
941 | else if (from) | ||
942 | e820__range_remove(start_at, mem_size, from, 1); | ||
943 | else | ||
944 | e820__range_remove(start_at, mem_size, 0, 0); | ||
927 | } else { | 945 | } else { |
928 | e820__range_remove(mem_size, ULLONG_MAX - mem_size, E820_TYPE_RAM, 1); | 946 | e820__range_remove(mem_size, ULLONG_MAX - mem_size, E820_TYPE_RAM, 1); |
929 | } | 947 | } |
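For illustration (addresses and sizes invented): memmap=512M%0x100000000-1+12 converts 512M of e820 type 1 (RAM) at 4G into type 12 (PRAM) via e820__range_update(); memmap=512M%0x100000000+12 marks the range as PRAM regardless of its previous type; memmap=512M%0x100000000-1 removes matching RAM from the map; and memmap=512M%0x100000000 with neither type drops the range entirely.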
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 7ba5d819ebe3..0c855deee165 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -32,6 +32,11 @@ | |||
32 | #include <asm/microcode.h> | 32 | #include <asm/microcode.h> |
33 | #include <asm/kasan.h> | 33 | #include <asm/kasan.h> |
34 | 34 | ||
35 | #ifdef CONFIG_X86_5LEVEL | ||
36 | #undef pgtable_l5_enabled | ||
37 | #define pgtable_l5_enabled __pgtable_l5_enabled | ||
38 | #endif | ||
39 | |||
35 | /* | 40 | /* |
36 | * Manage page tables very early on. | 41 | * Manage page tables very early on. |
37 | */ | 42 | */ |
@@ -39,6 +44,24 @@ extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD]; | |||
39 | static unsigned int __initdata next_early_pgt; | 44 | static unsigned int __initdata next_early_pgt; |
40 | pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX); | 45 | pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX); |
41 | 46 | ||
47 | #ifdef CONFIG_X86_5LEVEL | ||
48 | unsigned int __pgtable_l5_enabled __ro_after_init; | ||
49 | EXPORT_SYMBOL(__pgtable_l5_enabled); | ||
50 | unsigned int pgdir_shift __ro_after_init = 39; | ||
51 | EXPORT_SYMBOL(pgdir_shift); | ||
52 | unsigned int ptrs_per_p4d __ro_after_init = 1; | ||
53 | EXPORT_SYMBOL(ptrs_per_p4d); | ||
54 | #endif | ||
55 | |||
56 | #ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT | ||
57 | unsigned long page_offset_base __ro_after_init = __PAGE_OFFSET_BASE_L4; | ||
58 | EXPORT_SYMBOL(page_offset_base); | ||
59 | unsigned long vmalloc_base __ro_after_init = __VMALLOC_BASE_L4; | ||
60 | EXPORT_SYMBOL(vmalloc_base); | ||
61 | unsigned long vmemmap_base __ro_after_init = __VMEMMAP_BASE_L4; | ||
62 | EXPORT_SYMBOL(vmemmap_base); | ||
63 | #endif | ||
64 | |||
42 | #define __head __section(.head.text) | 65 | #define __head __section(.head.text) |
43 | 66 | ||
44 | static void __head *fixup_pointer(void *ptr, unsigned long physaddr) | 67 | static void __head *fixup_pointer(void *ptr, unsigned long physaddr) |
@@ -46,6 +69,41 @@ static void __head *fixup_pointer(void *ptr, unsigned long physaddr) | |||
46 | return ptr - (void *)_text + (void *)physaddr; | 69 | return ptr - (void *)_text + (void *)physaddr; |
47 | } | 70 | } |
48 | 71 | ||
72 | static unsigned long __head *fixup_long(void *ptr, unsigned long physaddr) | ||
73 | { | ||
74 | return fixup_pointer(ptr, physaddr); | ||
75 | } | ||
76 | |||
77 | #ifdef CONFIG_X86_5LEVEL | ||
78 | static unsigned int __head *fixup_int(void *ptr, unsigned long physaddr) | ||
79 | { | ||
80 | return fixup_pointer(ptr, physaddr); | ||
81 | } | ||
82 | |||
83 | static bool __head check_la57_support(unsigned long physaddr) | ||
84 | { | ||
85 | if (native_cpuid_eax(0) < 7) | ||
86 | return false; | ||
87 | |||
88 | if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) | ||
89 | return false; | ||
90 | |||
91 | *fixup_int(&pgtable_l5_enabled, physaddr) = 1; | ||
92 | *fixup_int(&pgdir_shift, physaddr) = 48; | ||
93 | *fixup_int(&ptrs_per_p4d, physaddr) = 512; | ||
94 | *fixup_long(&page_offset_base, physaddr) = __PAGE_OFFSET_BASE_L5; | ||
95 | *fixup_long(&vmalloc_base, physaddr) = __VMALLOC_BASE_L5; | ||
96 | *fixup_long(&vmemmap_base, physaddr) = __VMEMMAP_BASE_L5; | ||
97 | |||
98 | return true; | ||
99 | } | ||
100 | #else | ||
101 | static bool __head check_la57_support(unsigned long physaddr) | ||
102 | { | ||
103 | return false; | ||
104 | } | ||
105 | #endif | ||
106 | |||
49 | unsigned long __head __startup_64(unsigned long physaddr, | 107 | unsigned long __head __startup_64(unsigned long physaddr, |
50 | struct boot_params *bp) | 108 | struct boot_params *bp) |
51 | { | 109 | { |
@@ -55,9 +113,12 @@ unsigned long __head __startup_64(unsigned long physaddr, | |||
55 | p4dval_t *p4d; | 113 | p4dval_t *p4d; |
56 | pudval_t *pud; | 114 | pudval_t *pud; |
57 | pmdval_t *pmd, pmd_entry; | 115 | pmdval_t *pmd, pmd_entry; |
116 | bool la57; | ||
58 | int i; | 117 | int i; |
59 | unsigned int *next_pgt_ptr; | 118 | unsigned int *next_pgt_ptr; |
60 | 119 | ||
120 | la57 = check_la57_support(physaddr); | ||
121 | |||
61 | /* Is the address too large? */ | 122 | /* Is the address too large? */ |
62 | if (physaddr >> MAX_PHYSMEM_BITS) | 123 | if (physaddr >> MAX_PHYSMEM_BITS) |
63 | for (;;); | 124 | for (;;); |
@@ -81,9 +142,14 @@ unsigned long __head __startup_64(unsigned long physaddr, | |||
81 | /* Fixup the physical addresses in the page table */ | 142 | /* Fixup the physical addresses in the page table */ |
82 | 143 | ||
83 | pgd = fixup_pointer(&early_top_pgt, physaddr); | 144 | pgd = fixup_pointer(&early_top_pgt, physaddr); |
84 | pgd[pgd_index(__START_KERNEL_map)] += load_delta; | 145 | p = pgd + pgd_index(__START_KERNEL_map); |
85 | 146 | if (la57) | |
86 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) { | 147 | *p = (unsigned long)level4_kernel_pgt; |
148 | else | ||
149 | *p = (unsigned long)level3_kernel_pgt; | ||
150 | *p += _PAGE_TABLE_NOENC - __START_KERNEL_map + load_delta; | ||
151 | |||
152 | if (la57) { | ||
87 | p4d = fixup_pointer(&level4_kernel_pgt, physaddr); | 153 | p4d = fixup_pointer(&level4_kernel_pgt, physaddr); |
88 | p4d[511] += load_delta; | 154 | p4d[511] += load_delta; |
89 | } | 155 | } |
@@ -108,7 +174,7 @@ unsigned long __head __startup_64(unsigned long physaddr, | |||
108 | 174 | ||
109 | pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask(); | 175 | pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask(); |
110 | 176 | ||
111 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) { | 177 | if (la57) { |
112 | p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr); | 178 | p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr); |
113 | 179 | ||
114 | i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD; | 180 | i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD; |
@@ -154,8 +220,7 @@ unsigned long __head __startup_64(unsigned long physaddr, | |||
154 | * Fixup phys_base - remove the memory encryption mask to obtain | 220 | * Fixup phys_base - remove the memory encryption mask to obtain |
155 | * the true physical address. | 221 | * the true physical address. |
156 | */ | 222 | */ |
157 | p = fixup_pointer(&phys_base, physaddr); | 223 | *fixup_long(&phys_base, physaddr) += load_delta - sme_get_me_mask(); |
158 | *p += load_delta - sme_get_me_mask(); | ||
159 | 224 | ||
160 | /* Encrypt the kernel and related (if SME is active) */ | 225 | /* Encrypt the kernel and related (if SME is active) */ |
161 | sme_encrypt_kernel(bp); | 226 | sme_encrypt_kernel(bp); |
@@ -206,7 +271,7 @@ again: | |||
206 | * critical -- __PAGE_OFFSET would point us back into the dynamic | 271 | * critical -- __PAGE_OFFSET would point us back into the dynamic |
207 | * range and we might end up looping forever... | 272 | * range and we might end up looping forever... |
208 | */ | 273 | */ |
209 | if (!IS_ENABLED(CONFIG_X86_5LEVEL)) | 274 | if (!pgtable_l5_enabled) |
210 | p4d_p = pgd_p; | 275 | p4d_p = pgd_p; |
211 | else if (pgd) | 276 | else if (pgd) |
212 | p4d_p = (p4dval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base); | 277 | p4d_p = (p4dval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base); |
@@ -322,7 +387,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) | |||
322 | BUILD_BUG_ON((__START_KERNEL_map & ~PMD_MASK) != 0); | 387 | BUILD_BUG_ON((__START_KERNEL_map & ~PMD_MASK) != 0); |
323 | BUILD_BUG_ON((MODULES_VADDR & ~PMD_MASK) != 0); | 388 | BUILD_BUG_ON((MODULES_VADDR & ~PMD_MASK) != 0); |
324 | BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL)); | 389 | BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL)); |
325 | BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == | 390 | MAYBE_BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == |
326 | (__START_KERNEL & PGDIR_MASK))); | 391 | (__START_KERNEL & PGDIR_MASK))); |
327 | BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END); | 392 | BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END); |
328 | 393 | ||
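check_la57_support() performs the same test user space can do via CPUID. A small sketch using GCC's cpuid.h (not kernel code) that checks the identical leaf-7 bit:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (__get_cpuid_max(0, NULL) < 7)
		return 1;
	__cpuid_count(7, 0, eax, ebx, ecx, edx);
	/* LA57 is CPUID.(EAX=7, ECX=0):ECX bit 16, i.e. X86_FEATURE_LA57 & 31 */
	printf("la57: %s\n", (ecx & (1u << 16)) ? "yes" : "no");
	return 0;
}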
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 0f545b3cf926..48385c1074a5 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -39,12 +39,12 @@ | |||
39 | * | 39 | * |
40 | */ | 40 | */ |
41 | 41 | ||
42 | #define l4_index(x) (((x) >> 39) & 511) | ||
42 | #define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) | 43 | #define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) |
43 | 44 | ||
44 | #if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH) | 45 | L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4) |
45 | PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE) | 46 | L4_START_KERNEL = l4_index(__START_KERNEL_map) |
46 | PGD_START_KERNEL = pgd_index(__START_KERNEL_map) | 47 | |
47 | #endif | ||
48 | L3_START_KERNEL = pud_index(__START_KERNEL_map) | 48 | L3_START_KERNEL = pud_index(__START_KERNEL_map) |
49 | 49 | ||
50 | .text | 50 | .text |
@@ -125,7 +125,10 @@ ENTRY(secondary_startup_64) | |||
125 | /* Enable PAE mode, PGE and LA57 */ | 125 | /* Enable PAE mode, PGE and LA57 */ |
126 | movl $(X86_CR4_PAE | X86_CR4_PGE), %ecx | 126 | movl $(X86_CR4_PAE | X86_CR4_PGE), %ecx |
127 | #ifdef CONFIG_X86_5LEVEL | 127 | #ifdef CONFIG_X86_5LEVEL |
128 | testl $1, __pgtable_l5_enabled(%rip) | ||
129 | jz 1f | ||
128 | orl $X86_CR4_LA57, %ecx | 130 | orl $X86_CR4_LA57, %ecx |
131 | 1: | ||
129 | #endif | 132 | #endif |
130 | movq %rcx, %cr4 | 133 | movq %rcx, %cr4 |
131 | 134 | ||
@@ -374,12 +377,7 @@ GLOBAL(name) | |||
374 | 377 | ||
375 | __INITDATA | 378 | __INITDATA |
376 | NEXT_PGD_PAGE(early_top_pgt) | 379 | NEXT_PGD_PAGE(early_top_pgt) |
377 | .fill 511,8,0 | 380 | .fill 512,8,0 |
378 | #ifdef CONFIG_X86_5LEVEL | ||
379 | .quad level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC | ||
380 | #else | ||
381 | .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC | ||
382 | #endif | ||
383 | .fill PTI_USER_PGD_FILL,8,0 | 381 | .fill PTI_USER_PGD_FILL,8,0 |
384 | 382 | ||
385 | NEXT_PAGE(early_dynamic_pgts) | 383 | NEXT_PAGE(early_dynamic_pgts) |
@@ -390,9 +388,9 @@ NEXT_PAGE(early_dynamic_pgts) | |||
390 | #if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH) | 388 | #if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH) |
391 | NEXT_PGD_PAGE(init_top_pgt) | 389 | NEXT_PGD_PAGE(init_top_pgt) |
392 | .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC | 390 | .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC |
393 | .org init_top_pgt + PGD_PAGE_OFFSET*8, 0 | 391 | .org init_top_pgt + L4_PAGE_OFFSET*8, 0 |
394 | .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC | 392 | .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC |
395 | .org init_top_pgt + PGD_START_KERNEL*8, 0 | 393 | .org init_top_pgt + L4_START_KERNEL*8, 0 |
396 | /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ | 394 | /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ |
397 | .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC | 395 | .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC |
398 | .fill PTI_USER_PGD_FILL,8,0 | 396 | .fill PTI_USER_PGD_FILL,8,0 |
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index faeea0b5cbd0..93bd4fb603d1 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c | |||
@@ -350,6 +350,7 @@ void arch_crash_save_vmcoreinfo(void) | |||
350 | { | 350 | { |
351 | VMCOREINFO_NUMBER(phys_base); | 351 | VMCOREINFO_NUMBER(phys_base); |
352 | VMCOREINFO_SYMBOL(init_top_pgt); | 352 | VMCOREINFO_SYMBOL(init_top_pgt); |
353 | VMCOREINFO_NUMBER(pgtable_l5_enabled); | ||
353 | 354 | ||
354 | #ifdef CONFIG_NUMA | 355 | #ifdef CONFIG_NUMA |
355 | VMCOREINFO_SYMBOL(node_data); | 356 | VMCOREINFO_SYMBOL(node_data); |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 4c616be28506..6285697b6e56 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -189,9 +189,7 @@ struct ist_info ist_info; | |||
189 | #endif | 189 | #endif |
190 | 190 | ||
191 | #else | 191 | #else |
192 | struct cpuinfo_x86 boot_cpu_data __read_mostly = { | 192 | struct cpuinfo_x86 boot_cpu_data __read_mostly; |
193 | .x86_phys_bits = MAX_PHYSMEM_BITS, | ||
194 | }; | ||
195 | EXPORT_SYMBOL(boot_cpu_data); | 193 | EXPORT_SYMBOL(boot_cpu_data); |
196 | #endif | 194 | #endif |
197 | 195 | ||
@@ -851,6 +849,7 @@ void __init setup_arch(char **cmdline_p) | |||
851 | __flush_tlb_all(); | 849 | __flush_tlb_all(); |
852 | #else | 850 | #else |
853 | printk(KERN_INFO "Command line: %s\n", boot_command_line); | 851 | printk(KERN_INFO "Command line: %s\n", boot_command_line); |
852 | boot_cpu_data.x86_phys_bits = MAX_PHYSMEM_BITS; | ||
854 | #endif | 853 | #endif |
855 | 854 | ||
856 | /* | 855 | /* |
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 2bccd03bd654..ebda84a91510 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/export.h> | 8 | #include <linux/export.h> |
9 | #include <linux/pci.h> | 9 | #include <linux/pci.h> |
10 | 10 | ||
11 | #include <asm/acpi.h> | ||
11 | #include <asm/bios_ebda.h> | 12 | #include <asm/bios_ebda.h> |
12 | #include <asm/paravirt.h> | 13 | #include <asm/paravirt.h> |
13 | #include <asm/pci_x86.h> | 14 | #include <asm/pci_x86.h> |
@@ -26,10 +27,11 @@ | |||
26 | 27 | ||
27 | void x86_init_noop(void) { } | 28 | void x86_init_noop(void) { } |
28 | void __init x86_init_uint_noop(unsigned int unused) { } | 29 | void __init x86_init_uint_noop(unsigned int unused) { } |
29 | int __init iommu_init_noop(void) { return 0; } | 30 | static int __init iommu_init_noop(void) { return 0; } |
30 | void iommu_shutdown_noop(void) { } | 31 | static void iommu_shutdown_noop(void) { } |
31 | bool __init bool_x86_init_noop(void) { return false; } | 32 | static bool __init bool_x86_init_noop(void) { return false; } |
32 | void x86_op_int_noop(int cpu) { } | 33 | static void x86_op_int_noop(int cpu) { } |
34 | static u64 u64_x86_init_noop(void) { return 0; } | ||
33 | 35 | ||
34 | /* | 36 | /* |
35 | * The platform setup functions are preset with the default functions | 37 | * The platform setup functions are preset with the default functions |
@@ -91,6 +93,11 @@ struct x86_init_ops x86_init __initdata = { | |||
91 | .x2apic_available = bool_x86_init_noop, | 93 | .x2apic_available = bool_x86_init_noop, |
92 | .init_mem_mapping = x86_init_noop, | 94 | .init_mem_mapping = x86_init_noop, |
93 | }, | 95 | }, |
96 | |||
97 | .acpi = { | ||
98 | .get_root_pointer = u64_x86_init_noop, | ||
99 | .reduced_hw_early_init = acpi_generic_reduced_hw_init, | ||
100 | }, | ||
94 | }; | 101 | }; |
95 | 102 | ||
96 | struct x86_cpuinit_ops x86_cpuinit = { | 103 | struct x86_cpuinit_ops x86_cpuinit = { |
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 27e9e90a8d35..4b101dd6e52f 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile | |||
@@ -1,12 +1,15 @@ | |||
1 | # SPDX-License-Identifier: GPL-2.0 | 1 | # SPDX-License-Identifier: GPL-2.0 |
2 | # Kernel does not boot with instrumentation of tlb.c and mem_encrypt.c | 2 | # Kernel does not boot with instrumentation of tlb.c and mem_encrypt*.c |
3 | KCOV_INSTRUMENT_tlb.o := n | 3 | KCOV_INSTRUMENT_tlb.o := n |
4 | KCOV_INSTRUMENT_mem_encrypt.o := n | 4 | KCOV_INSTRUMENT_mem_encrypt.o := n |
5 | KCOV_INSTRUMENT_mem_encrypt_identity.o := n | ||
5 | 6 | ||
6 | KASAN_SANITIZE_mem_encrypt.o := n | 7 | KASAN_SANITIZE_mem_encrypt.o := n |
8 | KASAN_SANITIZE_mem_encrypt_identity.o := n | ||
7 | 9 | ||
8 | ifdef CONFIG_FUNCTION_TRACER | 10 | ifdef CONFIG_FUNCTION_TRACER |
9 | CFLAGS_REMOVE_mem_encrypt.o = -pg | 11 | CFLAGS_REMOVE_mem_encrypt.o = -pg |
12 | CFLAGS_REMOVE_mem_encrypt_identity.o = -pg | ||
10 | endif | 13 | endif |
11 | 14 | ||
12 | obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ | 15 | obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ |
@@ -16,6 +19,7 @@ obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ | |||
16 | nostackp := $(call cc-option, -fno-stack-protector) | 19 | nostackp := $(call cc-option, -fno-stack-protector) |
17 | CFLAGS_physaddr.o := $(nostackp) | 20 | CFLAGS_physaddr.o := $(nostackp) |
18 | CFLAGS_setup_nx.o := $(nostackp) | 21 | CFLAGS_setup_nx.o := $(nostackp) |
22 | CFLAGS_mem_encrypt_identity.o := $(nostackp) | ||
19 | 23 | ||
20 | CFLAGS_fault.o := -I$(src)/../include/asm/trace | 24 | CFLAGS_fault.o := -I$(src)/../include/asm/trace |
21 | 25 | ||
@@ -47,4 +51,5 @@ obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o | |||
47 | obj-$(CONFIG_PAGE_TABLE_ISOLATION) += pti.o | 51 | obj-$(CONFIG_PAGE_TABLE_ISOLATION) += pti.o |
48 | 52 | ||
49 | obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt.o | 53 | obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt.o |
54 | obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_identity.o | ||
50 | obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o | 55 | obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o |
diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c index 421f2664ffa0..51a6f92da2bf 100644 --- a/arch/x86/mm/debug_pagetables.c +++ b/arch/x86/mm/debug_pagetables.c | |||
@@ -72,6 +72,31 @@ static const struct file_operations ptdump_curusr_fops = { | |||
72 | }; | 72 | }; |
73 | #endif | 73 | #endif |
74 | 74 | ||
75 | #if defined(CONFIG_EFI) && defined(CONFIG_X86_64) | ||
76 | extern pgd_t *efi_pgd; | ||
77 | static struct dentry *pe_efi; | ||
78 | |||
79 | static int ptdump_show_efi(struct seq_file *m, void *v) | ||
80 | { | ||
81 | if (efi_pgd) | ||
82 | ptdump_walk_pgd_level_debugfs(m, efi_pgd, false); | ||
83 | return 0; | ||
84 | } | ||
85 | |||
86 | static int ptdump_open_efi(struct inode *inode, struct file *filp) | ||
87 | { | ||
88 | return single_open(filp, ptdump_show_efi, NULL); | ||
89 | } | ||
90 | |||
91 | static const struct file_operations ptdump_efi_fops = { | ||
92 | .owner = THIS_MODULE, | ||
93 | .open = ptdump_open_efi, | ||
94 | .read = seq_read, | ||
95 | .llseek = seq_lseek, | ||
96 | .release = single_release, | ||
97 | }; | ||
98 | #endif | ||
99 | |||
75 | static struct dentry *dir, *pe_knl, *pe_curknl; | 100 | static struct dentry *dir, *pe_knl, *pe_curknl; |
76 | 101 | ||
77 | static int __init pt_dump_debug_init(void) | 102 | static int __init pt_dump_debug_init(void) |
@@ -96,6 +121,13 @@ static int __init pt_dump_debug_init(void) | |||
96 | if (!pe_curusr) | 121 | if (!pe_curusr) |
97 | goto err; | 122 | goto err; |
98 | #endif | 123 | #endif |
124 | |||
125 | #if defined(CONFIG_EFI) && defined(CONFIG_X86_64) | ||
126 | pe_efi = debugfs_create_file("efi", 0400, dir, NULL, &ptdump_efi_fops); | ||
127 | if (!pe_efi) | ||
128 | goto err; | ||
129 | #endif | ||
130 | |||
99 | return 0; | 131 | return 0; |
100 | err: | 132 | err: |
101 | debugfs_remove_recursive(dir); | 133 | debugfs_remove_recursive(dir); |
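Once the 'efi' file exists, the EFI page tables can be inspected at run time, e.g. with cat /sys/kernel/debug/page_tables/efi (assuming the parent debugfs directory created earlier in this file is named page_tables; that part is outside this hunk).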
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 2a4849e92831..62a7e9f65dec 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c | |||
@@ -29,6 +29,7 @@ | |||
29 | struct pg_state { | 29 | struct pg_state { |
30 | int level; | 30 | int level; |
31 | pgprot_t current_prot; | 31 | pgprot_t current_prot; |
32 | pgprotval_t effective_prot; | ||
32 | unsigned long start_address; | 33 | unsigned long start_address; |
33 | unsigned long current_address; | 34 | unsigned long current_address; |
34 | const struct addr_marker *marker; | 35 | const struct addr_marker *marker; |
@@ -85,11 +86,15 @@ static struct addr_marker address_markers[] = { | |||
85 | [VMALLOC_START_NR] = { 0UL, "vmalloc() Area" }, | 86 | [VMALLOC_START_NR] = { 0UL, "vmalloc() Area" }, |
86 | [VMEMMAP_START_NR] = { 0UL, "Vmemmap" }, | 87 | [VMEMMAP_START_NR] = { 0UL, "Vmemmap" }, |
87 | #ifdef CONFIG_KASAN | 88 | #ifdef CONFIG_KASAN |
88 | [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" }, | 89 | /* |
89 | [KASAN_SHADOW_END_NR] = { KASAN_SHADOW_END, "KASAN shadow end" }, | 90 | * These fields get initialized with the (dynamic) |
91 | * KASAN_SHADOW_{START,END} values in pt_dump_init(). | ||
92 | */ | ||
93 | [KASAN_SHADOW_START_NR] = { 0UL, "KASAN shadow" }, | ||
94 | [KASAN_SHADOW_END_NR] = { 0UL, "KASAN shadow end" }, | ||
90 | #endif | 95 | #endif |
91 | #ifdef CONFIG_MODIFY_LDT_SYSCALL | 96 | #ifdef CONFIG_MODIFY_LDT_SYSCALL |
92 | [LDT_NR] = { LDT_BASE_ADDR, "LDT remap" }, | 97 | [LDT_NR] = { 0UL, "LDT remap" }, |
93 | #endif | 98 | #endif |
94 | [CPU_ENTRY_AREA_NR] = { CPU_ENTRY_AREA_BASE,"CPU entry Area" }, | 99 | [CPU_ENTRY_AREA_NR] = { CPU_ENTRY_AREA_BASE,"CPU entry Area" }, |
95 | #ifdef CONFIG_X86_ESPFIX64 | 100 | #ifdef CONFIG_X86_ESPFIX64 |
@@ -231,9 +236,9 @@ static unsigned long normalize_addr(unsigned long u) | |||
231 | * print what we collected so far. | 236 | * print what we collected so far. |
232 | */ | 237 | */ |
233 | static void note_page(struct seq_file *m, struct pg_state *st, | 238 | static void note_page(struct seq_file *m, struct pg_state *st, |
234 | pgprot_t new_prot, int level) | 239 | pgprot_t new_prot, pgprotval_t new_eff, int level) |
235 | { | 240 | { |
236 | pgprotval_t prot, cur; | 241 | pgprotval_t prot, cur, eff; |
237 | static const char units[] = "BKMGTPE"; | 242 | static const char units[] = "BKMGTPE"; |
238 | 243 | ||
239 | /* | 244 | /* |
@@ -243,23 +248,24 @@ static void note_page(struct seq_file *m, struct pg_state *st, | |||
243 | */ | 248 | */ |
244 | prot = pgprot_val(new_prot); | 249 | prot = pgprot_val(new_prot); |
245 | cur = pgprot_val(st->current_prot); | 250 | cur = pgprot_val(st->current_prot); |
251 | eff = st->effective_prot; | ||
246 | 252 | ||
247 | if (!st->level) { | 253 | if (!st->level) { |
248 | /* First entry */ | 254 | /* First entry */ |
249 | st->current_prot = new_prot; | 255 | st->current_prot = new_prot; |
256 | st->effective_prot = new_eff; | ||
250 | st->level = level; | 257 | st->level = level; |
251 | st->marker = address_markers; | 258 | st->marker = address_markers; |
252 | st->lines = 0; | 259 | st->lines = 0; |
253 | pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", | 260 | pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", |
254 | st->marker->name); | 261 | st->marker->name); |
255 | } else if (prot != cur || level != st->level || | 262 | } else if (prot != cur || new_eff != eff || level != st->level || |
256 | st->current_address >= st->marker[1].start_address) { | 263 | st->current_address >= st->marker[1].start_address) { |
257 | const char *unit = units; | 264 | const char *unit = units; |
258 | unsigned long delta; | 265 | unsigned long delta; |
259 | int width = sizeof(unsigned long) * 2; | 266 | int width = sizeof(unsigned long) * 2; |
260 | pgprotval_t pr = pgprot_val(st->current_prot); | ||
261 | 267 | ||
262 | if (st->check_wx && (pr & _PAGE_RW) && !(pr & _PAGE_NX)) { | 268 | if (st->check_wx && (eff & _PAGE_RW) && !(eff & _PAGE_NX)) { |
263 | WARN_ONCE(1, | 269 | WARN_ONCE(1, |
264 | "x86/mm: Found insecure W+X mapping at address %p/%pS\n", | 270 | "x86/mm: Found insecure W+X mapping at address %p/%pS\n", |
265 | (void *)st->start_address, | 271 | (void *)st->start_address, |
@@ -313,21 +319,30 @@ static void note_page(struct seq_file *m, struct pg_state *st, | |||
313 | 319 | ||
314 | st->start_address = st->current_address; | 320 | st->start_address = st->current_address; |
315 | st->current_prot = new_prot; | 321 | st->current_prot = new_prot; |
322 | st->effective_prot = new_eff; | ||
316 | st->level = level; | 323 | st->level = level; |
317 | } | 324 | } |
318 | } | 325 | } |
319 | 326 | ||
320 | static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr, unsigned long P) | 327 | static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2) |
328 | { | ||
329 | return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) | | ||
330 | ((prot1 | prot2) & _PAGE_NX); | ||
331 | } | ||
332 | |||
333 | static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr, | ||
334 | pgprotval_t eff_in, unsigned long P) | ||
321 | { | 335 | { |
322 | int i; | 336 | int i; |
323 | pte_t *start; | 337 | pte_t *start; |
324 | pgprotval_t prot; | 338 | pgprotval_t prot, eff; |
325 | 339 | ||
326 | start = (pte_t *)pmd_page_vaddr(addr); | 340 | start = (pte_t *)pmd_page_vaddr(addr); |
327 | for (i = 0; i < PTRS_PER_PTE; i++) { | 341 | for (i = 0; i < PTRS_PER_PTE; i++) { |
328 | prot = pte_flags(*start); | 342 | prot = pte_flags(*start); |
343 | eff = effective_prot(eff_in, prot); | ||
329 | st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT); | 344 | st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT); |
330 | note_page(m, st, __pgprot(prot), 5); | 345 | note_page(m, st, __pgprot(prot), eff, 5); |
331 | start++; | 346 | start++; |
332 | } | 347 | } |
333 | } | 348 | } |
@@ -344,12 +359,10 @@ static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st, | |||
344 | void *pt) | 359 | void *pt) |
345 | { | 360 | { |
346 | if (__pa(pt) == __pa(kasan_zero_pmd) || | 361 | if (__pa(pt) == __pa(kasan_zero_pmd) || |
347 | #ifdef CONFIG_X86_5LEVEL | 362 | (pgtable_l5_enabled && __pa(pt) == __pa(kasan_zero_p4d)) || |
348 | __pa(pt) == __pa(kasan_zero_p4d) || | ||
349 | #endif | ||
350 | __pa(pt) == __pa(kasan_zero_pud)) { | 363 | __pa(pt) == __pa(kasan_zero_pud)) { |
351 | pgprotval_t prot = pte_flags(kasan_zero_pte[0]); | 364 | pgprotval_t prot = pte_flags(kasan_zero_pte[0]); |
352 | note_page(m, st, __pgprot(prot), 5); | 365 | note_page(m, st, __pgprot(prot), 0, 5); |
353 | return true; | 366 | return true; |
354 | } | 367 | } |
355 | return false; | 368 | return false; |
@@ -364,42 +377,45 @@ static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st, | |||
364 | 377 | ||
365 | #if PTRS_PER_PMD > 1 | 378 | #if PTRS_PER_PMD > 1 |
366 | 379 | ||
367 | static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, unsigned long P) | 380 | static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, |
381 | pgprotval_t eff_in, unsigned long P) | ||
368 | { | 382 | { |
369 | int i; | 383 | int i; |
370 | pmd_t *start, *pmd_start; | 384 | pmd_t *start, *pmd_start; |
371 | pgprotval_t prot; | 385 | pgprotval_t prot, eff; |
372 | 386 | ||
373 | pmd_start = start = (pmd_t *)pud_page_vaddr(addr); | 387 | pmd_start = start = (pmd_t *)pud_page_vaddr(addr); |
374 | for (i = 0; i < PTRS_PER_PMD; i++) { | 388 | for (i = 0; i < PTRS_PER_PMD; i++) { |
375 | st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT); | 389 | st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT); |
376 | if (!pmd_none(*start)) { | 390 | if (!pmd_none(*start)) { |
391 | prot = pmd_flags(*start); | ||
392 | eff = effective_prot(eff_in, prot); | ||
377 | if (pmd_large(*start) || !pmd_present(*start)) { | 393 | if (pmd_large(*start) || !pmd_present(*start)) { |
378 | prot = pmd_flags(*start); | 394 | note_page(m, st, __pgprot(prot), eff, 4); |
379 | note_page(m, st, __pgprot(prot), 4); | ||
380 | } else if (!kasan_page_table(m, st, pmd_start)) { | 395 | } else if (!kasan_page_table(m, st, pmd_start)) { |
381 | walk_pte_level(m, st, *start, | 396 | walk_pte_level(m, st, *start, eff, |
382 | P + i * PMD_LEVEL_MULT); | 397 | P + i * PMD_LEVEL_MULT); |
383 | } | 398 | } |
384 | } else | 399 | } else |
385 | note_page(m, st, __pgprot(0), 4); | 400 | note_page(m, st, __pgprot(0), 0, 4); |
386 | start++; | 401 | start++; |
387 | } | 402 | } |
388 | } | 403 | } |
389 | 404 | ||
390 | #else | 405 | #else |
391 | #define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p) | 406 | #define walk_pmd_level(m,s,a,e,p) walk_pte_level(m,s,__pmd(pud_val(a)),e,p) |
392 | #define pud_large(a) pmd_large(__pmd(pud_val(a))) | 407 | #define pud_large(a) pmd_large(__pmd(pud_val(a))) |
393 | #define pud_none(a) pmd_none(__pmd(pud_val(a))) | 408 | #define pud_none(a) pmd_none(__pmd(pud_val(a))) |
394 | #endif | 409 | #endif |
395 | 410 | ||
396 | #if PTRS_PER_PUD > 1 | 411 | #if PTRS_PER_PUD > 1 |
397 | 412 | ||
398 | static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr, unsigned long P) | 413 | static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr, |
414 | pgprotval_t eff_in, unsigned long P) | ||
399 | { | 415 | { |
400 | int i; | 416 | int i; |
401 | pud_t *start, *pud_start; | 417 | pud_t *start, *pud_start; |
402 | pgprotval_t prot; | 418 | pgprotval_t prot, eff; |
403 | pud_t *prev_pud = NULL; | 419 | pud_t *prev_pud = NULL; |
404 | 420 | ||
405 | pud_start = start = (pud_t *)p4d_page_vaddr(addr); | 421 | pud_start = start = (pud_t *)p4d_page_vaddr(addr); |
@@ -407,15 +423,16 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr, | |||
407 | for (i = 0; i < PTRS_PER_PUD; i++) { | 423 | for (i = 0; i < PTRS_PER_PUD; i++) { |
408 | st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT); | 424 | st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT); |
409 | if (!pud_none(*start)) { | 425 | if (!pud_none(*start)) { |
426 | prot = pud_flags(*start); | ||
427 | eff = effective_prot(eff_in, prot); | ||
410 | if (pud_large(*start) || !pud_present(*start)) { | 428 | if (pud_large(*start) || !pud_present(*start)) { |
411 | prot = pud_flags(*start); | 429 | note_page(m, st, __pgprot(prot), eff, 3); |
412 | note_page(m, st, __pgprot(prot), 3); | ||
413 | } else if (!kasan_page_table(m, st, pud_start)) { | 430 | } else if (!kasan_page_table(m, st, pud_start)) { |
414 | walk_pmd_level(m, st, *start, | 431 | walk_pmd_level(m, st, *start, eff, |
415 | P + i * PUD_LEVEL_MULT); | 432 | P + i * PUD_LEVEL_MULT); |
416 | } | 433 | } |
417 | } else | 434 | } else |
418 | note_page(m, st, __pgprot(0), 3); | 435 | note_page(m, st, __pgprot(0), 0, 3); |
419 | 436 | ||
420 | prev_pud = start; | 437 | prev_pud = start; |
421 | start++; | 438 | start++; |
@@ -423,43 +440,43 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr, | |||
423 | } | 440 | } |
424 | 441 | ||
425 | #else | 442 | #else |
426 | #define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(p4d_val(a)),p) | 443 | #define walk_pud_level(m,s,a,e,p) walk_pmd_level(m,s,__pud(p4d_val(a)),e,p) |
427 | #define p4d_large(a) pud_large(__pud(p4d_val(a))) | 444 | #define p4d_large(a) pud_large(__pud(p4d_val(a))) |
428 | #define p4d_none(a) pud_none(__pud(p4d_val(a))) | 445 | #define p4d_none(a) pud_none(__pud(p4d_val(a))) |
429 | #endif | 446 | #endif |
430 | 447 | ||
431 | #if PTRS_PER_P4D > 1 | 448 | static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr, |
432 | 449 | pgprotval_t eff_in, unsigned long P) | |
433 | static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr, unsigned long P) | ||
434 | { | 450 | { |
435 | int i; | 451 | int i; |
436 | p4d_t *start, *p4d_start; | 452 | p4d_t *start, *p4d_start; |
437 | pgprotval_t prot; | 453 | pgprotval_t prot, eff; |
454 | |||
455 | if (PTRS_PER_P4D == 1) | ||
456 | return walk_pud_level(m, st, __p4d(pgd_val(addr)), eff_in, P); | ||
438 | 457 | ||
439 | p4d_start = start = (p4d_t *)pgd_page_vaddr(addr); | 458 | p4d_start = start = (p4d_t *)pgd_page_vaddr(addr); |
440 | 459 | ||
441 | for (i = 0; i < PTRS_PER_P4D; i++) { | 460 | for (i = 0; i < PTRS_PER_P4D; i++) { |
442 | st->current_address = normalize_addr(P + i * P4D_LEVEL_MULT); | 461 | st->current_address = normalize_addr(P + i * P4D_LEVEL_MULT); |
443 | if (!p4d_none(*start)) { | 462 | if (!p4d_none(*start)) { |
463 | prot = p4d_flags(*start); | ||
464 | eff = effective_prot(eff_in, prot); | ||
444 | if (p4d_large(*start) || !p4d_present(*start)) { | 465 | if (p4d_large(*start) || !p4d_present(*start)) { |
445 | prot = p4d_flags(*start); | 466 | note_page(m, st, __pgprot(prot), eff, 2); |
446 | note_page(m, st, __pgprot(prot), 2); | ||
447 | } else if (!kasan_page_table(m, st, p4d_start)) { | 467 | } else if (!kasan_page_table(m, st, p4d_start)) { |
448 | walk_pud_level(m, st, *start, | 468 | walk_pud_level(m, st, *start, eff, |
449 | P + i * P4D_LEVEL_MULT); | 469 | P + i * P4D_LEVEL_MULT); |
450 | } | 470 | } |
451 | } else | 471 | } else |
452 | note_page(m, st, __pgprot(0), 2); | 472 | note_page(m, st, __pgprot(0), 0, 2); |
453 | 473 | ||
454 | start++; | 474 | start++; |
455 | } | 475 | } |
456 | } | 476 | } |
457 | 477 | ||
458 | #else | 478 | #define pgd_large(a) (pgtable_l5_enabled ? pgd_large(a) : p4d_large(__p4d(pgd_val(a)))) |
459 | #define walk_p4d_level(m,s,a,p) walk_pud_level(m,s,__p4d(pgd_val(a)),p) | 479 | #define pgd_none(a) (pgtable_l5_enabled ? pgd_none(a) : p4d_none(__p4d(pgd_val(a)))) |
460 | #define pgd_large(a) p4d_large(__p4d(pgd_val(a))) | ||
461 | #define pgd_none(a) p4d_none(__p4d(pgd_val(a))) | ||
462 | #endif | ||
463 | 480 | ||
464 | static inline bool is_hypervisor_range(int idx) | 481 | static inline bool is_hypervisor_range(int idx) |
465 | { | 482 | { |
@@ -483,7 +500,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, | |||
483 | #else | 500 | #else |
484 | pgd_t *start = swapper_pg_dir; | 501 | pgd_t *start = swapper_pg_dir; |
485 | #endif | 502 | #endif |
486 | pgprotval_t prot; | 503 | pgprotval_t prot, eff; |
487 | int i; | 504 | int i; |
488 | struct pg_state st = {}; | 505 | struct pg_state st = {}; |
489 | 506 | ||
@@ -499,15 +516,20 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, | |||
499 | for (i = 0; i < PTRS_PER_PGD; i++) { | 516 | for (i = 0; i < PTRS_PER_PGD; i++) { |
500 | st.current_address = normalize_addr(i * PGD_LEVEL_MULT); | 517 | st.current_address = normalize_addr(i * PGD_LEVEL_MULT); |
501 | if (!pgd_none(*start) && !is_hypervisor_range(i)) { | 518 | if (!pgd_none(*start) && !is_hypervisor_range(i)) { |
519 | prot = pgd_flags(*start); | ||
520 | #ifdef CONFIG_X86_PAE | ||
521 | eff = _PAGE_USER | _PAGE_RW; | ||
522 | #else | ||
523 | eff = prot; | ||
524 | #endif | ||
502 | if (pgd_large(*start) || !pgd_present(*start)) { | 525 | if (pgd_large(*start) || !pgd_present(*start)) { |
503 | prot = pgd_flags(*start); | 526 | note_page(m, &st, __pgprot(prot), eff, 1); |
504 | note_page(m, &st, __pgprot(prot), 1); | ||
505 | } else { | 527 | } else { |
506 | walk_p4d_level(m, &st, *start, | 528 | walk_p4d_level(m, &st, *start, eff, |
507 | i * PGD_LEVEL_MULT); | 529 | i * PGD_LEVEL_MULT); |
508 | } | 530 | } |
509 | } else | 531 | } else |
510 | note_page(m, &st, __pgprot(0), 1); | 532 | note_page(m, &st, __pgprot(0), 0, 1); |
511 | 533 | ||
512 | cond_resched(); | 534 | cond_resched(); |
513 | start++; | 535 | start++; |
@@ -515,7 +537,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, | |||
515 | 537 | ||
516 | /* Flush out the last page */ | 538 | /* Flush out the last page */ |
517 | st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT); | 539 | st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT); |
518 | note_page(m, &st, __pgprot(0), 0); | 540 | note_page(m, &st, __pgprot(0), 0, 0); |
519 | if (!checkwx) | 541 | if (!checkwx) |
520 | return; | 542 | return; |
521 | if (st.wx_pages) | 543 | if (st.wx_pages) |
@@ -570,6 +592,13 @@ static int __init pt_dump_init(void) | |||
570 | address_markers[LOW_KERNEL_NR].start_address = PAGE_OFFSET; | 592 | address_markers[LOW_KERNEL_NR].start_address = PAGE_OFFSET; |
571 | address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; | 593 | address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; |
572 | address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START; | 594 | address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START; |
595 | #ifdef CONFIG_MODIFY_LDT_SYSCALL | ||
596 | address_markers[LDT_NR].start_address = LDT_BASE_ADDR; | ||
597 | #endif | ||
598 | #ifdef CONFIG_KASAN | ||
599 | address_markers[KASAN_SHADOW_START_NR].start_address = KASAN_SHADOW_START; | ||
600 | address_markers[KASAN_SHADOW_END_NR].start_address = KASAN_SHADOW_END; | ||
601 | #endif | ||
573 | #endif | 602 | #endif |
574 | #ifdef CONFIG_X86_32 | 603 | #ifdef CONFIG_X86_32 |
575 | address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; | 604 | address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; |
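The effective_prot() rule that now feeds the W+X check can be read as: RW and USER must be granted at every level, while NX from any level wins. A stand-alone sketch with the usual x86 flag positions (RW = bit 1, USER = bit 2, NX = bit 63; these bit numbers come from pgtable_types.h, not from this diff):

#include <stdio.h>

#define _PAGE_RW   (1UL << 1)
#define _PAGE_USER (1UL << 2)
#define _PAGE_NX   (1UL << 63)

static unsigned long effective_prot(unsigned long prot1, unsigned long prot2)
{
	return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) |
	       ((prot1 | prot2) & _PAGE_NX);
}

int main(void)
{
	/* Writable upper level, read-only NX leaf: effectively RO and NX */
	unsigned long eff = effective_prot(_PAGE_RW | _PAGE_USER, _PAGE_NX);

	printf("rw=%d nx=%d\n", !!(eff & _PAGE_RW), !!(eff & _PAGE_NX));
	return 0;
}

As a consequence, a writable and executable leaf that sits under an NX intermediate entry is no longer reported as W+X, which matches the check_wx test switching from the leaf's own prot to eff.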
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index f75ea0748b9f..73bd8c95ac71 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -417,11 +417,11 @@ void vmalloc_sync_all(void) | |||
417 | */ | 417 | */ |
418 | static noinline int vmalloc_fault(unsigned long address) | 418 | static noinline int vmalloc_fault(unsigned long address) |
419 | { | 419 | { |
420 | pgd_t *pgd, *pgd_ref; | 420 | pgd_t *pgd, *pgd_k; |
421 | p4d_t *p4d, *p4d_ref; | 421 | p4d_t *p4d, *p4d_k; |
422 | pud_t *pud, *pud_ref; | 422 | pud_t *pud; |
423 | pmd_t *pmd, *pmd_ref; | 423 | pmd_t *pmd; |
424 | pte_t *pte, *pte_ref; | 424 | pte_t *pte; |
425 | 425 | ||
426 | /* Make sure we are in vmalloc area: */ | 426 | /* Make sure we are in vmalloc area: */ |
427 | if (!(address >= VMALLOC_START && address < VMALLOC_END)) | 427 | if (!(address >= VMALLOC_START && address < VMALLOC_END)) |
@@ -435,73 +435,51 @@ static noinline int vmalloc_fault(unsigned long address) | |||
435 | * case just flush: | 435 | * case just flush: |
436 | */ | 436 | */ |
437 | pgd = (pgd_t *)__va(read_cr3_pa()) + pgd_index(address); | 437 | pgd = (pgd_t *)__va(read_cr3_pa()) + pgd_index(address); |
438 | pgd_ref = pgd_offset_k(address); | 438 | pgd_k = pgd_offset_k(address); |
439 | if (pgd_none(*pgd_ref)) | 439 | if (pgd_none(*pgd_k)) |
440 | return -1; | 440 | return -1; |
441 | 441 | ||
442 | if (CONFIG_PGTABLE_LEVELS > 4) { | 442 | if (pgtable_l5_enabled) { |
443 | if (pgd_none(*pgd)) { | 443 | if (pgd_none(*pgd)) { |
444 | set_pgd(pgd, *pgd_ref); | 444 | set_pgd(pgd, *pgd_k); |
445 | arch_flush_lazy_mmu_mode(); | 445 | arch_flush_lazy_mmu_mode(); |
446 | } else { | 446 | } else { |
447 | BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); | 447 | BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_k)); |
448 | } | 448 | } |
449 | } | 449 | } |
450 | 450 | ||
451 | /* With 4-level paging, copying happens on the p4d level. */ | 451 | /* With 4-level paging, copying happens on the p4d level. */ |
452 | p4d = p4d_offset(pgd, address); | 452 | p4d = p4d_offset(pgd, address); |
453 | p4d_ref = p4d_offset(pgd_ref, address); | 453 | p4d_k = p4d_offset(pgd_k, address); |
454 | if (p4d_none(*p4d_ref)) | 454 | if (p4d_none(*p4d_k)) |
455 | return -1; | 455 | return -1; |
456 | 456 | ||
457 | if (p4d_none(*p4d) && CONFIG_PGTABLE_LEVELS == 4) { | 457 | if (p4d_none(*p4d) && !pgtable_l5_enabled) { |
458 | set_p4d(p4d, *p4d_ref); | 458 | set_p4d(p4d, *p4d_k); |
459 | arch_flush_lazy_mmu_mode(); | 459 | arch_flush_lazy_mmu_mode(); |
460 | } else { | 460 | } else { |
461 | BUG_ON(p4d_pfn(*p4d) != p4d_pfn(*p4d_ref)); | 461 | BUG_ON(p4d_pfn(*p4d) != p4d_pfn(*p4d_k)); |
462 | } | 462 | } |
463 | 463 | ||
464 | /* | ||
465 | * Below here mismatches are bugs because these lower tables | ||
466 | * are shared: | ||
467 | */ | ||
468 | BUILD_BUG_ON(CONFIG_PGTABLE_LEVELS < 4); | 464 | BUILD_BUG_ON(CONFIG_PGTABLE_LEVELS < 4); |
469 | 465 | ||
470 | pud = pud_offset(p4d, address); | 466 | pud = pud_offset(p4d, address); |
471 | pud_ref = pud_offset(p4d_ref, address); | 467 | if (pud_none(*pud)) |
472 | if (pud_none(*pud_ref)) | ||
473 | return -1; | 468 | return -1; |
474 | 469 | ||
475 | if (pud_none(*pud) || pud_pfn(*pud) != pud_pfn(*pud_ref)) | ||
476 | BUG(); | ||
477 | |||
478 | if (pud_large(*pud)) | 470 | if (pud_large(*pud)) |
479 | return 0; | 471 | return 0; |
480 | 472 | ||
481 | pmd = pmd_offset(pud, address); | 473 | pmd = pmd_offset(pud, address); |
482 | pmd_ref = pmd_offset(pud_ref, address); | 474 | if (pmd_none(*pmd)) |
483 | if (pmd_none(*pmd_ref)) | ||
484 | return -1; | 475 | return -1; |
485 | 476 | ||
486 | if (pmd_none(*pmd) || pmd_pfn(*pmd) != pmd_pfn(*pmd_ref)) | ||
487 | BUG(); | ||
488 | |||
489 | if (pmd_large(*pmd)) | 477 | if (pmd_large(*pmd)) |
490 | return 0; | 478 | return 0; |
491 | 479 | ||
492 | pte_ref = pte_offset_kernel(pmd_ref, address); | ||
493 | if (!pte_present(*pte_ref)) | ||
494 | return -1; | ||
495 | |||
496 | pte = pte_offset_kernel(pmd, address); | 480 | pte = pte_offset_kernel(pmd, address); |
497 | 481 | if (!pte_present(*pte)) | |
498 | /* | 482 | return -1; |
499 | * Don't use pte_page here, because the mappings can point | ||
500 | * outside mem_map, and the NUMA hash lookup cannot handle | ||
501 | * that: | ||
502 | */ | ||
503 | if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref)) | ||
504 | BUG(); | ||
505 | 483 | ||
506 | return 0; | 484 | return 0; |
507 | } | 485 | } |
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c index ab33a32df2a8..9aa22be8331e 100644 --- a/arch/x86/mm/ident_map.c +++ b/arch/x86/mm/ident_map.c | |||
@@ -120,7 +120,7 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, | |||
120 | result = ident_p4d_init(info, p4d, addr, next); | 120 | result = ident_p4d_init(info, p4d, addr, next); |
121 | if (result) | 121 | if (result) |
122 | return result; | 122 | return result; |
123 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) { | 123 | if (pgtable_l5_enabled) { |
124 | set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag)); | 124 | set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag)); |
125 | } else { | 125 | } else { |
126 | /* | 126 | /* |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index af11a2890235..45241de66785 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -88,12 +88,7 @@ static int __init nonx32_setup(char *str) | |||
88 | } | 88 | } |
89 | __setup("noexec32=", nonx32_setup); | 89 | __setup("noexec32=", nonx32_setup); |
90 | 90 | ||
91 | /* | 91 | static void sync_global_pgds_l5(unsigned long start, unsigned long end) |
92 | * When memory was added make sure all the processes MM have | ||
93 | * suitable PGD entries in the local PGD level page. | ||
94 | */ | ||
95 | #ifdef CONFIG_X86_5LEVEL | ||
96 | void sync_global_pgds(unsigned long start, unsigned long end) | ||
97 | { | 92 | { |
98 | unsigned long addr; | 93 | unsigned long addr; |
99 | 94 | ||
@@ -129,8 +124,8 @@ void sync_global_pgds(unsigned long start, unsigned long end) | |||
129 | spin_unlock(&pgd_lock); | 124 | spin_unlock(&pgd_lock); |
130 | } | 125 | } |
131 | } | 126 | } |
132 | #else | 127 | |
133 | void sync_global_pgds(unsigned long start, unsigned long end) | 128 | static void sync_global_pgds_l4(unsigned long start, unsigned long end) |
134 | { | 129 | { |
135 | unsigned long addr; | 130 | unsigned long addr; |
136 | 131 | ||
@@ -143,7 +138,7 @@ void sync_global_pgds(unsigned long start, unsigned long end) | |||
143 | * With folded p4d, pgd_none() is always false, we need to | 138 | * With folded p4d, pgd_none() is always false, we need to |
144 | * handle synchronization on p4d level. | 139 | * handle synchronization on p4d level. |
145 | */ | 140 | */ |
146 | BUILD_BUG_ON(pgd_none(*pgd_ref)); | 141 | MAYBE_BUILD_BUG_ON(pgd_none(*pgd_ref)); |
147 | p4d_ref = p4d_offset(pgd_ref, addr); | 142 | p4d_ref = p4d_offset(pgd_ref, addr); |
148 | 143 | ||
149 | if (p4d_none(*p4d_ref)) | 144 | if (p4d_none(*p4d_ref)) |
@@ -173,7 +168,18 @@ void sync_global_pgds(unsigned long start, unsigned long end) | |||
173 | spin_unlock(&pgd_lock); | 168 | spin_unlock(&pgd_lock); |
174 | } | 169 | } |
175 | } | 170 | } |
176 | #endif | 171 | |
172 | /* | ||
173 | * When memory was added make sure all the processes MM have | ||
174 | * suitable PGD entries in the local PGD level page. | ||
175 | */ | ||
176 | void sync_global_pgds(unsigned long start, unsigned long end) | ||
177 | { | ||
178 | if (pgtable_l5_enabled) | ||
179 | sync_global_pgds_l5(start, end); | ||
180 | else | ||
181 | sync_global_pgds_l4(start, end); | ||
182 | } | ||
177 | 183 | ||
178 | /* | 184 | /* |
179 | * NOTE: This function is marked __ref because it calls __init function | 185 | * NOTE: This function is marked __ref because it calls __init function |
@@ -632,7 +638,7 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, | |||
632 | unsigned long vaddr = (unsigned long)__va(paddr); | 638 | unsigned long vaddr = (unsigned long)__va(paddr); |
633 | int i = p4d_index(vaddr); | 639 | int i = p4d_index(vaddr); |
634 | 640 | ||
635 | if (!IS_ENABLED(CONFIG_X86_5LEVEL)) | 641 | if (!pgtable_l5_enabled) |
636 | return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, page_size_mask); | 642 | return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, page_size_mask); |
637 | 643 | ||
638 | for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) { | 644 | for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) { |
@@ -712,7 +718,7 @@ kernel_physical_mapping_init(unsigned long paddr_start, | |||
712 | page_size_mask); | 718 | page_size_mask); |
713 | 719 | ||
714 | spin_lock(&init_mm.page_table_lock); | 720 | spin_lock(&init_mm.page_table_lock); |
715 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) | 721 | if (pgtable_l5_enabled) |
716 | pgd_populate(&init_mm, pgd, p4d); | 722 | pgd_populate(&init_mm, pgd, p4d); |
717 | else | 723 | else |
718 | p4d_populate(&init_mm, p4d_offset(pgd, vaddr), (pud_t *) p4d); | 724 | p4d_populate(&init_mm, p4d_offset(pgd, vaddr), (pud_t *) p4d); |
@@ -1089,7 +1095,7 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end, | |||
1089 | * 5-level case we should free them. This code will have to change | 1095 | * 5-level case we should free them. This code will have to change |
1090 | * to adapt for boot-time switching between 4 and 5 level page tables. | 1096 | * to adapt for boot-time switching between 4 and 5 level page tables. |
1091 | */ | 1097 | */ |
1092 | if (CONFIG_PGTABLE_LEVELS == 5) | 1098 | if (pgtable_l5_enabled) |
1093 | free_pud_table(pud_base, p4d); | 1099 | free_pud_table(pud_base, p4d); |
1094 | } | 1100 | } |
1095 | 1101 | ||
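The BUILD_BUG_ON() -> MAYBE_BUILD_BUG_ON() change in sync_global_pgds_l4() is needed because, with CONFIG_X86_5LEVEL=y and runtime switching, pgd_none() is no longer guaranteed to be a compile-time constant. MAYBE_BUILD_BUG_ON() is a generic kernel macro (reproduced here roughly, for reference) that degrades to a runtime check when the condition cannot be evaluated at build time:

	#define MAYBE_BUILD_BUG_ON(cond)			\
		do {						\
			if (__builtin_constant_p((cond)))	\
				BUILD_BUG_ON(cond);		\
			else					\
				BUG_ON(cond);			\
		} while (0)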
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index af6f2f9c6a26..d8ff013ea9d0 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c | |||
@@ -1,6 +1,12 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | 1 | // SPDX-License-Identifier: GPL-2.0 |
2 | #define DISABLE_BRANCH_PROFILING | 2 | #define DISABLE_BRANCH_PROFILING |
3 | #define pr_fmt(fmt) "kasan: " fmt | 3 | #define pr_fmt(fmt) "kasan: " fmt |
4 | |||
5 | #ifdef CONFIG_X86_5LEVEL | ||
6 | /* Too early to use cpu_feature_enabled() */ | ||
7 | #define pgtable_l5_enabled __pgtable_l5_enabled | ||
8 | #endif | ||
9 | |||
4 | #include <linux/bootmem.h> | 10 | #include <linux/bootmem.h> |
5 | #include <linux/kasan.h> | 11 | #include <linux/kasan.h> |
6 | #include <linux/kdebug.h> | 12 | #include <linux/kdebug.h> |
@@ -19,7 +25,7 @@ | |||
19 | 25 | ||
20 | extern struct range pfn_mapped[E820_MAX_ENTRIES]; | 26 | extern struct range pfn_mapped[E820_MAX_ENTRIES]; |
21 | 27 | ||
22 | static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); | 28 | static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); |
23 | 29 | ||
24 | static __init void *early_alloc(size_t size, int nid, bool panic) | 30 | static __init void *early_alloc(size_t size, int nid, bool panic) |
25 | { | 31 | { |
@@ -176,10 +182,10 @@ static void __init clear_pgds(unsigned long start, | |||
176 | * With folded p4d, pgd_clear() is nop, use p4d_clear() | 182 | * With folded p4d, pgd_clear() is nop, use p4d_clear() |
177 | * instead. | 183 | * instead. |
178 | */ | 184 | */ |
179 | if (CONFIG_PGTABLE_LEVELS < 5) | 185 | if (pgtable_l5_enabled) |
180 | p4d_clear(p4d_offset(pgd, start)); | ||
181 | else | ||
182 | pgd_clear(pgd); | 186 | pgd_clear(pgd); |
187 | else | ||
188 | p4d_clear(p4d_offset(pgd, start)); | ||
183 | } | 189 | } |
184 | 190 | ||
185 | pgd = pgd_offset_k(start); | 191 | pgd = pgd_offset_k(start); |
@@ -191,7 +197,7 @@ static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr) | |||
191 | { | 197 | { |
192 | unsigned long p4d; | 198 | unsigned long p4d; |
193 | 199 | ||
194 | if (!IS_ENABLED(CONFIG_X86_5LEVEL)) | 200 | if (!pgtable_l5_enabled) |
195 | return (p4d_t *)pgd; | 201 | return (p4d_t *)pgd; |
196 | 202 | ||
197 | p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK; | 203 | p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK; |
@@ -272,7 +278,7 @@ void __init kasan_early_init(void) | |||
272 | for (i = 0; i < PTRS_PER_PUD; i++) | 278 | for (i = 0; i < PTRS_PER_PUD; i++) |
273 | kasan_zero_pud[i] = __pud(pud_val); | 279 | kasan_zero_pud[i] = __pud(pud_val); |
274 | 280 | ||
275 | for (i = 0; IS_ENABLED(CONFIG_X86_5LEVEL) && i < PTRS_PER_P4D; i++) | 281 | for (i = 0; pgtable_l5_enabled && i < PTRS_PER_P4D; i++) |
276 | kasan_zero_p4d[i] = __p4d(p4d_val); | 282 | kasan_zero_p4d[i] = __p4d(p4d_val); |
277 | 283 | ||
278 | kasan_map_early_shadow(early_top_pgt); | 284 | kasan_map_early_shadow(early_top_pgt); |
@@ -303,7 +309,7 @@ void __init kasan_init(void) | |||
303 | * bunch of things like kernel code, modules, EFI mapping, etc. | 309 | * bunch of things like kernel code, modules, EFI mapping, etc. |
304 | * We need to take extra steps to not overwrite them. | 310 | * We need to take extra steps to not overwrite them. |
305 | */ | 311 | */ |
306 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) { | 312 | if (pgtable_l5_enabled) { |
307 | void *ptr; | 313 | void *ptr; |
308 | 314 | ||
309 | ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END)); | 315 | ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END)); |
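kasan_early_init() runs before CPU feature reporting is usable, hence the override at the top of kasan_init_64.c. A sketch of the assumed header arrangement (the exact location and types are an assumption, not part of this hunk): by default pgtable_l5_enabled expands to a cpufeature test, and files that need it earlier redefine it to read the raw flag set during early boot:

	/* Assumed sketch of the header logic, e.g. in pgtable_64_types.h: */
	#ifdef CONFIG_X86_5LEVEL
	extern unsigned int __pgtable_l5_enabled;
	#ifndef pgtable_l5_enabled
	#define pgtable_l5_enabled cpu_feature_enabled(X86_FEATURE_LA57)
	#endif
	#else
	#define pgtable_l5_enabled 0
	#endif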
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index aedebd2ebf1e..615cc03ced84 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c | |||
@@ -34,23 +34,12 @@ | |||
34 | #define TB_SHIFT 40 | 34 | #define TB_SHIFT 40 |
35 | 35 | ||
36 | /* | 36 | /* |
37 | * Virtual address start and end range for randomization. | ||
38 | * | ||
39 | * The end address could depend on more configuration options to make the | 37 | * The end address could depend on more configuration options to make the |
40 | * highest amount of space for randomization available, but that's too hard | 38 | * highest amount of space for randomization available, but that's too hard |
41 | * to keep straight and caused issues already. | 39 | * to keep straight and caused issues already. |
42 | */ | 40 | */ |
43 | static const unsigned long vaddr_start = __PAGE_OFFSET_BASE; | ||
44 | static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE; | 41 | static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE; |
45 | 42 | ||
46 | /* Default values */ | ||
47 | unsigned long page_offset_base = __PAGE_OFFSET_BASE; | ||
48 | EXPORT_SYMBOL(page_offset_base); | ||
49 | unsigned long vmalloc_base = __VMALLOC_BASE; | ||
50 | EXPORT_SYMBOL(vmalloc_base); | ||
51 | unsigned long vmemmap_base = __VMEMMAP_BASE; | ||
52 | EXPORT_SYMBOL(vmemmap_base); | ||
53 | |||
54 | /* | 43 | /* |
55 | * Memory regions randomized by KASLR (except modules that use a separate logic | 44 | * Memory regions randomized by KASLR (except modules that use a separate logic |
56 | * earlier during boot). The list is ordered based on virtual addresses. This | 45 | * earlier during boot). The list is ordered based on virtual addresses. This |
@@ -60,8 +49,8 @@ static __initdata struct kaslr_memory_region { | |||
60 | unsigned long *base; | 49 | unsigned long *base; |
61 | unsigned long size_tb; | 50 | unsigned long size_tb; |
62 | } kaslr_regions[] = { | 51 | } kaslr_regions[] = { |
63 | { &page_offset_base, 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT) /* Maximum */ }, | 52 | { &page_offset_base, 0 }, |
64 | { &vmalloc_base, VMALLOC_SIZE_TB }, | 53 | { &vmalloc_base, 0 }, |
65 | { &vmemmap_base, 1 }, | 54 | { &vmemmap_base, 1 }, |
66 | }; | 55 | }; |
67 | 56 | ||
@@ -84,11 +73,14 @@ static inline bool kaslr_memory_enabled(void) | |||
84 | void __init kernel_randomize_memory(void) | 73 | void __init kernel_randomize_memory(void) |
85 | { | 74 | { |
86 | size_t i; | 75 | size_t i; |
87 | unsigned long vaddr = vaddr_start; | 76 | unsigned long vaddr_start, vaddr; |
88 | unsigned long rand, memory_tb; | 77 | unsigned long rand, memory_tb; |
89 | struct rnd_state rand_state; | 78 | struct rnd_state rand_state; |
90 | unsigned long remain_entropy; | 79 | unsigned long remain_entropy; |
91 | 80 | ||
81 | vaddr_start = pgtable_l5_enabled ? __PAGE_OFFSET_BASE_L5 : __PAGE_OFFSET_BASE_L4; | ||
82 | vaddr = vaddr_start; | ||
83 | |||
92 | /* | 84 | /* |
93 | * These BUILD_BUG_ON checks ensure the memory layout is consistent | 85 | * These BUILD_BUG_ON checks ensure the memory layout is consistent |
94 | * with the vaddr_start/vaddr_end variables. These checks are very | 86 | * with the vaddr_start/vaddr_end variables. These checks are very |
@@ -101,6 +93,9 @@ void __init kernel_randomize_memory(void) | |||
101 | if (!kaslr_memory_enabled()) | 93 | if (!kaslr_memory_enabled()) |
102 | return; | 94 | return; |
103 | 95 | ||
96 | kaslr_regions[0].size_tb = 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT); | ||
97 | kaslr_regions[1].size_tb = VMALLOC_SIZE_TB; | ||
98 | |||
104 | /* | 99 | /* |
105 | * Update Physical memory mapping to available and | 100 | * Update Physical memory mapping to available and |
106 | * add padding if needed (especially for memory hotplug support). | 101 | * add padding if needed (especially for memory hotplug support). |
@@ -129,7 +124,7 @@ void __init kernel_randomize_memory(void) | |||
129 | */ | 124 | */ |
130 | entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i); | 125 | entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i); |
131 | prandom_bytes_state(&rand_state, &rand, sizeof(rand)); | 126 | prandom_bytes_state(&rand_state, &rand, sizeof(rand)); |
132 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) | 127 | if (pgtable_l5_enabled) |
133 | entropy = (rand % (entropy + 1)) & P4D_MASK; | 128 | entropy = (rand % (entropy + 1)) & P4D_MASK; |
134 | else | 129 | else |
135 | entropy = (rand % (entropy + 1)) & PUD_MASK; | 130 | entropy = (rand % (entropy + 1)) & PUD_MASK; |
@@ -141,7 +136,7 @@ void __init kernel_randomize_memory(void) | |||
141 | * randomization alignment. | 136 | * randomization alignment. |
142 | */ | 137 | */ |
143 | vaddr += get_padding(&kaslr_regions[i]); | 138 | vaddr += get_padding(&kaslr_regions[i]); |
144 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) | 139 | if (pgtable_l5_enabled) |
145 | vaddr = round_up(vaddr + 1, P4D_SIZE); | 140 | vaddr = round_up(vaddr + 1, P4D_SIZE); |
146 | else | 141 | else |
147 | vaddr = round_up(vaddr + 1, PUD_SIZE); | 142 | vaddr = round_up(vaddr + 1, PUD_SIZE); |
@@ -217,7 +212,7 @@ void __meminit init_trampoline(void) | |||
217 | return; | 212 | return; |
218 | } | 213 | } |
219 | 214 | ||
220 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) | 215 | if (pgtable_l5_enabled) |
221 | init_trampoline_p4d(); | 216 | init_trampoline_p4d(); |
222 | else | 217 | else |
223 | init_trampoline_pud(); | 218 | init_trampoline_pud(); |
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index 1a53071e2e17..3a1b5fe4c2ca 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c | |||
@@ -25,17 +25,12 @@ | |||
25 | #include <asm/bootparam.h> | 25 | #include <asm/bootparam.h> |
26 | #include <asm/set_memory.h> | 26 | #include <asm/set_memory.h> |
27 | #include <asm/cacheflush.h> | 27 | #include <asm/cacheflush.h> |
28 | #include <asm/sections.h> | ||
29 | #include <asm/processor-flags.h> | 28 | #include <asm/processor-flags.h> |
30 | #include <asm/msr.h> | 29 | #include <asm/msr.h> |
31 | #include <asm/cmdline.h> | 30 | #include <asm/cmdline.h> |
32 | 31 | ||
33 | #include "mm_internal.h" | 32 | #include "mm_internal.h" |
34 | 33 | ||
35 | static char sme_cmdline_arg[] __initdata = "mem_encrypt"; | ||
36 | static char sme_cmdline_on[] __initdata = "on"; | ||
37 | static char sme_cmdline_off[] __initdata = "off"; | ||
38 | |||
39 | /* | 34 | /* |
40 | * Since SME related variables are set early in the boot process they must | 35 | * Since SME related variables are set early in the boot process they must |
41 | * reside in the .data section so as not to be zeroed out when the .bss | 36 | * reside in the .data section so as not to be zeroed out when the .bss |
@@ -46,7 +41,7 @@ EXPORT_SYMBOL(sme_me_mask); | |||
46 | DEFINE_STATIC_KEY_FALSE(sev_enable_key); | 41 | DEFINE_STATIC_KEY_FALSE(sev_enable_key); |
47 | EXPORT_SYMBOL_GPL(sev_enable_key); | 42 | EXPORT_SYMBOL_GPL(sev_enable_key); |
48 | 43 | ||
49 | static bool sev_enabled __section(.data); | 44 | bool sev_enabled __section(.data); |
50 | 45 | ||
51 | /* Buffer used for early in-place encryption by BSP, no locking needed */ | 46 | /* Buffer used for early in-place encryption by BSP, no locking needed */ |
52 | static char sme_early_buffer[PAGE_SIZE] __aligned(PAGE_SIZE); | 47 | static char sme_early_buffer[PAGE_SIZE] __aligned(PAGE_SIZE); |
@@ -463,574 +458,3 @@ void swiotlb_set_mem_attributes(void *vaddr, unsigned long size) | |||
463 | /* Make the SWIOTLB buffer area decrypted */ | 458 | /* Make the SWIOTLB buffer area decrypted */ |
464 | set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT); | 459 | set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT); |
465 | } | 460 | } |
466 | |||
467 | struct sme_populate_pgd_data { | ||
468 | void *pgtable_area; | ||
469 | pgd_t *pgd; | ||
470 | |||
471 | pmdval_t pmd_flags; | ||
472 | pteval_t pte_flags; | ||
473 | unsigned long paddr; | ||
474 | |||
475 | unsigned long vaddr; | ||
476 | unsigned long vaddr_end; | ||
477 | }; | ||
478 | |||
479 | static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd) | ||
480 | { | ||
481 | unsigned long pgd_start, pgd_end, pgd_size; | ||
482 | pgd_t *pgd_p; | ||
483 | |||
484 | pgd_start = ppd->vaddr & PGDIR_MASK; | ||
485 | pgd_end = ppd->vaddr_end & PGDIR_MASK; | ||
486 | |||
487 | pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1) * sizeof(pgd_t); | ||
488 | |||
489 | pgd_p = ppd->pgd + pgd_index(ppd->vaddr); | ||
490 | |||
491 | memset(pgd_p, 0, pgd_size); | ||
492 | } | ||
493 | |||
494 | #define PGD_FLAGS _KERNPG_TABLE_NOENC | ||
495 | #define P4D_FLAGS _KERNPG_TABLE_NOENC | ||
496 | #define PUD_FLAGS _KERNPG_TABLE_NOENC | ||
497 | #define PMD_FLAGS _KERNPG_TABLE_NOENC | ||
498 | |||
499 | #define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL) | ||
500 | |||
501 | #define PMD_FLAGS_DEC PMD_FLAGS_LARGE | ||
502 | #define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \ | ||
503 | (_PAGE_PAT | _PAGE_PWT)) | ||
504 | |||
505 | #define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC) | ||
506 | |||
507 | #define PTE_FLAGS (__PAGE_KERNEL_EXEC & ~_PAGE_GLOBAL) | ||
508 | |||
509 | #define PTE_FLAGS_DEC PTE_FLAGS | ||
510 | #define PTE_FLAGS_DEC_WP ((PTE_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \ | ||
511 | (_PAGE_PAT | _PAGE_PWT)) | ||
512 | |||
513 | #define PTE_FLAGS_ENC (PTE_FLAGS | _PAGE_ENC) | ||
514 | |||
515 | static pmd_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) | ||
516 | { | ||
517 | pgd_t *pgd_p; | ||
518 | p4d_t *p4d_p; | ||
519 | pud_t *pud_p; | ||
520 | pmd_t *pmd_p; | ||
521 | |||
522 | pgd_p = ppd->pgd + pgd_index(ppd->vaddr); | ||
523 | if (native_pgd_val(*pgd_p)) { | ||
524 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) | ||
525 | p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK); | ||
526 | else | ||
527 | pud_p = (pud_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK); | ||
528 | } else { | ||
529 | pgd_t pgd; | ||
530 | |||
531 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) { | ||
532 | p4d_p = ppd->pgtable_area; | ||
533 | memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D); | ||
534 | ppd->pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D; | ||
535 | |||
536 | pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS); | ||
537 | } else { | ||
538 | pud_p = ppd->pgtable_area; | ||
539 | memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD); | ||
540 | ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD; | ||
541 | |||
542 | pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS); | ||
543 | } | ||
544 | native_set_pgd(pgd_p, pgd); | ||
545 | } | ||
546 | |||
547 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) { | ||
548 | p4d_p += p4d_index(ppd->vaddr); | ||
549 | if (native_p4d_val(*p4d_p)) { | ||
550 | pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK); | ||
551 | } else { | ||
552 | p4d_t p4d; | ||
553 | |||
554 | pud_p = ppd->pgtable_area; | ||
555 | memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD); | ||
556 | ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD; | ||
557 | |||
558 | p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS); | ||
559 | native_set_p4d(p4d_p, p4d); | ||
560 | } | ||
561 | } | ||
562 | |||
563 | pud_p += pud_index(ppd->vaddr); | ||
564 | if (native_pud_val(*pud_p)) { | ||
565 | if (native_pud_val(*pud_p) & _PAGE_PSE) | ||
566 | return NULL; | ||
567 | |||
568 | pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK); | ||
569 | } else { | ||
570 | pud_t pud; | ||
571 | |||
572 | pmd_p = ppd->pgtable_area; | ||
573 | memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD); | ||
574 | ppd->pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD; | ||
575 | |||
576 | pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS); | ||
577 | native_set_pud(pud_p, pud); | ||
578 | } | ||
579 | |||
580 | return pmd_p; | ||
581 | } | ||
582 | |||
583 | static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) | ||
584 | { | ||
585 | pmd_t *pmd_p; | ||
586 | |||
587 | pmd_p = sme_prepare_pgd(ppd); | ||
588 | if (!pmd_p) | ||
589 | return; | ||
590 | |||
591 | pmd_p += pmd_index(ppd->vaddr); | ||
592 | if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE)) | ||
593 | native_set_pmd(pmd_p, native_make_pmd(ppd->paddr | ppd->pmd_flags)); | ||
594 | } | ||
595 | |||
596 | static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd) | ||
597 | { | ||
598 | pmd_t *pmd_p; | ||
599 | pte_t *pte_p; | ||
600 | |||
601 | pmd_p = sme_prepare_pgd(ppd); | ||
602 | if (!pmd_p) | ||
603 | return; | ||
604 | |||
605 | pmd_p += pmd_index(ppd->vaddr); | ||
606 | if (native_pmd_val(*pmd_p)) { | ||
607 | if (native_pmd_val(*pmd_p) & _PAGE_PSE) | ||
608 | return; | ||
609 | |||
610 | pte_p = (pte_t *)(native_pmd_val(*pmd_p) & ~PTE_FLAGS_MASK); | ||
611 | } else { | ||
612 | pmd_t pmd; | ||
613 | |||
614 | pte_p = ppd->pgtable_area; | ||
615 | memset(pte_p, 0, sizeof(*pte_p) * PTRS_PER_PTE); | ||
616 | ppd->pgtable_area += sizeof(*pte_p) * PTRS_PER_PTE; | ||
617 | |||
618 | pmd = native_make_pmd((pteval_t)pte_p + PMD_FLAGS); | ||
619 | native_set_pmd(pmd_p, pmd); | ||
620 | } | ||
621 | |||
622 | pte_p += pte_index(ppd->vaddr); | ||
623 | if (!native_pte_val(*pte_p)) | ||
624 | native_set_pte(pte_p, native_make_pte(ppd->paddr | ppd->pte_flags)); | ||
625 | } | ||
626 | |||
627 | static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) | ||
628 | { | ||
629 | while (ppd->vaddr < ppd->vaddr_end) { | ||
630 | sme_populate_pgd_large(ppd); | ||
631 | |||
632 | ppd->vaddr += PMD_PAGE_SIZE; | ||
633 | ppd->paddr += PMD_PAGE_SIZE; | ||
634 | } | ||
635 | } | ||
636 | |||
637 | static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd) | ||
638 | { | ||
639 | while (ppd->vaddr < ppd->vaddr_end) { | ||
640 | sme_populate_pgd(ppd); | ||
641 | |||
642 | ppd->vaddr += PAGE_SIZE; | ||
643 | ppd->paddr += PAGE_SIZE; | ||
644 | } | ||
645 | } | ||
646 | |||
647 | static void __init __sme_map_range(struct sme_populate_pgd_data *ppd, | ||
648 | pmdval_t pmd_flags, pteval_t pte_flags) | ||
649 | { | ||
650 | unsigned long vaddr_end; | ||
651 | |||
652 | ppd->pmd_flags = pmd_flags; | ||
653 | ppd->pte_flags = pte_flags; | ||
654 | |||
655 | /* Save original end value since we modify the struct value */ | ||
656 | vaddr_end = ppd->vaddr_end; | ||
657 | |||
658 | /* If start is not 2MB aligned, create PTE entries */ | ||
659 | ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE); | ||
660 | __sme_map_range_pte(ppd); | ||
661 | |||
662 | /* Create PMD entries */ | ||
663 | ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK; | ||
664 | __sme_map_range_pmd(ppd); | ||
665 | |||
666 | /* If end is not 2MB aligned, create PTE entries */ | ||
667 | ppd->vaddr_end = vaddr_end; | ||
668 | __sme_map_range_pte(ppd); | ||
669 | } | ||
670 | |||
671 | static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd) | ||
672 | { | ||
673 | __sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC); | ||
674 | } | ||
675 | |||
676 | static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd) | ||
677 | { | ||
678 | __sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC); | ||
679 | } | ||
680 | |||
681 | static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd) | ||
682 | { | ||
683 | __sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP); | ||
684 | } | ||
685 | |||
686 | static unsigned long __init sme_pgtable_calc(unsigned long len) | ||
687 | { | ||
688 | unsigned long p4d_size, pud_size, pmd_size, pte_size; | ||
689 | unsigned long total; | ||
690 | |||
691 | /* | ||
692 | * Perform a relatively simplistic calculation of the pagetable | ||
693 | * entries that are needed. Those mappings will be covered mostly | ||
694 | * by 2MB PMD entries so we can conservatively calculate the required | ||
695 | * number of P4D, PUD and PMD structures needed to perform the | ||
696 | * mappings. For mappings that are not 2MB aligned, PTE mappings | ||
697 | * would be needed for the start and end portion of the address range | ||
698 | * that fall outside of the 2MB alignment. This results in, at most, | ||
699 | * two extra pages to hold PTE entries for each range that is mapped. | ||
700 | * Incrementing the count for each covers the case where the addresses | ||
701 | * cross entries. | ||
702 | */ | ||
703 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) { | ||
704 | p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1; | ||
705 | p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D; | ||
706 | pud_size = (ALIGN(len, P4D_SIZE) / P4D_SIZE) + 1; | ||
707 | pud_size *= sizeof(pud_t) * PTRS_PER_PUD; | ||
708 | } else { | ||
709 | p4d_size = 0; | ||
710 | pud_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1; | ||
711 | pud_size *= sizeof(pud_t) * PTRS_PER_PUD; | ||
712 | } | ||
713 | pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1; | ||
714 | pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD; | ||
715 | pte_size = 2 * sizeof(pte_t) * PTRS_PER_PTE; | ||
716 | |||
717 | total = p4d_size + pud_size + pmd_size + pte_size; | ||
718 | |||
719 | /* | ||
720 | * Now calculate the added pagetable structures needed to populate | ||
721 | * the new pagetables. | ||
722 | */ | ||
723 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) { | ||
724 | p4d_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE; | ||
725 | p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D; | ||
726 | pud_size = ALIGN(total, P4D_SIZE) / P4D_SIZE; | ||
727 | pud_size *= sizeof(pud_t) * PTRS_PER_PUD; | ||
728 | } else { | ||
729 | p4d_size = 0; | ||
730 | pud_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE; | ||
731 | pud_size *= sizeof(pud_t) * PTRS_PER_PUD; | ||
732 | } | ||
733 | pmd_size = ALIGN(total, PUD_SIZE) / PUD_SIZE; | ||
734 | pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD; | ||
735 | |||
736 | total += p4d_size + pud_size + pmd_size; | ||
737 | |||
738 | return total; | ||
739 | } | ||
740 | |||
741 | void __init __nostackprotector sme_encrypt_kernel(struct boot_params *bp) | ||
742 | { | ||
743 | unsigned long workarea_start, workarea_end, workarea_len; | ||
744 | unsigned long execute_start, execute_end, execute_len; | ||
745 | unsigned long kernel_start, kernel_end, kernel_len; | ||
746 | unsigned long initrd_start, initrd_end, initrd_len; | ||
747 | struct sme_populate_pgd_data ppd; | ||
748 | unsigned long pgtable_area_len; | ||
749 | unsigned long decrypted_base; | ||
750 | |||
751 | if (!sme_active()) | ||
752 | return; | ||
753 | |||
754 | /* | ||
755 | * Prepare for encrypting the kernel and initrd by building new | ||
756 | * pagetables with the necessary attributes needed to encrypt the | ||
757 | * kernel in place. | ||
758 | * | ||
759 | * One range of virtual addresses will map the memory occupied | ||
760 | * by the kernel and initrd as encrypted. | ||
761 | * | ||
762 | * Another range of virtual addresses will map the memory occupied | ||
763 | * by the kernel and initrd as decrypted and write-protected. | ||
764 | * | ||
765 | * The use of write-protect attribute will prevent any of the | ||
766 | * memory from being cached. | ||
767 | */ | ||
768 | |||
769 | /* Physical addresses gives us the identity mapped virtual addresses */ | ||
770 | kernel_start = __pa_symbol(_text); | ||
771 | kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE); | ||
772 | kernel_len = kernel_end - kernel_start; | ||
773 | |||
774 | initrd_start = 0; | ||
775 | initrd_end = 0; | ||
776 | initrd_len = 0; | ||
777 | #ifdef CONFIG_BLK_DEV_INITRD | ||
778 | initrd_len = (unsigned long)bp->hdr.ramdisk_size | | ||
779 | ((unsigned long)bp->ext_ramdisk_size << 32); | ||
780 | if (initrd_len) { | ||
781 | initrd_start = (unsigned long)bp->hdr.ramdisk_image | | ||
782 | ((unsigned long)bp->ext_ramdisk_image << 32); | ||
783 | initrd_end = PAGE_ALIGN(initrd_start + initrd_len); | ||
784 | initrd_len = initrd_end - initrd_start; | ||
785 | } | ||
786 | #endif | ||
787 | |||
788 | /* Set the encryption workarea to be immediately after the kernel */ | ||
789 | workarea_start = kernel_end; | ||
790 | |||
791 | /* | ||
792 | * Calculate required number of workarea bytes needed: | ||
793 | * executable encryption area size: | ||
794 | * stack page (PAGE_SIZE) | ||
795 | * encryption routine page (PAGE_SIZE) | ||
796 | * intermediate copy buffer (PMD_PAGE_SIZE) | ||
797 | * pagetable structures for the encryption of the kernel | ||
798 | * pagetable structures for workarea (in case not currently mapped) | ||
799 | */ | ||
800 | execute_start = workarea_start; | ||
801 | execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE; | ||
802 | execute_len = execute_end - execute_start; | ||
803 | |||
804 | /* | ||
805 | * One PGD for both encrypted and decrypted mappings and a set of | ||
806 | * PUDs and PMDs for each of the encrypted and decrypted mappings. | ||
807 | */ | ||
808 | pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD; | ||
809 | pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2; | ||
810 | if (initrd_len) | ||
811 | pgtable_area_len += sme_pgtable_calc(initrd_len) * 2; | ||
812 | |||
813 | /* PUDs and PMDs needed in the current pagetables for the workarea */ | ||
814 | pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len); | ||
815 | |||
816 | /* | ||
817 | * The total workarea includes the executable encryption area and | ||
818 | * the pagetable area. The start of the workarea is already 2MB | ||
819 | * aligned, align the end of the workarea on a 2MB boundary so that | ||
820 | * we don't try to create/allocate PTE entries from the workarea | ||
821 | * before it is mapped. | ||
822 | */ | ||
823 | workarea_len = execute_len + pgtable_area_len; | ||
824 | workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE); | ||
825 | |||
826 | /* | ||
827 | * Set the address to the start of where newly created pagetable | ||
828 | * structures (PGDs, PUDs and PMDs) will be allocated. New pagetable | ||
829 | * structures are created when the workarea is added to the current | ||
830 | * pagetables and when the new encrypted and decrypted kernel | ||
831 | * mappings are populated. | ||
832 | */ | ||
833 | ppd.pgtable_area = (void *)execute_end; | ||
834 | |||
835 | /* | ||
836 | * Make sure the current pagetable structure has entries for | ||
837 | * addressing the workarea. | ||
838 | */ | ||
839 | ppd.pgd = (pgd_t *)native_read_cr3_pa(); | ||
840 | ppd.paddr = workarea_start; | ||
841 | ppd.vaddr = workarea_start; | ||
842 | ppd.vaddr_end = workarea_end; | ||
843 | sme_map_range_decrypted(&ppd); | ||
844 | |||
845 | /* Flush the TLB - no globals so cr3 is enough */ | ||
846 | native_write_cr3(__native_read_cr3()); | ||
847 | |||
848 | /* | ||
849 | * A new pagetable structure is being built to allow for the kernel | ||
850 | * and initrd to be encrypted. It starts with an empty PGD that will | ||
851 | * then be populated with new PUDs and PMDs as the encrypted and | ||
852 | * decrypted kernel mappings are created. | ||
853 | */ | ||
854 | ppd.pgd = ppd.pgtable_area; | ||
855 | memset(ppd.pgd, 0, sizeof(pgd_t) * PTRS_PER_PGD); | ||
856 | ppd.pgtable_area += sizeof(pgd_t) * PTRS_PER_PGD; | ||
857 | |||
858 | /* | ||
859 | * A different PGD index/entry must be used to get different | ||
860 | * pagetable entries for the decrypted mapping. Choose the next | ||
861 | * PGD index and convert it to a virtual address to be used as | ||
862 | * the base of the mapping. | ||
863 | */ | ||
864 | decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1); | ||
865 | if (initrd_len) { | ||
866 | unsigned long check_base; | ||
867 | |||
868 | check_base = (pgd_index(initrd_end) + 1) & (PTRS_PER_PGD - 1); | ||
869 | decrypted_base = max(decrypted_base, check_base); | ||
870 | } | ||
871 | decrypted_base <<= PGDIR_SHIFT; | ||
872 | |||
873 | /* Add encrypted kernel (identity) mappings */ | ||
874 | ppd.paddr = kernel_start; | ||
875 | ppd.vaddr = kernel_start; | ||
876 | ppd.vaddr_end = kernel_end; | ||
877 | sme_map_range_encrypted(&ppd); | ||
878 | |||
879 | /* Add decrypted, write-protected kernel (non-identity) mappings */ | ||
880 | ppd.paddr = kernel_start; | ||
881 | ppd.vaddr = kernel_start + decrypted_base; | ||
882 | ppd.vaddr_end = kernel_end + decrypted_base; | ||
883 | sme_map_range_decrypted_wp(&ppd); | ||
884 | |||
885 | if (initrd_len) { | ||
886 | /* Add encrypted initrd (identity) mappings */ | ||
887 | ppd.paddr = initrd_start; | ||
888 | ppd.vaddr = initrd_start; | ||
889 | ppd.vaddr_end = initrd_end; | ||
890 | sme_map_range_encrypted(&ppd); | ||
891 | /* | ||
892 | * Add decrypted, write-protected initrd (non-identity) mappings | ||
893 | */ | ||
894 | ppd.paddr = initrd_start; | ||
895 | ppd.vaddr = initrd_start + decrypted_base; | ||
896 | ppd.vaddr_end = initrd_end + decrypted_base; | ||
897 | sme_map_range_decrypted_wp(&ppd); | ||
898 | } | ||
899 | |||
900 | /* Add decrypted workarea mappings to both kernel mappings */ | ||
901 | ppd.paddr = workarea_start; | ||
902 | ppd.vaddr = workarea_start; | ||
903 | ppd.vaddr_end = workarea_end; | ||
904 | sme_map_range_decrypted(&ppd); | ||
905 | |||
906 | ppd.paddr = workarea_start; | ||
907 | ppd.vaddr = workarea_start + decrypted_base; | ||
908 | ppd.vaddr_end = workarea_end + decrypted_base; | ||
909 | sme_map_range_decrypted(&ppd); | ||
910 | |||
911 | /* Perform the encryption */ | ||
912 | sme_encrypt_execute(kernel_start, kernel_start + decrypted_base, | ||
913 | kernel_len, workarea_start, (unsigned long)ppd.pgd); | ||
914 | |||
915 | if (initrd_len) | ||
916 | sme_encrypt_execute(initrd_start, initrd_start + decrypted_base, | ||
917 | initrd_len, workarea_start, | ||
918 | (unsigned long)ppd.pgd); | ||
919 | |||
920 | /* | ||
921 | * At this point we are running encrypted. Remove the mappings for | ||
922 | * the decrypted areas - all that is needed for this is to remove | ||
923 | * the PGD entry/entries. | ||
924 | */ | ||
925 | ppd.vaddr = kernel_start + decrypted_base; | ||
926 | ppd.vaddr_end = kernel_end + decrypted_base; | ||
927 | sme_clear_pgd(&ppd); | ||
928 | |||
929 | if (initrd_len) { | ||
930 | ppd.vaddr = initrd_start + decrypted_base; | ||
931 | ppd.vaddr_end = initrd_end + decrypted_base; | ||
932 | sme_clear_pgd(&ppd); | ||
933 | } | ||
934 | |||
935 | ppd.vaddr = workarea_start + decrypted_base; | ||
936 | ppd.vaddr_end = workarea_end + decrypted_base; | ||
937 | sme_clear_pgd(&ppd); | ||
938 | |||
939 | /* Flush the TLB - no globals so cr3 is enough */ | ||
940 | native_write_cr3(__native_read_cr3()); | ||
941 | } | ||
942 | |||
943 | void __init __nostackprotector sme_enable(struct boot_params *bp) | ||
944 | { | ||
945 | const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off; | ||
946 | unsigned int eax, ebx, ecx, edx; | ||
947 | unsigned long feature_mask; | ||
948 | bool active_by_default; | ||
949 | unsigned long me_mask; | ||
950 | char buffer[16]; | ||
951 | u64 msr; | ||
952 | |||
953 | /* Check for the SME/SEV support leaf */ | ||
954 | eax = 0x80000000; | ||
955 | ecx = 0; | ||
956 | native_cpuid(&eax, &ebx, &ecx, &edx); | ||
957 | if (eax < 0x8000001f) | ||
958 | return; | ||
959 | |||
960 | #define AMD_SME_BIT BIT(0) | ||
961 | #define AMD_SEV_BIT BIT(1) | ||
962 | /* | ||
963 | * Set the feature mask (SME or SEV) based on whether we are | ||
964 | * running under a hypervisor. | ||
965 | */ | ||
966 | eax = 1; | ||
967 | ecx = 0; | ||
968 | native_cpuid(&eax, &ebx, &ecx, &edx); | ||
969 | feature_mask = (ecx & BIT(31)) ? AMD_SEV_BIT : AMD_SME_BIT; | ||
970 | |||
971 | /* | ||
972 | * Check for the SME/SEV feature: | ||
973 | * CPUID Fn8000_001F[EAX] | ||
974 | * - Bit 0 - Secure Memory Encryption support | ||
975 | * - Bit 1 - Secure Encrypted Virtualization support | ||
976 | * CPUID Fn8000_001F[EBX] | ||
977 | * - Bits 5:0 - Pagetable bit position used to indicate encryption | ||
978 | */ | ||
979 | eax = 0x8000001f; | ||
980 | ecx = 0; | ||
981 | native_cpuid(&eax, &ebx, &ecx, &edx); | ||
982 | if (!(eax & feature_mask)) | ||
983 | return; | ||
984 | |||
985 | me_mask = 1UL << (ebx & 0x3f); | ||
986 | |||
987 | /* Check if memory encryption is enabled */ | ||
988 | if (feature_mask == AMD_SME_BIT) { | ||
989 | /* For SME, check the SYSCFG MSR */ | ||
990 | msr = __rdmsr(MSR_K8_SYSCFG); | ||
991 | if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) | ||
992 | return; | ||
993 | } else { | ||
994 | /* For SEV, check the SEV MSR */ | ||
995 | msr = __rdmsr(MSR_AMD64_SEV); | ||
996 | if (!(msr & MSR_AMD64_SEV_ENABLED)) | ||
997 | return; | ||
998 | |||
999 | /* SEV state cannot be controlled by a command line option */ | ||
1000 | sme_me_mask = me_mask; | ||
1001 | sev_enabled = true; | ||
1002 | return; | ||
1003 | } | ||
1004 | |||
1005 | /* | ||
1006 | * Fixups have not been applied to phys_base yet and we're running | ||
1007 | * identity mapped, so we must obtain the address to the SME command | ||
1008 | * line argument data using rip-relative addressing. | ||
1009 | */ | ||
1010 | asm ("lea sme_cmdline_arg(%%rip), %0" | ||
1011 | : "=r" (cmdline_arg) | ||
1012 | : "p" (sme_cmdline_arg)); | ||
1013 | asm ("lea sme_cmdline_on(%%rip), %0" | ||
1014 | : "=r" (cmdline_on) | ||
1015 | : "p" (sme_cmdline_on)); | ||
1016 | asm ("lea sme_cmdline_off(%%rip), %0" | ||
1017 | : "=r" (cmdline_off) | ||
1018 | : "p" (sme_cmdline_off)); | ||
1019 | |||
1020 | if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT)) | ||
1021 | active_by_default = true; | ||
1022 | else | ||
1023 | active_by_default = false; | ||
1024 | |||
1025 | cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr | | ||
1026 | ((u64)bp->ext_cmd_line_ptr << 32)); | ||
1027 | |||
1028 | cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)); | ||
1029 | |||
1030 | if (!strncmp(buffer, cmdline_on, sizeof(buffer))) | ||
1031 | sme_me_mask = me_mask; | ||
1032 | else if (!strncmp(buffer, cmdline_off, sizeof(buffer))) | ||
1033 | sme_me_mask = 0; | ||
1034 | else | ||
1035 | sme_me_mask = active_by_default ? me_mask : 0; | ||
1036 | } | ||
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c new file mode 100644 index 000000000000..1b2197d13832 --- /dev/null +++ b/arch/x86/mm/mem_encrypt_identity.c | |||
@@ -0,0 +1,564 @@ | |||
1 | /* | ||
2 | * AMD Memory Encryption Support | ||
3 | * | ||
4 | * Copyright (C) 2016 Advanced Micro Devices, Inc. | ||
5 | * | ||
6 | * Author: Tom Lendacky <thomas.lendacky@amd.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | */ | ||
12 | |||
13 | #define DISABLE_BRANCH_PROFILING | ||
14 | |||
15 | /* | ||
16 | * Since we're dealing with identity mappings, physical and virtual | ||
17 | * addresses are the same, so override these defines which are ultimately | ||
18 | * used by the headers in misc.h. | ||
19 | */ | ||
20 | #define __pa(x) ((unsigned long)(x)) | ||
21 | #define __va(x) ((void *)((unsigned long)(x))) | ||
22 | |||
23 | /* | ||
24 | * Special hack: we have to be careful, because no indirections are | ||
25 | * allowed here, and paravirt_ops is a kind of one. As it will only run in | ||
26 | * baremetal anyway, we just keep it from happening. (This list needs to | ||
27 | * be extended when new paravirt and debugging variants are added.) | ||
28 | */ | ||
29 | #undef CONFIG_PARAVIRT | ||
30 | #undef CONFIG_PARAVIRT_SPINLOCKS | ||
31 | |||
32 | #include <linux/kernel.h> | ||
33 | #include <linux/mm.h> | ||
34 | #include <linux/mem_encrypt.h> | ||
35 | |||
36 | #include <asm/setup.h> | ||
37 | #include <asm/sections.h> | ||
38 | #include <asm/cmdline.h> | ||
39 | |||
40 | #include "mm_internal.h" | ||
41 | |||
42 | #define PGD_FLAGS _KERNPG_TABLE_NOENC | ||
43 | #define P4D_FLAGS _KERNPG_TABLE_NOENC | ||
44 | #define PUD_FLAGS _KERNPG_TABLE_NOENC | ||
45 | #define PMD_FLAGS _KERNPG_TABLE_NOENC | ||
46 | |||
47 | #define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL) | ||
48 | |||
49 | #define PMD_FLAGS_DEC PMD_FLAGS_LARGE | ||
50 | #define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \ | ||
51 | (_PAGE_PAT | _PAGE_PWT)) | ||
52 | |||
53 | #define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC) | ||
54 | |||
55 | #define PTE_FLAGS (__PAGE_KERNEL_EXEC & ~_PAGE_GLOBAL) | ||
56 | |||
57 | #define PTE_FLAGS_DEC PTE_FLAGS | ||
58 | #define PTE_FLAGS_DEC_WP ((PTE_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \ | ||
59 | (_PAGE_PAT | _PAGE_PWT)) | ||
60 | |||
61 | #define PTE_FLAGS_ENC (PTE_FLAGS | _PAGE_ENC) | ||
62 | |||
63 | struct sme_populate_pgd_data { | ||
64 | void *pgtable_area; | ||
65 | pgd_t *pgd; | ||
66 | |||
67 | pmdval_t pmd_flags; | ||
68 | pteval_t pte_flags; | ||
69 | unsigned long paddr; | ||
70 | |||
71 | unsigned long vaddr; | ||
72 | unsigned long vaddr_end; | ||
73 | }; | ||
74 | |||
75 | static char sme_cmdline_arg[] __initdata = "mem_encrypt"; | ||
76 | static char sme_cmdline_on[] __initdata = "on"; | ||
77 | static char sme_cmdline_off[] __initdata = "off"; | ||
78 | |||
79 | static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd) | ||
80 | { | ||
81 | unsigned long pgd_start, pgd_end, pgd_size; | ||
82 | pgd_t *pgd_p; | ||
83 | |||
84 | pgd_start = ppd->vaddr & PGDIR_MASK; | ||
85 | pgd_end = ppd->vaddr_end & PGDIR_MASK; | ||
86 | |||
87 | pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1) * sizeof(pgd_t); | ||
88 | |||
89 | pgd_p = ppd->pgd + pgd_index(ppd->vaddr); | ||
90 | |||
91 | memset(pgd_p, 0, pgd_size); | ||
92 | } | ||
93 | |||
94 | static pud_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) | ||
95 | { | ||
96 | pgd_t *pgd; | ||
97 | p4d_t *p4d; | ||
98 | pud_t *pud; | ||
99 | pmd_t *pmd; | ||
100 | |||
101 | pgd = ppd->pgd + pgd_index(ppd->vaddr); | ||
102 | if (pgd_none(*pgd)) { | ||
103 | p4d = ppd->pgtable_area; | ||
104 | memset(p4d, 0, sizeof(*p4d) * PTRS_PER_P4D); | ||
105 | ppd->pgtable_area += sizeof(*p4d) * PTRS_PER_P4D; | ||
106 | set_pgd(pgd, __pgd(PGD_FLAGS | __pa(p4d))); | ||
107 | } | ||
108 | |||
109 | p4d = p4d_offset(pgd, ppd->vaddr); | ||
110 | if (p4d_none(*p4d)) { | ||
111 | pud = ppd->pgtable_area; | ||
112 | memset(pud, 0, sizeof(*pud) * PTRS_PER_PUD); | ||
113 | ppd->pgtable_area += sizeof(*pud) * PTRS_PER_PUD; | ||
114 | set_p4d(p4d, __p4d(P4D_FLAGS | __pa(pud))); | ||
115 | } | ||
116 | |||
117 | pud = pud_offset(p4d, ppd->vaddr); | ||
118 | if (pud_none(*pud)) { | ||
119 | pmd = ppd->pgtable_area; | ||
120 | memset(pmd, 0, sizeof(*pmd) * PTRS_PER_PMD); | ||
121 | ppd->pgtable_area += sizeof(*pmd) * PTRS_PER_PMD; | ||
122 | set_pud(pud, __pud(PUD_FLAGS | __pa(pmd))); | ||
123 | } | ||
124 | |||
125 | if (pud_large(*pud)) | ||
126 | return NULL; | ||
127 | |||
128 | return pud; | ||
129 | } | ||
130 | |||
131 | static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) | ||
132 | { | ||
133 | pud_t *pud; | ||
134 | pmd_t *pmd; | ||
135 | |||
136 | pud = sme_prepare_pgd(ppd); | ||
137 | if (!pud) | ||
138 | return; | ||
139 | |||
140 | pmd = pmd_offset(pud, ppd->vaddr); | ||
141 | if (pmd_large(*pmd)) | ||
142 | return; | ||
143 | |||
144 | set_pmd(pmd, __pmd(ppd->paddr | ppd->pmd_flags)); | ||
145 | } | ||
146 | |||
147 | static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd) | ||
148 | { | ||
149 | pud_t *pud; | ||
150 | pmd_t *pmd; | ||
151 | pte_t *pte; | ||
152 | |||
153 | pud = sme_prepare_pgd(ppd); | ||
154 | if (!pud) | ||
155 | return; | ||
156 | |||
157 | pmd = pmd_offset(pud, ppd->vaddr); | ||
158 | if (pmd_none(*pmd)) { | ||
159 | pte = ppd->pgtable_area; | ||
160 | memset(pte, 0, sizeof(pte) * PTRS_PER_PTE); | ||
161 | ppd->pgtable_area += sizeof(pte) * PTRS_PER_PTE; | ||
162 | set_pmd(pmd, __pmd(PMD_FLAGS | __pa(pte))); | ||
163 | } | ||
164 | |||
165 | if (pmd_large(*pmd)) | ||
166 | return; | ||
167 | |||
168 | pte = pte_offset_map(pmd, ppd->vaddr); | ||
169 | if (pte_none(*pte)) | ||
170 | set_pte(pte, __pte(ppd->paddr | ppd->pte_flags)); | ||
171 | } | ||
172 | |||
173 | static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) | ||
174 | { | ||
175 | while (ppd->vaddr < ppd->vaddr_end) { | ||
176 | sme_populate_pgd_large(ppd); | ||
177 | |||
178 | ppd->vaddr += PMD_PAGE_SIZE; | ||
179 | ppd->paddr += PMD_PAGE_SIZE; | ||
180 | } | ||
181 | } | ||
182 | |||
183 | static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd) | ||
184 | { | ||
185 | while (ppd->vaddr < ppd->vaddr_end) { | ||
186 | sme_populate_pgd(ppd); | ||
187 | |||
188 | ppd->vaddr += PAGE_SIZE; | ||
189 | ppd->paddr += PAGE_SIZE; | ||
190 | } | ||
191 | } | ||
192 | |||
193 | static void __init __sme_map_range(struct sme_populate_pgd_data *ppd, | ||
194 | pmdval_t pmd_flags, pteval_t pte_flags) | ||
195 | { | ||
196 | unsigned long vaddr_end; | ||
197 | |||
198 | ppd->pmd_flags = pmd_flags; | ||
199 | ppd->pte_flags = pte_flags; | ||
200 | |||
201 | /* Save original end value since we modify the struct value */ | ||
202 | vaddr_end = ppd->vaddr_end; | ||
203 | |||
204 | /* If start is not 2MB aligned, create PTE entries */ | ||
205 | ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE); | ||
206 | __sme_map_range_pte(ppd); | ||
207 | |||
208 | /* Create PMD entries */ | ||
209 | ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK; | ||
210 | __sme_map_range_pmd(ppd); | ||
211 | |||
212 | /* If end is not 2MB aligned, create PTE entries */ | ||
213 | ppd->vaddr_end = vaddr_end; | ||
214 | __sme_map_range_pte(ppd); | ||
215 | } | ||
216 | |||
217 | static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd) | ||
218 | { | ||
219 | __sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC); | ||
220 | } | ||
221 | |||
222 | static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd) | ||
223 | { | ||
224 | __sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC); | ||
225 | } | ||
226 | |||
227 | static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd) | ||
228 | { | ||
229 | __sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP); | ||
230 | } | ||
231 | |||
232 | static unsigned long __init sme_pgtable_calc(unsigned long len) | ||
233 | { | ||
234 | unsigned long entries = 0, tables = 0; | ||
235 | |||
236 | /* | ||
237 | * Perform a relatively simplistic calculation of the pagetable | ||
238 | * entries that are needed. Those mappings will be covered mostly | ||
239 | * by 2MB PMD entries so we can conservatively calculate the required | ||
240 | * number of P4D, PUD and PMD structures needed to perform the | ||
241 | * mappings. For mappings that are not 2MB aligned, PTE mappings | ||
242 | * would be needed for the start and end portion of the address range | ||
243 | * that fall outside of the 2MB alignment. This results in, at most, | ||
244 | * two extra pages to hold PTE entries for each range that is mapped. | ||
245 | * Incrementing the count for each covers the case where the addresses | ||
246 | * cross entries. | ||
247 | */ | ||
248 | |||
249 | /* PGDIR_SIZE is equal to P4D_SIZE on 4-level machine. */ | ||
250 | if (PTRS_PER_P4D > 1) | ||
251 | entries += (DIV_ROUND_UP(len, PGDIR_SIZE) + 1) * sizeof(p4d_t) * PTRS_PER_P4D; | ||
252 | entries += (DIV_ROUND_UP(len, P4D_SIZE) + 1) * sizeof(pud_t) * PTRS_PER_PUD; | ||
253 | entries += (DIV_ROUND_UP(len, PUD_SIZE) + 1) * sizeof(pmd_t) * PTRS_PER_PMD; | ||
254 | entries += 2 * sizeof(pte_t) * PTRS_PER_PTE; | ||
255 | |||
256 | /* | ||
257 | * Now calculate the added pagetable structures needed to populate | ||
258 | * the new pagetables. | ||
259 | */ | ||
260 | |||
261 | if (PTRS_PER_P4D > 1) | ||
262 | tables += DIV_ROUND_UP(entries, PGDIR_SIZE) * sizeof(p4d_t) * PTRS_PER_P4D; | ||
263 | tables += DIV_ROUND_UP(entries, P4D_SIZE) * sizeof(pud_t) * PTRS_PER_PUD; | ||
264 | tables += DIV_ROUND_UP(entries, PUD_SIZE) * sizeof(pmd_t) * PTRS_PER_PMD; | ||
265 | |||
266 | return entries + tables; | ||
267 | } | ||
268 | |||
269 | void __init sme_encrypt_kernel(struct boot_params *bp) | ||
270 | { | ||
271 | unsigned long workarea_start, workarea_end, workarea_len; | ||
272 | unsigned long execute_start, execute_end, execute_len; | ||
273 | unsigned long kernel_start, kernel_end, kernel_len; | ||
274 | unsigned long initrd_start, initrd_end, initrd_len; | ||
275 | struct sme_populate_pgd_data ppd; | ||
276 | unsigned long pgtable_area_len; | ||
277 | unsigned long decrypted_base; | ||
278 | |||
279 | if (!sme_active()) | ||
280 | return; | ||
281 | |||
282 | /* | ||
283 | * Prepare for encrypting the kernel and initrd by building new | ||
284 | * pagetables with the necessary attributes needed to encrypt the | ||
285 | * kernel in place. | ||
286 | * | ||
287 | * One range of virtual addresses will map the memory occupied | ||
288 | * by the kernel and initrd as encrypted. | ||
289 | * | ||
290 | * Another range of virtual addresses will map the memory occupied | ||
291 | * by the kernel and initrd as decrypted and write-protected. | ||
292 | * | ||
293 | * The use of write-protect attribute will prevent any of the | ||
294 | * memory from being cached. | ||
295 | */ | ||
296 | |||
297 | /* Physical addresses gives us the identity mapped virtual addresses */ | ||
298 | kernel_start = __pa_symbol(_text); | ||
299 | kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE); | ||
300 | kernel_len = kernel_end - kernel_start; | ||
301 | |||
302 | initrd_start = 0; | ||
303 | initrd_end = 0; | ||
304 | initrd_len = 0; | ||
305 | #ifdef CONFIG_BLK_DEV_INITRD | ||
306 | initrd_len = (unsigned long)bp->hdr.ramdisk_size | | ||
307 | ((unsigned long)bp->ext_ramdisk_size << 32); | ||
308 | if (initrd_len) { | ||
309 | initrd_start = (unsigned long)bp->hdr.ramdisk_image | | ||
310 | ((unsigned long)bp->ext_ramdisk_image << 32); | ||
311 | initrd_end = PAGE_ALIGN(initrd_start + initrd_len); | ||
312 | initrd_len = initrd_end - initrd_start; | ||
313 | } | ||
314 | #endif | ||
315 | |||
316 | /* Set the encryption workarea to be immediately after the kernel */ | ||
317 | workarea_start = kernel_end; | ||
318 | |||
319 | /* | ||
320 | * Calculate required number of workarea bytes needed: | ||
321 | * executable encryption area size: | ||
322 | * stack page (PAGE_SIZE) | ||
323 | * encryption routine page (PAGE_SIZE) | ||
324 | * intermediate copy buffer (PMD_PAGE_SIZE) | ||
325 | * pagetable structures for the encryption of the kernel | ||
326 | * pagetable structures for workarea (in case not currently mapped) | ||
327 | */ | ||
328 | execute_start = workarea_start; | ||
329 | execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE; | ||
330 | execute_len = execute_end - execute_start; | ||
331 | |||
332 | /* | ||
333 | * One PGD for both encrypted and decrypted mappings and a set of | ||
334 | * PUDs and PMDs for each of the encrypted and decrypted mappings. | ||
335 | */ | ||
336 | pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD; | ||
337 | pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2; | ||
338 | if (initrd_len) | ||
339 | pgtable_area_len += sme_pgtable_calc(initrd_len) * 2; | ||
340 | |||
341 | /* PUDs and PMDs needed in the current pagetables for the workarea */ | ||
342 | pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len); | ||
343 | |||
344 | /* | ||
345 | * The total workarea includes the executable encryption area and | ||
346 | * the pagetable area. The start of the workarea is already 2MB | ||
347 | * aligned, align the end of the workarea on a 2MB boundary so that | ||
348 | * we don't try to create/allocate PTE entries from the workarea | ||
349 | * before it is mapped. | ||
350 | */ | ||
351 | workarea_len = execute_len + pgtable_area_len; | ||
352 | workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE); | ||
353 | |||
354 | /* | ||
355 | * Set the address to the start of where newly created pagetable | ||
356 | * structures (PGDs, PUDs and PMDs) will be allocated. New pagetable | ||
357 | * structures are created when the workarea is added to the current | ||
358 | * pagetables and when the new encrypted and decrypted kernel | ||
359 | * mappings are populated. | ||
360 | */ | ||
361 | ppd.pgtable_area = (void *)execute_end; | ||
362 | |||
363 | /* | ||
364 | * Make sure the current pagetable structure has entries for | ||
365 | * addressing the workarea. | ||
366 | */ | ||
367 | ppd.pgd = (pgd_t *)native_read_cr3_pa(); | ||
368 | ppd.paddr = workarea_start; | ||
369 | ppd.vaddr = workarea_start; | ||
370 | ppd.vaddr_end = workarea_end; | ||
371 | sme_map_range_decrypted(&ppd); | ||
372 | |||
373 | /* Flush the TLB - no globals so cr3 is enough */ | ||
374 | native_write_cr3(__native_read_cr3()); | ||
375 | |||
376 | /* | ||
377 | * A new pagetable structure is being built to allow for the kernel | ||
378 | * and initrd to be encrypted. It starts with an empty PGD that will | ||
379 | * then be populated with new PUDs and PMDs as the encrypted and | ||
380 | * decrypted kernel mappings are created. | ||
381 | */ | ||
382 | ppd.pgd = ppd.pgtable_area; | ||
383 | memset(ppd.pgd, 0, sizeof(pgd_t) * PTRS_PER_PGD); | ||
384 | ppd.pgtable_area += sizeof(pgd_t) * PTRS_PER_PGD; | ||
385 | |||
386 | /* | ||
387 | * A different PGD index/entry must be used to get different | ||
388 | * pagetable entries for the decrypted mapping. Choose the next | ||
389 | * PGD index and convert it to a virtual address to be used as | ||
390 | * the base of the mapping. | ||
391 | */ | ||
392 | decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1); | ||
393 | if (initrd_len) { | ||
394 | unsigned long check_base; | ||
395 | |||
396 | check_base = (pgd_index(initrd_end) + 1) & (PTRS_PER_PGD - 1); | ||
397 | decrypted_base = max(decrypted_base, check_base); | ||
398 | } | ||
399 | decrypted_base <<= PGDIR_SHIFT; | ||
400 | |||
401 | /* Add encrypted kernel (identity) mappings */ | ||
402 | ppd.paddr = kernel_start; | ||
403 | ppd.vaddr = kernel_start; | ||
404 | ppd.vaddr_end = kernel_end; | ||
405 | sme_map_range_encrypted(&ppd); | ||
406 | |||
407 | /* Add decrypted, write-protected kernel (non-identity) mappings */ | ||
408 | ppd.paddr = kernel_start; | ||
409 | ppd.vaddr = kernel_start + decrypted_base; | ||
410 | ppd.vaddr_end = kernel_end + decrypted_base; | ||
411 | sme_map_range_decrypted_wp(&ppd); | ||
412 | |||
413 | if (initrd_len) { | ||
414 | /* Add encrypted initrd (identity) mappings */ | ||
415 | ppd.paddr = initrd_start; | ||
416 | ppd.vaddr = initrd_start; | ||
417 | ppd.vaddr_end = initrd_end; | ||
418 | sme_map_range_encrypted(&ppd); | ||
419 | /* | ||
420 | * Add decrypted, write-protected initrd (non-identity) mappings | ||
421 | */ | ||
422 | ppd.paddr = initrd_start; | ||
423 | ppd.vaddr = initrd_start + decrypted_base; | ||
424 | ppd.vaddr_end = initrd_end + decrypted_base; | ||
425 | sme_map_range_decrypted_wp(&ppd); | ||
426 | } | ||
427 | |||
428 | /* Add decrypted workarea mappings to both kernel mappings */ | ||
429 | ppd.paddr = workarea_start; | ||
430 | ppd.vaddr = workarea_start; | ||
431 | ppd.vaddr_end = workarea_end; | ||
432 | sme_map_range_decrypted(&ppd); | ||
433 | |||
434 | ppd.paddr = workarea_start; | ||
435 | ppd.vaddr = workarea_start + decrypted_base; | ||
436 | ppd.vaddr_end = workarea_end + decrypted_base; | ||
437 | sme_map_range_decrypted(&ppd); | ||
438 | |||
439 | /* Perform the encryption */ | ||
440 | sme_encrypt_execute(kernel_start, kernel_start + decrypted_base, | ||
441 | kernel_len, workarea_start, (unsigned long)ppd.pgd); | ||
442 | |||
443 | if (initrd_len) | ||
444 | sme_encrypt_execute(initrd_start, initrd_start + decrypted_base, | ||
445 | initrd_len, workarea_start, | ||
446 | (unsigned long)ppd.pgd); | ||
447 | |||
448 | /* | ||
449 | * At this point we are running encrypted. Remove the mappings for | ||
450 | * the decrypted areas - all that is needed for this is to remove | ||
451 | * the PGD entry/entries. | ||
452 | */ | ||
453 | ppd.vaddr = kernel_start + decrypted_base; | ||
454 | ppd.vaddr_end = kernel_end + decrypted_base; | ||
455 | sme_clear_pgd(&ppd); | ||
456 | |||
457 | if (initrd_len) { | ||
458 | ppd.vaddr = initrd_start + decrypted_base; | ||
459 | ppd.vaddr_end = initrd_end + decrypted_base; | ||
460 | sme_clear_pgd(&ppd); | ||
461 | } | ||
462 | |||
463 | ppd.vaddr = workarea_start + decrypted_base; | ||
464 | ppd.vaddr_end = workarea_end + decrypted_base; | ||
465 | sme_clear_pgd(&ppd); | ||
466 | |||
467 | /* Flush the TLB - no globals so cr3 is enough */ | ||
468 | native_write_cr3(__native_read_cr3()); | ||
469 | } | ||
470 | |||
471 | void __init sme_enable(struct boot_params *bp) | ||
472 | { | ||
473 | const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off; | ||
474 | unsigned int eax, ebx, ecx, edx; | ||
475 | unsigned long feature_mask; | ||
476 | bool active_by_default; | ||
477 | unsigned long me_mask; | ||
478 | char buffer[16]; | ||
479 | u64 msr; | ||
480 | |||
481 | /* Check for the SME/SEV support leaf */ | ||
482 | eax = 0x80000000; | ||
483 | ecx = 0; | ||
484 | native_cpuid(&eax, &ebx, &ecx, &edx); | ||
485 | if (eax < 0x8000001f) | ||
486 | return; | ||
487 | |||
488 | #define AMD_SME_BIT BIT(0) | ||
489 | #define AMD_SEV_BIT BIT(1) | ||
490 | /* | ||
491 | * Set the feature mask (SME or SEV) based on whether we are | ||
492 | * running under a hypervisor. | ||
493 | */ | ||
494 | eax = 1; | ||
495 | ecx = 0; | ||
496 | native_cpuid(&eax, &ebx, &ecx, &edx); | ||
497 | feature_mask = (ecx & BIT(31)) ? AMD_SEV_BIT : AMD_SME_BIT; | ||
498 | |||
499 | /* | ||
500 | * Check for the SME/SEV feature: | ||
501 | * CPUID Fn8000_001F[EAX] | ||
502 | * - Bit 0 - Secure Memory Encryption support | ||
503 | * - Bit 1 - Secure Encrypted Virtualization support | ||
504 | * CPUID Fn8000_001F[EBX] | ||
505 | * - Bits 5:0 - Pagetable bit position used to indicate encryption | ||
506 | */ | ||
507 | eax = 0x8000001f; | ||
508 | ecx = 0; | ||
509 | native_cpuid(&eax, &ebx, &ecx, &edx); | ||
510 | if (!(eax & feature_mask)) | ||
511 | return; | ||
512 | |||
513 | me_mask = 1UL << (ebx & 0x3f); | ||
514 | |||
515 | /* Check if memory encryption is enabled */ | ||
516 | if (feature_mask == AMD_SME_BIT) { | ||
517 | /* For SME, check the SYSCFG MSR */ | ||
518 | msr = __rdmsr(MSR_K8_SYSCFG); | ||
519 | if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) | ||
520 | return; | ||
521 | } else { | ||
522 | /* For SEV, check the SEV MSR */ | ||
523 | msr = __rdmsr(MSR_AMD64_SEV); | ||
524 | if (!(msr & MSR_AMD64_SEV_ENABLED)) | ||
525 | return; | ||
526 | |||
527 | /* SEV state cannot be controlled by a command line option */ | ||
528 | sme_me_mask = me_mask; | ||
529 | sev_enabled = true; | ||
530 | return; | ||
531 | } | ||
532 | |||
533 | /* | ||
534 | * Fixups have not been applied to phys_base yet and we're running | ||
535 | * identity mapped, so we must obtain the address to the SME command | ||
536 | * line argument data using rip-relative addressing. | ||
537 | */ | ||
538 | asm ("lea sme_cmdline_arg(%%rip), %0" | ||
539 | : "=r" (cmdline_arg) | ||
540 | : "p" (sme_cmdline_arg)); | ||
541 | asm ("lea sme_cmdline_on(%%rip), %0" | ||
542 | : "=r" (cmdline_on) | ||
543 | : "p" (sme_cmdline_on)); | ||
544 | asm ("lea sme_cmdline_off(%%rip), %0" | ||
545 | : "=r" (cmdline_off) | ||
546 | : "p" (sme_cmdline_off)); | ||
547 | |||
548 | if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT)) | ||
549 | active_by_default = true; | ||
550 | else | ||
551 | active_by_default = false; | ||
552 | |||
553 | cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr | | ||
554 | ((u64)bp->ext_cmd_line_ptr << 32)); | ||
555 | |||
556 | cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)); | ||
557 | |||
558 | if (!strncmp(buffer, cmdline_on, sizeof(buffer))) | ||
559 | sme_me_mask = me_mask; | ||
560 | else if (!strncmp(buffer, cmdline_off, sizeof(buffer))) | ||
561 | sme_me_mask = 0; | ||
562 | else | ||
563 | sme_me_mask = active_by_default ? me_mask : 0; | ||
564 | } | ||
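
The detection flow in sme_enable() is driven by CPUID leaf 0x8000001f plus two MSRs. As a rough, stand-alone illustration (not part of the patch; a user-space build with a toolchain that provides <cpuid.h> is assumed), the sketch below reads the same leaf: EAX bits 0/1 advertise SME/SEV support, and EBX[5:0] give the page-table bit position from which me_mask is built. The MSR checks (SYSCFG for SME, the SEV status MSR for SEV) need ring 0 and are omitted here.

/*
 * Stand-alone user-space sketch of the CPUID half of the SME/SEV probe.
 * Not part of the patch; ring-0 MSR checks are intentionally left out.
 */
#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* __get_cpuid() fails if the maximum extended leaf is below 0x8000001f. */
	if (!__get_cpuid(0x8000001f, &eax, &ebx, &ecx, &edx)) {
		puts("CPUID leaf 0x8000001f not implemented: no SME/SEV");
		return 0;
	}

	/* EAX bit 0: SME supported, bit 1: SEV supported. */
	printf("SME: %u  SEV: %u\n", eax & 1, (eax >> 1) & 1);

	/* EBX[5:0]: page-table bit that marks a page as encrypted (the C-bit). */
	printf("C-bit position: %u -> encryption mask 0x%llx\n",
	       ebx & 0x3f, 1ULL << (ebx & 0x3f));
	return 0;
}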
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index aca6295350f3..e8a4a09e20f1 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c | |||
@@ -60,17 +60,6 @@ void memory_present(int nid, unsigned long start, unsigned long end) | |||
60 | } | 60 | } |
61 | printk(KERN_CONT "\n"); | 61 | printk(KERN_CONT "\n"); |
62 | } | 62 | } |
63 | |||
64 | unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, | ||
65 | unsigned long end_pfn) | ||
66 | { | ||
67 | unsigned long nr_pages = end_pfn - start_pfn; | ||
68 | |||
69 | if (!nr_pages) | ||
70 | return 0; | ||
71 | |||
72 | return (nr_pages + 1) * sizeof(struct page); | ||
73 | } | ||
74 | #endif | 63 | #endif |
75 | 64 | ||
76 | extern unsigned long highend_pfn, highstart_pfn; | 65 | extern unsigned long highend_pfn, highstart_pfn; |
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 7f1a51399674..e055d1a06699 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c | |||
@@ -157,7 +157,7 @@ static void sync_current_stack_to_mm(struct mm_struct *mm) | |||
157 | unsigned long sp = current_stack_pointer; | 157 | unsigned long sp = current_stack_pointer; |
158 | pgd_t *pgd = pgd_offset(mm, sp); | 158 | pgd_t *pgd = pgd_offset(mm, sp); |
159 | 159 | ||
160 | if (CONFIG_PGTABLE_LEVELS > 4) { | 160 | if (pgtable_l5_enabled) { |
161 | if (unlikely(pgd_none(*pgd))) { | 161 | if (unlikely(pgd_none(*pgd))) { |
162 | pgd_t *pgd_ref = pgd_offset_k(sp); | 162 | pgd_t *pgd_ref = pgd_offset_k(sp); |
163 | 163 | ||
@@ -613,7 +613,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, | |||
613 | { | 613 | { |
614 | int cpu; | 614 | int cpu; |
615 | 615 | ||
616 | struct flush_tlb_info info = { | 616 | struct flush_tlb_info info __aligned(SMP_CACHE_BYTES) = { |
617 | .mm = mm, | 617 | .mm = mm, |
618 | }; | 618 | }; |
619 | 619 | ||
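
The switch from the compile-time CONFIG_PGTABLE_LEVELS test to the pgtable_l5_enabled flag reflects that the number of paging levels is now a boot-time property. Hardware support for the wider mode is advertised by CPUID.(EAX=7, ECX=0):ECX bit 16 (LA57); whether the kernel actually enabled it is decided during early boot, which is what the flag records. A small user-space sketch of the CPUID side only (assuming a <cpuid.h> that provides __get_cpuid_count):

/* Sketch (not from the patch): does the CPU support 5-level paging at all? */
#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) {
		puts("CPUID leaf 7 not available");
		return 0;
	}
	/* ECX bit 16 is the LA57 feature bit. */
	printf("LA57 (5-level paging) supported by CPU: %s\n",
	       (ecx & (1u << 16)) ? "yes" : "no");
	return 0;
}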
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index f9cfbc0d1f33..7f443bd1411d 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/ioport.h> | 27 | #include <linux/ioport.h> |
28 | #include <linux/mc146818rtc.h> | 28 | #include <linux/mc146818rtc.h> |
29 | #include <linux/efi.h> | 29 | #include <linux/efi.h> |
30 | #include <linux/export.h> | ||
30 | #include <linux/uaccess.h> | 31 | #include <linux/uaccess.h> |
31 | #include <linux/io.h> | 32 | #include <linux/io.h> |
32 | #include <linux/reboot.h> | 33 | #include <linux/reboot.h> |
@@ -190,7 +191,8 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd) | |||
190 | early_code_mapping_set_exec(0); | 191 | early_code_mapping_set_exec(0); |
191 | } | 192 | } |
192 | 193 | ||
193 | static pgd_t *efi_pgd; | 194 | pgd_t *efi_pgd; |
195 | EXPORT_SYMBOL_GPL(efi_pgd); | ||
194 | 196 | ||
195 | /* | 197 | /* |
196 | * We need our own copy of the higher levels of the page tables | 198 | * We need our own copy of the higher levels of the page tables |
@@ -225,7 +227,7 @@ int __init efi_alloc_page_tables(void) | |||
225 | 227 | ||
226 | pud = pud_alloc(&init_mm, p4d, EFI_VA_END); | 228 | pud = pud_alloc(&init_mm, p4d, EFI_VA_END); |
227 | if (!pud) { | 229 | if (!pud) { |
228 | if (CONFIG_PGTABLE_LEVELS > 4) | 230 | if (pgtable_l5_enabled) |
229 | free_page((unsigned long) pgd_page_vaddr(*pgd)); | 231 | free_page((unsigned long) pgd_page_vaddr(*pgd)); |
230 | free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER); | 232 | free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER); |
231 | return -ENOMEM; | 233 | return -ENOMEM; |
@@ -255,8 +257,8 @@ void efi_sync_low_kernel_mappings(void) | |||
255 | * only span a single PGD entry and that the entry also maps | 257 | * only span a single PGD entry and that the entry also maps |
256 | * other important kernel regions. | 258 | * other important kernel regions. |
257 | */ | 259 | */ |
258 | BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END)); | 260 | MAYBE_BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END)); |
259 | BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) != | 261 | MAYBE_BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) != |
260 | (EFI_VA_END & PGDIR_MASK)); | 262 | (EFI_VA_END & PGDIR_MASK)); |
261 | 263 | ||
262 | pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET); | 264 | pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET); |
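
EFI_VA_START/EFI_VA_END and MODULES_END stop being compile-time constants once the memory layout is chosen at boot, so these two sanity checks can no longer always be proven by the compiler. MAYBE_BUILD_BUG_ON() keeps the assertion but demotes it to a runtime check when its argument is not constant; roughly, the helper has this shape (see include/linux/build_bug.h, or bug.h on older kernels, for the real definition):

/* Rough shape of the kernel helper: compile-time assert when the condition
 * folds to a constant, runtime BUG_ON() otherwise. */
#define MAYBE_BUILD_BUG_ON(cond)			\
	do {						\
		if (__builtin_constant_p((cond)))	\
			BUILD_BUG_ON(cond);		\
		else					\
			BUG_ON(cond);			\
	} while (0)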
diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c index fb1df9488e98..2ebdf31d9996 100644 --- a/arch/x86/platform/intel-mid/intel-mid.c +++ b/arch/x86/platform/intel-mid/intel-mid.c | |||
@@ -199,6 +199,12 @@ void __init x86_intel_mid_early_setup(void) | |||
199 | 199 | ||
200 | legacy_pic = &null_legacy_pic; | 200 | legacy_pic = &null_legacy_pic; |
201 | 201 | ||
202 | /* | ||
203 | * Do nothing for now as everything needed is done in ||
204 | * x86_intel_mid_early_setup() below. | ||
205 | */ | ||
206 | x86_init.acpi.reduced_hw_early_init = x86_init_noop; | ||
207 | |||
202 | pm_power_off = intel_mid_power_off; | 208 | pm_power_off = intel_mid_power_off; |
203 | machine_ops.emergency_restart = intel_mid_reboot; | 209 | machine_ops.emergency_restart = intel_mid_reboot; |
204 | 210 | ||
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index 0ef5e5204968..74a532989308 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c | |||
@@ -50,7 +50,7 @@ static int set_up_temporary_text_mapping(pgd_t *pgd) | |||
50 | { | 50 | { |
51 | pmd_t *pmd; | 51 | pmd_t *pmd; |
52 | pud_t *pud; | 52 | pud_t *pud; |
53 | p4d_t *p4d; | 53 | p4d_t *p4d = NULL; |
54 | 54 | ||
55 | /* | 55 | /* |
56 | * The new mapping only has to cover the page containing the image | 56 | * The new mapping only has to cover the page containing the image |
@@ -66,7 +66,7 @@ static int set_up_temporary_text_mapping(pgd_t *pgd) | |||
66 | * tables used by the image kernel. | 66 | * tables used by the image kernel. |
67 | */ | 67 | */ |
68 | 68 | ||
69 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) { | 69 | if (pgtable_l5_enabled) { |
70 | p4d = (p4d_t *)get_safe_page(GFP_ATOMIC); | 70 | p4d = (p4d_t *)get_safe_page(GFP_ATOMIC); |
71 | if (!p4d) | 71 | if (!p4d) |
72 | return -ENOMEM; | 72 | return -ENOMEM; |
@@ -84,7 +84,7 @@ static int set_up_temporary_text_mapping(pgd_t *pgd) | |||
84 | __pmd((jump_address_phys & PMD_MASK) | __PAGE_KERNEL_LARGE_EXEC)); | 84 | __pmd((jump_address_phys & PMD_MASK) | __PAGE_KERNEL_LARGE_EXEC)); |
85 | set_pud(pud + pud_index(restore_jump_address), | 85 | set_pud(pud + pud_index(restore_jump_address), |
86 | __pud(__pa(pmd) | _KERNPG_TABLE)); | 86 | __pud(__pa(pmd) | _KERNPG_TABLE)); |
87 | if (IS_ENABLED(CONFIG_X86_5LEVEL)) { | 87 | if (p4d) { |
88 | set_p4d(p4d + p4d_index(restore_jump_address), __p4d(__pa(pud) | _KERNPG_TABLE)); | 88 | set_p4d(p4d + p4d_index(restore_jump_address), __p4d(__pa(pud) | _KERNPG_TABLE)); |
89 | set_pgd(pgd + pgd_index(restore_jump_address), __pgd(__pa(p4d) | _KERNPG_TABLE)); | 89 | set_pgd(pgd + pgd_index(restore_jump_address), __pgd(__pa(p4d) | _KERNPG_TABLE)); |
90 | } else { | 90 | } else { |
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index f605825a04ab..c1f98f32c45f 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig | |||
@@ -18,9 +18,6 @@ config XEN_PV | |||
18 | bool "Xen PV guest support" | 18 | bool "Xen PV guest support" |
19 | default y | 19 | default y |
20 | depends on XEN | 20 | depends on XEN |
21 | # XEN_PV is not ready to work with 5-level paging. | ||
22 | # Changes to hypervisor are also required. | ||
23 | depends on !X86_5LEVEL | ||
24 | select XEN_HAVE_PVMMU | 21 | select XEN_HAVE_PVMMU |
25 | select XEN_HAVE_VPMU | 22 | select XEN_HAVE_VPMU |
26 | help | 23 | help |
@@ -79,6 +76,4 @@ config XEN_DEBUG_FS | |||
79 | config XEN_PVH | 76 | config XEN_PVH |
80 | bool "Support for running as a PVH guest" | 77 | bool "Support for running as a PVH guest" |
81 | depends on XEN && XEN_PVHVM && ACPI | 78 | depends on XEN && XEN_PVHVM && ACPI |
82 | # Pre-built page tables are not ready to handle 5-level paging. | ||
83 | depends on !X86_5LEVEL | ||
84 | def_bool n | 79 | def_bool n |
diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index 436c4f003e17..aa1c6a6831a9 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c | |||
@@ -6,6 +6,7 @@ | |||
6 | #include <asm/io_apic.h> | 6 | #include <asm/io_apic.h> |
7 | #include <asm/hypervisor.h> | 7 | #include <asm/hypervisor.h> |
8 | #include <asm/e820/api.h> | 8 | #include <asm/e820/api.h> |
9 | #include <asm/x86_init.h> | ||
9 | 10 | ||
10 | #include <asm/xen/interface.h> | 11 | #include <asm/xen/interface.h> |
11 | #include <asm/xen/hypercall.h> | 12 | #include <asm/xen/hypercall.h> |
@@ -16,15 +17,20 @@ | |||
16 | /* | 17 | /* |
17 | * PVH variables. | 18 | * PVH variables. |
18 | * | 19 | * |
19 | * xen_pvh and pvh_bootparams need to live in data segment since they | 20 | * xen_pvh, pvh_bootparams and pvh_start_info need to live in data segment |
20 | * are used after startup_{32|64}, which clear .bss, are invoked. | 21 | * since they are used after startup_{32|64}, which clear .bss, are invoked. |
21 | */ | 22 | */ |
22 | bool xen_pvh __attribute__((section(".data"))) = 0; | 23 | bool xen_pvh __attribute__((section(".data"))) = 0; |
23 | struct boot_params pvh_bootparams __attribute__((section(".data"))); | 24 | struct boot_params pvh_bootparams __attribute__((section(".data"))); |
25 | struct hvm_start_info pvh_start_info __attribute__((section(".data"))); | ||
24 | 26 | ||
25 | struct hvm_start_info pvh_start_info; | ||
26 | unsigned int pvh_start_info_sz = sizeof(pvh_start_info); | 27 | unsigned int pvh_start_info_sz = sizeof(pvh_start_info); |
27 | 28 | ||
29 | static u64 pvh_get_root_pointer(void) | ||
30 | { | ||
31 | return pvh_start_info.rsdp_paddr; | ||
32 | } | ||
33 | |||
28 | static void __init init_pvh_bootparams(void) | 34 | static void __init init_pvh_bootparams(void) |
29 | { | 35 | { |
30 | struct xen_memory_map memmap; | 36 | struct xen_memory_map memmap; |
@@ -71,6 +77,8 @@ static void __init init_pvh_bootparams(void) | |||
71 | */ | 77 | */ |
72 | pvh_bootparams.hdr.version = 0x212; | 78 | pvh_bootparams.hdr.version = 0x212; |
73 | pvh_bootparams.hdr.type_of_loader = (9 << 4) | 0; /* Xen loader */ | 79 | pvh_bootparams.hdr.type_of_loader = (9 << 4) | 0; /* Xen loader */ |
80 | |||
81 | x86_init.acpi.get_root_pointer = pvh_get_root_pointer; | ||
74 | } | 82 | } |
75 | 83 | ||
76 | /* | 84 | /* |
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index aae88fec9941..d20763472920 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c | |||
@@ -538,6 +538,22 @@ static void xen_set_p4d(p4d_t *ptr, p4d_t val) | |||
538 | 538 | ||
539 | xen_mc_issue(PARAVIRT_LAZY_MMU); | 539 | xen_mc_issue(PARAVIRT_LAZY_MMU); |
540 | } | 540 | } |
541 | |||
542 | #if CONFIG_PGTABLE_LEVELS >= 5 | ||
543 | __visible p4dval_t xen_p4d_val(p4d_t p4d) | ||
544 | { | ||
545 | return pte_mfn_to_pfn(p4d.p4d); | ||
546 | } | ||
547 | PV_CALLEE_SAVE_REGS_THUNK(xen_p4d_val); | ||
548 | |||
549 | __visible p4d_t xen_make_p4d(p4dval_t p4d) | ||
550 | { | ||
551 | p4d = pte_pfn_to_mfn(p4d); | ||
552 | |||
553 | return native_make_p4d(p4d); | ||
554 | } | ||
555 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_p4d); | ||
556 | #endif /* CONFIG_PGTABLE_LEVELS >= 5 */ | ||
541 | #endif /* CONFIG_X86_64 */ | 557 | #endif /* CONFIG_X86_64 */ |
542 | 558 | ||
543 | static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd, | 559 | static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd, |
@@ -2411,6 +2427,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { | |||
2411 | 2427 | ||
2412 | .alloc_pud = xen_alloc_pmd_init, | 2428 | .alloc_pud = xen_alloc_pmd_init, |
2413 | .release_pud = xen_release_pmd_init, | 2429 | .release_pud = xen_release_pmd_init, |
2430 | |||
2431 | #if CONFIG_PGTABLE_LEVELS >= 5 | ||
2432 | .p4d_val = PV_CALLEE_SAVE(xen_p4d_val), | ||
2433 | .make_p4d = PV_CALLEE_SAVE(xen_make_p4d), | ||
2434 | #endif | ||
2414 | #endif /* CONFIG_X86_64 */ | 2435 | #endif /* CONFIG_X86_64 */ |
2415 | 2436 | ||
2416 | .activate_mm = xen_activate_mm, | 2437 | .activate_mm = xen_activate_mm, |
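
xen_p4d_val() and xen_make_p4d() extend to the p4d level the same conversion the other Xen PV accessors already perform: the hardware page tables of a PV guest hold machine frame numbers, while the kernel reasons in pseudo-physical frames, so entries are translated on every read and write. A toy, self-contained illustration of that swap, with hypothetical translation tables standing in for Xen's real p2m/m2p machinery (none of these names are the kernel's):

/* Toy pfn<->mfn swap; the tiny tables below are invented for the example. */
#include <stdint.h>
#include <stdio.h>

#define ENTRY_FRAME_MASK 0x000ffffffffff000ULL	/* bits 51:12 of an entry */

static const uint64_t p2m[4] = { 7, 5, 3, 1 };			/* pfn -> mfn */
static const uint64_t m2p[8] = { [7] = 0, [5] = 1, [3] = 2, [1] = 3 };	/* mfn -> pfn */

static uint64_t entry_pfn_to_mfn(uint64_t e)	/* write path, cf. xen_make_p4d() */
{
	return (p2m[(e & ENTRY_FRAME_MASK) >> 12] << 12) | (e & ~ENTRY_FRAME_MASK);
}

static uint64_t entry_mfn_to_pfn(uint64_t e)	/* read path, cf. xen_p4d_val() */
{
	return (m2p[(e & ENTRY_FRAME_MASK) >> 12] << 12) | (e & ~ENTRY_FRAME_MASK);
}

int main(void)
{
	uint64_t kernel_view = (2ULL << 12) | 0x63;	/* pfn 2 plus flag bits */
	uint64_t in_pagetable = entry_pfn_to_mfn(kernel_view);

	printf("kernel view %#llx -> page table %#llx -> read back %#llx\n",
	       (unsigned long long)kernel_view,
	       (unsigned long long)in_pagetable,
	       (unsigned long long)entry_mfn_to_pfn(in_pagetable));
	return 0;
}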
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 3bb46cb24a99..7ca41bf023c9 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c | |||
@@ -189,12 +189,15 @@ early_param("acpi_rsdp", setup_acpi_rsdp); | |||
189 | 189 | ||
190 | acpi_physical_address __init acpi_os_get_root_pointer(void) | 190 | acpi_physical_address __init acpi_os_get_root_pointer(void) |
191 | { | 191 | { |
192 | acpi_physical_address pa = 0; | 192 | acpi_physical_address pa; |
193 | 193 | ||
194 | #ifdef CONFIG_KEXEC | 194 | #ifdef CONFIG_KEXEC |
195 | if (acpi_rsdp) | 195 | if (acpi_rsdp) |
196 | return acpi_rsdp; | 196 | return acpi_rsdp; |
197 | #endif | 197 | #endif |
198 | pa = acpi_arch_get_root_pointer(); | ||
199 | if (pa) | ||
200 | return pa; | ||
198 | 201 | ||
199 | if (efi_enabled(EFI_CONFIG_TABLES)) { | 202 | if (efi_enabled(EFI_CONFIG_TABLES)) { |
200 | if (efi.acpi20 != EFI_INVALID_TABLE_ADDR) | 203 | if (efi.acpi20 != EFI_INVALID_TABLE_ADDR) |
diff --git a/include/asm-generic/5level-fixup.h b/include/asm-generic/5level-fixup.h index dfbd9d990637..9c2e0708eb82 100644 --- a/include/asm-generic/5level-fixup.h +++ b/include/asm-generic/5level-fixup.h | |||
@@ -8,6 +8,7 @@ | |||
8 | #define P4D_SHIFT PGDIR_SHIFT | 8 | #define P4D_SHIFT PGDIR_SHIFT |
9 | #define P4D_SIZE PGDIR_SIZE | 9 | #define P4D_SIZE PGDIR_SIZE |
10 | #define P4D_MASK PGDIR_MASK | 10 | #define P4D_MASK PGDIR_MASK |
11 | #define MAX_PTRS_PER_P4D 1 | ||
11 | #define PTRS_PER_P4D 1 | 12 | #define PTRS_PER_P4D 1 |
12 | 13 | ||
13 | #define p4d_t pgd_t | 14 | #define p4d_t pgd_t |
diff --git a/include/asm-generic/pgtable-nop4d.h b/include/asm-generic/pgtable-nop4d.h index 8f22f55de17a..1a29b2a0282b 100644 --- a/include/asm-generic/pgtable-nop4d.h +++ b/include/asm-generic/pgtable-nop4d.h | |||
@@ -8,10 +8,11 @@ | |||
8 | 8 | ||
9 | typedef struct { pgd_t pgd; } p4d_t; | 9 | typedef struct { pgd_t pgd; } p4d_t; |
10 | 10 | ||
11 | #define P4D_SHIFT PGDIR_SHIFT | 11 | #define P4D_SHIFT PGDIR_SHIFT |
12 | #define PTRS_PER_P4D 1 | 12 | #define MAX_PTRS_PER_P4D 1 |
13 | #define P4D_SIZE (1UL << P4D_SHIFT) | 13 | #define PTRS_PER_P4D 1 |
14 | #define P4D_MASK (~(P4D_SIZE-1)) | 14 | #define P4D_SIZE (1UL << P4D_SHIFT) |
15 | #define P4D_MASK (~(P4D_SIZE-1)) | ||
15 | 16 | ||
16 | /* | 17 | /* |
17 | * The "pgd_xxx()" functions here are trivial for a folded two-level | 18 | * The "pgd_xxx()" functions here are trivial for a folded two-level |
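
MAX_PTRS_PER_P4D supplies a compile-time upper bound alongside PTRS_PER_P4D. With boot-time switching between 4- and 5-level paging, the number of p4d entries can become a runtime quantity (1 when the level is folded, 512 when it is not), yet static arrays such as kasan_zero_p4d further down still need a constant size. A minimal sketch of that split, with x86-64 values assumed and hypothetical names:

/* Minimal sketch of the constant-vs-runtime split (values assumed). */
#define MAX_PTRS_PER_P4D 512		/* compile-time upper bound */

static unsigned int ptrs_per_p4d = 1;	/* raised to 512 at boot if LA57 is on */

typedef struct { unsigned long val; } p4d_entry;

static p4d_entry zero_p4d[MAX_PTRS_PER_P4D];	/* array sizes must be constant */

static void clear_p4d_table(p4d_entry *tbl)
{
	unsigned int i;

	/* Walk only the entries that exist in the paging mode chosen at boot. */
	for (i = 0; i < ptrs_per_p4d; i++)
		tbl[i].val = 0;
}

int main(void)
{
	clear_p4d_table(zero_p4d);
	return 0;
}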
diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 968173ec2726..15bfb15c2fa5 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h | |||
@@ -623,6 +623,13 @@ bool acpi_gtdt_c3stop(int type); | |||
623 | int acpi_arch_timer_mem_init(struct arch_timer_mem *timer_mem, int *timer_count); | 623 | int acpi_arch_timer_mem_init(struct arch_timer_mem *timer_mem, int *timer_count); |
624 | #endif | 624 | #endif |
625 | 625 | ||
626 | #ifndef ACPI_HAVE_ARCH_GET_ROOT_POINTER | ||
627 | static inline u64 acpi_arch_get_root_pointer(void) | ||
628 | { | ||
629 | return 0; | ||
630 | } | ||
631 | #endif | ||
632 | |||
626 | #else /* !CONFIG_ACPI */ | 633 | #else /* !CONFIG_ACPI */ |
627 | 634 | ||
628 | #define acpi_disabled 1 | 635 | #define acpi_disabled 1 |
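
Taken together with the earlier Xen PVH hunk, these changes add an architecture hook to the RSDP lookup: acpi_os_get_root_pointer() asks acpi_arch_get_root_pointer() first, the stub above returns 0 unless the architecture defines ACPI_HAVE_ARCH_GET_ROOT_POINTER, and x86 can route its x86_init.acpi.get_root_pointer callback to pvh_get_root_pointer() so a PVH guest uses the address supplied in pvh_start_info. A compressed sketch of the fallback pattern, with hypothetical names rather than the kernel's real symbols:

/* Sketch of the override pattern: a replaceable default plus a fallback chain. */
#include <stdint.h>

#ifndef HAVE_ARCH_ROOT_POINTER
static inline uint64_t arch_root_pointer(void)
{
	return 0;	/* "no arch-specific answer" */
}
#endif

static uint64_t scan_firmware_for_rsdp(void)
{
	return 0xe0000;	/* placeholder for the EFI / legacy BIOS search */
}

static uint64_t get_root_pointer(void)
{
	uint64_t pa = arch_root_pointer();

	if (pa)		/* e.g. a hypervisor handed the address over directly */
		return pa;

	return scan_firmware_for_rsdp();
}

int main(void)
{
	return get_root_pointer() ? 0 : 1;
}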
diff --git a/include/linux/kasan.h b/include/linux/kasan.h index adc13474a53b..d6459bd1376d 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h | |||
@@ -18,7 +18,7 @@ extern unsigned char kasan_zero_page[PAGE_SIZE]; | |||
18 | extern pte_t kasan_zero_pte[PTRS_PER_PTE]; | 18 | extern pte_t kasan_zero_pte[PTRS_PER_PTE]; |
19 | extern pmd_t kasan_zero_pmd[PTRS_PER_PMD]; | 19 | extern pmd_t kasan_zero_pmd[PTRS_PER_PMD]; |
20 | extern pud_t kasan_zero_pud[PTRS_PER_PUD]; | 20 | extern pud_t kasan_zero_pud[PTRS_PER_PUD]; |
21 | extern p4d_t kasan_zero_p4d[PTRS_PER_P4D]; | 21 | extern p4d_t kasan_zero_p4d[MAX_PTRS_PER_P4D]; |
22 | 22 | ||
23 | void kasan_populate_zero_shadow(const void *shadow_start, | 23 | void kasan_populate_zero_shadow(const void *shadow_start, |
24 | const void *shadow_end); | 24 | const void *shadow_end); |
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 7522a6987595..a2db4576e499 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h | |||
@@ -816,10 +816,6 @@ int local_memory_node(int node_id); | |||
816 | static inline int local_memory_node(int node_id) { return node_id; }; | 816 | static inline int local_memory_node(int node_id) { return node_id; }; |
817 | #endif | 817 | #endif |
818 | 818 | ||
819 | #ifdef CONFIG_NEED_NODE_MEMMAP_SIZE | ||
820 | unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long); | ||
821 | #endif | ||
822 | |||
823 | /* | 819 | /* |
824 | * zone_idx() returns 0 for the ZONE_DMA zone, 1 for the ZONE_NORMAL zone, etc. | 820 | * zone_idx() returns 0 for the ZONE_DMA zone, 1 for the ZONE_NORMAL zone, etc. |
825 | */ | 821 | */ |
@@ -1289,7 +1285,6 @@ struct mminit_pfnnid_cache { | |||
1289 | #endif | 1285 | #endif |
1290 | 1286 | ||
1291 | void memory_present(int nid, unsigned long start, unsigned long end); | 1287 | void memory_present(int nid, unsigned long start, unsigned long end); |
1292 | unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long); | ||
1293 | 1288 | ||
1294 | /* | 1289 | /* |
1295 | * If it is possible to have holes within a MAX_ORDER_NR_PAGES, then we | 1290 | * If it is possible to have holes within a MAX_ORDER_NR_PAGES, then we |
diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c index 554e4c0f23a2..f436246ccc79 100644 --- a/mm/kasan/kasan_init.c +++ b/mm/kasan/kasan_init.c | |||
@@ -31,7 +31,7 @@ | |||
31 | unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss; | 31 | unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss; |
32 | 32 | ||
33 | #if CONFIG_PGTABLE_LEVELS > 4 | 33 | #if CONFIG_PGTABLE_LEVELS > 4 |
34 | p4d_t kasan_zero_p4d[PTRS_PER_P4D] __page_aligned_bss; | 34 | p4d_t kasan_zero_p4d[MAX_PTRS_PER_P4D] __page_aligned_bss; |
35 | #endif | 35 | #endif |
36 | #if CONFIG_PGTABLE_LEVELS > 3 | 36 | #if CONFIG_PGTABLE_LEVELS > 3 |
37 | pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss; | 37 | pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss; |
diff --git a/mm/sparse.c b/mm/sparse.c index 7af5e7a92528..79b26f98d793 100644 --- a/mm/sparse.c +++ b/mm/sparse.c | |||
@@ -236,28 +236,6 @@ void __init memory_present(int nid, unsigned long start, unsigned long end) | |||
236 | } | 236 | } |
237 | 237 | ||
238 | /* | 238 | /* |
239 | * Only used by the i386 NUMA architectures, but relatively ||
240 | * generic code. | ||
241 | */ | ||
242 | unsigned long __init node_memmap_size_bytes(int nid, unsigned long start_pfn, | ||
243 | unsigned long end_pfn) | ||
244 | { | ||
245 | unsigned long pfn; | ||
246 | unsigned long nr_pages = 0; | ||
247 | |||
248 | mminit_validate_memmodel_limits(&start_pfn, &end_pfn); | ||
249 | for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { | ||
250 | if (nid != early_pfn_to_nid(pfn)) | ||
251 | continue; | ||
252 | |||
253 | if (pfn_present(pfn)) | ||
254 | nr_pages += PAGES_PER_SECTION; | ||
255 | } | ||
256 | |||
257 | return nr_pages * sizeof(struct page); | ||
258 | } | ||
259 | |||
260 | /* | ||
261 | * Subtle, we encode the real pfn into the mem_map such that | 239 | * Subtle, we encode the real pfn into the mem_map such that |
262 | * the identity pfn - section_mem_map will return the actual | 240 | * the identity pfn - section_mem_map will return the actual |
263 | * physical page frame number. | 241 | * physical page frame number. |
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index c3013505c305..b7f61cd1c709 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c | |||
@@ -84,18 +84,19 @@ | |||
84 | * This is made more complicated by various memory models and PAE. | 84 | * This is made more complicated by various memory models and PAE. |
85 | */ | 85 | */ |
86 | 86 | ||
87 | #ifndef MAX_PHYSMEM_BITS | 87 | #ifndef MAX_POSSIBLE_PHYSMEM_BITS |
88 | #ifdef CONFIG_HIGHMEM64G | 88 | #ifdef MAX_PHYSMEM_BITS |
89 | #define MAX_PHYSMEM_BITS 36 | 89 | #define MAX_POSSIBLE_PHYSMEM_BITS MAX_PHYSMEM_BITS |
90 | #else /* !CONFIG_HIGHMEM64G */ | 90 | #else |
91 | /* | 91 | /* |
92 | * If this definition of MAX_PHYSMEM_BITS is used, OBJ_INDEX_BITS will just | 92 | * If this definition of MAX_PHYSMEM_BITS is used, OBJ_INDEX_BITS will just |
93 | * be PAGE_SHIFT | 93 | * be PAGE_SHIFT |
94 | */ | 94 | */ |
95 | #define MAX_PHYSMEM_BITS BITS_PER_LONG | 95 | #define MAX_POSSIBLE_PHYSMEM_BITS BITS_PER_LONG |
96 | #endif | 96 | #endif |
97 | #endif | 97 | #endif |
98 | #define _PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT) | 98 | |
99 | #define _PFN_BITS (MAX_POSSIBLE_PHYSMEM_BITS - PAGE_SHIFT) | ||
99 | 100 | ||
100 | /* | 101 | /* |
101 | * Memory for allocating for handle keeps object position by | 102 | * Memory for allocating for handle keeps object position by |