diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-07-03 17:45:09 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-07-03 17:45:09 -0400 |
commit | 7a69f9c60b49699579f5bfb71f928cceba0afe1a (patch) | |
tree | bf3b5640bbd9f23beeb5a55d18348d65bafff8e8 /arch/x86/boot/compressed | |
parent | 9bc088ab66be8978fbc981ba9644468fa2c2fd3f (diff) | |
parent | 8781fb7e9749da424e01daacd14834b674658c63 (diff) |
Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 mm updates from Ingo Molnar:
"The main changes in this cycle were:
- Continued work to add support for 5-level paging provided by future
Intel CPUs. In particular we switch the x86 GUP code to the generic
implementation. (Kirill A. Shutemov)
- Continued work to add PCID CPU support to native kernels as well.
In this round most of the focus is on reworking/refreshing the TLB
flush infrastructure for the upcoming PCID changes. (Andy
Lutomirski)"
* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (34 commits)
x86/mm: Delete a big outdated comment about TLB flushing
x86/mm: Don't reenter flush_tlb_func_common()
x86/KASLR: Fix detection 32/64 bit bootloaders for 5-level paging
x86/ftrace: Exclude functions in head64.c from function-tracing
x86/mmap, ASLR: Do not treat unlimited-stack tasks as legacy mmap
x86/mm: Remove reset_lazy_tlbstate()
x86/ldt: Simplify the LDT switching logic
x86/boot/64: Put __startup_64() into .head.text
x86/mm: Add support for 5-level paging for KASLR
x86/mm: Make kernel_physical_mapping_init() support 5-level paging
x86/mm: Add sync_global_pgds() for configuration with 5-level paging
x86/boot/64: Add support of additional page table level during early boot
x86/boot/64: Rename init_level4_pgt and early_level4_pgt
x86/boot/64: Rewrite startup_64() in C
x86/boot/compressed: Enable 5-level paging during decompression stage
x86/boot/efi: Define __KERNEL32_CS GDT on 64-bit configurations
x86/boot/efi: Fix __KERNEL_CS definition of GDT entry on 64-bit configurations
x86/boot/efi: Cleanup initialization of GDT entries
x86/asm: Fix comment in return_from_SYSCALL_64()
x86/mm/gup: Switch GUP to the generic get_user_page_fast() implementation
...
Diffstat (limited to 'arch/x86/boot/compressed')
-rw-r--r-- | arch/x86/boot/compressed/eboot.c | 73 | ||||
-rw-r--r-- | arch/x86/boot/compressed/head_64.S | 86 | ||||
-rw-r--r-- | arch/x86/boot/compressed/pagetable.c | 18 |
3 files changed, 148 insertions, 29 deletions
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index cbf4b87f55b9..c3e869eaef0c 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c | |||
@@ -1046,9 +1046,31 @@ struct boot_params *efi_main(struct efi_config *c, | |||
1046 | memset((char *)gdt->address, 0x0, gdt->size); | 1046 | memset((char *)gdt->address, 0x0, gdt->size); |
1047 | desc = (struct desc_struct *)gdt->address; | 1047 | desc = (struct desc_struct *)gdt->address; |
1048 | 1048 | ||
1049 | /* The first GDT is a dummy and the second is unused. */ | 1049 | /* The first GDT is a dummy. */ |
1050 | desc += 2; | 1050 | desc++; |
1051 | |||
1052 | if (IS_ENABLED(CONFIG_X86_64)) { | ||
1053 | /* __KERNEL32_CS */ | ||
1054 | desc->limit0 = 0xffff; | ||
1055 | desc->base0 = 0x0000; | ||
1056 | desc->base1 = 0x0000; | ||
1057 | desc->type = SEG_TYPE_CODE | SEG_TYPE_EXEC_READ; | ||
1058 | desc->s = DESC_TYPE_CODE_DATA; | ||
1059 | desc->dpl = 0; | ||
1060 | desc->p = 1; | ||
1061 | desc->limit = 0xf; | ||
1062 | desc->avl = 0; | ||
1063 | desc->l = 0; | ||
1064 | desc->d = SEG_OP_SIZE_32BIT; | ||
1065 | desc->g = SEG_GRANULARITY_4KB; | ||
1066 | desc->base2 = 0x00; | ||
1067 | desc++; | ||
1068 | } else { | ||
1069 | /* Second entry is unused on 32-bit */ | ||
1070 | desc++; | ||
1071 | } | ||
1051 | 1072 | ||
1073 | /* __KERNEL_CS */ | ||
1052 | desc->limit0 = 0xffff; | 1074 | desc->limit0 = 0xffff; |
1053 | desc->base0 = 0x0000; | 1075 | desc->base0 = 0x0000; |
1054 | desc->base1 = 0x0000; | 1076 | desc->base1 = 0x0000; |
@@ -1058,12 +1080,18 @@ struct boot_params *efi_main(struct efi_config *c, | |||
1058 | desc->p = 1; | 1080 | desc->p = 1; |
1059 | desc->limit = 0xf; | 1081 | desc->limit = 0xf; |
1060 | desc->avl = 0; | 1082 | desc->avl = 0; |
1061 | desc->l = 0; | 1083 | if (IS_ENABLED(CONFIG_X86_64)) { |
1062 | desc->d = SEG_OP_SIZE_32BIT; | 1084 | desc->l = 1; |
1085 | desc->d = 0; | ||
1086 | } else { | ||
1087 | desc->l = 0; | ||
1088 | desc->d = SEG_OP_SIZE_32BIT; | ||
1089 | } | ||
1063 | desc->g = SEG_GRANULARITY_4KB; | 1090 | desc->g = SEG_GRANULARITY_4KB; |
1064 | desc->base2 = 0x00; | 1091 | desc->base2 = 0x00; |
1065 | |||
1066 | desc++; | 1092 | desc++; |
1093 | |||
1094 | /* __KERNEL_DS */ | ||
1067 | desc->limit0 = 0xffff; | 1095 | desc->limit0 = 0xffff; |
1068 | desc->base0 = 0x0000; | 1096 | desc->base0 = 0x0000; |
1069 | desc->base1 = 0x0000; | 1097 | desc->base1 = 0x0000; |
@@ -1077,24 +1105,25 @@ struct boot_params *efi_main(struct efi_config *c, | |||
1077 | desc->d = SEG_OP_SIZE_32BIT; | 1105 | desc->d = SEG_OP_SIZE_32BIT; |
1078 | desc->g = SEG_GRANULARITY_4KB; | 1106 | desc->g = SEG_GRANULARITY_4KB; |
1079 | desc->base2 = 0x00; | 1107 | desc->base2 = 0x00; |
1080 | |||
1081 | #ifdef CONFIG_X86_64 | ||
1082 | /* Task segment value */ | ||
1083 | desc++; | 1108 | desc++; |
1084 | desc->limit0 = 0x0000; | 1109 | |
1085 | desc->base0 = 0x0000; | 1110 | if (IS_ENABLED(CONFIG_X86_64)) { |
1086 | desc->base1 = 0x0000; | 1111 | /* Task segment value */ |
1087 | desc->type = SEG_TYPE_TSS; | 1112 | desc->limit0 = 0x0000; |
1088 | desc->s = 0; | 1113 | desc->base0 = 0x0000; |
1089 | desc->dpl = 0; | 1114 | desc->base1 = 0x0000; |
1090 | desc->p = 1; | 1115 | desc->type = SEG_TYPE_TSS; |
1091 | desc->limit = 0x0; | 1116 | desc->s = 0; |
1092 | desc->avl = 0; | 1117 | desc->dpl = 0; |
1093 | desc->l = 0; | 1118 | desc->p = 1; |
1094 | desc->d = 0; | 1119 | desc->limit = 0x0; |
1095 | desc->g = SEG_GRANULARITY_4KB; | 1120 | desc->avl = 0; |
1096 | desc->base2 = 0x00; | 1121 | desc->l = 0; |
1097 | #endif /* CONFIG_X86_64 */ | 1122 | desc->d = 0; |
1123 | desc->g = SEG_GRANULARITY_4KB; | ||
1124 | desc->base2 = 0x00; | ||
1125 | desc++; | ||
1126 | } | ||
1098 | 1127 | ||
1099 | asm volatile("cli"); | 1128 | asm volatile("cli"); |
1100 | asm volatile ("lgdt %0" : : "m" (*gdt)); | 1129 | asm volatile ("lgdt %0" : : "m" (*gdt)); |
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index d2ae1f821e0c..fbf4c32d0b62 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S | |||
@@ -346,6 +346,48 @@ preferred_addr: | |||
346 | /* Set up the stack */ | 346 | /* Set up the stack */ |
347 | leaq boot_stack_end(%rbx), %rsp | 347 | leaq boot_stack_end(%rbx), %rsp |
348 | 348 | ||
349 | #ifdef CONFIG_X86_5LEVEL | ||
350 | /* Check if 5-level paging has already been enabled */ | ||
351 | movq %cr4, %rax | ||
352 | testl $X86_CR4_LA57, %eax | ||
353 | jnz lvl5 | ||
354 | |||
355 | /* | ||
356 | * At this point we are in long mode with 4-level paging enabled, | ||
357 | * but we want to enable 5-level paging. | ||
358 | * | ||
359 | * The problem is that we cannot do it directly. Setting LA57 in | ||
360 | * long mode would trigger #GP. So we need to switch off long mode | ||
361 | * first. | ||
362 | * | ||
363 | * NOTE: This is not going to work if the bootloader put us above | ||
364 | * the 4G limit. | ||
365 | * | ||
366 | * The first step is to go into compatibility mode. | ||
367 | */ | ||
368 | |||
369 | /* Clear additional page table */ | ||
370 | leaq lvl5_pgtable(%rbx), %rdi | ||
371 | xorq %rax, %rax | ||
372 | movq $(PAGE_SIZE/8), %rcx | ||
373 | rep stosq | ||
374 | |||
375 | /* | ||
376 | * Setup current CR3 as the first and only entry in a new top level | ||
377 | * page table. | ||
378 | */ | ||
379 | movq %cr3, %rdi | ||
380 | leaq 0x7 (%rdi), %rax | ||
381 | movq %rax, lvl5_pgtable(%rbx) | ||
382 | |||
383 | /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */ | ||
384 | pushq $__KERNEL32_CS | ||
385 | leaq compatible_mode(%rip), %rax | ||
386 | pushq %rax | ||
387 | lretq | ||
388 | lvl5: | ||
389 | #endif | ||
390 | |||
349 | /* Zero EFLAGS */ | 391 | /* Zero EFLAGS */ |
350 | pushq $0 | 392 | pushq $0 |
351 | popfq | 393 | popfq |
@@ -429,6 +471,44 @@ relocated: | |||
429 | jmp *%rax | 471 | jmp *%rax |
430 | 472 | ||
431 | .code32 | 473 | .code32 |
474 | #ifdef CONFIG_X86_5LEVEL | ||
475 | compatible_mode: | ||
476 | /* Setup data and stack segments */ | ||
477 | movl $__KERNEL_DS, %eax | ||
478 | movl %eax, %ds | ||
479 | movl %eax, %ss | ||
480 | |||
481 | /* Disable paging */ | ||
482 | movl %cr0, %eax | ||
483 | btrl $X86_CR0_PG_BIT, %eax | ||
484 | movl %eax, %cr0 | ||
485 | |||
486 | /* Point CR3 to 5-level paging */ | ||
487 | leal lvl5_pgtable(%ebx), %eax | ||
488 | movl %eax, %cr3 | ||
489 | |||
490 | /* Enable PAE and LA57 mode */ | ||
491 | movl %cr4, %eax | ||
492 | orl $(X86_CR4_PAE | X86_CR4_LA57), %eax | ||
493 | movl %eax, %cr4 | ||
494 | |||
495 | /* Calculate address we are running at */ | ||
496 | call 1f | ||
497 | 1: popl %edi | ||
498 | subl $1b, %edi | ||
499 | |||
500 | /* Prepare stack for far return to Long Mode */ | ||
501 | pushl $__KERNEL_CS | ||
502 | leal lvl5(%edi), %eax | ||
503 | push %eax | ||
504 | |||
505 | /* Re-enable paging */ | ||
506 | movl $(X86_CR0_PG | X86_CR0_PE), %eax | ||
507 | movl %eax, %cr0 | ||
508 | |||
509 | lret | ||
510 | #endif | ||
511 | |||
432 | no_longmode: | 512 | no_longmode: |
433 | /* This isn't an x86-64 CPU so hang */ | 513 | /* This isn't an x86-64 CPU so hang */ |
434 | 1: | 514 | 1: |
@@ -442,7 +522,7 @@ gdt: | |||
442 | .word gdt_end - gdt | 522 | .word gdt_end - gdt |
443 | .long gdt | 523 | .long gdt |
444 | .word 0 | 524 | .word 0 |
445 | .quad 0x0000000000000000 /* NULL descriptor */ | 525 | .quad 0x00cf9a000000ffff /* __KERNEL32_CS */ |
446 | .quad 0x00af9a000000ffff /* __KERNEL_CS */ | 526 | .quad 0x00af9a000000ffff /* __KERNEL_CS */ |
447 | .quad 0x00cf92000000ffff /* __KERNEL_DS */ | 527 | .quad 0x00cf92000000ffff /* __KERNEL_DS */ |
448 | .quad 0x0080890000000000 /* TS descriptor */ | 528 | .quad 0x0080890000000000 /* TS descriptor */ |
@@ -486,3 +566,7 @@ boot_stack_end: | |||
486 | .balign 4096 | 566 | .balign 4096 |
487 | pgtable: | 567 | pgtable: |
488 | .fill BOOT_PGT_SIZE, 1, 0 | 568 | .fill BOOT_PGT_SIZE, 1, 0 |
569 | #ifdef CONFIG_X86_5LEVEL | ||
570 | lvl5_pgtable: | ||
571 | .fill PAGE_SIZE, 1, 0 | ||
572 | #endif | ||
diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c index 1d78f1739087..28029be47fbb 100644 --- a/arch/x86/boot/compressed/pagetable.c +++ b/arch/x86/boot/compressed/pagetable.c | |||
@@ -63,7 +63,7 @@ static void *alloc_pgt_page(void *context) | |||
63 | static struct alloc_pgt_data pgt_data; | 63 | static struct alloc_pgt_data pgt_data; |
64 | 64 | ||
65 | /* The top level page table entry pointer. */ | 65 | /* The top level page table entry pointer. */ |
66 | static unsigned long level4p; | 66 | static unsigned long top_level_pgt; |
67 | 67 | ||
68 | /* | 68 | /* |
69 | * Mapping information structure passed to kernel_ident_mapping_init(). | 69 | * Mapping information structure passed to kernel_ident_mapping_init(). |
@@ -91,9 +91,15 @@ void initialize_identity_maps(void) | |||
91 | * If we came here via startup_32(), cr3 will be _pgtable already | 91 | * If we came here via startup_32(), cr3 will be _pgtable already |
92 | * and we must append to the existing area instead of entirely | 92 | * and we must append to the existing area instead of entirely |
93 | * overwriting it. | 93 | * overwriting it. |
94 | * | ||
95 | * With 5-level paging, we use '_pgtable' to allocate the p4d page table, | ||
96 | * the top-level page table is allocated separately. | ||
97 | * | ||
98 | * p4d_offset(top_level_pgt, 0) would cover both the 4- and 5-level | ||
99 | * cases. On 4-level paging it's equal to 'top_level_pgt'. | ||
94 | */ | 100 | */ |
95 | level4p = read_cr3(); | 101 | top_level_pgt = read_cr3_pa(); |
96 | if (level4p == (unsigned long)_pgtable) { | 102 | if (p4d_offset((pgd_t *)top_level_pgt, 0) == (p4d_t *)_pgtable) { |
97 | debug_putstr("booted via startup_32()\n"); | 103 | debug_putstr("booted via startup_32()\n"); |
98 | pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE; | 104 | pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE; |
99 | pgt_data.pgt_buf_size = BOOT_PGT_SIZE - BOOT_INIT_PGT_SIZE; | 105 | pgt_data.pgt_buf_size = BOOT_PGT_SIZE - BOOT_INIT_PGT_SIZE; |
@@ -103,7 +109,7 @@ void initialize_identity_maps(void) | |||
103 | pgt_data.pgt_buf = _pgtable; | 109 | pgt_data.pgt_buf = _pgtable; |
104 | pgt_data.pgt_buf_size = BOOT_PGT_SIZE; | 110 | pgt_data.pgt_buf_size = BOOT_PGT_SIZE; |
105 | memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size); | 111 | memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size); |
106 | level4p = (unsigned long)alloc_pgt_page(&pgt_data); | 112 | top_level_pgt = (unsigned long)alloc_pgt_page(&pgt_data); |
107 | } | 113 | } |
108 | } | 114 | } |
109 | 115 | ||
@@ -123,7 +129,7 @@ void add_identity_map(unsigned long start, unsigned long size) | |||
123 | return; | 129 | return; |
124 | 130 | ||
125 | /* Build the mapping. */ | 131 | /* Build the mapping. */ |
126 | kernel_ident_mapping_init(&mapping_info, (pgd_t *)level4p, | 132 | kernel_ident_mapping_init(&mapping_info, (pgd_t *)top_level_pgt, |
127 | start, end); | 133 | start, end); |
128 | } | 134 | } |
129 | 135 | ||
@@ -134,5 +140,5 @@ void add_identity_map(unsigned long start, unsigned long size) | |||
134 | */ | 140 | */ |
135 | void finalize_identity_maps(void) | 141 | void finalize_identity_maps(void) |
136 | { | 142 | { |
137 | write_cr3(level4p); | 143 | write_cr3(top_level_pgt); |
138 | } | 144 | } |