author     Linus Torvalds <torvalds@linux-foundation.org>   2017-07-03 17:45:09 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2017-07-03 17:45:09 -0400
commit     7a69f9c60b49699579f5bfb71f928cceba0afe1a (patch)
tree       bf3b5640bbd9f23beeb5a55d18348d65bafff8e8 /arch/x86/boot/compressed
parent     9bc088ab66be8978fbc981ba9644468fa2c2fd3f (diff)
parent     8781fb7e9749da424e01daacd14834b674658c63 (diff)
Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 mm updates from Ingo Molnar:
 "The main changes in this cycle were:

   - Continued work to add support for 5-level paging provided by future
     Intel CPUs. In particular we switch the x86 GUP code to the generic
     implementation. (Kirill A. Shutemov)

   - Continued work to add PCID CPU support to native kernels as well.
     In this round most of the focus is on reworking/refreshing the TLB
     flush infrastructure for the upcoming PCID changes. (Andy
     Lutomirski)"

* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (34 commits)
  x86/mm: Delete a big outdated comment about TLB flushing
  x86/mm: Don't reenter flush_tlb_func_common()
  x86/KASLR: Fix detection 32/64 bit bootloaders for 5-level paging
  x86/ftrace: Exclude functions in head64.c from function-tracing
  x86/mmap, ASLR: Do not treat unlimited-stack tasks as legacy mmap
  x86/mm: Remove reset_lazy_tlbstate()
  x86/ldt: Simplify the LDT switching logic
  x86/boot/64: Put __startup_64() into .head.text
  x86/mm: Add support for 5-level paging for KASLR
  x86/mm: Make kernel_physical_mapping_init() support 5-level paging
  x86/mm: Add sync_global_pgds() for configuration with 5-level paging
  x86/boot/64: Add support of additional page table level during early boot
  x86/boot/64: Rename init_level4_pgt and early_level4_pgt
  x86/boot/64: Rewrite startup_64() in C
  x86/boot/compressed: Enable 5-level paging during decompression stage
  x86/boot/efi: Define __KERNEL32_CS GDT on 64-bit configurations
  x86/boot/efi: Fix __KERNEL_CS definition of GDT entry on 64-bit configurations
  x86/boot/efi: Cleanup initialization of GDT entries
  x86/asm: Fix comment in return_from_SYSCALL_64()
  x86/mm/gup: Switch GUP to the generic get_user_page_fast() implementation
  ...
Diffstat (limited to 'arch/x86/boot/compressed')
-rw-r--r--  arch/x86/boot/compressed/eboot.c       73
-rw-r--r--  arch/x86/boot/compressed/head_64.S     86
-rw-r--r--  arch/x86/boot/compressed/pagetable.c   18
3 files changed, 148 insertions(+), 29 deletions(-)
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index cbf4b87f55b9..c3e869eaef0c 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -1046,9 +1046,31 @@ struct boot_params *efi_main(struct efi_config *c,
 	memset((char *)gdt->address, 0x0, gdt->size);
 	desc = (struct desc_struct *)gdt->address;
 
-	/* The first GDT is a dummy and the second is unused. */
-	desc += 2;
+	/* The first GDT is a dummy. */
+	desc++;
+
+	if (IS_ENABLED(CONFIG_X86_64)) {
+		/* __KERNEL32_CS */
+		desc->limit0 = 0xffff;
+		desc->base0 = 0x0000;
+		desc->base1 = 0x0000;
+		desc->type = SEG_TYPE_CODE | SEG_TYPE_EXEC_READ;
+		desc->s = DESC_TYPE_CODE_DATA;
+		desc->dpl = 0;
+		desc->p = 1;
+		desc->limit = 0xf;
+		desc->avl = 0;
+		desc->l = 0;
+		desc->d = SEG_OP_SIZE_32BIT;
+		desc->g = SEG_GRANULARITY_4KB;
+		desc->base2 = 0x00;
+		desc++;
+	} else {
+		/* Second entry is unused on 32-bit */
+		desc++;
+	}
 
+	/* __KERNEL_CS */
 	desc->limit0 = 0xffff;
 	desc->base0 = 0x0000;
 	desc->base1 = 0x0000;
@@ -1058,12 +1080,18 @@ struct boot_params *efi_main(struct efi_config *c,
 	desc->p = 1;
 	desc->limit = 0xf;
 	desc->avl = 0;
-	desc->l = 0;
-	desc->d = SEG_OP_SIZE_32BIT;
+	if (IS_ENABLED(CONFIG_X86_64)) {
+		desc->l = 1;
+		desc->d = 0;
+	} else {
+		desc->l = 0;
+		desc->d = SEG_OP_SIZE_32BIT;
+	}
 	desc->g = SEG_GRANULARITY_4KB;
 	desc->base2 = 0x00;
-
 	desc++;
+
+	/* __KERNEL_DS */
 	desc->limit0 = 0xffff;
 	desc->base0 = 0x0000;
 	desc->base1 = 0x0000;
@@ -1077,24 +1105,25 @@ struct boot_params *efi_main(struct efi_config *c,
 	desc->d = SEG_OP_SIZE_32BIT;
 	desc->g = SEG_GRANULARITY_4KB;
 	desc->base2 = 0x00;
-
-#ifdef CONFIG_X86_64
-	/* Task segment value */
 	desc++;
-	desc->limit0 = 0x0000;
-	desc->base0 = 0x0000;
-	desc->base1 = 0x0000;
-	desc->type = SEG_TYPE_TSS;
-	desc->s = 0;
-	desc->dpl = 0;
-	desc->p = 1;
-	desc->limit = 0x0;
-	desc->avl = 0;
-	desc->l = 0;
-	desc->d = 0;
-	desc->g = SEG_GRANULARITY_4KB;
-	desc->base2 = 0x00;
-#endif /* CONFIG_X86_64 */
+
+	if (IS_ENABLED(CONFIG_X86_64)) {
+		/* Task segment value */
+		desc->limit0 = 0x0000;
+		desc->base0 = 0x0000;
+		desc->base1 = 0x0000;
+		desc->type = SEG_TYPE_TSS;
+		desc->s = 0;
+		desc->dpl = 0;
+		desc->p = 1;
+		desc->limit = 0x0;
+		desc->avl = 0;
+		desc->l = 0;
+		desc->d = 0;
+		desc->g = SEG_GRANULARITY_4KB;
+		desc->base2 = 0x00;
+		desc++;
+	}
 
 	asm volatile("cli");
 	asm volatile ("lgdt %0" : : "m" (*gdt));
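The field-by-field GDT setup above corresponds to the raw descriptor quads this series drops into the boot GDT in head_64.S further down. As a cross-check, here is a minimal standalone C sketch; pack_desc() is a hypothetical helper that follows the architectural segment-descriptor bit layout, not the kernel's struct desc_struct:

/* Sketch: pack the descriptor fields set in efi_main() into raw 8-byte
 * GDT entries, to show they match the quads used in head_64.S. */
#include <stdint.h>
#include <stdio.h>

static uint64_t pack_desc(uint16_t limit0, uint32_t base, uint8_t type,
                          uint8_t s, uint8_t dpl, uint8_t p, uint8_t limit,
                          uint8_t avl, uint8_t l, uint8_t d, uint8_t g)
{
        uint64_t v = 0;

        v |= (uint64_t)limit0;                          /* limit 15:0  */
        v |= (uint64_t)(base & 0xffffff) << 16;         /* base 23:0   */
        v |= (uint64_t)(type & 0xf) << 40;
        v |= (uint64_t)(s & 1) << 44;
        v |= (uint64_t)(dpl & 3) << 45;
        v |= (uint64_t)(p & 1) << 47;
        v |= (uint64_t)(limit & 0xf) << 48;             /* limit 19:16 */
        v |= (uint64_t)(avl & 1) << 52;
        v |= (uint64_t)(l & 1) << 53;
        v |= (uint64_t)(d & 1) << 54;
        v |= (uint64_t)(g & 1) << 55;
        v |= (uint64_t)((base >> 24) & 0xff) << 56;     /* base 31:24  */
        return v;
}

int main(void)
{
        /* type 0xa = code, exec/read; type 0x2 = data, read/write */
        printf("__KERNEL32_CS = %#018llx\n",    /* expect 0x00cf9a000000ffff */
               (unsigned long long)pack_desc(0xffff, 0, 0xa, 1, 0, 1, 0xf, 0, 0, 1, 1));
        printf("__KERNEL_CS   = %#018llx\n",    /* expect 0x00af9a000000ffff */
               (unsigned long long)pack_desc(0xffff, 0, 0xa, 1, 0, 1, 0xf, 0, 1, 0, 1));
        printf("__KERNEL_DS   = %#018llx\n",    /* expect 0x00cf92000000ffff */
               (unsigned long long)pack_desc(0xffff, 0, 0x2, 1, 0, 1, 0xf, 0, 0, 1, 1));
        return 0;
}

It prints 0x00cf9a000000ffff, 0x00af9a000000ffff and 0x00cf92000000ffff, i.e. the __KERNEL32_CS, __KERNEL_CS and __KERNEL_DS quads; the two code segments differ only in the L/D pair, which is exactly what the IS_ENABLED(CONFIG_X86_64) branches above toggle.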
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index d2ae1f821e0c..fbf4c32d0b62 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -346,6 +346,48 @@ preferred_addr:
 	/* Set up the stack */
 	leaq	boot_stack_end(%rbx), %rsp
 
+#ifdef CONFIG_X86_5LEVEL
+	/* Check if 5-level paging has already enabled */
+	movq	%cr4, %rax
+	testl	$X86_CR4_LA57, %eax
+	jnz	lvl5
+
+	/*
+	 * At this point we are in long mode with 4-level paging enabled,
+	 * but we want to enable 5-level paging.
+	 *
+	 * The problem is that we cannot do it directly. Setting LA57 in
+	 * long mode would trigger #GP. So we need to switch off long mode
+	 * first.
+	 *
+	 * NOTE: This is not going to work if bootloader put us above 4G
+	 * limit.
+	 *
+	 * The first step is go into compatibility mode.
+	 */
+
+	/* Clear additional page table */
+	leaq	lvl5_pgtable(%rbx), %rdi
+	xorq	%rax, %rax
+	movq	$(PAGE_SIZE/8), %rcx
+	rep	stosq
+
+	/*
+	 * Setup current CR3 as the first and only entry in a new top level
+	 * page table.
+	 */
+	movq	%cr3, %rdi
+	leaq	0x7 (%rdi), %rax
+	movq	%rax, lvl5_pgtable(%rbx)
+
+	/* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
+	pushq	$__KERNEL32_CS
+	leaq	compatible_mode(%rip), %rax
+	pushq	%rax
+	lretq
+lvl5:
+#endif
+
 	/* Zero EFLAGS */
 	pushq	$0
 	popfq
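The "first and only entry" step above amounts to: entry 0 of the freshly cleared lvl5_pgtable receives the old 4-level top table's address plus 0x7 (Present | Writable | User), so every address that resolved under 4-level paging keeps resolving once LA57 is set. A rough C rendering of just that step, as a sketch only (the names merely mirror the assembly labels, and the CR3 value in main() is made up):

/* Sketch of the lvl5_pgtable setup performed by the assembly above. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SIZE	4096
#define PTRS_PER_PGD	(PAGE_SIZE / sizeof(uint64_t))

/* Mirrors the zero-filled page reserved at the lvl5_pgtable label. */
static uint64_t lvl5_pgtable[PTRS_PER_PGD] __attribute__((aligned(PAGE_SIZE)));

/* Returns the value to load into CR3 once LA57 is enabled. */
static uint64_t build_lvl5_top(uint64_t old_cr3)
{
        memset(lvl5_pgtable, 0, sizeof(lvl5_pgtable));  /* rep stosq */
        /* leaq 0x7(%rdi), %rax: old top table + Present|Write|User */
        lvl5_pgtable[0] = old_cr3 + 0x7;
        return (uint64_t)(uintptr_t)lvl5_pgtable;
}

int main(void)
{
        /* Pretend the old 4-level table sat at 2 MiB (illustrative value). */
        uint64_t new_cr3 = build_lvl5_top(0x200000);

        printf("lvl5_pgtable[0] = %#llx, new CR3 = %#llx\n",
               (unsigned long long)lvl5_pgtable[0],
               (unsigned long long)new_cr3);
        return 0;
}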
@@ -429,6 +471,44 @@ relocated:
 	jmp	*%rax
 
 	.code32
+#ifdef CONFIG_X86_5LEVEL
+compatible_mode:
+	/* Setup data and stack segments */
+	movl	$__KERNEL_DS, %eax
+	movl	%eax, %ds
+	movl	%eax, %ss
+
+	/* Disable paging */
+	movl	%cr0, %eax
+	btrl	$X86_CR0_PG_BIT, %eax
+	movl	%eax, %cr0
+
+	/* Point CR3 to 5-level paging */
+	leal	lvl5_pgtable(%ebx), %eax
+	movl	%eax, %cr3
+
+	/* Enable PAE and LA57 mode */
+	movl	%cr4, %eax
+	orl	$(X86_CR4_PAE | X86_CR4_LA57), %eax
+	movl	%eax, %cr4
+
+	/* Calculate address we are running at */
+	call	1f
+1:	popl	%edi
+	subl	$1b, %edi
+
+	/* Prepare stack for far return to Long Mode */
+	pushl	$__KERNEL_CS
+	leal	lvl5(%edi), %eax
+	push	%eax
+
+	/* Enable paging back */
+	movl	$(X86_CR0_PG | X86_CR0_PE), %eax
+	movl	%eax, %cr0
+
+	lret
+#endif
+
 no_longmode:
 	/* This isn't an x86-64 CPU so hang */
 1:
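For orientation, the trampoline above reduces to four control-register updates bracketed by two far returns. The following simulation replays that order with plain variables; it is illustrative only, the bit positions are the architectural CR0/CR4 ones, and the physical addresses are made up:

/* Sketch: the trampoline's effect on CR0/CR3/CR4, modelled with plain
 * variables rather than privileged register accesses. */
#include <stdint.h>
#include <stdio.h>

#define X86_CR0_PE	(1ULL << 0)	/* protected mode enable */
#define X86_CR0_PG	(1ULL << 31)	/* paging enable */
#define X86_CR4_PAE	(1ULL << 5)	/* physical address extension */
#define X86_CR4_LA57	(1ULL << 12)	/* 57-bit linear addresses (5-level) */

int main(void)
{
        uint64_t cr0 = X86_CR0_PG | X86_CR0_PE;	/* long mode, paging on */
        uint64_t cr3 = 0x100000;		/* old 4-level top table (made up) */
        uint64_t cr4 = X86_CR4_PAE;
        uint64_t lvl5_pgtable = 0x200000;	/* freshly cleared page (made up) */

        /* lretq to __KERNEL32_CS: CS.L = 0, CS.D = 1 -> compatibility mode */
        cr0 &= ~X86_CR0_PG;			/* paging off; long mode deactivates */
        cr3 = lvl5_pgtable;			/* CR3 -> new 5-level top table */
        cr4 |= X86_CR4_PAE | X86_CR4_LA57;	/* LA57 is now legal to set */
        cr0 = X86_CR0_PG | X86_CR0_PE;		/* paging back on: 5-level long mode */
        /* lret to __KERNEL_CS: CS.L = 1 -> back to 64-bit code at lvl5 */

        printf("cr0=%#llx cr3=%#llx cr4=%#llx\n",
               (unsigned long long)cr0, (unsigned long long)cr3,
               (unsigned long long)cr4);
        return 0;
}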
@@ -442,7 +522,7 @@ gdt:
 	.word	gdt_end - gdt
 	.long	gdt
 	.word	0
-	.quad	0x0000000000000000	/* NULL descriptor */
+	.quad	0x00cf9a000000ffff	/* __KERNEL32_CS */
 	.quad	0x00af9a000000ffff	/* __KERNEL_CS */
 	.quad	0x00cf92000000ffff	/* __KERNEL_DS */
 	.quad	0x0080890000000000	/* TS descriptor */
@@ -486,3 +566,7 @@ boot_stack_end:
 	.balign 4096
 pgtable:
 	.fill BOOT_PGT_SIZE, 1, 0
+#ifdef CONFIG_X86_5LEVEL
+lvl5_pgtable:
+	.fill PAGE_SIZE, 1, 0
+#endif
diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c
index 1d78f1739087..28029be47fbb 100644
--- a/arch/x86/boot/compressed/pagetable.c
+++ b/arch/x86/boot/compressed/pagetable.c
@@ -63,7 +63,7 @@ static void *alloc_pgt_page(void *context)
 static struct alloc_pgt_data pgt_data;
 
 /* The top level page table entry pointer. */
-static unsigned long level4p;
+static unsigned long top_level_pgt;
 
 /*
  * Mapping information structure passed to kernel_ident_mapping_init().
@@ -91,9 +91,15 @@ void initialize_identity_maps(void)
 	 * If we came here via startup_32(), cr3 will be _pgtable already
 	 * and we must append to the existing area instead of entirely
 	 * overwriting it.
+	 *
+	 * With 5-level paging, we use '_pgtable' to allocate the p4d page table,
+	 * the top-level page table is allocated separately.
+	 *
+	 * p4d_offset(top_level_pgt, 0) would cover both the 4- and 5-level
+	 * cases. On 4-level paging it's equal to 'top_level_pgt'.
 	 */
-	level4p = read_cr3();
-	if (level4p == (unsigned long)_pgtable) {
+	top_level_pgt = read_cr3_pa();
+	if (p4d_offset((pgd_t *)top_level_pgt, 0) == (p4d_t *)_pgtable) {
 		debug_putstr("booted via startup_32()\n");
 		pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE;
 		pgt_data.pgt_buf_size = BOOT_PGT_SIZE - BOOT_INIT_PGT_SIZE;
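The new comment leans on p4d folding: in a kernel built without CONFIG_X86_5LEVEL the p4d level collapses into the pgd, and p4d_offset() hands back the pgd pointer it was given, so the check degenerates to the old 'top_level_pgt == _pgtable' comparison. A simplified sketch of the two behaviours (illustrative; the real definitions live in the generic and x86 pgtable headers, and the 5-level index arithmetic is condensed here):

/* Sketch of why p4d_offset(top_level_pgt, 0) works in both configurations. */
#include <stdint.h>

typedef struct { uint64_t pgd; } pgd_t;
typedef struct { uint64_t p4d; } p4d_t;

#ifndef CONFIG_X86_5LEVEL
/*
 * 4-level build: the p4d level is folded away, so the "p4d table" is
 * the pgd itself and the comparison against _pgtable behaves exactly
 * like the old 'top_level_pgt == _pgtable' check.
 */
static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
{
        return (p4d_t *)pgd;
}
#else
/*
 * 5-level build: descend one real level; the result points into the
 * p4d page allocated from '_pgtable' when we came via startup_32().
 */
static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
{
        p4d_t *p4d_page = (p4d_t *)(uintptr_t)(pgd->pgd & ~0xfffULL);	/* strip flags */
        return p4d_page + ((address >> 39) & 0x1ff);			/* P4D index */
}
#endif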
@@ -103,7 +109,7 @@ void initialize_identity_maps(void)
 		pgt_data.pgt_buf = _pgtable;
 		pgt_data.pgt_buf_size = BOOT_PGT_SIZE;
 		memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
-		level4p = (unsigned long)alloc_pgt_page(&pgt_data);
+		top_level_pgt = (unsigned long)alloc_pgt_page(&pgt_data);
 	}
 }
 
@@ -123,7 +129,7 @@ void add_identity_map(unsigned long start, unsigned long size)
 		return;
 
 	/* Build the mapping. */
-	kernel_ident_mapping_init(&mapping_info, (pgd_t *)level4p,
+	kernel_ident_mapping_init(&mapping_info, (pgd_t *)top_level_pgt,
 				  start, end);
 }
 
@@ -134,5 +140,5 @@ void add_identity_map(unsigned long start, unsigned long size)
  */
 void finalize_identity_maps(void)
 {
-	write_cr3(level4p);
+	write_cr3(top_level_pgt);
 }