path: root/arch/x86/kernel
author    Linus Torvalds <torvalds@linux-foundation.org>    2017-07-03 17:45:09 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2017-07-03 17:45:09 -0400
commit    7a69f9c60b49699579f5bfb71f928cceba0afe1a (patch)
tree      bf3b5640bbd9f23beeb5a55d18348d65bafff8e8 /arch/x86/kernel
parent    9bc088ab66be8978fbc981ba9644468fa2c2fd3f (diff)
parent    8781fb7e9749da424e01daacd14834b674658c63 (diff)
Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 mm updates from Ingo Molnar:
 "The main changes in this cycle were:

   - Continued work to add support for 5-level paging provided by future
     Intel CPUs. In particular we switch the x86 GUP code to the generic
     implementation. (Kirill A. Shutemov)

   - Continued work to add PCID CPU support to native kernels as well. In
     this round most of the focus is on reworking/refreshing the TLB flush
     infrastructure for the upcoming PCID changes. (Andy Lutomirski)"

* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (34 commits)
  x86/mm: Delete a big outdated comment about TLB flushing
  x86/mm: Don't reenter flush_tlb_func_common()
  x86/KASLR: Fix detection 32/64 bit bootloaders for 5-level paging
  x86/ftrace: Exclude functions in head64.c from function-tracing
  x86/mmap, ASLR: Do not treat unlimited-stack tasks as legacy mmap
  x86/mm: Remove reset_lazy_tlbstate()
  x86/ldt: Simplify the LDT switching logic
  x86/boot/64: Put __startup_64() into .head.text
  x86/mm: Add support for 5-level paging for KASLR
  x86/mm: Make kernel_physical_mapping_init() support 5-level paging
  x86/mm: Add sync_global_pgds() for configuration with 5-level paging
  x86/boot/64: Add support of additional page table level during early boot
  x86/boot/64: Rename init_level4_pgt and early_level4_pgt
  x86/boot/64: Rewrite startup_64() in C
  x86/boot/compressed: Enable 5-level paging during decompression stage
  x86/boot/efi: Define __KERNEL32_CS GDT on 64-bit configurations
  x86/boot/efi: Fix __KERNEL_CS definition of GDT entry on 64-bit configurations
  x86/boot/efi: Cleanup initialization of GDT entries
  x86/asm: Fix comment in return_from_SYSCALL_64()
  x86/mm/gup: Switch GUP to the generic get_user_page_fast() implementation
  ...
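Editorial note on the 5-level paging work pulled in here: the series inserts a new "p4d" level between the pgd and the pud, and head_64.S below gains a matching p4d_index() macro. A minimal user-space sketch of the index arithmetic, using the standard x86-64 shift values (illustrative only, not kernel code):

/* Sketch: how a virtual address splits into per-level table indices. */
#include <stdint.h>
#include <stdio.h>

#define PMD_SHIFT        21   /* a PMD entry maps 2 MiB                  */
#define PUD_SHIFT        30   /* a PUD entry maps 1 GiB                  */
#define P4D_SHIFT        39   /* a P4D entry maps 512 GiB                */
#define PGDIR_SHIFT_4LVL 39   /* 4-level paging: PGD entry maps 512 GiB  */
#define PGDIR_SHIFT_5LVL 48   /* 5-level paging: PGD entry maps 256 TiB  */
#define PTRS_PER_TABLE   512  /* 9 bits of index per level               */

static unsigned idx(uint64_t addr, int shift)
{
	return (addr >> shift) & (PTRS_PER_TABLE - 1);
}

int main(void)
{
	uint64_t addr = 0xffffffff81000000ULL;	/* typical kernel text address */

	/* 4-level paging: the top index comes straight from bit 39. */
	printf("4-level: pgd=%u pud=%u pmd=%u\n",
	       idx(addr, PGDIR_SHIFT_4LVL), idx(addr, PUD_SHIFT), idx(addr, PMD_SHIFT));

	/* 5-level paging (CR4.LA57): a p4d level is inserted below the pgd. */
	printf("5-level: pgd=%u p4d=%u pud=%u pmd=%u\n",
	       idx(addr, PGDIR_SHIFT_5LVL), idx(addr, P4D_SHIFT),
	       idx(addr, PUD_SHIFT), idx(addr, PMD_SHIFT));
	return 0;
}

For a kernel text address this prints index 511 at the top level and 510 at the PUD level, which is why the diffs below touch entry 511 of early_top_pgt/level4_kernel_pgt and entries 510/511 of level3_kernel_pgt.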
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/Makefile           |   1
-rw-r--r--  arch/x86/kernel/espfix_64.c        |   2
-rw-r--r--  arch/x86/kernel/head64.c           | 145
-rw-r--r--  arch/x86/kernel/head_64.S          | 131
-rw-r--r--  arch/x86/kernel/ldt.c              |  56
-rw-r--r--  arch/x86/kernel/machine_kexec_64.c |   2
-rw-r--r--  arch/x86/kernel/paravirt.c         |   2
-rw-r--r--  arch/x86/kernel/process_32.c       |   2
-rw-r--r--  arch/x86/kernel/process_64.c       |   4
-rw-r--r--  arch/x86/kernel/smpboot.c          |   1
-rw-r--r--  arch/x86/kernel/step.c             |   2
11 files changed, 198 insertions(+), 150 deletions(-)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 3c7c419c4e3e..a01892bdd61a 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -18,6 +18,7 @@ CFLAGS_REMOVE_pvclock.o = -pg
 CFLAGS_REMOVE_kvmclock.o = -pg
 CFLAGS_REMOVE_ftrace.o = -pg
 CFLAGS_REMOVE_early_printk.o = -pg
+CFLAGS_REMOVE_head64.o = -pg
 endif
 
 KASAN_SANITIZE_head$(BITS).o := n
diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index 8e598a1ad986..6b91e2eb8d3f 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -125,7 +125,7 @@ void __init init_espfix_bsp(void)
 	p4d_t *p4d;
 
 	/* Install the espfix pud into the kernel page directory */
-	pgd = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)];
+	pgd = &init_top_pgt[pgd_index(ESPFIX_BASE_ADDR)];
 	p4d = p4d_alloc(&init_mm, pgd, ESPFIX_BASE_ADDR);
 	p4d_populate(&init_mm, p4d, espfix_pud_page);
 
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 43b7002f44fb..46c3c73e7f43 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -33,17 +33,120 @@
 /*
  * Manage page tables very early on.
  */
-extern pgd_t early_level4_pgt[PTRS_PER_PGD];
+extern pgd_t early_top_pgt[PTRS_PER_PGD];
 extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
-static unsigned int __initdata next_early_pgt = 2;
+static unsigned int __initdata next_early_pgt;
 pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
 
+#define __head __section(.head.text)
+
+static void __head *fixup_pointer(void *ptr, unsigned long physaddr)
+{
+	return ptr - (void *)_text + (void *)physaddr;
+}
+
+void __head __startup_64(unsigned long physaddr)
+{
+	unsigned long load_delta, *p;
+	pgdval_t *pgd;
+	p4dval_t *p4d;
+	pudval_t *pud;
+	pmdval_t *pmd, pmd_entry;
+	int i;
+
+	/* Is the address too large? */
+	if (physaddr >> MAX_PHYSMEM_BITS)
+		for (;;);
+
+	/*
+	 * Compute the delta between the address I am compiled to run at
+	 * and the address I am actually running at.
+	 */
+	load_delta = physaddr - (unsigned long)(_text - __START_KERNEL_map);
+
+	/* Is the address not 2M aligned? */
+	if (load_delta & ~PMD_PAGE_MASK)
+		for (;;);
+
+	/* Fixup the physical addresses in the page table */
+
+	pgd = fixup_pointer(&early_top_pgt, physaddr);
+	pgd[pgd_index(__START_KERNEL_map)] += load_delta;
+
+	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+		p4d = fixup_pointer(&level4_kernel_pgt, physaddr);
+		p4d[511] += load_delta;
+	}
+
+	pud = fixup_pointer(&level3_kernel_pgt, physaddr);
+	pud[510] += load_delta;
+	pud[511] += load_delta;
+
+	pmd = fixup_pointer(level2_fixmap_pgt, physaddr);
+	pmd[506] += load_delta;
+
+	/*
+	 * Set up the identity mapping for the switchover. These
+	 * entries should *NOT* have the global bit set! This also
+	 * creates a bunch of nonsense entries but that is fine --
+	 * it avoids problems around wraparound.
+	 */
+
+	pud = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);
+	pmd = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);
+
+	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+		p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);
+
+		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
+		pgd[i + 0] = (pgdval_t)p4d + _KERNPG_TABLE;
+		pgd[i + 1] = (pgdval_t)p4d + _KERNPG_TABLE;
+
+		i = (physaddr >> P4D_SHIFT) % PTRS_PER_P4D;
+		p4d[i + 0] = (pgdval_t)pud + _KERNPG_TABLE;
+		p4d[i + 1] = (pgdval_t)pud + _KERNPG_TABLE;
+	} else {
+		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
+		pgd[i + 0] = (pgdval_t)pud + _KERNPG_TABLE;
+		pgd[i + 1] = (pgdval_t)pud + _KERNPG_TABLE;
+	}
+
+	i = (physaddr >> PUD_SHIFT) % PTRS_PER_PUD;
+	pud[i + 0] = (pudval_t)pmd + _KERNPG_TABLE;
+	pud[i + 1] = (pudval_t)pmd + _KERNPG_TABLE;
+
+	pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
+	pmd_entry += physaddr;
+
+	for (i = 0; i < DIV_ROUND_UP(_end - _text, PMD_SIZE); i++) {
+		int idx = i + (physaddr >> PMD_SHIFT) % PTRS_PER_PMD;
+		pmd[idx] = pmd_entry + i * PMD_SIZE;
+	}
+
+	/*
+	 * Fixup the kernel text+data virtual addresses. Note that
+	 * we might write invalid pmds, when the kernel is relocated
+	 * cleanup_highmap() fixes this up along with the mappings
+	 * beyond _end.
+	 */
+
+	pmd = fixup_pointer(level2_kernel_pgt, physaddr);
+	for (i = 0; i < PTRS_PER_PMD; i++) {
+		if (pmd[i] & _PAGE_PRESENT)
+			pmd[i] += load_delta;
+	}
+
+	/* Fixup phys_base */
+	p = fixup_pointer(&phys_base, physaddr);
+	*p += load_delta;
+}
+
 /* Wipe all early page tables except for the kernel symbol map */
 static void __init reset_early_page_tables(void)
 {
-	memset(early_level4_pgt, 0, sizeof(pgd_t)*(PTRS_PER_PGD-1));
+	memset(early_top_pgt, 0, sizeof(pgd_t)*(PTRS_PER_PGD-1));
 	next_early_pgt = 0;
-	write_cr3(__pa_nodebug(early_level4_pgt));
+	write_cr3(__pa_nodebug(early_top_pgt));
 }
 
 /* Create a new PMD entry */
@@ -51,15 +154,16 @@ int __init early_make_pgtable(unsigned long address)
 {
 	unsigned long physaddr = address - __PAGE_OFFSET;
 	pgdval_t pgd, *pgd_p;
+	p4dval_t p4d, *p4d_p;
 	pudval_t pud, *pud_p;
 	pmdval_t pmd, *pmd_p;
 
 	/* Invalid address or early pgt is done ? */
-	if (physaddr >= MAXMEM || read_cr3() != __pa_nodebug(early_level4_pgt))
+	if (physaddr >= MAXMEM || read_cr3_pa() != __pa_nodebug(early_top_pgt))
 		return -1;
 
 again:
-	pgd_p = &early_level4_pgt[pgd_index(address)].pgd;
+	pgd_p = &early_top_pgt[pgd_index(address)].pgd;
 	pgd = *pgd_p;
 
 	/*
@@ -67,8 +171,25 @@ again:
 	 * critical -- __PAGE_OFFSET would point us back into the dynamic
 	 * range and we might end up looping forever...
 	 */
-	if (pgd)
-		pud_p = (pudval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
+	if (!IS_ENABLED(CONFIG_X86_5LEVEL))
+		p4d_p = pgd_p;
+	else if (pgd)
+		p4d_p = (p4dval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
+	else {
+		if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
+			reset_early_page_tables();
+			goto again;
+		}
+
+		p4d_p = (p4dval_t *)early_dynamic_pgts[next_early_pgt++];
+		memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
+		*pgd_p = (pgdval_t)p4d_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
+	}
+	p4d_p += p4d_index(address);
+	p4d = *p4d_p;
+
+	if (p4d)
+		pud_p = (pudval_t *)((p4d & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
 	else {
 		if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) {
 			reset_early_page_tables();
@@ -77,7 +198,7 @@ again:
 
 		pud_p = (pudval_t *)early_dynamic_pgts[next_early_pgt++];
 		memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
-		*pgd_p = (pgdval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
+		*p4d_p = (p4dval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE;
 	}
 	pud_p += pud_index(address);
 	pud = *pud_p;
@@ -156,7 +277,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
 
 	clear_bss();
 
-	clear_page(init_level4_pgt);
+	clear_page(init_top_pgt);
 
 	kasan_early_init();
 
@@ -171,8 +292,8 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
 	 */
 	load_ucode_bsp();
 
-	/* set init_level4_pgt kernel high mapping*/
-	init_level4_pgt[511] = early_level4_pgt[511];
+	/* set init_top_pgt kernel high mapping*/
+	init_top_pgt[511] = early_top_pgt[511];
 
 	x86_64_start_reservations(real_mode_data);
 }
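A note on the new __startup_64() above: it runs before the kernel's virtual mappings have been fixed up, so every global symbol it touches still carries its link-time (__START_KERNEL_map-based) address and has to be rebased onto the physical load address by hand, which is all fixup_pointer() does. A small stand-alone sketch of that arithmetic (the example addresses are invented; only the two formulas mirror the code above):

/* Sketch only -- made-up example addresses, not kernel code. */
#include <stdint.h>
#include <stdio.h>

#define START_KERNEL_MAP 0xffffffff80000000ULL	/* link-time base of kernel text */

int main(void)
{
	uint64_t text_vaddr = 0xffffffff81000000ULL; /* where _text is linked         */
	uint64_t physaddr   = 0x0000000005000000ULL; /* where the loader put _text    */
	uint64_t sym_vaddr  = 0xffffffff81234000ULL; /* some global, e.g. a pgt page  */

	/* load_delta = physaddr - (_text - __START_KERNEL_map) */
	uint64_t load_delta = physaddr - (text_vaddr - START_KERNEL_MAP);

	/* fixup_pointer(): rebase a link-time pointer onto the physical load address */
	uint64_t sym_paddr = sym_vaddr - text_vaddr + physaddr;

	printf("load_delta  = %#llx\n", (unsigned long long)load_delta);
	printf("symbol phys = %#llx\n", (unsigned long long)sym_paddr);
	return 0;
}

load_delta is then simply added to the affected page-table entries and to phys_base, exactly as the function does.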
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index ac9d327d2e42..6225550883df 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -37,10 +37,11 @@
  *
  */
 
+#define p4d_index(x)	(((x) >> P4D_SHIFT) & (PTRS_PER_P4D-1))
 #define pud_index(x)	(((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
 
-L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE)
-L4_START_KERNEL = pgd_index(__START_KERNEL_map)
+PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE)
+PGD_START_KERNEL = pgd_index(__START_KERNEL_map)
 L3_START_KERNEL = pud_index(__START_KERNEL_map)
 
 	.text
@@ -72,101 +73,12 @@ startup_64:
 	/* Sanitize CPU configuration */
 	call verify_cpu
 
-	/*
-	 * Compute the delta between the address I am compiled to run at and the
-	 * address I am actually running at.
-	 */
-	leaq	_text(%rip), %rbp
-	subq	$_text - __START_KERNEL_map, %rbp
-
-	/* Is the address not 2M aligned? */
-	testl	$~PMD_PAGE_MASK, %ebp
-	jnz	bad_address
-
-	/*
-	 * Is the address too large?
-	 */
-	leaq	_text(%rip), %rax
-	shrq	$MAX_PHYSMEM_BITS, %rax
-	jnz	bad_address
-
-	/*
-	 * Fixup the physical addresses in the page table
-	 */
-	addq	%rbp, early_level4_pgt + (L4_START_KERNEL*8)(%rip)
-
-	addq	%rbp, level3_kernel_pgt + (510*8)(%rip)
-	addq	%rbp, level3_kernel_pgt + (511*8)(%rip)
-
-	addq	%rbp, level2_fixmap_pgt + (506*8)(%rip)
-
-	/*
-	 * Set up the identity mapping for the switchover. These
-	 * entries should *NOT* have the global bit set! This also
-	 * creates a bunch of nonsense entries but that is fine --
-	 * it avoids problems around wraparound.
-	 */
 	leaq	_text(%rip), %rdi
-	leaq	early_level4_pgt(%rip), %rbx
-
-	movq	%rdi, %rax
-	shrq	$PGDIR_SHIFT, %rax
-
-	leaq	(PAGE_SIZE + _KERNPG_TABLE)(%rbx), %rdx
-	movq	%rdx, 0(%rbx,%rax,8)
-	movq	%rdx, 8(%rbx,%rax,8)
-
-	addq	$PAGE_SIZE, %rdx
-	movq	%rdi, %rax
-	shrq	$PUD_SHIFT, %rax
-	andl	$(PTRS_PER_PUD-1), %eax
-	movq	%rdx, PAGE_SIZE(%rbx,%rax,8)
-	incl	%eax
-	andl	$(PTRS_PER_PUD-1), %eax
-	movq	%rdx, PAGE_SIZE(%rbx,%rax,8)
-
-	addq	$PAGE_SIZE * 2, %rbx
-	movq	%rdi, %rax
-	shrq	$PMD_SHIFT, %rdi
-	addq	$(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL), %rax
-	leaq	(_end - 1)(%rip), %rcx
-	shrq	$PMD_SHIFT, %rcx
-	subq	%rdi, %rcx
-	incl	%ecx
-
-1:
-	andq	$(PTRS_PER_PMD - 1), %rdi
-	movq	%rax, (%rbx,%rdi,8)
-	incq	%rdi
-	addq	$PMD_SIZE, %rax
-	decl	%ecx
-	jnz	1b
-
-	test	%rbp, %rbp
-	jz	.Lskip_fixup
+	pushq	%rsi
+	call	__startup_64
+	popq	%rsi
 
-	/*
-	 * Fixup the kernel text+data virtual addresses. Note that
-	 * we might write invalid pmds, when the kernel is relocated
-	 * cleanup_highmap() fixes this up along with the mappings
-	 * beyond _end.
-	 */
-	leaq	level2_kernel_pgt(%rip), %rdi
-	leaq	PAGE_SIZE(%rdi), %r8
-	/* See if it is a valid page table entry */
-1:	testb	$_PAGE_PRESENT, 0(%rdi)
-	jz	2f
-	addq	%rbp, 0(%rdi)
-	/* Go to the next page */
-2:	addq	$8, %rdi
-	cmp	%r8, %rdi
-	jne	1b
-
-	/* Fixup phys_base */
-	addq	%rbp, phys_base(%rip)
-
-.Lskip_fixup:
-	movq	$(early_level4_pgt - __START_KERNEL_map), %rax
+	movq	$(early_top_pgt - __START_KERNEL_map), %rax
 	jmp 1f
 ENTRY(secondary_startup_64)
 	/*
@@ -186,14 +98,17 @@ ENTRY(secondary_startup_64)
 	/* Sanitize CPU configuration */
 	call verify_cpu
 
-	movq	$(init_level4_pgt - __START_KERNEL_map), %rax
+	movq	$(init_top_pgt - __START_KERNEL_map), %rax
 1:
 
-	/* Enable PAE mode and PGE */
+	/* Enable PAE mode, PGE and LA57 */
 	movl	$(X86_CR4_PAE | X86_CR4_PGE), %ecx
+#ifdef CONFIG_X86_5LEVEL
+	orl	$X86_CR4_LA57, %ecx
+#endif
 	movq	%rcx, %cr4
 
-	/* Setup early boot stage 4 level pagetables. */
+	/* Setup early boot stage 4-/5-level pagetables. */
 	addq	phys_base(%rip), %rax
 	movq	%rax, %cr3
 
@@ -417,9 +332,13 @@ GLOBAL(name)
 	.endr
 
 	__INITDATA
-NEXT_PAGE(early_level4_pgt)
+NEXT_PAGE(early_top_pgt)
 	.fill	511,8,0
+#ifdef CONFIG_X86_5LEVEL
+	.quad	level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
+#else
 	.quad	level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
+#endif
 
 NEXT_PAGE(early_dynamic_pgts)
 	.fill	512*EARLY_DYNAMIC_PAGE_TABLES,8,0
@@ -427,14 +346,14 @@ NEXT_PAGE(early_dynamic_pgts)
 	.data
 
 #ifndef CONFIG_XEN
-NEXT_PAGE(init_level4_pgt)
+NEXT_PAGE(init_top_pgt)
 	.fill	512,8,0
 #else
-NEXT_PAGE(init_level4_pgt)
+NEXT_PAGE(init_top_pgt)
 	.quad	level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
-	.org	init_level4_pgt + L4_PAGE_OFFSET*8, 0
+	.org	init_top_pgt + PGD_PAGE_OFFSET*8, 0
 	.quad	level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
-	.org	init_level4_pgt + L4_START_KERNEL*8, 0
+	.org	init_top_pgt + PGD_START_KERNEL*8, 0
 	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
 	.quad	level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
 
@@ -448,6 +367,12 @@ NEXT_PAGE(level2_ident_pgt)
 	PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
 #endif
 
+#ifdef CONFIG_X86_5LEVEL
+NEXT_PAGE(level4_kernel_pgt)
+	.fill	511,8,0
+	.quad	level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
+#endif
+
 NEXT_PAGE(level3_kernel_pgt)
 	.fill	L3_START_KERNEL,8,0
 	/* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index d4a15831ac58..a870910c8565 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -22,24 +22,25 @@
 #include <asm/syscalls.h>
 
 /* context.lock is held for us, so we don't need any locking. */
-static void flush_ldt(void *current_mm)
+static void flush_ldt(void *__mm)
 {
+	struct mm_struct *mm = __mm;
 	mm_context_t *pc;
 
-	if (current->active_mm != current_mm)
+	if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm)
 		return;
 
-	pc = &current->active_mm->context;
-	set_ldt(pc->ldt->entries, pc->ldt->size);
+	pc = &mm->context;
+	set_ldt(pc->ldt->entries, pc->ldt->nr_entries);
 }
 
 /* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
-static struct ldt_struct *alloc_ldt_struct(unsigned int size)
+static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
 {
 	struct ldt_struct *new_ldt;
 	unsigned int alloc_size;
 
-	if (size > LDT_ENTRIES)
+	if (num_entries > LDT_ENTRIES)
 		return NULL;
 
 	new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL);
@@ -47,7 +48,7 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int size)
 		return NULL;
 
 	BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct));
-	alloc_size = size * LDT_ENTRY_SIZE;
+	alloc_size = num_entries * LDT_ENTRY_SIZE;
 
 	/*
 	 * Xen is very picky: it requires a page-aligned LDT that has no
@@ -65,14 +66,14 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int size)
 		return NULL;
 	}
 
-	new_ldt->size = size;
+	new_ldt->nr_entries = num_entries;
 	return new_ldt;
 }
 
 /* After calling this, the LDT is immutable. */
 static void finalize_ldt_struct(struct ldt_struct *ldt)
 {
-	paravirt_alloc_ldt(ldt->entries, ldt->size);
+	paravirt_alloc_ldt(ldt->entries, ldt->nr_entries);
 }
 
 /* context.lock is held */
@@ -91,8 +92,8 @@ static void free_ldt_struct(struct ldt_struct *ldt)
 	if (likely(!ldt))
 		return;
 
-	paravirt_free_ldt(ldt->entries, ldt->size);
-	if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE)
+	paravirt_free_ldt(ldt->entries, ldt->nr_entries);
+	if (ldt->nr_entries * LDT_ENTRY_SIZE > PAGE_SIZE)
 		vfree_atomic(ldt->entries);
 	else
 		free_page((unsigned long)ldt->entries);
@@ -122,14 +123,14 @@ int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm)
 		goto out_unlock;
 	}
 
-	new_ldt = alloc_ldt_struct(old_mm->context.ldt->size);
+	new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries);
 	if (!new_ldt) {
 		retval = -ENOMEM;
 		goto out_unlock;
 	}
 
 	memcpy(new_ldt->entries, old_mm->context.ldt->entries,
-	       new_ldt->size * LDT_ENTRY_SIZE);
+	       new_ldt->nr_entries * LDT_ENTRY_SIZE);
 	finalize_ldt_struct(new_ldt);
 
 	mm->context.ldt = new_ldt;
@@ -152,9 +153,9 @@ void destroy_context_ldt(struct mm_struct *mm)
 
 static int read_ldt(void __user *ptr, unsigned long bytecount)
 {
-	int retval;
-	unsigned long size;
 	struct mm_struct *mm = current->mm;
+	unsigned long entries_size;
+	int retval;
 
 	mutex_lock(&mm->context.lock);
 
@@ -166,18 +167,18 @@ static int read_ldt(void __user *ptr, unsigned long bytecount)
 	if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES)
 		bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES;
 
-	size = mm->context.ldt->size * LDT_ENTRY_SIZE;
-	if (size > bytecount)
-		size = bytecount;
+	entries_size = mm->context.ldt->nr_entries * LDT_ENTRY_SIZE;
+	if (entries_size > bytecount)
+		entries_size = bytecount;
 
-	if (copy_to_user(ptr, mm->context.ldt->entries, size)) {
+	if (copy_to_user(ptr, mm->context.ldt->entries, entries_size)) {
 		retval = -EFAULT;
 		goto out_unlock;
 	}
 
-	if (size != bytecount) {
+	if (entries_size != bytecount) {
 		/* Zero-fill the rest and pretend we read bytecount bytes. */
-		if (clear_user(ptr + size, bytecount - size)) {
+		if (clear_user(ptr + entries_size, bytecount - entries_size)) {
 			retval = -EFAULT;
 			goto out_unlock;
 		}
@@ -208,7 +209,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 {
 	struct mm_struct *mm = current->mm;
 	struct ldt_struct *new_ldt, *old_ldt;
-	unsigned int oldsize, newsize;
+	unsigned int old_nr_entries, new_nr_entries;
 	struct user_desc ldt_info;
 	struct desc_struct ldt;
 	int error;
@@ -247,17 +248,18 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 
 	mutex_lock(&mm->context.lock);
 
 	old_ldt = mm->context.ldt;
-	oldsize = old_ldt ? old_ldt->size : 0;
-	newsize = max(ldt_info.entry_number + 1, oldsize);
+	old_nr_entries = old_ldt ? old_ldt->nr_entries : 0;
+	new_nr_entries = max(ldt_info.entry_number + 1, old_nr_entries);
 
 	error = -ENOMEM;
-	new_ldt = alloc_ldt_struct(newsize);
+	new_ldt = alloc_ldt_struct(new_nr_entries);
 	if (!new_ldt)
 		goto out_unlock;
 
 	if (old_ldt)
-		memcpy(new_ldt->entries, old_ldt->entries, oldsize * LDT_ENTRY_SIZE);
+		memcpy(new_ldt->entries, old_ldt->entries, old_nr_entries * LDT_ENTRY_SIZE);
+
 	new_ldt->entries[ldt_info.entry_number] = ldt;
 	finalize_ldt_struct(new_ldt);
 
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 6f5ca4ebe6e5..cb0a30473c23 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -347,7 +347,7 @@ void machine_kexec(struct kimage *image)
 void arch_crash_save_vmcoreinfo(void)
 {
 	VMCOREINFO_NUMBER(phys_base);
-	VMCOREINFO_SYMBOL(init_level4_pgt);
+	VMCOREINFO_SYMBOL(init_top_pgt);
 
 #ifdef CONFIG_NUMA
 	VMCOREINFO_SYMBOL(node_data);
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 3586996fc50d..bc0a849589bb 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -391,7 +391,7 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
 
 	.read_cr2 = native_read_cr2,
 	.write_cr2 = native_write_cr2,
-	.read_cr3 = native_read_cr3,
+	.read_cr3 = __native_read_cr3,
 	.write_cr3 = native_write_cr3,
 
 	.flush_tlb_user = native_flush_tlb,
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index ffeae818aa7a..c6d6dc5f8bb2 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -92,7 +92,7 @@ void __show_regs(struct pt_regs *regs, int all)
 
 	cr0 = read_cr0();
 	cr2 = read_cr2();
-	cr3 = read_cr3();
+	cr3 = __read_cr3();
 	cr4 = __read_cr4();
 	printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
 			cr0, cr2, cr3, cr4);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b6840bf3940b..c3169be4c596 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -104,7 +104,7 @@ void __show_regs(struct pt_regs *regs, int all)
 
 	cr0 = read_cr0();
 	cr2 = read_cr2();
-	cr3 = read_cr3();
+	cr3 = __read_cr3();
 	cr4 = __read_cr4();
 
 	printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
@@ -142,7 +142,7 @@ void release_thread(struct task_struct *dead_task)
 		pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
 			dead_task->comm,
 			dead_task->mm->context.ldt->entries,
-			dead_task->mm->context.ldt->size);
+			dead_task->mm->context.ldt->nr_entries);
 		BUG();
 	}
 #endif
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 045e4f993bd2..b474c8de7fba 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1589,7 +1589,6 @@ void native_cpu_die(unsigned int cpu)
 void play_dead_common(void)
 {
 	idle_task_exit();
-	reset_lazy_tlbstate();
 
 	/* Ack it */
 	(void)cpu_report_death();
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index f07f83b3611b..5f25cfbd952e 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -34,7 +34,7 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re
 
 	mutex_lock(&child->mm->context.lock);
 	if (unlikely(!child->mm->context.ldt ||
-		     seg >= child->mm->context.ldt->size))
+		     seg >= child->mm->context.ldt->nr_entries))
 		addr = -1L; /* bogus selector, access would fault */
 	else {
 		desc = &child->mm->context.ldt->entries[seg];