aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/x86/x86_64/mm.txt2
-rw-r--r--arch/x86/include/asm/pgtable_64_types.h2
-rw-r--r--arch/x86/include/asm/setup.h3
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/entry_64.S73
-rw-r--r--arch/x86/kernel/espfix_64.c208
-rw-r--r--arch/x86/kernel/ldt.c11
-rw-r--r--arch/x86/kernel/smpboot.c7
-rw-r--r--arch/x86/mm/dump_pagetables.c44
-rw-r--r--init/main.c4
10 files changed, 329 insertions, 26 deletions
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index c584a51add15..afe68ddbe6a4 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -12,6 +12,8 @@ ffffc90000000000 - ffffe8ffffffffff (=45 bits) vmalloc/ioremap space
12ffffe90000000000 - ffffe9ffffffffff (=40 bits) hole 12ffffe90000000000 - ffffe9ffffffffff (=40 bits) hole
13ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB) 13ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
14... unused hole ... 14... unused hole ...
15ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
16... unused hole ...
15ffffffff80000000 - ffffffffa0000000 (=512 MB) kernel text mapping, from phys 0 17ffffffff80000000 - ffffffffa0000000 (=512 MB) kernel text mapping, from phys 0
16ffffffffa0000000 - ffffffffff5fffff (=1525 MB) module mapping space 18ffffffffa0000000 - ffffffffff5fffff (=1525 MB) module mapping space
17ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls 19ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index c883bf726398..7166e25ecb57 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -61,6 +61,8 @@ typedef struct { pteval_t pte; } pte_t;
61#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) 61#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
62#define MODULES_END _AC(0xffffffffff000000, UL) 62#define MODULES_END _AC(0xffffffffff000000, UL)
63#define MODULES_LEN (MODULES_END - MODULES_VADDR) 63#define MODULES_LEN (MODULES_END - MODULES_VADDR)
64#define ESPFIX_PGD_ENTRY _AC(-2, UL)
65#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << PGDIR_SHIFT)
64 66
65#define EARLY_DYNAMIC_PAGE_TABLES 64 67#define EARLY_DYNAMIC_PAGE_TABLES 64
66 68
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 9264f04a4c55..9e3be3329a7e 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -57,6 +57,9 @@ extern void x86_ce4100_early_setup(void);
57static inline void x86_ce4100_early_setup(void) { } 57static inline void x86_ce4100_early_setup(void) { }
58#endif 58#endif
59 59
60extern void init_espfix_bsp(void);
61extern void init_espfix_ap(void);
62
60#ifndef _SETUP 63#ifndef _SETUP
61 64
62/* 65/*
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index f4d96000d33a..1cc3789d99d9 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -29,6 +29,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
29obj-y += syscall_$(BITS).o vsyscall_gtod.o 29obj-y += syscall_$(BITS).o vsyscall_gtod.o
30obj-$(CONFIG_X86_64) += vsyscall_64.o 30obj-$(CONFIG_X86_64) += vsyscall_64.o
31obj-$(CONFIG_X86_64) += vsyscall_emu_64.o 31obj-$(CONFIG_X86_64) += vsyscall_emu_64.o
32obj-$(CONFIG_X86_64) += espfix_64.o
32obj-$(CONFIG_SYSFS) += ksysfs.o 33obj-$(CONFIG_SYSFS) += ksysfs.o
33obj-y += bootflag.o e820.o 34obj-y += bootflag.o e820.o
34obj-y += pci-dma.o quirks.o topology.o kdebugfs.o 35obj-y += pci-dma.o quirks.o topology.o kdebugfs.o
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 1e96c3628bf2..bffaa986cafc 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -58,6 +58,7 @@
58#include <asm/asm.h> 58#include <asm/asm.h>
59#include <asm/context_tracking.h> 59#include <asm/context_tracking.h>
60#include <asm/smap.h> 60#include <asm/smap.h>
61#include <asm/pgtable_types.h>
61#include <linux/err.h> 62#include <linux/err.h>
62 63
63/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ 64/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
@@ -1040,8 +1041,16 @@ restore_args:
1040 RESTORE_ARGS 1,8,1 1041 RESTORE_ARGS 1,8,1
1041 1042
1042irq_return: 1043irq_return:
1044 /*
1045 * Are we returning to a stack segment from the LDT? Note: in
1046 * 64-bit mode SS:RSP on the exception stack is always valid.
1047 */
1048 testb $4,(SS-RIP)(%rsp)
1049 jnz irq_return_ldt
1050
1051irq_return_iret:
1043 INTERRUPT_RETURN 1052 INTERRUPT_RETURN
1044 _ASM_EXTABLE(irq_return, bad_iret) 1053 _ASM_EXTABLE(irq_return_iret, bad_iret)
1045 1054
1046#ifdef CONFIG_PARAVIRT 1055#ifdef CONFIG_PARAVIRT
1047ENTRY(native_iret) 1056ENTRY(native_iret)
@@ -1049,6 +1058,30 @@ ENTRY(native_iret)
1049 _ASM_EXTABLE(native_iret, bad_iret) 1058 _ASM_EXTABLE(native_iret, bad_iret)
1050#endif 1059#endif
1051 1060
/*
 * Return path taken when the saved SS selector has bit 2 set (irq_return
 * tests it with "testb $4" and branches here).  RAX and the five-word IRET
 * frame are copied to the address held in the per-CPU variable
 * espfix_waddr; %rsp is then pointed at the per-CPU espfix_stack address
 * with bits 16-31 of the user RSP merged in, and the IRET is issued from
 * there via irq_return_iret.
 */
1061irq_return_ldt:
1062 pushq_cfi %rax
1063 pushq_cfi %rdi
/* NOTE(review): SWAPGS presumably selects the kernel GS base needed by the PER_CPU_VAR accesses below - confirm */
1064 SWAPGS
1065 movq PER_CPU_VAR(espfix_waddr),%rdi
/* Two words were pushed above, so the IRET frame starts at (2*8)(%rsp) */
1066 movq %rax,(0*8)(%rdi) /* RAX */
1067 movq (2*8)(%rsp),%rax /* RIP */
1068 movq %rax,(1*8)(%rdi)
1069 movq (3*8)(%rsp),%rax /* CS */
1070 movq %rax,(2*8)(%rdi)
1071 movq (4*8)(%rsp),%rax /* RFLAGS */
1072 movq %rax,(3*8)(%rdi)
1073 movq (6*8)(%rsp),%rax /* SS */
1074 movq %rax,(5*8)(%rdi)
/* RSP is copied last so that its value is still in %rax for the masking below */
1075 movq (5*8)(%rsp),%rax /* RSP */
1076 movq %rax,(4*8)(%rdi)
1077 andl $0xffff0000,%eax /* keep RSP bits 16-31; the 32-bit op zeroes bits 32-63 */
1078 popq_cfi %rdi /* still on the original stack: restores the %rdi pushed above */
1079 orq PER_CPU_VAR(espfix_stack),%rax
1080 SWAPGS
1081 movq %rax,%rsp /* switch to the espfix alias of the copied frame */
1082 popq_cfi %rax /* pops slot 0 of the copy, i.e. the RAX stored above */
1083 jmp irq_return_iret
1084
1052 .section .fixup,"ax" 1085 .section .fixup,"ax"
1053bad_iret: 1086bad_iret:
1054 /* 1087 /*
@@ -1110,9 +1143,41 @@ ENTRY(retint_kernel)
1110 call preempt_schedule_irq 1143 call preempt_schedule_irq
1111 jmp exit_intr 1144 jmp exit_intr
1112#endif 1145#endif
1113
1114 CFI_ENDPROC 1146 CFI_ENDPROC
1115END(common_interrupt) 1147END(common_interrupt)
1148
1149 /*
1150 * If IRET takes a fault on the espfix stack, then we
1151 * end up promoting it to a doublefault. In that case,
1152 * modify the stack to make it look like we just entered
1153 * the #GP handler from user space, similar to bad_iret.
1154 */
1155 ALIGN
/*
 * Front end for the double-fault handler.  When the saved frame shows a
 * fault in kernel code at the IRET instruction while the stack pointer was
 * inside the espfix area, the saved RSP/RIP are rewritten so that the
 * return continues in general_protection on the per-CPU kernel stack;
 * every other case is passed straight on to do_double_fault.
 * NOTE(review): %rdi is assumed to point at the saved register frame as set
 * up by the paranoiderrorentry stub - confirm against that macro.
 */
1156__do_double_fault:
1157 XCPT_FRAME 1 RDI+8
1158 movq RSP(%rdi),%rax /* Trap on the espfix stack? */
1159 sarq $PGDIR_SHIFT,%rax /* arithmetic shift: sign-extended PGD index of the saved RSP */
1160 cmpl $ESPFIX_PGD_ENTRY,%eax
1161 jne do_double_fault /* No, just deliver the fault */
1162 cmpl $__KERNEL_CS,CS(%rdi) /* only a fault taken in kernel code qualifies */
1163 jne do_double_fault
1164 movq RIP(%rdi),%rax
1165 cmpq $irq_return_iret,%rax
1166#ifdef CONFIG_PARAVIRT
1167 je 1f
1168 cmpq $native_iret,%rax /* with paravirt the IRET may also sit at native_iret */
1169#endif
1170 jne do_double_fault /* This shouldn't happen... */
117111:
1172 movq PER_CPU_VAR(kernel_stack),%rax
1173 subq $(6*8-KERNEL_STACK_OFFSET),%rax /* Reset to original stack */
1174 movq %rax,RSP(%rdi)
1175 movq $0,(%rax) /* Missing (lost) #GP error code */
1176 movq $general_protection,RIP(%rdi) /* resume in the #GP entry point instead of at the IRET */
1177 retq
1178 CFI_ENDPROC
1179END(__do_double_fault)
1180
1116/* 1181/*
1117 * End of kprobes section 1182 * End of kprobes section
1118 */ 1183 */
@@ -1314,7 +1379,7 @@ zeroentry overflow do_overflow
1314zeroentry bounds do_bounds 1379zeroentry bounds do_bounds
1315zeroentry invalid_op do_invalid_op 1380zeroentry invalid_op do_invalid_op
1316zeroentry device_not_available do_device_not_available 1381zeroentry device_not_available do_device_not_available
1317paranoiderrorentry double_fault do_double_fault 1382paranoiderrorentry double_fault __do_double_fault
1318zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun 1383zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun
1319errorentry invalid_TSS do_invalid_TSS 1384errorentry invalid_TSS do_invalid_TSS
1320errorentry segment_not_present do_segment_not_present 1385errorentry segment_not_present do_segment_not_present
@@ -1601,7 +1666,7 @@ error_sti:
1601 */ 1666 */
1602error_kernelspace: 1667error_kernelspace:
1603 incl %ebx 1668 incl %ebx
1604 leaq irq_return(%rip),%rcx 1669 leaq irq_return_iret(%rip),%rcx
1605 cmpq %rcx,RIP+8(%rsp) 1670 cmpq %rcx,RIP+8(%rsp)
1606 je error_swapgs 1671 je error_swapgs
1607 movl %ecx,%eax /* zero extend */ 1672 movl %ecx,%eax /* zero extend */
diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
new file mode 100644
index 000000000000..8a64da36310f
--- /dev/null
+++ b/arch/x86/kernel/espfix_64.c
@@ -0,0 +1,208 @@
1/* ----------------------------------------------------------------------- *
2 *
3 * Copyright 2014 Intel Corporation; author: H. Peter Anvin
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * ----------------------------------------------------------------------- */
15
16/*
17 * The IRET instruction, when returning to a 16-bit segment, only
18 * restores the bottom 16 bits of the user space stack pointer. This
19 * causes some 16-bit software to break, but it also leaks kernel state
20 * to user space.
21 *
22 * This code works around the problem by creating percpu "ministacks", each of which
23 * is mapped 2^16 times 64K apart. When we detect that the return SS is
24 * on the LDT, we copy the IRET frame to the ministack and use the
25 * relevant alias to return to userspace. The ministacks are mapped
26 * readonly, so if the IRET faults we promote #GP to #DF, which is an IST
27 * vector and thus has its own stack; we then do the fixup in the #DF
28 * handler.
29 *
30 * This file sets up the ministacks and the related page tables. The
31 * actual ministack invocation is in entry_64.S.
32 */
33
34#include <linux/init.h>
35#include <linux/init_task.h>
36#include <linux/kernel.h>
37#include <linux/percpu.h>
38#include <linux/gfp.h>
39#include <linux/random.h>
40#include <asm/pgtable.h>
41#include <asm/pgalloc.h>
42#include <asm/setup.h>
43
44/*
45 * Note: we only need 6*8 = 48 bytes for the espfix stack, but round
46 * it up to a cache line to avoid unnecessary sharing.
47 */
48#define ESPFIX_STACK_SIZE (8*8UL)
49#define ESPFIX_STACKS_PER_PAGE (PAGE_SIZE/ESPFIX_STACK_SIZE)
50
51/* There is address space for how many espfix pages? */
52#define ESPFIX_PAGE_SPACE (1UL << (PGDIR_SHIFT-PAGE_SHIFT-16))
53
54#define ESPFIX_MAX_CPUS (ESPFIX_STACKS_PER_PAGE * ESPFIX_PAGE_SPACE)
55#if CONFIG_NR_CPUS > ESPFIX_MAX_CPUS
56# error "Need more than one PGD for the ESPFIX hack"
57#endif
58
59#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO)
60
61/* This contains the *bottom* address of the espfix stack */
62DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack);
63DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr);
64
65/* Initialization mutex - should this be a spinlock? */
66static DEFINE_MUTEX(espfix_init_mutex);
67
68/* Table of allocated stack pages - each page serves ESPFIX_STACKS_PER_PAGE CPUs */
69#define ESPFIX_MAX_PAGES DIV_ROUND_UP(CONFIG_NR_CPUS, ESPFIX_STACKS_PER_PAGE)
70static void *espfix_pages[ESPFIX_MAX_PAGES];
71
72static __page_aligned_bss pud_t espfix_pud_page[PTRS_PER_PUD]
73 __aligned(PAGE_SIZE);
74
75static unsigned int page_random, slot_random;
76
77/*
78 * This returns the bottom address of the espfix stack for a specific CPU.
79 * The math allows for a non-power-of-two ESPFIX_STACK_SIZE, in which case
80 * we have to account for some amount of padding at the end of each page.
81 */
82static inline unsigned long espfix_base_addr(unsigned int cpu)
83{
84 unsigned long page, slot;
85 unsigned long addr;
86
87 page = (cpu / ESPFIX_STACKS_PER_PAGE) ^ page_random;
88 slot = (cpu + slot_random) % ESPFIX_STACKS_PER_PAGE;
89 addr = (page << PAGE_SHIFT) + (slot * ESPFIX_STACK_SIZE);
90 addr = (addr & 0xffffUL) | ((addr & ~0xffffUL) << 16);
91 addr += ESPFIX_BASE_ADDR;
92 return addr;
93}
94
95#define PTE_STRIDE (65536/PAGE_SIZE)
96#define ESPFIX_PTE_CLONES (PTRS_PER_PTE/PTE_STRIDE)
97#define ESPFIX_PMD_CLONES PTRS_PER_PMD
98#define ESPFIX_PUD_CLONES (65536/(ESPFIX_PTE_CLONES*ESPFIX_PMD_CLONES))
99
100#define PGTABLE_PROT ((_KERNPG_TABLE & ~_PAGE_RW) | _PAGE_NX)
101
102static void init_espfix_random(void)
103{
104 unsigned long rand;
105
106 /*
107 * This is run before the entropy pools are initialized,
108 * but this is hopefully better than nothing.
109 */
110 if (!arch_get_random_long(&rand)) {
111 /* The constant is an arbitrary large prime */
112 rdtscll(rand);
113 rand *= 0xc345c6b72fd16123UL;
114 }
115
116 slot_random = rand % ESPFIX_STACKS_PER_PAGE;
117 page_random = (rand / ESPFIX_STACKS_PER_PAGE)
118 & (ESPFIX_PAGE_SPACE - 1);
119}
120
121void __init init_espfix_bsp(void)
122{
123 pgd_t *pgd_p;
124 pteval_t ptemask;
125
126 ptemask = __supported_pte_mask;
127
128 /* Install the espfix pud into the kernel page directory */
129 pgd_p = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)];
130 pgd_populate(&init_mm, pgd_p, (pud_t *)espfix_pud_page);
131
132 /* Randomize the locations */
133 init_espfix_random();
134
135 /* The rest is the same as for any other processor */
136 init_espfix_ap();
137}
138
139void init_espfix_ap(void)
140{
141 unsigned int cpu, page;
142 unsigned long addr;
143 pud_t pud, *pud_p;
144 pmd_t pmd, *pmd_p;
145 pte_t pte, *pte_p;
146 int n;
147 void *stack_page;
148 pteval_t ptemask;
149
150 /* We only have to do this once... */
151 if (likely(this_cpu_read(espfix_stack)))
152 return; /* Already initialized */
153
154 cpu = smp_processor_id();
155 addr = espfix_base_addr(cpu);
156 page = cpu/ESPFIX_STACKS_PER_PAGE;
157
158 /* Did another CPU already set this up? */
159 stack_page = ACCESS_ONCE(espfix_pages[page]);
160 if (likely(stack_page))
161 goto done;
162
163 mutex_lock(&espfix_init_mutex);
164
165 /* Did we race on the lock? */
166 stack_page = ACCESS_ONCE(espfix_pages[page]);
167 if (stack_page)
168 goto unlock_done;
169
170 ptemask = __supported_pte_mask;
171
172 pud_p = &espfix_pud_page[pud_index(addr)];
173 pud = *pud_p;
174 if (!pud_present(pud)) {
175 pmd_p = (pmd_t *)__get_free_page(PGALLOC_GFP);
176 pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask));
177 paravirt_alloc_pud(&init_mm, __pa(pmd_p) >> PAGE_SHIFT);
178 for (n = 0; n < ESPFIX_PUD_CLONES; n++)
179 set_pud(&pud_p[n], pud);
180 }
181
182 pmd_p = pmd_offset(&pud, addr);
183 pmd = *pmd_p;
184 if (!pmd_present(pmd)) {
185 pte_p = (pte_t *)__get_free_page(PGALLOC_GFP);
186 pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask));
187 paravirt_alloc_pmd(&init_mm, __pa(pte_p) >> PAGE_SHIFT);
188 for (n = 0; n < ESPFIX_PMD_CLONES; n++)
189 set_pmd(&pmd_p[n], pmd);
190 }
191
192 pte_p = pte_offset_kernel(&pmd, addr);
193 stack_page = (void *)__get_free_page(GFP_KERNEL);
194 pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask));
195 paravirt_alloc_pte(&init_mm, __pa(stack_page) >> PAGE_SHIFT);
196 for (n = 0; n < ESPFIX_PTE_CLONES; n++)
197 set_pte(&pte_p[n*PTE_STRIDE], pte);
198
199 /* Job is done for this CPU and any CPU which shares this page */
200 ACCESS_ONCE(espfix_pages[page]) = stack_page;
201
202unlock_done:
203 mutex_unlock(&espfix_init_mutex);
204done:
205 this_cpu_write(espfix_stack, addr);
206 this_cpu_write(espfix_waddr, (unsigned long)stack_page
207 + (addr & ~PAGE_MASK));
208}
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index af1d14a9ebda..ebc987398923 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -229,17 +229,6 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
229 } 229 }
230 } 230 }
231 231
232 /*
233 * On x86-64 we do not support 16-bit segments due to
234 * IRET leaking the high bits of the kernel stack address.
235 */
236#ifdef CONFIG_X86_64
237 if (!ldt_info.seg_32bit) {
238 error = -EINVAL;
239 goto out_unlock;
240 }
241#endif
242
243 fill_ldt(&ldt, &ldt_info); 232 fill_ldt(&ldt, &ldt_info);
244 if (oldmode) 233 if (oldmode)
245 ldt.avl = 0; 234 ldt.avl = 0;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 34826934d4a7..61a5350850fb 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -244,6 +244,13 @@ static void notrace start_secondary(void *unused)
244 check_tsc_sync_target(); 244 check_tsc_sync_target();
245 245
246 /* 246 /*
247 * Enable the espfix hack for this CPU
248 */
249#ifdef CONFIG_X86_64
250 init_espfix_ap();
251#endif
252
253 /*
247 * We need to hold vector_lock so there the set of online cpus 254 * We need to hold vector_lock so there the set of online cpus
248 * does not change while we are assigning vectors to cpus. Holding 255 * does not change while we are assigning vectors to cpus. Holding
249 * this lock ensures we don't half assign or remove an irq from a cpu. 256 * this lock ensures we don't half assign or remove an irq from a cpu.
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 20621d753d5f..167ffcac16ed 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -30,12 +30,14 @@ struct pg_state {
30 unsigned long start_address; 30 unsigned long start_address;
31 unsigned long current_address; 31 unsigned long current_address;
32 const struct addr_marker *marker; 32 const struct addr_marker *marker;
33 unsigned long lines;
33 bool to_dmesg; 34 bool to_dmesg;
34}; 35};
35 36
36struct addr_marker { 37struct addr_marker {
37 unsigned long start_address; 38 unsigned long start_address;
38 const char *name; 39 const char *name;
40 unsigned long max_lines;
39}; 41};
40 42
41/* indices for address_markers; keep sync'd w/ address_markers below */ 43/* indices for address_markers; keep sync'd w/ address_markers below */
@@ -46,6 +48,7 @@ enum address_markers_idx {
46 LOW_KERNEL_NR, 48 LOW_KERNEL_NR,
47 VMALLOC_START_NR, 49 VMALLOC_START_NR,
48 VMEMMAP_START_NR, 50 VMEMMAP_START_NR,
51 ESPFIX_START_NR,
49 HIGH_KERNEL_NR, 52 HIGH_KERNEL_NR,
50 MODULES_VADDR_NR, 53 MODULES_VADDR_NR,
51 MODULES_END_NR, 54 MODULES_END_NR,
@@ -68,6 +71,7 @@ static struct addr_marker address_markers[] = {
68 { PAGE_OFFSET, "Low Kernel Mapping" }, 71 { PAGE_OFFSET, "Low Kernel Mapping" },
69 { VMALLOC_START, "vmalloc() Area" }, 72 { VMALLOC_START, "vmalloc() Area" },
70 { VMEMMAP_START, "Vmemmap" }, 73 { VMEMMAP_START, "Vmemmap" },
74 { ESPFIX_BASE_ADDR, "ESPfix Area", 16 },
71 { __START_KERNEL_map, "High Kernel Mapping" }, 75 { __START_KERNEL_map, "High Kernel Mapping" },
72 { MODULES_VADDR, "Modules" }, 76 { MODULES_VADDR, "Modules" },
73 { MODULES_END, "End Modules" }, 77 { MODULES_END, "End Modules" },
@@ -182,7 +186,7 @@ static void note_page(struct seq_file *m, struct pg_state *st,
182 pgprot_t new_prot, int level) 186 pgprot_t new_prot, int level)
183{ 187{
184 pgprotval_t prot, cur; 188 pgprotval_t prot, cur;
185 static const char units[] = "KMGTPE"; 189 static const char units[] = "BKMGTPE";
186 190
187 /* 191 /*
188 * If we have a "break" in the series, we need to flush the state that 192 * If we have a "break" in the series, we need to flush the state that
@@ -197,6 +201,7 @@ static void note_page(struct seq_file *m, struct pg_state *st,
197 st->current_prot = new_prot; 201 st->current_prot = new_prot;
198 st->level = level; 202 st->level = level;
199 st->marker = address_markers; 203 st->marker = address_markers;
204 st->lines = 0;
200 pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", 205 pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n",
201 st->marker->name); 206 st->marker->name);
202 } else if (prot != cur || level != st->level || 207 } else if (prot != cur || level != st->level ||
@@ -208,17 +213,24 @@ static void note_page(struct seq_file *m, struct pg_state *st,
208 /* 213 /*
209 * Now print the actual finished series 214 * Now print the actual finished series
210 */ 215 */
211 pt_dump_seq_printf(m, st->to_dmesg, "0x%0*lx-0x%0*lx ", 216 if (!st->marker->max_lines ||
212 width, st->start_address, 217 st->lines < st->marker->max_lines) {
213 width, st->current_address); 218 pt_dump_seq_printf(m, st->to_dmesg,
214 219 "0x%0*lx-0x%0*lx ",
215 delta = (st->current_address - st->start_address) >> 10; 220 width, st->start_address,
216 while (!(delta & 1023) && unit[1]) { 221 width, st->current_address);
217 delta >>= 10; 222
218 unit++; 223 delta = st->current_address - st->start_address;
224 while (!(delta & 1023) && unit[1]) {
225 delta >>= 10;
226 unit++;
227 }
228 pt_dump_cont_printf(m, st->to_dmesg, "%9lu%c ",
229 delta, *unit);
230 printk_prot(m, st->current_prot, st->level,
231 st->to_dmesg);
219 } 232 }
220 pt_dump_cont_printf(m, st->to_dmesg, "%9lu%c ", delta, *unit); 233 st->lines++;
221 printk_prot(m, st->current_prot, st->level, st->to_dmesg);
222 234
223 /* 235 /*
224 * We print markers for special areas of address space, 236 * We print markers for special areas of address space,
@@ -226,7 +238,17 @@ static void note_page(struct seq_file *m, struct pg_state *st,
226 * This helps in the interpretation. 238 * This helps in the interpretation.
227 */ 239 */
228 if (st->current_address >= st->marker[1].start_address) { 240 if (st->current_address >= st->marker[1].start_address) {
241 if (st->marker->max_lines &&
242 st->lines > st->marker->max_lines) {
243 unsigned long nskip =
244 st->lines - st->marker->max_lines;
245 pt_dump_seq_printf(m, st->to_dmesg,
246 "... %lu entr%s skipped ... \n",
247 nskip,
248 nskip == 1 ? "y" : "ies");
249 }
229 st->marker++; 250 st->marker++;
251 st->lines = 0;
230 pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", 252 pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n",
231 st->marker->name); 253 st->marker->name);
232 } 254 }
diff --git a/init/main.c b/init/main.c
index 9c7fd4c9249f..70fc00e7db06 100644
--- a/init/main.c
+++ b/init/main.c
@@ -617,6 +617,10 @@ asmlinkage void __init start_kernel(void)
617 if (efi_enabled(EFI_RUNTIME_SERVICES)) 617 if (efi_enabled(EFI_RUNTIME_SERVICES))
618 efi_enter_virtual_mode(); 618 efi_enter_virtual_mode();
619#endif 619#endif
620#ifdef CONFIG_X86_64
621 /* Should be run before the first non-init thread is created */
622 init_espfix_bsp();
623#endif
620 thread_info_cache_init(); 624 thread_info_cache_init();
621 cred_init(); 625 cred_init();
622 fork_init(totalram_pages); 626 fork_init(totalram_pages);