 Documentation/x86/x86_64/mm.txt         |   2
 arch/x86/include/asm/pgtable_64_types.h |   2
 arch/x86/include/asm/setup.h            |   3
 arch/x86/kernel/Makefile                |   1
 arch/x86/kernel/entry_64.S              |  73
 arch/x86/kernel/espfix_64.c             | 208
 arch/x86/kernel/ldt.c                   |  11
 arch/x86/kernel/smpboot.c               |   7
 arch/x86/mm/dump_pagetables.c           |  44
 init/main.c                             |   4
 10 files changed, 329 insertions(+), 26 deletions(-)
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index c584a51add15..afe68ddbe6a4 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -12,6 +12,8 @@ ffffc90000000000 - ffffe8ffffffffff (=45 bits) vmalloc/ioremap space
 ffffe90000000000 - ffffe9ffffffffff (=40 bits) hole
 ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
 ... unused hole ...
+ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
+... unused hole ...
 ffffffff80000000 - ffffffffa0000000 (=512 MB)  kernel text mapping, from phys 0
 ffffffffa0000000 - ffffffffff5fffff (=1525 MB) module mapping space
 ffffffffff600000 - ffffffffffdfffff (=8 MB)   vsyscalls
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index c883bf726398..7166e25ecb57 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -61,6 +61,8 @@ typedef struct { pteval_t pte; } pte_t;
 #define MODULES_VADDR    (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
 #define MODULES_END      _AC(0xffffffffff000000, UL)
 #define MODULES_LEN   (MODULES_END - MODULES_VADDR)
+#define ESPFIX_PGD_ENTRY _AC(-2, UL)
+#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << PGDIR_SHIFT)
 
 #define EARLY_DYNAMIC_PAGE_TABLES	64
 
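
With 4-level paging, PGDIR_SHIFT is 39, so ESPFIX_PGD_ENTRY = -2 selects the second PGD slot from the top of the address space — exactly the ffffff0000000000 range added to mm.txt above. A standalone sketch, with the kernel constants hard-coded here rather than pulled from headers, that sanity-checks the arithmetic:

	#include <stdio.h>

	int main(void)
	{
		/* Assumed values for x86-64 4-level paging. */
		const unsigned long pgdir_shift = 39;
		const unsigned long espfix_pgd_entry = -2UL;	/* two slots below the top */
		unsigned long base = espfix_pgd_entry << pgdir_shift;

		/* Prints ffffff0000000000 - ffffff7fffffffff: one PGD entry
		 * (2^39 bytes), matching the "=39 bits" line in the mm.txt
		 * hunk above. */
		printf("%lx - %lx\n", base, base + (1UL << pgdir_shift) - 1);
		return 0;
	}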
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 9264f04a4c55..9e3be3329a7e 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -57,6 +57,9 @@ extern void x86_ce4100_early_setup(void);
 static inline void x86_ce4100_early_setup(void) { }
 #endif
 
+extern void init_espfix_bsp(void);
+extern void init_espfix_ap(void);
+
 #ifndef _SETUP
 
 /*
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index f4d96000d33a..1cc3789d99d9 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -29,6 +29,7 @@ obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
 obj-y			+= syscall_$(BITS).o vsyscall_gtod.o
 obj-$(CONFIG_X86_64)	+= vsyscall_64.o
 obj-$(CONFIG_X86_64)	+= vsyscall_emu_64.o
+obj-$(CONFIG_X86_64)	+= espfix_64.o
 obj-$(CONFIG_SYSFS)	+= ksysfs.o
 obj-y			+= bootflag.o e820.o
 obj-y			+= pci-dma.o quirks.o topology.o kdebugfs.o
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 1e96c3628bf2..bffaa986cafc 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -58,6 +58,7 @@
 #include <asm/asm.h>
 #include <asm/context_tracking.h>
 #include <asm/smap.h>
+#include <asm/pgtable_types.h>
 #include <linux/err.h>
 
 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
@@ -1040,8 +1041,16 @@ restore_args:
 	RESTORE_ARGS 1,8,1
 
 irq_return:
+	/*
+	 * Are we returning to a stack segment from the LDT?  Note: in
+	 * 64-bit mode SS:RSP on the exception stack is always valid.
+	 */
+	testb $4,(SS-RIP)(%rsp)
+	jnz irq_return_ldt
+
+irq_return_iret:
 	INTERRUPT_RETURN
-	_ASM_EXTABLE(irq_return, bad_iret)
+	_ASM_EXTABLE(irq_return_iret, bad_iret)
 
 #ifdef CONFIG_PARAVIRT
 ENTRY(native_iret)
@@ -1049,6 +1058,30 @@ ENTRY(native_iret)
 	_ASM_EXTABLE(native_iret, bad_iret)
 #endif
 
+irq_return_ldt:
+	pushq_cfi %rax
+	pushq_cfi %rdi
+	SWAPGS
+	movq PER_CPU_VAR(espfix_waddr),%rdi
+	movq %rax,(0*8)(%rdi)	/* RAX */
+	movq (2*8)(%rsp),%rax	/* RIP */
+	movq %rax,(1*8)(%rdi)
+	movq (3*8)(%rsp),%rax	/* CS */
+	movq %rax,(2*8)(%rdi)
+	movq (4*8)(%rsp),%rax	/* RFLAGS */
+	movq %rax,(3*8)(%rdi)
+	movq (6*8)(%rsp),%rax	/* SS */
+	movq %rax,(5*8)(%rdi)
+	movq (5*8)(%rsp),%rax	/* RSP */
+	movq %rax,(4*8)(%rdi)
+	andl $0xffff0000,%eax
+	popq_cfi %rdi
+	orq PER_CPU_VAR(espfix_stack),%rax
+	SWAPGS
+	movq %rax,%rsp
+	popq_cfi %rax
+	jmp irq_return_iret
+
 .section .fixup,"ax"
 bad_iret:
 	/*
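
To make the register shuffle above easier to follow, here is a rough C rendering of irq_return_ldt. The frame offsets match the layout after the two pushes (RDI at 0, RAX at 8, then RIP/CS/RFLAGS/RSP/SS), and all names here are illustrative rather than kernel API:

	/* Ministack layout after the copy: [0]=RAX, [1]=RIP, [2]=CS,
	 * [3]=RFLAGS, [4]=RSP, [5]=SS -- the saved RAX sits just below
	 * a normal five-word IRET frame. */
	struct iret_frame {
		unsigned long rip, cs, rflags, rsp, ss;
	};

	static unsigned long espfix_switch_stack(unsigned long rax,
						 const struct iret_frame *kframe,
						 unsigned long *waddr,	/* per-cpu espfix_waddr */
						 unsigned long rstack)	/* per-cpu espfix_stack */
	{
		/* 1: copy RAX and the IRET frame to the kernel write alias */
		waddr[0] = rax;
		waddr[1] = kframe->rip;
		waddr[2] = kframe->cs;
		waddr[3] = kframe->rflags;
		waddr[5] = kframe->ss;
		waddr[4] = kframe->rsp;

		/* 2: pick the read-only alias whose bits 16-31 equal bits
		 * 16-31 of the user RSP (the 32-bit andl also clears bits
		 * 32-63, which the alias base supplies instead). */
		return (kframe->rsp & 0xffff0000UL) | rstack;
		/* asm epilogue: movq %rax,%rsp; popq %rax; jmp irq_return_iret */
	}

The payoff: when IRET to a 16-bit stack segment leaves bits 16-31 of ESP untouched, they now hold the user's own stack bits rather than kernel ministack address bits.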
@@ -1110,9 +1143,41 @@ ENTRY(retint_kernel)
 	call preempt_schedule_irq
 	jmp exit_intr
 #endif
-
 	CFI_ENDPROC
 END(common_interrupt)
+
+	/*
+	 * If IRET takes a fault on the espfix stack, then we
+	 * end up promoting it to a doublefault.  In that case,
+	 * modify the stack to make it look like we just entered
+	 * the #GP handler from user space, similar to bad_iret.
+	 */
+	ALIGN
+__do_double_fault:
+	XCPT_FRAME 1 RDI+8
+	movq RSP(%rdi),%rax		/* Trap on the espfix stack? */
+	sarq $PGDIR_SHIFT,%rax
+	cmpl $ESPFIX_PGD_ENTRY,%eax
+	jne do_double_fault		/* No, just deliver the fault */
+	cmpl $__KERNEL_CS,CS(%rdi)
+	jne do_double_fault
+	movq RIP(%rdi),%rax
+	cmpq $irq_return_iret,%rax
+#ifdef CONFIG_PARAVIRT
+	je 1f
+	cmpq $native_iret,%rax
+#endif
+	jne do_double_fault		/* This shouldn't happen... */
+1:
+	movq PER_CPU_VAR(kernel_stack),%rax
+	subq $(6*8-KERNEL_STACK_OFFSET),%rax	/* Reset to original stack */
+	movq %rax,RSP(%rdi)
+	movq $0,(%rax)			/* Missing (lost) #GP error code */
+	movq $general_protection,RIP(%rdi)
+	retq
+	CFI_ENDPROC
+END(__do_double_fault)
+
 /*
  * End of kprobes section
 */
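
__do_double_fault must only intercept the #DF that the read-only ministack manufactures from the espfix IRET. A hedged C equivalent of the filter (the struct and extern are stand-ins for the asm context, with PGDIR_SHIFT = 39 hard-coded):

	struct fault_regs { unsigned long ip, cs, sp; };

	extern char irq_return_iret[];	/* stand-in for the asm label */

	static int fault_is_espfix_iret(const struct fault_regs *regs,
					unsigned long kernel_cs)
	{
		/* sarq: an arithmetic shift maps a canonical address to its
		 * sign-extended PGD slot, so any address in the espfix
		 * region compares exactly equal to -2. */
		if (((long)regs->sp >> 39) != -2)
			return 0;		/* a real double fault */
		if (regs->cs != kernel_cs)
			return 0;
		return regs->ip == (unsigned long)irq_return_iret;
	}

When the filter matches, the handler resets the saved RSP to the per-cpu kernel stack, stores the error code #GP would have pushed (zero), and retargets the saved RIP at general_protection, so the fault is re-delivered as an ordinary #GP from the failed IRET.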
@@ -1314,7 +1379,7 @@ zeroentry overflow do_overflow
 zeroentry bounds do_bounds
 zeroentry invalid_op do_invalid_op
 zeroentry device_not_available do_device_not_available
-paranoiderrorentry double_fault do_double_fault
+paranoiderrorentry double_fault __do_double_fault
 zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun
 errorentry invalid_TSS do_invalid_TSS
 errorentry segment_not_present do_segment_not_present
@@ -1601,7 +1666,7 @@ error_sti:
  */
 error_kernelspace:
 	incl %ebx
-	leaq irq_return(%rip),%rcx
+	leaq irq_return_iret(%rip),%rcx
 	cmpq %rcx,RIP+8(%rsp)
 	je error_swapgs
 	movl %ecx,%eax		/* zero extend */
diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
new file mode 100644
index 000000000000..8a64da36310f
--- /dev/null
+++ b/arch/x86/kernel/espfix_64.c
@@ -0,0 +1,208 @@
+/* ----------------------------------------------------------------------- *
+ *
+ *   Copyright 2014 Intel Corporation; author: H. Peter Anvin
+ *
+ *   This program is free software; you can redistribute it and/or modify it
+ *   under the terms and conditions of the GNU General Public License,
+ *   version 2, as published by the Free Software Foundation.
+ *
+ *   This program is distributed in the hope it will be useful, but WITHOUT
+ *   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ *   more details.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * The IRET instruction, when returning to a 16-bit segment, only
+ * restores the bottom 16 bits of the user space stack pointer.  This
+ * causes some 16-bit software to break, but it also leaks kernel state
+ * to user space.
+ *
+ * We work around this by creating percpu "ministacks", each of which
+ * is mapped 2^16 times 64K apart.  When we detect that the return SS is
+ * on the LDT, we copy the IRET frame to the ministack and use the
+ * relevant alias to return to userspace.  The ministacks are mapped
+ * readonly, so if the IRET faults we promote #GP to #DF, which is an
+ * IST vector and thus has its own stack; we then do the fixup in the
+ * #DF handler.
+ *
+ * This file sets up the ministacks and the related page tables.  The
+ * actual ministack invocation is in entry_64.S.
+ */
+
+#include <linux/init.h>
+#include <linux/init_task.h>
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <linux/gfp.h>
+#include <linux/random.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/setup.h>
+
+/*
+ * Note: we only need 6*8 = 48 bytes for the espfix stack, but round
+ * it up to a cache line to avoid unnecessary sharing.
+ */
+#define ESPFIX_STACK_SIZE	(8*8UL)
+#define ESPFIX_STACKS_PER_PAGE	(PAGE_SIZE/ESPFIX_STACK_SIZE)
+
+/* There is address space for how many espfix pages? */
+#define ESPFIX_PAGE_SPACE	(1UL << (PGDIR_SHIFT-PAGE_SHIFT-16))
+
+#define ESPFIX_MAX_CPUS		(ESPFIX_STACKS_PER_PAGE * ESPFIX_PAGE_SPACE)
+#if CONFIG_NR_CPUS > ESPFIX_MAX_CPUS
+# error "Need more than one PGD for the ESPFIX hack"
+#endif
+
+#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO)
+
+/* This contains the *bottom* address of the espfix stack */
+DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack);
+DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr);
+
+/* Initialization mutex - should this be a spinlock? */
+static DEFINE_MUTEX(espfix_init_mutex);
+
+/* Page allocation bitmap - each page serves ESPFIX_STACKS_PER_PAGE CPUs */
+#define ESPFIX_MAX_PAGES  DIV_ROUND_UP(CONFIG_NR_CPUS, ESPFIX_STACKS_PER_PAGE)
+static void *espfix_pages[ESPFIX_MAX_PAGES];
+
+static __page_aligned_bss pud_t espfix_pud_page[PTRS_PER_PUD]
+	__aligned(PAGE_SIZE);
+
+static unsigned int page_random, slot_random;
+
+/*
+ * This returns the bottom address of the espfix stack for a specific CPU.
+ * The math allows for a non-power-of-two ESPFIX_STACK_SIZE, in which case
+ * we have to account for some amount of padding at the end of each page.
+ */
+static inline unsigned long espfix_base_addr(unsigned int cpu)
+{
+	unsigned long page, slot;
+	unsigned long addr;
+
+	page = (cpu / ESPFIX_STACKS_PER_PAGE) ^ page_random;
+	slot = (cpu + slot_random) % ESPFIX_STACKS_PER_PAGE;
+	addr = (page << PAGE_SHIFT) + (slot * ESPFIX_STACK_SIZE);
+	addr = (addr & 0xffffUL) | ((addr & ~0xffffUL) << 16);
+	addr += ESPFIX_BASE_ADDR;
+	return addr;
+}
+
+#define PTE_STRIDE        (65536/PAGE_SIZE)
+#define ESPFIX_PTE_CLONES (PTRS_PER_PTE/PTE_STRIDE)
+#define ESPFIX_PMD_CLONES PTRS_PER_PMD
+#define ESPFIX_PUD_CLONES (65536/(ESPFIX_PTE_CLONES*ESPFIX_PMD_CLONES))
+
+#define PGTABLE_PROT	  ((_KERNPG_TABLE & ~_PAGE_RW) | _PAGE_NX)
+
+static void init_espfix_random(void)
+{
+	unsigned long rand;
+
+	/*
+	 * This is run before the entropy pools are initialized,
+	 * but this is hopefully better than nothing.
+	 */
+	if (!arch_get_random_long(&rand)) {
+		/* The constant is an arbitrary large prime */
+		rdtscll(rand);
+		rand *= 0xc345c6b72fd16123UL;
+	}
+
+	slot_random = rand % ESPFIX_STACKS_PER_PAGE;
+	page_random = (rand / ESPFIX_STACKS_PER_PAGE)
+		& (ESPFIX_PAGE_SPACE - 1);
+}
+
+void __init init_espfix_bsp(void)
+{
+	pgd_t *pgd_p;
+	pteval_t ptemask;
+
+	ptemask = __supported_pte_mask;
+
+	/* Install the espfix pud into the kernel page directory */
+	pgd_p = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)];
+	pgd_populate(&init_mm, pgd_p, (pud_t *)espfix_pud_page);
+
+	/* Randomize the locations */
+	init_espfix_random();
+
+	/* The rest is the same as for any other processor */
+	init_espfix_ap();
+}
+
+void init_espfix_ap(void)
+{
+	unsigned int cpu, page;
+	unsigned long addr;
+	pud_t pud, *pud_p;
+	pmd_t pmd, *pmd_p;
+	pte_t pte, *pte_p;
+	int n;
+	void *stack_page;
+	pteval_t ptemask;
+
+	/* We only have to do this once... */
+	if (likely(this_cpu_read(espfix_stack)))
+		return;		/* Already initialized */
+
+	cpu = smp_processor_id();
+	addr = espfix_base_addr(cpu);
+	page = cpu/ESPFIX_STACKS_PER_PAGE;
+
+	/* Did another CPU already set this up? */
+	stack_page = ACCESS_ONCE(espfix_pages[page]);
+	if (likely(stack_page))
+		goto done;
+
+	mutex_lock(&espfix_init_mutex);
+
+	/* Did we race on the lock? */
+	stack_page = ACCESS_ONCE(espfix_pages[page]);
+	if (stack_page)
+		goto unlock_done;
+
+	ptemask = __supported_pte_mask;
+
+	pud_p = &espfix_pud_page[pud_index(addr)];
+	pud = *pud_p;
+	if (!pud_present(pud)) {
+		pmd_p = (pmd_t *)__get_free_page(PGALLOC_GFP);
+		pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask));
+		paravirt_alloc_pud(&init_mm, __pa(pmd_p) >> PAGE_SHIFT);
+		for (n = 0; n < ESPFIX_PUD_CLONES; n++)
+			set_pud(&pud_p[n], pud);
+	}
+
+	pmd_p = pmd_offset(&pud, addr);
+	pmd = *pmd_p;
+	if (!pmd_present(pmd)) {
+		pte_p = (pte_t *)__get_free_page(PGALLOC_GFP);
+		pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask));
+		paravirt_alloc_pmd(&init_mm, __pa(pte_p) >> PAGE_SHIFT);
+		for (n = 0; n < ESPFIX_PMD_CLONES; n++)
+			set_pmd(&pmd_p[n], pmd);
+	}
+
+	pte_p = pte_offset_kernel(&pmd, addr);
+	stack_page = (void *)__get_free_page(GFP_KERNEL);
+	pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask));
+	paravirt_alloc_pte(&init_mm, __pa(stack_page) >> PAGE_SHIFT);
+	for (n = 0; n < ESPFIX_PTE_CLONES; n++)
+		set_pte(&pte_p[n*PTE_STRIDE], pte);
+
+	/* Job is done for this CPU and any CPU which shares this page */
+	ACCESS_ONCE(espfix_pages[page]) = stack_page;
+
+unlock_done:
+	mutex_unlock(&espfix_init_mutex);
+done:
+	this_cpu_write(espfix_stack, addr);
+	this_cpu_write(espfix_waddr, (unsigned long)stack_page
+		       + (addr & ~PAGE_MASK));
+}
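
The bit-spreading in espfix_base_addr is the heart of the aliasing trick. A standalone sketch with the randomization zeroed and 4 KiB pages hard-coded (the outputs in the comments were computed by hand):

	#include <stdio.h>

	#define PAGE_SHIFT		12
	#define PAGE_SIZE		(1UL << PAGE_SHIFT)
	#define ESPFIX_STACK_SIZE	(8*8UL)
	#define ESPFIX_STACKS_PER_PAGE	(PAGE_SIZE/ESPFIX_STACK_SIZE)	/* 64 */
	#define ESPFIX_BASE_ADDR	0xffffff0000000000UL

	static unsigned long espfix_base_addr(unsigned int cpu)
	{
		unsigned long page = cpu / ESPFIX_STACKS_PER_PAGE;	/* page_random = 0 */
		unsigned long slot = cpu % ESPFIX_STACKS_PER_PAGE;	/* slot_random = 0 */
		unsigned long addr = (page << PAGE_SHIFT) + slot * ESPFIX_STACK_SIZE;

		/* Bits 0-15 of the linear index stay put; everything above
		 * moves up 16 bits, leaving bits 16-31 zero -- the bits
		 * irq_return_ldt ORs the user RSP into. */
		addr = (addr & 0xffffUL) | ((addr & ~0xffffUL) << 16);
		return addr + ESPFIX_BASE_ADDR;
	}

	int main(void)
	{
		/* cpu    0 -> ffffff0000000000  (page 0, slot 0)
		 * cpu    1 -> ffffff0000000040  (same page, next 64-byte slot)
		 * cpu   64 -> ffffff0000001000  (page 1, still inside one 64K period)
		 * cpu 1024 -> ffffff0100000000  (page 16: linear 0x10000 is
		 *             shifted up past the free bits 16-31) */
		unsigned int cpus[] = { 0, 1, 64, 1024 };
		for (int i = 0; i < 4; i++)
			printf("cpu %4u -> %016lx\n", cpus[i], espfix_base_addr(cpus[i]));
		return 0;
	}

For 4 KiB pages the clone constants work out to PTE_STRIDE = 16, ESPFIX_PTE_CLONES = 32, ESPFIX_PMD_CLONES = 512 and ESPFIX_PUD_CLONES = 4; init_espfix_ap therefore installs 32 * 512 * 4 = 65536 aliases of each ministack page, one for every possible value of bits 16-31.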
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index af1d14a9ebda..ebc987398923 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -229,17 +229,6 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 		}
 	}
 
-	/*
-	 * On x86-64 we do not support 16-bit segments due to
-	 * IRET leaking the high bits of the kernel stack address.
-	 */
-#ifdef CONFIG_X86_64
-	if (!ldt_info.seg_32bit) {
-		error = -EINVAL;
-		goto out_unlock;
-	}
-#endif
-
 	fill_ldt(&ldt, &ldt_info);
 	if (oldmode)
 		ldt.avl = 0;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 34826934d4a7..61a5350850fb 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -244,6 +244,13 @@ static void notrace start_secondary(void *unused)
 	check_tsc_sync_target();
 
 	/*
+	 * Enable the espfix hack for this CPU
+	 */
+#ifdef CONFIG_X86_64
+	init_espfix_ap();
+#endif
+
+	/*
 	 * We need to hold vector_lock so that the set of online cpus
 	 * does not change while we are assigning vectors to cpus.  Holding
 	 * this lock ensures we don't half assign or remove an irq from a cpu.
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 20621d753d5f..167ffcac16ed 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -30,12 +30,14 @@ struct pg_state {
 	unsigned long start_address;
 	unsigned long current_address;
 	const struct addr_marker *marker;
+	unsigned long lines;
 	bool to_dmesg;
 };
 
 struct addr_marker {
 	unsigned long start_address;
 	const char *name;
+	unsigned long max_lines;
 };
 
 /* indices for address_markers; keep sync'd w/ address_markers below */
@@ -46,6 +48,7 @@ enum address_markers_idx {
 	LOW_KERNEL_NR,
 	VMALLOC_START_NR,
 	VMEMMAP_START_NR,
+	ESPFIX_START_NR,
 	HIGH_KERNEL_NR,
 	MODULES_VADDR_NR,
 	MODULES_END_NR,
@@ -68,6 +71,7 @@ static struct addr_marker address_markers[] = {
 	{ PAGE_OFFSET,		"Low Kernel Mapping" },
 	{ VMALLOC_START,        "vmalloc() Area" },
 	{ VMEMMAP_START,        "Vmemmap" },
+	{ ESPFIX_BASE_ADDR,	"ESPfix Area", 16 },
 	{ __START_KERNEL_map,   "High Kernel Mapping" },
 	{ MODULES_VADDR,        "Modules" },
 	{ MODULES_END,          "End Modules" },
@@ -182,7 +186,7 @@ static void note_page(struct seq_file *m, struct pg_state *st,
 		      pgprot_t new_prot, int level)
 {
 	pgprotval_t prot, cur;
-	static const char units[] = "KMGTPE";
+	static const char units[] = "BKMGTPE";
 
 	/*
 	 * If we have a "break" in the series, we need to flush the state that
@@ -197,6 +201,7 @@ static void note_page(struct seq_file *m, struct pg_state *st,
 		st->current_prot = new_prot;
 		st->level = level;
 		st->marker = address_markers;
+		st->lines = 0;
 		pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n",
 				   st->marker->name);
 	} else if (prot != cur || level != st->level ||
@@ -208,17 +213,24 @@ static void note_page(struct seq_file *m, struct pg_state *st,
 		/*
 		 * Now print the actual finished series
 		 */
-		pt_dump_seq_printf(m, st->to_dmesg,  "0x%0*lx-0x%0*lx   ",
-				   width, st->start_address,
-				   width, st->current_address);
-
-		delta = (st->current_address - st->start_address) >> 10;
-		while (!(delta & 1023) && unit[1]) {
-			delta >>= 10;
-			unit++;
+		if (!st->marker->max_lines ||
+		    st->lines < st->marker->max_lines) {
+			pt_dump_seq_printf(m, st->to_dmesg,
+					   "0x%0*lx-0x%0*lx   ",
+					   width, st->start_address,
+					   width, st->current_address);
+
+			delta = st->current_address - st->start_address;
+			while (!(delta & 1023) && unit[1]) {
+				delta >>= 10;
+				unit++;
+			}
+			pt_dump_cont_printf(m, st->to_dmesg, "%9lu%c ",
+					    delta, *unit);
+			printk_prot(m, st->current_prot, st->level,
+				    st->to_dmesg);
 		}
-		pt_dump_cont_printf(m, st->to_dmesg, "%9lu%c ", delta, *unit);
-		printk_prot(m, st->current_prot, st->level, st->to_dmesg);
+		st->lines++;
 
 		/*
 		 * We print markers for special areas of address space,
@@ -226,7 +238,17 @@ static void note_page(struct seq_file *m, struct pg_state *st,
 		 * This helps in the interpretation.
 		 */
 		if (st->current_address >= st->marker[1].start_address) {
+			if (st->marker->max_lines &&
+			    st->lines > st->marker->max_lines) {
+				unsigned long nskip =
+					st->lines - st->marker->max_lines;
+				pt_dump_seq_printf(m, st->to_dmesg,
+						   "... %lu entr%s skipped ... \n",
+						   nskip,
+						   nskip == 1 ? "y" : "ies");
+			}
 			st->marker++;
+			st->lines = 0;
 			pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n",
 					   st->marker->name);
 		}
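
Without the cap, each ministack page's 2^16 aliases would flood the debugfs page-table dump with near-identical lines (the ESPfix marker above allows 16). A toy model of the suppression logic, with the pt_dump_* machinery replaced by printf:

	#include <stdio.h>

	struct region { const char *name; unsigned long max_lines; };

	static void dump_region(const struct region *r, unsigned long nrows)
	{
		unsigned long lines = 0;

		printf("---[ %s ]---\n", r->name);
		for (unsigned long i = 0; i < nrows; i++) {
			/* Mirrors the new check in note_page(): print only
			 * while under the region's line budget. */
			if (!r->max_lines || lines < r->max_lines)
				printf("  mapping %lu\n", i);
			lines++;
		}
		/* At the marker transition, report what was suppressed. */
		if (r->max_lines && lines > r->max_lines) {
			unsigned long nskip = lines - r->max_lines;
			printf("... %lu entr%s skipped ...\n",
			       nskip, nskip == 1 ? "y" : "ies");
		}
	}

	int main(void)
	{
		struct region espfix = { "ESPfix Area", 16 };

		dump_region(&espfix, 20);	/* 16 rows, then "4 entries skipped" */
		return 0;
	}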
diff --git a/init/main.c b/init/main.c
index 9c7fd4c9249f..70fc00e7db06 100644
--- a/init/main.c
+++ b/init/main.c
@@ -617,6 +617,10 @@ asmlinkage void __init start_kernel(void)
 	if (efi_enabled(EFI_RUNTIME_SERVICES))
 		efi_enter_virtual_mode();
 #endif
+#ifdef CONFIG_X86_64
+	/* Should be run before the first non-init thread is created */
+	init_espfix_bsp();
+#endif
 	thread_info_cache_init();
 	cred_init();
 	fork_init(totalram_pages);