Diffstat (limited to 'arch/x86')
29 files changed, 245 insertions, 124 deletions
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 704f58aa79cd..45abc363dd3e 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -76,8 +76,10 @@ suffix-$(CONFIG_KERNEL_XZ) := xz
 suffix-$(CONFIG_KERNEL_LZO) 	:= lzo
 suffix-$(CONFIG_KERNEL_LZ4) 	:= lz4
 
+RUN_SIZE = $(shell $(OBJDUMP) -h vmlinux | \
+	     perl $(srctree)/arch/x86/tools/calc_run_size.pl)
 quiet_cmd_mkpiggy = MKPIGGY $@
-      cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false )
+      cmd_mkpiggy = $(obj)/mkpiggy $< $(RUN_SIZE) > $@ || ( rm -f $@ ; false )
 
 targets += piggy.S
 $(obj)/piggy.S: $(obj)/vmlinux.bin.$(suffix-y) $(obj)/mkpiggy FORCE
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index cbed1407a5cd..1d7fbbcc196d 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -207,7 +207,8 @@ relocated:
  * Do the decompression, and jump to the new kernel..
  */
 				/* push arguments for decompress_kernel: */
-	pushl	$z_output_len	/* decompressed length */
+	pushl	$z_run_size	/* size of kernel with .bss and .brk */
+	pushl	$z_output_len	/* decompressed length, end of relocs */
 	leal	z_extract_offset_negative(%ebx), %ebp
 	pushl	%ebp		/* output address */
 	pushl	$z_input_len	/* input_len */
@@ -217,7 +218,7 @@ relocated:
 	pushl	%eax		/* heap area */
 	pushl	%esi		/* real mode pointer */
 	call	decompress_kernel /* returns kernel location in %eax */
-	addl	$24, %esp
+	addl	$28, %esp
 
 	/*
 	 * Jump to the decompressed kernel.
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 2884e0c3e8a5..6b1766c6c082 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -402,13 +402,16 @@ relocated:
 	 * Do the decompression, and jump to the new kernel..
 	 */
 	pushq	%rsi			/* Save the real mode argument */
+	movq	$z_run_size, %r9	/* size of kernel with .bss and .brk */
+	pushq	%r9
 	movq	%rsi, %rdi		/* real mode address */
 	leaq	boot_heap(%rip), %rsi	/* malloc area for uncompression */
 	leaq	input_data(%rip), %rdx  /* input_data */
 	movl	$z_input_len, %ecx	/* input_len */
 	movq	%rbp, %r8		/* output target address */
-	movq	$z_output_len, %r9	/* decompressed length */
+	movq	$z_output_len, %r9	/* decompressed length, end of relocs */
 	call	decompress_kernel	/* returns kernel location in %rax */
+	popq	%r9
 	popq	%rsi
 
 	/*
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 57ab74df7eea..30dd59a9f0b4 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -358,7 +358,8 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
 				  unsigned char *input_data,
 				  unsigned long input_len,
 				  unsigned char *output,
-				  unsigned long output_len)
+				  unsigned long output_len,
+				  unsigned long run_size)
 {
 	real_mode = rmode;
 
@@ -381,8 +382,14 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
 	free_mem_ptr     = heap;	/* Heap */
 	free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
 
-	output = choose_kernel_location(input_data, input_len,
-					output, output_len);
+	/*
+	 * The memory hole needed for the kernel is the larger of either
+	 * the entire decompressed kernel plus relocation table, or the
+	 * entire decompressed kernel plus .bss and .brk sections.
+	 */
+	output = choose_kernel_location(input_data, input_len, output,
+					output_len > run_size ? output_len
+							      : run_size);
 
 	/* Validate memory location choices. */
 	if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1))
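
Illustration, not part of the patch: the new call site boils down to max(output_len, run_size). A minimal user-space sketch of that policy, with made-up sizes standing in for z_output_len and z_run_size:

#include <stdio.h>

/* Same max() policy as the patched choose_kernel_location() call. */
static unsigned long kernel_hole_size(unsigned long output_len,
				      unsigned long run_size)
{
	return output_len > run_size ? output_len : run_size;
}

int main(void)
{
	/* e.g. 23 MiB decompressed image vs 27 MiB once .bss/.brk count */
	printf("%lu\n", kernel_hole_size(23UL << 20, 27UL << 20));
	return 0;
}
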
diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c
index b669ab65bf6c..d8222f213182 100644
--- a/arch/x86/boot/compressed/mkpiggy.c
+++ b/arch/x86/boot/compressed/mkpiggy.c
@@ -36,11 +36,13 @@ int main(int argc, char *argv[])
 	uint32_t olen;
 	long ilen;
 	unsigned long offs;
+	unsigned long run_size;
 	FILE *f = NULL;
 	int retval = 1;
 
-	if (argc < 2) {
-		fprintf(stderr, "Usage: %s compressed_file\n", argv[0]);
+	if (argc < 3) {
+		fprintf(stderr, "Usage: %s compressed_file run_size\n",
+				argv[0]);
 		goto bail;
 	}
 
@@ -74,6 +76,7 @@ int main(int argc, char *argv[])
 	offs += olen >> 12;	/* Add 8 bytes for each 32K block */
 	offs += 64*1024 + 128;	/* Add 64K + 128 bytes slack */
 	offs = (offs+4095) & ~4095; /* Round to a 4K boundary */
+	run_size = atoi(argv[2]);
 
 	printf(".section \".rodata..compressed\",\"a\",@progbits\n");
 	printf(".globl z_input_len\n");
@@ -85,6 +88,8 @@ int main(int argc, char *argv[])
 	/* z_extract_offset_negative allows simplification of head_32.S */
 	printf(".globl z_extract_offset_negative\n");
 	printf("z_extract_offset_negative = -0x%lx\n", offs);
+	printf(".globl z_run_size\n");
+	printf("z_run_size = %lu\n", run_size);
 
 	printf(".globl input_data, input_data_end\n");
 	printf("input_data:\n");
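
Illustration, not part of the patch: with run_size passed on the command line, the tail of the generated piggy.S gains the new symbol. The values below are hypothetical, chosen to agree with the calc_run_size.pl example further down:

.globl z_extract_offset_negative
z_extract_offset_negative = -0x1000000
.globl z_run_size
z_run_size = 21532672
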
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index b8237d8a1e0c..0cdbe6e81b45 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -74,6 +74,9 @@ build_mmio_write(__writel, "l", unsigned int, "r", )
 #define __raw_readw __readw
 #define __raw_readl __readl
 
+#define writeb_relaxed(v, a) __writeb(v, a)
+#define writew_relaxed(v, a) __writew(v, a)
+#define writel_relaxed(v, a) __writel(v, a)
 #define __raw_writeb __writeb
 #define __raw_writew __writew
 #define __raw_writel __writel
@@ -86,6 +89,7 @@ build_mmio_read(readq, "q", unsigned long, "=r", :"memory")
 build_mmio_write(writeq, "q", unsigned long, "r", :"memory")
 
 #define readq_relaxed(a)	readq(a)
+#define writeq_relaxed(v, a)	writeq(v, a)
 
 #define __raw_readq(a)		readq(a)
 #define __raw_writeq(val, addr)	writeq(val, addr)
@@ -310,8 +314,8 @@ BUILDIO(b, b, char)
 BUILDIO(w, w, short)
 BUILDIO(l, , int)
 
-extern void *xlate_dev_mem_ptr(unsigned long phys);
-extern void unxlate_dev_mem_ptr(unsigned long phys, void *addr);
+extern void *xlate_dev_mem_ptr(phys_addr_t phys);
+extern void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);
 
 extern int ioremap_change_attr(unsigned long vaddr, unsigned long size,
 				unsigned long prot_val);
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index f48b17df4224..3a52ee0e726d 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -20,7 +20,6 @@
 #define THREAD_SIZE_ORDER	1
 #define THREAD_SIZE		(PAGE_SIZE << THREAD_SIZE_ORDER)
 
-#define STACKFAULT_STACK 0
 #define DOUBLEFAULT_STACK 1
 #define NMI_STACK 0
 #define DEBUG_STACK 0
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 678205195ae1..75450b2c7be4 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -14,12 +14,11 @@
 #define IRQ_STACK_ORDER 2
 #define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER)
 
-#define STACKFAULT_STACK 1
-#define DOUBLEFAULT_STACK 2
-#define NMI_STACK 3
-#define DEBUG_STACK 4
-#define MCE_STACK 5
-#define N_EXCEPTION_STACKS 5  /* hw limit: 7 */
+#define DOUBLEFAULT_STACK 1
+#define NMI_STACK 2
+#define DEBUG_STACK 3
+#define MCE_STACK 4
+#define N_EXCEPTION_STACKS 4  /* hw limit: 7 */
 
 #define PUD_PAGE_SIZE		(_AC(1, UL) << PUD_SHIFT)
 #define PUD_PAGE_MASK		(~(PUD_PAGE_SIZE-1))
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 8cd27e08e23c..8cd1cc3bc835 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -150,6 +150,7 @@ static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 }
 
 void cpu_disable_common(void);
+void cpu_die_common(unsigned int cpu);
 void native_smp_prepare_boot_cpu(void);
 void native_smp_prepare_cpus(unsigned int max_cpus);
 void native_smp_cpus_done(unsigned int max_cpus);
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 9295016485c9..a4efe477ceab 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -183,8 +183,20 @@ static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
 
 static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
 {
-	while (arch_spin_is_locked(lock))
+	__ticket_t head = ACCESS_ONCE(lock->tickets.head);
+
+	for (;;) {
+		struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
+		/*
+		 * We need to check "unlocked" in a loop, tmp.head == head
+		 * can be false positive because of overflow.
+		 */
+		if (tmp.head == (tmp.tail & ~TICKET_SLOWPATH_FLAG) ||
+		    tmp.head != head)
+			break;
+
 		cpu_relax();
+	}
 }
 
 /*
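
Illustration, not part of the patch: a user-space model of the new exit test. The 8-bit ticket width and the TICKET_SLOWPATH_FLAG encoding (bit 0 of tail, tickets advancing in steps of 2) are assumptions for this sketch:

#include <stdint.h>
#include <stdio.h>

#define TICKET_SLOWPATH_FLAG	((uint8_t)1)	/* assumed encoding */

struct raw_tickets { uint8_t head, tail; };

/* Mirrors the loop's break test: the lock is free, or head moved past
 * the value sampled on entry, so the holder we were waiting on is gone
 * even if head has since wrapped around (overflow). */
static int done_waiting(struct raw_tickets now, uint8_t head_at_entry)
{
	return now.head == (now.tail & ~TICKET_SLOWPATH_FLAG) ||
	       now.head != head_at_entry;
}

int main(void)
{
	struct raw_tickets t = { .head = 2, .tail = 2 };

	printf("%d\n", done_waiting(t, 2));	/* unlocked -> 1 */
	t.tail = 6;				/* two waiters queued */
	printf("%d\n", done_waiting(t, 2));	/* same holder -> 0 */
	t.head = 4;				/* holder released */
	printf("%d\n", done_waiting(t, 2));	/* head advanced -> 1 */
	return 0;
}
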
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 854053889d4d..547e344a6dc6 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -141,7 +141,7 @@ struct thread_info {
 /* Only used for 64 bit */
 #define _TIF_DO_NOTIFY_MASK						\
 	(_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME |	\
-	 _TIF_USER_RETURN_NOTIFY)
+	 _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE)
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW							\
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index bc8352e7010a..707adc6549d8 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -39,6 +39,7 @@ asmlinkage void simd_coprocessor_error(void);
 
 #ifdef CONFIG_TRACING
 asmlinkage void trace_page_fault(void);
+#define trace_stack_segment stack_segment
 #define trace_divide_error divide_error
 #define trace_bounds bounds
 #define trace_invalid_op invalid_op
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index f04dbb3069b8..5caed1dd7ccf 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -21,6 +21,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F3) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F3) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) },
 	{}
@@ -30,6 +31,7 @@ EXPORT_SYMBOL(amd_nb_misc_ids);
 static const struct pci_device_id amd_nb_link_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F4) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F4) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F4) },
 	{}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4b4f78c9ba19..cfa9b5b2c27a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -146,6 +146,8 @@ EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
 
 static int __init x86_xsave_setup(char *s)
 {
+	if (strlen(s))
+		return 0;
 	setup_clear_cpu_cap(X86_FEATURE_XSAVE);
 	setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
 	setup_clear_cpu_cap(X86_FEATURE_XSAVES);
diff --git a/arch/x86/kernel/cpu/microcode/amd_early.c b/arch/x86/kernel/cpu/microcode/amd_early.c
index 7aa1acc79789..06674473b0e6 100644
--- a/arch/x86/kernel/cpu/microcode/amd_early.c
+++ b/arch/x86/kernel/cpu/microcode/amd_early.c
@@ -108,12 +108,13 @@ static size_t compute_container_size(u8 *data, u32 total_size)
  * load_microcode_amd() to save equivalent cpu table and microcode patches in
  * kernel heap memory.
  */
-static void apply_ucode_in_initrd(void *ucode, size_t size)
+static void apply_ucode_in_initrd(void *ucode, size_t size, bool save_patch)
 {
 	struct equiv_cpu_entry *eq;
 	size_t *cont_sz;
 	u32 *header;
 	u8  *data, **cont;
+	u8 (*patch)[PATCH_MAX_SIZE];
 	u16 eq_id = 0;
 	int offset, left;
 	u32 rev, eax, ebx, ecx, edx;
@@ -123,10 +124,12 @@ static void apply_ucode_in_initrd(void *ucode, size_t size)
 	new_rev = (u32 *)__pa_nodebug(&ucode_new_rev);
 	cont_sz = (size_t *)__pa_nodebug(&container_size);
 	cont	= (u8 **)__pa_nodebug(&container);
+	patch	= (u8 (*)[PATCH_MAX_SIZE])__pa_nodebug(&amd_ucode_patch);
 #else
 	new_rev = &ucode_new_rev;
 	cont_sz = &container_size;
 	cont	= &container;
+	patch	= &amd_ucode_patch;
 #endif
 
 	data   = ucode;
@@ -213,9 +216,9 @@ static void apply_ucode_in_initrd(void *ucode, size_t size)
 			rev = mc->hdr.patch_id;
 			*new_rev = rev;
 
-			/* save ucode patch */
-			memcpy(amd_ucode_patch, mc,
-			       min_t(u32, header[1], PATCH_MAX_SIZE));
+			if (save_patch)
+				memcpy(patch, mc,
+				       min_t(u32, header[1], PATCH_MAX_SIZE));
 		}
 	}
 
@@ -246,7 +249,7 @@ void __init load_ucode_amd_bsp(void)
 	*data = cp.data;
 	*size = cp.size;
 
-	apply_ucode_in_initrd(cp.data, cp.size);
+	apply_ucode_in_initrd(cp.data, cp.size, true);
 }
 
 #ifdef CONFIG_X86_32
@@ -263,7 +266,7 @@ void load_ucode_amd_ap(void)
 	size_t *usize;
 	void **ucode;
 
-	mc = (struct microcode_amd *)__pa(amd_ucode_patch);
+	mc = (struct microcode_amd *)__pa_nodebug(amd_ucode_patch);
 	if (mc->hdr.patch_id && mc->hdr.processor_rev_id) {
 		__apply_microcode_amd(mc);
 		return;
@@ -275,7 +278,7 @@ void load_ucode_amd_ap(void)
 	if (!*ucode || !*usize)
 		return;
 
-	apply_ucode_in_initrd(*ucode, *usize);
+	apply_ucode_in_initrd(*ucode, *usize, false);
 }
 
 static void __init collect_cpu_sig_on_bsp(void *arg)
@@ -339,7 +342,7 @@ void load_ucode_amd_ap(void)
 		 * AP has a different equivalence ID than BSP, looks like
 		 * mixed-steppings silicon so go through the ucode blob anew.
 		 */
-		apply_ucode_in_initrd(ucode_cpio.data, ucode_cpio.size);
+		apply_ucode_in_initrd(ucode_cpio.data, ucode_cpio.size, false);
 	}
 }
 #endif
@@ -347,7 +350,9 @@ void load_ucode_amd_ap(void)
 int __init save_microcode_in_initrd_amd(void)
 {
 	unsigned long cont;
+	int retval = 0;
 	enum ucode_state ret;
+	u8 *cont_va;
 	u32 eax;
 
 	if (!container)
@@ -355,13 +360,15 @@ int __init save_microcode_in_initrd_amd(void)
 
 #ifdef CONFIG_X86_32
 	get_bsp_sig();
 	cont	= (unsigned long)container;
+	cont_va	= __va(container);
 #else
 	/*
 	 * We need the physical address of the container for both bitness since
 	 * boot_params.hdr.ramdisk_image is a physical address.
 	 */
 	cont    = __pa(container);
+	cont_va	= container;
 #endif
 
 	/*
@@ -372,6 +379,8 @@ int __init save_microcode_in_initrd_amd(void)
 	if (relocated_ramdisk)
 		container = (u8 *)(__va(relocated_ramdisk) +
 			     (cont - boot_params.hdr.ramdisk_image));
+	else
+		container = cont_va;
 
 	if (ucode_new_rev)
 		pr_info("microcode: updated early to new patch_level=0x%08x\n",
@@ -382,7 +391,7 @@ int __init save_microcode_in_initrd_amd(void)
 
 	ret = load_microcode_amd(eax, container, container_size);
 	if (ret != UCODE_OK)
-		return -EINVAL;
+		retval = -EINVAL;
 
 	/*
 	 * This will be freed any msec now, stash patches for the current
@@ -391,5 +400,5 @@ int __init save_microcode_in_initrd_amd(void)
 	container = NULL;
 	container_size = 0;
 
-	return 0;
+	return retval;
 }
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index dd9d6190b08d..08fe6e8a726e 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -465,6 +465,16 @@ static void mc_bp_resume(void)
 
 	if (uci->valid && uci->mc)
 		microcode_ops->apply_microcode(cpu);
+#ifdef CONFIG_X86_64
+	else if (!uci->mc)
+		/*
+		 * We might resume and not have applied late microcode but still
+		 * have a newer patch stashed from the early loader. We don't
+		 * have it in uci->mc so we have to load it the same way we're
+		 * applying patches early on the APs.
+		 */
+		load_ucode_ap();
+#endif
 }
 
 static struct syscore_ops mc_syscore_ops = {
diff --git a/arch/x86/kernel/cpu/microcode/core_early.c b/arch/x86/kernel/cpu/microcode/core_early.c
index 5f28a64e71ea..2c017f242a78 100644
--- a/arch/x86/kernel/cpu/microcode/core_early.c
+++ b/arch/x86/kernel/cpu/microcode/core_early.c
@@ -124,7 +124,7 @@ void __init load_ucode_bsp(void)
 static bool check_loader_disabled_ap(void)
 {
 #ifdef CONFIG_X86_32
-	return __pa_nodebug(dis_ucode_ldr);
+	return *((bool *)__pa_nodebug(&dis_ucode_ldr));
 #else
 	return dis_ucode_ldr;
 #endif
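
Illustration, not part of the patch: why the old 32-bit branch misread the flag. __pa_nodebug() only offsets an address, and the old code handed it the flag's value, yielding (0 - PAGE_OFFSET), which is non-zero and therefore always "disabled". A toy model; the PAGE_OFFSET value is illustrative and va() exists only so the user-space pointer stays dereferenceable:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_OFFSET 0xc0000000UL		/* illustrative i386 value */

static bool dis_ucode_ldr;			/* loader enabled by default */

static uintptr_t pa_nodebug(uintptr_t virt) { return virt - PAGE_OFFSET; }
static uintptr_t va(uintptr_t phys)         { return phys + PAGE_OFFSET; }

int main(void)
{
	/* Old: treats the flag's value (0) as an address; always non-zero. */
	bool old = (bool)pa_nodebug(dis_ucode_ldr);

	/* New: translate the flag's address, then read through it. */
	bool new = *(bool *)va(pa_nodebug((uintptr_t)&dis_ucode_ldr));

	printf("old=%d new=%d\n", old, new);	/* old=1 new=0 */
	return 0;
}
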
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 1abcb50b48ae..ff86f19b5758 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -24,7 +24,6 @@ static char x86_stack_ids[][8] = {
 		[ DEBUG_STACK-1			]	= "#DB",
 		[ NMI_STACK-1			]	= "NMI",
 		[ DOUBLEFAULT_STACK-1		]	= "#DF",
-		[ STACKFAULT_STACK-1		]	= "#SS",
 		[ MCE_STACK-1			]	= "#MC",
 #if DEBUG_STKSZ > EXCEPTION_STKSZ
 		[ N_EXCEPTION_STACKS ...
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index df088bb03fb3..c0226ab54106 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -828,9 +828,15 @@ ENTRY(native_iret)
 	jnz native_irq_return_ldt
 #endif
 
+.global native_irq_return_iret
 native_irq_return_iret:
+	/*
+	 * This may fault. Non-paranoid faults on return to userspace are
+	 * handled by fixup_bad_iret. These include #SS, #GP, and #NP.
+	 * Double-faults due to espfix64 are handled in do_double_fault.
+	 * Other faults here are fatal.
+	 */
 	iretq
-	_ASM_EXTABLE(native_irq_return_iret, bad_iret)
 
 #ifdef CONFIG_X86_ESPFIX64
 native_irq_return_ldt:
@@ -858,25 +864,6 @@ native_irq_return_ldt:
 	jmp native_irq_return_iret
 #endif
 
-	.section .fixup,"ax"
-bad_iret:
-	/*
-	 * The iret traps when the %cs or %ss being restored is bogus.
-	 * We've lost the original trap vector and error code.
-	 * #GPF is the most likely one to get for an invalid selector.
-	 * So pretend we completed the iret and took the #GPF in user mode.
-	 *
-	 * We are now running with the kernel GS after exception recovery.
-	 * But error_entry expects us to have user GS to match the user %cs,
-	 * so swap back.
-	 */
-	pushq $0
-
-	SWAPGS
-	jmp general_protection
-
-	.previous
-
 	/* edi: workmask, edx: work */
 retint_careful:
 	CFI_RESTORE_STATE
@@ -922,37 +909,6 @@ ENTRY(retint_kernel)
 	CFI_ENDPROC
 END(common_interrupt)
 
-	/*
-	 * If IRET takes a fault on the espfix stack, then we
-	 * end up promoting it to a doublefault. In that case,
-	 * modify the stack to make it look like we just entered
-	 * the #GP handler from user space, similar to bad_iret.
-	 */
-#ifdef CONFIG_X86_ESPFIX64
-	ALIGN
-__do_double_fault:
-	XCPT_FRAME 1 RDI+8
-	movq RSP(%rdi),%rax		/* Trap on the espfix stack? */
-	sarq $PGDIR_SHIFT,%rax
-	cmpl $ESPFIX_PGD_ENTRY,%eax
-	jne do_double_fault		/* No, just deliver the fault */
-	cmpl $__KERNEL_CS,CS(%rdi)
-	jne do_double_fault
-	movq RIP(%rdi),%rax
-	cmpq $native_irq_return_iret,%rax
-	jne do_double_fault		/* This shouldn't happen... */
-	movq PER_CPU_VAR(kernel_stack),%rax
-	subq $(6*8-KERNEL_STACK_OFFSET),%rax	/* Reset to original stack */
-	movq %rax,RSP(%rdi)
-	movq $0,(%rax)			/* Missing (lost) #GP error code */
-	movq $general_protection,RIP(%rdi)
-	retq
-	CFI_ENDPROC
-END(__do_double_fault)
-#else
-# define __do_double_fault do_double_fault
-#endif
-
 /*
  * APIC interrupts.
  */
@@ -1124,7 +1080,7 @@ idtentry overflow do_overflow has_error_code=0
 idtentry bounds do_bounds has_error_code=0
 idtentry invalid_op do_invalid_op has_error_code=0
 idtentry device_not_available do_device_not_available has_error_code=0
-idtentry double_fault __do_double_fault has_error_code=1 paranoid=1
+idtentry double_fault do_double_fault has_error_code=1 paranoid=1
 idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0
 idtentry invalid_TSS do_invalid_TSS has_error_code=1
 idtentry segment_not_present do_segment_not_present has_error_code=1
@@ -1289,7 +1245,7 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
 
 idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
 idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
-idtentry stack_segment do_stack_segment has_error_code=1 paranoid=1
+idtentry stack_segment do_stack_segment has_error_code=1
 #ifdef CONFIG_XEN
 idtentry xen_debug do_debug has_error_code=0
 idtentry xen_int3 do_int3 has_error_code=0
@@ -1399,17 +1355,16 @@ error_sti:
 
 /*
  * There are two places in the kernel that can potentially fault with
- * usergs. Handle them here. The exception handlers after iret run with
- * kernel gs again, so don't set the user space flag. B stepping K8s
- * sometimes report an truncated RIP for IRET exceptions returning to
- * compat mode. Check for these here too.
+ * usergs. Handle them here. B stepping K8s sometimes report a
+ * truncated RIP for IRET exceptions returning to compat mode. Check
+ * for these here too.
  */
error_kernelspace:
 	CFI_REL_OFFSET rcx, RCX+8
 	incl %ebx
 	leaq native_irq_return_iret(%rip),%rcx
 	cmpq %rcx,RIP+8(%rsp)
-	je error_swapgs
+	je error_bad_iret
 	movl %ecx,%eax	/* zero extend */
 	cmpq %rax,RIP+8(%rsp)
 	je bstep_iret
@@ -1420,7 +1375,15 @@ error_kernelspace:
 bstep_iret:
 	/* Fix truncated RIP */
 	movq %rcx,RIP+8(%rsp)
-	jmp error_swapgs
+	/* fall through */
+
+error_bad_iret:
+	SWAPGS
+	mov %rsp,%rdi
+	call fixup_bad_iret
+	mov %rax,%rsp
+	decl %ebx	/* Return to usergs */
+	jmp error_sti
 	CFI_ENDPROC
END(error_entry)
 
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 749b0e423419..e510618b2e91 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1484,7 +1484,7 @@ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
 	 */
 	if (work & _TIF_NOHZ) {
 		user_exit();
-		work &= ~TIF_NOHZ;
+		work &= ~_TIF_NOHZ;
 	}
 
 #ifdef CONFIG_SECCOMP
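
Illustration, not part of the patch: the one-character fix. TIF_NOHZ is a bit index and _TIF_NOHZ the corresponding mask, so masking with the index leaves the NOHZ bit set and clears unrelated low bits instead (the bit position 19 below is illustrative):

#include <stdio.h>

#define TIF_NOHZ	19			/* illustrative bit number */
#define _TIF_NOHZ	(1UL << TIF_NOHZ)	/* the flag mask */

int main(void)
{
	unsigned long work = _TIF_NOHZ;

	printf("buggy: %#lx\n", work & ~(unsigned long)TIF_NOHZ);	/* 0x80000 */
	printf("fixed: %#lx\n", work & ~_TIF_NOHZ);			/* 0 */
	return 0;
}
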
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 4d2128ac70bd..668d8f2a8781 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1303,10 +1303,14 @@ static void __ref remove_cpu_from_maps(int cpu)
 	numa_remove_cpu(cpu);
 }
 
+static DEFINE_PER_CPU(struct completion, die_complete);
+
 void cpu_disable_common(void)
 {
 	int cpu = smp_processor_id();
 
+	init_completion(&per_cpu(die_complete, smp_processor_id()));
+
 	remove_siblinginfo(cpu);
 
 	/* It's now safe to remove this processor from the online map */
@@ -1316,8 +1320,6 @@ void cpu_disable_common(void)
 	fixup_irqs();
 }
 
-static DEFINE_PER_CPU(struct completion, die_complete);
-
 int native_cpu_disable(void)
 {
 	int ret;
@@ -1327,16 +1329,21 @@ int native_cpu_disable(void)
 		return ret;
 
 	clear_local_APIC();
-	init_completion(&per_cpu(die_complete, smp_processor_id()));
 	cpu_disable_common();
 
 	return 0;
 }
 
+void cpu_die_common(unsigned int cpu)
+{
+	wait_for_completion_timeout(&per_cpu(die_complete, cpu), HZ);
+}
+
 void native_cpu_die(unsigned int cpu)
 {
 	/* We don't do anything here: idle task is faking death itself. */
-	wait_for_completion_timeout(&per_cpu(die_complete, cpu), HZ);
+
+	cpu_die_common(cpu);
 
 	/* They ack this in play_dead() by setting CPU_DEAD */
 	if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 0d0e922fafc1..de801f22128a 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -233,32 +233,40 @@ DO_ERROR(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op)
 DO_ERROR(X86_TRAP_OLD_MF, SIGFPE,  "coprocessor segment overrun",coprocessor_segment_overrun)
 DO_ERROR(X86_TRAP_TS,     SIGSEGV, "invalid TSS",		invalid_TSS)
 DO_ERROR(X86_TRAP_NP,     SIGBUS,  "segment not present",	segment_not_present)
-#ifdef CONFIG_X86_32
 DO_ERROR(X86_TRAP_SS,     SIGBUS,  "stack segment",		stack_segment)
-#endif
 DO_ERROR(X86_TRAP_AC,     SIGBUS,  "alignment check",		alignment_check)
 
 #ifdef CONFIG_X86_64
 /* Runs on IST stack */
-dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code)
-{
-	enum ctx_state prev_state;
-
-	prev_state = exception_enter();
-	if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
-		       X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) {
-		preempt_conditional_sti(regs);
-		do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL);
-		preempt_conditional_cli(regs);
-	}
-	exception_exit(prev_state);
-}
-
 dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 {
 	static const char str[] = "double fault";
 	struct task_struct *tsk = current;
 
+#ifdef CONFIG_X86_ESPFIX64
+	extern unsigned char native_irq_return_iret[];
+
+	/*
+	 * If IRET takes a non-IST fault on the espfix64 stack, then we
+	 * end up promoting it to a doublefault. In that case, modify
+	 * the stack to make it look like we just entered the #GP
+	 * handler from user space, similar to bad_iret.
+	 */
+	if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY &&
+		regs->cs == __KERNEL_CS &&
+		regs->ip == (unsigned long)native_irq_return_iret)
+	{
+		struct pt_regs *normal_regs = task_pt_regs(current);
+
+		/* Fake a #GP(0) from userspace. */
+		memmove(&normal_regs->ip, (void *)regs->sp, 5*8);
+		normal_regs->orig_ax = 0;  /* Missing (lost) #GP error code */
+		regs->ip = (unsigned long)general_protection;
+		regs->sp = (unsigned long)&normal_regs->orig_ax;
+		return;
+	}
+#endif
+
 	exception_enter();
 	/* Return not checked because double check cannot be ignored */
 	notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
@@ -399,6 +407,35 @@ asmlinkage __visible struct pt_regs *sync_regs(struct pt_regs *eregs)
 	return regs;
 }
 NOKPROBE_SYMBOL(sync_regs);
+
+struct bad_iret_stack {
+	void *error_entry_ret;
+	struct pt_regs regs;
+};
+
+asmlinkage __visible
+struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
+{
+	/*
+	 * This is called from entry_64.S early in handling a fault
+	 * caused by a bad iret to user mode. To handle the fault
+	 * correctly, we want move our stack frame to task_pt_regs
+	 * and we want to pretend that the exception came from the
+	 * iret target.
+	 */
+	struct bad_iret_stack *new_stack =
+		container_of(task_pt_regs(current),
+			     struct bad_iret_stack, regs);
+
+	/* Copy the IRET target to the new stack. */
+	memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
+
+	/* Copy the remainder of the stack from the current stack. */
+	memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip));
+
+	BUG_ON(!user_mode_vm(&new_stack->regs));
+	return new_stack;
+}
 #endif
 
 /*
@@ -778,7 +815,7 @@ void __init trap_init(void)
 	set_intr_gate(X86_TRAP_OLD_MF, coprocessor_segment_overrun);
 	set_intr_gate(X86_TRAP_TS, invalid_TSS);
 	set_intr_gate(X86_TRAP_NP, segment_not_present);
-	set_intr_gate_ist(X86_TRAP_SS, &stack_segment, STACKFAULT_STACK);
+	set_intr_gate(X86_TRAP_SS, stack_segment);
 	set_intr_gate(X86_TRAP_GP, general_protection);
 	set_intr_gate(X86_TRAP_SPURIOUS, spurious_interrupt_bug);
 	set_intr_gate(X86_TRAP_MF, coprocessor_error);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 5edf088ca51e..9f8a2faf5040 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -4287,6 +4287,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
 		fetch_register_operand(op);
 		break;
 	case OpCL:
+		op->type = OP_IMM;
 		op->bytes = 1;
 		op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff;
 		break;
@@ -4294,6 +4295,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
 		rc = decode_imm(ctxt, op, 1, true);
 		break;
 	case OpOne:
+		op->type = OP_IMM;
 		op->bytes = 1;
 		op->val = 1;
 		break;
@@ -4352,21 +4354,27 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
 		ctxt->memop.bytes = ctxt->op_bytes + 2;
 		goto mem_common;
 	case OpES:
+		op->type = OP_IMM;
 		op->val = VCPU_SREG_ES;
 		break;
 	case OpCS:
+		op->type = OP_IMM;
 		op->val = VCPU_SREG_CS;
 		break;
 	case OpSS:
+		op->type = OP_IMM;
 		op->val = VCPU_SREG_SS;
 		break;
 	case OpDS:
+		op->type = OP_IMM;
 		op->val = VCPU_SREG_DS;
 		break;
 	case OpFS:
+		op->type = OP_IMM;
 		op->val = VCPU_SREG_FS;
 		break;
 	case OpGS:
+		op->type = OP_IMM;
 		op->val = VCPU_SREG_GS;
 		break;
 	case OpImplicit:
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ac1c4de3a484..978f402006ee 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -630,7 +630,7 @@ static int mmu_spte_clear_track_bits(u64 *sptep)
 	 * kvm mmu, before reclaiming the page, we should
 	 * unmap it from mmu first.
 	 */
-	WARN_ON(!kvm_is_mmio_pfn(pfn) && !page_count(pfn_to_page(pfn)));
+	WARN_ON(!kvm_is_reserved_pfn(pfn) && !page_count(pfn_to_page(pfn)));
 
 	if (!shadow_accessed_mask || old_spte & shadow_accessed_mask)
 		kvm_set_pfn_accessed(pfn);
@@ -2461,7 +2461,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		spte |= PT_PAGE_SIZE_MASK;
 	if (tdp_enabled)
 		spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
-			kvm_is_mmio_pfn(pfn));
+			kvm_is_reserved_pfn(pfn));
 
 	if (host_writable)
 		spte |= SPTE_HOST_WRITEABLE;
@@ -2737,7 +2737,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
 	 * PT_PAGE_TABLE_LEVEL and there would be no adjustment done
 	 * here.
 	 */
-	if (!is_error_noslot_pfn(pfn) && !kvm_is_mmio_pfn(pfn) &&
+	if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) &&
 	    level == PT_PAGE_TABLE_LEVEL &&
 	    PageTransCompound(pfn_to_page(pfn)) &&
 	    !has_wrprotected_page(vcpu->kvm, gfn, PT_DIRECTORY_LEVEL)) {
diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c
index 7609e0e421ec..1318f75d56e4 100644
--- a/arch/x86/lib/csum-wrappers_64.c
+++ b/arch/x86/lib/csum-wrappers_64.c
@@ -41,9 +41,8 @@ csum_partial_copy_from_user(const void __user *src, void *dst,
 	while (((unsigned long)src & 6) && len >= 2) {
 		__u16 val16;
 
-		*errp = __get_user(val16, (const __u16 __user *)src);
-		if (*errp)
-			return isum;
+		if (__get_user(val16, (const __u16 __user *)src))
+			goto out_err;
 
 		*(__u16 *)dst = val16;
 		isum = (__force __wsum)add32_with_carry(
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 4cb8763868fc..4e5dfec750fc 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1123,7 +1123,7 @@ void mark_rodata_ro(void)
 	unsigned long end = (unsigned long) &__end_rodata_hpage_align;
 	unsigned long text_end = PFN_ALIGN(&__stop___ex_table);
 	unsigned long rodata_end = PFN_ALIGN(&__end_rodata);
-	unsigned long all_end = PFN_ALIGN(&_end);
+	unsigned long all_end;
 
 	printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
 	       (end - start) >> 10);
@@ -1134,7 +1134,16 @@ void mark_rodata_ro(void)
 	/*
 	 * The rodata/data/bss/brk section (but not the kernel text!)
 	 * should also be not-executable.
+	 *
+	 * We align all_end to PMD_SIZE because the existing mapping
+	 * is a full PMD. If we would align _brk_end to PAGE_SIZE we
+	 * split the PMD and the reminder between _brk_end and the end
+	 * of the PMD will remain mapped executable.
+	 *
+	 * Any PMD which was setup after the one which covers _brk_end
+	 * has been zapped already via cleanup_highmem().
 	 */
+	all_end = roundup((unsigned long)_brk_end, PMD_SIZE);
 	set_memory_nx(rodata_start, (all_end - rodata_start) >> PAGE_SHIFT);
 
 	rodata_test();
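
Illustration, not part of the patch: the effect of rounding _brk_end up to PMD_SIZE. A sketch with a hypothetical _brk_end; the whole 2 MiB PMD covering it stays inside the NX range instead of the mapping being split at a 4 KiB boundary:

#include <stdio.h>

#define PMD_SIZE	(2UL << 20)	/* 2 MiB: x86-64 large-page unit */

/* Same rounding the patch applies to _brk_end (PMD_SIZE is a power of 2). */
static unsigned long pmd_roundup(unsigned long addr)
{
	return (addr + PMD_SIZE - 1) & ~(PMD_SIZE - 1);
}

int main(void)
{
	printf("%#lx\n", pmd_roundup(0x2062345UL));	/* 0x2200000 */
	return 0;
}
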
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index af78e50ca6ce..b12f43c192cf 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -327,7 +327,7 @@ EXPORT_SYMBOL(iounmap);
 * Convert a physical pointer to a virtual kernel pointer for /dev/mem
 * access
 */
-void *xlate_dev_mem_ptr(unsigned long phys)
+void *xlate_dev_mem_ptr(phys_addr_t phys)
 {
 	void *addr;
 	unsigned long start = phys & PAGE_MASK;
@@ -343,7 +343,7 @@ void *xlate_dev_mem_ptr(unsigned long phys)
 	return addr;
 }
 
-void unxlate_dev_mem_ptr(unsigned long phys, void *addr)
+void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
 {
 	if (page_is_ram(phys >> PAGE_SHIFT))
 		return;
diff --git a/arch/x86/tools/calc_run_size.pl b/arch/x86/tools/calc_run_size.pl
new file mode 100644
index 000000000000..23210baade2d
--- /dev/null
+++ b/arch/x86/tools/calc_run_size.pl
@@ -0,0 +1,39 @@
+#!/usr/bin/perl
+#
+# Calculate the amount of space needed to run the kernel, including room for
+# the .bss and .brk sections.
+#
+# Usage:
+# objdump -h a.out | perl calc_run_size.pl
+use strict;
+
+my $mem_size = 0;
+my $file_offset = 0;
+
+my $sections=" *[0-9]+ \.(?:bss|brk) +";
+while (<>) {
+	if (/^$sections([0-9a-f]+) +(?:[0-9a-f]+ +){2}([0-9a-f]+)/) {
+		my $size = hex($1);
+		my $offset = hex($2);
+		$mem_size += $size;
+		if ($file_offset == 0) {
+			$file_offset = $offset;
+		} elsif ($file_offset != $offset) {
+			# BFD linker shows the same file offset in ELF.
+			# Gold linker shows them as consecutive.
+			next if ($file_offset + $mem_size == $offset + $size);
+
+			printf STDERR "file_offset: 0x%lx\n", $file_offset;
+			printf STDERR "mem_size: 0x%lx\n", $mem_size;
+			printf STDERR "offset: 0x%lx\n", $offset;
+			printf STDERR "size: 0x%lx\n", $size;
+
+			die ".bss and .brk are non-contiguous\n";
+		}
+	}
+}
+
+if ($file_offset == 0) {
+	die "Never found .bss or .brk file offset\n";
+}
+printf("%d\n", $mem_size + $file_offset);
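
Illustration, not part of the patch: hypothetical objdump -h lines of the kind the regex matches, here with gold-style consecutive file offsets:

 25 .bss   00170000  ffffffff81ef2000  0000000001ef2000  012f2000  2**12
 26 .brk   00027000  ffffffff82062000  0000000002062000  01462000  2**12

For these two sections the script prints 21532672 (0x012f2000 + 0x00197000 = 0x01489000), the run-size figure echoed by the mkpiggy example above.
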
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 8650cdb53209..4c071aeb8417 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -510,6 +510,9 @@ static void xen_cpu_die(unsigned int cpu)
 		current->state = TASK_UNINTERRUPTIBLE;
 		schedule_timeout(HZ/10);
 	}
+
+	cpu_die_common(cpu);
+
 	xen_smp_intr_free(cpu);
 	xen_uninit_lock_cpu(cpu);
 	xen_teardown_timer(cpu);