author     Al Viro <viro@zeniv.linux.org.uk>  2014-12-08 20:39:29 -0500
committer  Al Viro <viro@zeniv.linux.org.uk>  2014-12-08 20:39:29 -0500
commit     ba00410b8131b23edfb0e09f8b6dd26c8eb621fb (patch)
tree       c08504e4d2fa51ac91cef544f336d0169806c49f /arch/x86
parent     8ce74dd6057832618957fc2cbd38fa959c3a0a6c (diff)
parent     aa583096d9767892983332e7c1a984bd17e3cd39 (diff)
Merge branch 'iov_iter' into for-next
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig | 4
-rw-r--r--  arch/x86/boot/compressed/Makefile | 4
-rw-r--r--  arch/x86/boot/compressed/head_32.S | 5
-rw-r--r--  arch/x86/boot/compressed/head_64.S | 5
-rw-r--r--  arch/x86/boot/compressed/misc.c | 13
-rw-r--r--  arch/x86/boot/compressed/mkpiggy.c | 9
-rw-r--r--  arch/x86/ia32/ia32entry.S | 2
-rw-r--r--  arch/x86/include/asm/page_32_types.h | 1
-rw-r--r--  arch/x86/include/asm/page_64_types.h | 11
-rw-r--r--  arch/x86/include/asm/preempt.h | 1
-rw-r--r--  arch/x86/include/asm/smp.h | 1
-rw-r--r--  arch/x86/include/asm/thread_info.h | 2
-rw-r--r--  arch/x86/include/asm/traps.h | 1
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 16
-rw-r--r--  arch/x86/kernel/apb_timer.c | 2
-rw-r--r--  arch/x86/kernel/apic/apic.c | 4
-rw-r--r--  arch/x86/kernel/cpu/Makefile | 7
-rw-r--r--  arch/x86/kernel/cpu/common.c | 2
-rw-r--r--  arch/x86/kernel/cpu/intel.c | 5
-rw-r--r--  arch/x86/kernel/cpu/microcode/amd_early.c | 33
-rw-r--r--  arch/x86/kernel/cpu/microcode/core.c | 8
-rw-r--r--  arch/x86/kernel/cpu/microcode/core_early.c | 2
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 14
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h | 1
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c | 173
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c | 49
-rw-r--r--  arch/x86/kernel/dumpstack_64.c | 1
-rw-r--r--  arch/x86/kernel/entry_32.S | 15
-rw-r--r--  arch/x86/kernel/entry_64.S | 81
-rw-r--r--  arch/x86/kernel/i8259.c | 3
-rw-r--r--  arch/x86/kernel/irqinit.c | 3
-rw-r--r--  arch/x86/kernel/ptrace.c | 2
-rw-r--r--  arch/x86/kernel/setup.c | 2
-rw-r--r--  arch/x86/kernel/smpboot.c | 15
-rw-r--r--  arch/x86/kernel/traps.c | 71
-rw-r--r--  arch/x86/kernel/tsc.c | 5
-rw-r--r--  arch/x86/kvm/emulate.c | 63
-rw-r--r--  arch/x86/kvm/vmx.c | 6
-rw-r--r--  arch/x86/lib/csum-wrappers_64.c | 5
-rw-r--r--  arch/x86/mm/init_64.c | 11
-rw-r--r--  arch/x86/mm/pageattr.c | 2
-rw-r--r--  arch/x86/platform/intel-mid/sfi.c | 2
-rw-r--r--  arch/x86/tools/calc_run_size.pl | 39
-rw-r--r--  arch/x86/xen/smp.c | 3
44 files changed, 355 insertions, 349 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f2327e88e07c..41a503c15862 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -142,6 +142,10 @@ config INSTRUCTION_DECODER
142 def_bool y 142 def_bool y
143 depends on KPROBES || PERF_EVENTS || UPROBES 143 depends on KPROBES || PERF_EVENTS || UPROBES
144 144
145config PERF_EVENTS_INTEL_UNCORE
146 def_bool y
147 depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
148
145config OUTPUT_FORMAT 149config OUTPUT_FORMAT
146 string 150 string
147 default "elf32-i386" if X86_32 151 default "elf32-i386" if X86_32
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 704f58aa79cd..be1e07d4b596 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -76,8 +76,10 @@ suffix-$(CONFIG_KERNEL_XZ) := xz
76suffix-$(CONFIG_KERNEL_LZO) := lzo 76suffix-$(CONFIG_KERNEL_LZO) := lzo
77suffix-$(CONFIG_KERNEL_LZ4) := lz4 77suffix-$(CONFIG_KERNEL_LZ4) := lz4
78 78
79RUN_SIZE = $(shell objdump -h vmlinux | \
80 perl $(srctree)/arch/x86/tools/calc_run_size.pl)
79quiet_cmd_mkpiggy = MKPIGGY $@ 81quiet_cmd_mkpiggy = MKPIGGY $@
80 cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false ) 82 cmd_mkpiggy = $(obj)/mkpiggy $< $(RUN_SIZE) > $@ || ( rm -f $@ ; false )
81 83
82targets += piggy.S 84targets += piggy.S
83$(obj)/piggy.S: $(obj)/vmlinux.bin.$(suffix-y) $(obj)/mkpiggy FORCE 85$(obj)/piggy.S: $(obj)/vmlinux.bin.$(suffix-y) $(obj)/mkpiggy FORCE
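
The RUN_SIZE value added above is computed at build time by piping "objdump -h vmlinux" output through arch/x86/tools/calc_run_size.pl (a script added by this merge whose contents are not shown in this excerpt) and is then passed to mkpiggy as an extra command-line argument. As a rough sketch of the kind of calculation involved, under the assumption (taken from the "size of kernel with .bss and .brk" comments further down) that run_size is the start of .bss plus the combined size of .bss and .brk, expressed in plain C rather than Perl:

#include <stdio.h>
#include <string.h>

/*
 * Illustrative only; the real computation lives in the Perl script
 * named above. Section sizes and offsets stand in for fields parsed
 * out of `objdump -h` output.
 */
struct section {
	const char *name;
	unsigned long size;
	unsigned long offset;
};

static unsigned long calc_run_size(const struct section *s, int n)
{
	unsigned long total = 0, start = 0;
	int i;

	for (i = 0; i < n; i++) {
		/* Only .bss and .brk extend past the on-disk image. */
		if (strcmp(s[i].name, ".bss") && strcmp(s[i].name, ".brk"))
			continue;
		if (!start)
			start = s[i].offset;	/* first of .bss/.brk */
		total += s[i].size;
	}
	/* Run-time footprint: where .bss begins plus what follows it. */
	return start + total;
}

int main(void)
{
	/* Hypothetical numbers, not taken from a real vmlinux. */
	const struct section secs[] = {
		{ ".text", 0x800000, 0x200000 },
		{ ".bss",  0x100000, 0xa00000 },
		{ ".brk",  0x020000, 0xb00000 },
	};

	printf("run_size = %#lx\n", calc_run_size(secs, 3));
	return 0;
}
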
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index cbed1407a5cd..1d7fbbcc196d 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -207,7 +207,8 @@ relocated:
207 * Do the decompression, and jump to the new kernel.. 207 * Do the decompression, and jump to the new kernel..
208 */ 208 */
209 /* push arguments for decompress_kernel: */ 209 /* push arguments for decompress_kernel: */
210 pushl $z_output_len /* decompressed length */ 210 pushl $z_run_size /* size of kernel with .bss and .brk */
211 pushl $z_output_len /* decompressed length, end of relocs */
211 leal z_extract_offset_negative(%ebx), %ebp 212 leal z_extract_offset_negative(%ebx), %ebp
212 pushl %ebp /* output address */ 213 pushl %ebp /* output address */
213 pushl $z_input_len /* input_len */ 214 pushl $z_input_len /* input_len */
@@ -217,7 +218,7 @@ relocated:
217 pushl %eax /* heap area */ 218 pushl %eax /* heap area */
218 pushl %esi /* real mode pointer */ 219 pushl %esi /* real mode pointer */
219 call decompress_kernel /* returns kernel location in %eax */ 220 call decompress_kernel /* returns kernel location in %eax */
220 addl $24, %esp 221 addl $28, %esp
221 222
222/* 223/*
223 * Jump to the decompressed kernel. 224 * Jump to the decompressed kernel.
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 2884e0c3e8a5..6b1766c6c082 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -402,13 +402,16 @@ relocated:
402 * Do the decompression, and jump to the new kernel.. 402 * Do the decompression, and jump to the new kernel..
403 */ 403 */
404 pushq %rsi /* Save the real mode argument */ 404 pushq %rsi /* Save the real mode argument */
405 movq $z_run_size, %r9 /* size of kernel with .bss and .brk */
406 pushq %r9
405 movq %rsi, %rdi /* real mode address */ 407 movq %rsi, %rdi /* real mode address */
406 leaq boot_heap(%rip), %rsi /* malloc area for uncompression */ 408 leaq boot_heap(%rip), %rsi /* malloc area for uncompression */
407 leaq input_data(%rip), %rdx /* input_data */ 409 leaq input_data(%rip), %rdx /* input_data */
408 movl $z_input_len, %ecx /* input_len */ 410 movl $z_input_len, %ecx /* input_len */
409 movq %rbp, %r8 /* output target address */ 411 movq %rbp, %r8 /* output target address */
410 movq $z_output_len, %r9 /* decompressed length */ 412 movq $z_output_len, %r9 /* decompressed length, end of relocs */
411 call decompress_kernel /* returns kernel location in %rax */ 413 call decompress_kernel /* returns kernel location in %rax */
414 popq %r9
412 popq %rsi 415 popq %rsi
413 416
414/* 417/*
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 57ab74df7eea..30dd59a9f0b4 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -358,7 +358,8 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
358 unsigned char *input_data, 358 unsigned char *input_data,
359 unsigned long input_len, 359 unsigned long input_len,
360 unsigned char *output, 360 unsigned char *output,
361 unsigned long output_len) 361 unsigned long output_len,
362 unsigned long run_size)
362{ 363{
363 real_mode = rmode; 364 real_mode = rmode;
364 365
@@ -381,8 +382,14 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
381 free_mem_ptr = heap; /* Heap */ 382 free_mem_ptr = heap; /* Heap */
382 free_mem_end_ptr = heap + BOOT_HEAP_SIZE; 383 free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
383 384
384 output = choose_kernel_location(input_data, input_len, 385 /*
385 output, output_len); 386 * The memory hole needed for the kernel is the larger of either
387 * the entire decompressed kernel plus relocation table, or the
388 * entire decompressed kernel plus .bss and .brk sections.
389 */
390 output = choose_kernel_location(input_data, input_len, output,
391 output_len > run_size ? output_len
392 : run_size);
386 393
387 /* Validate memory location choices. */ 394 /* Validate memory location choices. */
388 if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1)) 395 if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1))
diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c
index b669ab65bf6c..d8222f213182 100644
--- a/arch/x86/boot/compressed/mkpiggy.c
+++ b/arch/x86/boot/compressed/mkpiggy.c
@@ -36,11 +36,13 @@ int main(int argc, char *argv[])
36 uint32_t olen; 36 uint32_t olen;
37 long ilen; 37 long ilen;
38 unsigned long offs; 38 unsigned long offs;
39 unsigned long run_size;
39 FILE *f = NULL; 40 FILE *f = NULL;
40 int retval = 1; 41 int retval = 1;
41 42
42 if (argc < 2) { 43 if (argc < 3) {
43 fprintf(stderr, "Usage: %s compressed_file\n", argv[0]); 44 fprintf(stderr, "Usage: %s compressed_file run_size\n",
45 argv[0]);
44 goto bail; 46 goto bail;
45 } 47 }
46 48
@@ -74,6 +76,7 @@ int main(int argc, char *argv[])
74 offs += olen >> 12; /* Add 8 bytes for each 32K block */ 76 offs += olen >> 12; /* Add 8 bytes for each 32K block */
75 offs += 64*1024 + 128; /* Add 64K + 128 bytes slack */ 77 offs += 64*1024 + 128; /* Add 64K + 128 bytes slack */
76 offs = (offs+4095) & ~4095; /* Round to a 4K boundary */ 78 offs = (offs+4095) & ~4095; /* Round to a 4K boundary */
79 run_size = atoi(argv[2]);
77 80
78 printf(".section \".rodata..compressed\",\"a\",@progbits\n"); 81 printf(".section \".rodata..compressed\",\"a\",@progbits\n");
79 printf(".globl z_input_len\n"); 82 printf(".globl z_input_len\n");
@@ -85,6 +88,8 @@ int main(int argc, char *argv[])
85 /* z_extract_offset_negative allows simplification of head_32.S */ 88 /* z_extract_offset_negative allows simplification of head_32.S */
86 printf(".globl z_extract_offset_negative\n"); 89 printf(".globl z_extract_offset_negative\n");
87 printf("z_extract_offset_negative = -0x%lx\n", offs); 90 printf("z_extract_offset_negative = -0x%lx\n", offs);
91 printf(".globl z_run_size\n");
92 printf("z_run_size = %lu\n", run_size);
88 93
89 printf(".globl input_data, input_data_end\n"); 94 printf(".globl input_data, input_data_end\n");
90 printf("input_data:\n"); 95 printf("input_data:\n");
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 8ffba18395c8..ffe71228fc10 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -157,7 +157,7 @@ ENTRY(ia32_sysenter_target)
157 * ourselves. To save a few cycles, we can check whether 157 * ourselves. To save a few cycles, we can check whether
158 * NT was set instead of doing an unconditional popfq. 158 * NT was set instead of doing an unconditional popfq.
159 */ 159 */
160 testl $X86_EFLAGS_NT,EFLAGS(%rsp) /* saved EFLAGS match cpu */ 160 testl $X86_EFLAGS_NT,EFLAGS-ARGOFFSET(%rsp)
161 jnz sysenter_fix_flags 161 jnz sysenter_fix_flags
162sysenter_flags_fixed: 162sysenter_flags_fixed:
163 163
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index f48b17df4224..3a52ee0e726d 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -20,7 +20,6 @@
20#define THREAD_SIZE_ORDER 1 20#define THREAD_SIZE_ORDER 1
21#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) 21#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
22 22
23#define STACKFAULT_STACK 0
24#define DOUBLEFAULT_STACK 1 23#define DOUBLEFAULT_STACK 1
25#define NMI_STACK 0 24#define NMI_STACK 0
26#define DEBUG_STACK 0 25#define DEBUG_STACK 0
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 678205195ae1..75450b2c7be4 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -14,12 +14,11 @@
14#define IRQ_STACK_ORDER 2 14#define IRQ_STACK_ORDER 2
15#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER) 15#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER)
16 16
17#define STACKFAULT_STACK 1 17#define DOUBLEFAULT_STACK 1
18#define DOUBLEFAULT_STACK 2 18#define NMI_STACK 2
19#define NMI_STACK 3 19#define DEBUG_STACK 3
20#define DEBUG_STACK 4 20#define MCE_STACK 4
21#define MCE_STACK 5 21#define N_EXCEPTION_STACKS 4 /* hw limit: 7 */
22#define N_EXCEPTION_STACKS 5 /* hw limit: 7 */
23 22
24#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) 23#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT)
25#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) 24#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1))
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 7024c12f7bfe..400873450e33 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -105,6 +105,7 @@ static __always_inline bool should_resched(void)
105# ifdef CONFIG_CONTEXT_TRACKING 105# ifdef CONFIG_CONTEXT_TRACKING
106 extern asmlinkage void ___preempt_schedule_context(void); 106 extern asmlinkage void ___preempt_schedule_context(void);
107# define __preempt_schedule_context() asm ("call ___preempt_schedule_context") 107# define __preempt_schedule_context() asm ("call ___preempt_schedule_context")
108 extern asmlinkage void preempt_schedule_context(void);
108# endif 109# endif
109#endif 110#endif
110 111
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 8cd27e08e23c..8cd1cc3bc835 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -150,6 +150,7 @@ static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
150} 150}
151 151
152void cpu_disable_common(void); 152void cpu_disable_common(void);
153void cpu_die_common(unsigned int cpu);
153void native_smp_prepare_boot_cpu(void); 154void native_smp_prepare_boot_cpu(void);
154void native_smp_prepare_cpus(unsigned int max_cpus); 155void native_smp_prepare_cpus(unsigned int max_cpus);
155void native_smp_cpus_done(unsigned int max_cpus); 156void native_smp_cpus_done(unsigned int max_cpus);
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 854053889d4d..547e344a6dc6 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -141,7 +141,7 @@ struct thread_info {
141/* Only used for 64 bit */ 141/* Only used for 64 bit */
142#define _TIF_DO_NOTIFY_MASK \ 142#define _TIF_DO_NOTIFY_MASK \
143 (_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME | \ 143 (_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME | \
144 _TIF_USER_RETURN_NOTIFY) 144 _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE)
145 145
146/* flags to check in __switch_to() */ 146/* flags to check in __switch_to() */
147#define _TIF_WORK_CTXSW \ 147#define _TIF_WORK_CTXSW \
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index bc8352e7010a..707adc6549d8 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -39,6 +39,7 @@ asmlinkage void simd_coprocessor_error(void);
39 39
40#ifdef CONFIG_TRACING 40#ifdef CONFIG_TRACING
41asmlinkage void trace_page_fault(void); 41asmlinkage void trace_page_fault(void);
42#define trace_stack_segment stack_segment
42#define trace_divide_error divide_error 43#define trace_divide_error divide_error
43#define trace_bounds bounds 44#define trace_bounds bounds
44#define trace_invalid_op invalid_op 45#define trace_invalid_op invalid_op
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index b436fc735aa4..a142e77693e1 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -397,7 +397,7 @@ static int mp_register_gsi(struct device *dev, u32 gsi, int trigger,
397 397
398 /* Don't set up the ACPI SCI because it's already set up */ 398 /* Don't set up the ACPI SCI because it's already set up */
399 if (acpi_gbl_FADT.sci_interrupt == gsi) 399 if (acpi_gbl_FADT.sci_interrupt == gsi)
400 return gsi; 400 return mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC);
401 401
402 trigger = trigger == ACPI_EDGE_SENSITIVE ? 0 : 1; 402 trigger = trigger == ACPI_EDGE_SENSITIVE ? 0 : 1;
403 polarity = polarity == ACPI_ACTIVE_HIGH ? 0 : 1; 403 polarity = polarity == ACPI_ACTIVE_HIGH ? 0 : 1;
@@ -604,14 +604,18 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
604 604
605int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp) 605int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp)
606{ 606{
607 int irq = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC | IOAPIC_MAP_CHECK); 607 int irq;
608 608
609 if (irq >= 0) { 609 if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
610 *irqp = gsi;
611 } else {
612 irq = mp_map_gsi_to_irq(gsi,
613 IOAPIC_MAP_ALLOC | IOAPIC_MAP_CHECK);
614 if (irq < 0)
615 return -1;
610 *irqp = irq; 616 *irqp = irq;
611 return 0;
612 } 617 }
613 618 return 0;
614 return -1;
615} 619}
616EXPORT_SYMBOL_GPL(acpi_gsi_to_irq); 620EXPORT_SYMBOL_GPL(acpi_gsi_to_irq);
617 621
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index 5972b108f15a..b708738d016e 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -185,8 +185,6 @@ static void apbt_setup_irq(struct apbt_dev *adev)
185 185
186 irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT); 186 irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT);
187 irq_set_affinity(adev->irq, cpumask_of(adev->cpu)); 187 irq_set_affinity(adev->irq, cpumask_of(adev->cpu));
188 /* APB timer irqs are set up as mp_irqs, timer is edge type */
189 __irq_set_handler(adev->irq, handle_edge_irq, 0, "edge");
190} 188}
191 189
192/* Should be called with per cpu */ 190/* Should be called with per cpu */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 00853b254ab0..ba6cc041edb1 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1297,7 +1297,7 @@ void setup_local_APIC(void)
1297 unsigned int value, queued; 1297 unsigned int value, queued;
1298 int i, j, acked = 0; 1298 int i, j, acked = 0;
1299 unsigned long long tsc = 0, ntsc; 1299 unsigned long long tsc = 0, ntsc;
1300 long long max_loops = cpu_khz; 1300 long long max_loops = cpu_khz ? cpu_khz : 1000000;
1301 1301
1302 if (cpu_has_tsc) 1302 if (cpu_has_tsc)
1303 rdtscll(tsc); 1303 rdtscll(tsc);
@@ -1383,7 +1383,7 @@ void setup_local_APIC(void)
1383 break; 1383 break;
1384 } 1384 }
1385 if (queued) { 1385 if (queued) {
1386 if (cpu_has_tsc) { 1386 if (cpu_has_tsc && cpu_khz) {
1387 rdtscll(ntsc); 1387 rdtscll(ntsc);
1388 max_loops = (cpu_khz << 10) - (ntsc - tsc); 1388 max_loops = (cpu_khz << 10) - (ntsc - tsc);
1389 } else 1389 } else
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 01d5453b5502..e27b49d7c922 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -39,9 +39,12 @@ obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o
39endif 39endif
40obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o 40obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o
41obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o 41obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
42obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o perf_event_intel_uncore_snb.o
43obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore_snbep.o perf_event_intel_uncore_nhmex.o
44obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_rapl.o 42obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_rapl.o
43
44obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += perf_event_intel_uncore.o \
45 perf_event_intel_uncore_snb.o \
46 perf_event_intel_uncore_snbep.o \
47 perf_event_intel_uncore_nhmex.o
45endif 48endif
46 49
47 50
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4b4f78c9ba19..cfa9b5b2c27a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -146,6 +146,8 @@ EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
146 146
147static int __init x86_xsave_setup(char *s) 147static int __init x86_xsave_setup(char *s)
148{ 148{
149 if (strlen(s))
150 return 0;
149 setup_clear_cpu_cap(X86_FEATURE_XSAVE); 151 setup_clear_cpu_cap(X86_FEATURE_XSAVE);
150 setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); 152 setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
151 setup_clear_cpu_cap(X86_FEATURE_XSAVES); 153 setup_clear_cpu_cap(X86_FEATURE_XSAVES);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 1ef456273172..9cc6b6f25f42 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -213,12 +213,13 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
213{ 213{
214#ifdef CONFIG_X86_F00F_BUG 214#ifdef CONFIG_X86_F00F_BUG
215 /* 215 /*
216 * All current models of Pentium and Pentium with MMX technology CPUs 216 * All models of Pentium and Pentium with MMX technology CPUs
217 * have the F0 0F bug, which lets nonprivileged users lock up the 217 * have the F0 0F bug, which lets nonprivileged users lock up the
218 * system. Announce that the fault handler will be checking for it. 218 * system. Announce that the fault handler will be checking for it.
219 * The Quark is also family 5, but does not have the same bug.
219 */ 220 */
220 clear_cpu_bug(c, X86_BUG_F00F); 221 clear_cpu_bug(c, X86_BUG_F00F);
221 if (!paravirt_enabled() && c->x86 == 5) { 222 if (!paravirt_enabled() && c->x86 == 5 && c->x86_model < 9) {
222 static int f00f_workaround_enabled; 223 static int f00f_workaround_enabled;
223 224
224 set_cpu_bug(c, X86_BUG_F00F); 225 set_cpu_bug(c, X86_BUG_F00F);
diff --git a/arch/x86/kernel/cpu/microcode/amd_early.c b/arch/x86/kernel/cpu/microcode/amd_early.c
index 7aa1acc79789..06674473b0e6 100644
--- a/arch/x86/kernel/cpu/microcode/amd_early.c
+++ b/arch/x86/kernel/cpu/microcode/amd_early.c
@@ -108,12 +108,13 @@ static size_t compute_container_size(u8 *data, u32 total_size)
108 * load_microcode_amd() to save equivalent cpu table and microcode patches in 108 * load_microcode_amd() to save equivalent cpu table and microcode patches in
109 * kernel heap memory. 109 * kernel heap memory.
110 */ 110 */
111static void apply_ucode_in_initrd(void *ucode, size_t size) 111static void apply_ucode_in_initrd(void *ucode, size_t size, bool save_patch)
112{ 112{
113 struct equiv_cpu_entry *eq; 113 struct equiv_cpu_entry *eq;
114 size_t *cont_sz; 114 size_t *cont_sz;
115 u32 *header; 115 u32 *header;
116 u8 *data, **cont; 116 u8 *data, **cont;
117 u8 (*patch)[PATCH_MAX_SIZE];
117 u16 eq_id = 0; 118 u16 eq_id = 0;
118 int offset, left; 119 int offset, left;
119 u32 rev, eax, ebx, ecx, edx; 120 u32 rev, eax, ebx, ecx, edx;
@@ -123,10 +124,12 @@ static void apply_ucode_in_initrd(void *ucode, size_t size)
123 new_rev = (u32 *)__pa_nodebug(&ucode_new_rev); 124 new_rev = (u32 *)__pa_nodebug(&ucode_new_rev);
124 cont_sz = (size_t *)__pa_nodebug(&container_size); 125 cont_sz = (size_t *)__pa_nodebug(&container_size);
125 cont = (u8 **)__pa_nodebug(&container); 126 cont = (u8 **)__pa_nodebug(&container);
127 patch = (u8 (*)[PATCH_MAX_SIZE])__pa_nodebug(&amd_ucode_patch);
126#else 128#else
127 new_rev = &ucode_new_rev; 129 new_rev = &ucode_new_rev;
128 cont_sz = &container_size; 130 cont_sz = &container_size;
129 cont = &container; 131 cont = &container;
132 patch = &amd_ucode_patch;
130#endif 133#endif
131 134
132 data = ucode; 135 data = ucode;
@@ -213,9 +216,9 @@ static void apply_ucode_in_initrd(void *ucode, size_t size)
213 rev = mc->hdr.patch_id; 216 rev = mc->hdr.patch_id;
214 *new_rev = rev; 217 *new_rev = rev;
215 218
216 /* save ucode patch */ 219 if (save_patch)
217 memcpy(amd_ucode_patch, mc, 220 memcpy(patch, mc,
218 min_t(u32, header[1], PATCH_MAX_SIZE)); 221 min_t(u32, header[1], PATCH_MAX_SIZE));
219 } 222 }
220 } 223 }
221 224
@@ -246,7 +249,7 @@ void __init load_ucode_amd_bsp(void)
246 *data = cp.data; 249 *data = cp.data;
247 *size = cp.size; 250 *size = cp.size;
248 251
249 apply_ucode_in_initrd(cp.data, cp.size); 252 apply_ucode_in_initrd(cp.data, cp.size, true);
250} 253}
251 254
252#ifdef CONFIG_X86_32 255#ifdef CONFIG_X86_32
@@ -263,7 +266,7 @@ void load_ucode_amd_ap(void)
263 size_t *usize; 266 size_t *usize;
264 void **ucode; 267 void **ucode;
265 268
266 mc = (struct microcode_amd *)__pa(amd_ucode_patch); 269 mc = (struct microcode_amd *)__pa_nodebug(amd_ucode_patch);
267 if (mc->hdr.patch_id && mc->hdr.processor_rev_id) { 270 if (mc->hdr.patch_id && mc->hdr.processor_rev_id) {
268 __apply_microcode_amd(mc); 271 __apply_microcode_amd(mc);
269 return; 272 return;
@@ -275,7 +278,7 @@ void load_ucode_amd_ap(void)
275 if (!*ucode || !*usize) 278 if (!*ucode || !*usize)
276 return; 279 return;
277 280
278 apply_ucode_in_initrd(*ucode, *usize); 281 apply_ucode_in_initrd(*ucode, *usize, false);
279} 282}
280 283
281static void __init collect_cpu_sig_on_bsp(void *arg) 284static void __init collect_cpu_sig_on_bsp(void *arg)
@@ -339,7 +342,7 @@ void load_ucode_amd_ap(void)
339 * AP has a different equivalence ID than BSP, looks like 342 * AP has a different equivalence ID than BSP, looks like
340 * mixed-steppings silicon so go through the ucode blob anew. 343 * mixed-steppings silicon so go through the ucode blob anew.
341 */ 344 */
342 apply_ucode_in_initrd(ucode_cpio.data, ucode_cpio.size); 345 apply_ucode_in_initrd(ucode_cpio.data, ucode_cpio.size, false);
343 } 346 }
344} 347}
345#endif 348#endif
@@ -347,7 +350,9 @@ void load_ucode_amd_ap(void)
347int __init save_microcode_in_initrd_amd(void) 350int __init save_microcode_in_initrd_amd(void)
348{ 351{
349 unsigned long cont; 352 unsigned long cont;
353 int retval = 0;
350 enum ucode_state ret; 354 enum ucode_state ret;
355 u8 *cont_va;
351 u32 eax; 356 u32 eax;
352 357
353 if (!container) 358 if (!container)
@@ -355,13 +360,15 @@ int __init save_microcode_in_initrd_amd(void)
355 360
356#ifdef CONFIG_X86_32 361#ifdef CONFIG_X86_32
357 get_bsp_sig(); 362 get_bsp_sig();
358 cont = (unsigned long)container; 363 cont = (unsigned long)container;
364 cont_va = __va(container);
359#else 365#else
360 /* 366 /*
361 * We need the physical address of the container for both bitness since 367 * We need the physical address of the container for both bitness since
362 * boot_params.hdr.ramdisk_image is a physical address. 368 * boot_params.hdr.ramdisk_image is a physical address.
363 */ 369 */
364 cont = __pa(container); 370 cont = __pa(container);
371 cont_va = container;
365#endif 372#endif
366 373
367 /* 374 /*
@@ -372,6 +379,8 @@ int __init save_microcode_in_initrd_amd(void)
372 if (relocated_ramdisk) 379 if (relocated_ramdisk)
373 container = (u8 *)(__va(relocated_ramdisk) + 380 container = (u8 *)(__va(relocated_ramdisk) +
374 (cont - boot_params.hdr.ramdisk_image)); 381 (cont - boot_params.hdr.ramdisk_image));
382 else
383 container = cont_va;
375 384
376 if (ucode_new_rev) 385 if (ucode_new_rev)
377 pr_info("microcode: updated early to new patch_level=0x%08x\n", 386 pr_info("microcode: updated early to new patch_level=0x%08x\n",
@@ -382,7 +391,7 @@ int __init save_microcode_in_initrd_amd(void)
382 391
383 ret = load_microcode_amd(eax, container, container_size); 392 ret = load_microcode_amd(eax, container, container_size);
384 if (ret != UCODE_OK) 393 if (ret != UCODE_OK)
385 return -EINVAL; 394 retval = -EINVAL;
386 395
387 /* 396 /*
388 * This will be freed any msec now, stash patches for the current 397 * This will be freed any msec now, stash patches for the current
@@ -391,5 +400,5 @@ int __init save_microcode_in_initrd_amd(void)
391 container = NULL; 400 container = NULL;
392 container_size = 0; 401 container_size = 0;
393 402
394 return 0; 403 return retval;
395} 404}
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index dd9d6190b08d..2ce9051174e6 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -465,6 +465,14 @@ static void mc_bp_resume(void)
465 465
466 if (uci->valid && uci->mc) 466 if (uci->valid && uci->mc)
467 microcode_ops->apply_microcode(cpu); 467 microcode_ops->apply_microcode(cpu);
468 else if (!uci->mc)
469 /*
470 * We might resume and not have applied late microcode but still
471 * have a newer patch stashed from the early loader. We don't
472 * have it in uci->mc so we have to load it the same way we're
473 * applying patches early on the APs.
474 */
475 load_ucode_ap();
468} 476}
469 477
470static struct syscore_ops mc_syscore_ops = { 478static struct syscore_ops mc_syscore_ops = {
diff --git a/arch/x86/kernel/cpu/microcode/core_early.c b/arch/x86/kernel/cpu/microcode/core_early.c
index 5f28a64e71ea..2c017f242a78 100644
--- a/arch/x86/kernel/cpu/microcode/core_early.c
+++ b/arch/x86/kernel/cpu/microcode/core_early.c
@@ -124,7 +124,7 @@ void __init load_ucode_bsp(void)
124static bool check_loader_disabled_ap(void) 124static bool check_loader_disabled_ap(void)
125{ 125{
126#ifdef CONFIG_X86_32 126#ifdef CONFIG_X86_32
127 return __pa_nodebug(dis_ucode_ldr); 127 return *((bool *)__pa_nodebug(&dis_ucode_ldr));
128#else 128#else
129 return dis_ucode_ldr; 129 return dis_ucode_ldr;
130#endif 130#endif
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 1b8299dd3d91..143e5f5dc855 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -243,8 +243,9 @@ static bool check_hw_exists(void)
243 243
244msr_fail: 244msr_fail:
245 printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n"); 245 printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
246 printk(boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR 246 printk("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
247 "Failed to access perfctr msr (MSR %x is %Lx)\n", reg, val_new); 247 boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR,
248 reg, val_new);
248 249
249 return false; 250 return false;
250} 251}
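
The msr_fail hunk above fixes a printk pitfall: adjacent string literals concatenate at compile time, so in the old form the message text bound only to KERN_ERR, and when the KERN_INFO branch of the conditional was taken the format string was just the bare level prefix. A standalone user-space illustration (not kernel code):

#include <stdio.h>

/* Stand-ins for the kernel's log-level prefixes. */
#define KERN_INFO "<6>"
#define KERN_ERR  "<3>"

int main(void)
{
	int on_hypervisor = 1;

	/*
	 * Old pattern: the message concatenates with KERN_ERR only, so
	 * when the condition is true the "format" is just the KERN_INFO
	 * prefix and the message (plus, in the kernel case, the MSR
	 * number and value arguments) is silently dropped.
	 */
	printf("%s\n", on_hypervisor ? KERN_INFO
				     : KERN_ERR "Failed to access perfctr msr\n");

	/* New pattern: one format string, level prefix passed via %s. */
	printf("%sFailed to access perfctr msr\n",
	       on_hypervisor ? KERN_INFO : KERN_ERR);
	return 0;
}
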
@@ -444,12 +445,6 @@ int x86_pmu_hw_config(struct perf_event *event)
444 if (event->attr.type == PERF_TYPE_RAW) 445 if (event->attr.type == PERF_TYPE_RAW)
445 event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK; 446 event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
446 447
447 if (event->attr.sample_period && x86_pmu.limit_period) {
448 if (x86_pmu.limit_period(event, event->attr.sample_period) >
449 event->attr.sample_period)
450 return -EINVAL;
451 }
452
453 return x86_setup_perfctr(event); 448 return x86_setup_perfctr(event);
454} 449}
455 450
@@ -987,9 +982,6 @@ int x86_perf_event_set_period(struct perf_event *event)
987 if (left > x86_pmu.max_period) 982 if (left > x86_pmu.max_period)
988 left = x86_pmu.max_period; 983 left = x86_pmu.max_period;
989 984
990 if (x86_pmu.limit_period)
991 left = x86_pmu.limit_period(event, left);
992
993 per_cpu(pmc_prev_left[idx], smp_processor_id()) = left; 985 per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
994 986
995 /* 987 /*
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index d98a34d435d7..fc5eb390b368 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -445,7 +445,6 @@ struct x86_pmu {
445 struct x86_pmu_quirk *quirks; 445 struct x86_pmu_quirk *quirks;
446 int perfctr_second_write; 446 int perfctr_second_write;
447 bool late_ack; 447 bool late_ack;
448 unsigned (*limit_period)(struct perf_event *event, unsigned l);
449 448
450 /* 449 /*
451 * sysfs attrs 450 * sysfs attrs
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index a73947c53b65..944bf019b74f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -220,15 +220,6 @@ static struct event_constraint intel_hsw_event_constraints[] = {
220 EVENT_CONSTRAINT_END 220 EVENT_CONSTRAINT_END
221}; 221};
222 222
223static struct event_constraint intel_bdw_event_constraints[] = {
224 FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
225 FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
226 FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
227 INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */
228 INTEL_EVENT_CONSTRAINT(0xa3, 0x4), /* CYCLE_ACTIVITY.* */
229 EVENT_CONSTRAINT_END
230};
231
232static u64 intel_pmu_event_map(int hw_event) 223static u64 intel_pmu_event_map(int hw_event)
233{ 224{
234 return intel_perfmon_event_map[hw_event]; 225 return intel_perfmon_event_map[hw_event];
@@ -424,126 +415,6 @@ static __initconst const u64 snb_hw_cache_event_ids
424 415
425}; 416};
426 417
427static __initconst const u64 hsw_hw_cache_event_ids
428 [PERF_COUNT_HW_CACHE_MAX]
429 [PERF_COUNT_HW_CACHE_OP_MAX]
430 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
431{
432 [ C(L1D ) ] = {
433 [ C(OP_READ) ] = {
434 [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
435 [ C(RESULT_MISS) ] = 0x151, /* L1D.REPLACEMENT */
436 },
437 [ C(OP_WRITE) ] = {
438 [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
439 [ C(RESULT_MISS) ] = 0x0,
440 },
441 [ C(OP_PREFETCH) ] = {
442 [ C(RESULT_ACCESS) ] = 0x0,
443 [ C(RESULT_MISS) ] = 0x0,
444 },
445 },
446 [ C(L1I ) ] = {
447 [ C(OP_READ) ] = {
448 [ C(RESULT_ACCESS) ] = 0x0,
449 [ C(RESULT_MISS) ] = 0x280, /* ICACHE.MISSES */
450 },
451 [ C(OP_WRITE) ] = {
452 [ C(RESULT_ACCESS) ] = -1,
453 [ C(RESULT_MISS) ] = -1,
454 },
455 [ C(OP_PREFETCH) ] = {
456 [ C(RESULT_ACCESS) ] = 0x0,
457 [ C(RESULT_MISS) ] = 0x0,
458 },
459 },
460 [ C(LL ) ] = {
461 [ C(OP_READ) ] = {
462 /* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD */
463 [ C(RESULT_ACCESS) ] = 0x1b7,
464 /* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD|SUPPLIER_NONE|
465 L3_MISS|ANY_SNOOP */
466 [ C(RESULT_MISS) ] = 0x1b7,
467 },
468 [ C(OP_WRITE) ] = {
469 [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE:ALL_RFO */
470 /* OFFCORE_RESPONSE:ALL_RFO|SUPPLIER_NONE|L3_MISS|ANY_SNOOP */
471 [ C(RESULT_MISS) ] = 0x1b7,
472 },
473 [ C(OP_PREFETCH) ] = {
474 [ C(RESULT_ACCESS) ] = 0x0,
475 [ C(RESULT_MISS) ] = 0x0,
476 },
477 },
478 [ C(DTLB) ] = {
479 [ C(OP_READ) ] = {
480 [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
481 [ C(RESULT_MISS) ] = 0x108, /* DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK */
482 },
483 [ C(OP_WRITE) ] = {
484 [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
485 [ C(RESULT_MISS) ] = 0x149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
486 },
487 [ C(OP_PREFETCH) ] = {
488 [ C(RESULT_ACCESS) ] = 0x0,
489 [ C(RESULT_MISS) ] = 0x0,
490 },
491 },
492 [ C(ITLB) ] = {
493 [ C(OP_READ) ] = {
494 [ C(RESULT_ACCESS) ] = 0x6085, /* ITLB_MISSES.STLB_HIT */
495 [ C(RESULT_MISS) ] = 0x185, /* ITLB_MISSES.MISS_CAUSES_A_WALK */
496 },
497 [ C(OP_WRITE) ] = {
498 [ C(RESULT_ACCESS) ] = -1,
499 [ C(RESULT_MISS) ] = -1,
500 },
501 [ C(OP_PREFETCH) ] = {
502 [ C(RESULT_ACCESS) ] = -1,
503 [ C(RESULT_MISS) ] = -1,
504 },
505 },
506 [ C(BPU ) ] = {
507 [ C(OP_READ) ] = {
508 [ C(RESULT_ACCESS) ] = 0xc4, /* BR_INST_RETIRED.ALL_BRANCHES */
509 [ C(RESULT_MISS) ] = 0xc5, /* BR_MISP_RETIRED.ALL_BRANCHES */
510 },
511 [ C(OP_WRITE) ] = {
512 [ C(RESULT_ACCESS) ] = -1,
513 [ C(RESULT_MISS) ] = -1,
514 },
515 [ C(OP_PREFETCH) ] = {
516 [ C(RESULT_ACCESS) ] = -1,
517 [ C(RESULT_MISS) ] = -1,
518 },
519 },
520};
521
522static __initconst const u64 hsw_hw_cache_extra_regs
523 [PERF_COUNT_HW_CACHE_MAX]
524 [PERF_COUNT_HW_CACHE_OP_MAX]
525 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
526{
527 [ C(LL ) ] = {
528 [ C(OP_READ) ] = {
529 /* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD */
530 [ C(RESULT_ACCESS) ] = 0x2d5,
531 /* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD|SUPPLIER_NONE|
532 L3_MISS|ANY_SNOOP */
533 [ C(RESULT_MISS) ] = 0x3fbc0202d5ull,
534 },
535 [ C(OP_WRITE) ] = {
536 [ C(RESULT_ACCESS) ] = 0x122, /* OFFCORE_RESPONSE:ALL_RFO */
537 /* OFFCORE_RESPONSE:ALL_RFO|SUPPLIER_NONE|L3_MISS|ANY_SNOOP */
538 [ C(RESULT_MISS) ] = 0x3fbc020122ull,
539 },
540 [ C(OP_PREFETCH) ] = {
541 [ C(RESULT_ACCESS) ] = 0x0,
542 [ C(RESULT_MISS) ] = 0x0,
543 },
544 },
545};
546
547static __initconst const u64 westmere_hw_cache_event_ids 418static __initconst const u64 westmere_hw_cache_event_ids
548 [PERF_COUNT_HW_CACHE_MAX] 419 [PERF_COUNT_HW_CACHE_MAX]
549 [PERF_COUNT_HW_CACHE_OP_MAX] 420 [PERF_COUNT_HW_CACHE_OP_MAX]
@@ -2034,24 +1905,6 @@ hsw_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
2034 return c; 1905 return c;
2035} 1906}
2036 1907
2037/*
2038 * Broadwell:
2039 * The INST_RETIRED.ALL period always needs to have lowest
2040 * 6bits cleared (BDM57). It shall not use a period smaller
2041 * than 100 (BDM11). We combine the two to enforce
2042 * a min-period of 128.
2043 */
2044static unsigned bdw_limit_period(struct perf_event *event, unsigned left)
2045{
2046 if ((event->hw.config & INTEL_ARCH_EVENT_MASK) ==
2047 X86_CONFIG(.event=0xc0, .umask=0x01)) {
2048 if (left < 128)
2049 left = 128;
2050 left &= ~0x3fu;
2051 }
2052 return left;
2053}
2054
2055PMU_FORMAT_ATTR(event, "config:0-7" ); 1908PMU_FORMAT_ATTR(event, "config:0-7" );
2056PMU_FORMAT_ATTR(umask, "config:8-15" ); 1909PMU_FORMAT_ATTR(umask, "config:8-15" );
2057PMU_FORMAT_ATTR(edge, "config:18" ); 1910PMU_FORMAT_ATTR(edge, "config:18" );
@@ -2692,8 +2545,8 @@ __init int intel_pmu_init(void)
2692 case 69: /* 22nm Haswell ULT */ 2545 case 69: /* 22nm Haswell ULT */
2693 case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */ 2546 case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
2694 x86_pmu.late_ack = true; 2547 x86_pmu.late_ack = true;
2695 memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids)); 2548 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids));
2696 memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); 2549 memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
2697 2550
2698 intel_pmu_lbr_init_snb(); 2551 intel_pmu_lbr_init_snb();
2699 2552
@@ -2712,28 +2565,6 @@ __init int intel_pmu_init(void)
2712 pr_cont("Haswell events, "); 2565 pr_cont("Haswell events, ");
2713 break; 2566 break;
2714 2567
2715 case 61: /* 14nm Broadwell Core-M */
2716 x86_pmu.late_ack = true;
2717 memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
2718 memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
2719
2720 intel_pmu_lbr_init_snb();
2721
2722 x86_pmu.event_constraints = intel_bdw_event_constraints;
2723 x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
2724 x86_pmu.extra_regs = intel_snbep_extra_regs;
2725 x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
2726 /* all extra regs are per-cpu when HT is on */
2727 x86_pmu.er_flags |= ERF_HAS_RSP_1;
2728 x86_pmu.er_flags |= ERF_NO_HT_SHARING;
2729
2730 x86_pmu.hw_config = hsw_hw_config;
2731 x86_pmu.get_event_constraints = hsw_get_event_constraints;
2732 x86_pmu.cpu_events = hsw_events_attrs;
2733 x86_pmu.limit_period = bdw_limit_period;
2734 pr_cont("Broadwell events, ");
2735 break;
2736
2737 default: 2568 default:
2738 switch (x86_pmu.version) { 2569 switch (x86_pmu.version) {
2739 case 1: 2570 case 1:
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
index adf138eac85c..f9ed429d6e4f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
@@ -486,14 +486,17 @@ static struct attribute_group snbep_uncore_qpi_format_group = {
486 .attrs = snbep_uncore_qpi_formats_attr, 486 .attrs = snbep_uncore_qpi_formats_attr,
487}; 487};
488 488
489#define SNBEP_UNCORE_MSR_OPS_COMMON_INIT() \ 489#define __SNBEP_UNCORE_MSR_OPS_COMMON_INIT() \
490 .init_box = snbep_uncore_msr_init_box, \
491 .disable_box = snbep_uncore_msr_disable_box, \ 490 .disable_box = snbep_uncore_msr_disable_box, \
492 .enable_box = snbep_uncore_msr_enable_box, \ 491 .enable_box = snbep_uncore_msr_enable_box, \
493 .disable_event = snbep_uncore_msr_disable_event, \ 492 .disable_event = snbep_uncore_msr_disable_event, \
494 .enable_event = snbep_uncore_msr_enable_event, \ 493 .enable_event = snbep_uncore_msr_enable_event, \
495 .read_counter = uncore_msr_read_counter 494 .read_counter = uncore_msr_read_counter
496 495
496#define SNBEP_UNCORE_MSR_OPS_COMMON_INIT() \
497 __SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), \
498 .init_box = snbep_uncore_msr_init_box \
499
497static struct intel_uncore_ops snbep_uncore_msr_ops = { 500static struct intel_uncore_ops snbep_uncore_msr_ops = {
498 SNBEP_UNCORE_MSR_OPS_COMMON_INIT(), 501 SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
499}; 502};
@@ -1919,6 +1922,30 @@ static struct intel_uncore_type hswep_uncore_cbox = {
1919 .format_group = &hswep_uncore_cbox_format_group, 1922 .format_group = &hswep_uncore_cbox_format_group,
1920}; 1923};
1921 1924
1925/*
1926 * Write SBOX Initialization register bit by bit to avoid spurious #GPs
1927 */
1928static void hswep_uncore_sbox_msr_init_box(struct intel_uncore_box *box)
1929{
1930 unsigned msr = uncore_msr_box_ctl(box);
1931
1932 if (msr) {
1933 u64 init = SNBEP_PMON_BOX_CTL_INT;
1934 u64 flags = 0;
1935 int i;
1936
1937 for_each_set_bit(i, (unsigned long *)&init, 64) {
1938 flags |= (1ULL << i);
1939 wrmsrl(msr, flags);
1940 }
1941 }
1942}
1943
1944static struct intel_uncore_ops hswep_uncore_sbox_msr_ops = {
1945 __SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
1946 .init_box = hswep_uncore_sbox_msr_init_box
1947};
1948
1922static struct attribute *hswep_uncore_sbox_formats_attr[] = { 1949static struct attribute *hswep_uncore_sbox_formats_attr[] = {
1923 &format_attr_event.attr, 1950 &format_attr_event.attr,
1924 &format_attr_umask.attr, 1951 &format_attr_umask.attr,
@@ -1944,7 +1971,7 @@ static struct intel_uncore_type hswep_uncore_sbox = {
1944 .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK, 1971 .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK,
1945 .box_ctl = HSWEP_S0_MSR_PMON_BOX_CTL, 1972 .box_ctl = HSWEP_S0_MSR_PMON_BOX_CTL,
1946 .msr_offset = HSWEP_SBOX_MSR_OFFSET, 1973 .msr_offset = HSWEP_SBOX_MSR_OFFSET,
1947 .ops = &snbep_uncore_msr_ops, 1974 .ops = &hswep_uncore_sbox_msr_ops,
1948 .format_group = &hswep_uncore_sbox_format_group, 1975 .format_group = &hswep_uncore_sbox_format_group,
1949}; 1976};
1950 1977
@@ -2025,13 +2052,27 @@ static struct intel_uncore_type hswep_uncore_imc = {
2025 SNBEP_UNCORE_PCI_COMMON_INIT(), 2052 SNBEP_UNCORE_PCI_COMMON_INIT(),
2026}; 2053};
2027 2054
2055static unsigned hswep_uncore_irp_ctrs[] = {0xa0, 0xa8, 0xb0, 0xb8};
2056
2057static u64 hswep_uncore_irp_read_counter(struct intel_uncore_box *box, struct perf_event *event)
2058{
2059 struct pci_dev *pdev = box->pci_dev;
2060 struct hw_perf_event *hwc = &event->hw;
2061 u64 count = 0;
2062
2063 pci_read_config_dword(pdev, hswep_uncore_irp_ctrs[hwc->idx], (u32 *)&count);
2064 pci_read_config_dword(pdev, hswep_uncore_irp_ctrs[hwc->idx] + 4, (u32 *)&count + 1);
2065
2066 return count;
2067}
2068
2028static struct intel_uncore_ops hswep_uncore_irp_ops = { 2069static struct intel_uncore_ops hswep_uncore_irp_ops = {
2029 .init_box = snbep_uncore_pci_init_box, 2070 .init_box = snbep_uncore_pci_init_box,
2030 .disable_box = snbep_uncore_pci_disable_box, 2071 .disable_box = snbep_uncore_pci_disable_box,
2031 .enable_box = snbep_uncore_pci_enable_box, 2072 .enable_box = snbep_uncore_pci_enable_box,
2032 .disable_event = ivbep_uncore_irp_disable_event, 2073 .disable_event = ivbep_uncore_irp_disable_event,
2033 .enable_event = ivbep_uncore_irp_enable_event, 2074 .enable_event = ivbep_uncore_irp_enable_event,
2034 .read_counter = ivbep_uncore_irp_read_counter, 2075 .read_counter = hswep_uncore_irp_read_counter,
2035}; 2076};
2036 2077
2037static struct intel_uncore_type hswep_uncore_irp = { 2078static struct intel_uncore_type hswep_uncore_irp = {
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 1abcb50b48ae..ff86f19b5758 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -24,7 +24,6 @@ static char x86_stack_ids[][8] = {
24 [ DEBUG_STACK-1 ] = "#DB", 24 [ DEBUG_STACK-1 ] = "#DB",
25 [ NMI_STACK-1 ] = "NMI", 25 [ NMI_STACK-1 ] = "NMI",
26 [ DOUBLEFAULT_STACK-1 ] = "#DF", 26 [ DOUBLEFAULT_STACK-1 ] = "#DF",
27 [ STACKFAULT_STACK-1 ] = "#SS",
28 [ MCE_STACK-1 ] = "#MC", 27 [ MCE_STACK-1 ] = "#MC",
29#if DEBUG_STKSZ > EXCEPTION_STKSZ 28#if DEBUG_STKSZ > EXCEPTION_STKSZ
30 [ N_EXCEPTION_STACKS ... 29 [ N_EXCEPTION_STACKS ...
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index b553ed89e5f5..344b63f18d14 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -447,15 +447,14 @@ sysenter_exit:
447sysenter_audit: 447sysenter_audit:
448 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp) 448 testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
449 jnz syscall_trace_entry 449 jnz syscall_trace_entry
450 addl $4,%esp 450 /* movl PT_EAX(%esp), %eax already set, syscall number: 1st arg to audit */
451 CFI_ADJUST_CFA_OFFSET -4 451 movl PT_EBX(%esp), %edx /* ebx/a0: 2nd arg to audit */
452 movl %esi,4(%esp) /* 5th arg: 4th syscall arg */ 452 /* movl PT_ECX(%esp), %ecx already set, a1: 3nd arg to audit */
453 movl %edx,(%esp) /* 4th arg: 3rd syscall arg */ 453 pushl_cfi PT_ESI(%esp) /* a3: 5th arg */
454 /* %ecx already in %ecx 3rd arg: 2nd syscall arg */ 454 pushl_cfi PT_EDX+4(%esp) /* a2: 4th arg */
455 movl %ebx,%edx /* 2nd arg: 1st syscall arg */
456 /* %eax already in %eax 1st arg: syscall number */
457 call __audit_syscall_entry 455 call __audit_syscall_entry
458 pushl_cfi %ebx 456 popl_cfi %ecx /* get that remapped edx off the stack */
457 popl_cfi %ecx /* get that remapped esi off the stack */
459 movl PT_EAX(%esp),%eax /* reload syscall number */ 458 movl PT_EAX(%esp),%eax /* reload syscall number */
460 jmp sysenter_do_call 459 jmp sysenter_do_call
461 460
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index df088bb03fb3..c0226ab54106 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -828,9 +828,15 @@ ENTRY(native_iret)
828 jnz native_irq_return_ldt 828 jnz native_irq_return_ldt
829#endif 829#endif
830 830
831.global native_irq_return_iret
831native_irq_return_iret: 832native_irq_return_iret:
833 /*
834 * This may fault. Non-paranoid faults on return to userspace are
835 * handled by fixup_bad_iret. These include #SS, #GP, and #NP.
836 * Double-faults due to espfix64 are handled in do_double_fault.
837 * Other faults here are fatal.
838 */
832 iretq 839 iretq
833 _ASM_EXTABLE(native_irq_return_iret, bad_iret)
834 840
835#ifdef CONFIG_X86_ESPFIX64 841#ifdef CONFIG_X86_ESPFIX64
836native_irq_return_ldt: 842native_irq_return_ldt:
@@ -858,25 +864,6 @@ native_irq_return_ldt:
858 jmp native_irq_return_iret 864 jmp native_irq_return_iret
859#endif 865#endif
860 866
861 .section .fixup,"ax"
862bad_iret:
863 /*
864 * The iret traps when the %cs or %ss being restored is bogus.
865 * We've lost the original trap vector and error code.
866 * #GPF is the most likely one to get for an invalid selector.
867 * So pretend we completed the iret and took the #GPF in user mode.
868 *
869 * We are now running with the kernel GS after exception recovery.
870 * But error_entry expects us to have user GS to match the user %cs,
871 * so swap back.
872 */
873 pushq $0
874
875 SWAPGS
876 jmp general_protection
877
878 .previous
879
880 /* edi: workmask, edx: work */ 867 /* edi: workmask, edx: work */
881retint_careful: 868retint_careful:
882 CFI_RESTORE_STATE 869 CFI_RESTORE_STATE
@@ -922,37 +909,6 @@ ENTRY(retint_kernel)
922 CFI_ENDPROC 909 CFI_ENDPROC
923END(common_interrupt) 910END(common_interrupt)
924 911
925 /*
926 * If IRET takes a fault on the espfix stack, then we
927 * end up promoting it to a doublefault. In that case,
928 * modify the stack to make it look like we just entered
929 * the #GP handler from user space, similar to bad_iret.
930 */
931#ifdef CONFIG_X86_ESPFIX64
932 ALIGN
933__do_double_fault:
934 XCPT_FRAME 1 RDI+8
935 movq RSP(%rdi),%rax /* Trap on the espfix stack? */
936 sarq $PGDIR_SHIFT,%rax
937 cmpl $ESPFIX_PGD_ENTRY,%eax
938 jne do_double_fault /* No, just deliver the fault */
939 cmpl $__KERNEL_CS,CS(%rdi)
940 jne do_double_fault
941 movq RIP(%rdi),%rax
942 cmpq $native_irq_return_iret,%rax
943 jne do_double_fault /* This shouldn't happen... */
944 movq PER_CPU_VAR(kernel_stack),%rax
945 subq $(6*8-KERNEL_STACK_OFFSET),%rax /* Reset to original stack */
946 movq %rax,RSP(%rdi)
947 movq $0,(%rax) /* Missing (lost) #GP error code */
948 movq $general_protection,RIP(%rdi)
949 retq
950 CFI_ENDPROC
951END(__do_double_fault)
952#else
953# define __do_double_fault do_double_fault
954#endif
955
956/* 912/*
957 * APIC interrupts. 913 * APIC interrupts.
958 */ 914 */
@@ -1124,7 +1080,7 @@ idtentry overflow do_overflow has_error_code=0
1124idtentry bounds do_bounds has_error_code=0 1080idtentry bounds do_bounds has_error_code=0
1125idtentry invalid_op do_invalid_op has_error_code=0 1081idtentry invalid_op do_invalid_op has_error_code=0
1126idtentry device_not_available do_device_not_available has_error_code=0 1082idtentry device_not_available do_device_not_available has_error_code=0
1127idtentry double_fault __do_double_fault has_error_code=1 paranoid=1 1083idtentry double_fault do_double_fault has_error_code=1 paranoid=1
1128idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0 1084idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0
1129idtentry invalid_TSS do_invalid_TSS has_error_code=1 1085idtentry invalid_TSS do_invalid_TSS has_error_code=1
1130idtentry segment_not_present do_segment_not_present has_error_code=1 1086idtentry segment_not_present do_segment_not_present has_error_code=1
@@ -1289,7 +1245,7 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
1289 1245
1290idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK 1246idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
1291idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK 1247idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
1292idtentry stack_segment do_stack_segment has_error_code=1 paranoid=1 1248idtentry stack_segment do_stack_segment has_error_code=1
1293#ifdef CONFIG_XEN 1249#ifdef CONFIG_XEN
1294idtentry xen_debug do_debug has_error_code=0 1250idtentry xen_debug do_debug has_error_code=0
1295idtentry xen_int3 do_int3 has_error_code=0 1251idtentry xen_int3 do_int3 has_error_code=0
@@ -1399,17 +1355,16 @@ error_sti:
1399 1355
1400/* 1356/*
1401 * There are two places in the kernel that can potentially fault with 1357 * There are two places in the kernel that can potentially fault with
1402 * usergs. Handle them here. The exception handlers after iret run with 1358 * usergs. Handle them here. B stepping K8s sometimes report a
1403 * kernel gs again, so don't set the user space flag. B stepping K8s 1359 * truncated RIP for IRET exceptions returning to compat mode. Check
1404 * sometimes report an truncated RIP for IRET exceptions returning to 1360 * for these here too.
1405 * compat mode. Check for these here too.
1406 */ 1361 */
1407error_kernelspace: 1362error_kernelspace:
1408 CFI_REL_OFFSET rcx, RCX+8 1363 CFI_REL_OFFSET rcx, RCX+8
1409 incl %ebx 1364 incl %ebx
1410 leaq native_irq_return_iret(%rip),%rcx 1365 leaq native_irq_return_iret(%rip),%rcx
1411 cmpq %rcx,RIP+8(%rsp) 1366 cmpq %rcx,RIP+8(%rsp)
1412 je error_swapgs 1367 je error_bad_iret
1413 movl %ecx,%eax /* zero extend */ 1368 movl %ecx,%eax /* zero extend */
1414 cmpq %rax,RIP+8(%rsp) 1369 cmpq %rax,RIP+8(%rsp)
1415 je bstep_iret 1370 je bstep_iret
@@ -1420,7 +1375,15 @@ error_kernelspace:
1420bstep_iret: 1375bstep_iret:
1421 /* Fix truncated RIP */ 1376 /* Fix truncated RIP */
1422 movq %rcx,RIP+8(%rsp) 1377 movq %rcx,RIP+8(%rsp)
1423 jmp error_swapgs 1378 /* fall through */
1379
1380error_bad_iret:
1381 SWAPGS
1382 mov %rsp,%rdi
1383 call fixup_bad_iret
1384 mov %rax,%rsp
1385 decl %ebx /* Return to usergs */
1386 jmp error_sti
1424 CFI_ENDPROC 1387 CFI_ENDPROC
1425END(error_entry) 1388END(error_entry)
1426 1389
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 8af817105e29..e7cc5370cd2f 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -111,8 +111,7 @@ static void make_8259A_irq(unsigned int irq)
111{ 111{
112 disable_irq_nosync(irq); 112 disable_irq_nosync(irq);
113 io_apic_irqs &= ~(1<<irq); 113 io_apic_irqs &= ~(1<<irq);
114 irq_set_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq, 114 irq_set_chip_and_handler(irq, &i8259A_chip, handle_level_irq);
115 i8259A_chip.name);
116 enable_irq(irq); 115 enable_irq(irq);
117} 116}
118 117
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 44f1ed42fdf2..4de73ee78361 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -70,7 +70,6 @@ int vector_used_by_percpu_irq(unsigned int vector)
70void __init init_ISA_irqs(void) 70void __init init_ISA_irqs(void)
71{ 71{
72 struct irq_chip *chip = legacy_pic->chip; 72 struct irq_chip *chip = legacy_pic->chip;
73 const char *name = chip->name;
74 int i; 73 int i;
75 74
76#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) 75#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
@@ -79,7 +78,7 @@ void __init init_ISA_irqs(void)
79 legacy_pic->init(0); 78 legacy_pic->init(0);
80 79
81 for (i = 0; i < nr_legacy_irqs(); i++) 80 for (i = 0; i < nr_legacy_irqs(); i++)
82 irq_set_chip_and_handler_name(i, chip, handle_level_irq, name); 81 irq_set_chip_and_handler(i, chip, handle_level_irq);
83} 82}
84 83
85void __init init_IRQ(void) 84void __init init_IRQ(void)
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 749b0e423419..e510618b2e91 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1484,7 +1484,7 @@ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
1484 */ 1484 */
1485 if (work & _TIF_NOHZ) { 1485 if (work & _TIF_NOHZ) {
1486 user_exit(); 1486 user_exit();
1487 work &= ~TIF_NOHZ; 1487 work &= ~_TIF_NOHZ;
1488 } 1488 }
1489 1489
1490#ifdef CONFIG_SECCOMP 1490#ifdef CONFIG_SECCOMP
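
The one-character ptrace.c change above matters because TIF_NOHZ is a bit number while _TIF_NOHZ is the corresponding mask, so masking with ~TIF_NOHZ strips unrelated low bits and leaves the NOHZ flag set. A small standalone illustration (the bit value is hypothetical, not taken from the kernel headers):

#include <stdio.h>

#define TIF_NOHZ	19			/* hypothetical bit number */
#define _TIF_NOHZ	(1UL << TIF_NOHZ)	/* the corresponding mask */

int main(void)
{
	unsigned long work = _TIF_NOHZ | 0x3;	/* NOHZ plus two other flags */

	/* Old code: ~TIF_NOHZ is ~0x13, which clears bits 0, 1 and 4
	 * but leaves the NOHZ flag itself set. */
	printf("%#lx\n", work & ~TIF_NOHZ);	/* prints 0x80000 */

	/* Fixed code: clears only the NOHZ flag. */
	printf("%#lx\n", work & ~_TIF_NOHZ);	/* prints 0x3 */
	return 0;
}
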
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 235cfd39e0d7..ab08aa2276fb 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1128,7 +1128,6 @@ void __init setup_arch(char **cmdline_p)
1128 setup_real_mode(); 1128 setup_real_mode();
1129 1129
1130 memblock_set_current_limit(get_max_mapped()); 1130 memblock_set_current_limit(get_max_mapped());
1131 dma_contiguous_reserve(max_pfn_mapped << PAGE_SHIFT);
1132 1131
1133 /* 1132 /*
1134 * NOTE: On x86-32, only from this point on, fixmaps are ready for use. 1133 * NOTE: On x86-32, only from this point on, fixmaps are ready for use.
@@ -1159,6 +1158,7 @@ void __init setup_arch(char **cmdline_p)
1159 early_acpi_boot_init(); 1158 early_acpi_boot_init();
1160 1159
1161 initmem_init(); 1160 initmem_init();
1161 dma_contiguous_reserve(max_pfn_mapped << PAGE_SHIFT);
1162 1162
1163 /* 1163 /*
1164 * Reserve memory for crash kernel after SRAT is parsed so that it 1164 * Reserve memory for crash kernel after SRAT is parsed so that it
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 2d5200e56357..668d8f2a8781 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -102,8 +102,6 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
102DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); 102DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
103EXPORT_PER_CPU_SYMBOL(cpu_info); 103EXPORT_PER_CPU_SYMBOL(cpu_info);
104 104
105static DEFINE_PER_CPU(struct completion, die_complete);
106
107atomic_t init_deasserted; 105atomic_t init_deasserted;
108 106
109/* 107/*
@@ -1305,10 +1303,14 @@ static void __ref remove_cpu_from_maps(int cpu)
1305 numa_remove_cpu(cpu); 1303 numa_remove_cpu(cpu);
1306} 1304}
1307 1305
1306static DEFINE_PER_CPU(struct completion, die_complete);
1307
1308void cpu_disable_common(void) 1308void cpu_disable_common(void)
1309{ 1309{
1310 int cpu = smp_processor_id(); 1310 int cpu = smp_processor_id();
1311 1311
1312 init_completion(&per_cpu(die_complete, smp_processor_id()));
1313
1312 remove_siblinginfo(cpu); 1314 remove_siblinginfo(cpu);
1313 1315
1314 /* It's now safe to remove this processor from the online map */ 1316 /* It's now safe to remove this processor from the online map */
@@ -1327,16 +1329,21 @@ int native_cpu_disable(void)
1327 return ret; 1329 return ret;
1328 1330
1329 clear_local_APIC(); 1331 clear_local_APIC();
1330 init_completion(&per_cpu(die_complete, smp_processor_id()));
1331 cpu_disable_common(); 1332 cpu_disable_common();
1332 1333
1333 return 0; 1334 return 0;
1334} 1335}
1335 1336
1337void cpu_die_common(unsigned int cpu)
1338{
1339 wait_for_completion_timeout(&per_cpu(die_complete, cpu), HZ);
1340}
1341
1336void native_cpu_die(unsigned int cpu) 1342void native_cpu_die(unsigned int cpu)
1337{ 1343{
1338 /* We don't do anything here: idle task is faking death itself. */ 1344 /* We don't do anything here: idle task is faking death itself. */
1339 wait_for_completion_timeout(&per_cpu(die_complete, cpu), HZ); 1345
1346 cpu_die_common(cpu);
1340 1347
1341 /* They ack this in play_dead() by setting CPU_DEAD */ 1348 /* They ack this in play_dead() by setting CPU_DEAD */
1342 if (per_cpu(cpu_state, cpu) == CPU_DEAD) { 1349 if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
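
The smpboot.c changes above move initialization of the per-cpu die_complete into cpu_disable_common() and factor the timed wait out into cpu_die_common(), so callers other than native_cpu_die() (the Xen hunk at the end of this diff uses it) can share the same handshake. A rough userspace analogue of that init/complete/wait-with-timeout pattern, built on POSIX threads rather than kernel completions:

/* illustrative sketch, not part of the patch; compile with -lpthread */
#include <pthread.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

struct completion {                  /* crude stand-in for the kernel's struct completion */
	pthread_mutex_t lock;
	pthread_cond_t  cond;
	int             done;
};

static struct completion die_complete = {
	.lock = PTHREAD_MUTEX_INITIALIZER,
	.cond = PTHREAD_COND_INITIALIZER,
};

static void complete(struct completion *c)
{
	pthread_mutex_lock(&c->lock);
	c->done = 1;
	pthread_cond_signal(&c->cond);
	pthread_mutex_unlock(&c->lock);
}

static int wait_for_completion_timeout(struct completion *c, int secs)
{
	struct timespec ts;

	clock_gettime(CLOCK_REALTIME, &ts);
	ts.tv_sec += secs;

	pthread_mutex_lock(&c->lock);
	while (!c->done) {
		if (pthread_cond_timedwait(&c->cond, &c->lock, &ts))
			break;                            /* timed out */
	}
	pthread_mutex_unlock(&c->lock);
	return c->done;                                   /* 1: completed, 0: timed out */
}

static void *dying_cpu(void *unused)
{
	(void)unused;
	usleep(100 * 1000);                               /* pretend to tear the CPU down */
	complete(&die_complete);                          /* signalled from the dying CPU */
	return NULL;
}

int main(void)
{
	pthread_t t;

	/* die_complete is set up ahead of the wait (cpu_disable_common() in the patch) */
	pthread_create(&t, NULL, dying_cpu, NULL);
	printf("completed: %d\n", wait_for_completion_timeout(&die_complete, 1)); /* cpu_die_common() */
	pthread_join(t, NULL);
	return 0;
}
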
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 0d0e922fafc1..de801f22128a 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -233,32 +233,40 @@ DO_ERROR(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op)
233DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun",coprocessor_segment_overrun) 233DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun",coprocessor_segment_overrun)
234DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS) 234DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS)
235DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present) 235DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present)
236#ifdef CONFIG_X86_32
237DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment) 236DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment)
238#endif
239DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check) 237DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check)
240 238
241#ifdef CONFIG_X86_64 239#ifdef CONFIG_X86_64
242/* Runs on IST stack */ 240/* Runs on IST stack */
243dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code)
244{
245 enum ctx_state prev_state;
246
247 prev_state = exception_enter();
248 if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
249 X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) {
250 preempt_conditional_sti(regs);
251 do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL);
252 preempt_conditional_cli(regs);
253 }
254 exception_exit(prev_state);
255}
256
257dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) 241dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
258{ 242{
259 static const char str[] = "double fault"; 243 static const char str[] = "double fault";
260 struct task_struct *tsk = current; 244 struct task_struct *tsk = current;
261 245
246#ifdef CONFIG_X86_ESPFIX64
247 extern unsigned char native_irq_return_iret[];
248
249 /*
250 * If IRET takes a non-IST fault on the espfix64 stack, then we
251 * end up promoting it to a doublefault. In that case, modify
252 * the stack to make it look like we just entered the #GP
253 * handler from user space, similar to bad_iret.
254 */
255 if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY &&
256 regs->cs == __KERNEL_CS &&
257 regs->ip == (unsigned long)native_irq_return_iret)
258 {
259 struct pt_regs *normal_regs = task_pt_regs(current);
260
261 /* Fake a #GP(0) from userspace. */
262 memmove(&normal_regs->ip, (void *)regs->sp, 5*8);
263 normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */
264 regs->ip = (unsigned long)general_protection;
265 regs->sp = (unsigned long)&normal_regs->orig_ax;
266 return;
267 }
268#endif
269
262 exception_enter(); 270 exception_enter();
263 /* Return not checked because double fault cannot be ignored */ 271 /* Return not checked because double fault cannot be ignored */
264 notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); 272 notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
@@ -399,6 +407,35 @@ asmlinkage __visible struct pt_regs *sync_regs(struct pt_regs *eregs)
399 return regs; 407 return regs;
400} 408}
401NOKPROBE_SYMBOL(sync_regs); 409NOKPROBE_SYMBOL(sync_regs);
410
411struct bad_iret_stack {
412 void *error_entry_ret;
413 struct pt_regs regs;
414};
415
416asmlinkage __visible
417struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
418{
419 /*
420 * This is called from entry_64.S early in handling a fault
421 * caused by a bad iret to user mode. To handle the fault
422 * correctly, we want to move our stack frame to task_pt_regs
423 * and we want to pretend that the exception came from the
424 * iret target.
425 */
426 struct bad_iret_stack *new_stack =
427 container_of(task_pt_regs(current),
428 struct bad_iret_stack, regs);
429
430 /* Copy the IRET target to the new stack. */
431 memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
432
433 /* Copy the remainder of the stack from the current stack. */
434 memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip));
435
436 BUG_ON(!user_mode_vm(&new_stack->regs));
437 return new_stack;
438}
402#endif 439#endif
403 440
404/* 441/*
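
fixup_bad_iret() above rebuilds a sane frame at task_pt_regs(): container_of() recovers the enclosing bad_iret_stack from the pt_regs pointer, one memmove() pulls the five-word IRET frame from the location the saved sp points at, and a second memmove() copies everything up to the ip field from the current stack. The splicing itself is ordinary C; a simplified standalone sketch (the register layout below is invented, and both pieces are copied from the same source struct rather than from a separate hardware frame):

/* illustrative sketch, not part of the patch */
#include <stddef.h>
#include <stdio.h>
#include <string.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct regs {                          /* made-up layout, much smaller than pt_regs */
	unsigned long ax, cx, dx;
	unsigned long ip, cs, flags, sp, ss;   /* the 5-word hardware IRET frame */
};

struct bad_iret_stack {
	void *error_entry_ret;
	struct regs regs;
};

int main(void)
{
	struct bad_iret_stack src = {
		.error_entry_ret = (void *)0x1,
		.regs = { .ax = 0xaa, .ip = 0x1111, .cs = 0x10,
			  .flags = 0x2, .sp = 0x2222, .ss = 0x18 },
	};
	struct bad_iret_stack task_area = { 0 };     /* stands in for the task_pt_regs() slot */
	struct regs *task_regs = &task_area.regs;

	/* Recover the enclosing struct from a pointer to its .regs member. */
	struct bad_iret_stack *new_stack =
		container_of(task_regs, struct bad_iret_stack, regs);

	/* Tail first (ip..ss), then the head up to ip: the same two-memmove shape. */
	memmove(&new_stack->regs.ip, &src.regs.ip, 5 * sizeof(unsigned long));
	memmove(new_stack, &src, offsetof(struct bad_iret_stack, regs.ip));

	printf("ip=%#lx sp=%#lx ax=%#lx\n", task_regs->ip, task_regs->sp, task_regs->ax);
	return 0;
}
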
@@ -778,7 +815,7 @@ void __init trap_init(void)
778 set_intr_gate(X86_TRAP_OLD_MF, coprocessor_segment_overrun); 815 set_intr_gate(X86_TRAP_OLD_MF, coprocessor_segment_overrun);
779 set_intr_gate(X86_TRAP_TS, invalid_TSS); 816 set_intr_gate(X86_TRAP_TS, invalid_TSS);
780 set_intr_gate(X86_TRAP_NP, segment_not_present); 817 set_intr_gate(X86_TRAP_NP, segment_not_present);
781 set_intr_gate_ist(X86_TRAP_SS, &stack_segment, STACKFAULT_STACK); 818 set_intr_gate(X86_TRAP_SS, stack_segment);
782 set_intr_gate(X86_TRAP_GP, general_protection); 819 set_intr_gate(X86_TRAP_GP, general_protection);
783 set_intr_gate(X86_TRAP_SPURIOUS, spurious_interrupt_bug); 820 set_intr_gate(X86_TRAP_SPURIOUS, spurious_interrupt_bug);
784 set_intr_gate(X86_TRAP_MF, coprocessor_error); 821 set_intr_gate(X86_TRAP_MF, coprocessor_error);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index b6025f9e36c6..b7e50bba3bbb 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1166,14 +1166,17 @@ void __init tsc_init(void)
1166 1166
1167 x86_init.timers.tsc_pre_init(); 1167 x86_init.timers.tsc_pre_init();
1168 1168
1169 if (!cpu_has_tsc) 1169 if (!cpu_has_tsc) {
1170 setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
1170 return; 1171 return;
1172 }
1171 1173
1172 tsc_khz = x86_platform.calibrate_tsc(); 1174 tsc_khz = x86_platform.calibrate_tsc();
1173 cpu_khz = tsc_khz; 1175 cpu_khz = tsc_khz;
1174 1176
1175 if (!tsc_khz) { 1177 if (!tsc_khz) {
1176 mark_tsc_unstable("could not calculate TSC khz"); 1178 mark_tsc_unstable("could not calculate TSC khz");
1179 setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
1177 return; 1180 return;
1178 } 1181 }
1179 1182
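
Both early returns in tsc_init() now clear X86_FEATURE_TSC_DEADLINE_TIMER, since the local APIC deadline timer is unusable when the TSC is absent or could not be calibrated. The dependency amounts to conditionally clearing a dependent feature bit; a tiny sketch with invented flag values:

/* illustrative sketch, not part of the patch */
#include <stdio.h>

#define FEAT_TSC                 (1u << 0)
#define FEAT_TSC_DEADLINE_TIMER  (1u << 1)

static unsigned int features = FEAT_TSC | FEAT_TSC_DEADLINE_TIMER;

static void tsc_init_sketch(unsigned int tsc_khz)
{
	/* no TSC, or calibration failed: the deadline timer has to go too */
	if (!(features & FEAT_TSC) || tsc_khz == 0)
		features &= ~FEAT_TSC_DEADLINE_TIMER;
}

int main(void)
{
	tsc_init_sketch(0);                     /* calibration returned 0 kHz */
	printf("deadline timer usable: %s\n",
	       (features & FEAT_TSC_DEADLINE_TIMER) ? "yes" : "no");   /* no */
	return 0;
}
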
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 749f9fa38254..9f8a2faf5040 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -574,12 +574,14 @@ static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
574 case 4: 574 case 4:
575 ctxt->_eip = (u32)dst; 575 ctxt->_eip = (u32)dst;
576 break; 576 break;
577#ifdef CONFIG_X86_64
577 case 8: 578 case 8:
578 if ((cs_l && is_noncanonical_address(dst)) || 579 if ((cs_l && is_noncanonical_address(dst)) ||
579 (!cs_l && (dst & ~(u32)-1))) 580 (!cs_l && (dst >> 32) != 0))
580 return emulate_gp(ctxt, 0); 581 return emulate_gp(ctxt, 0);
581 ctxt->_eip = dst; 582 ctxt->_eip = dst;
582 break; 583 break;
584#endif
583 default: 585 default:
584 WARN(1, "unsupported eip assignment size\n"); 586 WARN(1, "unsupported eip assignment size\n");
585 } 587 }
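
The case 8 branch above now builds only on 64-bit hosts and rejects a far jump or return target that is non-canonical in long mode (or has bits above 31 set outside it). A canonical x86-64 address has bits 48..63 equal to bit 47, which is what the shift-left-then-arithmetic-shift-right trick tests. A standalone version of that check, assuming 48 implemented virtual-address bits and an arithmetic right shift (which the kernel's own check also relies on):

/* illustrative sketch, not part of the patch */
#include <stdint.h>
#include <stdio.h>

static int is_canonical(uint64_t addr)
{
	/* sign-extend from bit 47 and compare with the original */
	return (uint64_t)((int64_t)(addr << 16) >> 16) == addr;
}

int main(void)
{
	printf("%d\n", is_canonical(0x00007fffffffffffULL));  /* 1: top of the lower half     */
	printf("%d\n", is_canonical(0xffff800000000000ULL));  /* 1: bottom of the upper half  */
	printf("%d\n", is_canonical(0x0000800000000000ULL));  /* 0: inside the canonical hole */
	return 0;
}
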
@@ -641,7 +643,8 @@ static bool insn_aligned(struct x86_emulate_ctxt *ctxt, unsigned size)
641 643
642static int __linearize(struct x86_emulate_ctxt *ctxt, 644static int __linearize(struct x86_emulate_ctxt *ctxt,
643 struct segmented_address addr, 645 struct segmented_address addr,
644 unsigned size, bool write, bool fetch, 646 unsigned *max_size, unsigned size,
647 bool write, bool fetch,
645 ulong *linear) 648 ulong *linear)
646{ 649{
647 struct desc_struct desc; 650 struct desc_struct desc;
@@ -652,10 +655,15 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
652 unsigned cpl; 655 unsigned cpl;
653 656
654 la = seg_base(ctxt, addr.seg) + addr.ea; 657 la = seg_base(ctxt, addr.seg) + addr.ea;
658 *max_size = 0;
655 switch (ctxt->mode) { 659 switch (ctxt->mode) {
656 case X86EMUL_MODE_PROT64: 660 case X86EMUL_MODE_PROT64:
657 if (((signed long)la << 16) >> 16 != la) 661 if (((signed long)la << 16) >> 16 != la)
658 return emulate_gp(ctxt, 0); 662 return emulate_gp(ctxt, 0);
663
664 *max_size = min_t(u64, ~0u, (1ull << 48) - la);
665 if (size > *max_size)
666 goto bad;
659 break; 667 break;
660 default: 668 default:
661 usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL, 669 usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
@@ -673,20 +681,25 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
673 if ((ctxt->mode == X86EMUL_MODE_REAL) && !fetch && 681 if ((ctxt->mode == X86EMUL_MODE_REAL) && !fetch &&
674 (ctxt->d & NoBigReal)) { 682 (ctxt->d & NoBigReal)) {
675 /* la is between zero and 0xffff */ 683 /* la is between zero and 0xffff */
676 if (la > 0xffff || (u32)(la + size - 1) > 0xffff) 684 if (la > 0xffff)
677 goto bad; 685 goto bad;
686 *max_size = 0x10000 - la;
678 } else if ((desc.type & 8) || !(desc.type & 4)) { 687 } else if ((desc.type & 8) || !(desc.type & 4)) {
679 /* expand-up segment */ 688 /* expand-up segment */
680 if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim) 689 if (addr.ea > lim)
681 goto bad; 690 goto bad;
691 *max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea);
682 } else { 692 } else {
683 /* expand-down segment */ 693 /* expand-down segment */
684 if (addr.ea <= lim || (u32)(addr.ea + size - 1) <= lim) 694 if (addr.ea <= lim)
685 goto bad; 695 goto bad;
686 lim = desc.d ? 0xffffffff : 0xffff; 696 lim = desc.d ? 0xffffffff : 0xffff;
687 if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim) 697 if (addr.ea > lim)
688 goto bad; 698 goto bad;
699 *max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea);
689 } 700 }
701 if (size > *max_size)
702 goto bad;
690 cpl = ctxt->ops->cpl(ctxt); 703 cpl = ctxt->ops->cpl(ctxt);
691 if (!(desc.type & 8)) { 704 if (!(desc.type & 8)) {
692 /* data segment */ 705 /* data segment */
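
Instead of checking ea + size - 1 against the limit at every call, __linearize() now computes *max_size, the number of bytes reachable from the starting offset, and compares the requested size against it once. For an expand-up segment the valid offsets are [0, lim]; for an expand-down segment they are (lim, 0xffff] or (lim, 0xffffffff] depending on the d bit. A simplified standalone calculation of that bound (no descriptor parsing; the function name is invented):

/* illustrative sketch, not part of the patch */
#include <stdint.h>
#include <stdio.h>

static uint64_t max_access_size(uint32_t ea, uint32_t lim, int expand_down, int big)
{
	uint32_t upper = big ? 0xffffffffu : 0xffffu;

	if (!expand_down) {
		if (ea > lim)
			return 0;                       /* starts outside the segment */
		return (uint64_t)lim + 1 - ea;
	}
	if (ea <= lim || ea > upper)
		return 0;                               /* starts outside the segment */
	return (uint64_t)upper + 1 - ea;
}

int main(void)
{
	/* expand-up, limit 0xffff, start at 0xf000: 4096 bytes left */
	printf("%llu\n", (unsigned long long)max_access_size(0xf000, 0xffff, 0, 0));
	/* expand-down 16-bit segment, limit 0x0fff, start at 0x2000: 0xe000 bytes */
	printf("%llu\n", (unsigned long long)max_access_size(0x2000, 0x0fff, 1, 0));
	return 0;
}
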
@@ -711,9 +724,9 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
711 return X86EMUL_CONTINUE; 724 return X86EMUL_CONTINUE;
712bad: 725bad:
713 if (addr.seg == VCPU_SREG_SS) 726 if (addr.seg == VCPU_SREG_SS)
714 return emulate_ss(ctxt, sel); 727 return emulate_ss(ctxt, 0);
715 else 728 else
716 return emulate_gp(ctxt, sel); 729 return emulate_gp(ctxt, 0);
717} 730}
718 731
719static int linearize(struct x86_emulate_ctxt *ctxt, 732static int linearize(struct x86_emulate_ctxt *ctxt,
@@ -721,7 +734,8 @@ static int linearize(struct x86_emulate_ctxt *ctxt,
721 unsigned size, bool write, 734 unsigned size, bool write,
722 ulong *linear) 735 ulong *linear)
723{ 736{
724 return __linearize(ctxt, addr, size, write, false, linear); 737 unsigned max_size;
738 return __linearize(ctxt, addr, &max_size, size, write, false, linear);
725} 739}
726 740
727 741
@@ -746,17 +760,27 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
746static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size) 760static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
747{ 761{
748 int rc; 762 int rc;
749 unsigned size; 763 unsigned size, max_size;
750 unsigned long linear; 764 unsigned long linear;
751 int cur_size = ctxt->fetch.end - ctxt->fetch.data; 765 int cur_size = ctxt->fetch.end - ctxt->fetch.data;
752 struct segmented_address addr = { .seg = VCPU_SREG_CS, 766 struct segmented_address addr = { .seg = VCPU_SREG_CS,
753 .ea = ctxt->eip + cur_size }; 767 .ea = ctxt->eip + cur_size };
754 768
755 size = 15UL ^ cur_size; 769 /*
756 rc = __linearize(ctxt, addr, size, false, true, &linear); 770 * We do not know exactly how many bytes will be needed, and
771 * __linearize is expensive, so fetch as much as possible. We
772 * just have to avoid going beyond the 15 byte limit, the end
773 * of the segment, or the end of the page.
774 *
775 * __linearize is called with size 0 so that it does not do any
776 * boundary check itself. Instead, we use max_size to check
777 * against op_size.
778 */
779 rc = __linearize(ctxt, addr, &max_size, 0, false, true, &linear);
757 if (unlikely(rc != X86EMUL_CONTINUE)) 780 if (unlikely(rc != X86EMUL_CONTINUE))
758 return rc; 781 return rc;
759 782
783 size = min_t(unsigned, 15UL ^ cur_size, max_size);
760 size = min_t(unsigned, size, PAGE_SIZE - offset_in_page(linear)); 784 size = min_t(unsigned, size, PAGE_SIZE - offset_in_page(linear));
761 785
762 /* 786 /*
@@ -766,7 +790,8 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
766 * still, we must have hit the 15-byte boundary. 790 * still, we must have hit the 15-byte boundary.
767 */ 791 */
768 if (unlikely(size < op_size)) 792 if (unlikely(size < op_size))
769 return X86EMUL_UNHANDLEABLE; 793 return emulate_gp(ctxt, 0);
794
770 rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end, 795 rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end,
771 size, &ctxt->exception); 796 size, &ctxt->exception);
772 if (unlikely(rc != X86EMUL_CONTINUE)) 797 if (unlikely(rc != X86EMUL_CONTINUE))
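
The rewritten fetch above asks __linearize() for max_size with a size of 0, then clamps the opportunistic fetch to three limits at once: the 15-byte instruction-length cap (15UL ^ cur_size equals 15 - cur_size for cur_size <= 15), the end of the code segment (max_size), and the end of the current page. A small sketch of that clamping with made-up numbers:

/* illustrative sketch, not part of the patch */
#include <stdio.h>

#define PAGE_SIZE 4096UL
#define offset_in_page(x) ((unsigned long)(x) & (PAGE_SIZE - 1))

static unsigned long min_ul(unsigned long a, unsigned long b)
{
	return a < b ? a : b;
}

int main(void)
{
	unsigned long cur_size = 3;       /* bytes of the instruction already fetched */
	unsigned long max_size = 200;     /* bytes left before the segment limit */
	unsigned long linear   = 0x7ffa;  /* linear address of the next fetch */

	unsigned long size = 15UL ^ cur_size;                       /* 12 = 15 - cur_size */
	size = min_ul(size, max_size);                              /* still 12 */
	size = min_ul(size, PAGE_SIZE - offset_in_page(linear));    /* 6: page boundary wins */

	printf("fetch %lu bytes\n", size);
	return 0;
}
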
@@ -2012,7 +2037,7 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
2012 2037
2013 rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l); 2038 rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
2014 if (rc != X86EMUL_CONTINUE) { 2039 if (rc != X86EMUL_CONTINUE) {
2015 WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64); 2040 WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
2016 /* assigning eip failed; restore the old cs */ 2041 /* assigning eip failed; restore the old cs */
2017 ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS); 2042 ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS);
2018 return rc; 2043 return rc;
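
The WARN_ON fix above (repeated in the em_ret_far() hunk that follows) is an operator-precedence bug: ! binds tighter than !=, so !ctxt->mode != X86EMUL_MODE_PROT64 compares 0 or 1 against the mode constant and fires on essentially every path. A two-line demonstration (the enum values are made up; any nonzero constant shows the effect):

/* illustrative sketch, not part of the patch */
#include <stdio.h>

enum { MODE_REAL = 0, MODE_PROT32 = 2, MODE_PROT64 = 4 };   /* invented values */

int main(void)
{
	int mode = MODE_PROT64;

	printf("%d\n", !mode != MODE_PROT64);   /* 1: (!4) is 0, and 0 != 4 */
	printf("%d\n", mode != MODE_PROT64);    /* 0: what the check meant to ask */
	return 0;
}
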
@@ -2109,7 +2134,7 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
2109 return rc; 2134 return rc;
2110 rc = assign_eip_far(ctxt, eip, new_desc.l); 2135 rc = assign_eip_far(ctxt, eip, new_desc.l);
2111 if (rc != X86EMUL_CONTINUE) { 2136 if (rc != X86EMUL_CONTINUE) {
2112 WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64); 2137 WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
2113 ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS); 2138 ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
2114 } 2139 }
2115 return rc; 2140 return rc;
@@ -4262,6 +4287,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
4262 fetch_register_operand(op); 4287 fetch_register_operand(op);
4263 break; 4288 break;
4264 case OpCL: 4289 case OpCL:
4290 op->type = OP_IMM;
4265 op->bytes = 1; 4291 op->bytes = 1;
4266 op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff; 4292 op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff;
4267 break; 4293 break;
@@ -4269,6 +4295,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
4269 rc = decode_imm(ctxt, op, 1, true); 4295 rc = decode_imm(ctxt, op, 1, true);
4270 break; 4296 break;
4271 case OpOne: 4297 case OpOne:
4298 op->type = OP_IMM;
4272 op->bytes = 1; 4299 op->bytes = 1;
4273 op->val = 1; 4300 op->val = 1;
4274 break; 4301 break;
@@ -4327,21 +4354,27 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
4327 ctxt->memop.bytes = ctxt->op_bytes + 2; 4354 ctxt->memop.bytes = ctxt->op_bytes + 2;
4328 goto mem_common; 4355 goto mem_common;
4329 case OpES: 4356 case OpES:
4357 op->type = OP_IMM;
4330 op->val = VCPU_SREG_ES; 4358 op->val = VCPU_SREG_ES;
4331 break; 4359 break;
4332 case OpCS: 4360 case OpCS:
4361 op->type = OP_IMM;
4333 op->val = VCPU_SREG_CS; 4362 op->val = VCPU_SREG_CS;
4334 break; 4363 break;
4335 case OpSS: 4364 case OpSS:
4365 op->type = OP_IMM;
4336 op->val = VCPU_SREG_SS; 4366 op->val = VCPU_SREG_SS;
4337 break; 4367 break;
4338 case OpDS: 4368 case OpDS:
4369 op->type = OP_IMM;
4339 op->val = VCPU_SREG_DS; 4370 op->val = VCPU_SREG_DS;
4340 break; 4371 break;
4341 case OpFS: 4372 case OpFS:
4373 op->type = OP_IMM;
4342 op->val = VCPU_SREG_FS; 4374 op->val = VCPU_SREG_FS;
4343 break; 4375 break;
4344 case OpGS: 4376 case OpGS:
4377 op->type = OP_IMM;
4345 op->val = VCPU_SREG_GS; 4378 op->val = VCPU_SREG_GS;
4346 break; 4379 break;
4347 case OpImplicit: 4380 case OpImplicit:
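
Each added op->type = OP_IMM assignment above makes the operand's type explicit instead of inheriting whatever an earlier decode (or uninitialized stack memory) left behind, which could make the segment constant or the literal 1 be treated as a register or memory operand. A toy illustration of the hazard (enum and struct are invented stand-ins):

/* illustrative sketch, not part of the patch */
#include <stdio.h>

enum op_type { OP_NONE = 0, OP_REG, OP_MEM, OP_IMM };

struct operand {
	enum op_type  type;
	unsigned      bytes;
	unsigned long val;
};

static void decode_op_one(struct operand *op, int fixed)
{
	if (fixed)
		op->type = OP_IMM;   /* the one-line fix */
	op->bytes = 1;
	op->val   = 1;
}

int main(void)
{
	struct operand op = { .type = OP_MEM, .val = 0xdeadbeef };  /* stale from a previous decode */

	decode_op_one(&op, 0);
	printf("unfixed: type=%d (still OP_MEM, val read as an address)\n", op.type);

	decode_op_one(&op, 1);
	printf("fixed:   type=%d (OP_IMM, val is the immediate 1)\n", op.type);
	return 0;
}
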
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a8b76c4c95e2..3e556c68351b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4579,7 +4579,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
4579 vmcs_write32(TPR_THRESHOLD, 0); 4579 vmcs_write32(TPR_THRESHOLD, 0);
4580 } 4580 }
4581 4581
4582 kvm_vcpu_reload_apic_access_page(vcpu); 4582 kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
4583 4583
4584 if (vmx_vm_has_apicv(vcpu->kvm)) 4584 if (vmx_vm_has_apicv(vcpu->kvm))
4585 memset(&vmx->pi_desc, 0, sizeof(struct pi_desc)); 4585 memset(&vmx->pi_desc, 0, sizeof(struct pi_desc));
@@ -6426,6 +6426,8 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
6426 const unsigned long *fields = shadow_read_write_fields; 6426 const unsigned long *fields = shadow_read_write_fields;
6427 const int num_fields = max_shadow_read_write_fields; 6427 const int num_fields = max_shadow_read_write_fields;
6428 6428
6429 preempt_disable();
6430
6429 vmcs_load(shadow_vmcs); 6431 vmcs_load(shadow_vmcs);
6430 6432
6431 for (i = 0; i < num_fields; i++) { 6433 for (i = 0; i < num_fields; i++) {
@@ -6449,6 +6451,8 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
6449 6451
6450 vmcs_clear(shadow_vmcs); 6452 vmcs_clear(shadow_vmcs);
6451 vmcs_load(vmx->loaded_vmcs->vmcs); 6453 vmcs_load(vmx->loaded_vmcs->vmcs);
6454
6455 preempt_enable();
6452} 6456}
6453 6457
6454static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) 6458static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c
index 7609e0e421ec..1318f75d56e4 100644
--- a/arch/x86/lib/csum-wrappers_64.c
+++ b/arch/x86/lib/csum-wrappers_64.c
@@ -41,9 +41,8 @@ csum_partial_copy_from_user(const void __user *src, void *dst,
41 while (((unsigned long)src & 6) && len >= 2) { 41 while (((unsigned long)src & 6) && len >= 2) {
42 __u16 val16; 42 __u16 val16;
43 43
44 *errp = __get_user(val16, (const __u16 __user *)src); 44 if (__get_user(val16, (const __u16 __user *)src))
45 if (*errp) 45 goto out_err;
46 return isum;
47 46
48 *(__u16 *)dst = val16; 47 *(__u16 *)dst = val16;
49 isum = (__force __wsum)add32_with_carry( 48 isum = (__force __wsum)add32_with_carry(
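
The csum-wrappers change above stops writing the __get_user() result through errp at each failure site and instead branches to a shared out_err label (outside the lines shown here) that reports the fault once. That is the usual single-exit goto style for C error paths; a self-contained sketch of the pattern:

/* illustrative sketch, not part of the patch */
#include <errno.h>
#include <stdio.h>
#include <string.h>

static int copy_u16(const unsigned short *src, unsigned short *dst, int *errp)
{
	if (src == NULL)
		goto out_err;                /* stands in for a faulting __get_user() */
	*dst = *src;
	*errp = 0;
	return 0;

out_err:
	*errp = -EFAULT;
	memset(dst, 0, sizeof(*dst));
	return -1;
}

int main(void)
{
	unsigned short in = 0x1234, out = 0;
	int err;

	copy_u16(&in, &out, &err);
	printf("ok:    out=%#x err=%d\n", out, err);
	copy_u16(NULL, &out, &err);
	printf("fault: out=%#x err=%d\n", out, err);
	return 0;
}
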
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 4cb8763868fc..4e5dfec750fc 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1123,7 +1123,7 @@ void mark_rodata_ro(void)
1123 unsigned long end = (unsigned long) &__end_rodata_hpage_align; 1123 unsigned long end = (unsigned long) &__end_rodata_hpage_align;
1124 unsigned long text_end = PFN_ALIGN(&__stop___ex_table); 1124 unsigned long text_end = PFN_ALIGN(&__stop___ex_table);
1125 unsigned long rodata_end = PFN_ALIGN(&__end_rodata); 1125 unsigned long rodata_end = PFN_ALIGN(&__end_rodata);
1126 unsigned long all_end = PFN_ALIGN(&_end); 1126 unsigned long all_end;
1127 1127
1128 printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", 1128 printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
1129 (end - start) >> 10); 1129 (end - start) >> 10);
@@ -1134,7 +1134,16 @@ void mark_rodata_ro(void)
1134 /* 1134 /*
1135 * The rodata/data/bss/brk section (but not the kernel text!) 1135 * The rodata/data/bss/brk section (but not the kernel text!)
1136 * should also be not-executable. 1136 * should also be not-executable.
1137 *
1138 * We align all_end to PMD_SIZE because the existing mapping
1139 * is a full PMD. If we would align _brk_end to PAGE_SIZE we
1140 * split the PMD and the remainder between _brk_end and the end
1141 * of the PMD will remain mapped executable.
1142 *
1143 * Any PMD which was set up after the one which covers _brk_end
1144 * has been zapped already via cleanup_highmap().
1137 */ 1145 */
1146 all_end = roundup((unsigned long)_brk_end, PMD_SIZE);
1138 set_memory_nx(rodata_start, (all_end - rodata_start) >> PAGE_SHIFT); 1147 set_memory_nx(rodata_start, (all_end - rodata_start) >> PAGE_SHIFT);
1139 1148
1140 rodata_test(); 1149 rodata_test();
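
The new comment and the roundup above exist because the kernel mapping ends on a 2 MiB PMD boundary: aligning all_end to PAGE_SIZE would split that large page and leave its tail executable, so _brk_end is rounded up to the next PMD instead. The rounding itself is the standard power-of-two round-up, for example:

/* illustrative sketch, not part of the patch */
#include <stdio.h>

#define PMD_SIZE 0x200000UL    /* 2 MiB large-page size on x86-64 */

/* round x up to the next multiple of `to`, which must be a power of two */
static unsigned long roundup_pow2(unsigned long x, unsigned long to)
{
	return (x + to - 1) & ~(to - 1);
}

int main(void)
{
	unsigned long brk_end = 0x01e2f000UL;   /* hypothetical _brk_end */

	printf("all_end = %#lx\n", roundup_pow2(brk_end, PMD_SIZE));  /* 0x2000000 */
	return 0;
}
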
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index ae242a7c11c7..36de293caf25 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -409,7 +409,7 @@ phys_addr_t slow_virt_to_phys(void *__virt_addr)
409 psize = page_level_size(level); 409 psize = page_level_size(level);
410 pmask = page_level_mask(level); 410 pmask = page_level_mask(level);
411 offset = virt_addr & ~pmask; 411 offset = virt_addr & ~pmask;
412 phys_addr = pte_pfn(*pte) << PAGE_SHIFT; 412 phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
413 return (phys_addr | offset); 413 return (phys_addr | offset);
414} 414}
415EXPORT_SYMBOL_GPL(slow_virt_to_phys); 415EXPORT_SYMBOL_GPL(slow_virt_to_phys);
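
The cast above matters on 32-bit PAE kernels, where pte_pfn() returns a 32-bit unsigned long but physical addresses can exceed 4 GiB: without widening first, the << PAGE_SHIFT is done in 32 bits and the high bits are lost before the value reaches the 64-bit phys_addr_t. A quick demonstration of the truncation:

/* illustrative sketch, not part of the patch */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
	uint32_t pfn = 0x150000;                         /* a page frame above the 4 GiB mark */

	uint32_t truncated = pfn << PAGE_SHIFT;          /* shift done in 32 bits, wraps */
	uint64_t widened   = (uint64_t)pfn << PAGE_SHIFT;

	printf("truncated: %#x\n", truncated);                      /* 0x50000000  */
	printf("widened:   %#llx\n", (unsigned long long)widened);  /* 0x150000000 */
	return 0;
}
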
diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c
index 3c53a90fdb18..c14ad34776c4 100644
--- a/arch/x86/platform/intel-mid/sfi.c
+++ b/arch/x86/platform/intel-mid/sfi.c
@@ -106,6 +106,7 @@ int __init sfi_parse_mtmr(struct sfi_table_header *table)
106 mp_irq.dstapic = MP_APIC_ALL; 106 mp_irq.dstapic = MP_APIC_ALL;
107 mp_irq.dstirq = pentry->irq; 107 mp_irq.dstirq = pentry->irq;
108 mp_save_irq(&mp_irq); 108 mp_save_irq(&mp_irq);
109 mp_map_gsi_to_irq(pentry->irq, IOAPIC_MAP_ALLOC);
109 } 110 }
110 111
111 return 0; 112 return 0;
@@ -176,6 +177,7 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table)
176 mp_irq.dstapic = MP_APIC_ALL; 177 mp_irq.dstapic = MP_APIC_ALL;
177 mp_irq.dstirq = pentry->irq; 178 mp_irq.dstirq = pentry->irq;
178 mp_save_irq(&mp_irq); 179 mp_save_irq(&mp_irq);
180 mp_map_gsi_to_irq(pentry->irq, IOAPIC_MAP_ALLOC);
179 } 181 }
180 return 0; 182 return 0;
181} 183}
diff --git a/arch/x86/tools/calc_run_size.pl b/arch/x86/tools/calc_run_size.pl
new file mode 100644
index 000000000000..23210baade2d
--- /dev/null
+++ b/arch/x86/tools/calc_run_size.pl
@@ -0,0 +1,39 @@
1#!/usr/bin/perl
2#
3# Calculate the amount of space needed to run the kernel, including room for
4# the .bss and .brk sections.
5#
6# Usage:
7# objdump -h a.out | perl calc_run_size.pl
8use strict;
9
10my $mem_size = 0;
11my $file_offset = 0;
12
13my $sections=" *[0-9]+ \.(?:bss|brk) +";
14while (<>) {
15 if (/^$sections([0-9a-f]+) +(?:[0-9a-f]+ +){2}([0-9a-f]+)/) {
16 my $size = hex($1);
17 my $offset = hex($2);
18 $mem_size += $size;
19 if ($file_offset == 0) {
20 $file_offset = $offset;
21 } elsif ($file_offset != $offset) {
22 # BFD linker shows the same file offset in ELF.
23 # Gold linker shows them as consecutive.
24 next if ($file_offset + $mem_size == $offset + $size);
25
26 printf STDERR "file_offset: 0x%lx\n", $file_offset;
27 printf STDERR "mem_size: 0x%lx\n", $mem_size;
28 printf STDERR "offset: 0x%lx\n", $offset;
29 printf STDERR "size: 0x%lx\n", $size;
30
31 die ".bss and .brk are non-contiguous\n";
32 }
33 }
34}
35
36if ($file_offset == 0) {
37 die "Never found .bss or .brk file offset\n";
38}
39printf("%d\n", $mem_size + $file_offset);
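
A worked example of the script's arithmetic, with hypothetical objdump -h output: if .bss has size 0xe0000 at file offset 0x1569000 and .brk has size 0x26000 at the same offset (the BFD-linker case the comment describes), then mem_size = 0xe0000 + 0x26000 = 0x106000, file_offset = 0x1569000, and the script prints 0x1569000 + 0x106000 = 0x166f000 as the run size passed to mkpiggy.
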
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 8650cdb53209..4c071aeb8417 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -510,6 +510,9 @@ static void xen_cpu_die(unsigned int cpu)
510 current->state = TASK_UNINTERRUPTIBLE; 510 current->state = TASK_UNINTERRUPTIBLE;
511 schedule_timeout(HZ/10); 511 schedule_timeout(HZ/10);
512 } 512 }
513
514 cpu_die_common(cpu);
515
513 xen_smp_intr_free(cpu); 516 xen_smp_intr_free(cpu);
514 xen_uninit_lock_cpu(cpu); 517 xen_uninit_lock_cpu(cpu);
515 xen_teardown_timer(cpu); 518 xen_teardown_timer(cpu);