author     Al Viro <viro@zeniv.linux.org.uk>	2014-12-08 20:39:29 -0500
committer  Al Viro <viro@zeniv.linux.org.uk>	2014-12-08 20:39:29 -0500
commit     ba00410b8131b23edfb0e09f8b6dd26c8eb621fb
tree       c08504e4d2fa51ac91cef544f336d0169806c49f /arch/x86
parent     8ce74dd6057832618957fc2cbd38fa959c3a0a6c
parent     aa583096d9767892983332e7c1a984bd17e3cd39
Merge branch 'iov_iter' into for-next
Diffstat (limited to 'arch/x86')
44 files changed, 355 insertions(+), 349 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f2327e88e07c..41a503c15862 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -142,6 +142,10 @@ config INSTRUCTION_DECODER
 	def_bool y
 	depends on KPROBES || PERF_EVENTS || UPROBES
 
+config PERF_EVENTS_INTEL_UNCORE
+	def_bool y
+	depends on PERF_EVENTS && CPU_SUP_INTEL && PCI
+
 config OUTPUT_FORMAT
 	string
 	default "elf32-i386" if X86_32
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 704f58aa79cd..be1e07d4b596 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -76,8 +76,10 @@ suffix-$(CONFIG_KERNEL_XZ) := xz
 suffix-$(CONFIG_KERNEL_LZO) := lzo
 suffix-$(CONFIG_KERNEL_LZ4) := lz4
 
+RUN_SIZE = $(shell objdump -h vmlinux | \
+	     perl $(srctree)/arch/x86/tools/calc_run_size.pl)
 quiet_cmd_mkpiggy = MKPIGGY $@
-      cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false )
+      cmd_mkpiggy = $(obj)/mkpiggy $< $(RUN_SIZE) > $@ || ( rm -f $@ ; false )
 
 targets += piggy.S
 $(obj)/piggy.S: $(obj)/vmlinux.bin.$(suffix-y) $(obj)/mkpiggy FORCE
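A note on RUN_SIZE: the calc_run_size.pl helper is added elsewhere in this series and its body is not shown in this excerpt. Given the objdump -h invocation, it can be assumed to derive the kernel's in-memory footprint from the section table, roughly

	run_size = file_offset(.bss/.brk) + size(.bss) + size(.brk)

i.e. the decompressed image plus the zero-initialized sections that the compressed payload does not carry. Treat the exact formula as an assumption, not part of this hunk.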
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index cbed1407a5cd..1d7fbbcc196d 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -207,7 +207,8 @@ relocated:
  * Do the decompression, and jump to the new kernel..
  */
 				/* push arguments for decompress_kernel: */
-	pushl	$z_output_len	/* decompressed length */
+	pushl	$z_run_size	/* size of kernel with .bss and .brk */
+	pushl	$z_output_len	/* decompressed length, end of relocs */
 	leal	z_extract_offset_negative(%ebx), %ebp
 	pushl	%ebp		/* output address */
 	pushl	$z_input_len	/* input_len */
@@ -217,7 +218,7 @@ relocated:
 	pushl	%eax		/* heap area */
 	pushl	%esi		/* real mode pointer */
 	call	decompress_kernel /* returns kernel location in %eax */
-	addl	$24, %esp
+	addl	$28, %esp
 
 /*
  * Jump to the decompressed kernel.
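The stack-cleanup change is plain argument arithmetic: decompress_kernel() used to take six 4-byte arguments pushed on the stack (6 * 4 = 24), and the new z_run_size push makes it seven (7 * 4 = 28), hence addl $28, %esp.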
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 2884e0c3e8a5..6b1766c6c082 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -402,13 +402,16 @@ relocated:
  * Do the decompression, and jump to the new kernel..
  */
 	pushq	%rsi			/* Save the real mode argument */
+	movq	$z_run_size, %r9	/* size of kernel with .bss and .brk */
+	pushq	%r9
 	movq	%rsi, %rdi		/* real mode address */
 	leaq	boot_heap(%rip), %rsi	/* malloc area for uncompression */
 	leaq	input_data(%rip), %rdx	/* input_data */
 	movl	$z_input_len, %ecx	/* input_len */
 	movq	%rbp, %r8		/* output target address */
-	movq	$z_output_len, %r9	/* decompressed length */
+	movq	$z_output_len, %r9	/* decompressed length, end of relocs */
 	call	decompress_kernel	/* returns kernel location in %rax */
+	popq	%r9
 	popq	%rsi
 
 /*
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 57ab74df7eea..30dd59a9f0b4 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -358,7 +358,8 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
 				  unsigned char *input_data,
 				  unsigned long input_len,
 				  unsigned char *output,
-				  unsigned long output_len)
+				  unsigned long output_len,
+				  unsigned long run_size)
 {
 	real_mode = rmode;
 
@@ -381,8 +382,14 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
 	free_mem_ptr     = heap;	/* Heap */
 	free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
 
-	output = choose_kernel_location(input_data, input_len,
-					output, output_len);
+	/*
+	 * The memory hole needed for the kernel is the larger of either
+	 * the entire decompressed kernel plus relocation table, or the
+	 * entire decompressed kernel plus .bss and .brk sections.
+	 */
+	output = choose_kernel_location(input_data, input_len, output,
+					output_len > run_size ? output_len
+							      : run_size);
 
 	/* Validate memory location choices. */
 	if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1))
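Restated outside the boot environment, the sizing rule above is just a max() of the two footprints. A minimal illustration (hypothetical numbers, not from the patch):

	/* The memory hole must fit whichever image is larger, e.g.
	 * output_len = 24 MiB (kernel + relocs) vs. run_size = 40 MiB
	 * (kernel + .bss + .brk) means a 40 MiB hole; otherwise .bss and
	 * .brk would spill past the chosen location. */
	static unsigned long hole_size(unsigned long output_len, unsigned long run_size)
	{
		return output_len > run_size ? output_len : run_size;
	}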
diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c
index b669ab65bf6c..d8222f213182 100644
--- a/arch/x86/boot/compressed/mkpiggy.c
+++ b/arch/x86/boot/compressed/mkpiggy.c
@@ -36,11 +36,13 @@ int main(int argc, char *argv[])
 	uint32_t olen;
 	long ilen;
 	unsigned long offs;
+	unsigned long run_size;
 	FILE *f = NULL;
 	int retval = 1;
 
-	if (argc < 2) {
-		fprintf(stderr, "Usage: %s compressed_file\n", argv[0]);
+	if (argc < 3) {
+		fprintf(stderr, "Usage: %s compressed_file run_size\n",
+				argv[0]);
 		goto bail;
 	}
 
@@ -74,6 +76,7 @@ int main(int argc, char *argv[])
 	offs += olen >> 12;	/* Add 8 bytes for each 32K block */
 	offs += 64*1024 + 128;	/* Add 64K + 128 bytes slack */
 	offs = (offs+4095) & ~4095; /* Round to a 4K boundary */
+	run_size = atoi(argv[2]);
 
 	printf(".section \".rodata..compressed\",\"a\",@progbits\n");
 	printf(".globl z_input_len\n");
@@ -85,6 +88,8 @@ int main(int argc, char *argv[])
 	/* z_extract_offset_negative allows simplification of head_32.S */
 	printf(".globl z_extract_offset_negative\n");
 	printf("z_extract_offset_negative = -0x%lx\n", offs);
+	printf(".globl z_run_size\n");
+	printf("z_run_size = %lu\n", run_size);
 
 	printf(".globl input_data, input_data_end\n");
 	printf("input_data:\n");
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 8ffba18395c8..ffe71228fc10 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -157,7 +157,7 @@ ENTRY(ia32_sysenter_target)
 	 * ourselves.  To save a few cycles, we can check whether
 	 * NT was set instead of doing an unconditional popfq.
 	 */
-	testl $X86_EFLAGS_NT,EFLAGS(%rsp)	/* saved EFLAGS match cpu */
+	testl $X86_EFLAGS_NT,EFLAGS-ARGOFFSET(%rsp)
 	jnz sysenter_fix_flags
 sysenter_flags_fixed:
 
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index f48b17df4224..3a52ee0e726d 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -20,7 +20,6 @@
 #define THREAD_SIZE_ORDER	1
 #define THREAD_SIZE		(PAGE_SIZE << THREAD_SIZE_ORDER)
 
-#define STACKFAULT_STACK 0
 #define DOUBLEFAULT_STACK 1
 #define NMI_STACK 0
 #define DEBUG_STACK 0
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 678205195ae1..75450b2c7be4 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -14,12 +14,11 @@
 #define IRQ_STACK_ORDER 2
 #define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER)
 
-#define STACKFAULT_STACK 1
-#define DOUBLEFAULT_STACK 2
-#define NMI_STACK 3
-#define DEBUG_STACK 4
-#define MCE_STACK 5
-#define N_EXCEPTION_STACKS 5  /* hw limit: 7 */
+#define DOUBLEFAULT_STACK 1
+#define NMI_STACK 2
+#define DEBUG_STACK 3
+#define MCE_STACK 4
+#define N_EXCEPTION_STACKS 4  /* hw limit: 7 */
 
 #define PUD_PAGE_SIZE		(_AC(1, UL) << PUD_SHIFT)
 #define PUD_PAGE_MASK		(~(PUD_PAGE_SIZE-1))
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 7024c12f7bfe..400873450e33 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -105,6 +105,7 @@ static __always_inline bool should_resched(void)
 # ifdef CONFIG_CONTEXT_TRACKING
 extern asmlinkage void ___preempt_schedule_context(void);
 # define __preempt_schedule_context() asm ("call ___preempt_schedule_context")
+extern asmlinkage void preempt_schedule_context(void);
 # endif
 #endif
 
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 8cd27e08e23c..8cd1cc3bc835 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -150,6 +150,7 @@ static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 }
 
 void cpu_disable_common(void);
+void cpu_die_common(unsigned int cpu);
 void native_smp_prepare_boot_cpu(void);
 void native_smp_prepare_cpus(unsigned int max_cpus);
 void native_smp_cpus_done(unsigned int max_cpus);
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 854053889d4d..547e344a6dc6 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -141,7 +141,7 @@ struct thread_info {
 /* Only used for 64 bit */
 #define _TIF_DO_NOTIFY_MASK						\
 	(_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME |	\
-	 _TIF_USER_RETURN_NOTIFY)
+	 _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE)
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW							\
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index bc8352e7010a..707adc6549d8 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -39,6 +39,7 @@ asmlinkage void simd_coprocessor_error(void);
 
 #ifdef CONFIG_TRACING
 asmlinkage void trace_page_fault(void);
+#define trace_stack_segment stack_segment
 #define trace_divide_error divide_error
 #define trace_bounds bounds
 #define trace_invalid_op invalid_op
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index b436fc735aa4..a142e77693e1 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -397,7 +397,7 @@ static int mp_register_gsi(struct device *dev, u32 gsi, int trigger,
 
 	/* Don't set up the ACPI SCI because it's already set up */
 	if (acpi_gbl_FADT.sci_interrupt == gsi)
-		return gsi;
+		return mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC);
 
 	trigger = trigger == ACPI_EDGE_SENSITIVE ? 0 : 1;
 	polarity = polarity == ACPI_ACTIVE_HIGH ? 0 : 1;
@@ -604,14 +604,18 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger)
 
 int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp)
 {
-	int irq = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC | IOAPIC_MAP_CHECK);
+	int irq;
 
-	if (irq >= 0) {
+	if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
+		*irqp = gsi;
+	} else {
+		irq = mp_map_gsi_to_irq(gsi,
+					IOAPIC_MAP_ALLOC | IOAPIC_MAP_CHECK);
+		if (irq < 0)
+			return -1;
 		*irqp = irq;
-		return 0;
 	}
-
-	return -1;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(acpi_gsi_to_irq);
 
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index 5972b108f15a..b708738d016e 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -185,8 +185,6 @@ static void apbt_setup_irq(struct apbt_dev *adev)
 
 	irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT);
 	irq_set_affinity(adev->irq, cpumask_of(adev->cpu));
-	/* APB timer irqs are set up as mp_irqs, timer is edge type */
-	__irq_set_handler(adev->irq, handle_edge_irq, 0, "edge");
 }
 
 /* Should be called with per cpu */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 00853b254ab0..ba6cc041edb1 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1297,7 +1297,7 @@ void setup_local_APIC(void)
 	unsigned int value, queued;
 	int i, j, acked = 0;
 	unsigned long long tsc = 0, ntsc;
-	long long max_loops = cpu_khz;
+	long long max_loops = cpu_khz ? cpu_khz : 1000000;
 
 	if (cpu_has_tsc)
 		rdtscll(tsc);
@@ -1383,7 +1383,7 @@ void setup_local_APIC(void)
 			break;
 		}
 		if (queued) {
-			if (cpu_has_tsc) {
+			if (cpu_has_tsc && cpu_khz) {
 				rdtscll(ntsc);
 				max_loops = (cpu_khz << 10) - (ntsc - tsc);
 			} else
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 01d5453b5502..e27b49d7c922 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -39,9 +39,12 @@ obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o
 endif
 obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_p6.o perf_event_knc.o perf_event_p4.o
 obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
-obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_uncore.o perf_event_intel_uncore_snb.o
-obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_uncore_snbep.o perf_event_intel_uncore_nhmex.o
 obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_rapl.o
+
+obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE)	+= perf_event_intel_uncore.o \
+					   perf_event_intel_uncore_snb.o \
+					   perf_event_intel_uncore_snbep.o \
+					   perf_event_intel_uncore_nhmex.o
 endif
 
 
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4b4f78c9ba19..cfa9b5b2c27a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -146,6 +146,8 @@ EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
 
 static int __init x86_xsave_setup(char *s)
 {
+	if (strlen(s))
+		return 0;
 	setup_clear_cpu_cap(X86_FEATURE_XSAVE);
 	setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
 	setup_clear_cpu_cap(X86_FEATURE_XSAVES);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 1ef456273172..9cc6b6f25f42 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -213,12 +213,13 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_X86_F00F_BUG
 	/*
-	 * All current models of Pentium and Pentium with MMX technology CPUs
+	 * All models of Pentium and Pentium with MMX technology CPUs
 	 * have the F0 0F bug, which lets nonprivileged users lock up the
 	 * system. Announce that the fault handler will be checking for it.
+	 * The Quark is also family 5, but does not have the same bug.
 	 */
 	clear_cpu_bug(c, X86_BUG_F00F);
-	if (!paravirt_enabled() && c->x86 == 5) {
+	if (!paravirt_enabled() && c->x86 == 5 && c->x86_model < 9) {
 		static int f00f_workaround_enabled;
 
 		set_cpu_bug(c, X86_BUG_F00F);
diff --git a/arch/x86/kernel/cpu/microcode/amd_early.c b/arch/x86/kernel/cpu/microcode/amd_early.c
index 7aa1acc79789..06674473b0e6 100644
--- a/arch/x86/kernel/cpu/microcode/amd_early.c
+++ b/arch/x86/kernel/cpu/microcode/amd_early.c
@@ -108,12 +108,13 @@ static size_t compute_container_size(u8 *data, u32 total_size)
 * load_microcode_amd() to save equivalent cpu table and microcode patches in
 * kernel heap memory.
 */
-static void apply_ucode_in_initrd(void *ucode, size_t size)
+static void apply_ucode_in_initrd(void *ucode, size_t size, bool save_patch)
 {
 	struct equiv_cpu_entry *eq;
 	size_t *cont_sz;
 	u32 *header;
 	u8 *data, **cont;
+	u8 (*patch)[PATCH_MAX_SIZE];
 	u16 eq_id = 0;
 	int offset, left;
 	u32 rev, eax, ebx, ecx, edx;
@@ -123,10 +124,12 @@ static void apply_ucode_in_initrd(void *ucode, size_t size)
 	new_rev = (u32 *)__pa_nodebug(&ucode_new_rev);
 	cont_sz = (size_t *)__pa_nodebug(&container_size);
 	cont	= (u8 **)__pa_nodebug(&container);
+	patch	= (u8 (*)[PATCH_MAX_SIZE])__pa_nodebug(&amd_ucode_patch);
 #else
 	new_rev = &ucode_new_rev;
 	cont_sz = &container_size;
 	cont	= &container;
+	patch	= &amd_ucode_patch;
 #endif
 
 	data = ucode;
@@ -213,9 +216,9 @@ static void apply_ucode_in_initrd(void *ucode, size_t size)
 			rev = mc->hdr.patch_id;
 			*new_rev = rev;
 
-			/* save ucode patch */
-			memcpy(amd_ucode_patch, mc,
-			       min_t(u32, header[1], PATCH_MAX_SIZE));
+			if (save_patch)
+				memcpy(patch, mc,
+				       min_t(u32, header[1], PATCH_MAX_SIZE));
 		}
 	}
 
@@ -246,7 +249,7 @@ void __init load_ucode_amd_bsp(void)
 	*data = cp.data;
 	*size = cp.size;
 
-	apply_ucode_in_initrd(cp.data, cp.size);
+	apply_ucode_in_initrd(cp.data, cp.size, true);
 }
 
 #ifdef CONFIG_X86_32
@@ -263,7 +266,7 @@ void load_ucode_amd_ap(void)
 	size_t *usize;
 	void **ucode;
 
-	mc = (struct microcode_amd *)__pa(amd_ucode_patch);
+	mc = (struct microcode_amd *)__pa_nodebug(amd_ucode_patch);
 	if (mc->hdr.patch_id && mc->hdr.processor_rev_id) {
 		__apply_microcode_amd(mc);
 		return;
@@ -275,7 +278,7 @@ void load_ucode_amd_ap(void)
 	if (!*ucode || !*usize)
 		return;
 
-	apply_ucode_in_initrd(*ucode, *usize);
+	apply_ucode_in_initrd(*ucode, *usize, false);
 }
 
 static void __init collect_cpu_sig_on_bsp(void *arg)
@@ -339,7 +342,7 @@ void load_ucode_amd_ap(void)
 	 * AP has a different equivalence ID than BSP, looks like
	 * mixed-steppings silicon so go through the ucode blob anew.
	 */
-	apply_ucode_in_initrd(ucode_cpio.data, ucode_cpio.size);
+	apply_ucode_in_initrd(ucode_cpio.data, ucode_cpio.size, false);
 	}
 }
 #endif
@@ -347,7 +350,9 @@ void load_ucode_amd_ap(void)
 int __init save_microcode_in_initrd_amd(void)
 {
 	unsigned long cont;
+	int retval = 0;
 	enum ucode_state ret;
+	u8 *cont_va;
 	u32 eax;
 
 	if (!container)
@@ -355,13 +360,15 @@ int __init save_microcode_in_initrd_amd(void)
 
 #ifdef CONFIG_X86_32
 	get_bsp_sig();
 	cont = (unsigned long)container;
+	cont_va = __va(container);
 #else
 	/*
	 * We need the physical address of the container for both bitness since
	 * boot_params.hdr.ramdisk_image is a physical address.
	 */
 	cont    = __pa(container);
+	cont_va = container;
 #endif
 
 	/*
@@ -372,6 +379,8 @@ int __init save_microcode_in_initrd_amd(void)
 	if (relocated_ramdisk)
 		container = (u8 *)(__va(relocated_ramdisk) +
 				   (cont - boot_params.hdr.ramdisk_image));
+	else
+		container = cont_va;
 
 	if (ucode_new_rev)
 		pr_info("microcode: updated early to new patch_level=0x%08x\n",
@@ -382,7 +391,7 @@ int __init save_microcode_in_initrd_amd(void)
 
 	ret = load_microcode_amd(eax, container, container_size);
 	if (ret != UCODE_OK)
-		return -EINVAL;
+		retval = -EINVAL;
 
 	/*
	 * This will be freed any msec now, stash patches for the current
@@ -391,5 +400,5 @@ int __init save_microcode_in_initrd_amd(void)
 	container = NULL;
 	container_size = 0;
 
-	return 0;
+	return retval;
 }
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index dd9d6190b08d..2ce9051174e6 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -465,6 +465,14 @@ static void mc_bp_resume(void)
 
 	if (uci->valid && uci->mc)
 		microcode_ops->apply_microcode(cpu);
+	else if (!uci->mc)
+		/*
+		 * We might resume and not have applied late microcode but still
+		 * have a newer patch stashed from the early loader. We don't
+		 * have it in uci->mc so we have to load it the same way we're
+		 * applying patches early on the APs.
+		 */
+		load_ucode_ap();
 }
 
 static struct syscore_ops mc_syscore_ops = {
diff --git a/arch/x86/kernel/cpu/microcode/core_early.c b/arch/x86/kernel/cpu/microcode/core_early.c
index 5f28a64e71ea..2c017f242a78 100644
--- a/arch/x86/kernel/cpu/microcode/core_early.c
+++ b/arch/x86/kernel/cpu/microcode/core_early.c
@@ -124,7 +124,7 @@ void __init load_ucode_bsp(void)
 static bool check_loader_disabled_ap(void)
 {
 #ifdef CONFIG_X86_32
-	return __pa_nodebug(dis_ucode_ldr);
+	return *((bool *)__pa_nodebug(&dis_ucode_ldr));
 #else
 	return dis_ucode_ldr;
 #endif
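The 32-bit branch runs before paging is set up, so the flag has to be read through its physical address. The old line handed the value of dis_ucode_ldr to __pa_nodebug(), returning a (practically always non-zero) bogus address rather than the flag. Spelled out, the fixed read is equivalent to:

	bool *p = (bool *)__pa_nodebug(&dis_ucode_ldr);	/* physical alias of the flag */
	return *p;					/* the flag's value, not its address */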
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 1b8299dd3d91..143e5f5dc855 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -243,8 +243,9 @@ static bool check_hw_exists(void)
 
 msr_fail:
 	printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
-	printk(boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR
-	       "Failed to access perfctr msr (MSR %x is %Lx)\n", reg, val_new);
+	printk("%sFailed to access perfctr msr (MSR %x is %Lx)\n",
+		boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR,
+		reg, val_new);
 
 	return false;
 }
@@ -444,12 +445,6 @@ int x86_pmu_hw_config(struct perf_event *event)
 	if (event->attr.type == PERF_TYPE_RAW)
 		event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
 
-	if (event->attr.sample_period && x86_pmu.limit_period) {
-		if (x86_pmu.limit_period(event, event->attr.sample_period) >
-				event->attr.sample_period)
-			return -EINVAL;
-	}
-
 	return x86_setup_perfctr(event);
 }
 
@@ -987,9 +982,6 @@ int x86_perf_event_set_period(struct perf_event *event)
 	if (left > x86_pmu.max_period)
 		left = x86_pmu.max_period;
 
-	if (x86_pmu.limit_period)
-		left = x86_pmu.limit_period(event, left);
-
 	per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
 
 	/*
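The check_hw_exists() hunk fixes a C string-literal pitfall rather than restructuring anything. A minimal illustration (not from the patch): adjacent string literals concatenate, so

	printk(cond ? KERN_INFO : KERN_ERR "fmt %x\n", reg);	/* parses as cond ? KERN_INFO : (KERN_ERR "fmt %x\n") */

leaves only the bare loglevel prefix as the format string whenever cond is true, dropping the arguments. Routing the level through "%s" keeps a single well-formed format string:

	printk("%sfmt %x\n", cond ? KERN_INFO : KERN_ERR, reg);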
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index d98a34d435d7..fc5eb390b368 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -445,7 +445,6 @@ struct x86_pmu {
 	struct x86_pmu_quirk *quirks;
 	int		perfctr_second_write;
 	bool		late_ack;
-	unsigned	(*limit_period)(struct perf_event *event, unsigned l);
 
 	/*
	 * sysfs attrs
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index a73947c53b65..944bf019b74f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -220,15 +220,6 @@ static struct event_constraint intel_hsw_event_constraints[] = {
 	EVENT_CONSTRAINT_END
 };
 
-static struct event_constraint intel_bdw_event_constraints[] = {
-	FIXED_EVENT_CONSTRAINT(0x00c0, 0),	/* INST_RETIRED.ANY */
-	FIXED_EVENT_CONSTRAINT(0x003c, 1),	/* CPU_CLK_UNHALTED.CORE */
-	FIXED_EVENT_CONSTRAINT(0x0300, 2),	/* CPU_CLK_UNHALTED.REF */
-	INTEL_UEVENT_CONSTRAINT(0x148, 0x4),	/* L1D_PEND_MISS.PENDING */
-	INTEL_EVENT_CONSTRAINT(0xa3, 0x4),	/* CYCLE_ACTIVITY.* */
-	EVENT_CONSTRAINT_END
-};
-
 static u64 intel_pmu_event_map(int hw_event)
 {
 	return intel_perfmon_event_map[hw_event];
@@ -424,126 +415,6 @@ static __initconst const u64 snb_hw_cache_event_ids
 
 };
 
-static __initconst const u64 hsw_hw_cache_event_ids
-				[PERF_COUNT_HW_CACHE_MAX]
-				[PERF_COUNT_HW_CACHE_OP_MAX]
-				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(L1D ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x81d0,	/* MEM_UOPS_RETIRED.ALL_LOADS */
-		[ C(RESULT_MISS) ] = 0x151,	/* L1D.REPLACEMENT */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x82d0,	/* MEM_UOPS_RETIRED.ALL_STORES */
-		[ C(RESULT_MISS) ] = 0x0,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0,
-		[ C(RESULT_MISS) ] = 0x0,
-	},
- },
- [ C(L1I ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0,
-		[ C(RESULT_MISS) ] = 0x280,	/* ICACHE.MISSES */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS) ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0,
-		[ C(RESULT_MISS) ] = 0x0,
-	},
- },
- [ C(LL ) ] = {
-	[ C(OP_READ) ] = {
-		/* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD */
-		[ C(RESULT_ACCESS) ] = 0x1b7,
-		/* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD|SUPPLIER_NONE|
-		   L3_MISS|ANY_SNOOP */
-		[ C(RESULT_MISS) ] = 0x1b7,
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x1b7,	/* OFFCORE_RESPONSE:ALL_RFO */
-		/* OFFCORE_RESPONSE:ALL_RFO|SUPPLIER_NONE|L3_MISS|ANY_SNOOP */
-		[ C(RESULT_MISS) ] = 0x1b7,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0,
-		[ C(RESULT_MISS) ] = 0x0,
-	},
- },
- [ C(DTLB) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x81d0,	/* MEM_UOPS_RETIRED.ALL_LOADS */
-		[ C(RESULT_MISS) ] = 0x108,	/* DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x82d0,	/* MEM_UOPS_RETIRED.ALL_STORES */
-		[ C(RESULT_MISS) ] = 0x149,	/* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0,
-		[ C(RESULT_MISS) ] = 0x0,
-	},
- },
- [ C(ITLB) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x6085,	/* ITLB_MISSES.STLB_HIT */
-		[ C(RESULT_MISS) ] = 0x185,	/* ITLB_MISSES.MISS_CAUSES_A_WALK */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS) ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS) ] = -1,
-	},
- },
- [ C(BPU ) ] = {
-	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0xc4,	/* BR_INST_RETIRED.ALL_BRANCHES */
-		[ C(RESULT_MISS) ] = 0xc5,	/* BR_MISP_RETIRED.ALL_BRANCHES */
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS) ] = -1,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = -1,
-		[ C(RESULT_MISS) ] = -1,
-	},
- },
-};
-
-static __initconst const u64 hsw_hw_cache_extra_regs
-				[PERF_COUNT_HW_CACHE_MAX]
-				[PERF_COUNT_HW_CACHE_OP_MAX]
-				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
-{
- [ C(LL ) ] = {
-	[ C(OP_READ) ] = {
-		/* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD */
-		[ C(RESULT_ACCESS) ] = 0x2d5,
-		/* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD|SUPPLIER_NONE|
-		   L3_MISS|ANY_SNOOP */
-		[ C(RESULT_MISS) ] = 0x3fbc0202d5ull,
-	},
-	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x122,	/* OFFCORE_RESPONSE:ALL_RFO */
-		/* OFFCORE_RESPONSE:ALL_RFO|SUPPLIER_NONE|L3_MISS|ANY_SNOOP */
-		[ C(RESULT_MISS) ] = 0x3fbc020122ull,
-	},
-	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0,
-		[ C(RESULT_MISS) ] = 0x0,
-	},
- },
-};
-
 static __initconst const u64 westmere_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -2034,24 +1905,6 @@ hsw_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 	return c;
 }
 
-/*
- * Broadwell:
- * The INST_RETIRED.ALL period always needs to have lowest
- * 6bits cleared (BDM57). It shall not use a period smaller
- * than 100 (BDM11). We combine the two to enforce
- * a min-period of 128.
- */
-static unsigned bdw_limit_period(struct perf_event *event, unsigned left)
-{
-	if ((event->hw.config & INTEL_ARCH_EVENT_MASK) ==
-			X86_CONFIG(.event=0xc0, .umask=0x01)) {
-		if (left < 128)
-			left = 128;
-		left &= ~0x3fu;
-	}
-	return left;
-}
-
 PMU_FORMAT_ATTR(event,	"config:0-7"	);
 PMU_FORMAT_ATTR(umask,	"config:8-15"	);
 PMU_FORMAT_ATTR(edge,	"config:18"	);
@@ -2692,8 +2545,8 @@ __init int intel_pmu_init(void)
 	case 69: /* 22nm Haswell ULT */
 	case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
 		x86_pmu.late_ack = true;
-		memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
-		memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+		memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
 
 		intel_pmu_lbr_init_snb();
 
@@ -2712,28 +2565,6 @@ __init int intel_pmu_init(void)
 		pr_cont("Haswell events, ");
 		break;
 
-	case 61: /* 14nm Broadwell Core-M */
-		x86_pmu.late_ack = true;
-		memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
-		memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
-
-		intel_pmu_lbr_init_snb();
-
-		x86_pmu.event_constraints = intel_bdw_event_constraints;
-		x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
-		x86_pmu.extra_regs = intel_snbep_extra_regs;
-		x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
-		/* all extra regs are per-cpu when HT is on */
-		x86_pmu.er_flags |= ERF_HAS_RSP_1;
-		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
-
-		x86_pmu.hw_config = hsw_hw_config;
-		x86_pmu.get_event_constraints = hsw_get_event_constraints;
-		x86_pmu.cpu_events = hsw_events_attrs;
-		x86_pmu.limit_period = bdw_limit_period;
-		pr_cont("Broadwell events, ");
-		break;
-
 	default:
 		switch (x86_pmu.version) {
 		case 1:
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
index adf138eac85c..f9ed429d6e4f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
@@ -486,14 +486,17 @@ static struct attribute_group snbep_uncore_qpi_format_group = {
 	.attrs = snbep_uncore_qpi_formats_attr,
 };
 
-#define SNBEP_UNCORE_MSR_OPS_COMMON_INIT()			\
-	.init_box	= snbep_uncore_msr_init_box,		\
+#define __SNBEP_UNCORE_MSR_OPS_COMMON_INIT()			\
 	.disable_box	= snbep_uncore_msr_disable_box,		\
 	.enable_box	= snbep_uncore_msr_enable_box,		\
 	.disable_event	= snbep_uncore_msr_disable_event,	\
 	.enable_event	= snbep_uncore_msr_enable_event,	\
 	.read_counter	= uncore_msr_read_counter
 
+#define SNBEP_UNCORE_MSR_OPS_COMMON_INIT()			\
+	__SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),			\
+	.init_box	= snbep_uncore_msr_init_box		\
+
 static struct intel_uncore_ops snbep_uncore_msr_ops = {
 	SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
 };
@@ -1919,6 +1922,30 @@ static struct intel_uncore_type hswep_uncore_cbox = {
 	.format_group		= &hswep_uncore_cbox_format_group,
 };
 
+/*
+ * Write SBOX Initialization register bit by bit to avoid spurious #GPs
+ */
+static void hswep_uncore_sbox_msr_init_box(struct intel_uncore_box *box)
+{
+	unsigned msr = uncore_msr_box_ctl(box);
+
+	if (msr) {
+		u64 init = SNBEP_PMON_BOX_CTL_INT;
+		u64 flags = 0;
+		int i;
+
+		for_each_set_bit(i, (unsigned long *)&init, 64) {
+			flags |= (1ULL << i);
+			wrmsrl(msr, flags);
+		}
+	}
+}
+
+static struct intel_uncore_ops hswep_uncore_sbox_msr_ops = {
+	__SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+	.init_box		= hswep_uncore_sbox_msr_init_box
+};
+
 static struct attribute *hswep_uncore_sbox_formats_attr[] = {
 	&format_attr_event.attr,
 	&format_attr_umask.attr,
@@ -1944,7 +1971,7 @@ static struct intel_uncore_type hswep_uncore_sbox = {
 	.event_mask		= HSWEP_S_MSR_PMON_RAW_EVENT_MASK,
 	.box_ctl		= HSWEP_S0_MSR_PMON_BOX_CTL,
 	.msr_offset		= HSWEP_SBOX_MSR_OFFSET,
-	.ops			= &snbep_uncore_msr_ops,
+	.ops			= &hswep_uncore_sbox_msr_ops,
 	.format_group		= &hswep_uncore_sbox_format_group,
 };
 
@@ -2025,13 +2052,27 @@ static struct intel_uncore_type hswep_uncore_imc = {
 	SNBEP_UNCORE_PCI_COMMON_INIT(),
 };
 
+static unsigned hswep_uncore_irp_ctrs[] = {0xa0, 0xa8, 0xb0, 0xb8};
+
+static u64 hswep_uncore_irp_read_counter(struct intel_uncore_box *box, struct perf_event *event)
+{
+	struct pci_dev *pdev = box->pci_dev;
+	struct hw_perf_event *hwc = &event->hw;
+	u64 count = 0;
+
+	pci_read_config_dword(pdev, hswep_uncore_irp_ctrs[hwc->idx], (u32 *)&count);
+	pci_read_config_dword(pdev, hswep_uncore_irp_ctrs[hwc->idx] + 4, (u32 *)&count + 1);
+
+	return count;
+}
+
 static struct intel_uncore_ops hswep_uncore_irp_ops = {
 	.init_box	= snbep_uncore_pci_init_box,
 	.disable_box	= snbep_uncore_pci_disable_box,
 	.enable_box	= snbep_uncore_pci_enable_box,
 	.disable_event	= ivbep_uncore_irp_disable_event,
 	.enable_event	= ivbep_uncore_irp_enable_event,
-	.read_counter	= ivbep_uncore_irp_read_counter,
+	.read_counter	= hswep_uncore_irp_read_counter,
 };
 
 static struct intel_uncore_type hswep_uncore_irp = {
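For the SBOX workaround above, assuming SNBEP_PMON_BOX_CTL_INT covers just the two reset bits (mask 0x3; the definition is outside this excerpt), the bit-by-bit loop expands to

	wrmsrl(msr, 0x1);	/* lowest set bit alone */
	wrmsrl(msr, 0x3);	/* then both bits together */

rather than a single wrmsrl(msr, 0x3), which is what provokes the spurious #GP on these Haswell-EP SBOX MSRs.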
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 1abcb50b48ae..ff86f19b5758 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -24,7 +24,6 @@ static char x86_stack_ids[][8] = {
 		[ DEBUG_STACK-1			]	= "#DB",
 		[ NMI_STACK-1			]	= "NMI",
 		[ DOUBLEFAULT_STACK-1		]	= "#DF",
-		[ STACKFAULT_STACK-1		]	= "#SS",
 		[ MCE_STACK-1			]	= "#MC",
 #if DEBUG_STKSZ > EXCEPTION_STKSZ
 		[ N_EXCEPTION_STACKS ...
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index b553ed89e5f5..344b63f18d14 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -447,15 +447,14 @@ sysenter_exit:
 sysenter_audit:
 	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
 	jnz syscall_trace_entry
-	addl $4,%esp
-	CFI_ADJUST_CFA_OFFSET -4
-	movl %esi,4(%esp)		/* 5th arg: 4th syscall arg */
-	movl %edx,(%esp)		/* 4th arg: 3rd syscall arg */
-	/* %ecx already in %ecx		   3rd arg: 2nd syscall arg */
-	movl %ebx,%edx			/* 2nd arg: 1st syscall arg */
-	/* %eax already in %eax		   1st arg: syscall number */
+	/* movl PT_EAX(%esp), %eax	already set, syscall number: 1st arg to audit */
+	movl PT_EBX(%esp), %edx		/* ebx/a0: 2nd arg to audit */
+	/* movl PT_ECX(%esp), %ecx	already set, a1: 3nd arg to audit */
+	pushl_cfi PT_ESI(%esp)		/* a3: 5th arg */
+	pushl_cfi PT_EDX+4(%esp)	/* a2: 4th arg */
 	call __audit_syscall_entry
-	pushl_cfi %ebx
+	popl_cfi %ecx			/* get that remapped edx off the stack */
+	popl_cfi %ecx			/* get that remapped esi off the stack */
 	movl PT_EAX(%esp),%eax		/* reload syscall number */
 	jmp sysenter_do_call
 
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index df088bb03fb3..c0226ab54106 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -828,9 +828,15 @@ ENTRY(native_iret)
 	jnz native_irq_return_ldt
 #endif
 
+.global native_irq_return_iret
 native_irq_return_iret:
+	/*
+	 * This may fault.  Non-paranoid faults on return to userspace are
+	 * handled by fixup_bad_iret.  These include #SS, #GP, and #NP.
+	 * Double-faults due to espfix64 are handled in do_double_fault.
+	 * Other faults here are fatal.
+	 */
 	iretq
-	_ASM_EXTABLE(native_irq_return_iret, bad_iret)
 
 #ifdef CONFIG_X86_ESPFIX64
 native_irq_return_ldt:
@@ -858,25 +864,6 @@ native_irq_return_ldt:
 	jmp native_irq_return_iret
 #endif
 
-	.section .fixup,"ax"
-bad_iret:
-	/*
-	 * The iret traps when the %cs or %ss being restored is bogus.
-	 * We've lost the original trap vector and error code.
-	 * #GPF is the most likely one to get for an invalid selector.
-	 * So pretend we completed the iret and took the #GPF in user mode.
-	 *
-	 * We are now running with the kernel GS after exception recovery.
-	 * But error_entry expects us to have user GS to match the user %cs,
-	 * so swap back.
-	 */
-	pushq $0
-
-	SWAPGS
-	jmp general_protection
-
-	.previous
-
 /* edi: workmask, edx: work */
 retint_careful:
 	CFI_RESTORE_STATE
@@ -922,37 +909,6 @@ ENTRY(retint_kernel)
 	CFI_ENDPROC
 END(common_interrupt)
 
-/*
- * If IRET takes a fault on the espfix stack, then we
- * end up promoting it to a doublefault.  In that case,
- * modify the stack to make it look like we just entered
- * the #GP handler from user space, similar to bad_iret.
- */
-#ifdef CONFIG_X86_ESPFIX64
-	ALIGN
-__do_double_fault:
-	XCPT_FRAME 1 RDI+8
-	movq RSP(%rdi),%rax		/* Trap on the espfix stack? */
-	sarq $PGDIR_SHIFT,%rax
-	cmpl $ESPFIX_PGD_ENTRY,%eax
-	jne do_double_fault		/* No, just deliver the fault */
-	cmpl $__KERNEL_CS,CS(%rdi)
-	jne do_double_fault
-	movq RIP(%rdi),%rax
-	cmpq $native_irq_return_iret,%rax
-	jne do_double_fault		/* This shouldn't happen... */
-	movq PER_CPU_VAR(kernel_stack),%rax
-	subq $(6*8-KERNEL_STACK_OFFSET),%rax	/* Reset to original stack */
-	movq %rax,RSP(%rdi)
-	movq $0,(%rax)			/* Missing (lost) #GP error code */
-	movq $general_protection,RIP(%rdi)
-	retq
-	CFI_ENDPROC
-END(__do_double_fault)
-#else
-# define __do_double_fault do_double_fault
-#endif
-
 /*
  * APIC interrupts.
  */
@@ -1124,7 +1080,7 @@ idtentry overflow do_overflow has_error_code=0
 idtentry bounds do_bounds has_error_code=0
 idtentry invalid_op do_invalid_op has_error_code=0
 idtentry device_not_available do_device_not_available has_error_code=0
-idtentry double_fault __do_double_fault has_error_code=1 paranoid=1
+idtentry double_fault do_double_fault has_error_code=1 paranoid=1
 idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0
 idtentry invalid_TSS do_invalid_TSS has_error_code=1
 idtentry segment_not_present do_segment_not_present has_error_code=1
@@ -1289,7 +1245,7 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
 
 idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
 idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
-idtentry stack_segment do_stack_segment has_error_code=1 paranoid=1
+idtentry stack_segment do_stack_segment has_error_code=1
 #ifdef CONFIG_XEN
 idtentry xen_debug do_debug has_error_code=0
 idtentry xen_int3 do_int3 has_error_code=0
@@ -1399,17 +1355,16 @@ error_sti:
 
 /*
  * There are two places in the kernel that can potentially fault with
- * usergs. Handle them here. The exception handlers after iret run with
- * kernel gs again, so don't set the user space flag. B stepping K8s
- * sometimes report an truncated RIP for IRET exceptions returning to
- * compat mode. Check for these here too.
+ * usergs. Handle them here.  B stepping K8s sometimes report a
+ * truncated RIP for IRET exceptions returning to compat mode. Check
+ * for these here too.
  */
 error_kernelspace:
 	CFI_REL_OFFSET rcx, RCX+8
 	incl %ebx
 	leaq native_irq_return_iret(%rip),%rcx
 	cmpq %rcx,RIP+8(%rsp)
-	je error_swapgs
+	je error_bad_iret
 	movl %ecx,%eax	/* zero extend */
 	cmpq %rax,RIP+8(%rsp)
 	je bstep_iret
@@ -1420,7 +1375,15 @@ error_kernelspace:
 bstep_iret:
 	/* Fix truncated RIP */
 	movq %rcx,RIP+8(%rsp)
-	jmp error_swapgs
+	/* fall through */
+
+error_bad_iret:
+	SWAPGS
+	mov %rsp,%rdi
+	call fixup_bad_iret
+	mov %rax,%rsp
+	decl %ebx	/* Return to usergs */
+	jmp error_sti
 CFI_ENDPROC
 END(error_entry)
 
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 8af817105e29..e7cc5370cd2f 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -111,8 +111,7 @@ static void make_8259A_irq(unsigned int irq)
 {
 	disable_irq_nosync(irq);
 	io_apic_irqs &= ~(1<<irq);
-	irq_set_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq,
-				      i8259A_chip.name);
+	irq_set_chip_and_handler(irq, &i8259A_chip, handle_level_irq);
 	enable_irq(irq);
 }
 
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 44f1ed42fdf2..4de73ee78361 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
@@ -70,7 +70,6 @@ int vector_used_by_percpu_irq(unsigned int vector) | |||
70 | void __init init_ISA_irqs(void) | 70 | void __init init_ISA_irqs(void) |
71 | { | 71 | { |
72 | struct irq_chip *chip = legacy_pic->chip; | 72 | struct irq_chip *chip = legacy_pic->chip; |
73 | const char *name = chip->name; | ||
74 | int i; | 73 | int i; |
75 | 74 | ||
76 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) | 75 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) |
@@ -79,7 +78,7 @@ void __init init_ISA_irqs(void) | |||
79 | legacy_pic->init(0); | 78 | legacy_pic->init(0); |
80 | 79 | ||
81 | for (i = 0; i < nr_legacy_irqs(); i++) | 80 | for (i = 0; i < nr_legacy_irqs(); i++) |
82 | irq_set_chip_and_handler_name(i, chip, handle_level_irq, name); | 81 | irq_set_chip_and_handler(i, chip, handle_level_irq); |
83 | } | 82 | } |
84 | 83 | ||
85 | void __init init_IRQ(void) | 84 | void __init init_IRQ(void) |
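Both this change and the i8259.c one above drop the explicit name argument. That is safe because the shorter helper is a thin wrapper over the _name variant; in include/linux/irq.h of this era it reads roughly as below, so passing the chip's own name was redundant and NULL lets the irq core derive the name itself:

    static inline void
    irq_set_chip_and_handler(unsigned int irq, struct irq_chip *chip,
                             irq_flow_handler_t handle)
    {
            irq_set_chip_and_handler_name(irq, chip, handle, NULL);
    }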
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 749b0e423419..e510618b2e91 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -1484,7 +1484,7 @@ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch) | |||
1484 | */ | 1484 | */ |
1485 | if (work & _TIF_NOHZ) { | 1485 | if (work & _TIF_NOHZ) { |
1486 | user_exit(); | 1486 | user_exit(); |
1487 | work &= ~TIF_NOHZ; | 1487 | work &= ~_TIF_NOHZ; |
1488 | } | 1488 | } |
1489 | 1489 | ||
1490 | #ifdef CONFIG_SECCOMP | 1490 | #ifdef CONFIG_SECCOMP |
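The one-character fix above is a real bug fix, not a cleanup: TIF_NOHZ is a bit index while _TIF_NOHZ is the corresponding mask, so masking with ~TIF_NOHZ clears whichever unrelated flag bits happen to be set in the index's binary value. A standalone illustration (the bit position is made up, not the kernel's):

    #include <stdio.h>

    #define TIF_NOHZ   19                    /* bit index, illustrative value */
    #define _TIF_NOHZ  (1UL << TIF_NOHZ)     /* bit mask */

    int main(void)
    {
            unsigned long work = 0xffffffffUL;

            /* ~TIF_NOHZ is ~19, which clears bits 0, 1 and 4 instead. */
            printf("buggy: %#lx\n", work & ~TIF_NOHZ);
            /* ~_TIF_NOHZ clears only bit 19, as intended. */
            printf("fixed: %#lx\n", work & ~_TIF_NOHZ);
            return 0;
    }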
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 235cfd39e0d7..ab08aa2276fb 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -1128,7 +1128,6 @@ void __init setup_arch(char **cmdline_p) | |||
1128 | setup_real_mode(); | 1128 | setup_real_mode(); |
1129 | 1129 | ||
1130 | memblock_set_current_limit(get_max_mapped()); | 1130 | memblock_set_current_limit(get_max_mapped()); |
1131 | dma_contiguous_reserve(max_pfn_mapped << PAGE_SHIFT); | ||
1132 | 1131 | ||
1133 | /* | 1132 | /* |
1134 | * NOTE: On x86-32, only from this point on, fixmaps are ready for use. | 1133 | * NOTE: On x86-32, only from this point on, fixmaps are ready for use. |
@@ -1159,6 +1158,7 @@ void __init setup_arch(char **cmdline_p) | |||
1159 | early_acpi_boot_init(); | 1158 | early_acpi_boot_init(); |
1160 | 1159 | ||
1161 | initmem_init(); | 1160 | initmem_init(); |
1161 | dma_contiguous_reserve(max_pfn_mapped << PAGE_SHIFT); | ||
1162 | 1162 | ||
1163 | /* | 1163 | /* |
1164 | * Reserve memory for crash kernel after SRAT is parsed so that it | 1164 | * Reserve memory for crash kernel after SRAT is parsed so that it |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2d5200e56357..668d8f2a8781 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -102,8 +102,6 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); | |||
102 | DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); | 102 | DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); |
103 | EXPORT_PER_CPU_SYMBOL(cpu_info); | 103 | EXPORT_PER_CPU_SYMBOL(cpu_info); |
104 | 104 | ||
105 | static DEFINE_PER_CPU(struct completion, die_complete); | ||
106 | |||
107 | atomic_t init_deasserted; | 105 | atomic_t init_deasserted; |
108 | 106 | ||
109 | /* | 107 | /* |
@@ -1305,10 +1303,14 @@ static void __ref remove_cpu_from_maps(int cpu) | |||
1305 | numa_remove_cpu(cpu); | 1303 | numa_remove_cpu(cpu); |
1306 | } | 1304 | } |
1307 | 1305 | ||
1306 | static DEFINE_PER_CPU(struct completion, die_complete); | ||
1307 | |||
1308 | void cpu_disable_common(void) | 1308 | void cpu_disable_common(void) |
1309 | { | 1309 | { |
1310 | int cpu = smp_processor_id(); | 1310 | int cpu = smp_processor_id(); |
1311 | 1311 | ||
1312 | init_completion(&per_cpu(die_complete, smp_processor_id())); | ||
1313 | |||
1312 | remove_siblinginfo(cpu); | 1314 | remove_siblinginfo(cpu); |
1313 | 1315 | ||
1314 | /* It's now safe to remove this processor from the online map */ | 1316 | /* It's now safe to remove this processor from the online map */ |
@@ -1327,16 +1329,21 @@ int native_cpu_disable(void) | |||
1327 | return ret; | 1329 | return ret; |
1328 | 1330 | ||
1329 | clear_local_APIC(); | 1331 | clear_local_APIC(); |
1330 | init_completion(&per_cpu(die_complete, smp_processor_id())); | ||
1331 | cpu_disable_common(); | 1332 | cpu_disable_common(); |
1332 | 1333 | ||
1333 | return 0; | 1334 | return 0; |
1334 | } | 1335 | } |
1335 | 1336 | ||
1337 | void cpu_die_common(unsigned int cpu) | ||
1338 | { | ||
1339 | wait_for_completion_timeout(&per_cpu(die_complete, cpu), HZ); | ||
1340 | } | ||
1341 | |||
1336 | void native_cpu_die(unsigned int cpu) | 1342 | void native_cpu_die(unsigned int cpu) |
1337 | { | 1343 | { |
1338 | /* We don't do anything here: idle task is faking death itself. */ | 1344 | /* We don't do anything here: idle task is faking death itself. */ |
1339 | wait_for_completion_timeout(&per_cpu(die_complete, cpu), HZ); | 1345 | |
1346 | cpu_die_common(cpu); | ||
1340 | 1347 | ||
1341 | /* They ack this in play_dead() by setting CPU_DEAD */ | 1348 | /* They ack this in play_dead() by setting CPU_DEAD */ |
1342 | if (per_cpu(cpu_state, cpu) == CPU_DEAD) { | 1349 | if (per_cpu(cpu_state, cpu) == CPU_DEAD) { |
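Moving the per-cpu die_complete next to cpu_disable_common(), arming it there, and splitting the wait out into cpu_die_common() is what lets the Xen path at the end of this diff share the handshake instead of open-coding its own wait. A condensed sketch of the resulting shape (kernel context; the matching complete() is issued from the play-dead path on the dying CPU):

    static DEFINE_PER_CPU(struct completion, die_complete);

    void cpu_disable_common(void)
    {
            /* Armed by the dying CPU itself, before it leaves the maps. */
            init_completion(&per_cpu(die_complete, smp_processor_id()));
            /* ... remove_siblinginfo(), remove_cpu_from_maps(), fixup_irqs() ... */
    }

    void cpu_die_common(unsigned int cpu)
    {
            /* Shared waiter, used by native_cpu_die() and xen_cpu_die(). */
            wait_for_completion_timeout(&per_cpu(die_complete, cpu), HZ);
    }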
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 0d0e922fafc1..de801f22128a 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -233,32 +233,40 @@ DO_ERROR(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op) | |||
233 | DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun",coprocessor_segment_overrun) | 233 | DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun",coprocessor_segment_overrun) |
234 | DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS) | 234 | DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS) |
235 | DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present) | 235 | DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present) |
236 | #ifdef CONFIG_X86_32 | ||
237 | DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment) | 236 | DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment) |
238 | #endif | ||
239 | DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check) | 237 | DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check) |
240 | 238 | ||
241 | #ifdef CONFIG_X86_64 | 239 | #ifdef CONFIG_X86_64 |
242 | /* Runs on IST stack */ | 240 | /* Runs on IST stack */ |
243 | dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) | ||
244 | { | ||
245 | enum ctx_state prev_state; | ||
246 | |||
247 | prev_state = exception_enter(); | ||
248 | if (notify_die(DIE_TRAP, "stack segment", regs, error_code, | ||
249 | X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) { | ||
250 | preempt_conditional_sti(regs); | ||
251 | do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL); | ||
252 | preempt_conditional_cli(regs); | ||
253 | } | ||
254 | exception_exit(prev_state); | ||
255 | } | ||
256 | |||
257 | dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) | 241 | dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) |
258 | { | 242 | { |
259 | static const char str[] = "double fault"; | 243 | static const char str[] = "double fault"; |
260 | struct task_struct *tsk = current; | 244 | struct task_struct *tsk = current; |
261 | 245 | ||
246 | #ifdef CONFIG_X86_ESPFIX64 | ||
247 | extern unsigned char native_irq_return_iret[]; | ||
248 | |||
249 | /* | ||
250 | * If IRET takes a non-IST fault on the espfix64 stack, then we | ||
251 | * end up promoting it to a doublefault. In that case, modify | ||
252 | * the stack to make it look like we just entered the #GP | ||
253 | * handler from user space, similar to bad_iret. | ||
254 | */ | ||
255 | if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY && | ||
256 | regs->cs == __KERNEL_CS && | ||
257 | regs->ip == (unsigned long)native_irq_return_iret) | ||
258 | { | ||
259 | struct pt_regs *normal_regs = task_pt_regs(current); | ||
260 | |||
261 | /* Fake a #GP(0) from userspace. */ | ||
262 | memmove(&normal_regs->ip, (void *)regs->sp, 5*8); | ||
263 | normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */ | ||
264 | regs->ip = (unsigned long)general_protection; | ||
265 | regs->sp = (unsigned long)&normal_regs->orig_ax; | ||
266 | return; | ||
267 | } | ||
268 | #endif | ||
269 | |||
262 | exception_enter(); | 270 | exception_enter(); |
263 | /* Return not checked because double check cannot be ignored */ | 271 | /* Return not checked because double check cannot be ignored */ |
264 | notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); | 272 | notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); |
@@ -399,6 +407,35 @@ asmlinkage __visible struct pt_regs *sync_regs(struct pt_regs *eregs) | |||
399 | return regs; | 407 | return regs; |
400 | } | 408 | } |
401 | NOKPROBE_SYMBOL(sync_regs); | 409 | NOKPROBE_SYMBOL(sync_regs); |
410 | |||
411 | struct bad_iret_stack { | ||
412 | void *error_entry_ret; | ||
413 | struct pt_regs regs; | ||
414 | }; | ||
415 | |||
416 | asmlinkage __visible | ||
417 | struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) | ||
418 | { | ||
419 | /* | ||
420 | * This is called from entry_64.S early in handling a fault | ||
421 | * caused by a bad iret to user mode. To handle the fault | ||
422 | * correctly, we want to move our stack frame to task_pt_regs | ||
423 | * and we want to pretend that the exception came from the | ||
424 | * iret target. | ||
425 | */ | ||
426 | struct bad_iret_stack *new_stack = | ||
427 | container_of(task_pt_regs(current), | ||
428 | struct bad_iret_stack, regs); | ||
429 | |||
430 | /* Copy the IRET target to the new stack. */ | ||
431 | memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8); | ||
432 | |||
433 | /* Copy the remainder of the stack from the current stack. */ | ||
434 | memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip)); | ||
435 | |||
436 | BUG_ON(!user_mode_vm(&new_stack->regs)); | ||
437 | return new_stack; | ||
438 | } | ||
402 | #endif | 439 | #endif |
403 | 440 | ||
404 | /* | 441 | /* |
@@ -778,7 +815,7 @@ void __init trap_init(void) | |||
778 | set_intr_gate(X86_TRAP_OLD_MF, coprocessor_segment_overrun); | 815 | set_intr_gate(X86_TRAP_OLD_MF, coprocessor_segment_overrun); |
779 | set_intr_gate(X86_TRAP_TS, invalid_TSS); | 816 | set_intr_gate(X86_TRAP_TS, invalid_TSS); |
780 | set_intr_gate(X86_TRAP_NP, segment_not_present); | 817 | set_intr_gate(X86_TRAP_NP, segment_not_present); |
781 | set_intr_gate_ist(X86_TRAP_SS, &stack_segment, STACKFAULT_STACK); | 818 | set_intr_gate(X86_TRAP_SS, stack_segment); |
782 | set_intr_gate(X86_TRAP_GP, general_protection); | 819 | set_intr_gate(X86_TRAP_GP, general_protection); |
783 | set_intr_gate(X86_TRAP_SPURIOUS, spurious_interrupt_bug); | 820 | set_intr_gate(X86_TRAP_SPURIOUS, spurious_interrupt_bug); |
784 | set_intr_gate(X86_TRAP_MF, coprocessor_error); | 821 | set_intr_gate(X86_TRAP_MF, coprocessor_error); |
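A note on the 5*8 that appears in both new memmove() calls: it is the size of the hardware exception frame that IRET consumes, five 8-byte words starting at the saved RIP. Copying only these five words is also why the code fakes orig_ax = 0 afterwards, since the frame IRET was about to consume carries no error-code slot. For reference (an illustrative layout, not a kernel type):

    struct iret_frame {             /* x86_64 hardware frame, low to high address */
            unsigned long ip;       /* RIP    */
            unsigned long cs;       /* CS     */
            unsigned long flags;    /* RFLAGS */
            unsigned long sp;       /* RSP    */
            unsigned long ss;       /* SS     */
    };                              /* sizeof(struct iret_frame) == 5*8 */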
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index b6025f9e36c6..b7e50bba3bbb 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -1166,14 +1166,17 @@ void __init tsc_init(void) | |||
1166 | 1166 | ||
1167 | x86_init.timers.tsc_pre_init(); | 1167 | x86_init.timers.tsc_pre_init(); |
1168 | 1168 | ||
1169 | if (!cpu_has_tsc) | 1169 | if (!cpu_has_tsc) { |
1170 | setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); | ||
1170 | return; | 1171 | return; |
1172 | } | ||
1171 | 1173 | ||
1172 | tsc_khz = x86_platform.calibrate_tsc(); | 1174 | tsc_khz = x86_platform.calibrate_tsc(); |
1173 | cpu_khz = tsc_khz; | 1175 | cpu_khz = tsc_khz; |
1174 | 1176 | ||
1175 | if (!tsc_khz) { | 1177 | if (!tsc_khz) { |
1176 | mark_tsc_unstable("could not calculate TSC khz"); | 1178 | mark_tsc_unstable("could not calculate TSC khz"); |
1179 | setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); | ||
1177 | return; | 1180 | return; |
1178 | } | 1181 | } |
1179 | 1182 | ||
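The tsc.c change encodes a dependency: the local APIC's TSC-deadline timer mode is useless without a usable TSC, so the capability bit is cleared on both early-exit paths before anything can select that timer mode. The pattern in miniature (standalone, with made-up feature bits):

    #include <stdio.h>

    enum { FEATURE_TSC, FEATURE_TSC_DEADLINE_TIMER };

    static unsigned long features = (1UL << FEATURE_TSC) |
                                    (1UL << FEATURE_TSC_DEADLINE_TIMER);

    int main(void)
    {
            int tsc_usable = 0;     /* pretend calibration failed */

            if (!tsc_usable)        /* mirrors setup_clear_cpu_cap() */
                    features &= ~(1UL << FEATURE_TSC_DEADLINE_TIMER);

            printf("deadline timer usable: %d\n",
                   !!(features & (1UL << FEATURE_TSC_DEADLINE_TIMER)));
            return 0;
    }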
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 749f9fa38254..9f8a2faf5040 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -574,12 +574,14 @@ static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst, | |||
574 | case 4: | 574 | case 4: |
575 | ctxt->_eip = (u32)dst; | 575 | ctxt->_eip = (u32)dst; |
576 | break; | 576 | break; |
577 | #ifdef CONFIG_X86_64 | ||
577 | case 8: | 578 | case 8: |
578 | if ((cs_l && is_noncanonical_address(dst)) || | 579 | if ((cs_l && is_noncanonical_address(dst)) || |
579 | (!cs_l && (dst & ~(u32)-1))) | 580 | (!cs_l && (dst >> 32) != 0)) |
580 | return emulate_gp(ctxt, 0); | 581 | return emulate_gp(ctxt, 0); |
581 | ctxt->_eip = dst; | 582 | ctxt->_eip = dst; |
582 | break; | 583 | break; |
584 | #endif | ||
583 | default: | 585 | default: |
584 | WARN(1, "unsupported eip assignment size\n"); | 586 | WARN(1, "unsupported eip assignment size\n"); |
585 | } | 587 | } |
@@ -641,7 +643,8 @@ static bool insn_aligned(struct x86_emulate_ctxt *ctxt, unsigned size) | |||
641 | 643 | ||
642 | static int __linearize(struct x86_emulate_ctxt *ctxt, | 644 | static int __linearize(struct x86_emulate_ctxt *ctxt, |
643 | struct segmented_address addr, | 645 | struct segmented_address addr, |
644 | unsigned size, bool write, bool fetch, | 646 | unsigned *max_size, unsigned size, |
647 | bool write, bool fetch, | ||
645 | ulong *linear) | 648 | ulong *linear) |
646 | { | 649 | { |
647 | struct desc_struct desc; | 650 | struct desc_struct desc; |
@@ -652,10 +655,15 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, | |||
652 | unsigned cpl; | 655 | unsigned cpl; |
653 | 656 | ||
654 | la = seg_base(ctxt, addr.seg) + addr.ea; | 657 | la = seg_base(ctxt, addr.seg) + addr.ea; |
658 | *max_size = 0; | ||
655 | switch (ctxt->mode) { | 659 | switch (ctxt->mode) { |
656 | case X86EMUL_MODE_PROT64: | 660 | case X86EMUL_MODE_PROT64: |
657 | if (((signed long)la << 16) >> 16 != la) | 661 | if (((signed long)la << 16) >> 16 != la) |
658 | return emulate_gp(ctxt, 0); | 662 | return emulate_gp(ctxt, 0); |
663 | |||
664 | *max_size = min_t(u64, ~0u, (1ull << 48) - la); | ||
665 | if (size > *max_size) | ||
666 | goto bad; | ||
659 | break; | 667 | break; |
660 | default: | 668 | default: |
661 | usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL, | 669 | usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL, |
@@ -673,20 +681,25 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, | |||
673 | if ((ctxt->mode == X86EMUL_MODE_REAL) && !fetch && | 681 | if ((ctxt->mode == X86EMUL_MODE_REAL) && !fetch && |
674 | (ctxt->d & NoBigReal)) { | 682 | (ctxt->d & NoBigReal)) { |
675 | /* la is between zero and 0xffff */ | 683 | /* la is between zero and 0xffff */ |
676 | if (la > 0xffff || (u32)(la + size - 1) > 0xffff) | 684 | if (la > 0xffff) |
677 | goto bad; | 685 | goto bad; |
686 | *max_size = 0x10000 - la; | ||
678 | } else if ((desc.type & 8) || !(desc.type & 4)) { | 687 | } else if ((desc.type & 8) || !(desc.type & 4)) { |
679 | /* expand-up segment */ | 688 | /* expand-up segment */ |
680 | if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim) | 689 | if (addr.ea > lim) |
681 | goto bad; | 690 | goto bad; |
691 | *max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea); | ||
682 | } else { | 692 | } else { |
683 | /* expand-down segment */ | 693 | /* expand-down segment */ |
684 | if (addr.ea <= lim || (u32)(addr.ea + size - 1) <= lim) | 694 | if (addr.ea <= lim) |
685 | goto bad; | 695 | goto bad; |
686 | lim = desc.d ? 0xffffffff : 0xffff; | 696 | lim = desc.d ? 0xffffffff : 0xffff; |
687 | if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim) | 697 | if (addr.ea > lim) |
688 | goto bad; | 698 | goto bad; |
699 | *max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea); | ||
689 | } | 700 | } |
701 | if (size > *max_size) | ||
702 | goto bad; | ||
690 | cpl = ctxt->ops->cpl(ctxt); | 703 | cpl = ctxt->ops->cpl(ctxt); |
691 | if (!(desc.type & 8)) { | 704 | if (!(desc.type & 8)) { |
692 | /* data segment */ | 705 | /* data segment */ |
@@ -711,9 +724,9 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, | |||
711 | return X86EMUL_CONTINUE; | 724 | return X86EMUL_CONTINUE; |
712 | bad: | 725 | bad: |
713 | if (addr.seg == VCPU_SREG_SS) | 726 | if (addr.seg == VCPU_SREG_SS) |
714 | return emulate_ss(ctxt, sel); | 727 | return emulate_ss(ctxt, 0); |
715 | else | 728 | else |
716 | return emulate_gp(ctxt, sel); | 729 | return emulate_gp(ctxt, 0); |
717 | } | 730 | } |
718 | 731 | ||
719 | static int linearize(struct x86_emulate_ctxt *ctxt, | 732 | static int linearize(struct x86_emulate_ctxt *ctxt, |
@@ -721,7 +734,8 @@ static int linearize(struct x86_emulate_ctxt *ctxt, | |||
721 | unsigned size, bool write, | 734 | unsigned size, bool write, |
722 | ulong *linear) | 735 | ulong *linear) |
723 | { | 736 | { |
724 | return __linearize(ctxt, addr, size, write, false, linear); | 737 | unsigned max_size; |
738 | return __linearize(ctxt, addr, &max_size, size, write, false, linear); | ||
725 | } | 739 | } |
726 | 740 | ||
727 | 741 | ||
@@ -746,17 +760,27 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt, | |||
746 | static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size) | 760 | static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size) |
747 | { | 761 | { |
748 | int rc; | 762 | int rc; |
749 | unsigned size; | 763 | unsigned size, max_size; |
750 | unsigned long linear; | 764 | unsigned long linear; |
751 | int cur_size = ctxt->fetch.end - ctxt->fetch.data; | 765 | int cur_size = ctxt->fetch.end - ctxt->fetch.data; |
752 | struct segmented_address addr = { .seg = VCPU_SREG_CS, | 766 | struct segmented_address addr = { .seg = VCPU_SREG_CS, |
753 | .ea = ctxt->eip + cur_size }; | 767 | .ea = ctxt->eip + cur_size }; |
754 | 768 | ||
755 | size = 15UL ^ cur_size; | 769 | /* |
756 | rc = __linearize(ctxt, addr, size, false, true, &linear); | 770 | * We do not know exactly how many bytes will be needed, and |
771 | * __linearize is expensive, so fetch as much as possible. We | ||
772 | * just have to avoid going beyond the 15 byte limit, the end | ||
773 | * of the segment, or the end of the page. | ||
774 | * | ||
775 | * __linearize is called with size 0 so that it does not do any | ||
776 | * boundary check itself. Instead, we use max_size to check | ||
777 | * against op_size. | ||
778 | */ | ||
779 | rc = __linearize(ctxt, addr, &max_size, 0, false, true, &linear); | ||
757 | if (unlikely(rc != X86EMUL_CONTINUE)) | 780 | if (unlikely(rc != X86EMUL_CONTINUE)) |
758 | return rc; | 781 | return rc; |
759 | 782 | ||
783 | size = min_t(unsigned, 15UL ^ cur_size, max_size); | ||
760 | size = min_t(unsigned, size, PAGE_SIZE - offset_in_page(linear)); | 784 | size = min_t(unsigned, size, PAGE_SIZE - offset_in_page(linear)); |
761 | 785 | ||
762 | /* | 786 | /* |
@@ -766,7 +790,8 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size) | |||
766 | * still, we must have hit the 15-byte boundary. | 790 | * still, we must have hit the 15-byte boundary. |
767 | */ | 791 | */ |
768 | if (unlikely(size < op_size)) | 792 | if (unlikely(size < op_size)) |
769 | return X86EMUL_UNHANDLEABLE; | 793 | return emulate_gp(ctxt, 0); |
794 | |||
770 | rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end, | 795 | rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end, |
771 | size, &ctxt->exception); | 796 | size, &ctxt->exception); |
772 | if (unlikely(rc != X86EMUL_CONTINUE)) | 797 | if (unlikely(rc != X86EMUL_CONTINUE)) |
@@ -2012,7 +2037,7 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt) | |||
2012 | 2037 | ||
2013 | rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l); | 2038 | rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l); |
2014 | if (rc != X86EMUL_CONTINUE) { | 2039 | if (rc != X86EMUL_CONTINUE) { |
2015 | WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64); | 2040 | WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64); |
2016 | /* assigning eip failed; restore the old cs */ | 2041 | /* assigning eip failed; restore the old cs */ |
2017 | ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS); | 2042 | ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS); |
2018 | return rc; | 2043 | return rc; |
@@ -2109,7 +2134,7 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) | |||
2109 | return rc; | 2134 | return rc; |
2110 | rc = assign_eip_far(ctxt, eip, new_desc.l); | 2135 | rc = assign_eip_far(ctxt, eip, new_desc.l); |
2111 | if (rc != X86EMUL_CONTINUE) { | 2136 | if (rc != X86EMUL_CONTINUE) { |
2112 | WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64); | 2137 | WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64); |
2113 | ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS); | 2138 | ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS); |
2114 | } | 2139 | } |
2115 | return rc; | 2140 | return rc; |
@@ -4262,6 +4287,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, | |||
4262 | fetch_register_operand(op); | 4287 | fetch_register_operand(op); |
4263 | break; | 4288 | break; |
4264 | case OpCL: | 4289 | case OpCL: |
4290 | op->type = OP_IMM; | ||
4265 | op->bytes = 1; | 4291 | op->bytes = 1; |
4266 | op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff; | 4292 | op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff; |
4267 | break; | 4293 | break; |
@@ -4269,6 +4295,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, | |||
4269 | rc = decode_imm(ctxt, op, 1, true); | 4295 | rc = decode_imm(ctxt, op, 1, true); |
4270 | break; | 4296 | break; |
4271 | case OpOne: | 4297 | case OpOne: |
4298 | op->type = OP_IMM; | ||
4272 | op->bytes = 1; | 4299 | op->bytes = 1; |
4273 | op->val = 1; | 4300 | op->val = 1; |
4274 | break; | 4301 | break; |
@@ -4327,21 +4354,27 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, | |||
4327 | ctxt->memop.bytes = ctxt->op_bytes + 2; | 4354 | ctxt->memop.bytes = ctxt->op_bytes + 2; |
4328 | goto mem_common; | 4355 | goto mem_common; |
4329 | case OpES: | 4356 | case OpES: |
4357 | op->type = OP_IMM; | ||
4330 | op->val = VCPU_SREG_ES; | 4358 | op->val = VCPU_SREG_ES; |
4331 | break; | 4359 | break; |
4332 | case OpCS: | 4360 | case OpCS: |
4361 | op->type = OP_IMM; | ||
4333 | op->val = VCPU_SREG_CS; | 4362 | op->val = VCPU_SREG_CS; |
4334 | break; | 4363 | break; |
4335 | case OpSS: | 4364 | case OpSS: |
4365 | op->type = OP_IMM; | ||
4336 | op->val = VCPU_SREG_SS; | 4366 | op->val = VCPU_SREG_SS; |
4337 | break; | 4367 | break; |
4338 | case OpDS: | 4368 | case OpDS: |
4369 | op->type = OP_IMM; | ||
4339 | op->val = VCPU_SREG_DS; | 4370 | op->val = VCPU_SREG_DS; |
4340 | break; | 4371 | break; |
4341 | case OpFS: | 4372 | case OpFS: |
4373 | op->type = OP_IMM; | ||
4342 | op->val = VCPU_SREG_FS; | 4374 | op->val = VCPU_SREG_FS; |
4343 | break; | 4375 | break; |
4344 | case OpGS: | 4376 | case OpGS: |
4377 | op->type = OP_IMM; | ||
4345 | op->val = VCPU_SREG_GS; | 4378 | op->val = VCPU_SREG_GS; |
4346 | break; | 4379 | break; |
4347 | case OpImplicit: | 4380 | case OpImplicit: |
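The __do_insn_fetch_bytes() rework above fetches opportunistically: __linearize() is called with size 0 so it only reports max_size (the bytes left before the segment limit), and the caller then clamps to the 15-byte instruction limit and the page boundary. Note that 15UL ^ cur_size equals 15 - cur_size for any cur_size in 0..15, since all such values fit inside the low four bits. A standalone sketch of the clamping:

    #include <stdio.h>

    #define DEMO_PAGE_SIZE 4096UL
    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    /* Bytes to fetch, given bytes already fetched (cur_size), the segment
     * headroom reported by __linearize (max_size), and the linear address
     * of the next byte. */
    static unsigned long fetch_size(unsigned long cur_size,
                                    unsigned long max_size,
                                    unsigned long linear)
    {
            unsigned long size = MIN(15UL ^ cur_size, max_size);

            return MIN(size, DEMO_PAGE_SIZE - (linear & (DEMO_PAGE_SIZE - 1)));
    }

    int main(void)
    {
            /* 3 bytes fetched, segment allows 100 more, 6 bytes left in page. */
            printf("%lu\n", fetch_size(3, 100, 4090));      /* prints 6 */
            return 0;
    }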
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a8b76c4c95e2..3e556c68351b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -4579,7 +4579,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
4579 | vmcs_write32(TPR_THRESHOLD, 0); | 4579 | vmcs_write32(TPR_THRESHOLD, 0); |
4580 | } | 4580 | } |
4581 | 4581 | ||
4582 | kvm_vcpu_reload_apic_access_page(vcpu); | 4582 | kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); |
4583 | 4583 | ||
4584 | if (vmx_vm_has_apicv(vcpu->kvm)) | 4584 | if (vmx_vm_has_apicv(vcpu->kvm)) |
4585 | memset(&vmx->pi_desc, 0, sizeof(struct pi_desc)); | 4585 | memset(&vmx->pi_desc, 0, sizeof(struct pi_desc)); |
@@ -6426,6 +6426,8 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) | |||
6426 | const unsigned long *fields = shadow_read_write_fields; | 6426 | const unsigned long *fields = shadow_read_write_fields; |
6427 | const int num_fields = max_shadow_read_write_fields; | 6427 | const int num_fields = max_shadow_read_write_fields; |
6428 | 6428 | ||
6429 | preempt_disable(); | ||
6430 | |||
6429 | vmcs_load(shadow_vmcs); | 6431 | vmcs_load(shadow_vmcs); |
6430 | 6432 | ||
6431 | for (i = 0; i < num_fields; i++) { | 6433 | for (i = 0; i < num_fields; i++) { |
@@ -6449,6 +6451,8 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) | |||
6449 | 6451 | ||
6450 | vmcs_clear(shadow_vmcs); | 6452 | vmcs_clear(shadow_vmcs); |
6451 | vmcs_load(vmx->loaded_vmcs->vmcs); | 6453 | vmcs_load(vmx->loaded_vmcs->vmcs); |
6454 | |||
6455 | preempt_enable(); | ||
6452 | } | 6456 | } |
6453 | 6457 | ||
6454 | static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) | 6458 | static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) |
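The preempt_disable()/preempt_enable() pair matters because the current VMCS is per-CPU state: if the task migrated between vmcs_load(shadow_vmcs) and the restoring vmcs_load(), the shadow VMCS would stay current on the old CPU while the loaded VMCS was reloaded on the wrong one. The guarded shape, condensed from the hunk:

    preempt_disable();
    vmcs_load(shadow_vmcs);             /* make the shadow VMCS current */
    /* ... vmcs_read/vmcs_write the shadow_read_write_fields ... */
    vmcs_clear(shadow_vmcs);
    vmcs_load(vmx->loaded_vmcs->vmcs);  /* restore on the same CPU */
    preempt_enable();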
diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c index 7609e0e421ec..1318f75d56e4 100644 --- a/arch/x86/lib/csum-wrappers_64.c +++ b/arch/x86/lib/csum-wrappers_64.c | |||
@@ -41,9 +41,8 @@ csum_partial_copy_from_user(const void __user *src, void *dst, | |||
41 | while (((unsigned long)src & 6) && len >= 2) { | 41 | while (((unsigned long)src & 6) && len >= 2) { |
42 | __u16 val16; | 42 | __u16 val16; |
43 | 43 | ||
44 | *errp = __get_user(val16, (const __u16 __user *)src); | 44 | if (__get_user(val16, (const __u16 __user *)src)) |
45 | if (*errp) | 45 | goto out_err; |
46 | return isum; | ||
47 | 46 | ||
48 | *(__u16 *)dst = val16; | 47 | *(__u16 *)dst = val16; |
49 | isum = (__force __wsum)add32_with_carry( | 48 | isum = (__force __wsum)add32_with_carry( |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 4cb8763868fc..4e5dfec750fc 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -1123,7 +1123,7 @@ void mark_rodata_ro(void) | |||
1123 | unsigned long end = (unsigned long) &__end_rodata_hpage_align; | 1123 | unsigned long end = (unsigned long) &__end_rodata_hpage_align; |
1124 | unsigned long text_end = PFN_ALIGN(&__stop___ex_table); | 1124 | unsigned long text_end = PFN_ALIGN(&__stop___ex_table); |
1125 | unsigned long rodata_end = PFN_ALIGN(&__end_rodata); | 1125 | unsigned long rodata_end = PFN_ALIGN(&__end_rodata); |
1126 | unsigned long all_end = PFN_ALIGN(&_end); | 1126 | unsigned long all_end; |
1127 | 1127 | ||
1128 | printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", | 1128 | printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", |
1129 | (end - start) >> 10); | 1129 | (end - start) >> 10); |
@@ -1134,7 +1134,16 @@ void mark_rodata_ro(void) | |||
1134 | /* | 1134 | /* |
1135 | * The rodata/data/bss/brk section (but not the kernel text!) | 1135 | * The rodata/data/bss/brk section (but not the kernel text!) |
1136 | * should also be not-executable. | 1136 | * should also be not-executable. |
1137 | * | ||
1138 | * We align all_end to PMD_SIZE because the existing mapping | ||
1139 | * is a full PMD. If we would align _brk_end to PAGE_SIZE we | ||
1140 | * split the PMD and the remainder between _brk_end and the end | ||
1141 | * of the PMD will remain mapped executable. | ||
1142 | * | ||
1143 | * Any PMD which was setup after the one which covers _brk_end | ||
1144 | * has been zapped already via cleanup_highmem(). | ||
1137 | */ | 1145 | */ |
1146 | all_end = roundup((unsigned long)_brk_end, PMD_SIZE); | ||
1138 | set_memory_nx(rodata_start, (all_end - rodata_start) >> PAGE_SHIFT); | 1147 | set_memory_nx(rodata_start, (all_end - rodata_start) >> PAGE_SHIFT); |
1139 | 1148 | ||
1140 | rodata_test(); | 1149 | rodata_test(); |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index ae242a7c11c7..36de293caf25 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -409,7 +409,7 @@ phys_addr_t slow_virt_to_phys(void *__virt_addr) | |||
409 | psize = page_level_size(level); | 409 | psize = page_level_size(level); |
410 | pmask = page_level_mask(level); | 410 | pmask = page_level_mask(level); |
411 | offset = virt_addr & ~pmask; | 411 | offset = virt_addr & ~pmask; |
412 | phys_addr = pte_pfn(*pte) << PAGE_SHIFT; | 412 | phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT; |
413 | return (phys_addr | offset); | 413 | return (phys_addr | offset); |
414 | } | 414 | } |
415 | EXPORT_SYMBOL_GPL(slow_virt_to_phys); | 415 | EXPORT_SYMBOL_GPL(slow_virt_to_phys); |
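The pageattr.c cast fixes a truncation visible on 32-bit PAE, where the PFN can address physical memory above 4 GiB but unsigned long is only 32 bits wide: shifting before widening throws the high bits away. A standalone demonstration with fixed-width types:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint32_t pfn = 0x100000;        /* page 1M * 4 KiB = 4 GiB, needs bit 32 */

            uint32_t truncated = pfn << 12;             /* wraps to 0 in 32 bits */
            uint64_t widened   = (uint64_t)pfn << 12;   /* the fixed pattern */

            printf("truncated: %#x\n", truncated);
            printf("widened:   %#llx\n", (unsigned long long)widened);
            return 0;
    }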
diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c index 3c53a90fdb18..c14ad34776c4 100644 --- a/arch/x86/platform/intel-mid/sfi.c +++ b/arch/x86/platform/intel-mid/sfi.c | |||
@@ -106,6 +106,7 @@ int __init sfi_parse_mtmr(struct sfi_table_header *table) | |||
106 | mp_irq.dstapic = MP_APIC_ALL; | 106 | mp_irq.dstapic = MP_APIC_ALL; |
107 | mp_irq.dstirq = pentry->irq; | 107 | mp_irq.dstirq = pentry->irq; |
108 | mp_save_irq(&mp_irq); | 108 | mp_save_irq(&mp_irq); |
109 | mp_map_gsi_to_irq(pentry->irq, IOAPIC_MAP_ALLOC); | ||
109 | } | 110 | } |
110 | 111 | ||
111 | return 0; | 112 | return 0; |
@@ -176,6 +177,7 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table) | |||
176 | mp_irq.dstapic = MP_APIC_ALL; | 177 | mp_irq.dstapic = MP_APIC_ALL; |
177 | mp_irq.dstirq = pentry->irq; | 178 | mp_irq.dstirq = pentry->irq; |
178 | mp_save_irq(&mp_irq); | 179 | mp_save_irq(&mp_irq); |
180 | mp_map_gsi_to_irq(pentry->irq, IOAPIC_MAP_ALLOC); | ||
179 | } | 181 | } |
180 | return 0; | 182 | return 0; |
181 | } | 183 | } |
diff --git a/arch/x86/tools/calc_run_size.pl b/arch/x86/tools/calc_run_size.pl new file mode 100644 index 000000000000..23210baade2d --- /dev/null +++ b/arch/x86/tools/calc_run_size.pl | |||
@@ -0,0 +1,39 @@ | |||
1 | #!/usr/bin/perl | ||
2 | # | ||
3 | # Calculate the amount of space needed to run the kernel, including room for | ||
4 | # the .bss and .brk sections. | ||
5 | # | ||
6 | # Usage: | ||
7 | # objdump -h a.out | perl calc_run_size.pl | ||
8 | use strict; | ||
9 | |||
10 | my $mem_size = 0; | ||
11 | my $file_offset = 0; | ||
12 | |||
13 | my $sections=" *[0-9]+ \.(?:bss|brk) +"; | ||
14 | while (<>) { | ||
15 | if (/^$sections([0-9a-f]+) +(?:[0-9a-f]+ +){2}([0-9a-f]+)/) { | ||
16 | my $size = hex($1); | ||
17 | my $offset = hex($2); | ||
18 | $mem_size += $size; | ||
19 | if ($file_offset == 0) { | ||
20 | $file_offset = $offset; | ||
21 | } elsif ($file_offset != $offset) { | ||
22 | # BFD linker shows the same file offset in ELF. | ||
23 | # Gold linker shows them as consecutive. | ||
24 | next if ($file_offset + $mem_size == $offset + $size); | ||
25 | |||
26 | printf STDERR "file_offset: 0x%lx\n", $file_offset; | ||
27 | printf STDERR "mem_size: 0x%lx\n", $mem_size; | ||
28 | printf STDERR "offset: 0x%lx\n", $offset; | ||
29 | printf STDERR "size: 0x%lx\n", $size; | ||
30 | |||
31 | die ".bss and .brk are non-contiguous\n"; | ||
32 | } | ||
33 | } | ||
34 | } | ||
35 | |||
36 | if ($file_offset == 0) { | ||
37 | die "Never found .bss or .brk file offset\n"; | ||
38 | } | ||
39 | printf("%d\n", $mem_size + $file_offset); | ||
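What the script computes: .bss and .brk occupy no file space, so the kernel's run size is their combined in-memory size added to the file offset at which they begin (after checking the two sections are contiguous). The same arithmetic restated in C, with illustrative numbers standing in for objdump output:

    #include <stdio.h>

    int main(void)
    {
            unsigned long file_offset = 0x1400000;  /* offset of .bss/.brk, illustrative */
            unsigned long bss_size    = 0x20000;    /* from objdump -h, illustrative */
            unsigned long brk_size    = 0x6000;

            printf("%lu\n", file_offset + bss_size + brk_size);
            return 0;
    }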
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 8650cdb53209..4c071aeb8417 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -510,6 +510,9 @@ static void xen_cpu_die(unsigned int cpu) | |||
510 | current->state = TASK_UNINTERRUPTIBLE; | 510 | current->state = TASK_UNINTERRUPTIBLE; |
511 | schedule_timeout(HZ/10); | 511 | schedule_timeout(HZ/10); |
512 | } | 512 | } |
513 | |||
514 | cpu_die_common(cpu); | ||
515 | |||
513 | xen_smp_intr_free(cpu); | 516 | xen_smp_intr_free(cpu); |
514 | xen_uninit_lock_cpu(cpu); | 517 | xen_uninit_lock_cpu(cpu); |
515 | xen_teardown_timer(cpu); | 518 | xen_teardown_timer(cpu); |