author     Ingo Molnar <mingo@elte.hu>   2012-03-14 02:44:11 -0400
committer  Ingo Molnar <mingo@elte.hu>   2012-03-14 02:44:11 -0400
commit     cd593accdcc27ccbe6498d9ad1c2b6cc8e1d830d (patch)
tree       9424d3ac86e753706cc6c3d7e6072d2a73711e29 /arch/x86/kernel
parent     11b91d6fe7272452c999573bab33c15c2f03dc31 (diff)
parent     fde7d9049e55ab85a390be7f415d74c9f62dd0f9 (diff)

Merge tag 'v3.3-rc7' into x86/mce

Merge reason: Update from an ancient -rc1 base to an almost-final stable kernel.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel')
 arch/x86/kernel/cpu/common.c                |  5
 arch/x86/kernel/cpu/intel_cacheinfo.c       | 44
 arch/x86/kernel/cpu/mcheck/mce_amd.c        |  2
 arch/x86/kernel/cpu/perf_event.h            |  8
 arch/x86/kernel/cpu/perf_event_amd.c        | 37
 arch/x86/kernel/cpu/perf_event_intel_ds.c   |  1
 arch/x86/kernel/cpu/perf_event_intel_lbr.c  |  2
 arch/x86/kernel/dumpstack.c                 |  3
 arch/x86/kernel/dumpstack_64.c              |  8
 arch/x86/kernel/entry_64.S                  |  9
 arch/x86/kernel/microcode_amd.c             | 25
 arch/x86/kernel/process_32.c                | 26
 arch/x86/kernel/process_64.c                | 30
 arch/x86/kernel/reboot.c                    | 36
 arch/x86/kernel/traps.c                     | 43
 arch/x86/kernel/xsave.c                     | 12
 16 files changed, 174 insertions(+), 117 deletions(-)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index d43cad74f166..c0f7d68d318f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1044,6 +1044,9 @@ DEFINE_PER_CPU(char *, irq_stack_ptr) =
 
 DEFINE_PER_CPU(unsigned int, irq_count) = -1;
 
+DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
+EXPORT_PER_CPU_SYMBOL(fpu_owner_task);
+
 /*
  * Special IST stacks which the CPU switches to when it calls
  * an IST-marked descriptor entry. Up to 7 stacks (hardware
@@ -1111,6 +1114,8 @@ void debug_stack_reset(void)
 
 DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
 EXPORT_PER_CPU_SYMBOL(current_task);
+DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
+EXPORT_PER_CPU_SYMBOL(fpu_owner_task);
 
 #ifdef CONFIG_CC_STACKPROTECTOR
 DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 6b45e5e7a901..73d08ed98a64 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -326,8 +326,7 @@ static void __cpuinit amd_calc_l3_indices(struct amd_northbridge *nb)
 	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
 }
 
-static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf,
-					int index)
+static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
 {
 	int node;
 
@@ -725,14 +724,16 @@ static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
 #define CPUID4_INFO_IDX(x, y)	(&((per_cpu(ici_cpuid4_info, x))[y]))
 
 #ifdef CONFIG_SMP
-static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
+
+static int __cpuinit cache_shared_amd_cpu_map_setup(unsigned int cpu, int index)
 {
-	struct _cpuid4_info *this_leaf, *sibling_leaf;
-	unsigned long num_threads_sharing;
-	int index_msb, i, sibling;
+	struct _cpuid4_info *this_leaf;
+	int ret, i, sibling;
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
-	if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
+	ret = 0;
+	if (index == 3) {
+		ret = 1;
 		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
 			if (!per_cpu(ici_cpuid4_info, i))
 				continue;
@@ -743,8 +744,35 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
 				set_bit(sibling, this_leaf->shared_cpu_map);
 			}
 		}
-		return;
+	} else if ((c->x86 == 0x15) && ((index == 1) || (index == 2))) {
+		ret = 1;
+		for_each_cpu(i, cpu_sibling_mask(cpu)) {
+			if (!per_cpu(ici_cpuid4_info, i))
+				continue;
+			this_leaf = CPUID4_INFO_IDX(i, index);
+			for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
+				if (!cpu_online(sibling))
+					continue;
+				set_bit(sibling, this_leaf->shared_cpu_map);
+			}
+		}
 	}
+
+	return ret;
+}
+
+static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
+{
+	struct _cpuid4_info *this_leaf, *sibling_leaf;
+	unsigned long num_threads_sharing;
+	int index_msb, i;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+	if (c->x86_vendor == X86_VENDOR_AMD) {
+		if (cache_shared_amd_cpu_map_setup(cpu, index))
+			return;
+	}
+
 	this_leaf = CPUID4_INFO_IDX(cpu, index);
 	num_threads_sharing = 1 + this_leaf->base.eax.split.num_threads_sharing;
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 786e76a86322..e4eeaaf58a47 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -528,6 +528,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 
 	sprintf(name, "threshold_bank%i", bank);
 
+#ifdef CONFIG_SMP
 	if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) {	/* symlink */
 		i = cpumask_first(cpu_llc_shared_mask(cpu));
 
@@ -553,6 +554,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 
 		goto out;
 	}
+#endif
 
 	b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
 	if (!b) {
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 8944062f46e2..c30c807ddc72 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -147,7 +147,9 @@ struct cpu_hw_events {
 	/*
 	 * AMD specific bits
 	 */
 	struct amd_nb		*amd_nb;
+	/* Inverted mask of bits to clear in the perf_ctr ctrl registers */
+	u64			perf_ctr_virt_mask;
 
 	void			*kfree_on_online;
 };
@@ -417,9 +419,11 @@ void x86_pmu_disable_all(void);
 static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
 					  u64 enable_mask)
 {
+	u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask);
+
 	if (hwc->extra_reg.reg)
 		wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
-	wrmsrl(hwc->config_base, hwc->config | enable_mask);
+	wrmsrl(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask);
 }
 
 void x86_pmu_enable_all(int added);
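Note on the two hunks above: perf_ctr_virt_mask is an inverted mask applied on every event-select write, so whatever bits it holds are stripped just before the MSR is programmed. The following stand-alone sketch (plain user-space C, not part of the patch) walks through that arithmetic; the bit positions used for the enable and Host-Only bits are the usual AMD ones and are an assumption here, not something this diff states:

/* Illustration of the masking done in __x86_pmu_enable_event() above. */
#include <stdio.h>
#include <stdint.h>

#define ARCH_PERFMON_EVENTSEL_ENABLE	(1ULL << 22)	/* assumed bit position */
#define AMD_PERFMON_EVENTSEL_HOSTONLY	(1ULL << 41)	/* assumed bit position */

int main(void)
{
	/* some event select with the Host-Only bit set (event code illustrative) */
	uint64_t config = 0x76ULL | AMD_PERFMON_EVENTSEL_HOSTONLY;
	uint64_t enable_mask = ARCH_PERFMON_EVENTSEL_ENABLE;
	uint64_t disable_mask;

	/* SVM disabled: the mask keeps HOSTONLY, so the bit is stripped and
	 * the event still counts on the host. */
	disable_mask = AMD_PERFMON_EVENTSEL_HOSTONLY;
	printf("svm off: %#llx\n",
	       (unsigned long long)((config | enable_mask) & ~disable_mask));

	/* SVM enabled: the mask is cleared and HOSTONLY reaches the hardware. */
	disable_mask = 0;
	printf("svm on:  %#llx\n",
	       (unsigned long long)((config | enable_mask) & ~disable_mask));

	return 0;
}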
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 0397b23be8e9..67250a52430b 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -1,4 +1,5 @@
1#include <linux/perf_event.h> 1#include <linux/perf_event.h>
2#include <linux/export.h>
2#include <linux/types.h> 3#include <linux/types.h>
3#include <linux/init.h> 4#include <linux/init.h>
4#include <linux/slab.h> 5#include <linux/slab.h>
@@ -357,7 +358,9 @@ static void amd_pmu_cpu_starting(int cpu)
357 struct amd_nb *nb; 358 struct amd_nb *nb;
358 int i, nb_id; 359 int i, nb_id;
359 360
360 if (boot_cpu_data.x86_max_cores < 2) 361 cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY;
362
363 if (boot_cpu_data.x86_max_cores < 2 || boot_cpu_data.x86 == 0x15)
361 return; 364 return;
362 365
363 nb_id = amd_get_nb_id(cpu); 366 nb_id = amd_get_nb_id(cpu);
@@ -587,9 +590,9 @@ static __initconst const struct x86_pmu amd_pmu_f15h = {
587 .put_event_constraints = amd_put_event_constraints, 590 .put_event_constraints = amd_put_event_constraints,
588 591
589 .cpu_prepare = amd_pmu_cpu_prepare, 592 .cpu_prepare = amd_pmu_cpu_prepare,
590 .cpu_starting = amd_pmu_cpu_starting,
591 .cpu_dead = amd_pmu_cpu_dead, 593 .cpu_dead = amd_pmu_cpu_dead,
592#endif 594#endif
595 .cpu_starting = amd_pmu_cpu_starting,
593}; 596};
594 597
595__init int amd_pmu_init(void) 598__init int amd_pmu_init(void)
@@ -621,3 +624,33 @@ __init int amd_pmu_init(void)
621 624
622 return 0; 625 return 0;
623} 626}
627
628void amd_pmu_enable_virt(void)
629{
630 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
631
632 cpuc->perf_ctr_virt_mask = 0;
633
634 /* Reload all events */
635 x86_pmu_disable_all();
636 x86_pmu_enable_all(0);
637}
638EXPORT_SYMBOL_GPL(amd_pmu_enable_virt);
639
640void amd_pmu_disable_virt(void)
641{
642 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
643
644 /*
645 * We only mask out the Host-only bit so that host-only counting works
646 * when SVM is disabled. If someone sets up a guest-only counter when
647 * SVM is disabled the Guest-only bits still gets set and the counter
648 * will not count anything.
649 */
650 cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY;
651
652 /* Reload all events */
653 x86_pmu_disable_all();
654 x86_pmu_enable_all(0);
655}
656EXPORT_SYMBOL_GPL(amd_pmu_disable_virt);
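The two exports added above are intended to be toggled by the host virtualization code: while SVM is enabled the Host-Only/Guest-Only filter bits are meaningful and may reach the hardware, and once SVM is disabled the Host-Only bit has to be masked out again or host-only events stop counting. A minimal sketch of how a host-side SVM module might pair the calls; the svm_hardware_enable()/svm_hardware_disable() hook names and the <asm/perf_event.h> home of the prototypes are assumptions for illustration, not taken from this diff:

#include <asm/perf_event.h>	/* assumed to declare amd_pmu_{enable,disable}_virt() */

static int svm_hardware_enable(void *garbage)
{
	/* ... set EFER.SVME, program the host save area ... */

	/* SVM is now on; let Host-Only/Guest-Only bits reach the event MSRs. */
	amd_pmu_enable_virt();
	return 0;
}

static void svm_hardware_disable(void *garbage)
{
	/* SVM is going away; mask Host-Only again so plain host-only
	 * counting keeps working (see amd_pmu_disable_virt() above). */
	amd_pmu_disable_virt();

	/* ... clear EFER.SVME ... */
}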
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 73da6b64f5b7..d6bd49faa40c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -439,7 +439,6 @@ void intel_pmu_pebs_enable(struct perf_event *event)
439 hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; 439 hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
440 440
441 cpuc->pebs_enabled |= 1ULL << hwc->idx; 441 cpuc->pebs_enabled |= 1ULL << hwc->idx;
442 WARN_ON_ONCE(cpuc->enabled);
443 442
444 if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) 443 if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
445 intel_pmu_lbr_enable(event); 444 intel_pmu_lbr_enable(event);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 3fab3de3ce96..47a7e63bfe54 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -72,8 +72,6 @@ void intel_pmu_lbr_enable(struct perf_event *event)
 	if (!x86_pmu.lbr_nr)
 		return;
 
-	WARN_ON_ONCE(cpuc->enabled);
-
 	/*
 	 * Reset the LBR stack if we changed task context to
 	 * avoid data leaks.
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 1aae78f775fc..4025fe4f928f 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -252,7 +252,8 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
 	unsigned short ss;
 	unsigned long sp;
 #endif
-	printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
+	printk(KERN_DEFAULT
+	       "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
 #ifdef CONFIG_PREEMPT
 	printk("PREEMPT ");
 #endif
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 6d728d9284bd..17107bd6e1f0 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -129,7 +129,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 	if (!stack) {
 		if (regs)
 			stack = (unsigned long *)regs->sp;
-		else if (task && task != current)
+		else if (task != current)
 			stack = (unsigned long *)task->thread.sp;
 		else
 			stack = &dummy;
@@ -269,11 +269,11 @@ void show_registers(struct pt_regs *regs)
 		unsigned char c;
 		u8 *ip;
 
-		printk(KERN_EMERG "Stack:\n");
+		printk(KERN_DEFAULT "Stack:\n");
 		show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
-				   0, KERN_EMERG);
+				   0, KERN_DEFAULT);
 
-		printk(KERN_EMERG "Code: ");
+		printk(KERN_DEFAULT "Code: ");
 
 		ip = (u8 *)regs->ip - code_prologue;
 		if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 3fe8239fd8fb..1333d9851778 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1532,10 +1532,17 @@ ENTRY(nmi)
 	pushq_cfi %rdx
 
 	/*
+	 * If %cs was not the kernel segment, then the NMI triggered in user
+	 * space, which means it is definitely not nested.
+	 */
+	cmpl $__KERNEL_CS, 16(%rsp)
+	jne first_nmi
+
+	/*
 	 * Check the special variable on the stack to see if NMIs are
 	 * executing.
 	 */
-	cmp $1, -8(%rsp)
+	cmpl $1, -8(%rsp)
 	je nested_nmi
 
 	/*
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index fe86493f3ed1..73465aab28f8 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -311,13 +311,33 @@ out:
 	return state;
 }
 
+/*
+ * AMD microcode firmware naming convention, up to family 15h they are in
+ * the legacy file:
+ *
+ *    amd-ucode/microcode_amd.bin
+ *
+ * This legacy file is always smaller than 2K in size.
+ *
+ * Starting at family 15h they are in family specific firmware files:
+ *
+ *    amd-ucode/microcode_amd_fam15h.bin
+ *    amd-ucode/microcode_amd_fam16h.bin
+ *    ...
+ *
+ * These might be larger than 2K.
+ */
 static enum ucode_state request_microcode_amd(int cpu, struct device *device)
 {
-	const char *fw_name = "amd-ucode/microcode_amd.bin";
+	char fw_name[36] = "amd-ucode/microcode_amd.bin";
 	const struct firmware *fw;
 	enum ucode_state ret = UCODE_NFOUND;
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+	if (c->x86 >= 0x15)
+		snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86);
 
-	if (request_firmware(&fw, fw_name, device)) {
+	if (request_firmware(&fw, (const char *)fw_name, device)) {
 		pr_err("failed to load file %s\n", fw_name);
 		goto out;
 	}
@@ -340,7 +360,6 @@ out:
 static enum ucode_state
 request_microcode_user(int cpu, const void __user *buf, size_t size)
 {
-	pr_info("AMD microcode update via /dev/cpu/microcode not supported\n");
 	return UCODE_ERROR;
 }
 
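To make the firmware naming convention documented in the new comment block concrete, here is a small stand-alone sketch (plain user-space C, not part of the patch) that reproduces the same snprintf() selection and prints the file name the driver would request for a few CPU families:

/* Stand-alone illustration of the naming rule above; not kernel code. */
#include <stdio.h>

static void show_fw_name(unsigned int family)
{
	char fw_name[36] = "amd-ucode/microcode_amd.bin";	/* legacy default */

	if (family >= 0x15)
		snprintf(fw_name, sizeof(fw_name),
			 "amd-ucode/microcode_amd_fam%.2xh.bin", family);

	printf("family 0x%02x -> %s\n", family, fw_name);
}

int main(void)
{
	show_fw_name(0x10);	/* amd-ucode/microcode_amd.bin */
	show_fw_name(0x15);	/* amd-ucode/microcode_amd_fam15h.bin */
	show_fw_name(0x16);	/* amd-ucode/microcode_amd_fam16h.bin */
	return 0;
}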
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 485204f58cda..c08d1ff12b7c 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -214,6 +214,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
214 214
215 task_user_gs(p) = get_user_gs(regs); 215 task_user_gs(p) = get_user_gs(regs);
216 216
217 p->fpu_counter = 0;
217 p->thread.io_bitmap_ptr = NULL; 218 p->thread.io_bitmap_ptr = NULL;
218 tsk = current; 219 tsk = current;
219 err = -ENOMEM; 220 err = -ENOMEM;
@@ -299,22 +300,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
299 *next = &next_p->thread; 300 *next = &next_p->thread;
300 int cpu = smp_processor_id(); 301 int cpu = smp_processor_id();
301 struct tss_struct *tss = &per_cpu(init_tss, cpu); 302 struct tss_struct *tss = &per_cpu(init_tss, cpu);
302 bool preload_fpu; 303 fpu_switch_t fpu;
303 304
304 /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ 305 /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
305 306
306 /* 307 fpu = switch_fpu_prepare(prev_p, next_p, cpu);
307 * If the task has used fpu the last 5 timeslices, just do a full
308 * restore of the math state immediately to avoid the trap; the
309 * chances of needing FPU soon are obviously high now
310 */
311 preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
312
313 __unlazy_fpu(prev_p);
314
315 /* we're going to use this soon, after a few expensive things */
316 if (preload_fpu)
317 prefetch(next->fpu.state);
318 308
319 /* 309 /*
320 * Reload esp0. 310 * Reload esp0.
@@ -354,11 +344,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
354 task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) 344 task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
355 __switch_to_xtra(prev_p, next_p, tss); 345 __switch_to_xtra(prev_p, next_p, tss);
356 346
357 /* If we're going to preload the fpu context, make sure clts
358 is run while we're batching the cpu state updates. */
359 if (preload_fpu)
360 clts();
361
362 /* 347 /*
363 * Leave lazy mode, flushing any hypercalls made here. 348 * Leave lazy mode, flushing any hypercalls made here.
364 * This must be done before restoring TLS segments so 349 * This must be done before restoring TLS segments so
@@ -368,15 +353,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
368 */ 353 */
369 arch_end_context_switch(next_p); 354 arch_end_context_switch(next_p);
370 355
371 if (preload_fpu)
372 __math_state_restore();
373
374 /* 356 /*
375 * Restore %gs if needed (which is common) 357 * Restore %gs if needed (which is common)
376 */ 358 */
377 if (prev->gs | next->gs) 359 if (prev->gs | next->gs)
378 lazy_load_gs(next->gs); 360 lazy_load_gs(next->gs);
379 361
362 switch_fpu_finish(next_p, fpu);
363
380 percpu_write(current_task, next_p); 364 percpu_write(current_task, next_p);
381 365
382 return prev_p; 366 return prev_p;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 9b9fe4a85c87..cfa5c90c01db 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -286,6 +286,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
286 286
287 set_tsk_thread_flag(p, TIF_FORK); 287 set_tsk_thread_flag(p, TIF_FORK);
288 288
289 p->fpu_counter = 0;
289 p->thread.io_bitmap_ptr = NULL; 290 p->thread.io_bitmap_ptr = NULL;
290 291
291 savesegment(gs, p->thread.gsindex); 292 savesegment(gs, p->thread.gsindex);
@@ -386,18 +387,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
386 int cpu = smp_processor_id(); 387 int cpu = smp_processor_id();
387 struct tss_struct *tss = &per_cpu(init_tss, cpu); 388 struct tss_struct *tss = &per_cpu(init_tss, cpu);
388 unsigned fsindex, gsindex; 389 unsigned fsindex, gsindex;
389 bool preload_fpu; 390 fpu_switch_t fpu;
390 391
391 /* 392 fpu = switch_fpu_prepare(prev_p, next_p, cpu);
392 * If the task has used fpu the last 5 timeslices, just do a full
393 * restore of the math state immediately to avoid the trap; the
394 * chances of needing FPU soon are obviously high now
395 */
396 preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
397
398 /* we're going to use this soon, after a few expensive things */
399 if (preload_fpu)
400 prefetch(next->fpu.state);
401 393
402 /* 394 /*
403 * Reload esp0, LDT and the page table pointer: 395 * Reload esp0, LDT and the page table pointer:
@@ -427,13 +419,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
427 419
428 load_TLS(next, cpu); 420 load_TLS(next, cpu);
429 421
430 /* Must be after DS reload */
431 __unlazy_fpu(prev_p);
432
433 /* Make sure cpu is ready for new context */
434 if (preload_fpu)
435 clts();
436
437 /* 422 /*
438 * Leave lazy mode, flushing any hypercalls made here. 423 * Leave lazy mode, flushing any hypercalls made here.
439 * This must be done before restoring TLS segments so 424 * This must be done before restoring TLS segments so
@@ -474,6 +459,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
474 wrmsrl(MSR_KERNEL_GS_BASE, next->gs); 459 wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
475 prev->gsindex = gsindex; 460 prev->gsindex = gsindex;
476 461
462 switch_fpu_finish(next_p, fpu);
463
477 /* 464 /*
478 * Switch the PDA and FPU contexts. 465 * Switch the PDA and FPU contexts.
479 */ 466 */
@@ -492,13 +479,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
492 task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) 479 task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
493 __switch_to_xtra(prev_p, next_p, tss); 480 __switch_to_xtra(prev_p, next_p, tss);
494 481
495 /*
496 * Preload the FPU context, now that we've determined that the
497 * task is likely to be using it.
498 */
499 if (preload_fpu)
500 __math_state_restore();
501
502 return prev_p; 482 return prev_p;
503} 483}
504 484
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 37a458b521a6..d840e69a853c 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -39,6 +39,14 @@ static int reboot_mode;
 enum reboot_type reboot_type = BOOT_ACPI;
 int reboot_force;
 
+/* This variable is used privately to keep track of whether or not
+ * reboot_type is still set to its default value (i.e., reboot= hasn't
+ * been set on the command line). This is needed so that we can
+ * suppress DMI scanning for reboot quirks. Without it, it's
+ * impossible to override a faulty reboot quirk without recompiling.
+ */
+static int reboot_default = 1;
+
 #if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
 static int reboot_cpu = -1;
 #endif
@@ -67,6 +75,12 @@ bool port_cf9_safe = false;
 static int __init reboot_setup(char *str)
 {
 	for (;;) {
+		/* Having anything passed on the command line via
+		 * reboot= will cause us to disable DMI checking
+		 * below.
+		 */
+		reboot_default = 0;
+
 		switch (*str) {
 		case 'w':
 			reboot_mode = 0x1234;
@@ -295,14 +309,6 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
 			DMI_MATCH(DMI_BOARD_NAME, "P4S800"),
 		},
 	},
-	{	/* Handle problems with rebooting on VersaLogic Menlow boards */
-		.callback = set_bios_reboot,
-		.ident = "VersaLogic Menlow based board",
-		.matches = {
-			DMI_MATCH(DMI_BOARD_VENDOR, "VersaLogic Corporation"),
-			DMI_MATCH(DMI_BOARD_NAME, "VersaLogic Menlow board"),
-		},
-	},
 	{	/* Handle reboot issue on Acer Aspire one */
 		.callback = set_kbd_reboot,
 		.ident = "Acer Aspire One A110",
@@ -316,7 +322,12 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
 
 static int __init reboot_init(void)
 {
-	dmi_check_system(reboot_dmi_table);
+	/* Only do the DMI check if reboot_type hasn't been overridden
+	 * on the command line
+	 */
+	if (reboot_default) {
+		dmi_check_system(reboot_dmi_table);
+	}
 	return 0;
 }
 core_initcall(reboot_init);
@@ -465,7 +476,12 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {
 
 static int __init pci_reboot_init(void)
 {
-	dmi_check_system(pci_reboot_dmi_table);
+	/* Only do the DMI check if reboot_type hasn't been overridden
+	 * on the command line
+	 */
+	if (reboot_default) {
+		dmi_check_system(pci_reboot_dmi_table);
+	}
 	return 0;
 }
 core_initcall(pci_reboot_init);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 482ec3af2067..4bbe04d96744 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -571,41 +571,18 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
 }
 
 /*
- * __math_state_restore assumes that cr0.TS is already clear and the
- * fpu state is all ready for use.  Used during context switch.
- */
-void __math_state_restore(void)
-{
-	struct thread_info *thread = current_thread_info();
-	struct task_struct *tsk = thread->task;
-
-	/*
-	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
-	 */
-	if (unlikely(restore_fpu_checking(tsk))) {
-		stts();
-		force_sig(SIGSEGV, tsk);
-		return;
-	}
-
-	thread->status |= TS_USEDFPU;	/* So we fnsave on switch_to() */
-	tsk->fpu_counter++;
-}
-
-/*
  * 'math_state_restore()' saves the current math information in the
  * old math state array, and gets the new ones from the current task
  *
  * Careful.. There are problems with IBM-designed IRQ13 behaviour.
  * Don't touch unless you *really* know how it works.
  *
- * Must be called with kernel preemption disabled (in this case,
- * local interrupts are disabled at the call-site in entry.S).
+ * Must be called with kernel preemption disabled (eg with local
+ * local interrupts as in the case of do_device_not_available).
  */
-asmlinkage void math_state_restore(void)
+void math_state_restore(void)
 {
-	struct thread_info *thread = current_thread_info();
-	struct task_struct *tsk = thread->task;
+	struct task_struct *tsk = current;
 
 	if (!tsk_used_math(tsk)) {
 		local_irq_enable();
@@ -622,9 +599,17 @@ asmlinkage void math_state_restore(void)
 		local_irq_disable();
 	}
 
-	clts();				/* Allow maths ops (or we recurse) */
+	__thread_fpu_begin(tsk);
+	/*
+	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
+	 */
+	if (unlikely(restore_fpu_checking(tsk))) {
+		__thread_fpu_end(tsk);
+		force_sig(SIGSEGV, tsk);
+		return;
+	}
 
-	__math_state_restore();
+	tsk->fpu_counter++;
 }
 EXPORT_SYMBOL_GPL(math_state_restore);
 
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index a3911343976b..711091114119 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -47,7 +47,7 @@ void __sanitize_i387_state(struct task_struct *tsk)
 	if (!fx)
 		return;
 
-	BUG_ON(task_thread_info(tsk)->status & TS_USEDFPU);
+	BUG_ON(__thread_has_fpu(tsk));
 
 	xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv;
 
@@ -168,7 +168,7 @@ int save_i387_xstate(void __user *buf)
 	if (!used_math())
 		return 0;
 
-	if (task_thread_info(tsk)->status & TS_USEDFPU) {
+	if (user_has_fpu()) {
 		if (use_xsave())
 			err = xsave_user(buf);
 		else
@@ -176,8 +176,7 @@ int save_i387_xstate(void __user *buf)
 
 		if (err)
 			return err;
-		task_thread_info(tsk)->status &= ~TS_USEDFPU;
-		stts();
+		user_fpu_end();
 	} else {
 		sanitize_i387_state(tsk);
 		if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave,
@@ -292,10 +291,7 @@ int restore_i387_xstate(void __user *buf)
 			return err;
 	}
 
-	if (!(task_thread_info(current)->status & TS_USEDFPU)) {
-		clts();
-		task_thread_info(current)->status |= TS_USEDFPU;
-	}
+	user_fpu_begin();
 	if (use_xsave())
 		err = restore_user_xstate(buf);
 	else