 arch/x86/include/asm/i387.h  | 110
 arch/x86/kernel/process_32.c |   5
 arch/x86/kernel/process_64.c |   5
 arch/x86/kernel/traps.c      |  55
 4 files changed, 133 insertions(+), 42 deletions(-)
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index f5376676f89c..a850b4d8d14d 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -29,6 +29,7 @@ extern unsigned int sig_xstate_size;
 extern void fpu_init(void);
 extern void mxcsr_feature_mask_init(void);
 extern int init_fpu(struct task_struct *child);
+extern void __math_state_restore(struct task_struct *);
 extern void math_state_restore(void);
 extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
 
@@ -212,9 +213,10 @@ static inline void fpu_fxsave(struct fpu *fpu)
 #endif	/* CONFIG_X86_64 */
 
 /*
- * These must be called with preempt disabled
+ * These must be called with preempt disabled. Returns
+ * 'true' if the FPU state is still intact.
  */
-static inline void fpu_save_init(struct fpu *fpu)
+static inline int fpu_save_init(struct fpu *fpu)
 {
 	if (use_xsave()) {
 		fpu_xsave(fpu);
@@ -223,22 +225,33 @@ static inline void fpu_save_init(struct fpu *fpu)
 		 * xsave header may indicate the init state of the FP.
 		 */
 		if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
-			return;
+			return 1;
 	} else if (use_fxsr()) {
 		fpu_fxsave(fpu);
 	} else {
 		asm volatile("fnsave %[fx]; fwait"
 			     : [fx] "=m" (fpu->state->fsave));
-		return;
+		return 0;
 	}
 
-	if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES))
+	/*
+	 * If exceptions are pending, we need to clear them so
+	 * that we don't randomly get exceptions later.
+	 *
+	 * FIXME! Is this perhaps only true for the old-style
+	 * irq13 case? Maybe we could leave the x87 state
+	 * intact otherwise?
+	 */
+	if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) {
 		asm volatile("fnclex");
+		return 0;
+	}
+	return 1;
 }
 
-static inline void __save_init_fpu(struct task_struct *tsk)
+static inline int __save_init_fpu(struct task_struct *tsk)
 {
-	fpu_save_init(&tsk->thread.fpu);
+	return fpu_save_init(&tsk->thread.fpu);
 }
 
 static inline int fpu_fxrstor_checking(struct fpu *fpu)
@@ -301,20 +314,79 @@ static inline void __thread_fpu_begin(struct task_struct *tsk)
 }
 
 /*
- * Signal frame handlers...
+ * FPU state switching for scheduling.
+ *
+ * This is a two-stage process:
+ *
+ *  - switch_fpu_prepare() saves the old state and
+ *    sets the new state of the CR0.TS bit. This is
+ *    done within the context of the old process.
+ *
+ *  - switch_fpu_finish() restores the new state as
+ *    necessary.
  */
-extern int save_i387_xstate(void __user *buf);
-extern int restore_i387_xstate(void __user *buf);
+typedef struct { int preload; } fpu_switch_t;
+
+/*
+ * FIXME! We could do a totally lazy restore, but we need to
+ * add a per-cpu "this was the task that last touched the FPU
+ * on this CPU" variable, and the task needs to have a "I last
+ * touched the FPU on this CPU" and check them.
+ *
+ * We don't do that yet, so "fpu_lazy_restore()" always returns
+ * false, but some day..
+ */
+#define fpu_lazy_restore(tsk) (0)
+#define fpu_lazy_state_intact(tsk) do { } while (0)
+
+static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new)
+{
+	fpu_switch_t fpu;
+
+	fpu.preload = tsk_used_math(new) && new->fpu_counter > 5;
+	if (__thread_has_fpu(old)) {
+		if (__save_init_fpu(old))
+			fpu_lazy_state_intact(old);
+		__thread_clear_has_fpu(old);
+		old->fpu_counter++;
+
+		/* Don't change CR0.TS if we just switch! */
+		if (fpu.preload) {
+			__thread_set_has_fpu(new);
+			prefetch(new->thread.fpu.state);
+		} else
+			stts();
+	} else {
+		old->fpu_counter = 0;
+		if (fpu.preload) {
+			if (fpu_lazy_restore(new))
+				fpu.preload = 0;
+			else
+				prefetch(new->thread.fpu.state);
+			__thread_fpu_begin(new);
+		}
+	}
+	return fpu;
+}
 
-static inline void __unlazy_fpu(struct task_struct *tsk)
+/*
+ * By the time this gets called, we've already cleared CR0.TS and
+ * given the process the FPU if we are going to preload the FPU
+ * state - all we need to do is to conditionally restore the register
+ * state itself.
+ */
+static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
 {
-	if (__thread_has_fpu(tsk)) {
-		__save_init_fpu(tsk);
-		__thread_fpu_end(tsk);
-	} else
-		tsk->fpu_counter = 0;
+	if (fpu.preload)
+		__math_state_restore(new);
 }
 
+/*
+ * Signal frame handlers...
+ */
+extern int save_i387_xstate(void __user *buf);
+extern int restore_i387_xstate(void __user *buf);
+
 static inline void __clear_fpu(struct task_struct *tsk)
 {
 	if (__thread_has_fpu(tsk)) {
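
Note: the FIXME block in the hunk above only sketches the lazy-restore idea in prose, and the fpu_lazy_restore()/fpu_lazy_state_intact() stubs are deliberately no-ops. A minimal sketch of what the real check could look like, assuming a hypothetical per-cpu fpu_owner_task pointer and a hypothetical fpu.last_cpu field (neither exists in this patch):

/* Hypothetical fields, purely illustrative: */
DECLARE_PER_CPU(struct task_struct *, fpu_owner_task);

static inline int fpu_lazy_restore_sketch(struct task_struct *new, unsigned int cpu)
{
	/* The registers are still valid for 'new' only if no other
	   task (and no other CPU) has touched the FPU in between. */
	return new == per_cpu(fpu_owner_task, cpu) &&
		cpu == new->thread.fpu.last_cpu;
}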
@@ -474,7 +546,11 @@ static inline void save_init_fpu(struct task_struct *tsk)
 static inline void unlazy_fpu(struct task_struct *tsk)
 {
 	preempt_disable();
-	__unlazy_fpu(tsk);
+	if (__thread_has_fpu(tsk)) {
+		__save_init_fpu(tsk);
+		__thread_fpu_end(tsk);
+	} else
+		tsk->fpu_counter = 0;
 	preempt_enable();
 }
 
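
Note: the whole preload decision in switch_fpu_prepare() reduces to one predicate. fpu_counter is bumped while a task keeps owning the FPU across context switches and reset to zero when it does not, so only tasks that use the FPU persistently cross the threshold. The same test, shown in isolation (illustrative only):

static inline int fpu_preload_wanted(struct task_struct *next)
{
	/* Preload only for tasks that have FPU state and kept using
	   it for more than 5 consecutive context switches. */
	return tsk_used_math(next) && next->fpu_counter > 5;
}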
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 324cd722b447..80bfe1ab0031 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -299,10 +299,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 				 *next = &next_p->thread;
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
+	fpu_switch_t fpu;
 
 	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
 
-	__unlazy_fpu(prev_p);
+	fpu = switch_fpu_prepare(prev_p, next_p);
 
 	/*
 	 * Reload esp0.
@@ -357,6 +358,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	if (prev->gs | next->gs)
 		lazy_load_gs(next->gs);
 
+	switch_fpu_finish(next_p, fpu);
+
 	percpu_write(current_task, next_p);
 
 	return prev_p;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 753e803f7197..1fd94bc4279d 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -386,8 +386,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
 	unsigned fsindex, gsindex;
+	fpu_switch_t fpu;
 
-	__unlazy_fpu(prev_p);
+	fpu = switch_fpu_prepare(prev_p, next_p);
 
 	/*
 	 * Reload esp0, LDT and the page table pointer:
@@ -457,6 +458,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
 	prev->gsindex = gsindex;
 
+	switch_fpu_finish(next_p, fpu);
+
 	/*
 	 * Switch the PDA and FPU contexts.
 	 */
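
Note: both __switch_to() implementations follow the same shape. Condensed to its essentials (an illustrative sketch; the real callers are the hunks above):

struct task_struct *switch_to_sketch(struct task_struct *prev_p, struct task_struct *next_p)
{
	/* Stage one, while prev_p's state is still live: save the old
	   FPU state and set up CR0.TS for the incoming task. */
	fpu_switch_t fpu = switch_fpu_prepare(prev_p, next_p);

	/* ... reload esp0, segments, TLS, page tables ... */

	/* Stage two, once next_p owns the CPU: restore the register
	   state if we decided to preload. */
	switch_fpu_finish(next_p, fpu);
	return prev_p;
}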
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index ad25e51f40c4..77da5b475ad2 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -571,6 +571,37 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
 }
 
 /*
+ * This gets called with the process already owning the
+ * FPU state, and with CR0.TS cleared. It just needs to
+ * restore the FPU register state.
+ */
+void __math_state_restore(struct task_struct *tsk)
+{
+	/* We need a safe address that is cheap to find and that is already
+	   in L1. We've just brought in "tsk->thread.has_fpu", so use that */
+#define safe_address (tsk->thread.has_fpu)
+
+	/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
+	   is pending. Clear the x87 state here by setting it to fixed
+	   values. safe_address is a random variable that should be in L1 */
+	alternative_input(
+		ASM_NOP8 ASM_NOP2,
+		"emms\n\t"		/* clear stack tags */
+		"fildl %P[addr]",	/* set F?P to defined value */
+		X86_FEATURE_FXSAVE_LEAK,
+		[addr] "m" (safe_address));
+
+	/*
+	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
+	 */
+	if (unlikely(restore_fpu_checking(tsk))) {
+		__thread_fpu_end(tsk);
+		force_sig(SIGSEGV, tsk);
+		return;
+	}
+}
+
+/*
  * 'math_state_restore()' saves the current math information in the
  * old math state array, and gets the new ones from the current task
  *
@@ -584,10 +615,6 @@ void math_state_restore(void)
 {
 	struct task_struct *tsk = current;
 
-	/* We need a safe address that is cheap to find and that is already
-	   in L1. We're just bringing in "tsk->thread.has_fpu", so use that */
-#define safe_address (tsk->thread.has_fpu)
-
 	if (!tsk_used_math(tsk)) {
 		local_irq_enable();
 		/*
@@ -604,25 +631,7 @@ void math_state_restore(void)
 	}
 
 	__thread_fpu_begin(tsk);
-
-	/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
-	   is pending. Clear the x87 state here by setting it to fixed
-	   values. safe_address is a random variable that should be in L1 */
-	alternative_input(
-		ASM_NOP8 ASM_NOP2,
-		"emms\n\t"		/* clear stack tags */
-		"fildl %P[addr]",	/* set F?P to defined value */
-		X86_FEATURE_FXSAVE_LEAK,
-		[addr] "m" (safe_address));
-
-	/*
-	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
-	 */
-	if (unlikely(restore_fpu_checking(tsk))) {
-		__thread_fpu_end(tsk);
-		force_sig(SIGSEGV, tsk);
-		return;
-	}
+	__math_state_restore(tsk);
 
 	tsk->fpu_counter++;
 }
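
Note: math_state_restore() is reached from the device-not-available (#NM) trap, taken when a task executes its first FPU instruction while CR0.TS is set. Assuming the usual x86 trap wiring of this era (the handler is not part of this diff), the call site is roughly:

dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code)
{
	/* CR0.TS was set and the task touched the FPU: grab it. */
	math_state_restore();	/* interrupts stay disabled */
}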