author     Linus Torvalds <torvalds@linux-foundation.org>	2012-02-18 15:56:35 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>	2012-02-18 17:03:48 -0500
commit     34ddc81a230b15c0e345b6b253049db731499f7e
tree       0c3afd68071ec1a8a1d8724ef9a42ef845ecf402	/arch/x86
parent     f94edacf998516ac9d849f7bc6949a703977a7f3
i387: re-introduce FPU state preloading at context switch time
After all the FPU state cleanups and finally finding the problem that
caused all our FPU save/restore problems, this re-introduces the
preloading of FPU state that was removed in commit b3b0870ef3ff ("i387:
do not preload FPU state at task switch time").

However, instead of simply reverting the removal, this reimplements
preloading with several fixes, most notably

 - properly abstracted as a true FPU state switch, rather than as
   open-coded save and restore with various hacks.

   In particular, implementing it as a proper FPU state switch allows us
   to optimize the CR0.TS flag accesses: there is no reason to set the
   TS bit only to then almost immediately clear it again.  CR0 accesses
   are quite slow and expensive, don't flip the bit back and forth for
   no good reason.

 - Make sure that the same model works for both x86-32 and x86-64, so
   that there are no gratuitous differences between the two due to the
   way they save and restore segment state differently due to
   architectural differences that really don't matter to the FPU state.

 - Avoid exposing the "preload" state to the context switch routines,
   and in particular allow the concept of lazy state restore: if nothing
   else has used the FPU in the meantime, and the process is still on
   the same CPU, we can avoid restoring state from memory entirely, just
   re-expose the state that is still in the FPU unit.

   That optimized lazy restore isn't actually implemented here, but the
   infrastructure is set up for it.  Of course, older CPU's that use
   'fnsave' to save the state cannot take advantage of this, since the
   state saving also trashes the state.

In other words, there is now an actual _design_ to the FPU state saving,
rather than just random historical baggage.  Hopefully it's easier to
follow as a result.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
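The lazy restore described above is only stubbed out by this patch: the
fpu_lazy_restore()/fpu_lazy_state_intact() helpers added to i387.h in the
diff below are no-ops. A minimal sketch of the missing bookkeeping, assuming
a hypothetical per-cpu fpu_owner_task pointer and a per-task
thread.fpu.last_cpu field (neither is introduced by this commit), could look
like:

/*
 * Hypothetical follow-up, not part of this patch: remember whose FPU
 * state is live in this CPU's registers, so that switching back to that
 * task on the same CPU can skip the restore from memory entirely.
 */
DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);

static inline void fpu_lazy_state_intact(struct task_struct *tsk)
{
	/* The save left the registers intact (xsave/fxsave, not fnsave). */
	percpu_write(fpu_owner_task, tsk);
}

static inline int fpu_lazy_restore(struct task_struct *new)
{
	/*
	 * The registers still hold 'new's state only if it was the last
	 * FPU user on this CPU and has not run on another CPU since;
	 * 'last_cpu' would have to be updated wherever the task's state
	 * is loaded into the registers.
	 */
	return new == percpu_read(fpu_owner_task) &&
		smp_processor_id() == new->thread.fpu.last_cpu;
}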
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/include/asm/i387.h   | 110
-rw-r--r--  arch/x86/kernel/process_32.c  |   5
-rw-r--r--  arch/x86/kernel/process_64.c  |   5
-rw-r--r--  arch/x86/kernel/traps.c       |  55
4 files changed, 133 insertions(+), 42 deletions(-)
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index f5376676f89c..a850b4d8d14d 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -29,6 +29,7 @@ extern unsigned int sig_xstate_size;
 extern void fpu_init(void);
 extern void mxcsr_feature_mask_init(void);
 extern int init_fpu(struct task_struct *child);
+extern void __math_state_restore(struct task_struct *);
 extern void math_state_restore(void);
 extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
 
@@ -212,9 +213,10 @@ static inline void fpu_fxsave(struct fpu *fpu)
 #endif	/* CONFIG_X86_64 */
 
 /*
- * These must be called with preempt disabled
+ * These must be called with preempt disabled. Returns
+ * 'true' if the FPU state is still intact.
  */
-static inline void fpu_save_init(struct fpu *fpu)
+static inline int fpu_save_init(struct fpu *fpu)
 {
 	if (use_xsave()) {
 		fpu_xsave(fpu);
@@ -223,22 +225,33 @@ static inline void fpu_save_init(struct fpu *fpu)
 		 * xsave header may indicate the init state of the FP.
 		 */
 		if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
-			return;
+			return 1;
 	} else if (use_fxsr()) {
 		fpu_fxsave(fpu);
 	} else {
 		asm volatile("fnsave %[fx]; fwait"
 			     : [fx] "=m" (fpu->state->fsave));
-		return;
+		return 0;
 	}
 
-	if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES))
+	/*
+	 * If exceptions are pending, we need to clear them so
+	 * that we don't randomly get exceptions later.
+	 *
+	 * FIXME! Is this perhaps only true for the old-style
+	 * irq13 case? Maybe we could leave the x87 state
+	 * intact otherwise?
+	 */
+	if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) {
 		asm volatile("fnclex");
+		return 0;
+	}
+	return 1;
 }
 
-static inline void __save_init_fpu(struct task_struct *tsk)
+static inline int __save_init_fpu(struct task_struct *tsk)
 {
-	fpu_save_init(&tsk->thread.fpu);
+	return fpu_save_init(&tsk->thread.fpu);
 }
 
 static inline int fpu_fxrstor_checking(struct fpu *fpu)
@@ -301,20 +314,79 @@ static inline void __thread_fpu_begin(struct task_struct *tsk)
 }
 
 /*
- * Signal frame handlers...
+ * FPU state switching for scheduling.
+ *
+ * This is a two-stage process:
+ *
+ *  - switch_fpu_prepare() saves the old state and
+ *    sets the new state of the CR0.TS bit. This is
+ *    done within the context of the old process.
+ *
+ *  - switch_fpu_finish() restores the new state as
+ *    necessary.
  */
-extern int save_i387_xstate(void __user *buf);
-extern int restore_i387_xstate(void __user *buf);
+typedef struct { int preload; } fpu_switch_t;
+
+/*
+ * FIXME! We could do a totally lazy restore, but we need to
+ * add a per-cpu "this was the task that last touched the FPU
+ * on this CPU" variable, and the task needs to have a "I last
+ * touched the FPU on this CPU" and check them.
+ *
+ * We don't do that yet, so "fpu_lazy_restore()" always returns
+ * false, but some day..
+ */
+#define fpu_lazy_restore(tsk) (0)
+#define fpu_lazy_state_intact(tsk) do { } while (0)
+
+static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new)
+{
+	fpu_switch_t fpu;
+
+	fpu.preload = tsk_used_math(new) && new->fpu_counter > 5;
+	if (__thread_has_fpu(old)) {
+		if (__save_init_fpu(old))
+			fpu_lazy_state_intact(old);
+		__thread_clear_has_fpu(old);
+		old->fpu_counter++;
+
+		/* Don't change CR0.TS if we just switch! */
+		if (fpu.preload) {
+			__thread_set_has_fpu(new);
+			prefetch(new->thread.fpu.state);
+		} else
+			stts();
+	} else {
+		old->fpu_counter = 0;
+		if (fpu.preload) {
+			if (fpu_lazy_restore(new))
+				fpu.preload = 0;
+			else
+				prefetch(new->thread.fpu.state);
+			__thread_fpu_begin(new);
+		}
+	}
+	return fpu;
+}
 
-static inline void __unlazy_fpu(struct task_struct *tsk)
+/*
+ * By the time this gets called, we've already cleared CR0.TS and
+ * given the process the FPU if we are going to preload the FPU
+ * state - all we need to do is to conditionally restore the register
+ * state itself.
+ */
+static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
 {
-	if (__thread_has_fpu(tsk)) {
-		__save_init_fpu(tsk);
-		__thread_fpu_end(tsk);
-	} else
-		tsk->fpu_counter = 0;
+	if (fpu.preload)
+		__math_state_restore(new);
 }
 
+/*
+ * Signal frame handlers...
+ */
+extern int save_i387_xstate(void __user *buf);
+extern int restore_i387_xstate(void __user *buf);
+
 static inline void __clear_fpu(struct task_struct *tsk)
 {
 	if (__thread_has_fpu(tsk)) {
@@ -474,7 +546,11 @@ static inline void save_init_fpu(struct task_struct *tsk)
 static inline void unlazy_fpu(struct task_struct *tsk)
 {
 	preempt_disable();
-	__unlazy_fpu(tsk);
+	if (__thread_has_fpu(tsk)) {
+		__save_init_fpu(tsk);
+		__thread_fpu_end(tsk);
+	} else
+		tsk->fpu_counter = 0;
 	preempt_enable();
 }
 
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 324cd722b447..80bfe1ab0031 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -299,10 +299,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 				 *next = &next_p->thread;
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
+	fpu_switch_t fpu;
 
 	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
 
-	__unlazy_fpu(prev_p);
+	fpu = switch_fpu_prepare(prev_p, next_p);
 
 	/*
 	 * Reload esp0.
@@ -357,6 +358,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	if (prev->gs | next->gs)
 		lazy_load_gs(next->gs);
 
+	switch_fpu_finish(next_p, fpu);
+
 	percpu_write(current_task, next_p);
 
 	return prev_p;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 753e803f7197..1fd94bc4279d 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -386,8 +386,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
 	unsigned fsindex, gsindex;
+	fpu_switch_t fpu;
 
-	__unlazy_fpu(prev_p);
+	fpu = switch_fpu_prepare(prev_p, next_p);
 
 	/*
 	 * Reload esp0, LDT and the page table pointer:
@@ -457,6 +458,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
 	prev->gsindex = gsindex;
 
+	switch_fpu_finish(next_p, fpu);
+
 	/*
 	 * Switch the PDA and FPU contexts.
 	 */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index ad25e51f40c4..77da5b475ad2 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -571,6 +571,37 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
 }
 
 /*
+ * This gets called with the process already owning the
+ * FPU state, and with CR0.TS cleared. It just needs to
+ * restore the FPU register state.
+ */
+void __math_state_restore(struct task_struct *tsk)
+{
+	/* We need a safe address that is cheap to find and that is already
+	   in L1. We've just brought in "tsk->thread.has_fpu", so use that */
+#define safe_address (tsk->thread.has_fpu)
+
+	/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
+	   is pending. Clear the x87 state here by setting it to fixed
+	   values. safe_address is a random variable that should be in L1 */
+	alternative_input(
+		ASM_NOP8 ASM_NOP2,
+		"emms\n\t"		/* clear stack tags */
+		"fildl %P[addr]",	/* set F?P to defined value */
+		X86_FEATURE_FXSAVE_LEAK,
+		[addr] "m" (safe_address));
+
+	/*
+	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
+	 */
+	if (unlikely(restore_fpu_checking(tsk))) {
+		__thread_fpu_end(tsk);
+		force_sig(SIGSEGV, tsk);
+		return;
+	}
+}
+
+/*
  * 'math_state_restore()' saves the current math information in the
  * old math state array, and gets the new ones from the current task
  *
@@ -584,10 +615,6 @@ void math_state_restore(void)
 {
 	struct task_struct *tsk = current;
 
-	/* We need a safe address that is cheap to find and that is already
-	   in L1. We're just bringing in "tsk->thread.has_fpu", so use that */
-#define safe_address (tsk->thread.has_fpu)
-
 	if (!tsk_used_math(tsk)) {
 		local_irq_enable();
 		/*
@@ -604,25 +631,7 @@ void math_state_restore(void)
 	}
 
 	__thread_fpu_begin(tsk);
-
-	/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
-	   is pending. Clear the x87 state here by setting it to fixed
-	   values. safe_address is a random variable that should be in L1 */
-	alternative_input(
-		ASM_NOP8 ASM_NOP2,
-		"emms\n\t"		/* clear stack tags */
-		"fildl %P[addr]",	/* set F?P to defined value */
-		X86_FEATURE_FXSAVE_LEAK,
-		[addr] "m" (safe_address));
-
-	/*
-	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
-	 */
-	if (unlikely(restore_fpu_checking(tsk))) {
-		__thread_fpu_end(tsk);
-		force_sig(SIGSEGV, tsk);
-		return;
-	}
+	__math_state_restore(tsk);
 
 	tsk->fpu_counter++;
 }