author	Linus Torvalds <torvalds@linux-foundation.org>	2009-09-14 10:58:08 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-09-14 10:58:08 -0400
commit	625037cc405eabbfd2a39e9297e583a94886225f (patch)
tree	c0a62f6cb2a565efb7cd494f558f0acbadcbabb0
parent	8fafa0a789faaff4318cbfa9c2f827d2198505dc (diff)
parent	17950c5b243f99cbabef173415ee988c52104d7e (diff)
Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86-64: move clts into batch cpu state updates when preloading fpu
  x86-64: move unlazy_fpu() into lazy cpu state part of context switch
  x86-32: make sure clts is batched during context switch
  x86: split out core __math_state_restore
-rw-r--r--	arch/x86/include/asm/i387.h	1
-rw-r--r--	arch/x86/kernel/process_32.c	27
-rw-r--r--	arch/x86/kernel/process_64.c	33
-rw-r--r--	arch/x86/kernel/traps.c	33
4 files changed, 61 insertions, 33 deletions
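In outline, the series moves the "should we preload the FPU?" decision to the top of __switch_to(), issues clts() while the other cpu state updates are still being batched (the existing comment notes that arch_end_context_switch() flushes any hypercalls made here), and then restores the math state through the new __math_state_restore() once the batch has been flushed. The fragment below is a simplified sketch of the resulting switch path (64-bit flavour), not the literal kernel code; the segment, TLS and per-cpu bookkeeping around it is elided.

/* Simplified sketch of the reordered FPU handling; details elided. */
struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	bool preload_fpu;

	/* Decide up front whether the incoming task is likely to need the FPU. */
	preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
	if (preload_fpu)
		prefetch(next_p->thread.xstate);

	/* ... segment, TLS and per-cpu state switching elided ... */

	unlazy_fpu(prev_p);		/* save outgoing lazy FPU state (after DS reload) */

	if (preload_fpu)
		clts();			/* clear cr0.TS while cpu state updates are still batched */

	arch_end_context_switch(next_p);	/* leave lazy mode, flush batched hypercalls */

	if (preload_fpu)
		__math_state_restore();	/* TS is already clear; restore the math state directly */

	return prev_p;
}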
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index fb7f0d64e14f..0b20bbb758f2 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -26,6 +26,7 @@ extern void fpu_init(void);
 extern void mxcsr_feature_mask_init(void);
 extern int init_fpu(struct task_struct *child);
 extern asmlinkage void math_state_restore(void);
+extern void __math_state_restore(void);
 extern void init_thread_xstate(void);
 extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
 
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 59f4524984af..a80eddd41658 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -350,14 +350,21 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 			     *next = &next_p->thread;
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
+	bool preload_fpu;
 
 	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
 
-	__unlazy_fpu(prev_p);
+	/*
+	 * If the task has used fpu the last 5 timeslices, just do a full
+	 * restore of the math state immediately to avoid the trap; the
+	 * chances of needing FPU soon are obviously high now
+	 */
+	preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
 
+	__unlazy_fpu(prev_p);
 
 	/* we're going to use this soon, after a few expensive things */
-	if (next_p->fpu_counter > 5)
+	if (preload_fpu)
 		prefetch(next->xstate);
 
 	/*
@@ -398,6 +405,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 			      task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
 		__switch_to_xtra(prev_p, next_p, tss);
 
+	/* If we're going to preload the fpu context, make sure clts
+	   is run while we're batching the cpu state updates. */
+	if (preload_fpu)
+		clts();
+
 	/*
 	 * Leave lazy mode, flushing any hypercalls made here.
 	 * This must be done before restoring TLS segments so
@@ -407,15 +419,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 */
 	arch_end_context_switch(next_p);
 
-	/* If the task has used fpu the last 5 timeslices, just do a full
-	 * restore of the math state immediately to avoid the trap; the
-	 * chances of needing FPU soon are obviously high now
-	 *
-	 * tsk_used_math() checks prevent calling math_state_restore(),
-	 * which can sleep in the case of !tsk_used_math()
-	 */
-	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
-		math_state_restore();
+	if (preload_fpu)
+		__math_state_restore();
 
 	/*
 	 * Restore %gs if needed (which is common)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ebefb5407b9d..a28279dbb07c 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -386,9 +386,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
 	unsigned fsindex, gsindex;
+	bool preload_fpu;
+
+	/*
+	 * If the task has used fpu the last 5 timeslices, just do a full
+	 * restore of the math state immediately to avoid the trap; the
+	 * chances of needing FPU soon are obviously high now
+	 */
+	preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
 
 	/* we're going to use this soon, after a few expensive things */
-	if (next_p->fpu_counter > 5)
+	if (preload_fpu)
 		prefetch(next->xstate);
 
 	/*
@@ -419,6 +427,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 
 	load_TLS(next, cpu);
 
+	/* Must be after DS reload */
+	unlazy_fpu(prev_p);
+
+	/* Make sure cpu is ready for new context */
+	if (preload_fpu)
+		clts();
+
 	/*
 	 * Leave lazy mode, flushing any hypercalls made here.
 	 * This must be done before restoring TLS segments so
@@ -459,9 +474,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
 	prev->gsindex = gsindex;
 
-	/* Must be after DS reload */
-	unlazy_fpu(prev_p);
-
 	/*
 	 * Switch the PDA and FPU contexts.
 	 */
@@ -480,15 +492,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 			      task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
 		__switch_to_xtra(prev_p, next_p, tss);
 
-	/* If the task has used fpu the last 5 timeslices, just do a full
-	 * restore of the math state immediately to avoid the trap; the
-	 * chances of needing FPU soon are obviously high now
-	 *
-	 * tsk_used_math() checks prevent calling math_state_restore(),
-	 * which can sleep in the case of !tsk_used_math()
+	/*
+	 * Preload the FPU context, now that we've determined that the
+	 * task is likely to be using it.
 	 */
-	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
-		math_state_restore();
+	if (preload_fpu)
+		__math_state_restore();
 	return prev_p;
 }
 
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 6fe85c272a2b..83264922a878 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -795,6 +795,28 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
 }
 
 /*
+ * __math_state_restore assumes that cr0.TS is already clear and the
+ * fpu state is all ready for use. Used during context switch.
+ */
+void __math_state_restore(void)
+{
+	struct thread_info *thread = current_thread_info();
+	struct task_struct *tsk = thread->task;
+
+	/*
+	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
+	 */
+	if (unlikely(restore_fpu_checking(tsk))) {
+		stts();
+		force_sig(SIGSEGV, tsk);
+		return;
+	}
+
+	thread->status |= TS_USEDFPU;	/* So we fnsave on switch_to() */
+	tsk->fpu_counter++;
+}
+
+/*
  * 'math_state_restore()' saves the current math information in the
  * old math state array, and gets the new ones from the current task
  *
@@ -825,17 +847,8 @@ asmlinkage void math_state_restore(void)
 	}
 
 	clts();				/* Allow maths ops (or we recurse) */
-	/*
-	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
-	 */
-	if (unlikely(restore_fpu_checking(tsk))) {
-		stts();
-		force_sig(SIGSEGV, tsk);
-		return;
-	}
 
-	thread->status |= TS_USEDFPU;	/* So we fnsave on switch_to() */
-	tsk->fpu_counter++;
+	__math_state_restore();
 }
 EXPORT_SYMBOL_GPL(math_state_restore);
 
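With the core restore split out, the device-not-available trap path and the context-switch path now share it: math_state_restore() still handles the first-use case (allocating and initialising the xstate for a task that has never used the FPU) and clears TS itself with clts(), then calls __math_state_restore(); __switch_to() calls __math_state_restore() directly because it has already executed clts() while batching the cpu state updates. A condensed view of the resulting call structure, with the init/allocation branch elided (not the full kernel code):

/* Condensed view of the split between the trap path and the shared core. */
asmlinkage void math_state_restore(void)
{
	/* ... init_fpu()/xstate allocation for first-time FPU users ... */
	clts();				/* allow math ops (or we recurse) */
	__math_state_restore();		/* shared core: restore regs, set TS_USEDFPU, bump fpu_counter */
}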