 arch/x86/include/asm/i387.h  |  1 +
 arch/x86/kernel/process_32.c | 27 ++++++++++++++++-----------
 arch/x86/kernel/process_64.c | 33 +++++++++++++++++++++------------
 arch/x86/kernel/traps.c      | 33 +++++++++++++++++++++++++----------
 4 files changed, 61 insertions(+), 33 deletions(-)
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index fb7f0d64e14f..0b20bbb758f2 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -26,6 +26,7 @@ extern void fpu_init(void);
 extern void mxcsr_feature_mask_init(void);
 extern int init_fpu(struct task_struct *child);
 extern asmlinkage void math_state_restore(void);
+extern void __math_state_restore(void);
 extern void init_thread_xstate(void);
 extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
 
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 59f4524984af..a80eddd41658 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -350,14 +350,21 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 				 *next = &next_p->thread;
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
+	bool preload_fpu;
 
 	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
 
-	__unlazy_fpu(prev_p);
+	/*
+	 * If the task has used fpu the last 5 timeslices, just do a full
+	 * restore of the math state immediately to avoid the trap; the
+	 * chances of needing FPU soon are obviously high now
+	 */
+	preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
 
+	__unlazy_fpu(prev_p);
 
 	/* we're going to use this soon, after a few expensive things */
-	if (next_p->fpu_counter > 5)
+	if (preload_fpu)
 		prefetch(next->xstate);
 
 	/*
@@ -398,6 +405,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		     task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
 		__switch_to_xtra(prev_p, next_p, tss);
 
+	/* If we're going to preload the fpu context, make sure clts
+	   is run while we're batching the cpu state updates. */
+	if (preload_fpu)
+		clts();
+
 	/*
 	 * Leave lazy mode, flushing any hypercalls made here.
 	 * This must be done before restoring TLS segments so
@@ -407,15 +419,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 */
 	arch_end_context_switch(next_p);
 
-	/* If the task has used fpu the last 5 timeslices, just do a full
-	 * restore of the math state immediately to avoid the trap; the
-	 * chances of needing FPU soon are obviously high now
-	 *
-	 * tsk_used_math() checks prevent calling math_state_restore(),
-	 * which can sleep in the case of !tsk_used_math()
-	 */
-	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
-		math_state_restore();
+	if (preload_fpu)
+		__math_state_restore();
 
 	/*
 	 * Restore %gs if needed (which is common)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ebefb5407b9d..a28279dbb07c 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -386,9 +386,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
 	unsigned fsindex, gsindex;
+	bool preload_fpu;
+
+	/*
+	 * If the task has used fpu the last 5 timeslices, just do a full
+	 * restore of the math state immediately to avoid the trap; the
+	 * chances of needing FPU soon are obviously high now
+	 */
+	preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
 
 	/* we're going to use this soon, after a few expensive things */
-	if (next_p->fpu_counter > 5)
+	if (preload_fpu)
 		prefetch(next->xstate);
 
 	/*
@@ -419,6 +427,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 
 	load_TLS(next, cpu);
 
+	/* Must be after DS reload */
+	unlazy_fpu(prev_p);
+
+	/* Make sure cpu is ready for new context */
+	if (preload_fpu)
+		clts();
+
 	/*
 	 * Leave lazy mode, flushing any hypercalls made here.
 	 * This must be done before restoring TLS segments so
@@ -459,9 +474,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
 	prev->gsindex = gsindex;
 
-	/* Must be after DS reload */
-	unlazy_fpu(prev_p);
-
 	/*
 	 * Switch the PDA and FPU contexts.
 	 */
@@ -480,15 +492,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
 		__switch_to_xtra(prev_p, next_p, tss);
 
-	/* If the task has used fpu the last 5 timeslices, just do a full
-	 * restore of the math state immediately to avoid the trap; the
-	 * chances of needing FPU soon are obviously high now
-	 *
-	 * tsk_used_math() checks prevent calling math_state_restore(),
-	 * which can sleep in the case of !tsk_used_math()
+	/*
+	 * Preload the FPU context, now that we've determined that the
+	 * task is likely to be using it.
 	 */
-	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
-		math_state_restore();
+	if (preload_fpu)
+		__math_state_restore();
 	return prev_p;
 }
 
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 6fe85c272a2b..83264922a878 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -795,6 +795,28 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
 }
 
 /*
+ * __math_state_restore assumes that cr0.TS is already clear and the
+ * fpu state is all ready for use.  Used during context switch.
+ */
+void __math_state_restore(void)
+{
+	struct thread_info *thread = current_thread_info();
+	struct task_struct *tsk = thread->task;
+
+	/*
+	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
+	 */
+	if (unlikely(restore_fpu_checking(tsk))) {
+		stts();
+		force_sig(SIGSEGV, tsk);
+		return;
+	}
+
+	thread->status |= TS_USEDFPU;	/* So we fnsave on switch_to() */
+	tsk->fpu_counter++;
+}
+
+/*
  * 'math_state_restore()' saves the current math information in the
  * old math state array, and gets the new ones from the current task
  *
@@ -825,17 +847,8 @@ asmlinkage void math_state_restore(void)
 	}
 
 	clts();				/* Allow maths ops (or we recurse) */
-	/*
-	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
-	 */
-	if (unlikely(restore_fpu_checking(tsk))) {
-		stts();
-		force_sig(SIGSEGV, tsk);
-		return;
-	}
 
-	thread->status |= TS_USEDFPU;	/* So we fnsave on switch_to() */
-	tsk->fpu_counter++;
+	__math_state_restore();
 }
 EXPORT_SYMBOL_GPL(math_state_restore);
 
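For readers tracing the new ordering, here is a rough, compilable userspace sketch of the control flow this diff sets up in __switch_to(): preload_fpu is decided early, clts() is folded into the batched CPU state updates, and __math_state_restore() then restores the FPU state with TS already clear, so no device-not-available trap is taken. Everything below is an illustration only: clts(), stts() and restore_fpu_checking() are stub stand-ins, struct task is invented for the example, and unlike the kernel's __math_state_restore() (which takes no argument and acts on current) the sketch passes the task explicitly.

#include <stdbool.h>
#include <stdio.h>

struct task {
	bool used_math;			/* has this task ever touched the FPU? */
	unsigned fpu_counter;		/* consecutive timeslices with FPU use */
	const char *name;
};

/* Stubs standing in for the real low-level primitives. */
static void clts(void) { puts("  clts(): FPU ops allowed, no trap"); }
static void stts(void) { puts("  stts(): next FPU op will trap"); }
static int restore_fpu_checking(struct task *t)
{
	printf("  restoring FPU state of '%s'\n", t->name);
	return 0;			/* 0 = success, like the kernel helper */
}

/*
 * Core restore, modelled on the new __math_state_restore(): it assumes
 * cr0.TS is already clear (clts() has run) and only does the restore.
 */
static void __math_state_restore(struct task *next)
{
	if (restore_fpu_checking(next)) {	/* paranoid restore */
		stts();
		return;				/* kernel: force_sig(SIGSEGV, tsk) */
	}
	next->fpu_counter++;			/* keep the "recently used FPU" credit */
}

static void switch_to(struct task *prev, struct task *next)
{
	/* Decide early, before any expensive work, whether to preload. */
	bool preload_fpu = next->used_math && next->fpu_counter > 5;

	printf("switching '%s' -> '%s' (preload_fpu=%d)\n",
	       prev->name, next->name, preload_fpu);

	/* ... save prev's FPU state, reload segments, load_TLS() ... */

	if (preload_fpu)
		clts();			/* batched with the other cpu state updates */

	/* ... arch_end_context_switch(), remaining segment fixups ... */

	if (preload_fpu)
		__math_state_restore(next);	/* TS already clear: no trap */
}

int main(void)
{
	struct task heavy = { true, 10, "fpu-heavy" };
	struct task light = { false, 0, "integer-only" };

	switch_to(&light, &heavy);	/* preloads: heavy used the FPU recently */
	switch_to(&heavy, &light);	/* no preload: light never used the FPU */
	return 0;
}

The design point the sketch tries to make visible: the preload decision happens before any expensive work, the TS flag is cleared where other CPU state updates are already being queued (which matters when a hypervisor batches them, as the neighbouring "Leave lazy mode, flushing any hypercalls" comment suggests), and the actual restore runs last, once the CPU is fully set up for the next task.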