 arch/x86/include/asm/i387.h  |  1
 arch/x86/kernel/process_32.c | 27
 arch/x86/kernel/process_64.c | 33
 arch/x86/kernel/traps.c      | 33
 4 files changed, 61 insertions(+), 33 deletions(-)
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index fb7f0d64e14f..0b20bbb758f2 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -26,6 +26,7 @@ extern void fpu_init(void);
 extern void mxcsr_feature_mask_init(void);
 extern int init_fpu(struct task_struct *child);
 extern asmlinkage void math_state_restore(void);
+extern void __math_state_restore(void);
 extern void init_thread_xstate(void);
 extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
 
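The header change just exports the new helper next to the existing entry point. The split leaves two functions with different preconditions; here is a hedged summary of the contract as annotated prototypes (the comments are mine, inferred from the traps.c hunk below and the removed "which can sleep" comment, not part of the patch):

/* Trap-time entry point: clears CR0.TS itself via clts(), and may
 * sleep when the task has never used math yet. */
extern asmlinkage void math_state_restore(void);

/* Context-switch fast path: assumes CR0.TS is already clear and the
 * saved FPU state is ready for use; never sleeps. */
extern void __math_state_restore(void);
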
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 59f4524984af..a80eddd41658 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -350,14 +350,21 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 				 *next = &next_p->thread;
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
+	bool preload_fpu;
 
 	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
 
-	__unlazy_fpu(prev_p);
+	/*
+	 * If the task has used fpu the last 5 timeslices, just do a full
+	 * restore of the math state immediately to avoid the trap; the
+	 * chances of needing FPU soon are obviously high now
+	 */
+	preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
 
+	__unlazy_fpu(prev_p);
 
 	/* we're going to use this soon, after a few expensive things */
-	if (next_p->fpu_counter > 5)
+	if (preload_fpu)
 		prefetch(next->xstate);
 
 	/*
@@ -398,6 +405,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		     task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
 		__switch_to_xtra(prev_p, next_p, tss);
 
+	/* If we're going to preload the fpu context, make sure clts
+	   is run while we're batching the cpu state updates. */
+	if (preload_fpu)
+		clts();
+
 	/*
 	 * Leave lazy mode, flushing any hypercalls made here.
 	 * This must be done before restoring TLS segments so
@@ -407,15 +419,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 */
 	arch_end_context_switch(next_p);
 
-	/* If the task has used fpu the last 5 timeslices, just do a full
-	 * restore of the math state immediately to avoid the trap; the
-	 * chances of needing FPU soon are obviously high now
-	 *
-	 * tsk_used_math() checks prevent calling math_state_restore(),
-	 * which can sleep in the case of !tsk_used_math()
-	 */
-	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
-		math_state_restore();
+	if (preload_fpu)
+		__math_state_restore();
 
 	/*
 	 * Restore %gs if needed (which is common)
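The 32-bit diff reads more easily once you see that one decision, hoisted to the top of __switch_to(), now feeds three sites: the prefetch, the batched clts(), and the final __math_state_restore(). A stand-alone sketch of just the heuristic, compilable as plain user-space C; the struct is a mock rather than the kernel's task_struct, and only the tsk_used_math()-plus-counter logic and the threshold of 5 come from the patch:

#include <stdbool.h>
#include <stdio.h>

/* Mock of the two fields the heuristic reads; not the real task_struct. */
struct task {
	bool used_math;        /* stands in for tsk_used_math() */
	unsigned fpu_counter;  /* consecutive timeslices that used the FPU */
};

/* The decision computed once at the top of __switch_to() in this patch. */
static bool should_preload_fpu(const struct task *next)
{
	return next->used_math && next->fpu_counter > 5;
}

int main(void)
{
	struct task fresh  = { .used_math = false, .fpu_counter = 9 };
	struct task bursty = { .used_math = true,  .fpu_counter = 2 };
	struct task hog    = { .used_math = true,  .fpu_counter = 6 };

	printf("fresh:  %d\n", should_preload_fpu(&fresh));  /* 0: never used math */
	printf("bursty: %d\n", should_preload_fpu(&bursty)); /* 0: below threshold */
	printf("hog:    %d\n", should_preload_fpu(&hog));    /* 1: preload */
	return 0;
}

The tsk_used_math() half of the test also preserves the invariant the removed comment described: __math_state_restore() is only reached for tasks whose FPU state already exists, so the sleeping init_fpu() path can never be hit from the context switch.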
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ebefb5407b9d..a28279dbb07c 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -386,9 +386,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
 	unsigned fsindex, gsindex;
+	bool preload_fpu;
+
+	/*
+	 * If the task has used fpu the last 5 timeslices, just do a full
+	 * restore of the math state immediately to avoid the trap; the
+	 * chances of needing FPU soon are obviously high now
+	 */
+	preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
 
 	/* we're going to use this soon, after a few expensive things */
-	if (next_p->fpu_counter > 5)
+	if (preload_fpu)
 		prefetch(next->xstate);
 
 	/*
@@ -419,6 +427,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 
 	load_TLS(next, cpu);
 
+	/* Must be after DS reload */
+	unlazy_fpu(prev_p);
+
+	/* Make sure cpu is ready for new context */
+	if (preload_fpu)
+		clts();
+
 	/*
 	 * Leave lazy mode, flushing any hypercalls made here.
 	 * This must be done before restoring TLS segments so
@@ -459,9 +474,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
 	prev->gsindex = gsindex;
 
-	/* Must be after DS reload */
-	unlazy_fpu(prev_p);
-
 	/*
 	 * Switch the PDA and FPU contexts.
 	 */
@@ -480,15 +492,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
 		__switch_to_xtra(prev_p, next_p, tss);
 
-	/* If the task has used fpu the last 5 timeslices, just do a full
-	 * restore of the math state immediately to avoid the trap; the
-	 * chances of needing FPU soon are obviously high now
-	 *
-	 * tsk_used_math() checks prevent calling math_state_restore(),
-	 * which can sleep in the case of !tsk_used_math()
-	 */
-	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
-		math_state_restore();
+	/*
+	 * Preload the FPU context, now that we've determined that the
+	 * task is likely to be using it.
+	 */
+	if (preload_fpu)
+		__math_state_restore();
 	return prev_p;
 }
 
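On 64-bit the key move is that unlazy_fpu() and the conditional clts() now sit between load_TLS() and arch_end_context_switch(), i.e. inside the region where paravirt batches CPU state updates before flushing them in one go. A toy user-space model of that idea, assuming a simple flush-at-the-end queue; this is illustrative only, not how Xen's lazy mode is actually implemented:

#include <stdio.h>

/* Toy model of paravirt lazy-mode batching: CPU state updates are
 * queued, then flushed together at arch_end_context_switch(). */
enum op { LOAD_TLS, UNLAZY_FPU, CLTS };
static const char *opname[] = { "load_TLS", "unlazy_fpu(prev)", "clts" };

static enum op queue[8];
static int qlen;

static void batch(enum op o) { queue[qlen++] = o; }

/* Stands in for arch_end_context_switch(): one combined flush. */
static void flush_batch(void)
{
	printf("flush %d queued updates:", qlen);
	for (int i = 0; i < qlen; i++)
		printf(" %s", opname[queue[i]]);
	printf("\n");
	qlen = 0;
}

int main(void)
{
	int preload_fpu = 1;   /* decided up front, as in the patch */

	batch(LOAD_TLS);
	batch(UNLAZY_FPU);     /* moved up: must follow the DS reload */
	if (preload_fpu)
		batch(CLTS);   /* TS-clear rides the same batch */
	flush_batch();         /* single exit from lazy mode */

	if (preload_fpu)
		printf("__math_state_restore()\n"); /* TS already clear */
	return 0;
}

Had clts() stayed where math_state_restore() used to run, after the flush, it would have cost a separate trip through the hypervisor on paravirt; batching it is the point of the reordering.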
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 6fe85c272a2b..83264922a878 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -795,6 +795,28 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
 }
 
 /*
+ * __math_state_restore assumes that cr0.TS is already clear and the
+ * fpu state is all ready for use.  Used during context switch.
+ */
+void __math_state_restore(void)
+{
+	struct thread_info *thread = current_thread_info();
+	struct task_struct *tsk = thread->task;
+
+	/*
+	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
+	 */
+	if (unlikely(restore_fpu_checking(tsk))) {
+		stts();
+		force_sig(SIGSEGV, tsk);
+		return;
+	}
+
+	thread->status |= TS_USEDFPU;	/* So we fnsave on switch_to() */
+	tsk->fpu_counter++;
+}
+
+/*
  * 'math_state_restore()' saves the current math information in the
  * old math state array, and gets the new ones from the current task
 *
@@ -825,17 +847,8 @@ asmlinkage void math_state_restore(void)
 	}
 
 	clts();				/* Allow maths ops (or we recurse) */
-	/*
-	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
-	 */
-	if (unlikely(restore_fpu_checking(tsk))) {
-		stts();
-		force_sig(SIGSEGV, tsk);
-		return;
-	}
 
-	thread->status |= TS_USEDFPU;	/* So we fnsave on switch_to() */
-	tsk->fpu_counter++;
+	__math_state_restore();
 }
 EXPORT_SYMBOL_GPL(math_state_restore);
 
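The traps.c change is a classic slow-path/fast-path split: math_state_restore() keeps the trap-time setup (including clts() and the possibly-sleeping path for tasks that never used math) and then tail-calls the new helper, which holds the paranoid restore and the bookkeeping. A user-space model of the shape; function names are borrowed from the diff, the bodies are mocks:

#include <stdbool.h>
#include <stdio.h>

/* Mock of restore_fpu_checking(): 0 on success, like the real helper.
 * Flip this flag to walk the failure path. */
static bool restore_fails = false;
static int restore_fpu_checking_mock(void) { return restore_fails ? -1 : 0; }

/* Fast path, shaped like __math_state_restore(): the caller has already
 * cleared TS and guaranteed the saved state exists. */
static void math_state_restore_core(void)
{
	if (restore_fpu_checking_mock()) {
		puts("paranoid path: stts(); force_sig(SIGSEGV, tsk)");
		return;
	}
	puts("thread->status |= TS_USEDFPU; tsk->fpu_counter++");
}

/* Trap-time wrapper, shaped like math_state_restore(): performs the
 * setup the fast path assumes, then delegates. */
static void math_state_restore_trap(void)
{
	/* the real function first handles !tsk_used_math() via init_fpu(),
	 * which can sleep; then: */
	puts("clts(); /* Allow maths ops (or we recurse) */");
	math_state_restore_core();
}

int main(void)
{
	math_state_restore_trap();
	return 0;
}

This factoring is what lets __switch_to() call the core directly: the context switch already cleared TS in its batched clts(), and its tsk_used_math() check guarantees the state exists, so the wrapper's setup would be redundant there.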
