aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
author Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com> 2009-04-24 04:01:01 -0400
committer Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com> 2009-06-17 16:27:58 -0400
commit 17950c5b243f99cbabef173415ee988c52104d7e (patch)
tree f2df6943769456b789b228243544a0307544f926 /arch/x86
parent 16d9dbf0c2bd167fdd942b83592d59696c7b73bd (diff)
x86-64: move clts into batch cpu state updates when preloading fpu
When a task is likely to be using the fpu, we preload its state during the context switch, rather than waiting for it to run an fpu instruction. Make sure the clts() happens while we're doing batched fpu state updates to optimise paravirtualized context switches.

[ Impact: optimise paravirtual FPU context switch ]

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Alok Kataria <akataria@vmware.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Diffstat (limited to 'arch/x86')
-rw-r--r-- arch/x86/kernel/process_64.c 27
1 files changed, 18 insertions, 9 deletions
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index c9b8904736db..a28279dbb07c 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -386,9 +386,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
386 int cpu = smp_processor_id(); 386 int cpu = smp_processor_id();
387 struct tss_struct *tss = &per_cpu(init_tss, cpu); 387 struct tss_struct *tss = &per_cpu(init_tss, cpu);
388 unsigned fsindex, gsindex; 388 unsigned fsindex, gsindex;
389 bool preload_fpu;
390
391 /*
392 * If the task has used fpu the last 5 timeslices, just do a full
393 * restore of the math state immediately to avoid the trap; the
394 * chances of needing FPU soon are obviously high now
395 */
396 preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
389 397
390 /* we're going to use this soon, after a few expensive things */ 398 /* we're going to use this soon, after a few expensive things */
391 if (next_p->fpu_counter > 5) 399 if (preload_fpu)
392 prefetch(next->xstate); 400 prefetch(next->xstate);
393 401
394 /* 402 /*
@@ -422,6 +430,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
422 /* Must be after DS reload */ 430 /* Must be after DS reload */
423 unlazy_fpu(prev_p); 431 unlazy_fpu(prev_p);
424 432
433 /* Make sure cpu is ready for new context */
434 if (preload_fpu)
435 clts();
436
425 /* 437 /*
426 * Leave lazy mode, flushing any hypercalls made here. 438 * Leave lazy mode, flushing any hypercalls made here.
427 * This must be done before restoring TLS segments so 439 * This must be done before restoring TLS segments so
@@ -480,15 +492,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
480 task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) 492 task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
481 __switch_to_xtra(prev_p, next_p, tss); 493 __switch_to_xtra(prev_p, next_p, tss);
482 494
483 /* If the task has used fpu the last 5 timeslices, just do a full 495 /*
484 * restore of the math state immediately to avoid the trap; the 496 * Preload the FPU context, now that we've determined that the
485 * chances of needing FPU soon are obviously high now 497 * task is likely to be using it.
486 *
487 * tsk_used_math() checks prevent calling math_state_restore(),
488 * which can sleep in the case of !tsk_used_math()
489 */ 498 */
490 if (tsk_used_math(next_p) && next_p->fpu_counter > 5) 499 if (preload_fpu)
491 math_state_restore(); 500 __math_state_restore();
492 return prev_p; 501 return prev_p;
493} 502}
494 503