author		Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>	2009-04-24 03:45:26 -0400
committer	Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>	2009-06-17 16:21:25 -0400
commit		2fcddce10f6771cfa0c56fd1e826d50d67d100b7 (patch)
tree		08a1a21922509b9ba3b896dfc1951a6d0ab0d8f4 /arch
parent		e6e9cac8c3417b43498b243c1f8f11780e157168 (diff)
x86-32: make sure clts is batched during context switch
If we're preloading the fpu state during context switch, make sure the clts
happens while we're batching the cpu context update, then do the actual
__math_state_restore once the updates are flushed.

This allows more efficient context switches when running paravirtualized,
as all the hypercalls can be folded together into one.

[ Impact: optimise paravirtual FPU context switch ]

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Alok Kataria <akataria@vmware.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
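The saving comes from paravirt lazy CPU mode: privileged state updates issued between arch_start_context_switch() and arch_end_context_switch() can be queued by the hypervisor backend and flushed as a single batch, so issuing clts() inside that window adds no extra exit, whereas the old code let math_state_restore() do its own clts() after the flush as a separate paravirt operation. The toy program below only illustrates this queue-then-flush pattern; the function names mirror the kernel's, but the queueing logic is a standalone assumption, not the actual pv_ops implementation.

#include <stdio.h>

/* Stand-in for paravirt lazy CPU mode: operations issued while "lazy"
 * are queued and flushed in one batch at the end.  Toy model only,
 * not the kernel's pv_ops machinery. */
static int lazy_cpu_mode;
static int batched_ops;

static void pv_op(const char *what)
{
	if (lazy_cpu_mode) {
		batched_ops++;		/* deferred, folded into one flush */
		printf("queued : %s\n", what);
	} else {
		printf("issued : %s (separate operation)\n", what);
	}
}

static void load_sp0(void)	{ pv_op("update kernel stack pointer"); }
static void clts(void)		{ pv_op("clts (clear CR0.TS)"); }

static void arch_start_context_switch(void)	{ lazy_cpu_mode = 1; }

static void arch_end_context_switch(void)
{
	lazy_cpu_mode = 0;
	printf("flushed: %d updates in one batch\n", batched_ops);
	batched_ops = 0;
}

int main(void)
{
	arch_start_context_switch();
	load_sp0();	/* the usual cpu context updates...           */
	clts();		/* ...now the FPU enable rides along for free */
	arch_end_context_switch();
	/* __math_state_restore() would run here, after the flush,
	 * with CR0.TS already clear. */
	return 0;
}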
Diffstat (limited to 'arch')
-rw-r--r--	arch/x86/kernel/process_32.c	27
1 file changed, 16 insertions, 11 deletions
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 59f4524984af..a80eddd41658 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -350,14 +350,21 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		 *next = &next_p->thread;
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
+	bool preload_fpu;
 
 	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
 
-	__unlazy_fpu(prev_p);
+	/*
+	 * If the task has used fpu the last 5 timeslices, just do a full
+	 * restore of the math state immediately to avoid the trap; the
+	 * chances of needing FPU soon are obviously high now
+	 */
+	preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
 
+	__unlazy_fpu(prev_p);
 
 	/* we're going to use this soon, after a few expensive things */
-	if (next_p->fpu_counter > 5)
+	if (preload_fpu)
 		prefetch(next->xstate);
 
 	/*
@@ -398,6 +405,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	    task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
 		__switch_to_xtra(prev_p, next_p, tss);
 
+	/* If we're going to preload the fpu context, make sure clts
+	   is run while we're batching the cpu state updates. */
+	if (preload_fpu)
+		clts();
+
 	/*
 	 * Leave lazy mode, flushing any hypercalls made here.
 	 * This must be done before restoring TLS segments so
@@ -407,15 +419,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 */
 	arch_end_context_switch(next_p);
 
-	/* If the task has used fpu the last 5 timeslices, just do a full
-	 * restore of the math state immediately to avoid the trap; the
-	 * chances of needing FPU soon are obviously high now
-	 *
-	 * tsk_used_math() checks prevent calling math_state_restore(),
-	 * which can sleep in the case of !tsk_used_math()
-	 */
-	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
-		math_state_restore();
+	if (preload_fpu)
+		__math_state_restore();
 
 	/*
 	 * Restore %gs if needed (which is common)
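For reference, the last hunk can call __math_state_restore() directly because the parent commit (e6e9cac8c341, "x86: split out core __math_state_restore") separated the core restore from math_state_restore(), whose full version also handles first-use init_fpu() (which can sleep, hence the tsk_used_math() check now folded into preload_fpu) and issues its own clts(). A rough, paraphrased sketch of that relationship, assuming the 2.6.31-era traps.c shape rather than quoting it verbatim:

/* Paraphrased shape only, not the verbatim kernel code. */
void math_state_restore(void)
{
	struct task_struct *tsk = current;

	if (!tsk_used_math(tsk))
		init_fpu(tsk);		/* may sleep: first use allocates the
					   fpu/xstate area, so __switch_to()
					   must never reach this path --
					   preload_fpu already requires
					   tsk_used_math(next_p) */

	clts();				/* allow FPU ops (or we trap again) */
	__math_state_restore();		/* core restore: reload the saved FPU
					   state and mark the task as owning
					   the FPU; it does no clts() of its
					   own, which is why the patch can
					   call it after batching clts() */
}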