aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/process_32.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-02-18 15:56:35 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2012-02-18 17:03:48 -0500
commit34ddc81a230b15c0e345b6b253049db731499f7e (patch)
tree0c3afd68071ec1a8a1d8724ef9a42ef845ecf402 /arch/x86/kernel/process_32.c
parentf94edacf998516ac9d849f7bc6949a703977a7f3 (diff)
i387: re-introduce FPU state preloading at context switch time
After all the FPU state cleanups and finally finding the problem that caused all our FPU save/restore problems, this re-introduces the preloading of FPU state that was removed in commit b3b0870ef3ff ("i387: do not preload FPU state at task switch time"). However, instead of simply reverting the removal, this reimplements preloading with several fixes, most notably - properly abstracted as a true FPU state switch, rather than as open-coded save and restore with various hacks. In particular, implementing it as a proper FPU state switch allows us to optimize the CR0.TS flag accesses: there is no reason to set the TS bit only to then almost immediately clear it again. CR0 accesses are quite slow and expensive, don't flip the bit back and forth for no good reason. - Make sure that the same model works for both x86-32 and x86-64, so that there are no gratuitous differences between the two due to the way they save and restore segment state differently due to architectural differences that really don't matter to the FPU state. - Avoid exposing the "preload" state to the context switch routines, and in particular allow the concept of lazy state restore: if nothing else has used the FPU in the meantime, and the process is still on the same CPU, we can avoid restoring state from memory entirely, just re-expose the state that is still in the FPU unit. That optimized lazy restore isn't actually implemented here, but the infrastructure is set up for it. Of course, older CPU's that use 'fnsave' to save the state cannot take advantage of this, since the state saving also trashes the state. In other words, there is now an actual _design_ to the FPU state saving, rather than just random historical baggage. Hopefully it's easier to follow as a result. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'arch/x86/kernel/process_32.c')
-rw-r--r--arch/x86/kernel/process_32.c5
1 files changed, 4 insertions, 1 deletions
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 324cd722b447..80bfe1ab0031 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -299,10 +299,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
299 *next = &next_p->thread; 299 *next = &next_p->thread;
300 int cpu = smp_processor_id(); 300 int cpu = smp_processor_id();
301 struct tss_struct *tss = &per_cpu(init_tss, cpu); 301 struct tss_struct *tss = &per_cpu(init_tss, cpu);
302 fpu_switch_t fpu;
302 303
303 /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ 304 /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
304 305
305 __unlazy_fpu(prev_p); 306 fpu = switch_fpu_prepare(prev_p, next_p);
306 307
307 /* 308 /*
308 * Reload esp0. 309 * Reload esp0.
@@ -357,6 +358,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
357 if (prev->gs | next->gs) 358 if (prev->gs | next->gs)
358 lazy_load_gs(next->gs); 359 lazy_load_gs(next->gs);
359 360
361 switch_fpu_finish(next_p, fpu);
362
360 percpu_write(current_task, next_p); 363 percpu_write(current_task, next_p);
361 364
362 return prev_p; 365 return prev_p;