author	Linus Torvalds <torvalds@linux-foundation.org>	2012-02-18 15:56:35 -0500
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2012-02-29 19:34:26 -0500
commit	f4def3f88dc57648d1603656f1ffdf498bfce1ee (patch)
tree	a277f40828d1123325362c4d5823454de5e63e88 /arch/x86/include/asm
parent	0a9d89d976531bd5ea7fce618cee886c79b43e07 (diff)
i387: re-introduce FPU state preloading at context switch time
commit 34ddc81a230b15c0e345b6b253049db731499f7e upstream.

After all the FPU state cleanups and finally finding the problem that caused all our FPU save/restore problems, this re-introduces the preloading of FPU state that was removed in commit b3b0870ef3ff ("i387: do not preload FPU state at task switch time").

However, instead of simply reverting the removal, this reimplements preloading with several fixes, most notably

 - properly abstracted as a true FPU state switch, rather than as open-coded save and restore with various hacks.

   In particular, implementing it as a proper FPU state switch allows us to optimize the CR0.TS flag accesses: there is no reason to set the TS bit only to then almost immediately clear it again. CR0 accesses are quite slow and expensive, don't flip the bit back and forth for no good reason.

 - Make sure that the same model works for both x86-32 and x86-64, so that there are no gratuitous differences between the two due to the way they save and restore segment state differently due to architectural differences that really don't matter to the FPU state.

 - Avoid exposing the "preload" state to the context switch routines, and in particular allow the concept of lazy state restore: if nothing else has used the FPU in the meantime, and the process is still on the same CPU, we can avoid restoring state from memory entirely, just re-expose the state that is still in the FPU unit.

   That optimized lazy restore isn't actually implemented here, but the infrastructure is set up for it. Of course, older CPU's that use 'fnsave' to save the state cannot take advantage of this, since the state saving also trashes the state.

In other words, there is now an actual _design_ to the FPU state saving, rather than just random historical baggage. Hopefully it's easier to follow as a result.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
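For context, the two-stage API added by the hunks below is meant to be called from the arch context-switch path; the real callers live in arch/x86/kernel/process_32.c and process_64.c, which fall outside this diffstat. A minimal sketch of the intended calling pattern (illustrative only, with made-up task names prev_p/next_p, not the patch's actual __switch_to changes):

struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	fpu_switch_t fpu;

	/*
	 * Stage 1, still in the context of the old task: save the old
	 * FPU state (if prev_p owned the FPU), decide whether next_p's
	 * state should be preloaded, and touch CR0.TS at most once.
	 */
	fpu = switch_fpu_prepare(prev_p, next_p);

	/* ... switch stacks, segment and TLS state, etc. ... */

	/*
	 * Stage 2, once next_p is current: restore the FPU registers
	 * only if switch_fpu_prepare() asked for a preload.
	 */
	switch_fpu_finish(next_p, fpu);

	return prev_p;
}

The point of the split is that CR0.TS is updated once in stage 1 instead of being set and then almost immediately cleared again on the preload path.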
Diffstat (limited to 'arch/x86/include/asm')
-rw-r--r--	arch/x86/include/asm/i387.h	110
1 file changed, 93 insertions(+), 17 deletions(-)
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index f5376676f89..a850b4d8d14 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -29,6 +29,7 @@ extern unsigned int sig_xstate_size;
 extern void fpu_init(void);
 extern void mxcsr_feature_mask_init(void);
 extern int init_fpu(struct task_struct *child);
+extern void __math_state_restore(struct task_struct *);
 extern void math_state_restore(void);
 extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
 
@@ -212,9 +213,10 @@ static inline void fpu_fxsave(struct fpu *fpu)
 #endif	/* CONFIG_X86_64 */
 
 /*
- * These must be called with preempt disabled
+ * These must be called with preempt disabled. Returns
+ * 'true' if the FPU state is still intact.
  */
-static inline void fpu_save_init(struct fpu *fpu)
+static inline int fpu_save_init(struct fpu *fpu)
 {
 	if (use_xsave()) {
 		fpu_xsave(fpu);
@@ -223,22 +225,33 @@ static inline void fpu_save_init(struct fpu *fpu)
 		 * xsave header may indicate the init state of the FP.
 		 */
 		if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
-			return;
+			return 1;
 	} else if (use_fxsr()) {
 		fpu_fxsave(fpu);
 	} else {
 		asm volatile("fnsave %[fx]; fwait"
 			     : [fx] "=m" (fpu->state->fsave));
-		return;
+		return 0;
 	}
 
-	if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES))
+	/*
+	 * If exceptions are pending, we need to clear them so
+	 * that we don't randomly get exceptions later.
+	 *
+	 * FIXME! Is this perhaps only true for the old-style
+	 * irq13 case? Maybe we could leave the x87 state
+	 * intact otherwise?
+	 */
+	if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) {
 		asm volatile("fnclex");
+		return 0;
+	}
+	return 1;
 }
 
-static inline void __save_init_fpu(struct task_struct *tsk)
+static inline int __save_init_fpu(struct task_struct *tsk)
 {
-	fpu_save_init(&tsk->thread.fpu);
+	return fpu_save_init(&tsk->thread.fpu);
 }
 
 static inline int fpu_fxrstor_checking(struct fpu *fpu)
@@ -301,20 +314,79 @@ static inline void __thread_fpu_begin(struct task_struct *tsk)
 }
 
 /*
- * Signal frame handlers...
+ * FPU state switching for scheduling.
+ *
+ * This is a two-stage process:
+ *
+ *  - switch_fpu_prepare() saves the old state and
+ *    sets the new state of the CR0.TS bit. This is
+ *    done within the context of the old process.
+ *
+ *  - switch_fpu_finish() restores the new state as
+ *    necessary.
  */
-extern int save_i387_xstate(void __user *buf);
-extern int restore_i387_xstate(void __user *buf);
+typedef struct { int preload; } fpu_switch_t;
+
+/*
+ * FIXME! We could do a totally lazy restore, but we need to
+ * add a per-cpu "this was the task that last touched the FPU
+ * on this CPU" variable, and the task needs to have a "I last
+ * touched the FPU on this CPU" and check them.
+ *
+ * We don't do that yet, so "fpu_lazy_restore()" always returns
+ * false, but some day..
+ */
+#define fpu_lazy_restore(tsk) (0)
+#define fpu_lazy_state_intact(tsk) do { } while (0)
+
+static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new)
+{
+	fpu_switch_t fpu;
+
+	fpu.preload = tsk_used_math(new) && new->fpu_counter > 5;
+	if (__thread_has_fpu(old)) {
+		if (__save_init_fpu(old))
+			fpu_lazy_state_intact(old);
+		__thread_clear_has_fpu(old);
+		old->fpu_counter++;
+
+		/* Don't change CR0.TS if we just switch! */
+		if (fpu.preload) {
+			__thread_set_has_fpu(new);
+			prefetch(new->thread.fpu.state);
+		} else
+			stts();
+	} else {
+		old->fpu_counter = 0;
+		if (fpu.preload) {
+			if (fpu_lazy_restore(new))
+				fpu.preload = 0;
+			else
+				prefetch(new->thread.fpu.state);
+			__thread_fpu_begin(new);
+		}
+	}
+	return fpu;
+}
 
-static inline void __unlazy_fpu(struct task_struct *tsk)
+/*
+ * By the time this gets called, we've already cleared CR0.TS and
+ * given the process the FPU if we are going to preload the FPU
+ * state - all we need to do is to conditionally restore the register
+ * state itself.
+ */
+static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
 {
-	if (__thread_has_fpu(tsk)) {
-		__save_init_fpu(tsk);
-		__thread_fpu_end(tsk);
-	} else
-		tsk->fpu_counter = 0;
+	if (fpu.preload)
+		__math_state_restore(new);
 }
 
+/*
+ * Signal frame handlers...
+ */
+extern int save_i387_xstate(void __user *buf);
+extern int restore_i387_xstate(void __user *buf);
+
 static inline void __clear_fpu(struct task_struct *tsk)
 {
 	if (__thread_has_fpu(tsk)) {
@@ -474,7 +546,11 @@ static inline void save_init_fpu(struct task_struct *tsk)
 static inline void unlazy_fpu(struct task_struct *tsk)
 {
 	preempt_disable();
-	__unlazy_fpu(tsk);
+	if (__thread_has_fpu(tsk)) {
+		__save_init_fpu(tsk);
+		__thread_fpu_end(tsk);
+	} else
+		tsk->fpu_counter = 0;
 	preempt_enable();
 }
 
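The FIXME above fpu_lazy_restore() spells out what a fully lazy restore would need: a per-cpu "last owner" pointer plus a per-task record of which CPU last held its registers, with the check also taking the current CPU as an argument. As a rough, hypothetical sketch of that direction only (fpu_owner_task and thread.fpu.last_cpu are made-up names here, not part of this patch), the check could look something like:

DECLARE_PER_CPU(struct task_struct *, fpu_owner_task);	/* hypothetical per-cpu variable */

/*
 * A task's registers are still live in this CPU's FPU only if this CPU
 * was the last one to load them and no other task has owned the FPU
 * here since.
 */
static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
{
	return new == per_cpu(fpu_owner_task, cpu) &&
		cpu == new->thread.fpu.last_cpu;	/* hypothetical field */
}

When that check succeeds, switch_fpu_prepare() can clear fpu.preload and skip the memory restore entirely, which is exactly the case the infrastructure above leaves room for.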