Diffstat (limited to 'arch/x86/include/asm')

-rw-r--r--  arch/x86/include/asm/i387.h        | 175 +++++++++++++++++++-------
-rw-r--r--  arch/x86/include/asm/processor.h   |   2 ++
-rw-r--r--  arch/x86/include/asm/thread_info.h |   2 --

3 files changed, 139 insertions(+), 40 deletions(-)
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 01b115d86770..247904945d3f 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -32,6 +32,8 @@ extern int init_fpu(struct task_struct *child);
 extern void math_state_restore(void);
 extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
 
+DECLARE_PER_CPU(struct task_struct *, fpu_owner_task);
+
 extern user_regset_active_fn fpregs_active, xfpregs_active;
 extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get,
 				xstateregs_get;
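
Note: DECLARE_PER_CPU() only declares the variable for users of this header; the matching definition must live in exactly one compilation unit (in this series it sits in the x86 CPU setup code, arch/x86/kernel/cpu/common.c). A minimal sketch of what that definition looks like:

	/* One pointer per CPU: which task's FPU state is live in
	 * this CPU's registers right now (NULL if none/unknown). */
	DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
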
@@ -212,9 +214,10 @@ static inline void fpu_fxsave(struct fpu *fpu)
 #endif	/* CONFIG_X86_64 */
 
 /*
- * These must be called with preempt disabled
+ * These must be called with preempt disabled. Returns
+ * 'true' if the FPU state is still intact.
  */
-static inline void fpu_save_init(struct fpu *fpu)
+static inline int fpu_save_init(struct fpu *fpu)
 {
 	if (use_xsave()) {
 		fpu_xsave(fpu);
@@ -223,22 +226,33 @@ static inline void fpu_save_init(struct fpu *fpu)
 		 * xsave header may indicate the init state of the FP.
 		 */
 		if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
-			return;
+			return 1;
 	} else if (use_fxsr()) {
 		fpu_fxsave(fpu);
 	} else {
 		asm volatile("fnsave %[fx]; fwait"
 			     : [fx] "=m" (fpu->state->fsave));
-		return;
+		return 0;
 	}
 
-	if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES))
+	/*
+	 * If exceptions are pending, we need to clear them so
+	 * that we don't randomly get exceptions later.
+	 *
+	 * FIXME! Is this perhaps only true for the old-style
+	 * irq13 case? Maybe we could leave the x87 state
+	 * intact otherwise?
+	 */
+	if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) {
 		asm volatile("fnclex");
+		return 0;
+	}
+	return 1;
 }
 
-static inline void __save_init_fpu(struct task_struct *tsk)
+static inline int __save_init_fpu(struct task_struct *tsk)
 {
-	fpu_save_init(&tsk->thread.fpu);
+	return fpu_save_init(&tsk->thread.fpu);
 }
 
 static inline int fpu_fxrstor_checking(struct fpu *fpu)
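
Note: the new int return value of fpu_save_init() is the pivot for lazy restore: 1 means the register contents still match the saved image after the save, 0 means the save itself destroyed them (the fnsave path) or fnclex changed the status word. A hedged sketch of the caller pattern this enables (last_cpu and the ~0 sentinel come from the switch_fpu_prepare() hunk further down):

	/* Illustrative only -- mirrors the logic added later in this diff. */
	if (__save_init_fpu(tsk))
		tsk->thread.fpu.last_cpu = cpu;	/* registers intact: lazy restore OK */
	else
		tsk->thread.fpu.last_cpu = ~0;	/* registers clobbered: force reload */
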
@@ -256,6 +270,16 @@ static inline int fpu_restore_checking(struct fpu *fpu)
 
 static inline int restore_fpu_checking(struct task_struct *tsk)
 {
+	/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
+	   is pending.  Clear the x87 state here by setting it to fixed
+	   values. "m" is a random variable that should be in L1 */
+	alternative_input(
+		ASM_NOP8 ASM_NOP2,
+		"emms\n\t"		/* clear stack tags */
+		"fildl %P[addr]",	/* set F?P to defined value */
+		X86_FEATURE_FXSAVE_LEAK,
+		[addr] "m" (tsk->thread.fpu.has_fpu));
+
 	return fpu_restore_checking(&tsk->thread.fpu);
 }
 
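
Note: alternative_input() is patched at boot time: on CPUs without the X86_FEATURE_FXSAVE_LEAK quirk the sequence stays a 10-byte NOP, while on affected AMD K7/K8 parts it runs emms + fildl to overwrite the leaked FIP/FDP/FOP values before the restore. Conceptually, without the runtime-patching machinery, the same check could be written as (a sketch, not what the kernel actually compiles):

	if (boot_cpu_has(X86_FEATURE_FXSAVE_LEAK)) {
		/* Overwrite stale FIP/FDP/FOP with harmless values. */
		asm volatile("emms\n\t"
			     "fildl %P[addr]"
			     : : [addr] "m" (tsk->thread.fpu.has_fpu));
	}
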
@@ -264,21 +288,23 @@ static inline int restore_fpu_checking(struct task_struct *tsk)
  * be preemption protection *and* they need to be
  * properly paired with the CR0.TS changes!
  */
-static inline int __thread_has_fpu(struct thread_info *ti)
+static inline int __thread_has_fpu(struct task_struct *tsk)
 {
-	return ti->status & TS_USEDFPU;
+	return tsk->thread.fpu.has_fpu;
 }
 
 /* Must be paired with an 'stts' after! */
-static inline void __thread_clear_has_fpu(struct thread_info *ti)
+static inline void __thread_clear_has_fpu(struct task_struct *tsk)
 {
-	ti->status &= ~TS_USEDFPU;
+	tsk->thread.fpu.has_fpu = 0;
+	percpu_write(fpu_owner_task, NULL);
 }
 
 /* Must be paired with a 'clts' before! */
-static inline void __thread_set_has_fpu(struct thread_info *ti)
+static inline void __thread_set_has_fpu(struct task_struct *tsk)
 {
-	ti->status |= TS_USEDFPU;
+	tsk->thread.fpu.has_fpu = 1;
+	percpu_write(fpu_owner_task, tsk);
 }
 
 /*
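
Note: with fpu_owner_task now kept in sync inside these helpers, the invariant is one-directional: whenever a task has has_fpu set, this CPU's fpu_owner_task points at it. The reverse does not hold, because switch_fpu_prepare() below clears has_fpu directly while deliberately leaving fpu_owner_task alone ("But leave fpu_owner_task!"). A hypothetical debug helper, not part of the patch, stating the invariant:

	static inline void fpu_owner_sanity_check(struct task_struct *tsk)
	{
		if (__thread_has_fpu(tsk))
			WARN_ON_ONCE(percpu_read(fpu_owner_task) != tsk);
	}
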
@@ -288,41 +314,108 @@ static inline void __thread_set_has_fpu(struct thread_info *ti)
  * These generally need preemption protection to work,
  * do try to avoid using these on their own.
  */
-static inline void __thread_fpu_end(struct thread_info *ti)
+static inline void __thread_fpu_end(struct task_struct *tsk)
 {
-	__thread_clear_has_fpu(ti);
+	__thread_clear_has_fpu(tsk);
 	stts();
 }
 
-static inline void __thread_fpu_begin(struct thread_info *ti)
+static inline void __thread_fpu_begin(struct task_struct *tsk)
 {
 	clts();
-	__thread_set_has_fpu(ti);
+	__thread_set_has_fpu(tsk);
 }
 
 /*
- * Signal frame handlers...
+ * FPU state switching for scheduling.
+ *
+ * This is a two-stage process:
+ *
+ *  - switch_fpu_prepare() saves the old state and
+ *    sets the new state of the CR0.TS bit. This is
+ *    done within the context of the old process.
+ *
+ *  - switch_fpu_finish() restores the new state as
+ *    necessary.
  */
-extern int save_i387_xstate(void __user *buf);
-extern int restore_i387_xstate(void __user *buf);
+typedef struct { int preload; } fpu_switch_t;
 
-static inline void __unlazy_fpu(struct task_struct *tsk)
+/*
+ * FIXME! We could do a totally lazy restore, but we need to
+ * add a per-cpu "this was the task that last touched the FPU
+ * on this CPU" variable, and the task needs to have a "I last
+ * touched the FPU on this CPU" and check them.
+ *
+ * We don't do that yet, so "fpu_lazy_restore()" always returns
+ * false, but some day..
+ */
+static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
 {
-	if (__thread_has_fpu(task_thread_info(tsk))) {
-		__save_init_fpu(tsk);
-		__thread_fpu_end(task_thread_info(tsk));
-	} else
-		tsk->fpu_counter = 0;
+	return new == percpu_read_stable(fpu_owner_task) &&
+		cpu == new->thread.fpu.last_cpu;
+}
+
+static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu)
+{
+	fpu_switch_t fpu;
+
+	fpu.preload = tsk_used_math(new) && new->fpu_counter > 5;
+	if (__thread_has_fpu(old)) {
+		if (!__save_init_fpu(old))
+			cpu = ~0;
+		old->thread.fpu.last_cpu = cpu;
+		old->thread.fpu.has_fpu = 0;	/* But leave fpu_owner_task! */
+
+		/* Don't change CR0.TS if we just switch! */
+		if (fpu.preload) {
+			new->fpu_counter++;
+			__thread_set_has_fpu(new);
+			prefetch(new->thread.fpu.state);
+		} else
+			stts();
+	} else {
+		old->fpu_counter = 0;
+		old->thread.fpu.last_cpu = ~0;
+		if (fpu.preload) {
+			new->fpu_counter++;
+			if (fpu_lazy_restore(new, cpu))
+				fpu.preload = 0;
+			else
+				prefetch(new->thread.fpu.state);
+			__thread_fpu_begin(new);
+		}
+	}
+	return fpu;
+}
+
+/*
+ * By the time this gets called, we've already cleared CR0.TS and
+ * given the process the FPU if we are going to preload the FPU
+ * state - all we need to do is to conditionally restore the register
+ * state itself.
+ */
+static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
+{
+	if (fpu.preload) {
+		if (unlikely(restore_fpu_checking(new)))
+			__thread_fpu_end(new);
+	}
 }
 
+/*
+ * Signal frame handlers...
+ */
+extern int save_i387_xstate(void __user *buf);
+extern int restore_i387_xstate(void __user *buf);
+
 static inline void __clear_fpu(struct task_struct *tsk)
 {
-	if (__thread_has_fpu(task_thread_info(tsk))) {
+	if (__thread_has_fpu(tsk)) {
 		/* Ignore delayed exceptions from user space */
 		asm volatile("1: fwait\n"
 			     "2:\n"
 			     _ASM_EXTABLE(1b, 2b));
-		__thread_fpu_end(task_thread_info(tsk));
+		__thread_fpu_end(tsk);
 	}
 }
 
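
Note: the two halves are meant to bracket the low-level context switch: switch_fpu_prepare() runs while the old task's stack and percpu state are still current, switch_fpu_finish() after the switch. The real callers are the __switch_to() implementations in arch/x86/kernel/process_32.c and process_64.c; a simplified sketch of the shape they take (the real functions also switch stacks, TLS, segments, and more):

	struct task_struct *__switch_to(struct task_struct *prev_p,
					struct task_struct *next_p)
	{
		int cpu = smp_processor_id();
		fpu_switch_t fpu;

		fpu = switch_fpu_prepare(prev_p, next_p, cpu);
		/* ... stack, TLS, segment and percpu switching ... */
		switch_fpu_finish(next_p, fpu);
		return prev_p;
	}
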
@@ -337,7 +430,7 @@ static inline void __clear_fpu(struct task_struct *tsk)
  */
 static inline bool interrupted_kernel_fpu_idle(void)
 {
-	return !__thread_has_fpu(current_thread_info()) &&
+	return !__thread_has_fpu(current) &&
 		(read_cr0() & X86_CR0_TS);
 }
 
@@ -371,16 +464,18 @@ static inline bool irq_fpu_usable(void)
 
 static inline void kernel_fpu_begin(void)
 {
-	struct thread_info *me = current_thread_info();
+	struct task_struct *me = current;
 
 	WARN_ON_ONCE(!irq_fpu_usable());
 	preempt_disable();
 	if (__thread_has_fpu(me)) {
-		__save_init_fpu(me->task);
+		__save_init_fpu(me);
 		__thread_clear_has_fpu(me);
 		/* We do 'stts()' in kernel_fpu_end() */
-	} else
+	} else {
+		percpu_write(fpu_owner_task, NULL);
 		clts();
+	}
 }
 
 static inline void kernel_fpu_end(void)
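
Note: kernel_fpu_begin() now has to poison fpu_owner_task on the clts() path too: even when no task owns the FPU, the registers may still hold some task's state from an earlier lazy switch-out, and once kernel code clobbers them a later fpu_lazy_restore() match would silently resume the wrong register contents. Usage is unchanged; the usual pattern for kernel-mode SSE users still looks like:

	kernel_fpu_begin();
	/* SSE/AVX registers may be used here; preemption stays disabled. */
	do_some_simd_work();	/* hypothetical worker */
	kernel_fpu_end();
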
@@ -441,13 +536,13 @@ static inline void irq_ts_restore(int TS_state)
  */
 static inline int user_has_fpu(void)
 {
-	return __thread_has_fpu(current_thread_info());
+	return __thread_has_fpu(current);
 }
 
 static inline void user_fpu_end(void)
 {
 	preempt_disable();
-	__thread_fpu_end(current_thread_info());
+	__thread_fpu_end(current);
 	preempt_enable();
 }
 
@@ -455,7 +550,7 @@ static inline void user_fpu_begin(void)
 {
 	preempt_disable();
 	if (!user_has_fpu())
-		__thread_fpu_begin(current_thread_info());
+		__thread_fpu_begin(current);
 	preempt_enable();
 }
 
@@ -464,17 +559,21 @@ static inline void user_fpu_begin(void)
  */
 static inline void save_init_fpu(struct task_struct *tsk)
 {
-	WARN_ON_ONCE(!__thread_has_fpu(task_thread_info(tsk)));
+	WARN_ON_ONCE(!__thread_has_fpu(tsk));
 	preempt_disable();
 	__save_init_fpu(tsk);
-	__thread_fpu_end(task_thread_info(tsk));
+	__thread_fpu_end(tsk);
 	preempt_enable();
 }
 
 static inline void unlazy_fpu(struct task_struct *tsk)
 {
 	preempt_disable();
-	__unlazy_fpu(tsk);
+	if (__thread_has_fpu(tsk)) {
+		__save_init_fpu(tsk);
+		__thread_fpu_end(tsk);
+	} else
+		tsk->fpu_counter = 0;
 	preempt_enable();
 }
 
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index aa9088c26931..58545c97d071 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -374,6 +374,8 @@ union thread_xstate {
 };
 
 struct fpu {
+	unsigned int last_cpu;
+	unsigned int has_fpu;
 	union thread_xstate *state;
 };
 
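
Note: the two new fields encode the lazy-restore handshake: has_fpu replaces the old per-thread TS_USEDFPU flag, and last_cpu records where the state was last live, with ~0 as the "nowhere" sentinel (unsigned, so it can never equal a real CPU number). An illustrative helper, not in the patch, showing the encoding:

	/* Is this task's saved FPU image still what cpu's registers hold? */
	static inline int fpu_state_still_live(struct fpu *fpu, unsigned int cpu)
	{
		return fpu->last_cpu == cpu;	/* last_cpu == ~0 never matches */
	}
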
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index bc817cd8b443..cfd8144d5527 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -247,8 +247,6 @@ static inline struct thread_info *current_thread_info(void)
  * ever touches our thread-synchronous status, so we don't
  * have to worry about atomic accesses.
  */
-#define TS_USEDFPU		0x0001	/* FPU was used by this task
-					   this quantum (SMP) */
 #define TS_COMPAT		0x0002	/* 32bit syscall active (64BIT)*/
 #define TS_POLLING		0x0004	/* idle task polling need_resched,
 					   skip sending interrupt */