diff options
-rw-r--r-- | arch/x86/include/asm/i387.h | 35 | ||||
-rw-r--r-- | arch/x86/include/asm/processor.h | 3 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/process_32.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/process_64.c | 2 |
5 files changed, 29 insertions, 15 deletions
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 74c607b37e87..247904945d3f 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h | |||
@@ -32,6 +32,8 @@ extern int init_fpu(struct task_struct *child); | |||
32 | extern void math_state_restore(void); | 32 | extern void math_state_restore(void); |
33 | extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); | 33 | extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); |
34 | 34 | ||
35 | DECLARE_PER_CPU(struct task_struct *, fpu_owner_task); | ||
36 | |||
35 | extern user_regset_active_fn fpregs_active, xfpregs_active; | 37 | extern user_regset_active_fn fpregs_active, xfpregs_active; |
36 | extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get, | 38 | extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get, |
37 | xstateregs_get; | 39 | xstateregs_get; |
@@ -276,7 +278,7 @@ static inline int restore_fpu_checking(struct task_struct *tsk) | |||
276 | "emms\n\t" /* clear stack tags */ | 278 | "emms\n\t" /* clear stack tags */ |
277 | "fildl %P[addr]", /* set F?P to defined value */ | 279 | "fildl %P[addr]", /* set F?P to defined value */ |
278 | X86_FEATURE_FXSAVE_LEAK, | 280 | X86_FEATURE_FXSAVE_LEAK, |
279 | [addr] "m" (tsk->thread.has_fpu)); | 281 | [addr] "m" (tsk->thread.fpu.has_fpu)); |
280 | 282 | ||
281 | return fpu_restore_checking(&tsk->thread.fpu); | 283 | return fpu_restore_checking(&tsk->thread.fpu); |
282 | } | 284 | } |
@@ -288,19 +290,21 @@ static inline int restore_fpu_checking(struct task_struct *tsk) | |||
288 | */ | 290 | */ |
289 | static inline int __thread_has_fpu(struct task_struct *tsk) | 291 | static inline int __thread_has_fpu(struct task_struct *tsk) |
290 | { | 292 | { |
291 | return tsk->thread.has_fpu; | 293 | return tsk->thread.fpu.has_fpu; |
292 | } | 294 | } |
293 | 295 | ||
294 | /* Must be paired with an 'stts' after! */ | 296 | /* Must be paired with an 'stts' after! */ |
295 | static inline void __thread_clear_has_fpu(struct task_struct *tsk) | 297 | static inline void __thread_clear_has_fpu(struct task_struct *tsk) |
296 | { | 298 | { |
297 | tsk->thread.has_fpu = 0; | 299 | tsk->thread.fpu.has_fpu = 0; |
300 | percpu_write(fpu_owner_task, NULL); | ||
298 | } | 301 | } |
299 | 302 | ||
300 | /* Must be paired with a 'clts' before! */ | 303 | /* Must be paired with a 'clts' before! */ |
301 | static inline void __thread_set_has_fpu(struct task_struct *tsk) | 304 | static inline void __thread_set_has_fpu(struct task_struct *tsk) |
302 | { | 305 | { |
303 | tsk->thread.has_fpu = 1; | 306 | tsk->thread.fpu.has_fpu = 1; |
307 | percpu_write(fpu_owner_task, tsk); | ||
304 | } | 308 | } |
305 | 309 | ||
306 | /* | 310 | /* |
@@ -345,18 +349,22 @@ typedef struct { int preload; } fpu_switch_t; | |||
345 | * We don't do that yet, so "fpu_lazy_restore()" always returns | 349 | * We don't do that yet, so "fpu_lazy_restore()" always returns |
346 | * false, but some day.. | 350 | * false, but some day.. |
347 | */ | 351 | */ |
348 | #define fpu_lazy_restore(tsk) (0) | 352 | static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu) |
349 | #define fpu_lazy_state_intact(tsk) do { } while (0) | 353 | { |
354 | return new == percpu_read_stable(fpu_owner_task) && | ||
355 | cpu == new->thread.fpu.last_cpu; | ||
356 | } | ||
350 | 357 | ||
351 | static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new) | 358 | static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu) |
352 | { | 359 | { |
353 | fpu_switch_t fpu; | 360 | fpu_switch_t fpu; |
354 | 361 | ||
355 | fpu.preload = tsk_used_math(new) && new->fpu_counter > 5; | 362 | fpu.preload = tsk_used_math(new) && new->fpu_counter > 5; |
356 | if (__thread_has_fpu(old)) { | 363 | if (__thread_has_fpu(old)) { |
357 | if (__save_init_fpu(old)) | 364 | if (!__save_init_fpu(old)) |
358 | fpu_lazy_state_intact(old); | 365 | cpu = ~0; |
359 | __thread_clear_has_fpu(old); | 366 | old->thread.fpu.last_cpu = cpu; |
367 | old->thread.fpu.has_fpu = 0; /* But leave fpu_owner_task! */ | ||
360 | 368 | ||
361 | /* Don't change CR0.TS if we just switch! */ | 369 | /* Don't change CR0.TS if we just switch! */ |
362 | if (fpu.preload) { | 370 | if (fpu.preload) { |
@@ -367,9 +375,10 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta | |||
367 | stts(); | 375 | stts(); |
368 | } else { | 376 | } else { |
369 | old->fpu_counter = 0; | 377 | old->fpu_counter = 0; |
378 | old->thread.fpu.last_cpu = ~0; | ||
370 | if (fpu.preload) { | 379 | if (fpu.preload) { |
371 | new->fpu_counter++; | 380 | new->fpu_counter++; |
372 | if (fpu_lazy_restore(new)) | 381 | if (fpu_lazy_restore(new, cpu)) |
373 | fpu.preload = 0; | 382 | fpu.preload = 0; |
374 | else | 383 | else |
375 | prefetch(new->thread.fpu.state); | 384 | prefetch(new->thread.fpu.state); |
@@ -463,8 +472,10 @@ static inline void kernel_fpu_begin(void) | |||
463 | __save_init_fpu(me); | 472 | __save_init_fpu(me); |
464 | __thread_clear_has_fpu(me); | 473 | __thread_clear_has_fpu(me); |
465 | /* We do 'stts()' in kernel_fpu_end() */ | 474 | /* We do 'stts()' in kernel_fpu_end() */ |
466 | } else | 475 | } else { |
476 | percpu_write(fpu_owner_task, NULL); | ||
467 | clts(); | 477 | clts(); |
478 | } | ||
468 | } | 479 | } |
469 | 480 | ||
470 | static inline void kernel_fpu_end(void) | 481 | static inline void kernel_fpu_end(void) |
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index f7c89e231c6c..58545c97d071 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -374,6 +374,8 @@ union thread_xstate { | |||
374 | }; | 374 | }; |
375 | 375 | ||
376 | struct fpu { | 376 | struct fpu { |
377 | unsigned int last_cpu; | ||
378 | unsigned int has_fpu; | ||
377 | union thread_xstate *state; | 379 | union thread_xstate *state; |
378 | }; | 380 | }; |
379 | 381 | ||
@@ -454,7 +456,6 @@ struct thread_struct { | |||
454 | unsigned long trap_no; | 456 | unsigned long trap_no; |
455 | unsigned long error_code; | 457 | unsigned long error_code; |
456 | /* floating point and extended processor state */ | 458 | /* floating point and extended processor state */ |
457 | unsigned long has_fpu; | ||
458 | struct fpu fpu; | 459 | struct fpu fpu; |
459 | #ifdef CONFIG_X86_32 | 460 | #ifdef CONFIG_X86_32 |
460 | /* Virtual 86 mode info */ | 461 | /* Virtual 86 mode info */ |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d43cad74f166..b667148dfad7 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -1044,6 +1044,8 @@ DEFINE_PER_CPU(char *, irq_stack_ptr) = | |||
1044 | 1044 | ||
1045 | DEFINE_PER_CPU(unsigned int, irq_count) = -1; | 1045 | DEFINE_PER_CPU(unsigned int, irq_count) = -1; |
1046 | 1046 | ||
1047 | DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); | ||
1048 | |||
1047 | /* | 1049 | /* |
1048 | * Special IST stacks which the CPU switches to when it calls | 1050 | * Special IST stacks which the CPU switches to when it calls |
1049 | * an IST-marked descriptor entry. Up to 7 stacks (hardware | 1051 | * an IST-marked descriptor entry. Up to 7 stacks (hardware |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index bc32761bc27a..c08d1ff12b7c 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -304,7 +304,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
304 | 304 | ||
305 | /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ | 305 | /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ |
306 | 306 | ||
307 | fpu = switch_fpu_prepare(prev_p, next_p); | 307 | fpu = switch_fpu_prepare(prev_p, next_p, cpu); |
308 | 308 | ||
309 | /* | 309 | /* |
310 | * Reload esp0. | 310 | * Reload esp0. |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 8ad880b3bc1c..cfa5c90c01db 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -389,7 +389,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
389 | unsigned fsindex, gsindex; | 389 | unsigned fsindex, gsindex; |
390 | fpu_switch_t fpu; | 390 | fpu_switch_t fpu; |
391 | 391 | ||
392 | fpu = switch_fpu_prepare(prev_p, next_p); | 392 | fpu = switch_fpu_prepare(prev_p, next_p, cpu); |
393 | 393 | ||
394 | /* | 394 | /* |
395 | * Reload esp0, LDT and the page table pointer: | 395 | * Reload esp0, LDT and the page table pointer: |