diff options
| -rw-r--r-- | arch/x86/include/asm/i387.h | 35 | ||||
| -rw-r--r-- | arch/x86/include/asm/processor.h | 3 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/common.c | 2 | ||||
| -rw-r--r-- | arch/x86/kernel/process_32.c | 2 | ||||
| -rw-r--r-- | arch/x86/kernel/process_64.c | 2 |
5 files changed, 29 insertions, 15 deletions
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 74c607b37e87..247904945d3f 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h | |||
| @@ -32,6 +32,8 @@ extern int init_fpu(struct task_struct *child); | |||
| 32 | extern void math_state_restore(void); | 32 | extern void math_state_restore(void); |
| 33 | extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); | 33 | extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); |
| 34 | 34 | ||
| 35 | DECLARE_PER_CPU(struct task_struct *, fpu_owner_task); | ||
| 36 | |||
| 35 | extern user_regset_active_fn fpregs_active, xfpregs_active; | 37 | extern user_regset_active_fn fpregs_active, xfpregs_active; |
| 36 | extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get, | 38 | extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get, |
| 37 | xstateregs_get; | 39 | xstateregs_get; |
| @@ -276,7 +278,7 @@ static inline int restore_fpu_checking(struct task_struct *tsk) | |||
| 276 | "emms\n\t" /* clear stack tags */ | 278 | "emms\n\t" /* clear stack tags */ |
| 277 | "fildl %P[addr]", /* set F?P to defined value */ | 279 | "fildl %P[addr]", /* set F?P to defined value */ |
| 278 | X86_FEATURE_FXSAVE_LEAK, | 280 | X86_FEATURE_FXSAVE_LEAK, |
| 279 | [addr] "m" (tsk->thread.has_fpu)); | 281 | [addr] "m" (tsk->thread.fpu.has_fpu)); |
| 280 | 282 | ||
| 281 | return fpu_restore_checking(&tsk->thread.fpu); | 283 | return fpu_restore_checking(&tsk->thread.fpu); |
| 282 | } | 284 | } |
| @@ -288,19 +290,21 @@ static inline int restore_fpu_checking(struct task_struct *tsk) | |||
| 288 | */ | 290 | */ |
| 289 | static inline int __thread_has_fpu(struct task_struct *tsk) | 291 | static inline int __thread_has_fpu(struct task_struct *tsk) |
| 290 | { | 292 | { |
| 291 | return tsk->thread.has_fpu; | 293 | return tsk->thread.fpu.has_fpu; |
| 292 | } | 294 | } |
| 293 | 295 | ||
| 294 | /* Must be paired with an 'stts' after! */ | 296 | /* Must be paired with an 'stts' after! */ |
| 295 | static inline void __thread_clear_has_fpu(struct task_struct *tsk) | 297 | static inline void __thread_clear_has_fpu(struct task_struct *tsk) |
| 296 | { | 298 | { |
| 297 | tsk->thread.has_fpu = 0; | 299 | tsk->thread.fpu.has_fpu = 0; |
| 300 | percpu_write(fpu_owner_task, NULL); | ||
| 298 | } | 301 | } |
| 299 | 302 | ||
| 300 | /* Must be paired with a 'clts' before! */ | 303 | /* Must be paired with a 'clts' before! */ |
| 301 | static inline void __thread_set_has_fpu(struct task_struct *tsk) | 304 | static inline void __thread_set_has_fpu(struct task_struct *tsk) |
| 302 | { | 305 | { |
| 303 | tsk->thread.has_fpu = 1; | 306 | tsk->thread.fpu.has_fpu = 1; |
| 307 | percpu_write(fpu_owner_task, tsk); | ||
| 304 | } | 308 | } |
| 305 | 309 | ||
| 306 | /* | 310 | /* |
| @@ -345,18 +349,22 @@ typedef struct { int preload; } fpu_switch_t; | |||
| 345 | * We don't do that yet, so "fpu_lazy_restore()" always returns | 349 | * We don't do that yet, so "fpu_lazy_restore()" always returns |
| 346 | * false, but some day.. | 350 | * false, but some day.. |
| 347 | */ | 351 | */ |
| 348 | #define fpu_lazy_restore(tsk) (0) | 352 | static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu) |
| 349 | #define fpu_lazy_state_intact(tsk) do { } while (0) | 353 | { |
| 354 | return new == percpu_read_stable(fpu_owner_task) && | ||
| 355 | cpu == new->thread.fpu.last_cpu; | ||
| 356 | } | ||
| 350 | 357 | ||
| 351 | static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new) | 358 | static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu) |
| 352 | { | 359 | { |
| 353 | fpu_switch_t fpu; | 360 | fpu_switch_t fpu; |
| 354 | 361 | ||
| 355 | fpu.preload = tsk_used_math(new) && new->fpu_counter > 5; | 362 | fpu.preload = tsk_used_math(new) && new->fpu_counter > 5; |
| 356 | if (__thread_has_fpu(old)) { | 363 | if (__thread_has_fpu(old)) { |
| 357 | if (__save_init_fpu(old)) | 364 | if (!__save_init_fpu(old)) |
| 358 | fpu_lazy_state_intact(old); | 365 | cpu = ~0; |
| 359 | __thread_clear_has_fpu(old); | 366 | old->thread.fpu.last_cpu = cpu; |
| 367 | old->thread.fpu.has_fpu = 0; /* But leave fpu_owner_task! */ | ||
| 360 | 368 | ||
| 361 | /* Don't change CR0.TS if we just switch! */ | 369 | /* Don't change CR0.TS if we just switch! */ |
| 362 | if (fpu.preload) { | 370 | if (fpu.preload) { |
| @@ -367,9 +375,10 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta | |||
| 367 | stts(); | 375 | stts(); |
| 368 | } else { | 376 | } else { |
| 369 | old->fpu_counter = 0; | 377 | old->fpu_counter = 0; |
| 378 | old->thread.fpu.last_cpu = ~0; | ||
| 370 | if (fpu.preload) { | 379 | if (fpu.preload) { |
| 371 | new->fpu_counter++; | 380 | new->fpu_counter++; |
| 372 | if (fpu_lazy_restore(new)) | 381 | if (fpu_lazy_restore(new, cpu)) |
| 373 | fpu.preload = 0; | 382 | fpu.preload = 0; |
| 374 | else | 383 | else |
| 375 | prefetch(new->thread.fpu.state); | 384 | prefetch(new->thread.fpu.state); |
| @@ -463,8 +472,10 @@ static inline void kernel_fpu_begin(void) | |||
| 463 | __save_init_fpu(me); | 472 | __save_init_fpu(me); |
| 464 | __thread_clear_has_fpu(me); | 473 | __thread_clear_has_fpu(me); |
| 465 | /* We do 'stts()' in kernel_fpu_end() */ | 474 | /* We do 'stts()' in kernel_fpu_end() */ |
| 466 | } else | 475 | } else { |
| 476 | percpu_write(fpu_owner_task, NULL); | ||
| 467 | clts(); | 477 | clts(); |
| 478 | } | ||
| 468 | } | 479 | } |
| 469 | 480 | ||
| 470 | static inline void kernel_fpu_end(void) | 481 | static inline void kernel_fpu_end(void) |
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index f7c89e231c6c..58545c97d071 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
| @@ -374,6 +374,8 @@ union thread_xstate { | |||
| 374 | }; | 374 | }; |
| 375 | 375 | ||
| 376 | struct fpu { | 376 | struct fpu { |
| 377 | unsigned int last_cpu; | ||
| 378 | unsigned int has_fpu; | ||
| 377 | union thread_xstate *state; | 379 | union thread_xstate *state; |
| 378 | }; | 380 | }; |
| 379 | 381 | ||
| @@ -454,7 +456,6 @@ struct thread_struct { | |||
| 454 | unsigned long trap_no; | 456 | unsigned long trap_no; |
| 455 | unsigned long error_code; | 457 | unsigned long error_code; |
| 456 | /* floating point and extended processor state */ | 458 | /* floating point and extended processor state */ |
| 457 | unsigned long has_fpu; | ||
| 458 | struct fpu fpu; | 459 | struct fpu fpu; |
| 459 | #ifdef CONFIG_X86_32 | 460 | #ifdef CONFIG_X86_32 |
| 460 | /* Virtual 86 mode info */ | 461 | /* Virtual 86 mode info */ |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d43cad74f166..b667148dfad7 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
| @@ -1044,6 +1044,8 @@ DEFINE_PER_CPU(char *, irq_stack_ptr) = | |||
| 1044 | 1044 | ||
| 1045 | DEFINE_PER_CPU(unsigned int, irq_count) = -1; | 1045 | DEFINE_PER_CPU(unsigned int, irq_count) = -1; |
| 1046 | 1046 | ||
| 1047 | DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); | ||
| 1048 | |||
| 1047 | /* | 1049 | /* |
| 1048 | * Special IST stacks which the CPU switches to when it calls | 1050 | * Special IST stacks which the CPU switches to when it calls |
| 1049 | * an IST-marked descriptor entry. Up to 7 stacks (hardware | 1051 | * an IST-marked descriptor entry. Up to 7 stacks (hardware |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index bc32761bc27a..c08d1ff12b7c 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
| @@ -304,7 +304,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
| 304 | 304 | ||
| 305 | /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ | 305 | /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ |
| 306 | 306 | ||
| 307 | fpu = switch_fpu_prepare(prev_p, next_p); | 307 | fpu = switch_fpu_prepare(prev_p, next_p, cpu); |
| 308 | 308 | ||
| 309 | /* | 309 | /* |
| 310 | * Reload esp0. | 310 | * Reload esp0. |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 8ad880b3bc1c..cfa5c90c01db 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
| @@ -389,7 +389,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
| 389 | unsigned fsindex, gsindex; | 389 | unsigned fsindex, gsindex; |
| 390 | fpu_switch_t fpu; | 390 | fpu_switch_t fpu; |
| 391 | 391 | ||
| 392 | fpu = switch_fpu_prepare(prev_p, next_p); | 392 | fpu = switch_fpu_prepare(prev_p, next_p, cpu); |
| 393 | 393 | ||
| 394 | /* | 394 | /* |
| 395 | * Reload esp0, LDT and the page table pointer: | 395 | * Reload esp0, LDT and the page table pointer: |
