-rw-r--r--  arch/x86/include/asm/fpu-internal.h | 83
-rw-r--r--  arch/x86/kernel/i387.c              | 52
-rw-r--r--  arch/x86/kernel/process.c           |  9
-rw-r--r--  arch/x86/kernel/traps.c             |  2
-rw-r--r--  arch/x86/kernel/xsave.c             | 13
5 files changed, 74 insertions, 85 deletions
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 72ba21a8b5fc..810f20fd4e4e 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -67,6 +67,34 @@ extern void finit_soft_fpu(struct i387_soft_struct *soft);
 static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
 #endif
 
+/*
+ * Must be run with preemption disabled: this clears the fpu_owner_task,
+ * on this CPU.
+ *
+ * This will disable any lazy FPU state restore of the current FPU state,
+ * but if the current thread owns the FPU, it will still be saved by.
+ */
+static inline void __cpu_disable_lazy_restore(unsigned int cpu)
+{
+        per_cpu(fpu_owner_task, cpu) = NULL;
+}
+
+/*
+ * Used to indicate that the FPU state in memory is newer than the FPU
+ * state in registers, and the FPU state should be reloaded next time the
+ * task is run. Only safe on the current task, or non-running tasks.
+ */
+static inline void task_disable_lazy_fpu_restore(struct task_struct *tsk)
+{
+        tsk->thread.fpu.last_cpu = ~0;
+}
+
+static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
+{
+        return new == this_cpu_read_stable(fpu_owner_task) &&
+               cpu == new->thread.fpu.last_cpu;
+}
+
 static inline int is_ia32_compat_frame(void)
 {
         return config_enabled(CONFIG_IA32_EMULATION) &&
@@ -107,7 +135,6 @@ static __always_inline __pure bool use_fxsr(void)
 
 static inline void fx_finit(struct i387_fxsave_struct *fx)
 {
-        memset(fx, 0, xstate_size);
         fx->cwd = 0x37f;
         fx->mxcsr = MXCSR_DEFAULT;
 }
@@ -400,24 +427,6 @@ static inline void drop_init_fpu(struct task_struct *tsk)
  */
 typedef struct { int preload; } fpu_switch_t;
 
-/*
- * Must be run with preemption disabled: this clears the fpu_owner_task,
- * on this CPU.
- *
- * This will disable any lazy FPU state restore of the current FPU state,
- * but if the current thread owns the FPU, it will still be saved by.
- */
-static inline void __cpu_disable_lazy_restore(unsigned int cpu)
-{
-        per_cpu(fpu_owner_task, cpu) = NULL;
-}
-
-static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
-{
-        return new == this_cpu_read_stable(fpu_owner_task) &&
-               cpu == new->thread.fpu.last_cpu;
-}
-
 static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu)
 {
         fpu_switch_t fpu;
@@ -426,13 +435,17 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
          * If the task has used the math, pre-load the FPU on xsave processors
          * or if the past 5 consecutive context-switches used math.
          */
-        fpu.preload = tsk_used_math(new) && (use_eager_fpu() ||
-                                             new->thread.fpu_counter > 5);
+        fpu.preload = tsk_used_math(new) &&
+                      (use_eager_fpu() || new->thread.fpu_counter > 5);
+
         if (__thread_has_fpu(old)) {
                 if (!__save_init_fpu(old))
-                        cpu = ~0;
-                old->thread.fpu.last_cpu = cpu;
-                old->thread.fpu.has_fpu = 0;    /* But leave fpu_owner_task! */
+                        task_disable_lazy_fpu_restore(old);
+                else
+                        old->thread.fpu.last_cpu = cpu;
+
+                /* But leave fpu_owner_task! */
+                old->thread.fpu.has_fpu = 0;
 
                 /* Don't change CR0.TS if we just switch! */
                 if (fpu.preload) {
@@ -443,10 +456,10 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
                         stts();
         } else {
                 old->thread.fpu_counter = 0;
-                old->thread.fpu.last_cpu = ~0;
+                task_disable_lazy_fpu_restore(old);
                 if (fpu.preload) {
                         new->thread.fpu_counter++;
-                        if (!use_eager_fpu() && fpu_lazy_restore(new, cpu))
+                        if (fpu_lazy_restore(new, cpu))
                                 fpu.preload = 0;
                         else
                                 prefetch(new->thread.fpu.state);
@@ -520,24 +533,6 @@ static inline void __save_fpu(struct task_struct *tsk)
 }
 
 /*
- * These disable preemption on their own and are safe
- */
-static inline void save_init_fpu(struct task_struct *tsk)
-{
-        WARN_ON_ONCE(!__thread_has_fpu(tsk));
-
-        if (use_eager_fpu()) {
-                __save_fpu(tsk);
-                return;
-        }
-
-        preempt_disable();
-        __save_init_fpu(tsk);
-        __thread_fpu_end(tsk);
-        preempt_enable();
-}
-
-/*
  * i387 state interaction
  */
 static inline unsigned short get_fpu_cwd(struct task_struct *tsk)
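
The fpu-internal.h changes above move the lazy-restore helpers ahead of switch_fpu_prepare() and introduce task_disable_lazy_fpu_restore(), so "the state in memory is newer than the registers" is expressed by a named helper instead of open-coding last_cpu = ~0. A minimal user-space sketch of the bookkeeping those helpers maintain follows: a per-CPU owner pointer paired with a per-task last_cpu tag, where skipping the register reload is only legal while both sides still point at each other. The struct task, cpu_owner[] array and the scenario in main() are illustrative stand-ins, not kernel interfaces.

#include <stdio.h>

#define NR_CPUS     4
#define INVALID_CPU (~0u)

/* Illustrative stand-ins for task_struct and per_cpu(fpu_owner_task, cpu). */
struct task {
        unsigned int last_cpu;  /* CPU whose registers last held this task's FPU state */
};

static struct task *cpu_owner[NR_CPUS];

/* Model of task_disable_lazy_fpu_restore(): memory is newer than registers. */
static void task_disable_lazy_fpu_restore(struct task *tsk)
{
        tsk->last_cpu = INVALID_CPU;
}

/* Model of __cpu_disable_lazy_restore(): this CPU's registers can no longer be trusted. */
static void cpu_disable_lazy_restore(unsigned int cpu)
{
        cpu_owner[cpu] = NULL;
}

/*
 * Model of fpu_lazy_restore(): the register reload may be skipped only when
 * both halves of the handshake still match, i.e. the CPU remembers this task
 * and the task remembers this CPU.
 */
static int fpu_lazy_restore(struct task *new, unsigned int cpu)
{
        return new == cpu_owner[cpu] && cpu == new->last_cpu;
}

int main(void)
{
        struct task a = { INVALID_CPU };

        /* Task A ran on CPU 1 and left its state in CPU 1's registers. */
        cpu_owner[1] = &a;
        a.last_cpu = 1;
        printf("skip restore on CPU 1? %d\n", fpu_lazy_restore(&a, 1));  /* 1 */

        /* The in-memory copy was modified behind the registers' back. */
        task_disable_lazy_fpu_restore(&a);
        printf("skip restore on CPU 1? %d\n", fpu_lazy_restore(&a, 1));  /* 0 */

        /* Or CPU 1's registers were clobbered by someone else. */
        a.last_cpu = 1;
        cpu_disable_lazy_restore(1);
        printf("skip restore on CPU 1? %d\n", fpu_lazy_restore(&a, 1));  /* 0 */
        return 0;
}

Either invalidation makes fpu_lazy_restore() fail, which is exactly what the switch_fpu_prepare() hunks above rely on.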
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index d5651fce0b71..29e982ada854 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -42,8 +42,8 @@ void kernel_fpu_enable(void)
  * be set (so that the clts/stts pair does nothing that is
  * visible in the interrupted kernel thread).
  *
- * Except for the eagerfpu case when we return 1 unless we've already
- * been eager and saved the state in kernel_fpu_begin().
+ * Except for the eagerfpu case when we return true; in the likely case
+ * the thread has FPU but we are not going to set/clear TS.
  */
 static inline bool interrupted_kernel_fpu_idle(void)
 {
@@ -51,7 +51,7 @@ static inline bool interrupted_kernel_fpu_idle(void)
                 return false;
 
         if (use_eager_fpu())
-                return __thread_has_fpu(current);
+                return true;
 
         return !__thread_has_fpu(current) &&
                 (read_cr0() & X86_CR0_TS);
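
The two hunks above make interrupted_kernel_fpu_idle() unconditionally true under eagerfpu: whatever state the interrupted thread owns can simply be saved by __kernel_fpu_begin(), and TS is never toggled in that mode. A small model of the predicate after this change; the struct and its three flags are invented names standing in for use_eager_fpu(), __thread_has_fpu(current) and CR0.TS, not real kernel interfaces.

#include <stdbool.h>
#include <stdio.h>

/* Invented snapshot of the conditions the real predicate reads. */
struct fpu_ctx {
        bool eager_fpu;       /* use_eager_fpu() */
        bool thread_has_fpu;  /* __thread_has_fpu(current) */
        bool cr0_ts;          /* read_cr0() & X86_CR0_TS */
};

/*
 * After the change: always safe with eagerfpu, because the interrupted
 * state (if any) can be saved and TS is left alone.  With lazy FPU it is
 * only safe when the interrupted thread does not own the registers and
 * TS is already set.
 */
static bool interrupted_kernel_fpu_idle(const struct fpu_ctx *c)
{
        if (c->eager_fpu)
                return true;
        return !c->thread_has_fpu && c->cr0_ts;
}

int main(void)
{
        struct fpu_ctx lazy_owner  = { false, true, false };
        struct fpu_ctx eager_owner = { true,  true, false };

        printf("lazy, thread owns FPU:  %d\n", interrupted_kernel_fpu_idle(&lazy_owner));
        printf("eager, thread owns FPU: %d\n", interrupted_kernel_fpu_idle(&eager_owner));
        return 0;
}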
@@ -94,9 +94,10 @@ void __kernel_fpu_begin(void)
 
         if (__thread_has_fpu(me)) {
                 __save_init_fpu(me);
-        } else if (!use_eager_fpu()) {
+        } else {
                 this_cpu_write(fpu_owner_task, NULL);
-                clts();
+                if (!use_eager_fpu())
+                        clts();
         }
 }
 EXPORT_SYMBOL(__kernel_fpu_begin);
@@ -120,10 +121,13 @@ void unlazy_fpu(struct task_struct *tsk)
 {
         preempt_disable();
         if (__thread_has_fpu(tsk)) {
-                __save_init_fpu(tsk);
-                __thread_fpu_end(tsk);
-        } else
-                tsk->thread.fpu_counter = 0;
+                if (use_eager_fpu()) {
+                        __save_fpu(tsk);
+                } else {
+                        __save_init_fpu(tsk);
+                        __thread_fpu_end(tsk);
+                }
+        }
         preempt_enable();
 }
 EXPORT_SYMBOL(unlazy_fpu);
@@ -221,11 +225,12 @@ void fpu_finit(struct fpu *fpu)
                 return;
         }
 
+        memset(fpu->state, 0, xstate_size);
+
         if (cpu_has_fxsr) {
                 fx_finit(&fpu->state->fxsave);
         } else {
                 struct i387_fsave_struct *fp = &fpu->state->fsave;
-                memset(fp, 0, xstate_size);
                 fp->cwd = 0xffff037fu;
                 fp->swd = 0xffff0000u;
                 fp->twd = 0xffffffffu;
@@ -247,7 +252,7 @@ int init_fpu(struct task_struct *tsk)
         if (tsk_used_math(tsk)) {
                 if (cpu_has_fpu && tsk == current)
                         unlazy_fpu(tsk);
-                tsk->thread.fpu.last_cpu = ~0;
+                task_disable_lazy_fpu_restore(tsk);
                 return 0;
         }
 
@@ -336,6 +341,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
                 unsigned int pos, unsigned int count,
                 void *kbuf, void __user *ubuf)
 {
+        struct xsave_struct *xsave = &target->thread.fpu.state->xsave;
         int ret;
 
         if (!cpu_has_xsave)
@@ -350,14 +356,12 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
          * memory layout in the thread struct, so that we can copy the entire
          * xstateregs to the user using one user_regset_copyout().
          */
-        memcpy(&target->thread.fpu.state->fxsave.sw_reserved,
+        memcpy(&xsave->i387.sw_reserved,
                 xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));
-
         /*
          * Copy the xstate memory layout.
          */
-        ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-                                  &target->thread.fpu.state->xsave, 0, -1);
+        ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
         return ret;
 }
 
@@ -365,8 +369,8 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
                 unsigned int pos, unsigned int count,
                 const void *kbuf, const void __user *ubuf)
 {
+        struct xsave_struct *xsave = &target->thread.fpu.state->xsave;
         int ret;
-        struct xsave_hdr_struct *xsave_hdr;
 
         if (!cpu_has_xsave)
                 return -ENODEV;
@@ -375,22 +379,16 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
         if (ret)
                 return ret;
 
-        ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-                                 &target->thread.fpu.state->xsave, 0, -1);
-
+        ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
         /*
          * mxcsr reserved bits must be masked to zero for security reasons.
          */
-        target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask;
-
-        xsave_hdr = &target->thread.fpu.state->xsave.xsave_hdr;
-
-        xsave_hdr->xstate_bv &= pcntxt_mask;
+        xsave->i387.mxcsr &= mxcsr_feature_mask;
+        xsave->xsave_hdr.xstate_bv &= pcntxt_mask;
         /*
          * These bits must be zero.
          */
-        memset(xsave_hdr->reserved, 0, 48);
-
+        memset(&xsave->xsave_hdr.reserved, 0, 48);
         return ret;
 }
 
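
Most of the xstateregs_get()/xstateregs_set() churn above is just collapsing the repeated target->thread.fpu.state->xsave chains into a local xsave pointer; the part that matters for correctness is that user-supplied state is still sanitized before being accepted. A rough sketch of that sanitization pattern follows, using an invented fake_xsave layout and hypothetical mask values rather than the kernel's real structures and masks.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Invented layout for illustration only; not the kernel's xsave_struct. */
struct fake_xsave {
        uint32_t mxcsr;
        uint64_t xstate_bv;
        uint8_t  reserved[48];
};

/*
 * Sketch of what xstateregs_set() does after copying state in from user
 * space: clear MXCSR bits the CPU does not implement, clear xstate_bv bits
 * for features the kernel never enabled, and zero the must-be-zero header
 * bytes.  Both mask arguments here are placeholders, not real values.
 */
static void sanitize_xsave(struct fake_xsave *xsave,
                           uint32_t mxcsr_feature_mask, uint64_t pcntxt_mask)
{
        xsave->mxcsr     &= mxcsr_feature_mask;
        xsave->xstate_bv &= pcntxt_mask;
        memset(xsave->reserved, 0, sizeof(xsave->reserved));
}

int main(void)
{
        struct fake_xsave x = { .mxcsr = 0xffffffffu, .xstate_bv = ~0ull };

        memset(x.reserved, 0xaa, sizeof(x.reserved));
        sanitize_xsave(&x, 0x0000ffbfu, 0x7);  /* made-up masks */
        printf("mxcsr=%#x xstate_bv=%#llx\n",
               (unsigned)x.mxcsr, (unsigned long long)x.xstate_bv);
        return 0;
}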
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 046e2d620bbe..dcaf4b00d0b4 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -69,8 +69,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 
         dst->thread.fpu_counter = 0;
         dst->thread.fpu.has_fpu = 0;
-        dst->thread.fpu.last_cpu = ~0;
         dst->thread.fpu.state = NULL;
+        task_disable_lazy_fpu_restore(dst);
         if (tsk_used_math(src)) {
                 int err = fpu_alloc(&dst->thread.fpu);
                 if (err)
@@ -131,6 +131,7 @@ void flush_thread(void)
 
         flush_ptrace_hw_breakpoint(tsk);
         memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
+
         drop_init_fpu(tsk);
         /*
          * Free the FPU state for non xsave platforms. They get reallocated
@@ -138,6 +139,12 @@ void flush_thread(void)
          */
         if (!use_eager_fpu())
                 free_thread_xstate(tsk);
+        else if (!used_math()) {
+                /* kthread execs. TODO: cleanup this horror. */
+                if (WARN_ON(init_fpu(current)))
+                        force_sig(SIGKILL, current);
+                math_state_restore();
+        }
 }
 
 static void hard_disable_TSC(void)
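
The new flush_thread() branch above handles the eager-FPU exec path: a kernel thread that execs a user program has never used math, and with eagerfpu there is no device-not-available trap left to allocate and load its FPU state lazily, so it has to happen right here (hence the WARN_ON plus SIGKILL fallback if allocation fails). Below is a simplified model of the two exec-time policies; struct fpu_slot, flush_lazy(), flush_eager() and the 512-byte buffer size are invented for illustration.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Invented per-task FPU slot manipulated at exec() time. */
struct fpu_slot {
        void *state;   /* backing buffer, NULL when freed */
        bool  loaded;  /* registers currently hold this state */
};

/* Lazy policy: drop the buffer; the first FP use will fault it back in. */
static void flush_lazy(struct fpu_slot *f)
{
        free(f->state);
        f->state = NULL;
        f->loaded = false;
}

/*
 * Eager policy: no trap will allocate the state later, so a task that has
 * never used the FPU must get a freshly initialized, loaded state now.
 */
static int flush_eager(struct fpu_slot *f)
{
        if (!f->state) {
                f->state = calloc(1, 512);  /* stands in for init_fpu() */
                if (!f->state)
                        return -1;          /* kernel would WARN and kill */
        }
        f->loaded = true;                   /* stands in for math_state_restore() */
        return 0;
}

int main(void)
{
        struct fpu_slot f = { 0 };

        if (flush_eager(&f))
                return 1;
        printf("eager exec: allocated=%d loaded=%d\n", f.state != NULL, f.loaded);

        flush_lazy(&f);
        printf("lazy exec:  allocated=%d loaded=%d\n", f.state != NULL, f.loaded);
        return 0;
}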
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 4ff5d162ff9f..7ee7369d5aec 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -734,7 +734,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
         /*
          * Save the info for the exception handler and clear the error.
          */
-        save_init_fpu(task);
+        unlazy_fpu(task);
         task->thread.trap_nr = trapnr;
         task->thread.error_code = error_code;
         info.si_signo = SIGFPE;
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index cdc6cf903078..0bf82c5ac529 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -690,7 +690,7 @@ void eager_fpu_init(void)
 {
         static __refdata void (*boot_func)(void) = eager_fpu_init_bp;
 
-        clear_used_math();
+        WARN_ON(used_math());
         current_thread_info()->status = 0;
 
         if (eagerfpu == ENABLE)
@@ -705,17 +705,6 @@
                 boot_func();
                 boot_func = NULL;
         }
-
-        /*
-         * This is same as math_state_restore(). But use_xsave() is
-         * not yet patched to use math_state_restore().
-         */
-        init_fpu(current);
-        __thread_fpu_begin(current);
-        if (cpu_has_xsave)
-                xrstor_state(init_xstate_buf, -1);
-        else
-                fxrstor_checking(&init_xstate_buf->i387);
 }
 
 /*