 arch/x86/include/asm/fpu-internal.h | 96
 arch/x86/include/asm/i387.h         |  1
 arch/x86/include/asm/xsave.h        |  1
 arch/x86/kernel/cpu/bugs.c          |  7
 arch/x86/kernel/i387.c              | 20
 arch/x86/kernel/process.c           | 12
 arch/x86/kernel/process_32.c        |  4
 arch/x86/kernel/process_64.c        |  4
 arch/x86/kernel/traps.c             |  5
 arch/x86/kernel/xsave.c             | 57
 10 files changed, 146 insertions(+), 61 deletions(-)
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 52202a6b12aa..8ca0f9f45ac4 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -291,15 +291,48 @@ static inline void __thread_set_has_fpu(struct task_struct *tsk)
 static inline void __thread_fpu_end(struct task_struct *tsk)
 {
 	__thread_clear_has_fpu(tsk);
-	stts();
+	if (!use_xsave())
+		stts();
 }
 
 static inline void __thread_fpu_begin(struct task_struct *tsk)
 {
-	clts();
+	if (!use_xsave())
+		clts();
 	__thread_set_has_fpu(tsk);
 }
 
+static inline void __drop_fpu(struct task_struct *tsk)
+{
+	if (__thread_has_fpu(tsk)) {
+		/* Ignore delayed exceptions from user space */
+		asm volatile("1: fwait\n"
+			     "2:\n"
+			     _ASM_EXTABLE(1b, 2b));
+		__thread_fpu_end(tsk);
+	}
+}
+
+static inline void drop_fpu(struct task_struct *tsk)
+{
+	/*
+	 * Forget coprocessor state..
+	 */
+	preempt_disable();
+	tsk->fpu_counter = 0;
+	__drop_fpu(tsk);
+	clear_used_math();
+	preempt_enable();
+}
+
+static inline void drop_init_fpu(struct task_struct *tsk)
+{
+	if (!use_xsave())
+		drop_fpu(tsk);
+	else
+		xrstor_state(init_xstate_buf, -1);
+}
+
 /*
  * FPU state switching for scheduling.
  *
@@ -333,7 +366,12 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
 {
 	fpu_switch_t fpu;
 
-	fpu.preload = tsk_used_math(new) && new->fpu_counter > 5;
+	/*
+	 * If the task has used the math, pre-load the FPU on xsave processors
+	 * or if the past 5 consecutive context-switches used math.
+	 */
+	fpu.preload = tsk_used_math(new) && (use_xsave() ||
+					     new->fpu_counter > 5);
 	if (__thread_has_fpu(old)) {
 		if (!__save_init_fpu(old))
 			cpu = ~0;
@@ -345,14 +383,14 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
 			new->fpu_counter++;
 			__thread_set_has_fpu(new);
 			prefetch(new->thread.fpu.state);
-		} else
+		} else if (!use_xsave())
 			stts();
 	} else {
 		old->fpu_counter = 0;
 		old->thread.fpu.last_cpu = ~0;
 		if (fpu.preload) {
 			new->fpu_counter++;
-			if (fpu_lazy_restore(new, cpu))
+			if (!use_xsave() && fpu_lazy_restore(new, cpu))
 				fpu.preload = 0;
 			else
 				prefetch(new->thread.fpu.state);
@@ -372,7 +410,7 @@ static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
 {
 	if (fpu.preload) {
 		if (unlikely(restore_fpu_checking(new)))
-			__thread_fpu_end(new);
+			drop_init_fpu(new);
 	}
 }
 
@@ -400,17 +438,6 @@ static inline int restore_xstate_sig(void __user *buf, int ia32_frame)
 	return __restore_xstate_sig(buf, buf_fx, size);
 }
 
-static inline void __drop_fpu(struct task_struct *tsk)
-{
-	if (__thread_has_fpu(tsk)) {
-		/* Ignore delayed exceptions from user space */
-		asm volatile("1: fwait\n"
-			     "2:\n"
-			     _ASM_EXTABLE(1b, 2b));
-		__thread_fpu_end(tsk);
-	}
-}
-
 /*
  * Need to be preemption-safe.
  *
@@ -431,24 +458,18 @@ static inline void user_fpu_begin(void)
 static inline void save_init_fpu(struct task_struct *tsk)
 {
 	WARN_ON_ONCE(!__thread_has_fpu(tsk));
+
+	if (use_xsave()) {
+		xsave_state(&tsk->thread.fpu.state->xsave, -1);
+		return;
+	}
+
 	preempt_disable();
 	__save_init_fpu(tsk);
 	__thread_fpu_end(tsk);
 	preempt_enable();
 }
 
-static inline void drop_fpu(struct task_struct *tsk)
-{
-	/*
-	 * Forget coprocessor state..
-	 */
-	tsk->fpu_counter = 0;
-	preempt_disable();
-	__drop_fpu(tsk);
-	preempt_enable();
-	clear_used_math();
-}
-
 /*
  * i387 state interaction
  */
@@ -503,12 +524,21 @@ static inline void fpu_free(struct fpu *fpu)
 	}
 }
 
-static inline void fpu_copy(struct fpu *dst, struct fpu *src)
+static inline void fpu_copy(struct task_struct *dst, struct task_struct *src)
 {
-	memcpy(dst->state, src->state, xstate_size);
-}
+	if (use_xsave()) {
+		struct xsave_struct *xsave = &dst->thread.fpu.state->xsave;
 
-extern void fpu_finit(struct fpu *fpu);
+		memset(&xsave->xsave_hdr, 0, sizeof(struct xsave_hdr_struct));
+		xsave_state(xsave, -1);
+	} else {
+		struct fpu *dfpu = &dst->thread.fpu;
+		struct fpu *sfpu = &src->thread.fpu;
+
+		unlazy_fpu(src);
+		memcpy(dfpu->state, sfpu->state, xstate_size);
+	}
+}
 
 static inline unsigned long
 alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx,
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 257d9cca214f..6c3bd3782818 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -19,6 +19,7 @@ struct pt_regs;
 struct user_i387_struct;
 
 extern int init_fpu(struct task_struct *child);
+extern void fpu_finit(struct fpu *fpu);
 extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
 extern void math_state_restore(void);
 
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index c1d989a15193..2ddee1b87793 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -34,6 +34,7 @@
 extern unsigned int xstate_size;
 extern u64 pcntxt_mask;
 extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
+extern struct xsave_struct *init_xstate_buf;
 
 extern void xsave_init(void);
 extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index c97bb7b5a9f8..d0e910da16c5 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -165,10 +165,15 @@ void __init check_bugs(void)
 	print_cpu_info(&boot_cpu_data);
 #endif
 	check_config();
-	check_fpu();
 	check_hlt();
 	check_popad();
 	init_utsname()->machine[1] =
 		'0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
 	alternative_instructions();
+
+	/*
+	 * kernel_fpu_begin/end() in check_fpu() relies on the patched
+	 * alternative instructions.
+	 */
+	check_fpu();
 }
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index ab6a2e8028ae..528557470ddb 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -22,7 +22,15 @@
 /*
  * Were we in an interrupt that interrupted kernel mode?
  *
- * We can do a kernel_fpu_begin/end() pair *ONLY* if that
+ * For now, on xsave platforms we will return interrupted
+ * kernel FPU as not-idle. TBD: As we use non-lazy FPU restore
+ * for xsave platforms, ideally we can change the return value
+ * to something like __thread_has_fpu(current). But we need to
+ * be careful of doing __thread_clear_has_fpu() before saving
+ * the FPU etc for supporting nested uses etc. For now, take
+ * the simple route!
+ *
+ * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that
  * pair does nothing at all: the thread must not have fpu (so
  * that we don't try to save the FPU state), and TS must
  * be set (so that the clts/stts pair does nothing that is
@@ -30,6 +38,9 @@
  */
 static inline bool interrupted_kernel_fpu_idle(void)
 {
+	if (use_xsave())
+		return 0;
+
 	return !__thread_has_fpu(current) &&
 		(read_cr0() & X86_CR0_TS);
 }
@@ -73,7 +84,7 @@ void kernel_fpu_begin(void)
 		__save_init_fpu(me);
 		__thread_clear_has_fpu(me);
 		/* We do 'stts()' in kernel_fpu_end() */
-	} else {
+	} else if (!use_xsave()) {
 		this_cpu_write(fpu_owner_task, NULL);
 		clts();
 	}
@@ -82,7 +93,10 @@ EXPORT_SYMBOL(kernel_fpu_begin);
 
 void kernel_fpu_end(void)
 {
-	stts();
+	if (use_xsave())
+		math_state_restore();
+	else
+		stts();
 	preempt_enable();
 }
 EXPORT_SYMBOL(kernel_fpu_end);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 30069d1a6a4d..c21e30f8923b 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -66,15 +66,13 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 {
 	int ret;
 
-	unlazy_fpu(src);
-
 	*dst = *src;
 	if (fpu_allocated(&src->thread.fpu)) {
 		memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu));
 		ret = fpu_alloc(&dst->thread.fpu);
 		if (ret)
 			return ret;
-		fpu_copy(&dst->thread.fpu, &src->thread.fpu);
+		fpu_copy(dst, src);
 	}
 	return 0;
 }
@@ -153,7 +151,13 @@ void flush_thread(void)
 
 	flush_ptrace_hw_breakpoint(tsk);
 	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
-	drop_fpu(tsk);
+	drop_init_fpu(tsk);
+	/*
+	 * Free the FPU state for non xsave platforms. They get reallocated
+	 * lazily at the first use.
+	 */
+	if (!use_xsave())
+		free_thread_xstate(tsk);
 }
 
 static void hard_disable_TSC(void)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 516fa186121b..b9ff83c7135b 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -190,10 +190,6 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 	regs->cs = __USER_CS;
 	regs->ip = new_ip;
 	regs->sp = new_sp;
-	/*
-	 * Free the old FP and other extended state
-	 */
-	free_thread_xstate(current);
 }
 EXPORT_SYMBOL_GPL(start_thread);
 
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 0a980c9d7cb8..8a6d20ce1978 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -232,10 +232,6 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
 	regs->cs = _cs;
 	regs->ss = _ss;
 	regs->flags = X86_EFLAGS_IF;
-	/*
-	 * Free the old FP and other extended state
-	 */
-	free_thread_xstate(current);
 }
 
 void
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b481341c9369..ac7d5275f6e8 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -613,11 +613,12 @@ void math_state_restore(void)
 	}
 
 	__thread_fpu_begin(tsk);
+
 	/*
 	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
 	 */
 	if (unlikely(restore_fpu_checking(tsk))) {
-		__thread_fpu_end(tsk);
+		drop_init_fpu(tsk);
 		force_sig(SIGSEGV, tsk);
 		return;
 	}
@@ -629,6 +630,8 @@ EXPORT_SYMBOL_GPL(math_state_restore);
 dotraplinkage void __kprobes
 do_device_not_available(struct pt_regs *regs, long error_code)
 {
+	BUG_ON(use_xsave());
+
 #ifdef CONFIG_MATH_EMULATION
 	if (read_cr0() & X86_CR0_EM) {
 		struct math_emu_info info = { };
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 4ac5f2e135b4..e7752bd7cac8 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -21,7 +21,7 @@ u64 pcntxt_mask;
 /*
  * Represents init state for the supported extended state.
  */
-static struct xsave_struct *init_xstate_buf;
+struct xsave_struct *init_xstate_buf;
 
 static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32;
 static unsigned int *xstate_offsets, *xstate_sizes, xstate_features;
@@ -268,7 +268,7 @@ int save_xstate_sig(void __user *buf, void __user *buf_fx, int size)
 	if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate))
 		return -1;
 
-	drop_fpu(tsk);	/* trigger finit */
+	drop_init_fpu(tsk);	/* trigger finit */
 
 	return 0;
 }
@@ -340,7 +340,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
 			 config_enabled(CONFIG_IA32_EMULATION));
 
 	if (!buf) {
-		drop_fpu(tsk);
+		drop_init_fpu(tsk);
 		return 0;
 	}
 
@@ -380,15 +380,30 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
 		 */
 		struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
 		struct user_i387_ia32_struct env;
+		int err = 0;
 
+		/*
+		 * Drop the current fpu which clears used_math(). This ensures
+		 * that any context-switch during the copy of the new state,
+		 * avoids the intermediate state from getting restored/saved.
+		 * Thus avoiding the new restored state from getting corrupted.
+		 * We will be ready to restore/save the state only after
+		 * set_used_math() is again set.
+		 */
 		drop_fpu(tsk);
 
 		if (__copy_from_user(xsave, buf_fx, state_size) ||
-		    __copy_from_user(&env, buf, sizeof(env)))
-			return -1;
+		    __copy_from_user(&env, buf, sizeof(env))) {
+			err = -1;
+		} else {
+			sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only);
+			set_used_math();
+		}
 
-		sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only);
-		set_used_math();
+		if (use_xsave())
+			math_state_restore();
+
+		return err;
 	} else {
 		/*
 		 * For 64-bit frames and 32-bit fsave frames, restore the user
@@ -396,7 +411,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
 		 */
 		user_fpu_begin();
 		if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) {
-			drop_fpu(tsk);
+			drop_init_fpu(tsk);
 			return -1;
 		}
 	}
@@ -435,11 +450,29 @@ static void prepare_fx_sw_frame(void)
  */
 static inline void xstate_enable(void)
 {
+	clts();
 	set_in_cr4(X86_CR4_OSXSAVE);
 	xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
 }
 
 /*
+ * This is same as math_state_restore(). But use_xsave() is not yet
+ * patched to use math_state_restore().
+ */
+static inline void init_restore_xstate(void)
+{
+	init_fpu(current);
+	__thread_fpu_begin(current);
+	xrstor_state(init_xstate_buf, -1);
+}
+
+static inline void xstate_enable_ap(void)
+{
+	xstate_enable();
+	init_restore_xstate();
+}
+
+/*
  * Record the offsets and sizes of different state managed by the xsave
  * memory layout.
  */
@@ -479,7 +512,6 @@ static void __init setup_xstate_init(void)
 					__alignof__(struct xsave_struct));
 	init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT;
 
-	clts();
 	/*
 	 * Init all the features state with header_bv being 0x0
 	 */
@@ -489,7 +521,6 @@ static void __init setup_xstate_init(void)
 	 * of any feature which is not represented by all zero's.
 	 */
 	xsave_state(init_xstate_buf, -1);
-	stts();
 }
 
 /*
@@ -533,6 +564,10 @@ static void __init xstate_enable_boot_cpu(void)
 
 	pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n",
 		pcntxt_mask, xstate_size);
+
+	current->thread.fpu.state =
+	    alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct));
+	init_restore_xstate();
 }
 
 /*
@@ -551,6 +586,6 @@ void __cpuinit xsave_init(void)
 		return;
 
 	this_func = next_func;
-	next_func = xstate_enable;
+	next_func = xstate_enable_ap;
 	this_func();
 }
