author		Linus Torvalds <torvalds@linux-foundation.org>	2019-05-07 13:24:10 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2019-05-07 13:24:10 -0400
commit		8ff468c29e9a9c3afe9152c10c7b141343270bf3 (patch)
tree		208b1b9a211d64801dcc22e3eeed81253784ff7f
parent		68253e718c2778427db451e39a8366aa49982b71 (diff)
parent		d9c9ce34ed5c892323cbf5b4f9a4c498e036316a (diff)
Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 FPU state handling updates from Borislav Petkov:
 "This contains work started by Rik van Riel and brought to fruition by
  Sebastian Andrzej Siewior with the main goal to optimize when to load
  FPU registers: only when returning to userspace and not on every
  context switch (while the task remains in the kernel).

  In addition, this optimization makes kernel_fpu_begin() cheaper by
  requiring register saving only on the first invocation and skipping
  that in following ones.

  What is more, this series cleans up and streamlines many aspects of
  the already complex FPU code, hopefully making it more palatable for
  future improvements and simplifications.

  Finally, there's a __user annotations fix from Jann Horn"

* 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (29 commits)
  x86/fpu: Fault-in user stack if copy_fpstate_to_sigframe() fails
  x86/pkeys: Add PKRU value to init_fpstate
  x86/fpu: Restore regs in copy_fpstate_to_sigframe() in order to use the fastpath
  x86/fpu: Add a fastpath to copy_fpstate_to_sigframe()
  x86/fpu: Add a fastpath to __fpu__restore_sig()
  x86/fpu: Defer FPU state load until return to userspace
  x86/fpu: Merge the two code paths in __fpu__restore_sig()
  x86/fpu: Restore from kernel memory on the 64-bit path too
  x86/fpu: Inline copy_user_to_fpregs_zeroing()
  x86/fpu: Update xstate's PKRU value on write_pkru()
  x86/fpu: Prepare copy_fpstate_to_sigframe() for TIF_NEED_FPU_LOAD
  x86/fpu: Always store the registers in copy_fpstate_to_sigframe()
  x86/entry: Add TIF_NEED_FPU_LOAD
  x86/fpu: Eager switch PKRU state
  x86/pkeys: Don't check if PKRU is zero before writing it
  x86/fpu: Only write PKRU if it is different from current
  x86/pkeys: Provide *pkru() helpers
  x86/fpu: Use a feature number instead of mask in two more helpers
  x86/fpu: Make __raw_xsave_addr() use a feature number instead of mask
  x86/fpu: Add an __fpregs_load_activate() internal helper
  ...
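[Editor's note: the following standalone C sketch only models the scheme the message above describes -- save the FPU state on a context switch, set a "need FPU load" flag, and restore the registers only on the way back to userspace. It is not kernel code; the names merely mimic switch_fpu_prepare()/switch_fpu_return() and TIF_NEED_FPU_LOAD, and plain variables stand in for the real xsave area and hardware registers.]

/* Simplified userspace-only model of the deferred-FPU-restore pattern. */
#include <stdbool.h>
#include <stdio.h>

struct fpu_state { double regs[8]; };   /* stand-in for the xsave area      */

static struct fpu_state cpu_fpu;         /* "hardware" FPU registers        */
static struct fpu_state task_fpstate;    /* per-task in-memory copy         */
static bool need_fpu_load;               /* models TIF_NEED_FPU_LOAD        */

/* Old behaviour: every context switch saved *and* restored the registers.
 * New behaviour (modelled here): the switch only saves and sets the flag. */
static void switch_fpu_prepare(void)
{
	task_fpstate = cpu_fpu;           /* copy_fpregs_to_fpstate()        */
	need_fpu_load = true;             /* switch_fpu_finish()             */
}

/* The restore is deferred to the exit-to-usermode path. */
static void switch_fpu_return(void)
{
	if (need_fpu_load) {
		cpu_fpu = task_fpstate;   /* copy_kernel_to_fpregs()         */
		need_fpu_load = false;
	}
}

int main(void)
{
	cpu_fpu.regs[0] = 3.14;

	switch_fpu_prepare();             /* task is scheduled out           */
	cpu_fpu.regs[0] = 0.0;            /* another context clobbers FPU    */

	switch_fpu_return();              /* task returns to userspace       */
	printf("restored: %g\n", cpu_fpu.regs[0]);
	return 0;
}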
-rw-r--r--  Documentation/preempt-locking.txt     |   1
-rw-r--r--  arch/x86/entry/common.c               |  10
-rw-r--r--  arch/x86/ia32/ia32_signal.c           |  17
-rw-r--r--  arch/x86/include/asm/fpu/api.h        |  31
-rw-r--r--  arch/x86/include/asm/fpu/internal.h   | 133
-rw-r--r--  arch/x86/include/asm/fpu/signal.h     |   2
-rw-r--r--  arch/x86/include/asm/fpu/types.h      |   9
-rw-r--r--  arch/x86/include/asm/fpu/xstate.h     |   8
-rw-r--r--  arch/x86/include/asm/pgtable.h        |  29
-rw-r--r--  arch/x86/include/asm/special_insns.h  |  19
-rw-r--r--  arch/x86/include/asm/thread_info.h    |   2
-rw-r--r--  arch/x86/include/asm/trace/fpu.h      |  13
-rw-r--r--  arch/x86/kernel/cpu/common.c          |   5
-rw-r--r--  arch/x86/kernel/fpu/core.c            | 195
-rw-r--r--  arch/x86/kernel/fpu/init.c            |   2
-rw-r--r--  arch/x86/kernel/fpu/regset.c          |  24
-rw-r--r--  arch/x86/kernel/fpu/signal.c          | 202
-rw-r--r--  arch/x86/kernel/fpu/xstate.c          |  42
-rw-r--r--  arch/x86/kernel/process.c             |   2
-rw-r--r--  arch/x86/kernel/process_32.c          |  11
-rw-r--r--  arch/x86/kernel/process_64.c          |  11
-rw-r--r--  arch/x86/kernel/signal.c              |  21
-rw-r--r--  arch/x86/kernel/traps.c               |   2
-rw-r--r--  arch/x86/kvm/vmx/vmx.c                |   2
-rw-r--r--  arch/x86/kvm/x86.c                    |  48
-rw-r--r--  arch/x86/math-emu/fpu_entry.c         |   3
-rw-r--r--  arch/x86/mm/mpx.c                     |   6
-rw-r--r--  arch/x86/mm/pkeys.c                   |  21
28 files changed, 512 insertions, 359 deletions
diff --git a/Documentation/preempt-locking.txt b/Documentation/preempt-locking.txt
index 509f5a422d57..dce336134e54 100644
--- a/Documentation/preempt-locking.txt
+++ b/Documentation/preempt-locking.txt
@@ -52,7 +52,6 @@ preemption must be disabled around such regions.
 
 Note, some FPU functions are already explicitly preempt safe. For example,
 kernel_fpu_begin and kernel_fpu_end will disable and enable preemption.
-However, fpu__restore() must be called with preemption disabled.
 
 
 RULE #3: Lock acquire and release must be performed by same task
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 7bc105f47d21..51beb8d29123 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -25,12 +25,13 @@
 #include <linux/uprobes.h>
 #include <linux/livepatch.h>
 #include <linux/syscalls.h>
+#include <linux/uaccess.h>
 
 #include <asm/desc.h>
 #include <asm/traps.h>
 #include <asm/vdso.h>
-#include <linux/uaccess.h>
 #include <asm/cpufeature.h>
+#include <asm/fpu/api.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/syscalls.h>
@@ -196,6 +197,13 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
 	if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
 		exit_to_usermode_loop(regs, cached_flags);
 
+	/* Reload ti->flags; we may have rescheduled above. */
+	cached_flags = READ_ONCE(ti->flags);
+
+	fpregs_assert_state_consistent();
+	if (unlikely(cached_flags & _TIF_NEED_FPU_LOAD))
+		switch_fpu_return();
+
 #ifdef CONFIG_COMPAT
 	/*
 	 * Compat syscalls set TS_COMPAT. Make sure we clear it before
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 4d5fcd47ab75..629d1ee05599 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -221,8 +221,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
 				 size_t frame_size,
 				 void __user **fpstate)
 {
-	struct fpu *fpu = &current->thread.fpu;
-	unsigned long sp;
+	unsigned long sp, fx_aligned, math_size;
 
 	/* Default to using normal stack */
 	sp = regs->sp;
@@ -236,15 +235,11 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
 	    ksig->ka.sa.sa_restorer)
 		sp = (unsigned long) ksig->ka.sa.sa_restorer;
 
-	if (fpu->initialized) {
-		unsigned long fx_aligned, math_size;
-
-		sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size);
-		*fpstate = (struct _fpstate_32 __user *) sp;
-		if (copy_fpstate_to_sigframe(*fpstate, (void __user *)fx_aligned,
-					     math_size) < 0)
-			return (void __user *) -1L;
-	}
+	sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size);
+	*fpstate = (struct _fpstate_32 __user *) sp;
+	if (copy_fpstate_to_sigframe(*fpstate, (void __user *)fx_aligned,
+				     math_size) < 0)
+		return (void __user *) -1L;
 
 	sp -= frame_size;
 	/* Align the stack pointer according to the i386 ABI,
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index b56d504af654..b774c52e5411 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -10,6 +10,7 @@
 
 #ifndef _ASM_X86_FPU_API_H
 #define _ASM_X86_FPU_API_H
+#include <linux/bottom_half.h>
 
 /*
  * Use kernel_fpu_begin/end() if you intend to use FPU in kernel context. It
@@ -21,6 +22,36 @@
 extern void kernel_fpu_begin(void);
 extern void kernel_fpu_end(void);
 extern bool irq_fpu_usable(void);
+extern void fpregs_mark_activate(void);
+
+/*
+ * Use fpregs_lock() while editing CPU's FPU registers or fpu->state.
+ * A context switch will (and softirq might) save CPU's FPU registers to
+ * fpu->state and set TIF_NEED_FPU_LOAD leaving CPU's FPU registers in
+ * a random state.
+ */
+static inline void fpregs_lock(void)
+{
+	preempt_disable();
+	local_bh_disable();
+}
+
+static inline void fpregs_unlock(void)
+{
+	local_bh_enable();
+	preempt_enable();
+}
+
+#ifdef CONFIG_X86_DEBUG_FPU
+extern void fpregs_assert_state_consistent(void);
+#else
+static inline void fpregs_assert_state_consistent(void) { }
+#endif
+
+/*
+ * Load the task FPU state before returning to userspace.
+ */
+extern void switch_fpu_return(void);
 
 /*
  * Query the presence of one or more xfeatures. Works on any legacy CPU as well.
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 745a19d34f23..9e27fa05a7ae 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -14,6 +14,7 @@
 #include <linux/compat.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/mm.h>
 
 #include <asm/user.h>
 #include <asm/fpu/api.h>
@@ -24,14 +25,12 @@
 /*
  * High level FPU state handling functions:
  */
-extern void fpu__initialize(struct fpu *fpu);
 extern void fpu__prepare_read(struct fpu *fpu);
 extern void fpu__prepare_write(struct fpu *fpu);
 extern void fpu__save(struct fpu *fpu);
-extern void fpu__restore(struct fpu *fpu);
 extern int fpu__restore_sig(void __user *buf, int ia32_frame);
 extern void fpu__drop(struct fpu *fpu);
-extern int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu);
+extern int fpu__copy(struct task_struct *dst, struct task_struct *src);
 extern void fpu__clear(struct fpu *fpu);
 extern int fpu__exception_code(struct fpu *fpu, int trap_nr);
 extern int dump_fpu(struct pt_regs *ptregs, struct user_i387_struct *fpstate);
@@ -122,6 +121,21 @@ extern void fpstate_sanitize_xstate(struct fpu *fpu);
 	err;							\
 })
 
+#define kernel_insn_err(insn, output, input...)			\
+({								\
+	int err;						\
+	asm volatile("1:" #insn "\n\t"				\
+		     "2:\n"					\
+		     ".section .fixup,\"ax\"\n"			\
+		     "3:  movl $-1,%[err]\n"			\
+		     "    jmp  2b\n"				\
+		     ".previous\n"				\
+		     _ASM_EXTABLE(1b, 3b)			\
+		     : [err] "=r" (err), output			\
+		     : "0"(0), input);				\
+	err;							\
+})
+
 #define kernel_insn(insn, output, input...)			\
 	asm volatile("1:" #insn "\n\t"				\
 		     "2:\n"					\
@@ -150,6 +164,14 @@ static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
 		kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
 }
 
+static inline int copy_kernel_to_fxregs_err(struct fxregs_state *fx)
+{
+	if (IS_ENABLED(CONFIG_X86_32))
+		return kernel_insn_err(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+	else
+		return kernel_insn_err(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
+}
+
 static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
 {
 	if (IS_ENABLED(CONFIG_X86_32))
@@ -163,6 +185,11 @@ static inline void copy_kernel_to_fregs(struct fregs_state *fx)
 	kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
 }
 
+static inline int copy_kernel_to_fregs_err(struct fregs_state *fx)
+{
+	return kernel_insn_err(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+}
+
 static inline int copy_user_to_fregs(struct fregs_state __user *fx)
 {
 	return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
@@ -363,6 +390,21 @@ static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask)
 }
 
 /*
+ * Restore xstate from kernel space xsave area, return an error code instead of
+ * an exception.
+ */
+static inline int copy_kernel_to_xregs_err(struct xregs_state *xstate, u64 mask)
+{
+	u32 lmask = mask;
+	u32 hmask = mask >> 32;
+	int err;
+
+	XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
+
+	return err;
+}
+
+/*
  * These must be called with preempt disabled. Returns
  * 'true' if the FPU state is still intact and we can
  * keep registers active.
@@ -487,6 +529,25 @@ static inline void fpregs_activate(struct fpu *fpu)
 }
 
 /*
+ * Internal helper, do not use directly. Use switch_fpu_return() instead.
+ */
+static inline void __fpregs_load_activate(void)
+{
+	struct fpu *fpu = &current->thread.fpu;
+	int cpu = smp_processor_id();
+
+	if (WARN_ON_ONCE(current->mm == NULL))
+		return;
+
+	if (!fpregs_state_valid(fpu, cpu)) {
+		copy_kernel_to_fpregs(&fpu->state);
+		fpregs_activate(fpu);
+		fpu->last_cpu = cpu;
+	}
+	clear_thread_flag(TIF_NEED_FPU_LOAD);
+}
+
+/*
  * FPU state switching for scheduling.
  *
  * This is a two-stage process:
@@ -494,12 +555,23 @@ static inline void fpregs_activate(struct fpu *fpu)
  * - switch_fpu_prepare() saves the old state.
  *   This is done within the context of the old process.
  *
- * - switch_fpu_finish() restores the new state as
- *   necessary.
+ * - switch_fpu_finish() sets TIF_NEED_FPU_LOAD; the floating point state
+ *   will get loaded on return to userspace, or when the kernel needs it.
+ *
+ * If TIF_NEED_FPU_LOAD is cleared then the CPU's FPU registers
+ * are saved in the current thread's FPU register state.
+ *
+ * If TIF_NEED_FPU_LOAD is set then CPU's FPU registers may not
+ * hold current()'s FPU registers. It is required to load the
+ * registers before returning to userland or using the content
+ * otherwise.
+ *
+ * The FPU context is only stored/restored for a user task and
+ * ->mm is used to distinguish between kernel and user threads.
  */
 static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu)
 {
-	if (static_cpu_has(X86_FEATURE_FPU) && old_fpu->initialized) {
+	if (static_cpu_has(X86_FEATURE_FPU) && current->mm) {
 		if (!copy_fpregs_to_fpstate(old_fpu))
 			old_fpu->last_cpu = -1;
 		else
@@ -507,8 +579,7 @@ static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu)
 
 		/* But leave fpu_fpregs_owner_ctx! */
 		trace_x86_fpu_regs_deactivated(old_fpu);
-	} else
-		old_fpu->last_cpu = -1;
+	}
 }
 
 /*
@@ -516,36 +587,32 @@ static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu)
  */
 
 /*
- * Set up the userspace FPU context for the new task, if the task
- * has used the FPU.
+ * Load PKRU from the FPU context if available. Delay loading of the
+ * complete FPU state until the return to userland.
  */
-static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu)
+static inline void switch_fpu_finish(struct fpu *new_fpu)
 {
-	bool preload = static_cpu_has(X86_FEATURE_FPU) &&
-		       new_fpu->initialized;
-
-	if (preload) {
-		if (!fpregs_state_valid(new_fpu, cpu))
-			copy_kernel_to_fpregs(&new_fpu->state);
-		fpregs_activate(new_fpu);
-	}
-}
-
-/*
- * Needs to be preemption-safe.
- *
- * NOTE! user_fpu_begin() must be used only immediately before restoring
- * the save state. It does not do any saving/restoring on its own. In
- * lazy FPU mode, it is just an optimization to avoid a #NM exception,
- * the task can lose the FPU right after preempt_enable().
- */
-static inline void user_fpu_begin(void)
-{
-	struct fpu *fpu = &current->thread.fpu;
-
-	preempt_disable();
-	fpregs_activate(fpu);
-	preempt_enable();
+	u32 pkru_val = init_pkru_value;
+	struct pkru_state *pk;
+
+	if (!static_cpu_has(X86_FEATURE_FPU))
+		return;
+
+	set_thread_flag(TIF_NEED_FPU_LOAD);
+
+	if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
+		return;
+
+	/*
+	 * PKRU state is switched eagerly because it needs to be valid before we
+	 * return to userland e.g. for a copy_to_user() operation.
+	 */
+	if (current->mm) {
+		pk = get_xsave_addr(&new_fpu->state.xsave, XFEATURE_PKRU);
+		if (pk)
+			pkru_val = pk->pkru;
+	}
+	__write_pkru(pkru_val);
 }
 
 /*
diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h
index 44bbc39a57b3..7fb516b6893a 100644
--- a/arch/x86/include/asm/fpu/signal.h
+++ b/arch/x86/include/asm/fpu/signal.h
@@ -22,7 +22,7 @@ int ia32_setup_frame(int sig, struct ksignal *ksig,
 
 extern void convert_from_fxsr(struct user_i387_ia32_struct *env,
 			      struct task_struct *tsk);
-extern void convert_to_fxsr(struct task_struct *tsk,
+extern void convert_to_fxsr(struct fxregs_state *fxsave,
 			    const struct user_i387_ia32_struct *env);
 
 unsigned long
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 2e32e178e064..f098f6cab94b 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -294,15 +294,6 @@ struct fpu {
 	unsigned int			last_cpu;
 
 	/*
-	 * @initialized:
-	 *
-	 * This flag indicates whether this context is initialized: if the task
-	 * is not running then we can restore from this context, if the task
-	 * is running then we should save into this context.
-	 */
-	unsigned char			initialized;
-
-	/*
 	 * @avx512_timestamp:
 	 *
 	 * Records the timestamp of AVX512 use during last context switch.
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 48581988d78c..7e42b285c856 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -2,9 +2,11 @@
 #ifndef __ASM_X86_XSAVE_H
 #define __ASM_X86_XSAVE_H
 
+#include <linux/uaccess.h>
 #include <linux/types.h>
+
 #include <asm/processor.h>
-#include <linux/uaccess.h>
+#include <asm/user.h>
 
 /* Bit 63 of XCR0 is reserved for future expansion */
 #define XFEATURE_MASK_EXTEND	(~(XFEATURE_MASK_FPSSE | (1ULL << 63)))
@@ -46,8 +48,8 @@ extern void __init update_regset_xstate_info(unsigned int size,
 					     u64 xstate_mask);
 
 void fpu__xstate_clear_all_cpu_caps(void);
-void *get_xsave_addr(struct xregs_state *xsave, int xstate);
-const void *get_xsave_field_ptr(int xstate_field);
+void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr);
+const void *get_xsave_field_ptr(int xfeature_nr);
 int using_compacted_format(void);
 int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
 int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 3a221942f805..5e0509b41986 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -23,6 +23,8 @@
 
 #ifndef __ASSEMBLY__
 #include <asm/x86_init.h>
+#include <asm/fpu/xstate.h>
+#include <asm/fpu/api.h>
 
 extern pgd_t early_top_pgt[PTRS_PER_PGD];
 int __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
@@ -127,14 +129,29 @@ static inline int pte_dirty(pte_t pte)
 static inline u32 read_pkru(void)
 {
 	if (boot_cpu_has(X86_FEATURE_OSPKE))
-		return __read_pkru();
+		return rdpkru();
 	return 0;
 }
 
 static inline void write_pkru(u32 pkru)
 {
-	if (boot_cpu_has(X86_FEATURE_OSPKE))
-		__write_pkru(pkru);
+	struct pkru_state *pk;
+
+	if (!boot_cpu_has(X86_FEATURE_OSPKE))
+		return;
+
+	pk = get_xsave_addr(&current->thread.fpu.state.xsave, XFEATURE_PKRU);
+
+	/*
+	 * The PKRU value in xstate needs to be in sync with the value that is
+	 * written to the CPU. The FPU restore on return to userland would
+	 * otherwise load the previous value again.
+	 */
+	fpregs_lock();
+	if (pk)
+		pk->pkru = pkru;
+	__write_pkru(pkru);
+	fpregs_unlock();
 }
 
 static inline int pte_young(pte_t pte)
@@ -1358,6 +1375,12 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
 #define PKRU_WD_BIT 0x2
 #define PKRU_BITS_PER_PKEY 2
 
+#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+extern u32 init_pkru_value;
+#else
+#define init_pkru_value	0
+#endif
+
 static inline bool __pkru_allows_read(u32 pkru, u16 pkey)
 {
 	int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY;
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 43c029cdc3fe..0a3c4cab39db 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -92,7 +92,7 @@ static inline void native_write_cr8(unsigned long val)
 #endif
 
 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
-static inline u32 __read_pkru(void)
+static inline u32 rdpkru(void)
 {
 	u32 ecx = 0;
 	u32 edx, pkru;
@@ -107,7 +107,7 @@ static inline u32 __read_pkru(void)
 	return pkru;
 }
 
-static inline void __write_pkru(u32 pkru)
+static inline void wrpkru(u32 pkru)
 {
 	u32 ecx = 0, edx = 0;
 
@@ -118,8 +118,21 @@ static inline void __write_pkru(u32 pkru)
 	asm volatile(".byte 0x0f,0x01,0xef\n\t"
 		     : : "a" (pkru), "c"(ecx), "d"(edx));
 }
+
+static inline void __write_pkru(u32 pkru)
+{
+	/*
+	 * WRPKRU is relatively expensive compared to RDPKRU.
+	 * Avoid WRPKRU when it would not change the value.
+	 */
+	if (pkru == rdpkru())
+		return;
+
+	wrpkru(pkru);
+}
+
 #else
-static inline u32 __read_pkru(void)
+static inline u32 rdpkru(void)
 {
 	return 0;
 }
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index e0eccbcb8447..f9453536f9bb 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -88,6 +88,7 @@ struct thread_info {
 #define TIF_USER_RETURN_NOTIFY	11	/* notify kernel of userspace return */
 #define TIF_UPROBE		12	/* breakpointed or singlestepping */
 #define TIF_PATCH_PENDING	13	/* pending live patching update */
+#define TIF_NEED_FPU_LOAD	14	/* load FPU on return to userspace */
 #define TIF_NOCPUID		15	/* CPUID is not accessible in userland */
 #define TIF_NOTSC		16	/* TSC is not accessible in userland */
 #define TIF_IA32		17	/* IA32 compatibility process */
@@ -117,6 +118,7 @@ struct thread_info {
 #define _TIF_USER_RETURN_NOTIFY	(1 << TIF_USER_RETURN_NOTIFY)
 #define _TIF_UPROBE		(1 << TIF_UPROBE)
 #define _TIF_PATCH_PENDING	(1 << TIF_PATCH_PENDING)
+#define _TIF_NEED_FPU_LOAD	(1 << TIF_NEED_FPU_LOAD)
 #define _TIF_NOCPUID		(1 << TIF_NOCPUID)
 #define _TIF_NOTSC		(1 << TIF_NOTSC)
 #define _TIF_IA32		(1 << TIF_IA32)
diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
index 069c04be1507..879b77792f94 100644
--- a/arch/x86/include/asm/trace/fpu.h
+++ b/arch/x86/include/asm/trace/fpu.h
@@ -13,22 +13,22 @@ DECLARE_EVENT_CLASS(x86_fpu,
 
 	TP_STRUCT__entry(
 		__field(struct fpu *, fpu)
-		__field(bool, initialized)
+		__field(bool, load_fpu)
 		__field(u64, xfeatures)
 		__field(u64, xcomp_bv)
 	),
 
 	TP_fast_assign(
 		__entry->fpu		= fpu;
-		__entry->initialized	= fpu->initialized;
+		__entry->load_fpu	= test_thread_flag(TIF_NEED_FPU_LOAD);
 		if (boot_cpu_has(X86_FEATURE_OSXSAVE)) {
 			__entry->xfeatures = fpu->state.xsave.header.xfeatures;
 			__entry->xcomp_bv  = fpu->state.xsave.header.xcomp_bv;
 		}
 	),
-	TP_printk("x86/fpu: %p initialized: %d xfeatures: %llx xcomp_bv: %llx",
+	TP_printk("x86/fpu: %p load: %d xfeatures: %llx xcomp_bv: %llx",
 		  __entry->fpu,
-		  __entry->initialized,
+		  __entry->load_fpu,
 		  __entry->xfeatures,
 		  __entry->xcomp_bv
 	)
@@ -64,11 +64,6 @@ DEFINE_EVENT(x86_fpu, x86_fpu_regs_deactivated,
 	TP_ARGS(fpu)
 );
 
-DEFINE_EVENT(x86_fpu, x86_fpu_activate_state,
-	TP_PROTO(struct fpu *fpu),
-	TP_ARGS(fpu)
-);
-
 DEFINE_EVENT(x86_fpu, x86_fpu_init_state,
 	TP_PROTO(struct fpu *fpu),
 	TP_ARGS(fpu)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 37640544e12f..8739bdfe9bdf 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -372,6 +372,8 @@ static bool pku_disabled;
 
 static __always_inline void setup_pku(struct cpuinfo_x86 *c)
 {
+	struct pkru_state *pk;
+
 	/* check the boot processor, plus compile options for PKU: */
 	if (!cpu_feature_enabled(X86_FEATURE_PKU))
 		return;
@@ -382,6 +384,9 @@ static __always_inline void setup_pku(struct cpuinfo_x86 *c)
 		return;
 
 	cr4_set_bits(X86_CR4_PKE);
+	pk = get_xsave_addr(&init_fpstate.xsave, XFEATURE_PKRU);
+	if (pk)
+		pk->pkru = init_pkru_value;
 	/*
 	 * Seting X86_CR4_PKE will cause the X86_FEATURE_OSPKE
 	 * cpuid bit to be set. We need to ensure that we
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 2e5003fef51a..ce243f76bdb7 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -101,24 +101,21 @@ static void __kernel_fpu_begin(void)
 
 	kernel_fpu_disable();
 
-	if (fpu->initialized) {
-		/*
-		 * Ignore return value -- we don't care if reg state
-		 * is clobbered.
-		 */
-		copy_fpregs_to_fpstate(fpu);
-	} else {
-		__cpu_invalidate_fpregs_state();
+	if (current->mm) {
+		if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
+			set_thread_flag(TIF_NEED_FPU_LOAD);
+			/*
+			 * Ignore return value -- we don't care if reg state
+			 * is clobbered.
+			 */
+			copy_fpregs_to_fpstate(fpu);
+		}
 	}
+	__cpu_invalidate_fpregs_state();
 }
 
 static void __kernel_fpu_end(void)
 {
-	struct fpu *fpu = &current->thread.fpu;
-
-	if (fpu->initialized)
-		copy_kernel_to_fpregs(&fpu->state);
-
 	kernel_fpu_enable();
 }
 
@@ -145,15 +142,17 @@ void fpu__save(struct fpu *fpu)
 {
 	WARN_ON_FPU(fpu != &current->thread.fpu);
 
-	preempt_disable();
+	fpregs_lock();
 	trace_x86_fpu_before_save(fpu);
-	if (fpu->initialized) {
+
+	if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
 		if (!copy_fpregs_to_fpstate(fpu)) {
 			copy_kernel_to_fpregs(&fpu->state);
 		}
 	}
+
 	trace_x86_fpu_after_save(fpu);
-	preempt_enable();
+	fpregs_unlock();
 }
 EXPORT_SYMBOL_GPL(fpu__save);
 
@@ -186,11 +185,14 @@ void fpstate_init(union fpregs_state *state)
 }
 EXPORT_SYMBOL_GPL(fpstate_init);
 
-int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
+int fpu__copy(struct task_struct *dst, struct task_struct *src)
 {
+	struct fpu *dst_fpu = &dst->thread.fpu;
+	struct fpu *src_fpu = &src->thread.fpu;
+
 	dst_fpu->last_cpu = -1;
 
-	if (!src_fpu->initialized || !static_cpu_has(X86_FEATURE_FPU))
+	if (!static_cpu_has(X86_FEATURE_FPU))
 		return 0;
 
 	WARN_ON_FPU(src_fpu != &current->thread.fpu);
@@ -202,16 +204,23 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 	memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
 
 	/*
-	 * Save current FPU registers directly into the child
-	 * FPU context, without any memory-to-memory copying.
+	 * If the FPU registers are not current just memcpy() the state.
+	 * Otherwise save current FPU registers directly into the child's FPU
+	 * context, without any memory-to-memory copying.
 	 *
 	 * ( The function 'fails' in the FNSAVE case, which destroys
-	 *   register contents so we have to copy them back. )
+	 *   register contents so we have to load them back. )
 	 */
-	if (!copy_fpregs_to_fpstate(dst_fpu)) {
-		memcpy(&src_fpu->state, &dst_fpu->state, fpu_kernel_xstate_size);
-		copy_kernel_to_fpregs(&src_fpu->state);
-	}
+	fpregs_lock();
+	if (test_thread_flag(TIF_NEED_FPU_LOAD))
+		memcpy(&dst_fpu->state, &src_fpu->state, fpu_kernel_xstate_size);
+
+	else if (!copy_fpregs_to_fpstate(dst_fpu))
+		copy_kernel_to_fpregs(&dst_fpu->state);
+
+	fpregs_unlock();
+
+	set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD);
 
 	trace_x86_fpu_copy_src(src_fpu);
 	trace_x86_fpu_copy_dst(dst_fpu);
@@ -223,20 +232,14 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
  * Activate the current task's in-memory FPU context,
  * if it has not been used before:
  */
-void fpu__initialize(struct fpu *fpu)
+static void fpu__initialize(struct fpu *fpu)
 {
 	WARN_ON_FPU(fpu != &current->thread.fpu);
 
-	if (!fpu->initialized) {
-		fpstate_init(&fpu->state);
-		trace_x86_fpu_init_state(fpu);
-
-		trace_x86_fpu_activate_state(fpu);
-		/* Safe to do for the current task: */
-		fpu->initialized = 1;
-	}
+	set_thread_flag(TIF_NEED_FPU_LOAD);
+	fpstate_init(&fpu->state);
+	trace_x86_fpu_init_state(fpu);
 }
-EXPORT_SYMBOL_GPL(fpu__initialize);
 
 /*
  * This function must be called before we read a task's fpstate.
@@ -248,32 +251,20 @@ EXPORT_SYMBOL_GPL(fpu__initialize);
  *
  * - or it's called for stopped tasks (ptrace), in which case the
  *   registers were already saved by the context-switch code when
- *   the task scheduled out - we only have to initialize the registers
- *   if they've never been initialized.
+ *   the task scheduled out.
  *
  * If the task has used the FPU before then save it.
  */
 void fpu__prepare_read(struct fpu *fpu)
 {
-	if (fpu == &current->thread.fpu) {
+	if (fpu == &current->thread.fpu)
 		fpu__save(fpu);
-	} else {
-		if (!fpu->initialized) {
-			fpstate_init(&fpu->state);
-			trace_x86_fpu_init_state(fpu);
-
-			trace_x86_fpu_activate_state(fpu);
-			/* Safe to do for current and for stopped child tasks: */
-			fpu->initialized = 1;
-		}
-	}
 }
 
 /*
  * This function must be called before we write a task's fpstate.
  *
- * If the task has used the FPU before then invalidate any cached FPU registers.
- * If the task has not used the FPU before then initialize its fpstate.
+ * Invalidate any cached FPU registers.
  *
  * After this function call, after registers in the fpstate are
  * modified and the child task has woken up, the child task will
@@ -290,44 +281,11 @@ void fpu__prepare_write(struct fpu *fpu)
  */
 	WARN_ON_FPU(fpu == &current->thread.fpu);
 
-	if (fpu->initialized) {
-		/* Invalidate any cached state: */
-		__fpu_invalidate_fpregs_state(fpu);
-	} else {
-		fpstate_init(&fpu->state);
-		trace_x86_fpu_init_state(fpu);
-
-		trace_x86_fpu_activate_state(fpu);
-		/* Safe to do for stopped child tasks: */
-		fpu->initialized = 1;
-	}
+	/* Invalidate any cached state: */
+	__fpu_invalidate_fpregs_state(fpu);
 }
 
 /*
- * 'fpu__restore()' is called to copy FPU registers from
- * the FPU fpstate to the live hw registers and to activate
- * access to the hardware registers, so that FPU instructions
- * can be used afterwards.
- *
- * Must be called with kernel preemption disabled (for example
- * with local interrupts disabled, as it is in the case of
- * do_device_not_available()).
- */
-void fpu__restore(struct fpu *fpu)
-{
-	fpu__initialize(fpu);
-
-	/* Avoid __kernel_fpu_begin() right after fpregs_activate() */
-	kernel_fpu_disable();
-	trace_x86_fpu_before_restore(fpu);
-	fpregs_activate(fpu);
-	copy_kernel_to_fpregs(&fpu->state);
-	trace_x86_fpu_after_restore(fpu);
-	kernel_fpu_enable();
-}
-EXPORT_SYMBOL_GPL(fpu__restore);
-
-/*
  * Drops current FPU state: deactivates the fpregs and
  * the fpstate. NOTE: it still leaves previous contents
  * in the fpregs in the eager-FPU case.
@@ -341,17 +299,13 @@ void fpu__drop(struct fpu *fpu)
 	preempt_disable();
 
 	if (fpu == &current->thread.fpu) {
-		if (fpu->initialized) {
-			/* Ignore delayed exceptions from user space */
-			asm volatile("1: fwait\n"
-				     "2:\n"
-				     _ASM_EXTABLE(1b, 2b));
-			fpregs_deactivate(fpu);
-		}
+		/* Ignore delayed exceptions from user space */
+		asm volatile("1: fwait\n"
+			     "2:\n"
+			     _ASM_EXTABLE(1b, 2b));
+		fpregs_deactivate(fpu);
 	}
 
-	fpu->initialized = 0;
-
 	trace_x86_fpu_dropped(fpu);
 
 	preempt_enable();
@@ -363,6 +317,8 @@ void fpu__drop(struct fpu *fpu)
  */
 static inline void copy_init_fpstate_to_fpregs(void)
 {
+	fpregs_lock();
+
 	if (use_xsave())
 		copy_kernel_to_xregs(&init_fpstate.xsave, -1);
 	else if (static_cpu_has(X86_FEATURE_FXSR))
@@ -372,6 +328,9 @@ static inline void copy_init_fpstate_to_fpregs(void)
 
 	if (boot_cpu_has(X86_FEATURE_OSPKE))
 		copy_init_pkru_to_fpregs();
+
+	fpregs_mark_activate();
+	fpregs_unlock();
 }
 
 /*
@@ -389,16 +348,52 @@ void fpu__clear(struct fpu *fpu)
 	/*
 	 * Make sure fpstate is cleared and initialized.
 	 */
-	if (static_cpu_has(X86_FEATURE_FPU)) {
-		preempt_disable();
-		fpu__initialize(fpu);
-		user_fpu_begin();
+	fpu__initialize(fpu);
+	if (static_cpu_has(X86_FEATURE_FPU))
 		copy_init_fpstate_to_fpregs();
-		preempt_enable();
-	}
 }
 
 /*
+ * Load FPU context before returning to userspace.
+ */
+void switch_fpu_return(void)
+{
+	if (!static_cpu_has(X86_FEATURE_FPU))
+		return;
+
+	__fpregs_load_activate();
+}
+EXPORT_SYMBOL_GPL(switch_fpu_return);
+
+#ifdef CONFIG_X86_DEBUG_FPU
+/*
+ * If current FPU state according to its tracking (loaded FPU context on this
+ * CPU) is not valid then we must have TIF_NEED_FPU_LOAD set so the context is
+ * loaded on return to userland.
+ */
+void fpregs_assert_state_consistent(void)
+{
+	struct fpu *fpu = &current->thread.fpu;
+
+	if (test_thread_flag(TIF_NEED_FPU_LOAD))
+		return;
+
+	WARN_ON_FPU(!fpregs_state_valid(fpu, smp_processor_id()));
+}
+EXPORT_SYMBOL_GPL(fpregs_assert_state_consistent);
+#endif
+
+void fpregs_mark_activate(void)
+{
+	struct fpu *fpu = &current->thread.fpu;
+
+	fpregs_activate(fpu);
+	fpu->last_cpu = smp_processor_id();
+	clear_thread_flag(TIF_NEED_FPU_LOAD);
+}
+EXPORT_SYMBOL_GPL(fpregs_mark_activate);
+
+/*
  * x87 math exception handling:
  */
 
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 6abd83572b01..20d8fa7124c7 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -239,8 +239,6 @@ static void __init fpu__init_system_ctx_switch(void)
 
 	WARN_ON_FPU(!on_boot_cpu);
 	on_boot_cpu = 0;
-
-	WARN_ON_FPU(current->thread.fpu.initialized);
 }
 
 /*
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index bc02f5144b95..d652b939ccfb 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -15,16 +15,12 @@
  */
 int regset_fpregs_active(struct task_struct *target, const struct user_regset *regset)
 {
-	struct fpu *target_fpu = &target->thread.fpu;
-
-	return target_fpu->initialized ? regset->n : 0;
+	return regset->n;
 }
 
 int regset_xregset_fpregs_active(struct task_struct *target, const struct user_regset *regset)
 {
-	struct fpu *target_fpu = &target->thread.fpu;
-
-	if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->initialized)
+	if (boot_cpu_has(X86_FEATURE_FXSR))
 		return regset->n;
 	else
 		return 0;
@@ -269,11 +265,10 @@ convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
 		memcpy(&to[i], &from[i], sizeof(to[0]));
 }
 
-void convert_to_fxsr(struct task_struct *tsk,
+void convert_to_fxsr(struct fxregs_state *fxsave,
 		     const struct user_i387_ia32_struct *env)
 
 {
-	struct fxregs_state *fxsave = &tsk->thread.fpu.state.fxsave;
 	struct _fpreg *from = (struct _fpreg *) &env->st_space[0];
 	struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0];
 	int i;
@@ -350,7 +345,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
 
 	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1);
 	if (!ret)
-		convert_to_fxsr(target, &env);
+		convert_to_fxsr(&target->thread.fpu.state.fxsave, &env);
 
 	/*
 	 * update the header bit in the xsave header, indicating the
@@ -371,16 +366,9 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
 int dump_fpu(struct pt_regs *regs, struct user_i387_struct *ufpu)
 {
 	struct task_struct *tsk = current;
-	struct fpu *fpu = &tsk->thread.fpu;
-	int fpvalid;
-
-	fpvalid = fpu->initialized;
-	if (fpvalid)
-		fpvalid = !fpregs_get(tsk, NULL,
-				      0, sizeof(struct user_i387_ia32_struct),
-				      ufpu, NULL);
 
-	return fpvalid;
+	return !fpregs_get(tsk, NULL, 0, sizeof(struct user_i387_ia32_struct),
+			   ufpu, NULL);
 }
 EXPORT_SYMBOL(dump_fpu);
 
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index f6a1d299627c..5a8d118bc423 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -92,13 +92,13 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame)
 		return err;
 
 	err |= __put_user(FP_XSTATE_MAGIC2,
-			  (__u32 *)(buf + fpu_user_xstate_size));
+			  (__u32 __user *)(buf + fpu_user_xstate_size));
 
 	/*
 	 * Read the xfeatures which we copied (directly from the cpu or
 	 * from the state in task struct) to the user buffers.
 	 */
-	err |= __get_user(xfeatures, (__u32 *)&x->header.xfeatures);
+	err |= __get_user(xfeatures, (__u32 __user *)&x->header.xfeatures);
 
 	/*
 	 * For legacy compatible, we always set FP/SSE bits in the bit
@@ -113,7 +113,7 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame)
 	 */
 	xfeatures |= XFEATURE_MASK_FPSSE;
 
-	err |= __put_user(xfeatures, (__u32 *)&x->header.xfeatures);
+	err |= __put_user(xfeatures, (__u32 __user *)&x->header.xfeatures);
 
 	return err;
 }
@@ -144,9 +144,10 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
  * buf == buf_fx for 64-bit frames and 32-bit fsave frame.
  * buf != buf_fx for 32-bit frames with fxstate.
  *
- * If the fpu, extended register state is live, save the state directly
- * to the user frame pointed by the aligned pointer 'buf_fx'. Otherwise,
- * copy the thread's fpu state to the user frame starting at 'buf_fx'.
+ * Try to save it directly to the user frame with disabled page fault handler.
+ * If this fails then do the slow path where the FPU state is first saved to
+ * task's fpu->state and then copy it to the user frame pointed to by the
+ * aligned pointer 'buf_fx'.
  *
  * If this is a 32-bit frame with fxstate, put a fsave header before
  * the aligned state at 'buf_fx'.
@@ -156,10 +157,9 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
  */
 int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
 {
-	struct fpu *fpu = &current->thread.fpu;
-	struct xregs_state *xsave = &fpu->state.xsave;
 	struct task_struct *tsk = current;
 	int ia32_fxstate = (buf != buf_fx);
+	int ret;
 
 	ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) ||
 			 IS_ENABLED(CONFIG_IA32_EMULATION));
@@ -172,28 +172,34 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
 			sizeof(struct user_i387_ia32_struct), NULL,
 			(struct _fpstate_32 __user *) buf) ? -1 : 1;
 
-	if (fpu->initialized || using_compacted_format()) {
-		/* Save the live register state to the user directly. */
-		if (copy_fpregs_to_sigframe(buf_fx))
-			return -1;
-		/* Update the thread's fxstate to save the fsave header. */
-		if (ia32_fxstate)
-			copy_fxregs_to_kernel(fpu);
-	} else {
-		/*
-		 * It is a *bug* if kernel uses compacted-format for xsave
-		 * area and we copy it out directly to a signal frame. It
-		 * should have been handled above by saving the registers
-		 * directly.
-		 */
-		if (boot_cpu_has(X86_FEATURE_XSAVES)) {
-			WARN_ONCE(1, "x86/fpu: saving compacted-format xsave area to a signal frame!\n");
-			return -1;
-		}
-
-		fpstate_sanitize_xstate(fpu);
-		if (__copy_to_user(buf_fx, xsave, fpu_user_xstate_size))
-			return -1;
+retry:
+	/*
+	 * Load the FPU registers if they are not valid for the current task.
+	 * With a valid FPU state we can attempt to save the state directly to
+	 * userland's stack frame which will likely succeed. If it does not,
+	 * resolve the fault in the user memory and try again.
+	 */
+	fpregs_lock();
+	if (test_thread_flag(TIF_NEED_FPU_LOAD))
+		__fpregs_load_activate();
+
+	pagefault_disable();
+	ret = copy_fpregs_to_sigframe(buf_fx);
+	pagefault_enable();
+	fpregs_unlock();
+
+	if (ret) {
+		int aligned_size;
+		int nr_pages;
+
+		aligned_size = offset_in_page(buf_fx) + fpu_user_xstate_size;
+		nr_pages = DIV_ROUND_UP(aligned_size, PAGE_SIZE);
+
+		ret = get_user_pages_unlocked((unsigned long)buf_fx, nr_pages,
+					      NULL, FOLL_WRITE);
+		if (ret == nr_pages)
+			goto retry;
+		return -EFAULT;
 	}
 
 	/* Save the fsave header for the 32-bit frames. */
207} 213}
208 214
209static inline void 215static inline void
210sanitize_restored_xstate(struct task_struct *tsk, 216sanitize_restored_xstate(union fpregs_state *state,
211 struct user_i387_ia32_struct *ia32_env, 217 struct user_i387_ia32_struct *ia32_env,
212 u64 xfeatures, int fx_only) 218 u64 xfeatures, int fx_only)
213{ 219{
214 struct xregs_state *xsave = &tsk->thread.fpu.state.xsave; 220 struct xregs_state *xsave = &state->xsave;
215 struct xstate_header *header = &xsave->header; 221 struct xstate_header *header = &xsave->header;
216 222
217 if (use_xsave()) { 223 if (use_xsave()) {
@@ -238,17 +244,18 @@ sanitize_restored_xstate(struct task_struct *tsk,
238 */ 244 */
239 xsave->i387.mxcsr &= mxcsr_feature_mask; 245 xsave->i387.mxcsr &= mxcsr_feature_mask;
240 246
241 convert_to_fxsr(tsk, ia32_env); 247 if (ia32_env)
248 convert_to_fxsr(&state->fxsave, ia32_env);
242 } 249 }
243} 250}
244 251
245/* 252/*
246 * Restore the extended state if present. Otherwise, restore the FP/SSE state. 253 * Restore the extended state if present. Otherwise, restore the FP/SSE state.
247 */ 254 */
248static inline int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only) 255static int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only)
249{ 256{
250 if (use_xsave()) { 257 if (use_xsave()) {
251 if ((unsigned long)buf % 64 || fx_only) { 258 if (fx_only) {
252 u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE; 259 u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE;
253 copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); 260 copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
254 return copy_user_to_fxregs(buf); 261 return copy_user_to_fxregs(buf);
@@ -266,12 +273,15 @@ static inline int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_
266 273
267static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) 274static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
268{ 275{
276 struct user_i387_ia32_struct *envp = NULL;
277 int state_size = fpu_kernel_xstate_size;
269 int ia32_fxstate = (buf != buf_fx); 278 int ia32_fxstate = (buf != buf_fx);
270 struct task_struct *tsk = current; 279 struct task_struct *tsk = current;
271 struct fpu *fpu = &tsk->thread.fpu; 280 struct fpu *fpu = &tsk->thread.fpu;
272 int state_size = fpu_kernel_xstate_size; 281 struct user_i387_ia32_struct env;
273 u64 xfeatures = 0; 282 u64 xfeatures = 0;
274 int fx_only = 0; 283 int fx_only = 0;
284 int ret = 0;
275 285
276 ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) || 286 ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) ||
277 IS_ENABLED(CONFIG_IA32_EMULATION)); 287 IS_ENABLED(CONFIG_IA32_EMULATION));
@@ -284,8 +294,6 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
284 if (!access_ok(buf, size)) 294 if (!access_ok(buf, size))
285 return -EACCES; 295 return -EACCES;
286 296
287 fpu__initialize(fpu);
288
289 if (!static_cpu_has(X86_FEATURE_FPU)) 297 if (!static_cpu_has(X86_FEATURE_FPU))
290 return fpregs_soft_set(current, NULL, 298 return fpregs_soft_set(current, NULL,
291 0, sizeof(struct user_i387_ia32_struct), 299 0, sizeof(struct user_i387_ia32_struct),
@@ -308,61 +316,101 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
308 } 316 }
309 } 317 }
310 318
319 /*
320 * The current state of the FPU registers does not matter. By setting
321 * TIF_NEED_FPU_LOAD unconditionally it is ensured that the our xstate
322 * is not modified on context switch and that the xstate is considered
323 * to be loaded again on return to userland (overriding last_cpu avoids
324 * the optimisation).
325 */
326 set_thread_flag(TIF_NEED_FPU_LOAD);
327 __fpu_invalidate_fpregs_state(fpu);
328
329 if ((unsigned long)buf_fx % 64)
330 fx_only = 1;
331 /*
332 * For 32-bit frames with fxstate, copy the fxstate so it can be
333 * reconstructed later.
334 */
311 if (ia32_fxstate) { 335 if (ia32_fxstate) {
336 ret = __copy_from_user(&env, buf, sizeof(env));
337 if (ret)
338 goto err_out;
339 envp = &env;
340 } else {
312 /* 341 /*
313 * For 32-bit frames with fxstate, copy the user state to the 342 * Attempt to restore the FPU registers directly from user
314 * thread's fpu state, reconstruct fxstate from the fsave 343 * memory. For that to succeed, the user access cannot cause
315 * header. Validate and sanitize the copied state. 344 * page faults. If it does, fall back to the slow path below,
345 * going through the kernel buffer with the enabled pagefault
346 * handler.
316 */ 347 */
317 struct user_i387_ia32_struct env; 348 fpregs_lock();
318 int err = 0; 349 pagefault_disable();
350 ret = copy_user_to_fpregs_zeroing(buf_fx, xfeatures, fx_only);
351 pagefault_enable();
352 if (!ret) {
353 fpregs_mark_activate();
354 fpregs_unlock();
355 return 0;
356 }
357 fpregs_unlock();
358 }
319 359
320 /* 360
321 * Drop the current fpu which clears fpu->initialized. This ensures 361 if (use_xsave() && !fx_only) {
322 * that any context-switch during the copy of the new state, 362 u64 init_bv = xfeatures_mask & ~xfeatures;
323 * avoids the intermediate state from getting restored/saved.
324 * Thus avoiding the new restored state from getting corrupted.
325 * We will be ready to restore/save the state only after
326 * fpu->initialized is again set.
327 */
328 fpu__drop(fpu);
329 363
330 if (using_compacted_format()) { 364 if (using_compacted_format()) {
331 err = copy_user_to_xstate(&fpu->state.xsave, buf_fx); 365 ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
332 } else { 366 } else {
333 err = __copy_from_user(&fpu->state.xsave, buf_fx, state_size); 367 ret = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
334 368
335 if (!err && state_size > offsetof(struct xregs_state, header)) 369 if (!ret && state_size > offsetof(struct xregs_state, header))
336 err = validate_xstate_header(&fpu->state.xsave.header); 370 ret = validate_xstate_header(&fpu->state.xsave.header);
337 } 371 }
372 if (ret)
373 goto err_out;
338 374
339 if (err || __copy_from_user(&env, buf, sizeof(env))) { 375 sanitize_restored_xstate(&fpu->state, envp, xfeatures, fx_only);
340 fpstate_init(&fpu->state); 376
341 trace_x86_fpu_init_state(fpu); 377 fpregs_lock();
342 err = -1; 378 if (unlikely(init_bv))
343 } else { 379 copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
344 sanitize_restored_xstate(tsk, &env, xfeatures, fx_only); 380 ret = copy_kernel_to_xregs_err(&fpu->state.xsave, xfeatures);
381
382 } else if (use_fxsr()) {
383 ret = __copy_from_user(&fpu->state.fxsave, buf_fx, state_size);
384 if (ret) {
385 ret = -EFAULT;
386 goto err_out;
345 } 387 }
346 388
347 local_bh_disable(); 389 sanitize_restored_xstate(&fpu->state, envp, xfeatures, fx_only);
348 fpu->initialized = 1;
349 fpu__restore(fpu);
350 local_bh_enable();
351 390
352 return err; 391 fpregs_lock();
353 } else { 392 if (use_xsave()) {
354 /* 393 u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE;
355 * For 64-bit frames and 32-bit fsave frames, restore the user 394 copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
356 * state to the registers directly (with exceptions handled).
357 */
358 user_fpu_begin();
359 if (copy_user_to_fpregs_zeroing(buf_fx, xfeatures, fx_only)) {
360 fpu__clear(fpu);
361 return -1;
362 } 395 }
396
397 ret = copy_kernel_to_fxregs_err(&fpu->state.fxsave);
398 } else {
399 ret = __copy_from_user(&fpu->state.fsave, buf_fx, state_size);
400 if (ret)
401 goto err_out;
402
403 fpregs_lock();
404 ret = copy_kernel_to_fregs_err(&fpu->state.fsave);
363 } 405 }
406 if (!ret)
407 fpregs_mark_activate();
408 fpregs_unlock();
364 409
365 return 0; 410err_out:
411 if (ret)
412 fpu__clear(fpu);
413 return ret;
366} 414}
367 415
368static inline int xstate_sigframe_size(void) 416static inline int xstate_sigframe_size(void)
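For orientation, a condensed sketch of the fastpath the hunk above adds to __fpu__restore_sig(). The helper names (fpregs_lock(), pagefault_disable(), copy_user_to_fpregs_zeroing(), fpregs_mark_activate()) are taken from the diff; the function below is a simplified illustration of the control flow, not the actual kernel code.

static int restore_fpregs_fastpath(void __user *buf_fx, u64 xfeatures,
				   int fx_only)
{
	int ret;

	fpregs_lock();
	pagefault_disable();
	/* Try to restore straight from the user buffer; a page fault here
	 * makes the copy fail instead of being handled. */
	ret = copy_user_to_fpregs_zeroing(buf_fx, xfeatures, fx_only);
	pagefault_enable();

	if (!ret) {
		/* The registers now hold the task's state; arm the
		 * deferred-load machinery so they are preserved until the
		 * task returns to user space. */
		fpregs_mark_activate();
		fpregs_unlock();
		return 0;
	}

	fpregs_unlock();
	/* The caller falls back to the slow path through the kernel buffer,
	 * with the page fault handler enabled. */
	return ret;
}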
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index d7432c2b1051..9c459fd1d38e 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -805,20 +805,18 @@ void fpu__resume_cpu(void)
805} 805}
806 806
807/* 807/*
808 * Given an xstate feature mask, calculate where in the xsave 808 * Given an xstate feature nr, calculate where in the xsave
809 * buffer the state is. Callers should ensure that the buffer 809 * buffer the state is. Callers should ensure that the buffer
810 * is valid. 810 * is valid.
811 */ 811 */
812static void *__raw_xsave_addr(struct xregs_state *xsave, int xstate_feature_mask) 812static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
813{ 813{
814 int feature_nr = fls64(xstate_feature_mask) - 1; 814 if (!xfeature_enabled(xfeature_nr)) {
815
816 if (!xfeature_enabled(feature_nr)) {
817 WARN_ON_FPU(1); 815 WARN_ON_FPU(1);
818 return NULL; 816 return NULL;
819 } 817 }
820 818
821 return (void *)xsave + xstate_comp_offsets[feature_nr]; 819 return (void *)xsave + xstate_comp_offsets[xfeature_nr];
822} 820}
823/* 821/*
824 * Given the xsave area and a state inside, this function returns the 822 * Given the xsave area and a state inside, this function returns the
@@ -832,13 +830,13 @@ static void *__raw_xsave_addr(struct xregs_state *xsave, int xstate_feature_mask
832 * 830 *
833 * Inputs: 831 * Inputs:
834 * xstate: the thread's storage area for all FPU data 832 * xstate: the thread's storage area for all FPU data
835 * xstate_feature: state which is defined in xsave.h (e.g. 833 * xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP,
836 * XFEATURE_MASK_FP, XFEATURE_MASK_SSE, etc...) 834 * XFEATURE_SSE, etc...)
837 * Output: 835 * Output:
838 * address of the state in the xsave area, or NULL if the 836 * address of the state in the xsave area, or NULL if the
839 * field is not present in the xsave buffer. 837 * field is not present in the xsave buffer.
840 */ 838 */
841void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature) 839void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
842{ 840{
843 /* 841 /*
844 * Do we even *have* xsave state? 842 * Do we even *have* xsave state?
@@ -851,11 +849,11 @@ void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature)
851 * have not enabled. Remember that pcntxt_mask is 849 * have not enabled. Remember that pcntxt_mask is
852 * what we write to the XCR0 register. 850 * what we write to the XCR0 register.
853 */ 851 */
854 WARN_ONCE(!(xfeatures_mask & xstate_feature), 852 WARN_ONCE(!(xfeatures_mask & BIT_ULL(xfeature_nr)),
855 "get of unsupported state"); 853 "get of unsupported state");
856 /* 854 /*
857 * This assumes the last 'xsave*' instruction to 855 * This assumes the last 'xsave*' instruction to
858 * have requested that 'xstate_feature' be saved. 856 * have requested that 'xfeature_nr' be saved.
859 * If it did not, we might be seeing and old value 857 * If it did not, we might be seeing and old value
860 * of the field in the buffer. 858 * of the field in the buffer.
861 * 859 *
@@ -864,10 +862,10 @@ void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature)
864 * or because the "init optimization" caused it 862 * or because the "init optimization" caused it
865 * to not be saved. 863 * to not be saved.
866 */ 864 */
867 if (!(xsave->header.xfeatures & xstate_feature)) 865 if (!(xsave->header.xfeatures & BIT_ULL(xfeature_nr)))
868 return NULL; 866 return NULL;
869 867
870 return __raw_xsave_addr(xsave, xstate_feature); 868 return __raw_xsave_addr(xsave, xfeature_nr);
871} 869}
872EXPORT_SYMBOL_GPL(get_xsave_addr); 870EXPORT_SYMBOL_GPL(get_xsave_addr);
873 871
@@ -882,25 +880,23 @@ EXPORT_SYMBOL_GPL(get_xsave_addr);
882 * Note that this only works on the current task. 880 * Note that this only works on the current task.
883 * 881 *
884 * Inputs: 882 * Inputs:
885 * @xsave_state: state which is defined in xsave.h (e.g. XFEATURE_MASK_FP, 883 * @xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP,
886 * XFEATURE_MASK_SSE, etc...) 884 * XFEATURE_SSE, etc...)
887 * Output: 885 * Output:
888 * address of the state in the xsave area or NULL if the state 886 * address of the state in the xsave area or NULL if the state
889 * is not present or is in its 'init state'. 887 * is not present or is in its 'init state'.
890 */ 888 */
891const void *get_xsave_field_ptr(int xsave_state) 889const void *get_xsave_field_ptr(int xfeature_nr)
892{ 890{
893 struct fpu *fpu = &current->thread.fpu; 891 struct fpu *fpu = &current->thread.fpu;
894 892
895 if (!fpu->initialized)
896 return NULL;
897 /* 893 /*
898 * fpu__save() takes the CPU's xstate registers 894 * fpu__save() takes the CPU's xstate registers
899 * and saves them off to the 'fpu memory buffer. 895 * and saves them off to the 'fpu memory buffer.
900 */ 896 */
901 fpu__save(fpu); 897 fpu__save(fpu);
902 898
903 return get_xsave_addr(&fpu->state.xsave, xsave_state); 899 return get_xsave_addr(&fpu->state.xsave, xfeature_nr);
904} 900}
905 901
906#ifdef CONFIG_ARCH_HAS_PKEYS 902#ifdef CONFIG_ARCH_HAS_PKEYS
@@ -1016,7 +1012,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of
1016 * Copy only in-use xstates: 1012 * Copy only in-use xstates:
1017 */ 1013 */
1018 if ((header.xfeatures >> i) & 1) { 1014 if ((header.xfeatures >> i) & 1) {
1019 void *src = __raw_xsave_addr(xsave, 1 << i); 1015 void *src = __raw_xsave_addr(xsave, i);
1020 1016
1021 offset = xstate_offsets[i]; 1017 offset = xstate_offsets[i];
1022 size = xstate_sizes[i]; 1018 size = xstate_sizes[i];
@@ -1102,7 +1098,7 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i
1102 * Copy only in-use xstates: 1098 * Copy only in-use xstates:
1103 */ 1099 */
1104 if ((header.xfeatures >> i) & 1) { 1100 if ((header.xfeatures >> i) & 1) {
1105 void *src = __raw_xsave_addr(xsave, 1 << i); 1101 void *src = __raw_xsave_addr(xsave, i);
1106 1102
1107 offset = xstate_offsets[i]; 1103 offset = xstate_offsets[i];
1108 size = xstate_sizes[i]; 1104 size = xstate_sizes[i];
@@ -1159,7 +1155,7 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
1159 u64 mask = ((u64)1 << i); 1155 u64 mask = ((u64)1 << i);
1160 1156
1161 if (hdr.xfeatures & mask) { 1157 if (hdr.xfeatures & mask) {
1162 void *dst = __raw_xsave_addr(xsave, 1 << i); 1158 void *dst = __raw_xsave_addr(xsave, i);
1163 1159
1164 offset = xstate_offsets[i]; 1160 offset = xstate_offsets[i];
1165 size = xstate_sizes[i]; 1161 size = xstate_sizes[i];
@@ -1213,7 +1209,7 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
1213 u64 mask = ((u64)1 << i); 1209 u64 mask = ((u64)1 << i);
1214 1210
1215 if (hdr.xfeatures & mask) { 1211 if (hdr.xfeatures & mask) {
1216 void *dst = __raw_xsave_addr(xsave, 1 << i); 1212 void *dst = __raw_xsave_addr(xsave, i);
1217 1213
1218 offset = xstate_offsets[i]; 1214 offset = xstate_offsets[i];
1219 size = xstate_sizes[i]; 1215 size = xstate_sizes[i];
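The xstate.c changes above move __raw_xsave_addr(), get_xsave_addr() and get_xsave_field_ptr() from taking a feature mask to taking a feature number. A minimal caller-side sketch, assuming only what the diff shows (XFEATURE_PKRU, BIT_ULL(), struct pkru_state); loop callers likewise now pass the index i directly instead of 1 << i.

static struct pkru_state *xsave_pkru(struct xregs_state *xsave)
{
	/* Callers used to pass XFEATURE_MASK_PKRU; they now pass the feature
	 * number, and the helper derives a mask via BIT_ULL(xfeature_nr)
	 * only where one is actually needed. */
	return get_xsave_addr(xsave, XFEATURE_PKRU);
}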
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index d1d312d012a6..75fea0d48c0e 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -101,7 +101,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
101 dst->thread.vm86 = NULL; 101 dst->thread.vm86 = NULL;
102#endif 102#endif
103 103
104 return fpu__copy(&dst->thread.fpu, &src->thread.fpu); 104 return fpu__copy(dst, src);
105} 105}
106 106
107/* 107/*
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 70933193878c..2399e910d109 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -241,7 +241,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
241 241
242 /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ 242 /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
243 243
244 switch_fpu_prepare(prev_fpu, cpu); 244 if (!test_thread_flag(TIF_NEED_FPU_LOAD))
245 switch_fpu_prepare(prev_fpu, cpu);
245 246
246 /* 247 /*
247 * Save away %gs. No need to save %fs, as it was saved on the 248 * Save away %gs. No need to save %fs, as it was saved on the
@@ -274,9 +275,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
274 /* 275 /*
275 * Leave lazy mode, flushing any hypercalls made here. 276 * Leave lazy mode, flushing any hypercalls made here.
276 * This must be done before restoring TLS segments so 277 * This must be done before restoring TLS segments so
277 * the GDT and LDT are properly updated, and must be 278 * the GDT and LDT are properly updated.
278 * done before fpu__restore(), so the TS bit is up
279 * to date.
280 */ 279 */
281 arch_end_context_switch(next_p); 280 arch_end_context_switch(next_p);
282 281
@@ -297,10 +296,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
297 if (prev->gs | next->gs) 296 if (prev->gs | next->gs)
298 lazy_load_gs(next->gs); 297 lazy_load_gs(next->gs);
299 298
300 switch_fpu_finish(next_fpu, cpu);
301
302 this_cpu_write(current_task, next_p); 299 this_cpu_write(current_task, next_p);
303 300
301 switch_fpu_finish(next_fpu);
302
304 /* Load the Intel cache allocation PQR MSR. */ 303 /* Load the Intel cache allocation PQR MSR. */
305 resctrl_sched_in(); 304 resctrl_sched_in();
306 305
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 844a28b29967..f8e1af380cdf 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -521,7 +521,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
521 WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && 521 WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
522 this_cpu_read(irq_count) != -1); 522 this_cpu_read(irq_count) != -1);
523 523
524 switch_fpu_prepare(prev_fpu, cpu); 524 if (!test_thread_flag(TIF_NEED_FPU_LOAD))
525 switch_fpu_prepare(prev_fpu, cpu);
525 526
526 /* We must save %fs and %gs before load_TLS() because 527 /* We must save %fs and %gs before load_TLS() because
527 * %fs and %gs may be cleared by load_TLS(). 528 * %fs and %gs may be cleared by load_TLS().
@@ -539,9 +540,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
539 /* 540 /*
540 * Leave lazy mode, flushing any hypercalls made here. This 541 * Leave lazy mode, flushing any hypercalls made here. This
541 * must be done after loading TLS entries in the GDT but before 542 * must be done after loading TLS entries in the GDT but before
542 * loading segments that might reference them, and and it must 543 * loading segments that might reference them.
543 * be done before fpu__restore(), so the TS bit is up to
544 * date.
545 */ 544 */
546 arch_end_context_switch(next_p); 545 arch_end_context_switch(next_p);
547 546
@@ -569,14 +568,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
569 568
570 x86_fsgsbase_load(prev, next); 569 x86_fsgsbase_load(prev, next);
571 570
572 switch_fpu_finish(next_fpu, cpu);
573
574 /* 571 /*
575 * Switch the PDA and FPU contexts. 572 * Switch the PDA and FPU contexts.
576 */ 573 */
577 this_cpu_write(current_task, next_p); 574 this_cpu_write(current_task, next_p);
578 this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p)); 575 this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
579 576
577 switch_fpu_finish(next_fpu);
578
580 /* Reload sp0. */ 579 /* Reload sp0. */
581 update_task_stack(next_p); 580 update_task_stack(next_p);
582 581
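Both __switch_to() variants now skip the eager register save when TIF_NEED_FPU_LOAD is already set and call switch_fpu_finish() only after current_task has been updated. A condensed sketch of the resulting flow, with the segment/TLS/stack handling elided; this is an illustration of the pattern, not the real scheduler code.

static void sketch_context_switch_fpu(struct fpu *prev_fpu,
				      struct fpu *next_fpu, int cpu)
{
	/* The outgoing task's registers only need saving if they are still
	 * live, i.e. the task touched them since its last return to
	 * user space. */
	if (!test_thread_flag(TIF_NEED_FPU_LOAD))
		switch_fpu_prepare(prev_fpu, cpu);

	/* ... segment, TLS and stack switching ... */

	/* No register load here any more: the incoming task's state is
	 * loaded lazily on its next return to user space. */
	switch_fpu_finish(next_fpu);
}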
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index dff90fb6a9af..364813cea647 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -205,7 +205,7 @@ int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
205 put_user_ex(regs->ss, &sc->ss); 205 put_user_ex(regs->ss, &sc->ss);
206#endif /* CONFIG_X86_32 */ 206#endif /* CONFIG_X86_32 */
207 207
208 put_user_ex(fpstate, &sc->fpstate); 208 put_user_ex(fpstate, (unsigned long __user *)&sc->fpstate);
209 209
210 /* non-iBCS2 extensions.. */ 210 /* non-iBCS2 extensions.. */
211 put_user_ex(mask, &sc->oldmask); 211 put_user_ex(mask, &sc->oldmask);
@@ -245,7 +245,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
245 unsigned long sp = regs->sp; 245 unsigned long sp = regs->sp;
246 unsigned long buf_fx = 0; 246 unsigned long buf_fx = 0;
247 int onsigstack = on_sig_stack(sp); 247 int onsigstack = on_sig_stack(sp);
248 struct fpu *fpu = &current->thread.fpu; 248 int ret;
249 249
250 /* redzone */ 250 /* redzone */
251 if (IS_ENABLED(CONFIG_X86_64)) 251 if (IS_ENABLED(CONFIG_X86_64))
@@ -264,11 +264,9 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
264 sp = (unsigned long) ka->sa.sa_restorer; 264 sp = (unsigned long) ka->sa.sa_restorer;
265 } 265 }
266 266
267 if (fpu->initialized) { 267 sp = fpu__alloc_mathframe(sp, IS_ENABLED(CONFIG_X86_32),
268 sp = fpu__alloc_mathframe(sp, IS_ENABLED(CONFIG_X86_32), 268 &buf_fx, &math_size);
269 &buf_fx, &math_size); 269 *fpstate = (void __user *)sp;
270 *fpstate = (void __user *)sp;
271 }
272 270
273 sp = align_sigframe(sp - frame_size); 271 sp = align_sigframe(sp - frame_size);
274 272
@@ -280,8 +278,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
280 return (void __user *)-1L; 278 return (void __user *)-1L;
281 279
282 /* save i387 and extended state */ 280 /* save i387 and extended state */
283 if (fpu->initialized && 281 ret = copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size);
284 copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size) < 0) 282 if (ret < 0)
285 return (void __user *)-1L; 283 return (void __user *)-1L;
286 284
287 return (void __user *)sp; 285 return (void __user *)sp;
@@ -574,7 +572,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
574 restorer = NULL; 572 restorer = NULL;
575 err |= -EFAULT; 573 err |= -EFAULT;
576 } 574 }
577 put_user_ex(restorer, &frame->pretcode); 575 put_user_ex(restorer, (unsigned long __user *)&frame->pretcode);
578 } put_user_catch(err); 576 } put_user_catch(err);
579 577
580 err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, 578 err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
@@ -765,8 +763,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
765 /* 763 /*
766 * Ensure the signal handler starts with the new fpu state. 764 * Ensure the signal handler starts with the new fpu state.
767 */ 765 */
768 if (fpu->initialized) 766 fpu__clear(fpu);
769 fpu__clear(fpu);
770 } 767 }
771 signal_setup_done(failed, ksig, stepping); 768 signal_setup_done(failed, ksig, stepping);
772} 769}
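With fpu->initialized gone, get_sigframe() reserves and fills the math frame unconditionally. Roughly, and leaving out the redzone and sigaltstack handling shown earlier in the hunk (the helper names come from the diff, the wrapper itself is hypothetical):

static void __user *sketch_get_sigframe(unsigned long sp, size_t frame_size,
					void __user **fpstate,
					unsigned long *math_size)
{
	unsigned long buf_fx = 0;

	/* Always carve out room for the FPU/xstate frame. */
	sp = fpu__alloc_mathframe(sp, IS_ENABLED(CONFIG_X86_32),
				  &buf_fx, math_size);
	*fpstate = (void __user *)sp;

	sp = align_sigframe(sp - frame_size);

	/* ... stack access checks elided ... */

	/* Likewise, the state is always copied out to the signal frame. */
	if (copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx,
				     *math_size) < 0)
		return (void __user *)-1L;

	return (void __user *)sp;
}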
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index d26f9e9c3d83..8b6d03e55d2f 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -456,7 +456,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
456 * which is all zeros which indicates MPX was not 456 * which is all zeros which indicates MPX was not
457 * responsible for the exception. 457 * responsible for the exception.
458 */ 458 */
459 bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR); 459 bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR);
460 if (!bndcsr) 460 if (!bndcsr)
461 goto exit_trap; 461 goto exit_trap;
462 462
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 0c955bb286ff..9663d41cc2bc 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6500,7 +6500,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
6500 */ 6500 */
6501 if (static_cpu_has(X86_FEATURE_PKU) && 6501 if (static_cpu_has(X86_FEATURE_PKU) &&
6502 kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) { 6502 kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) {
6503 vcpu->arch.pkru = __read_pkru(); 6503 vcpu->arch.pkru = rdpkru();
6504 if (vcpu->arch.pkru != vmx->host_pkru) 6504 if (vcpu->arch.pkru != vmx->host_pkru)
6505 __write_pkru(vmx->host_pkru); 6505 __write_pkru(vmx->host_pkru);
6506 } 6506 }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b5edc8e3ce1d..d75bb97b983c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3681,15 +3681,15 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
3681 */ 3681 */
3682 valid = xstate_bv & ~XFEATURE_MASK_FPSSE; 3682 valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
3683 while (valid) { 3683 while (valid) {
3684 u64 feature = valid & -valid; 3684 u64 xfeature_mask = valid & -valid;
3685 int index = fls64(feature) - 1; 3685 int xfeature_nr = fls64(xfeature_mask) - 1;
3686 void *src = get_xsave_addr(xsave, feature); 3686 void *src = get_xsave_addr(xsave, xfeature_nr);
3687 3687
3688 if (src) { 3688 if (src) {
3689 u32 size, offset, ecx, edx; 3689 u32 size, offset, ecx, edx;
3690 cpuid_count(XSTATE_CPUID, index, 3690 cpuid_count(XSTATE_CPUID, xfeature_nr,
3691 &size, &offset, &ecx, &edx); 3691 &size, &offset, &ecx, &edx);
3692 if (feature == XFEATURE_MASK_PKRU) 3692 if (xfeature_nr == XFEATURE_PKRU)
3693 memcpy(dest + offset, &vcpu->arch.pkru, 3693 memcpy(dest + offset, &vcpu->arch.pkru,
3694 sizeof(vcpu->arch.pkru)); 3694 sizeof(vcpu->arch.pkru));
3695 else 3695 else
@@ -3697,7 +3697,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
3697 3697
3698 } 3698 }
3699 3699
3700 valid -= feature; 3700 valid -= xfeature_mask;
3701 } 3701 }
3702} 3702}
3703 3703
@@ -3724,22 +3724,22 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
3724 */ 3724 */
3725 valid = xstate_bv & ~XFEATURE_MASK_FPSSE; 3725 valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
3726 while (valid) { 3726 while (valid) {
3727 u64 feature = valid & -valid; 3727 u64 xfeature_mask = valid & -valid;
3728 int index = fls64(feature) - 1; 3728 int xfeature_nr = fls64(xfeature_mask) - 1;
3729 void *dest = get_xsave_addr(xsave, feature); 3729 void *dest = get_xsave_addr(xsave, xfeature_nr);
3730 3730
3731 if (dest) { 3731 if (dest) {
3732 u32 size, offset, ecx, edx; 3732 u32 size, offset, ecx, edx;
3733 cpuid_count(XSTATE_CPUID, index, 3733 cpuid_count(XSTATE_CPUID, xfeature_nr,
3734 &size, &offset, &ecx, &edx); 3734 &size, &offset, &ecx, &edx);
3735 if (feature == XFEATURE_MASK_PKRU) 3735 if (xfeature_nr == XFEATURE_PKRU)
3736 memcpy(&vcpu->arch.pkru, src + offset, 3736 memcpy(&vcpu->arch.pkru, src + offset,
3737 sizeof(vcpu->arch.pkru)); 3737 sizeof(vcpu->arch.pkru));
3738 else 3738 else
3739 memcpy(dest, src + offset, size); 3739 memcpy(dest, src + offset, size);
3740 } 3740 }
3741 3741
3742 valid -= feature; 3742 valid -= xfeature_mask;
3743 } 3743 }
3744} 3744}
3745 3745
@@ -7899,6 +7899,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
7899 wait_lapic_expire(vcpu); 7899 wait_lapic_expire(vcpu);
7900 guest_enter_irqoff(); 7900 guest_enter_irqoff();
7901 7901
7902 fpregs_assert_state_consistent();
7903 if (test_thread_flag(TIF_NEED_FPU_LOAD))
7904 switch_fpu_return();
7905
7902 if (unlikely(vcpu->arch.switch_db_regs)) { 7906 if (unlikely(vcpu->arch.switch_db_regs)) {
7903 set_debugreg(0, 7); 7907 set_debugreg(0, 7);
7904 set_debugreg(vcpu->arch.eff_db[0], 0); 7908 set_debugreg(vcpu->arch.eff_db[0], 0);
@@ -8157,22 +8161,30 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
8157/* Swap (qemu) user FPU context for the guest FPU context. */ 8161/* Swap (qemu) user FPU context for the guest FPU context. */
8158static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) 8162static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
8159{ 8163{
8160 preempt_disable(); 8164 fpregs_lock();
8165
8161 copy_fpregs_to_fpstate(&current->thread.fpu); 8166 copy_fpregs_to_fpstate(&current->thread.fpu);
8162 /* PKRU is separately restored in kvm_x86_ops->run. */ 8167 /* PKRU is separately restored in kvm_x86_ops->run. */
8163 __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state, 8168 __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state,
8164 ~XFEATURE_MASK_PKRU); 8169 ~XFEATURE_MASK_PKRU);
8165 preempt_enable(); 8170
8171 fpregs_mark_activate();
8172 fpregs_unlock();
8173
8166 trace_kvm_fpu(1); 8174 trace_kvm_fpu(1);
8167} 8175}
8168 8176
8169/* When vcpu_run ends, restore user space FPU context. */ 8177/* When vcpu_run ends, restore user space FPU context. */
8170static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) 8178static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
8171{ 8179{
8172 preempt_disable(); 8180 fpregs_lock();
8181
8173 copy_fpregs_to_fpstate(vcpu->arch.guest_fpu); 8182 copy_fpregs_to_fpstate(vcpu->arch.guest_fpu);
8174 copy_kernel_to_fpregs(&current->thread.fpu.state); 8183 copy_kernel_to_fpregs(&current->thread.fpu.state);
8175 preempt_enable(); 8184
8185 fpregs_mark_activate();
8186 fpregs_unlock();
8187
8176 ++vcpu->stat.fpu_reload; 8188 ++vcpu->stat.fpu_reload;
8177 trace_kvm_fpu(0); 8189 trace_kvm_fpu(0);
8178} 8190}
@@ -8870,11 +8882,11 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
8870 if (init_event) 8882 if (init_event)
8871 kvm_put_guest_fpu(vcpu); 8883 kvm_put_guest_fpu(vcpu);
8872 mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave, 8884 mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
8873 XFEATURE_MASK_BNDREGS); 8885 XFEATURE_BNDREGS);
8874 if (mpx_state_buffer) 8886 if (mpx_state_buffer)
8875 memset(mpx_state_buffer, 0, sizeof(struct mpx_bndreg_state)); 8887 memset(mpx_state_buffer, 0, sizeof(struct mpx_bndreg_state));
8876 mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave, 8888 mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
8877 XFEATURE_MASK_BNDCSR); 8889 XFEATURE_BNDCSR);
8878 if (mpx_state_buffer) 8890 if (mpx_state_buffer)
8879 memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr)); 8891 memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr));
8880 if (init_event) 8892 if (init_event)
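The KVM hunks replace the preempt_disable()/preempt_enable() bracket around the FPU swap with fpregs_lock()/fpregs_unlock() and mark the registers active afterwards; vcpu_enter_guest() additionally reloads the FPU via switch_fpu_return() when TIF_NEED_FPU_LOAD is set. A simplified sketch of the load side, using only calls visible in the diff:

static void sketch_load_guest_fpu(struct kvm_vcpu *vcpu)
{
	fpregs_lock();

	/* Stash the user-space (qemu) FPU state ... */
	copy_fpregs_to_fpstate(&current->thread.fpu);
	/* ... and load the guest state; PKRU is restored separately in
	 * kvm_x86_ops->run. */
	__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state,
				~XFEATURE_MASK_PKRU);

	/* Tell the deferred-load machinery that the registers now belong to
	 * current, so they are preserved across the return to VCPU_RUN. */
	fpregs_mark_activate();
	fpregs_unlock();
}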
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c
index 9e2ba7e667f6..a873da6b46d6 100644
--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -113,9 +113,6 @@ void math_emulate(struct math_emu_info *info)
113 unsigned long code_base = 0; 113 unsigned long code_base = 0;
114 unsigned long code_limit = 0; /* Initialized to stop compiler warnings */ 114 unsigned long code_limit = 0; /* Initialized to stop compiler warnings */
115 struct desc_struct code_descriptor; 115 struct desc_struct code_descriptor;
116 struct fpu *fpu = &current->thread.fpu;
117
118 fpu__initialize(fpu);
119 116
120#ifdef RE_ENTRANT_CHECKING 117#ifdef RE_ENTRANT_CHECKING
121 if (emulating) { 118 if (emulating) {
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index c805db6236b4..59726aaf4671 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -142,7 +142,7 @@ int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs)
142 goto err_out; 142 goto err_out;
143 } 143 }
144 /* get bndregs field from current task's xsave area */ 144 /* get bndregs field from current task's xsave area */
145 bndregs = get_xsave_field_ptr(XFEATURE_MASK_BNDREGS); 145 bndregs = get_xsave_field_ptr(XFEATURE_BNDREGS);
146 if (!bndregs) { 146 if (!bndregs) {
147 err = -EINVAL; 147 err = -EINVAL;
148 goto err_out; 148 goto err_out;
@@ -190,7 +190,7 @@ static __user void *mpx_get_bounds_dir(void)
190 * The bounds directory pointer is stored in a register 190 * The bounds directory pointer is stored in a register
191 * only accessible if we first do an xsave. 191 * only accessible if we first do an xsave.
192 */ 192 */
193 bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR); 193 bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR);
194 if (!bndcsr) 194 if (!bndcsr)
195 return MPX_INVALID_BOUNDS_DIR; 195 return MPX_INVALID_BOUNDS_DIR;
196 196
@@ -376,7 +376,7 @@ static int do_mpx_bt_fault(void)
376 const struct mpx_bndcsr *bndcsr; 376 const struct mpx_bndcsr *bndcsr;
377 struct mm_struct *mm = current->mm; 377 struct mm_struct *mm = current->mm;
378 378
379 bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR); 379 bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR);
380 if (!bndcsr) 380 if (!bndcsr)
381 return -EINVAL; 381 return -EINVAL;
382 /* 382 /*
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c
index 047a77f6a10c..1dcfc91c8f0c 100644
--- a/arch/x86/mm/pkeys.c
+++ b/arch/x86/mm/pkeys.c
@@ -18,6 +18,7 @@
18 18
19#include <asm/cpufeature.h> /* boot_cpu_has, ... */ 19#include <asm/cpufeature.h> /* boot_cpu_has, ... */
20#include <asm/mmu_context.h> /* vma_pkey() */ 20#include <asm/mmu_context.h> /* vma_pkey() */
21#include <asm/fpu/internal.h> /* init_fpstate */
21 22
22int __execute_only_pkey(struct mm_struct *mm) 23int __execute_only_pkey(struct mm_struct *mm)
23{ 24{
@@ -39,17 +40,12 @@ int __execute_only_pkey(struct mm_struct *mm)
39 * dance to set PKRU if we do not need to. Check it 40 * dance to set PKRU if we do not need to. Check it
40 * first and assume that if the execute-only pkey is 41 * first and assume that if the execute-only pkey is
41 * write-disabled that we do not have to set it 42 * write-disabled that we do not have to set it
42 * ourselves. We need preempt off so that nobody 43 * ourselves.
43 * can make fpregs inactive.
44 */ 44 */
45 preempt_disable();
46 if (!need_to_set_mm_pkey && 45 if (!need_to_set_mm_pkey &&
47 current->thread.fpu.initialized &&
48 !__pkru_allows_read(read_pkru(), execute_only_pkey)) { 46 !__pkru_allows_read(read_pkru(), execute_only_pkey)) {
49 preempt_enable();
50 return execute_only_pkey; 47 return execute_only_pkey;
51 } 48 }
52 preempt_enable();
53 49
54 /* 50 /*
55 * Set up PKRU so that it denies access for everything 51 * Set up PKRU so that it denies access for everything
@@ -131,7 +127,6 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey
131 * in the process's lifetime will not accidentally get access 127 * in the process's lifetime will not accidentally get access
132 * to data which is pkey-protected later on. 128 * to data which is pkey-protected later on.
133 */ 129 */
134static
135u32 init_pkru_value = PKRU_AD_KEY( 1) | PKRU_AD_KEY( 2) | PKRU_AD_KEY( 3) | 130u32 init_pkru_value = PKRU_AD_KEY( 1) | PKRU_AD_KEY( 2) | PKRU_AD_KEY( 3) |
136 PKRU_AD_KEY( 4) | PKRU_AD_KEY( 5) | PKRU_AD_KEY( 6) | 131 PKRU_AD_KEY( 4) | PKRU_AD_KEY( 5) | PKRU_AD_KEY( 6) |
137 PKRU_AD_KEY( 7) | PKRU_AD_KEY( 8) | PKRU_AD_KEY( 9) | 132 PKRU_AD_KEY( 7) | PKRU_AD_KEY( 8) | PKRU_AD_KEY( 9) |
@@ -148,13 +143,6 @@ void copy_init_pkru_to_fpregs(void)
148{ 143{
149 u32 init_pkru_value_snapshot = READ_ONCE(init_pkru_value); 144 u32 init_pkru_value_snapshot = READ_ONCE(init_pkru_value);
150 /* 145 /*
151 * Any write to PKRU takes it out of the XSAVE 'init
152 * state' which increases context switch cost. Avoid
153 * writing 0 when PKRU was already 0.
154 */
155 if (!init_pkru_value_snapshot && !read_pkru())
156 return;
157 /*
158 * Override the PKRU state that came from 'init_fpstate' 146 * Override the PKRU state that came from 'init_fpstate'
159 * with the baseline from the process. 147 * with the baseline from the process.
160 */ 148 */
@@ -174,6 +162,7 @@ static ssize_t init_pkru_read_file(struct file *file, char __user *user_buf,
174static ssize_t init_pkru_write_file(struct file *file, 162static ssize_t init_pkru_write_file(struct file *file,
175 const char __user *user_buf, size_t count, loff_t *ppos) 163 const char __user *user_buf, size_t count, loff_t *ppos)
176{ 164{
165 struct pkru_state *pk;
177 char buf[32]; 166 char buf[32];
178 ssize_t len; 167 ssize_t len;
179 u32 new_init_pkru; 168 u32 new_init_pkru;
@@ -196,6 +185,10 @@ static ssize_t init_pkru_write_file(struct file *file,
196 return -EINVAL; 185 return -EINVAL;
197 186
198 WRITE_ONCE(init_pkru_value, new_init_pkru); 187 WRITE_ONCE(init_pkru_value, new_init_pkru);
188 pk = get_xsave_addr(&init_fpstate.xsave, XFEATURE_PKRU);
189 if (!pk)
190 return -EINVAL;
191 pk->pkru = new_init_pkru;
199 return count; 192 return count;
200} 193}
201 194
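Since copy_init_pkru_to_fpregs() no longer short-circuits on a zero value, init_pkru_write_file() now also patches the PKRU slot inside init_fpstate so tasks starting from the init state pick up the updated default. A sketch of that tail, with error handling reduced to the check the diff adds; the surrounding parsing of the debugfs write is omitted.

static ssize_t sketch_commit_init_pkru(u32 new_init_pkru, size_t count)
{
	struct pkru_state *pk;

	WRITE_ONCE(init_pkru_value, new_init_pkru);

	/* Mirror the new default into the init xstate image as well. */
	pk = get_xsave_addr(&init_fpstate.xsave, XFEATURE_PKRU);
	if (!pk)
		return -EINVAL;
	pk->pkru = new_init_pkru;

	return count;
}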