commit ac07f5c3cb0cf19258c55cdf210aa4ac91ca7330
tree 6741e6ca18db5e83641064fd1bdc31a818a19a21
parent 3b29b03a462346473b7d0e6c6013fe093a4ac0d1
parent b1a74bf8212367be2b1d6685c11a84e056eaaaf1
author Linus Torvalds <torvalds@linux-foundation.org> 2012-10-01 14:10:52 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2012-10-01 14:10:52 -0400
Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86/fpu update from Ingo Molnar:
"The biggest change is the addition of the non-lazy (eager) FPU saving
support model and enabling it on CPUs with optimized xsaveopt/xrstor
FPU state saving instructions.
There are also various Sparse fixes"
Fix up trivial add-add conflict in arch/x86/kernel/traps.c
* 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86, kvm: fix kvm's usage of kernel_fpu_begin/end()
x86, fpu: remove cpu_has_xmm check in the fx_finit()
x86, fpu: make eagerfpu= boot param tri-state
x86, fpu: enable eagerfpu by default for xsaveopt
x86, fpu: decouple non-lazy/eager fpu restore from xsave
x86, fpu: use non-lazy fpu restore for processors supporting xsave
lguest, x86: handle guest TS bit for lazy/non-lazy fpu host models
x86, fpu: always use kernel_fpu_begin/end() for in-kernel FPU usage
x86, kvm: use kernel_fpu_begin/end() in kvm_load/put_guest_fpu()
x86, fpu: remove unnecessary user_fpu_end() in save_xstate_sig()
x86, fpu: drop_fpu() before restoring new state from sigframe
x86, fpu: Unify signal handling code paths for x86 and x86_64 kernels
x86, fpu: Consolidate inline asm routines for saving/restoring fpu state
x86, signal: Cleanup ifdefs and is_ia32, is_x32
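The lazy/eager distinction above is the heart of the series: the lazy model sets CR0.TS at context switch and faults the FPU state in at first use, while the eager model restores the next task's state during the switch itself. A minimal user-space sketch of the two policies (illustration only; the struct and function names below are made up and this is not kernel code):

	#include <stdbool.h>
	#include <stdio.h>
	#include <string.h>

	/* Hypothetical model of lazy vs. eager FPU restore; not kernel code. */
	struct task {
		char fpu_state[64];	/* stand-in for the real xsave area */
		bool fpu_loaded;	/* stand-in for "has_fpu" / TS clear */
	};

	static bool eager_fpu = true;	/* true models eagerfpu=on, false lazy */
	static char cpu_regs[64];	/* stand-in for the FPU register file */

	static void switch_fpu(struct task *prev, struct task *next)
	{
		if (prev->fpu_loaded) {
			memcpy(prev->fpu_state, cpu_regs, sizeof(cpu_regs));
			prev->fpu_loaded = false;
		}
		if (eager_fpu) {
			/* Eager: restore now; no #NM trap on first use later. */
			memcpy(cpu_regs, next->fpu_state, sizeof(cpu_regs));
			next->fpu_loaded = true;
		}
		/*
		 * Lazy: do nothing; the device-not-available (#NM) trap taken
		 * on next's first FPU instruction performs the restore.
		 */
	}

	int main(void)
	{
		struct task a = { .fpu_state = "A" }, b = { .fpu_state = "B" };

		switch_fpu(&a, &b);
		printf("b restored eagerly: %s\n", b.fpu_loaded ? "yes" : "no");
		return 0;
	}

With xsaveopt the eager path becomes cheap, because the hardware skips writing state components that are still in their init configuration or unmodified, which is why the series enables eager restore by default on xsaveopt-capable CPUs.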
 Documentation/kernel-parameters.txt |   6
 arch/x86/ia32/ia32_signal.c         |   9
 arch/x86/include/asm/cpufeature.h   |   3
 arch/x86/include/asm/fpu-internal.h | 390
 arch/x86/include/asm/i387.h         |  29
 arch/x86/include/asm/signal.h       |   4
 arch/x86/include/asm/xor_32.h       |  56
 arch/x86/include/asm/xor_64.h       |  61
 arch/x86/include/asm/xor_avx.h      |  54
 arch/x86/include/asm/xsave.h        |  13
 arch/x86/kernel/cpu/bugs.c          |   7
 arch/x86/kernel/cpu/common.c        |   2
 arch/x86/kernel/i387.c              | 292
 arch/x86/kernel/process.c           |  22
 arch/x86/kernel/process_32.c        |   4
 arch/x86/kernel/process_64.c        |   4
 arch/x86/kernel/ptrace.c            |   3
 arch/x86/kernel/signal.c            | 211
 arch/x86/kernel/traps.c             |   5
 arch/x86/kernel/xsave.c             | 517
 arch/x86/kvm/vmx.c                  |  10
 arch/x86/kvm/x86.c                  |   3
 drivers/lguest/x86/core.c           |  10
 23 files changed, 791 insertions(+), 924 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index d1cda8d892aa..df551dfa8e52 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1833,6 +1833,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			and restore using xsave. The kernel will fallback to
 			enabling legacy floating-point and sse state.
 
+	eagerfpu=	[X86]
+			on	enable eager fpu restore
+			off	disable eager fpu restore
+			auto	selects the default scheme, which automatically
+				enables eagerfpu restore for xsaveopt.
+
 	nohlt		[BUGS=ARM,SH] Tells the kernel that the sleep(SH) or
 			wfi(ARM) instruction doesn't work correctly and not to
 			use it. This is also useful when using JTAG debugger.
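As a usage note (not part of the patch), the new option rides on the kernel command line like any other parameter; a hypothetical GRUB entry, with illustrative paths:

	linux /boot/vmlinuz-3.7.0 root=/dev/sda1 ro quiet eagerfpu=on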
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 452d4dd0a95a..8c77c64fbd27 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -251,7 +251,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
 
 		get_user_ex(tmp, &sc->fpstate);
 		buf = compat_ptr(tmp);
-		err |= restore_i387_xstate_ia32(buf);
+		err |= restore_xstate_sig(buf, 1);
 
 		get_user_ex(*pax, &sc->ax);
 	} get_user_catch(err);
@@ -382,9 +382,12 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
 		sp = (unsigned long) ka->sa.sa_restorer;
 
 	if (used_math()) {
-		sp = sp - sig_xstate_ia32_size;
+		unsigned long fx_aligned, math_size;
+
+		sp = alloc_mathframe(sp, 1, &fx_aligned, &math_size);
 		*fpstate = (struct _fpstate_ia32 __user *) sp;
-		if (save_i387_xstate_ia32(*fpstate) < 0)
+		if (save_xstate_sig(*fpstate, (void __user *)fx_aligned,
+				    math_size) < 0)
 			return (void __user *) -1L;
 	}
 
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 633b6176cf60..16cae425d1f8 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -97,6 +97,7 @@
 #define X86_FEATURE_EXTD_APICID	(3*32+26) /* has extended APICID (8 bits) */
 #define X86_FEATURE_AMD_DCM	(3*32+27) /* multi-node processor */
 #define X86_FEATURE_APERFMPERF	(3*32+28) /* APERFMPERF */
+#define X86_FEATURE_EAGER_FPU	(3*32+29) /* "eagerfpu" Non lazy FPU restore */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* "pni" SSE-3 */
@@ -300,12 +301,14 @@ extern const char * const x86_power_flags[32];
 #define cpu_has_xmm4_2		boot_cpu_has(X86_FEATURE_XMM4_2)
 #define cpu_has_x2apic		boot_cpu_has(X86_FEATURE_X2APIC)
 #define cpu_has_xsave		boot_cpu_has(X86_FEATURE_XSAVE)
+#define cpu_has_xsaveopt	boot_cpu_has(X86_FEATURE_XSAVEOPT)
 #define cpu_has_osxsave		boot_cpu_has(X86_FEATURE_OSXSAVE)
 #define cpu_has_hypervisor	boot_cpu_has(X86_FEATURE_HYPERVISOR)
 #define cpu_has_pclmulqdq	boot_cpu_has(X86_FEATURE_PCLMULQDQ)
 #define cpu_has_perfctr_core	boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
 #define cpu_has_cx8		boot_cpu_has(X86_FEATURE_CX8)
 #define cpu_has_cx16		boot_cpu_has(X86_FEATURE_CX16)
+#define cpu_has_eager_fpu	boot_cpu_has(X86_FEATURE_EAGER_FPU)
 
 #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
 # define cpu_has_invlpg		1
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 75f4c6d6a331..92f3c6ed817f 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -12,6 +12,7 @@
 
 #include <linux/kernel_stat.h>
 #include <linux/regset.h>
+#include <linux/compat.h>
 #include <linux/slab.h>
 #include <asm/asm.h>
 #include <asm/cpufeature.h>
@@ -21,42 +22,74 @@
 #include <asm/uaccess.h>
 #include <asm/xsave.h>
 
-extern unsigned int sig_xstate_size;
+#ifdef CONFIG_X86_64
+# include <asm/sigcontext32.h>
+# include <asm/user32.h>
+int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+			compat_sigset_t *set, struct pt_regs *regs);
+int ia32_setup_frame(int sig, struct k_sigaction *ka,
+		     compat_sigset_t *set, struct pt_regs *regs);
+#else
+# define user_i387_ia32_struct	user_i387_struct
+# define user32_fxsr_struct	user_fxsr_struct
+# define ia32_setup_frame	__setup_frame
+# define ia32_setup_rt_frame	__setup_rt_frame
+#endif
+
+extern unsigned int mxcsr_feature_mask;
 extern void fpu_init(void);
+extern void eager_fpu_init(void);
 
 DECLARE_PER_CPU(struct task_struct *, fpu_owner_task);
 
+extern void convert_from_fxsr(struct user_i387_ia32_struct *env,
+			      struct task_struct *tsk);
+extern void convert_to_fxsr(struct task_struct *tsk,
+			    const struct user_i387_ia32_struct *env);
+
 extern user_regset_active_fn fpregs_active, xfpregs_active;
 extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get,
 				xstateregs_get;
 extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
 				 xstateregs_set;
 
-
 /*
  * xstateregs_active == fpregs_active. Please refer to the comment
  * at the definition of fpregs_active.
  */
 #define xstateregs_active	fpregs_active
 
-extern struct _fpx_sw_bytes fx_sw_reserved;
-#ifdef CONFIG_IA32_EMULATION
-extern unsigned int sig_xstate_ia32_size;
-extern struct _fpx_sw_bytes fx_sw_reserved_ia32;
-struct _fpstate_ia32;
-struct _xstate_ia32;
-extern int save_i387_xstate_ia32(void __user *buf);
-extern int restore_i387_xstate_ia32(void __user *buf);
-#endif
-
 #ifdef CONFIG_MATH_EMULATION
+# define HAVE_HWFP	(boot_cpu_data.hard_math)
 extern void finit_soft_fpu(struct i387_soft_struct *soft);
 #else
+# define HAVE_HWFP	1
 static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
 #endif
 
+static inline int is_ia32_compat_frame(void)
+{
+	return config_enabled(CONFIG_IA32_EMULATION) &&
+	       test_thread_flag(TIF_IA32);
+}
+
+static inline int is_ia32_frame(void)
+{
+	return config_enabled(CONFIG_X86_32) || is_ia32_compat_frame();
+}
+
+static inline int is_x32_frame(void)
+{
+	return config_enabled(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32);
+}
+
 #define X87_FSW_ES (1 << 7)	/* Exception Summary */
 
+static __always_inline __pure bool use_eager_fpu(void)
+{
+	return static_cpu_has(X86_FEATURE_EAGER_FPU);
+}
+
 static __always_inline __pure bool use_xsaveopt(void)
 {
 	return static_cpu_has(X86_FEATURE_XSAVEOPT);
@@ -72,6 +105,13 @@ static __always_inline __pure bool use_fxsr(void)
 	return static_cpu_has(X86_FEATURE_FXSR);
 }
 
+static inline void fx_finit(struct i387_fxsave_struct *fx)
+{
+	memset(fx, 0, xstate_size);
+	fx->cwd = 0x37f;
+	fx->mxcsr = MXCSR_DEFAULT;
+}
+
 extern void __sanitize_i387_state(struct task_struct *);
 
 static inline void sanitize_i387_state(struct task_struct *tsk)
@@ -81,131 +121,88 @@ static inline void sanitize_i387_state(struct task_struct *tsk)
 	__sanitize_i387_state(tsk);
 }
 
-#ifdef CONFIG_X86_64
-static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
+#define check_insn(insn, output, input...)			\
+({								\
+	int err;						\
+	asm volatile("1:" #insn "\n\t"				\
+		     "2:\n"					\
+		     ".section .fixup,\"ax\"\n"			\
+		     "3:  movl $-1,%[err]\n"			\
+		     "    jmp  2b\n"				\
+		     ".previous\n"				\
+		     _ASM_EXTABLE(1b, 3b)			\
+		     : [err] "=r" (err), output			\
+		     : "0"(0), input);				\
+	err;							\
+})
+
+static inline int fsave_user(struct i387_fsave_struct __user *fx)
 {
-	int err;
-
-	/* See comment in fxsave() below. */
-#ifdef CONFIG_AS_FXSAVEQ
-	asm volatile("1:  fxrstorq %[fx]\n\t"
-		     "2:\n"
-		     ".section .fixup,\"ax\"\n"
-		     "3:  movl $-1,%[err]\n"
-		     "    jmp  2b\n"
-		     ".previous\n"
-		     _ASM_EXTABLE(1b, 3b)
-		     : [err] "=r" (err)
-		     : [fx] "m" (*fx), "0" (0));
-#else
-	asm volatile("1:  rex64/fxrstor (%[fx])\n\t"
-		     "2:\n"
-		     ".section .fixup,\"ax\"\n"
-		     "3:  movl $-1,%[err]\n"
-		     "    jmp  2b\n"
-		     ".previous\n"
-		     _ASM_EXTABLE(1b, 3b)
-		     : [err] "=r" (err)
-		     : [fx] "R" (fx), "m" (*fx), "0" (0));
-#endif
-	return err;
+	return check_insn(fnsave %[fx]; fwait,  [fx] "=m" (*fx), "m" (*fx));
 }
 
 static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
 {
-	int err;
+	if (config_enabled(CONFIG_X86_32))
+		return check_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
+	else if (config_enabled(CONFIG_AS_FXSAVEQ))
+		return check_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));
 
-	/*
-	 * Clear the bytes not touched by the fxsave and reserved
-	 * for the SW usage.
-	 */
-	err = __clear_user(&fx->sw_reserved,
-			   sizeof(struct _fpx_sw_bytes));
-	if (unlikely(err))
-		return -EFAULT;
-
-	/* See comment in fxsave() below. */
-#ifdef CONFIG_AS_FXSAVEQ
-	asm volatile("1:  fxsaveq %[fx]\n\t"
-		     "2:\n"
-		     ".section .fixup,\"ax\"\n"
-		     "3:  movl $-1,%[err]\n"
-		     "    jmp  2b\n"
-		     ".previous\n"
-		     _ASM_EXTABLE(1b, 3b)
-		     : [err] "=r" (err), [fx] "=m" (*fx)
-		     : "0" (0));
-#else
-	asm volatile("1:  rex64/fxsave (%[fx])\n\t"
-		     "2:\n"
-		     ".section .fixup,\"ax\"\n"
-		     "3:  movl $-1,%[err]\n"
-		     "    jmp  2b\n"
-		     ".previous\n"
-		     _ASM_EXTABLE(1b, 3b)
-		     : [err] "=r" (err), "=m" (*fx)
-		     : [fx] "R" (fx), "0" (0));
-#endif
-	if (unlikely(err) &&
-	    __clear_user(fx, sizeof(struct i387_fxsave_struct)))
-		err = -EFAULT;
-	/* No need to clear here because the caller clears USED_MATH */
-	return err;
+	/* See comment in fpu_fxsave() below. */
+	return check_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx));
 }
 
-static inline void fpu_fxsave(struct fpu *fpu)
+static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
 {
-	/* Using "rex64; fxsave %0" is broken because, if the memory operand
-	   uses any extended registers for addressing, a second REX prefix
-	   will be generated (to the assembler, rex64 followed by semicolon
-	   is a separate instruction), and hence the 64-bitness is lost. */
+	if (config_enabled(CONFIG_X86_32))
+		return check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+	else if (config_enabled(CONFIG_AS_FXSAVEQ))
+		return check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
 
-#ifdef CONFIG_AS_FXSAVEQ
-	/* Using "fxsaveq %0" would be the ideal choice, but is only supported
-	   starting with gas 2.16. */
-	__asm__ __volatile__("fxsaveq %0"
-			     : "=m" (fpu->state->fxsave));
-#else
-	/* Using, as a workaround, the properly prefixed form below isn't
-	   accepted by any binutils version so far released, complaining that
-	   the same type of prefix is used twice if an extended register is
-	   needed for addressing (fix submitted to mainline 2005-11-21).
-	asm volatile("rex64/fxsave %0"
-		     : "=m" (fpu->state->fxsave));
-	   This, however, we can work around by forcing the compiler to select
-	   an addressing mode that doesn't require extended registers. */
-	asm volatile("rex64/fxsave (%[fx])"
-		     : "=m" (fpu->state->fxsave)
-		     : [fx] "R" (&fpu->state->fxsave));
-#endif
+	/* See comment in fpu_fxsave() below. */
+	return check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx),
+			  "m" (*fx));
 }
 
-#else  /* CONFIG_X86_32 */
-
-/* perform fxrstor iff the processor has extended states, otherwise frstor */
-static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
+static inline int frstor_checking(struct i387_fsave_struct *fx)
 {
-	/*
-	 * The "nop" is needed to make the instructions the same
-	 * length.
-	 */
-	alternative_input(
-		"nop ; frstor %1",
-		"fxrstor %1",
-		X86_FEATURE_FXSR,
-		"m" (*fx));
-
-	return 0;
+	return check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
 }
 
 static inline void fpu_fxsave(struct fpu *fpu)
 {
-	asm volatile("fxsave %[fx]"
-		     : [fx] "=m" (fpu->state->fxsave));
+	if (config_enabled(CONFIG_X86_32))
+		asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state->fxsave));
+	else if (config_enabled(CONFIG_AS_FXSAVEQ))
+		asm volatile("fxsaveq %0" : "=m" (fpu->state->fxsave));
+	else {
+		/* Using "rex64; fxsave %0" is broken because, if the memory
+		 * operand uses any extended registers for addressing, a
+		 * second REX prefix will be generated (to the assembler,
+		 * rex64 followed by semicolon is a separate instruction),
+		 * and hence the 64-bitness is lost.
+		 *
+		 * Using "fxsaveq %0" would be the ideal choice, but is only
+		 * supported starting with gas 2.16.
+		 *
+		 * Using, as a workaround, the properly prefixed form below
+		 * isn't accepted by any binutils version so far released,
+		 * complaining that the same type of prefix is used twice if
+		 * an extended register is needed for addressing (fix
+		 * submitted to mainline 2005-11-21).
+		 *
+		 *  asm volatile("rex64/fxsave %0" : "=m" (fpu->state->fxsave));
+		 *
+		 * This, however, we can work around by forcing the compiler
+		 * to select an addressing mode that doesn't require extended
+		 * registers.
+		 */
		asm volatile( "rex64/fxsave (%[fx])"
+			     : "=m" (fpu->state->fxsave)
+			     : [fx] "R" (&fpu->state->fxsave));
+	}
 }
 
-#endif	/* CONFIG_X86_64 */
-
 /*
  * These must be called with preempt disabled. Returns
  * 'true' if the FPU state is still intact.
@@ -248,17 +245,14 @@ static inline int __save_init_fpu(struct task_struct *tsk)
 	return fpu_save_init(&tsk->thread.fpu);
 }
 
-static inline int fpu_fxrstor_checking(struct fpu *fpu)
-{
-	return fxrstor_checking(&fpu->state->fxsave);
-}
-
 static inline int fpu_restore_checking(struct fpu *fpu)
 {
 	if (use_xsave())
-		return fpu_xrstor_checking(fpu);
+		return fpu_xrstor_checking(&fpu->state->xsave);
+	else if (use_fxsr())
+		return fxrstor_checking(&fpu->state->fxsave);
 	else
-		return fpu_fxrstor_checking(fpu);
+		return frstor_checking(&fpu->state->fsave);
 }
 
 static inline int restore_fpu_checking(struct task_struct *tsk)
@@ -310,15 +304,52 @@ static inline void __thread_set_has_fpu(struct task_struct *tsk)
 static inline void __thread_fpu_end(struct task_struct *tsk)
 {
 	__thread_clear_has_fpu(tsk);
-	stts();
+	if (!use_eager_fpu())
+		stts();
 }
 
 static inline void __thread_fpu_begin(struct task_struct *tsk)
 {
-	clts();
+	if (!use_eager_fpu())
+		clts();
 	__thread_set_has_fpu(tsk);
 }
 
+static inline void __drop_fpu(struct task_struct *tsk)
+{
+	if (__thread_has_fpu(tsk)) {
+		/* Ignore delayed exceptions from user space */
+		asm volatile("1: fwait\n"
+			     "2:\n"
+			     _ASM_EXTABLE(1b, 2b));
+		__thread_fpu_end(tsk);
+	}
+}
+
+static inline void drop_fpu(struct task_struct *tsk)
+{
+	/*
+	 * Forget coprocessor state..
+	 */
+	preempt_disable();
+	tsk->fpu_counter = 0;
+	__drop_fpu(tsk);
+	clear_used_math();
+	preempt_enable();
+}
+
+static inline void drop_init_fpu(struct task_struct *tsk)
+{
+	if (!use_eager_fpu())
+		drop_fpu(tsk);
+	else {
+		if (use_xsave())
+			xrstor_state(init_xstate_buf, -1);
+		else
+			fxrstor_checking(&init_xstate_buf->i387);
+	}
+}
+
 /*
  * FPU state switching for scheduling.
  *
@@ -352,7 +383,12 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
 {
 	fpu_switch_t fpu;
 
-	fpu.preload = tsk_used_math(new) && new->fpu_counter > 5;
+	/*
+	 * If the task has used the math, pre-load the FPU on xsave processors
+	 * or if the past 5 consecutive context-switches used math.
+	 */
+	fpu.preload = tsk_used_math(new) && (use_eager_fpu() ||
+					     new->fpu_counter > 5);
 	if (__thread_has_fpu(old)) {
 		if (!__save_init_fpu(old))
 			cpu = ~0;
@@ -364,14 +400,14 @@
 			new->fpu_counter++;
 			__thread_set_has_fpu(new);
 			prefetch(new->thread.fpu.state);
-		} else
+		} else if (!use_eager_fpu())
 			stts();
 	} else {
 		old->fpu_counter = 0;
 		old->thread.fpu.last_cpu = ~0;
 		if (fpu.preload) {
 			new->fpu_counter++;
-			if (fpu_lazy_restore(new, cpu))
+			if (!use_eager_fpu() && fpu_lazy_restore(new, cpu))
 				fpu.preload = 0;
 			else
 				prefetch(new->thread.fpu.state);
@@ -391,44 +427,40 @@ static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
 {
 	if (fpu.preload) {
 		if (unlikely(restore_fpu_checking(new)))
-			__thread_fpu_end(new);
+			drop_init_fpu(new);
 	}
 }
 
 /*
  * Signal frame handlers...
  */
-extern int save_i387_xstate(void __user *buf);
-extern int restore_i387_xstate(void __user *buf);
+extern int save_xstate_sig(void __user *buf, void __user *fx, int size);
+extern int __restore_xstate_sig(void __user *buf, void __user *fx, int size);
 
-static inline void __clear_fpu(struct task_struct *tsk)
+static inline int xstate_sigframe_size(void)
 {
-	if (__thread_has_fpu(tsk)) {
-		/* Ignore delayed exceptions from user space */
-		asm volatile("1: fwait\n"
-			     "2:\n"
-			     _ASM_EXTABLE(1b, 2b));
-		__thread_fpu_end(tsk);
+	return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size;
+}
+
+static inline int restore_xstate_sig(void __user *buf, int ia32_frame)
+{
+	void __user *buf_fx = buf;
+	int size = xstate_sigframe_size();
+
+	if (ia32_frame && use_fxsr()) {
+		buf_fx = buf + sizeof(struct i387_fsave_struct);
+		size += sizeof(struct i387_fsave_struct);
 	}
+
+	return __restore_xstate_sig(buf, buf_fx, size);
 }
 
 /*
- * The actual user_fpu_begin/end() functions
- * need to be preemption-safe.
+ * Need to be preemption-safe.
  *
- * NOTE! user_fpu_end() must be used only after you
- * have saved the FP state, and user_fpu_begin() must
- * be used only immediately before restoring it.
- * These functions do not do any save/restore on
- * their own.
+ * NOTE! user_fpu_begin() must be used only immediately before restoring
+ * it. This function does not do any save/restore on their own.
  */
-static inline void user_fpu_end(void)
-{
-	preempt_disable();
-	__thread_fpu_end(current);
-	preempt_enable();
-}
-
 static inline void user_fpu_begin(void)
 {
 	preempt_disable();
@@ -437,25 +469,32 @@ static inline void user_fpu_begin(void)
 	preempt_enable();
 }
 
+static inline void __save_fpu(struct task_struct *tsk)
+{
+	if (use_xsave())
+		xsave_state(&tsk->thread.fpu.state->xsave, -1);
+	else
+		fpu_fxsave(&tsk->thread.fpu);
+}
+
 /*
  * These disable preemption on their own and are safe
  */
 static inline void save_init_fpu(struct task_struct *tsk)
 {
 	WARN_ON_ONCE(!__thread_has_fpu(tsk));
+
+	if (use_eager_fpu()) {
+		__save_fpu(tsk);
+		return;
+	}
+
 	preempt_disable();
 	__save_init_fpu(tsk);
 	__thread_fpu_end(tsk);
 	preempt_enable();
 }
 
-static inline void clear_fpu(struct task_struct *tsk)
-{
-	preempt_disable();
-	__clear_fpu(tsk);
-	preempt_enable();
-}
-
 /*
  * i387 state interaction
  */
@@ -510,11 +549,34 @@ static inline void fpu_free(struct fpu *fpu)
 	}
 }
 
-static inline void fpu_copy(struct fpu *dst, struct fpu *src)
+static inline void fpu_copy(struct task_struct *dst, struct task_struct *src)
 {
-	memcpy(dst->state, src->state, xstate_size);
+	if (use_eager_fpu()) {
+		memset(&dst->thread.fpu.state->xsave, 0, xstate_size);
+		__save_fpu(dst);
+	} else {
+		struct fpu *dfpu = &dst->thread.fpu;
+		struct fpu *sfpu = &src->thread.fpu;
+
+		unlazy_fpu(src);
+		memcpy(dfpu->state, sfpu->state, xstate_size);
+	}
 }
 
-extern void fpu_finit(struct fpu *fpu);
+static inline unsigned long
+alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx,
+		unsigned long *size)
+{
+	unsigned long frame_size = xstate_sigframe_size();
+
+	*buf_fx = sp = round_down(sp - frame_size, 64);
+	if (ia32_frame && use_fxsr()) {
+		frame_size += sizeof(struct i387_fsave_struct);
+		sp -= sizeof(struct i387_fsave_struct);
+	}
+
+	*size = frame_size;
+	return sp;
+}
 
 #endif
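To see what the new check_insn() helper buys over the removed open-coded versions, here is approximately what the frstor_checking() call above expands to — the same exception-table fixup pattern the old fxrstor_checking() spelled out by hand (kernel-context sketch; _ASM_EXTABLE and the .fixup section exist only in the kernel build environment, and the function name below is made up):

	/* Approximate expansion of:
	 *	return check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
	 */
	static inline int frstor_checking_expanded(struct i387_fsave_struct *fx)
	{
		int err;

		asm volatile("1: frstor %[fx]\n\t"
			     "2:\n"
			     ".section .fixup,\"ax\"\n"	/* out-of-line fault handler */
			     "3:  movl $-1,%[err]\n"	/* flag the failed restore */
			     "    jmp  2b\n"
			     ".previous\n"
			     _ASM_EXTABLE(1b, 3b)	/* a fault at 1: lands at 3: */
			     : [err] "=r" (err), "=m" (*fx)
			     : "0" (0), [fx] "m" (*fx));

		return err;	/* 0 on success, -1 if the instruction faulted */
	}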
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 257d9cca214f..ed8089d69094 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -19,12 +19,37 @@ struct pt_regs;
 struct user_i387_struct;
 
 extern int init_fpu(struct task_struct *child);
+extern void fpu_finit(struct fpu *fpu);
 extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
 extern void math_state_restore(void);
 
 extern bool irq_fpu_usable(void);
-extern void kernel_fpu_begin(void);
-extern void kernel_fpu_end(void);
+
+/*
+ * Careful: __kernel_fpu_begin/end() must be called with preempt disabled
+ * and they don't touch the preempt state on their own.
+ * If you enable preemption after __kernel_fpu_begin(), preempt notifier
+ * should call the __kernel_fpu_end() to prevent the kernel/user FPU
+ * state from getting corrupted. KVM for example uses this model.
+ *
+ * All other cases use kernel_fpu_begin/end() which disable preemption
+ * during kernel FPU usage.
+ */
+extern void __kernel_fpu_begin(void);
+extern void __kernel_fpu_end(void);
+
+static inline void kernel_fpu_begin(void)
+{
+	WARN_ON_ONCE(!irq_fpu_usable());
+	preempt_disable();
+	__kernel_fpu_begin();
+}
+
+static inline void kernel_fpu_end(void)
+{
+	__kernel_fpu_end();
+	preempt_enable();
+}
 
 /*
  * Some instructions like VIA's padlock instructions generate a spurious
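The split into __kernel_fpu_begin/end() plus inline wrappers changes how callers are written. A hypothetical in-kernel SSE user (the function names below are made up for illustration) needs only the plain pair, while a KVM-style caller that manages preemption itself uses the underscored variants:

	/* Kernel-context sketch; not from this patch. */
	#include <asm/i387.h>

	static void my_xor_blocks(void *dst, const void *src, unsigned long len)
	{
		kernel_fpu_begin();	/* disables preemption, saves user FPU state */
		/* ... SSE/AVX instructions may be used freely here ... */
		kernel_fpu_end();	/* hands the FPU back, reenables preemption */
	}

	static void my_load_guest_fpu(void)
	{
		preempt_disable();
		__kernel_fpu_begin();
		/* ... switch to guest FPU state ... */
		preempt_enable();	/* legal only because a preempt notifier
					 * will invoke __kernel_fpu_end(), as the
					 * comment above describes for KVM */
	}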
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 598457cbd0f8..323973f4abf1 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -31,6 +31,10 @@ typedef struct {
 	unsigned long sig[_NSIG_WORDS];
 } sigset_t;
 
+#ifndef CONFIG_COMPAT
+typedef sigset_t compat_sigset_t;
+#endif
+
 #else
 /* Here we must cater to libcs that poke about in kernel headers.  */
 
diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index 454570891bdc..aabd5850bdb9 100644
--- a/arch/x86/include/asm/xor_32.h
+++ b/arch/x86/include/asm/xor_32.h
@@ -534,38 +534,6 @@ static struct xor_block_template xor_block_p5_mmx = {
  * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
  */
 
-#define XMMS_SAVE				\
-do {						\
-	preempt_disable();			\
-	cr0 = read_cr0();			\
-	clts();					\
-	asm volatile(				\
-		"movups %%xmm0,(%0)	;\n\t"	\
-		"movups %%xmm1,0x10(%0)	;\n\t"	\
-		"movups %%xmm2,0x20(%0)	;\n\t"	\
-		"movups %%xmm3,0x30(%0)	;\n\t"	\
-		:				\
-		: "r" (xmm_save)		\
-		: "memory");			\
-} while (0)
-
-#define XMMS_RESTORE				\
-do {						\
-	asm volatile(				\
-		"sfence			;\n\t"	\
-		"movups (%0),%%xmm0	;\n\t"	\
-		"movups 0x10(%0),%%xmm1	;\n\t"	\
-		"movups 0x20(%0),%%xmm2	;\n\t"	\
-		"movups 0x30(%0),%%xmm3	;\n\t"	\
-		:				\
-		: "r" (xmm_save)		\
-		: "memory");			\
-	write_cr0(cr0);				\
-	preempt_enable();			\
-} while (0)
-
-#define ALIGN16 __attribute__((aligned(16)))
-
 #define OFFS(x)		"16*("#x")"
 #define PF_OFFS(x)	"256+16*("#x")"
 #define	PF0(x)		"	prefetchnta "PF_OFFS(x)"(%1)		;\n"
@@ -587,10 +555,8 @@ static void
 xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 {
 	unsigned long lines = bytes >> 8;
-	char xmm_save[16*4] ALIGN16;
-	int cr0;
 
-	XMMS_SAVE;
+	kernel_fpu_begin();
 
 	asm volatile(
 #undef BLOCK
@@ -633,7 +599,7 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 	:
 	: "memory");
 
-	XMMS_RESTORE;
+	kernel_fpu_end();
 }
 
 static void
@@ -641,10 +607,8 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	  unsigned long *p3)
 {
 	unsigned long lines = bytes >> 8;
-	char xmm_save[16*4] ALIGN16;
-	int cr0;
 
-	XMMS_SAVE;
+	kernel_fpu_begin();
 
 	asm volatile(
 #undef BLOCK
@@ -694,7 +658,7 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	:
 	: "memory" );
 
-	XMMS_RESTORE;
+	kernel_fpu_end();
 }
 
 static void
@@ -702,10 +666,8 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	  unsigned long *p3, unsigned long *p4)
 {
 	unsigned long lines = bytes >> 8;
-	char xmm_save[16*4] ALIGN16;
-	int cr0;
 
-	XMMS_SAVE;
+	kernel_fpu_begin();
 
 	asm volatile(
 #undef BLOCK
@@ -762,7 +724,7 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	:
 	: "memory" );
 
-	XMMS_RESTORE;
+	kernel_fpu_end();
 }
 
 static void
@@ -770,10 +732,8 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	  unsigned long *p3, unsigned long *p4, unsigned long *p5)
 {
 	unsigned long lines = bytes >> 8;
-	char xmm_save[16*4] ALIGN16;
-	int cr0;
 
-	XMMS_SAVE;
+	kernel_fpu_begin();
 
 	/* Make sure GCC forgets anything it knows about p4 or p5,
 	   such that it won't pass to the asm volatile below a
@@ -850,7 +810,7 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	   like assuming they have some legal value. */
 	asm("" : "=r" (p4), "=r" (p5));
 
-	XMMS_RESTORE;
+	kernel_fpu_end();
 }
 
 static struct xor_block_template xor_block_pIII_sse = {
diff --git a/arch/x86/include/asm/xor_64.h b/arch/x86/include/asm/xor_64.h
index b9b2323e90fe..5fc06d0b7eb5 100644
--- a/arch/x86/include/asm/xor_64.h
+++ b/arch/x86/include/asm/xor_64.h
@@ -34,41 +34,7 @@
  * no advantages to be gotten from x86-64 here anyways.
  */
 
-typedef struct {
-	unsigned long a, b;
-} __attribute__((aligned(16))) xmm_store_t;
-
-/* Doesn't use gcc to save the XMM registers, because there is no easy way to
-   tell it to do a clts before the register saving. */
-#define XMMS_SAVE				\
-do {						\
-	preempt_disable();			\
-	asm volatile(				\
-		"movq %%cr0,%0		;\n\t"	\
-		"clts			;\n\t"	\
-		"movups %%xmm0,(%1)	;\n\t"	\
-		"movups %%xmm1,0x10(%1)	;\n\t"	\
-		"movups %%xmm2,0x20(%1)	;\n\t"	\
-		"movups %%xmm3,0x30(%1)	;\n\t"	\
-		: "=&r" (cr0)			\
-		: "r" (xmm_save)		\
-		: "memory");			\
-} while (0)
-
-#define XMMS_RESTORE				\
-do {						\
-	asm volatile(				\
-		"sfence			;\n\t"	\
-		"movups (%1),%%xmm0	;\n\t"	\
-		"movups 0x10(%1),%%xmm1	;\n\t"	\
-		"movups 0x20(%1),%%xmm2	;\n\t"	\
-		"movups 0x30(%1),%%xmm3	;\n\t"	\
-		"movq	%0,%%cr0	;\n\t"	\
-		:				\
-		: "r" (cr0), "r" (xmm_save)	\
-		: "memory");			\
-	preempt_enable();			\
-} while (0)
+#include <asm/i387.h>
 
 #define OFFS(x)		"16*("#x")"
 #define PF_OFFS(x)	"256+16*("#x")"
@@ -91,10 +57,8 @@ static void
 xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 {
 	unsigned int lines = bytes >> 8;
-	unsigned long cr0;
-	xmm_store_t xmm_save[4];
 
-	XMMS_SAVE;
+	kernel_fpu_begin();
 
 	asm volatile(
 #undef BLOCK
@@ -135,7 +99,7 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 	: [inc] "r" (256UL)
 	: "memory");
 
-	XMMS_RESTORE;
+	kernel_fpu_end();
 }
 
 static void
@@ -143,11 +107,8 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	  unsigned long *p3)
 {
 	unsigned int lines = bytes >> 8;
-	xmm_store_t xmm_save[4];
-	unsigned long cr0;
-
-	XMMS_SAVE;
 
+	kernel_fpu_begin();
 	asm volatile(
 #undef BLOCK
 #define BLOCK(i) \
@@ -194,7 +155,7 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	  [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
 	: [inc] "r" (256UL)
 	: "memory");
-	XMMS_RESTORE;
+	kernel_fpu_end();
 }
 
 static void
@@ -202,10 +163,8 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	  unsigned long *p3, unsigned long *p4)
 {
 	unsigned int lines = bytes >> 8;
-	xmm_store_t xmm_save[4];
-	unsigned long cr0;
 
-	XMMS_SAVE;
+	kernel_fpu_begin();
 
 	asm volatile(
 #undef BLOCK
@@ -261,7 +220,7 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	: [inc] "r" (256UL)
 	: "memory" );
 
-	XMMS_RESTORE;
+	kernel_fpu_end();
 }
 
 static void
@@ -269,10 +228,8 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	  unsigned long *p3, unsigned long *p4, unsigned long *p5)
 {
 	unsigned int lines = bytes >> 8;
-	xmm_store_t xmm_save[4];
-	unsigned long cr0;
 
-	XMMS_SAVE;
+	kernel_fpu_begin();
 
 	asm volatile(
 #undef BLOCK
@@ -336,7 +293,7 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	: [inc] "r" (256UL)
 	: "memory");
 
-	XMMS_RESTORE;
+	kernel_fpu_end();
 }
 
 static struct xor_block_template xor_block_sse = {
diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h
index 2510d35f480e..7ea79c5fa1f2 100644
--- a/arch/x86/include/asm/xor_avx.h
+++ b/arch/x86/include/asm/xor_avx.h
@@ -20,32 +20,6 @@
 #include <linux/compiler.h>
 #include <asm/i387.h>
 
-#define ALIGN32 __aligned(32)
-
-#define YMM_SAVED_REGS 4
-
-#define YMMS_SAVE \
-do { \
-	preempt_disable(); \
-	cr0 = read_cr0(); \
-	clts(); \
-	asm volatile("vmovaps %%ymm0, %0" : "=m" (ymm_save[0]) : : "memory"); \
-	asm volatile("vmovaps %%ymm1, %0" : "=m" (ymm_save[32]) : : "memory"); \
-	asm volatile("vmovaps %%ymm2, %0" : "=m" (ymm_save[64]) : : "memory"); \
-	asm volatile("vmovaps %%ymm3, %0" : "=m" (ymm_save[96]) : : "memory"); \
-} while (0);
-
-#define YMMS_RESTORE \
-do { \
-	asm volatile("sfence" : : : "memory"); \
-	asm volatile("vmovaps %0, %%ymm3" : : "m" (ymm_save[96])); \
-	asm volatile("vmovaps %0, %%ymm2" : : "m" (ymm_save[64])); \
-	asm volatile("vmovaps %0, %%ymm1" : : "m" (ymm_save[32])); \
-	asm volatile("vmovaps %0, %%ymm0" : : "m" (ymm_save[0])); \
-	write_cr0(cr0); \
-	preempt_enable(); \
-} while (0);
-
 #define BLOCK4(i) \
 		BLOCK(32 * i, 0) \
 		BLOCK(32 * (i + 1), 1) \
@@ -60,10 +34,9 @@ do { \
 
 static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1)
 {
-	unsigned long cr0, lines = bytes >> 9;
-	char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
+	unsigned long lines = bytes >> 9;
 
-	YMMS_SAVE
+	kernel_fpu_begin();
 
 	while (lines--) {
 #undef BLOCK
@@ -82,16 +55,15 @@ do { \
 		p1 = (unsigned long *)((uintptr_t)p1 + 512);
 	}
 
-	YMMS_RESTORE
+	kernel_fpu_end();
 }
 
 static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1,
 	unsigned long *p2)
 {
-	unsigned long cr0, lines = bytes >> 9;
-	char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
+	unsigned long lines = bytes >> 9;
 
-	YMMS_SAVE
+	kernel_fpu_begin();
 
 	while (lines--) {
 #undef BLOCK
@@ -113,16 +85,15 @@ do { \
 		p2 = (unsigned long *)((uintptr_t)p2 + 512);
 	}
 
-	YMMS_RESTORE
+	kernel_fpu_end();
 }
 
 static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1,
 	unsigned long *p2, unsigned long *p3)
 {
-	unsigned long cr0, lines = bytes >> 9;
-	char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
+	unsigned long lines = bytes >> 9;
 
-	YMMS_SAVE
+	kernel_fpu_begin();
 
 	while (lines--) {
 #undef BLOCK
@@ -147,16 +118,15 @@ do { \
 		p3 = (unsigned long *)((uintptr_t)p3 + 512);
 	}
 
-	YMMS_RESTORE
+	kernel_fpu_end();
 }
 
 static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1,
 	unsigned long *p2, unsigned long *p3, unsigned long *p4)
 {
-	unsigned long cr0, lines = bytes >> 9;
-	char ymm_save[32 * YMM_SAVED_REGS] ALIGN32;
+	unsigned long lines = bytes >> 9;
 
-	YMMS_SAVE
+	kernel_fpu_begin();
 
 	while (lines--) {
 #undef BLOCK
@@ -184,7 +154,7 @@ do { \
 		p4 = (unsigned long *)((uintptr_t)p4 + 512);
 	}
 
-	YMMS_RESTORE
+	kernel_fpu_end();
 }
 
 static struct xor_block_template xor_block_avx = {
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index 8a1b6f9b594a..2ddee1b87793 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -34,17 +34,14 @@
 extern unsigned int xstate_size;
 extern u64 pcntxt_mask;
 extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
+extern struct xsave_struct *init_xstate_buf;
 
 extern void xsave_init(void);
 extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
 extern int init_fpu(struct task_struct *child);
-extern int check_for_xstate(struct i387_fxsave_struct __user *buf,
-			    void __user *fpstate,
-			    struct _fpx_sw_bytes *sw);
 
-static inline int fpu_xrstor_checking(struct fpu *fpu)
+static inline int fpu_xrstor_checking(struct xsave_struct *fx)
 {
-	struct xsave_struct *fx = &fpu->state->xsave;
 	int err;
 
 	asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
@@ -69,8 +66,7 @@ static inline int xsave_user(struct xsave_struct __user *buf)
 	 * Clear the xsave header first, so that reserved fields are
 	 * initialized to zero.
	 */
-	err = __clear_user(&buf->xsave_hdr,
-			   sizeof(struct xsave_hdr_struct));
+	err = __clear_user(&buf->xsave_hdr, sizeof(buf->xsave_hdr));
 	if (unlikely(err))
 		return -EFAULT;
 
@@ -84,9 +80,6 @@ static inline int xsave_user(struct xsave_struct __user *buf)
 		     : [err] "=r" (err)
 		     : "D" (buf), "a" (-1), "d" (-1), "0" (0)
 		     : "memory");
-	if (unlikely(err) && __clear_user(buf, xstate_size))
-		err = -EFAULT;
-	/* No need to clear here because the caller clears USED_MATH */
 	return err;
 }
 
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index c97bb7b5a9f8..d0e910da16c5 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -165,10 +165,15 @@ void __init check_bugs(void)
 	print_cpu_info(&boot_cpu_data);
 #endif
 	check_config();
-	check_fpu();
 	check_hlt();
 	check_popad();
 	init_utsname()->machine[1] =
 		'0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
 	alternative_instructions();
+
+	/*
+	 * kernel_fpu_begin/end() in check_fpu() relies on the patched
+	 * alternative instructions.
+	 */
+	check_fpu();
 }
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 134505e07b09..761cb3547041 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1297,7 +1297,6 @@ void __cpuinit cpu_init(void)
 	dbg_restore_debug_regs();
 
 	fpu_init();
-	xsave_init();
 
 	if (is_uv_system())
 		uv_cpu_init();
@@ -1350,6 +1349,5 @@ void __cpuinit cpu_init(void)
 	dbg_restore_debug_regs();
 
 	fpu_init();
-	xsave_init();
 }
 #endif
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index f250431fb505..675a05012449 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
@@ -19,24 +19,17 @@ | |||
19 | #include <asm/fpu-internal.h> | 19 | #include <asm/fpu-internal.h> |
20 | #include <asm/user.h> | 20 | #include <asm/user.h> |
21 | 21 | ||
22 | #ifdef CONFIG_X86_64 | ||
23 | # include <asm/sigcontext32.h> | ||
24 | # include <asm/user32.h> | ||
25 | #else | ||
26 | # define save_i387_xstate_ia32 save_i387_xstate | ||
27 | # define restore_i387_xstate_ia32 restore_i387_xstate | ||
28 | # define _fpstate_ia32 _fpstate | ||
29 | # define _xstate_ia32 _xstate | ||
30 | # define sig_xstate_ia32_size sig_xstate_size | ||
31 | # define fx_sw_reserved_ia32 fx_sw_reserved | ||
32 | # define user_i387_ia32_struct user_i387_struct | ||
33 | # define user32_fxsr_struct user_fxsr_struct | ||
34 | #endif | ||
35 | |||
36 | /* | 22 | /* |
37 | * Were we in an interrupt that interrupted kernel mode? | 23 | * Were we in an interrupt that interrupted kernel mode? |
38 | * | 24 | * |
39 | * We can do a kernel_fpu_begin/end() pair *ONLY* if that | 25 | * For now, with eagerfpu we will return interrupted kernel FPU |
26 | * state as not-idle. TBD: Ideally we can change the return value | ||
27 | * to something like __thread_has_fpu(current). But we need to | ||
28 | * be careful of doing __thread_clear_has_fpu() before saving | ||
29 | * the FPU etc for supporting nested uses etc. For now, take | ||
30 | * the simple route! | ||
31 | * | ||
32 | * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that | ||
40 | * pair does nothing at all: the thread must not have fpu (so | 33 | * pair does nothing at all: the thread must not have fpu (so |
41 | * that we don't try to save the FPU state), and TS must | 34 | * that we don't try to save the FPU state), and TS must |
42 | * be set (so that the clts/stts pair does nothing that is | 35 | * be set (so that the clts/stts pair does nothing that is |
@@ -44,6 +37,9 @@ | |||
44 | */ | 37 | */ |
45 | static inline bool interrupted_kernel_fpu_idle(void) | 38 | static inline bool interrupted_kernel_fpu_idle(void) |
46 | { | 39 | { |
40 | if (use_eager_fpu()) | ||
41 | return 0; | ||
42 | |||
47 | return !__thread_has_fpu(current) && | 43 | return !__thread_has_fpu(current) && |
48 | (read_cr0() & X86_CR0_TS); | 44 | (read_cr0() & X86_CR0_TS); |
49 | } | 45 | } |
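A note on the hunk above: use_eager_fpu() now short-circuits the idle test, so in eager mode an interrupted kernel FPU context is never considered "idle" and reusable. A small user-space model of the predicate (the struct fields are hypothetical stand-ins for __thread_has_fpu(current) and the CR0.TS bit):

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical stand-ins for the kernel's per-task/per-cpu state. */
    struct fpu_model {
        bool eager_fpu;       /* use_eager_fpu() */
        bool thread_has_fpu;  /* __thread_has_fpu(current) */
        bool cr0_ts;          /* read_cr0() & X86_CR0_TS */
    };

    /* Mirrors interrupted_kernel_fpu_idle() after this patch. */
    static bool interrupted_kernel_fpu_idle(const struct fpu_model *m)
    {
        if (m->eager_fpu)
            return false;
        return !m->thread_has_fpu && m->cr0_ts;
    }

    int main(void)
    {
        struct fpu_model lazy  = { false, false, true };
        struct fpu_model eager = { true,  false, true };

        printf("lazy:  %d\n", interrupted_kernel_fpu_idle(&lazy));  /* 1 */
        printf("eager: %d\n", interrupted_kernel_fpu_idle(&eager)); /* 0 */
        return 0;
    }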
@@ -77,29 +73,29 @@ bool irq_fpu_usable(void) | |||
77 | } | 73 | } |
78 | EXPORT_SYMBOL(irq_fpu_usable); | 74 | EXPORT_SYMBOL(irq_fpu_usable); |
79 | 75 | ||
80 | void kernel_fpu_begin(void) | 76 | void __kernel_fpu_begin(void) |
81 | { | 77 | { |
82 | struct task_struct *me = current; | 78 | struct task_struct *me = current; |
83 | 79 | ||
84 | WARN_ON_ONCE(!irq_fpu_usable()); | ||
85 | preempt_disable(); | ||
86 | if (__thread_has_fpu(me)) { | 80 | if (__thread_has_fpu(me)) { |
87 | __save_init_fpu(me); | 81 | __save_init_fpu(me); |
88 | __thread_clear_has_fpu(me); | 82 | __thread_clear_has_fpu(me); |
89 | /* We do 'stts()' in kernel_fpu_end() */ | 83 | /* We do 'stts()' in __kernel_fpu_end() */ |
90 | } else { | 84 | } else if (!use_eager_fpu()) { |
91 | this_cpu_write(fpu_owner_task, NULL); | 85 | this_cpu_write(fpu_owner_task, NULL); |
92 | clts(); | 86 | clts(); |
93 | } | 87 | } |
94 | } | 88 | } |
95 | EXPORT_SYMBOL(kernel_fpu_begin); | 89 | EXPORT_SYMBOL(__kernel_fpu_begin); |
96 | 90 | ||
97 | void kernel_fpu_end(void) | 91 | void __kernel_fpu_end(void) |
98 | { | 92 | { |
99 | stts(); | 93 | if (use_eager_fpu()) |
100 | preempt_enable(); | 94 | math_state_restore(); |
95 | else | ||
96 | stts(); | ||
101 | } | 97 | } |
102 | EXPORT_SYMBOL(kernel_fpu_end); | 98 | EXPORT_SYMBOL(__kernel_fpu_end); |
103 | 99 | ||
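The WARN_ON_ONCE()/preempt_disable() bookkeeping dropped from this file moves into kernel_fpu_begin()/kernel_fpu_end() wrappers in arch/x86/include/asm/i387.h (that hunk is not quoted in this section). Based on the split above, the wrappers presumably reduce to something like this sketch:

    /* Sketch only; the authoritative bodies live in asm/i387.h. */
    static inline void kernel_fpu_begin(void)
    {
        WARN_ON_ONCE(!irq_fpu_usable());
        preempt_disable();
        __kernel_fpu_begin();
    }

    static inline void kernel_fpu_end(void)
    {
        __kernel_fpu_end();
        preempt_enable();
    }

The point of the split is that a caller which already runs with preemption disabled for the whole FPU-using region, such as KVM in the kvm/x86.c hunks below, can invoke the __-prefixed halves directly.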
104 | void unlazy_fpu(struct task_struct *tsk) | 100 | void unlazy_fpu(struct task_struct *tsk) |
105 | { | 101 | { |
@@ -113,23 +109,15 @@ void unlazy_fpu(struct task_struct *tsk) | |||
113 | } | 109 | } |
114 | EXPORT_SYMBOL(unlazy_fpu); | 110 | EXPORT_SYMBOL(unlazy_fpu); |
115 | 111 | ||
116 | #ifdef CONFIG_MATH_EMULATION | 112 | unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; |
117 | # define HAVE_HWFP (boot_cpu_data.hard_math) | ||
118 | #else | ||
119 | # define HAVE_HWFP 1 | ||
120 | #endif | ||
121 | |||
122 | static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; | ||
123 | unsigned int xstate_size; | 113 | unsigned int xstate_size; |
124 | EXPORT_SYMBOL_GPL(xstate_size); | 114 | EXPORT_SYMBOL_GPL(xstate_size); |
125 | unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32); | ||
126 | static struct i387_fxsave_struct fx_scratch __cpuinitdata; | 115 | static struct i387_fxsave_struct fx_scratch __cpuinitdata; |
127 | 116 | ||
128 | static void __cpuinit mxcsr_feature_mask_init(void) | 117 | static void __cpuinit mxcsr_feature_mask_init(void) |
129 | { | 118 | { |
130 | unsigned long mask = 0; | 119 | unsigned long mask = 0; |
131 | 120 | ||
132 | clts(); | ||
133 | if (cpu_has_fxsr) { | 121 | if (cpu_has_fxsr) { |
134 | memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); | 122 | memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); |
135 | asm volatile("fxsave %0" : : "m" (fx_scratch)); | 123 | asm volatile("fxsave %0" : : "m" (fx_scratch)); |
@@ -138,7 +126,6 @@ static void __cpuinit mxcsr_feature_mask_init(void) | |||
138 | mask = 0x0000ffbf; | 126 | mask = 0x0000ffbf; |
139 | } | 127 | } |
140 | mxcsr_feature_mask &= mask; | 128 | mxcsr_feature_mask &= mask; |
141 | stts(); | ||
142 | } | 129 | } |
143 | 130 | ||
144 | static void __cpuinit init_thread_xstate(void) | 131 | static void __cpuinit init_thread_xstate(void) |
@@ -192,9 +179,8 @@ void __cpuinit fpu_init(void) | |||
192 | init_thread_xstate(); | 179 | init_thread_xstate(); |
193 | 180 | ||
194 | mxcsr_feature_mask_init(); | 181 | mxcsr_feature_mask_init(); |
195 | /* clean state in init */ | 182 | xsave_init(); |
196 | current_thread_info()->status = 0; | 183 | eager_fpu_init(); |
197 | clear_used_math(); | ||
198 | } | 184 | } |
199 | 185 | ||
200 | void fpu_finit(struct fpu *fpu) | 186 | void fpu_finit(struct fpu *fpu) |
@@ -205,12 +191,7 @@ void fpu_finit(struct fpu *fpu) | |||
205 | } | 191 | } |
206 | 192 | ||
207 | if (cpu_has_fxsr) { | 193 | if (cpu_has_fxsr) { |
208 | struct i387_fxsave_struct *fx = &fpu->state->fxsave; | 194 | fx_finit(&fpu->state->fxsave); |
209 | |||
210 | memset(fx, 0, xstate_size); | ||
211 | fx->cwd = 0x37f; | ||
212 | if (cpu_has_xmm) | ||
213 | fx->mxcsr = MXCSR_DEFAULT; | ||
214 | } else { | 195 | } else { |
215 | struct i387_fsave_struct *fp = &fpu->state->fsave; | 196 | struct i387_fsave_struct *fp = &fpu->state->fsave; |
216 | memset(fp, 0, xstate_size); | 197 | memset(fp, 0, xstate_size); |
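fpu_finit() now delegates the fxsave initialization to fx_finit(). Per the "remove cpu_has_xmm check in the fx_finit()" patch in this series, the helper (defined in asm/fpu-internal.h, not shown here) presumably looks roughly like:

    /* Sketch of fx_finit(); MXCSR is initialized unconditionally now
     * that the cpu_has_xmm test is gone. */
    static inline void fx_finit(struct i387_fxsave_struct *fx)
    {
        memset(fx, 0, xstate_size);
        fx->cwd = 0x37f;
        fx->mxcsr = MXCSR_DEFAULT;
    }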
@@ -454,7 +435,7 @@ static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave) | |||
454 | * FXSR floating point environment conversions. | 435 | * FXSR floating point environment conversions. |
455 | */ | 436 | */ |
456 | 437 | ||
457 | static void | 438 | void |
458 | convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) | 439 | convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) |
459 | { | 440 | { |
460 | struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; | 441 | struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; |
@@ -491,8 +472,8 @@ convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) | |||
491 | memcpy(&to[i], &from[i], sizeof(to[0])); | 472 | memcpy(&to[i], &from[i], sizeof(to[0])); |
492 | } | 473 | } |
493 | 474 | ||
494 | static void convert_to_fxsr(struct task_struct *tsk, | 475 | void convert_to_fxsr(struct task_struct *tsk, |
495 | const struct user_i387_ia32_struct *env) | 476 | const struct user_i387_ia32_struct *env) |
496 | 477 | ||
497 | { | 478 | { |
498 | struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; | 479 | struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; |
@@ -589,223 +570,6 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
589 | } | 570 | } |
590 | 571 | ||
591 | /* | 572 | /* |
592 | * Signal frame handlers. | ||
593 | */ | ||
594 | |||
595 | static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) | ||
596 | { | ||
597 | struct task_struct *tsk = current; | ||
598 | struct i387_fsave_struct *fp = &tsk->thread.fpu.state->fsave; | ||
599 | |||
600 | fp->status = fp->swd; | ||
601 | if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct))) | ||
602 | return -1; | ||
603 | return 1; | ||
604 | } | ||
605 | |||
606 | static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) | ||
607 | { | ||
608 | struct task_struct *tsk = current; | ||
609 | struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave; | ||
610 | struct user_i387_ia32_struct env; | ||
611 | int err = 0; | ||
612 | |||
613 | convert_from_fxsr(&env, tsk); | ||
614 | if (__copy_to_user(buf, &env, sizeof(env))) | ||
615 | return -1; | ||
616 | |||
617 | err |= __put_user(fx->swd, &buf->status); | ||
618 | err |= __put_user(X86_FXSR_MAGIC, &buf->magic); | ||
619 | if (err) | ||
620 | return -1; | ||
621 | |||
622 | if (__copy_to_user(&buf->_fxsr_env[0], fx, xstate_size)) | ||
623 | return -1; | ||
624 | return 1; | ||
625 | } | ||
626 | |||
627 | static int save_i387_xsave(void __user *buf) | ||
628 | { | ||
629 | struct task_struct *tsk = current; | ||
630 | struct _fpstate_ia32 __user *fx = buf; | ||
631 | int err = 0; | ||
632 | |||
633 | |||
634 | sanitize_i387_state(tsk); | ||
635 | |||
636 | /* | ||
637 | * For legacy compatible, we always set FP/SSE bits in the bit | ||
638 | * vector while saving the state to the user context. | ||
639 | * This will enable us capturing any changes(during sigreturn) to | ||
640 | * the FP/SSE bits by the legacy applications which don't touch | ||
641 | * xstate_bv in the xsave header. | ||
642 | * | ||
643 | * xsave aware applications can change the xstate_bv in the xsave | ||
644 | * header as well as change any contents in the memory layout. | ||
645 | * xrestore as part of sigreturn will capture all the changes. | ||
646 | */ | ||
647 | tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; | ||
648 | |||
649 | if (save_i387_fxsave(fx) < 0) | ||
650 | return -1; | ||
651 | |||
652 | err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved_ia32, | ||
653 | sizeof(struct _fpx_sw_bytes)); | ||
654 | err |= __put_user(FP_XSTATE_MAGIC2, | ||
655 | (__u32 __user *) (buf + sig_xstate_ia32_size | ||
656 | - FP_XSTATE_MAGIC2_SIZE)); | ||
657 | if (err) | ||
658 | return -1; | ||
659 | |||
660 | return 1; | ||
661 | } | ||
662 | |||
663 | int save_i387_xstate_ia32(void __user *buf) | ||
664 | { | ||
665 | struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf; | ||
666 | struct task_struct *tsk = current; | ||
667 | |||
668 | if (!used_math()) | ||
669 | return 0; | ||
670 | |||
671 | if (!access_ok(VERIFY_WRITE, buf, sig_xstate_ia32_size)) | ||
672 | return -EACCES; | ||
673 | /* | ||
674 | * This will cause a "finit" to be triggered by the next | ||
675 | * attempted FPU operation by the 'current' process. | ||
676 | */ | ||
677 | clear_used_math(); | ||
678 | |||
679 | if (!HAVE_HWFP) { | ||
680 | return fpregs_soft_get(current, NULL, | ||
681 | 0, sizeof(struct user_i387_ia32_struct), | ||
682 | NULL, fp) ? -1 : 1; | ||
683 | } | ||
684 | |||
685 | unlazy_fpu(tsk); | ||
686 | |||
687 | if (cpu_has_xsave) | ||
688 | return save_i387_xsave(fp); | ||
689 | if (cpu_has_fxsr) | ||
690 | return save_i387_fxsave(fp); | ||
691 | else | ||
692 | return save_i387_fsave(fp); | ||
693 | } | ||
694 | |||
695 | static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) | ||
696 | { | ||
697 | struct task_struct *tsk = current; | ||
698 | |||
699 | return __copy_from_user(&tsk->thread.fpu.state->fsave, buf, | ||
700 | sizeof(struct i387_fsave_struct)); | ||
701 | } | ||
702 | |||
703 | static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf, | ||
704 | unsigned int size) | ||
705 | { | ||
706 | struct task_struct *tsk = current; | ||
707 | struct user_i387_ia32_struct env; | ||
708 | int err; | ||
709 | |||
710 | err = __copy_from_user(&tsk->thread.fpu.state->fxsave, &buf->_fxsr_env[0], | ||
711 | size); | ||
712 | /* mxcsr reserved bits must be masked to zero for security reasons */ | ||
713 | tsk->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask; | ||
714 | if (err || __copy_from_user(&env, buf, sizeof(env))) | ||
715 | return 1; | ||
716 | convert_to_fxsr(tsk, &env); | ||
717 | |||
718 | return 0; | ||
719 | } | ||
720 | |||
721 | static int restore_i387_xsave(void __user *buf) | ||
722 | { | ||
723 | struct _fpx_sw_bytes fx_sw_user; | ||
724 | struct _fpstate_ia32 __user *fx_user = | ||
725 | ((struct _fpstate_ia32 __user *) buf); | ||
726 | struct i387_fxsave_struct __user *fx = | ||
727 | (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0]; | ||
728 | struct xsave_hdr_struct *xsave_hdr = | ||
729 | &current->thread.fpu.state->xsave.xsave_hdr; | ||
730 | u64 mask; | ||
731 | int err; | ||
732 | |||
733 | if (check_for_xstate(fx, buf, &fx_sw_user)) | ||
734 | goto fx_only; | ||
735 | |||
736 | mask = fx_sw_user.xstate_bv; | ||
737 | |||
738 | err = restore_i387_fxsave(buf, fx_sw_user.xstate_size); | ||
739 | |||
740 | xsave_hdr->xstate_bv &= pcntxt_mask; | ||
741 | /* | ||
742 | * These bits must be zero. | ||
743 | */ | ||
744 | xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; | ||
745 | |||
746 | /* | ||
747 | * Init the state that is not present in the memory layout | ||
748 | * and enabled by the OS. | ||
749 | */ | ||
750 | mask = ~(pcntxt_mask & ~mask); | ||
751 | xsave_hdr->xstate_bv &= mask; | ||
752 | |||
753 | return err; | ||
754 | fx_only: | ||
755 | /* | ||
756 | * Couldn't find the extended state information in the memory | ||
757 | * layout. Restore the FP/SSE and init the other extended state | ||
758 | * enabled by the OS. | ||
759 | */ | ||
760 | xsave_hdr->xstate_bv = XSTATE_FPSSE; | ||
761 | return restore_i387_fxsave(buf, sizeof(struct i387_fxsave_struct)); | ||
762 | } | ||
763 | |||
764 | int restore_i387_xstate_ia32(void __user *buf) | ||
765 | { | ||
766 | int err; | ||
767 | struct task_struct *tsk = current; | ||
768 | struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf; | ||
769 | |||
770 | if (HAVE_HWFP) | ||
771 | clear_fpu(tsk); | ||
772 | |||
773 | if (!buf) { | ||
774 | if (used_math()) { | ||
775 | clear_fpu(tsk); | ||
776 | clear_used_math(); | ||
777 | } | ||
778 | |||
779 | return 0; | ||
780 | } else | ||
781 | if (!access_ok(VERIFY_READ, buf, sig_xstate_ia32_size)) | ||
782 | return -EACCES; | ||
783 | |||
784 | if (!used_math()) { | ||
785 | err = init_fpu(tsk); | ||
786 | if (err) | ||
787 | return err; | ||
788 | } | ||
789 | |||
790 | if (HAVE_HWFP) { | ||
791 | if (cpu_has_xsave) | ||
792 | err = restore_i387_xsave(buf); | ||
793 | else if (cpu_has_fxsr) | ||
794 | err = restore_i387_fxsave(fp, sizeof(struct | ||
795 | i387_fxsave_struct)); | ||
796 | else | ||
797 | err = restore_i387_fsave(fp); | ||
798 | } else { | ||
799 | err = fpregs_soft_set(current, NULL, | ||
800 | 0, sizeof(struct user_i387_ia32_struct), | ||
801 | NULL, fp) != 0; | ||
802 | } | ||
803 | set_used_math(); | ||
804 | |||
805 | return err; | ||
806 | } | ||
807 | |||
808 | /* | ||
809 | * FPU state for core dumps. | 573 | * FPU state for core dumps. |
810 | * This is only used for a.out dumps now. | 574 | * This is only used for a.out dumps now. |
811 | * It is declared generically using elf_fpregset_t (which is | 575 | * It is declared generically using elf_fpregset_t (which is |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ef6a8456f719..dc3567e083f9 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -66,15 +66,13 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) | |||
66 | { | 66 | { |
67 | int ret; | 67 | int ret; |
68 | 68 | ||
69 | unlazy_fpu(src); | ||
70 | |||
71 | *dst = *src; | 69 | *dst = *src; |
72 | if (fpu_allocated(&src->thread.fpu)) { | 70 | if (fpu_allocated(&src->thread.fpu)) { |
73 | memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu)); | 71 | memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu)); |
74 | ret = fpu_alloc(&dst->thread.fpu); | 72 | ret = fpu_alloc(&dst->thread.fpu); |
75 | if (ret) | 73 | if (ret) |
76 | return ret; | 74 | return ret; |
77 | fpu_copy(&dst->thread.fpu, &src->thread.fpu); | 75 | fpu_copy(dst, src); |
78 | } | 76 | } |
79 | return 0; | 77 | return 0; |
80 | } | 78 | } |
@@ -97,16 +95,6 @@ void arch_task_cache_init(void) | |||
97 | SLAB_PANIC | SLAB_NOTRACK, NULL); | 95 | SLAB_PANIC | SLAB_NOTRACK, NULL); |
98 | } | 96 | } |
99 | 97 | ||
100 | static inline void drop_fpu(struct task_struct *tsk) | ||
101 | { | ||
102 | /* | ||
103 | * Forget coprocessor state.. | ||
104 | */ | ||
105 | tsk->fpu_counter = 0; | ||
106 | clear_fpu(tsk); | ||
107 | clear_used_math(); | ||
108 | } | ||
109 | |||
110 | /* | 98 | /* |
111 | * Free current thread data structures etc.. | 99 | * Free current thread data structures etc.. |
112 | */ | 100 | */ |
@@ -163,7 +151,13 @@ void flush_thread(void) | |||
163 | 151 | ||
164 | flush_ptrace_hw_breakpoint(tsk); | 152 | flush_ptrace_hw_breakpoint(tsk); |
165 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); | 153 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); |
166 | drop_fpu(tsk); | 154 | drop_init_fpu(tsk); |
155 | /* | ||
156 | * Free the FPU state for non-xsave platforms. It gets reallocated | ||
157 | * lazily at the first use. | ||
158 | */ | ||
159 | if (!use_eager_fpu()) | ||
160 | free_thread_xstate(tsk); | ||
167 | } | 161 | } |
168 | 162 | ||
169 | static void hard_disable_TSC(void) | 163 | static void hard_disable_TSC(void) |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 516fa186121b..b9ff83c7135b 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -190,10 +190,6 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) | |||
190 | regs->cs = __USER_CS; | 190 | regs->cs = __USER_CS; |
191 | regs->ip = new_ip; | 191 | regs->ip = new_ip; |
192 | regs->sp = new_sp; | 192 | regs->sp = new_sp; |
193 | /* | ||
194 | * Free the old FP and other extended state | ||
195 | */ | ||
196 | free_thread_xstate(current); | ||
197 | } | 193 | } |
198 | EXPORT_SYMBOL_GPL(start_thread); | 194 | EXPORT_SYMBOL_GPL(start_thread); |
199 | 195 | ||
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 0a980c9d7cb8..8a6d20ce1978 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -232,10 +232,6 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip, | |||
232 | regs->cs = _cs; | 232 | regs->cs = _cs; |
233 | regs->ss = _ss; | 233 | regs->ss = _ss; |
234 | regs->flags = X86_EFLAGS_IF; | 234 | regs->flags = X86_EFLAGS_IF; |
235 | /* | ||
236 | * Free the old FP and other extended state | ||
237 | */ | ||
238 | free_thread_xstate(current); | ||
239 | } | 235 | } |
240 | 236 | ||
241 | void | 237 | void |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 9f94f8ec26e4..b00b33a18390 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -1333,9 +1333,6 @@ static const struct user_regset_view user_x86_64_view = { | |||
1333 | #define genregs32_get genregs_get | 1333 | #define genregs32_get genregs_get |
1334 | #define genregs32_set genregs_set | 1334 | #define genregs32_set genregs_set |
1335 | 1335 | ||
1336 | #define user_i387_ia32_struct user_i387_struct | ||
1337 | #define user32_fxsr_struct user_fxsr_struct | ||
1338 | |||
1339 | #endif /* CONFIG_X86_64 */ | 1336 | #endif /* CONFIG_X86_64 */ |
1340 | 1337 | ||
1341 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION | 1338 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index bca0ab903e57..3160c26db5e7 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -114,7 +114,7 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | |||
114 | regs->orig_ax = -1; /* disable syscall checks */ | 114 | regs->orig_ax = -1; /* disable syscall checks */ |
115 | 115 | ||
116 | get_user_ex(buf, &sc->fpstate); | 116 | get_user_ex(buf, &sc->fpstate); |
117 | err |= restore_i387_xstate(buf); | 117 | err |= restore_xstate_sig(buf, config_enabled(CONFIG_X86_32)); |
118 | 118 | ||
119 | get_user_ex(*pax, &sc->ax); | 119 | get_user_ex(*pax, &sc->ax); |
120 | } get_user_catch(err); | 120 | } get_user_catch(err); |
@@ -206,35 +206,32 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, | |||
206 | void __user **fpstate) | 206 | void __user **fpstate) |
207 | { | 207 | { |
208 | /* Default to using normal stack */ | 208 | /* Default to using normal stack */ |
209 | unsigned long math_size = 0; | ||
209 | unsigned long sp = regs->sp; | 210 | unsigned long sp = regs->sp; |
211 | unsigned long buf_fx = 0; | ||
210 | int onsigstack = on_sig_stack(sp); | 212 | int onsigstack = on_sig_stack(sp); |
211 | 213 | ||
212 | #ifdef CONFIG_X86_64 | ||
213 | /* redzone */ | 214 | /* redzone */ |
214 | sp -= 128; | 215 | if (config_enabled(CONFIG_X86_64)) |
215 | #endif /* CONFIG_X86_64 */ | 216 | sp -= 128; |
216 | 217 | ||
217 | if (!onsigstack) { | 218 | if (!onsigstack) { |
218 | /* This is the X/Open sanctioned signal stack switching. */ | 219 | /* This is the X/Open sanctioned signal stack switching. */ |
219 | if (ka->sa.sa_flags & SA_ONSTACK) { | 220 | if (ka->sa.sa_flags & SA_ONSTACK) { |
220 | if (current->sas_ss_size) | 221 | if (current->sas_ss_size) |
221 | sp = current->sas_ss_sp + current->sas_ss_size; | 222 | sp = current->sas_ss_sp + current->sas_ss_size; |
222 | } else { | 223 | } else if (config_enabled(CONFIG_X86_32) && |
223 | #ifdef CONFIG_X86_32 | 224 | (regs->ss & 0xffff) != __USER_DS && |
224 | /* This is the legacy signal stack switching. */ | 225 | !(ka->sa.sa_flags & SA_RESTORER) && |
225 | if ((regs->ss & 0xffff) != __USER_DS && | 226 | ka->sa.sa_restorer) { |
226 | !(ka->sa.sa_flags & SA_RESTORER) && | 227 | /* This is the legacy signal stack switching. */ |
227 | ka->sa.sa_restorer) | ||
228 | sp = (unsigned long) ka->sa.sa_restorer; | 228 | sp = (unsigned long) ka->sa.sa_restorer; |
229 | #endif /* CONFIG_X86_32 */ | ||
230 | } | 229 | } |
231 | } | 230 | } |
232 | 231 | ||
233 | if (used_math()) { | 232 | if (used_math()) { |
234 | sp -= sig_xstate_size; | 233 | sp = alloc_mathframe(sp, config_enabled(CONFIG_X86_32), |
235 | #ifdef CONFIG_X86_64 | 234 | &buf_fx, &math_size); |
236 | sp = round_down(sp, 64); | ||
237 | #endif /* CONFIG_X86_64 */ | ||
238 | *fpstate = (void __user *)sp; | 235 | *fpstate = (void __user *)sp; |
239 | } | 236 | } |
240 | 237 | ||
@@ -247,8 +244,9 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, | |||
247 | if (onsigstack && !likely(on_sig_stack(sp))) | 244 | if (onsigstack && !likely(on_sig_stack(sp))) |
248 | return (void __user *)-1L; | 245 | return (void __user *)-1L; |
249 | 246 | ||
250 | /* save i387 state */ | 247 | /* save i387 and extended state */ |
251 | if (used_math() && save_i387_xstate(*fpstate) < 0) | 248 | if (used_math() && |
249 | save_xstate_sig(*fpstate, (void __user *)buf_fx, math_size) < 0) | ||
252 | return (void __user *)-1L; | 250 | return (void __user *)-1L; |
253 | 251 | ||
254 | return (void __user *)sp; | 252 | return (void __user *)sp; |
@@ -474,6 +472,74 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
474 | } | 472 | } |
475 | #endif /* CONFIG_X86_32 */ | 473 | #endif /* CONFIG_X86_32 */ |
476 | 474 | ||
475 | static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, | ||
476 | siginfo_t *info, compat_sigset_t *set, | ||
477 | struct pt_regs *regs) | ||
478 | { | ||
479 | #ifdef CONFIG_X86_X32_ABI | ||
480 | struct rt_sigframe_x32 __user *frame; | ||
481 | void __user *restorer; | ||
482 | int err = 0; | ||
483 | void __user *fpstate = NULL; | ||
484 | |||
485 | frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); | ||
486 | |||
487 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) | ||
488 | return -EFAULT; | ||
489 | |||
490 | if (ka->sa.sa_flags & SA_SIGINFO) { | ||
491 | if (copy_siginfo_to_user32(&frame->info, info)) | ||
492 | return -EFAULT; | ||
493 | } | ||
494 | |||
495 | put_user_try { | ||
496 | /* Create the ucontext. */ | ||
497 | if (cpu_has_xsave) | ||
498 | put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); | ||
499 | else | ||
500 | put_user_ex(0, &frame->uc.uc_flags); | ||
501 | put_user_ex(0, &frame->uc.uc_link); | ||
502 | put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); | ||
503 | put_user_ex(sas_ss_flags(regs->sp), | ||
504 | &frame->uc.uc_stack.ss_flags); | ||
505 | put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); | ||
506 | put_user_ex(0, &frame->uc.uc__pad0); | ||
507 | err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, | ||
508 | regs, set->sig[0]); | ||
509 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); | ||
510 | |||
511 | if (ka->sa.sa_flags & SA_RESTORER) { | ||
512 | restorer = ka->sa.sa_restorer; | ||
513 | } else { | ||
514 | /* could use a vstub here */ | ||
515 | restorer = NULL; | ||
516 | err |= -EFAULT; | ||
517 | } | ||
518 | put_user_ex(restorer, &frame->pretcode); | ||
519 | } put_user_catch(err); | ||
520 | |||
521 | if (err) | ||
522 | return -EFAULT; | ||
523 | |||
524 | /* Set up registers for signal handler */ | ||
525 | regs->sp = (unsigned long) frame; | ||
526 | regs->ip = (unsigned long) ka->sa.sa_handler; | ||
527 | |||
528 | /* We use the x32 calling convention here... */ | ||
529 | regs->di = sig; | ||
530 | regs->si = (unsigned long) &frame->info; | ||
531 | regs->dx = (unsigned long) &frame->uc; | ||
532 | |||
533 | loadsegment(ds, __USER_DS); | ||
534 | loadsegment(es, __USER_DS); | ||
535 | |||
536 | regs->cs = __USER_CS; | ||
537 | regs->ss = __USER_DS; | ||
538 | #endif /* CONFIG_X86_X32_ABI */ | ||
539 | |||
540 | return 0; | ||
541 | } | ||
542 | |||
477 | #ifdef CONFIG_X86_32 | 543 | #ifdef CONFIG_X86_32 |
478 | /* | 544 | /* |
479 | * Atomically swap in the new signal mask, and wait for a signal. | 545 | * Atomically swap in the new signal mask, and wait for a signal. |
@@ -612,55 +678,22 @@ static int signr_convert(int sig) | |||
612 | return sig; | 678 | return sig; |
613 | } | 679 | } |
614 | 680 | ||
615 | #ifdef CONFIG_X86_32 | ||
616 | |||
617 | #define is_ia32 1 | ||
618 | #define ia32_setup_frame __setup_frame | ||
619 | #define ia32_setup_rt_frame __setup_rt_frame | ||
620 | |||
621 | #else /* !CONFIG_X86_32 */ | ||
622 | |||
623 | #ifdef CONFIG_IA32_EMULATION | ||
624 | #define is_ia32 test_thread_flag(TIF_IA32) | ||
625 | #else /* !CONFIG_IA32_EMULATION */ | ||
626 | #define is_ia32 0 | ||
627 | #endif /* CONFIG_IA32_EMULATION */ | ||
628 | |||
629 | #ifdef CONFIG_X86_X32_ABI | ||
630 | #define is_x32 test_thread_flag(TIF_X32) | ||
631 | |||
632 | static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, | ||
633 | siginfo_t *info, compat_sigset_t *set, | ||
634 | struct pt_regs *regs); | ||
635 | #else /* !CONFIG_X86_X32_ABI */ | ||
636 | #define is_x32 0 | ||
637 | #endif /* CONFIG_X86_X32_ABI */ | ||
638 | |||
639 | int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | ||
640 | sigset_t *set, struct pt_regs *regs); | ||
641 | int ia32_setup_frame(int sig, struct k_sigaction *ka, | ||
642 | sigset_t *set, struct pt_regs *regs); | ||
643 | |||
644 | #endif /* CONFIG_X86_32 */ | ||
645 | |||
646 | static int | 681 | static int |
647 | setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | 682 | setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, |
648 | struct pt_regs *regs) | 683 | struct pt_regs *regs) |
649 | { | 684 | { |
650 | int usig = signr_convert(sig); | 685 | int usig = signr_convert(sig); |
651 | sigset_t *set = sigmask_to_save(); | 686 | sigset_t *set = sigmask_to_save(); |
687 | compat_sigset_t *cset = (compat_sigset_t *) set; | ||
652 | 688 | ||
653 | /* Set up the stack frame */ | 689 | /* Set up the stack frame */ |
654 | if (is_ia32) { | 690 | if (is_ia32_frame()) { |
655 | if (ka->sa.sa_flags & SA_SIGINFO) | 691 | if (ka->sa.sa_flags & SA_SIGINFO) |
656 | return ia32_setup_rt_frame(usig, ka, info, set, regs); | 692 | return ia32_setup_rt_frame(usig, ka, info, cset, regs); |
657 | else | 693 | else |
658 | return ia32_setup_frame(usig, ka, set, regs); | 694 | return ia32_setup_frame(usig, ka, cset, regs); |
659 | #ifdef CONFIG_X86_X32_ABI | 695 | } else if (is_x32_frame()) { |
660 | } else if (is_x32) { | 696 | return x32_setup_rt_frame(usig, ka, info, cset, regs); |
661 | return x32_setup_rt_frame(usig, ka, info, | ||
662 | (compat_sigset_t *)set, regs); | ||
663 | #endif | ||
664 | } else { | 697 | } else { |
665 | return __setup_rt_frame(sig, ka, info, set, regs); | 698 | return __setup_rt_frame(sig, ka, info, set, regs); |
666 | } | 699 | } |
@@ -828,72 +861,6 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where) | |||
828 | } | 861 | } |
829 | 862 | ||
830 | #ifdef CONFIG_X86_X32_ABI | 863 | #ifdef CONFIG_X86_X32_ABI |
831 | static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, | ||
832 | siginfo_t *info, compat_sigset_t *set, | ||
833 | struct pt_regs *regs) | ||
834 | { | ||
835 | struct rt_sigframe_x32 __user *frame; | ||
836 | void __user *restorer; | ||
837 | int err = 0; | ||
838 | void __user *fpstate = NULL; | ||
839 | |||
840 | frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); | ||
841 | |||
842 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) | ||
843 | return -EFAULT; | ||
844 | |||
845 | if (ka->sa.sa_flags & SA_SIGINFO) { | ||
846 | if (copy_siginfo_to_user32(&frame->info, info)) | ||
847 | return -EFAULT; | ||
848 | } | ||
849 | |||
850 | put_user_try { | ||
851 | /* Create the ucontext. */ | ||
852 | if (cpu_has_xsave) | ||
853 | put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); | ||
854 | else | ||
855 | put_user_ex(0, &frame->uc.uc_flags); | ||
856 | put_user_ex(0, &frame->uc.uc_link); | ||
857 | put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); | ||
858 | put_user_ex(sas_ss_flags(regs->sp), | ||
859 | &frame->uc.uc_stack.ss_flags); | ||
860 | put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); | ||
861 | put_user_ex(0, &frame->uc.uc__pad0); | ||
862 | err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, | ||
863 | regs, set->sig[0]); | ||
864 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); | ||
865 | |||
866 | if (ka->sa.sa_flags & SA_RESTORER) { | ||
867 | restorer = ka->sa.sa_restorer; | ||
868 | } else { | ||
869 | /* could use a vstub here */ | ||
870 | restorer = NULL; | ||
871 | err |= -EFAULT; | ||
872 | } | ||
873 | put_user_ex(restorer, &frame->pretcode); | ||
874 | } put_user_catch(err); | ||
875 | |||
876 | if (err) | ||
877 | return -EFAULT; | ||
878 | |||
879 | /* Set up registers for signal handler */ | ||
880 | regs->sp = (unsigned long) frame; | ||
881 | regs->ip = (unsigned long) ka->sa.sa_handler; | ||
882 | |||
883 | /* We use the x32 calling convention here... */ | ||
884 | regs->di = sig; | ||
885 | regs->si = (unsigned long) &frame->info; | ||
886 | regs->dx = (unsigned long) &frame->uc; | ||
887 | |||
888 | loadsegment(ds, __USER_DS); | ||
889 | loadsegment(es, __USER_DS); | ||
890 | |||
891 | regs->cs = __USER_CS; | ||
892 | regs->ss = __USER_DS; | ||
893 | |||
894 | return 0; | ||
895 | } | ||
896 | |||
897 | asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs) | 864 | asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs) |
898 | { | 865 | { |
899 | struct rt_sigframe_x32 __user *frame; | 866 | struct rt_sigframe_x32 __user *frame; |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index cfbe3fc41586..8276dc6794cc 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -628,11 +628,12 @@ void math_state_restore(void) | |||
628 | } | 628 | } |
629 | 629 | ||
630 | __thread_fpu_begin(tsk); | 630 | __thread_fpu_begin(tsk); |
631 | |||
631 | /* | 632 | /* |
632 | * Paranoid restore. send a SIGSEGV if we fail to restore the state. | 633 | * Paranoid restore. send a SIGSEGV if we fail to restore the state. |
633 | */ | 634 | */ |
634 | if (unlikely(restore_fpu_checking(tsk))) { | 635 | if (unlikely(restore_fpu_checking(tsk))) { |
635 | __thread_fpu_end(tsk); | 636 | drop_init_fpu(tsk); |
636 | force_sig(SIGSEGV, tsk); | 637 | force_sig(SIGSEGV, tsk); |
637 | return; | 638 | return; |
638 | } | 639 | } |
@@ -645,6 +646,8 @@ dotraplinkage void __kprobes | |||
645 | do_device_not_available(struct pt_regs *regs, long error_code) | 646 | do_device_not_available(struct pt_regs *regs, long error_code) |
646 | { | 647 | { |
647 | exception_enter(regs); | 648 | exception_enter(regs); |
649 | BUG_ON(use_eager_fpu()); | ||
650 | |||
648 | #ifdef CONFIG_MATH_EMULATION | 651 | #ifdef CONFIG_MATH_EMULATION |
649 | if (read_cr0() & X86_CR0_EM) { | 652 | if (read_cr0() & X86_CR0_EM) { |
650 | struct math_emu_info info = { }; | 653 | struct math_emu_info info = { }; |
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 9e1a8a7ba6e6..4e89b3dd408d 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c | |||
@@ -10,9 +10,7 @@ | |||
10 | #include <linux/compat.h> | 10 | #include <linux/compat.h> |
11 | #include <asm/i387.h> | 11 | #include <asm/i387.h> |
12 | #include <asm/fpu-internal.h> | 12 | #include <asm/fpu-internal.h> |
13 | #ifdef CONFIG_IA32_EMULATION | 13 | #include <asm/sigframe.h> |
14 | #include <asm/sigcontext32.h> | ||
15 | #endif | ||
16 | #include <asm/xcr.h> | 14 | #include <asm/xcr.h> |
17 | 15 | ||
18 | /* | 16 | /* |
@@ -23,13 +21,9 @@ u64 pcntxt_mask; | |||
23 | /* | 21 | /* |
24 | * Represents init state for the supported extended state. | 22 | * Represents init state for the supported extended state. |
25 | */ | 23 | */ |
26 | static struct xsave_struct *init_xstate_buf; | 24 | struct xsave_struct *init_xstate_buf; |
27 | |||
28 | struct _fpx_sw_bytes fx_sw_reserved; | ||
29 | #ifdef CONFIG_IA32_EMULATION | ||
30 | struct _fpx_sw_bytes fx_sw_reserved_ia32; | ||
31 | #endif | ||
32 | 25 | ||
26 | static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; | ||
33 | static unsigned int *xstate_offsets, *xstate_sizes, xstate_features; | 27 | static unsigned int *xstate_offsets, *xstate_sizes, xstate_features; |
34 | 28 | ||
35 | /* | 29 | /* |
@@ -44,9 +38,9 @@ static unsigned int *xstate_offsets, *xstate_sizes, xstate_features; | |||
44 | */ | 38 | */ |
45 | void __sanitize_i387_state(struct task_struct *tsk) | 39 | void __sanitize_i387_state(struct task_struct *tsk) |
46 | { | 40 | { |
47 | u64 xstate_bv; | ||
48 | int feature_bit = 0x2; | ||
49 | struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave; | 41 | struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave; |
42 | int feature_bit = 0x2; | ||
43 | u64 xstate_bv; | ||
50 | 44 | ||
51 | if (!fx) | 45 | if (!fx) |
52 | return; | 46 | return; |
@@ -104,213 +98,326 @@ void __sanitize_i387_state(struct task_struct *tsk) | |||
104 | * Check for the presence of extended state information in the | 98 | * Check for the presence of extended state information in the |
105 | * user fpstate pointer in the sigcontext. | 99 | * user fpstate pointer in the sigcontext. |
106 | */ | 100 | */ |
107 | int check_for_xstate(struct i387_fxsave_struct __user *buf, | 101 | static inline int check_for_xstate(struct i387_fxsave_struct __user *buf, |
108 | void __user *fpstate, | 102 | void __user *fpstate, |
109 | struct _fpx_sw_bytes *fx_sw_user) | 103 | struct _fpx_sw_bytes *fx_sw) |
110 | { | 104 | { |
111 | int min_xstate_size = sizeof(struct i387_fxsave_struct) + | 105 | int min_xstate_size = sizeof(struct i387_fxsave_struct) + |
112 | sizeof(struct xsave_hdr_struct); | 106 | sizeof(struct xsave_hdr_struct); |
113 | unsigned int magic2; | 107 | unsigned int magic2; |
114 | int err; | ||
115 | 108 | ||
116 | err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0], | 109 | if (__copy_from_user(fx_sw, &buf->sw_reserved[0], sizeof(*fx_sw))) |
117 | sizeof(struct _fpx_sw_bytes)); | 110 | return -1; |
118 | if (err) | ||
119 | return -EFAULT; | ||
120 | 111 | ||
121 | /* | 112 | /* Check for the first magic field and other error scenarios. */ |
122 | * First Magic check failed. | 113 | if (fx_sw->magic1 != FP_XSTATE_MAGIC1 || |
123 | */ | 114 | fx_sw->xstate_size < min_xstate_size || |
124 | if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1) | 115 | fx_sw->xstate_size > xstate_size || |
125 | return -EINVAL; | 116 | fx_sw->xstate_size > fx_sw->extended_size) |
117 | return -1; | ||
126 | 118 | ||
127 | /* | 119 | /* |
128 | * Check for error scenarios. | ||
129 | */ | ||
130 | if (fx_sw_user->xstate_size < min_xstate_size || | ||
131 | fx_sw_user->xstate_size > xstate_size || | ||
132 | fx_sw_user->xstate_size > fx_sw_user->extended_size) | ||
133 | return -EINVAL; | ||
134 | |||
135 | err = __get_user(magic2, (__u32 __user *) (fpstate + | ||
136 | fx_sw_user->extended_size - | ||
137 | FP_XSTATE_MAGIC2_SIZE)); | ||
138 | if (err) | ||
139 | return err; | ||
140 | /* | ||
141 | * Check for the presence of second magic word at the end of memory | 120 | * Check for the presence of second magic word at the end of memory |
142 | * layout. This detects the case where the user just copied the legacy | 121 | * layout. This detects the case where the user just copied the legacy |
143 | * fpstate layout without copying the extended state information | 122 | * fpstate layout without copying the extended state information |
144 | * in the memory layout. | 123 | * in the memory layout. |
145 | */ | 124 | */ |
146 | if (magic2 != FP_XSTATE_MAGIC2) | 125 | if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size)) |
147 | return -EFAULT; | 126 | || magic2 != FP_XSTATE_MAGIC2) |
127 | return -1; | ||
148 | 128 | ||
149 | return 0; | 129 | return 0; |
150 | } | 130 | } |
151 | 131 | ||
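The rewritten check validates a "magic sandwich" around the user's state: magic1 at the front of the sw_reserved bytes, size sanity in between, and magic2 directly after xstate_size bytes (rather than at extended_size, as before). A self-contained user-space model of the layout check, with simplified types and made-up sizes:

    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    #define FP_XSTATE_MAGIC1 0x46505853u
    #define FP_XSTATE_MAGIC2 0x46505845u

    struct fx_sw_bytes {
        uint32_t magic1;
        uint32_t extended_size;
        uint64_t xstate_bv;
        uint32_t xstate_size;
    };

    /* Mirrors the kernel's checks against a flat user buffer. */
    static int check_layout(const uint8_t *frame, const struct fx_sw_bytes *sw,
                            uint32_t kernel_xstate_size, uint32_t min_size)
    {
        uint32_t magic2;

        if (sw->magic1 != FP_XSTATE_MAGIC1 ||
            sw->xstate_size < min_size ||
            sw->xstate_size > kernel_xstate_size ||
            sw->xstate_size > sw->extended_size)
            return -1;

        memcpy(&magic2, frame + sw->xstate_size, sizeof(magic2));
        return magic2 == FP_XSTATE_MAGIC2 ? 0 : -1;
    }

    int main(void)
    {
        uint8_t frame[64] = { 0 };
        struct fx_sw_bytes sw = {
            .magic1 = FP_XSTATE_MAGIC1, .extended_size = 64,
            .xstate_bv = 0x3, .xstate_size = 32,
        };
        uint32_t m2 = FP_XSTATE_MAGIC2;

        memcpy(frame + sw.xstate_size, &m2, sizeof(m2));
        printf("frame valid: %d\n", check_layout(frame, &sw, 48, 16) == 0);
        return 0;
    }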
152 | #ifdef CONFIG_X86_64 | ||
153 | /* | 132 | /* |
154 | * Signal frame handlers. | 133 | * Signal frame handlers. |
155 | */ | 134 | */ |
156 | 135 | static inline int save_fsave_header(struct task_struct *tsk, void __user *buf) | |
157 | int save_i387_xstate(void __user *buf) | ||
158 | { | 136 | { |
159 | struct task_struct *tsk = current; | 137 | if (use_fxsr()) { |
160 | int err = 0; | 138 | struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; |
161 | 139 | struct user_i387_ia32_struct env; | |
162 | if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size)) | 140 | struct _fpstate_ia32 __user *fp = buf; |
163 | return -EACCES; | ||
164 | 141 | ||
165 | BUG_ON(sig_xstate_size < xstate_size); | 142 | convert_from_fxsr(&env, tsk); |
166 | 143 | ||
167 | if ((unsigned long)buf % 64) | 144 | if (__copy_to_user(buf, &env, sizeof(env)) || |
168 | pr_err("%s: bad fpstate %p\n", __func__, buf); | 145 | __put_user(xsave->i387.swd, &fp->status) || |
169 | 146 | __put_user(X86_FXSR_MAGIC, &fp->magic)) | |
170 | if (!used_math()) | 147 | return -1; |
171 | return 0; | ||
172 | |||
173 | if (user_has_fpu()) { | ||
174 | if (use_xsave()) | ||
175 | err = xsave_user(buf); | ||
176 | else | ||
177 | err = fxsave_user(buf); | ||
178 | |||
179 | if (err) | ||
180 | return err; | ||
181 | user_fpu_end(); | ||
182 | } else { | 148 | } else { |
183 | sanitize_i387_state(tsk); | 149 | struct i387_fsave_struct __user *fp = buf; |
184 | if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave, | 150 | u32 swd; |
185 | xstate_size)) | 151 | if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status)) |
186 | return -1; | 152 | return -1; |
187 | } | 153 | } |
188 | 154 | ||
189 | clear_used_math(); /* trigger finit */ | 155 | return 0; |
156 | } | ||
190 | 157 | ||
191 | if (use_xsave()) { | 158 | static inline int save_xstate_epilog(void __user *buf, int ia32_frame) |
192 | struct _fpstate __user *fx = buf; | 159 | { |
193 | struct _xstate __user *x = buf; | 160 | struct xsave_struct __user *x = buf; |
194 | u64 xstate_bv; | 161 | struct _fpx_sw_bytes *sw_bytes; |
162 | u32 xstate_bv; | ||
163 | int err; | ||
195 | 164 | ||
196 | err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved, | 165 | /* Setup the bytes not touched by the [f]xsave and reserved for SW. */ |
197 | sizeof(struct _fpx_sw_bytes)); | 166 | sw_bytes = ia32_frame ? &fx_sw_reserved_ia32 : &fx_sw_reserved; |
167 | err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes)); | ||
198 | 168 | ||
199 | err |= __put_user(FP_XSTATE_MAGIC2, | 169 | if (!use_xsave()) |
200 | (__u32 __user *) (buf + sig_xstate_size | 170 | return err; |
201 | - FP_XSTATE_MAGIC2_SIZE)); | ||
202 | 171 | ||
203 | /* | 172 | err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size)); |
204 | * Read the xstate_bv which we copied (directly from the cpu or | ||
205 | * from the state in task struct) to the user buffers and | ||
206 | * set the FP/SSE bits. | ||
207 | */ | ||
208 | err |= __get_user(xstate_bv, &x->xstate_hdr.xstate_bv); | ||
209 | 173 | ||
210 | /* | 174 | /* |
211 | * For legacy compatible, we always set FP/SSE bits in the bit | 175 | * Read the xstate_bv which we copied (directly from the cpu or |
212 | * vector while saving the state to the user context. This will | 176 | * from the state in task struct) to the user buffers. |
213 | * enable us capturing any changes(during sigreturn) to | 177 | */ |
214 | * the FP/SSE bits by the legacy applications which don't touch | 178 | err |= __get_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv); |
215 | * xstate_bv in the xsave header. | ||
216 | * | ||
217 | * xsave aware apps can change the xstate_bv in the xsave | ||
218 | * header as well as change any contents in the memory layout. | ||
219 | * xrestore as part of sigreturn will capture all the changes. | ||
220 | */ | ||
221 | xstate_bv |= XSTATE_FPSSE; | ||
222 | 179 | ||
223 | err |= __put_user(xstate_bv, &x->xstate_hdr.xstate_bv); | 180 | /* |
181 | * For legacy compatibility, we always set FP/SSE bits in the bit | ||
182 | * vector while saving the state to the user context. This will | ||
183 | * enable us to capture any changes (during sigreturn) to | ||
184 | * the FP/SSE bits by the legacy applications which don't touch | ||
185 | * xstate_bv in the xsave header. | ||
186 | * | ||
187 | * xsave aware apps can change the xstate_bv in the xsave | ||
188 | * header as well as change any contents in the memory layout. | ||
189 | * xrestore as part of sigreturn will capture all the changes. | ||
190 | */ | ||
191 | xstate_bv |= XSTATE_FPSSE; | ||
224 | 192 | ||
225 | if (err) | 193 | err |= __put_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv); |
226 | return err; | ||
227 | } | ||
228 | 194 | ||
229 | return 1; | 195 | return err; |
196 | } | ||
197 | |||
198 | static inline int save_user_xstate(struct xsave_struct __user *buf) | ||
199 | { | ||
200 | int err; | ||
201 | |||
202 | if (use_xsave()) | ||
203 | err = xsave_user(buf); | ||
204 | else if (use_fxsr()) | ||
205 | err = fxsave_user((struct i387_fxsave_struct __user *) buf); | ||
206 | else | ||
207 | err = fsave_user((struct i387_fsave_struct __user *) buf); | ||
208 | |||
209 | if (unlikely(err) && __clear_user(buf, xstate_size)) | ||
210 | err = -EFAULT; | ||
211 | return err; | ||
230 | } | 212 | } |
231 | 213 | ||
232 | /* | 214 | /* |
233 | * Restore the extended state if present. Otherwise, restore the FP/SSE | 215 | * Save the fpu, extended register state to the user signal frame. |
234 | * state. | 216 | * |
217 | * 'buf_fx' is the 64-byte aligned pointer at which the [f|fx|x]save | ||
218 | * state is copied. | ||
219 | * 'buf' points to the 'buf_fx' or to the fsave header followed by 'buf_fx'. | ||
220 | * | ||
221 | * buf == buf_fx for 64-bit frames and 32-bit fsave frames. | ||
222 | * buf != buf_fx for 32-bit frames with fxstate. | ||
223 | * | ||
224 | * If the fpu, extended register state is live, save the state directly | ||
225 | * to the user frame pointed to by the aligned pointer 'buf_fx'. Otherwise, | ||
226 | * copy the thread's fpu state to the user frame starting at 'buf_fx'. | ||
227 | * | ||
228 | * If this is a 32-bit frame with fxstate, put a fsave header before | ||
229 | * the aligned state at 'buf_fx'. | ||
230 | * | ||
231 | * For [f]xsave state, update the SW reserved fields in the [f]xsave frame | ||
232 | * indicating the absence/presence of the extended state to the user. | ||
235 | */ | 233 | */ |
236 | static int restore_user_xstate(void __user *buf) | 234 | int save_xstate_sig(void __user *buf, void __user *buf_fx, int size) |
237 | { | 235 | { |
238 | struct _fpx_sw_bytes fx_sw_user; | 236 | struct xsave_struct *xsave = ¤t->thread.fpu.state->xsave; |
239 | u64 mask; | 237 | struct task_struct *tsk = current; |
240 | int err; | 238 | int ia32_fxstate = (buf != buf_fx); |
241 | 239 | ||
242 | if (((unsigned long)buf % 64) || | 240 | ia32_fxstate &= (config_enabled(CONFIG_X86_32) || |
243 | check_for_xstate(buf, buf, &fx_sw_user)) | 241 | config_enabled(CONFIG_IA32_EMULATION)); |
244 | goto fx_only; | ||
245 | 242 | ||
246 | mask = fx_sw_user.xstate_bv; | 243 | if (!access_ok(VERIFY_WRITE, buf, size)) |
244 | return -EACCES; | ||
247 | 245 | ||
248 | /* | 246 | if (!HAVE_HWFP) |
249 | * restore the state passed by the user. | 247 | return fpregs_soft_get(current, NULL, 0, |
250 | */ | 248 | sizeof(struct user_i387_ia32_struct), NULL, |
251 | err = xrestore_user(buf, mask); | 249 | (struct _fpstate_ia32 __user *) buf) ? -1 : 1; |
252 | if (err) | ||
253 | return err; | ||
254 | 250 | ||
255 | /* | 251 | if (user_has_fpu()) { |
256 | * init the state skipped by the user. | 252 | /* Save the live register state to the user directly. */ |
257 | */ | 253 | if (save_user_xstate(buf_fx)) |
258 | mask = pcntxt_mask & ~mask; | 254 | return -1; |
259 | if (unlikely(mask)) | 255 | /* Update the thread's fxstate to save the fsave header. */ |
260 | xrstor_state(init_xstate_buf, mask); | 256 | if (ia32_fxstate) |
257 | fpu_fxsave(&tsk->thread.fpu); | ||
258 | } else { | ||
259 | sanitize_i387_state(tsk); | ||
260 | if (__copy_to_user(buf_fx, xsave, xstate_size)) | ||
261 | return -1; | ||
262 | } | ||
263 | |||
264 | /* Save the fsave header for the 32-bit frames. */ | ||
265 | if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf)) | ||
266 | return -1; | ||
267 | |||
268 | if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate)) | ||
269 | return -1; | ||
270 | |||
271 | drop_init_fpu(tsk); /* trigger finit */ | ||
261 | 272 | ||
262 | return 0; | 273 | return 0; |
274 | } | ||
263 | 275 | ||
264 | fx_only: | 276 | static inline void |
265 | /* | 277 | sanitize_restored_xstate(struct task_struct *tsk, |
266 | * couldn't find the extended state information in the | 278 | struct user_i387_ia32_struct *ia32_env, |
267 | * memory layout. Restore just the FP/SSE and init all | 279 | u64 xstate_bv, int fx_only) |
268 | * the other extended state. | 280 | { |
269 | */ | 281 | struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; |
270 | xrstor_state(init_xstate_buf, pcntxt_mask & ~XSTATE_FPSSE); | 282 | struct xsave_hdr_struct *xsave_hdr = &xsave->xsave_hdr; |
271 | return fxrstor_checking((__force struct i387_fxsave_struct *)buf); | 283 | |
284 | if (use_xsave()) { | ||
285 | /* These bits must be zero. */ | ||
286 | xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; | ||
287 | |||
288 | /* | ||
289 | * Init the state that is not present in the memory | ||
290 | * layout or not enabled by the OS. | ||
291 | */ | ||
292 | if (fx_only) | ||
293 | xsave_hdr->xstate_bv = XSTATE_FPSSE; | ||
294 | else | ||
295 | xsave_hdr->xstate_bv &= (pcntxt_mask & xstate_bv); | ||
296 | } | ||
297 | |||
298 | if (use_fxsr()) { | ||
299 | /* | ||
300 | * mxcsr reserved bits must be masked to zero for security | ||
301 | * reasons. | ||
302 | */ | ||
303 | xsave->i387.mxcsr &= mxcsr_feature_mask; | ||
304 | |||
305 | convert_to_fxsr(tsk, ia32_env); | ||
306 | } | ||
272 | } | 307 | } |
273 | 308 | ||
274 | /* | 309 | /* |
275 | * This restores directly out of user space. Exceptions are handled. | 310 | * Restore the extended state if present. Otherwise, restore the FP/SSE state. |
276 | */ | 311 | */ |
277 | int restore_i387_xstate(void __user *buf) | 312 | static inline int restore_user_xstate(void __user *buf, u64 xbv, int fx_only) |
278 | { | 313 | { |
314 | if (use_xsave()) { | ||
315 | if ((unsigned long)buf % 64 || fx_only) { | ||
316 | u64 init_bv = pcntxt_mask & ~XSTATE_FPSSE; | ||
317 | xrstor_state(init_xstate_buf, init_bv); | ||
318 | return fxrstor_checking((__force void *) buf); | ||
319 | } else { | ||
320 | u64 init_bv = pcntxt_mask & ~xbv; | ||
321 | if (unlikely(init_bv)) | ||
322 | xrstor_state(init_xstate_buf, init_bv); | ||
323 | return xrestore_user(buf, xbv); | ||
324 | } | ||
325 | } else if (use_fxsr()) { | ||
326 | return fxrstor_checking((__force void *) buf); | ||
327 | } else | ||
328 | return frstor_checking((__force void *) buf); | ||
329 | } | ||
330 | |||
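The init_bv computation in the else arm above is the crux of the new restore path: any feature the OS enables (pcntxt_mask) that the signal frame does not supply (xbv) is re-initialized from init_xstate_buf before xrestore_user() loads the user's features. A worked example with made-up feature bits:

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical feature bits, for illustration only. */
    #define XSTATE_FP   (1u << 0)
    #define XSTATE_SSE  (1u << 1)
    #define XSTATE_YMM  (1u << 2)

    int main(void)
    {
        uint64_t pcntxt_mask = XSTATE_FP | XSTATE_SSE | XSTATE_YMM;
        uint64_t xbv = XSTATE_FP | XSTATE_SSE;     /* frame lacks YMM */

        /* Matches the else arm above: init what the user skipped. */
        uint64_t init_bv = pcntxt_mask & ~xbv;

        printf("init from init_xstate_buf: 0x%llx\n",
               (unsigned long long)init_bv);       /* 0x4: YMM */
        printf("restore from user frame:  0x%llx\n",
               (unsigned long long)xbv);           /* 0x3: FP|SSE */
        return 0;
    }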
331 | int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) | ||
332 | { | ||
333 | int ia32_fxstate = (buf != buf_fx); | ||
279 | struct task_struct *tsk = current; | 334 | struct task_struct *tsk = current; |
280 | int err = 0; | 335 | int state_size = xstate_size; |
336 | u64 xstate_bv = 0; | ||
337 | int fx_only = 0; | ||
338 | |||
339 | ia32_fxstate &= (config_enabled(CONFIG_X86_32) || | ||
340 | config_enabled(CONFIG_IA32_EMULATION)); | ||
281 | 341 | ||
282 | if (!buf) { | 342 | if (!buf) { |
283 | if (used_math()) | 343 | drop_init_fpu(tsk); |
284 | goto clear; | ||
285 | return 0; | 344 | return 0; |
286 | } else | 345 | } |
287 | if (!access_ok(VERIFY_READ, buf, sig_xstate_size)) | ||
288 | return -EACCES; | ||
289 | 346 | ||
290 | if (!used_math()) { | 347 | if (!access_ok(VERIFY_READ, buf, size)) |
291 | err = init_fpu(tsk); | 348 | return -EACCES; |
292 | if (err) | 349 | |
293 | return err; | 350 | if (!used_math() && init_fpu(tsk)) |
351 | return -1; | ||
352 | |||
353 | if (!HAVE_HWFP) { | ||
354 | return fpregs_soft_set(current, NULL, | ||
355 | 0, sizeof(struct user_i387_ia32_struct), | ||
356 | NULL, buf) != 0; | ||
294 | } | 357 | } |
295 | 358 | ||
296 | user_fpu_begin(); | 359 | if (use_xsave()) { |
297 | if (use_xsave()) | 360 | struct _fpx_sw_bytes fx_sw_user; |
298 | err = restore_user_xstate(buf); | 361 | if (unlikely(check_for_xstate(buf_fx, buf_fx, &fx_sw_user))) { |
299 | else | 362 | /* |
300 | err = fxrstor_checking((__force struct i387_fxsave_struct *) | 363 | * Couldn't find the extended state information in the |
301 | buf); | 364 | * memory layout. Restore just the FP/SSE and init all |
302 | if (unlikely(err)) { | 365 | * the other extended state. |
366 | */ | ||
367 | state_size = sizeof(struct i387_fxsave_struct); | ||
368 | fx_only = 1; | ||
369 | } else { | ||
370 | state_size = fx_sw_user.xstate_size; | ||
371 | xstate_bv = fx_sw_user.xstate_bv; | ||
372 | } | ||
373 | } | ||
374 | |||
375 | if (ia32_fxstate) { | ||
376 | /* | ||
377 | * For 32-bit frames with fxstate, copy the user state to the | ||
378 | * thread's fpu state, reconstruct fxstate from the fsave | ||
379 | * header. Sanitize the copied state etc. | ||
380 | */ | ||
381 | struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; | ||
382 | struct user_i387_ia32_struct env; | ||
383 | int err = 0; | ||
384 | |||
385 | /* | ||
386 | * Drop the current fpu, which clears used_math(). This ensures | ||
387 | * that a context switch during the copy of the new state cannot | ||
388 | * save or restore the half-written intermediate state and | ||
389 | * thereby corrupt the freshly restored state. We become ready | ||
390 | * to restore/save the state again only once set_used_math() is | ||
391 | * called below. | ||
392 | */ | ||
393 | drop_fpu(tsk); | ||
394 | |||
395 | if (__copy_from_user(xsave, buf_fx, state_size) || | ||
396 | __copy_from_user(&env, buf, sizeof(env))) { | ||
397 | err = -1; | ||
398 | } else { | ||
399 | sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only); | ||
400 | set_used_math(); | ||
401 | } | ||
402 | |||
403 | if (use_eager_fpu()) | ||
404 | math_state_restore(); | ||
405 | |||
406 | return err; | ||
407 | } else { | ||
303 | /* | 408 | /* |
304 | * Encountered an error while doing the restore from the | 409 | * For 64-bit frames and 32-bit fsave frames, restore the user |
305 | * user buffer, clear the fpu state. | 410 | * state to the registers directly (with exceptions handled). |
306 | */ | 411 | */ |
307 | clear: | 412 | user_fpu_begin(); |
308 | clear_fpu(tsk); | 413 | if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) { |
309 | clear_used_math(); | 414 | drop_init_fpu(tsk); |
415 | return -1; | ||
416 | } | ||
310 | } | 417 | } |
311 | return err; | 418 | |
419 | return 0; | ||
312 | } | 420 | } |
313 | #endif | ||
314 | 421 | ||
315 | /* | 422 | /* |
316 | * Prepare the SW reserved portion of the fxsave memory layout, indicating | 423 | * Prepare the SW reserved portion of the fxsave memory layout, indicating |
@@ -321,31 +428,22 @@ clear: | |||
321 | */ | 428 | */ |
322 | static void prepare_fx_sw_frame(void) | 429 | static void prepare_fx_sw_frame(void) |
323 | { | 430 | { |
324 | int size_extended = (xstate_size - sizeof(struct i387_fxsave_struct)) + | 431 | int fsave_header_size = sizeof(struct i387_fsave_struct); |
325 | FP_XSTATE_MAGIC2_SIZE; | 432 | int size = xstate_size + FP_XSTATE_MAGIC2_SIZE; |
326 | 433 | ||
327 | sig_xstate_size = sizeof(struct _fpstate) + size_extended; | 434 | if (config_enabled(CONFIG_X86_32)) |
328 | 435 | size += fsave_header_size; | |
329 | #ifdef CONFIG_IA32_EMULATION | ||
330 | sig_xstate_ia32_size = sizeof(struct _fpstate_ia32) + size_extended; | ||
331 | #endif | ||
332 | |||
333 | memset(&fx_sw_reserved, 0, sizeof(fx_sw_reserved)); | ||
334 | 436 | ||
335 | fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; | 437 | fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; |
336 | fx_sw_reserved.extended_size = sig_xstate_size; | 438 | fx_sw_reserved.extended_size = size; |
337 | fx_sw_reserved.xstate_bv = pcntxt_mask; | 439 | fx_sw_reserved.xstate_bv = pcntxt_mask; |
338 | fx_sw_reserved.xstate_size = xstate_size; | 440 | fx_sw_reserved.xstate_size = xstate_size; |
339 | #ifdef CONFIG_IA32_EMULATION | ||
340 | memcpy(&fx_sw_reserved_ia32, &fx_sw_reserved, | ||
341 | sizeof(struct _fpx_sw_bytes)); | ||
342 | fx_sw_reserved_ia32.extended_size = sig_xstate_ia32_size; | ||
343 | #endif | ||
344 | } | ||
345 | 441 | ||
346 | #ifdef CONFIG_X86_64 | 442 | if (config_enabled(CONFIG_IA32_EMULATION)) { |
347 | unsigned int sig_xstate_size = sizeof(struct _fpstate); | 443 | fx_sw_reserved_ia32 = fx_sw_reserved; |
348 | #endif | 444 | fx_sw_reserved_ia32.extended_size += fsave_header_size; |
445 | } | ||
446 | } | ||
349 | 447 | ||
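The size bookkeeping in prepare_fx_sw_frame() is easier to follow with numbers plugged in. The values below are purely illustrative; the real xstate_size is computed from CPUID at boot, and the fsave header size is assumed here from the legacy layout:

    #include <stdio.h>

    int main(void)
    {
        int xstate_size = 832;        /* invented; set from CPUID in reality */
        int magic2_size = 4;          /* FP_XSTATE_MAGIC2_SIZE */
        int fsave_header_size = 112;  /* assumed sizeof(struct i387_fsave_struct) */

        int size = xstate_size + magic2_size;      /* 64-bit frames */
        int size_ia32 = size + fsave_header_size;  /* 32-bit frames with fxstate */

        printf("extended_size        = %d\n", size);      /* 836 */
        printf("extended_size (ia32) = %d\n", size_ia32); /* 948 */
        return 0;
    }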
350 | /* | 448 | /* |
351 | * Enable the extended processor state save/restore feature | 449 | * Enable the extended processor state save/restore feature |
@@ -384,19 +482,21 @@ static void __init setup_xstate_features(void) | |||
384 | /* | 482 | /* |
385 | * setup the xstate image representing the init state | 483 | * setup the xstate image representing the init state |
386 | */ | 484 | */ |
387 | static void __init setup_xstate_init(void) | 485 | static void __init setup_init_fpu_buf(void) |
388 | { | 486 | { |
389 | setup_xstate_features(); | ||
390 | |||
391 | /* | 487 | /* |
392 | * Setup init_xstate_buf to represent the init state of | 488 | * Setup init_xstate_buf to represent the init state of |
393 | * all the features managed by the xsave | 489 | * all the features managed by the xsave |
394 | */ | 490 | */ |
395 | init_xstate_buf = alloc_bootmem_align(xstate_size, | 491 | init_xstate_buf = alloc_bootmem_align(xstate_size, |
396 | __alignof__(struct xsave_struct)); | 492 | __alignof__(struct xsave_struct)); |
397 | init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; | 493 | fx_finit(&init_xstate_buf->i387); |
494 | |||
495 | if (!cpu_has_xsave) | ||
496 | return; | ||
497 | |||
498 | setup_xstate_features(); | ||
398 | 499 | ||
399 | clts(); | ||
400 | /* | 500 | /* |
401 | * Init all the features state with header_bv being 0x0 | 501 | * Init all the features state with header_bv being 0x0 |
402 | */ | 502 | */ |
@@ -406,9 +506,21 @@ static void __init setup_xstate_init(void) | |||
406 | * of any feature which is not represented by all zero's. | 506 | * of any feature which is not represented by all zero's. |
407 | */ | 507 | */ |
408 | xsave_state(init_xstate_buf, -1); | 508 | xsave_state(init_xstate_buf, -1); |
409 | stts(); | ||
410 | } | 509 | } |
411 | 510 | ||
511 | static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; | ||
512 | static int __init eager_fpu_setup(char *s) | ||
513 | { | ||
514 | if (!strcmp(s, "on")) | ||
515 | eagerfpu = ENABLE; | ||
516 | else if (!strcmp(s, "off")) | ||
517 | eagerfpu = DISABLE; | ||
518 | else if (!strcmp(s, "auto")) | ||
519 | eagerfpu = AUTO; | ||
520 | return 1; | ||
521 | } | ||
522 | __setup("eagerfpu=", eager_fpu_setup); | ||
523 | |||
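The boot parameter is a tri-state, matching the Documentation/kernel-parameters.txt hunk: eagerfpu=on|off|auto, with AUTO left open for the xsaveopt promotion in the xstate_enable_boot_cpu() hunk below. A user-space model of the parsing plus that promotion:

    #include <stdio.h>
    #include <string.h>
    #include <stdbool.h>

    enum eager { AUTO, ENABLE, DISABLE };

    static enum eager parse_eagerfpu(const char *s)
    {
        if (!strcmp(s, "on"))
            return ENABLE;
        if (!strcmp(s, "off"))
            return DISABLE;
        return AUTO;  /* "auto" or anything unrecognized */
    }

    int main(void)
    {
        bool cpu_has_xsaveopt = true;  /* pretend-hardware flag */
        enum eager eagerfpu = parse_eagerfpu("auto");

        /* Mirrors the boot-cpu hunk below: xsaveopt promotes AUTO,
         * but an explicit "off" is respected. */
        if (cpu_has_xsaveopt && eagerfpu != DISABLE)
            eagerfpu = ENABLE;

        printf("eagerfpu %s\n", eagerfpu == ENABLE ? "enabled" : "disabled");
        return 0;
    }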
412 | /* | 524 | /* |
413 | * Enable and initialize the xsave feature. | 525 | * Enable and initialize the xsave feature. |
414 | */ | 526 | */ |
@@ -445,8 +557,11 @@ static void __init xstate_enable_boot_cpu(void) | |||
445 | 557 | ||
446 | update_regset_xstate_info(xstate_size, pcntxt_mask); | 558 | update_regset_xstate_info(xstate_size, pcntxt_mask); |
447 | prepare_fx_sw_frame(); | 559 | prepare_fx_sw_frame(); |
560 | setup_init_fpu_buf(); | ||
448 | 561 | ||
449 | setup_xstate_init(); | 562 | /* Auto enable eagerfpu for xsaveopt */ |
563 | if (cpu_has_xsaveopt && eagerfpu != DISABLE) | ||
564 | eagerfpu = ENABLE; | ||
450 | 565 | ||
451 | pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n", | 566 | pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n", |
452 | pcntxt_mask, xstate_size); | 567 | pcntxt_mask, xstate_size); |
@@ -471,3 +586,43 @@ void __cpuinit xsave_init(void) | |||
471 | next_func = xstate_enable; | 586 | next_func = xstate_enable; |
472 | this_func(); | 587 | this_func(); |
473 | } | 588 | } |
589 | |||
590 | static inline void __init eager_fpu_init_bp(void) | ||
591 | { | ||
592 | current->thread.fpu.state = | ||
593 | alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct)); | ||
594 | if (!init_xstate_buf) | ||
595 | setup_init_fpu_buf(); | ||
596 | } | ||
597 | |||
598 | void __cpuinit eager_fpu_init(void) | ||
599 | { | ||
600 | static __refdata void (*boot_func)(void) = eager_fpu_init_bp; | ||
601 | |||
602 | clear_used_math(); | ||
603 | current_thread_info()->status = 0; | ||
604 | |||
605 | if (eagerfpu == ENABLE) | ||
606 | setup_force_cpu_cap(X86_FEATURE_EAGER_FPU); | ||
607 | |||
608 | if (!cpu_has_eager_fpu) { | ||
609 | stts(); | ||
610 | return; | ||
611 | } | ||
612 | |||
613 | if (boot_func) { | ||
614 | boot_func(); | ||
615 | boot_func = NULL; | ||
616 | } | ||
617 | |||
618 | /* | ||
619 | * This is the same as math_state_restore(). But use_xsave() is | ||
620 | * not yet patched to use math_state_restore(). | ||
621 | */ | ||
622 | init_fpu(current); | ||
623 | __thread_fpu_begin(current); | ||
624 | if (cpu_has_xsave) | ||
625 | xrstor_state(init_xstate_buf, -1); | ||
626 | else | ||
627 | fxrstor_checking(&init_xstate_buf->i387); | ||
628 | } | ||
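The boot_func pointer in eager_fpu_init() is a small idiom worth noting: it lets a __cpuinit function run a __init function exactly once, on the boot CPU, without tripping section-mismatch warnings (__refdata suppresses them for the init reference). A stripped-down sketch of the same pattern, with hypothetical names:

    static __refdata void (*boot_once)(void) = my_boot_init;   /* __init func */

    void __cpuinit my_percpu_init(void)
    {
            if (boot_once) {                /* non-NULL only for the first caller */
                    boot_once();
                    boot_once = NULL;       /* later CPUs skip the __init path */
            }
            /* ... per-CPU initialization ... */
    }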
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b06737d122f4..851aa7c3b890 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1493,8 +1493,12 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) | |||
1493 | #ifdef CONFIG_X86_64 | 1493 | #ifdef CONFIG_X86_64 |
1494 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); | 1494 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); |
1495 | #endif | 1495 | #endif |
1496 | if (user_has_fpu()) | 1496 | /* |
1497 | clts(); | 1497 | * If the FPU is not active (through the host task or |
1498 | * the guest vcpu), then restore the cr0.TS bit. | ||
1499 | */ | ||
1500 | if (!user_has_fpu() && !vmx->vcpu.guest_fpu_loaded) | ||
1501 | stts(); | ||
1498 | load_gdt(&__get_cpu_var(host_gdt)); | 1502 | load_gdt(&__get_cpu_var(host_gdt)); |
1499 | } | 1503 | } |
1500 | 1504 | ||
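A hedged summary of the resulting cr0.TS handling: together with the HOST_CR0 hunk below, which keeps TS clear across VM exits, this host-state reload makes the only decision:

    /*
     *   user_has_fpu()   guest_fpu_loaded   action on host-state reload
     *        yes               any           leave TS clear (host task owns the FPU)
     *        no                yes           leave TS clear (guest state is live)
     *        no                no            stts() - lazy model traps the next use
     */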
@@ -3743,7 +3747,7 @@ static void vmx_set_constant_host_state(void) | |||
3743 | unsigned long tmpl; | 3747 | unsigned long tmpl; |
3744 | struct desc_ptr dt; | 3748 | struct desc_ptr dt; |
3745 | 3749 | ||
3746 | vmcs_writel(HOST_CR0, read_cr0() | X86_CR0_TS); /* 22.2.3 */ | 3750 | vmcs_writel(HOST_CR0, read_cr0() & ~X86_CR0_TS); /* 22.2.3 */ |
3747 | vmcs_writel(HOST_CR4, read_cr4()); /* 22.2.3, 22.2.5 */ | 3751 | vmcs_writel(HOST_CR4, read_cr4()); /* 22.2.3, 22.2.5 */ |
3748 | vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */ | 3752 | vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */ |
3749 | 3753 | ||
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2966c847d489..1f09552572fa 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5979,7 +5979,7 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) | |||
5979 | */ | 5979 | */ |
5980 | kvm_put_guest_xcr0(vcpu); | 5980 | kvm_put_guest_xcr0(vcpu); |
5981 | vcpu->guest_fpu_loaded = 1; | 5981 | vcpu->guest_fpu_loaded = 1; |
5982 | unlazy_fpu(current); | 5982 | __kernel_fpu_begin(); |
5983 | fpu_restore_checking(&vcpu->arch.guest_fpu); | 5983 | fpu_restore_checking(&vcpu->arch.guest_fpu); |
5984 | trace_kvm_fpu(1); | 5984 | trace_kvm_fpu(1); |
5985 | } | 5985 | } |
@@ -5993,6 +5993,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) | |||
5993 | 5993 | ||
5994 | vcpu->guest_fpu_loaded = 0; | 5994 | vcpu->guest_fpu_loaded = 0; |
5995 | fpu_save_init(&vcpu->arch.guest_fpu); | 5995 | fpu_save_init(&vcpu->arch.guest_fpu); |
5996 | __kernel_fpu_end(); | ||
5996 | ++vcpu->stat.fpu_reload; | 5997 | ++vcpu->stat.fpu_reload; |
5997 | kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); | 5998 | kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); |
5998 | trace_kvm_fpu(0); | 5999 | trace_kvm_fpu(0); |
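The unlazy_fpu() call is gone in favor of the common in-kernel FPU interface; a minimal sketch of how ordinary kernel code is expected to use it (the __-prefixed variants used by KVM here are assumed to be the same operations minus the preemption bracketing, which kvm_load/put_guest_fpu handle themselves):

    #include <asm/i387.h>

    static void sse_accelerated_work(void)
    {
            kernel_fpu_begin();     /* save user FPU state, disable preemption */
            /* ... code that may clobber FPU/SSE/AVX registers ... */
            kernel_fpu_end();       /* hand the FPU back, re-enable preemption */
    }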
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index 39809035320a..4af12e1844d5 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -203,8 +203,8 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) | |||
203 | * we set it now, so we can trap and pass that trap to the Guest if it | 203 | * we set it now, so we can trap and pass that trap to the Guest if it |
204 | * uses the FPU. | 204 | * uses the FPU. |
205 | */ | 205 | */ |
206 | if (cpu->ts) | 206 | if (cpu->ts && user_has_fpu()) |
207 | unlazy_fpu(current); | 207 | stts(); |
208 | 208 | ||
209 | /* | 209 | /* |
210 | * SYSENTER is an optimized way of doing system calls. We can't allow | 210 | * SYSENTER is an optimized way of doing system calls. We can't allow |
@@ -234,6 +234,10 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) | |||
234 | if (boot_cpu_has(X86_FEATURE_SEP)) | 234 | if (boot_cpu_has(X86_FEATURE_SEP)) |
235 | wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); | 235 | wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); |
236 | 236 | ||
237 | /* Clear the host TS bit if it was set above. */ | ||
238 | if (cpu->ts && user_has_fpu()) | ||
239 | clts(); | ||
240 | |||
237 | /* | 241 | /* |
238 | * If the Guest page faulted, then the cr2 register will tell us the | 242 | * If the Guest page faulted, then the cr2 register will tell us the |
239 | * bad virtual address. We have to grab this now, because once we | 243 | * bad virtual address. We have to grab this now, because once we |
@@ -249,7 +253,7 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) | |||
249 | * a different CPU. So all the critical stuff should be done | 253 | * a different CPU. So all the critical stuff should be done |
250 | * before this. | 254 | * before this. |
251 | */ | 255 | */ |
252 | else if (cpu->regs->trapnum == 7) | 256 | else if (cpu->regs->trapnum == 7 && !user_has_fpu()) |
253 | math_state_restore(); | 257 | math_state_restore(); |
254 | } | 258 | } |
255 | 259 | ||
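The trapnum == 7 case relies on the x86 lazy-FPU contract: while cr0.TS is set, the first FPU instruction raises #NM (vector 7), and the kernel's handler reloads the owner's state. A rough sketch of the native handler this path mirrors (simplified; the real one also covers math emulation):

    dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code)
    {
            math_state_restore();   /* clears TS and reloads current's FPU state */
    }

The new !user_has_fpu() guard keeps the host from needlessly reloading state when the host task still owns the live FPU registers at the time the Guest takes the trap.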