diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-07-25 21:48:27 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-07-25 21:48:27 -0400 |
commit | 2d724ffddd958f21e2711b7400c63bdfee287d75 (patch) | |
tree | 227f80aa66924ac2d4649d46844491f6a87b0a35 /arch/x86/kernel | |
parent | 36e635cb21d96da0f30b91a39cc95ef4ed1bce26 (diff) | |
parent | ec3ed4a2104b8d1ab8da2db5b1221b2ba8a7a6e1 (diff) |
Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fpu updates from Ingo Molnar:
"The main x86 FPU changes in this cycle were:
- a large series of cleanups, fixes and enhancements to re-enable the
XSAVES instruction on Intel CPUs - which is the most advanced
instruction to do FPU context switches (Yu-cheng Yu, Fenghua Yu)
- Add FPU tracepoints for the FPU state machine (Dave Hansen)"
* 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/fpu: Do not BUG_ON() in early FPU code
x86/fpu/xstate: Re-enable XSAVES
x86/fpu/xstate: Fix fpstate_init() for XRSTORS
x86/fpu/xstate: Return NULL for disabled xstate component address
x86/fpu/xstate: Fix __fpu_restore_sig() for XSAVES
x86/fpu/xstate: Fix xstate_offsets, xstate_sizes for non-extended xstates
x86/fpu/xstate: Fix XSTATE component offset print out
x86/fpu/xstate: Fix PTRACE frames for XSAVES
x86/fpu/xstate: Fix supervisor xstate component offset
x86/fpu/xstate: Align xstate components according to CPUID
x86/fpu/xstate: Copy xstate registers directly to the signal frame when compacted format is in use
x86/fpu/xstate: Keep init_fpstate.xsave.header.xfeatures as zero for init optimization
x86/fpu/xstate: Rename 'xstate_size' to 'fpu_kernel_xstate_size', to distinguish it from 'fpu_user_xstate_size'
x86/fpu/xstate: Define and use 'fpu_user_xstate_size'
x86/fpu: Add tracepoints to dump FPU state at key points
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/fpu/core.c | 33 | ||||
-rw-r--r-- | arch/x86/kernel/fpu/init.c | 34 | ||||
-rw-r--r-- | arch/x86/kernel/fpu/regset.c | 52 | ||||
-rw-r--r-- | arch/x86/kernel/fpu/signal.c | 46 | ||||
-rw-r--r-- | arch/x86/kernel/fpu/xstate.c | 451 |
5 files changed, 464 insertions, 152 deletions
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 97027545a72d..3fc03a09a93b 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c | |||
@@ -8,10 +8,14 @@ | |||
8 | #include <asm/fpu/internal.h> | 8 | #include <asm/fpu/internal.h> |
9 | #include <asm/fpu/regset.h> | 9 | #include <asm/fpu/regset.h> |
10 | #include <asm/fpu/signal.h> | 10 | #include <asm/fpu/signal.h> |
11 | #include <asm/fpu/types.h> | ||
11 | #include <asm/traps.h> | 12 | #include <asm/traps.h> |
12 | 13 | ||
13 | #include <linux/hardirq.h> | 14 | #include <linux/hardirq.h> |
14 | 15 | ||
16 | #define CREATE_TRACE_POINTS | ||
17 | #include <asm/trace/fpu.h> | ||
18 | |||
15 | /* | 19 | /* |
16 | * Represents the initial FPU state. It's mostly (but not completely) zeroes, | 20 | * Represents the initial FPU state. It's mostly (but not completely) zeroes, |
17 | * depending on the FPU hardware format: | 21 | * depending on the FPU hardware format: |
@@ -192,6 +196,7 @@ void fpu__save(struct fpu *fpu) | |||
192 | WARN_ON_FPU(fpu != &current->thread.fpu); | 196 | WARN_ON_FPU(fpu != &current->thread.fpu); |
193 | 197 | ||
194 | preempt_disable(); | 198 | preempt_disable(); |
199 | trace_x86_fpu_before_save(fpu); | ||
195 | if (fpu->fpregs_active) { | 200 | if (fpu->fpregs_active) { |
196 | if (!copy_fpregs_to_fpstate(fpu)) { | 201 | if (!copy_fpregs_to_fpstate(fpu)) { |
197 | if (use_eager_fpu()) | 202 | if (use_eager_fpu()) |
@@ -200,6 +205,7 @@ void fpu__save(struct fpu *fpu) | |||
200 | fpregs_deactivate(fpu); | 205 | fpregs_deactivate(fpu); |
201 | } | 206 | } |
202 | } | 207 | } |
208 | trace_x86_fpu_after_save(fpu); | ||
203 | preempt_enable(); | 209 | preempt_enable(); |
204 | } | 210 | } |
205 | EXPORT_SYMBOL_GPL(fpu__save); | 211 | EXPORT_SYMBOL_GPL(fpu__save); |
@@ -222,7 +228,14 @@ void fpstate_init(union fpregs_state *state) | |||
222 | return; | 228 | return; |
223 | } | 229 | } |
224 | 230 | ||
225 | memset(state, 0, xstate_size); | 231 | memset(state, 0, fpu_kernel_xstate_size); |
232 | |||
233 | /* | ||
234 | * XRSTORS requires that this bit is set in xcomp_bv, or | ||
235 | * it will #GP. Make sure it is replaced after the memset(). | ||
236 | */ | ||
237 | if (static_cpu_has(X86_FEATURE_XSAVES)) | ||
238 | state->xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT; | ||
226 | 239 | ||
227 | if (static_cpu_has(X86_FEATURE_FXSR)) | 240 | if (static_cpu_has(X86_FEATURE_FXSR)) |
228 | fpstate_init_fxstate(&state->fxsave); | 241 | fpstate_init_fxstate(&state->fxsave); |
@@ -247,7 +260,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) | |||
247 | * leak into the child task: | 260 | * leak into the child task: |
248 | */ | 261 | */ |
249 | if (use_eager_fpu()) | 262 | if (use_eager_fpu()) |
250 | memset(&dst_fpu->state.xsave, 0, xstate_size); | 263 | memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size); |
251 | 264 | ||
252 | /* | 265 | /* |
253 | * Save current FPU registers directly into the child | 266 | * Save current FPU registers directly into the child |
@@ -266,7 +279,8 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) | |||
266 | */ | 279 | */ |
267 | preempt_disable(); | 280 | preempt_disable(); |
268 | if (!copy_fpregs_to_fpstate(dst_fpu)) { | 281 | if (!copy_fpregs_to_fpstate(dst_fpu)) { |
269 | memcpy(&src_fpu->state, &dst_fpu->state, xstate_size); | 282 | memcpy(&src_fpu->state, &dst_fpu->state, |
283 | fpu_kernel_xstate_size); | ||
270 | 284 | ||
271 | if (use_eager_fpu()) | 285 | if (use_eager_fpu()) |
272 | copy_kernel_to_fpregs(&src_fpu->state); | 286 | copy_kernel_to_fpregs(&src_fpu->state); |
@@ -275,6 +289,9 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) | |||
275 | } | 289 | } |
276 | preempt_enable(); | 290 | preempt_enable(); |
277 | 291 | ||
292 | trace_x86_fpu_copy_src(src_fpu); | ||
293 | trace_x86_fpu_copy_dst(dst_fpu); | ||
294 | |||
278 | return 0; | 295 | return 0; |
279 | } | 296 | } |
280 | 297 | ||
@@ -288,7 +305,9 @@ void fpu__activate_curr(struct fpu *fpu) | |||
288 | 305 | ||
289 | if (!fpu->fpstate_active) { | 306 | if (!fpu->fpstate_active) { |
290 | fpstate_init(&fpu->state); | 307 | fpstate_init(&fpu->state); |
308 | trace_x86_fpu_init_state(fpu); | ||
291 | 309 | ||
310 | trace_x86_fpu_activate_state(fpu); | ||
292 | /* Safe to do for the current task: */ | 311 | /* Safe to do for the current task: */ |
293 | fpu->fpstate_active = 1; | 312 | fpu->fpstate_active = 1; |
294 | } | 313 | } |
@@ -314,7 +333,9 @@ void fpu__activate_fpstate_read(struct fpu *fpu) | |||
314 | } else { | 333 | } else { |
315 | if (!fpu->fpstate_active) { | 334 | if (!fpu->fpstate_active) { |
316 | fpstate_init(&fpu->state); | 335 | fpstate_init(&fpu->state); |
336 | trace_x86_fpu_init_state(fpu); | ||
317 | 337 | ||
338 | trace_x86_fpu_activate_state(fpu); | ||
318 | /* Safe to do for current and for stopped child tasks: */ | 339 | /* Safe to do for current and for stopped child tasks: */ |
319 | fpu->fpstate_active = 1; | 340 | fpu->fpstate_active = 1; |
320 | } | 341 | } |
@@ -347,7 +368,9 @@ void fpu__activate_fpstate_write(struct fpu *fpu) | |||
347 | fpu->last_cpu = -1; | 368 | fpu->last_cpu = -1; |
348 | } else { | 369 | } else { |
349 | fpstate_init(&fpu->state); | 370 | fpstate_init(&fpu->state); |
371 | trace_x86_fpu_init_state(fpu); | ||
350 | 372 | ||
373 | trace_x86_fpu_activate_state(fpu); | ||
351 | /* Safe to do for stopped child tasks: */ | 374 | /* Safe to do for stopped child tasks: */ |
352 | fpu->fpstate_active = 1; | 375 | fpu->fpstate_active = 1; |
353 | } | 376 | } |
@@ -432,9 +455,11 @@ void fpu__restore(struct fpu *fpu) | |||
432 | 455 | ||
433 | /* Avoid __kernel_fpu_begin() right after fpregs_activate() */ | 456 | /* Avoid __kernel_fpu_begin() right after fpregs_activate() */ |
434 | kernel_fpu_disable(); | 457 | kernel_fpu_disable(); |
458 | trace_x86_fpu_before_restore(fpu); | ||
435 | fpregs_activate(fpu); | 459 | fpregs_activate(fpu); |
436 | copy_kernel_to_fpregs(&fpu->state); | 460 | copy_kernel_to_fpregs(&fpu->state); |
437 | fpu->counter++; | 461 | fpu->counter++; |
462 | trace_x86_fpu_after_restore(fpu); | ||
438 | kernel_fpu_enable(); | 463 | kernel_fpu_enable(); |
439 | } | 464 | } |
440 | EXPORT_SYMBOL_GPL(fpu__restore); | 465 | EXPORT_SYMBOL_GPL(fpu__restore); |
@@ -463,6 +488,8 @@ void fpu__drop(struct fpu *fpu) | |||
463 | 488 | ||
464 | fpu->fpstate_active = 0; | 489 | fpu->fpstate_active = 0; |
465 | 490 | ||
491 | trace_x86_fpu_dropped(fpu); | ||
492 | |||
466 | preempt_enable(); | 493 | preempt_enable(); |
467 | } | 494 | } |
468 | 495 | ||
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index aacfd7a82cec..93982aebb398 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c | |||
@@ -145,8 +145,8 @@ static void __init fpu__init_system_generic(void) | |||
145 | * This is inherent to the XSAVE architecture which puts all state | 145 | * This is inherent to the XSAVE architecture which puts all state |
146 | * components into a single, continuous memory block: | 146 | * components into a single, continuous memory block: |
147 | */ | 147 | */ |
148 | unsigned int xstate_size; | 148 | unsigned int fpu_kernel_xstate_size; |
149 | EXPORT_SYMBOL_GPL(xstate_size); | 149 | EXPORT_SYMBOL_GPL(fpu_kernel_xstate_size); |
150 | 150 | ||
151 | /* Get alignment of the TYPE. */ | 151 | /* Get alignment of the TYPE. */ |
152 | #define TYPE_ALIGN(TYPE) offsetof(struct { char x; TYPE test; }, test) | 152 | #define TYPE_ALIGN(TYPE) offsetof(struct { char x; TYPE test; }, test) |
@@ -178,7 +178,7 @@ static void __init fpu__init_task_struct_size(void) | |||
178 | * Add back the dynamically-calculated register state | 178 | * Add back the dynamically-calculated register state |
179 | * size. | 179 | * size. |
180 | */ | 180 | */ |
181 | task_size += xstate_size; | 181 | task_size += fpu_kernel_xstate_size; |
182 | 182 | ||
183 | /* | 183 | /* |
184 | * We dynamically size 'struct fpu', so we require that | 184 | * We dynamically size 'struct fpu', so we require that |
@@ -195,7 +195,7 @@ static void __init fpu__init_task_struct_size(void) | |||
195 | } | 195 | } |
196 | 196 | ||
197 | /* | 197 | /* |
198 | * Set up the xstate_size based on the legacy FPU context size. | 198 | * Set up the user and kernel xstate sizes based on the legacy FPU context size. |
199 | * | 199 | * |
200 | * We set this up first, and later it will be overwritten by | 200 | * We set this up first, and later it will be overwritten by |
201 | * fpu__init_system_xstate() if the CPU knows about xstates. | 201 | * fpu__init_system_xstate() if the CPU knows about xstates. |
@@ -208,7 +208,7 @@ static void __init fpu__init_system_xstate_size_legacy(void) | |||
208 | on_boot_cpu = 0; | 208 | on_boot_cpu = 0; |
209 | 209 | ||
210 | /* | 210 | /* |
211 | * Note that xstate_size might be overwriten later during | 211 | * Note that xstate sizes might be overwritten later during |
212 | * fpu__init_system_xstate(). | 212 | * fpu__init_system_xstate(). |
213 | */ | 213 | */ |
214 | 214 | ||
@@ -219,27 +219,17 @@ static void __init fpu__init_system_xstate_size_legacy(void) | |||
219 | */ | 219 | */ |
220 | setup_clear_cpu_cap(X86_FEATURE_XSAVE); | 220 | setup_clear_cpu_cap(X86_FEATURE_XSAVE); |
221 | setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); | 221 | setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); |
222 | xstate_size = sizeof(struct swregs_state); | 222 | fpu_kernel_xstate_size = sizeof(struct swregs_state); |
223 | } else { | 223 | } else { |
224 | if (boot_cpu_has(X86_FEATURE_FXSR)) | 224 | if (boot_cpu_has(X86_FEATURE_FXSR)) |
225 | xstate_size = sizeof(struct fxregs_state); | 225 | fpu_kernel_xstate_size = |
226 | sizeof(struct fxregs_state); | ||
226 | else | 227 | else |
227 | xstate_size = sizeof(struct fregs_state); | 228 | fpu_kernel_xstate_size = |
229 | sizeof(struct fregs_state); | ||
228 | } | 230 | } |
229 | /* | 231 | |
230 | * Quirk: we don't yet handle the XSAVES* instructions | 232 | fpu_user_xstate_size = fpu_kernel_xstate_size; |
231 | * correctly, as we don't correctly convert between | ||
232 | * standard and compacted format when interfacing | ||
233 | * with user-space - so disable it for now. | ||
234 | * | ||
235 | * The difference is small: with recent CPUs the | ||
236 | * compacted format is only marginally smaller than | ||
237 | * the standard FPU state format. | ||
238 | * | ||
239 | * ( This is easy to backport while we are fixing | ||
240 | * XSAVES* support. ) | ||
241 | */ | ||
242 | setup_clear_cpu_cap(X86_FEATURE_XSAVES); | ||
243 | } | 233 | } |
244 | 234 | ||
245 | /* | 235 | /* |
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 81422dfb152b..c114b132d121 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c | |||
@@ -4,6 +4,7 @@ | |||
4 | #include <asm/fpu/internal.h> | 4 | #include <asm/fpu/internal.h> |
5 | #include <asm/fpu/signal.h> | 5 | #include <asm/fpu/signal.h> |
6 | #include <asm/fpu/regset.h> | 6 | #include <asm/fpu/regset.h> |
7 | #include <asm/fpu/xstate.h> | ||
7 | 8 | ||
8 | /* | 9 | /* |
9 | * The xstateregs_active() routine is the same as the regset_fpregs_active() routine, | 10 | * The xstateregs_active() routine is the same as the regset_fpregs_active() routine, |
@@ -85,21 +86,26 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, | |||
85 | if (!boot_cpu_has(X86_FEATURE_XSAVE)) | 86 | if (!boot_cpu_has(X86_FEATURE_XSAVE)) |
86 | return -ENODEV; | 87 | return -ENODEV; |
87 | 88 | ||
88 | fpu__activate_fpstate_read(fpu); | ||
89 | |||
90 | xsave = &fpu->state.xsave; | 89 | xsave = &fpu->state.xsave; |
91 | 90 | ||
92 | /* | 91 | fpu__activate_fpstate_read(fpu); |
93 | * Copy the 48bytes defined by the software first into the xstate | 92 | |
94 | * memory layout in the thread struct, so that we can copy the entire | 93 | if (using_compacted_format()) { |
95 | * xstateregs to the user using one user_regset_copyout(). | 94 | ret = copyout_from_xsaves(pos, count, kbuf, ubuf, xsave); |
96 | */ | 95 | } else { |
97 | memcpy(&xsave->i387.sw_reserved, | 96 | fpstate_sanitize_xstate(fpu); |
98 | xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes)); | 97 | /* |
99 | /* | 98 | * Copy the 48 bytes defined by the software into the xsave |
100 | * Copy the xstate memory layout. | 99 | * area in the thread struct, so that we can copy the whole |
101 | */ | 100 | * area to user using one user_regset_copyout(). |
102 | ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); | 101 | */ |
102 | memcpy(&xsave->i387.sw_reserved, xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes)); | ||
103 | |||
104 | /* | ||
105 | * Copy the xstate memory layout. | ||
106 | */ | ||
107 | ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); | ||
108 | } | ||
103 | return ret; | 109 | return ret; |
104 | } | 110 | } |
105 | 111 | ||
@@ -114,11 +120,27 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, | |||
114 | if (!boot_cpu_has(X86_FEATURE_XSAVE)) | 120 | if (!boot_cpu_has(X86_FEATURE_XSAVE)) |
115 | return -ENODEV; | 121 | return -ENODEV; |
116 | 122 | ||
117 | fpu__activate_fpstate_write(fpu); | 123 | /* |
124 | * A whole standard-format XSAVE buffer is needed: | ||
125 | */ | ||
126 | if ((pos != 0) || (count < fpu_user_xstate_size)) | ||
127 | return -EFAULT; | ||
118 | 128 | ||
119 | xsave = &fpu->state.xsave; | 129 | xsave = &fpu->state.xsave; |
120 | 130 | ||
121 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); | 131 | fpu__activate_fpstate_write(fpu); |
132 | |||
133 | if (boot_cpu_has(X86_FEATURE_XSAVES)) | ||
134 | ret = copyin_to_xsaves(kbuf, ubuf, xsave); | ||
135 | else | ||
136 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); | ||
137 | |||
138 | /* | ||
139 | * In case of failure, mark all states as init: | ||
140 | */ | ||
141 | if (ret) | ||
142 | fpstate_init(&fpu->state); | ||
143 | |||
122 | /* | 144 | /* |
123 | * mxcsr reserved bits must be masked to zero for security reasons. | 145 | * mxcsr reserved bits must be masked to zero for security reasons. |
124 | */ | 146 | */ |
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 31c6a60505e6..9e231d88bb33 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c | |||
@@ -8,8 +8,10 @@ | |||
8 | #include <asm/fpu/internal.h> | 8 | #include <asm/fpu/internal.h> |
9 | #include <asm/fpu/signal.h> | 9 | #include <asm/fpu/signal.h> |
10 | #include <asm/fpu/regset.h> | 10 | #include <asm/fpu/regset.h> |
11 | #include <asm/fpu/xstate.h> | ||
11 | 12 | ||
12 | #include <asm/sigframe.h> | 13 | #include <asm/sigframe.h> |
14 | #include <asm/trace/fpu.h> | ||
13 | 15 | ||
14 | static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; | 16 | static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; |
15 | 17 | ||
@@ -31,7 +33,7 @@ static inline int check_for_xstate(struct fxregs_state __user *buf, | |||
31 | /* Check for the first magic field and other error scenarios. */ | 33 | /* Check for the first magic field and other error scenarios. */ |
32 | if (fx_sw->magic1 != FP_XSTATE_MAGIC1 || | 34 | if (fx_sw->magic1 != FP_XSTATE_MAGIC1 || |
33 | fx_sw->xstate_size < min_xstate_size || | 35 | fx_sw->xstate_size < min_xstate_size || |
34 | fx_sw->xstate_size > xstate_size || | 36 | fx_sw->xstate_size > fpu_user_xstate_size || |
35 | fx_sw->xstate_size > fx_sw->extended_size) | 37 | fx_sw->xstate_size > fx_sw->extended_size) |
36 | return -1; | 38 | return -1; |
37 | 39 | ||
@@ -88,7 +90,8 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame) | |||
88 | if (!use_xsave()) | 90 | if (!use_xsave()) |
89 | return err; | 91 | return err; |
90 | 92 | ||
91 | err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size)); | 93 | err |= __put_user(FP_XSTATE_MAGIC2, |
94 | (__u32 *)(buf + fpu_user_xstate_size)); | ||
92 | 95 | ||
93 | /* | 96 | /* |
94 | * Read the xfeatures which we copied (directly from the cpu or | 97 | * Read the xfeatures which we copied (directly from the cpu or |
@@ -125,7 +128,7 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf) | |||
125 | else | 128 | else |
126 | err = copy_fregs_to_user((struct fregs_state __user *) buf); | 129 | err = copy_fregs_to_user((struct fregs_state __user *) buf); |
127 | 130 | ||
128 | if (unlikely(err) && __clear_user(buf, xstate_size)) | 131 | if (unlikely(err) && __clear_user(buf, fpu_user_xstate_size)) |
129 | err = -EFAULT; | 132 | err = -EFAULT; |
130 | return err; | 133 | return err; |
131 | } | 134 | } |
@@ -167,7 +170,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) | |||
167 | sizeof(struct user_i387_ia32_struct), NULL, | 170 | sizeof(struct user_i387_ia32_struct), NULL, |
168 | (struct _fpstate_32 __user *) buf) ? -1 : 1; | 171 | (struct _fpstate_32 __user *) buf) ? -1 : 1; |
169 | 172 | ||
170 | if (fpregs_active()) { | 173 | if (fpregs_active() || using_compacted_format()) { |
171 | /* Save the live register state to the user directly. */ | 174 | /* Save the live register state to the user directly. */ |
172 | if (copy_fpregs_to_sigframe(buf_fx)) | 175 | if (copy_fpregs_to_sigframe(buf_fx)) |
173 | return -1; | 176 | return -1; |
@@ -175,8 +178,19 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) | |||
175 | if (ia32_fxstate) | 178 | if (ia32_fxstate) |
176 | copy_fxregs_to_kernel(&tsk->thread.fpu); | 179 | copy_fxregs_to_kernel(&tsk->thread.fpu); |
177 | } else { | 180 | } else { |
181 | /* | ||
182 | * It is a *bug* if kernel uses compacted-format for xsave | ||
183 | * area and we copy it out directly to a signal frame. It | ||
184 | * should have been handled above by saving the registers | ||
185 | * directly. | ||
186 | */ | ||
187 | if (boot_cpu_has(X86_FEATURE_XSAVES)) { | ||
188 | WARN_ONCE(1, "x86/fpu: saving compacted-format xsave area to a signal frame!\n"); | ||
189 | return -1; | ||
190 | } | ||
191 | |||
178 | fpstate_sanitize_xstate(&tsk->thread.fpu); | 192 | fpstate_sanitize_xstate(&tsk->thread.fpu); |
179 | if (__copy_to_user(buf_fx, xsave, xstate_size)) | 193 | if (__copy_to_user(buf_fx, xsave, fpu_user_xstate_size)) |
180 | return -1; | 194 | return -1; |
181 | } | 195 | } |
182 | 196 | ||
@@ -250,7 +264,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) | |||
250 | int ia32_fxstate = (buf != buf_fx); | 264 | int ia32_fxstate = (buf != buf_fx); |
251 | struct task_struct *tsk = current; | 265 | struct task_struct *tsk = current; |
252 | struct fpu *fpu = &tsk->thread.fpu; | 266 | struct fpu *fpu = &tsk->thread.fpu; |
253 | int state_size = xstate_size; | 267 | int state_size = fpu_kernel_xstate_size; |
254 | u64 xfeatures = 0; | 268 | u64 xfeatures = 0; |
255 | int fx_only = 0; | 269 | int fx_only = 0; |
256 | 270 | ||
@@ -282,6 +296,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) | |||
282 | */ | 296 | */ |
283 | state_size = sizeof(struct fxregs_state); | 297 | state_size = sizeof(struct fxregs_state); |
284 | fx_only = 1; | 298 | fx_only = 1; |
299 | trace_x86_fpu_xstate_check_failed(fpu); | ||
285 | } else { | 300 | } else { |
286 | state_size = fx_sw_user.xstate_size; | 301 | state_size = fx_sw_user.xstate_size; |
287 | xfeatures = fx_sw_user.xfeatures; | 302 | xfeatures = fx_sw_user.xfeatures; |
@@ -308,9 +323,17 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) | |||
308 | */ | 323 | */ |
309 | fpu__drop(fpu); | 324 | fpu__drop(fpu); |
310 | 325 | ||
311 | if (__copy_from_user(&fpu->state.xsave, buf_fx, state_size) || | 326 | if (using_compacted_format()) { |
312 | __copy_from_user(&env, buf, sizeof(env))) { | 327 | err = copyin_to_xsaves(NULL, buf_fx, |
328 | &fpu->state.xsave); | ||
329 | } else { | ||
330 | err = __copy_from_user(&fpu->state.xsave, | ||
331 | buf_fx, state_size); | ||
332 | } | ||
333 | |||
334 | if (err || __copy_from_user(&env, buf, sizeof(env))) { | ||
313 | fpstate_init(&fpu->state); | 335 | fpstate_init(&fpu->state); |
336 | trace_x86_fpu_init_state(fpu); | ||
314 | err = -1; | 337 | err = -1; |
315 | } else { | 338 | } else { |
316 | sanitize_restored_xstate(tsk, &env, xfeatures, fx_only); | 339 | sanitize_restored_xstate(tsk, &env, xfeatures, fx_only); |
@@ -341,7 +364,8 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) | |||
341 | 364 | ||
342 | static inline int xstate_sigframe_size(void) | 365 | static inline int xstate_sigframe_size(void) |
343 | { | 366 | { |
344 | return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size; | 367 | return use_xsave() ? fpu_user_xstate_size + FP_XSTATE_MAGIC2_SIZE : |
368 | fpu_user_xstate_size; | ||
345 | } | 369 | } |
346 | 370 | ||
347 | /* | 371 | /* |
@@ -385,12 +409,12 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame, | |||
385 | */ | 409 | */ |
386 | void fpu__init_prepare_fx_sw_frame(void) | 410 | void fpu__init_prepare_fx_sw_frame(void) |
387 | { | 411 | { |
388 | int size = xstate_size + FP_XSTATE_MAGIC2_SIZE; | 412 | int size = fpu_user_xstate_size + FP_XSTATE_MAGIC2_SIZE; |
389 | 413 | ||
390 | fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; | 414 | fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; |
391 | fx_sw_reserved.extended_size = size; | 415 | fx_sw_reserved.extended_size = size; |
392 | fx_sw_reserved.xfeatures = xfeatures_mask; | 416 | fx_sw_reserved.xfeatures = xfeatures_mask; |
393 | fx_sw_reserved.xstate_size = xstate_size; | 417 | fx_sw_reserved.xstate_size = fpu_user_xstate_size; |
394 | 418 | ||
395 | if (config_enabled(CONFIG_IA32_EMULATION) || | 419 | if (config_enabled(CONFIG_IA32_EMULATION) || |
396 | config_enabled(CONFIG_X86_32)) { | 420 | config_enabled(CONFIG_X86_32)) { |
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 4ea2a59483c7..680049aa4593 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <asm/fpu/internal.h> | 11 | #include <asm/fpu/internal.h> |
12 | #include <asm/fpu/signal.h> | 12 | #include <asm/fpu/signal.h> |
13 | #include <asm/fpu/regset.h> | 13 | #include <asm/fpu/regset.h> |
14 | #include <asm/fpu/xstate.h> | ||
14 | 15 | ||
15 | #include <asm/tlbflush.h> | 16 | #include <asm/tlbflush.h> |
16 | 17 | ||
@@ -44,6 +45,13 @@ static unsigned int xstate_sizes[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = | |||
44 | static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8]; | 45 | static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8]; |
45 | 46 | ||
46 | /* | 47 | /* |
48 | * The XSAVE area of kernel can be in standard or compacted format; | ||
49 | * it is always in standard format for user mode. This is the user | ||
50 | * mode standard format size used for signal and ptrace frames. | ||
51 | */ | ||
52 | unsigned int fpu_user_xstate_size; | ||
53 | |||
54 | /* | ||
47 | * Clear all of the X86_FEATURE_* bits that are unavailable | 55 | * Clear all of the X86_FEATURE_* bits that are unavailable |
48 | * when the CPU has no XSAVE support. | 56 | * when the CPU has no XSAVE support. |
49 | */ | 57 | */ |
@@ -105,6 +113,27 @@ int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name) | |||
105 | } | 113 | } |
106 | EXPORT_SYMBOL_GPL(cpu_has_xfeatures); | 114 | EXPORT_SYMBOL_GPL(cpu_has_xfeatures); |
107 | 115 | ||
116 | static int xfeature_is_supervisor(int xfeature_nr) | ||
117 | { | ||
118 | /* | ||
119 | * We currently do not support supervisor states, but if | ||
120 | * we did, we could find out like this. | ||
121 | * | ||
122 | * SDM says: If state component 'i' is a user state component, | ||
123 | * ECX[0] return 0; if state component i is a supervisor | ||
124 | * state component, ECX[0] returns 1. | ||
125 | */ | ||
126 | u32 eax, ebx, ecx, edx; | ||
127 | |||
128 | cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx); | ||
129 | return !!(ecx & 1); | ||
130 | } | ||
131 | |||
132 | static int xfeature_is_user(int xfeature_nr) | ||
133 | { | ||
134 | return !xfeature_is_supervisor(xfeature_nr); | ||
135 | } | ||
136 | |||
108 | /* | 137 | /* |
109 | * When executing XSAVEOPT (or other optimized XSAVE instructions), if | 138 | * When executing XSAVEOPT (or other optimized XSAVE instructions), if |
110 | * a processor implementation detects that an FPU state component is still | 139 | * a processor implementation detects that an FPU state component is still |
@@ -171,7 +200,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu) | |||
171 | */ | 200 | */ |
172 | while (xfeatures) { | 201 | while (xfeatures) { |
173 | if (xfeatures & 0x1) { | 202 | if (xfeatures & 0x1) { |
174 | int offset = xstate_offsets[feature_bit]; | 203 | int offset = xstate_comp_offsets[feature_bit]; |
175 | int size = xstate_sizes[feature_bit]; | 204 | int size = xstate_sizes[feature_bit]; |
176 | 205 | ||
177 | memcpy((void *)fx + offset, | 206 | memcpy((void *)fx + offset, |
@@ -192,6 +221,15 @@ void fpu__init_cpu_xstate(void) | |||
192 | { | 221 | { |
193 | if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask) | 222 | if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask) |
194 | return; | 223 | return; |
224 | /* | ||
225 | * Make it clear that XSAVES supervisor states are not yet | ||
226 | * implemented should anyone expect it to work by changing | ||
227 | * bits in XFEATURE_MASK_* macros and XCR0. | ||
228 | */ | ||
229 | WARN_ONCE((xfeatures_mask & XFEATURE_MASK_SUPERVISOR), | ||
230 | "x86/fpu: XSAVES supervisor states are not yet implemented.\n"); | ||
231 | |||
232 | xfeatures_mask &= ~XFEATURE_MASK_SUPERVISOR; | ||
195 | 233 | ||
196 | cr4_set_bits(X86_CR4_OSXSAVE); | 234 | cr4_set_bits(X86_CR4_OSXSAVE); |
197 | xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask); | 235 | xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask); |
@@ -217,13 +255,29 @@ static void __init setup_xstate_features(void) | |||
217 | /* start at the beginnning of the "extended state" */ | 255 | /* start at the beginnning of the "extended state" */ |
218 | unsigned int last_good_offset = offsetof(struct xregs_state, | 256 | unsigned int last_good_offset = offsetof(struct xregs_state, |
219 | extended_state_area); | 257 | extended_state_area); |
258 | /* | ||
259 | * The FP xstates and SSE xstates are legacy states. They are always | ||
260 | * in the fixed offsets in the xsave area in either compacted form | ||
261 | * or standard form. | ||
262 | */ | ||
263 | xstate_offsets[0] = 0; | ||
264 | xstate_sizes[0] = offsetof(struct fxregs_state, xmm_space); | ||
265 | xstate_offsets[1] = xstate_sizes[0]; | ||
266 | xstate_sizes[1] = FIELD_SIZEOF(struct fxregs_state, xmm_space); | ||
220 | 267 | ||
221 | for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { | 268 | for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { |
222 | if (!xfeature_enabled(i)) | 269 | if (!xfeature_enabled(i)) |
223 | continue; | 270 | continue; |
224 | 271 | ||
225 | cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx); | 272 | cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx); |
226 | xstate_offsets[i] = ebx; | 273 | |
274 | /* | ||
275 | * If an xfeature is supervisor state, the offset | ||
276 | * in EBX is invalid. We leave it to -1. | ||
277 | */ | ||
278 | if (xfeature_is_user(i)) | ||
279 | xstate_offsets[i] = ebx; | ||
280 | |||
227 | xstate_sizes[i] = eax; | 281 | xstate_sizes[i] = eax; |
228 | /* | 282 | /* |
229 | * In our xstate size checks, we assume that the | 283 | * In our xstate size checks, we assume that the |
@@ -233,8 +287,6 @@ static void __init setup_xstate_features(void) | |||
233 | WARN_ONCE(last_good_offset > xstate_offsets[i], | 287 | WARN_ONCE(last_good_offset > xstate_offsets[i], |
234 | "x86/fpu: misordered xstate at %d\n", last_good_offset); | 288 | "x86/fpu: misordered xstate at %d\n", last_good_offset); |
235 | last_good_offset = xstate_offsets[i]; | 289 | last_good_offset = xstate_offsets[i]; |
236 | |||
237 | printk(KERN_INFO "x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n", i, ebx, i, eax); | ||
238 | } | 290 | } |
239 | } | 291 | } |
240 | 292 | ||
@@ -263,6 +315,33 @@ static void __init print_xstate_features(void) | |||
263 | } | 315 | } |
264 | 316 | ||
265 | /* | 317 | /* |
318 | * This check is important because it is easy to get XSTATE_* | ||
319 | * confused with XSTATE_BIT_*. | ||
320 | */ | ||
321 | #define CHECK_XFEATURE(nr) do { \ | ||
322 | WARN_ON(nr < FIRST_EXTENDED_XFEATURE); \ | ||
323 | WARN_ON(nr >= XFEATURE_MAX); \ | ||
324 | } while (0) | ||
325 | |||
326 | /* | ||
327 | * We could cache this like xstate_size[], but we only use | ||
328 | * it here, so it would be a waste of space. | ||
329 | */ | ||
330 | static int xfeature_is_aligned(int xfeature_nr) | ||
331 | { | ||
332 | u32 eax, ebx, ecx, edx; | ||
333 | |||
334 | CHECK_XFEATURE(xfeature_nr); | ||
335 | cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx); | ||
336 | /* | ||
337 | * The value returned by ECX[1] indicates the alignment | ||
338 | * of state component 'i' when the compacted format | ||
339 | * of the extended region of an XSAVE area is used: | ||
340 | */ | ||
341 | return !!(ecx & 2); | ||
342 | } | ||
343 | |||
344 | /* | ||
266 | * This function sets up offsets and sizes of all extended states in | 345 | * This function sets up offsets and sizes of all extended states in |
267 | * xsave area. This supports both standard format and compacted format | 346 | * xsave area. This supports both standard format and compacted format |
268 | * of the xsave aread. | 347 | * of the xsave aread. |
@@ -299,10 +378,29 @@ static void __init setup_xstate_comp(void) | |||
299 | else | 378 | else |
300 | xstate_comp_sizes[i] = 0; | 379 | xstate_comp_sizes[i] = 0; |
301 | 380 | ||
302 | if (i > FIRST_EXTENDED_XFEATURE) | 381 | if (i > FIRST_EXTENDED_XFEATURE) { |
303 | xstate_comp_offsets[i] = xstate_comp_offsets[i-1] | 382 | xstate_comp_offsets[i] = xstate_comp_offsets[i-1] |
304 | + xstate_comp_sizes[i-1]; | 383 | + xstate_comp_sizes[i-1]; |
305 | 384 | ||
385 | if (xfeature_is_aligned(i)) | ||
386 | xstate_comp_offsets[i] = | ||
387 | ALIGN(xstate_comp_offsets[i], 64); | ||
388 | } | ||
389 | } | ||
390 | } | ||
391 | |||
392 | /* | ||
393 | * Print out, for every enabled extended xfeature, its xstate_comp_offsets[] entry (logged under the label 'xstate_offset') and its xstate_sizes[] entry | ||
394 | */ | ||
395 | static void __init print_xstate_offset_size(void) | ||
396 | { | ||
397 | int i; | ||
398 | |||
399 | for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { | ||
400 | if (!xfeature_enabled(i)) /* features that are not enabled are not logged */ | ||
401 | continue; | ||
402 | pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n", | ||
403 | i, xstate_comp_offsets[i], i, xstate_sizes[i]); | ||
306 | } | 404 | } |
307 | } | 405 | } |
308 | 406 | ||
@@ -322,13 +420,11 @@ static void __init setup_init_fpu_buf(void) | |||
322 | setup_xstate_features(); | 420 | setup_xstate_features(); |
323 | print_xstate_features(); | 421 | print_xstate_features(); |
324 | 422 | ||
325 | if (boot_cpu_has(X86_FEATURE_XSAVES)) { | 423 | if (boot_cpu_has(X86_FEATURE_XSAVES)) |
326 | init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask; | 424 | init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask; |
327 | init_fpstate.xsave.header.xfeatures = xfeatures_mask; | ||
328 | } | ||
329 | 425 | ||
330 | /* | 426 | /* |
331 | * Init all the features state with header_bv being 0x0 | 427 | * Init all the features state with header.xfeatures being 0x0 |
332 | */ | 428 | */ |
333 | copy_kernel_to_xregs_booting(&init_fpstate.xsave); | 429 | copy_kernel_to_xregs_booting(&init_fpstate.xsave); |
334 | 430 | ||
@@ -339,58 +435,19 @@ static void __init setup_init_fpu_buf(void) | |||
339 | copy_xregs_to_kernel_booting(&init_fpstate.xsave); | 435 | copy_xregs_to_kernel_booting(&init_fpstate.xsave); |
340 | } | 436 | } |
341 | 437 | ||
342 | static int xfeature_is_supervisor(int xfeature_nr) | 438 | static int xfeature_uncompacted_offset(int xfeature_nr) |
343 | { | ||
344 | /* | ||
345 | * We currently do not support supervisor states, but if | ||
346 | * we did, we could find out like this. | ||
347 | * | ||
348 | * SDM says: If state component i is a user state component, | ||
349 | * ECX[0] return 0; if state component i is a supervisor | ||
350 | * state component, ECX[0] returns 1. | ||
351 | u32 eax, ebx, ecx, edx; | ||
352 | cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx; | ||
353 | return !!(ecx & 1); | ||
354 | */ | ||
355 | return 0; | ||
356 | } | ||
357 | /* | ||
358 | static int xfeature_is_user(int xfeature_nr) | ||
359 | { | ||
360 | return !xfeature_is_supervisor(xfeature_nr); | ||
361 | } | ||
362 | */ | ||
363 | |||
364 | /* | ||
365 | * This check is important because it is easy to get XSTATE_* | ||
366 | * confused with XSTATE_BIT_*. | ||
367 | */ | ||
368 | #define CHECK_XFEATURE(nr) do { \ | ||
369 | WARN_ON(nr < FIRST_EXTENDED_XFEATURE); \ | ||
370 | WARN_ON(nr >= XFEATURE_MAX); \ | ||
371 | } while (0) | ||
372 | |||
373 | /* | ||
374 | * We could cache this like xstate_size[], but we only use | ||
375 | * it here, so it would be a waste of space. | ||
376 | */ | ||
377 | static int xfeature_is_aligned(int xfeature_nr) | ||
378 | { | 439 | { |
379 | u32 eax, ebx, ecx, edx; | 440 | u32 eax, ebx, ecx, edx; |
380 | 441 | ||
381 | CHECK_XFEATURE(xfeature_nr); | ||
382 | cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx); | ||
383 | /* | 442 | /* |
384 | * The value returned by ECX[1] indicates the alignment | 443 | * Only XSAVES supports supervisor states and it uses compacted |
385 | * of state component i when the compacted format | 444 | * format. Checking a supervisor state's uncompacted offset is |
386 | * of the extended region of an XSAVE area is used | 445 | * an error. |
387 | */ | 446 | */ |
388 | return !!(ecx & 2); | 447 | if (XFEATURE_MASK_SUPERVISOR & (1 << xfeature_nr)) { |
389 | } | 448 | WARN_ONCE(1, "No fixed offset for xstate %d\n", xfeature_nr); |
390 | 449 | return -1; | |
391 | static int xfeature_uncompacted_offset(int xfeature_nr) | 450 | } |
392 | { | ||
393 | u32 eax, ebx, ecx, edx; | ||
394 | 451 | ||
395 | CHECK_XFEATURE(xfeature_nr); | 452 | CHECK_XFEATURE(xfeature_nr); |
396 | cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx); | 453 | cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx); |
@@ -415,7 +472,7 @@ static int xfeature_size(int xfeature_nr) | |||
415 | * that it is obvious which aspect of 'XSAVES' is being handled | 472 | * that it is obvious which aspect of 'XSAVES' is being handled |
416 | * by the calling code. | 473 | * by the calling code. |
417 | */ | 474 | */ |
418 | static int using_compacted_format(void) | 475 | int using_compacted_format(void) |
419 | { | 476 | { |
420 | return boot_cpu_has(X86_FEATURE_XSAVES); | 477 | return boot_cpu_has(X86_FEATURE_XSAVES); |
421 | } | 478 | } |
@@ -530,11 +587,12 @@ static void do_extra_xstate_size_checks(void) | |||
530 | */ | 587 | */ |
531 | paranoid_xstate_size += xfeature_size(i); | 588 | paranoid_xstate_size += xfeature_size(i); |
532 | } | 589 | } |
533 | XSTATE_WARN_ON(paranoid_xstate_size != xstate_size); | 590 | XSTATE_WARN_ON(paranoid_xstate_size != fpu_kernel_xstate_size); |
534 | } | 591 | } |
535 | 592 | ||
593 | |||
536 | /* | 594 | /* |
537 | * Calculate total size of enabled xstates in XCR0/xfeatures_mask. | 595 | * Get total size of enabled xstates in XCR0/xfeatures_mask. |
538 | * | 596 | * |
539 | * Note the SDM's wording here. "sub-function 0" only enumerates | 597 | * Note the SDM's wording here. "sub-function 0" only enumerates |
540 | * the size of the *user* states. If we use it to size a buffer | 598 | * the size of the *user* states. If we use it to size a buffer |
@@ -544,34 +602,33 @@ static void do_extra_xstate_size_checks(void) | |||
544 | * Note that we do not currently set any bits on IA32_XSS so | 602 | * Note that we do not currently set any bits on IA32_XSS so |
545 | * 'XCR0 | IA32_XSS == XCR0' for now. | 603 | * 'XCR0 | IA32_XSS == XCR0' for now. |
546 | */ | 604 | */ |
547 | static unsigned int __init calculate_xstate_size(void) | 605 | static unsigned int __init get_xsaves_size(void) |
548 | { | 606 | { |
549 | unsigned int eax, ebx, ecx, edx; | 607 | unsigned int eax, ebx, ecx, edx; |
550 | unsigned int calculated_xstate_size; | 608 | /* |
609 | * - CPUID function 0DH, sub-function 1: | ||
610 | * EBX enumerates the size (in bytes) required by | ||
611 | * the XSAVES instruction for an XSAVE area | ||
612 | * containing all the state components | ||
613 | * corresponding to bits currently set in | ||
614 | * XCR0 | IA32_XSS. | ||
615 | */ | ||
616 | cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx); | ||
617 | return ebx; | ||
618 | } | ||
551 | 619 | ||
552 | if (!boot_cpu_has(X86_FEATURE_XSAVES)) { | 620 | static unsigned int __init get_xsave_size(void) |
553 | /* | 621 | { |
554 | * - CPUID function 0DH, sub-function 0: | 622 | unsigned int eax, ebx, ecx, edx; |
555 | * EBX enumerates the size (in bytes) required by | 623 | /* |
556 | * the XSAVE instruction for an XSAVE area | 624 | * - CPUID function 0DH, sub-function 0: |
557 | * containing all the *user* state components | 625 | * EBX enumerates the size (in bytes) required by |
558 | * corresponding to bits currently set in XCR0. | 626 | * the XSAVE instruction for an XSAVE area |
559 | */ | 627 | * containing all the *user* state components |
560 | cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); | 628 | * corresponding to bits currently set in XCR0. |
561 | calculated_xstate_size = ebx; | 629 | */ |
562 | } else { | 630 | cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); |
563 | /* | 631 | return ebx; |
564 | * - CPUID function 0DH, sub-function 1: | ||
565 | * EBX enumerates the size (in bytes) required by | ||
566 | * the XSAVES instruction for an XSAVE area | ||
567 | * containing all the state components | ||
568 | * corresponding to bits currently set in | ||
569 | * XCR0 | IA32_XSS. | ||
570 | */ | ||
571 | cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx); | ||
572 | calculated_xstate_size = ebx; | ||
573 | } | ||
574 | return calculated_xstate_size; | ||
575 | } | 632 | } |
576 | 633 | ||
577 | /* | 634 | /* |
@@ -591,7 +648,15 @@ static bool is_supported_xstate_size(unsigned int test_xstate_size) | |||
591 | static int init_xstate_size(void) | 648 | static int init_xstate_size(void) |
592 | { | 649 | { |
593 | /* Recompute the context size for enabled features: */ | 650 | /* Recompute the context size for enabled features: */ |
594 | unsigned int possible_xstate_size = calculate_xstate_size(); | 651 | unsigned int possible_xstate_size; |
652 | unsigned int xsave_size; | ||
653 | |||
654 | xsave_size = get_xsave_size(); | ||
655 | |||
656 | if (boot_cpu_has(X86_FEATURE_XSAVES)) | ||
657 | possible_xstate_size = get_xsaves_size(); | ||
658 | else | ||
659 | possible_xstate_size = xsave_size; | ||
595 | 660 | ||
596 | /* Ensure we have the space to store all enabled: */ | 661 | /* Ensure we have the space to store all enabled: */ |
597 | if (!is_supported_xstate_size(possible_xstate_size)) | 662 | if (!is_supported_xstate_size(possible_xstate_size)) |
@@ -601,8 +666,13 @@ static int init_xstate_size(void) | |||
601 | * The size is OK, we are definitely going to use xsave, | 666 | * The size is OK, we are definitely going to use xsave, |
602 | * make it known to the world that we need more space. | 667 | * make it known to the world that we need more space. |
603 | */ | 668 | */ |
604 | xstate_size = possible_xstate_size; | 669 | fpu_kernel_xstate_size = possible_xstate_size; |
605 | do_extra_xstate_size_checks(); | 670 | do_extra_xstate_size_checks(); |
671 | |||
672 | /* | ||
673 | * User space is always in standard format. | ||
674 | */ | ||
675 | fpu_user_xstate_size = xsave_size; | ||
606 | return 0; | 676 | return 0; |
607 | } | 677 | } |
608 | 678 | ||
@@ -644,8 +714,13 @@ void __init fpu__init_system_xstate(void) | |||
644 | xfeatures_mask = eax + ((u64)edx << 32); | 714 | xfeatures_mask = eax + ((u64)edx << 32); |
645 | 715 | ||
646 | if ((xfeatures_mask & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) { | 716 | if ((xfeatures_mask & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) { |
717 | /* | ||
718 | * This indicates that something really unexpected happened | ||
719 | * with the enumeration. Disable XSAVE and try to continue | ||
720 | * booting without it. This is too early to BUG(). | ||
721 | */ | ||
647 | pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", xfeatures_mask); | 722 | pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", xfeatures_mask); |
648 | BUG(); | 723 | goto out_disable; |
649 | } | 724 | } |
650 | 725 | ||
651 | xfeatures_mask &= fpu__get_supported_xfeatures_mask(); | 726 | xfeatures_mask &= fpu__get_supported_xfeatures_mask(); |
@@ -653,21 +728,29 @@ void __init fpu__init_system_xstate(void) | |||
653 | /* Enable xstate instructions to be able to continue with initialization: */ | 728 | /* Enable xstate instructions to be able to continue with initialization: */ |
654 | fpu__init_cpu_xstate(); | 729 | fpu__init_cpu_xstate(); |
655 | err = init_xstate_size(); | 730 | err = init_xstate_size(); |
656 | if (err) { | 731 | if (err) |
657 | /* something went wrong, boot without any XSAVE support */ | 732 | goto out_disable; |
658 | fpu__init_disable_system_xstate(); | 733 | |
659 | return; | 734 | /* |
660 | } | 735 | * Update info used for ptrace frames; use standard-format size and no |
736 | * supervisor xstates: | ||
737 | */ | ||
738 | update_regset_xstate_info(fpu_user_xstate_size, xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR); | ||
661 | 739 | ||
662 | update_regset_xstate_info(xstate_size, xfeatures_mask); | ||
663 | fpu__init_prepare_fx_sw_frame(); | 740 | fpu__init_prepare_fx_sw_frame(); |
664 | setup_init_fpu_buf(); | 741 | setup_init_fpu_buf(); |
665 | setup_xstate_comp(); | 742 | setup_xstate_comp(); |
743 | print_xstate_offset_size(); | ||
666 | 744 | ||
667 | pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n", | 745 | pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n", |
668 | xfeatures_mask, | 746 | xfeatures_mask, |
669 | xstate_size, | 747 | fpu_kernel_xstate_size, |
670 | boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard"); | 748 | boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard"); |
749 | return; | ||
750 | |||
751 | out_disable: | ||
752 | /* something went wrong, try to boot without any XSAVE support */ | ||
753 | fpu__init_disable_system_xstate(); | ||
671 | } | 754 | } |
672 | 755 | ||
673 | /* | 756 | /* |
@@ -693,6 +776,11 @@ void *__raw_xsave_addr(struct xregs_state *xsave, int xstate_feature_mask) | |||
693 | { | 776 | { |
694 | int feature_nr = fls64(xstate_feature_mask) - 1; | 777 | int feature_nr = fls64(xstate_feature_mask) - 1; |
695 | 778 | ||
779 | if (!xfeature_enabled(feature_nr)) { | ||
780 | WARN_ON_FPU(1); | ||
781 | return NULL; | ||
782 | } | ||
783 | |||
696 | return (void *)xsave + xstate_comp_offsets[feature_nr]; | 784 | return (void *)xsave + xstate_comp_offsets[feature_nr]; |
697 | } | 785 | } |
698 | /* | 786 | /* |
@@ -887,16 +975,16 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, | |||
887 | if (!boot_cpu_has(X86_FEATURE_OSPKE)) | 975 | if (!boot_cpu_has(X86_FEATURE_OSPKE)) |
888 | return -EINVAL; | 976 | return -EINVAL; |
889 | 977 | ||
890 | /* Set the bits we need in PKRU */ | 978 | /* Set the bits we need in PKRU: */ |
891 | if (init_val & PKEY_DISABLE_ACCESS) | 979 | if (init_val & PKEY_DISABLE_ACCESS) |
892 | new_pkru_bits |= PKRU_AD_BIT; | 980 | new_pkru_bits |= PKRU_AD_BIT; |
893 | if (init_val & PKEY_DISABLE_WRITE) | 981 | if (init_val & PKEY_DISABLE_WRITE) |
894 | new_pkru_bits |= PKRU_WD_BIT; | 982 | new_pkru_bits |= PKRU_WD_BIT; |
895 | 983 | ||
896 | /* Shift the bits in to the correct place in PKRU for pkey. */ | 984 | /* Shift the bits in to the correct place in PKRU for pkey: */ |
897 | new_pkru_bits <<= pkey_shift; | 985 | new_pkru_bits <<= pkey_shift; |
898 | 986 | ||
899 | /* Locate old copy of the state in the xsave buffer */ | 987 | /* Locate old copy of the state in the xsave buffer: */ |
900 | old_pkru_state = get_xsave_addr(xsave, XFEATURE_MASK_PKRU); | 988 | old_pkru_state = get_xsave_addr(xsave, XFEATURE_MASK_PKRU); |
901 | 989 | ||
902 | /* | 990 | /* |
@@ -909,9 +997,10 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, | |||
909 | else | 997 | else |
910 | new_pkru_state.pkru = old_pkru_state->pkru; | 998 | new_pkru_state.pkru = old_pkru_state->pkru; |
911 | 999 | ||
912 | /* mask off any old bits in place */ | 1000 | /* Mask off any old bits in place: */ |
913 | new_pkru_state.pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift); | 1001 | new_pkru_state.pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift); |
914 | /* Set the newly-requested bits */ | 1002 | |
1003 | /* Set the newly-requested bits: */ | ||
915 | new_pkru_state.pkru |= new_pkru_bits; | 1004 | new_pkru_state.pkru |= new_pkru_bits; |
916 | 1005 | ||
917 | /* | 1006 | /* |
@@ -925,8 +1014,168 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, | |||
925 | */ | 1014 | */ |
926 | new_pkru_state.pad = 0; | 1015 | new_pkru_state.pad = 0; |
927 | 1016 | ||
928 | fpu__xfeature_set_state(XFEATURE_MASK_PKRU, &new_pkru_state, | 1017 | fpu__xfeature_set_state(XFEATURE_MASK_PKRU, &new_pkru_state, sizeof(new_pkru_state)); |
929 | sizeof(new_pkru_state)); | 1018 | |
1019 | return 0; | ||
1020 | } | ||
1021 | |||
1022 | /* | ||
1023 | * Copy up to 'count' bytes from 'data' to byte offset 'pos' of 'kbuf' or, when 'kbuf' is NULL, of 'ubuf'. Nothing is copied when 'count' is 0, 'pos' is below 'start_pos', or 'pos' has reached a non-negative 'end_pos'. Returns 0, or -EFAULT if __copy_to_user() reports a failure. | ||
1024 | * This is similar to user_regset_copyout(), but will not add offset to the source data pointer or increment pos, count, kbuf, and ubuf. | ||
1025 | */ | ||
1026 | static inline int xstate_copyout(unsigned int pos, unsigned int count, | ||
1027 | void *kbuf, void __user *ubuf, | ||
1028 | const void *data, const int start_pos, | ||
1029 | const int end_pos) | ||
1030 | { | ||
1031 | if ((count == 0) || (pos < start_pos)) /* nothing requested, or 'pos' lies before the window */ | ||
1032 | return 0; | ||
1033 | |||
1034 | if (end_pos < 0 || pos < end_pos) { /* a negative 'end_pos' means no upper bound */ | ||
1035 | unsigned int copy = (end_pos < 0 ? count : min(count, end_pos - pos)); /* clip to the bytes left before 'end_pos' */ | ||
1036 | |||
1037 | if (kbuf) { /* a non-NULL 'kbuf' selects the kernel destination, otherwise 'ubuf' is used */ | ||
1038 | memcpy(kbuf + pos, data, copy); | ||
1039 | } else { | ||
1040 | if (__copy_to_user(ubuf + pos, data, copy)) | ||
1041 | return -EFAULT; | ||
1042 | } | ||
1043 | } | ||
1044 | return 0; | ||
1045 | } | ||
1046 | |||
1047 | /* | ||
1048 | * Convert from kernel XSAVES compacted format to standard format and copy | ||
1049 | * to a ptrace buffer ('kbuf' if non-NULL, else 'ubuf'). It supports partial copy ('count' bounds every write) but pos always starts from | ||
1050 | * zero; any other 'pos' returns -EFAULT. This is called from xstateregs_get() and there we check the CPU | ||
1051 | * has XSAVES. Components whose bit is clear in xsave->header.xfeatures are not written to the buffer. Returns 0 or the xstate_copyout() error. | ||
1052 | */ | ||
1053 | int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf, | ||
1054 | void __user *ubuf, struct xregs_state *xsave) | ||
1055 | { | ||
1056 | unsigned int offset, size; | ||
1057 | int ret, i; | ||
1058 | struct xstate_header header; | ||
1059 | |||
1060 | /* | ||
1061 | * Currently copy_regset_to_user() starts from pos 0, so any other start is rejected: | ||
1062 | */ | ||
1063 | if (unlikely(pos != 0)) | ||
1064 | return -EFAULT; | ||
1065 | |||
1066 | /* | ||
1067 | * The destination is a ptrace buffer; we put in only user xstates, so the copied header starts zeroed and carries xfeatures with the supervisor bits cleared: | ||
1068 | */ | ||
1069 | memset(&header, 0, sizeof(header)); | ||
1070 | header.xfeatures = xsave->header.xfeatures; | ||
1071 | header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR; | ||
1072 | |||
1073 | /* | ||
1074 | * Copy the local, filtered header to the xregs_state->header offset of the destination: | ||
1075 | */ | ||
1076 | offset = offsetof(struct xregs_state, header); | ||
1077 | size = sizeof(header); | ||
1078 | |||
1079 | ret = xstate_copyout(offset, size, kbuf, ubuf, &header, 0, count); | ||
1080 | |||
1081 | if (ret) | ||
1082 | return ret; | ||
1083 | |||
1084 | for (i = 0; i < XFEATURE_MAX; i++) { | ||
1085 | /* | ||
1086 | * Copy only in-use xstates; the destination offset and size come from xstate_offsets[] and xstate_sizes[]: | ||
1087 | */ | ||
1088 | if ((header.xfeatures >> i) & 1) { | ||
1089 | void *src = __raw_xsave_addr(xsave, 1 << i); /* NOTE(review): __raw_xsave_addr() returns NULL for a feature that is not enabled and 'src' is not checked; presumably header.xfeatures only carries enabled bits - confirm */ | ||
1090 | |||
1091 | offset = xstate_offsets[i]; | ||
1092 | size = xstate_sizes[i]; | ||
1093 | |||
1094 | ret = xstate_copyout(offset, size, kbuf, ubuf, src, 0, count); | ||
1095 | |||
1096 | if (ret) | ||
1097 | return ret; | ||
1098 | |||
1099 | if (offset + size >= count) /* this component reaches the end of the destination, so stop */ | ||
1100 | break; | ||
1101 | } | ||
1102 | |||
1103 | } | ||
1104 | |||
1105 | /* | ||
1106 | * Fill the fxregs_state sw_reserved field of the ptrace frame from xstate_fx_sw_bytes: | ||
1107 | */ | ||
1108 | offset = offsetof(struct fxregs_state, sw_reserved); | ||
1109 | size = sizeof(xstate_fx_sw_bytes); | ||
1110 | |||
1111 | ret = xstate_copyout(offset, size, kbuf, ubuf, xstate_fx_sw_bytes, 0, count); | ||
1112 | |||
1113 | if (ret) | ||
1114 | return ret; | ||
1115 | |||
1116 | return 0; | ||
1117 | } | ||
1118 | |||
1119 | /* | ||
1120 | * Convert from a ptrace standard-format buffer to kernel XSAVES format | ||
1121 | * and copy to the target thread. This is called from xstateregs_set() and | ||
1122 | * there we check the CPU has XSAVES and a whole standard-sized buffer | ||
1123 | * exists. Input comes from 'kbuf' if non-NULL, else from 'ubuf'. Returns 0, -EFAULT if a user copy fails, or -EINVAL if the input header names a feature outside xfeatures_mask or a supervisor feature. | ||
1124 | */ | ||
1125 | int copyin_to_xsaves(const void *kbuf, const void __user *ubuf, | ||
1126 | struct xregs_state *xsave) | ||
1127 | { | ||
1128 | unsigned int offset, size; | ||
1129 | int i; | ||
1130 | u64 xfeatures; | ||
1131 | u64 allowed_features; | ||
1132 | |||
1133 | offset = offsetof(struct xregs_state, header); | ||
1134 | size = sizeof(xfeatures); /* only the leading 64-bit word at the header offset is read from the input */ | ||
1135 | |||
1136 | if (kbuf) { | ||
1137 | memcpy(&xfeatures, kbuf + offset, size); | ||
1138 | } else { | ||
1139 | if (__copy_from_user(&xfeatures, ubuf + offset, size)) | ||
1140 | return -EFAULT; | ||
1141 | } | ||
1142 | |||
1143 | /* | ||
1144 | * Reject if the user sets any disabled or supervisor features: | ||
1145 | */ | ||
1146 | allowed_features = xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR; | ||
1147 | |||
1148 | if (xfeatures & ~allowed_features) | ||
1149 | return -EINVAL; | ||
1150 | |||
1151 | for (i = 0; i < XFEATURE_MAX; i++) { | ||
1152 | u64 mask = ((u64)1 << i); | ||
1153 | |||
1154 | if (xfeatures & mask) { | ||
1155 | void *dst = __raw_xsave_addr(xsave, 1 << i); /* NOTE(review): this mask is an int '1 << i' while the test above uses a u64 mask; presumably fine while XFEATURE_MAX stays below the width of int - confirm */ | ||
1156 | |||
1157 | offset = xstate_offsets[i]; | ||
1158 | size = xstate_sizes[i]; | ||
1159 | |||
1160 | if (kbuf) { | ||
1161 | memcpy(dst, kbuf + offset, size); | ||
1162 | } else { | ||
1163 | if (__copy_from_user(dst, ubuf + offset, size)) | ||
1164 | return -EFAULT; /* components copied by earlier iterations have already been written into 'xsave' */ | ||
1165 | } | ||
1166 | } | ||
1167 | } | ||
1168 | |||
1169 | /* | ||
1170 | * The state that came in from userspace was user-state only. | ||
1171 | * Mask all the user states out of xsave->header.xfeatures, keeping only the supervisor bits: | ||
1172 | */ | ||
1173 | xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR; | ||
1174 | |||
1175 | /* | ||
1176 | * Add back in the features that came in from userspace: | ||
1177 | */ | ||
1178 | xsave->header.xfeatures |= xfeatures; | ||
930 | 1179 | ||
931 | return 0; | 1180 | return 0; |
932 | } | 1181 | } |