diff options
Diffstat (limited to 'arch/x86/include/asm/i387.h')
-rw-r--r-- | arch/x86/include/asm/i387.h | 284 |
1 files changed, 229 insertions, 55 deletions
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 6919e936345b..a850b4d8d14d 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h | |||
@@ -29,8 +29,8 @@ extern unsigned int sig_xstate_size; | |||
29 | extern void fpu_init(void); | 29 | extern void fpu_init(void); |
30 | extern void mxcsr_feature_mask_init(void); | 30 | extern void mxcsr_feature_mask_init(void); |
31 | extern int init_fpu(struct task_struct *child); | 31 | extern int init_fpu(struct task_struct *child); |
32 | extern asmlinkage void math_state_restore(void); | 32 | extern void __math_state_restore(struct task_struct *); |
33 | extern void __math_state_restore(void); | 33 | extern void math_state_restore(void); |
34 | extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); | 34 | extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); |
35 | 35 | ||
36 | extern user_regset_active_fn fpregs_active, xfpregs_active; | 36 | extern user_regset_active_fn fpregs_active, xfpregs_active; |
@@ -212,19 +212,11 @@ static inline void fpu_fxsave(struct fpu *fpu) | |||
212 | 212 | ||
213 | #endif /* CONFIG_X86_64 */ | 213 | #endif /* CONFIG_X86_64 */ |
214 | 214 | ||
215 | /* We need a safe address that is cheap to find and that is already | ||
216 | in L1 during context switch. The best choices are unfortunately | ||
217 | different for UP and SMP */ | ||
218 | #ifdef CONFIG_SMP | ||
219 | #define safe_address (__per_cpu_offset[0]) | ||
220 | #else | ||
221 | #define safe_address (__get_cpu_var(kernel_cpustat).cpustat[CPUTIME_USER]) | ||
222 | #endif | ||
223 | |||
224 | /* | 215 | /* |
225 | * These must be called with preempt disabled | 216 | * These must be called with preempt disabled. Returns |
217 | * 'true' if the FPU state is still intact. | ||
226 | */ | 218 | */ |
227 | static inline void fpu_save_init(struct fpu *fpu) | 219 | static inline int fpu_save_init(struct fpu *fpu) |
228 | { | 220 | { |
229 | if (use_xsave()) { | 221 | if (use_xsave()) { |
230 | fpu_xsave(fpu); | 222 | fpu_xsave(fpu); |
@@ -233,33 +225,33 @@ static inline void fpu_save_init(struct fpu *fpu) | |||
233 | * xsave header may indicate the init state of the FP. | 225 | * xsave header may indicate the init state of the FP. |
234 | */ | 226 | */ |
235 | if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP)) | 227 | if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP)) |
236 | return; | 228 | return 1; |
237 | } else if (use_fxsr()) { | 229 | } else if (use_fxsr()) { |
238 | fpu_fxsave(fpu); | 230 | fpu_fxsave(fpu); |
239 | } else { | 231 | } else { |
240 | asm volatile("fnsave %[fx]; fwait" | 232 | asm volatile("fnsave %[fx]; fwait" |
241 | : [fx] "=m" (fpu->state->fsave)); | 233 | : [fx] "=m" (fpu->state->fsave)); |
242 | return; | 234 | return 0; |
243 | } | 235 | } |
244 | 236 | ||
245 | if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) | 237 | /* |
238 | * If exceptions are pending, we need to clear them so | ||
239 | * that we don't randomly get exceptions later. | ||
240 | * | ||
241 | * FIXME! Is this perhaps only true for the old-style | ||
242 | * irq13 case? Maybe we could leave the x87 state | ||
243 | * intact otherwise? | ||
244 | */ | ||
245 | if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) { | ||
246 | asm volatile("fnclex"); | 246 | asm volatile("fnclex"); |
247 | 247 | return 0; | |
248 | /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception | 248 | } |
249 | is pending. Clear the x87 state here by setting it to fixed | 249 | return 1; |
250 | values. safe_address is a random variable that should be in L1 */ | ||
251 | alternative_input( | ||
252 | ASM_NOP8 ASM_NOP2, | ||
253 | "emms\n\t" /* clear stack tags */ | ||
254 | "fildl %P[addr]", /* set F?P to defined value */ | ||
255 | X86_FEATURE_FXSAVE_LEAK, | ||
256 | [addr] "m" (safe_address)); | ||
257 | } | 250 | } |
258 | 251 | ||
259 | static inline void __save_init_fpu(struct task_struct *tsk) | 252 | static inline int __save_init_fpu(struct task_struct *tsk) |
260 | { | 253 | { |
261 | fpu_save_init(&tsk->thread.fpu); | 254 | return fpu_save_init(&tsk->thread.fpu); |
262 | task_thread_info(tsk)->status &= ~TS_USEDFPU; | ||
263 | } | 255 | } |
264 | 256 | ||
265 | static inline int fpu_fxrstor_checking(struct fpu *fpu) | 257 | static inline int fpu_fxrstor_checking(struct fpu *fpu) |
@@ -281,39 +273,185 @@ static inline int restore_fpu_checking(struct task_struct *tsk) | |||
281 | } | 273 | } |
282 | 274 | ||
283 | /* | 275 | /* |
284 | * Signal frame handlers... | 276 | * Software FPU state helpers. Careful: these need to |
277 | * be preemption protected *and* they need to be | ||
278 | * properly paired with the CR0.TS changes! | ||
285 | */ | 279 | */ |
286 | extern int save_i387_xstate(void __user *buf); | 280 | static inline int __thread_has_fpu(struct task_struct *tsk) |
287 | extern int restore_i387_xstate(void __user *buf); | 281 | { |
282 | return tsk->thread.has_fpu; | ||
283 | } | ||
288 | 284 | ||
289 | static inline void __unlazy_fpu(struct task_struct *tsk) | 285 | /* Must be paired with an 'stts' after! */ |
286 | static inline void __thread_clear_has_fpu(struct task_struct *tsk) | ||
290 | { | 287 | { |
291 | if (task_thread_info(tsk)->status & TS_USEDFPU) { | 288 | tsk->thread.has_fpu = 0; |
292 | __save_init_fpu(tsk); | 289 | } |
293 | stts(); | 290 | |
294 | } else | 291 | /* Must be paired with a 'clts' before! */ |
295 | tsk->fpu_counter = 0; | 292 | static inline void __thread_set_has_fpu(struct task_struct *tsk) |
293 | { | ||
294 | tsk->thread.has_fpu = 1; | ||
296 | } | 295 | } |
297 | 296 | ||
297 | /* | ||
298 | * Encapsulate the CR0.TS handling together with the | ||
299 | * software flag. | ||
300 | * | ||
301 | * These generally need preemption protection to work, | ||
302 | * do try to avoid using these on their own. | ||
303 | */ | ||
304 | static inline void __thread_fpu_end(struct task_struct *tsk) | ||
305 | { | ||
306 | __thread_clear_has_fpu(tsk); | ||
307 | stts(); | ||
308 | } | ||
309 | |||
310 | static inline void __thread_fpu_begin(struct task_struct *tsk) | ||
311 | { | ||
312 | clts(); | ||
313 | __thread_set_has_fpu(tsk); | ||
314 | } | ||
315 | |||
316 | /* | ||
317 | * FPU state switching for scheduling. | ||
318 | * | ||
319 | * This is a two-stage process: | ||
320 | * | ||
321 | * - switch_fpu_prepare() saves the old state and | ||
322 | * sets the new state of the CR0.TS bit. This is | ||
323 | * done within the context of the old process. | ||
324 | * | ||
325 | * - switch_fpu_finish() restores the new state as | ||
326 | * necessary. | ||
327 | */ | ||
328 | typedef struct { int preload; } fpu_switch_t; | ||
329 | |||
330 | /* | ||
331 | * FIXME! We could do a totally lazy restore, but we need to | ||
332 | * add a per-cpu "this was the task that last touched the FPU | ||
333 | * on this CPU" variable, and the task needs to have a "I last | ||
334 | * touched the FPU on this CPU" and check them. | ||
335 | * | ||
336 | * We don't do that yet, so "fpu_lazy_restore()" always returns | ||
337 | * false, but some day.. | ||
338 | */ | ||
339 | #define fpu_lazy_restore(tsk) (0) | ||
340 | #define fpu_lazy_state_intact(tsk) do { } while (0) | ||
341 | |||
342 | static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new) | ||
343 | { | ||
344 | fpu_switch_t fpu; | ||
345 | |||
346 | fpu.preload = tsk_used_math(new) && new->fpu_counter > 5; | ||
347 | if (__thread_has_fpu(old)) { | ||
348 | if (__save_init_fpu(old)) | ||
349 | fpu_lazy_state_intact(old); | ||
350 | __thread_clear_has_fpu(old); | ||
351 | old->fpu_counter++; | ||
352 | |||
353 | /* Don't change CR0.TS if we just switch! */ | ||
354 | if (fpu.preload) { | ||
355 | __thread_set_has_fpu(new); | ||
356 | prefetch(new->thread.fpu.state); | ||
357 | } else | ||
358 | stts(); | ||
359 | } else { | ||
360 | old->fpu_counter = 0; | ||
361 | if (fpu.preload) { | ||
362 | if (fpu_lazy_restore(new)) | ||
363 | fpu.preload = 0; | ||
364 | else | ||
365 | prefetch(new->thread.fpu.state); | ||
366 | __thread_fpu_begin(new); | ||
367 | } | ||
368 | } | ||
369 | return fpu; | ||
370 | } | ||
371 | |||
372 | /* | ||
373 | * By the time this gets called, we've already cleared CR0.TS and | ||
374 | * given the process the FPU if we are going to preload the FPU | ||
375 | * state - all we need to do is to conditionally restore the register | ||
376 | * state itself. | ||
377 | */ | ||
378 | static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu) | ||
379 | { | ||
380 | if (fpu.preload) | ||
381 | __math_state_restore(new); | ||
382 | } | ||
383 | |||
384 | /* | ||
385 | * Signal frame handlers... | ||
386 | */ | ||
387 | extern int save_i387_xstate(void __user *buf); | ||
388 | extern int restore_i387_xstate(void __user *buf); | ||
389 | |||
298 | static inline void __clear_fpu(struct task_struct *tsk) | 390 | static inline void __clear_fpu(struct task_struct *tsk) |
299 | { | 391 | { |
300 | if (task_thread_info(tsk)->status & TS_USEDFPU) { | 392 | if (__thread_has_fpu(tsk)) { |
301 | /* Ignore delayed exceptions from user space */ | 393 | /* Ignore delayed exceptions from user space */ |
302 | asm volatile("1: fwait\n" | 394 | asm volatile("1: fwait\n" |
303 | "2:\n" | 395 | "2:\n" |
304 | _ASM_EXTABLE(1b, 2b)); | 396 | _ASM_EXTABLE(1b, 2b)); |
305 | task_thread_info(tsk)->status &= ~TS_USEDFPU; | 397 | __thread_fpu_end(tsk); |
306 | stts(); | ||
307 | } | 398 | } |
308 | } | 399 | } |
309 | 400 | ||
401 | /* | ||
402 | * Were we in an interrupt that interrupted kernel mode? | ||
403 | * | ||
404 | * We can do a kernel_fpu_begin/end() pair *ONLY* if that | ||
405 | * pair does nothing at all: the thread must not have fpu (so | ||
406 | * that we don't try to save the FPU state), and TS must | ||
407 | * be set (so that the clts/stts pair does nothing that is | ||
408 | * visible in the interrupted kernel thread). | ||
409 | */ | ||
410 | static inline bool interrupted_kernel_fpu_idle(void) | ||
411 | { | ||
412 | return !__thread_has_fpu(current) && | ||
413 | (read_cr0() & X86_CR0_TS); | ||
414 | } | ||
415 | |||
416 | /* | ||
417 | * Were we in user mode (or vm86 mode) when we were | ||
418 | * interrupted? | ||
419 | * | ||
420 | * Doing kernel_fpu_begin/end() is ok if we are running | ||
421 | * in an interrupt context from user mode - we'll just | ||
422 | * save the FPU state as required. | ||
423 | */ | ||
424 | static inline bool interrupted_user_mode(void) | ||
425 | { | ||
426 | struct pt_regs *regs = get_irq_regs(); | ||
427 | return regs && user_mode_vm(regs); | ||
428 | } | ||
429 | |||
430 | /* | ||
431 | * Can we use the FPU in kernel mode with the | ||
432 | * whole "kernel_fpu_begin/end()" sequence? | ||
433 | * | ||
434 | * It's always ok in process context (ie "not interrupt") | ||
435 | * but it is sometimes ok even from an irq. | ||
436 | */ | ||
437 | static inline bool irq_fpu_usable(void) | ||
438 | { | ||
439 | return !in_interrupt() || | ||
440 | interrupted_user_mode() || | ||
441 | interrupted_kernel_fpu_idle(); | ||
442 | } | ||
443 | |||
310 | static inline void kernel_fpu_begin(void) | 444 | static inline void kernel_fpu_begin(void) |
311 | { | 445 | { |
312 | struct thread_info *me = current_thread_info(); | 446 | struct task_struct *me = current; |
447 | |||
448 | WARN_ON_ONCE(!irq_fpu_usable()); | ||
313 | preempt_disable(); | 449 | preempt_disable(); |
314 | if (me->status & TS_USEDFPU) | 450 | if (__thread_has_fpu(me)) { |
315 | __save_init_fpu(me->task); | 451 | __save_init_fpu(me); |
316 | else | 452 | __thread_clear_has_fpu(me); |
453 | /* We do 'stts()' in kernel_fpu_end() */ | ||
454 | } else | ||
317 | clts(); | 455 | clts(); |
318 | } | 456 | } |
319 | 457 | ||
@@ -323,14 +461,6 @@ static inline void kernel_fpu_end(void) | |||
323 | preempt_enable(); | 461 | preempt_enable(); |
324 | } | 462 | } |
325 | 463 | ||
326 | static inline bool irq_fpu_usable(void) | ||
327 | { | ||
328 | struct pt_regs *regs; | ||
329 | |||
330 | return !in_interrupt() || !(regs = get_irq_regs()) || \ | ||
331 | user_mode(regs) || (read_cr0() & X86_CR0_TS); | ||
332 | } | ||
333 | |||
334 | /* | 464 | /* |
335 | * Some instructions like VIA's padlock instructions generate a spurious | 465 | * Some instructions like VIA's padlock instructions generate a spurious |
336 | * DNA fault but don't modify SSE registers. And these instructions | 466 | * DNA fault but don't modify SSE registers. And these instructions |
@@ -363,20 +493,64 @@ static inline void irq_ts_restore(int TS_state) | |||
363 | } | 493 | } |
364 | 494 | ||
365 | /* | 495 | /* |
496 | * The question "does this thread have fpu access?" | ||
497 | * is slightly racy, since preemption could come in | ||
498 | * and revoke it immediately after the test. | ||
499 | * | ||
500 | * However, even in that very unlikely scenario, | ||
501 | * we can just assume we have FPU access - typically | ||
502 | * to save the FP state - we'll just take a #NM | ||
503 | * fault and get the FPU access back. | ||
504 | * | ||
505 | * The actual user_fpu_begin/end() functions | ||
506 | * need to be preemption-safe, though. | ||
507 | * | ||
508 | * NOTE! user_fpu_end() must be used only after you | ||
509 | * have saved the FP state, and user_fpu_begin() must | ||
510 | * be used only immediately before restoring it. | ||
511 | * These functions do not do any save/restore on | ||
512 | * their own. | ||
513 | */ | ||
514 | static inline int user_has_fpu(void) | ||
515 | { | ||
516 | return __thread_has_fpu(current); | ||
517 | } | ||
518 | |||
519 | static inline void user_fpu_end(void) | ||
520 | { | ||
521 | preempt_disable(); | ||
522 | __thread_fpu_end(current); | ||
523 | preempt_enable(); | ||
524 | } | ||
525 | |||
526 | static inline void user_fpu_begin(void) | ||
527 | { | ||
528 | preempt_disable(); | ||
529 | if (!user_has_fpu()) | ||
530 | __thread_fpu_begin(current); | ||
531 | preempt_enable(); | ||
532 | } | ||
533 | |||
534 | /* | ||
366 | * These disable preemption on their own and are safe | 535 | * These disable preemption on their own and are safe |
367 | */ | 536 | */ |
368 | static inline void save_init_fpu(struct task_struct *tsk) | 537 | static inline void save_init_fpu(struct task_struct *tsk) |
369 | { | 538 | { |
539 | WARN_ON_ONCE(!__thread_has_fpu(tsk)); | ||
370 | preempt_disable(); | 540 | preempt_disable(); |
371 | __save_init_fpu(tsk); | 541 | __save_init_fpu(tsk); |
372 | stts(); | 542 | __thread_fpu_end(tsk); |
373 | preempt_enable(); | 543 | preempt_enable(); |
374 | } | 544 | } |
375 | 545 | ||
376 | static inline void unlazy_fpu(struct task_struct *tsk) | 546 | static inline void unlazy_fpu(struct task_struct *tsk) |
377 | { | 547 | { |
378 | preempt_disable(); | 548 | preempt_disable(); |
379 | __unlazy_fpu(tsk); | 549 | if (__thread_has_fpu(tsk)) { |
550 | __save_init_fpu(tsk); | ||
551 | __thread_fpu_end(tsk); | ||
552 | } else | ||
553 | tsk->fpu_counter = 0; | ||
380 | preempt_enable(); | 554 | preempt_enable(); |
381 | } | 555 | } |
382 | 556 | ||