diff options
Diffstat (limited to 'arch/x86/include/asm/i387.h')
-rw-r--r-- | arch/x86/include/asm/i387.h | 307 |
1 files changed, 252 insertions, 55 deletions
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 6919e936345b..247904945d3f 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h | |||
@@ -29,10 +29,11 @@ extern unsigned int sig_xstate_size; | |||
29 | extern void fpu_init(void); | 29 | extern void fpu_init(void); |
30 | extern void mxcsr_feature_mask_init(void); | 30 | extern void mxcsr_feature_mask_init(void); |
31 | extern int init_fpu(struct task_struct *child); | 31 | extern int init_fpu(struct task_struct *child); |
32 | extern asmlinkage void math_state_restore(void); | 32 | extern void math_state_restore(void); |
33 | extern void __math_state_restore(void); | ||
34 | extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); | 33 | extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); |
35 | 34 | ||
35 | DECLARE_PER_CPU(struct task_struct *, fpu_owner_task); | ||
36 | |||
36 | extern user_regset_active_fn fpregs_active, xfpregs_active; | 37 | extern user_regset_active_fn fpregs_active, xfpregs_active; |
37 | extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get, | 38 | extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get, |
38 | xstateregs_get; | 39 | xstateregs_get; |
@@ -212,19 +213,11 @@ static inline void fpu_fxsave(struct fpu *fpu) | |||
212 | 213 | ||
213 | #endif /* CONFIG_X86_64 */ | 214 | #endif /* CONFIG_X86_64 */ |
214 | 215 | ||
215 | /* We need a safe address that is cheap to find and that is already | ||
216 | in L1 during context switch. The best choices are unfortunately | ||
217 | different for UP and SMP */ | ||
218 | #ifdef CONFIG_SMP | ||
219 | #define safe_address (__per_cpu_offset[0]) | ||
220 | #else | ||
221 | #define safe_address (__get_cpu_var(kernel_cpustat).cpustat[CPUTIME_USER]) | ||
222 | #endif | ||
223 | |||
224 | /* | 216 | /* |
225 | * These must be called with preempt disabled | 217 | * These must be called with preempt disabled. Returns |
218 | * 'true' if the FPU state is still intact. | ||
226 | */ | 219 | */ |
227 | static inline void fpu_save_init(struct fpu *fpu) | 220 | static inline int fpu_save_init(struct fpu *fpu) |
228 | { | 221 | { |
229 | if (use_xsave()) { | 222 | if (use_xsave()) { |
230 | fpu_xsave(fpu); | 223 | fpu_xsave(fpu); |
@@ -233,33 +226,33 @@ static inline void fpu_save_init(struct fpu *fpu) | |||
233 | * xsave header may indicate the init state of the FP. | 226 | * xsave header may indicate the init state of the FP. |
234 | */ | 227 | */ |
235 | if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP)) | 228 | if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP)) |
236 | return; | 229 | return 1; |
237 | } else if (use_fxsr()) { | 230 | } else if (use_fxsr()) { |
238 | fpu_fxsave(fpu); | 231 | fpu_fxsave(fpu); |
239 | } else { | 232 | } else { |
240 | asm volatile("fnsave %[fx]; fwait" | 233 | asm volatile("fnsave %[fx]; fwait" |
241 | : [fx] "=m" (fpu->state->fsave)); | 234 | : [fx] "=m" (fpu->state->fsave)); |
242 | return; | 235 | return 0; |
243 | } | 236 | } |
244 | 237 | ||
245 | if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) | 238 | /* |
239 | * If exceptions are pending, we need to clear them so | ||
240 | * that we don't randomly get exceptions later. | ||
241 | * | ||
242 | * FIXME! Is this perhaps only true for the old-style | ||
243 | * irq13 case? Maybe we could leave the x87 state | ||
244 | * intact otherwise? | ||
245 | */ | ||
246 | if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) { | ||
246 | asm volatile("fnclex"); | 247 | asm volatile("fnclex"); |
247 | 248 | return 0; | |
248 | /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception | 249 | } |
249 | is pending. Clear the x87 state here by setting it to fixed | 250 | return 1; |
250 | values. safe_address is a random variable that should be in L1 */ | ||
251 | alternative_input( | ||
252 | ASM_NOP8 ASM_NOP2, | ||
253 | "emms\n\t" /* clear stack tags */ | ||
254 | "fildl %P[addr]", /* set F?P to defined value */ | ||
255 | X86_FEATURE_FXSAVE_LEAK, | ||
256 | [addr] "m" (safe_address)); | ||
257 | } | 251 | } |
258 | 252 | ||
259 | static inline void __save_init_fpu(struct task_struct *tsk) | 253 | static inline int __save_init_fpu(struct task_struct *tsk) |
260 | { | 254 | { |
261 | fpu_save_init(&tsk->thread.fpu); | 255 | return fpu_save_init(&tsk->thread.fpu); |
262 | task_thread_info(tsk)->status &= ~TS_USEDFPU; | ||
263 | } | 256 | } |
264 | 257 | ||
265 | static inline int fpu_fxrstor_checking(struct fpu *fpu) | 258 | static inline int fpu_fxrstor_checking(struct fpu *fpu) |
@@ -277,44 +270,212 @@ static inline int fpu_restore_checking(struct fpu *fpu) | |||
277 | 270 | ||
278 | static inline int restore_fpu_checking(struct task_struct *tsk) | 271 | static inline int restore_fpu_checking(struct task_struct *tsk) |
279 | { | 272 | { |
273 | /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception | ||
274 | is pending. Clear the x87 state here by setting it to fixed | ||
275 | values. "m" is a random variable that should be in L1 */ | ||
276 | alternative_input( | ||
277 | ASM_NOP8 ASM_NOP2, | ||
278 | "emms\n\t" /* clear stack tags */ | ||
279 | "fildl %P[addr]", /* set F?P to defined value */ | ||
280 | X86_FEATURE_FXSAVE_LEAK, | ||
281 | [addr] "m" (tsk->thread.fpu.has_fpu)); | ||
282 | |||
280 | return fpu_restore_checking(&tsk->thread.fpu); | 283 | return fpu_restore_checking(&tsk->thread.fpu); |
281 | } | 284 | } |
282 | 285 | ||
283 | /* | 286 | /* |
284 | * Signal frame handlers... | 287 | * Software FPU state helpers. Careful: these need to |
288 | * be preemption protection *and* they need to be | ||
289 | * properly paired with the CR0.TS changes! | ||
285 | */ | 290 | */ |
286 | extern int save_i387_xstate(void __user *buf); | 291 | static inline int __thread_has_fpu(struct task_struct *tsk) |
287 | extern int restore_i387_xstate(void __user *buf); | 292 | { |
293 | return tsk->thread.fpu.has_fpu; | ||
294 | } | ||
288 | 295 | ||
289 | static inline void __unlazy_fpu(struct task_struct *tsk) | 296 | /* Must be paired with an 'stts' after! */ |
297 | static inline void __thread_clear_has_fpu(struct task_struct *tsk) | ||
290 | { | 298 | { |
291 | if (task_thread_info(tsk)->status & TS_USEDFPU) { | 299 | tsk->thread.fpu.has_fpu = 0; |
292 | __save_init_fpu(tsk); | 300 | percpu_write(fpu_owner_task, NULL); |
293 | stts(); | 301 | } |
294 | } else | 302 | |
295 | tsk->fpu_counter = 0; | 303 | /* Must be paired with a 'clts' before! */ |
304 | static inline void __thread_set_has_fpu(struct task_struct *tsk) | ||
305 | { | ||
306 | tsk->thread.fpu.has_fpu = 1; | ||
307 | percpu_write(fpu_owner_task, tsk); | ||
308 | } | ||
309 | |||
310 | /* | ||
311 | * Encapsulate the CR0.TS handling together with the | ||
312 | * software flag. | ||
313 | * | ||
314 | * These generally need preemption protection to work, | ||
315 | * do try to avoid using these on their own. | ||
316 | */ | ||
317 | static inline void __thread_fpu_end(struct task_struct *tsk) | ||
318 | { | ||
319 | __thread_clear_has_fpu(tsk); | ||
320 | stts(); | ||
321 | } | ||
322 | |||
323 | static inline void __thread_fpu_begin(struct task_struct *tsk) | ||
324 | { | ||
325 | clts(); | ||
326 | __thread_set_has_fpu(tsk); | ||
327 | } | ||
328 | |||
329 | /* | ||
330 | * FPU state switching for scheduling. | ||
331 | * | ||
332 | * This is a two-stage process: | ||
333 | * | ||
334 | * - switch_fpu_prepare() saves the old state and | ||
335 | * sets the new state of the CR0.TS bit. This is | ||
336 | * done within the context of the old process. | ||
337 | * | ||
338 | * - switch_fpu_finish() restores the new state as | ||
339 | * necessary. | ||
340 | */ | ||
341 | typedef struct { int preload; } fpu_switch_t; | ||
342 | |||
343 | /* | ||
344 | * FIXME! We could do a totally lazy restore, but we need to | ||
345 | * add a per-cpu "this was the task that last touched the FPU | ||
346 | * on this CPU" variable, and the task needs to have a "I last | ||
347 | * touched the FPU on this CPU" and check them. | ||
348 | * | ||
349 | * We don't do that yet, so "fpu_lazy_restore()" always returns | ||
350 | * false, but some day.. | ||
351 | */ | ||
352 | static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu) | ||
353 | { | ||
354 | return new == percpu_read_stable(fpu_owner_task) && | ||
355 | cpu == new->thread.fpu.last_cpu; | ||
356 | } | ||
357 | |||
358 | static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu) | ||
359 | { | ||
360 | fpu_switch_t fpu; | ||
361 | |||
362 | fpu.preload = tsk_used_math(new) && new->fpu_counter > 5; | ||
363 | if (__thread_has_fpu(old)) { | ||
364 | if (!__save_init_fpu(old)) | ||
365 | cpu = ~0; | ||
366 | old->thread.fpu.last_cpu = cpu; | ||
367 | old->thread.fpu.has_fpu = 0; /* But leave fpu_owner_task! */ | ||
368 | |||
369 | /* Don't change CR0.TS if we just switch! */ | ||
370 | if (fpu.preload) { | ||
371 | new->fpu_counter++; | ||
372 | __thread_set_has_fpu(new); | ||
373 | prefetch(new->thread.fpu.state); | ||
374 | } else | ||
375 | stts(); | ||
376 | } else { | ||
377 | old->fpu_counter = 0; | ||
378 | old->thread.fpu.last_cpu = ~0; | ||
379 | if (fpu.preload) { | ||
380 | new->fpu_counter++; | ||
381 | if (fpu_lazy_restore(new, cpu)) | ||
382 | fpu.preload = 0; | ||
383 | else | ||
384 | prefetch(new->thread.fpu.state); | ||
385 | __thread_fpu_begin(new); | ||
386 | } | ||
387 | } | ||
388 | return fpu; | ||
389 | } | ||
390 | |||
391 | /* | ||
392 | * By the time this gets called, we've already cleared CR0.TS and | ||
393 | * given the process the FPU if we are going to preload the FPU | ||
394 | * state - all we need to do is to conditionally restore the register | ||
395 | * state itself. | ||
396 | */ | ||
397 | static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu) | ||
398 | { | ||
399 | if (fpu.preload) { | ||
400 | if (unlikely(restore_fpu_checking(new))) | ||
401 | __thread_fpu_end(new); | ||
402 | } | ||
296 | } | 403 | } |
297 | 404 | ||
405 | /* | ||
406 | * Signal frame handlers... | ||
407 | */ | ||
408 | extern int save_i387_xstate(void __user *buf); | ||
409 | extern int restore_i387_xstate(void __user *buf); | ||
410 | |||
298 | static inline void __clear_fpu(struct task_struct *tsk) | 411 | static inline void __clear_fpu(struct task_struct *tsk) |
299 | { | 412 | { |
300 | if (task_thread_info(tsk)->status & TS_USEDFPU) { | 413 | if (__thread_has_fpu(tsk)) { |
301 | /* Ignore delayed exceptions from user space */ | 414 | /* Ignore delayed exceptions from user space */ |
302 | asm volatile("1: fwait\n" | 415 | asm volatile("1: fwait\n" |
303 | "2:\n" | 416 | "2:\n" |
304 | _ASM_EXTABLE(1b, 2b)); | 417 | _ASM_EXTABLE(1b, 2b)); |
305 | task_thread_info(tsk)->status &= ~TS_USEDFPU; | 418 | __thread_fpu_end(tsk); |
306 | stts(); | ||
307 | } | 419 | } |
308 | } | 420 | } |
309 | 421 | ||
422 | /* | ||
423 | * Were we in an interrupt that interrupted kernel mode? | ||
424 | * | ||
425 | * We can do a kernel_fpu_begin/end() pair *ONLY* if that | ||
426 | * pair does nothing at all: the thread must not have fpu (so | ||
427 | * that we don't try to save the FPU state), and TS must | ||
428 | * be set (so that the clts/stts pair does nothing that is | ||
429 | * visible in the interrupted kernel thread). | ||
430 | */ | ||
431 | static inline bool interrupted_kernel_fpu_idle(void) | ||
432 | { | ||
433 | return !__thread_has_fpu(current) && | ||
434 | (read_cr0() & X86_CR0_TS); | ||
435 | } | ||
436 | |||
437 | /* | ||
438 | * Were we in user mode (or vm86 mode) when we were | ||
439 | * interrupted? | ||
440 | * | ||
441 | * Doing kernel_fpu_begin/end() is ok if we are running | ||
442 | * in an interrupt context from user mode - we'll just | ||
443 | * save the FPU state as required. | ||
444 | */ | ||
445 | static inline bool interrupted_user_mode(void) | ||
446 | { | ||
447 | struct pt_regs *regs = get_irq_regs(); | ||
448 | return regs && user_mode_vm(regs); | ||
449 | } | ||
450 | |||
451 | /* | ||
452 | * Can we use the FPU in kernel mode with the | ||
453 | * whole "kernel_fpu_begin/end()" sequence? | ||
454 | * | ||
455 | * It's always ok in process context (ie "not interrupt") | ||
456 | * but it is sometimes ok even from an irq. | ||
457 | */ | ||
458 | static inline bool irq_fpu_usable(void) | ||
459 | { | ||
460 | return !in_interrupt() || | ||
461 | interrupted_user_mode() || | ||
462 | interrupted_kernel_fpu_idle(); | ||
463 | } | ||
464 | |||
310 | static inline void kernel_fpu_begin(void) | 465 | static inline void kernel_fpu_begin(void) |
311 | { | 466 | { |
312 | struct thread_info *me = current_thread_info(); | 467 | struct task_struct *me = current; |
468 | |||
469 | WARN_ON_ONCE(!irq_fpu_usable()); | ||
313 | preempt_disable(); | 470 | preempt_disable(); |
314 | if (me->status & TS_USEDFPU) | 471 | if (__thread_has_fpu(me)) { |
315 | __save_init_fpu(me->task); | 472 | __save_init_fpu(me); |
316 | else | 473 | __thread_clear_has_fpu(me); |
474 | /* We do 'stts()' in kernel_fpu_end() */ | ||
475 | } else { | ||
476 | percpu_write(fpu_owner_task, NULL); | ||
317 | clts(); | 477 | clts(); |
478 | } | ||
318 | } | 479 | } |
319 | 480 | ||
320 | static inline void kernel_fpu_end(void) | 481 | static inline void kernel_fpu_end(void) |
@@ -323,14 +484,6 @@ static inline void kernel_fpu_end(void) | |||
323 | preempt_enable(); | 484 | preempt_enable(); |
324 | } | 485 | } |
325 | 486 | ||
326 | static inline bool irq_fpu_usable(void) | ||
327 | { | ||
328 | struct pt_regs *regs; | ||
329 | |||
330 | return !in_interrupt() || !(regs = get_irq_regs()) || \ | ||
331 | user_mode(regs) || (read_cr0() & X86_CR0_TS); | ||
332 | } | ||
333 | |||
334 | /* | 487 | /* |
335 | * Some instructions like VIA's padlock instructions generate a spurious | 488 | * Some instructions like VIA's padlock instructions generate a spurious |
336 | * DNA fault but don't modify SSE registers. And these instructions | 489 | * DNA fault but don't modify SSE registers. And these instructions |
@@ -363,20 +516,64 @@ static inline void irq_ts_restore(int TS_state) | |||
363 | } | 516 | } |
364 | 517 | ||
365 | /* | 518 | /* |
519 | * The question "does this thread have fpu access?" | ||
520 | * is slightly racy, since preemption could come in | ||
521 | * and revoke it immediately after the test. | ||
522 | * | ||
523 | * However, even in that very unlikely scenario, | ||
524 | * we can just assume we have FPU access - typically | ||
525 | * to save the FP state - we'll just take a #NM | ||
526 | * fault and get the FPU access back. | ||
527 | * | ||
528 | * The actual user_fpu_begin/end() functions | ||
529 | * need to be preemption-safe, though. | ||
530 | * | ||
531 | * NOTE! user_fpu_end() must be used only after you | ||
532 | * have saved the FP state, and user_fpu_begin() must | ||
533 | * be used only immediately before restoring it. | ||
534 | * These functions do not do any save/restore on | ||
535 | * their own. | ||
536 | */ | ||
537 | static inline int user_has_fpu(void) | ||
538 | { | ||
539 | return __thread_has_fpu(current); | ||
540 | } | ||
541 | |||
542 | static inline void user_fpu_end(void) | ||
543 | { | ||
544 | preempt_disable(); | ||
545 | __thread_fpu_end(current); | ||
546 | preempt_enable(); | ||
547 | } | ||
548 | |||
549 | static inline void user_fpu_begin(void) | ||
550 | { | ||
551 | preempt_disable(); | ||
552 | if (!user_has_fpu()) | ||
553 | __thread_fpu_begin(current); | ||
554 | preempt_enable(); | ||
555 | } | ||
556 | |||
557 | /* | ||
366 | * These disable preemption on their own and are safe | 558 | * These disable preemption on their own and are safe |
367 | */ | 559 | */ |
368 | static inline void save_init_fpu(struct task_struct *tsk) | 560 | static inline void save_init_fpu(struct task_struct *tsk) |
369 | { | 561 | { |
562 | WARN_ON_ONCE(!__thread_has_fpu(tsk)); | ||
370 | preempt_disable(); | 563 | preempt_disable(); |
371 | __save_init_fpu(tsk); | 564 | __save_init_fpu(tsk); |
372 | stts(); | 565 | __thread_fpu_end(tsk); |
373 | preempt_enable(); | 566 | preempt_enable(); |
374 | } | 567 | } |
375 | 568 | ||
376 | static inline void unlazy_fpu(struct task_struct *tsk) | 569 | static inline void unlazy_fpu(struct task_struct *tsk) |
377 | { | 570 | { |
378 | preempt_disable(); | 571 | preempt_disable(); |
379 | __unlazy_fpu(tsk); | 572 | if (__thread_has_fpu(tsk)) { |
573 | __save_init_fpu(tsk); | ||
574 | __thread_fpu_end(tsk); | ||
575 | } else | ||
576 | tsk->fpu_counter = 0; | ||
380 | preempt_enable(); | 577 | preempt_enable(); |
381 | } | 578 | } |
382 | 579 | ||