Diffstat (limited to 'arch/x86/include/asm')
 -rw-r--r--  arch/x86/include/asm/i387.h        | 175
 -rw-r--r--  arch/x86/include/asm/processor.h   |   2
 -rw-r--r--  arch/x86/include/asm/thread_info.h |   2
 3 files changed, 139 insertions, 40 deletions
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 01b115d86770..247904945d3f 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -32,6 +32,8 @@ extern int init_fpu(struct task_struct *child);
 extern void math_state_restore(void);
 extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
 
+DECLARE_PER_CPU(struct task_struct *, fpu_owner_task);
+
 extern user_regset_active_fn fpregs_active, xfpregs_active;
 extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get,
 				xstateregs_get;
@@ -212,9 +214,10 @@ static inline void fpu_fxsave(struct fpu *fpu)
 #endif	/* CONFIG_X86_64 */
 
 /*
- * These must be called with preempt disabled
+ * These must be called with preempt disabled. Returns
+ * 'true' if the FPU state is still intact.
  */
-static inline void fpu_save_init(struct fpu *fpu)
+static inline int fpu_save_init(struct fpu *fpu)
 {
 	if (use_xsave()) {
 		fpu_xsave(fpu);
@@ -223,22 +226,33 @@ static inline void fpu_save_init(struct fpu *fpu)
 		 * xsave header may indicate the init state of the FP.
 		 */
 		if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
-			return;
+			return 1;
 	} else if (use_fxsr()) {
 		fpu_fxsave(fpu);
 	} else {
 		asm volatile("fnsave %[fx]; fwait"
 			     : [fx] "=m" (fpu->state->fsave));
-		return;
+		return 0;
 	}
 
-	if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES))
+	/*
+	 * If exceptions are pending, we need to clear them so
+	 * that we don't randomly get exceptions later.
+	 *
+	 * FIXME! Is this perhaps only true for the old-style
+	 * irq13 case? Maybe we could leave the x87 state
+	 * intact otherwise?
+	 */
+	if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) {
 		asm volatile("fnclex");
+		return 0;
+	}
+	return 1;
 }
 
-static inline void __save_init_fpu(struct task_struct *tsk)
+static inline int __save_init_fpu(struct task_struct *tsk)
 {
-	fpu_save_init(&tsk->thread.fpu);
+	return fpu_save_init(&tsk->thread.fpu);
 }
 
 static inline int fpu_fxrstor_checking(struct fpu *fpu)
@@ -256,6 +270,16 @@ static inline int fpu_restore_checking(struct fpu *fpu)
 
 static inline int restore_fpu_checking(struct task_struct *tsk)
 {
+	/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
+	   is pending.  Clear the x87 state here by setting it to fixed
+	   values. "m" is a random variable that should be in L1 */
+	alternative_input(
+		ASM_NOP8 ASM_NOP2,
+		"emms\n\t"		/* clear stack tags */
+		"fildl %P[addr]",	/* set F?P to defined value */
+		X86_FEATURE_FXSAVE_LEAK,
+		[addr] "m" (tsk->thread.fpu.has_fpu));
+
 	return fpu_restore_checking(&tsk->thread.fpu);
 }
 
@@ -264,21 +288,23 @@ static inline int restore_fpu_checking(struct task_struct *tsk)
  * be preemption protection *and* they need to be
  * properly paired with the CR0.TS changes!
  */
-static inline int __thread_has_fpu(struct thread_info *ti)
+static inline int __thread_has_fpu(struct task_struct *tsk)
 {
-	return ti->status & TS_USEDFPU;
+	return tsk->thread.fpu.has_fpu;
 }
 
 /* Must be paired with an 'stts' after! */
-static inline void __thread_clear_has_fpu(struct thread_info *ti)
+static inline void __thread_clear_has_fpu(struct task_struct *tsk)
 {
-	ti->status &= ~TS_USEDFPU;
+	tsk->thread.fpu.has_fpu = 0;
+	percpu_write(fpu_owner_task, NULL);
 }
 
 /* Must be paired with a 'clts' before! */
-static inline void __thread_set_has_fpu(struct thread_info *ti)
+static inline void __thread_set_has_fpu(struct task_struct *tsk)
 {
-	ti->status |= TS_USEDFPU;
+	tsk->thread.fpu.has_fpu = 1;
+	percpu_write(fpu_owner_task, tsk);
 }
 
 /*
@@ -288,41 +314,108 @@ static inline void __thread_set_has_fpu(struct thread_info *ti)
  * These generally need preemption protection to work,
  * do try to avoid using these on their own.
  */
-static inline void __thread_fpu_end(struct thread_info *ti)
+static inline void __thread_fpu_end(struct task_struct *tsk)
 {
-	__thread_clear_has_fpu(ti);
+	__thread_clear_has_fpu(tsk);
 	stts();
 }
 
-static inline void __thread_fpu_begin(struct thread_info *ti)
+static inline void __thread_fpu_begin(struct task_struct *tsk)
 {
 	clts();
-	__thread_set_has_fpu(ti);
+	__thread_set_has_fpu(tsk);
 }
 
 /*
- * Signal frame handlers...
+ * FPU state switching for scheduling.
+ *
+ * This is a two-stage process:
+ *
+ *  - switch_fpu_prepare() saves the old state and
+ *    sets the new state of the CR0.TS bit. This is
+ *    done within the context of the old process.
+ *
+ *  - switch_fpu_finish() restores the new state as
+ *    necessary.
  */
-extern int save_i387_xstate(void __user *buf);
-extern int restore_i387_xstate(void __user *buf);
+typedef struct { int preload; } fpu_switch_t;
 
-static inline void __unlazy_fpu(struct task_struct *tsk)
+/*
+ * FIXME! We could do a totally lazy restore, but we need to
+ * add a per-cpu "this was the task that last touched the FPU
+ * on this CPU" variable, and the task needs to have a "I last
+ * touched the FPU on this CPU" and check them.
+ *
+ * We don't do that yet, so "fpu_lazy_restore()" always returns
+ * false, but some day..
+ */
+static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
 {
-	if (__thread_has_fpu(task_thread_info(tsk))) {
-		__save_init_fpu(tsk);
-		__thread_fpu_end(task_thread_info(tsk));
-	} else
-		tsk->fpu_counter = 0;
+	return new == percpu_read_stable(fpu_owner_task) &&
+		cpu == new->thread.fpu.last_cpu;
+}
+
+static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu)
+{
+	fpu_switch_t fpu;
+
+	fpu.preload = tsk_used_math(new) && new->fpu_counter > 5;
+	if (__thread_has_fpu(old)) {
+		if (!__save_init_fpu(old))
+			cpu = ~0;
+		old->thread.fpu.last_cpu = cpu;
+		old->thread.fpu.has_fpu = 0;	/* But leave fpu_owner_task! */
+
+		/* Don't change CR0.TS if we just switch! */
+		if (fpu.preload) {
+			new->fpu_counter++;
+			__thread_set_has_fpu(new);
+			prefetch(new->thread.fpu.state);
+		} else
+			stts();
+	} else {
+		old->fpu_counter = 0;
+		old->thread.fpu.last_cpu = ~0;
+		if (fpu.preload) {
+			new->fpu_counter++;
+			if (fpu_lazy_restore(new, cpu))
+				fpu.preload = 0;
+			else
+				prefetch(new->thread.fpu.state);
+			__thread_fpu_begin(new);
+		}
+	}
+	return fpu;
+}
+
+/*
+ * By the time this gets called, we've already cleared CR0.TS and
+ * given the process the FPU if we are going to preload the FPU
+ * state - all we need to do is to conditionally restore the register
+ * state itself.
+ */
+static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
+{
+	if (fpu.preload) {
+		if (unlikely(restore_fpu_checking(new)))
+			__thread_fpu_end(new);
+	}
 }
 
+/*
+ * Signal frame handlers...
+ */
+extern int save_i387_xstate(void __user *buf);
+extern int restore_i387_xstate(void __user *buf);
+
 static inline void __clear_fpu(struct task_struct *tsk)
 {
-	if (__thread_has_fpu(task_thread_info(tsk))) {
+	if (__thread_has_fpu(tsk)) {
 		/* Ignore delayed exceptions from user space */
 		asm volatile("1: fwait\n"
 			     "2:\n"
 			     _ASM_EXTABLE(1b, 2b));
-		__thread_fpu_end(task_thread_info(tsk));
+		__thread_fpu_end(tsk);
 	}
 }
 
@@ -337,7 +430,7 @@ static inline void __clear_fpu(struct task_struct *tsk)
  */
 static inline bool interrupted_kernel_fpu_idle(void)
 {
-	return !__thread_has_fpu(current_thread_info()) &&
+	return !__thread_has_fpu(current) &&
 		(read_cr0() & X86_CR0_TS);
 }
 
@@ -371,16 +464,18 @@ static inline bool irq_fpu_usable(void)
 
 static inline void kernel_fpu_begin(void)
 {
-	struct thread_info *me = current_thread_info();
+	struct task_struct *me = current;
 
 	WARN_ON_ONCE(!irq_fpu_usable());
 	preempt_disable();
 	if (__thread_has_fpu(me)) {
-		__save_init_fpu(me->task);
+		__save_init_fpu(me);
 		__thread_clear_has_fpu(me);
 		/* We do 'stts()' in kernel_fpu_end() */
-	} else
+	} else {
+		percpu_write(fpu_owner_task, NULL);
 		clts();
+	}
 }
 
 static inline void kernel_fpu_end(void)
@@ -441,13 +536,13 @@ static inline void irq_ts_restore(int TS_state)
  */
 static inline int user_has_fpu(void)
 {
-	return __thread_has_fpu(current_thread_info());
+	return __thread_has_fpu(current);
 }
 
 static inline void user_fpu_end(void)
 {
 	preempt_disable();
-	__thread_fpu_end(current_thread_info());
+	__thread_fpu_end(current);
 	preempt_enable();
 }
 
@@ -455,7 +550,7 @@ static inline void user_fpu_begin(void)
 {
 	preempt_disable();
 	if (!user_has_fpu())
-		__thread_fpu_begin(current_thread_info());
+		__thread_fpu_begin(current);
 	preempt_enable();
 }
 
@@ -464,17 +559,21 @@ static inline void user_fpu_begin(void)
  */
 static inline void save_init_fpu(struct task_struct *tsk)
 {
-	WARN_ON_ONCE(!__thread_has_fpu(task_thread_info(tsk)));
+	WARN_ON_ONCE(!__thread_has_fpu(tsk));
 	preempt_disable();
 	__save_init_fpu(tsk);
-	__thread_fpu_end(task_thread_info(tsk));
+	__thread_fpu_end(tsk);
 	preempt_enable();
 }
 
 static inline void unlazy_fpu(struct task_struct *tsk)
 {
 	preempt_disable();
-	__unlazy_fpu(tsk);
+	if (__thread_has_fpu(tsk)) {
+		__save_init_fpu(tsk);
+		__thread_fpu_end(tsk);
+	} else
+		tsk->fpu_counter = 0;
 	preempt_enable();
 }
 
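Context for the new two-stage helpers: this diffstat is limited to arch/x86/include/asm, so the scheduler-side callers of switch_fpu_prepare()/switch_fpu_finish() are not shown here. A minimal sketch of how __switch_to() would be expected to drive them, assuming the usual prev_p/next_p task pointers and cpu variable from the x86 process-switch code:

	fpu_switch_t fpu;

	/* Still running as the old task: save its FPU state if it owns
	   the FPU, and decide whether the new task's state is worth
	   preloading (fpu.preload). */
	fpu = switch_fpu_prepare(prev_p, next_p, cpu);

	/* ... stack, segment and thread-local state switching ... */

	/* Now running as the new task: restore register contents only
	   if switch_fpu_prepare() decided to preload them. */
	switch_fpu_finish(next_p, fpu);

Splitting the work this way keeps the CR0.TS handling in the prepare step, still in the old task's context, and defers only the actual register restore to the finish step.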
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index aa9088c26931..58545c97d071 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -374,6 +374,8 @@ union thread_xstate {
 };
 
 struct fpu {
+	unsigned int last_cpu;
+	unsigned int has_fpu;
 	union thread_xstate *state;
 };
 
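The two fields added to struct fpu here pair with the DECLARE_PER_CPU(fpu_owner_task) added to i387.h above. The matching per-CPU definition is not in this diffstat (presumably it lands in a .c file such as arch/x86/kernel/cpu/common.c); a sketch of what that definition would look like:

	/*
	 * Per CPU: the task whose FPU state is still live in this CPU's
	 * registers, or NULL once the registers have been clobbered
	 * (switch to another FPU user, kernel_fpu_begin(), ...).
	 */
	DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);

A restore can then only be skipped when both halves of the handshake still agree: the CPU's fpu_owner_task points at the task, and the task's fpu.last_cpu points back at the CPU, which is exactly what fpu_lazy_restore() tests.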
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index bc817cd8b443..cfd8144d5527 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -247,8 +247,6 @@ static inline struct thread_info *current_thread_info(void)
  * ever touches our thread-synchronous status, so we don't
  * have to worry about atomic accesses.
  */
-#define TS_USEDFPU		0x0001	/* FPU was used by this task
-					   this quantum (SMP) */
 #define TS_COMPAT		0x0002	/* 32bit syscall active (64BIT)*/
 #define TS_POLLING		0x0004	/* idle task polling need_resched,
 					   skip sending interrupt */
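For context on why kernel_fpu_begin() in the i387.h hunk above must clear fpu_owner_task even when the current task does not own the FPU: any kernel-mode FPU use clobbers the CPU's registers behind the back of the lazy-restore tracking. A hypothetical caller following the usual irq_fpu_usable()/kernel_fpu_begin()/kernel_fpu_end() pattern (e.g. SIMD-accelerated code, not part of this diff):

	if (irq_fpu_usable()) {
		/*
		 * The CPU's FPU registers now belong to the kernel;
		 * kernel_fpu_begin() NULLs fpu_owner_task so that no
		 * task can later skip its restore via fpu_lazy_restore().
		 */
		kernel_fpu_begin();

		/* ... use SSE/AVX registers ... */

		/* Sets CR0.TS again; user FPU state comes back lazily
		   via the next device-not-available (#NM) trap. */
		kernel_fpu_end();
	} else {
		/* fall back to an integer-only implementation */
	}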
