diff options
-rw-r--r-- | arch/x86_64/kernel/process.c | 10 | ||||
-rw-r--r-- | arch/x86_64/kernel/traps.c | 1 | ||||
-rw-r--r-- | include/asm-x86_64/i387.h | 5 | ||||
-rw-r--r-- | include/linux/sched.h | 9 |
4 files changed, 24 insertions, 1 deletions
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 6fbd19564e4..9e9a70e50c7 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c | |||
@@ -552,6 +552,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
552 | int cpu = smp_processor_id(); | 552 | int cpu = smp_processor_id(); |
553 | struct tss_struct *tss = &per_cpu(init_tss, cpu); | 553 | struct tss_struct *tss = &per_cpu(init_tss, cpu); |
554 | 554 | ||
555 | /* we're going to use this soon, after a few expensive things */ | ||
556 | if (next_p->fpu_counter>5) | ||
557 | prefetch(&next->i387.fxsave); | ||
558 | |||
555 | /* | 559 | /* |
556 | * Reload esp0, LDT and the page table pointer: | 560 | * Reload esp0, LDT and the page table pointer: |
557 | */ | 561 | */ |
@@ -629,6 +633,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
629 | || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) | 633 | || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) |
630 | __switch_to_xtra(prev_p, next_p, tss); | 634 | __switch_to_xtra(prev_p, next_p, tss); |
631 | 635 | ||
636 | /* If the task has used the FPU for more than 5 timeslices in a row, just do a full | ||
637 | * restore of the math state immediately to avoid the trap; the | ||
638 | * chances of needing FPU soon are obviously high now | ||
639 | */ | ||
640 | if (next_p->fpu_counter>5) | ||
641 | math_state_restore(); | ||
632 | return prev_p; | 642 | return prev_p; |
633 | } | 643 | } |
634 | 644 | ||
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 28e53342f29..ffc40cff1e0 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c | |||
@@ -1136,6 +1136,7 @@ asmlinkage void math_state_restore(void) | |||
1136 | init_fpu(me); | 1136 | init_fpu(me); |
1137 | restore_fpu_checking(&me->thread.i387.fxsave); | 1137 | restore_fpu_checking(&me->thread.i387.fxsave); |
1138 | task_thread_info(me)->status |= TS_USEDFPU; | 1138 | task_thread_info(me)->status |= TS_USEDFPU; |
1139 | me->fpu_counter++; | ||
1139 | } | 1140 | } |
1140 | 1141 | ||
1141 | void __init trap_init(void) | 1142 | void __init trap_init(void) |
diff --git a/include/asm-x86_64/i387.h b/include/asm-x86_64/i387.h index cba8a3b0cde..60c0f4853fd 100644 --- a/include/asm-x86_64/i387.h +++ b/include/asm-x86_64/i387.h | |||
@@ -24,6 +24,7 @@ extern unsigned int mxcsr_feature_mask; | |||
24 | extern void mxcsr_feature_mask_init(void); | 24 | extern void mxcsr_feature_mask_init(void); |
25 | extern void init_fpu(struct task_struct *child); | 25 | extern void init_fpu(struct task_struct *child); |
26 | extern int save_i387(struct _fpstate __user *buf); | 26 | extern int save_i387(struct _fpstate __user *buf); |
27 | extern asmlinkage void math_state_restore(void); | ||
27 | 28 | ||
28 | /* | 29 | /* |
29 | * FPU lazy state save handling... | 30 | * FPU lazy state save handling... |
@@ -31,7 +32,9 @@ extern int save_i387(struct _fpstate __user *buf); | |||
31 | 32 | ||
32 | #define unlazy_fpu(tsk) do { \ | 33 | #define unlazy_fpu(tsk) do { \ |
33 | if (task_thread_info(tsk)->status & TS_USEDFPU) \ | 34 | if (task_thread_info(tsk)->status & TS_USEDFPU) \ |
34 | save_init_fpu(tsk); \ | 35 | save_init_fpu(tsk); \ |
36 | else \ | ||
37 | tsk->fpu_counter = 0; \ | ||
35 | } while (0) | 38 | } while (0) |
36 | 39 | ||
37 | /* Ignore delayed exceptions from user space */ | 40 | /* Ignore delayed exceptions from user space */ |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 34ed0d99b1b..807556c5bcd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -865,6 +865,15 @@ struct task_struct { | |||
865 | struct key *thread_keyring; /* keyring private to this thread */ | 865 | struct key *thread_keyring; /* keyring private to this thread */ |
866 | unsigned char jit_keyring; /* default keyring to attach requested keys to */ | 866 | unsigned char jit_keyring; /* default keyring to attach requested keys to */ |
867 | #endif | 867 | #endif |
868 | /* | ||
869 | * fpu_counter contains the number of consecutive context switches | ||
870 | * during which the FPU was used. If this is over a threshold, the lazy FPU | ||
871 | * saving becomes unlazy to save the trap. This is an unsigned char | ||
872 | * so that after 256 times the counter wraps and the behavior turns | ||
873 | * lazy again; this is to deal with bursty apps that only use the FPU for | ||
874 | * a short time. | ||
875 | */ | ||
876 | unsigned char fpu_counter; | ||
868 | int oomkilladj; /* OOM kill score adjustment (bit shift). */ | 877 | int oomkilladj; /* OOM kill score adjustment (bit shift). */ |
869 | char comm[TASK_COMM_LEN]; /* executable name excluding path | 878 | char comm[TASK_COMM_LEN]; /* executable name excluding path |
870 | - access with [gs]et_task_comm (which lock | 879 | - access with [gs]et_task_comm (which lock |