aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/x86_64/kernel/process.c10
-rw-r--r--arch/x86_64/kernel/traps.c1
-rw-r--r--include/asm-x86_64/i387.h5
-rw-r--r--include/linux/sched.h9
4 files changed, 24 insertions, 1 deletions
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index 6fbd19564e4e..9e9a70e50c72 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -552,6 +552,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
552 int cpu = smp_processor_id(); 552 int cpu = smp_processor_id();
553 struct tss_struct *tss = &per_cpu(init_tss, cpu); 553 struct tss_struct *tss = &per_cpu(init_tss, cpu);
554 554
555 /* we're going to use this soon, after a few expensive things */
556 if (next_p->fpu_counter>5)
557 prefetch(&next->i387.fxsave);
558
555 /* 559 /*
556 * Reload esp0, LDT and the page table pointer: 560 * Reload esp0, LDT and the page table pointer:
557 */ 561 */
@@ -629,6 +633,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
629 || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) 633 || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP))
630 __switch_to_xtra(prev_p, next_p, tss); 634 __switch_to_xtra(prev_p, next_p, tss);
631 635
636 /* If the task has used the FPU in more than 5 consecutive
637 * timeslices, do a full restore of the math state immediately to
638 * avoid the trap; the chances of needing the FPU soon are high.
639 */
640 if (next_p->fpu_counter>5)
641 math_state_restore();
632 return prev_p; 642 return prev_p;
633} 643}
634 644
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 28e53342f294..ffc40cff1e07 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -1136,6 +1136,7 @@ asmlinkage void math_state_restore(void)
1136 init_fpu(me); 1136 init_fpu(me);
1137 restore_fpu_checking(&me->thread.i387.fxsave); 1137 restore_fpu_checking(&me->thread.i387.fxsave);
1138 task_thread_info(me)->status |= TS_USEDFPU; 1138 task_thread_info(me)->status |= TS_USEDFPU;
1139 me->fpu_counter++;
1139} 1140}
1140 1141
1141void __init trap_init(void) 1142void __init trap_init(void)
diff --git a/include/asm-x86_64/i387.h b/include/asm-x86_64/i387.h
index cba8a3b0cded..60c0f4853fdb 100644
--- a/include/asm-x86_64/i387.h
+++ b/include/asm-x86_64/i387.h
@@ -24,6 +24,7 @@ extern unsigned int mxcsr_feature_mask;
24extern void mxcsr_feature_mask_init(void); 24extern void mxcsr_feature_mask_init(void);
25extern void init_fpu(struct task_struct *child); 25extern void init_fpu(struct task_struct *child);
26extern int save_i387(struct _fpstate __user *buf); 26extern int save_i387(struct _fpstate __user *buf);
27extern asmlinkage void math_state_restore(void);
27 28
28/* 29/*
29 * FPU lazy state save handling... 30 * FPU lazy state save handling...
@@ -31,7 +32,9 @@ extern int save_i387(struct _fpstate __user *buf);
31 32
32#define unlazy_fpu(tsk) do { \ 33#define unlazy_fpu(tsk) do { \
33 if (task_thread_info(tsk)->status & TS_USEDFPU) \ 34 if (task_thread_info(tsk)->status & TS_USEDFPU) \
34 save_init_fpu(tsk); \ 35 save_init_fpu(tsk); \
36 else \
37 tsk->fpu_counter = 0; \
35} while (0) 38} while (0)
36 39
37/* Ignore delayed exceptions from user space */ 40/* Ignore delayed exceptions from user space */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 34ed0d99b1bd..807556c5bcd2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -865,6 +865,15 @@ struct task_struct {
865 struct key *thread_keyring; /* keyring private to this thread */ 865 struct key *thread_keyring; /* keyring private to this thread */
866 unsigned char jit_keyring; /* default keyring to attach requested keys to */ 866 unsigned char jit_keyring; /* default keyring to attach requested keys to */
867#endif 867#endif
868 /*
869 * fpu_counter contains the number of consecutive context switches
870 * in which the FPU is used. If this is over a threshold, the lazy FPU
871 * saving becomes unlazy to save the trap. This is an unsigned char
872 * so that after 256 times the counter wraps and the behavior turns
873 * lazy again; this is to deal with bursty apps that only use the FPU
874 * for a short time.
875 */
876 unsigned char fpu_counter;
868 int oomkilladj; /* OOM kill score adjustment (bit shift). */ 877 int oomkilladj; /* OOM kill score adjustment (bit shift). */
869 char comm[TASK_COMM_LEN]; /* executable name excluding path 878 char comm[TASK_COMM_LEN]; /* executable name excluding path
870 - access with [gs]et_task_comm (which lock 879 - access with [gs]et_task_comm (which lock