aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSuresh Siddha <suresh.b.siddha@intel.com>2008-07-29 13:29:20 -0400
committerIngo Molnar <mingo@elte.hu>2008-07-30 13:49:24 -0400
commitb359e8a434cc3d09847010fc4aeccf48d69740e4 (patch)
tree8911c299dc1768c78d5452a1e7e0efd2fc8d5abb
parentdc1e35c6e95e8923cf1d3510438b63c600fee1e2 (diff)
x86, xsave: context switch support using xsave/xrstor
Uses xsave/xrstor (instead of traditional fxsave/fxrstor) in context switch when available. Introduces TS_XSAVE flag, which determines the need to use xsave/xrstor instructions during context switch instead of the legacy fxsave/fxrstor instructions. Thread-synchronous status word is already in L1 cache during this code path and thus minimizes the performance penalty compared to (cpu_has_xsave) checks. Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com> Signed-off-by: H. Peter Anvin <hpa@zytor.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--arch/x86/kernel/cpu/common.c5
-rw-r--r--arch/x86/kernel/i387.c5
-rw-r--r--arch/x86/kernel/traps_64.c2
-rw-r--r--include/asm-x86/i387.h64
-rw-r--r--include/asm-x86/processor.h1
-rw-r--r--include/asm-x86/thread_info.h1
-rw-r--r--include/asm-x86/xsave.h35
7 files changed, 104 insertions, 9 deletions
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index fabbcb7020fb..6c2b9e756db2 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -709,7 +709,10 @@ void __cpuinit cpu_init(void)
709 /* 709 /*
710 * Force FPU initialization: 710 * Force FPU initialization:
711 */ 711 */
712 current_thread_info()->status = 0; 712 if (cpu_has_xsave)
713 current_thread_info()->status = TS_XSAVE;
714 else
715 current_thread_info()->status = 0;
713 clear_used_math(); 716 clear_used_math();
714 mxcsr_feature_mask_init(); 717 mxcsr_feature_mask_init();
715 718
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index e22a9a9dce8a..b778e17e4b01 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -97,7 +97,10 @@ void __cpuinit fpu_init(void)
97 97
98 mxcsr_feature_mask_init(); 98 mxcsr_feature_mask_init();
99 /* clean state in init */ 99 /* clean state in init */
100 current_thread_info()->status = 0; 100 if (cpu_has_xsave)
101 current_thread_info()->status = TS_XSAVE;
102 else
103 current_thread_info()->status = 0;
101 clear_used_math(); 104 clear_used_math();
102} 105}
103#endif /* CONFIG_X86_64 */ 106#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 3580a7938a2e..38eb76156a47 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -1134,7 +1134,7 @@ asmlinkage void math_state_restore(void)
1134 /* 1134 /*
1135 * Paranoid restore. send a SIGSEGV if we fail to restore the state. 1135 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
1136 */ 1136 */
1137 if (unlikely(restore_fpu_checking(&me->thread.xstate->fxsave))) { 1137 if (unlikely(restore_fpu_checking(me))) {
1138 stts(); 1138 stts();
1139 force_sig(SIGSEGV, me); 1139 force_sig(SIGSEGV, me);
1140 return; 1140 return;
diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h
index 6a6647896670..a6d256f4ac81 100644
--- a/include/asm-x86/i387.h
+++ b/include/asm-x86/i387.h
@@ -36,6 +36,8 @@ extern int save_i387_ia32(struct _fpstate_ia32 __user *buf);
36extern int restore_i387_ia32(struct _fpstate_ia32 __user *buf); 36extern int restore_i387_ia32(struct _fpstate_ia32 __user *buf);
37#endif 37#endif
38 38
39#define X87_FSW_ES (1 << 7) /* Exception Summary */
40
39#ifdef CONFIG_X86_64 41#ifdef CONFIG_X86_64
40 42
41/* Ignore delayed exceptions from user space */ 43/* Ignore delayed exceptions from user space */
@@ -46,7 +48,7 @@ static inline void tolerant_fwait(void)
46 _ASM_EXTABLE(1b, 2b)); 48 _ASM_EXTABLE(1b, 2b));
47} 49}
48 50
49static inline int restore_fpu_checking(struct i387_fxsave_struct *fx) 51static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
50{ 52{
51 int err; 53 int err;
52 54
@@ -66,15 +68,31 @@ static inline int restore_fpu_checking(struct i387_fxsave_struct *fx)
66 return err; 68 return err;
67} 69}
68 70
69#define X87_FSW_ES (1 << 7) /* Exception Summary */ 71static inline int restore_fpu_checking(struct task_struct *tsk)
72{
73 if (task_thread_info(tsk)->status & TS_XSAVE)
74 return xrstor_checking(&tsk->thread.xstate->xsave);
75 else
76 return fxrstor_checking(&tsk->thread.xstate->fxsave);
77}
70 78
71/* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception 79/* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
72 is pending. Clear the x87 state here by setting it to fixed 80 is pending. Clear the x87 state here by setting it to fixed
73 values. The kernel data segment can be sometimes 0 and sometimes 81 values. The kernel data segment can be sometimes 0 and sometimes
74 new user value. Both should be ok. 82 new user value. Both should be ok.
75 Use the PDA as safe address because it should be already in L1. */ 83 Use the PDA as safe address because it should be already in L1. */
76static inline void clear_fpu_state(struct i387_fxsave_struct *fx) 84static inline void clear_fpu_state(struct task_struct *tsk)
77{ 85{
86 struct xsave_struct *xstate = &tsk->thread.xstate->xsave;
87 struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave;
88
89 /*
90 * xsave header may indicate the init state of the FP.
91 */
92 if ((task_thread_info(tsk)->status & TS_XSAVE) &&
93 !(xstate->xsave_hdr.xstate_bv & XSTATE_FP))
94 return;
95
78 if (unlikely(fx->swd & X87_FSW_ES)) 96 if (unlikely(fx->swd & X87_FSW_ES))
79 asm volatile("fnclex"); 97 asm volatile("fnclex");
80 alternative_input(ASM_NOP8 ASM_NOP2, 98 alternative_input(ASM_NOP8 ASM_NOP2,
@@ -107,7 +125,7 @@ static inline int save_i387_checking(struct i387_fxsave_struct __user *fx)
107 return err; 125 return err;
108} 126}
109 127
110static inline void __save_init_fpu(struct task_struct *tsk) 128static inline void fxsave(struct task_struct *tsk)
111{ 129{
112 /* Using "rex64; fxsave %0" is broken because, if the memory operand 130 /* Using "rex64; fxsave %0" is broken because, if the memory operand
113 uses any extended registers for addressing, a second REX prefix 131 uses any extended registers for addressing, a second REX prefix
@@ -132,7 +150,16 @@ static inline void __save_init_fpu(struct task_struct *tsk)
132 : "=m" (tsk->thread.xstate->fxsave) 150 : "=m" (tsk->thread.xstate->fxsave)
133 : "cdaSDb" (&tsk->thread.xstate->fxsave)); 151 : "cdaSDb" (&tsk->thread.xstate->fxsave));
134#endif 152#endif
135 clear_fpu_state(&tsk->thread.xstate->fxsave); 153}
154
155static inline void __save_init_fpu(struct task_struct *tsk)
156{
157 if (task_thread_info(tsk)->status & TS_XSAVE)
158 xsave(tsk);
159 else
160 fxsave(tsk);
161
162 clear_fpu_state(tsk);
136 task_thread_info(tsk)->status &= ~TS_USEDFPU; 163 task_thread_info(tsk)->status &= ~TS_USEDFPU;
137} 164}
138 165
@@ -147,6 +174,10 @@ static inline void tolerant_fwait(void)
147 174
148static inline void restore_fpu(struct task_struct *tsk) 175static inline void restore_fpu(struct task_struct *tsk)
149{ 176{
177 if (task_thread_info(tsk)->status & TS_XSAVE) {
178 xrstor_checking(&tsk->thread.xstate->xsave);
179 return;
180 }
150 /* 181 /*
151 * The "nop" is needed to make the instructions the same 182 * The "nop" is needed to make the instructions the same
152 * length. 183 * length.
@@ -172,6 +203,27 @@ static inline void restore_fpu(struct task_struct *tsk)
172 */ 203 */
173static inline void __save_init_fpu(struct task_struct *tsk) 204static inline void __save_init_fpu(struct task_struct *tsk)
174{ 205{
206 if (task_thread_info(tsk)->status & TS_XSAVE) {
207 struct xsave_struct *xstate = &tsk->thread.xstate->xsave;
208 struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave;
209
210 xsave(tsk);
211
212 /*
213 * xsave header may indicate the init state of the FP.
214 */
215 if (!(xstate->xsave_hdr.xstate_bv & XSTATE_FP))
216 goto end;
217
218 if (unlikely(fx->swd & X87_FSW_ES))
219 asm volatile("fnclex");
220
221 /*
222 * we can do a simple return here or be paranoid :)
223 */
224 goto clear_state;
225 }
226
175 /* Use more nops than strictly needed in case the compiler 227 /* Use more nops than strictly needed in case the compiler
176 varies code */ 228 varies code */
177 alternative_input( 229 alternative_input(
@@ -181,6 +233,7 @@ static inline void __save_init_fpu(struct task_struct *tsk)
181 X86_FEATURE_FXSR, 233 X86_FEATURE_FXSR,
182 [fx] "m" (tsk->thread.xstate->fxsave), 234 [fx] "m" (tsk->thread.xstate->fxsave),
183 [fsw] "m" (tsk->thread.xstate->fxsave.swd) : "memory"); 235 [fsw] "m" (tsk->thread.xstate->fxsave.swd) : "memory");
236clear_state:
184 /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception 237 /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
185 is pending. Clear the x87 state here by setting it to fixed 238 is pending. Clear the x87 state here by setting it to fixed
186 values. safe_address is a random variable that should be in L1 */ 239 values. safe_address is a random variable that should be in L1 */
@@ -190,6 +243,7 @@ static inline void __save_init_fpu(struct task_struct *tsk)
190 "fildl %[addr]", /* set F?P to defined value */ 243 "fildl %[addr]", /* set F?P to defined value */
191 X86_FEATURE_FXSAVE_LEAK, 244 X86_FEATURE_FXSAVE_LEAK,
192 [addr] "m" (safe_address)); 245 [addr] "m" (safe_address));
246end:
193 task_thread_info(tsk)->status &= ~TS_USEDFPU; 247 task_thread_info(tsk)->status &= ~TS_USEDFPU;
194} 248}
195 249
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index d7c0221c0278..77b7af6b573b 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -362,6 +362,7 @@ union thread_xstate {
362 struct i387_fsave_struct fsave; 362 struct i387_fsave_struct fsave;
363 struct i387_fxsave_struct fxsave; 363 struct i387_fxsave_struct fxsave;
364 struct i387_soft_struct soft; 364 struct i387_soft_struct soft;
365 struct xsave_struct xsave;
365}; 366};
366 367
367#ifdef CONFIG_X86_64 368#ifdef CONFIG_X86_64
diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h
index e64be8863b76..30586f2ee558 100644
--- a/include/asm-x86/thread_info.h
+++ b/include/asm-x86/thread_info.h
@@ -239,6 +239,7 @@ static inline struct thread_info *stack_thread_info(void)
239#define TS_POLLING 0x0004 /* true if in idle loop 239#define TS_POLLING 0x0004 /* true if in idle loop
240 and not sleeping */ 240 and not sleeping */
241#define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */ 241#define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */
242#define TS_XSAVE 0x0010 /* Use xsave/xrstor */
242 243
243#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) 244#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING)
244 245
diff --git a/include/asm-x86/xsave.h b/include/asm-x86/xsave.h
index 6d70e62c6bdc..e835a917ee19 100644
--- a/include/asm-x86/xsave.h
+++ b/include/asm-x86/xsave.h
@@ -17,10 +17,43 @@
17#define XCNTXT_LMASK (XSTATE_FP | XSTATE_SSE) 17#define XCNTXT_LMASK (XSTATE_FP | XSTATE_SSE)
18#define XCNTXT_HMASK 0x0 18#define XCNTXT_HMASK 0x0
19 19
20#ifdef CONFIG_X86_64
21#define REX_PREFIX "0x48, "
22#else
23#define REX_PREFIX
24#endif
25
20extern unsigned int xstate_size, pcntxt_hmask, pcntxt_lmask; 26extern unsigned int xstate_size, pcntxt_hmask, pcntxt_lmask;
21extern struct xsave_struct *init_xstate_buf; 27extern struct xsave_struct *init_xstate_buf;
22 28
23extern void xsave_cntxt_init(void); 29extern void xsave_cntxt_init(void);
24extern void xsave_init(void); 30extern void xsave_init(void);
25 31extern int init_fpu(struct task_struct *child);
32
33static inline int xrstor_checking(struct xsave_struct *fx)
34{
35 int err;
36
37 asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
38 "2:\n"
39 ".section .fixup,\"ax\"\n"
40 "3: movl $-1,%[err]\n"
41 " jmp 2b\n"
42 ".previous\n"
43 _ASM_EXTABLE(1b, 3b)
44 : [err] "=r" (err)
45 : "D" (fx), "m" (*fx), "a" (-1), "d" (-1), "0" (0)
46 : "memory");
47
48 return err;
49}
50
51static inline void xsave(struct task_struct *tsk)
52{
53 /* This, however, we can work around by forcing the compiler to select
54 an addressing mode that doesn't require extended registers. */
55 __asm__ __volatile__(".byte " REX_PREFIX "0x0f,0xae,0x27"
56 : : "D" (&(tsk->thread.xstate->xsave)),
57 "a" (-1), "d"(-1) : "memory");
58}
26#endif 59#endif