diff options
author | Suresh Siddha <suresh.b.siddha@intel.com> | 2008-07-29 13:29:20 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-07-30 13:49:24 -0400 |
commit | b359e8a434cc3d09847010fc4aeccf48d69740e4 (patch) | |
tree | 8911c299dc1768c78d5452a1e7e0efd2fc8d5abb /include/asm-x86/i387.h | |
parent | dc1e35c6e95e8923cf1d3510438b63c600fee1e2 (diff) |
x86, xsave: context switch support using xsave/xrstor
Uses xsave/xrstor (instead of traditional fxsave/fxrstor) in context switch
when available.
Introduces the TS_XSAVE flag, which determines the need to use xsave/xrstor
instructions during context switch instead of the legacy fxsave/fxrstor
instructions. The thread-synchronous status word is already in L1 cache during
this code path and thus minimizes the performance penalty compared to
(cpu_has_xsave) checks.
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'include/asm-x86/i387.h')
-rw-r--r-- | include/asm-x86/i387.h | 64 |
1 files changed, 59 insertions, 5 deletions
diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h index 6a6647896670..a6d256f4ac81 100644 --- a/include/asm-x86/i387.h +++ b/include/asm-x86/i387.h | |||
@@ -36,6 +36,8 @@ extern int save_i387_ia32(struct _fpstate_ia32 __user *buf); | |||
36 | extern int restore_i387_ia32(struct _fpstate_ia32 __user *buf); | 36 | extern int restore_i387_ia32(struct _fpstate_ia32 __user *buf); |
37 | #endif | 37 | #endif |
38 | 38 | ||
39 | #define X87_FSW_ES (1 << 7) /* Exception Summary */ | ||
40 | |||
39 | #ifdef CONFIG_X86_64 | 41 | #ifdef CONFIG_X86_64 |
40 | 42 | ||
41 | /* Ignore delayed exceptions from user space */ | 43 | /* Ignore delayed exceptions from user space */ |
@@ -46,7 +48,7 @@ static inline void tolerant_fwait(void) | |||
46 | _ASM_EXTABLE(1b, 2b)); | 48 | _ASM_EXTABLE(1b, 2b)); |
47 | } | 49 | } |
48 | 50 | ||
49 | static inline int restore_fpu_checking(struct i387_fxsave_struct *fx) | 51 | static inline int fxrstor_checking(struct i387_fxsave_struct *fx) |
50 | { | 52 | { |
51 | int err; | 53 | int err; |
52 | 54 | ||
@@ -66,15 +68,31 @@ static inline int restore_fpu_checking(struct i387_fxsave_struct *fx) | |||
66 | return err; | 68 | return err; |
67 | } | 69 | } |
68 | 70 | ||
69 | #define X87_FSW_ES (1 << 7) /* Exception Summary */ | 71 | static inline int restore_fpu_checking(struct task_struct *tsk) |
72 | { | ||
73 | if (task_thread_info(tsk)->status & TS_XSAVE) | ||
74 | return xrstor_checking(&tsk->thread.xstate->xsave); | ||
75 | else | ||
76 | return fxrstor_checking(&tsk->thread.xstate->fxsave); | ||
77 | } | ||
70 | 78 | ||
71 | /* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception | 79 | /* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception |
72 | is pending. Clear the x87 state here by setting it to fixed | 80 | is pending. Clear the x87 state here by setting it to fixed |
73 | values. The kernel data segment can be sometimes 0 and sometimes | 81 | values. The kernel data segment can be sometimes 0 and sometimes |
74 | new user value. Both should be ok. | 82 | new user value. Both should be ok. |
75 | Use the PDA as safe address because it should be already in L1. */ | 83 | Use the PDA as safe address because it should be already in L1. */ |
76 | static inline void clear_fpu_state(struct i387_fxsave_struct *fx) | 84 | static inline void clear_fpu_state(struct task_struct *tsk) |
77 | { | 85 | { |
86 | struct xsave_struct *xstate = &tsk->thread.xstate->xsave; | ||
87 | struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; | ||
88 | |||
89 | /* | ||
90 | * xsave header may indicate the init state of the FP. | ||
91 | */ | ||
92 | if ((task_thread_info(tsk)->status & TS_XSAVE) && | ||
93 | !(xstate->xsave_hdr.xstate_bv & XSTATE_FP)) | ||
94 | return; | ||
95 | |||
78 | if (unlikely(fx->swd & X87_FSW_ES)) | 96 | if (unlikely(fx->swd & X87_FSW_ES)) |
79 | asm volatile("fnclex"); | 97 | asm volatile("fnclex"); |
80 | alternative_input(ASM_NOP8 ASM_NOP2, | 98 | alternative_input(ASM_NOP8 ASM_NOP2, |
@@ -107,7 +125,7 @@ static inline int save_i387_checking(struct i387_fxsave_struct __user *fx) | |||
107 | return err; | 125 | return err; |
108 | } | 126 | } |
109 | 127 | ||
110 | static inline void __save_init_fpu(struct task_struct *tsk) | 128 | static inline void fxsave(struct task_struct *tsk) |
111 | { | 129 | { |
112 | /* Using "rex64; fxsave %0" is broken because, if the memory operand | 130 | /* Using "rex64; fxsave %0" is broken because, if the memory operand |
113 | uses any extended registers for addressing, a second REX prefix | 131 | uses any extended registers for addressing, a second REX prefix |
@@ -132,7 +150,16 @@ static inline void __save_init_fpu(struct task_struct *tsk) | |||
132 | : "=m" (tsk->thread.xstate->fxsave) | 150 | : "=m" (tsk->thread.xstate->fxsave) |
133 | : "cdaSDb" (&tsk->thread.xstate->fxsave)); | 151 | : "cdaSDb" (&tsk->thread.xstate->fxsave)); |
134 | #endif | 152 | #endif |
135 | clear_fpu_state(&tsk->thread.xstate->fxsave); | 153 | } |
154 | |||
155 | static inline void __save_init_fpu(struct task_struct *tsk) | ||
156 | { | ||
157 | if (task_thread_info(tsk)->status & TS_XSAVE) | ||
158 | xsave(tsk); | ||
159 | else | ||
160 | fxsave(tsk); | ||
161 | |||
162 | clear_fpu_state(tsk); | ||
136 | task_thread_info(tsk)->status &= ~TS_USEDFPU; | 163 | task_thread_info(tsk)->status &= ~TS_USEDFPU; |
137 | } | 164 | } |
138 | 165 | ||
@@ -147,6 +174,10 @@ static inline void tolerant_fwait(void) | |||
147 | 174 | ||
148 | static inline void restore_fpu(struct task_struct *tsk) | 175 | static inline void restore_fpu(struct task_struct *tsk) |
149 | { | 176 | { |
177 | if (task_thread_info(tsk)->status & TS_XSAVE) { | ||
178 | xrstor_checking(&tsk->thread.xstate->xsave); | ||
179 | return; | ||
180 | } | ||
150 | /* | 181 | /* |
151 | * The "nop" is needed to make the instructions the same | 182 | * The "nop" is needed to make the instructions the same |
152 | * length. | 183 | * length. |
@@ -172,6 +203,27 @@ static inline void restore_fpu(struct task_struct *tsk) | |||
172 | */ | 203 | */ |
173 | static inline void __save_init_fpu(struct task_struct *tsk) | 204 | static inline void __save_init_fpu(struct task_struct *tsk) |
174 | { | 205 | { |
206 | if (task_thread_info(tsk)->status & TS_XSAVE) { | ||
207 | struct xsave_struct *xstate = &tsk->thread.xstate->xsave; | ||
208 | struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; | ||
209 | |||
210 | xsave(tsk); | ||
211 | |||
212 | /* | ||
213 | * xsave header may indicate the init state of the FP. | ||
214 | */ | ||
215 | if (!(xstate->xsave_hdr.xstate_bv & XSTATE_FP)) | ||
216 | goto end; | ||
217 | |||
218 | if (unlikely(fx->swd & X87_FSW_ES)) | ||
219 | asm volatile("fnclex"); | ||
220 | |||
221 | /* | ||
222 | * we can do a simple return here or be paranoid :) | ||
223 | */ | ||
224 | goto clear_state; | ||
225 | } | ||
226 | |||
175 | /* Use more nops than strictly needed in case the compiler | 227 | /* Use more nops than strictly needed in case the compiler |
176 | varies code */ | 228 | varies code */ |
177 | alternative_input( | 229 | alternative_input( |
@@ -181,6 +233,7 @@ static inline void __save_init_fpu(struct task_struct *tsk) | |||
181 | X86_FEATURE_FXSR, | 233 | X86_FEATURE_FXSR, |
182 | [fx] "m" (tsk->thread.xstate->fxsave), | 234 | [fx] "m" (tsk->thread.xstate->fxsave), |
183 | [fsw] "m" (tsk->thread.xstate->fxsave.swd) : "memory"); | 235 | [fsw] "m" (tsk->thread.xstate->fxsave.swd) : "memory"); |
236 | clear_state: | ||
184 | /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception | 237 | /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception |
185 | is pending. Clear the x87 state here by setting it to fixed | 238 | is pending. Clear the x87 state here by setting it to fixed |
186 | values. safe_address is a random variable that should be in L1 */ | 239 | values. safe_address is a random variable that should be in L1 */ |
@@ -190,6 +243,7 @@ static inline void __save_init_fpu(struct task_struct *tsk) | |||
190 | "fildl %[addr]", /* set F?P to defined value */ | 243 | "fildl %[addr]", /* set F?P to defined value */ |
191 | X86_FEATURE_FXSAVE_LEAK, | 244 | X86_FEATURE_FXSAVE_LEAK, |
192 | [addr] "m" (safe_address)); | 245 | [addr] "m" (safe_address)); |
246 | end: | ||
193 | task_thread_info(tsk)->status &= ~TS_USEDFPU; | 247 | task_thread_info(tsk)->status &= ~TS_USEDFPU; |
194 | } | 248 | } |
195 | 249 | ||