diff options
author | Jan Beulich <jbeulich@novell.com> | 2006-01-11 16:43:36 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-01-11 22:04:51 -0500 |
commit | 7180d4fb83085fef9d24b353f5bd79cf6fd98447 (patch) | |
tree | 0b2dde2d788ee264fbf02f7f7cfb30288c73feee | |
parent | e99286744599a66195de4cd975d7ef4d643c2789 (diff) |
[PATCH] x86_64: Fix 64bit FXSAVE encoding
Separating the rex64 prefix from fxsave/fxrstor with a semicolon meant
that the prefix did not always take effect: the assembler treats
"rex64 ;" as a separate instruction, so when the compiler picks extended
registers for addressing, a second REX prefix is generated for the
fxsave/fxrstor itself. Depending on the build, this could result in
32-bit saves and/or restores being performed instead of 64-bit ones.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | include/asm-x86_64/i387.h | 58 |
1 files changed, 48 insertions, 10 deletions
diff --git a/include/asm-x86_64/i387.h b/include/asm-x86_64/i387.h index aa39cfd0e001..57f7e1433849 100644 --- a/include/asm-x86_64/i387.h +++ b/include/asm-x86_64/i387.h | |||
@@ -75,7 +75,8 @@ extern int set_fpregs(struct task_struct *tsk, | |||
75 | static inline int restore_fpu_checking(struct i387_fxsave_struct *fx) | 75 | static inline int restore_fpu_checking(struct i387_fxsave_struct *fx) |
76 | { | 76 | { |
77 | int err; | 77 | int err; |
78 | asm volatile("1: rex64 ; fxrstor (%[fx])\n\t" | 78 | |
79 | asm volatile("1: rex64/fxrstor (%[fx])\n\t" | ||
79 | "2:\n" | 80 | "2:\n" |
80 | ".section .fixup,\"ax\"\n" | 81 | ".section .fixup,\"ax\"\n" |
81 | "3: movl $-1,%[err]\n" | 82 | "3: movl $-1,%[err]\n" |
@@ -86,7 +87,11 @@ static inline int restore_fpu_checking(struct i387_fxsave_struct *fx) | |||
86 | " .quad 1b,3b\n" | 87 | " .quad 1b,3b\n" |
87 | ".previous" | 88 | ".previous" |
88 | : [err] "=r" (err) | 89 | : [err] "=r" (err) |
89 | : [fx] "r" (fx), "0" (0)); | 90 | #if 0 /* See comment in __fxsave_clear() below. */ |
91 | : [fx] "r" (fx), "m" (*fx), "0" (0)); | ||
92 | #else | ||
93 | : [fx] "cdaSDb" (fx), "m" (*fx), "0" (0)); | ||
94 | #endif | ||
90 | if (unlikely(err)) | 95 | if (unlikely(err)) |
91 | init_fpu(current); | 96 | init_fpu(current); |
92 | return err; | 97 | return err; |
@@ -95,7 +100,8 @@ static inline int restore_fpu_checking(struct i387_fxsave_struct *fx) | |||
95 | static inline int save_i387_checking(struct i387_fxsave_struct __user *fx) | 100 | static inline int save_i387_checking(struct i387_fxsave_struct __user *fx) |
96 | { | 101 | { |
97 | int err; | 102 | int err; |
98 | asm volatile("1: rex64 ; fxsave (%[fx])\n\t" | 103 | |
104 | asm volatile("1: rex64/fxsave (%[fx])\n\t" | ||
99 | "2:\n" | 105 | "2:\n" |
100 | ".section .fixup,\"ax\"\n" | 106 | ".section .fixup,\"ax\"\n" |
101 | "3: movl $-1,%[err]\n" | 107 | "3: movl $-1,%[err]\n" |
@@ -105,20 +111,53 @@ static inline int save_i387_checking(struct i387_fxsave_struct __user *fx) | |||
105 | " .align 8\n" | 111 | " .align 8\n" |
106 | " .quad 1b,3b\n" | 112 | " .quad 1b,3b\n" |
107 | ".previous" | 113 | ".previous" |
108 | : [err] "=r" (err) | 114 | : [err] "=r" (err), "=m" (*fx) |
109 | : [fx] "r" (fx), "0" (0)); | 115 | #if 0 /* See comment in __fxsave_clear() below. */ |
116 | : [fx] "r" (fx), "0" (0)); | ||
117 | #else | ||
118 | : [fx] "cdaSDb" (fx), "0" (0)); | ||
119 | #endif | ||
110 | if (unlikely(err)) | 120 | if (unlikely(err)) |
111 | __clear_user(fx, sizeof(struct i387_fxsave_struct)); | 121 | __clear_user(fx, sizeof(struct i387_fxsave_struct)); |
112 | return err; | 122 | return err; |
113 | } | 123 | } |
114 | 124 | ||
125 | static inline void __fxsave_clear(struct task_struct *tsk) | ||
126 | { | ||
127 | /* Using "rex64; fxsave %0" is broken because, if the memory operand | ||
128 | uses any extended registers for addressing, a second REX prefix | ||
129 | will be generated (to the assembler, rex64 followed by semicolon | ||
130 | is a separate instruction), and hence the 64-bitness is lost. */ | ||
131 | #if 0 | ||
132 | /* Using "fxsaveq %0" would be the ideal choice, but is only supported | ||
133 | starting with gas 2.16. */ | ||
134 | __asm__ __volatile__("fxsaveq %0" | ||
135 | : "=m" (tsk->thread.i387.fxsave)); | ||
136 | #elif 0 | ||
137 | /* Using, as a workaround, the properly prefixed form below isn't | ||
138 | accepted by any binutils version so far released, complaining that | ||
139 | the same type of prefix is used twice if an extended register is | ||
140 | needed for addressing (fix submitted to mainline 2005-11-21). */ | ||
141 | __asm__ __volatile__("rex64/fxsave %0" | ||
142 | : "=m" (tsk->thread.i387.fxsave)); | ||
143 | #else | ||
144 | /* This, however, we can work around by forcing the compiler to select | ||
145 | an addressing mode that doesn't require extended registers. */ | ||
146 | __asm__ __volatile__("rex64/fxsave %P2(%1)" | ||
147 | : "=m" (tsk->thread.i387.fxsave) | ||
148 | : "cdaSDb" (tsk), | ||
149 | "i" (offsetof(__typeof__(*tsk), | ||
150 | thread.i387.fxsave))); | ||
151 | #endif | ||
152 | __asm__ __volatile__("fnclex"); | ||
153 | } | ||
154 | |||
115 | static inline void kernel_fpu_begin(void) | 155 | static inline void kernel_fpu_begin(void) |
116 | { | 156 | { |
117 | struct thread_info *me = current_thread_info(); | 157 | struct thread_info *me = current_thread_info(); |
118 | preempt_disable(); | 158 | preempt_disable(); |
119 | if (me->status & TS_USEDFPU) { | 159 | if (me->status & TS_USEDFPU) { |
120 | asm volatile("rex64 ; fxsave %0 ; fnclex" | 160 | __fxsave_clear(me->task); |
121 | : "=m" (me->task->thread.i387.fxsave)); | ||
122 | me->status &= ~TS_USEDFPU; | 161 | me->status &= ~TS_USEDFPU; |
123 | return; | 162 | return; |
124 | } | 163 | } |
@@ -133,8 +172,7 @@ static inline void kernel_fpu_end(void) | |||
133 | 172 | ||
134 | static inline void save_init_fpu( struct task_struct *tsk ) | 173 | static inline void save_init_fpu( struct task_struct *tsk ) |
135 | { | 174 | { |
136 | asm volatile( "rex64 ; fxsave %0 ; fnclex" | 175 | __fxsave_clear(tsk); |
137 | : "=m" (tsk->thread.i387.fxsave)); | ||
138 | tsk->thread_info->status &= ~TS_USEDFPU; | 176 | tsk->thread_info->status &= ~TS_USEDFPU; |
139 | stts(); | 177 | stts(); |
140 | } | 178 | } |