author      Jan Beulich <JBeulich@suse.com>         2012-01-20 11:22:04 -0500
committer   H. Peter Anvin <hpa@linux.intel.com>    2012-01-20 20:29:49 -0500
commit      cb8095bba6d24118135a5683a956f4f4fb5f17bb
tree        25eff3732e8471e314591d0bc6ea41d96857c18b /arch/x86/lib
parent      819165fb34b9777f852429f2c6d6f79fbb71b9eb
x86: atomic64 assembly improvements
In the "xchg" implementation, %ebx and %ecx don't need to be copied
into %eax and %edx respectively (this is only necessary when desiring
to only read the stored value).
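
As a rough illustration (not the kernel code, which uses cmpxchg8b directly), the C sketch below shows why a compare-and-swap based exchange loop needs no accurate initial copy of the old value: a failed compare-and-swap rewrites the expected value with the current memory contents, so an arbitrary first guess costs at most one extra iteration. The function name exchange64 and the GCC __atomic builtin are assumptions made for this sketch.

#include <stdint.h>

/* Illustrative sketch, not the kernel implementation: a 64-bit exchange
 * built from a compare-and-swap loop.  'expected' starts as an arbitrary
 * guess; whenever the CAS fails it is rewritten with the value actually
 * in memory, so no up-front seeding of the comparand is required. */
static uint64_t exchange64(uint64_t *p, uint64_t new_val)
{
    uint64_t expected = 0;      /* any starting guess works */

    while (!__atomic_compare_exchange_n(p, &expected, new_val, 0,
                                        __ATOMIC_SEQ_CST,
                                        __ATOMIC_SEQ_CST))
        ;                       /* 'expected' now holds *p; retry */

    return expected;            /* the previous value, as xchg returns */
}

The same reasoning applies to the assembly: whatever %eax:%edx happen to hold on entry, a failed cmpxchg8b reloads the current memory value into them, so the deleted movl pair was never needed for correctness.
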
In the "add_unless" implementation, swapping the use of %ecx and %esi
for passing arguments allows %esi to become an input only (i.e.
permitting the register to be re-used to address the same object
without reload).
In "{add,sub}_return", doing the initial read64 through the passed in
%ecx decreases a register dependency.
In "inc_not_zero", a branch can be eliminated by or-ing together the
two halves of the current (64-bit) value, and code size can be further
reduced by adjusting the arithmetic slightly.
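
A minimal C sketch of that trick, using hypothetical helpers is_zero64 and inc64_halves (the names are not from the kernel source): OR-ing the 32-bit halves lets a single test cover the whole 64-bit value, and the increment only has to propagate a carry into the high half.

#include <stdint.h>

/* Hypothetical helpers mirroring what the assembly does with the value
 * split across %eax (low half) and %edx (high half). */
static int is_zero64(uint32_t lo, uint32_t hi)
{
    return (lo | hi) == 0;          /* one test instead of two branches */
}

static void inc64_halves(uint32_t *lo, uint32_t *hi)
{
    uint32_t new_lo = *lo + 1;

    /* A carry out of the low half bumps the high half, matching
     * "addl $1, %ebx; adcl %edx, %ecx" with %ecx cleared first. */
    *hi += (new_lo == 0);
    *lo = new_lo;
}

In the new assembly the OR result occupies %ecx, so %ecx is cleared with xorl and the old high half is folded back in via "adcl %edx, %ecx", a slightly shorter encoding than "adcl $0, %ecx".
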
v2: Undo the folding of "xchg" and "set".
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Link: http://lkml.kernel.org/r/4F19A2BC020000780006E0DC@nat28.tlf.novell.com
Cc: Luca Barbieri <luca@luca-barbieri.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'arch/x86/lib')
-rw-r--r--  arch/x86/lib/atomic64_386_32.S |  6
-rw-r--r--  arch/x86/lib/atomic64_cx8_32.S | 29
2 files changed, 14 insertions(+), 21 deletions(-)
diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S
index e8e7e0d06f42..00933d5e992f 100644
--- a/arch/x86/lib/atomic64_386_32.S
+++ b/arch/x86/lib/atomic64_386_32.S
@@ -137,13 +137,13 @@ BEGIN(dec_return)
 RET_ENDP
 #undef v
 
-#define v %ecx
+#define v %esi
 BEGIN(add_unless)
-	addl %eax, %esi
+	addl %eax, %ecx
 	adcl %edx, %edi
 	addl  (v), %eax
 	adcl 4(v), %edx
-	cmpl %eax, %esi
+	cmpl %eax, %ecx
 	je 3f
 1:
 	movl %eax,  (v)
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
index 391a083674b4..f5cc9eb1d51b 100644
--- a/arch/x86/lib/atomic64_cx8_32.S
+++ b/arch/x86/lib/atomic64_cx8_32.S
@@ -55,8 +55,6 @@ ENDPROC(atomic64_set_cx8)
 ENTRY(atomic64_xchg_cx8)
 	CFI_STARTPROC
 
-	movl %ebx, %eax
-	movl %ecx, %edx
 1:
 	LOCK_PREFIX
 	cmpxchg8b (%esi)
@@ -78,7 +76,7 @@ ENTRY(atomic64_\func\()_return_cx8)
 	movl %edx, %edi
 	movl %ecx, %ebp
 
-	read64 %ebp
+	read64 %ecx
 1:
 	movl %eax, %ebx
 	movl %edx, %ecx
@@ -159,23 +157,22 @@ ENTRY(atomic64_add_unless_cx8)
 	SAVE ebx
 /* these just push these two parameters on the stack */
 	SAVE edi
-	SAVE esi
+	SAVE ecx
 
-	movl %ecx, %ebp
-	movl %eax, %esi
+	movl %eax, %ebp
 	movl %edx, %edi
 
-	read64 %ebp
+	read64 %esi
 1:
 	cmpl %eax, 0(%esp)
 	je 4f
 2:
 	movl %eax, %ebx
 	movl %edx, %ecx
-	addl %esi, %ebx
+	addl %ebp, %ebx
 	adcl %edi, %ecx
 	LOCK_PREFIX
-	cmpxchg8b (%ebp)
+	cmpxchg8b (%esi)
 	jne 1b
 
 	movl $1, %eax
@@ -199,13 +196,13 @@ ENTRY(atomic64_inc_not_zero_cx8)
 
 	read64 %esi
 1:
-	testl %eax, %eax
-	je 4f
-2:
+	movl %eax, %ecx
+	orl %edx, %ecx
+	jz 3f
 	movl %eax, %ebx
-	movl %edx, %ecx
+	xorl %ecx, %ecx
 	addl $1, %ebx
-	adcl $0, %ecx
+	adcl %edx, %ecx
 	LOCK_PREFIX
 	cmpxchg8b (%esi)
 	jne 1b
@@ -214,9 +211,5 @@ ENTRY(atomic64_inc_not_zero_cx8)
 3:
 	RESTORE ebx
 	ret
-4:
-	testl %edx, %edx
-	jne 2b
-	jmp 3b
 	CFI_ENDPROC
 ENDPROC(atomic64_inc_not_zero_cx8)