about | summary | refs | log | tree | commit | diff | stats
path: root/arch/x86/lib
diff options
context:
space:
mode:
author Jan Beulich <JBeulich@suse.com> 2012-01-20 11:22:04 -0500
committer H. Peter Anvin <hpa@linux.intel.com> 2012-01-20 20:29:49 -0500
commit cb8095bba6d24118135a5683a956f4f4fb5f17bb (patch)
tree 25eff3732e8471e314591d0bc6ea41d96857c18b /arch/x86/lib
parent 819165fb34b9777f852429f2c6d6f79fbb71b9eb (diff)
x86: atomic64 assembly improvements
In the "xchg" implementation, %ebx and %ecx don't need to be copied into %eax and %edx respectively (this is only necessary when desiring to only read the stored value).

In the "add_unless" implementation, swapping the use of %ecx and %esi for passing arguments allows %esi to become an input only (i.e. permitting the register to be re-used to address the same object without reload).

In "{add,sub}_return", doing the initial read64 through the passed in %ecx decreases a register dependency.

In "inc_not_zero", a branch can be eliminated by or-ing together the two halves of the current (64-bit) value, and code size can be further reduced by adjusting the arithmetic slightly.

v2: Undo the folding of "xchg" and "set".

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Link: http://lkml.kernel.org/r/4F19A2BC020000780006E0DC@nat28.tlf.novell.com
Cc: Luca Barbieri <luca@luca-barbieri.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'arch/x86/lib')
-rw-r--r-- arch/x86/lib/atomic64_386_32.S 6
-rw-r--r-- arch/x86/lib/atomic64_cx8_32.S 29
2 files changed, 14 insertions, 21 deletions
diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S
index e8e7e0d06f42..00933d5e992f 100644
--- a/arch/x86/lib/atomic64_386_32.S
+++ b/arch/x86/lib/atomic64_386_32.S
@@ -137,13 +137,13 @@ BEGIN(dec_return)
137RET_ENDP 137RET_ENDP
138#undef v 138#undef v
139 139
140#define v %ecx 140#define v %esi
141BEGIN(add_unless) 141BEGIN(add_unless)
142 addl %eax, %esi 142 addl %eax, %ecx
143 adcl %edx, %edi 143 adcl %edx, %edi
144 addl (v), %eax 144 addl (v), %eax
145 adcl 4(v), %edx 145 adcl 4(v), %edx
146 cmpl %eax, %esi 146 cmpl %eax, %ecx
147 je 3f 147 je 3f
1481: 1481:
149 movl %eax, (v) 149 movl %eax, (v)
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
index 391a083674b4..f5cc9eb1d51b 100644
--- a/arch/x86/lib/atomic64_cx8_32.S
+++ b/arch/x86/lib/atomic64_cx8_32.S
@@ -55,8 +55,6 @@ ENDPROC(atomic64_set_cx8)
55ENTRY(atomic64_xchg_cx8) 55ENTRY(atomic64_xchg_cx8)
56 CFI_STARTPROC 56 CFI_STARTPROC
57 57
58 movl %ebx, %eax
59 movl %ecx, %edx
601: 581:
61 LOCK_PREFIX 59 LOCK_PREFIX
62 cmpxchg8b (%esi) 60 cmpxchg8b (%esi)
@@ -78,7 +76,7 @@ ENTRY(atomic64_\func\()_return_cx8)
78 movl %edx, %edi 76 movl %edx, %edi
79 movl %ecx, %ebp 77 movl %ecx, %ebp
80 78
81 read64 %ebp 79 read64 %ecx
821: 801:
83 movl %eax, %ebx 81 movl %eax, %ebx
84 movl %edx, %ecx 82 movl %edx, %ecx
@@ -159,23 +157,22 @@ ENTRY(atomic64_add_unless_cx8)
159 SAVE ebx 157 SAVE ebx
160/* these just push these two parameters on the stack */ 158/* these just push these two parameters on the stack */
161 SAVE edi 159 SAVE edi
162 SAVE esi 160 SAVE ecx
163 161
164 movl %ecx, %ebp 162 movl %eax, %ebp
165 movl %eax, %esi
166 movl %edx, %edi 163 movl %edx, %edi
167 164
168 read64 %ebp 165 read64 %esi
1691: 1661:
170 cmpl %eax, 0(%esp) 167 cmpl %eax, 0(%esp)
171 je 4f 168 je 4f
1722: 1692:
173 movl %eax, %ebx 170 movl %eax, %ebx
174 movl %edx, %ecx 171 movl %edx, %ecx
175 addl %esi, %ebx 172 addl %ebp, %ebx
176 adcl %edi, %ecx 173 adcl %edi, %ecx
177 LOCK_PREFIX 174 LOCK_PREFIX
178 cmpxchg8b (%ebp) 175 cmpxchg8b (%esi)
179 jne 1b 176 jne 1b
180 177
181 movl $1, %eax 178 movl $1, %eax
@@ -199,13 +196,13 @@ ENTRY(atomic64_inc_not_zero_cx8)
199 196
200 read64 %esi 197 read64 %esi
2011: 1981:
202 testl %eax, %eax 199 movl %eax, %ecx
203 je 4f 200 orl %edx, %ecx
2042: 201 jz 3f
205 movl %eax, %ebx 202 movl %eax, %ebx
206 movl %edx, %ecx 203 xorl %ecx, %ecx
207 addl $1, %ebx 204 addl $1, %ebx
208 adcl $0, %ecx 205 adcl %edx, %ecx
209 LOCK_PREFIX 206 LOCK_PREFIX
210 cmpxchg8b (%esi) 207 cmpxchg8b (%esi)
211 jne 1b 208 jne 1b
@@ -214,9 +211,5 @@ ENTRY(atomic64_inc_not_zero_cx8)
2143: 2113:
215 RESTORE ebx 212 RESTORE ebx
216 ret 213 ret
2174:
218 testl %edx, %edx
219 jne 2b
220 jmp 3b
221 CFI_ENDPROC 214 CFI_ENDPROC
222ENDPROC(atomic64_inc_not_zero_cx8) 215ENDPROC(atomic64_inc_not_zero_cx8)