aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/lib/atomic64_386_32.S
diff options
context:
space:
mode:
authorLuca Barbieri <luca@luca-barbieri.com>2010-02-24 04:54:25 -0500
committerH. Peter Anvin <hpa@zytor.com>2010-02-25 23:47:30 -0500
commita7e926abc3adfbd2e5e20d2b46177adb4e313915 (patch)
treea1b342c35a6fe39167927b5eb13e2422935deb8e /arch/x86/lib/atomic64_386_32.S
parent86a8938078a8bb518c5376de493e348c7490d506 (diff)
x86-32: Rewrite 32-bit atomic64 functions in assembly
This patch replaces atomic64_32.c with two assembly implementations, one for 386/486 machines using pushf/cli/popf and one for 586+ machines using cmpxchg8b. The cmpxchg8b implementation provides the following advantages over the current one: 1. Implements atomic64_add_unless, atomic64_dec_if_positive and atomic64_inc_not_zero 2. Uses the ZF flag changed by cmpxchg8b instead of doing a comparison 3. Uses custom register calling conventions that reduce or eliminate register moves to suit cmpxchg8b 4. Reads the initial value instead of using cmpxchg8b to do that. Currently we use lock xaddl and movl, which seems the fastest. 5. Does not use the lock prefix for atomic64_set 64-bit writes are already atomic, so we don't need that. We still need it for atomic64_read to avoid restoring a value changed in the meantime. 6. Allocates registers as well or better than gcc The 386 implementation provides support for 386 and 486 machines. 386/486 SMP is not supported (we dropped it), but such support can be added easily if desired. A pure assembly implementation is required due to the custom calling conventions, and desire to use %ebp in atomic64_add_return (we need 7 registers...), as well as the ability to use pushf/popf in the 386 code without an intermediate pop/push. The parameter names are changed to match the convention in atomic_64.h Changes in v3 (due to rebasing to tip/x86/asm): - Patches atomic64_32.h instead of atomic_32.h - Uses the CALL alternative mechanism from commit 1b1d9258181bae199dc940f4bd0298126b9a73d9 Changes in v2: - Merged 386 and cx8 support in the same patch - 386 support now done in assembly, C code no longer used at all - cmpxchg64 is used for atomic64_cmpxchg - stop using macros, use one-line inline functions instead - miscellanous changes and improvements Signed-off-by: Luca Barbieri <luca@luca-barbieri.com> LKML-Reference: <1267005265-27958-5-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Diffstat (limited to 'arch/x86/lib/atomic64_386_32.S')
-rw-r--r--arch/x86/lib/atomic64_386_32.S175
1 files changed, 175 insertions, 0 deletions
diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S
new file mode 100644
index 000000000000..5db07fe4a0ca
--- /dev/null
+++ b/arch/x86/lib/atomic64_386_32.S
@@ -0,0 +1,175 @@
1/*
2 * atomic64_t for 386/486
3 *
4 * Copyright © 2010 Luca Barbieri
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 */
11
12#include <linux/linkage.h>
13#include <asm/alternative-asm.h>
14#include <asm/dwarf2.h>
15
16/* if you want SMP support, implement these with real spinlocks */
17.macro LOCK reg
18 pushfl
19 CFI_ADJUST_CFA_OFFSET 4
20 cli
21.endm
22
23.macro UNLOCK reg
24 popfl
25 CFI_ADJUST_CFA_OFFSET -4
26.endm
27
28.macro BEGIN func reg
29$v = \reg
30
31ENTRY(atomic64_\func\()_386)
32 CFI_STARTPROC
33 LOCK $v
34
35.macro RETURN
36 UNLOCK $v
37 ret
38.endm
39
40.macro END_
41 CFI_ENDPROC
42ENDPROC(atomic64_\func\()_386)
43.purgem RETURN
44.purgem END_
45.purgem END
46.endm
47
48.macro END
49RETURN
50END_
51.endm
52.endm
53
54BEGIN read %ecx
55 movl ($v), %eax
56 movl 4($v), %edx
57END
58
59BEGIN set %esi
60 movl %ebx, ($v)
61 movl %ecx, 4($v)
62END
63
64BEGIN xchg %esi
65 movl ($v), %eax
66 movl 4($v), %edx
67 movl %ebx, ($v)
68 movl %ecx, 4($v)
69END
70
71BEGIN add %ecx
72 addl %eax, ($v)
73 adcl %edx, 4($v)
74END
75
76BEGIN add_return %ecx
77 addl ($v), %eax
78 adcl 4($v), %edx
79 movl %eax, ($v)
80 movl %edx, 4($v)
81END
82
83BEGIN sub %ecx
84 subl %eax, ($v)
85 sbbl %edx, 4($v)
86END
87
88BEGIN sub_return %ecx
89 negl %edx
90 negl %eax
91 sbbl $0, %edx
92 addl ($v), %eax
93 adcl 4($v), %edx
94 movl %eax, ($v)
95 movl %edx, 4($v)
96END
97
98BEGIN inc %esi
99 addl $1, ($v)
100 adcl $0, 4($v)
101END
102
103BEGIN inc_return %esi
104 movl ($v), %eax
105 movl 4($v), %edx
106 addl $1, %eax
107 adcl $0, %edx
108 movl %eax, ($v)
109 movl %edx, 4($v)
110END
111
112BEGIN dec %esi
113 subl $1, ($v)
114 sbbl $0, 4($v)
115END
116
117BEGIN dec_return %esi
118 movl ($v), %eax
119 movl 4($v), %edx
120 subl $1, %eax
121 sbbl $0, %edx
122 movl %eax, ($v)
123 movl %edx, 4($v)
124END
125
126BEGIN add_unless %ecx
127 addl %eax, %esi
128 adcl %edx, %edi
129 addl ($v), %eax
130 adcl 4($v), %edx
131 cmpl %eax, %esi
132 je 3f
1331:
134 movl %eax, ($v)
135 movl %edx, 4($v)
136 xorl %eax, %eax
1372:
138RETURN
1393:
140 cmpl %edx, %edi
141 jne 1b
142 movl $1, %eax
143 jmp 2b
144END_
145
146BEGIN inc_not_zero %esi
147 movl ($v), %eax
148 movl 4($v), %edx
149 testl %eax, %eax
150 je 3f
1511:
152 addl $1, %eax
153 adcl $0, %edx
154 movl %eax, ($v)
155 movl %edx, 4($v)
156 xorl %eax, %eax
1572:
158RETURN
1593:
160 testl %edx, %edx
161 jne 1b
162 movl $1, %eax
163 jmp 2b
164END_
165
166BEGIN dec_if_positive %esi
167 movl ($v), %eax
168 movl 4($v), %edx
169 subl $1, %eax
170 sbbl $0, %edx
171 js 1f
172 movl %eax, ($v)
173 movl %edx, 4($v)
1741:
175END