x86-32: Rewrite 32-bit atomic64 functions in assembly

This patch replaces atomic64_32.c with two assembly implementations, one for 386/486 machines using pushf/cli/popf and one for 586+ machines using cmpxchg8b.

The cmpxchg8b implementation provides the following advantages over the current one:
1. Implements atomic64_add_unless, atomic64_dec_if_positive and atomic64_inc_not_zero.
2. Uses the ZF flag changed by cmpxchg8b instead of doing a comparison.
3. Uses custom register calling conventions that reduce or eliminate register moves to suit cmpxchg8b.
4. Reads the initial value instead of using cmpxchg8b to do that. Currently we use lock xaddl and movl, which seems the fastest.
5. Does not use the lock prefix for atomic64_set: 64-bit writes are already atomic, so we don't need that. We still need it for atomic64_read to avoid restoring a value changed in the meantime.
6. Allocates registers as well or better than gcc.

The 386 implementation provides support for 386 and 486 machines. 386/486 SMP is not supported (we dropped it), but such support can be added easily if desired.

A pure assembly implementation is required due to the custom calling conventions, and desire to use %ebp in atomic64_add_return (we need 7 registers...), as well as the ability to use pushf/popf in the 386 code without an intermediate pop/push.

The parameter names are changed to match the convention in atomic_64.h.

Changes in v3 (due to rebasing to tip/x86/asm):
- Patches atomic64_32.h instead of atomic_32.h
- Uses the CALL alternative mechanism from commit 1b1d9258181bae199dc940f4bd0298126b9a73d9

Changes in v2:
- Merged 386 and cx8 support in the same patch
- 386 support now done in assembly, C code no longer used at all
- cmpxchg64 is used for atomic64_cmpxchg
- stop using macros, use one-line inline functions instead
- miscellaneous changes and improvements

Signed-off-by: Luca Barbieri <luca@luca-barbieri.com>
LKML-Reference: <1267005265-27958-5-git-send-email-luca@luca-barbieri.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
author: Luca Barbieri <luca@luca-barbieri.com> 2010-02-24 04:54:25 -0500
committer: H. Peter Anvin <hpa@zytor.com> 2010-02-25 23:47:30 -0500
commit: a7e926abc3adfbd2e5e20d2b46177adb4e313915 (patch)
tree: a1b342c35a6fe39167927b5eb13e2422935deb8e /arch/x86/lib/atomic64_386_32.S
parent: 86a8938078a8bb518c5376de493e348c7490d506 (diff)
1 files changed, 175 insertions, 0 deletions
diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S
new file mode 100644
index 000000000000..5db07fe4a0ca
--- /dev/null
+++ b/arch/x86/lib/atomic64_386_32.S
@@ -0,0 +1,175 @@
+/*
+ * atomic64_t for 386/486
+ *
+ * Copyright © 2010  Luca Barbieri
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#include <linux/linkage.h>
+#include <asm/alternative-asm.h>
+#include <asm/dwarf2.h>
+/* if you want SMP support, implement these with real spinlocks */
+.macro LOCK reg                 /* enter the critical section; the reg argument is accepted but not used here */
+        pushfl                  /* save EFLAGS (including the interrupt flag) on the stack */
+        CFI_ADJUST_CFA_OFFSET 4 /* unwind info: account for the 4 bytes just pushed */
+        cli                     /* mask interrupts until the matching UNLOCK */
+.endm
+.macro UNLOCK reg               /* leave the critical section; the reg argument is accepted but not used here */
+        popfl                   /* restore the EFLAGS saved by LOCK, so the interrupt flag returns to its prior state */
+        CFI_ADJUST_CFA_OFFSET -4 /* unwind info: the saved flags are off the stack again */
+.endm
+.macro BEGIN func reg           /* open atomic64_<func>_386; reg is the register used as base address of the 64-bit value */
+$v = \reg                       /* alias so function bodies can write ($v) and 4($v); NOTE(review): needs an assembler that accepts a symbol equated to a register - confirm */
+ENTRY(atomic64_\func\()_386)
+        CFI_STARTPROC
+        LOCK $v                 /* every function body runs between LOCK and the UNLOCK in RETURN */
+.macro RETURN                   /* per-function epilogue: unlock, then return to the caller */
+        UNLOCK $v
+        ret
+.endm
+.macro END_                     /* close the function without emitting another return */
+        CFI_ENDPROC
+ENDPROC(atomic64_\func\()_386)
+.purgem RETURN                  /* drop the per-function macros so the next BEGIN can define them again */
+.purgem END_
+.purgem END
+.endm
+.macro END                      /* common ending: RETURN followed by END_ */
+RETURN
+END_
+.endm
+.endm
+BEGIN read %ecx                 /* atomic64_read_386: pointer in %ecx, value left in %edx:%eax */
+        movl  ($v), %eax        /* low 32 bits */
+        movl 4($v), %edx        /* high 32 bits */
+END
+BEGIN set %esi                  /* atomic64_set_386: pointer in %esi, new value taken from %ecx:%ebx */
+        movl %ebx,  ($v)        /* low 32 bits */
+        movl %ecx, 4($v)        /* high 32 bits */
+END
+BEGIN xchg %esi                 /* atomic64_xchg_386: pointer in %esi, new value in %ecx:%ebx, old value left in %edx:%eax */
+        movl  ($v), %eax        /* fetch the old value first... */
+        movl 4($v), %edx
+        movl %ebx,  ($v)        /* ...then overwrite it with the new one */
+        movl %ecx, 4($v)
+END
+BEGIN add %ecx                  /* atomic64_add_386: pointer in %ecx, addend in %edx:%eax, sum written to memory only */
+        addl %eax,  ($v)        /* add the low halves; CF carries into the next instruction */
+        adcl %edx, 4($v)        /* add the high halves plus the carry */
+END
+BEGIN add_return %ecx           /* atomic64_add_return_386: pointer in %ecx, addend in %edx:%eax, sum left in %edx:%eax */
+        addl  ($v), %eax        /* %edx:%eax += *v, with the carry propagated by adcl */
+        adcl 4($v), %edx
+        movl %eax,  ($v)        /* write the sum back */
+        movl %edx, 4($v)
+END
+BEGIN sub %ecx                  /* atomic64_sub_386: pointer in %ecx, subtrahend in %edx:%eax, difference written to memory only */
+        subl %eax,  ($v)        /* subtract the low halves; CF carries the borrow */
+        sbbl %edx, 4($v)        /* subtract the high halves minus the borrow */
+END
+BEGIN sub_return %ecx           /* atomic64_sub_return_386: pointer in %ecx, subtrahend in %edx:%eax, difference left in %edx:%eax */
+        negl %edx               /* the next three instructions negate the 64-bit value in %edx:%eax */
+        negl %eax               /* sets CF when the low half was non-zero */
+        sbbl $0, %edx           /* take that borrow out of the high half */
+        addl  ($v), %eax        /* %edx:%eax = *v + (-value) */
+        adcl 4($v), %edx
+        movl %eax,  ($v)        /* write the difference back */
+        movl %edx, 4($v)
+END
+BEGIN inc %esi                  /* atomic64_inc_386: pointer in %esi, *v incremented in memory */
+        addl $1,  ($v)          /* addl (not incl) so that CF is produced for the adcl below */
+        adcl $0, 4($v)          /* propagate the carry into the high half */
+END
+BEGIN inc_return %esi           /* atomic64_inc_return_386: pointer in %esi, incremented value left in %edx:%eax */
+        movl  ($v), %eax        /* load the current value */
+        movl 4($v), %edx
+        addl $1, %eax           /* 64-bit increment: add to the low half... */
+        adcl $0, %edx           /* ...and carry into the high half */
+        movl %eax,  ($v)        /* write the new value back */
+        movl %edx, 4($v)
+END
+BEGIN dec %esi                  /* atomic64_dec_386: pointer in %esi, *v decremented in memory */
+        subl $1,  ($v)          /* subl (not decl) so that CF is produced for the sbbl below */
+        sbbl $0, 4($v)          /* propagate the borrow into the high half */
+END
+BEGIN dec_return %esi           /* atomic64_dec_return_386: pointer in %esi, decremented value left in %edx:%eax */
+        movl  ($v), %eax        /* load the current value */
+        movl 4($v), %edx
+        subl $1, %eax           /* 64-bit decrement: subtract from the low half... */
+        sbbl $0, %edx           /* ...and borrow from the high half */
+        movl %eax,  ($v)        /* write the new value back */
+        movl %edx, 4($v)
+END
+BEGIN add_unless %ecx           /* atomic64_add_unless_386: pointer %ecx, addend a in %edx:%eax, excluded value u in %edi:%esi; %eax = 1 if added, 0 if *v == u */
+        addl %eax, %esi         /* %edi:%esi = u + a (clobbers the caller's copy of u) */
+        adcl %edx, %edi
+        addl  ($v), %eax        /* %edx:%eax = *v + a */
+        adcl 4($v), %edx
+        cmpl %eax, %esi         /* *v + a == u + a exactly when *v == u */
+        je 3f                   /* low halves match: the high halves decide */
+1:
+        movl %eax,  ($v)        /* *v != u: store the sum */
+        movl %edx, 4($v)
+        movl $1, %eax           /* success: non-zero tells the caller the add was performed */
+2:
+RETURN
+3:
+        cmpl %edx, %edi
+        jne 1b                  /* high halves differ, so *v != u after all */
+        xorl %eax, %eax         /* *v == u: nothing was stored, report 0 */
+        jmp 2b
+END_
+BEGIN inc_not_zero %esi         /* atomic64_inc_not_zero_386: pointer in %esi; %eax = 1 if incremented, 0 if *v was zero; %edx is clobbered */
+        movl  ($v), %eax        /* load the current value into %edx:%eax */
+        movl 4($v), %edx
+        testl %eax, %eax
+        je 3f                   /* low half is zero: the high half decides */
+1:
+        addl $1, %eax           /* *v != 0: increment and store */
+        adcl $0, %edx
+        movl %eax,  ($v)
+        movl %edx, 4($v)
+        movl $1, %eax           /* success: non-zero tells the caller the increment happened */
+2:
+RETURN
+3:
+        testl %edx, %edx
+        jne 1b                  /* high half non-zero, so *v != 0 after all */
+        xorl %eax, %eax         /* *v == 0: nothing was stored, report 0 */
+        jmp 2b
+END_
+BEGIN dec_if_positive %esi      /* atomic64_dec_if_positive_386: pointer in %esi, *v - 1 left in %edx:%eax whether or not it was stored */
+        movl  ($v), %eax        /* load the current value */
+        movl 4($v), %edx
+        subl $1, %eax           /* compute *v - 1 in registers only */
+        sbbl $0, %edx           /* SF now reflects the sign of the 64-bit result */
+        js 1f                   /* result negative: skip the store and leave *v unchanged */
+        movl %eax,  ($v)
+        movl %edx, 4($v)
+1:
+END
author	Luca Barbieri <luca@luca-barbieri.com>	2010-02-24 04:54:25 -0500
committer	H. Peter Anvin <hpa@zytor.com>	2010-02-25 23:47:30 -0500
commit	a7e926abc3adfbd2e5e20d2b46177adb4e313915 (patch)
tree	a1b342c35a6fe39167927b5eb13e2422935deb8e /arch/x86/lib/atomic64_386_32.S
parent	86a8938078a8bb518c5376de493e348c7490d506 (diff)

diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S new file mode 100644 index 000000000000..5db07fe4a0ca --- /dev/null +++ b/arch/x86/lib/atomic64_386_32.S
@@ -0,0 +1,175 @@
	1	/*
	2	* atomic64_t for 386/486
	3	*
	4	* Copyright © 2010 Luca Barbieri
	5	*
	6	* This program is free software; you can redistribute it and/or modify
	7	* it under the terms of the GNU General Public License as published by
	8	* the Free Software Foundation; either version 2 of the License, or
	9	* (at your option) any later version.
	10	*/
	11
	12	#include <linux/linkage.h>
	13	#include <asm/alternative-asm.h>
	14	#include <asm/dwarf2.h>
	15
	16	/* if you want SMP support, implement these with real spinlocks */
	17	.macro LOCK reg  /* enter the critical section; the reg argument is accepted but not used here */
	18	pushfl  /* save EFLAGS (including the interrupt flag) on the stack */
	19	CFI_ADJUST_CFA_OFFSET 4  /* unwind info: account for the 4 bytes just pushed */
	20	cli  /* mask interrupts until the matching UNLOCK */
	21	.endm
	22
	23	.macro UNLOCK reg  /* leave the critical section; the reg argument is accepted but not used here */
	24	popfl  /* restore the EFLAGS saved by LOCK, so the interrupt flag returns to its prior state */
	25	CFI_ADJUST_CFA_OFFSET -4  /* unwind info: the saved flags are off the stack again */
	26	.endm
	27
	28	.macro BEGIN func reg  /* open atomic64_<func>_386; reg is the register used as base address of the 64-bit value */
	29	$v = \reg  /* alias so function bodies can write ($v) and 4($v) */
	30
	31	ENTRY(atomic64_\func\()_386)
	32	CFI_STARTPROC
	33	LOCK $v  /* every function body runs between LOCK and the UNLOCK in RETURN */
	34
	35	.macro RETURN  /* per-function epilogue: unlock, then return to the caller */
	36	UNLOCK $v
	37	ret
	38	.endm
	39
	40	.macro END_  /* close the function without emitting another return */
	41	CFI_ENDPROC
	42	ENDPROC(atomic64_\func\()_386)
	43	.purgem RETURN  /* drop the per-function macros so the next BEGIN can define them again */
	44	.purgem END_
	45	.purgem END
	46	.endm
	47
	48	.macro END  /* common ending: RETURN followed by END_ */
	49	RETURN
	50	END_
	51	.endm
	52	.endm
	53
	54	BEGIN read %ecx  /* atomic64_read_386: pointer in %ecx, value left in %edx:%eax */
	55	movl ($v), %eax  /* low 32 bits */
	56	movl 4($v), %edx  /* high 32 bits */
	57	END
	58
	59	BEGIN set %esi  /* atomic64_set_386: pointer in %esi, new value taken from %ecx:%ebx */
	60	movl %ebx, ($v)  /* low 32 bits */
	61	movl %ecx, 4($v)  /* high 32 bits */
	62	END
	63
	64	BEGIN xchg %esi  /* atomic64_xchg_386: pointer in %esi, new value in %ecx:%ebx, old value left in %edx:%eax */
	65	movl ($v), %eax  /* fetch the old value first... */
	66	movl 4($v), %edx
	67	movl %ebx, ($v)  /* ...then overwrite it with the new one */
	68	movl %ecx, 4($v)
	69	END
	70
	71	BEGIN add %ecx  /* atomic64_add_386: pointer in %ecx, addend in %edx:%eax, sum written to memory only */
	72	addl %eax, ($v)  /* add the low halves; CF carries into the next instruction */
	73	adcl %edx, 4($v)  /* add the high halves plus the carry */
	74	END
	75
	76	BEGIN add_return %ecx  /* atomic64_add_return_386: pointer in %ecx, addend in %edx:%eax, sum left in %edx:%eax */
	77	addl ($v), %eax  /* %edx:%eax += *v, with the carry propagated by adcl */
	78	adcl 4($v), %edx
	79	movl %eax, ($v)  /* write the sum back */
	80	movl %edx, 4($v)
	81	END
	82
	83	BEGIN sub %ecx  /* atomic64_sub_386: pointer in %ecx, subtrahend in %edx:%eax, difference written to memory only */
	84	subl %eax, ($v)  /* subtract the low halves; CF carries the borrow */
	85	sbbl %edx, 4($v)  /* subtract the high halves minus the borrow */
	86	END
	87
	88	BEGIN sub_return %ecx  /* atomic64_sub_return_386: pointer in %ecx, subtrahend in %edx:%eax, difference left in %edx:%eax */
	89	negl %edx  /* the next three instructions negate the 64-bit value in %edx:%eax */
	90	negl %eax  /* sets CF when the low half was non-zero */
	91	sbbl $0, %edx  /* take that borrow out of the high half */
	92	addl ($v), %eax  /* %edx:%eax = *v + (-value) */
	93	adcl 4($v), %edx
	94	movl %eax, ($v)  /* write the difference back */
	95	movl %edx, 4($v)
	96	END
	97
	98	BEGIN inc %esi  /* atomic64_inc_386: pointer in %esi, *v incremented in memory */
	99	addl $1, ($v)  /* addl (not incl) so that CF is produced for the adcl below */
	100	adcl $0, 4($v)  /* propagate the carry into the high half */
	101	END
	102
	103	BEGIN inc_return %esi  /* atomic64_inc_return_386: pointer in %esi, incremented value left in %edx:%eax */
	104	movl ($v), %eax  /* load the current value */
	105	movl 4($v), %edx
	106	addl $1, %eax  /* 64-bit increment: add to the low half... */
	107	adcl $0, %edx  /* ...and carry into the high half */
	108	movl %eax, ($v)  /* write the new value back */
	109	movl %edx, 4($v)
	110	END
	111
	112	BEGIN dec %esi  /* atomic64_dec_386: pointer in %esi, *v decremented in memory */
	113	subl $1, ($v)  /* subl (not decl) so that CF is produced for the sbbl below */
	114	sbbl $0, 4($v)  /* propagate the borrow into the high half */
	115	END
	116
	117	BEGIN dec_return %esi  /* atomic64_dec_return_386: pointer in %esi, decremented value left in %edx:%eax */
	118	movl ($v), %eax  /* load the current value */
	119	movl 4($v), %edx
	120	subl $1, %eax  /* 64-bit decrement: subtract from the low half... */
	121	sbbl $0, %edx  /* ...and borrow from the high half */
	122	movl %eax, ($v)  /* write the new value back */
	123	movl %edx, 4($v)
	124	END
	125
	126	BEGIN add_unless %ecx  /* atomic64_add_unless_386: pointer %ecx, addend a in %edx:%eax, excluded value u in %edi:%esi; %eax = 1 if added, 0 if *v == u */
	127	addl %eax, %esi  /* %edi:%esi = u + a (clobbers the caller's copy of u) */
	128	adcl %edx, %edi
	129	addl ($v), %eax  /* %edx:%eax = *v + a */
	130	adcl 4($v), %edx
	131	cmpl %eax, %esi  /* *v + a == u + a exactly when *v == u */
	132	je 3f  /* low halves match: the high halves decide */
	133	1:
	134	movl %eax, ($v)  /* *v != u: store the sum */
	135	movl %edx, 4($v)
	136	movl $1, %eax  /* success: non-zero tells the caller the add was performed */
	137	2:
	138	RETURN
	139	3:
	140	cmpl %edx, %edi
	141	jne 1b  /* high halves differ, so *v != u after all */
	142	xorl %eax, %eax  /* *v == u: nothing was stored, report 0 */
	143	jmp 2b
	144	END_
	145
	146	BEGIN inc_not_zero %esi  /* atomic64_inc_not_zero_386: pointer in %esi; %eax = 1 if incremented, 0 if *v was zero; %edx is clobbered */
	147	movl ($v), %eax  /* load the current value into %edx:%eax */
	148	movl 4($v), %edx
	149	testl %eax, %eax
	150	je 3f  /* low half is zero: the high half decides */
	151	1:
	152	addl $1, %eax  /* *v != 0: increment and store */
	153	adcl $0, %edx
	154	movl %eax, ($v)
	155	movl %edx, 4($v)
	156	movl $1, %eax  /* success: non-zero tells the caller the increment happened */
	157	2:
	158	RETURN
	159	3:
	160	testl %edx, %edx
	161	jne 1b  /* high half non-zero, so *v != 0 after all */
	162	xorl %eax, %eax  /* *v == 0: nothing was stored, report 0 */
	163	jmp 2b
	164	END_
	165
	166	BEGIN dec_if_positive %esi  /* atomic64_dec_if_positive_386: pointer in %esi, *v - 1 left in %edx:%eax whether or not it was stored */
	167	movl ($v), %eax  /* load the current value */
	168	movl 4($v), %edx
	169	subl $1, %eax  /* compute *v - 1 in registers only */
	170	sbbl $0, %edx  /* SF now reflects the sign of the 64-bit result */
	171	js 1f  /* result negative: skip the store and leave *v unchanged */
	172	movl %eax, ($v)
	173	movl %edx, 4($v)
	174	1:
	175	END