x86-32: Rewrite 32-bit atomic64 functions in assembly

This patch replaces atomic64_32.c with two assembly implementations, one for 386/486 machines using pushf/cli/popf and one for 586+ machines using cmpxchg8b. The cmpxchg8b implementation provides the following advantages over the current one: 1. Implements atomic64_add_unless, atomic64_dec_if_positive and atomic64_inc_not_zero. 2. Uses the ZF flag changed by cmpxchg8b instead of doing a comparison. 3. Uses custom register calling conventions that reduce or eliminate register moves to suit cmpxchg8b. 4. Reads the initial value instead of using cmpxchg8b to do that. Currently we use lock xaddl and movl, which seems the fastest. 5. Does not use the lock prefix for atomic64_set: 64-bit writes are already atomic, so we don't need that. We still need it for atomic64_read to avoid restoring a value changed in the meantime. 6. Allocates registers as well or better than gcc. The 386 implementation provides support for 386 and 486 machines. 386/486 SMP is not supported (we dropped it), but such support can be added easily if desired. A pure assembly implementation is required due to the custom calling conventions, and the desire to use %ebp in atomic64_add_return (we need 7 registers...), as well as the ability to use pushf/popf in the 386 code without an intermediate pop/push. The parameter names are changed to match the convention in atomic_64.h. Changes in v3 (due to rebasing to tip/x86/asm): - Patches atomic64_32.h instead of atomic_32.h - Uses the CALL alternative mechanism from commit 1b1d9258181bae199dc940f4bd0298126b9a73d9 Changes in v2: - Merged 386 and cx8 support in the same patch - 386 support now done in assembly, C code no longer used at all - cmpxchg64 is used for atomic64_cmpxchg - stop using macros, use one-line inline functions instead - miscellaneous changes and improvements Signed-off-by: Luca Barbieri <luca@luca-barbieri.com> LKML-Reference: <1267005265-27958-5-git-send-email-luca@luca-barbieri.com> Signed-off-by: H. Peter Anvin <hpa@zytor.com>
author: Luca Barbieri <luca@luca-barbieri.com> 2010-02-24 04:54:25 -0500
committer: H. Peter Anvin <hpa@zytor.com> 2010-02-25 23:47:30 -0500
commit: a7e926abc3adfbd2e5e20d2b46177adb4e313915 (patch)
tree: a1b342c35a6fe39167927b5eb13e2422935deb8e /arch/x86/lib
parent: 86a8938078a8bb518c5376de493e348c7490d506 (diff)
4 files changed, 453 insertions, 223 deletions
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index cffd754f3039..05d686bbbe9f 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -26,11 +26,12 @@ obj-y += msr.o msr-reg.o msr-reg-export.o
 ifeq ($(CONFIG_X86_32),y)
        obj-y += atomic64_32.o
+        lib-y += atomic64_cx8_32.o
        lib-y += checksum_32.o
        lib-y += strstr_32.o
        lib-y += semaphore_32.o string_32.o
 ifneq ($(CONFIG_X86_CMPXCHG64),y)
-        lib-y += cmpxchg8b_emu.o
+        lib-y += cmpxchg8b_emu.o atomic64_386_32.o
 endif
        lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o
 else
diff --git a/arch/x86/lib/atomic64_32.c b/arch/x86/lib/atomic64_32.c
index 824fa0be55a3..540179e8e9fa 100644
--- a/arch/x86/lib/atomic64_32.c
+++ b/arch/x86/lib/atomic64_32.c
@@ -6,225 +6,54 @@
 #include <asm/cmpxchg.h>
 #include <asm/atomic.h>
-static noinline u64 cmpxchg8b(u64 *ptr, u64 old, u64 new)
+long long atomic64_read_cx8(long long, const atomic64_t *v);
-{
+EXPORT_SYMBOL(atomic64_read_cx8);
-        u32 low = new;
+long long atomic64_set_cx8(long long, const atomic64_t *v);
-        u32 high = new >> 32;
+EXPORT_SYMBOL(atomic64_set_cx8);
+long long atomic64_xchg_cx8(long long, unsigned high);
-        asm volatile(
+EXPORT_SYMBOL(atomic64_xchg_cx8);
-                LOCK_PREFIX "cmpxchg8b %1\n"
+long long atomic64_add_return_cx8(long long a, atomic64_t *v);
-                     : "+A" (old), "+m" (*ptr)
+EXPORT_SYMBOL(atomic64_add_return_cx8);
-                     :  "b" (low),  "c" (high)
+long long atomic64_sub_return_cx8(long long a, atomic64_t *v);
-                     );
+EXPORT_SYMBOL(atomic64_sub_return_cx8);
-        return old;
+long long atomic64_inc_return_cx8(long long a, atomic64_t *v);
-}
+EXPORT_SYMBOL(atomic64_inc_return_cx8);
+long long atomic64_dec_return_cx8(long long a, atomic64_t *v);
-u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old_val, u64 new_val)
+EXPORT_SYMBOL(atomic64_dec_return_cx8);
-{
+long long atomic64_dec_if_positive_cx8(atomic64_t *v);
-        return cmpxchg8b(&ptr->counter, old_val, new_val);
+EXPORT_SYMBOL(atomic64_dec_if_positive_cx8);
-}
+int atomic64_inc_not_zero_cx8(atomic64_t *v);
-EXPORT_SYMBOL(atomic64_cmpxchg);
+EXPORT_SYMBOL(atomic64_inc_not_zero_cx8);
+int atomic64_add_unless_cx8(atomic64_t *v, long long a, long long u);
-/**
+EXPORT_SYMBOL(atomic64_add_unless_cx8);
- * atomic64_xchg - xchg atomic64 variable
- * @ptr:      pointer to type atomic64_t
+#ifndef CONFIG_X86_CMPXCHG64
- * @new_val:  value to assign
+long long atomic64_read_386(long long, const atomic64_t *v);
- *
+EXPORT_SYMBOL(atomic64_read_386);
- * Atomically xchgs the value of @ptr to @new_val and returns
+long long atomic64_set_386(long long, const atomic64_t *v);
- * the old value.
+EXPORT_SYMBOL(atomic64_set_386);
- */
+long long atomic64_xchg_386(long long, unsigned high);
-u64 atomic64_xchg(atomic64_t *ptr, u64 new_val)
+EXPORT_SYMBOL(atomic64_xchg_386);
-{
+long long atomic64_add_return_386(long long a, atomic64_t *v);
-        /*
+EXPORT_SYMBOL(atomic64_add_return_386);
-         * Try first with a (possibly incorrect) assumption about
+long long atomic64_sub_return_386(long long a, atomic64_t *v);
-         * what we have there. We'll do two loops most likely,
+EXPORT_SYMBOL(atomic64_sub_return_386);
-         * but we'll get an ownership MESI transaction straight away
+long long atomic64_inc_return_386(long long a, atomic64_t *v);
-         * instead of a read transaction followed by a
+EXPORT_SYMBOL(atomic64_inc_return_386);
-         * flush-for-ownership transaction:
+long long atomic64_dec_return_386(long long a, atomic64_t *v);
-         */
+EXPORT_SYMBOL(atomic64_dec_return_386);
-        u64 old_val, real_val = 0;
+long long atomic64_add_386(long long a, atomic64_t *v);
+EXPORT_SYMBOL(atomic64_add_386);
-        do {
+long long atomic64_sub_386(long long a, atomic64_t *v);
-                old_val = real_val;
+EXPORT_SYMBOL(atomic64_sub_386);
+long long atomic64_inc_386(long long a, atomic64_t *v);
-                real_val = atomic64_cmpxchg(ptr, old_val, new_val);
+EXPORT_SYMBOL(atomic64_inc_386);
+long long atomic64_dec_386(long long a, atomic64_t *v);
-        } while (real_val != old_val);
+EXPORT_SYMBOL(atomic64_dec_386);
+long long atomic64_dec_if_positive_386(atomic64_t *v);
-        return old_val;
+EXPORT_SYMBOL(atomic64_dec_if_positive_386);
-}
+int atomic64_inc_not_zero_386(atomic64_t *v);
-EXPORT_SYMBOL(atomic64_xchg);
+EXPORT_SYMBOL(atomic64_inc_not_zero_386);
+int atomic64_add_unless_386(atomic64_t *v, long long a, long long u);
-/**
+EXPORT_SYMBOL(atomic64_add_unless_386);
- * atomic64_set - set atomic64 variable
+#endif
- * @ptr:      pointer to type atomic64_t
- * @new_val:  value to assign
- *
- * Atomically sets the value of @ptr to @new_val.
- */
-void atomic64_set(atomic64_t *ptr, u64 new_val)
-{
-        atomic64_xchg(ptr, new_val);
-}
-EXPORT_SYMBOL(atomic64_set);
-/**
-EXPORT_SYMBOL(atomic64_read);
- * atomic64_add_return - add and return
- * @delta: integer value to add
- * @ptr:   pointer to type atomic64_t
- *
- * Atomically adds @delta to @ptr and returns @delta + *@ptr
- */
-noinline u64 atomic64_add_return(u64 delta, atomic64_t *ptr)
-{
-        /*
-         * Try first with a (possibly incorrect) assumption about
-         * what we have there. We'll do two loops most likely,
-         * but we'll get an ownership MESI transaction straight away
-         * instead of a read transaction followed by a
-         * flush-for-ownership transaction:
-         */
-        u64 old_val, new_val, real_val = 0;
-        do {
-                old_val = real_val;
-                new_val = old_val + delta;
-                real_val = atomic64_cmpxchg(ptr, old_val, new_val);
-        } while (real_val != old_val);
-        return new_val;
-}
-EXPORT_SYMBOL(atomic64_add_return);
-u64 atomic64_sub_return(u64 delta, atomic64_t *ptr)
-{
-        return atomic64_add_return(-delta, ptr);
-}
-EXPORT_SYMBOL(atomic64_sub_return);
-u64 atomic64_inc_return(atomic64_t *ptr)
-{
-        return atomic64_add_return(1, ptr);
-}
-EXPORT_SYMBOL(atomic64_inc_return);
-u64 atomic64_dec_return(atomic64_t *ptr)
-{
-        return atomic64_sub_return(1, ptr);
-}
-EXPORT_SYMBOL(atomic64_dec_return);
-/**
- * atomic64_add - add integer to atomic64 variable
- * @delta: integer value to add
- * @ptr:   pointer to type atomic64_t
- *
- * Atomically adds @delta to @ptr.
- */
-void atomic64_add(u64 delta, atomic64_t *ptr)
-{
-        atomic64_add_return(delta, ptr);
-}
-EXPORT_SYMBOL(atomic64_add);
-/**
- * atomic64_sub - subtract the atomic64 variable
- * @delta: integer value to subtract
- * @ptr:   pointer to type atomic64_t
- *
- * Atomically subtracts @delta from @ptr.
- */
-void atomic64_sub(u64 delta, atomic64_t *ptr)
-{
-        atomic64_add(-delta, ptr);
-}
-EXPORT_SYMBOL(atomic64_sub);
-/**
- * atomic64_sub_and_test - subtract value from variable and test result
- * @delta: integer value to subtract
- * @ptr:   pointer to type atomic64_t
- *
- * Atomically subtracts @delta from @ptr and returns
- * true if the result is zero, or false for all
- * other cases.
- */
-int atomic64_sub_and_test(u64 delta, atomic64_t *ptr)
-{
-        u64 new_val = atomic64_sub_return(delta, ptr);
-        return new_val == 0;
-}
-EXPORT_SYMBOL(atomic64_sub_and_test);
-/**
- * atomic64_inc - increment atomic64 variable
- * @ptr: pointer to type atomic64_t
- *
- * Atomically increments @ptr by 1.
- */
-void atomic64_inc(atomic64_t *ptr)
-{
-        atomic64_add(1, ptr);
-}
-EXPORT_SYMBOL(atomic64_inc);
-/**
- * atomic64_dec - decrement atomic64 variable
- * @ptr: pointer to type atomic64_t
- *
- * Atomically decrements @ptr by 1.
- */
-void atomic64_dec(atomic64_t *ptr)
-{
-        atomic64_sub(1, ptr);
-}
-EXPORT_SYMBOL(atomic64_dec);
-/**
- * atomic64_dec_and_test - decrement and test
- * @ptr: pointer to type atomic64_t
- *
- * Atomically decrements @ptr by 1 and
- * returns true if the result is 0, or false for all other
- * cases.
- */
-int atomic64_dec_and_test(atomic64_t *ptr)
-{
-        return atomic64_sub_and_test(1, ptr);
-}
-EXPORT_SYMBOL(atomic64_dec_and_test);
-/**
- * atomic64_inc_and_test - increment and test
- * @ptr: pointer to type atomic64_t
- *
- * Atomically increments @ptr by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-int atomic64_inc_and_test(atomic64_t *ptr)
-{
-        return atomic64_sub_and_test(-1, ptr);
-}
-EXPORT_SYMBOL(atomic64_inc_and_test);
-/**
- * atomic64_add_negative - add and test if negative
- * @delta: integer value to add
- * @ptr:   pointer to type atomic64_t
- *
- * Atomically adds @delta to @ptr and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
- */
-int atomic64_add_negative(u64 delta, atomic64_t *ptr)
-{
-        s64 new_val = atomic64_add_return(delta, ptr);
-        return new_val < 0;
-}
-EXPORT_SYMBOL(atomic64_add_negative);
diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S
new file mode 100644
index 000000000000..5db07fe4a0ca
--- /dev/null
+++ b/arch/x86/lib/atomic64_386_32.S
@@ -0,0 +1,175 @@
+/*
+ * atomic64_t for 386/486
+ *
+ * Copyright © 2010  Luca Barbieri
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#include <linux/linkage.h>
+#include <asm/alternative-asm.h>
+#include <asm/dwarf2.h>
+/* if you want SMP support, implement these with real spinlocks */
+.macro LOCK reg
+        pushfl
+        CFI_ADJUST_CFA_OFFSET 4
+        cli
+.endm
+.macro UNLOCK reg
+        popfl
+        CFI_ADJUST_CFA_OFFSET -4
+.endm
+.macro BEGIN func reg
+$v = \reg
+ENTRY(atomic64_\func\()_386)
+        CFI_STARTPROC
+        LOCK $v
+.macro RETURN
+        UNLOCK $v
+        ret
+.endm
+.macro END_
+        CFI_ENDPROC
+ENDPROC(atomic64_\func\()_386)
+.purgem RETURN
+.purgem END_
+.purgem END
+.endm
+.macro END
+RETURN
+END_
+.endm
+.endm
+BEGIN read %ecx
+        movl  ($v), %eax
+        movl 4($v), %edx
+END
+BEGIN set %esi
+        movl %ebx,  ($v)
+        movl %ecx, 4($v)
+END
+BEGIN xchg %esi
+        movl  ($v), %eax
+        movl 4($v), %edx
+        movl %ebx,  ($v)
+        movl %ecx, 4($v)
+END
+BEGIN add %ecx
+        addl %eax,  ($v)
+        adcl %edx, 4($v)
+END
+BEGIN add_return %ecx
+        addl  ($v), %eax
+        adcl 4($v), %edx
+        movl %eax,  ($v)
+        movl %edx, 4($v)
+END
+BEGIN sub %ecx
+        subl %eax,  ($v)
+        sbbl %edx, 4($v)
+END
+BEGIN sub_return %ecx
+        negl %edx
+        negl %eax
+        sbbl $0, %edx
+        addl  ($v), %eax
+        adcl 4($v), %edx
+        movl %eax,  ($v)
+        movl %edx, 4($v)
+END
+BEGIN inc %esi
+        addl $1,  ($v)
+        adcl $0, 4($v)
+END
+BEGIN inc_return %esi
+        movl  ($v), %eax
+        movl 4($v), %edx
+        addl $1, %eax
+        adcl $0, %edx
+        movl %eax,  ($v)
+        movl %edx, 4($v)
+END
+BEGIN dec %esi
+        subl $1,  ($v)
+        sbbl $0, 4($v)
+END
+BEGIN dec_return %esi
+        movl  ($v), %eax
+        movl 4($v), %edx
+        subl $1, %eax
+        sbbl $0, %edx
+        movl %eax,  ($v)
+        movl %edx, 4($v)
+END
+BEGIN add_unless %ecx
+        addl %eax, %esi
+        adcl %edx, %edi
+        addl  ($v), %eax
+        adcl 4($v), %edx
+        cmpl %eax, %esi
+        je 3f
+1:
+        movl %eax,  ($v)
+        movl %edx, 4($v)
+        xorl %eax, %eax
+2:
+RETURN
+3:
+        cmpl %edx, %edi
+        jne 1b
+        movl $1, %eax
+        jmp 2b
+END_
+BEGIN inc_not_zero %esi
+        movl  ($v), %eax
+        movl 4($v), %edx
+        testl %eax, %eax
+        je 3f
+1:
+        addl $1, %eax
+        adcl $0, %edx
+        movl %eax,  ($v)
+        movl %edx, 4($v)
+        xorl %eax, %eax
+2:
+RETURN
+3:
+        testl %edx, %edx
+        jne 1b
+        movl $1, %eax
+        jmp 2b
+END_
+BEGIN dec_if_positive %esi
+        movl  ($v), %eax
+        movl 4($v), %edx
+        subl $1, %eax
+        sbbl $0, %edx
+        js 1f
+        movl %eax,  ($v)
+        movl %edx, 4($v)
+1:
+END
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
new file mode 100644
index 000000000000..e49c4ebca9f4
--- /dev/null
+++ b/arch/x86/lib/atomic64_cx8_32.S
@@ -0,0 +1,225 @@
+/*
+ * atomic64_t for 586+
+ *
+ * Copyright © 2010  Luca Barbieri
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#include <linux/linkage.h>
+#include <asm/alternative-asm.h>
+#include <asm/dwarf2.h>
+.macro SAVE reg
+        pushl %\reg
+        CFI_ADJUST_CFA_OFFSET 4
+        CFI_REL_OFFSET \reg, 0
+.endm
+.macro RESTORE reg
+        popl %\reg
+        CFI_ADJUST_CFA_OFFSET -4
+        CFI_RESTORE \reg
+.endm
+.macro read64 reg
+        movl %ebx, %eax
+        movl %ecx, %edx
+/* we need LOCK_PREFIX since otherwise cmpxchg8b always does the write */
+        LOCK_PREFIX
+        cmpxchg8b (\reg)
+.endm
+ENTRY(atomic64_read_cx8)
+        CFI_STARTPROC
+        read64 %ecx
+        ret
+        CFI_ENDPROC
+ENDPROC(atomic64_read_cx8)
+ENTRY(atomic64_set_cx8)
+        CFI_STARTPROC
+1:
+/* we don't need LOCK_PREFIX since aligned 64-bit writes
+ * are atomic on 586 and newer */
+        cmpxchg8b (%esi)
+        jne 1b
+        ret
+        CFI_ENDPROC
+ENDPROC(atomic64_set_cx8)
+ENTRY(atomic64_xchg_cx8)
+        CFI_STARTPROC
+        movl %ebx, %eax
+        movl %ecx, %edx
+1:
+        LOCK_PREFIX
+        cmpxchg8b (%esi)
+        jne 1b
+        ret
+        CFI_ENDPROC
+ENDPROC(atomic64_xchg_cx8)
+.macro addsub_return func ins insc
+ENTRY(atomic64_\func\()_return_cx8)
+        CFI_STARTPROC
+        SAVE ebp
+        SAVE ebx
+        SAVE esi
+        SAVE edi
+        movl %eax, %esi
+        movl %edx, %edi
+        movl %ecx, %ebp
+        read64 %ebp
+1:
+        movl %eax, %ebx
+        movl %edx, %ecx
+        \ins\()l %esi, %ebx
+        \insc\()l %edi, %ecx
+        LOCK_PREFIX
+        cmpxchg8b (%ebp)
+        jne 1b
+10:
+        movl %ebx, %eax
+        movl %ecx, %edx
+        RESTORE edi
+        RESTORE esi
+        RESTORE ebx
+        RESTORE ebp
+        ret
+        CFI_ENDPROC
+ENDPROC(atomic64_\func\()_return_cx8)
+.endm
+addsub_return add add adc
+addsub_return sub sub sbb
+.macro incdec_return func ins insc
+ENTRY(atomic64_\func\()_return_cx8)
+        CFI_STARTPROC
+        SAVE ebx
+        read64 %esi
+1:
+        movl %eax, %ebx
+        movl %edx, %ecx
+        \ins\()l $1, %ebx
+        \insc\()l $0, %ecx
+        LOCK_PREFIX
+        cmpxchg8b (%esi)
+        jne 1b
+10:
+        movl %ebx, %eax
+        movl %ecx, %edx
+        RESTORE ebx
+        ret
+        CFI_ENDPROC
+ENDPROC(atomic64_\func\()_return_cx8)
+.endm
+incdec_return inc add adc
+incdec_return dec sub sbb
+ENTRY(atomic64_dec_if_positive_cx8)
+        CFI_STARTPROC
+        SAVE ebx
+        read64 %esi
+1:
+        movl %eax, %ebx
+        movl %edx, %ecx
+        subl $1, %ebx
+        sbb $0, %ecx
+        js 2f
+        LOCK_PREFIX
+        cmpxchg8b (%esi)
+        jne 1b
+2:
+        movl %ebx, %eax
+        movl %ecx, %edx
+        RESTORE ebx
+        ret
+        CFI_ENDPROC
+ENDPROC(atomic64_dec_if_positive_cx8)
+ENTRY(atomic64_add_unless_cx8)
+        CFI_STARTPROC
+        SAVE ebp
+        SAVE ebx
+/* these just push these two parameters on the stack */
+        SAVE edi
+        SAVE esi
+        movl %ecx, %ebp
+        movl %eax, %esi
+        movl %edx, %edi
+        read64 %ebp
+1:
+        cmpl %eax, 0(%esp)
+        je 4f
+2:
+        movl %eax, %ebx
+        movl %edx, %ecx
+        addl %esi, %ebx
+        adcl %edi, %ecx
+        LOCK_PREFIX
+        cmpxchg8b (%ebp)
+        jne 1b
+        xorl %eax, %eax
+3:
+        addl $8, %esp
+        CFI_ADJUST_CFA_OFFSET -8
+        RESTORE ebx
+        RESTORE ebp
+        ret
+4:
+        cmpl %edx, 4(%esp)
+        jne 2b
+        movl $1, %eax
+        jmp 3b
+        CFI_ENDPROC
+ENDPROC(atomic64_add_unless_cx8)
+ENTRY(atomic64_inc_not_zero_cx8)
+        CFI_STARTPROC
+        SAVE ebx
+        read64 %esi
+1:
+        testl %eax, %eax
+        je 4f
+2:
+        movl %eax, %ebx
+        movl %edx, %ecx
+        addl $1, %ebx
+        adcl $0, %ecx
+        LOCK_PREFIX
+        cmpxchg8b (%esi)
+        jne 1b
+        xorl %eax, %eax
+3:
+        RESTORE ebx
+        ret
+4:
+        testl %edx, %edx
+        jne 2b
+        movl $1, %eax
+        jmp 3b
+        CFI_ENDPROC
+ENDPROC(atomic64_inc_not_zero_cx8)
author	Luca Barbieri <luca@luca-barbieri.com>	2010-02-24 04:54:25 -0500
committer	H. Peter Anvin <hpa@zytor.com>	2010-02-25 23:47:30 -0500
commit	a7e926abc3adfbd2e5e20d2b46177adb4e313915 (patch)
tree	a1b342c35a6fe39167927b5eb13e2422935deb8e /arch/x86/lib
parent	86a8938078a8bb518c5376de493e348c7490d506 (diff)

diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index cffd754f3039..05d686bbbe9f 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile
@@ -26,11 +26,12 @@ obj-y += msr.o msr-reg.o msr-reg-export.o
26		26
27	ifeq ($(CONFIG_X86_32),y)	27	ifeq ($(CONFIG_X86_32),y)
28	obj-y += atomic64_32.o	28	obj-y += atomic64_32.o
		29	lib-y += atomic64_cx8_32.o
29	lib-y += checksum_32.o	30	lib-y += checksum_32.o
30	lib-y += strstr_32.o	31	lib-y += strstr_32.o
31	lib-y += semaphore_32.o string_32.o	32	lib-y += semaphore_32.o string_32.o
32	ifneq ($(CONFIG_X86_CMPXCHG64),y)	33	ifneq ($(CONFIG_X86_CMPXCHG64),y)
33	lib-y += cmpxchg8b_emu.o	34	lib-y += cmpxchg8b_emu.o atomic64_386_32.o
34	endif	35	endif
35	lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o	36	lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o
36	else	37	else


diff --git a/arch/x86/lib/atomic64_32.c b/arch/x86/lib/atomic64_32.c index 824fa0be55a3..540179e8e9fa 100644 --- a/arch/x86/lib/atomic64_32.c +++ b/arch/x86/lib/atomic64_32.c
@@ -6,225 +6,54 @@
6	#include <asm/cmpxchg.h>	6	#include <asm/cmpxchg.h>
7	#include <asm/atomic.h>	7	#include <asm/atomic.h>
8		8
9	static noinline u64 cmpxchg8b(u64 *ptr, u64 old, u64 new)	9	long long atomic64_read_cx8(long long, const atomic64_t *v);
10	{	10	EXPORT_SYMBOL(atomic64_read_cx8);
11	u32 low = new;	11	long long atomic64_set_cx8(long long, const atomic64_t *v);
12	u32 high = new >> 32;	12	EXPORT_SYMBOL(atomic64_set_cx8);
13		13	long long atomic64_xchg_cx8(long long, unsigned high);
14	asm volatile(	14	EXPORT_SYMBOL(atomic64_xchg_cx8);
15	LOCK_PREFIX "cmpxchg8b %1\n"	15	long long atomic64_add_return_cx8(long long a, atomic64_t *v);
16	: "+A" (old), "+m" (*ptr)	16	EXPORT_SYMBOL(atomic64_add_return_cx8);
17	: "b" (low), "c" (high)	17	long long atomic64_sub_return_cx8(long long a, atomic64_t *v);
18	);	18	EXPORT_SYMBOL(atomic64_sub_return_cx8);
19	return old;	19	long long atomic64_inc_return_cx8(long long a, atomic64_t *v);
20	}	20	EXPORT_SYMBOL(atomic64_inc_return_cx8);
21		21	long long atomic64_dec_return_cx8(long long a, atomic64_t *v);
22	u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old_val, u64 new_val)	22	EXPORT_SYMBOL(atomic64_dec_return_cx8);
23	{	23	long long atomic64_dec_if_positive_cx8(atomic64_t *v);
24	return cmpxchg8b(&ptr->counter, old_val, new_val);	24	EXPORT_SYMBOL(atomic64_dec_if_positive_cx8);
25	}	25	int atomic64_inc_not_zero_cx8(atomic64_t *v);
26	EXPORT_SYMBOL(atomic64_cmpxchg);	26	EXPORT_SYMBOL(atomic64_inc_not_zero_cx8);
27		27	int atomic64_add_unless_cx8(atomic64_t *v, long long a, long long u);
28	/**	28	EXPORT_SYMBOL(atomic64_add_unless_cx8);
29	* atomic64_xchg - xchg atomic64 variable	29
30	* @ptr: pointer to type atomic64_t	30	#ifndef CONFIG_X86_CMPXCHG64
31	* @new_val: value to assign	31	long long atomic64_read_386(long long, const atomic64_t *v);
32	*	32	EXPORT_SYMBOL(atomic64_read_386);
33	* Atomically xchgs the value of @ptr to @new_val and returns	33	long long atomic64_set_386(long long, const atomic64_t *v);
34	* the old value.	34	EXPORT_SYMBOL(atomic64_set_386);
35	*/	35	long long atomic64_xchg_386(long long, unsigned high);
36	u64 atomic64_xchg(atomic64_t *ptr, u64 new_val)	36	EXPORT_SYMBOL(atomic64_xchg_386);
37	{	37	long long atomic64_add_return_386(long long a, atomic64_t *v);
38	/*	38	EXPORT_SYMBOL(atomic64_add_return_386);
39	* Try first with a (possibly incorrect) assumption about	39	long long atomic64_sub_return_386(long long a, atomic64_t *v);
40	* what we have there. We'll do two loops most likely,	40	EXPORT_SYMBOL(atomic64_sub_return_386);
41	* but we'll get an ownership MESI transaction straight away	41	long long atomic64_inc_return_386(long long a, atomic64_t *v);
42	* instead of a read transaction followed by a	42	EXPORT_SYMBOL(atomic64_inc_return_386);
43	* flush-for-ownership transaction:	43	long long atomic64_dec_return_386(long long a, atomic64_t *v);
44	*/	44	EXPORT_SYMBOL(atomic64_dec_return_386);
45	u64 old_val, real_val = 0;	45	long long atomic64_add_386(long long a, atomic64_t *v);
46		46	EXPORT_SYMBOL(atomic64_add_386);
47	do {	47	long long atomic64_sub_386(long long a, atomic64_t *v);
48	old_val = real_val;	48	EXPORT_SYMBOL(atomic64_sub_386);
49		49	long long atomic64_inc_386(long long a, atomic64_t *v);
50	real_val = atomic64_cmpxchg(ptr, old_val, new_val);	50	EXPORT_SYMBOL(atomic64_inc_386);
51		51	long long atomic64_dec_386(long long a, atomic64_t *v);
52	} while (real_val != old_val);	52	EXPORT_SYMBOL(atomic64_dec_386);
53		53	long long atomic64_dec_if_positive_386(atomic64_t *v);
54	return old_val;	54	EXPORT_SYMBOL(atomic64_dec_if_positive_386);
55	}	55	int atomic64_inc_not_zero_386(atomic64_t *v);
56	EXPORT_SYMBOL(atomic64_xchg);	56	EXPORT_SYMBOL(atomic64_inc_not_zero_386);
57		57	int atomic64_add_unless_386(atomic64_t *v, long long a, long long u);
58	/**	58	EXPORT_SYMBOL(atomic64_add_unless_386);
59	* atomic64_set - set atomic64 variable	59	#endif
60	* @ptr: pointer to type atomic64_t
61	* @new_val: value to assign
62	*
63	* Atomically sets the value of @ptr to @new_val.
64	*/
65	void atomic64_set(atomic64_t *ptr, u64 new_val)
66	{
67	atomic64_xchg(ptr, new_val);
68	}
69	EXPORT_SYMBOL(atomic64_set);
70
71	/**
72	EXPORT_SYMBOL(atomic64_read);
73	* atomic64_add_return - add and return
74	* @delta: integer value to add
75	* @ptr: pointer to type atomic64_t
76	*
77	* Atomically adds @delta to @ptr and returns @delta + *@ptr
78	*/
79	noinline u64 atomic64_add_return(u64 delta, atomic64_t *ptr)
80	{
81	/*
82	* Try first with a (possibly incorrect) assumption about
83	* what we have there. We'll do two loops most likely,
84	* but we'll get an ownership MESI transaction straight away
85	* instead of a read transaction followed by a
86	* flush-for-ownership transaction:
87	*/
88	u64 old_val, new_val, real_val = 0;
89
90	do {
91	old_val = real_val;
92	new_val = old_val + delta;
93
94	real_val = atomic64_cmpxchg(ptr, old_val, new_val);
95
96	} while (real_val != old_val);
97
98	return new_val;
99	}
100	EXPORT_SYMBOL(atomic64_add_return);
101
102	u64 atomic64_sub_return(u64 delta, atomic64_t *ptr)
103	{
104	return atomic64_add_return(-delta, ptr);
105	}
106	EXPORT_SYMBOL(atomic64_sub_return);
107
108	u64 atomic64_inc_return(atomic64_t *ptr)
109	{
110	return atomic64_add_return(1, ptr);
111	}
112	EXPORT_SYMBOL(atomic64_inc_return);
113
114	u64 atomic64_dec_return(atomic64_t *ptr)
115	{
116	return atomic64_sub_return(1, ptr);
117	}
118	EXPORT_SYMBOL(atomic64_dec_return);
119
120	/**
121	* atomic64_add - add integer to atomic64 variable
122	* @delta: integer value to add
123	* @ptr: pointer to type atomic64_t
124	*
125	* Atomically adds @delta to @ptr.
126	*/
127	void atomic64_add(u64 delta, atomic64_t *ptr)
128	{
129	atomic64_add_return(delta, ptr);
130	}
131	EXPORT_SYMBOL(atomic64_add);
132
133	/**
134	* atomic64_sub - subtract the atomic64 variable
135	* @delta: integer value to subtract
136	* @ptr: pointer to type atomic64_t
137	*
138	* Atomically subtracts @delta from @ptr.
139	*/
140	void atomic64_sub(u64 delta, atomic64_t *ptr)
141	{
142	atomic64_add(-delta, ptr);
143	}
144	EXPORT_SYMBOL(atomic64_sub);
145
146	/**
147	* atomic64_sub_and_test - subtract value from variable and test result
148	* @delta: integer value to subtract
149	* @ptr: pointer to type atomic64_t
150	*
151	* Atomically subtracts @delta from @ptr and returns
152	* true if the result is zero, or false for all
153	* other cases.
154	*/
155	int atomic64_sub_and_test(u64 delta, atomic64_t *ptr)
156	{
157	u64 new_val = atomic64_sub_return(delta, ptr);
158
159	return new_val == 0;
160	}
161	EXPORT_SYMBOL(atomic64_sub_and_test);
162
163	/**
164	* atomic64_inc - increment atomic64 variable
165	* @ptr: pointer to type atomic64_t
166	*
167	* Atomically increments @ptr by 1.
168	*/
169	void atomic64_inc(atomic64_t *ptr)
170	{
171	atomic64_add(1, ptr);
172	}
173	EXPORT_SYMBOL(atomic64_inc);
174
175	/**
176	* atomic64_dec - decrement atomic64 variable
177	* @ptr: pointer to type atomic64_t
178	*
179	* Atomically decrements @ptr by 1.
180	*/
181	void atomic64_dec(atomic64_t *ptr)
182	{
183	atomic64_sub(1, ptr);
184	}
185	EXPORT_SYMBOL(atomic64_dec);
186
187	/**
188	* atomic64_dec_and_test - decrement and test
189	* @ptr: pointer to type atomic64_t
190	*
191	* Atomically decrements @ptr by 1 and
192	* returns true if the result is 0, or false for all other
193	* cases.
194	*/
195	int atomic64_dec_and_test(atomic64_t *ptr)
196	{
197	return atomic64_sub_and_test(1, ptr);
198	}
199	EXPORT_SYMBOL(atomic64_dec_and_test);
200
201	/**
202	* atomic64_inc_and_test - increment and test
203	* @ptr: pointer to type atomic64_t
204	*
205	* Atomically increments @ptr by 1
206	* and returns true if the result is zero, or false for all
207	* other cases.
208	*/
209	int atomic64_inc_and_test(atomic64_t *ptr)
210	{
211	return atomic64_sub_and_test(-1, ptr);
212	}
213	EXPORT_SYMBOL(atomic64_inc_and_test);
214
215	/**
216	* atomic64_add_negative - add and test if negative
217	* @delta: integer value to add
218	* @ptr: pointer to type atomic64_t
219	*
220	* Atomically adds @delta to @ptr and returns true
221	* if the result is negative, or false when
222	* result is greater than or equal to zero.
223	*/
224	int atomic64_add_negative(u64 delta, atomic64_t *ptr)
225	{
226	s64 new_val = atomic64_add_return(delta, ptr);
227
228	return new_val < 0;
229	}
230	EXPORT_SYMBOL(atomic64_add_negative);


diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S new file mode 100644 index 000000000000..5db07fe4a0ca --- /dev/null +++ b/arch/x86/lib/atomic64_386_32.S
@@ -0,0 +1,175 @@
		1	/*
		2	* atomic64_t for 386/486
		3	*
		4	* Copyright © 2010 Luca Barbieri
		5	*
		6	* This program is free software; you can redistribute it and/or modify
		7	* it under the terms of the GNU General Public License as published by
		8	* the Free Software Foundation; either version 2 of the License, or
		9	* (at your option) any later version.
		10	*/
		11
		12	#include <linux/linkage.h>
		13	#include <asm/alternative-asm.h>
		14	#include <asm/dwarf2.h>
		15
		16	/* if you want SMP support, implement these with real spinlocks */
		17	.macro LOCK reg
		18	pushfl
		19	CFI_ADJUST_CFA_OFFSET 4
		20	cli
		21	.endm
		22
		23	.macro UNLOCK reg
		24	popfl
		25	CFI_ADJUST_CFA_OFFSET -4
		26	.endm
		27
		28	.macro BEGIN func reg
		29	$v = \reg
		30
		31	ENTRY(atomic64_\func\()_386)
		32	CFI_STARTPROC
		33	LOCK $v
		34
		35	.macro RETURN
		36	UNLOCK $v
		37	ret
		38	.endm
		39
		40	.macro END_
		41	CFI_ENDPROC
		42	ENDPROC(atomic64_\func\()_386)
		43	.purgem RETURN
		44	.purgem END_
		45	.purgem END
		46	.endm
		47
		48	.macro END
		49	RETURN
		50	END_
		51	.endm
		52	.endm
		53
		54	BEGIN read %ecx
		55	movl ($v), %eax
		56	movl 4($v), %edx
		57	END
		58
		59	BEGIN set %esi
		60	movl %ebx, ($v)
		61	movl %ecx, 4($v)
		62	END
		63
		64	BEGIN xchg %esi
		65	movl ($v), %eax
		66	movl 4($v), %edx
		67	movl %ebx, ($v)
		68	movl %ecx, 4($v)
		69	END
		70
		71	BEGIN add %ecx
		72	addl %eax, ($v)
		73	adcl %edx, 4($v)
		74	END
		75
		76	BEGIN add_return %ecx
		77	addl ($v), %eax
		78	adcl 4($v), %edx
		79	movl %eax, ($v)
		80	movl %edx, 4($v)
		81	END
		82
		83	BEGIN sub %ecx
		84	subl %eax, ($v)
		85	sbbl %edx, 4($v)
		86	END
		87
		88	BEGIN sub_return %ecx
		89	negl %edx
		90	negl %eax
		91	sbbl $0, %edx
		92	addl ($v), %eax
		93	adcl 4($v), %edx
		94	movl %eax, ($v)
		95	movl %edx, 4($v)
		96	END
		97
		98	BEGIN inc %esi
		99	addl $1, ($v)
		100	adcl $0, 4($v)
		101	END
		102
		103	BEGIN inc_return %esi
		104	movl ($v), %eax
		105	movl 4($v), %edx
		106	addl $1, %eax
		107	adcl $0, %edx
		108	movl %eax, ($v)
		109	movl %edx, 4($v)
		110	END
		111
		112	BEGIN dec %esi
		113	subl $1, ($v)
		114	sbbl $0, 4($v)
		115	END
		116
		117	BEGIN dec_return %esi
		118	movl ($v), %eax
		119	movl 4($v), %edx
		120	subl $1, %eax
		121	sbbl $0, %edx
		122	movl %eax, ($v)
		123	movl %edx, 4($v)
		124	END
		125
		126	BEGIN add_unless %ecx
		127	addl %eax, %esi
		128	adcl %edx, %edi
		129	addl ($v), %eax
		130	adcl 4($v), %edx
		131	cmpl %eax, %esi
		132	je 3f
		133	1:
		134	movl %eax, ($v)
		135	movl %edx, 4($v)
		136	xorl %eax, %eax
		137	2:
		138	RETURN
		139	3:
		140	cmpl %edx, %edi
		141	jne 1b
		142	movl $1, %eax
		143	jmp 2b
		144	END_
		145
		146	BEGIN inc_not_zero %esi
		147	movl ($v), %eax
		148	movl 4($v), %edx
		149	testl %eax, %eax
		150	je 3f
		151	1:
		152	addl $1, %eax
		153	adcl $0, %edx
		154	movl %eax, ($v)
		155	movl %edx, 4($v)
		156	xorl %eax, %eax
		157	2:
		158	RETURN
		159	3:
		160	testl %edx, %edx
		161	jne 1b
		162	movl $1, %eax
		163	jmp 2b
		164	END_
		165
		166	BEGIN dec_if_positive %esi
		167	movl ($v), %eax
		168	movl 4($v), %edx
		169	subl $1, %eax
		170	sbbl $0, %edx
		171	js 1f
		172	movl %eax, ($v)
		173	movl %edx, 4($v)
		174	1:
		175	END


diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S new file mode 100644 index 000000000000..e49c4ebca9f4 --- /dev/null +++ b/arch/x86/lib/atomic64_cx8_32.S
@@ -0,0 +1,225 @@
		1	/*
		2	* atomic64_t for 586+
		3	*
		4	* Copyright © 2010 Luca Barbieri
		5	*
		6	* This program is free software; you can redistribute it and/or modify
		7	* it under the terms of the GNU General Public License as published by
		8	* the Free Software Foundation; either version 2 of the License, or
		9	* (at your option) any later version.
		10	*/
		11
		12	#include <linux/linkage.h>
		13	#include <asm/alternative-asm.h>
		14	#include <asm/dwarf2.h>
		15
		16	.macro SAVE reg
			/*
			 * Push the named register and describe the push to the unwinder:
			 * the CFA moves by 4 bytes and the register is recorded as saved
			 * at offset 0 from the new stack pointer.
			 */
		17	pushl %\reg
		18	CFI_ADJUST_CFA_OFFSET 4
		19	CFI_REL_OFFSET \reg, 0
		20	.endm
		21
		22	.macro RESTORE reg
			/*
			 * Counterpart of SAVE: pop the named register and update the
			 * unwind annotations (CFA moves back by 4 bytes, register marked
			 * as restored).
			 */
		23	popl %\reg
		24	CFI_ADJUST_CFA_OFFSET -4
		25	CFI_RESTORE \reg
		26	.endm
		27
		28	.macro read64 reg
			/*
			 * Load the 64-bit value at the address held in the given register
			 * into %edx:%eax using cmpxchg8b.
			 *
			 * The expected value (%edx:%eax) is set equal to the replacement
			 * value (%ecx:%ebx). cmpxchg8b then either finds a mismatch and
			 * loads the memory value into %edx:%eax, or finds a match and
			 * stores back the same value, so the memory content does not
			 * change and %edx:%eax ends up holding it either way.
			 * %ebx and %ecx are only read.
			 */
		29	movl %ebx, %eax
		30	movl %ecx, %edx
		31	/* we need LOCK_PREFIX since otherwise cmpxchg8b always does the write */
		32	LOCK_PREFIX
		33	cmpxchg8b (\reg)
		34	.endm
		35
		36	ENTRY(atomic64_read_cx8)
			/*
			 * Read the 64-bit value at the address in %ecx and return it in
			 * %edx:%eax (via the read64 macro).
			 */
		37	CFI_STARTPROC
		38
		39	read64 %ecx
		40	ret
		41	CFI_ENDPROC
		42	ENDPROC(atomic64_read_cx8)
		43
		44	ENTRY(atomic64_set_cx8)
			/*
			 * Store the 64-bit value %ecx:%ebx at the address in %esi.
			 *
			 * cmpxchg8b only stores when %edx:%eax matches the memory value;
			 * on a mismatch it loads the current value into %edx:%eax, so the
			 * loop retries with that value until the store succeeds.
			 * %eax and %edx are overwritten as a side effect.
			 */
		45	CFI_STARTPROC
		46
		47	1:
		48	/* we don't need LOCK_PREFIX since aligned 64-bit writes
		49	* are atomic on 586 and newer */
		50	cmpxchg8b (%esi)
		51	jne 1b			/* ZF clear: compare failed, retry */
		52
		53	ret
		54	CFI_ENDPROC
		55	ENDPROC(atomic64_set_cx8)
		56
		57	ENTRY(atomic64_xchg_cx8)
			/*
			 * Exchange: store %ecx:%ebx at the address in %esi and return the
			 * value that was replaced in %edx:%eax.
			 */
		58	CFI_STARTPROC
		59
			/* Initial guess for the old value: the new value itself. */
		60	movl %ebx, %eax
		61	movl %ecx, %edx
		62	1:
		63	LOCK_PREFIX
		64	cmpxchg8b (%esi)
			/* On failure cmpxchg8b reloaded %edx:%eax with the current value. */
		65	jne 1b
		66
		67	ret
		68	CFI_ENDPROC
		69	ENDPROC(atomic64_xchg_cx8)
		70
		71	.macro addsub_return func ins insc
			/*
			 * Generates atomic64_<func>_return_cx8.
			 *   func - name fragment used in the generated symbol
			 *   ins  - instruction stem applied to the low word (add/sub)
			 *   insc - carry/borrow-propagating stem for the high word (adc/sbb)
			 *
			 * Generated function: the 64-bit operand is taken from %edx:%eax
			 * and the address of the 64-bit variable from %ecx. The new value
			 * is returned in %edx:%eax. %ebp, %ebx, %esi and %edi are saved
			 * and restored; %ecx is overwritten.
			 */
		72	ENTRY(atomic64_\func\()_return_cx8)
		73	CFI_STARTPROC
		74	SAVE ebp
		75	SAVE ebx
		76	SAVE esi
		77	SAVE edi
		78
			/* Free %edx:%eax and %ecx:%ebx, which cmpxchg8b needs. */
		79	movl %eax, %esi
		80	movl %edx, %edi
		81	movl %ecx, %ebp
		82
		83	read64 %ebp
		84	1:
			/* %ecx:%ebx = current value (in %edx:%eax) <op> operand (%edi:%esi) */
		85	movl %eax, %ebx
		86	movl %edx, %ecx
		87	\ins\()l %esi, %ebx
		88	\insc\()l %edi, %ecx
		89	LOCK_PREFIX
		90	cmpxchg8b (%ebp)
			/* On failure %edx:%eax holds the fresh value; recompute and retry. */
		91	jne 1b
		92
		93	10:
			/* Return the value that was stored. */
		94	movl %ebx, %eax
		95	movl %ecx, %edx
		96	RESTORE edi
		97	RESTORE esi
		98	RESTORE ebx
		99	RESTORE ebp
		100	ret
		101	CFI_ENDPROC
		102	ENDPROC(atomic64_\func\()_return_cx8)
		103	.endm
		104
		105	addsub_return add add adc
		106	addsub_return sub sub sbb
		107
		108	.macro incdec_return func ins insc
			/*
			 * Generates atomic64_<func>_return_cx8.
			 *   func - name fragment used in the generated symbol
			 *   ins  - instruction stem applied to the low word (add/sub)
			 *   insc - carry/borrow-propagating stem for the high word (adc/sbb)
			 *
			 * Generated function: adds or subtracts one from the 64-bit
			 * variable whose address is in %esi and returns the new value in
			 * %edx:%eax. %ebx is saved and restored; %ecx is overwritten.
			 */
		109	ENTRY(atomic64_\func\()_return_cx8)
		110	CFI_STARTPROC
		111	SAVE ebx
		112
		113	read64 %esi
		114	1:
			/* %ecx:%ebx = current value (in %edx:%eax) +/- 1 */
		115	movl %eax, %ebx
		116	movl %edx, %ecx
		117	\ins\()l $1, %ebx
		118	\insc\()l $0, %ecx
		119	LOCK_PREFIX
		120	cmpxchg8b (%esi)
			/* On failure %edx:%eax holds the fresh value; recompute and retry. */
		121	jne 1b
		122
		123	10:
			/* Return the value that was stored. */
		124	movl %ebx, %eax
		125	movl %ecx, %edx
		126	RESTORE ebx
		127	ret
		128	CFI_ENDPROC
		129	ENDPROC(atomic64_\func\()_return_cx8)
		130	.endm
		131
		132	incdec_return inc add adc
		133	incdec_return dec sub sbb
		134
		135	ENTRY(atomic64_dec_if_positive_cx8)
			/*
			 * Decrement the 64-bit variable whose address is in %esi, but
			 * only store the result when it is not negative. The decremented
			 * value (current value - 1) is returned in %edx:%eax whether or
			 * not it was stored. %ebx is saved and restored; %ecx is
			 * overwritten.
			 */
		136	CFI_STARTPROC
		137	SAVE ebx
		138
		139	read64 %esi
		140	1:
			/* %ecx:%ebx = current value (in %edx:%eax) - 1 */
		141	movl %eax, %ebx
		142	movl %edx, %ecx
		143	subl $1, %ebx
		144	sbb $0, %ecx
		145	js 2f			/* result negative: do not store */
		146	LOCK_PREFIX
		147	cmpxchg8b (%esi)
			/* On failure %edx:%eax holds the fresh value; recompute and retry. */
		148	jne 1b
		149
		150	2:
		151	movl %ebx, %eax
		152	movl %ecx, %edx
		153	RESTORE ebx
		154	ret
		155	CFI_ENDPROC
		156	ENDPROC(atomic64_dec_if_positive_cx8)
		157
		158	ENTRY(atomic64_add_unless_cx8)
			/*
			 * Add the 64-bit amount in %edx:%eax to the variable whose
			 * address is in %ecx, unless the variable equals the 64-bit
			 * comparand passed in %edi:%esi.
			 *
			 * Returns in %eax: 1 if the add was performed, 0 if the variable
			 * equalled the comparand and was left untouched. This is the
			 * kernel's atomic*_add_unless convention; the two return values
			 * were previously swapped.
			 *
			 * %ebp and %ebx are saved and restored; %ecx, %edx, %esi and %edi
			 * are overwritten.
			 *
			 * NOTE(review): the code at label 4 sits after the epilogue's CFI
			 * adjustments although it runs with the full frame still on the
			 * stack -- the unwind annotations there may need
			 * CFI_REMEMBER_STATE/CFI_RESTORE_STATE; confirm.
			 */
		159	CFI_STARTPROC
		160	SAVE ebp
		161	SAVE ebx
		162	/* these just push these two parameters on the stack */
		163	SAVE edi
		164	SAVE esi
		165
			/* Comparand now lives at 0(%esp)/4(%esp); reuse %edi:%esi for the amount. */
		166	movl %ecx, %ebp
		167	movl %eax, %esi
		168	movl %edx, %edi
		169
		170	read64 %ebp
		171	1:
		172	cmpl %eax, 0(%esp)
		173	je 4f			/* low words match: check the high words */
		174	2:
			/* %ecx:%ebx = current value (in %edx:%eax) + amount */
		175	movl %eax, %ebx
		176	movl %edx, %ecx
		177	addl %esi, %ebx
		178	adcl %edi, %ecx
		179	LOCK_PREFIX
		180	cmpxchg8b (%ebp)
			/* On failure %edx:%eax holds the fresh value; re-check and retry. */
		181	jne 1b
		182
		183	movl $1, %eax		/* add performed */
		184	3:
			/* Drop the two pushed comparand words without reloading %edi/%esi. */
		185	addl $8, %esp
		186	CFI_ADJUST_CFA_OFFSET -8
		187	RESTORE ebx
		188	RESTORE ebp
		189	ret
		190	4:
		191	cmpl %edx, 4(%esp)
		192	jne 2b			/* high words differ: value != comparand */
		193	xorl %eax, %eax		/* value == comparand: nothing added */
		194	jmp 3b
		195	CFI_ENDPROC
		196	ENDPROC(atomic64_add_unless_cx8)
		197
		198	ENTRY(atomic64_inc_not_zero_cx8)
			/*
			 * Increment the 64-bit variable whose address is in %esi unless
			 * its value is zero.
			 *
			 * Returns in %eax: 1 if the increment was performed, 0 if the
			 * variable was zero and was left untouched. This is the kernel's
			 * atomic*_inc_not_zero convention; the two return values were
			 * previously swapped.
			 *
			 * %ebx is saved and restored; %ecx and %edx are overwritten.
			 *
			 * NOTE(review): the code at label 4 sits after the epilogue's CFI
			 * adjustments although it runs with %ebx still pushed -- the
			 * unwind annotations there may need
			 * CFI_REMEMBER_STATE/CFI_RESTORE_STATE; confirm.
			 */
		199	CFI_STARTPROC
		200	SAVE ebx
		201
		202	read64 %esi
		203	1:
		204	testl %eax, %eax
		205	je 4f			/* low word is zero: check the high word */
		206	2:
			/* %ecx:%ebx = current value (in %edx:%eax) + 1 */
		207	movl %eax, %ebx
		208	movl %edx, %ecx
		209	addl $1, %ebx
		210	adcl $0, %ecx
		211	LOCK_PREFIX
		212	cmpxchg8b (%esi)
			/* On failure %edx:%eax holds the fresh value; re-check and retry. */
		213	jne 1b
		214
		215	movl $1, %eax		/* increment performed */
		216	3:
		217	RESTORE ebx
		218	ret
		219	4:
		220	testl %edx, %edx
		221	jne 2b			/* high word non-zero: value is not zero */
		222	xorl %eax, %eax		/* value was zero: nothing written */
		223	jmp 3b
		224	CFI_ENDPROC
		225	ENDPROC(atomic64_inc_not_zero_cx8)