author		H. Peter Anvin <hpa@zytor.com>	2010-07-28 02:29:52 -0400
committer	H. Peter Anvin <hpa@zytor.com>	2010-07-28 02:29:52 -0400
commit		69309a05907546fb686b251d4ab041c26afe1e1d (patch)
tree		ada2b711139dfec0ba6bd820e7840138af4d5bfa /arch/x86/include/asm/cmpxchg_32.h
parent		d3608b5681d238605b7da6be62244e803e24c649 (diff)
x86, asm: Clean up and simplify set_64bit()
Clean up and simplify set_64bit().  This code is quite old (1.3.11)
and contains a fair bit of auxiliary machinery that current versions
of gcc handle just fine automatically.  Worse, the auxiliary machinery
can actually cause an unnecessary spill to memory.

Furthermore, the loading of the old value inside the loop in the
32-bit case is unnecessary: if the value doesn't match, the CMPXCHG8B
instruction will already have loaded the "new previous" value for us.

Clean up the comment, too, and remove page references to obsolete
versions of the Intel SDM.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
LKML-Reference: <tip-*@vger.kernel.org>
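The retry logic the patch relies on can be sketched in plain C. The
following user-space sketch is illustrative only; the function name
set_64bit_sketch and the use of GCC's __atomic builtins are this
note's assumptions, not part of the patch. On failure,
__atomic_compare_exchange_n writes the value it actually observed
back into its "expected" argument, which is precisely the "new
previous" behavior of CMPXCHG8B that makes an explicit reload inside
the loop unnecessary.

#include <stdint.h>

/* Illustrative sketch, not kernel code: on 32-bit x86 an 8-byte strong
 * compare-exchange compiles down to LOCK CMPXCHG8B (given -march=i586
 * or later); the kernel open-codes the instruction instead, see the
 * diff below. */
static inline void set_64bit_sketch(volatile uint64_t *ptr, uint64_t value)
{
	uint64_t prev = *ptr;	/* preload: the common case needs one locked op */

	/* On failure, prev is updated to the value actually found in
	 * memory, so the loop body needs no explicit reload. */
	while (!__atomic_compare_exchange_n(ptr, &prev, value, 0,
					    __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
		;
}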
Diffstat (limited to 'arch/x86/include/asm/cmpxchg_32.h')
-rw-r--r--	arch/x86/include/asm/cmpxchg_32.h	67
1 file changed, 20 insertions(+), 47 deletions(-)
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index c1cf59d72f09..20955ea7bc12 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -53,60 +53,33 @@ struct __xchg_dummy {
 	__xchg((v), (ptr), sizeof(*ptr))
 
 /*
- * The semantics of XCHGCMP8B are a bit strange, this is why
- * there is a loop and the loading of %%eax and %%edx has to
- * be inside. This inlines well in most cases, the cached
- * cost is around ~38 cycles. (in the future we might want
- * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that
- * might have an implicit FPU-save as a cost, so it's not
- * clear which path to go.)
+ * CMPXCHG8B only writes to the target if we had the previous
+ * value in registers, otherwise it acts as a read and gives us the
+ * "new previous" value.  That is why there is a loop.  Preloading
+ * EDX:EAX is a performance optimization: in the common case it means
+ * we need only one locked operation.
  *
- * cmpxchg8b must be used with the lock prefix here to allow
- * the instruction to be executed atomically, see page 3-102
- * of the instruction set reference 24319102.pdf. We need
- * the reader side to see the coherent 64bit value.
+ * A SIMD/3DNOW!/MMX/FPU 64-bit store here would require at the very
+ * least an FPU save and/or %cr0.ts manipulation.
+ *
+ * cmpxchg8b must be used with the lock prefix here to allow the
+ * instruction to be executed atomically.  We need to have the reader
+ * side to see the coherent 64bit value.
  */
-static inline void __set_64bit(unsigned long long *ptr,
-			       unsigned int low, unsigned int high)
+static inline void set_64bit(volatile u64 *ptr, u64 value)
 {
+	u32 low  = value;
+	u32 high = value >> 32;
+	u64 prev = *ptr;
+
 	asm volatile("\n1:\t"
-		     "movl (%1), %%eax\n\t"
-		     "movl 4(%1), %%edx\n\t"
-		     LOCK_PREFIX "cmpxchg8b (%1)\n\t"
+		     LOCK_PREFIX "cmpxchg8b %0\n\t"
 		     "jnz 1b"
-		     : "=m" (*ptr)
-		     : "D" (ptr),
-		       "b" (low),
-		       "c" (high)
-		     : "ax", "dx", "memory");
-}
-
-static inline void __set_64bit_constant(unsigned long long *ptr,
-					unsigned long long value)
-{
-	__set_64bit(ptr, (unsigned int)value, (unsigned int)(value >> 32));
-}
-
-#define ll_low(x)	*(((unsigned int *)&(x)) + 0)
-#define ll_high(x)	*(((unsigned int *)&(x)) + 1)
-
-static inline void __set_64bit_var(unsigned long long *ptr,
-				   unsigned long long value)
-{
-	__set_64bit(ptr, ll_low(value), ll_high(value));
+		     : "=m" (*ptr), "+A" (prev)
+		     : "b" (low), "c" (high)
+		     : "memory");
 }
 
-#define set_64bit(ptr, value)			\
-	(__builtin_constant_p((value))		\
-	 ? __set_64bit_constant((ptr), (value))	\
-	 : __set_64bit_var((ptr), (value)))
-
-#define _set_64bit(ptr, value)						\
-	(__builtin_constant_p(value)					\
-	 ? __set_64bit(ptr, (unsigned int)(value),			\
-		       (unsigned int)((value) >> 32))			\
-	 : __set_64bit(ptr, ll_low((value)), ll_high((value))))
-
 extern void __cmpxchg_wrong_size(void);
 
 /*
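A note on the new constraints: "+A" binds prev to the EDX:EAX register
pair read-write, "b"/"c" place the new value's halves in EBX/ECX as
CMPXCHG8B requires, and "=m" plus the "memory" clobber make the 8-byte
store visible to the compiler. A user-space rendering for
experimentation might look like the sketch below; the assumptions are
that LOCK_PREFIX is spelled as a literal lock prefix (in the kernel it
is a macro so the prefix can be patched out on uniprocessor builds)
and that you build with -m32 on a CPU that has cmpxchg8b.

#include <stdint.h>

static inline void set_64bit_user(volatile uint64_t *ptr, uint64_t value)
{
	uint32_t low  = value;		/* new low half  -> EBX */
	uint32_t high = value >> 32;	/* new high half -> ECX */
	uint64_t prev = *ptr;		/* expected value -> EDX:EAX via "+A" */

	asm volatile("\n1:\t"
		     "lock; cmpxchg8b %0\n\t"
		     "jnz 1b"		/* ZF clear: prev was reloaded, retry */
		     : "=m" (*ptr), "+A" (prev)
		     : "b" (low), "c" (high)
		     : "memory");
}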