author	Jeff Dike <jdike@addtoit.com>	2007-05-08 03:35:02 -0400
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-05-08 14:15:20 -0400
commit	a436ed9c5106b41606cbb55ab3b28389fe8ae04f (patch)
tree	b8df0bde6d7eb2808c37da815d8857396ee1eaf1	/include/asm-i386/system.h
parent	5dc12ddee93d63d7107cbbf70db23476d7b30e43 (diff)
x86: create asm/cmpxchg.h
i386: Rearrange the cmpxchg code to allow atomic.h to get it without needing to include system.h. This kills warnings in the UML build from atomic.h about implicit declarations of cmpxchg symbols. The i386 build presumably isn't seeing this because a separate inclusion of system.h is covering it over.

The cmpxchg stuff is moved to asm-i386/cmpxchg.h, with an include left in system.h for the benefit of generic code which expects cmpxchg there. Meanwhile, atomic.h includes cmpxchg.h.

This causes no noticeable damage to the i386 build.

x86_64: Move cmpxchg into its own header. atomic.h already included system.h, so this is changed to include cmpxchg.h. This is purely cleanup - it's not fixing any warnings - so if the x86_64 system.h isn't considered as cleanup-worthy as i386, then this can be dropped. It causes no noticeable damage to the x86_64 build.

uml: The i386 and x86_64 cmpxchg patches require an asm-um/cmpxchg.h for the UML build.

Signed-off-by: Jeff Dike <jdike@linux.intel.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
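The UML piece of the change is small: asm-um/cmpxchg.h only has to hand the UML build the underlying architecture's cmpxchg definitions. A minimal sketch of such a wrapper is shown here; the guard name and the asm/arch include path follow the usual asm-um convention and are assumptions for illustration, not necessarily the literal contents of the file this commit adds.

/* include/asm-um/cmpxchg.h -- hedged sketch, not the literal new file.
 * Assumes the asm-um convention of forwarding to the host architecture's
 * header (asm/arch points at asm-i386 or asm-x86_64 in a UML build).
 */
#ifndef __UM_CMPXCHG_H
#define __UM_CMPXCHG_H

#include "asm/arch/cmpxchg.h"

#endif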
Diffstat (limited to 'include/asm-i386/system.h')
-rw-r--r--	include/asm-i386/system.h	289
1 file changed, 1 insertion, 288 deletions
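To see why generic code cares where cmpxchg() is declared, here is an illustrative, hypothetical compare-and-swap retry loop of the kind callers build on the primitive being moved in the diff below; the function and variable names are made up for illustration only.

/* Hypothetical example: lock-free increment built on cmpxchg().
 * cmpxchg(ptr, old, new) stores 'new' at *ptr only if *ptr still holds
 * 'old', and returns the value it found there; a mismatch means another
 * CPU updated the counter first, so the loop retries.
 */
static inline void example_atomic_inc(unsigned long *counter)
{
	unsigned long old, new;

	do {
		old = *counter;		/* snapshot the current value */
		new = old + 1;		/* value we would like to store */
	} while (cmpxchg(counter, old, new) != old);
}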
diff --git a/include/asm-i386/system.h b/include/asm-i386/system.h
index a4ed087ac0ae..94ed3686a5f3 100644
--- a/include/asm-i386/system.h
+++ b/include/asm-i386/system.h
@@ -4,7 +4,7 @@
 #include <linux/kernel.h>
 #include <asm/segment.h>
 #include <asm/cpufeature.h>
-#include <linux/bitops.h> /* for LOCK_PREFIX */
+#include <asm/cmpxchg.h>
 
 #ifdef __KERNEL__
 
@@ -195,293 +195,6 @@ static inline unsigned long get_limit(unsigned long segment)
 
 #define nop() __asm__ __volatile__ ("nop")
 
-#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
-
-struct __xchg_dummy { unsigned long a[100]; };
-#define __xg(x) ((struct __xchg_dummy *)(x))
-
-
-#ifdef CONFIG_X86_CMPXCHG64
-
-/*
- * The semantics of XCHGCMP8B are a bit strange, this is why
- * there is a loop and the loading of %%eax and %%edx has to
- * be inside. This inlines well in most cases, the cached
- * cost is around ~38 cycles. (in the future we might want
- * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that
- * might have an implicit FPU-save as a cost, so it's not
- * clear which path to go.)
- *
- * cmpxchg8b must be used with the lock prefix here to allow
- * the instruction to be executed atomically, see page 3-102
- * of the instruction set reference 24319102.pdf. We need
- * the reader side to see the coherent 64bit value.
- */
-static inline void __set_64bit (unsigned long long * ptr,
-		unsigned int low, unsigned int high)
-{
-	__asm__ __volatile__ (
-		"\n1:\t"
-		"movl (%0), %%eax\n\t"
-		"movl 4(%0), %%edx\n\t"
-		"lock cmpxchg8b (%0)\n\t"
-		"jnz 1b"
-		: /* no outputs */
-		:	"D"(ptr),
-			"b"(low),
-			"c"(high)
-		:	"ax","dx","memory");
-}
-
-static inline void __set_64bit_constant (unsigned long long *ptr,
-					 unsigned long long value)
-{
-	__set_64bit(ptr,(unsigned int)(value), (unsigned int)((value)>>32ULL));
-}
-#define ll_low(x)	*(((unsigned int*)&(x))+0)
-#define ll_high(x)	*(((unsigned int*)&(x))+1)
-
-static inline void __set_64bit_var (unsigned long long *ptr,
-			 unsigned long long value)
-{
-	__set_64bit(ptr,ll_low(value), ll_high(value));
-}
-
-#define set_64bit(ptr,value) \
-(__builtin_constant_p(value) ? \
- __set_64bit_constant(ptr, value) : \
- __set_64bit_var(ptr, value) )
-
-#define _set_64bit(ptr,value) \
-(__builtin_constant_p(value) ? \
- __set_64bit(ptr, (unsigned int)(value), (unsigned int)((value)>>32ULL) ) : \
- __set_64bit(ptr, ll_low(value), ll_high(value)) )
-
-#endif
-
-/*
- * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
- * Note 2: xchg has side effect, so that attribute volatile is necessary,
- *	  but generally the primitive is invalid, *ptr is output argument. --ANK
- */
-static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
-{
-	switch (size) {
-		case 1:
-			__asm__ __volatile__("xchgb %b0,%1"
-				:"=q" (x)
-				:"m" (*__xg(ptr)), "0" (x)
-				:"memory");
-			break;
-		case 2:
-			__asm__ __volatile__("xchgw %w0,%1"
-				:"=r" (x)
-				:"m" (*__xg(ptr)), "0" (x)
-				:"memory");
-			break;
-		case 4:
-			__asm__ __volatile__("xchgl %0,%1"
-				:"=r" (x)
-				:"m" (*__xg(ptr)), "0" (x)
-				:"memory");
-			break;
-	}
-	return x;
-}
-
-/*
- * Atomic compare and exchange. Compare OLD with MEM, if identical,
- * store NEW in MEM. Return the initial value in MEM. Success is
- * indicated by comparing RETURN with OLD.
- */
-
-#ifdef CONFIG_X86_CMPXCHG
-#define __HAVE_ARCH_CMPXCHG 1
-#define cmpxchg(ptr,o,n)\
-	((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
-					(unsigned long)(n),sizeof(*(ptr))))
-#define sync_cmpxchg(ptr,o,n)\
-	((__typeof__(*(ptr)))__sync_cmpxchg((ptr),(unsigned long)(o),\
-					(unsigned long)(n),sizeof(*(ptr))))
-#define cmpxchg_local(ptr,o,n)\
-	((__typeof__(*(ptr)))__cmpxchg_local((ptr),(unsigned long)(o),\
-					(unsigned long)(n),sizeof(*(ptr))))
-#endif
-
-static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
-				      unsigned long new, int size)
-{
-	unsigned long prev;
-	switch (size) {
-	case 1:
-		__asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2"
-				     : "=a"(prev)
-				     : "q"(new), "m"(*__xg(ptr)), "0"(old)
-				     : "memory");
-		return prev;
-	case 2:
-		__asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
-				     : "=a"(prev)
-				     : "r"(new), "m"(*__xg(ptr)), "0"(old)
-				     : "memory");
-		return prev;
-	case 4:
-		__asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2"
-				     : "=a"(prev)
-				     : "r"(new), "m"(*__xg(ptr)), "0"(old)
-				     : "memory");
-		return prev;
-	}
-	return old;
-}
-
-/*
- * Always use locked operations when touching memory shared with a
- * hypervisor, since the system may be SMP even if the guest kernel
- * isn't.
- */
-static inline unsigned long __sync_cmpxchg(volatile void *ptr,
-					    unsigned long old,
-					    unsigned long new, int size)
-{
-	unsigned long prev;
-	switch (size) {
-	case 1:
-		__asm__ __volatile__("lock; cmpxchgb %b1,%2"
-				     : "=a"(prev)
-				     : "q"(new), "m"(*__xg(ptr)), "0"(old)
-				     : "memory");
-		return prev;
-	case 2:
-		__asm__ __volatile__("lock; cmpxchgw %w1,%2"
-				     : "=a"(prev)
-				     : "r"(new), "m"(*__xg(ptr)), "0"(old)
-				     : "memory");
-		return prev;
-	case 4:
-		__asm__ __volatile__("lock; cmpxchgl %1,%2"
-				     : "=a"(prev)
-				     : "r"(new), "m"(*__xg(ptr)), "0"(old)
-				     : "memory");
-		return prev;
-	}
-	return old;
-}
-
-static inline unsigned long __cmpxchg_local(volatile void *ptr,
-			unsigned long old, unsigned long new, int size)
-{
-	unsigned long prev;
-	switch (size) {
-	case 1:
-		__asm__ __volatile__("cmpxchgb %b1,%2"
-				     : "=a"(prev)
-				     : "q"(new), "m"(*__xg(ptr)), "0"(old)
-				     : "memory");
-		return prev;
-	case 2:
-		__asm__ __volatile__("cmpxchgw %w1,%2"
-				     : "=a"(prev)
-				     : "r"(new), "m"(*__xg(ptr)), "0"(old)
-				     : "memory");
-		return prev;
-	case 4:
-		__asm__ __volatile__("cmpxchgl %1,%2"
-				     : "=a"(prev)
-				     : "r"(new), "m"(*__xg(ptr)), "0"(old)
-				     : "memory");
-		return prev;
-	}
-	return old;
-}
-
-#ifndef CONFIG_X86_CMPXCHG
-/*
- * Building a kernel capable running on 80386. It may be necessary to
- * simulate the cmpxchg on the 80386 CPU. For that purpose we define
- * a function for each of the sizes we support.
- */
-
-extern unsigned long cmpxchg_386_u8(volatile void *, u8, u8);
-extern unsigned long cmpxchg_386_u16(volatile void *, u16, u16);
-extern unsigned long cmpxchg_386_u32(volatile void *, u32, u32);
-
-static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
-				      unsigned long new, int size)
-{
-	switch (size) {
-	case 1:
-		return cmpxchg_386_u8(ptr, old, new);
-	case 2:
-		return cmpxchg_386_u16(ptr, old, new);
-	case 4:
-		return cmpxchg_386_u32(ptr, old, new);
-	}
-	return old;
-}
-
-#define cmpxchg(ptr,o,n)					\
-({								\
-	__typeof__(*(ptr)) __ret;				\
-	if (likely(boot_cpu_data.x86 > 3))			\
-		__ret = __cmpxchg((ptr), (unsigned long)(o),	\
-				(unsigned long)(n), sizeof(*(ptr))); \
-	else							\
-		__ret = cmpxchg_386((ptr), (unsigned long)(o),	\
-				(unsigned long)(n), sizeof(*(ptr))); \
-	__ret;							\
-})
-#define cmpxchg_local(ptr,o,n)					\
-({								\
-	__typeof__(*(ptr)) __ret;				\
-	if (likely(boot_cpu_data.x86 > 3))			\
-		__ret = __cmpxchg_local((ptr), (unsigned long)(o), \
-				(unsigned long)(n), sizeof(*(ptr))); \
-	else							\
-		__ret = cmpxchg_386((ptr), (unsigned long)(o),	\
-				(unsigned long)(n), sizeof(*(ptr))); \
-	__ret;							\
-})
-#endif
-
-#ifdef CONFIG_X86_CMPXCHG64
-
-static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long long old,
-				      unsigned long long new)
-{
-	unsigned long long prev;
-	__asm__ __volatile__(LOCK_PREFIX "cmpxchg8b %3"
-			     : "=A"(prev)
-			     : "b"((unsigned long)new),
-			       "c"((unsigned long)(new >> 32)),
-			       "m"(*__xg(ptr)),
-			       "0"(old)
-			     : "memory");
-	return prev;
-}
-
-static inline unsigned long long __cmpxchg64_local(volatile void *ptr,
-			unsigned long long old, unsigned long long new)
-{
-	unsigned long long prev;
-	__asm__ __volatile__("cmpxchg8b %3"
-			     : "=A"(prev)
-			     : "b"((unsigned long)new),
-			       "c"((unsigned long)(new >> 32)),
-			       "m"(*__xg(ptr)),
-			       "0"(old)
-			     : "memory");
-	return prev;
-}
-
-#define cmpxchg64(ptr,o,n)\
-	((__typeof__(*(ptr)))__cmpxchg64((ptr),(unsigned long long)(o),\
-					(unsigned long long)(n)))
-#define cmpxchg64_local(ptr,o,n)\
-	((__typeof__(*(ptr)))__cmpxchg64_local((ptr),(unsigned long long)(o),\
-					(unsigned long long)(n)))
-#endif
-
 /*
  * Force strict CPU ordering.
  * And yes, this is required on UP too when we're talking