[IA64] implement ticket locks for Itanium

Back in January 2008 Nick Piggin implemented "ticket" spinlocks for X86 (See commit 314cdbefd1fd0a7acf3780e9628465b77ea6a836). IA64 implementation has a couple of differences because of the available atomic operations ... e.g. we have no fetchadd2 instruction that operates on a 16-bit quantity so we make ticket locks use a 32-bit word for each of the current ticket and now-serving values. Performance on uncontended locks is about 8% worse than the previous implementation, but this seems a good trade for determinism in the contended case. Performance impact on macro-level benchmarks is in the noise. Signed-off-by: Tony Luck <tony.luck@intel.com>
author: Tony Luck <tony.luck@intel.com> 2009-09-25 11:42:16 -0400
committer: Tony Luck <tony.luck@intel.com> 2009-09-25 11:42:16 -0400
commit: 2c86963b093c1a0887dfc6b32c6e5ea3a80f2922 (patch)
tree: d1ed58dd0a644dd670e3724a575112fedd4b4250 /arch/ia64/include/asm/spinlock.h
parent: 53cddfcc0e760d2b364878b6dadbd0c6d087cfae (diff)
1 files changed, 89 insertions, 86 deletions
diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h
index 13ab71576bc7..30bb930e1111 100644
--- a/arch/ia64/include/asm/spinlock.h
+++ b/arch/ia64/include/asm/spinlock.h
@@ -19,103 +19,106 @@
 #define __raw_spin_lock_init(x)                 ((x)->lock = 0)
-#ifdef ASM_SUPPORTED
 /*
- * Try to get the lock.  If we fail to get the lock, make a non-standard call to
+ * Ticket locks are conceptually two parts, one indicating the current head of
- * ia64_spinlock_contention().  We do not use a normal call because that would force all
+ * the queue, and the other indicating the current tail. The lock is acquired
- * callers of __raw_spin_lock() to be non-leaf routines.  Instead, ia64_spinlock_contention() is
+ * by atomically noting the tail and incrementing it by one (thus adding
- * carefully coded to touch only those registers that __raw_spin_lock() marks "clobbered".
+ * ourself to the queue and noting our position), then waiting until the head
+ * becomes equal to the initial value of the tail.
+ *
+ *   63                     32  31                      0
+ *  +----------------------------------------------------+
+ *  |  next_ticket_number      |     now_serving         |
+ *  +----------------------------------------------------+
 */
-#define IA64_SPINLOCK_CLOBBERS "ar.ccv", "ar.pfs", "p14", "p15", "r27", "r28", "r29", "r30", "b6", "memory"
+#define TICKET_SHIFT    32
-static inline void
+static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
-__raw_spin_lock_flags (raw_spinlock_t *lock, unsigned long flags)
 {
-        register volatile unsigned int *ptr asm ("r31") = &lock->lock;
+        int     *p = (int *)&lock->lock, turn, now_serving;
-#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
+        now_serving = *p;
-# ifdef CONFIG_ITANIUM
+        turn = ia64_fetchadd(1, p+1, acq);
-        /* don't use brl on Itanium... */
-        asm volatile ("{\n\t"
+        if (turn == now_serving)
-                      "  mov ar.ccv = r0\n\t"
+                return;
-                      "  mov r28 = ip\n\t"
-                      "  mov r30 = 1;;\n\t"
+        do {
-                      "}\n\t"
+                cpu_relax();
-                      "cmpxchg4.acq r30 = [%1], r30, ar.ccv\n\t"
+        } while (ACCESS_ONCE(*p) != turn);
-                      "movl r29 = ia64_spinlock_contention_pre3_4;;\n\t"
-                      "cmp4.ne p14, p0 = r30, r0\n\t"
-                      "mov b6 = r29;;\n\t"
-                      "mov r27=%2\n\t"
-                      "(p14) br.cond.spnt.many b6"
-                      : "=r"(ptr) : "r"(ptr), "r" (flags) : IA64_SPINLOCK_CLOBBERS);
-# else
-        asm volatile ("{\n\t"
-                      "  mov ar.ccv = r0\n\t"
-                      "  mov r28 = ip\n\t"
-                      "  mov r30 = 1;;\n\t"
-                      "}\n\t"
-                      "cmpxchg4.acq r30 = [%1], r30, ar.ccv;;\n\t"
-                      "cmp4.ne p14, p0 = r30, r0\n\t"
-                      "mov r27=%2\n\t"
-                      "(p14) brl.cond.spnt.many ia64_spinlock_contention_pre3_4;;"
-                      : "=r"(ptr) : "r"(ptr), "r" (flags) : IA64_SPINLOCK_CLOBBERS);
-# endif /* CONFIG_MCKINLEY */
-#else
-# ifdef CONFIG_ITANIUM
-        /* don't use brl on Itanium... */
-        /* mis-declare, so we get the entry-point, not it's function descriptor: */
-        asm volatile ("mov r30 = 1\n\t"
-                      "mov r27=%2\n\t"
-                      "mov ar.ccv = r0;;\n\t"
-                      "cmpxchg4.acq r30 = [%0], r30, ar.ccv\n\t"
-                      "movl r29 = ia64_spinlock_contention;;\n\t"
-                      "cmp4.ne p14, p0 = r30, r0\n\t"
-                      "mov b6 = r29;;\n\t"
-                      "(p14) br.call.spnt.many b6 = b6"
-                      : "=r"(ptr) : "r"(ptr), "r" (flags) : IA64_SPINLOCK_CLOBBERS);
-# else
-        asm volatile ("mov r30 = 1\n\t"
-                      "mov r27=%2\n\t"
-                      "mov ar.ccv = r0;;\n\t"
-                      "cmpxchg4.acq r30 = [%0], r30, ar.ccv;;\n\t"
-                      "cmp4.ne p14, p0 = r30, r0\n\t"
-                      "(p14) brl.call.spnt.many b6=ia64_spinlock_contention;;"
-                      : "=r"(ptr) : "r"(ptr), "r" (flags) : IA64_SPINLOCK_CLOBBERS);
-# endif /* CONFIG_MCKINLEY */
-#endif
 }
-#define __raw_spin_lock(lock) __raw_spin_lock_flags(lock, 0)
+static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
+{
+        long tmp = ACCESS_ONCE(lock->lock), try;
-/* Unlock by doing an ordered store and releasing the cacheline with nta */
+        if (!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1L << TICKET_SHIFT) - 1))) {
-static inline void __raw_spin_unlock(raw_spinlock_t *x) {
+                try = tmp + (1L << TICKET_SHIFT);
-        barrier();
-        asm volatile ("st4.rel.nta [%0] = r0\n\t" :: "r"(x));
+                return ia64_cmpxchg(acq, &lock->lock, tmp, try, sizeof (tmp)) == tmp;
+        }
+        return 0;
 }
-#else /* !ASM_SUPPORTED */
+static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
-#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
+{
-# define __raw_spin_lock(x)                                                             \
+        int     *p = (int *)&lock->lock;
-do {                                                                                    \
-        __u32 *ia64_spinlock_ptr = (__u32 *) (x);                                       \
+        (void)ia64_fetchadd(1, p, rel);
-        __u64 ia64_spinlock_val;                                                        \
+}
-        ia64_spinlock_val = ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);                 \
-        if (unlikely(ia64_spinlock_val)) {                                              \
+static inline int __ticket_spin_is_locked(raw_spinlock_t *lock)
-                do {                                                                    \
+{
-                        while (*ia64_spinlock_ptr)                                      \
+        long tmp = ACCESS_ONCE(lock->lock);
-                                ia64_barrier();                                         \
-                        ia64_spinlock_val = ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0); \
+        return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1L << TICKET_SHIFT) - 1));
-                } while (ia64_spinlock_val);                                            \
+}
-        }                                                                               \
-} while (0)
+static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
-#define __raw_spin_unlock(x)    do { barrier(); ((raw_spinlock_t *) x)->lock = 0; } while (0)
+{
-#endif /* !ASM_SUPPORTED */
+        long tmp = ACCESS_ONCE(lock->lock);
-#define __raw_spin_is_locked(x)         ((x)->lock != 0)
+        return (((tmp >> TICKET_SHIFT) - tmp) & ((1L << TICKET_SHIFT) - 1)) > 1;
-#define __raw_spin_trylock(x)           (cmpxchg_acq(&(x)->lock, 0, 1) == 0)
+}
-#define __raw_spin_unlock_wait(lock) \
-        do { while (__raw_spin_is_locked(lock)) cpu_relax(); } while (0)
+static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
+{
+        return __ticket_spin_is_locked(lock);
+}
+static inline int __raw_spin_is_contended(raw_spinlock_t *lock)
+{
+        return __ticket_spin_is_contended(lock);
+}
+#define __raw_spin_is_contended __raw_spin_is_contended
+static __always_inline void __raw_spin_lock(raw_spinlock_t *lock)
+{
+        __ticket_spin_lock(lock);
+}
+static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock)
+{
+        return __ticket_spin_trylock(lock);
+}
+static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
+{
+        __ticket_spin_unlock(lock);
+}
+static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
+                                                  unsigned long flags)
+{
+        __raw_spin_lock(lock);
+}
+static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
+{
+        while (__raw_spin_is_locked(lock))
+                cpu_relax();
+}
 #define __raw_read_can_lock(rw)         (*(volatile int *)(rw) >= 0)
 #define __raw_write_can_lock(rw)        (*(volatile int *)(rw) == 0)
author	Tony Luck <tony.luck@intel.com>	2009-09-25 11:42:16 -0400
committer	Tony Luck <tony.luck@intel.com>	2009-09-25 11:42:16 -0400
commit	2c86963b093c1a0887dfc6b32c6e5ea3a80f2922 (patch)
tree	d1ed58dd0a644dd670e3724a575112fedd4b4250 /arch/ia64/include/asm/spinlock.h
parent	53cddfcc0e760d2b364878b6dadbd0c6d087cfae (diff)

diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h
index 13ab71576bc7..30bb930e1111 100644
--- a/arch/ia64/include/asm/spinlock.h
+++ b/arch/ia64/include/asm/spinlock.h
@@ -19,103 +19,106 @@
19		19
20	#define __raw_spin_lock_init(x) ((x)->lock = 0)	20	#define __raw_spin_lock_init(x) ((x)->lock = 0)
21		21
22	#ifdef ASM_SUPPORTED
23	/*	22	/*
24	* Try to get the lock. If we fail to get the lock, make a non-standard call to	23	* Ticket locks are conceptually two parts, one indicating the current head of
25	* ia64_spinlock_contention(). We do not use a normal call because that would force all	24	* the queue, and the other indicating the current tail. The lock is acquired
26	* callers of __raw_spin_lock() to be non-leaf routines. Instead, ia64_spinlock_contention() is	25	* by atomically noting the tail and incrementing it by one (thus adding
27	* carefully coded to touch only those registers that __raw_spin_lock() marks "clobbered".	26	* ourself to the queue and noting our position), then waiting until the head
		27	* becomes equal to the initial value of the tail.
		28	*
		29	*   63                     32  31                      0
		30	*  +----------------------------------------------------+
		31	*  |  next_ticket_number      |     now_serving         |
		32	*  +----------------------------------------------------+
28	*/	33	*/
29		34
30	#define IA64_SPINLOCK_CLOBBERS "ar.ccv", "ar.pfs", "p14", "p15", "r27", "r28", "r29", "r30", "b6", "memory"	35	#define TICKET_SHIFT 32
31		36
32	static inline void	37	static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
33	__raw_spin_lock_flags (raw_spinlock_t *lock, unsigned long flags)
34	{	38	{
35	register volatile unsigned int *ptr asm ("r31") = &lock->lock;	39	int *p = (int *)&lock->lock, turn, now_serving;
36		40
37	#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3)	41	now_serving = *p;
38	# ifdef CONFIG_ITANIUM	42	turn = ia64_fetchadd(1, p+1, acq);
39	/* don't use brl on Itanium... */	43
40	asm volatile ("{\n\t"	44	if (turn == now_serving)
41	" mov ar.ccv = r0\n\t"	45	return;
42	" mov r28 = ip\n\t"	46
43	" mov r30 = 1;;\n\t"	47	do {
44	"}\n\t"	48	cpu_relax();
45	"cmpxchg4.acq r30 = [%1], r30, ar.ccv\n\t"	49	} while (ACCESS_ONCE(*p) != turn);
46	"movl r29 = ia64_spinlock_contention_pre3_4;;\n\t"
47	"cmp4.ne p14, p0 = r30, r0\n\t"
48	"mov b6 = r29;;\n\t"
49	"mov r27=%2\n\t"
50	"(p14) br.cond.spnt.many b6"
51	: "=r"(ptr) : "r"(ptr), "r" (flags) : IA64_SPINLOCK_CLOBBERS);
52	# else
53	asm volatile ("{\n\t"
54	" mov ar.ccv = r0\n\t"
55	" mov r28 = ip\n\t"
56	" mov r30 = 1;;\n\t"
57	"}\n\t"
58	"cmpxchg4.acq r30 = [%1], r30, ar.ccv;;\n\t"
59	"cmp4.ne p14, p0 = r30, r0\n\t"
60	"mov r27=%2\n\t"
61	"(p14) brl.cond.spnt.many ia64_spinlock_contention_pre3_4;;"
62	: "=r"(ptr) : "r"(ptr), "r" (flags) : IA64_SPINLOCK_CLOBBERS);
63	# endif /* CONFIG_MCKINLEY */
64	#else
65	# ifdef CONFIG_ITANIUM
66	/* don't use brl on Itanium... */
67	/* mis-declare, so we get the entry-point, not it's function descriptor: */
68	asm volatile ("mov r30 = 1\n\t"
69	"mov r27=%2\n\t"
70	"mov ar.ccv = r0;;\n\t"
71	"cmpxchg4.acq r30 = [%0], r30, ar.ccv\n\t"
72	"movl r29 = ia64_spinlock_contention;;\n\t"
73	"cmp4.ne p14, p0 = r30, r0\n\t"
74	"mov b6 = r29;;\n\t"
75	"(p14) br.call.spnt.many b6 = b6"
76	: "=r"(ptr) : "r"(ptr), "r" (flags) : IA64_SPINLOCK_CLOBBERS);
77	# else
78	asm volatile ("mov r30 = 1\n\t"
79	"mov r27=%2\n\t"
80	"mov ar.ccv = r0;;\n\t"
81	"cmpxchg4.acq r30 = [%0], r30, ar.ccv;;\n\t"
82	"cmp4.ne p14, p0 = r30, r0\n\t"
83	"(p14) brl.call.spnt.many b6=ia64_spinlock_contention;;"
84	: "=r"(ptr) : "r"(ptr), "r" (flags) : IA64_SPINLOCK_CLOBBERS);
85	# endif /* CONFIG_MCKINLEY */
86	#endif
87	}	50	}
88		51
89	#define __raw_spin_lock(lock) __raw_spin_lock_flags(lock, 0)	52	static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
		53	{
		54	long tmp = ACCESS_ONCE(lock->lock), try;
90		55
91	/* Unlock by doing an ordered store and releasing the cacheline with nta */	56	if (!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1L << TICKET_SHIFT) - 1))) {
92	static inline void __raw_spin_unlock(raw_spinlock_t *x) {	57	try = tmp + (1L << TICKET_SHIFT);
93	barrier();	58
94	asm volatile ("st4.rel.nta [%0] = r0\n\t" :: "r"(x));	59	return ia64_cmpxchg(acq, &lock->lock, tmp, try, sizeof (tmp)) == tmp;
		60	}
		61	return 0;
95	}	62	}
96		63
97	#else /* !ASM_SUPPORTED */	64	static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
98	#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)	65	{
99	# define __raw_spin_lock(x) \	66	int *p = (int *)&lock->lock;
100	do { \	67
101	__u32 *ia64_spinlock_ptr = (__u32 *) (x); \	68	(void)ia64_fetchadd(1, p, rel);
102	__u64 ia64_spinlock_val; \	69	}
103	ia64_spinlock_val = ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0); \	70
104	if (unlikely(ia64_spinlock_val)) { \	71	static inline int __ticket_spin_is_locked(raw_spinlock_t *lock)
105	do { \	72	{
106	while (*ia64_spinlock_ptr) \	73	long tmp = ACCESS_ONCE(lock->lock);
107	ia64_barrier(); \	74
108	ia64_spinlock_val = ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0); \	75	return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1L << TICKET_SHIFT) - 1));
109	} while (ia64_spinlock_val); \	76	}
110	} \	77
111	} while (0)	78	static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
112	#define __raw_spin_unlock(x) do { barrier(); ((raw_spinlock_t *) x)->lock = 0; } while (0)	79	{
113	#endif /* !ASM_SUPPORTED */	80	long tmp = ACCESS_ONCE(lock->lock);
114		81
115	#define __raw_spin_is_locked(x) ((x)->lock != 0)	82	return (((tmp >> TICKET_SHIFT) - tmp) & ((1L << TICKET_SHIFT) - 1)) > 1;
116	#define __raw_spin_trylock(x) (cmpxchg_acq(&(x)->lock, 0, 1) == 0)	83	}
117	#define __raw_spin_unlock_wait(lock) \	84
118	do { while (__raw_spin_is_locked(lock)) cpu_relax(); } while (0)	85	static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
		86	{
		87	return __ticket_spin_is_locked(lock);
		88	}
		89
		90	static inline int __raw_spin_is_contended(raw_spinlock_t *lock)
		91	{
		92	return __ticket_spin_is_contended(lock);
		93	}
		94	#define __raw_spin_is_contended __raw_spin_is_contended
		95
		96	static __always_inline void __raw_spin_lock(raw_spinlock_t *lock)
		97	{
		98	__ticket_spin_lock(lock);
		99	}
		100
		101	static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock)
		102	{
		103	return __ticket_spin_trylock(lock);
		104	}
		105
		106	static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
		107	{
		108	__ticket_spin_unlock(lock);
		109	}
		110
		111	static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
		112	unsigned long flags)
		113	{
		114	__raw_spin_lock(lock);
		115	}
		116
		117	static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
		118	{
		119	while (__raw_spin_is_locked(lock))
		120	cpu_relax();
		121	}
119		122
120	#define __raw_read_can_lock(rw) (*(volatile int *)(rw) >= 0)	123	#define __raw_read_can_lock(rw) (*(volatile int *)(rw) >= 0)
121	#define __raw_write_can_lock(rw) (*(volatile int *)(rw) == 0)	124	#define __raw_write_can_lock(rw) (*(volatile int *)(rw) == 0)