author     Ingo Molnar <mingo@elte.hu>  2011-09-28 02:57:10 -0400
committer  Ingo Molnar <mingo@elte.hu>  2011-09-28 02:57:10 -0400
commit     695d16f7870847c304a8cb9555572b07a541d4ee (patch)
tree       5f6aaeec1bfacf5ecf6ea1bc4724b7b6ee354204 /arch/x86
parent     815d405ceff0d6964683f033e18b9b23a88fba87 (diff)
parent     4a7f340c6a75ec5fca23d9c80a59f3f28cc4a61e (diff)
Merge branch 'upstream/ticketlock-cleanup' of git://github.com/jsgf/linux-xen into x86/spinlocks
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/include/asm/atomic.h          |   8
-rw-r--r--  arch/x86/include/asm/atomic64_64.h     |   6
-rw-r--r--  arch/x86/include/asm/cmpxchg.h         | 205
-rw-r--r--  arch/x86/include/asm/cmpxchg_32.h      | 114
-rw-r--r--  arch/x86/include/asm/cmpxchg_64.h      | 131
-rw-r--r--  arch/x86/include/asm/rwsem.h           |   8
-rw-r--r--  arch/x86/include/asm/spinlock.h        | 114
-rw-r--r--  arch/x86/include/asm/spinlock_types.h  |  22
-rw-r--r--  arch/x86/include/asm/uv/uv_bau.h       |   6
9 files changed, 257 insertions(+), 357 deletions(-)
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 10572e309ab2..58cb6d4085f7 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -172,18 +172,14 @@ static inline int atomic_add_negative(int i, atomic_t *v)
  */
 static inline int atomic_add_return(int i, atomic_t *v)
 {
-        int __i;
 #ifdef CONFIG_M386
+        int __i;
         unsigned long flags;
         if (unlikely(boot_cpu_data.x86 <= 3))
                 goto no_xadd;
 #endif
         /* Modern 486+ processor */
-        __i = i;
-        asm volatile(LOCK_PREFIX "xaddl %0, %1"
-                     : "+r" (i), "+m" (v->counter)
-                     : : "memory");
-        return i + __i;
+        return i + xadd(&v->counter, i);

 #ifdef CONFIG_M386
 no_xadd: /* Legacy 386 processor */
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 017594d403f6..0e1cbfc8ee06 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -170,11 +170,7 @@ static inline int atomic64_add_negative(long i, atomic64_t *v)
  */
 static inline long atomic64_add_return(long i, atomic64_t *v)
 {
-        long __i = i;
-        asm volatile(LOCK_PREFIX "xaddq %0, %1;"
-                     : "+r" (i), "+m" (v->counter)
-                     : : "memory");
-        return i + __i;
+        return i + xadd(&v->counter, i);
 }

 static inline long atomic64_sub_return(long i, atomic64_t *v)
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index a460fa088d4c..5d3acdf5a7a6 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -1,5 +1,210 @@
+#ifndef ASM_X86_CMPXCHG_H
+#define ASM_X86_CMPXCHG_H
+
+#include <linux/compiler.h>
+#include <asm/alternative.h> /* Provides LOCK_PREFIX */
+
+/*
+ * Non-existent functions to indicate usage errors at link time
+ * (or compile-time if the compiler implements __compiletime_error()).
+ */
+extern void __xchg_wrong_size(void)
+        __compiletime_error("Bad argument size for xchg");
+extern void __cmpxchg_wrong_size(void)
+        __compiletime_error("Bad argument size for cmpxchg");
+extern void __xadd_wrong_size(void)
+        __compiletime_error("Bad argument size for xadd");
+
+/*
+ * Constants for operation sizes. On 32-bit, the 64-bit size is set to
+ * -1 because sizeof will never return -1, thereby making those switch
+ * case statements guaranteed dead code which the compiler will
+ * eliminate, and allowing the "missing symbol in the default case" to
+ * indicate a usage error.
+ */
+#define __X86_CASE_B    1
+#define __X86_CASE_W    2
+#define __X86_CASE_L    4
+#ifdef CONFIG_64BIT
+#define __X86_CASE_Q    8
+#else
+#define __X86_CASE_Q    -1              /* sizeof will never return -1 */
+#endif
+
+/*
+ * Note: no "lock" prefix even on SMP: xchg always implies lock anyway.
+ * Since this is generally used to protect other memory information, we
+ * use "asm volatile" and "memory" clobbers to prevent gcc from moving
+ * information around.
+ */
+#define __xchg(x, ptr, size) \
+({ \
+        __typeof(*(ptr)) __x = (x); \
+        switch (size) { \
+        case __X86_CASE_B: \
+        { \
+                volatile u8 *__ptr = (volatile u8 *)(ptr); \
+                asm volatile("xchgb %0,%1" \
+                             : "=q" (__x), "+m" (*__ptr) \
+                             : "0" (__x) \
+                             : "memory"); \
+                break; \
+        } \
+        case __X86_CASE_W: \
+        { \
+                volatile u16 *__ptr = (volatile u16 *)(ptr); \
+                asm volatile("xchgw %0,%1" \
+                             : "=r" (__x), "+m" (*__ptr) \
+                             : "0" (__x) \
+                             : "memory"); \
+                break; \
+        } \
+        case __X86_CASE_L: \
+        { \
+                volatile u32 *__ptr = (volatile u32 *)(ptr); \
+                asm volatile("xchgl %0,%1" \
+                             : "=r" (__x), "+m" (*__ptr) \
+                             : "0" (__x) \
+                             : "memory"); \
+                break; \
+        } \
+        case __X86_CASE_Q: \
+        { \
+                volatile u64 *__ptr = (volatile u64 *)(ptr); \
+                asm volatile("xchgq %0,%1" \
+                             : "=r" (__x), "+m" (*__ptr) \
+                             : "0" (__x) \
+                             : "memory"); \
+                break; \
+        } \
+        default: \
+                __xchg_wrong_size(); \
+        } \
+        __x; \
+})
+
+#define xchg(ptr, v) \
+        __xchg((v), (ptr), sizeof(*ptr))
+
+/*
+ * Atomic compare and exchange.  Compare OLD with MEM, if identical,
+ * store NEW in MEM.  Return the initial value in MEM.  Success is
+ * indicated by comparing RETURN with OLD.
+ */
+#define __raw_cmpxchg(ptr, old, new, size, lock) \
+({ \
+        __typeof__(*(ptr)) __ret; \
+        __typeof__(*(ptr)) __old = (old); \
+        __typeof__(*(ptr)) __new = (new); \
+        switch (size) { \
+        case __X86_CASE_B: \
+        { \
+                volatile u8 *__ptr = (volatile u8 *)(ptr); \
+                asm volatile(lock "cmpxchgb %2,%1" \
+                             : "=a" (__ret), "+m" (*__ptr) \
+                             : "q" (__new), "0" (__old) \
+                             : "memory"); \
+                break; \
+        } \
+        case __X86_CASE_W: \
+        { \
+                volatile u16 *__ptr = (volatile u16 *)(ptr); \
+                asm volatile(lock "cmpxchgw %2,%1" \
+                             : "=a" (__ret), "+m" (*__ptr) \
+                             : "r" (__new), "0" (__old) \
+                             : "memory"); \
+                break; \
+        } \
+        case __X86_CASE_L: \
+        { \
+                volatile u32 *__ptr = (volatile u32 *)(ptr); \
+                asm volatile(lock "cmpxchgl %2,%1" \
+                             : "=a" (__ret), "+m" (*__ptr) \
+                             : "r" (__new), "0" (__old) \
+                             : "memory"); \
+                break; \
+        } \
+        case __X86_CASE_Q: \
+        { \
+                volatile u64 *__ptr = (volatile u64 *)(ptr); \
+                asm volatile(lock "cmpxchgq %2,%1" \
+                             : "=a" (__ret), "+m" (*__ptr) \
+                             : "r" (__new), "0" (__old) \
+                             : "memory"); \
+                break; \
+        } \
+        default: \
+                __cmpxchg_wrong_size(); \
+        } \
+        __ret; \
+})
+
+#define __cmpxchg(ptr, old, new, size) \
+        __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX)
+
+#define __sync_cmpxchg(ptr, old, new, size) \
+        __raw_cmpxchg((ptr), (old), (new), (size), "lock; ")
+
+#define __cmpxchg_local(ptr, old, new, size) \
+        __raw_cmpxchg((ptr), (old), (new), (size), "")
+
 #ifdef CONFIG_X86_32
 # include "cmpxchg_32.h"
 #else
 # include "cmpxchg_64.h"
 #endif
+
+#ifdef __HAVE_ARCH_CMPXCHG
+#define cmpxchg(ptr, old, new) \
+        __cmpxchg((ptr), (old), (new), sizeof(*ptr))
+
+#define sync_cmpxchg(ptr, old, new) \
+        __sync_cmpxchg((ptr), (old), (new), sizeof(*ptr))
+
+#define cmpxchg_local(ptr, old, new) \
+        __cmpxchg_local((ptr), (old), (new), sizeof(*ptr))
+#endif
+
+#define __xadd(ptr, inc, lock) \
+        ({ \
+                __typeof__ (*(ptr)) __ret = (inc); \
+                switch (sizeof(*(ptr))) { \
+                case __X86_CASE_B: \
+                        asm volatile (lock "xaddb %b0, %1\n" \
+                                      : "+r" (__ret), "+m" (*(ptr)) \
+                                      : : "memory", "cc"); \
+                        break; \
+                case __X86_CASE_W: \
+                        asm volatile (lock "xaddw %w0, %1\n" \
+                                      : "+r" (__ret), "+m" (*(ptr)) \
+                                      : : "memory", "cc"); \
+                        break; \
+                case __X86_CASE_L: \
+                        asm volatile (lock "xaddl %0, %1\n" \
+                                      : "+r" (__ret), "+m" (*(ptr)) \
+                                      : : "memory", "cc"); \
+                        break; \
+                case __X86_CASE_Q: \
+                        asm volatile (lock "xaddq %q0, %1\n" \
+                                      : "+r" (__ret), "+m" (*(ptr)) \
+                                      : : "memory", "cc"); \
+                        break; \
+                default: \
+                        __xadd_wrong_size(); \
+                } \
+                __ret; \
+        })
+
+/*
+ * xadd() adds "inc" to "*ptr" and atomically returns the previous
+ * value of "*ptr".
+ *
+ * xadd() is locked when multiple CPUs are online
+ * xadd_sync() is always locked
+ * xadd_local() is never locked
+ */
+#define xadd(ptr, inc)          __xadd((ptr), (inc), LOCK_PREFIX)
+#define xadd_sync(ptr, inc)     __xadd((ptr), (inc), "lock; ")
+#define xadd_local(ptr, inc)    __xadd((ptr), (inc), "")
+
+#endif /* ASM_X86_CMPXCHG_H */
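
The consolidated header above gives x86 a single xadd() helper that adds a value to memory with a locked xadd and hands back the value that was there before, which is why callers in this series collapse to "return i + xadd(&v->counter, i)". Below is a minimal user-space sketch of that contract, using GCC's __atomic_fetch_add() builtin as a stand-in for the kernel macro; the demo_* names are illustrative only and not part of the patch.

/* Sketch of xadd()-style fetch-and-add semantics in user space.
 * __atomic_fetch_add() stands in for the kernel's LOCK XADD; the names
 * demo_add_return() and demo_counter are hypothetical. */
#include <stdio.h>

static int demo_counter;

static int demo_add_return(int i, int *counter)
{
        /* fetch-and-add returns the value *before* the addition, so the
         * post-add value is old + i, the same pattern used in this patch */
        int old = __atomic_fetch_add(counter, i, __ATOMIC_SEQ_CST);
        return old + i;
}

int main(void)
{
        printf("%d\n", demo_add_return(5, &demo_counter));  /* prints 5  */
        printf("%d\n", demo_add_return(7, &demo_counter));  /* prints 12 */
        return 0;
}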
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index 3deb7250624c..fbebb07dd80b 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -1,61 +1,11 @@
 #ifndef _ASM_X86_CMPXCHG_32_H
 #define _ASM_X86_CMPXCHG_32_H

-#include <linux/bitops.h> /* for LOCK_PREFIX */
-
 /*
  * Note: if you use set64_bit(), __cmpxchg64(), or their variants,
  * you need to test for the feature in boot_cpu_data.
  */

-extern void __xchg_wrong_size(void);
-
-/*
- * Note: no "lock" prefix even on SMP: xchg always implies lock anyway.
- * Since this is generally used to protect other memory information, we
- * use "asm volatile" and "memory" clobbers to prevent gcc from moving
- * information around.
- */
-#define __xchg(x, ptr, size) \
-({ \
-        __typeof(*(ptr)) __x = (x); \
-        switch (size) { \
-        case 1: \
-        { \
-                volatile u8 *__ptr = (volatile u8 *)(ptr); \
-                asm volatile("xchgb %0,%1" \
-                             : "=q" (__x), "+m" (*__ptr) \
-                             : "0" (__x) \
-                             : "memory"); \
-                break; \
-        } \
-        case 2: \
-        { \
-                volatile u16 *__ptr = (volatile u16 *)(ptr); \
-                asm volatile("xchgw %0,%1" \
-                             : "=r" (__x), "+m" (*__ptr) \
-                             : "0" (__x) \
-                             : "memory"); \
-                break; \
-        } \
-        case 4: \
-        { \
-                volatile u32 *__ptr = (volatile u32 *)(ptr); \
-                asm volatile("xchgl %0,%1" \
-                             : "=r" (__x), "+m" (*__ptr) \
-                             : "0" (__x) \
-                             : "memory"); \
-                break; \
-        } \
-        default: \
-                __xchg_wrong_size(); \
-        } \
-        __x; \
-})
-
-#define xchg(ptr, v) \
-        __xchg((v), (ptr), sizeof(*ptr))
-
 /*
  * CMPXCHG8B only writes to the target if we had the previous
  * value in registers, otherwise it acts as a read and gives us the
@@ -84,72 +34,8 @@ static inline void set_64bit(volatile u64 *ptr, u64 value)
                      : "memory");
 }

-extern void __cmpxchg_wrong_size(void);
-
-/*
- * Atomic compare and exchange.  Compare OLD with MEM, if identical,
- * store NEW in MEM.  Return the initial value in MEM.  Success is
- * indicated by comparing RETURN with OLD.
- */
-#define __raw_cmpxchg(ptr, old, new, size, lock) \
-({ \
-        __typeof__(*(ptr)) __ret; \
-        __typeof__(*(ptr)) __old = (old); \
-        __typeof__(*(ptr)) __new = (new); \
-        switch (size) { \
-        case 1: \
-        { \
-                volatile u8 *__ptr = (volatile u8 *)(ptr); \
-                asm volatile(lock "cmpxchgb %2,%1" \
-                             : "=a" (__ret), "+m" (*__ptr) \
-                             : "q" (__new), "0" (__old) \
-                             : "memory"); \
-                break; \
-        } \
-        case 2: \
-        { \
-                volatile u16 *__ptr = (volatile u16 *)(ptr); \
-                asm volatile(lock "cmpxchgw %2,%1" \
-                             : "=a" (__ret), "+m" (*__ptr) \
-                             : "r" (__new), "0" (__old) \
-                             : "memory"); \
-                break; \
-        } \
-        case 4: \
-        { \
-                volatile u32 *__ptr = (volatile u32 *)(ptr); \
-                asm volatile(lock "cmpxchgl %2,%1" \
-                             : "=a" (__ret), "+m" (*__ptr) \
-                             : "r" (__new), "0" (__old) \
-                             : "memory"); \
-                break; \
-        } \
-        default: \
-                __cmpxchg_wrong_size(); \
-        } \
-        __ret; \
-})
-
-#define __cmpxchg(ptr, old, new, size) \
-        __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX)
-
-#define __sync_cmpxchg(ptr, old, new, size) \
-        __raw_cmpxchg((ptr), (old), (new), (size), "lock; ")
-
-#define __cmpxchg_local(ptr, old, new, size) \
-        __raw_cmpxchg((ptr), (old), (new), (size), "")
-
 #ifdef CONFIG_X86_CMPXCHG
 #define __HAVE_ARCH_CMPXCHG 1
-
-#define cmpxchg(ptr, old, new) \
-        __cmpxchg((ptr), (old), (new), sizeof(*ptr))
-
-#define sync_cmpxchg(ptr, old, new) \
-        __sync_cmpxchg((ptr), (old), (new), sizeof(*ptr))
-
-#define cmpxchg_local(ptr, old, new) \
-        __cmpxchg_local((ptr), (old), (new), sizeof(*ptr))
 #endif

 #ifdef CONFIG_X86_CMPXCHG64
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
index 7cf5c0a24434..285da02c38fa 100644
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -1,144 +1,13 @@
 #ifndef _ASM_X86_CMPXCHG_64_H
 #define _ASM_X86_CMPXCHG_64_H

-#include <asm/alternative.h> /* Provides LOCK_PREFIX */
-
 static inline void set_64bit(volatile u64 *ptr, u64 val)
 {
         *ptr = val;
 }

-extern void __xchg_wrong_size(void);
-extern void __cmpxchg_wrong_size(void);
-
-/*
- * Note: no "lock" prefix even on SMP: xchg always implies lock anyway.
- * Since this is generally used to protect other memory information, we
- * use "asm volatile" and "memory" clobbers to prevent gcc from moving
- * information around.
- */
-#define __xchg(x, ptr, size) \
-({ \
-        __typeof(*(ptr)) __x = (x); \
-        switch (size) { \
-        case 1: \
-        { \
-                volatile u8 *__ptr = (volatile u8 *)(ptr); \
-                asm volatile("xchgb %0,%1" \
-                             : "=q" (__x), "+m" (*__ptr) \
-                             : "0" (__x) \
-                             : "memory"); \
-                break; \
-        } \
-        case 2: \
-        { \
-                volatile u16 *__ptr = (volatile u16 *)(ptr); \
-                asm volatile("xchgw %0,%1" \
-                             : "=r" (__x), "+m" (*__ptr) \
-                             : "0" (__x) \
-                             : "memory"); \
-                break; \
-        } \
-        case 4: \
-        { \
-                volatile u32 *__ptr = (volatile u32 *)(ptr); \
-                asm volatile("xchgl %0,%1" \
-                             : "=r" (__x), "+m" (*__ptr) \
-                             : "0" (__x) \
-                             : "memory"); \
-                break; \
-        } \
-        case 8: \
-        { \
-                volatile u64 *__ptr = (volatile u64 *)(ptr); \
-                asm volatile("xchgq %0,%1" \
-                             : "=r" (__x), "+m" (*__ptr) \
-                             : "0" (__x) \
-                             : "memory"); \
-                break; \
-        } \
-        default: \
-                __xchg_wrong_size(); \
-        } \
-        __x; \
-})
-
-#define xchg(ptr, v) \
-        __xchg((v), (ptr), sizeof(*ptr))
-
 #define __HAVE_ARCH_CMPXCHG 1

-/*
- * Atomic compare and exchange.  Compare OLD with MEM, if identical,
- * store NEW in MEM.  Return the initial value in MEM.  Success is
- * indicated by comparing RETURN with OLD.
- */
-#define __raw_cmpxchg(ptr, old, new, size, lock) \
-({ \
-        __typeof__(*(ptr)) __ret; \
-        __typeof__(*(ptr)) __old = (old); \
-        __typeof__(*(ptr)) __new = (new); \
-        switch (size) { \
-        case 1: \
-        { \
-                volatile u8 *__ptr = (volatile u8 *)(ptr); \
-                asm volatile(lock "cmpxchgb %2,%1" \
-                             : "=a" (__ret), "+m" (*__ptr) \
-                             : "q" (__new), "0" (__old) \
-                             : "memory"); \
-                break; \
-        } \
-        case 2: \
-        { \
-                volatile u16 *__ptr = (volatile u16 *)(ptr); \
-                asm volatile(lock "cmpxchgw %2,%1" \
-                             : "=a" (__ret), "+m" (*__ptr) \
-                             : "r" (__new), "0" (__old) \
-                             : "memory"); \
-                break; \
-        } \
-        case 4: \
-        { \
-                volatile u32 *__ptr = (volatile u32 *)(ptr); \
-                asm volatile(lock "cmpxchgl %2,%1" \
-                             : "=a" (__ret), "+m" (*__ptr) \
-                             : "r" (__new), "0" (__old) \
-                             : "memory"); \
-                break; \
-        } \
-        case 8: \
-        { \
-                volatile u64 *__ptr = (volatile u64 *)(ptr); \
-                asm volatile(lock "cmpxchgq %2,%1" \
-                             : "=a" (__ret), "+m" (*__ptr) \
-                             : "r" (__new), "0" (__old) \
-                             : "memory"); \
-                break; \
-        } \
-        default: \
-                __cmpxchg_wrong_size(); \
-        } \
-        __ret; \
-})
-
-#define __cmpxchg(ptr, old, new, size) \
-        __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX)
-
-#define __sync_cmpxchg(ptr, old, new, size) \
-        __raw_cmpxchg((ptr), (old), (new), (size), "lock; ")
-
-#define __cmpxchg_local(ptr, old, new, size) \
-        __raw_cmpxchg((ptr), (old), (new), (size), "")
-
-#define cmpxchg(ptr, old, new) \
-        __cmpxchg((ptr), (old), (new), sizeof(*ptr))
-
-#define sync_cmpxchg(ptr, old, new) \
-        __sync_cmpxchg((ptr), (old), (new), sizeof(*ptr))
-
-#define cmpxchg_local(ptr, old, new) \
-        __cmpxchg_local((ptr), (old), (new), sizeof(*ptr))
-
 #define cmpxchg64(ptr, o, n) \
 ({ \
         BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
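
The 32-bit and 64-bit headers used to carry duplicate copies of this machinery; after the cleanup a single definition in cmpxchg.h serves both. The contract stays the same: compare OLD with the memory word, store NEW only if they match, and always return the word's initial value so the caller can test for success. A small user-space sketch of that pattern, using GCC's __sync_val_compare_and_swap() builtin in place of LOCK CMPXCHG (the demo_* names are illustrative, not part of the patch):

/* Sketch of the cmpxchg() contract: store NEW only if *ptr == OLD,
 * always return the initial value of *ptr. */
#include <stdio.h>

static int demo_lock_word;      /* 0 = free, 1 = taken */

static int demo_try_take(int *word)
{
        /* success only if the returned (initial) value equals OLD (0) */
        return __sync_val_compare_and_swap(word, 0, 1) == 0;
}

int main(void)
{
        printf("first try:  %d\n", demo_try_take(&demo_lock_word)); /* 1 */
        printf("second try: %d\n", demo_try_take(&demo_lock_word)); /* 0 */
        return 0;
}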
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index df4cd32b4cc6..2dbe4a721ce5 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -204,13 +204,7 @@ static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem)
  */
 static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
 {
-        long tmp = delta;
-
-        asm volatile(LOCK_PREFIX "xadd %0,%1"
-                     : "+r" (tmp), "+m" (sem->count)
-                     : : "memory");
-
-        return tmp + delta;
+        return delta + xadd(&sem->count, delta);
 }

 #endif /* __KERNEL__ */
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index ee67edf86fdd..972c260919a3 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -49,109 +49,49 @@
  * issues and should be optimal for the uncontended case. Note the tail must be
  * in the high part, because a wide xadd increment of the low part would carry
  * up and contaminate the high part.
- *
- * With fewer than 2^8 possible CPUs, we can use x86's partial registers to
- * save some instructions and make the code more elegant. There really isn't
- * much between them in performance though, especially as locks are out of line.
  */
-#if (NR_CPUS < 256)
-#define TICKET_SHIFT 8
-
 static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
 {
-        short inc = 0x0100;
+        register struct __raw_tickets inc = { .tail = 1 };

-        asm volatile (
-                LOCK_PREFIX "xaddw %w0, %1\n"
-                "1:\t"
-                "cmpb %h0, %b0\n\t"
-                "je 2f\n\t"
-                "rep ; nop\n\t"
-                "movb %1, %b0\n\t"
-                /* don't need lfence here, because loads are in-order */
-                "jmp 1b\n"
-                "2:"
-                : "+Q" (inc), "+m" (lock->slock)
-                :
-                : "memory", "cc");
+        inc = xadd(&lock->tickets, inc);
+
+        for (;;) {
+                if (inc.head == inc.tail)
+                        break;
+                cpu_relax();
+                inc.head = ACCESS_ONCE(lock->tickets.head);
+        }
+        barrier();              /* make sure nothing creeps before the lock is taken */
 }

 static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
 {
-        int tmp, new;
+        arch_spinlock_t old, new;

-        asm volatile("movzwl %2, %0\n\t"
-                     "cmpb %h0,%b0\n\t"
-                     "leal 0x100(%" REG_PTR_MODE "0), %1\n\t"
-                     "jne 1f\n\t"
-                     LOCK_PREFIX "cmpxchgw %w1,%2\n\t"
-                     "1:"
-                     "sete %b1\n\t"
-                     "movzbl %b1,%0\n\t"
-                     : "=&a" (tmp), "=&q" (new), "+m" (lock->slock)
-                     :
-                     : "memory", "cc");
-
-        return tmp;
+        old.tickets = ACCESS_ONCE(lock->tickets);
+        if (old.tickets.head != old.tickets.tail)
+                return 0;
+
+        new.head_tail = old.head_tail + (1 << TICKET_SHIFT);
+
+        /* cmpxchg is a full barrier, so nothing can move before it */
+        return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail;
 }

+#if (NR_CPUS < 256)
 static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
 {
         asm volatile(UNLOCK_LOCK_PREFIX "incb %0"
-                     : "+m" (lock->slock)
+                     : "+m" (lock->head_tail)
                      :
                      : "memory", "cc");
 }
 #else
-#define TICKET_SHIFT 16
-
-static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
-{
-        int inc = 0x00010000;
-        int tmp;
-
-        asm volatile(LOCK_PREFIX "xaddl %0, %1\n"
-                     "movzwl %w0, %2\n\t"
-                     "shrl $16, %0\n\t"
-                     "1:\t"
-                     "cmpl %0, %2\n\t"
-                     "je 2f\n\t"
-                     "rep ; nop\n\t"
-                     "movzwl %1, %2\n\t"
-                     /* don't need lfence here, because loads are in-order */
-                     "jmp 1b\n"
-                     "2:"
-                     : "+r" (inc), "+m" (lock->slock), "=&r" (tmp)
-                     :
-                     : "memory", "cc");
-}
-
-static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
-{
-        int tmp;
-        int new;
-
-        asm volatile("movl %2,%0\n\t"
-                     "movl %0,%1\n\t"
-                     "roll $16, %0\n\t"
-                     "cmpl %0,%1\n\t"
-                     "leal 0x00010000(%" REG_PTR_MODE "0), %1\n\t"
-                     "jne 1f\n\t"
-                     LOCK_PREFIX "cmpxchgl %1,%2\n\t"
-                     "1:"
-                     "sete %b1\n\t"
-                     "movzbl %b1,%0\n\t"
-                     : "=&a" (tmp), "=&q" (new), "+m" (lock->slock)
-                     :
-                     : "memory", "cc");
-
-        return tmp;
-}
-
 static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
 {
         asm volatile(UNLOCK_LOCK_PREFIX "incw %0"
-                     : "+m" (lock->slock)
+                     : "+m" (lock->head_tail)
                      :
                      : "memory", "cc");
 }
@@ -159,16 +99,16 @@ static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)

 static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
 {
-        int tmp = ACCESS_ONCE(lock->slock);
+        struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);

-        return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1 << TICKET_SHIFT) - 1));
+        return !!(tmp.tail ^ tmp.head);
 }

 static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
 {
-        int tmp = ACCESS_ONCE(lock->slock);
+        struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);

-        return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1;
+        return ((tmp.tail - tmp.head) & TICKET_MASK) > 1;
 }

 #ifndef CONFIG_PARAVIRT_SPINLOCKS
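
The rewrite above keeps the ticket-lock protocol but expresses it in C: lock() takes a ticket by atomically bumping the tail with xadd() and spins until head catches up, trylock() only attempts the cmpxchg when the lock looks free, and unlock() advances head. A user-space sketch of the same protocol, with C11 atomics standing in for xadd()/ACCESS_ONCE() and hypothetical demo_* names:

/* Sketch of the ticket-lock protocol shown above, in portable C11. */
#include <stdatomic.h>
#include <stdint.h>

struct demo_ticketlock {
        _Atomic uint16_t head;  /* ticket currently being served      */
        _Atomic uint16_t tail;  /* next ticket to be handed out       */
};

static void demo_lock(struct demo_ticketlock *lock)
{
        /* take a ticket: atomically increment tail, keep the old value */
        uint16_t me = atomic_fetch_add(&lock->tail, 1);

        /* spin until the current owner hands the lock to our ticket */
        while (atomic_load(&lock->head) != me)
                ; /* cpu_relax() would go here in the kernel */
}

static void demo_unlock(struct demo_ticketlock *lock)
{
        /* serve the next waiter, mirroring the "inc head" unlock above */
        atomic_fetch_add(&lock->head, 1);
}

int main(void)
{
        struct demo_ticketlock lock = { 0, 0 };

        demo_lock(&lock);       /* ticket 0 is served immediately */
        demo_unlock(&lock);     /* head advances to 1 for the next ticket */
        return 0;
}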
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index 7c7a486fcb68..8ebd5df7451e 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -5,11 +5,29 @@
 # error "please don't include this file directly"
 #endif

+#include <linux/types.h>
+
+#if (CONFIG_NR_CPUS < 256)
+typedef u8  __ticket_t;
+typedef u16 __ticketpair_t;
+#else
+typedef u16 __ticket_t;
+typedef u32 __ticketpair_t;
+#endif
+
+#define TICKET_SHIFT    (sizeof(__ticket_t) * 8)
+#define TICKET_MASK     ((__ticket_t)((1 << TICKET_SHIFT) - 1))
+
 typedef struct arch_spinlock {
-        unsigned int slock;
+        union {
+                __ticketpair_t head_tail;
+                struct __raw_tickets {
+                        __ticket_t head, tail;
+                } tickets;
+        };
 } arch_spinlock_t;

-#define __ARCH_SPIN_LOCK_UNLOCKED       { 0 }
+#define __ARCH_SPIN_LOCK_UNLOCKED       { { 0 } }

 #include <asm/rwlock.h>

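
The new arch_spinlock layout overlays the whole ticket pair (head_tail) with its two halves, so trylock's "old.head_tail + (1 << TICKET_SHIFT)" bumps only the tail half while unlock's byte or word increment touches only the head. A user-space sketch of that overlay for the NR_CPUS < 256, little-endian x86 case follows; the demo_* names and DEMO_TICKET_SHIFT are illustrative, not part of the patch.

/* Sketch of the head/tail union introduced above (little-endian x86,
 * fewer than 256 CPUs, so tickets are single bytes). */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define DEMO_TICKET_SHIFT (sizeof(uint8_t) * 8)

typedef struct demo_spinlock {
        union {
                uint16_t head_tail;
                struct {
                        uint8_t head, tail;     /* head is the low byte on x86 */
                } tickets;
        };
} demo_spinlock_t;

int main(void)
{
        demo_spinlock_t lock = { { 0 } };

        static_assert(sizeof(demo_spinlock_t) == sizeof(uint16_t),
                      "both views share the same storage");

        /* trylock's "+ (1 << TICKET_SHIFT)" bumps only the tail half */
        lock.head_tail += 1 << DEMO_TICKET_SHIFT;
        printf("head=%u tail=%u\n", lock.tickets.head, lock.tickets.tail);
        /* prints: head=0 tail=1 */
        return 0;
}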
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 37d369859c8e..c568ccca6e0e 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -656,11 +656,7 @@ static inline int atomic_read_short(const struct atomic_short *v)
  */
 static inline int atom_asr(short i, struct atomic_short *v)
 {
-        short __i = i;
-        asm volatile(LOCK_PREFIX "xaddw %0, %1"
-                     : "+r" (i), "+m" (v->counter)
-                     : : "memory");
-        return i + __i;
+        return i + xadd(&v->counter, i);
 }

 /*