diff options
author | Nick Piggin <npiggin@suse.de> | 2008-01-30 07:33:00 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-01-30 07:33:00 -0500 |
commit | 3a556b26a2718e48aa2b6ce06ea4875ddcd0778e (patch) | |
tree | 2c92b0f64e0a22ddeb349ef12ff8573df8f9d70e /include | |
parent | 938f667198179dc0c8424e2cfac9cd9fe405bee3 (diff) |
x86: big ticket locks
This implements ticket lock support for more than 255 CPUs on x86. The
implementation is selected at compile time according to the configured NR_CPUS.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'include')
-rw-r--r-- | include/asm-x86/spinlock.h | 124 |
1 files changed, 97 insertions, 27 deletions
diff --git a/include/asm-x86/spinlock.h b/include/asm-x86/spinlock.h index 2076d5d62d83..23804c1890ff 100644 --- a/include/asm-x86/spinlock.h +++ b/include/asm-x86/spinlock.h | |||
@@ -35,10 +35,35 @@ typedef int _slock_t; | |||
35 | # define LOCK_PTR_REG "D" | 35 | # define LOCK_PTR_REG "D" |
36 | #endif | 36 | #endif |
37 | 37 | ||
38 | #if (NR_CPUS > 256) | 38 | #if defined(CONFIG_X86_32) && \ |
39 | #error spinlock supports a maximum of 256 CPUs | 39 | (defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)) |
40 | /* | ||
41 | * On PPro SMP or if we are using OOSTORE, we use a locked operation to unlock | ||
42 | * (PPro errata 66, 92) | ||
43 | */ | ||
44 | # define UNLOCK_LOCK_PREFIX LOCK_PREFIX | ||
45 | #else | ||
46 | # define UNLOCK_LOCK_PREFIX | ||
40 | #endif | 47 | #endif |
41 | 48 | ||
49 | /* | ||
50 | * Ticket locks are conceptually two parts, one indicating the current head of | ||
51 | * the queue, and the other indicating the current tail. The lock is acquired | ||
52 | * by atomically noting the tail and incrementing it by one (thus adding | ||
53 | * ourself to the queue and noting our position), then waiting until the head | ||
54 | * becomes equal to the initial value of the tail. | |
55 | * | ||
56 | * We use an xadd covering *both* parts of the lock, to increment the tail and | ||
57 | * also load the position of the head, which takes care of memory ordering | ||
58 | * issues and should be optimal for the uncontended case. Note the tail must be | ||
59 | * in the high part, because a wide xadd increment of the low part would carry | ||
60 | * up and contaminate the high part. | ||
61 | * | ||
62 | * With fewer than 2^8 possible CPUs, we can use x86's partial registers to | ||
63 | * save some instructions and make the code more elegant. There really isn't | ||
64 | * much between them in performance though, especially as locks are out of line. | ||
65 | */ | ||
66 | #if (NR_CPUS < 256) | ||
42 | static inline int __raw_spin_is_locked(raw_spinlock_t *lock) | 67 | static inline int __raw_spin_is_locked(raw_spinlock_t *lock) |
43 | { | 68 | { |
44 | int tmp = *(volatile signed int *)(&(lock)->slock); | 69 | int tmp = *(volatile signed int *)(&(lock)->slock); |
@@ -57,21 +82,6 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock) | |||
57 | { | 82 | { |
58 | short inc = 0x0100; | 83 | short inc = 0x0100; |
59 | 84 | ||
60 | /* | ||
61 | * Ticket locks are conceptually two bytes, one indicating the current | ||
62 | * head of the queue, and the other indicating the current tail. The | ||
63 | * lock is acquired by atomically noting the tail and incrementing it | ||
64 | * by one (thus adding ourself to the queue and noting our position), | ||
65 | * then waiting until the head becomes equal to the the initial value | ||
66 | * of the tail. | ||
67 | * | ||
68 | * This uses a 16-bit xadd to increment the tail and also load the | ||
69 | * position of the head, which takes care of memory ordering issues | ||
70 | * and should be optimal for the uncontended case. Note the tail must | ||
71 | * be in the high byte, otherwise the 16-bit wide increment of the low | ||
72 | * byte would carry up and contaminate the high byte. | ||
73 | */ | ||
74 | |||
75 | __asm__ __volatile__ ( | 85 | __asm__ __volatile__ ( |
76 | LOCK_PREFIX "xaddw %w0, %1\n" | 86 | LOCK_PREFIX "xaddw %w0, %1\n" |
77 | "1:\t" | 87 | "1:\t" |
@@ -111,25 +121,85 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock) | |||
111 | return tmp; | 121 | return tmp; |
112 | } | 122 | } |
113 | 123 | ||
114 | #if defined(CONFIG_X86_32) && \ | 124 | static inline void __raw_spin_unlock(raw_spinlock_t *lock) |
115 | (defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)) | 125 | { |
116 | /* | 126 | __asm__ __volatile__( |
117 | * On PPro SMP or if we are using OOSTORE, we use a locked operation to unlock | 127 | UNLOCK_LOCK_PREFIX "incb %0" |
118 | * (PPro errata 66, 92) | 128 | :"+m" (lock->slock) |
119 | */ | 129 | : |
120 | # define UNLOCK_LOCK_PREFIX LOCK_PREFIX | 130 | :"memory", "cc"); |
131 | } | ||
121 | #else | 132 | #else |
122 | # define UNLOCK_LOCK_PREFIX | 133 | static inline int __raw_spin_is_locked(raw_spinlock_t *lock) |
123 | #endif | 134 | { |
135 | int tmp = *(volatile signed int *)(&(lock)->slock); | ||
136 | |||
137 | return (((tmp >> 16) & 0xffff) != (tmp & 0xffff)); | ||
138 | } | ||
139 | |||
140 | static inline int __raw_spin_is_contended(raw_spinlock_t *lock) | ||
141 | { | ||
142 | int tmp = *(volatile signed int *)(&(lock)->slock); | ||
143 | |||
144 | return (((tmp >> 16) & 0xffff) - (tmp & 0xffff)) > 1; | ||
145 | } | ||
146 | |||
147 | static inline void __raw_spin_lock(raw_spinlock_t *lock) | ||
148 | { | ||
149 | int inc = 0x00010000; | ||
150 | int tmp; | ||
151 | |||
152 | __asm__ __volatile__ ( | ||
153 | "lock ; xaddl %0, %1\n" | ||
154 | "movzwl %w0, %2\n\t" | ||
155 | "shrl $16, %0\n\t" | ||
156 | "1:\t" | ||
157 | "cmpl %0, %2\n\t" | ||
158 | "je 2f\n\t" | ||
159 | "rep ; nop\n\t" | ||
160 | "movzwl %1, %2\n\t" | ||
161 | /* don't need lfence here, because loads are in-order */ | ||
162 | "jmp 1b\n" | ||
163 | "2:" | ||
164 | :"+Q" (inc), "+m" (lock->slock), "=r" (tmp) | ||
165 | : | ||
166 | :"memory", "cc"); | ||
167 | } | ||
168 | |||
169 | #define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock) | ||
170 | |||
171 | static inline int __raw_spin_trylock(raw_spinlock_t *lock) | ||
172 | { | ||
173 | int tmp; | ||
174 | int new; | ||
175 | |||
176 | asm volatile( | ||
177 | "movl %2,%0\n\t" | ||
178 | "movl %0,%1\n\t" | ||
179 | "roll $16, %0\n\t" | ||
180 | "cmpl %0,%1\n\t" | ||
181 | "jne 1f\n\t" | ||
182 | "addl $0x00010000, %1\n\t" | ||
183 | "lock ; cmpxchgl %1,%2\n\t" | ||
184 | "1:" | ||
185 | "sete %b1\n\t" | ||
186 | "movzbl %b1,%0\n\t" | ||
187 | :"=&a" (tmp), "=r" (new), "+m" (lock->slock) | ||
188 | : | ||
189 | : "memory", "cc"); | ||
190 | |||
191 | return tmp; | ||
192 | } | ||
124 | 193 | ||
125 | static inline void __raw_spin_unlock(raw_spinlock_t *lock) | 194 | static inline void __raw_spin_unlock(raw_spinlock_t *lock) |
126 | { | 195 | { |
127 | __asm__ __volatile__( | 196 | __asm__ __volatile__( |
128 | UNLOCK_LOCK_PREFIX "incb %0" | 197 | UNLOCK_LOCK_PREFIX "incw %0" |
129 | :"+m" (lock->slock) | 198 | :"+m" (lock->slock) |
130 | : | 199 | : |
131 | :"memory", "cc"); | 200 | :"memory", "cc"); |
132 | } | 201 | } |
202 | #endif | ||
133 | 203 | ||
134 | static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock) | 204 | static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock) |
135 | { | 205 | { |