author     Jan Beulich <JBeulich@novell.com>    2011-07-19 08:00:45 -0400
committer  Ingo Molnar <mingo@elte.hu>          2011-07-21 03:03:36 -0400
commit     a750036f35cda160ef77408ec92c3dc41f8feebb
tree       1198013e1289dfb9b5a299388ee09515642c4030
parent     a738669464a1e0d8e7b20f631120192f9cf7cfbd
x86: Fix write lock scalability 64-bit issue
With the write lock path simply subtracting RW_LOCK_BIAS, there is,
on large systems, the theoretical possibility of overflowing the
32-bit value used so far: 128 or more CPUs could manage to do the
subtraction without getting to do the inverse addition in the
failure path quickly enough.
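Working the numbers for the old bias:

    RW_LOCK_BIAS = 0x01000000 = 2^24
    2^31 / 2^24  = 128

i.e. 128 outstanding bias subtractions exhaust the whole negative
range of a signed 32-bit lock word, and any reader decrements racing
with them wrap it past INT_MIN.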
A first measure is to modify RW_LOCK_BIAS itself: with the new value
chosen, the lock is good for up to 2048 CPUs, each allowed to nest
over 2048 times on the read path, without causing an issue. Quite
possibly it would even be sufficient to adjust the bias a little
further, assuming that allowing for significantly less nesting would
suffice.
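The same arithmetic with the new value (presumably where the 2048
figure comes from):

    RW_LOCK_BIAS = 0x00100000 = 2^20
    2^31 / 2^20  = 2048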
However, since the original value allowed for even more nesting
levels, and to support more than 2048 CPUs (currently possible only
for 64-bit kernels), the lock itself is widened to 64 bits.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/4E258E0D020000780004E3F0@nat28.tlf.novell.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
 arch/x86/include/asm/asm.h            |  2
 arch/x86/include/asm/rwlock.h         | 43
 arch/x86/include/asm/spinlock.h       | 37
 arch/x86/include/asm/spinlock_types.h |  6
 arch/x86/lib/rwlock.S                 | 12
 arch/x86/lib/thunk_64.S               |  1
 6 files changed, 73 insertions(+), 28 deletions(-)
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 5890beb021c4..9412d6558c88 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -3,9 +3,11 @@
 
 #ifdef __ASSEMBLY__
 # define __ASM_FORM(x)		x
+# define __ASM_FORM_COMMA(x)	x,
 # define __ASM_EX_SEC		.section __ex_table, "a"
 #else
 # define __ASM_FORM(x)		" " #x " "
+# define __ASM_FORM_COMMA(x)	" " #x ","
 # define __ASM_EX_SEC		" .section __ex_table,\"a\"\n"
 #endif
 
diff --git a/arch/x86/include/asm/rwlock.h b/arch/x86/include/asm/rwlock.h
index 6a8c0d645108..a5370a03d90c 100644
--- a/arch/x86/include/asm/rwlock.h
+++ b/arch/x86/include/asm/rwlock.h
@@ -1,7 +1,48 @@
 #ifndef _ASM_X86_RWLOCK_H
 #define _ASM_X86_RWLOCK_H
 
-#define RW_LOCK_BIAS		0x01000000
+#include <asm/asm.h>
+
+#if CONFIG_NR_CPUS <= 2048
+
+#ifndef __ASSEMBLY__
+typedef union {
+	s32 lock;
+	s32 write;
+} arch_rwlock_t;
+#endif
+
+#define RW_LOCK_BIAS		0x00100000
+#define READ_LOCK_SIZE(insn)	__ASM_FORM(insn##l)
+#define READ_LOCK_ATOMIC(n)	atomic_##n
+#define WRITE_LOCK_ADD(n)	__ASM_FORM_COMMA(addl n)
+#define WRITE_LOCK_SUB(n)	__ASM_FORM_COMMA(subl n)
+#define WRITE_LOCK_CMP		RW_LOCK_BIAS
+
+#else /* CONFIG_NR_CPUS > 2048 */
+
+#include <linux/const.h>
+
+#ifndef __ASSEMBLY__
+typedef union {
+	s64 lock;
+	struct {
+		u32 read;
+		s32 write;
+	};
+} arch_rwlock_t;
+#endif
+
+#define RW_LOCK_BIAS		(_AC(1,L) << 32)
+#define READ_LOCK_SIZE(insn)	__ASM_FORM(insn##q)
+#define READ_LOCK_ATOMIC(n)	atomic64_##n
+#define WRITE_LOCK_ADD(n)	__ASM_FORM(incl)
+#define WRITE_LOCK_SUB(n)	__ASM_FORM(decl)
+#define WRITE_LOCK_CMP		1
+
+#endif /* CONFIG_NR_CPUS */
+
+#define __ARCH_RW_LOCK_UNLOCKED		{ RW_LOCK_BIAS }
 
 /* Actual code is in asm/spinlock.h or in arch/x86/lib/rwlock.S */
 
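In the >2048-CPU variant the bias is 1 << 32, so the write side can
manipulate it with a plain 32-bit incl/decl on the high half of the
word (the write field) while the read side keeps operating on the
full 64-bit value; that is why WRITE_LOCK_ADD()/WRITE_LOCK_SUB()
ignore their argument in that configuration. A small userspace sketch
of the aliasing (assumes the little-endian x86 layout; demo code, not
kernel code):

#include <stdio.h>
#include <stdint.h>

typedef union {
	int64_t lock;
	struct {
		uint32_t read;		/* low half: reader count */
		int32_t write;		/* high half: writer bias */
	};
} demo_rwlock_t;

int main(void)
{
	demo_rwlock_t rw = { .lock = (int64_t)1 << 32 };	/* unlocked */

	rw.write--;	/* WRITE_LOCK_SUB: decl on the high half */
	printf("after write lock:   0x%016llx\n",
	       (unsigned long long)rw.lock);

	rw.write++;	/* WRITE_LOCK_ADD: incl restores the bias */
	printf("after write unlock: 0x%016llx\n",
	       (unsigned long long)rw.lock);
	return 0;
}

Decrementing the write field by one is exactly a subtraction of
RW_LOCK_BIAS from the combined 64-bit word, without needing a 64-bit
immediate.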
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 3089f70c0c52..e9e51f710e6c 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -2,7 +2,6 @@
 #define _ASM_X86_SPINLOCK_H
 
 #include <asm/atomic.h>
-#include <asm/rwlock.h>
 #include <asm/page.h>
 #include <asm/processor.h>
 #include <linux/compiler.h>
@@ -234,7 +233,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
  */
 static inline int arch_read_can_lock(arch_rwlock_t *lock)
 {
-	return (int)(lock)->lock > 0;
+	return lock->lock > 0;
 }
 
 /**
@@ -243,12 +242,12 @@ static inline int arch_read_can_lock(arch_rwlock_t *lock)
  */
 static inline int arch_write_can_lock(arch_rwlock_t *lock)
 {
-	return (lock)->lock == RW_LOCK_BIAS;
+	return lock->write == WRITE_LOCK_CMP;
 }
 
 static inline void arch_read_lock(arch_rwlock_t *rw)
 {
-	asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t"
+	asm volatile(LOCK_PREFIX READ_LOCK_SIZE(dec) " (%0)\n\t"
 		     "jns 1f\n"
 		     "call __read_lock_failed\n\t"
 		     "1:\n"
@@ -257,47 +256,55 @@ static inline void arch_read_lock(arch_rwlock_t *rw)
 
 static inline void arch_write_lock(arch_rwlock_t *rw)
 {
-	asm volatile(LOCK_PREFIX " subl %1,(%0)\n\t"
+	asm volatile(LOCK_PREFIX WRITE_LOCK_SUB(%1) "(%0)\n\t"
 		     "jz 1f\n"
 		     "call __write_lock_failed\n\t"
 		     "1:\n"
-		     ::LOCK_PTR_REG (rw), "i" (RW_LOCK_BIAS) : "memory");
+		     ::LOCK_PTR_REG (&rw->write), "i" (RW_LOCK_BIAS)
+		     : "memory");
 }
 
 static inline int arch_read_trylock(arch_rwlock_t *lock)
 {
-	atomic_t *count = (atomic_t *)lock;
+	READ_LOCK_ATOMIC(t) *count = (READ_LOCK_ATOMIC(t) *)lock;
 
-	if (atomic_dec_return(count) >= 0)
+	if (READ_LOCK_ATOMIC(dec_return)(count) >= 0)
 		return 1;
-	atomic_inc(count);
+	READ_LOCK_ATOMIC(inc)(count);
 	return 0;
 }
 
 static inline int arch_write_trylock(arch_rwlock_t *lock)
 {
-	atomic_t *count = (atomic_t *)lock;
+	atomic_t *count = (atomic_t *)&lock->write;
 
-	if (atomic_sub_and_test(RW_LOCK_BIAS, count))
+	if (atomic_sub_and_test(WRITE_LOCK_CMP, count))
 		return 1;
-	atomic_add(RW_LOCK_BIAS, count);
+	atomic_add(WRITE_LOCK_CMP, count);
 	return 0;
 }
 
 static inline void arch_read_unlock(arch_rwlock_t *rw)
 {
-	asm volatile(LOCK_PREFIX "incl %0" :"+m" (rw->lock) : : "memory");
+	asm volatile(LOCK_PREFIX READ_LOCK_SIZE(inc) " %0"
+		     :"+m" (rw->lock) : : "memory");
 }
 
 static inline void arch_write_unlock(arch_rwlock_t *rw)
 {
-	asm volatile(LOCK_PREFIX "addl %1, %0"
-		     : "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory");
+	asm volatile(LOCK_PREFIX WRITE_LOCK_ADD(%1) "%0"
+		     : "+m" (rw->write) : "i" (RW_LOCK_BIAS) : "memory");
 }
 
 #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
 #define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
 
+#undef READ_LOCK_SIZE
+#undef READ_LOCK_ATOMIC
+#undef WRITE_LOCK_ADD
+#undef WRITE_LOCK_SUB
+#undef WRITE_LOCK_CMP
+
 #define arch_spin_relax(lock)	cpu_relax()
 #define arch_read_relax(lock)	cpu_relax()
 #define arch_write_relax(lock)	cpu_relax()
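The logic that READ_LOCK_ATOMIC() switches between atomic_t and
atomic64_t is easiest to follow in plain C. A self-contained C11
rendering of the arch_read_trylock() pattern above (a sketch assuming
the <=2048-CPU layout; not the kernel's code):

#include <stdatomic.h>
#include <stdbool.h>

#define RW_LOCK_BIAS 0x00100000		/* <= 2048 CPUs variant */

static bool read_trylock(atomic_int *count)
{
	/* A non-negative result after the decrement means no writer
	 * holds the lock (the write bias is still in place). */
	if (atomic_fetch_sub(count, 1) - 1 >= 0)
		return true;
	atomic_fetch_add(count, 1);	/* back out: a writer is active */
	return false;
}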
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index dcb48b2edc11..7c7a486fcb68 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -11,10 +11,6 @@ typedef struct arch_spinlock {
 
 #define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 }
 
-typedef struct {
-	unsigned int lock;
-} arch_rwlock_t;
-
-#define __ARCH_RW_LOCK_UNLOCKED		{ RW_LOCK_BIAS }
+#include <asm/rwlock.h>
 
 #endif /* _ASM_X86_SPINLOCK_TYPES_H */
diff --git a/arch/x86/lib/rwlock.S b/arch/x86/lib/rwlock.S
index fca17829caa8..1cad22139c88 100644
--- a/arch/x86/lib/rwlock.S
+++ b/arch/x86/lib/rwlock.S
@@ -15,12 +15,12 @@ ENTRY(__write_lock_failed)
 	CFI_STARTPROC
 	FRAME
 0:	LOCK_PREFIX
-	addl	$RW_LOCK_BIAS, (%__lock_ptr)
+	WRITE_LOCK_ADD($RW_LOCK_BIAS) (%__lock_ptr)
 1:	rep; nop
-	cmpl	$RW_LOCK_BIAS, (%__lock_ptr)
+	cmpl	$WRITE_LOCK_CMP, (%__lock_ptr)
 	jne	1b
 	LOCK_PREFIX
-	subl	$RW_LOCK_BIAS, (%__lock_ptr)
+	WRITE_LOCK_SUB($RW_LOCK_BIAS) (%__lock_ptr)
 	jnz	0b
 	ENDFRAME
 	ret
@@ -31,12 +31,12 @@ ENTRY(__read_lock_failed)
 	CFI_STARTPROC
 	FRAME
 0:	LOCK_PREFIX
-	incl	(%__lock_ptr)
+	READ_LOCK_SIZE(inc) (%__lock_ptr)
 1:	rep; nop
-	cmpl	$1, (%__lock_ptr)
+	READ_LOCK_SIZE(cmp) $1, (%__lock_ptr)
 	js	1b
 	LOCK_PREFIX
-	decl	(%__lock_ptr)
+	READ_LOCK_SIZE(dec) (%__lock_ptr)
 	js	0b
 	ENDFRAME
 	ret
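The slow paths live in assembly so the inline fast paths can fall
through to them with the lock pointer already in a fixed register; the
algorithm itself reads naturally in C. A C11 sketch of
__write_lock_failed above (illustrative only):

#include <stdatomic.h>

#define RW_LOCK_BIAS 0x00100000		/* <= 2048 CPUs variant */

static void write_lock_failed(atomic_int *count)
{
	for (;;) {
		/* 0: undo the bias the fast path subtracted */
		atomic_fetch_add(count, RW_LOCK_BIAS);
		/* 1: spin (rep; nop) until the lock looks free */
		while (atomic_load(count) != RW_LOCK_BIAS)
			;	/* cpu_relax() in kernel terms */
		/* retry; landing exactly on zero means we own it */
		if (atomic_fetch_sub(count, RW_LOCK_BIAS) == RW_LOCK_BIAS)
			return;
	}
}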
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index d5b088b3ab81..a63efd6bb6a5 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -8,7 +8,6 @@
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
 #include <asm/calling.h>
-#include <asm/rwlock.h>
 
 /* rdi: arg1 ... normal C conventions. rax is saved/restored. */
 	.macro THUNK name, func, put_ret_addr_in_rdi=0