author		Jan Beulich <JBeulich@novell.com>	2011-07-19 08:00:45 -0400
committer	Ingo Molnar <mingo@elte.hu>	2011-07-21 03:03:36 -0400
commit		a750036f35cda160ef77408ec92c3dc41f8feebb (patch)
tree		1198013e1289dfb9b5a299388ee09515642c4030
parent		a738669464a1e0d8e7b20f631120192f9cf7cfbd (diff)
x86: Fix write lock scalability 64-bit issue
With the write lock path simply subtracting RW_LOCK_BIAS there is, on
large systems, the theoretical possibility of overflowing the 32-bit
value that was used so far (namely if 128 or more CPUs manage to do the
subtraction, but don't get to do the inverse addition in the failure
path quickly enough).

A first measure is to modify RW_LOCK_BIAS itself - with the new value
chosen, it is good for up to 2048 CPUs each allowed to nest over 2048
times on the read path without causing an issue. Quite possibly it
would even be sufficient to adjust the bias a little further, assuming
that allowing for significantly less nesting would suffice.

However, as the original value chosen allowed for even more nesting
levels, to support more than 2048 CPUs (possible currently only for
64-bit kernels) the lock itself gets widened to 64 bits.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/4E258E0D020000780004E3F0@nat28.tlf.novell.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
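For illustration, the changelog's arithmetic as a standalone C sketch
(not part of the patch; the variable names are made up). It shows that
128 writers holding an outstanding subtraction of the old bias already
account for 2^31, flipping the sign of the 32-bit lock word, and that
the smaller bias pushes the same boundary out to the 2048-CPU cutoff
used below:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const int64_t old_bias = 0x01000000;	/* 2^24, pre-patch RW_LOCK_BIAS  */
	const int64_t new_bias = 0x00100000;	/* 2^20, post-patch RW_LOCK_BIAS */

	/* 128 writers that subtracted the old bias but have not yet done
	 * the inverse addition total 2^31 - beyond INT32_MAX. */
	printf("128 * 0x01000000 = %#llx, INT32_MAX = %#x\n",
	       (unsigned long long)(128 * old_bias), INT32_MAX);

	/* The smaller bias moves that boundary out to 2048 writers,
	 * matching the CONFIG_NR_CPUS <= 2048 cutoff in the patch. */
	printf("0x80000000 / %#llx = %lld CPUs\n",
	       (unsigned long long)new_bias, 0x80000000LL / new_bias);
	return 0;
}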
-rw-r--r--	arch/x86/include/asm/asm.h	2
-rw-r--r--	arch/x86/include/asm/rwlock.h	43
-rw-r--r--	arch/x86/include/asm/spinlock.h	37
-rw-r--r--	arch/x86/include/asm/spinlock_types.h	6
-rw-r--r--	arch/x86/lib/rwlock.S	12
-rw-r--r--	arch/x86/lib/thunk_64.S	1
6 files changed, 73 insertions, 28 deletions
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 5890beb021c4..9412d6558c88 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -3,9 +3,11 @@
 
 #ifdef __ASSEMBLY__
 # define __ASM_FORM(x)	x
+# define __ASM_FORM_COMMA(x)	x,
 # define __ASM_EX_SEC	.section __ex_table, "a"
 #else
 # define __ASM_FORM(x)	" " #x " "
+# define __ASM_FORM_COMMA(x)	" " #x ","
 # define __ASM_EX_SEC	" .section __ex_table,\"a\"\n"
 #endif
 
diff --git a/arch/x86/include/asm/rwlock.h b/arch/x86/include/asm/rwlock.h
index 6a8c0d645108..a5370a03d90c 100644
--- a/arch/x86/include/asm/rwlock.h
+++ b/arch/x86/include/asm/rwlock.h
@@ -1,7 +1,48 @@
 #ifndef _ASM_X86_RWLOCK_H
 #define _ASM_X86_RWLOCK_H
 
-#define RW_LOCK_BIAS		0x01000000
+#include <asm/asm.h>
+
+#if CONFIG_NR_CPUS <= 2048
+
+#ifndef __ASSEMBLY__
+typedef union {
+	s32 lock;
+	s32 write;
+} arch_rwlock_t;
+#endif
+
+#define RW_LOCK_BIAS		0x00100000
+#define READ_LOCK_SIZE(insn)	__ASM_FORM(insn##l)
+#define READ_LOCK_ATOMIC(n)	atomic_##n
+#define WRITE_LOCK_ADD(n)	__ASM_FORM_COMMA(addl n)
+#define WRITE_LOCK_SUB(n)	__ASM_FORM_COMMA(subl n)
+#define WRITE_LOCK_CMP		RW_LOCK_BIAS
+
+#else /* CONFIG_NR_CPUS > 2048 */
+
+#include <linux/const.h>
+
+#ifndef __ASSEMBLY__
+typedef union {
+	s64 lock;
+	struct {
+		u32 read;
+		s32 write;
+	};
+} arch_rwlock_t;
+#endif
+
+#define RW_LOCK_BIAS		(_AC(1,L) << 32)
+#define READ_LOCK_SIZE(insn)	__ASM_FORM(insn##q)
+#define READ_LOCK_ATOMIC(n)	atomic64_##n
+#define WRITE_LOCK_ADD(n)	__ASM_FORM(incl)
+#define WRITE_LOCK_SUB(n)	__ASM_FORM(decl)
+#define WRITE_LOCK_CMP		1
+
+#endif /* CONFIG_NR_CPUS */
+
+#define __ARCH_RW_LOCK_UNLOCKED		{ RW_LOCK_BIAS }
 
 /* Actual code is in asm/spinlock.h or in arch/x86/lib/rwlock.S */
 
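A note on the >2048-CPU union: on little-endian x86-64 the s64 lock
overlays a low u32 read half and a high s32 write half, so the 64-bit
bias (1 << 32) is the same state as write == 1. A minimal user-space
sketch of that layout (illustration only; demo_rwlock_t is an invented
name, not the kernel type, and plain -- / ++ stand in for the
lock-prefixed instructions):

#include <stdint.h>
#include <stdio.h>

typedef union {
	int64_t lock;
	struct {
		uint32_t read;	/* low  32 bits: reader count */
		int32_t  write;	/* high 32 bits: writer field */
	};
} demo_rwlock_t;

int main(void)
{
	demo_rwlock_t rw = { .lock = (int64_t)1 << 32 };  /* RW_LOCK_BIAS */

	printf("unlocked: write=%d read=%#x\n", rw.write, (unsigned)rw.read);

	rw.lock--;	/* a reader's decq on the whole 64-bit word */
	printf("one reader: lock=%#llx (non-negative => acquired)\n",
	       (unsigned long long)rw.lock);
	rw.lock++;	/* reader unlock: incq */

	rw.write--;	/* a writer's decl on the high dword (WRITE_LOCK_SUB) */
	printf("writer: write=%d (hit zero => acquired)\n", rw.write);
	return 0;
}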
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 3089f70c0c52..e9e51f710e6c 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -2,7 +2,6 @@
 #define _ASM_X86_SPINLOCK_H
 
 #include <asm/atomic.h>
-#include <asm/rwlock.h>
 #include <asm/page.h>
 #include <asm/processor.h>
 #include <linux/compiler.h>
@@ -234,7 +233,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
  */
 static inline int arch_read_can_lock(arch_rwlock_t *lock)
 {
-	return (int)(lock)->lock > 0;
+	return lock->lock > 0;
 }
 
 /**
@@ -243,12 +242,12 @@ static inline int arch_read_can_lock(arch_rwlock_t *lock)
  */
 static inline int arch_write_can_lock(arch_rwlock_t *lock)
 {
-	return (lock)->lock == RW_LOCK_BIAS;
+	return lock->write == WRITE_LOCK_CMP;
 }
 
 static inline void arch_read_lock(arch_rwlock_t *rw)
 {
-	asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t"
+	asm volatile(LOCK_PREFIX READ_LOCK_SIZE(dec) " (%0)\n\t"
 		     "jns 1f\n"
 		     "call __read_lock_failed\n\t"
 		     "1:\n"
@@ -257,47 +256,55 @@ static inline void arch_read_lock(arch_rwlock_t *rw)
 
 static inline void arch_write_lock(arch_rwlock_t *rw)
 {
-	asm volatile(LOCK_PREFIX " subl %1,(%0)\n\t"
+	asm volatile(LOCK_PREFIX WRITE_LOCK_SUB(%1) "(%0)\n\t"
 		     "jz 1f\n"
 		     "call __write_lock_failed\n\t"
 		     "1:\n"
-		     ::LOCK_PTR_REG (rw), "i" (RW_LOCK_BIAS) : "memory");
+		     ::LOCK_PTR_REG (&rw->write), "i" (RW_LOCK_BIAS)
+		     : "memory");
 }
 
 static inline int arch_read_trylock(arch_rwlock_t *lock)
 {
-	atomic_t *count = (atomic_t *)lock;
+	READ_LOCK_ATOMIC(t) *count = (READ_LOCK_ATOMIC(t) *)lock;
 
-	if (atomic_dec_return(count) >= 0)
+	if (READ_LOCK_ATOMIC(dec_return)(count) >= 0)
 		return 1;
-	atomic_inc(count);
+	READ_LOCK_ATOMIC(inc)(count);
 	return 0;
 }
 
 static inline int arch_write_trylock(arch_rwlock_t *lock)
 {
-	atomic_t *count = (atomic_t *)lock;
+	atomic_t *count = (atomic_t *)&lock->write;
 
-	if (atomic_sub_and_test(RW_LOCK_BIAS, count))
+	if (atomic_sub_and_test(WRITE_LOCK_CMP, count))
 		return 1;
-	atomic_add(RW_LOCK_BIAS, count);
+	atomic_add(WRITE_LOCK_CMP, count);
 	return 0;
 }
 
 static inline void arch_read_unlock(arch_rwlock_t *rw)
 {
-	asm volatile(LOCK_PREFIX "incl %0" :"+m" (rw->lock) : : "memory");
+	asm volatile(LOCK_PREFIX READ_LOCK_SIZE(inc) " %0"
+		     :"+m" (rw->lock) : : "memory");
 }
 
 static inline void arch_write_unlock(arch_rwlock_t *rw)
 {
-	asm volatile(LOCK_PREFIX "addl %1, %0"
-		     : "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory");
+	asm volatile(LOCK_PREFIX WRITE_LOCK_ADD(%1) "%0"
+		     : "+m" (rw->write) : "i" (RW_LOCK_BIAS) : "memory");
 }
 
 #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
 #define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
 
+#undef READ_LOCK_SIZE
+#undef READ_LOCK_ATOMIC
+#undef WRITE_LOCK_ADD
+#undef WRITE_LOCK_SUB
+#undef WRITE_LOCK_CMP
+
 #define arch_spin_relax(lock)	cpu_relax()
 #define arch_read_relax(lock)	cpu_relax()
 #define arch_write_relax(lock)	cpu_relax()
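For readers tracing the inline asm, a sketch of how the rwlock.h
helpers expand in the two configurations (derived from the definitions
above; whitespace approximate):

/* CONFIG_NR_CPUS <= 2048 (32-bit lock word):
 *   READ_LOCK_SIZE(inc)  ->  " incl "
 *   READ_LOCK_ATOMIC(t)  ->  atomic_t
 *   WRITE_LOCK_SUB(%1)   ->  " subl %1,"		(subtract the full bias)
 *   WRITE_LOCK_CMP       ->  RW_LOCK_BIAS		(0x00100000)
 *
 * CONFIG_NR_CPUS > 2048 (64-bit lock word):
 *   READ_LOCK_SIZE(inc)  ->  " incq "
 *   READ_LOCK_ATOMIC(t)  ->  atomic64_t
 *   WRITE_LOCK_SUB(%1)   ->  " decl "		(argument dropped; writers
 *						 step the write half by 1)
 *   WRITE_LOCK_CMP       ->  1
 */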
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index dcb48b2edc11..7c7a486fcb68 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -11,10 +11,6 @@ typedef struct arch_spinlock {
 
 #define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 }
 
-typedef struct {
-	unsigned int lock;
-} arch_rwlock_t;
-
-#define __ARCH_RW_LOCK_UNLOCKED		{ RW_LOCK_BIAS }
+#include <asm/rwlock.h>
 
 #endif /* _ASM_X86_SPINLOCK_TYPES_H */
diff --git a/arch/x86/lib/rwlock.S b/arch/x86/lib/rwlock.S
index fca17829caa8..1cad22139c88 100644
--- a/arch/x86/lib/rwlock.S
+++ b/arch/x86/lib/rwlock.S
@@ -15,12 +15,12 @@ ENTRY(__write_lock_failed)
 	CFI_STARTPROC
 	FRAME
 0:	LOCK_PREFIX
-	addl	$RW_LOCK_BIAS, (%__lock_ptr)
+	WRITE_LOCK_ADD($RW_LOCK_BIAS) (%__lock_ptr)
 1:	rep; nop
-	cmpl	$RW_LOCK_BIAS, (%__lock_ptr)
+	cmpl	$WRITE_LOCK_CMP, (%__lock_ptr)
 	jne	1b
 	LOCK_PREFIX
-	subl	$RW_LOCK_BIAS, (%__lock_ptr)
+	WRITE_LOCK_SUB($RW_LOCK_BIAS) (%__lock_ptr)
 	jnz	0b
 	ENDFRAME
 	ret
@@ -31,12 +31,12 @@ ENTRY(__read_lock_failed)
 	CFI_STARTPROC
 	FRAME
 0:	LOCK_PREFIX
-	incl	(%__lock_ptr)
+	READ_LOCK_SIZE(inc) (%__lock_ptr)
 1:	rep; nop
-	cmpl	$1, (%__lock_ptr)
+	READ_LOCK_SIZE(cmp) $1, (%__lock_ptr)
 	js	1b
 	LOCK_PREFIX
-	decl	(%__lock_ptr)
+	READ_LOCK_SIZE(dec) (%__lock_ptr)
 	js	0b
 	ENDFRAME
 	ret
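The __write_lock_failed loop above reads more easily in C. A hedged
C11 rendering (illustration only: the kernel keeps this path in
assembly so the inline fast path can call it without clobbering
registers; write_lock_failed_sketch and its parameters are invented
names, with "unlocked" standing in for WRITE_LOCK_CMP):

#include <stdatomic.h>
#include <stdint.h>

static void write_lock_failed_sketch(_Atomic int32_t *write, int32_t unlocked)
{
	for (;;) {
		/* Undo the claim the failed fast path already made. */
		atomic_fetch_add(write, unlocked);
		/* Spin read-only (the asm's "rep; nop" pause loop) until
		 * the lock looks free, avoiding bus-locked operations. */
		while (atomic_load(write) != unlocked)
			;
		/* Try to claim it again: a previous value of "unlocked"
		 * means the subtraction hit zero and we own the lock. */
		if (atomic_fetch_sub(write, unlocked) == unlocked)
			return;
	}
}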
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index d5b088b3ab81..a63efd6bb6a5 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -8,7 +8,6 @@
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
 #include <asm/calling.h>
-#include <asm/rwlock.h>
 
 	/* rdi: arg1 ... normal C conventions. rax is saved/restored. */
 	.macro THUNK name, func, put_ret_addr_in_rdi=0