path: root/kernel/locking/qspinlock.c
author	Waiman Long <Waiman.Long@hp.com>	2015-04-24 14:56:35 -0400
committer	Ingo Molnar <mingo@kernel.org>	2015-05-08 06:36:55 -0400
commit	2c83e8e9492dc823be1d96d4c5ef75d16d3866a0 (patch)
tree	612d1bb3e7410ad4208f3216e8de0592d69755e1 /kernel/locking/qspinlock.c
parent	69f9cae90907e09af95fb991ed384670cef8dd32 (diff)
locking/qspinlock: Use a simple write to grab the lock
Currently, atomic_cmpxchg() is used to get the lock. However, this is
not really necessary if there is more than one task in the queue and the
queue head doesn't need to reset the tail code. For that case, a simple
write to set the lock bit is enough as the queue head will be the only
one eligible to get the lock as long as it checks that both the lock and
pending bits are not set. The current pending bit waiting code will
ensure that the bit will not be set as soon as the tail code in the lock
is set.

With that change, there is a slight improvement in the performance of
the queued spinlock in the 5M loop micro-benchmark run on a 4-socket
Westmere-EX machine, as shown in the tables below.

  [Standalone/Embedded - same node]
  # of tasks	Before patch	After patch	%Change
  ----------	------------	-----------	-------
       3	 2324/2321	 2248/2265	-3%/-2%
       4	 2890/2896	 2819/2831	-2%/-2%
       5	 3611/3595	 3522/3512	-2%/-2%
       6	 4281/4276	 4173/4160	-3%/-3%
       7	 5018/5001	 4875/4861	-3%/-3%
       8	 5759/5750	 5563/5568	-3%/-3%

  [Standalone/Embedded - different nodes]
  # of tasks	Before patch	After patch	%Change
  ----------	------------	-----------	-------
       3	12242/12237	12087/12093	-1%/-1%
       4	10688/10696	10507/10521	-2%/-2%

It was also found that this change produced a much bigger performance
improvement on the newer IvyBridge-EX chip, essentially closing the
performance gap between the ticket spinlock and the queued spinlock.

The disk workload of the AIM7 benchmark was run on a 4-socket
Westmere-EX machine with both ext4 and xfs RAM disks at 3000 users on a
3.14-based kernel. The results of the test runs were:

                AIM7 XFS Disk Test
  kernel	JPM	Real Time	Sys Time	Usr Time
  ------	---	---------	--------	--------
  ticketlock	5678233	3.17		 96.61		5.81
  qspinlock	5750799	3.13		 94.83		5.97

                AIM7 EXT4 Disk Test
  kernel	JPM	Real Time	Sys Time	Usr Time
  ------	---	---------	--------	--------
  ticketlock	1114551	16.15		509.72		7.11
  qspinlock	2184466	 8.24		232.99		6.01

The ext4 filesystem run had a much higher spinlock contention than the
xfs filesystem run.

The "ebizzy -m" test was also run with the following results:

  kernel	records/s	Real Time	Sys Time	Usr Time
  ------	---------	---------	--------	--------
  ticketlock	2075		10.00		216.35		3.49
  qspinlock	3023		10.00		198.20		4.80

Signed-off-by: Waiman Long <Waiman.Long@hp.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Daniel J Blueman <daniel@numascale.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Douglas Hatch <doug.hatch@hp.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Paolo Bonzini <paolo.bonzini@gmail.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Scott J Norton <scott.norton@hp.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: virtualization@lists.linux-foundation.org
Cc: xen-devel@lists.xenproject.org
Link: http://lkml.kernel.org/r/1429901803-29771-7-git-send-email-Waiman.Long@hp.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
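[Editor's illustration, not part of the commit] To make the two queue-head
exit paths concrete, here is a minimal user-space sketch of the idea. The
union mirrors the little-endian __qspinlock layout this patch introduces,
but the tail values and the GCC/Clang __atomic builtin are stand-ins for
the kernel's own types and primitives:

  #include <stdint.h>
  #include <stdio.h>

  /*
   * Illustrative model of the lock word on a little-endian machine:
   * locked is byte 0, pending is byte 1, the tail code fills bytes 2-3.
   */
  union qsl_model {
  	uint32_t val;
  	struct {
  		uint8_t  locked;
  		uint8_t  pending;
  		uint16_t tail;
  	};
  };

  int main(void)
  {
  	union qsl_model lock = { .val = 0 };
  	uint32_t expected;

  	/*
  	 * Case 1: other tasks are queued behind the head (val != tail).
  	 * The tail code must stay intact, and nobody else may touch the
  	 * locked/pending bytes, so a plain byte store (WRITE_ONCE() in
  	 * the kernel) is enough to take the lock.
  	 */
  	lock.tail = 0x81;	/* arbitrary successor tail code */
  	lock.locked = 1;	/* simple write, no atomic_cmpxchg() */
  	printf("contended:   val=0x%08x (tail preserved)\n", lock.val);

  	/*
  	 * Case 2: the head is the only queued task (val == tail). The
  	 * tail code must be cleared and the lock bit set in one atomic
  	 * step, so a cmpxchg is still needed to race safely against a
  	 * new task appending itself to the queue.
  	 */
  	lock.val = 0x00810000;	/* only our own tail code is set */
  	expected = 0x00810000;
  	if (__atomic_compare_exchange_n(&lock.val, &expected, 1, 0,
  					__ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
  		printf("uncontended: val=0x%08x (tail cleared)\n", lock.val);

  	return 0;
  }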
Diffstat (limited to 'kernel/locking/qspinlock.c')
-rw-r--r--	kernel/locking/qspinlock.c	66
1 file changed, 50 insertions(+), 16 deletions(-)
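[Editor's illustration, not part of the commit] The new wait-loop comment
in the diff below hinges on acquire/release pairing: set_locked() is a
plain relaxed store, so ordering comes from the queue head's load-acquire
pairing with the store-release of the previous owner's unlock. A minimal
model of that argument, again a sketch using GCC/Clang __atomic builtins
rather than the kernel's smp_load_acquire()/smp_store_release():

  #include <stdint.h>

  static uint32_t lock_word;
  static int protected_data;

  void previous_owner_unlock(void)
  {
  	protected_data = 42;	/* last write in the critical section */
  	/* like smp_store_release(): clears locked, publishes the data */
  	__atomic_store_n(&lock_word, 0, __ATOMIC_RELEASE);
  }

  int queue_head_wait_then_lock(void)
  {
  	/* like smp_load_acquire(): pairs with the release above */
  	while (__atomic_load_n(&lock_word, __ATOMIC_ACQUIRE) & 0xffff)
  		;		/* cpu_relax() */
  	lock_word = 1;		/* relaxed set_locked()-style write */
  	return protected_data;	/* the pairing guarantees we see 42 */
  }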
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index e17efe7b8d4d..033872113ebb 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -105,24 +105,37 @@ static inline struct mcs_spinlock *decode_tail(u32 tail)
  * By using the whole 2nd least significant byte for the pending bit, we
  * can allow better optimization of the lock acquisition for the pending
  * bit holder.
+ *
+ * This internal structure is also used by the set_locked function which
+ * is not restricted to _Q_PENDING_BITS == 8.
  */
-#if _Q_PENDING_BITS == 8
-
 struct __qspinlock {
 	union {
 		atomic_t val;
-		struct {
 #ifdef __LITTLE_ENDIAN
+		struct {
+			u8	locked;
+			u8	pending;
+		};
+		struct {
 			u16	locked_pending;
 			u16	tail;
+		};
 #else
+		struct {
 			u16	tail;
 			u16	locked_pending;
-#endif
 		};
+		struct {
+			u8	reserved[2];
+			u8	pending;
+			u8	locked;
+		};
+#endif
 	};
 };
 
+#if _Q_PENDING_BITS == 8
 /**
  * clear_pending_set_locked - take ownership and clear the pending bit.
  * @lock: Pointer to queued spinlock structure
@@ -195,6 +208,19 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
 #endif /* _Q_PENDING_BITS == 8 */
 
 /**
+ * set_locked - Set the lock bit and own the lock
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,*,0 -> *,0,1
+ */
+static __always_inline void set_locked(struct qspinlock *lock)
+{
+	struct __qspinlock *l = (void *)lock;
+
+	WRITE_ONCE(l->locked, _Q_LOCKED_VAL);
+}
+
+/**
  * queued_spin_lock_slowpath - acquire the queued spinlock
  * @lock: Pointer to queued spinlock structure
  * @val: Current value of the queued spinlock 32-bit word
@@ -329,8 +355,14 @@ queue:
 	 * go away.
 	 *
 	 * *,x,y -> *,0,0
+	 *
+	 * this wait loop must use a load-acquire such that we match the
+	 * store-release that clears the locked bit and create lock
+	 * sequentiality; this is because the set_locked() function below
+	 * does not imply a full barrier.
+	 *
 	 */
-	while ((val = atomic_read(&lock->val)) & _Q_LOCKED_PENDING_MASK)
+	while ((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_PENDING_MASK)
 		cpu_relax();
 
 	/*
@@ -338,15 +370,19 @@ queue:
 	 *
 	 * n,0,0 -> 0,0,1 : lock, uncontended
 	 * *,0,0 -> *,0,1 : lock, contended
+	 *
+	 * If the queue head is the only one in the queue (lock value == tail),
+	 * clear the tail code and grab the lock. Otherwise, we only need
+	 * to grab the lock.
 	 */
 	for (;;) {
-		new = _Q_LOCKED_VAL;
-		if (val != tail)
-			new |= val;
-
-		old = atomic_cmpxchg(&lock->val, val, new);
-		if (old == val)
+		if (val != tail) {
+			set_locked(lock);
 			break;
+		}
+		old = atomic_cmpxchg(&lock->val, val, _Q_LOCKED_VAL);
+		if (old == val)
+			goto release;	/* No contention */
 
 		val = old;
 	}
@@ -354,12 +390,10 @@ queue:
 	/*
 	 * contended path; wait for next, release.
 	 */
-	if (new != _Q_LOCKED_VAL) {
-		while (!(next = READ_ONCE(node->next)))
-			cpu_relax();
+	while (!(next = READ_ONCE(node->next)))
+		cpu_relax();
 
-		arch_mcs_spin_unlock_contended(&next->locked);
-	}
+	arch_mcs_spin_unlock_contended(&next->locked);
 
 release:
 	/*