path: root/kernel/locking/qspinlock.c
Diffstat (limited to 'kernel/locking/qspinlock.c')
-rw-r--r--  kernel/locking/qspinlock.c | 143
1 file changed, 97 insertions(+), 46 deletions(-)
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index bfaeb05123ff..8a8c3c208c5e 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -74,12 +74,24 @@
  */
 
 #include "mcs_spinlock.h"
+#define MAX_NODES	4
 
+/*
+ * On 64-bit architectures, the mcs_spinlock structure will be 16 bytes in
+ * size and four of them will fit nicely in one 64-byte cacheline. For
+ * pvqspinlock, however, we need more space for extra data. To accommodate
+ * that, we insert two more long words to pad it up to 32 bytes. IOW, only
+ * two of them can fit in a cacheline in this case. That is OK as it is rare
+ * to have more than 2 levels of slowpath nesting in actual use. We don't
+ * want to penalize pvqspinlocks to optimize for a rare case in native
+ * qspinlocks.
+ */
+struct qnode {
+	struct mcs_spinlock mcs;
 #ifdef CONFIG_PARAVIRT_SPINLOCKS
-#define MAX_NODES	8
-#else
-#define MAX_NODES	4
+	long reserved[2];
 #endif
+};
 
 /*
  * The pending bit spinning loop count.
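A quick way to sanity-check the size arithmetic in the new comment is a standalone user-space program. This is only an illustration: the mcs_spinlock layout below (a next pointer plus two ints) is an assumption mirroring the usual kernel definition, not taken from the kernel headers.

#include <stdio.h>

/* Assumed stand-in for the kernel's struct mcs_spinlock: 8 + 4 + 4 = 16
 * bytes on LP64, so four fit in a 64-byte cacheline. */
struct mcs_spinlock {
	struct mcs_spinlock *next;
	int locked;
	int count;
};

/* The new struct qnode from the hunk above; with the PV padding it grows
 * to 32 bytes, so only two fit per cacheline. */
struct qnode {
	struct mcs_spinlock mcs;
	long reserved[2];	/* only present when CONFIG_PARAVIRT_SPINLOCKS=y */
};

int main(void)
{
	printf("mcs_spinlock: %zu bytes, %zu per 64-byte line\n",
	       sizeof(struct mcs_spinlock), 64 / sizeof(struct mcs_spinlock));
	printf("qnode (PV):   %zu bytes, %zu per 64-byte line\n",
	       sizeof(struct qnode), 64 / sizeof(struct qnode));
	return 0;
}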
@@ -101,7 +113,7 @@
  *
  * PV doubles the storage and uses the second cacheline for PV state.
  */
-static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]);
+static DEFINE_PER_CPU_ALIGNED(struct qnode, qnodes[MAX_NODES]);
 
 /*
  * We must be able to distinguish between no-tail and the tail at 0:0,
@@ -126,7 +138,13 @@ static inline __pure struct mcs_spinlock *decode_tail(u32 tail)
 	int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
 	int idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
 
-	return per_cpu_ptr(&mcs_nodes[idx], cpu);
+	return per_cpu_ptr(&qnodes[idx].mcs, cpu);
+}
+
+static inline __pure
+struct mcs_spinlock *grab_mcs_node(struct mcs_spinlock *base, int idx)
+{
+	return &((struct qnode *)base + idx)->mcs;
 }
 
 #define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
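The reason grab_mcs_node() exists at all: the per-CPU storage is now an array of struct qnode, so stepping to the idx-th nested node must advance by sizeof(struct qnode), not by sizeof(struct mcs_spinlock) as the old "node += idx" did. A user-space sketch of that pointer arithmetic, reusing the assumed layouts from the previous example:

#include <stdio.h>

struct mcs_spinlock {	/* assumed layout, as in the earlier sketch */
	struct mcs_spinlock *next;
	int locked;
	int count;
};

struct qnode {
	struct mcs_spinlock mcs;
	long reserved[2];	/* PV padding; absent in the native build */
};

/* Same arithmetic as the new grab_mcs_node(): step by whole qnodes. */
static struct mcs_spinlock *grab_mcs_node(struct mcs_spinlock *base, int idx)
{
	return &((struct qnode *)base + idx)->mcs;
}

int main(void)
{
	struct qnode nodes[4];
	struct mcs_spinlock *base = &nodes[0].mcs;

	/* Correct: lands 2 * sizeof(struct qnode) bytes past the base. */
	printf("qnode stride: %td bytes\n",
	       (char *)grab_mcs_node(base, 2) - (char *)base);

	/* The old 'node += idx' steps by sizeof(struct mcs_spinlock) and
	 * would land inside nodes[1] once the PV padding is present. */
	printf("mcs stride:   %td bytes\n", (char *)(base + 2) - (char *)base);
	return 0;
}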
@@ -232,6 +250,20 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
 #endif /* _Q_PENDING_BITS == 8 */
 
 /**
+ * queued_fetch_set_pending_acquire - fetch the whole lock value and set pending
+ * @lock : Pointer to queued spinlock structure
+ * Return: The previous lock value
+ *
+ * *,*,* -> *,1,*
+ */
+#ifndef queued_fetch_set_pending_acquire
+static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lock)
+{
+	return atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val);
+}
+#endif
+
+/**
  * set_locked - Set the lock bit and own the lock
  * @lock: Pointer to queued spinlock structure
  *
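The generic fallback above is simply an acquire-ordered fetch-or of the pending bit; the #ifndef guard lets an architecture override it with something cheaper. A rough user-space model of the generic version using C11 atomics; the bit positions are assumptions mirroring the usual qspinlock layout (locked byte at bit 0, pending byte above it):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define _Q_LOCKED_MASK	0x000000ffU	/* assumed 8/8/16 split */
#define _Q_PENDING_VAL	0x00000100U

struct qspinlock { _Atomic uint32_t val; };

/* Fetch the whole lock word and set PENDING with acquire ordering. */
static uint32_t queued_fetch_set_pending_acquire(struct qspinlock *lock)
{
	return atomic_fetch_or_explicit(&lock->val, _Q_PENDING_VAL,
					memory_order_acquire);
}

int main(void)
{
	struct qspinlock lock = { 1 };	/* held, otherwise uncontended */
	uint32_t old = queued_fetch_set_pending_acquire(&lock);

	/* old has only the locked byte set, so this caller became the
	 * pending waiter; any other bit would send it to the MCS queue. */
	printf("old=%#x, contended=%d\n", (unsigned)old,
	       !!(old & ~_Q_LOCKED_MASK));
	return 0;
}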
@@ -326,43 +358,48 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	/*
 	 * trylock || pending
 	 *
-	 * 0,0,0 -> 0,0,1 ; trylock
-	 * 0,0,1 -> 0,1,1 ; pending
+	 * 0,0,* -> 0,1,* -> 0,0,1 pending, trylock
 	 */
-	val = atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val);
-	if (!(val & ~_Q_LOCKED_MASK)) {
-		/*
-		 * We're pending, wait for the owner to go away.
-		 *
-		 * *,1,1 -> *,1,0
-		 *
-		 * this wait loop must be a load-acquire such that we match the
-		 * store-release that clears the locked bit and create lock
-		 * sequentiality; this is because not all
-		 * clear_pending_set_locked() implementations imply full
-		 * barriers.
-		 */
-		if (val & _Q_LOCKED_MASK) {
-			atomic_cond_read_acquire(&lock->val,
-						 !(VAL & _Q_LOCKED_MASK));
-		}
+	val = queued_fetch_set_pending_acquire(lock);
 
-		/*
-		 * take ownership and clear the pending bit.
-		 *
-		 * *,1,0 -> *,0,1
-		 */
-		clear_pending_set_locked(lock);
-		qstat_inc(qstat_lock_pending, true);
-		return;
+	/*
+	 * If we observe contention, there is a concurrent locker.
+	 *
+	 * Undo and queue; our setting of PENDING might have made the
+	 * n,0,0 -> 0,0,0 transition fail and it will now be waiting
+	 * on @next to become !NULL.
+	 */
+	if (unlikely(val & ~_Q_LOCKED_MASK)) {
+
+		/* Undo PENDING if we set it. */
+		if (!(val & _Q_PENDING_MASK))
+			clear_pending(lock);
+
+		goto queue;
 	}
 
 	/*
-	 * If pending was clear but there are waiters in the queue, then
-	 * we need to undo our setting of pending before we queue ourselves.
+	 * We're pending, wait for the owner to go away.
+	 *
+	 * 0,1,1 -> 0,1,0
+	 *
+	 * this wait loop must be a load-acquire such that we match the
+	 * store-release that clears the locked bit and create lock
+	 * sequentiality; this is because not all
+	 * clear_pending_set_locked() implementations imply full
+	 * barriers.
+	 */
+	if (val & _Q_LOCKED_MASK)
+		atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_MASK));
+
+	/*
+	 * take ownership and clear the pending bit.
+	 *
+	 * 0,1,0 -> 0,0,1
 	 */
-	if (!(val & _Q_PENDING_MASK))
-		clear_pending(lock);
+	clear_pending_set_locked(lock);
+	qstat_inc(qstat_lock_pending, true);
+	return;
 
 	/*
 	 * End of pending bit optimistic spinning and beginning of MCS
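The reordered fast path above boils down to one predicate on the fetched value: anything set outside the locked byte (pending or a tail) means a concurrent locker, so we undo PENDING if we were the one to set it and fall through to the MCS queue; otherwise we are the pending waiter and only need to wait for the locked byte to clear. A small worked example of that predicate, using the same assumed 8/8/16 bit layout as the earlier sketch:

#include <stdint.h>
#include <stdio.h>

#define _Q_LOCKED_MASK		0x000000ffU	/* assumed layout */
#define _Q_PENDING_MASK		0x0000ff00U
#define _Q_TAIL_MASK		0xffff0000U

/* Classify a value as returned by queued_fetch_set_pending_acquire(). */
static const char *classify(uint32_t val)
{
	if (!(val & ~_Q_LOCKED_MASK))
		return "pending path: wait for the owner (if any), then take the lock";
	if (!(val & _Q_PENDING_MASK))
		return "queue, after undoing the PENDING bit we just set";
	return "queue, PENDING was already someone else's";
}

int main(void)
{
	uint32_t samples[] = {
		0x00000000,	/* lock was free */
		0x00000001,	/* held, no other contention */
		0x00010001,	/* held with a tail: queue and undo PENDING */
		0x00000101,	/* held with PENDING already set: just queue */
	};

	for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		printf("val=%#010x -> %s\n", (unsigned)samples[i],
		       classify(samples[i]));
	return 0;
}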
@@ -371,11 +408,16 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 queue:
 	qstat_inc(qstat_lock_slowpath, true);
 pv_queue:
-	node = this_cpu_ptr(&mcs_nodes[0]);
+	node = this_cpu_ptr(&qnodes[0].mcs);
 	idx = node->count++;
 	tail = encode_tail(smp_processor_id(), idx);
 
-	node += idx;
+	node = grab_mcs_node(node, idx);
+
+	/*
+	 * Keep counts of non-zero index values:
+	 */
+	qstat_inc(qstat_lock_idx1 + idx - 1, idx);
 
 	/*
 	 * Ensure that we increment the head node->count before initialising
@@ -476,16 +518,25 @@ locked:
 	 */
 
 	/*
-	 * In the PV case we might already have _Q_LOCKED_VAL set.
+	 * In the PV case we might already have _Q_LOCKED_VAL set, because
+	 * of lock stealing; therefore we must also allow:
+	 *
+	 * n,0,1 -> 0,0,1
 	 *
-	 * The atomic_cond_read_acquire() call above has provided the
-	 * necessary acquire semantics required for locking.
+	 * Note: at this point: (val & _Q_PENDING_MASK) == 0, because of the
+	 * above wait condition, therefore any concurrent setting of
+	 * PENDING will make the uncontended transition fail.
 	 */
-	if (((val & _Q_TAIL_MASK) == tail) &&
-	    atomic_try_cmpxchg_relaxed(&lock->val, &val, _Q_LOCKED_VAL))
-		goto release; /* No contention */
+	if ((val & _Q_TAIL_MASK) == tail) {
+		if (atomic_try_cmpxchg_relaxed(&lock->val, &val, _Q_LOCKED_VAL))
+			goto release; /* No contention */
+	}
 
-	/* Either somebody is queued behind us or _Q_PENDING_VAL is set */
+	/*
+	 * Either somebody is queued behind us or _Q_PENDING_VAL got set
+	 * which will then detect the remaining tail and queue behind us
+	 * ensuring we'll see a @next.
+	 */
 	set_locked(lock);
 
 	/*
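The n,0,1 -> 0,0,1 handoff above relies on try_cmpxchg semantics: on failure the old value is refreshed from memory, and since PENDING is known to be clear at this point, the only way the exchange can fail is a new waiter (or a concurrent PENDING setter) changing the word, in which case we fall back to just setting the locked byte. Roughly, in C11 terms; this is a sketch with an assumed bit layout, not the kernel's atomic API:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define _Q_TAIL_MASK	0xffff0000U	/* assumed layout */
#define _Q_LOCKED_VAL	0x00000001U

struct qspinlock { _Atomic uint32_t val; };

/* Sketch of the queue-head handoff. The kernel's atomic_try_cmpxchg_relaxed()
 * behaves like compare_exchange_strong with relaxed ordering: on failure it
 * refreshes *val with the freshly read lock word. */
static bool try_uncontended_handoff(struct qspinlock *lock, uint32_t *val,
				    uint32_t tail)
{
	if ((*val & _Q_TAIL_MASK) != tail)
		return false;	/* someone already queued behind us */

	/* n,0,1 -> 0,0,1: claim the lock and clear our tail in one shot. */
	return atomic_compare_exchange_strong_explicit(&lock->val, val,
						       _Q_LOCKED_VAL,
						       memory_order_relaxed,
						       memory_order_relaxed);
}

int main(void)
{
	struct qspinlock lock = { 0x00040000 };	/* only our tail is encoded */
	uint32_t val = 0x00040000;

	/* Uncontended case: the cmpxchg succeeds and we exit holding the lock. */
	return try_uncontended_handoff(&lock, &val, 0x00040000) ? 0 : 1;
}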
@@ -501,7 +552,7 @@ release:
 	/*
 	 * release the node
 	 */
-	__this_cpu_dec(mcs_nodes[0].count);
+	__this_cpu_dec(qnodes[0].mcs.count);
 }
 EXPORT_SYMBOL(queued_spin_lock_slowpath);
 