Diffstat (limited to 'kernel/locking/qspinlock.c')
-rw-r--r--  kernel/locking/qspinlock.c  143
1 file changed, 97 insertions(+), 46 deletions(-)
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index bfaeb05123ff..8a8c3c208c5e 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -74,12 +74,24 @@
  */
 
 #include "mcs_spinlock.h"
+#define MAX_NODES 4
 
+/*
+ * On 64-bit architectures, the mcs_spinlock structure will be 16 bytes in
+ * size and four of them will fit nicely in one 64-byte cacheline. For
+ * pvqspinlock, however, we need more space for extra data. To accommodate
+ * that, we insert two more long words to pad it up to 32 bytes. IOW, only
+ * two of them can fit in a cacheline in this case. That is OK as it is rare
+ * to have more than 2 levels of slowpath nesting in actual use. We don't
+ * want to penalize pvqspinlocks to optimize for a rare case in native
+ * qspinlocks.
+ */
+struct qnode {
+        struct mcs_spinlock mcs;
 #ifdef CONFIG_PARAVIRT_SPINLOCKS
-#define MAX_NODES 8
-#else
-#define MAX_NODES 4
-#endif
+        long reserved[2];
 #endif
+};
 
 /*
  * The pending bit spinning loop count.
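The cacheline argument in the new comment block is easy to sanity-check outside the kernel. The following stand-alone sketch assumes an LP64 target (8-byte pointers and longs) and only loosely mirrors the field layout of mcs_spinlock.h; it is a model, not kernel code.

/*
 * Stand-alone model of the cacheline math above.  Assumes LP64
 * (8-byte pointers and longs); field names only mirror the kernel's
 * mcs_spinlock.h.
 */
#include <stdio.h>

struct mcs_spinlock {                   /* 16 bytes on LP64 */
        struct mcs_spinlock *next;
        int locked;
        int count;
};

struct qnode_native {                   /* native: 4 per 64-byte line */
        struct mcs_spinlock mcs;
};

struct qnode_pv {                       /* PV: padded to 32 bytes, 2 per line */
        struct mcs_spinlock mcs;
        long reserved[2];
};

int main(void)
{
        printf("mcs_spinlock : %zu bytes\n", sizeof(struct mcs_spinlock));
        printf("native qnode : %zu bytes, %zu per 64-byte line\n",
               sizeof(struct qnode_native), 64 / sizeof(struct qnode_native));
        printf("pv qnode     : %zu bytes, %zu per 64-byte line\n",
               sizeof(struct qnode_pv), 64 / sizeof(struct qnode_pv));
        return 0;
}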
@@ -101,7 +113,7 @@
  *
  * PV doubles the storage and uses the second cacheline for PV state.
  */
-static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]);
+static DEFINE_PER_CPU_ALIGNED(struct qnode, qnodes[MAX_NODES]);
 
 /*
  * We must be able to distinguish between no-tail and the tail at 0:0,
@@ -126,7 +138,13 @@ static inline __pure struct mcs_spinlock *decode_tail(u32 tail)
         int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
         int idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
 
-        return per_cpu_ptr(&mcs_nodes[idx], cpu);
+        return per_cpu_ptr(&qnodes[idx].mcs, cpu);
+}
+
+static inline __pure
+struct mcs_spinlock *grab_mcs_node(struct mcs_spinlock *base, int idx)
+{
+        return &((struct qnode *)base + idx)->mcs;
 }
 
 #define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
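The new grab_mcs_node() exists because the per-CPU storage is now laid out as struct qnode, so indexing has to stride by sizeof(struct qnode) rather than sizeof(struct mcs_spinlock) while callers keep working with struct mcs_spinlock pointers. For reference, here is a user-space round-trip of the tail packing that decode_tail() reverses; the constants follow qspinlock_types.h for configurations with fewer than 16K CPUs, which is an assumption of this sketch.

/*
 * Round-trip of the tail encoding that decode_tail() undoes.  The
 * constants follow qspinlock_types.h for NR_CPUS < 16K (assumption);
 * other configurations shift the fields around.
 */
#include <stdio.h>

#define _Q_TAIL_IDX_OFFSET      16
#define _Q_TAIL_IDX_BITS        2
#define _Q_TAIL_IDX_MASK        (((1U << _Q_TAIL_IDX_BITS) - 1) << _Q_TAIL_IDX_OFFSET)
#define _Q_TAIL_CPU_OFFSET      (_Q_TAIL_IDX_OFFSET + _Q_TAIL_IDX_BITS)

static unsigned int encode_tail(int cpu, int idx)
{
        return ((unsigned int)(cpu + 1) << _Q_TAIL_CPU_OFFSET) |
               ((unsigned int)idx << _Q_TAIL_IDX_OFFSET);
}

int main(void)
{
        unsigned int tail = encode_tail(3, 1);          /* cpu 3, nesting idx 1 */
        int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
        int idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;

        printf("tail=0x%x -> cpu=%d idx=%d\n", tail, cpu, idx);
        return 0;
}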
@@ -232,6 +250,20 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
 #endif /* _Q_PENDING_BITS == 8 */
 
 /**
+ * queued_fetch_set_pending_acquire - fetch the whole lock value and set pending
+ * @lock : Pointer to queued spinlock structure
+ * Return: The previous lock value
+ *
+ * *,*,* -> *,1,*
+ */
+#ifndef queued_fetch_set_pending_acquire
+static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lock)
+{
+        return atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val);
+}
+#endif
+
+/**
  * set_locked - Set the lock bit and own the lock
  * @lock: Pointer to queued spinlock structure
  *
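The #ifndef guard lets an architecture header supply its own queued_fetch_set_pending_acquire() (x86 provides one); the generic version is a fetch-or with acquire ordering. A user-space model of what the helper does, with C11 atomics standing in for the kernel atomic API and bit values taken from qspinlock_types.h:

/*
 * Model of the generic helper: atomically set PENDING and return the
 * previous lock word, with acquire ordering.  C11 atomics stand in for
 * the kernel atomic API; bit values follow qspinlock_types.h.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define _Q_LOCKED_VAL   (1U << 0)
#define _Q_PENDING_VAL  (1U << 8)

static uint32_t fetch_set_pending_acquire(_Atomic uint32_t *val)
{
        return atomic_fetch_or_explicit(val, _Q_PENDING_VAL,
                                        memory_order_acquire);
}

int main(void)
{
        _Atomic uint32_t lock = _Q_LOCKED_VAL;  /* 0,0,1: someone holds the lock */
        uint32_t old = fetch_set_pending_acquire(&lock);

        /* old == 0x1, lock == 0x101: the *,*,* -> *,1,* transition above */
        printf("old=0x%x new=0x%x\n", old, (unsigned int)atomic_load(&lock));
        return 0;
}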
@@ -326,43 +358,48 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
         /*
          * trylock || pending
          *
-         * 0,0,0 -> 0,0,1 ; trylock
-         * 0,0,1 -> 0,1,1 ; pending
+         * 0,0,* -> 0,1,* -> 0,0,1 pending, trylock
          */
-        val = atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val);
-        if (!(val & ~_Q_LOCKED_MASK)) {
-                /*
-                 * We're pending, wait for the owner to go away.
-                 *
-                 * *,1,1 -> *,1,0
-                 *
-                 * this wait loop must be a load-acquire such that we match the
-                 * store-release that clears the locked bit and create lock
-                 * sequentiality; this is because not all
-                 * clear_pending_set_locked() implementations imply full
-                 * barriers.
-                 */
-                if (val & _Q_LOCKED_MASK) {
-                        atomic_cond_read_acquire(&lock->val,
-                                                 !(VAL & _Q_LOCKED_MASK));
-                }
+        val = queued_fetch_set_pending_acquire(lock);
 
-                /*
-                 * take ownership and clear the pending bit.
-                 *
-                 * *,1,0 -> *,0,1
-                 */
-                clear_pending_set_locked(lock);
-                qstat_inc(qstat_lock_pending, true);
-                return;
+        /*
+         * If we observe contention, there is a concurrent locker.
+         *
+         * Undo and queue; our setting of PENDING might have made the
+         * n,0,0 -> 0,0,0 transition fail and it will now be waiting
+         * on @next to become !NULL.
+         */
+        if (unlikely(val & ~_Q_LOCKED_MASK)) {
+
+                /* Undo PENDING if we set it. */
+                if (!(val & _Q_PENDING_MASK))
+                        clear_pending(lock);
+
+                goto queue;
         }
 
         /*
-         * If pending was clear but there are waiters in the queue, then
-         * we need to undo our setting of pending before we queue ourselves.
+         * We're pending, wait for the owner to go away.
+         *
+         * 0,1,1 -> 0,1,0
+         *
+         * this wait loop must be a load-acquire such that we match the
+         * store-release that clears the locked bit and create lock
+         * sequentiality; this is because not all
+         * clear_pending_set_locked() implementations imply full
+         * barriers.
+         */
+        if (val & _Q_LOCKED_MASK)
+                atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_MASK));
+
+        /*
+         * take ownership and clear the pending bit.
+         *
+         * 0,1,0 -> 0,0,1
          */
-        if (!(val & _Q_PENDING_MASK))
-                clear_pending(lock);
+        clear_pending_set_locked(lock);
+        qstat_inc(qstat_lock_pending, true);
+        return;
 
         /*
          * End of pending bit optimistic spinning and beginning of MCS
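The reworked trylock/pending path reads more easily with the (tail, pending, locked) transitions spelled out. The sketch below is a single-threaded, user-space walk-through that assumes no other CPU interferes: the owner's unlock is simulated inline so the wait loop terminates, and clear_pending()/clear_pending_set_locked() are modelled with plain atomics instead of the kernel's byte/short stores.

/*
 * Single-threaded walk-through of the new pending path, using the
 * (tail, pending, locked) notation from the comments.  Assumes no
 * concurrent CPUs; the owner's unlock is simulated inline.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define _Q_LOCKED_MASK  0xffU
#define _Q_LOCKED_VAL   (1U << 0)
#define _Q_PENDING_VAL  (1U << 8)
#define _Q_PENDING_MASK (0xffU << 8)

static _Atomic uint32_t lockval = _Q_LOCKED_VAL;        /* 0,0,1: lock held */

int main(void)
{
        /* 0,0,* -> 0,1,*: fetch the old word and set PENDING */
        uint32_t val = atomic_fetch_or_explicit(&lockval, _Q_PENDING_VAL,
                                                memory_order_acquire);

        /* Contention (tail or PENDING was already set): undo and queue. */
        if (val & ~_Q_LOCKED_MASK) {
                if (!(val & _Q_PENDING_MASK))           /* clear_pending() */
                        atomic_fetch_and_explicit(&lockval, ~_Q_PENDING_VAL,
                                                  memory_order_relaxed);
                return 1;                               /* 'goto queue' in the kernel */
        }

        /* Simulate the owner's unlock so the wait below can finish. */
        atomic_fetch_and_explicit(&lockval, ~_Q_LOCKED_MASK,
                                  memory_order_release);

        /* 0,1,1 -> 0,1,0: wait for the owner to go away. */
        while (atomic_load_explicit(&lockval, memory_order_acquire) &
               _Q_LOCKED_MASK)
                ;

        /* 0,1,0 -> 0,0,1: clear_pending_set_locked(); tail is known 0 here. */
        atomic_store_explicit(&lockval, _Q_LOCKED_VAL, memory_order_relaxed);

        printf("lock word: 0x%x\n", (unsigned int)atomic_load(&lockval));
        return 0;
}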
@@ -371,11 +408,16 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 queue:
         qstat_inc(qstat_lock_slowpath, true);
 pv_queue:
-        node = this_cpu_ptr(&mcs_nodes[0]);
+        node = this_cpu_ptr(&qnodes[0].mcs);
         idx = node->count++;
         tail = encode_tail(smp_processor_id(), idx);
 
-        node += idx;
+        node = grab_mcs_node(node, idx);
+
+        /*
+         * Keep counts of non-zero index values:
+         */
+        qstat_inc(qstat_lock_idx1 + idx - 1, idx);
 
         /*
          * Ensure that we increment the head node->count before initialising
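The added qstat_inc() call depends on the per-index event counters being consecutive values (they would come from the matching qspinlock_stat.h change, which this diffstat filters out) and on the second argument acting as the count-it condition, so the common idx == 0 case is never counted. A small user-space model of that pattern; the names are illustrative only.

/*
 * Model of qstat_inc(qstat_lock_idx1 + idx - 1, idx): consecutive
 * counters indexed by (idx - 1), with the second argument as the
 * "only count if non-zero" condition.  Names are illustrative only.
 */
#include <stdio.h>

enum { lock_idx1, lock_idx2, lock_idx3, stat_num };

static unsigned long stats[stat_num];

static void qstat_inc(int stat, int cond)
{
        if (cond)
                stats[stat]++;
}

int main(void)
{
        int samples[] = { 0, 0, 1, 2, 0, 1, 3 };        /* observed nesting idx */
        unsigned int i;

        for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
                qstat_inc(lock_idx1 + samples[i] - 1, samples[i]);

        /* idx == 0 passes a bogus stat index, but cond is false so it is unused */
        printf("idx1=%lu idx2=%lu idx3=%lu\n",
               stats[lock_idx1], stats[lock_idx2], stats[lock_idx3]);
        return 0;
}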
@@ -476,16 +518,25 @@ locked:
          */
 
         /*
-         * In the PV case we might already have _Q_LOCKED_VAL set.
+         * In the PV case we might already have _Q_LOCKED_VAL set, because
+         * of lock stealing; therefore we must also allow:
+         *
+         * n,0,1 -> 0,0,1
          *
-         * The atomic_cond_read_acquire() call above has provided the
-         * necessary acquire semantics required for locking.
+         * Note: at this point: (val & _Q_PENDING_MASK) == 0, because of the
+         * above wait condition, therefore any concurrent setting of
+         * PENDING will make the uncontended transition fail.
          */
-        if (((val & _Q_TAIL_MASK) == tail) &&
-            atomic_try_cmpxchg_relaxed(&lock->val, &val, _Q_LOCKED_VAL))
-                goto release; /* No contention */
+        if ((val & _Q_TAIL_MASK) == tail) {
+                if (atomic_try_cmpxchg_relaxed(&lock->val, &val, _Q_LOCKED_VAL))
+                        goto release; /* No contention */
+        }
 
-        /* Either somebody is queued behind us or _Q_PENDING_VAL is set */
+        /*
+         * Either somebody is queued behind us or _Q_PENDING_VAL got set
+         * which will then detect the remaining tail and queue behind us
+         * ensuring we'll see a @next.
+         */
         set_locked(lock);
 
         /*
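The restructured uncontended handoff relies on atomic_try_cmpxchg_relaxed() refreshing val with the observed lock word when it fails, so the set_locked() fallback operates on current state. Below is a user-space model of the successful n,0,0 -> 0,0,1 case; C11 compare-exchange stands in for the kernel primitive and the tail layout assumes fewer than 16K CPUs.

/*
 * Model of the uncontended n,0,0 -> 0,0,1 handoff.  C11 atomics stand
 * in for atomic_try_cmpxchg_relaxed(), which likewise updates 'val'
 * with the observed word on failure.  Tail layout assumes NR_CPUS < 16K.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define _Q_LOCKED_VAL   (1U << 0)
#define _Q_TAIL_MASK    (0xffffU << 16)

static bool try_cmpxchg_relaxed(_Atomic uint32_t *v, uint32_t *old, uint32_t new)
{
        return atomic_compare_exchange_strong_explicit(v, old, new,
                                                       memory_order_relaxed,
                                                       memory_order_relaxed);
}

int main(void)
{
        uint32_t tail = 1U << 18;               /* encode_tail(0, 0): cpu 0, idx 0 */
        _Atomic uint32_t lockval = tail;        /* n,0,0: we are the only waiter */
        uint32_t val = atomic_load_explicit(&lockval, memory_order_relaxed);

        if ((val & _Q_TAIL_MASK) == tail &&
            try_cmpxchg_relaxed(&lockval, &val, _Q_LOCKED_VAL))
                printf("n,0,0 -> 0,0,1: lock word now 0x%x\n",
                       (unsigned int)atomic_load(&lockval));
        return 0;
}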
@@ -501,7 +552,7 @@ release:
         /*
          * release the node
          */
-        __this_cpu_dec(mcs_nodes[0].count);
+        __this_cpu_dec(qnodes[0].mcs.count);
 }
 EXPORT_SYMBOL(queued_spin_lock_slowpath);
 