Diffstat (limited to 'kernel/locking/qspinlock.c')
 -rw-r--r--  kernel/locking/qspinlock.c  |  82
 1 file changed, 64 insertions, 18 deletions
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 87e9ce6a63c5..393d1874b9e0 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -14,8 +14,9 @@
  * (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
  * (C) Copyright 2013-2014 Red Hat, Inc.
  * (C) Copyright 2015 Intel Corp.
+ * (C) Copyright 2015 Hewlett-Packard Enterprise Development LP
  *
- * Authors: Waiman Long <waiman.long@hp.com>
+ * Authors: Waiman Long <waiman.long@hpe.com>
  *          Peter Zijlstra <peterz@infradead.org>
  */
 
@@ -176,7 +177,12 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
 {
         struct __qspinlock *l = (void *)lock;
 
-        return (u32)xchg(&l->tail, tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
+        /*
+         * Use release semantics to make sure that the MCS node is properly
+         * initialized before changing the tail code.
+         */
+        return (u32)xchg_release(&l->tail,
+                                 tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
 }
 
 #else /* _Q_PENDING_BITS == 8 */
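
Note on the release semantics above: xchg_release() pairs with an acquire on the side that later walks to the published MCS node, so the node's fields are guaranteed to be initialized by the time they are observed. A minimal userspace C11 sketch of that release/acquire pairing (illustration only, not kernel code; node, published and the thread functions are made-up names):

/*
 * Illustration only (not kernel code): publish a node with a release
 * exchange, consume it with an acquire load. Build with: cc -pthread.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct node { int data; };

static struct node slot;
static _Atomic(struct node *) published;

static void *publisher(void *arg)
{
        (void)arg;
        slot.data = 42;                 /* initialize the node first */
        /* release: orders the initialization before the publication */
        atomic_exchange_explicit(&published, &slot, memory_order_release);
        return NULL;
}

static void *waiter(void *arg)
{
        struct node *n;

        (void)arg;
        /* acquire: pairs with the release exchange above */
        while (!(n = atomic_load_explicit(&published, memory_order_acquire)))
                ;
        printf("data = %d\n", n->data); /* guaranteed to print 42 */
        return NULL;
}

int main(void)
{
        pthread_t a, b;

        pthread_create(&a, NULL, publisher, NULL);
        pthread_create(&b, NULL, waiter, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        return 0;
}

The cmpxchg-based xchg_tail() variant in the next hunk relies on the same ordering guarantee.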
@@ -208,7 +214,11 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
 
         for (;;) {
                 new = (val & _Q_LOCKED_PENDING_MASK) | tail;
-                old = atomic_cmpxchg(&lock->val, val, new);
+                /*
+                 * Use release semantics to make sure that the MCS node is
+                 * properly initialized before changing the tail code.
+                 */
+                old = atomic_cmpxchg_release(&lock->val, val, new);
                 if (old == val)
                         break;
 
@@ -238,18 +248,20 @@ static __always_inline void set_locked(struct qspinlock *lock)
  */
 
 static __always_inline void __pv_init_node(struct mcs_spinlock *node) { }
-static __always_inline void __pv_wait_node(struct mcs_spinlock *node) { }
+static __always_inline void __pv_wait_node(struct mcs_spinlock *node,
+                                           struct mcs_spinlock *prev) { }
 static __always_inline void __pv_kick_node(struct qspinlock *lock,
                                            struct mcs_spinlock *node) { }
-static __always_inline void __pv_wait_head(struct qspinlock *lock,
-                                           struct mcs_spinlock *node) { }
+static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock,
+                                                  struct mcs_spinlock *node)
+                                                  { return 0; }
 
 #define pv_enabled()            false
 
 #define pv_init_node            __pv_init_node
 #define pv_wait_node            __pv_wait_node
 #define pv_kick_node            __pv_kick_node
-#define pv_wait_head            __pv_wait_head
+#define pv_wait_head_or_lock    __pv_wait_head_or_lock
 
 #ifdef CONFIG_PARAVIRT_SPINLOCKS
 #define queued_spin_lock_slowpath       native_queued_spin_lock_slowpath
@@ -319,7 +331,11 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
                 if (val == new)
                         new |= _Q_PENDING_VAL;
 
-                old = atomic_cmpxchg(&lock->val, val, new);
+                /*
+                 * Acquire semantic is required here as the function may
+                 * return immediately if the lock was free.
+                 */
+                old = atomic_cmpxchg_acquire(&lock->val, val, new);
                 if (old == val)
                         break;
 
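
The acquire requirement noted above is the general rule for lock acquisition: when the cmpxchg succeeds, the caller may return straight into the critical section, so that section must not be allowed to float above the point where the lock was observed free. A hedged C11 trylock analog of this rule (lockval and the 0/1 encoding are stand-ins, not the kernel's qspinlock layout):

/*
 * Illustration only (not kernel code): a trylock must use acquire
 * semantics on success; the failure ordering can stay relaxed because
 * nothing is protected when the lock is not taken.
 */
#include <stdatomic.h>
#include <stdbool.h>

static atomic_uint lockval;             /* 0 = free, 1 = locked (stand-in) */

bool example_trylock(void)
{
        unsigned int expected = 0;

        return atomic_compare_exchange_strong_explicit(&lockval, &expected, 1,
                                                       memory_order_acquire,
                                                       memory_order_relaxed);
}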
@@ -382,6 +398,7 @@ queue:
          * p,*,* -> n,*,*
          */
         old = xchg_tail(lock, tail);
+        next = NULL;
 
         /*
          * if there was a previous node; link it and wait until reaching the
@@ -391,8 +408,18 @@ queue:
                 prev = decode_tail(old);
                 WRITE_ONCE(prev->next, node);
 
-                pv_wait_node(node);
+                pv_wait_node(node, prev);
                 arch_mcs_spin_lock_contended(&node->locked);
+
+                /*
+                 * While waiting for the MCS lock, the next pointer may have
+                 * been set by another lock waiter. We optimistically load
+                 * the next pointer & prefetch the cacheline for writing
+                 * to reduce latency in the upcoming MCS unlock operation.
+                 */
+                next = READ_ONCE(node->next);
+                if (next)
+                        prefetchw(next);
         }
 
         /*
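
The prefetchw() added above does not change the locking logic; it only pulls the next waiter's node cacheline in for writing early, so the store in the later MCS handoff is cheaper. A userspace sketch of the same idea using the GCC/Clang __builtin_prefetch() write hint (the struct and function names are hypothetical, not kernel code):

/*
 * Illustration only (not kernel code): __builtin_prefetch(p, 1) is the
 * userspace counterpart of prefetchw(); the second argument (1) marks
 * the access as a write, so the line can be brought in for ownership
 * while other work is still going on.
 */
struct fake_mcs_node {
        struct fake_mcs_node *next;
        int locked;
};

void example_handoff(struct fake_mcs_node *self)
{
        struct fake_mcs_node *next = self->next;

        if (next)
                __builtin_prefetch(next, 1);    /* warm the line for writing */

        /* ... the current owner does its own work here ... */

        if (next)
                next->locked = 1;               /* the store the prefetch was for */
}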
@@ -406,11 +433,22 @@ queue:
          * sequentiality; this is because the set_locked() function below
          * does not imply a full barrier.
          *
+         * The PV pv_wait_head_or_lock function, if active, will acquire
+         * the lock and return a non-zero value. So we have to skip the
+         * smp_load_acquire() call. As the next PV queue head hasn't been
+         * designated yet, there is no way for the locked value to become
+         * _Q_SLOW_VAL. So both the set_locked() and the
+         * atomic_cmpxchg_relaxed() calls will be safe.
+         *
+         * If PV isn't active, 0 will be returned instead.
+         *
          */
-        pv_wait_head(lock, node);
-        while ((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_PENDING_MASK)
-                cpu_relax();
+        if ((val = pv_wait_head_or_lock(lock, node)))
+                goto locked;
 
+        smp_cond_acquire(!((val = atomic_read(&lock->val)) & _Q_LOCKED_PENDING_MASK));
+
+locked:
         /*
          * claim the lock:
          *
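
smp_cond_acquire() above spins until its condition becomes true and then provides acquire ordering for everything that follows. Roughly the same effect expressed with C11 atomics, as a sketch (lockval and the mask value are stand-ins for the kernel's lock word; the real macro uses a plain read plus a barrier rather than a load-acquire in the loop):

/*
 * Illustration only (not kernel code): spin until the locked/pending
 * byte fields clear, with acquire ordering on the final observation.
 */
#include <stdatomic.h>

#define EXAMPLE_LOCKED_PENDING_MASK     0x0000ffffU     /* stand-in mask */

static atomic_uint lockval;             /* stand-in for the qspinlock word */

unsigned int example_wait_head(void)
{
        unsigned int val;

        while ((val = atomic_load_explicit(&lockval, memory_order_acquire)) &
               EXAMPLE_LOCKED_PENDING_MASK)
                ;       /* a real implementation would cpu_relax() here */
        return val;
}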
@@ -422,11 +460,17 @@ queue:
          * to grab the lock.
          */
         for (;;) {
-                if (val != tail) {
+                /* In the PV case we might already have _Q_LOCKED_VAL set */
+                if ((val & _Q_TAIL_MASK) != tail) {
                         set_locked(lock);
                         break;
                 }
-                old = atomic_cmpxchg(&lock->val, val, _Q_LOCKED_VAL);
+                /*
+                 * The smp_load_acquire() call above has provided the necessary
+                 * acquire semantics required for locking. At most two
+                 * iterations of this loop may be ran.
+                 */
+                old = atomic_cmpxchg_relaxed(&lock->val, val, _Q_LOCKED_VAL);
                 if (old == val)
                         goto release;   /* No contention */
 
@@ -434,10 +478,12 @@ queue:
         }
 
         /*
-         * contended path; wait for next, release.
+         * contended path; wait for next if not observed yet, release.
          */
-        while (!(next = READ_ONCE(node->next)))
-                cpu_relax();
+        if (!next) {
+                while (!(next = READ_ONCE(node->next)))
+                        cpu_relax();
+        }
 
         arch_mcs_spin_unlock_contended(&next->locked);
         pv_kick_node(lock, next);
@@ -462,7 +508,7 @@ EXPORT_SYMBOL(queued_spin_lock_slowpath);
 #undef pv_init_node
 #undef pv_wait_node
 #undef pv_kick_node
-#undef pv_wait_head
+#undef pv_wait_head_or_lock
 
 #undef queued_spin_lock_slowpath
 #define queued_spin_lock_slowpath       __pv_queued_spin_lock_slowpath