author     Thomas Hellstrom <thellstrom@vmware.com>  2018-06-15 04:17:38 -0400
committer  Thomas Hellstrom <thellstrom@vmware.com>  2018-07-03 03:44:36 -0400
commit     08295b3b5beec9aac0f7a9db86f0fc3792039da3
tree       6226474d5fdaba0bf04aa8bf6542a89722547990
parent     55f036ca7e74b85e34958af3d22121c656796413
locking: Implement an algorithm choice for Wound-Wait mutexes
The current Wound-Wait mutex algorithm is actually not Wound-Wait but
Wait-Die. Implement also Wound-Wait as a per-ww-class choice. Wound-Wait
is, contrary to Wait-Die, a preemptive algorithm and is known to generate
fewer backoffs. Testing reveals that this is true if the number of
simultaneous contending transactions is small. As the number of
simultaneous contending threads increases, Wound-Wait becomes inferior to
Wait-Die in terms of elapsed time, possibly due to the larger number of
locks held by sleeping transactions.

Update documentation and callers.

Timings using git://people.freedesktop.org/~thomash/ww_mutex_test
tag patch-18-06-15

Each thread runs 100000 batches of lock / unlock 800 ww mutexes randomly
chosen out of 100000. Four core Intel x86_64:

Algorithm    #threads  Rollbacks  time
Wound-Wait   4         ~100       ~17s.
Wait-Die     4         ~150000    ~19s.
Wound-Wait   16        ~360000    ~109s.
Wait-Die     16        ~450000    ~82s.

Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Gustavo Padovan <gustavo@padovan.org>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Sean Paul <seanpaul@chromium.org>
Cc: David Airlie <airlied@linux.ie>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Kate Stewart <kstewart@linuxfoundation.org>
Cc: Philippe Ombredanne <pombredanne@nexb.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: linux-doc@vger.kernel.org
Cc: linux-media@vger.kernel.org
Cc: linaro-mm-sig@lists.linaro.org
Co-authored-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Ingo Molnar <mingo@kernel.org>
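Illustration only, not part of the patch: the following stand-alone C sketch models the two decision rules the message refers to. The struct and helper names are invented for the example; only the stamp comparison mirrors what the ww_mutex code does (a wrap-safe signed comparison of monotonically increasing stamps). The algorithms differ solely in who backs off when a contended lock is found, and in the kernel a transaction holding no locks never has to back off.

/* Illustration only -- plain C model, not kernel code and not part of this patch. */
#include <stdbool.h>

struct tx {
	unsigned long stamp;	/* taken from a global counter; lower == older */
};

/* "a is older than b", wrap-safe signed comparison as the kernel uses. */
static bool tx_older(const struct tx *a, const struct tx *b)
{
	return (long)(a->stamp - b->stamp) < 0;
}

/*
 * Wait-Die: the contender backs off (dies) when the current lock holder is
 * older; otherwise the contender simply waits.
 */
static bool wait_die_contender_backs_off(const struct tx *contender,
					 const struct tx *holder)
{
	return tx_older(holder, contender);
}

/*
 * Wound-Wait: the lock holder is wounded (asked to back off) when the
 * contender is older; a younger contender just waits.
 */
static bool wound_wait_holder_backs_off(const struct tx *contender,
					const struct tx *holder)
{
	return tx_older(contender, holder);
}

The fewer backoffs claimed for Wound-Wait come from the fact that an older contender never has to back off; the cost is that a younger, already running lock holder must be preempted (wounded) instead.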
-rw-r--r--  Documentation/locking/ww-mutex-design.txt |  57
-rw-r--r--  drivers/dma-buf/reservation.c             |   2
-rw-r--r--  drivers/gpu/drm/drm_modeset_lock.c        |   2
-rw-r--r--  include/linux/ww_mutex.h                  |  17
-rw-r--r--  kernel/locking/locktorture.c              |   2
-rw-r--r--  kernel/locking/mutex.c                    | 165
-rw-r--r--  kernel/locking/test-ww_mutex.c            |   2
-rw-r--r--  lib/locking-selftest.c                    |   2
8 files changed, 213 insertions, 36 deletions
diff --git a/Documentation/locking/ww-mutex-design.txt b/Documentation/locking/ww-mutex-design.txt
index 2fd7f2a2af21..f0ed7c30e695 100644
--- a/Documentation/locking/ww-mutex-design.txt
+++ b/Documentation/locking/ww-mutex-design.txt
@@ -1,4 +1,4 @@
-Wait/Wound Deadlock-Proof Mutex Design
+Wound/Wait Deadlock-Proof Mutex Design
 ======================================
 
 Please read mutex-design.txt first, as it applies to wait/wound mutexes too.
@@ -32,10 +32,26 @@ the oldest task) wins, and the one with the higher reservation id (i.e. the
 younger task) unlocks all of the buffers that it has already locked, and then
 tries again.
 
-In the RDBMS literature this deadlock handling approach is called wait/die:
-The older tasks waits until it can acquire the contended lock. The younger tasks
-needs to back off and drop all the locks it is currently holding, i.e. the
-younger task dies.
+In the RDBMS literature, a reservation ticket is associated with a transaction,
+and the deadlock handling approach is called Wait-Die. The name is based on
+the actions of a locking thread when it encounters an already locked mutex.
+If the transaction holding the lock is younger, the locking transaction waits.
+If the transaction holding the lock is older, the locking transaction backs off
+and dies. Hence Wait-Die.
+There is also another algorithm called Wound-Wait:
+If the transaction holding the lock is younger, the locking transaction
+wounds the transaction holding the lock, requesting it to die.
+If the transaction holding the lock is older, it waits for the other
+transaction. Hence Wound-Wait.
+The two algorithms are both fair in that a transaction will eventually succeed.
+However, the Wound-Wait algorithm is typically stated to generate fewer backoffs
+compared to Wait-Die, but is, on the other hand, associated with more work than
+Wait-Die when recovering from a backoff. Wound-Wait is also a preemptive
+algorithm in that transactions are wounded by other transactions, and that
+requires a reliable way to pick up the wounded condition and preempt the
+running transaction. Note that this is not the same as process preemption. A
+Wound-Wait transaction is considered preempted when it dies (returning
+-EDEADLK) following a wound.
 
 Concepts
 --------
@@ -47,10 +63,12 @@ Acquire context: To ensure eventual forward progress it is important the a task
 trying to acquire locks doesn't grab a new reservation id, but keeps the one it
 acquired when starting the lock acquisition. This ticket is stored in the
 acquire context. Furthermore the acquire context keeps track of debugging state
-to catch w/w mutex interface abuse.
+to catch w/w mutex interface abuse. An acquire context is representing a
+transaction.
 
 W/w class: In contrast to normal mutexes the lock class needs to be explicit for
-w/w mutexes, since it is required to initialize the acquire context.
+w/w mutexes, since it is required to initialize the acquire context. The lock
+class also specifies what algorithm to use, Wound-Wait or Wait-Die.
 
 Furthermore there are three different class of w/w lock acquire functions:
 
@@ -90,6 +108,12 @@ provided.
 Usage
 -----
 
+The algorithm (Wait-Die vs Wound-Wait) is chosen by using either
+DEFINE_WW_CLASS() (Wound-Wait) or DEFINE_WD_CLASS() (Wait-Die).
+As a rough rule of thumb, use Wound-Wait iff you
+expect the number of simultaneous competing transactions to be typically small,
+and you want to reduce the number of rollbacks.
+
 Three different ways to acquire locks within the same w/w class. Common
 definitions for methods #1 and #2:
 
@@ -312,12 +336,23 @@ Design:
   We maintain the following invariants for the wait list:
   (1) Waiters with an acquire context are sorted by stamp order; waiters
       without an acquire context are interspersed in FIFO order.
-  (2) Among waiters with contexts, only the first one can have other locks
-      acquired already (ctx->acquired > 0). Note that this waiter may come
-      after other waiters without contexts in the list.
+  (2) For Wait-Die, among waiters with contexts, only the first one can have
+      other locks acquired already (ctx->acquired > 0). Note that this waiter
+      may come after other waiters without contexts in the list.
+
+  The Wound-Wait preemption is implemented with a lazy-preemption scheme:
+  The wounded status of the transaction is checked only when there is
+  contention for a new lock and hence a true chance of deadlock. In that
+  situation, if the transaction is wounded, it backs off, clears the
+  wounded status and retries. A great benefit of implementing preemption in
+  this way is that the wounded transaction can identify a contending lock to
+  wait for before restarting the transaction. Just blindly restarting the
+  transaction would likely make the transaction end up in a situation where
+  it would have to back off again.
 
   In general, not much contention is expected. The locks are typically used to
-  serialize access to resources for devices.
+  serialize access to resources for devices, and optimization focus should
+  therefore be directed towards the uncontended cases.
 
 Lockdep:
   Special care has been taken to warn for as many cases of api abuse
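Illustration only, not part of the patch: the Usage hunk above says the algorithm is selected with DEFINE_WW_CLASS() (Wound-Wait) or DEFINE_WD_CLASS() (Wait-Die). Below is a minimal caller-side sketch of the documented acquire/backoff pattern; my_obj, my_ww_class and my_lock_pair are hypothetical names, and the -EDEADLK backoff path is identical regardless of which algorithm the class selects. For brevity this sketch drops the contended lock again before retrying; the pattern in ww-mutex-design.txt keeps it held across the restart.

/* Illustrative caller with hypothetical names; the ww_mutex calls are the existing API. */
#include <linux/ww_mutex.h>

static DEFINE_WW_CLASS(my_ww_class);	/* or DEFINE_WD_CLASS() for Wait-Die */

struct my_obj {
	struct ww_mutex lock;
};

static int my_lock_pair(struct my_obj *a, struct my_obj *b,
			struct ww_acquire_ctx *ctx)
{
	int ret;

	ww_acquire_init(ctx, &my_ww_class);
retry:
	ret = ww_mutex_lock(&a->lock, ctx);
	if (ret)
		goto out_fini;

	ret = ww_mutex_lock(&b->lock, ctx);
	if (ret) {
		ww_mutex_unlock(&a->lock);
		if (ret == -EDEADLK) {
			/*
			 * Back off: either an older transaction holds b
			 * (Wait-Die) or an older transaction wounded us
			 * (Wound-Wait). Sleep on the contended lock, then
			 * restart the whole sequence.
			 */
			ww_mutex_lock_slow(&b->lock, ctx);
			ww_mutex_unlock(&b->lock);
			goto retry;
		}
		goto out_fini;
	}

	ww_acquire_done(ctx);
	return 0;	/* caller unlocks both, then calls ww_acquire_fini(ctx) */

out_fini:
	ww_acquire_fini(ctx);
	return ret;
}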
diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c
index 314eb1071cce..20bf90f4ee63 100644
--- a/drivers/dma-buf/reservation.c
+++ b/drivers/dma-buf/reservation.c
@@ -46,7 +46,7 @@
  * write-side updates.
  */
 
-DEFINE_WW_CLASS(reservation_ww_class);
+DEFINE_WD_CLASS(reservation_ww_class);
 EXPORT_SYMBOL(reservation_ww_class);
 
 struct lock_class_key reservation_seqcount_class;
diff --git a/drivers/gpu/drm/drm_modeset_lock.c b/drivers/gpu/drm/drm_modeset_lock.c
index 8a5100685875..638be2eb67b4 100644
--- a/drivers/gpu/drm/drm_modeset_lock.c
+++ b/drivers/gpu/drm/drm_modeset_lock.c
@@ -70,7 +70,7 @@
  * lists and lookup data structures.
  */
 
-static DEFINE_WW_CLASS(crtc_ww_class);
+static DEFINE_WD_CLASS(crtc_ww_class);
 
 /**
  * drm_modeset_lock_all - take all modeset locks
diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h
index f82fce2229c8..3af7c0e03be5 100644
--- a/include/linux/ww_mutex.h
+++ b/include/linux/ww_mutex.h
@@ -8,6 +8,8 @@
  *
  * Wait/Die implementation:
  * Copyright (C) 2013 Canonical Ltd.
+ * Choice of algorithm:
+ * Copyright (C) 2018 VMware Inc.
  *
  * This file contains the main data structure and API definitions.
  */
@@ -23,12 +25,15 @@ struct ww_class {
 	struct lock_class_key mutex_key;
 	const char *acquire_name;
 	const char *mutex_name;
+	unsigned int is_wait_die;
 };
 
 struct ww_acquire_ctx {
 	struct task_struct *task;
 	unsigned long stamp;
 	unsigned int acquired;
+	unsigned short wounded;
+	unsigned short is_wait_die;
 #ifdef CONFIG_DEBUG_MUTEXES
 	unsigned int done_acquire;
 	struct ww_class *ww_class;
@@ -58,17 +63,21 @@ struct ww_mutex {
 # define __WW_CLASS_MUTEX_INITIALIZER(lockname, class)
 #endif
 
-#define __WW_CLASS_INITIALIZER(ww_class) \
+#define __WW_CLASS_INITIALIZER(ww_class, _is_wait_die) \
 		{ .stamp = ATOMIC_LONG_INIT(0) \
 		, .acquire_name = #ww_class "_acquire" \
-		, .mutex_name = #ww_class "_mutex" }
+		, .mutex_name = #ww_class "_mutex" \
+		, .is_wait_die = _is_wait_die }
 
 #define __WW_MUTEX_INITIALIZER(lockname, class) \
 		{ .base = __MUTEX_INITIALIZER(lockname.base) \
 		__WW_CLASS_MUTEX_INITIALIZER(lockname, class) }
 
+#define DEFINE_WD_CLASS(classname) \
+	struct ww_class classname = __WW_CLASS_INITIALIZER(classname, 1)
+
 #define DEFINE_WW_CLASS(classname) \
-	struct ww_class classname = __WW_CLASS_INITIALIZER(classname)
+	struct ww_class classname = __WW_CLASS_INITIALIZER(classname, 0)
 
 #define DEFINE_WW_MUTEX(mutexname, ww_class) \
 	struct ww_mutex mutexname = __WW_MUTEX_INITIALIZER(mutexname, ww_class)
@@ -123,6 +132,8 @@ static inline void ww_acquire_init(struct ww_acquire_ctx *ctx,
 	ctx->task = current;
 	ctx->stamp = atomic_long_inc_return_relaxed(&ww_class->stamp);
 	ctx->acquired = 0;
+	ctx->wounded = false;
+	ctx->is_wait_die = ww_class->is_wait_die;
 #ifdef CONFIG_DEBUG_MUTEXES
 	ctx->ww_class = ww_class;
 	ctx->done_acquire = 0;
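Illustration only, not part of the patch: the header changes above make the algorithm a property of the ww_class (is_wait_die) and mirror it, together with a wounded flag, in each acquire context. A small sketch with hypothetical names:

/*
 * Sketch (hypothetical names): the algorithm is a property of the class, so
 * every ww_mutex and every acquire context tied to that class uses the same
 * algorithm -- which is what lets the wait-list code assume it never mixes
 * Wait-Die and Wound-Wait waiters on one lock.
 */
#include <linux/ww_mutex.h>

static DEFINE_WD_CLASS(my_class);		/* Wait-Die: .is_wait_die = 1 */
static DEFINE_WW_MUTEX(my_lock_a, &my_class);
static DEFINE_WW_MUTEX(my_lock_b, &my_class);

static void my_init_ctx(struct ww_acquire_ctx *ctx)
{
	/* Copies my_class.is_wait_die into ctx->is_wait_die and clears
	 * ctx->wounded, per the ww_acquire_init() hunk above. */
	ww_acquire_init(ctx, &my_class);
}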
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 8402b3349dca..c28224347d69 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -365,7 +365,7 @@ static struct lock_torture_ops mutex_lock_ops = {
 };
 
 #include <linux/ww_mutex.h>
-static DEFINE_WW_CLASS(torture_ww_class);
+static DEFINE_WD_CLASS(torture_ww_class);
 static DEFINE_WW_MUTEX(torture_ww_mutex_0, &torture_ww_class);
 static DEFINE_WW_MUTEX(torture_ww_mutex_1, &torture_ww_class);
 static DEFINE_WW_MUTEX(torture_ww_mutex_2, &torture_ww_class);
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index cfe48419b7d0..1a81a1257b3f 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -174,6 +174,21 @@ static inline bool __mutex_waiter_is_first(struct mutex *lock, struct mutex_wait
 }
 
 /*
+ * Add @waiter to a given location in the lock wait_list and set the
+ * FLAG_WAITERS flag if it's the first waiter.
+ */
+static void __sched
+__mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
+		   struct list_head *list)
+{
+	debug_mutex_add_waiter(lock, waiter, current);
+
+	list_add_tail(&waiter->list, list);
+	if (__mutex_waiter_is_first(lock, waiter))
+		__mutex_set_flag(lock, MUTEX_FLAG_WAITERS);
+}
+
+/*
  * Give up ownership to a specific task, when @task = NULL, this is equivalent
  * to a regular unlock. Sets PICKUP on a handoff, clears HANDOF, preserves
  * WAITERS. Provides RELEASE semantics like a regular unlock, the
@@ -249,6 +264,11 @@ EXPORT_SYMBOL(mutex_lock);
  *   The newer transactions are killed when:
  *     It (the new transaction) makes a request for a lock being held
  *     by an older transaction.
+ *
+ * Wound-Wait:
+ *   The newer transactions are wounded when:
+ *     An older transaction makes a request for a lock being held by
+ *     the newer transaction.
  */
 
 /*
@@ -320,6 +340,9 @@ static bool __sched
 __ww_mutex_die(struct mutex *lock, struct mutex_waiter *waiter,
 	       struct ww_acquire_ctx *ww_ctx)
 {
+	if (!ww_ctx->is_wait_die)
+		return false;
+
 	if (waiter->ww_ctx->acquired > 0 &&
 			__ww_ctx_stamp_after(waiter->ww_ctx, ww_ctx)) {
 		debug_mutex_wake_waiter(lock, waiter);
@@ -330,12 +353,64 @@ __ww_mutex_die(struct mutex *lock, struct mutex_waiter *waiter,
 }
 
 /*
+ * Wound-Wait; wound a younger @hold_ctx if it holds the lock.
+ *
+ * Wound the lock holder if there are waiters with older transactions than
+ * the lock holders. Even if multiple waiters may wound the lock holder,
+ * it's sufficient that only one does.
+ */
+static bool __ww_mutex_wound(struct mutex *lock,
+			     struct ww_acquire_ctx *ww_ctx,
+			     struct ww_acquire_ctx *hold_ctx)
+{
+	struct task_struct *owner = __mutex_owner(lock);
+
+	lockdep_assert_held(&lock->wait_lock);
+
+	/*
+	 * Possible through __ww_mutex_add_waiter() when we race with
+	 * ww_mutex_set_context_fastpath(). In that case we'll get here again
+	 * through __ww_mutex_check_waiters().
+	 */
+	if (!hold_ctx)
+		return false;
+
+	/*
+	 * Can have !owner because of __mutex_unlock_slowpath(), but if owner,
+	 * it cannot go away because we'll have FLAG_WAITERS set and hold
+	 * wait_lock.
+	 */
+	if (!owner)
+		return false;
+
+	if (ww_ctx->acquired > 0 && __ww_ctx_stamp_after(hold_ctx, ww_ctx)) {
+		hold_ctx->wounded = 1;
+
+		/*
+		 * wake_up_process() paired with set_current_state()
+		 * inserts sufficient barriers to make sure @owner either sees
+		 * it's wounded in __ww_mutex_lock_check_stamp() or has a
+		 * wakeup pending to re-read the wounded state.
+		 */
+		if (owner != current)
+			wake_up_process(owner);
+
+		return true;
+	}
+
+	return false;
+}
+
+/*
  * We just acquired @lock under @ww_ctx, if there are later contexts waiting
- * behind us on the wait-list, check if they need to die.
+ * behind us on the wait-list, check if they need to die, or wound us.
  *
  * See __ww_mutex_add_waiter() for the list-order construction; basically the
  * list is ordered by stamp, smallest (oldest) first.
  *
+ * This relies on never mixing wait-die/wound-wait on the same wait-list;
+ * which is currently ensured by that being a ww_class property.
+ *
  * The current task must not be on the wait list.
  */
 static void __sched
@@ -349,7 +424,8 @@ __ww_mutex_check_waiters(struct mutex *lock, struct ww_acquire_ctx *ww_ctx)
 		if (!cur->ww_ctx)
 			continue;
 
-		if (__ww_mutex_die(lock, cur, ww_ctx))
+		if (__ww_mutex_die(lock, cur, ww_ctx) ||
+		    __ww_mutex_wound(lock, cur->ww_ctx, ww_ctx))
 			break;
 	}
 }
@@ -370,17 +446,23 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
 	 * and keep spinning, or it will acquire wait_lock, add itself
 	 * to waiter list and sleep.
 	 */
-	smp_mb(); /* ^^^ */
+	smp_mb(); /* See comments above and below. */
 
 	/*
-	 * Check if lock is contended, if not there is nobody to wake up
+	 * [W] ww->ctx = ctx	    [W] MUTEX_FLAG_WAITERS
+	 *     MB		        MB
+	 * [R] MUTEX_FLAG_WAITERS   [R] ww->ctx
+	 *
+	 * The memory barrier above pairs with the memory barrier in
+	 * __ww_mutex_add_waiter() and makes sure we either observe ww->ctx
+	 * and/or !empty list.
 	 */
 	if (likely(!(atomic_long_read(&lock->base.owner) & MUTEX_FLAG_WAITERS)))
 		return;
 
 	/*
 	 * Uh oh, we raced in fastpath, check if any of the waiters need to
-	 * die.
+	 * die or wound us.
 	 */
 	spin_lock(&lock->base.wait_lock);
 	__ww_mutex_check_waiters(&lock->base, ctx);
@@ -682,7 +764,9 @@ __ww_mutex_kill(struct mutex *lock, struct ww_acquire_ctx *ww_ctx)
 
 
 /*
- * Check whether we need to kill the transaction for the current lock acquire.
+ * Check the wound condition for the current lock acquire.
+ *
+ * Wound-Wait: If we're wounded, kill ourself.
  *
  * Wait-Die: If we're trying to acquire a lock already held by an older
  * context, kill ourselves.
@@ -701,6 +785,13 @@ __ww_mutex_check_kill(struct mutex *lock, struct mutex_waiter *waiter,
 	if (ctx->acquired == 0)
 		return 0;
 
+	if (!ctx->is_wait_die) {
+		if (ctx->wounded)
+			return __ww_mutex_kill(lock, ctx);
+
+		return 0;
+	}
+
 	if (hold_ctx && __ww_ctx_stamp_after(ctx, hold_ctx))
 		return __ww_mutex_kill(lock, ctx);
 
@@ -727,7 +818,8 @@ __ww_mutex_check_kill(struct mutex *lock, struct mutex_waiter *waiter,
  * Waiters without context are interspersed in FIFO order.
  *
  * Furthermore, for Wait-Die kill ourself immediately when possible (there are
- * older contexts already waiting) to avoid unnecessary waiting.
+ * older contexts already waiting) to avoid unnecessary waiting and for
+ * Wound-Wait ensure we wound the owning context when it is younger.
  */
 static inline int __sched
 __ww_mutex_add_waiter(struct mutex_waiter *waiter,
@@ -736,16 +828,21 @@ __ww_mutex_add_waiter(struct mutex_waiter *waiter,
 {
 	struct mutex_waiter *cur;
 	struct list_head *pos;
+	bool is_wait_die;
 
 	if (!ww_ctx) {
-		list_add_tail(&waiter->list, &lock->wait_list);
+		__mutex_add_waiter(lock, waiter, &lock->wait_list);
 		return 0;
 	}
 
+	is_wait_die = ww_ctx->is_wait_die;
+
 	/*
 	 * Add the waiter before the first waiter with a higher stamp.
 	 * Waiters without a context are skipped to avoid starving
-	 * them. Wait-Die waiters may die here.
+	 * them. Wait-Die waiters may die here. Wound-Wait waiters
+	 * never die here, but they are sorted in stamp order and
+	 * may wound the lock holder.
 	 */
 	pos = &lock->wait_list;
 	list_for_each_entry_reverse(cur, &lock->wait_list, list) {
@@ -758,10 +855,12 @@ __ww_mutex_add_waiter(struct mutex_waiter *waiter,
 			 * is no point in queueing behind it, as we'd have to
 			 * die the moment it would acquire the lock.
 			 */
-			int ret = __ww_mutex_kill(lock, ww_ctx);
+			if (is_wait_die) {
+				int ret = __ww_mutex_kill(lock, ww_ctx);
 
-			if (ret)
-				return ret;
+				if (ret)
+					return ret;
+			}
 
 			break;
 		}
@@ -772,7 +871,23 @@ __ww_mutex_add_waiter(struct mutex_waiter *waiter,
 		__ww_mutex_die(lock, cur, ww_ctx);
 	}
 
-	list_add_tail(&waiter->list, pos);
+	__mutex_add_waiter(lock, waiter, pos);
+
+	/*
+	 * Wound-Wait: if we're blocking on a mutex owned by a younger context,
+	 * wound that such that we might proceed.
+	 */
+	if (!is_wait_die) {
+		struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
+
+		/*
+		 * See ww_mutex_set_context_fastpath(). Orders setting
+		 * MUTEX_FLAG_WAITERS vs the ww->ctx load,
+		 * such that either we or the fastpath will wound @ww->ctx.
+		 */
+		smp_mb();
+		__ww_mutex_wound(lock, ww_ctx, ww->ctx);
+	}
 
 	return 0;
 }
@@ -796,6 +911,14 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	if (use_ww_ctx && ww_ctx) {
 		if (unlikely(ww_ctx == READ_ONCE(ww->ctx)))
 			return -EALREADY;
+
+		/*
+		 * Reset the wounded flag after a kill. No other process can
+		 * race and wound us here since they can't have a valid owner
+		 * pointer if we don't have any locks held.
+		 */
+		if (ww_ctx->acquired == 0)
+			ww_ctx->wounded = 0;
 	}
 
 	preempt_disable();
@@ -829,7 +952,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 
 	if (!use_ww_ctx) {
 		/* add waiting tasks to the end of the waitqueue (FIFO): */
-		list_add_tail(&waiter.list, &lock->wait_list);
+		__mutex_add_waiter(lock, &waiter, &lock->wait_list);
+
 
 #ifdef CONFIG_DEBUG_MUTEXES
 		waiter.ww_ctx = MUTEX_POISON_WW_CTX;
@@ -848,9 +972,6 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 
 	waiter.task = current;
 
-	if (__mutex_waiter_is_first(lock, &waiter))
-		__mutex_set_flag(lock, MUTEX_FLAG_WAITERS);
-
 	set_current_state(state);
 	for (;;) {
 		/*
@@ -907,6 +1028,16 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 acquired:
 	__set_current_state(TASK_RUNNING);
 
+	if (use_ww_ctx && ww_ctx) {
+		/*
+		 * Wound-Wait; we stole the lock (!first_waiter), check the
+		 * waiters as anyone might want to wound us.
+		 */
+		if (!ww_ctx->is_wait_die &&
+		    !__mutex_waiter_is_first(lock, &waiter))
+			__ww_mutex_check_waiters(lock, ww_ctx);
+	}
+
 	mutex_remove_waiter(lock, &waiter, current);
 	if (likely(list_empty(&lock->wait_list)))
 		__mutex_clear_flag(lock, MUTEX_FLAGS);
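Illustration only, not part of the patch: the mutex.c changes above implement the wound as lazy preemption. __ww_mutex_wound() only sets a flag and wakes the owner; the wounded transaction acts on it the next time it contends on a lock while still holding others, where __ww_mutex_check_kill() converts the flag into -EDEADLK. A plain-C model of that check (names and types are invented, this is not the kernel implementation):

/* Conceptual model of the lazy wound check; plain C, illustration only. */
#include <errno.h>
#include <stdbool.h>

struct tx {
	unsigned int acquired;	/* number of ww locks currently held */
	bool wounded;		/* set by an older contender */
};

/* Roughly what the Wound-Wait branch of the contended-lock check does. */
static int check_kill(struct tx *me)
{
	if (me->acquired == 0)
		return 0;		/* nothing held, nothing to roll back */
	if (me->wounded)
		return -EDEADLK;	/* back off, drop all locks, retry */
	return 0;
}

The wounded flag is cleared again in __mutex_lock_common() once the transaction holds no locks (ww_ctx->acquired == 0, hunk at new line 911 above), which is why a freshly backed-off transaction can retry without being killed immediately.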
diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c
index 0e4cd64ad2c0..5b915b370d5a 100644
--- a/kernel/locking/test-ww_mutex.c
+++ b/kernel/locking/test-ww_mutex.c
@@ -26,7 +26,7 @@
 #include <linux/slab.h>
 #include <linux/ww_mutex.h>
 
-static DEFINE_WW_CLASS(ww_class);
+static DEFINE_WD_CLASS(ww_class);
 struct workqueue_struct *wq;
 
 struct test_mutex {
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index b5c1293ce147..1e1bbf171eca 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -29,7 +29,7 @@
  */
 static unsigned int debug_locks_verbose;
 
-static DEFINE_WW_CLASS(ww_lockdep);
+static DEFINE_WD_CLASS(ww_lockdep);
 
 static int __init setup_debug_locks_verbose(char *str)
 {