author     Peter Zijlstra <peterz@infradead.org>    2014-01-29 06:51:42 -0500
committer  Ingo Molnar <mingo@kernel.org>           2014-03-11 07:14:56 -0400
commit     fb0527bd5ea99bfeb2dd91e3c1433ecf745d6b99
tree       b3ab4c067c035688d4295fdcadf00170465db7df
parent     1d8fe7dc8078b23e060ec62ccb4cdc1ac3c41bf8
locking/mutexes: Introduce cancelable MCS lock for adaptive spinning
Since we want a task waiting for a mutex_lock() to go to sleep and
reschedule on need_resched(), we must be able to abort the
mcs_spin_lock() around the adaptive spin.

Therefore, implement a cancelable MCS lock.
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: chegu_vinod@hp.com
Cc: paulmck@linux.vnet.ibm.com
Cc: Waiman.Long@hp.com
Cc: torvalds@linux-foundation.org
Cc: tglx@linutronix.de
Cc: riel@redhat.com
Cc: akpm@linux-foundation.org
Cc: davidlohr@hp.com
Cc: hpa@zytor.com
Cc: andi@firstfloor.org
Cc: aswin@hp.com
Cc: scott.norton@hp.com
Cc: Jason Low <jason.low2@hp.com>
Link: http://lkml.kernel.org/n/tip-62hcl5wxydmjzd182zhvk89m@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
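
To make the intended usage concrete, here is a minimal sketch (not part of the
patch) of how a sleeping lock's optimistic-spin path wraps its adaptive spin in
the cancelable lock; the mutex.c hunks below do essentially this. The struct
my_lock type and the my_lock_try_acquire()/my_lock_spin() helpers are
hypothetical stand-ins.

#include <linux/atomic.h>
#include <linux/mutex.h>		/* arch_mutex_cpu_relax() */
#include <linux/sched.h>		/* need_resched() */
#include "mcs_spinlock.h"		/* osq_lock()/osq_unlock() */

/* Hypothetical sleeping lock embedding a spinner queue, like struct mutex. */
struct my_lock {
	atomic_t			count;	/* 1: unlocked, 0: locked */
	struct optimistic_spin_queue	*osq;
};

/* Hypothetical trylock, mirroring struct mutex's count convention. */
static inline bool my_lock_try_acquire(struct my_lock *lock)
{
	return atomic_cmpxchg(&lock->count, 1, 0) == 1;
}

/*
 * Returns true if the lock was taken by spinning, false if the caller
 * should fall back to the blocking slowpath. Preemption must already be
 * disabled, as it is around the optimistic spin in mutex.c.
 */
static bool my_lock_spin(struct my_lock *lock)
{
	/* Queue as a spinner; this fails instead of spinning if we must reschedule. */
	if (!osq_lock(&lock->osq))
		return false;

	while (!need_resched()) {
		if (my_lock_try_acquire(lock)) {
			osq_unlock(&lock->osq);
			return true;
		}
		arch_mutex_cpu_relax();
	}

	/* need_resched(): give up our spinner slot so we can go to sleep. */
	osq_unlock(&lock->osq);
	return false;
}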
 include/linux/mutex.h         |   4
 kernel/locking/Makefile       |   2
 kernel/locking/mcs_spinlock.c | 178
 kernel/locking/mcs_spinlock.h |  15
 kernel/locking/mutex.c        |  10
 5 files changed, 202 insertions, 7 deletions
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index c482e1d2cc49..11692dea18aa 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -46,7 +46,7 @@
  * - detects multi-task circular deadlocks and prints out all affected
  *   locks and tasks (and only those tasks)
  */
-struct mcs_spinlock;
+struct optimistic_spin_queue;
 struct mutex {
 	/* 1: unlocked, 0: locked, negative: locked, possible waiters */
 	atomic_t		count;
@@ -56,7 +56,7 @@ struct mutex {
 	struct task_struct	*owner;
 #endif
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
-	struct mcs_spinlock	*mcs_lock;	/* Spinner MCS lock */
+	struct optimistic_spin_queue	*osq;	/* Spinner MCS lock */
 #endif
 #ifdef CONFIG_DEBUG_MUTEXES
 	const char		*name;
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index baab8e5e7f66..2a9ee96ecf00 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -1,5 +1,5 @@
 
-obj-y += mutex.o semaphore.o rwsem.o lglock.o
+obj-y += mutex.o semaphore.o rwsem.o lglock.o mcs_spinlock.o
 
 ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_lockdep.o = -pg
diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c
new file mode 100644
index 000000000000..838dc9e00669
--- /dev/null
+++ b/kernel/locking/mcs_spinlock.c
@@ -0,0 +1,178 @@
+
+#include <linux/percpu.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include "mcs_spinlock.h"
+
+#ifdef CONFIG_SMP
+
+/*
+ * An MCS-like lock especially tailored for optimistic spinning for sleeping
+ * lock implementations (mutex, rwsem, etc).
+ *
+ * Using a single mcs node per CPU is safe because sleeping locks should not be
+ * called from interrupt context and we have preemption disabled while
+ * spinning.
+ */
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_queue, osq_node);
+
+/*
+ * Get a stable @node->next pointer, either for unlock() or unqueue() purposes.
+ * Can return NULL in case we were the last queued and we updated @lock instead.
+ */
+static inline struct optimistic_spin_queue *
+osq_wait_next(struct optimistic_spin_queue **lock,
+	      struct optimistic_spin_queue *node,
+	      struct optimistic_spin_queue *prev)
+{
+	struct optimistic_spin_queue *next = NULL;
+
+	for (;;) {
+		if (*lock == node && cmpxchg(lock, node, prev) == node) {
+			/*
+			 * We were the last queued, we moved @lock back. @prev
+			 * will now observe @lock and will complete its
+			 * unlock()/unqueue().
+			 */
+			break;
+		}
+
+		/*
+		 * We must xchg() the @node->next value, because if we were to
+		 * leave it in, a concurrent unlock()/unqueue() from
+		 * @node->next might complete Step-A and think its @prev is
+		 * still valid.
+		 *
+		 * If the concurrent unlock()/unqueue() wins the race, we'll
+		 * wait for either @lock to point to us, through its Step-B, or
+		 * wait for a new @node->next from its Step-C.
+		 */
+		if (node->next) {
+			next = xchg(&node->next, NULL);
+			if (next)
+				break;
+		}
+
+		arch_mutex_cpu_relax();
+	}
+
+	return next;
+}
+
+bool osq_lock(struct optimistic_spin_queue **lock)
+{
+	struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node);
+	struct optimistic_spin_queue *prev, *next;
+
+	node->locked = 0;
+	node->next = NULL;
+
+	node->prev = prev = xchg(lock, node);
+	if (likely(prev == NULL))
+		return true;
+
+	ACCESS_ONCE(prev->next) = node;
+
+	/*
+	 * Normally @prev is untouchable after the above store, because at that
+	 * moment unlock can proceed and wipe the node element from the stack.
+	 *
+	 * However, since our nodes are static per-cpu storage, we're
+	 * guaranteed their existence -- this allows us to apply
+	 * cmpxchg in an attempt to undo our queueing.
+	 */
+
+	while (!smp_load_acquire(&node->locked)) {
+		/*
+		 * If we need to reschedule, bail... so we can block.
+		 */
+		if (need_resched())
+			goto unqueue;
+
+		arch_mutex_cpu_relax();
+	}
+	return true;
+
+unqueue:
+	/*
+	 * Step - A  -- stabilize @prev
+	 *
+	 * Undo our @prev->next assignment; this will make @prev's
+	 * unlock()/unqueue() wait for a next pointer since @lock points to us
+	 * (or later).
+	 */
+
+	for (;;) {
+		if (prev->next == node &&
+		    cmpxchg(&prev->next, node, NULL) == node)
+			break;
+
+		/*
+		 * We can only fail the cmpxchg() racing against an unlock(),
+		 * in which case we should observe @node->locked becoming
+		 * true.
+		 */
+		if (smp_load_acquire(&node->locked))
+			return true;
+
+		arch_mutex_cpu_relax();
+
+		/*
+		 * Or we race against a concurrent unqueue()'s step-B, in which
+		 * case its step-C will write us a new @node->prev pointer.
+		 */
+		prev = ACCESS_ONCE(node->prev);
+	}
+
+	/*
+	 * Step - B -- stabilize @next
+	 *
+	 * Similar to unlock(), wait for @node->next or move @lock from @node
+	 * back to @prev.
+	 */
+
+	next = osq_wait_next(lock, node, prev);
+	if (!next)
+		return false;
+
+	/*
+	 * Step - C -- unlink
+	 *
+	 * @prev is stable because it's still waiting for a new @prev->next
+	 * pointer, @next is stable because our @node->next pointer is NULL and
+	 * it will wait in Step-A.
+	 */
+
+	ACCESS_ONCE(next->prev) = prev;
+	ACCESS_ONCE(prev->next) = next;
+
+	return false;
+}
+
+void osq_unlock(struct optimistic_spin_queue **lock)
+{
+	struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node);
+	struct optimistic_spin_queue *next;
+
+	/*
+	 * Fast path for the uncontended case.
+	 */
+	if (likely(cmpxchg(lock, node, NULL) == node))
+		return;
+
+	/*
+	 * Second most likely case.
+	 */
+	next = xchg(&node->next, NULL);
+	if (next) {
+		ACCESS_ONCE(next->locked) = 1;
+		return;
+	}
+
+	next = osq_wait_next(lock, node, NULL);
+	if (next)
+		ACCESS_ONCE(next->locked) = 1;
+}
+
+#endif
+
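
The single per-CPU osq_node above relies on the rule stated in its comment:
osq users never take the lock from interrupt context and keep preemption
disabled for the whole spin, so one node per CPU can never be in use twice at
once. A minimal sketch of that calling convention follows; struct my_lock,
"my_lock.h" and my_lock_spin() are hypothetical, e.g. the sketch shown after
the changelog.

#include <linux/preempt.h>
#include "my_lock.h"	/* hypothetical: struct my_lock and my_lock_spin() */

/* Sketch: stay on one CPU for the whole spin so the per-CPU osq node stays ours. */
static bool my_lock_optimistic_path(struct my_lock *lock)
{
	bool acquired;

	preempt_disable();
	acquired = my_lock_spin(lock);	/* wraps osq_lock()/osq_unlock() */
	preempt_enable();

	return acquired;
}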
diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
index f2a5c6360083..a2dbac4aca6b 100644
--- a/kernel/locking/mcs_spinlock.h
+++ b/kernel/locking/mcs_spinlock.h
@@ -111,4 +111,19 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
 	arch_mcs_spin_unlock_contended(&next->locked);
 }
 
+/*
+ * Cancellable version of the MCS lock above.
+ *
+ * Intended for adaptive spinning of sleeping locks:
+ * mutex_lock()/rwsem_down_{read,write}() etc.
+ */
+
+struct optimistic_spin_queue {
+	struct optimistic_spin_queue *next, *prev;
+	int locked; /* 1 if lock acquired */
+};
+
+extern bool osq_lock(struct optimistic_spin_queue **lock);
+extern void osq_unlock(struct optimistic_spin_queue **lock);
+
 #endif /* __LINUX_MCS_SPINLOCK_H */
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index dc3d6f2bbe2a..2670b84067d6 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -53,7 +53,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
 	INIT_LIST_HEAD(&lock->wait_list);
 	mutex_clear_owner(lock);
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
-	lock->mcs_lock = NULL;
+	lock->osq = NULL;
 #endif
 
 	debug_mutex_init(lock, name, key);
@@ -403,7 +403,9 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	if (!mutex_can_spin_on_owner(lock))
 		goto slowpath;
 
-	mcs_spin_lock(&lock->mcs_lock, &node);
+	if (!osq_lock(&lock->osq))
+		goto slowpath;
+
 	for (;;) {
 		struct task_struct *owner;
 
@@ -442,7 +444,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 			}
 
 			mutex_set_owner(lock);
-			mcs_spin_unlock(&lock->mcs_lock, &node);
+			osq_unlock(&lock->osq);
 			preempt_enable();
 			return 0;
 		}
@@ -464,7 +466,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 		 */
 		arch_mutex_cpu_relax();
 	}
-	mcs_spin_unlock(&lock->mcs_lock, &node);
+	osq_unlock(&lock->osq);
 slowpath:
 #endif
 	spin_lock_mutex(&lock->wait_lock, flags);