author | Peter Zijlstra <peterz@infradead.org> | 2014-01-29 06:51:42 -0500
---|---|---
committer | Ingo Molnar <mingo@kernel.org> | 2014-03-11 07:14:56 -0400
commit | fb0527bd5ea99bfeb2dd91e3c1433ecf745d6b99 (patch) |
tree | b3ab4c067c035688d4295fdcadf00170465db7df /kernel/locking/mcs_spinlock.c |
parent | 1d8fe7dc8078b23e060ec62ccb4cdc1ac3c41bf8 (diff) |
locking/mutexes: Introduce cancelable MCS lock for adaptive spinning
Since we want a task waiting for a mutex_lock() to go to sleep and
reschedule on need_resched(), we must be able to abort the
mcs_spin_lock() around the adaptive spin.
Therefore, implement a cancelable MCS lock.
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: chegu_vinod@hp.com
Cc: paulmck@linux.vnet.ibm.com
Cc: Waiman.Long@hp.com
Cc: torvalds@linux-foundation.org
Cc: tglx@linutronix.de
Cc: riel@redhat.com
Cc: akpm@linux-foundation.org
Cc: davidlohr@hp.com
Cc: hpa@zytor.com
Cc: andi@firstfloor.org
Cc: aswin@hp.com
Cc: scott.norton@hp.com
Cc: Jason Low <jason.low2@hp.com>
Link: http://lkml.kernel.org/n/tip-62hcl5wxydmjzd182zhvk89m@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
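
For context, the caller-side pattern this enables looks roughly like the sketch below: the sleeping lock's optimistic-spin path queues on the cancelable MCS lock and bails out as soon as need_resched() fires, instead of spinning unconditionally. Only osq_lock()/osq_unlock(), need_resched() and arch_mutex_cpu_relax() come from this series; struct my_sleeping_lock, my_lock_owner_is_running() and my_lock_try_acquire() are hypothetical stand-ins for the mutex code changed in the companion patches.

/*
 * Illustrative sketch only -- not part of this patch. The my_lock_*
 * names are hypothetical stand-ins for the real sleeping-lock code.
 */
struct my_sleeping_lock {
        struct optimistic_spin_queue *osq;      /* tail of the cancelable MCS queue */
        /* ... the lock's own state ... */
};

static bool my_lock_optimistic_spin(struct my_sleeping_lock *lock)
{
        bool acquired = false;

        /*
         * Queue on the cancelable MCS lock; osq_lock() unqueues itself and
         * returns false if need_resched() becomes true while it is waiting.
         */
        if (!osq_lock(&lock->osq))
                return false;                           /* give up spinning, go block */

        while (my_lock_owner_is_running(lock)) {        /* hypothetical owner check */
                if (need_resched())
                        goto unlock;                    /* stop spinning so the task can block */
                arch_mutex_cpu_relax();
        }

        /* hypothetical attempt to take the lock once the owner seems gone */
        acquired = my_lock_try_acquire(lock);

unlock:
        osq_unlock(&lock->osq);
        return acquired;                                /* false => fall back to the sleeping slow path */
}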
Diffstat (limited to 'kernel/locking/mcs_spinlock.c')
-rw-r--r-- | kernel/locking/mcs_spinlock.c | 178
1 file changed, 178 insertions, 0 deletions
diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c
new file mode 100644
index 000000000000..838dc9e00669
--- /dev/null
+++ b/kernel/locking/mcs_spinlock.c
@@ -0,0 +1,178 @@

#include <linux/percpu.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include "mcs_spinlock.h"

#ifdef CONFIG_SMP

/*
 * An MCS-like lock especially tailored for optimistic spinning for sleeping
 * lock implementations (mutex, rwsem, etc).
 *
 * Using a single MCS node per CPU is safe because sleeping locks should not be
 * called from interrupt context and we have preemption disabled while
 * spinning.
 */
static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_queue, osq_node);

/*
 * Get a stable @node->next pointer, either for unlock() or unqueue() purposes.
 * Can return NULL in case we were the last queued and we updated @lock instead.
 */
static inline struct optimistic_spin_queue *
osq_wait_next(struct optimistic_spin_queue **lock,
              struct optimistic_spin_queue *node,
              struct optimistic_spin_queue *prev)
{
        struct optimistic_spin_queue *next = NULL;

        for (;;) {
                if (*lock == node && cmpxchg(lock, node, prev) == node) {
                        /*
                         * We were the last queued, we moved @lock back. @prev
                         * will now observe @lock and will complete its
                         * unlock()/unqueue().
                         */
                        break;
                }

                /*
                 * We must xchg() the @node->next value, because if we were to
                 * leave it in, a concurrent unlock()/unqueue() from
                 * @node->next might complete Step-A and think its @prev is
                 * still valid.
                 *
                 * If the concurrent unlock()/unqueue() wins the race, we'll
                 * wait for either @lock to point to us, through its Step-B, or
                 * wait for a new @node->next from its Step-C.
                 */
                if (node->next) {
                        next = xchg(&node->next, NULL);
                        if (next)
                                break;
                }

                arch_mutex_cpu_relax();
        }

        return next;
}

bool osq_lock(struct optimistic_spin_queue **lock)
{
        struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node);
        struct optimistic_spin_queue *prev, *next;

        node->locked = 0;
        node->next = NULL;

        node->prev = prev = xchg(lock, node);
        if (likely(prev == NULL))
                return true;

        ACCESS_ONCE(prev->next) = node;

        /*
         * Normally @prev is untouchable after the above store, because at that
         * moment unlock can proceed and wipe the node element from the stack.
         *
         * However, since our nodes are static per-cpu storage, we're
         * guaranteed their existence -- this allows us to apply
         * cmpxchg in an attempt to undo our queueing.
         */

        while (!smp_load_acquire(&node->locked)) {
                /*
                 * If we need to reschedule, bail... so we can block.
                 */
                if (need_resched())
                        goto unqueue;

                arch_mutex_cpu_relax();
        }
        return true;

unqueue:
        /*
         * Step - A -- stabilize @prev
         *
         * Undo our @prev->next assignment; this will make @prev's
         * unlock()/unqueue() wait for a next pointer since @lock points to us
         * (or later).
         */

        for (;;) {
                if (prev->next == node &&
                    cmpxchg(&prev->next, node, NULL) == node)
                        break;

                /*
                 * We can only fail the cmpxchg() racing against an unlock(),
                 * in which case we should observe @node->locked becoming
                 * true.
                 */
                if (smp_load_acquire(&node->locked))
                        return true;

                arch_mutex_cpu_relax();

                /*
                 * Or we race against a concurrent unqueue()'s step-B, in which
                 * case its step-C will write us a new @node->prev pointer.
                 */
                prev = ACCESS_ONCE(node->prev);
        }

        /*
         * Step - B -- stabilize @next
         *
         * Similar to unlock(), wait for @node->next or move @lock from @node
         * back to @prev.
         */

        next = osq_wait_next(lock, node, prev);
        if (!next)
                return false;

        /*
         * Step - C -- unlink
         *
         * @prev is stable because it's still waiting for a new @prev->next
         * pointer, @next is stable because our @node->next pointer is NULL and
         * it will wait in Step-A.
         */

        ACCESS_ONCE(next->prev) = prev;
        ACCESS_ONCE(prev->next) = next;

        return false;
}

void osq_unlock(struct optimistic_spin_queue **lock)
{
        struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node);
        struct optimistic_spin_queue *next;

        /*
         * Fast path for the uncontended case.
         */
        if (likely(cmpxchg(lock, node, NULL) == node))
                return;

        /*
         * Second most likely case.
         */
        next = xchg(&node->next, NULL);
        if (next) {
                ACCESS_ONCE(next->locked) = 1;
                return;
        }

        next = osq_wait_next(lock, node, NULL);
        if (next)
                ACCESS_ONCE(next->locked) = 1;
}

#endif
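
The node type itself is declared in kernel/locking/mcs_spinlock.h, which falls outside this diffstat-limited view. From the accesses above (@node->next, @node->prev, @node->locked) it presumably looks roughly like the sketch below; treat the exact layout as an assumption rather than the committed definition.

/*
 * Assumed layout, inferred from the field accesses in osq_lock() and
 * osq_unlock(); the authoritative definition lives in mcs_spinlock.h.
 */
struct optimistic_spin_queue {
        struct optimistic_spin_queue *next, *prev;      /* per-CPU queue links */
        int locked;                                     /* 1 if the lock was handed to us */
};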