author | Davidlohr Bueso <dave@stgolabs.net> | 2015-01-06 14:45:07 -0500
---|---|---
committer | Ingo Molnar <mingo@kernel.org> | 2015-01-14 09:07:32 -0500
commit | d84b6728c54dcf73bcef3e3f7cf6767e2d224e39 (patch) |
tree | c3d8f60aee0a71f79666e3d47df73deff975a5ab /kernel/locking/osq_lock.c |
parent | 4bd19084faa61a8c68586e74f03f5776179f65c2 (diff) |
locking/mcs: Better differentiate between MCS variants
We have two flavors of the MCS spinlock: standard and cancelable (OSQ).
While each one is independent of the other, we currently mix and match
them. This patch:

- Moves the OSQ code out of mcs_spinlock.h (which only deals with the
  traditional version) into include/linux/osq_lock.h. No unnecessary code
  is added to the more global header file; any locks that make use of OSQ
  must include it anyway.
- Renames mcs_spinlock.c to osq_lock.c. This file only contains OSQ code.
- Introduces CONFIG_LOCK_SPIN_ON_OWNER so that osq_lock is only built when
  there is support for it (a sketch of this wiring follows below).
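
For orientation, here is a minimal sketch of what the relocated include/linux/osq_lock.h could look like, reconstructed from how kernel/locking/osq_lock.c (below) uses the types; the header itself is not part of the diff shown on this page, so the exact field layout and the osq_lock_init() helper are assumptions rather than a quote of the real file.

/* Sketch of include/linux/osq_lock.h -- reconstructed, not quoted from the patch. */
#ifndef __LINUX_OSQ_LOCK_H
#define __LINUX_OSQ_LOCK_H

#include <linux/atomic.h>

/*
 * An MCS like lock especially tailored for optimistic spinning for sleeping
 * lock implementations (mutex, rwsem, etc).
 */
struct optimistic_spin_node {
        struct optimistic_spin_node *next, *prev;
        int locked;     /* 1 if lock acquired */
        int cpu;        /* encoded CPU # + 1 value */
};

struct optimistic_spin_queue {
        /*
         * Stores an encoded value of the CPU # of the tail node in the queue.
         * If the queue is empty, then it's set to OSQ_UNLOCKED_VAL.
         */
        atomic_t tail;
};

#define OSQ_UNLOCKED_VAL (0)

/* Assumed initializer; verify against the real header. */
static inline void osq_lock_init(struct optimistic_spin_queue *lock)
{
        atomic_set(&lock->tail, OSQ_UNLOCKED_VAL);
}

extern bool osq_lock(struct optimistic_spin_queue *lock);
extern void osq_unlock(struct optimistic_spin_queue *lock);

#endif /* __LINUX_OSQ_LOCK_H */

The CONFIG_LOCK_SPIN_ON_OWNER side of the change presumably amounts to a new Kconfig symbol plus a conditional build rule along the lines of obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_lock.o in kernel/locking/Makefile; those hunks fall outside the kernel/locking/osq_lock.c diffstat below.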
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Jason Low <jason.low2@hp.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mikulas Patocka <mpatocka@redhat.com>
Cc: Waiman Long <Waiman.Long@hp.com>
Link: http://lkml.kernel.org/r/1420573509-24774-5-git-send-email-dave@stgolabs.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/locking/osq_lock.c')
-rw-r--r-- | kernel/locking/osq_lock.c | 203
1 file changed, 203 insertions, 0 deletions
diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c
new file mode 100644
index 000000000000..ec83d4db8ec6
--- /dev/null
+++ b/kernel/locking/osq_lock.c
@@ -0,0 +1,203 @@
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/osq_lock.h>

/*
 * An MCS like lock especially tailored for optimistic spinning for sleeping
 * lock implementations (mutex, rwsem, etc).
 *
 * Using a single mcs node per CPU is safe because sleeping locks should not be
 * called from interrupt context and we have preemption disabled while
 * spinning.
 */
static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_node, osq_node);

/*
 * We use the value 0 to represent "no CPU", thus the encoded value
 * will be the CPU number incremented by 1.
 */
static inline int encode_cpu(int cpu_nr)
{
        return cpu_nr + 1;
}

static inline struct optimistic_spin_node *decode_cpu(int encoded_cpu_val)
{
        int cpu_nr = encoded_cpu_val - 1;

        return per_cpu_ptr(&osq_node, cpu_nr);
}

/*
 * Get a stable @node->next pointer, either for unlock() or unqueue() purposes.
 * Can return NULL in case we were the last queued and we updated @lock instead.
 */
static inline struct optimistic_spin_node *
osq_wait_next(struct optimistic_spin_queue *lock,
              struct optimistic_spin_node *node,
              struct optimistic_spin_node *prev)
{
        struct optimistic_spin_node *next = NULL;
        int curr = encode_cpu(smp_processor_id());
        int old;

        /*
         * If there is a prev node in queue, then the 'old' value will be
         * the prev node's CPU #, else it's set to OSQ_UNLOCKED_VAL since if
         * we're currently last in queue, then the queue will then become empty.
         */
        old = prev ? prev->cpu : OSQ_UNLOCKED_VAL;

        for (;;) {
                if (atomic_read(&lock->tail) == curr &&
                    atomic_cmpxchg(&lock->tail, curr, old) == curr) {
                        /*
                         * We were the last queued, we moved @lock back. @prev
                         * will now observe @lock and will complete its
                         * unlock()/unqueue().
                         */
                        break;
                }

                /*
                 * We must xchg() the @node->next value, because if we were to
                 * leave it in, a concurrent unlock()/unqueue() from
                 * @node->next might complete Step-A and think its @prev is
                 * still valid.
                 *
                 * If the concurrent unlock()/unqueue() wins the race, we'll
                 * wait for either @lock to point to us, through its Step-B, or
                 * wait for a new @node->next from its Step-C.
                 */
                if (node->next) {
                        next = xchg(&node->next, NULL);
                        if (next)
                                break;
                }

                cpu_relax_lowlatency();
        }

        return next;
}

bool osq_lock(struct optimistic_spin_queue *lock)
{
        struct optimistic_spin_node *node = this_cpu_ptr(&osq_node);
        struct optimistic_spin_node *prev, *next;
        int curr = encode_cpu(smp_processor_id());
        int old;

        node->locked = 0;
        node->next = NULL;
        node->cpu = curr;

        old = atomic_xchg(&lock->tail, curr);
        if (old == OSQ_UNLOCKED_VAL)
                return true;

        prev = decode_cpu(old);
        node->prev = prev;
        ACCESS_ONCE(prev->next) = node;

        /*
         * Normally @prev is untouchable after the above store; because at that
         * moment unlock can proceed and wipe the node element from stack.
         *
         * However, since our nodes are static per-cpu storage, we're
         * guaranteed their existence -- this allows us to apply
         * cmpxchg in an attempt to undo our queueing.
         */

        while (!smp_load_acquire(&node->locked)) {
                /*
                 * If we need to reschedule bail... so we can block.
                 */
                if (need_resched())
                        goto unqueue;

                cpu_relax_lowlatency();
        }
        return true;

unqueue:
        /*
         * Step - A  -- stabilize @prev
         *
         * Undo our @prev->next assignment; this will make @prev's
         * unlock()/unqueue() wait for a next pointer since @lock points to us
         * (or later).
         */

        for (;;) {
                if (prev->next == node &&
                    cmpxchg(&prev->next, node, NULL) == node)
                        break;

                /*
                 * We can only fail the cmpxchg() racing against an unlock(),
                 * in which case we should observe @node->locked becomming
                 * true.
                 */
                if (smp_load_acquire(&node->locked))
                        return true;

                cpu_relax_lowlatency();

                /*
                 * Or we race against a concurrent unqueue()'s step-B, in which
                 * case its step-C will write us a new @node->prev pointer.
                 */
                prev = ACCESS_ONCE(node->prev);
        }

        /*
         * Step - B -- stabilize @next
         *
         * Similar to unlock(), wait for @node->next or move @lock from @node
         * back to @prev.
         */

        next = osq_wait_next(lock, node, prev);
        if (!next)
                return false;

        /*
         * Step - C -- unlink
         *
         * @prev is stable because its still waiting for a new @prev->next
         * pointer, @next is stable because our @node->next pointer is NULL and
         * it will wait in Step-A.
         */

        ACCESS_ONCE(next->prev) = prev;
        ACCESS_ONCE(prev->next) = next;

        return false;
}

void osq_unlock(struct optimistic_spin_queue *lock)
{
        struct optimistic_spin_node *node, *next;
        int curr = encode_cpu(smp_processor_id());

        /*
         * Fast path for the uncontended case.
         */
        if (likely(atomic_cmpxchg(&lock->tail, curr, OSQ_UNLOCKED_VAL) == curr))
                return;

        /*
         * Second most likely case.
         */
        node = this_cpu_ptr(&osq_node);
        next = xchg(&node->next, NULL);
        if (next) {
                ACCESS_ONCE(next->locked) = 1;
                return;
        }

        next = osq_wait_next(lock, node, NULL);
        if (next)
                ACCESS_ONCE(next->locked) = 1;
}
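
As a usage illustration of the API above, here is a simplified, hypothetical optimistic-spin loop in the spirit of what a sleeping lock does around osq_lock()/osq_unlock(); the my_sleeping_lock type, its owner field, and the cmpxchg-based trylock are made up for the example and are not part of this patch.

/* Hypothetical caller -- illustrative only, not part of this commit. */
#include <linux/atomic.h>
#include <linux/osq_lock.h>
#include <linux/sched.h>

struct my_sleeping_lock {
        atomic_long_t                   owner;  /* 0 == unlocked (assumed convention) */
        struct optimistic_spin_queue    osq;
};

static bool my_lock_optimistic_spin(struct my_sleeping_lock *lock)
{
        bool taken = false;

        /* OSQ relies on preemption being disabled while spinning. */
        preempt_disable();

        /*
         * Queue on the per-lock OSQ so that only one task at a time spins
         * on the lock word itself; osq_lock() returns false if we should
         * stop spinning and block instead.
         */
        if (!osq_lock(&lock->osq))
                goto out;

        while (!need_resched()) {
                /* Try to take the lock; 0 -> current means we got it. */
                if (atomic_long_cmpxchg(&lock->owner, 0,
                                        (long)current) == 0) {
                        taken = true;
                        break;
                }
                cpu_relax_lowlatency();
        }

        osq_unlock(&lock->osq);
out:
        preempt_enable();
        return taken;
}

Because the spinner's queue node is the static per-CPU osq_node, the caller must not sleep or re-enter the OSQ from interrupt context between osq_lock() and osq_unlock(), which is exactly the constraint stated in the comment at the top of the file.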