Diffstat (limited to 'kernel/srcu.c')
-rw-r--r-- | kernel/srcu.c | 561 |
1 file changed, 110 insertions, 451 deletions
diff --git a/kernel/srcu.c b/kernel/srcu.c
index 2b859828cdc..73ce23feaea 100644
--- a/kernel/srcu.c
+++ b/kernel/srcu.c
@@ -16,17 +16,15 @@ | |||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | 16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
17 | * | 17 | * |
18 | * Copyright (C) IBM Corporation, 2006 | 18 | * Copyright (C) IBM Corporation, 2006 |
19 | * Copyright (C) Fujitsu, 2012 | ||
20 | * | 19 | * |
21 | * Author: Paul McKenney <paulmck@us.ibm.com> | 20 | * Author: Paul McKenney <paulmck@us.ibm.com> |
22 | * Lai Jiangshan <laijs@cn.fujitsu.com> | ||
23 | * | 21 | * |
24 | * For detailed explanation of Read-Copy Update mechanism see - | 22 | * For detailed explanation of Read-Copy Update mechanism see - |
25 | * Documentation/RCU/ *.txt | 23 | * Documentation/RCU/ *.txt |
26 | * | 24 | * |
27 | */ | 25 | */ |
28 | 26 | ||
29 | #include <linux/export.h> | 27 | #include <linux/module.h> |
30 | #include <linux/mutex.h> | 28 | #include <linux/mutex.h> |
31 | #include <linux/percpu.h> | 29 | #include <linux/percpu.h> |
32 | #include <linux/preempt.h> | 30 | #include <linux/preempt.h> |
@@ -36,78 +34,10 @@ | |||
36 | #include <linux/delay.h> | 34 | #include <linux/delay.h> |
37 | #include <linux/srcu.h> | 35 | #include <linux/srcu.h> |
38 | 36 | ||
39 | #include <trace/events/rcu.h> | ||
40 | |||
41 | #include "rcu.h" | ||
42 | |||
43 | /* | ||
44 | * Initialize an rcu_batch structure to empty. | ||
45 | */ | ||
46 | static inline void rcu_batch_init(struct rcu_batch *b) | ||
47 | { | ||
48 | b->head = NULL; | ||
49 | b->tail = &b->head; | ||
50 | } | ||
51 | |||
52 | /* | ||
53 | * Enqueue a callback onto the tail of the specified rcu_batch structure. | ||
54 | */ | ||
55 | static inline void rcu_batch_queue(struct rcu_batch *b, struct rcu_head *head) | ||
56 | { | ||
57 | *b->tail = head; | ||
58 | b->tail = &head->next; | ||
59 | } | ||
60 | |||
61 | /* | ||
62 | * Is the specified rcu_batch structure empty? | ||
63 | */ | ||
64 | static inline bool rcu_batch_empty(struct rcu_batch *b) | ||
65 | { | ||
66 | return b->tail == &b->head; | ||
67 | } | ||
68 | |||
69 | /* | ||
70 | * Remove the callback at the head of the specified rcu_batch structure | ||
71 | * and return a pointer to it, or return NULL if the structure is empty. | ||
72 | */ | ||
73 | static inline struct rcu_head *rcu_batch_dequeue(struct rcu_batch *b) | ||
74 | { | ||
75 | struct rcu_head *head; | ||
76 | |||
77 | if (rcu_batch_empty(b)) | ||
78 | return NULL; | ||
79 | |||
80 | head = b->head; | ||
81 | b->head = head->next; | ||
82 | if (b->tail == &head->next) | ||
83 | rcu_batch_init(b); | ||
84 | |||
85 | return head; | ||
86 | } | ||
87 | |||
88 | /* | ||
89 | * Move all callbacks from the rcu_batch structure specified by "from" to | ||
90 | * the structure specified by "to". | ||
91 | */ | ||
92 | static inline void rcu_batch_move(struct rcu_batch *to, struct rcu_batch *from) | ||
93 | { | ||
94 | if (!rcu_batch_empty(from)) { | ||
95 | *to->tail = from->head; | ||
96 | to->tail = from->tail; | ||
97 | rcu_batch_init(from); | ||
98 | } | ||
99 | } | ||
100 | |||
101 | static int init_srcu_struct_fields(struct srcu_struct *sp) | 37 | static int init_srcu_struct_fields(struct srcu_struct *sp) |
102 | { | 38 | { |
103 | sp->completed = 0; | 39 | sp->completed = 0; |
104 | spin_lock_init(&sp->queue_lock); | 40 | mutex_init(&sp->mutex); |
105 | sp->running = false; | ||
106 | rcu_batch_init(&sp->batch_queue); | ||
107 | rcu_batch_init(&sp->batch_check0); | ||
108 | rcu_batch_init(&sp->batch_check1); | ||
109 | rcu_batch_init(&sp->batch_done); | ||
110 | INIT_DELAYED_WORK(&sp->work, process_srcu); | ||
111 | sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array); | 41 | sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array); |
112 | return sp->per_cpu_ref ? 0 : -ENOMEM; | 42 | return sp->per_cpu_ref ? 0 : -ENOMEM; |
113 | } | 43 | } |
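
Both columns of the hunk above end in init_srcu_struct_fields(), which backs the public init_srcu_struct()/cleanup_srcu_struct() API regardless of which implementation is built. As a reminder of how that API is consumed, here is a minimal sketch of an srcu_struct lifecycle; the my_dev structure and its functions are illustrative, not part of this file.

#include <linux/srcu.h>

struct my_dev {
	struct srcu_struct srcu;
	/* ... data whose readers are protected by SRCU ... */
};

static int my_dev_init(struct my_dev *d)
{
	/* Allocates the per-CPU counter array; returns -ENOMEM on failure. */
	return init_srcu_struct(&d->srcu);
}

static void my_dev_exit(struct my_dev *d)
{
	/* Caller must ensure all readers (and any pending callbacks) are done. */
	cleanup_srcu_struct(&d->srcu);
}
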
@@ -143,116 +73,21 @@ EXPORT_SYMBOL_GPL(init_srcu_struct); | |||
143 | #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | 73 | #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
144 | 74 | ||
145 | /* | 75 | /* |
146 | * Returns approximate total of the readers' ->seq[] values for the | 76 | * srcu_readers_active_idx -- returns approximate number of readers |
147 | * rank of per-CPU counters specified by idx. | 77 | * active on the specified rank of per-CPU counters. |
148 | */ | 78 | */ |
149 | static unsigned long srcu_readers_seq_idx(struct srcu_struct *sp, int idx) | ||
150 | { | ||
151 | int cpu; | ||
152 | unsigned long sum = 0; | ||
153 | unsigned long t; | ||
154 | |||
155 | for_each_possible_cpu(cpu) { | ||
156 | t = ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->seq[idx]); | ||
157 | sum += t; | ||
158 | } | ||
159 | return sum; | ||
160 | } | ||
161 | 79 | ||
162 | /* | 80 | static int srcu_readers_active_idx(struct srcu_struct *sp, int idx) |
163 | * Returns approximate number of readers active on the specified rank | ||
164 | * of the per-CPU ->c[] counters. | ||
165 | */ | ||
166 | static unsigned long srcu_readers_active_idx(struct srcu_struct *sp, int idx) | ||
167 | { | 81 | { |
168 | int cpu; | 82 | int cpu; |
169 | unsigned long sum = 0; | 83 | int sum; |
170 | unsigned long t; | ||
171 | 84 | ||
172 | for_each_possible_cpu(cpu) { | 85 | sum = 0; |
173 | t = ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx]); | 86 | for_each_possible_cpu(cpu) |
174 | sum += t; | 87 | sum += per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx]; |
175 | } | ||
176 | return sum; | 88 | return sum; |
177 | } | 89 | } |
178 | 90 | ||
179 | /* | ||
180 | * Return true if the number of pre-existing readers is determined to | ||
181 | * be stably zero. An example unstable zero can occur if the call | ||
182 | * to srcu_readers_active_idx() misses an __srcu_read_lock() increment, | ||
183 | * but due to task migration, sees the corresponding __srcu_read_unlock() | ||
184 | * decrement. This can happen because srcu_readers_active_idx() takes | ||
185 | * time to sum the array, and might in fact be interrupted or preempted | ||
186 | * partway through the summation. | ||
187 | */ | ||
188 | static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx) | ||
189 | { | ||
190 | unsigned long seq; | ||
191 | |||
192 | seq = srcu_readers_seq_idx(sp, idx); | ||
193 | |||
194 | /* | ||
195 | * The following smp_mb() A pairs with the smp_mb() B located in | ||
196 | * __srcu_read_lock(). This pairing ensures that if an | ||
197 | * __srcu_read_lock() increments its counter after the summation | ||
198 | * in srcu_readers_active_idx(), then the corresponding SRCU read-side | ||
199 | * critical section will see any changes made prior to the start | ||
200 | * of the current SRCU grace period. | ||
201 | * | ||
202 | * Also, if the above call to srcu_readers_seq_idx() saw the | ||
203 | * increment of ->seq[], then the call to srcu_readers_active_idx() | ||
204 | * must see the increment of ->c[]. | ||
205 | */ | ||
206 | smp_mb(); /* A */ | ||
207 | |||
208 | /* | ||
209 | * Note that srcu_readers_active_idx() can incorrectly return | ||
210 | * zero even though there is a pre-existing reader throughout. | ||
211 | * To see this, suppose that task A is in a very long SRCU | ||
212 | * read-side critical section that started on CPU 0, and that | ||
213 | * no other reader exists, so that the sum of the counters | ||
214 | * is equal to one. Then suppose that task B starts executing | ||
215 | * srcu_readers_active_idx(), summing up to CPU 1, and then that | ||
216 | * task C starts reading on CPU 0, so that its increment is not | ||
217 | * summed, but finishes reading on CPU 2, so that its decrement | ||
218 | * -is- summed. Then when task B completes its sum, it will | ||
219 | * incorrectly get zero, despite the fact that task A has been | ||
220 | * in its SRCU read-side critical section the whole time. | ||
221 | * | ||
222 | * We therefore do a validation step should srcu_readers_active_idx() | ||
223 | * return zero. | ||
224 | */ | ||
225 | if (srcu_readers_active_idx(sp, idx) != 0) | ||
226 | return false; | ||
227 | |||
228 | /* | ||
229 | * The remainder of this function is the validation step. | ||
230 | * The following smp_mb() D pairs with the smp_mb() C in | ||
231 | * __srcu_read_unlock(). If the __srcu_read_unlock() was seen | ||
232 | * by srcu_readers_active_idx() above, then any destructive | ||
233 | * operation performed after the grace period will happen after | ||
234 | * the corresponding SRCU read-side critical section. | ||
235 | * | ||
236 | * Note that there can be at most NR_CPUS worth of readers using | ||
237 | * the old index, which is not enough to overflow even a 32-bit | ||
238 | * integer. (Yes, this does mean that systems having more than | ||
239 | * a billion or so CPUs need to be 64-bit systems.) Therefore, | ||
240 | * the sum of the ->seq[] counters cannot possibly overflow. | ||
241 | * Therefore, the only way that the return values of the two | ||
242 | * calls to srcu_readers_seq_idx() can be equal is if there were | ||
243 | * no increments of the corresponding rank of ->seq[] counts | ||
244 | * in the interim. But the missed-increment scenario laid out | ||
245 | * above includes an increment of the ->seq[] counter by | ||
246 | * the corresponding __srcu_read_lock(). Therefore, if this | ||
247 | * scenario occurs, the return values from the two calls to | ||
248 | * srcu_readers_seq_idx() will differ, and thus the validation | ||
249 | * step below suffices. | ||
250 | */ | ||
251 | smp_mb(); /* D */ | ||
252 | |||
253 | return srcu_readers_seq_idx(sp, idx) == seq; | ||
254 | } | ||
255 | |||
256 | /** | 91 | /** |
257 | * srcu_readers_active - returns approximate number of readers. | 92 | * srcu_readers_active - returns approximate number of readers. |
258 | * @sp: which srcu_struct to count active readers (holding srcu_read_lock). | 93 | * @sp: which srcu_struct to count active readers (holding srcu_read_lock). |
@@ -263,14 +98,7 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx) | |||
263 | */ | 98 | */ |
264 | static int srcu_readers_active(struct srcu_struct *sp) | 99 | static int srcu_readers_active(struct srcu_struct *sp) |
265 | { | 100 | { |
266 | int cpu; | 101 | return srcu_readers_active_idx(sp, 0) + srcu_readers_active_idx(sp, 1); |
267 | unsigned long sum = 0; | ||
268 | |||
269 | for_each_possible_cpu(cpu) { | ||
270 | sum += ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[0]); | ||
271 | sum += ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[1]); | ||
272 | } | ||
273 | return sum; | ||
274 | } | 102 | } |
275 | 103 | ||
276 | /** | 104 | /** |
@@ -303,11 +131,10 @@ int __srcu_read_lock(struct srcu_struct *sp) | |||
303 | int idx; | 131 | int idx; |
304 | 132 | ||
305 | preempt_disable(); | 133 | preempt_disable(); |
306 | idx = rcu_dereference_index_check(sp->completed, | 134 | idx = sp->completed & 0x1; |
307 | rcu_read_lock_sched_held()) & 0x1; | 135 | barrier(); /* ensure compiler looks -once- at sp->completed. */ |
308 | ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->c[idx]) += 1; | 136 | per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]++; |
309 | smp_mb(); /* B */ /* Avoid leaking the critical section. */ | 137 | srcu_barrier(); /* ensure compiler won't misorder critical section. */ |
310 | ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->seq[idx]) += 1; | ||
311 | preempt_enable(); | 138 | preempt_enable(); |
312 | return idx; | 139 | return idx; |
313 | } | 140 | } |
@@ -322,8 +149,8 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock); | |||
322 | void __srcu_read_unlock(struct srcu_struct *sp, int idx) | 149 | void __srcu_read_unlock(struct srcu_struct *sp, int idx) |
323 | { | 150 | { |
324 | preempt_disable(); | 151 | preempt_disable(); |
325 | smp_mb(); /* C */ /* Avoid leaking the critical section. */ | 152 | srcu_barrier(); /* ensure compiler won't misorder critical section. */ |
326 | ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->c[idx]) -= 1; | 153 | per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]--; |
327 | preempt_enable(); | 154 | preempt_enable(); |
328 | } | 155 | } |
329 | EXPORT_SYMBOL_GPL(__srcu_read_unlock); | 156 | EXPORT_SYMBOL_GPL(__srcu_read_unlock); |
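
The __srcu_read_lock()/__srcu_read_unlock() primitives above are normally reached through the srcu_read_lock() and srcu_read_unlock() wrappers in linux/srcu.h. A minimal reader-side sketch follows; my_srcu, my_ptr, struct my_data, and reader() are illustrative names, and the sketch assumes the srcu_struct has already been initialized.

#include <linux/srcu.h>

struct my_data {
	int val;
};

static struct srcu_struct my_srcu;		/* assumed initialized elsewhere */
static struct my_data __rcu *my_ptr;		/* SRCU-protected pointer */

static int reader(void)
{
	struct my_data *p;
	int idx, val = 0;

	idx = srcu_read_lock(&my_srcu);		/* records which counter rank was used */
	p = srcu_dereference(my_ptr, &my_srcu);	/* fetch the protected pointer */
	if (p)
		val = p->val;
	srcu_read_unlock(&my_srcu, idx);	/* must pass back the same idx */
	return val;
}
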
@@ -336,119 +163,100 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock); | |||
336 | * we repeatedly block for 1-millisecond time periods. This approach | 163 | * we repeatedly block for 1-millisecond time periods. This approach |
337 | * has done well in testing, so there is no need for a config parameter. | 164 | * has done well in testing, so there is no need for a config parameter. |
338 | */ | 165 | */ |
339 | #define SRCU_RETRY_CHECK_DELAY 5 | 166 | #define SYNCHRONIZE_SRCU_READER_DELAY 10 |
340 | #define SYNCHRONIZE_SRCU_TRYCOUNT 2 | ||
341 | #define SYNCHRONIZE_SRCU_EXP_TRYCOUNT 12 | ||
342 | 167 | ||
343 | /* | 168 | /* |
344 | * @@@ Wait until all pre-existing readers complete. Such readers | 169 | * Helper function for synchronize_srcu() and synchronize_srcu_expedited(). |
345 | * will have used the index specified by "idx". | ||
346 | * the caller should ensures the ->completed is not changed while checking | ||
347 | * and idx = (->completed & 1) ^ 1 | ||
348 | */ | 170 | */ |
349 | static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount) | 171 | static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void)) |
350 | { | 172 | { |
351 | for (;;) { | 173 | int idx; |
352 | if (srcu_readers_active_idx_check(sp, idx)) | ||
353 | return true; | ||
354 | if (--trycount <= 0) | ||
355 | return false; | ||
356 | udelay(SRCU_RETRY_CHECK_DELAY); | ||
357 | } | ||
358 | } | ||
359 | 174 | ||
360 | /* | 175 | idx = sp->completed; |
361 | * Increment the ->completed counter so that future SRCU readers will | 176 | mutex_lock(&sp->mutex); |
362 | * use the other rank of the ->c[] and ->seq[] arrays. This allows | ||
363 | * us to wait for pre-existing readers in a starvation-free manner. | ||
364 | */ | ||
365 | static void srcu_flip(struct srcu_struct *sp) | ||
366 | { | ||
367 | sp->completed++; | ||
368 | } | ||
369 | 177 | ||
370 | /* | 178 | /* |
371 | * Enqueue an SRCU callback on the specified srcu_struct structure, | 179 | * Check to see if someone else did the work for us while we were |
372 | * initiating grace-period processing if it is not already running. | 180 | * waiting to acquire the lock. We need -two- advances of |
373 | */ | 181 | * the counter, not just one. If there was but one, we might have |
374 | void call_srcu(struct srcu_struct *sp, struct rcu_head *head, | 182 | * shown up -after- our helper's first synchronize_sched(), thus |
375 | void (*func)(struct rcu_head *head)) | 183 | * having failed to prevent CPU-reordering races with concurrent |
376 | { | 184 | * srcu_read_unlock()s on other CPUs (see comment below). So we |
377 | unsigned long flags; | 185 | * either (1) wait for two or (2) supply the second ourselves. |
378 | 186 | */ | |
379 | head->next = NULL; | 187 | |
380 | head->func = func; | 188 | if ((sp->completed - idx) >= 2) { |
381 | spin_lock_irqsave(&sp->queue_lock, flags); | 189 | mutex_unlock(&sp->mutex); |
382 | rcu_batch_queue(&sp->batch_queue, head); | 190 | return; |
383 | if (!sp->running) { | ||
384 | sp->running = true; | ||
385 | schedule_delayed_work(&sp->work, 0); | ||
386 | } | 191 | } |
387 | spin_unlock_irqrestore(&sp->queue_lock, flags); | ||
388 | } | ||
389 | EXPORT_SYMBOL_GPL(call_srcu); | ||
390 | 192 | ||
391 | struct rcu_synchronize { | 193 | sync_func(); /* Force memory barrier on all CPUs. */ |
392 | struct rcu_head head; | ||
393 | struct completion completion; | ||
394 | }; | ||
395 | 194 | ||
396 | /* | 195 | /* |
397 | * Awaken the corresponding synchronize_srcu() instance now that a | 196 | * The preceding synchronize_sched() ensures that any CPU that |
398 | * grace period has elapsed. | 197 | * sees the new value of sp->completed will also see any preceding |
399 | */ | 198 | * changes to data structures made by this CPU. This prevents |
400 | static void wakeme_after_rcu(struct rcu_head *head) | 199 | * some other CPU from reordering the accesses in its SRCU |
401 | { | 200 | * read-side critical section to precede the corresponding |
402 | struct rcu_synchronize *rcu; | 201 | * srcu_read_lock() -- ensuring that such references will in |
202 | * fact be protected. | ||
203 | * | ||
204 | * So it is now safe to do the flip. | ||
205 | */ | ||
403 | 206 | ||
404 | rcu = container_of(head, struct rcu_synchronize, head); | 207 | idx = sp->completed & 0x1; |
405 | complete(&rcu->completion); | 208 | sp->completed++; |
406 | } | ||
407 | 209 | ||
408 | static void srcu_advance_batches(struct srcu_struct *sp, int trycount); | 210 | sync_func(); /* Force memory barrier on all CPUs. */ |
409 | static void srcu_reschedule(struct srcu_struct *sp); | ||
410 | 211 | ||
411 | /* | 212 | /* |
412 | * Helper function for synchronize_srcu() and synchronize_srcu_expedited(). | 213 | * At this point, because of the preceding synchronize_sched(), |
413 | */ | 214 | * all srcu_read_lock() calls using the old counters have completed. |
414 | static void __synchronize_srcu(struct srcu_struct *sp, int trycount) | 215 | * Their corresponding critical sections might well be still |
415 | { | 216 | * executing, but the srcu_read_lock() primitives themselves |
416 | struct rcu_synchronize rcu; | 217 | * will have finished executing. We initially give readers |
417 | struct rcu_head *head = &rcu.head; | 218 | * an arbitrarily chosen 10 microseconds to get out of their |
418 | bool done = false; | 219 | * SRCU read-side critical sections, then loop waiting 1/HZ |
419 | 220 | * seconds per iteration. The 10-microsecond value has done | |
420 | rcu_lockdep_assert(!lock_is_held(&sp->dep_map) && | 221 | * very well in testing. |
421 | !lock_is_held(&rcu_bh_lock_map) && | 222 | */ |
422 | !lock_is_held(&rcu_lock_map) && | 223 | |
423 | !lock_is_held(&rcu_sched_lock_map), | 224 | if (srcu_readers_active_idx(sp, idx)) |
424 | "Illegal synchronize_srcu() in same-type SRCU (or RCU) read-side critical section"); | 225 | udelay(SYNCHRONIZE_SRCU_READER_DELAY); |
425 | 226 | while (srcu_readers_active_idx(sp, idx)) | |
426 | init_completion(&rcu.completion); | 227 | schedule_timeout_interruptible(1); |
427 | 228 | ||
428 | head->next = NULL; | 229 | sync_func(); /* Force memory barrier on all CPUs. */ |
429 | head->func = wakeme_after_rcu; | ||
430 | spin_lock_irq(&sp->queue_lock); | ||
431 | if (!sp->running) { | ||
432 | /* steal the processing owner */ | ||
433 | sp->running = true; | ||
434 | rcu_batch_queue(&sp->batch_check0, head); | ||
435 | spin_unlock_irq(&sp->queue_lock); | ||
436 | |||
437 | srcu_advance_batches(sp, trycount); | ||
438 | if (!rcu_batch_empty(&sp->batch_done)) { | ||
439 | BUG_ON(sp->batch_done.head != head); | ||
440 | rcu_batch_dequeue(&sp->batch_done); | ||
441 | done = true; | ||
442 | } | ||
443 | /* give the processing owner to work_struct */ | ||
444 | srcu_reschedule(sp); | ||
445 | } else { | ||
446 | rcu_batch_queue(&sp->batch_queue, head); | ||
447 | spin_unlock_irq(&sp->queue_lock); | ||
448 | } | ||
449 | 230 | ||
450 | if (!done) | 231 | /* |
451 | wait_for_completion(&rcu.completion); | 232 | * The preceding synchronize_sched() forces all srcu_read_unlock() |
233 | * primitives that were executing concurrently with the preceding | ||
234 | * for_each_possible_cpu() loop to have completed by this point. | ||
235 | * More importantly, it also forces the corresponding SRCU read-side | ||
236 | * critical sections to have also completed, and the corresponding | ||
237 | * references to SRCU-protected data items to be dropped. | ||
238 | * | ||
239 | * Note: | ||
240 | * | ||
241 | * Despite what you might think at first glance, the | ||
242 | * preceding synchronize_sched() -must- be within the | ||
243 | * critical section ended by the following mutex_unlock(). | ||
244 | * Otherwise, a task taking the early exit can race | ||
245 | * with a srcu_read_unlock(), which might have executed | ||
246 | * just before the preceding srcu_readers_active() check, | ||
247 | * and whose CPU might have reordered the srcu_read_unlock() | ||
248 | * with the preceding critical section. In this case, there | ||
249 | * is nothing preventing the synchronize_sched() task that is | ||
250 | * taking the early exit from freeing a data structure that | ||
251 | * is still being referenced (out of order) by the task | ||
252 | * doing the srcu_read_unlock(). | ||
253 | * | ||
254 | * Alternatively, the comparison with "2" on the early exit | ||
255 | * could be changed to "3", but this increases synchronize_srcu() | ||
256 | * latency for bulk loads. So the current code is preferred. | ||
257 | */ | ||
258 | |||
259 | mutex_unlock(&sp->mutex); | ||
452 | } | 260 | } |
453 | 261 | ||
454 | /** | 262 | /** |
@@ -467,190 +275,41 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount) | |||
467 | */ | 275 | */ |
468 | void synchronize_srcu(struct srcu_struct *sp) | 276 | void synchronize_srcu(struct srcu_struct *sp) |
469 | { | 277 | { |
470 | __synchronize_srcu(sp, rcu_expedited | 278 | __synchronize_srcu(sp, synchronize_sched); |
471 | ? SYNCHRONIZE_SRCU_EXP_TRYCOUNT | ||
472 | : SYNCHRONIZE_SRCU_TRYCOUNT); | ||
473 | } | 279 | } |
474 | EXPORT_SYMBOL_GPL(synchronize_srcu); | 280 | EXPORT_SYMBOL_GPL(synchronize_srcu); |
475 | 281 | ||
476 | /** | 282 | /** |
477 | * synchronize_srcu_expedited - Brute-force SRCU grace period | 283 | * synchronize_srcu_expedited - like synchronize_srcu, but less patient |
478 | * @sp: srcu_struct with which to synchronize. | 284 | * @sp: srcu_struct with which to synchronize. |
479 | * | 285 | * |
480 | * Wait for an SRCU grace period to elapse, but be more aggressive about | 286 | * Flip the completed counter, and wait for the old count to drain to zero. |
481 | * spinning rather than blocking when waiting. | 287 | * As with classic RCU, the updater must use some separate means of |
288 | * synchronizing concurrent updates. Can block; must be called from | ||
289 | * process context. | ||
482 | * | 290 | * |
483 | * Note that it is illegal to call this function while holding any lock | 291 | * Note that it is illegal to call synchronize_srcu_expedited() |
484 | * that is acquired by a CPU-hotplug notifier. It is also illegal to call | 292 | * from the corresponding SRCU read-side critical section; doing so |
485 | * synchronize_srcu_expedited() from the corresponding SRCU read-side | 293 | * will result in deadlock. However, it is perfectly legal to call |
486 | * critical section; doing so will result in deadlock. However, it is | 294 | * synchronize_srcu_expedited() on one srcu_struct from some other |
487 | * perfectly legal to call synchronize_srcu_expedited() on one srcu_struct | 295 | * srcu_struct's read-side critical section. |
488 | * from some other srcu_struct's read-side critical section, as long as | ||
489 | * the resulting graph of srcu_structs is acyclic. | ||
490 | */ | 296 | */ |
491 | void synchronize_srcu_expedited(struct srcu_struct *sp) | 297 | void synchronize_srcu_expedited(struct srcu_struct *sp) |
492 | { | 298 | { |
493 | __synchronize_srcu(sp, SYNCHRONIZE_SRCU_EXP_TRYCOUNT); | 299 | __synchronize_srcu(sp, synchronize_sched_expedited); |
494 | } | 300 | } |
495 | EXPORT_SYMBOL_GPL(synchronize_srcu_expedited); | 301 | EXPORT_SYMBOL_GPL(synchronize_srcu_expedited); |
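
Whichever helper ultimately waits for readers, the update-side pattern built on synchronize_srcu() (or its expedited variant) is the same: unpublish, wait for pre-existing readers, then reclaim. A sketch, reusing the illustrative my_srcu/my_ptr names from the reader example and adding a hypothetical my_lock to serialize updaters:

#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/srcu.h>

static DEFINE_MUTEX(my_lock);			/* serializes updaters */

static void replace_and_free(struct my_data *newp)
{
	struct my_data *old;

	mutex_lock(&my_lock);
	old = rcu_dereference_protected(my_ptr, lockdep_is_held(&my_lock));
	rcu_assign_pointer(my_ptr, newp);	/* publish the new version */
	mutex_unlock(&my_lock);

	synchronize_srcu(&my_srcu);		/* wait out pre-existing readers */
	kfree(old);				/* no reader can still reference "old" */
}
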
496 | 302 | ||
497 | /** | 303 | /** |
498 | * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete. | ||
499 | */ | ||
500 | void srcu_barrier(struct srcu_struct *sp) | ||
501 | { | ||
502 | synchronize_srcu(sp); | ||
503 | } | ||
504 | EXPORT_SYMBOL_GPL(srcu_barrier); | ||
505 | |||
506 | /** | ||
507 | * srcu_batches_completed - return batches completed. | 304 | * srcu_batches_completed - return batches completed. |
508 | * @sp: srcu_struct on which to report batch completion. | 305 | * @sp: srcu_struct on which to report batch completion. |
509 | * | 306 | * |
510 | * Report the number of batches, correlated with, but not necessarily | 307 | * Report the number of batches, correlated with, but not necessarily |
511 | * precisely the same as, the number of grace periods that have elapsed. | 308 | * precisely the same as, the number of grace periods that have elapsed. |
512 | */ | 309 | */ |
310 | |||
513 | long srcu_batches_completed(struct srcu_struct *sp) | 311 | long srcu_batches_completed(struct srcu_struct *sp) |
514 | { | 312 | { |
515 | return sp->completed; | 313 | return sp->completed; |
516 | } | 314 | } |
517 | EXPORT_SYMBOL_GPL(srcu_batches_completed); | 315 | EXPORT_SYMBOL_GPL(srcu_batches_completed); |
518 | |||
519 | #define SRCU_CALLBACK_BATCH 10 | ||
520 | #define SRCU_INTERVAL 1 | ||
521 | |||
522 | /* | ||
523 | * Move any new SRCU callbacks to the first stage of the SRCU grace | ||
524 | * period pipeline. | ||
525 | */ | ||
526 | static void srcu_collect_new(struct srcu_struct *sp) | ||
527 | { | ||
528 | if (!rcu_batch_empty(&sp->batch_queue)) { | ||
529 | spin_lock_irq(&sp->queue_lock); | ||
530 | rcu_batch_move(&sp->batch_check0, &sp->batch_queue); | ||
531 | spin_unlock_irq(&sp->queue_lock); | ||
532 | } | ||
533 | } | ||
534 | |||
535 | /* | ||
536 | * Core SRCU state machine. Advance callbacks from ->batch_check0 to | ||
537 | * ->batch_check1 and then to ->batch_done as readers drain. | ||
538 | */ | ||
539 | static void srcu_advance_batches(struct srcu_struct *sp, int trycount) | ||
540 | { | ||
541 | int idx = 1 ^ (sp->completed & 1); | ||
542 | |||
543 | /* | ||
544 | * Because readers might be delayed for an extended period after | ||
545 | * fetching ->completed for their index, at any point in time there | ||
546 | * might well be readers using both idx=0 and idx=1. We therefore | ||
547 | * need to wait for readers to clear from both index values before | ||
548 | * invoking a callback. | ||
549 | */ | ||
550 | |||
551 | if (rcu_batch_empty(&sp->batch_check0) && | ||
552 | rcu_batch_empty(&sp->batch_check1)) | ||
553 | return; /* no callbacks need to be advanced */ | ||
554 | |||
555 | if (!try_check_zero(sp, idx, trycount)) | ||
556 | return; /* failed to advance, will try after SRCU_INTERVAL */ | ||
557 | |||
558 | /* | ||
559 | * The callbacks in ->batch_check1 have already done with their | ||
560 | * first zero check and flip back when they were enqueued on | ||
561 | * ->batch_check0 in a previous invocation of srcu_advance_batches(). | ||
562 | * (Presumably try_check_zero() returned false during that | ||
563 | * invocation, leaving the callbacks stranded on ->batch_check1.) | ||
564 | * They are therefore ready to invoke, so move them to ->batch_done. | ||
565 | */ | ||
566 | rcu_batch_move(&sp->batch_done, &sp->batch_check1); | ||
567 | |||
568 | if (rcu_batch_empty(&sp->batch_check0)) | ||
569 | return; /* no callbacks need to be advanced */ | ||
570 | srcu_flip(sp); | ||
571 | |||
572 | /* | ||
573 | * The callbacks in ->batch_check0 just finished their | ||
574 | * first check zero and flip, so move them to ->batch_check1 | ||
575 | * for future checking on the other idx. | ||
576 | */ | ||
577 | rcu_batch_move(&sp->batch_check1, &sp->batch_check0); | ||
578 | |||
579 | /* | ||
580 | * SRCU read-side critical sections are normally short, so check | ||
581 | * at least twice in quick succession after a flip. | ||
582 | */ | ||
583 | trycount = trycount < 2 ? 2 : trycount; | ||
584 | if (!try_check_zero(sp, idx^1, trycount)) | ||
585 | return; /* failed to advance, will try after SRCU_INTERVAL */ | ||
586 | |||
587 | /* | ||
588 | * The callbacks in ->batch_check1 have now waited for all | ||
589 | * pre-existing readers using both idx values. They are therefore | ||
590 | * ready to invoke, so move them to ->batch_done. | ||
591 | */ | ||
592 | rcu_batch_move(&sp->batch_done, &sp->batch_check1); | ||
593 | } | ||
594 | |||
595 | /* | ||
596 | * Invoke a limited number of SRCU callbacks that have passed through | ||
597 | * their grace period. If there are more to do, SRCU will reschedule | ||
598 | * the workqueue. | ||
599 | */ | ||
600 | static void srcu_invoke_callbacks(struct srcu_struct *sp) | ||
601 | { | ||
602 | int i; | ||
603 | struct rcu_head *head; | ||
604 | |||
605 | for (i = 0; i < SRCU_CALLBACK_BATCH; i++) { | ||
606 | head = rcu_batch_dequeue(&sp->batch_done); | ||
607 | if (!head) | ||
608 | break; | ||
609 | local_bh_disable(); | ||
610 | head->func(head); | ||
611 | local_bh_enable(); | ||
612 | } | ||
613 | } | ||
614 | |||
615 | /* | ||
616 | * Finished one round of SRCU grace period. Start another if there are | ||
617 | * more SRCU callbacks queued, otherwise put SRCU into not-running state. | ||
618 | */ | ||
619 | static void srcu_reschedule(struct srcu_struct *sp) | ||
620 | { | ||
621 | bool pending = true; | ||
622 | |||
623 | if (rcu_batch_empty(&sp->batch_done) && | ||
624 | rcu_batch_empty(&sp->batch_check1) && | ||
625 | rcu_batch_empty(&sp->batch_check0) && | ||
626 | rcu_batch_empty(&sp->batch_queue)) { | ||
627 | spin_lock_irq(&sp->queue_lock); | ||
628 | if (rcu_batch_empty(&sp->batch_done) && | ||
629 | rcu_batch_empty(&sp->batch_check1) && | ||
630 | rcu_batch_empty(&sp->batch_check0) && | ||
631 | rcu_batch_empty(&sp->batch_queue)) { | ||
632 | sp->running = false; | ||
633 | pending = false; | ||
634 | } | ||
635 | spin_unlock_irq(&sp->queue_lock); | ||
636 | } | ||
637 | |||
638 | if (pending) | ||
639 | schedule_delayed_work(&sp->work, SRCU_INTERVAL); | ||
640 | } | ||
641 | |||
642 | /* | ||
643 | * This is the work-queue function that handles SRCU grace periods. | ||
644 | */ | ||
645 | void process_srcu(struct work_struct *work) | ||
646 | { | ||
647 | struct srcu_struct *sp; | ||
648 | |||
649 | sp = container_of(work, struct srcu_struct, work.work); | ||
650 | |||
651 | srcu_collect_new(sp); | ||
652 | srcu_advance_batches(sp, 1); | ||
653 | srcu_invoke_callbacks(sp); | ||
654 | srcu_reschedule(sp); | ||
655 | } | ||
656 | EXPORT_SYMBOL_GPL(process_srcu); | ||
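
process_srcu() above is the workqueue handler that walks callbacks through the batch pipeline and finally invokes them in srcu_invoke_callbacks(). Those callbacks are queued by call_srcu(), the asynchronous counterpart of synchronize_srcu() on the left-hand side of this diff. A sketch of how a caller might retire an object through it, again with illustrative names (the struct extends the earlier my_data with an embedded rcu_head):

#include <linux/slab.h>
#include <linux/srcu.h>

struct my_data {
	int val;
	struct rcu_head rh;		/* needed for asynchronous reclamation */
};

static void my_free_cb(struct rcu_head *rh)
{
	kfree(container_of(rh, struct my_data, rh));
}

static void retire(struct my_data *old)
{
	/* Queued on ->batch_queue; process_srcu() invokes it after a grace period. */
	call_srcu(&my_srcu, &old->rh, my_free_cb);
}
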