-rw-r--r--  kernel/rcu/rcu_segcblist.h   625
-rw-r--r--  kernel/rcu/tree.c            348
-rw-r--r--  kernel/rcu/tree.h             41
-rw-r--r--  kernel/rcu/tree_plugin.h      54
-rw-r--r--  kernel/rcu/tree_trace.c       21
5 files changed, 780 insertions, 309 deletions
diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
new file mode 100644
index 000000000000..24078f3c0218
--- /dev/null
+++ b/kernel/rcu/rcu_segcblist.h
@@ -0,0 +1,625 @@
1/*
2 * RCU segmented callback lists
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, you can access it online at
16 * http://www.gnu.org/licenses/gpl-2.0.html.
17 *
18 * Copyright IBM Corporation, 2017
19 *
20 * Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
21 */
22
23#ifndef __KERNEL_RCU_SEGCBLIST_H
24#define __KERNEL_RCU_SEGCBLIST_H
25
26/* Simple unsegmented callback lists. */
27struct rcu_cblist {
28 struct rcu_head *head;
29 struct rcu_head **tail;
30 long len;
31 long len_lazy;
32};
33
34#define RCU_CBLIST_INITIALIZER(n) { .head = NULL, .tail = &n.head }
35
36/* Initialize simple callback list. */
37static inline void rcu_cblist_init(struct rcu_cblist *rclp)
38{
39 rclp->head = NULL;
40 rclp->tail = &rclp->head;
41 rclp->len = 0;
42 rclp->len_lazy = 0;
43}
44
45/* Is simple callback list empty? */
46static inline bool rcu_cblist_empty(struct rcu_cblist *rclp)
47{
48 return !rclp->head;
49}
50
51/* Return number of callbacks in simple callback list. */
52static inline long rcu_cblist_n_cbs(struct rcu_cblist *rclp)
53{
54 return rclp->len;
55}
56
57/* Return number of lazy callbacks in simple callback list. */
58static inline long rcu_cblist_n_lazy_cbs(struct rcu_cblist *rclp)
59{
60 return rclp->len_lazy;
61}
62
63/*
64 * Debug function to actually count the number of callbacks.
65 * If the number exceeds the limit specified, return -1.
66 */
67static inline long rcu_cblist_count_cbs(struct rcu_cblist *rclp, long lim)
68{
69 long cnt = 0;
70 struct rcu_head **rhpp = &rclp->head;
71
72 for (;;) {
73 if (!*rhpp)
74 return cnt;
75 if (++cnt > lim)
76 return -1;
77 rhpp = &(*rhpp)->next;
78 }
79}
80
81/*
82 * Dequeue the oldest rcu_head structure from the specified callback
83 * list. This function assumes that the callback is non-lazy, but
84 * the caller can later invoke rcu_cblist_dequeued_lazy() if it
85 * finds otherwise (and if it cares about laziness). This allows
86 * different users to have different ways of determining laziness.
87 */
88static inline struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp)
89{
90 struct rcu_head *rhp;
91
92 rhp = rclp->head;
93 if (!rhp)
94 return NULL;
95 prefetch(rhp);
96 rclp->len--;
97 rclp->head = rhp->next;
98 if (!rclp->head)
99 rclp->tail = &rclp->head;
100 return rhp;
101}
102
103/*
104 * Account for the fact that a previously dequeued callback turned out
105 * to be marked as lazy.
106 */
107static inline void rcu_cblist_dequeued_lazy(struct rcu_cblist *rclp)
108{
109 rclp->len_lazy--;
110}
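For orientation, here is a minimal sketch (not part of the patch) of the dequeue loop these two helpers support; rcu_do_batch() in tree.c below uses the same pattern, and my_invoke_lazy() is a hypothetical stand-in for __rcu_reclaim():

	static void example_drain(struct rcu_cblist *rclp)
	{
		struct rcu_head *rhp;

		while ((rhp = rcu_cblist_dequeue(rclp)) != NULL) {
			if (my_invoke_lazy(rhp))		/* Hypothetical: true if the callback was lazy. */
				rcu_cblist_dequeued_lazy(rclp);	/* Correct the lazy count after the fact. */
		}
	}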
111
112/*
113 * Interim function to return rcu_cblist head pointer. Longer term, the
114 * rcu_cblist will be used more pervasively, removing the need for this
115 * function.
116 */
117static inline struct rcu_head *rcu_cblist_head(struct rcu_cblist *rclp)
118{
119 return rclp->head;
120}
121
122/*
123 * Interim function to return rcu_cblist tail pointer. Longer term, the
124 * rcu_cblist will be used more pervasively, removing the need for this
125 * function.
126 */
127static inline struct rcu_head **rcu_cblist_tail(struct rcu_cblist *rclp)
128{
129 WARN_ON_ONCE(rcu_cblist_empty(rclp));
130 return rclp->tail;
131}
132
133/* Complicated segmented callback lists. ;-) */
134
135/*
136 * Index values for segments in rcu_segcblist structure.
137 *
138 * The segments are as follows:
139 *
140 * [head, *tails[RCU_DONE_TAIL]):
141 * Callbacks whose grace period has elapsed, and thus can be invoked.
142 * [*tails[RCU_DONE_TAIL], *tails[RCU_WAIT_TAIL]):
143 * Callbacks waiting for the current GP from the current CPU's viewpoint.
144 * [*tails[RCU_WAIT_TAIL], *tails[RCU_NEXT_READY_TAIL]):
145 * Callbacks that arrived before the next GP started, again from
146 * the current CPU's viewpoint. These can be handled by the next GP.
147 * [*tails[RCU_NEXT_READY_TAIL], *tails[RCU_NEXT_TAIL]):
148 * Callbacks that might have arrived after the next GP started.
149 * There is some uncertainty as to when a given GP starts and
150 * ends, but a CPU knows the exact times if it is the one starting
151 * or ending the GP. Other CPUs know that the previous GP ends
152 * before the next one starts.
153 *
154 * Note that RCU_WAIT_TAIL cannot be empty unless RCU_NEXT_READY_TAIL is also
155 * empty.
156 *
157 * The ->gp_seq[] array contains the grace-period number at which the
158 * corresponding segment of callbacks will be ready to invoke. A given
159 * element of this array is meaningful only when the corresponding segment
160 * is non-empty, and it is never valid for RCU_DONE_TAIL (whose callbacks
161 * are already ready to invoke) or for RCU_NEXT_TAIL (whose callbacks have
162 * not yet been assigned a grace-period number).
163 */
164#define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */
165#define RCU_WAIT_TAIL 1 /* Also RCU_NEXT_READY head. */
166#define RCU_NEXT_READY_TAIL 2 /* Also RCU_NEXT head. */
167#define RCU_NEXT_TAIL 3
168#define RCU_CBLIST_NSEGS 4
169
170struct rcu_segcblist {
171 struct rcu_head *head;
172 struct rcu_head **tails[RCU_CBLIST_NSEGS];
173 unsigned long gp_seq[RCU_CBLIST_NSEGS];
174 long len;
175 long len_lazy;
176};
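As a concrete illustration of the segment layout documented above (callbacks A-D and the grace-period numbers are hypothetical, not from the patch):

	head -> A -> B -> C -> D -> NULL
	tails[RCU_DONE_TAIL]       == &A->next   (A is ready to invoke)
	tails[RCU_WAIT_TAIL]       == &B->next   (B waits for the current GP)
	tails[RCU_NEXT_READY_TAIL] == &C->next   (C can be handled by the next GP)
	tails[RCU_NEXT_TAIL]       == &D->next   (D arrived after the next GP started)
	gp_seq[RCU_WAIT_TAIL]       == 8         (B becomes ready once ->completed reaches 8)
	gp_seq[RCU_NEXT_READY_TAIL] == 12        (C becomes ready once ->completed reaches 12)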
177
178/*
179 * Initialize an rcu_segcblist structure.
180 */
181static inline void rcu_segcblist_init(struct rcu_segcblist *rsclp)
182{
183 int i;
184
185 BUILD_BUG_ON(RCU_NEXT_TAIL + 1 != ARRAY_SIZE(rsclp->gp_seq));
186 BUILD_BUG_ON(ARRAY_SIZE(rsclp->tails) != ARRAY_SIZE(rsclp->gp_seq));
187 rsclp->head = NULL;
188 for (i = 0; i < RCU_CBLIST_NSEGS; i++)
189 rsclp->tails[i] = &rsclp->head;
190 rsclp->len = 0;
191 rsclp->len_lazy = 0;
192}
193
194/*
195 * Is the specified rcu_segcblist structure empty?
196 *
197 * But careful! The fact that the ->head field is NULL does not
198 * necessarily imply that there are no callbacks associated with
199 * this structure. When callbacks are being invoked, they are
200 * removed as a group. If callback invocation must be preempted,
201 * the remaining callbacks will be added back to the list. Either
202 * way, the counts are updated later.
203 *
204 * So it is often the case that rcu_segcblist_n_cbs() should be used
205 * instead.
206 */
207static inline bool rcu_segcblist_empty(struct rcu_segcblist *rsclp)
208{
209 return !rsclp->head;
210}
211
212/* Return number of callbacks in segmented callback list. */
213static inline long rcu_segcblist_n_cbs(struct rcu_segcblist *rsclp)
214{
215 return READ_ONCE(rsclp->len);
216}
217
218/* Return number of lazy callbacks in segmented callback list. */
219static inline long rcu_segcblist_n_lazy_cbs(struct rcu_segcblist *rsclp)
220{
221 return rsclp->len_lazy;
222}
223
224/* Return number of non-lazy callbacks in segmented callback list. */
225static inline long rcu_segcblist_n_nonlazy_cbs(struct rcu_segcblist *rsclp)
226{
227 return rsclp->len - rsclp->len_lazy;
228}
229
230/*
231 * Is the specified rcu_segcblist enabled, for example, not corresponding
232 * to an offline or callback-offloaded CPU?
233 */
234static inline bool rcu_segcblist_is_enabled(struct rcu_segcblist *rsclp)
235{
236 return !!rsclp->tails[RCU_NEXT_TAIL];
237}
238
239/*
240 * Disable the specified rcu_segcblist structure, so that callbacks can
241 * no longer be posted to it. This structure must be empty.
242 */
243static inline void rcu_segcblist_disable(struct rcu_segcblist *rsclp)
244{
245 WARN_ON_ONCE(!rcu_segcblist_empty(rsclp));
246 WARN_ON_ONCE(rcu_segcblist_n_cbs(rsclp));
247 WARN_ON_ONCE(rcu_segcblist_n_lazy_cbs(rsclp));
248 rsclp->tails[RCU_NEXT_TAIL] = NULL;
249}
250
251/*
252 * Is the specified segment of the specified rcu_segcblist structure
253 * empty of callbacks?
254 */
255static inline bool rcu_segcblist_segempty(struct rcu_segcblist *rsclp, int seg)
256{
257 if (seg == RCU_DONE_TAIL)
258 return &rsclp->head == rsclp->tails[RCU_DONE_TAIL];
259 return rsclp->tails[seg - 1] == rsclp->tails[seg];
260}
261
262/*
263 * Are all segments following the specified segment of the specified
264 * rcu_segcblist structure empty of callbacks? (The specified
265 * segment might well contain callbacks.)
266 */
267static inline bool rcu_segcblist_restempty(struct rcu_segcblist *rsclp, int seg)
268{
269 return !*rsclp->tails[seg];
270}
271
272/*
273 * Does the specified rcu_segcblist structure contain callbacks that
274 * are ready to be invoked?
275 */
276static inline bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp)
277{
278 return rcu_segcblist_is_enabled(rsclp) &&
279 &rsclp->head != rsclp->tails[RCU_DONE_TAIL];
280}
281
282/*
283 * Does the specified rcu_segcblist structure contain callbacks that
284 * are still pending, that is, not yet ready to be invoked?
285 */
286static inline bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp)
287{
288 return rcu_segcblist_is_enabled(rsclp) &&
289 !rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL);
290}
291
292/*
293 * Return a pointer to the first callback in the specified rcu_segcblist
294 * structure. This is useful for diagnostics.
295 */
296static inline struct rcu_head *
297rcu_segcblist_first_cb(struct rcu_segcblist *rsclp)
298{
299 if (rcu_segcblist_is_enabled(rsclp))
300 return rsclp->head;
301 return NULL;
302}
303
304/*
305 * Return a pointer to the first pending callback in the specified
306 * rcu_segcblist structure. This is useful just after posting a given
307 * callback -- if that callback is the first pending callback, then
308 * you cannot rely on someone else having already started up the required
309 * grace period.
310 */
311static inline struct rcu_head *
312rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp)
313{
314 if (rcu_segcblist_is_enabled(rsclp))
315 return *rsclp->tails[RCU_DONE_TAIL];
316 return NULL;
317}
318
319/*
320 * Does the specified rcu_segcblist structure contain callbacks that
321 * have not yet been processed beyond having been posted, that is,
322 * does it contain callbacks in its last segment?
323 */
324static inline bool rcu_segcblist_new_cbs(struct rcu_segcblist *rsclp)
325{
326 return rcu_segcblist_is_enabled(rsclp) &&
327 !rcu_segcblist_restempty(rsclp, RCU_NEXT_READY_TAIL);
328}
329
330/*
331 * Enqueue the specified callback onto the specified rcu_segcblist
332 * structure, updating accounting as needed. Note that the ->len
333 * field may be accessed locklessly, hence the WRITE_ONCE().
334 * The ->len field is used by rcu_barrier() and friends to determine
335 * if it must post a callback on this structure, and it is OK
336 * for rcu_barrier() to sometimes post callbacks needlessly, but
337 * absolutely not OK for it to ever miss posting a callback.
338 */
339static inline void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp,
340 struct rcu_head *rhp, bool lazy)
341{
342 WRITE_ONCE(rsclp->len, rsclp->len + 1); /* ->len sampled locklessly. */
343 if (lazy)
344 rsclp->len_lazy++;
345 smp_mb(); /* Ensure counts are updated before callback is enqueued. */
346 rhp->next = NULL;
347 *rsclp->tails[RCU_NEXT_TAIL] = rhp;
348 rsclp->tails[RCU_NEXT_TAIL] = &rhp->next;
349}
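A minimal sketch (hypothetical, not from the patch) of the enqueue-side pattern this enables; __call_rcu() in tree.c below is the real caller, and my_start_gp() stands in for the grace-period-starting machinery:

	static void example_post(struct rcu_segcblist *rsclp, struct rcu_head *rhp)
	{
		rcu_segcblist_enqueue(rsclp, rhp, false);	/* Post a non-lazy callback. */
		if (rcu_segcblist_first_pend_cb(rsclp) == rhp)
			my_start_gp();	/* Hypothetical: no one else is sure to start the needed GP. */
	}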
350
351/*
352 * Extract only the counts from the specified rcu_segcblist structure,
353 * and place them in the specified rcu_cblist structure. This function
354 * supports both callback orphaning and invocation, hence the separation
355 * of counts and callbacks. (Callbacks ready for invocation must be
356 * orphaned and adopted separately from pending callbacks, but counts
357 * apply to all callbacks. Locking must be used to make sure that
358 * both orphaned-callbacks lists are consistent.)
359 */
360static inline void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp,
361 struct rcu_cblist *rclp)
362{
363 rclp->len_lazy += rsclp->len_lazy;
364 rclp->len += rsclp->len;
365 rsclp->len_lazy = 0;
366 WRITE_ONCE(rsclp->len, 0); /* ->len sampled locklessly. */
367}
368
369/*
370 * Extract only those callbacks ready to be invoked from the specified
371 * rcu_segcblist structure and place them in the specified rcu_cblist
372 * structure.
373 */
374static inline void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp,
375 struct rcu_cblist *rclp)
376{
377 int i;
378
379 if (!rcu_segcblist_ready_cbs(rsclp))
380 return; /* Nothing to do. */
381 *rclp->tail = rsclp->head;
382 rsclp->head = *rsclp->tails[RCU_DONE_TAIL];
383 *rsclp->tails[RCU_DONE_TAIL] = NULL;
384 rclp->tail = rsclp->tails[RCU_DONE_TAIL];
385 for (i = RCU_CBLIST_NSEGS - 1; i >= RCU_DONE_TAIL; i--)
386 if (rsclp->tails[i] == rsclp->tails[RCU_DONE_TAIL])
387 rsclp->tails[i] = &rsclp->head;
388}
389
390/*
391 * Extract only those callbacks still pending (not yet ready to be
392 * invoked) from the specified rcu_segcblist structure and place them in
393 * the specified rcu_cblist structure. Note that this loses information
394 * about any callbacks that might have been partway done waiting for
395 * their grace period. Too bad! They will have to start over.
396 */
397static inline void
398rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp,
399 struct rcu_cblist *rclp)
400{
401 int i;
402
403 if (!rcu_segcblist_pend_cbs(rsclp))
404 return; /* Nothing to do. */
405 *rclp->tail = *rsclp->tails[RCU_DONE_TAIL];
406 rclp->tail = rsclp->tails[RCU_NEXT_TAIL];
407 *rsclp->tails[RCU_DONE_TAIL] = NULL;
408 for (i = RCU_DONE_TAIL + 1; i < RCU_CBLIST_NSEGS; i++)
409 rsclp->tails[i] = rsclp->tails[RCU_DONE_TAIL];
410}
411
412/*
413 * Move the entire contents of the specified rcu_segcblist structure,
414 * counts, callbacks, and all, to the specified rcu_cblist structure.
415 * @@@ Why do we need this??? Moving early-boot CBs to NOCB lists?
416 * @@@ Memory barrier needed? (Not if only used at boot time...)
417 */
418static inline void rcu_segcblist_extract_all(struct rcu_segcblist *rsclp,
419 struct rcu_cblist *rclp)
420{
421 rcu_segcblist_extract_done_cbs(rsclp, rclp);
422 rcu_segcblist_extract_pend_cbs(rsclp, rclp);
423 rcu_segcblist_extract_count(rsclp, rclp);
424}
425
426/*
427 * Insert counts from the specified rcu_cblist structure in the
428 * specified rcu_segcblist structure.
429 */
430static inline void rcu_segcblist_insert_count(struct rcu_segcblist *rsclp,
431 struct rcu_cblist *rclp)
432{
433 rsclp->len_lazy += rclp->len_lazy;
434 /* ->len sampled locklessly. */
435 WRITE_ONCE(rsclp->len, rsclp->len + rclp->len);
436 rclp->len_lazy = 0;
437 rclp->len = 0;
438}
439
440/*
441 * Move callbacks from the specified rcu_cblist to the beginning of the
442 * done-callbacks segment of the specified rcu_segcblist.
443 */
444static inline void rcu_segcblist_insert_done_cbs(struct rcu_segcblist *rsclp,
445 struct rcu_cblist *rclp)
446{
447 int i;
448
449 if (!rclp->head)
450 return; /* No callbacks to move. */
451 *rclp->tail = rsclp->head;
452 rsclp->head = rclp->head;
453 for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++)
454 if (&rsclp->head == rsclp->tails[i])
455 rsclp->tails[i] = rclp->tail;
456 else
457 break;
458 rclp->head = NULL;
459 rclp->tail = &rclp->head;
460}
461
462/*
463 * Move callbacks from the specified rcu_cblist to the end of the
464 * new-callbacks segment of the specified rcu_segcblist.
465 */
466static inline void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp,
467 struct rcu_cblist *rclp)
468{
469 if (!rclp->head)
470 return; /* Nothing to do. */
471 *rsclp->tails[RCU_NEXT_TAIL] = rclp->head;
472 rsclp->tails[RCU_NEXT_TAIL] = rclp->tail;
473 rclp->head = NULL;
474 rclp->tail = &rclp->head;
475}
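The extract/insert helpers above are meant to be used in pairs; a hedged sketch of the orphan-then-adopt flow, mirroring rcu_send_cbs_to_orphanage() and rcu_adopt_orphan_cbs() in tree.c below, with locking omitted:

	static void example_orphan_then_adopt(struct rcu_segcblist *dying,
					      struct rcu_segcblist *adopter,
					      struct rcu_cblist *orphan_pend,
					      struct rcu_cblist *orphan_done)
	{
		/* Outgoing CPU: counts first, then pending callbacks, then done callbacks. */
		rcu_segcblist_extract_count(dying, orphan_done);
		rcu_segcblist_extract_pend_cbs(dying, orphan_pend);
		rcu_segcblist_extract_done_cbs(dying, orphan_done);
		rcu_segcblist_disable(dying);

		/* Surviving CPU: counts, then done callbacks, then pending callbacks. */
		rcu_segcblist_insert_count(adopter, orphan_done);
		rcu_segcblist_insert_done_cbs(adopter, orphan_done);
		rcu_segcblist_insert_pend_cbs(adopter, orphan_pend);
	}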
476
477/*
478 * Advance the callbacks in the specified rcu_segcblist structure based
479 * on the current value passed in for the grace-period counter.
480 */
481static inline void rcu_segcblist_advance(struct rcu_segcblist *rsclp,
482 unsigned long seq)
483{
484 int i, j;
485
486 WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
487 WARN_ON_ONCE(rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL));
488
489 /*
490 * Find all callbacks whose ->gp_seq numbers indicate that they
491 * are ready to invoke, and put them into the RCU_DONE_TAIL segment.
492 */
493 for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
494 if (ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
495 break;
496 rsclp->tails[RCU_DONE_TAIL] = rsclp->tails[i];
497 }
498
499 /* If no callbacks moved, nothing more need be done. */
500 if (i == RCU_WAIT_TAIL)
501 return;
502
503 /* Clean up tail pointers that might have been misordered above. */
504 for (j = RCU_WAIT_TAIL; j < i; j++)
505 rsclp->tails[j] = rsclp->tails[RCU_DONE_TAIL];
506
507 /*
508 * Callbacks moved, so clean up the misordered ->tails[] pointers
509 * that now point into the middle of the list of ready-to-invoke
510 * callbacks. The overall effect is to copy down the later pointers
511 * into the gap that was created by the now-ready segments.
512 */
513 for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
514 if (rsclp->tails[j] == rsclp->tails[RCU_NEXT_TAIL])
515 break; /* No more callbacks. */
516 rsclp->tails[j] = rsclp->tails[i];
517 rsclp->gp_seq[j] = rsclp->gp_seq[i];
518 }
519}
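Continuing the hypothetical A-D layout sketched after the structure definition above: rcu_segcblist_advance(rsclp, 8) merges the RCU_WAIT_TAIL segment into RCU_DONE_TAIL (A and B are now both ready to invoke, since 8 >= gp_seq[RCU_WAIT_TAIL]), slides C down into RCU_WAIT_TAIL with gp_seq 12, leaves RCU_NEXT_READY_TAIL empty, and keeps D in RCU_NEXT_TAIL.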
520
521/*
522 * "Accelerate" callbacks based on more-accurate grace-period information.
523 * The reason for this is that RCU does not synchronize the beginnings and
524 * ends of grace periods, and that callbacks are posted locally. This in
525 * turn means that the callbacks must be labelled conservatively early
526 * on, as getting exact information would degrade both performance and
527 * scalability. When more accurate grace-period information becomes
528 * available, previously posted callbacks can be "accelerated", marking
529 * them to complete at the end of the earlier grace period.
530 *
531 * This function operates on an rcu_segcblist structure, and also the
532 * grace-period sequence number at which new callbacks would become
533 * ready to invoke.
534 */
535static inline bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp,
536 unsigned long seq)
537{
538 int i;
539
540 WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
541 WARN_ON_ONCE(rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL));
542
543 /*
544 * Find the segment preceding the oldest segment of callbacks
545 * whose ->gp_seq[] completion is at or after that passed in via
546 * "seq", skipping any empty segments. This oldest segment, along
547 * with any later segments, can be merged in with any newly arrived
548 * callbacks in the RCU_NEXT_TAIL segment, and assigned "seq"
549 * as their ->gp_seq[] grace-period completion sequence number.
550 */
551 for (i = RCU_NEXT_READY_TAIL; i > RCU_DONE_TAIL; i--)
552 if (rsclp->tails[i] != rsclp->tails[i - 1] &&
553 ULONG_CMP_LT(rsclp->gp_seq[i], seq))
554 break;
555
556 /*
557 * If all the segments contain callbacks that correspond to
558 * earlier grace-period sequence numbers than "seq", leave.
559 * Assuming that the rcu_segcblist structure has enough
560 * segments in its arrays, this can only happen if some of
561 * the non-done segments contain callbacks that really are
562 * ready to invoke. This situation will get straightened
563 * out by the next call to rcu_segcblist_advance().
564 *
565 * Also advance to the oldest segment of callbacks whose
566 * ->gp_seq[] completion is at or after that passed in via "seq",
567 * skipping any empty segments.
568 */
569 if (++i >= RCU_NEXT_TAIL)
570 return false;
571
572 /*
573 * Merge all later callbacks, including newly arrived callbacks,
574 * into the segment located by the for-loop above. Assign "seq"
575 * as the ->gp_seq[] value in order to correctly handle the case
576 * where there were no pending callbacks in the rcu_segcblist
577 * structure other than in the RCU_NEXT_TAIL segment.
578 */
579 for (; i < RCU_NEXT_TAIL; i++) {
580 rsclp->tails[i] = rsclp->tails[RCU_NEXT_TAIL];
581 rsclp->gp_seq[i] = seq;
582 }
583 return true;
584}
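A hedged sketch of how advance and accelerate combine when a grace period ends, mirroring rcu_advance_cbs()/rcu_accelerate_cbs() in tree.c below; completed and next_seq are hypothetical grace-period numbers supplied by the caller:

	static bool example_note_gp_end(struct rcu_segcblist *rsclp,
					unsigned long completed, unsigned long next_seq)
	{
		if (!rcu_segcblist_pend_cbs(rsclp))
			return false;				/* Nothing is waiting for a grace period. */
		rcu_segcblist_advance(rsclp, completed);	/* Move newly ready CBs to RCU_DONE_TAIL. */
		return rcu_segcblist_accelerate(rsclp, next_seq); /* Retag the remaining CBs with next_seq. */
	}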
585
586/*
587 * Scan the specified rcu_segcblist structure for callbacks that need
588 * a grace period later than the one specified by "seq". We don't look
589 * at the RCU_DONE_TAIL or RCU_NEXT_TAIL segments because they don't
590 * have a grace-period sequence number.
591 */
592static inline bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp,
593 unsigned long seq)
594{
595 int i;
596
597 for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
598 if (rsclp->tails[i - 1] != rsclp->tails[i] &&
599 ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
600 return true;
601 return false;
602}
603
604/*
605 * Interim function to return rcu_segcblist head pointer. Longer term, the
606 * rcu_segcblist will be used more pervasively, removing the need for this
607 * function.
608 */
609static inline struct rcu_head *rcu_segcblist_head(struct rcu_segcblist *rsclp)
610{
611 return rsclp->head;
612}
613
614/*
615 * Interim function to return rcu_segcblist tail pointer. Longer term, the
616 * rcu_segcblist will be used more pervasively, removing the need for this
617 * function.
618 */
619static inline struct rcu_head **rcu_segcblist_tail(struct rcu_segcblist *rsclp)
620{
621 WARN_ON_ONCE(rcu_segcblist_empty(rsclp));
622 return rsclp->tails[RCU_NEXT_TAIL];
623}
624
625#endif /* __KERNEL_RCU_SEGCBLIST_H */
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 530ab6cf7a0b..8cc9d40b41ea 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -97,8 +97,8 @@ struct rcu_state sname##_state = { \
97 .gpnum = 0UL - 300UL, \ 97 .gpnum = 0UL - 300UL, \
98 .completed = 0UL - 300UL, \ 98 .completed = 0UL - 300UL, \
99 .orphan_lock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.orphan_lock), \ 99 .orphan_lock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.orphan_lock), \
100 .orphan_nxttail = &sname##_state.orphan_nxtlist, \ 100 .orphan_pend = RCU_CBLIST_INITIALIZER(sname##_state.orphan_pend), \
101 .orphan_donetail = &sname##_state.orphan_donelist, \ 101 .orphan_done = RCU_CBLIST_INITIALIZER(sname##_state.orphan_done), \
102 .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ 102 .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
103 .name = RCU_STATE_NAME(sname), \ 103 .name = RCU_STATE_NAME(sname), \
104 .abbr = sabbr, \ 104 .abbr = sabbr, \
@@ -726,16 +726,6 @@ void rcutorture_record_progress(unsigned long vernum)
726EXPORT_SYMBOL_GPL(rcutorture_record_progress); 726EXPORT_SYMBOL_GPL(rcutorture_record_progress);
727 727
728/* 728/*
729 * Does the CPU have callbacks ready to be invoked?
730 */
731static int
732cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
733{
734 return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL] &&
735 rdp->nxttail[RCU_NEXT_TAIL] != NULL;
736}
737
738/*
739 * Return the root node of the specified rcu_state structure. 729 * Return the root node of the specified rcu_state structure.
740 */ 730 */
741static struct rcu_node *rcu_get_root(struct rcu_state *rsp) 731static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
@@ -765,21 +755,17 @@ static int rcu_future_needs_gp(struct rcu_state *rsp)
765static bool 755static bool
766cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp) 756cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
767{ 757{
768 int i;
769
770 if (rcu_gp_in_progress(rsp)) 758 if (rcu_gp_in_progress(rsp))
771 return false; /* No, a grace period is already in progress. */ 759 return false; /* No, a grace period is already in progress. */
772 if (rcu_future_needs_gp(rsp)) 760 if (rcu_future_needs_gp(rsp))
773 return true; /* Yes, a no-CBs CPU needs one. */ 761 return true; /* Yes, a no-CBs CPU needs one. */
774 if (!rdp->nxttail[RCU_NEXT_TAIL]) 762 if (!rcu_segcblist_is_enabled(&rdp->cblist))
775 return false; /* No, this is a no-CBs (or offline) CPU. */ 763 return false; /* No, this is a no-CBs (or offline) CPU. */
776 if (*rdp->nxttail[RCU_NEXT_READY_TAIL]) 764 if (!rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
777 return true; /* Yes, CPU has newly registered callbacks. */ 765 return true; /* Yes, CPU has newly registered callbacks. */
778 for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) 766 if (rcu_segcblist_future_gp_needed(&rdp->cblist,
779 if (rdp->nxttail[i - 1] != rdp->nxttail[i] && 767 READ_ONCE(rsp->completed)))
780 ULONG_CMP_LT(READ_ONCE(rsp->completed), 768 return true; /* Yes, CBs for future grace period. */
781 rdp->nxtcompleted[i]))
782 return true; /* Yes, CBs for future grace period. */
783 return false; /* No grace period needed. */ 769 return false; /* No grace period needed. */
784} 770}
785 771
@@ -1490,7 +1476,8 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
1490 1476
1491 print_cpu_stall_info_end(); 1477 print_cpu_stall_info_end();
1492 for_each_possible_cpu(cpu) 1478 for_each_possible_cpu(cpu)
1493 totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen; 1479 totqlen += rcu_segcblist_n_cbs(&per_cpu_ptr(rsp->rda,
1480 cpu)->cblist);
1494 pr_cont("(detected by %d, t=%ld jiffies, g=%ld, c=%ld, q=%lu)\n", 1481 pr_cont("(detected by %d, t=%ld jiffies, g=%ld, c=%ld, q=%lu)\n",
1495 smp_processor_id(), (long)(jiffies - rsp->gp_start), 1482 smp_processor_id(), (long)(jiffies - rsp->gp_start),
1496 (long)rsp->gpnum, (long)rsp->completed, totqlen); 1483 (long)rsp->gpnum, (long)rsp->completed, totqlen);
@@ -1544,7 +1531,8 @@ static void print_cpu_stall(struct rcu_state *rsp)
1544 print_cpu_stall_info(rsp, smp_processor_id()); 1531 print_cpu_stall_info(rsp, smp_processor_id());
1545 print_cpu_stall_info_end(); 1532 print_cpu_stall_info_end();
1546 for_each_possible_cpu(cpu) 1533 for_each_possible_cpu(cpu)
1547 totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen; 1534 totqlen += rcu_segcblist_n_cbs(&per_cpu_ptr(rsp->rda,
1535 cpu)->cblist);
1548 pr_cont(" (t=%lu jiffies g=%ld c=%ld q=%lu)\n", 1536 pr_cont(" (t=%lu jiffies g=%ld c=%ld q=%lu)\n",
1549 jiffies - rsp->gp_start, 1537 jiffies - rsp->gp_start,
1550 (long)rsp->gpnum, (long)rsp->completed, totqlen); 1538 (long)rsp->gpnum, (long)rsp->completed, totqlen);
@@ -1647,30 +1635,6 @@ void rcu_cpu_stall_reset(void)
1647} 1635}
1648 1636
1649/* 1637/*
1650 * Initialize the specified rcu_data structure's default callback list
1651 * to empty. The default callback list is the one that is not used by
1652 * no-callbacks CPUs.
1653 */
1654static void init_default_callback_list(struct rcu_data *rdp)
1655{
1656 int i;
1657
1658 rdp->nxtlist = NULL;
1659 for (i = 0; i < RCU_NEXT_SIZE; i++)
1660 rdp->nxttail[i] = &rdp->nxtlist;
1661}
1662
1663/*
1664 * Initialize the specified rcu_data structure's callback list to empty.
1665 */
1666static void init_callback_list(struct rcu_data *rdp)
1667{
1668 if (init_nocb_callback_list(rdp))
1669 return;
1670 init_default_callback_list(rdp);
1671}
1672
1673/*
1674 * Determine the value that ->completed will have at the end of the 1638 * Determine the value that ->completed will have at the end of the
1675 * next subsequent grace period. This is used to tag callbacks so that 1639 * next subsequent grace period. This is used to tag callbacks so that
1676 * a CPU can invoke callbacks in a timely fashion even if that CPU has 1640 * a CPU can invoke callbacks in a timely fashion even if that CPU has
@@ -1724,7 +1688,6 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
1724 unsigned long *c_out) 1688 unsigned long *c_out)
1725{ 1689{
1726 unsigned long c; 1690 unsigned long c;
1727 int i;
1728 bool ret = false; 1691 bool ret = false;
1729 struct rcu_node *rnp_root = rcu_get_root(rdp->rsp); 1692 struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
1730 1693
@@ -1770,13 +1733,11 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
1770 /* 1733 /*
1771 * Get a new grace-period number. If there really is no grace 1734 * Get a new grace-period number. If there really is no grace
1772 * period in progress, it will be smaller than the one we obtained 1735 * period in progress, it will be smaller than the one we obtained
1773 * earlier. Adjust callbacks as needed. Note that even no-CBs 1736 * earlier. Adjust callbacks as needed.
1774 * CPUs have a ->nxtcompleted[] array, so no no-CBs checks needed.
1775 */ 1737 */
1776 c = rcu_cbs_completed(rdp->rsp, rnp_root); 1738 c = rcu_cbs_completed(rdp->rsp, rnp_root);
1777 for (i = RCU_DONE_TAIL; i < RCU_NEXT_TAIL; i++) 1739 if (!rcu_is_nocb_cpu(rdp->cpu))
1778 if (ULONG_CMP_LT(c, rdp->nxtcompleted[i])) 1740 (void)rcu_segcblist_accelerate(&rdp->cblist, c);
1779 rdp->nxtcompleted[i] = c;
1780 1741
1781 /* 1742 /*
1782 * If the needed for the required grace period is already 1743 * If the needed for the required grace period is already
@@ -1856,57 +1817,27 @@ static void rcu_gp_kthread_wake(struct rcu_state *rsp)
1856static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp, 1817static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1857 struct rcu_data *rdp) 1818 struct rcu_data *rdp)
1858{ 1819{
1859 unsigned long c; 1820 bool ret = false;
1860 int i;
1861 bool ret;
1862
1863 /* If the CPU has no callbacks, nothing to do. */
1864 if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
1865 return false;
1866
1867 /*
1868 * Starting from the sublist containing the callbacks most
1869 * recently assigned a ->completed number and working down, find the
1870 * first sublist that is not assignable to an upcoming grace period.
1871 * Such a sublist has something in it (first two tests) and has
1872 * a ->completed number assigned that will complete sooner than
1873 * the ->completed number for newly arrived callbacks (last test).
1874 *
1875 * The key point is that any later sublist can be assigned the
1876 * same ->completed number as the newly arrived callbacks, which
1877 * means that the callbacks in any of these later sublist can be
1878 * grouped into a single sublist, whether or not they have already
1879 * been assigned a ->completed number.
1880 */
1881 c = rcu_cbs_completed(rsp, rnp);
1882 for (i = RCU_NEXT_TAIL - 1; i > RCU_DONE_TAIL; i--)
1883 if (rdp->nxttail[i] != rdp->nxttail[i - 1] &&
1884 !ULONG_CMP_GE(rdp->nxtcompleted[i], c))
1885 break;
1886 1821
1887 /* 1822 /* If no pending (not yet ready to invoke) callbacks, nothing to do. */
1888 * If there are no sublist for unassigned callbacks, leave. 1823 if (!rcu_segcblist_pend_cbs(&rdp->cblist))
1889 * At the same time, advance "i" one sublist, so that "i" will
1890 * index into the sublist where all the remaining callbacks should
1891 * be grouped into.
1892 */
1893 if (++i >= RCU_NEXT_TAIL)
1894 return false; 1824 return false;
1895 1825
1896 /* 1826 /*
1897 * Assign all subsequent callbacks' ->completed number to the next 1827 * Callbacks are often registered with incomplete grace-period
1898 * full grace period and group them all in the sublist initially 1828 * information. Something about the fact that getting exact
1899 * indexed by "i". 1829 * information requires acquiring a global lock... RCU therefore
1830 * makes a conservative estimate of the grace period number at which
1831 * a given callback will become ready to invoke. The following
1832 * code checks this estimate and improves it when possible, thus
1833 * accelerating callback invocation to an earlier grace-period
1834 * number.
1900 */ 1835 */
1901 for (; i <= RCU_NEXT_TAIL; i++) { 1836 if (rcu_segcblist_accelerate(&rdp->cblist, rcu_cbs_completed(rsp, rnp)))
1902 rdp->nxttail[i] = rdp->nxttail[RCU_NEXT_TAIL]; 1837 ret = rcu_start_future_gp(rnp, rdp, NULL);
1903 rdp->nxtcompleted[i] = c;
1904 }
1905 /* Record any needed additional grace periods. */
1906 ret = rcu_start_future_gp(rnp, rdp, NULL);
1907 1838
1908 /* Trace depending on how much we were able to accelerate. */ 1839 /* Trace depending on how much we were able to accelerate. */
1909 if (!*rdp->nxttail[RCU_WAIT_TAIL]) 1840 if (rcu_segcblist_restempty(&rdp->cblist, RCU_WAIT_TAIL))
1910 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccWaitCB")); 1841 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccWaitCB"));
1911 else 1842 else
1912 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccReadyCB")); 1843 trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("AccReadyCB"));
@@ -1926,32 +1857,15 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1926static bool rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp, 1857static bool rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1927 struct rcu_data *rdp) 1858 struct rcu_data *rdp)
1928{ 1859{
1929 int i, j; 1860 /* If no pending (not yet ready to invoke) callbacks, nothing to do. */
1930 1861 if (!rcu_segcblist_pend_cbs(&rdp->cblist))
1931 /* If the CPU has no callbacks, nothing to do. */
1932 if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL])
1933 return false; 1862 return false;
1934 1863
1935 /* 1864 /*
1936 * Find all callbacks whose ->completed numbers indicate that they 1865 * Find all callbacks whose ->completed numbers indicate that they
1937 * are ready to invoke, and put them into the RCU_DONE_TAIL sublist. 1866 * are ready to invoke, and put them into the RCU_DONE_TAIL sublist.
1938 */ 1867 */
1939 for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) { 1868 rcu_segcblist_advance(&rdp->cblist, rnp->completed);
1940 if (ULONG_CMP_LT(rnp->completed, rdp->nxtcompleted[i]))
1941 break;
1942 rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[i];
1943 }
1944 /* Clean up any sublist tail pointers that were misordered above. */
1945 for (j = RCU_WAIT_TAIL; j < i; j++)
1946 rdp->nxttail[j] = rdp->nxttail[RCU_DONE_TAIL];
1947
1948 /* Copy down callbacks to fill in empty sublists. */
1949 for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
1950 if (rdp->nxttail[j] == rdp->nxttail[RCU_NEXT_TAIL])
1951 break;
1952 rdp->nxttail[j] = rdp->nxttail[i];
1953 rdp->nxtcompleted[j] = rdp->nxtcompleted[i];
1954 }
1955 1869
1956 /* Classify any remaining callbacks. */ 1870 /* Classify any remaining callbacks. */
1957 return rcu_accelerate_cbs(rsp, rnp, rdp); 1871 return rcu_accelerate_cbs(rsp, rnp, rdp);
@@ -2668,13 +2582,8 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
2668 * because _rcu_barrier() excludes CPU-hotplug operations, so it 2582 * because _rcu_barrier() excludes CPU-hotplug operations, so it
2669 * cannot be running now. Thus no memory barrier is required. 2583 * cannot be running now. Thus no memory barrier is required.
2670 */ 2584 */
2671 if (rdp->nxtlist != NULL) { 2585 rdp->n_cbs_orphaned += rcu_segcblist_n_cbs(&rdp->cblist);
2672 rsp->qlen_lazy += rdp->qlen_lazy; 2586 rcu_segcblist_extract_count(&rdp->cblist, &rsp->orphan_done);
2673 rsp->qlen += rdp->qlen;
2674 rdp->n_cbs_orphaned += rdp->qlen;
2675 rdp->qlen_lazy = 0;
2676 WRITE_ONCE(rdp->qlen, 0);
2677 }
2678 2587
2679 /* 2588 /*
2680 * Next, move those callbacks still needing a grace period to 2589 * Next, move those callbacks still needing a grace period to
@@ -2682,31 +2591,18 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
2682 * Some of the callbacks might have gone partway through a grace 2591 * Some of the callbacks might have gone partway through a grace
2683 * period, but that is too bad. They get to start over because we 2592 * period, but that is too bad. They get to start over because we
2684 * cannot assume that grace periods are synchronized across CPUs. 2593 * cannot assume that grace periods are synchronized across CPUs.
2685 * We don't bother updating the ->nxttail[] array yet, instead
2686 * we just reset the whole thing later on.
2687 */ 2594 */
2688 if (*rdp->nxttail[RCU_DONE_TAIL] != NULL) { 2595 rcu_segcblist_extract_pend_cbs(&rdp->cblist, &rsp->orphan_pend);
2689 *rsp->orphan_nxttail = *rdp->nxttail[RCU_DONE_TAIL];
2690 rsp->orphan_nxttail = rdp->nxttail[RCU_NEXT_TAIL];
2691 *rdp->nxttail[RCU_DONE_TAIL] = NULL;
2692 }
2693 2596
2694 /* 2597 /*
2695 * Then move the ready-to-invoke callbacks to the orphanage, 2598 * Then move the ready-to-invoke callbacks to the orphanage,
2696 * where some other CPU will pick them up. These will not be 2599 * where some other CPU will pick them up. These will not be
2697 * required to pass though another grace period: They are done. 2600 * required to pass though another grace period: They are done.
2698 */ 2601 */
2699 if (rdp->nxtlist != NULL) { 2602 rcu_segcblist_extract_done_cbs(&rdp->cblist, &rsp->orphan_done);
2700 *rsp->orphan_donetail = rdp->nxtlist;
2701 rsp->orphan_donetail = rdp->nxttail[RCU_DONE_TAIL];
2702 }
2703 2603
2704 /* 2604 /* Finally, disallow further callbacks on this CPU. */
2705 * Finally, initialize the rcu_data structure's list to empty and 2605 rcu_segcblist_disable(&rdp->cblist);
2706 * disallow further callbacks on this CPU.
2707 */
2708 init_callback_list(rdp);
2709 rdp->nxttail[RCU_NEXT_TAIL] = NULL;
2710} 2606}
2711 2607
2712/* 2608/*
@@ -2715,7 +2611,6 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
2715 */ 2611 */
2716static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags) 2612static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
2717{ 2613{
2718 int i;
2719 struct rcu_data *rdp = raw_cpu_ptr(rsp->rda); 2614 struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
2720 2615
2721 /* No-CBs CPUs are handled specially. */ 2616 /* No-CBs CPUs are handled specially. */
@@ -2724,13 +2619,11 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
2724 return; 2619 return;
2725 2620
2726 /* Do the accounting first. */ 2621 /* Do the accounting first. */
2727 rdp->qlen_lazy += rsp->qlen_lazy; 2622 rdp->n_cbs_adopted += rcu_cblist_n_cbs(&rsp->orphan_done);
2728 rdp->qlen += rsp->qlen; 2623 if (rcu_cblist_n_lazy_cbs(&rsp->orphan_done) !=
2729 rdp->n_cbs_adopted += rsp->qlen; 2624 rcu_cblist_n_cbs(&rsp->orphan_done))
2730 if (rsp->qlen_lazy != rsp->qlen)
2731 rcu_idle_count_callbacks_posted(); 2625 rcu_idle_count_callbacks_posted();
2732 rsp->qlen_lazy = 0; 2626 rcu_segcblist_insert_count(&rdp->cblist, &rsp->orphan_done);
2733 rsp->qlen = 0;
2734 2627
2735 /* 2628 /*
2736 * We do not need a memory barrier here because the only way we 2629 * We do not need a memory barrier here because the only way we
@@ -2738,24 +2631,13 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
2738 * we are the task doing the rcu_barrier(). 2631 * we are the task doing the rcu_barrier().
2739 */ 2632 */
2740 2633
2741 /* First adopt the ready-to-invoke callbacks. */ 2634 /* First adopt the ready-to-invoke callbacks, then the done ones. */
2742 if (rsp->orphan_donelist != NULL) { 2635 rcu_segcblist_insert_done_cbs(&rdp->cblist, &rsp->orphan_done);
2743 *rsp->orphan_donetail = *rdp->nxttail[RCU_DONE_TAIL]; 2636 WARN_ON_ONCE(!rcu_cblist_empty(&rsp->orphan_done));
2744 *rdp->nxttail[RCU_DONE_TAIL] = rsp->orphan_donelist; 2637 rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rsp->orphan_pend);
2745 for (i = RCU_NEXT_SIZE - 1; i >= RCU_DONE_TAIL; i--) 2638 WARN_ON_ONCE(!rcu_cblist_empty(&rsp->orphan_pend));
2746 if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL]) 2639 WARN_ON_ONCE(rcu_segcblist_empty(&rdp->cblist) !=
2747 rdp->nxttail[i] = rsp->orphan_donetail; 2640 !rcu_segcblist_n_cbs(&rdp->cblist));
2748 rsp->orphan_donelist = NULL;
2749 rsp->orphan_donetail = &rsp->orphan_donelist;
2750 }
2751
2752 /* And then adopt the callbacks that still need a grace period. */
2753 if (rsp->orphan_nxtlist != NULL) {
2754 *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxtlist;
2755 rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_nxttail;
2756 rsp->orphan_nxtlist = NULL;
2757 rsp->orphan_nxttail = &rsp->orphan_nxtlist;
2758 }
2759} 2641}
2760 2642
2761/* 2643/*
@@ -2843,9 +2725,11 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
2843 rcu_adopt_orphan_cbs(rsp, flags); 2725 rcu_adopt_orphan_cbs(rsp, flags);
2844 raw_spin_unlock_irqrestore(&rsp->orphan_lock, flags); 2726 raw_spin_unlock_irqrestore(&rsp->orphan_lock, flags);
2845 2727
2846 WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL, 2728 WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 ||
2847 "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n", 2729 !rcu_segcblist_empty(&rdp->cblist),
2848 cpu, rdp->qlen, rdp->nxtlist); 2730 "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n",
2731 cpu, rcu_segcblist_n_cbs(&rdp->cblist),
2732 rcu_segcblist_first_cb(&rdp->cblist));
2849} 2733}
2850 2734
2851/* 2735/*
@@ -2855,14 +2739,17 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
2855static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) 2739static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
2856{ 2740{
2857 unsigned long flags; 2741 unsigned long flags;
2858 struct rcu_head *next, *list, **tail; 2742 struct rcu_head *rhp;
2859 long bl, count, count_lazy; 2743 struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl);
2860 int i; 2744 long bl, count;
2861 2745
2862 /* If no callbacks are ready, just return. */ 2746 /* If no callbacks are ready, just return. */
2863 if (!cpu_has_callbacks_ready_to_invoke(rdp)) { 2747 if (!rcu_segcblist_ready_cbs(&rdp->cblist)) {
2864 trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, 0); 2748 trace_rcu_batch_start(rsp->name,
2865 trace_rcu_batch_end(rsp->name, 0, !!READ_ONCE(rdp->nxtlist), 2749 rcu_segcblist_n_lazy_cbs(&rdp->cblist),
2750 rcu_segcblist_n_cbs(&rdp->cblist), 0);
2751 trace_rcu_batch_end(rsp->name, 0,
2752 !rcu_segcblist_empty(&rdp->cblist),
2866 need_resched(), is_idle_task(current), 2753 need_resched(), is_idle_task(current),
2867 rcu_is_callbacks_kthread()); 2754 rcu_is_callbacks_kthread());
2868 return; 2755 return;
@@ -2870,73 +2757,62 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
2870 2757
2871 /* 2758 /*
2872 * Extract the list of ready callbacks, disabling to prevent 2759 * Extract the list of ready callbacks, disabling to prevent
2873 * races with call_rcu() from interrupt handlers. 2760 * races with call_rcu() from interrupt handlers. Leave the
2761 * callback counts, as rcu_barrier() needs to be conservative.
2874 */ 2762 */
2875 local_irq_save(flags); 2763 local_irq_save(flags);
2876 WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); 2764 WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
2877 bl = rdp->blimit; 2765 bl = rdp->blimit;
2878 trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, bl); 2766 trace_rcu_batch_start(rsp->name, rcu_segcblist_n_lazy_cbs(&rdp->cblist),
2879 list = rdp->nxtlist; 2767 rcu_segcblist_n_cbs(&rdp->cblist), bl);
2880 rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; 2768 rcu_segcblist_extract_done_cbs(&rdp->cblist, &rcl);
2881 *rdp->nxttail[RCU_DONE_TAIL] = NULL;
2882 tail = rdp->nxttail[RCU_DONE_TAIL];
2883 for (i = RCU_NEXT_SIZE - 1; i >= 0; i--)
2884 if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL])
2885 rdp->nxttail[i] = &rdp->nxtlist;
2886 local_irq_restore(flags); 2769 local_irq_restore(flags);
2887 2770
2888 /* Invoke callbacks. */ 2771 /* Invoke callbacks. */
2889 count = count_lazy = 0; 2772 rhp = rcu_cblist_dequeue(&rcl);
2890 while (list) { 2773 for (; rhp; rhp = rcu_cblist_dequeue(&rcl)) {
2891 next = list->next; 2774 debug_rcu_head_unqueue(rhp);
2892 prefetch(next); 2775 if (__rcu_reclaim(rsp->name, rhp))
2893 debug_rcu_head_unqueue(list); 2776 rcu_cblist_dequeued_lazy(&rcl);
2894 if (__rcu_reclaim(rsp->name, list)) 2777 /*
2895 count_lazy++; 2778 * Stop only if limit reached and CPU has something to do.
2896 list = next; 2779 * Note: The rcl structure counts down from zero.
2897 /* Stop only if limit reached and CPU has something to do. */ 2780 */
2898 if (++count >= bl && 2781 if (-rcu_cblist_n_cbs(&rcl) >= bl &&
2899 (need_resched() || 2782 (need_resched() ||
2900 (!is_idle_task(current) && !rcu_is_callbacks_kthread()))) 2783 (!is_idle_task(current) && !rcu_is_callbacks_kthread())))
2901 break; 2784 break;
2902 } 2785 }
2903 2786
2904 local_irq_save(flags); 2787 local_irq_save(flags);
2905 trace_rcu_batch_end(rsp->name, count, !!list, need_resched(), 2788 count = -rcu_cblist_n_cbs(&rcl);
2906 is_idle_task(current), 2789 trace_rcu_batch_end(rsp->name, count, !rcu_cblist_empty(&rcl),
2790 need_resched(), is_idle_task(current),
2907 rcu_is_callbacks_kthread()); 2791 rcu_is_callbacks_kthread());
2908 2792
2909 /* Update count, and requeue any remaining callbacks. */ 2793 /* Update counts and requeue any remaining callbacks. */
2910 if (list != NULL) { 2794 rcu_segcblist_insert_done_cbs(&rdp->cblist, &rcl);
2911 *tail = rdp->nxtlist;
2912 rdp->nxtlist = list;
2913 for (i = 0; i < RCU_NEXT_SIZE; i++)
2914 if (&rdp->nxtlist == rdp->nxttail[i])
2915 rdp->nxttail[i] = tail;
2916 else
2917 break;
2918 }
2919 smp_mb(); /* List handling before counting for rcu_barrier(). */ 2795 smp_mb(); /* List handling before counting for rcu_barrier(). */
2920 rdp->qlen_lazy -= count_lazy;
2921 WRITE_ONCE(rdp->qlen, rdp->qlen - count);
2922 rdp->n_cbs_invoked += count; 2796 rdp->n_cbs_invoked += count;
2797 rcu_segcblist_insert_count(&rdp->cblist, &rcl);
2923 2798
2924 /* Reinstate batch limit if we have worked down the excess. */ 2799 /* Reinstate batch limit if we have worked down the excess. */
2925 if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark) 2800 count = rcu_segcblist_n_cbs(&rdp->cblist);
2801 if (rdp->blimit == LONG_MAX && count <= qlowmark)
2926 rdp->blimit = blimit; 2802 rdp->blimit = blimit;
2927 2803
2928 /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */ 2804 /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */
2929 if (rdp->qlen == 0 && rdp->qlen_last_fqs_check != 0) { 2805 if (count == 0 && rdp->qlen_last_fqs_check != 0) {
2930 rdp->qlen_last_fqs_check = 0; 2806 rdp->qlen_last_fqs_check = 0;
2931 rdp->n_force_qs_snap = rsp->n_force_qs; 2807 rdp->n_force_qs_snap = rsp->n_force_qs;
2932 } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark) 2808 } else if (count < rdp->qlen_last_fqs_check - qhimark)
2933 rdp->qlen_last_fqs_check = rdp->qlen; 2809 rdp->qlen_last_fqs_check = count;
2934 WARN_ON_ONCE((rdp->nxtlist == NULL) != (rdp->qlen == 0)); 2810 WARN_ON_ONCE(rcu_segcblist_empty(&rdp->cblist) != (count == 0));
2935 2811
2936 local_irq_restore(flags); 2812 local_irq_restore(flags);
2937 2813
2938 /* Re-invoke RCU core processing if there are callbacks remaining. */ 2814 /* Re-invoke RCU core processing if there are callbacks remaining. */
2939 if (cpu_has_callbacks_ready_to_invoke(rdp)) 2815 if (rcu_segcblist_ready_cbs(&rdp->cblist))
2940 invoke_rcu_core(); 2816 invoke_rcu_core();
2941} 2817}
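The count handling in rcu_do_batch() above is worth spelling out: rcl starts with ->len == 0, rcu_segcblist_extract_done_cbs() moves callbacks but deliberately not counts (rcu_barrier() must stay conservative), and every rcu_cblist_dequeue() decrements ->len, so after invoking N callbacks rcu_cblist_n_cbs(&rcl) == -N; rcu_segcblist_insert_count() then adds that negative value back, subtracting exactly the invoked callbacks from ->cblist. A condensed sketch (not from the patch; my_invoke() is a hypothetical stand-in for __rcu_reclaim()):

	static void example_do_batch(struct rcu_data *rdp)
	{
		struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl);	/* ->len == 0 */
		struct rcu_head *rhp;

		rcu_segcblist_extract_done_cbs(&rdp->cblist, &rcl);	/* Moves CBs, leaves counts behind. */
		while ((rhp = rcu_cblist_dequeue(&rcl)) != NULL)	/* Each dequeue does ->len--. */
			my_invoke(rhp);					/* Hypothetical callback invocation. */
		/* Here rcu_cblist_n_cbs(&rcl) == -N after N invocations. */
		rcu_segcblist_insert_done_cbs(&rdp->cblist, &rcl);	/* Requeue any leftovers. */
		rcu_segcblist_insert_count(&rdp->cblist, &rcl);		/* Adds the negative ->len, i.e. subtracts N. */
	}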
2942 2818
@@ -3120,7 +2996,7 @@ __rcu_process_callbacks(struct rcu_state *rsp)
3120 } 2996 }
3121 2997
3122 /* If there are callbacks ready, invoke them. */ 2998 /* If there are callbacks ready, invoke them. */
3123 if (cpu_has_callbacks_ready_to_invoke(rdp)) 2999 if (rcu_segcblist_ready_cbs(&rdp->cblist))
3124 invoke_rcu_callbacks(rsp, rdp); 3000 invoke_rcu_callbacks(rsp, rdp);
3125 3001
3126 /* Do any needed deferred wakeups of rcuo kthreads. */ 3002 /* Do any needed deferred wakeups of rcuo kthreads. */
@@ -3192,7 +3068,8 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
3192 * invoking force_quiescent_state() if the newly enqueued callback 3068 * invoking force_quiescent_state() if the newly enqueued callback
3193 * is the only one waiting for a grace period to complete. 3069 * is the only one waiting for a grace period to complete.
3194 */ 3070 */
3195 if (unlikely(rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) { 3071 if (unlikely(rcu_segcblist_n_cbs(&rdp->cblist) >
3072 rdp->qlen_last_fqs_check + qhimark)) {
3196 3073
3197 /* Are we ignoring a completed grace period? */ 3074 /* Are we ignoring a completed grace period? */
3198 note_gp_changes(rsp, rdp); 3075 note_gp_changes(rsp, rdp);
@@ -3210,10 +3087,10 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
3210 /* Give the grace period a kick. */ 3087 /* Give the grace period a kick. */
3211 rdp->blimit = LONG_MAX; 3088 rdp->blimit = LONG_MAX;
3212 if (rsp->n_force_qs == rdp->n_force_qs_snap && 3089 if (rsp->n_force_qs == rdp->n_force_qs_snap &&
3213 *rdp->nxttail[RCU_DONE_TAIL] != head) 3090 rcu_segcblist_first_pend_cb(&rdp->cblist) != head)
3214 force_quiescent_state(rsp); 3091 force_quiescent_state(rsp);
3215 rdp->n_force_qs_snap = rsp->n_force_qs; 3092 rdp->n_force_qs_snap = rsp->n_force_qs;
3216 rdp->qlen_last_fqs_check = rdp->qlen; 3093 rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist);
3217 } 3094 }
3218 } 3095 }
3219} 3096}
@@ -3253,7 +3130,7 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func,
3253 rdp = this_cpu_ptr(rsp->rda); 3130 rdp = this_cpu_ptr(rsp->rda);
3254 3131
3255 /* Add the callback to our list. */ 3132 /* Add the callback to our list. */
3256 if (unlikely(rdp->nxttail[RCU_NEXT_TAIL] == NULL) || cpu != -1) { 3133 if (unlikely(!rcu_segcblist_is_enabled(&rdp->cblist)) || cpu != -1) {
3257 int offline; 3134 int offline;
3258 3135
3259 if (cpu != -1) 3136 if (cpu != -1)
@@ -3272,23 +3149,21 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func,
3272 */ 3149 */
3273 BUG_ON(cpu != -1); 3150 BUG_ON(cpu != -1);
3274 WARN_ON_ONCE(!rcu_is_watching()); 3151 WARN_ON_ONCE(!rcu_is_watching());
3275 if (!likely(rdp->nxtlist)) 3152 if (rcu_segcblist_empty(&rdp->cblist))
3276 init_default_callback_list(rdp); 3153 rcu_segcblist_init(&rdp->cblist);
3277 } 3154 }
3278 WRITE_ONCE(rdp->qlen, rdp->qlen + 1); 3155 rcu_segcblist_enqueue(&rdp->cblist, head, lazy);
3279 if (lazy) 3156 if (!lazy)
3280 rdp->qlen_lazy++;
3281 else
3282 rcu_idle_count_callbacks_posted(); 3157 rcu_idle_count_callbacks_posted();
3283 smp_mb(); /* Count before adding callback for rcu_barrier(). */
3284 *rdp->nxttail[RCU_NEXT_TAIL] = head;
3285 rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
3286 3158
3287 if (__is_kfree_rcu_offset((unsigned long)func)) 3159 if (__is_kfree_rcu_offset((unsigned long)func))
3288 trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func, 3160 trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func,
3289 rdp->qlen_lazy, rdp->qlen); 3161 rcu_segcblist_n_lazy_cbs(&rdp->cblist),
3162 rcu_segcblist_n_cbs(&rdp->cblist));
3290 else 3163 else
3291 trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen); 3164 trace_rcu_callback(rsp->name, head,
3165 rcu_segcblist_n_lazy_cbs(&rdp->cblist),
3166 rcu_segcblist_n_cbs(&rdp->cblist));
3292 3167
3293 /* Go handle any RCU core processing required. */ 3168 /* Go handle any RCU core processing required. */
3294 __call_rcu_core(rsp, rdp, head, flags); 3169 __call_rcu_core(rsp, rdp, head, flags);
@@ -3600,7 +3475,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
3600 } 3475 }
3601 3476
3602 /* Does this CPU have callbacks ready to invoke? */ 3477 /* Does this CPU have callbacks ready to invoke? */
3603 if (cpu_has_callbacks_ready_to_invoke(rdp)) { 3478 if (rcu_segcblist_ready_cbs(&rdp->cblist)) {
3604 rdp->n_rp_cb_ready++; 3479 rdp->n_rp_cb_ready++;
3605 return 1; 3480 return 1;
3606 } 3481 }
@@ -3664,10 +3539,10 @@ static bool __maybe_unused rcu_cpu_has_callbacks(bool *all_lazy)
3664 3539
3665 for_each_rcu_flavor(rsp) { 3540 for_each_rcu_flavor(rsp) {
3666 rdp = this_cpu_ptr(rsp->rda); 3541 rdp = this_cpu_ptr(rsp->rda);
3667 if (!rdp->nxtlist) 3542 if (rcu_segcblist_empty(&rdp->cblist))
3668 continue; 3543 continue;
3669 hc = true; 3544 hc = true;
3670 if (rdp->qlen != rdp->qlen_lazy || !all_lazy) { 3545 if (rcu_segcblist_n_nonlazy_cbs(&rdp->cblist) || !all_lazy) {
3671 al = false; 3546 al = false;
3672 break; 3547 break;
3673 } 3548 }
@@ -3776,7 +3651,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
3776 __call_rcu(&rdp->barrier_head, 3651 __call_rcu(&rdp->barrier_head,
3777 rcu_barrier_callback, rsp, cpu, 0); 3652 rcu_barrier_callback, rsp, cpu, 0);
3778 } 3653 }
3779 } else if (READ_ONCE(rdp->qlen)) { 3654 } else if (rcu_segcblist_n_cbs(&rdp->cblist)) {
3780 _rcu_barrier_trace(rsp, "OnlineQ", cpu, 3655 _rcu_barrier_trace(rsp, "OnlineQ", cpu,
3781 rsp->barrier_sequence); 3656 rsp->barrier_sequence);
3782 smp_call_function_single(cpu, rcu_barrier_func, rsp, 1); 3657 smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);
@@ -3885,8 +3760,9 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
3885 rdp->qlen_last_fqs_check = 0; 3760 rdp->qlen_last_fqs_check = 0;
3886 rdp->n_force_qs_snap = rsp->n_force_qs; 3761 rdp->n_force_qs_snap = rsp->n_force_qs;
3887 rdp->blimit = blimit; 3762 rdp->blimit = blimit;
3888 if (!rdp->nxtlist) 3763 if (rcu_segcblist_empty(&rdp->cblist) && /* No early-boot CBs? */
3889 init_callback_list(rdp); /* Re-enable callbacks on this CPU. */ 3764 !init_nocb_callback_list(rdp))
3765 rcu_segcblist_init(&rdp->cblist); /* Re-enable callbacks. */
3890 rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; 3766 rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
3891 rcu_sysidle_init_percpu_data(rdp->dynticks); 3767 rcu_sysidle_init_percpu_data(rdp->dynticks);
3892 rcu_dynticks_eqs_online(); 3768 rcu_dynticks_eqs_online();
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 376c01e539c7..93889ff21dbb 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -30,6 +30,7 @@
30#include <linux/seqlock.h> 30#include <linux/seqlock.h>
31#include <linux/swait.h> 31#include <linux/swait.h>
32#include <linux/stop_machine.h> 32#include <linux/stop_machine.h>
33#include "rcu_segcblist.h"
33 34
34/* 35/*
35 * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and 36 * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and
@@ -335,34 +336,9 @@ struct rcu_data {
335 /* period it is aware of. */ 336 /* period it is aware of. */
336 337
337 /* 2) batch handling */ 338 /* 2) batch handling */
338 /* 339 struct rcu_segcblist cblist; /* Segmented callback list, with */
339 * If nxtlist is not NULL, it is partitioned as follows. 340 /* different callbacks waiting for */
340 * Any of the partitions might be empty, in which case the 341 /* different grace periods. */
341 * pointer to that partition will be equal to the pointer for
342 * the following partition. When the list is empty, all of
343 * the nxttail elements point to the ->nxtlist pointer itself,
344 * which in that case is NULL.
345 *
346 * [nxtlist, *nxttail[RCU_DONE_TAIL]):
347 * Entries that batch # <= ->completed
348 * The grace period for these entries has completed, and
349 * the other grace-period-completed entries may be moved
350 * here temporarily in rcu_process_callbacks().
351 * [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]):
352 * Entries that batch # <= ->completed - 1: waiting for current GP
353 * [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]):
354 * Entries known to have arrived before current GP ended
355 * [*nxttail[RCU_NEXT_READY_TAIL], *nxttail[RCU_NEXT_TAIL]):
356 * Entries that might have arrived after current GP ended
357 * Note that the value of *nxttail[RCU_NEXT_TAIL] will
358 * always be NULL, as this is the end of the list.
359 */
360 struct rcu_head *nxtlist;
361 struct rcu_head **nxttail[RCU_NEXT_SIZE];
362 unsigned long nxtcompleted[RCU_NEXT_SIZE];
363 /* grace periods for sublists. */
364 long qlen_lazy; /* # of lazy queued callbacks */
365 long qlen; /* # of queued callbacks, incl lazy */
366 long qlen_last_fqs_check; 342 long qlen_last_fqs_check;
367 /* qlen at last check for QS forcing */ 343 /* qlen at last check for QS forcing */
368 unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */ 344 unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */
@@ -500,14 +476,11 @@ struct rcu_state {
 
 	raw_spinlock_t orphan_lock ____cacheline_internodealigned_in_smp;
 						/* Protect following fields. */
-	struct rcu_head *orphan_nxtlist;	/* Orphaned callbacks that */
+	struct rcu_cblist orphan_pend;		/* Orphaned callbacks that */
 						/*  need a grace period. */
-	struct rcu_head **orphan_nxttail;	/* Tail of above. */
-	struct rcu_head *orphan_donelist;	/* Orphaned callbacks that */
+	struct rcu_cblist orphan_done;		/* Orphaned callbacks that */
 						/*  are ready to invoke. */
-	struct rcu_head **orphan_donetail;	/* Tail of above. */
-	long qlen_lazy;				/* Number of lazy callbacks. */
-	long qlen;				/* Total number of callbacks. */
+						/* (Contains counts.) */
 	/* End of fields guarded by orphan_lock. */
 
 	struct mutex barrier_mutex;		/* Guards barrier fields. */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 621296a6694b..f88356652dcf 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1350,10 +1350,10 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void)
 		 */
 		if ((rdp->completed != rnp->completed ||
 		     unlikely(READ_ONCE(rdp->gpwrap))) &&
-		    rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL])
+		    rcu_segcblist_pend_cbs(&rdp->cblist))
 			note_gp_changes(rsp, rdp);
 
-		if (cpu_has_callbacks_ready_to_invoke(rdp))
+		if (rcu_segcblist_ready_cbs(&rdp->cblist))
 			cbs_ready = true;
 	}
 	return cbs_ready;
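
Here the open-coded test rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL] becomes rcu_segcblist_pend_cbs(), and cpu_has_callbacks_ready_to_invoke() becomes rcu_segcblist_ready_cbs(). Read against the toy seg_cblist sketched earlier, plausible equivalents look like the following (these are assumptions drawn from the lines being replaced, not the kernel's implementations):

/* Pending callbacks: anything queued beyond the done segment. */
static bool seg_cblist_pend_cbs(struct seg_cblist *sl)
{
	return sl->tails[SEG_DONE] != sl->tails[SEG_NEXT];
}

/* Ready callbacks: the done segment itself is non-empty. */
static bool seg_cblist_ready_cbs(struct seg_cblist *sl)
{
	return !seg_cblist_segempty(sl, SEG_DONE);
}
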
@@ -1461,7 +1461,7 @@ static void rcu_prepare_for_idle(void)
 	rdtp->last_accelerate = jiffies;
 	for_each_rcu_flavor(rsp) {
 		rdp = this_cpu_ptr(rsp->rda);
-		if (!*rdp->nxttail[RCU_DONE_TAIL])
+		if (rcu_segcblist_pend_cbs(&rdp->cblist))
 			continue;
 		rnp = rdp->mynode;
 		raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
@@ -1529,7 +1529,7 @@ static void rcu_oom_notify_cpu(void *unused)
 
 	for_each_rcu_flavor(rsp) {
 		rdp = raw_cpu_ptr(rsp->rda);
-		if (rdp->qlen_lazy != 0) {
+		if (rcu_segcblist_n_lazy_cbs(&rdp->cblist)) {
 			atomic_inc(&oom_callback_count);
 			rsp->call(&rdp->oom_head, rcu_oom_callback);
 		}
@@ -1934,30 +1934,26 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
 						      struct rcu_data *rdp,
 						      unsigned long flags)
 {
-	long ql = rsp->qlen;
-	long qll = rsp->qlen_lazy;
+	long ql = rcu_cblist_n_cbs(&rsp->orphan_done);
+	long qll = rcu_cblist_n_lazy_cbs(&rsp->orphan_done);
 
 	/* If this is not a no-CBs CPU, tell the caller to do it the old way. */
 	if (!rcu_is_nocb_cpu(smp_processor_id()))
 		return false;
-	rsp->qlen = 0;
-	rsp->qlen_lazy = 0;
 
 	/* First, enqueue the donelist, if any.  This preserves CB ordering. */
-	if (rsp->orphan_donelist != NULL) {
-		__call_rcu_nocb_enqueue(rdp, rsp->orphan_donelist,
-					rsp->orphan_donetail, ql, qll, flags);
-		ql = qll = 0;
-		rsp->orphan_donelist = NULL;
-		rsp->orphan_donetail = &rsp->orphan_donelist;
+	if (!rcu_cblist_empty(&rsp->orphan_done)) {
+		__call_rcu_nocb_enqueue(rdp, rcu_cblist_head(&rsp->orphan_done),
+					rcu_cblist_tail(&rsp->orphan_done),
+					ql, qll, flags);
 	}
-	if (rsp->orphan_nxtlist != NULL) {
-		__call_rcu_nocb_enqueue(rdp, rsp->orphan_nxtlist,
-					rsp->orphan_nxttail, ql, qll, flags);
-		ql = qll = 0;
-		rsp->orphan_nxtlist = NULL;
-		rsp->orphan_nxttail = &rsp->orphan_nxtlist;
+	if (!rcu_cblist_empty(&rsp->orphan_pend)) {
+		__call_rcu_nocb_enqueue(rdp, rcu_cblist_head(&rsp->orphan_pend),
+					rcu_cblist_tail(&rsp->orphan_pend),
+					ql, qll, flags);
 	}
+	rcu_cblist_init(&rsp->orphan_done);
+	rcu_cblist_init(&rsp->orphan_pend);
 	return true;
 }
 
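
The orphan lists are now rcu_cblist structures that carry their own head, tail, and callback counts, so the function snapshots the counts with rcu_cblist_n_cbs()/rcu_cblist_n_lazy_cbs(), passes each non-empty list along via rcu_cblist_head()/rcu_cblist_tail(), and finally resets both lists with rcu_cblist_init(); the separate rsp->qlen/rsp->qlen_lazy bookkeeping goes away. A toy user-space version of that adopt-a-counted-list pattern (hypothetical names, not kernel code):

struct cb;			/* Opaque callback type; only pointers are used. */

struct cb_list {
	struct cb *head;
	struct cb **tail;	/* Points at ->head while the list is empty. */
	long len;
};

static void cb_list_init(struct cb_list *l)
{
	l->head = NULL;
	l->tail = &l->head;
	l->len = 0;
}

/* Move every callback (and its count) from @src to the end of @dst. */
static void cb_list_adopt(struct cb_list *dst, struct cb_list *src)
{
	if (!src->head)			/* Nothing to adopt. */
		return;
	*dst->tail = src->head;		/* Splice, preserving callback order. */
	dst->tail = src->tail;
	dst->len += src->len;
	cb_list_init(src);		/* Source becomes empty again. */
}
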
@@ -2399,16 +2395,16 @@ static bool init_nocb_callback_list(struct rcu_data *rdp)
 		return false;
 
 	/* If there are early-boot callbacks, move them to nocb lists. */
-	if (rdp->nxtlist) {
-		rdp->nocb_head = rdp->nxtlist;
-		rdp->nocb_tail = rdp->nxttail[RCU_NEXT_TAIL];
-		atomic_long_set(&rdp->nocb_q_count, rdp->qlen);
-		atomic_long_set(&rdp->nocb_q_count_lazy, rdp->qlen_lazy);
-		rdp->nxtlist = NULL;
-		rdp->qlen = 0;
-		rdp->qlen_lazy = 0;
+	if (!rcu_segcblist_empty(&rdp->cblist)) {
+		rdp->nocb_head = rcu_segcblist_head(&rdp->cblist);
+		rdp->nocb_tail = rcu_segcblist_tail(&rdp->cblist);
+		atomic_long_set(&rdp->nocb_q_count,
+				rcu_segcblist_n_cbs(&rdp->cblist));
+		atomic_long_set(&rdp->nocb_q_count_lazy,
+				rcu_segcblist_n_lazy_cbs(&rdp->cblist));
+		rcu_segcblist_init(&rdp->cblist);
 	}
-	rdp->nxttail[RCU_NEXT_TAIL] = NULL;
+	rcu_segcblist_disable(&rdp->cblist);
 	return true;
 }
 
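
The old code marked a callback list as disabled by storing NULL into nxttail[RCU_NEXT_TAIL]; rcu_segcblist_disable() presumably records the same state behind the accessor. In the toy model sketched earlier, that might look like the following (an assumption for illustration, not the kernel's implementation):

/* Toy notion of disabling a list, mirroring the old NULL next-tail marker. */
static void seg_cblist_disable(struct seg_cblist *sl)
{
	sl->tails[SEG_NEXT] = NULL;	/* Refuse further enqueues. */
}

static bool seg_cblist_enabled(struct seg_cblist *sl)
{
	return sl->tails[SEG_NEXT] != NULL;
}
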
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index 65b43be38e68..066c64071a7b 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -41,6 +41,7 @@
 #include <linux/mutex.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
+#include <linux/prefetch.h>
 
 #define RCU_TREE_NONCORE
 #include "tree.h"
@@ -128,17 +129,15 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
 		   rdp->dynticks_fqs);
 	seq_printf(m, " of=%lu", rdp->offline_fqs);
 	rcu_nocb_q_lengths(rdp, &ql, &qll);
-	qll += rdp->qlen_lazy;
-	ql += rdp->qlen;
+	qll += rcu_segcblist_n_lazy_cbs(&rdp->cblist);
+	ql += rcu_segcblist_n_cbs(&rdp->cblist);
 	seq_printf(m, " ql=%ld/%ld qs=%c%c%c%c",
 		   qll, ql,
-		   ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] !=
-			rdp->nxttail[RCU_NEXT_TAIL]],
-		   ".R"[rdp->nxttail[RCU_WAIT_TAIL] !=
-			rdp->nxttail[RCU_NEXT_READY_TAIL]],
-		   ".W"[rdp->nxttail[RCU_DONE_TAIL] !=
-			rdp->nxttail[RCU_WAIT_TAIL]],
-		   ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]]);
+		   ".N"[!rcu_segcblist_segempty(&rdp->cblist, RCU_NEXT_TAIL)],
+		   ".R"[!rcu_segcblist_segempty(&rdp->cblist,
+						RCU_NEXT_READY_TAIL)],
+		   ".W"[!rcu_segcblist_segempty(&rdp->cblist, RCU_WAIT_TAIL)],
+		   ".D"[!rcu_segcblist_segempty(&rdp->cblist, RCU_DONE_TAIL)]);
 #ifdef CONFIG_RCU_BOOST
 	seq_printf(m, " kt=%d/%c ktl=%x",
 		   per_cpu(rcu_cpu_has_work, rdp->cpu),
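
The qs=%c%c%c%c output relies on a small C idiom: ".N"[flag] indexes a two-character string literal with a 0/1 value, printing '.' when the corresponding segment is empty and the letter when it is not. A tiny standalone example:

/* The ".X"[flag] idiom used above: index a two-character string with 0 or 1. */
#include <stdio.h>

int main(void)
{
	int have_next = 1;	/* Next segment non-empty. */
	int have_done = 0;	/* Done segment empty. */

	/* Prints "N." : 'N' because have_next is 1, '.' because have_done is 0. */
	printf("%c%c\n", ".N"[have_next], ".D"[have_done]);
	return 0;
}
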
@@ -276,7 +275,9 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
 	seq_printf(m, "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n",
 		   rsp->n_force_qs, rsp->n_force_qs_ngp,
 		   rsp->n_force_qs - rsp->n_force_qs_ngp,
-		   READ_ONCE(rsp->n_force_qs_lh), rsp->qlen_lazy, rsp->qlen);
+		   READ_ONCE(rsp->n_force_qs_lh),
+		   rcu_cblist_n_lazy_cbs(&rsp->orphan_done),
+		   rcu_cblist_n_cbs(&rsp->orphan_done));
 	for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < rcu_num_nodes; rnp++) {
 		if (rnp->level != level) {
 			seq_puts(m, "\n");