 Documentation/RCU/trace.txt |  29
 kernel/rcutree.c            |   5
 kernel/rcutree.h            |  21
 kernel/rcutree_plugin.h     | 163
 kernel/rcutree_trace.c      |  11
 5 files changed, 135 insertions(+), 94 deletions(-)
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
index e731ad20d166..5a704ffd0bbc 100644
--- a/Documentation/RCU/trace.txt
+++ b/Documentation/RCU/trace.txt
@@ -166,14 +166,14 @@ o "gpnum" is the number of grace periods that have started. It is
 The output of "cat rcu/rcuhier" looks as follows, with very long lines:
 
 c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6
-1/1 .>. 0:127 ^0
-3/3 .>. 0:35 ^0 0/0 .>. 36:71 ^1 0/0 .>. 72:107 ^2 0/0 .>. 108:127 ^3
-3/3f .>. 0:5 ^0 2/3 .>. 6:11 ^1 0/0 .>. 12:17 ^2 0/0 .>. 18:23 ^3 0/0 .>. 24:29 ^4 0/0 .>. 30:35 ^5 0/0 .>. 36:41 ^0 0/0 .>. 42:47 ^1 0/0 .>. 48:53 ^2 0/0 .>. 54:59 ^3 0/0 .>. 60:65 ^4 0/0 .>. 66:71 ^5 0/0 .>. 72:77 ^0 0/0 .>. 78:83 ^1 0/0 .>. 84:89 ^2 0/0 .>. 90:95 ^3 0/0 .>. 96:101 ^4 0/0 .>. 102:107 ^5 0/0 .>. 108:113 ^0 0/0 .>. 114:119 ^1 0/0 .>. 120:125 ^2 0/0 .>. 126:127 ^3
+1/1 ..>. 0:127 ^0
+3/3 ..>. 0:35 ^0 0/0 ..>. 36:71 ^1 0/0 ..>. 72:107 ^2 0/0 ..>. 108:127 ^3
+3/3f ..>. 0:5 ^0 2/3 ..>. 6:11 ^1 0/0 ..>. 12:17 ^2 0/0 ..>. 18:23 ^3 0/0 ..>. 24:29 ^4 0/0 ..>. 30:35 ^5 0/0 ..>. 36:41 ^0 0/0 ..>. 42:47 ^1 0/0 ..>. 48:53 ^2 0/0 ..>. 54:59 ^3 0/0 ..>. 60:65 ^4 0/0 ..>. 66:71 ^5 0/0 ..>. 72:77 ^0 0/0 ..>. 78:83 ^1 0/0 ..>. 84:89 ^2 0/0 ..>. 90:95 ^3 0/0 ..>. 96:101 ^4 0/0 ..>. 102:107 ^5 0/0 ..>. 108:113 ^0 0/0 ..>. 114:119 ^1 0/0 ..>. 120:125 ^2 0/0 ..>. 126:127 ^3
 rcu_bh:
 c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0
-0/1 .>. 0:127 ^0
-0/3 .>. 0:35 ^0 0/0 .>. 36:71 ^1 0/0 .>. 72:107 ^2 0/0 .>. 108:127 ^3
-0/3f .>. 0:5 ^0 0/3 .>. 6:11 ^1 0/0 .>. 12:17 ^2 0/0 .>. 18:23 ^3 0/0 .>. 24:29 ^4 0/0 .>. 30:35 ^5 0/0 .>. 36:41 ^0 0/0 .>. 42:47 ^1 0/0 .>. 48:53 ^2 0/0 .>. 54:59 ^3 0/0 .>. 60:65 ^4 0/0 .>. 66:71 ^5 0/0 .>. 72:77 ^0 0/0 .>. 78:83 ^1 0/0 .>. 84:89 ^2 0/0 .>. 90:95 ^3 0/0 .>. 96:101 ^4 0/0 .>. 102:107 ^5 0/0 .>. 108:113 ^0 0/0 .>. 114:119 ^1 0/0 .>. 120:125 ^2 0/0 .>. 126:127 ^3
+0/1 ..>. 0:127 ^0
+0/3 ..>. 0:35 ^0 0/0 ..>. 36:71 ^1 0/0 ..>. 72:107 ^2 0/0 ..>. 108:127 ^3
+0/3f ..>. 0:5 ^0 0/3 ..>. 6:11 ^1 0/0 ..>. 12:17 ^2 0/0 ..>. 18:23 ^3 0/0 ..>. 24:29 ^4 0/0 ..>. 30:35 ^5 0/0 ..>. 36:41 ^0 0/0 ..>. 42:47 ^1 0/0 ..>. 48:53 ^2 0/0 ..>. 54:59 ^3 0/0 ..>. 60:65 ^4 0/0 ..>. 66:71 ^5 0/0 ..>. 72:77 ^0 0/0 ..>. 78:83 ^1 0/0 ..>. 84:89 ^2 0/0 ..>. 90:95 ^3 0/0 ..>. 96:101 ^4 0/0 ..>. 102:107 ^5 0/0 ..>. 108:113 ^0 0/0 ..>. 114:119 ^1 0/0 ..>. 120:125 ^2 0/0 ..>. 126:127 ^3
 
 This is once again split into "rcu_sched" and "rcu_bh" portions,
 and CONFIG_TREE_PREEMPT_RCU kernels will again have an additional
@@ -232,13 +232,20 @@ o Each element of the form "1/1 0:127 ^0" represents one struct
         current grace period.
 
 o       The characters separated by the ">" indicate the state
-        of the blocked-tasks lists.  A "T" preceding the ">"
+        of the blocked-tasks lists.  A "G" preceding the ">"
         indicates that at least one task blocked in an RCU
         read-side critical section blocks the current grace
-        period, while a "." preceding the ">" indicates otherwise.
-        The character following the ">" indicates similarly for
-        the next grace period.  A "T" should appear in this
-        field only for rcu-preempt.
+        period, while an "E" preceding the ">" indicates that
+        at least one task blocked in an RCU read-side critical
+        section blocks the current expedited grace period.
+        A "T" character following the ">" indicates that at
+        least one task is blocked within an RCU read-side
+        critical section, regardless of whether any current
+        grace period (expedited or normal) is inconvenienced.
+        A "." character appears if the corresponding condition
+        does not hold, so that "..>." indicates that no tasks
+        are blocked.  In contrast, "GE>T" indicates maximal
+        inconvenience from blocked tasks.
 
 o       The numbers separated by the ":" are the range of CPUs
         served by this struct rcu_node.  This can be helpful
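
As a worked decoding of the new field, based on the documentation above:
in the rcu_sched sample output, the element "2/3 ..>. 6:11 ^1" reads as
follows.  The qsmask is 2 and qsmaskinit is 3; no "G" before the ">" means
no blocked task holds up the current grace period; no "E" means none holds
up the current expedited grace period; no "T" after the ">" means no task
is queued on this node at all; and the node serves CPUs 6-11 as child
number 1 of its parent.  A maximally inconvenienced element would instead
show "GE>T".
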
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 90104a19c564..0ac1cc03f935 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1901,10 +1901,7 @@ static void __init rcu_init_one(struct rcu_state *rsp,
 				    j / rsp->levelspread[i - 1];
 			}
 			rnp->level = i;
-			INIT_LIST_HEAD(&rnp->blocked_tasks[0]);
-			INIT_LIST_HEAD(&rnp->blocked_tasks[1]);
-			INIT_LIST_HEAD(&rnp->blocked_tasks[2]);
-			INIT_LIST_HEAD(&rnp->blocked_tasks[3]);
+			INIT_LIST_HEAD(&rnp->blkd_tasks);
 		}
 	}
 
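
One note on why a single INIT_LIST_HEAD() suffices here: an initialized
list_head points at itself, which is exactly the empty-list state the rest
of the patch relies on (list_empty(&rnp->blkd_tasks) is true, and there is
nothing for the new ->gp_tasks/->exp_tasks pointers to reference).  A
self-contained user-space sketch of that convention; the two helpers are
minimal stand-ins for illustration, not the kernel's definitions verbatim:

	#include <stdio.h>

	struct list_head {
		struct list_head *next, *prev;
	};

	/* Minimal stand-ins for the kernel's list primitives. */
	static void INIT_LIST_HEAD(struct list_head *h)
	{
		h->next = h;
		h->prev = h;
	}

	static int list_empty(const struct list_head *h)
	{
		return h->next == h;
	}

	int main(void)
	{
		struct list_head blkd_tasks;

		INIT_LIST_HEAD(&blkd_tasks);		 /* one list instead of four */
		printf("%d\n", list_empty(&blkd_tasks)); /* prints 1: freshly initialized */
		return 0;
	}
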
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index bd891def3303..5a439c180e69 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -107,7 +107,7 @@ struct rcu_node {
 				/*  an rcu_data structure, otherwise, each */
 				/*  bit corresponds to a child rcu_node */
 				/*  structure. */
-	unsigned long expmask;	/* Groups that have ->blocked_tasks[] */
+	unsigned long expmask;	/* Groups that have ->blkd_tasks */
 				/*  elements that need to drain to allow the */
 				/*  current expedited grace period to */
 				/*  complete (only for TREE_PREEMPT_RCU). */
@@ -120,11 +120,20 @@ struct rcu_node {
 	u8	grpnum;		/* CPU/group number for next level up. */
 	u8	level;		/* root is at level 0. */
 	struct rcu_node *parent;
-	struct list_head blocked_tasks[4];
-				/* Tasks blocked in RCU read-side critsect. */
-				/*  Grace period number (->gpnum) x blocked */
-				/*  by tasks on the (x & 0x1) element of the */
-				/*  blocked_tasks[] array. */
+	struct list_head blkd_tasks;
+				/* Tasks blocked in RCU read-side critical */
+				/*  section.  Tasks are placed at the head */
+				/*  of this list and age towards the tail. */
+	struct list_head *gp_tasks;
+				/* Pointer to the first task blocking the */
+				/*  current grace period, or NULL if there */
+				/*  is no such task. */
+	struct list_head *exp_tasks;
+				/* Pointer to the first task blocking the */
+				/*  current expedited grace period, or NULL */
+				/*  if there is no such task.  If there */
+				/*  is no current expedited grace period, */
+				/*  then there cannot be any such task. */
 } ____cacheline_internodealigned_in_smp;
 
 /*
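
The comments above encode an invariant worth spelling out: ->gp_tasks and
->exp_tasks are either NULL or point at a list_head currently linked into
->blkd_tasks, and every task the corresponding grace period must wait on
lies between the pointed-to entry and the tail (possibly along with a few
conservatively included latecomers).  A hypothetical debug helper, not part
of this patch, could check the membership half of that invariant; it
assumes rnp->lock is held by the caller:

	static bool rcu_node_ptrs_sane(struct rcu_node *rnp)
	{
		struct list_head *p;
		bool gp_ok = (rnp->gp_tasks == NULL);
		bool exp_ok = (rnp->exp_tasks == NULL);

		/* Each non-NULL pointer must reference a queued entry. */
		list_for_each(p, &rnp->blkd_tasks) {
			if (p == rnp->gp_tasks)
				gp_ok = true;
			if (p == rnp->exp_tasks)
				exp_ok = true;
		}
		return gp_ok && exp_ok;
	}
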
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 764b5fcc7c56..774f010a4619 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -130,12 +130,12 @@ static void rcu_preempt_qs(int cpu)
  * We have entered the scheduler, and the current task might soon be
  * context-switched away from.  If this task is in an RCU read-side
  * critical section, we will no longer be able to rely on the CPU to
- * record that fact, so we enqueue the task on the appropriate entry
- * of the blocked_tasks[] array.  The task will dequeue itself when
- * it exits the outermost enclosing RCU read-side critical section.
- * Therefore, the current grace period cannot be permitted to complete
- * until the blocked_tasks[] entry indexed by the low-order bit of
- * rnp->gpnum empties.
+ * record that fact, so we enqueue the task on the blkd_tasks list.
+ * The task will dequeue itself when it exits the outermost enclosing
+ * RCU read-side critical section.  Therefore, the current grace period
+ * cannot be permitted to complete until the blkd_tasks list entries
+ * predating the current grace period drain, in other words, until
+ * rnp->gp_tasks becomes NULL.
  *
  * Caller must disable preemption.
  */
@@ -143,7 +143,6 @@ static void rcu_preempt_note_context_switch(int cpu)
 {
 	struct task_struct *t = current;
 	unsigned long flags;
-	int phase;
 	struct rcu_data *rdp;
 	struct rcu_node *rnp;
 
@@ -165,15 +164,26 @@ static void rcu_preempt_note_context_switch(int cpu)
 		 * (i.e., this CPU has not yet passed through a quiescent
 		 * state for the current grace period), then as long
 		 * as that task remains queued, the current grace period
-		 * cannot end.
+		 * cannot end.  Note that there is some uncertainty as
+		 * to exactly when the current grace period started.
+		 * We take a conservative approach, which can result
+		 * in unnecessarily waiting on tasks that started very
+		 * slightly after the current grace period began.  C'est
+		 * la vie!!!
 		 *
 		 * But first, note that the current CPU must still be
 		 * on line!
 		 */
 		WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0);
 		WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
-		phase = (rnp->gpnum + !(rnp->qsmask & rdp->grpmask)) & 0x1;
-		list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]);
+		if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) {
+			list_add(&t->rcu_node_entry, rnp->gp_tasks->prev);
+			rnp->gp_tasks = &t->rcu_node_entry;
+		} else {
+			list_add(&t->rcu_node_entry, &rnp->blkd_tasks);
+			if (rnp->qsmask & rdp->grpmask)
+				rnp->gp_tasks = &t->rcu_node_entry;
+		}
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 	}
 
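
To make the two branches concrete, recall from rcutree.h that tasks sit at
the head of ->blkd_tasks and age toward the tail.  Suppose the list holds
B then A (head to tail) with ->gp_tasks pointing at A.  If the outgoing
task T must block the current grace period (its CPU's bit is still set in
->qsmask), it is linked in at ->gp_tasks->prev, between B and A, and
->gp_tasks moves to T, so the segment from ->gp_tasks to the tail continues
to hold exactly the tasks blocking the grace period.  Otherwise T is added
at the head; the inner test then covers the case where T blocks the grace
period but is the first task to do so (->gp_tasks still NULL).
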
@@ -210,10 +220,7 @@ EXPORT_SYMBOL_GPL(__rcu_read_lock);
  */
 static int rcu_preempted_readers(struct rcu_node *rnp)
 {
-	int phase = rnp->gpnum & 0x1;
-
-	return !list_empty(&rnp->blocked_tasks[phase]) ||
-	       !list_empty(&rnp->blocked_tasks[phase + 2]);
+	return rnp->gp_tasks != NULL;
 }
 
 /*
@@ -253,6 +260,21 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
 }
 
 /*
+ * Advance a ->blkd_tasks-list pointer to the next entry, returning
+ * NULL instead if at the end of the list.
+ */
+static struct list_head *rcu_next_node_entry(struct task_struct *t,
+					     struct rcu_node *rnp)
+{
+	struct list_head *np;
+
+	np = t->rcu_node_entry.next;
+	if (np == &rnp->blkd_tasks)
+		np = NULL;
+	return np;
+}
+
+/*
  * Handle special cases during rcu_read_unlock(), such as needing to
  * notify RCU core processing or task having blocked during the RCU
  * read-side critical section.
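
For illustration only: combined with ->gp_tasks, this helper makes it
natural to walk exactly the segment of tasks blocking the current grace
period.  A hypothetical traversal (the caller would need to hold
rnp->lock) might look like:

	struct list_head *p = rnp->gp_tasks; /* NULL when nothing blocks the GP */

	while (p != NULL) {
		struct task_struct *t =
			list_entry(p, struct task_struct, rcu_node_entry);

		/* ... examine t, which blocks the current grace period ... */
		p = rcu_next_node_entry(t, rnp); /* NULL at the end of the list */
	}
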
@@ -262,6 +284,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
 	int empty;
 	int empty_exp;
 	unsigned long flags;
+	struct list_head *np;
 	struct rcu_node *rnp;
 	int special;
 
@@ -305,7 +328,12 @@ static void rcu_read_unlock_special(struct task_struct *t)
 		empty = !rcu_preempted_readers(rnp);
 		empty_exp = !rcu_preempted_readers_exp(rnp);
 		smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
+		np = rcu_next_node_entry(t, rnp);
 		list_del_init(&t->rcu_node_entry);
+		if (&t->rcu_node_entry == rnp->gp_tasks)
+			rnp->gp_tasks = np;
+		if (&t->rcu_node_entry == rnp->exp_tasks)
+			rnp->exp_tasks = np;
 		t->rcu_blocked_node = NULL;
 
 		/*
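
Note the ordering above: the successor must be captured via
rcu_next_node_entry() before list_del_init() makes t->rcu_node_entry point
at itself.  And because the helper returns NULL at the end of the list,
removing the last task blocking a grace period leaves ->gp_tasks (or
->exp_tasks) NULL, which is exactly the "grace period may now end"
condition that rcu_preempted_readers() and rcu_preempted_readers_exp()
test.
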
@@ -361,18 +389,16 @@ EXPORT_SYMBOL_GPL(__rcu_read_unlock);
 static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
 {
 	unsigned long flags;
-	struct list_head *lp;
-	int phase;
 	struct task_struct *t;
 
-	if (rcu_preempted_readers(rnp)) {
-		raw_spin_lock_irqsave(&rnp->lock, flags);
-		phase = rnp->gpnum & 0x1;
-		lp = &rnp->blocked_tasks[phase];
-		list_for_each_entry(t, lp, rcu_node_entry)
-			sched_show_task(t);
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
-	}
+	if (!rcu_preempted_readers(rnp))
+		return;
+	raw_spin_lock_irqsave(&rnp->lock, flags);
+	t = list_entry(rnp->gp_tasks->prev,
+		       struct task_struct, rcu_node_entry);
+	list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
+		sched_show_task(t);
+	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 }
 
 /*
@@ -402,16 +428,14 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp)
  */
 static void rcu_print_task_stall(struct rcu_node *rnp)
 {
-	struct list_head *lp;
-	int phase;
 	struct task_struct *t;
 
-	if (rcu_preempted_readers(rnp)) {
-		phase = rnp->gpnum & 0x1;
-		lp = &rnp->blocked_tasks[phase];
-		list_for_each_entry(t, lp, rcu_node_entry)
-			printk(" P%d", t->pid);
-	}
+	if (!rcu_preempted_readers(rnp))
+		return;
+	t = list_entry(rnp->gp_tasks->prev,
+		       struct task_struct, rcu_node_entry);
+	list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
+		printk(" P%d", t->pid);
 }
 
 /*
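
The cursor seeding in these two stall-print functions is deliberate:
list_for_each_entry_continue() begins at the entry *after* its cursor, so
the cursor is primed with ->gp_tasks->prev in order that iteration start
exactly at the first task blocking the grace period and run from there to
the tail of ->blkd_tasks.  Seeding with ->gp_tasks itself would silently
skip the first blocked task.
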
@@ -430,10 +454,15 @@ static void rcu_preempt_stall_reset(void)
  * period that still has RCU readers blocked!  This function must be
  * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock
  * must be held by the caller.
+ *
+ * Also, if there are blocked tasks on the list, they automatically
+ * block the newly created grace period, so set up ->gp_tasks accordingly.
  */
 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
 {
 	WARN_ON_ONCE(rcu_preempted_readers(rnp));
+	if (!list_empty(&rnp->blkd_tasks))
+		rnp->gp_tasks = rnp->blkd_tasks.next;
 	WARN_ON_ONCE(rnp->qsmask);
 }
 
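
This is where the blocking segment is established: every task already
queued at the start of a grace period is by definition in a pre-existing
read-side critical section, so ->gp_tasks is simply pointed at the list
head, covering the whole list.  The two WARN_ON_ONCE() calls bracket the
assignment because neither leftover blockers from the previous grace period
nor unreported quiescent states may remain when a new grace period begins
on this node.
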
@@ -457,45 +486,49 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
 				     struct rcu_node *rnp,
 				     struct rcu_data *rdp)
 {
-	int i;
 	struct list_head *lp;
 	struct list_head *lp_root;
 	int retval = 0;
 	struct rcu_node *rnp_root = rcu_get_root(rsp);
-	struct task_struct *tp;
+	struct task_struct *t;
 
 	if (rnp == rnp_root) {
 		WARN_ONCE(1, "Last CPU thought to be offlined?");
 		return 0;  /* Shouldn't happen: at least one CPU online. */
 	}
-	WARN_ON_ONCE(rnp != rdp->mynode &&
-		     (!list_empty(&rnp->blocked_tasks[0]) ||
-		      !list_empty(&rnp->blocked_tasks[1]) ||
-		      !list_empty(&rnp->blocked_tasks[2]) ||
-		      !list_empty(&rnp->blocked_tasks[3])));
+
+	/* If we are on an internal node, complain bitterly. */
+	WARN_ON_ONCE(rnp != rdp->mynode);
 
 	/*
-	 * Move tasks up to root rcu_node.  Rely on the fact that the
-	 * root rcu_node can be at most one ahead of the rest of the
-	 * rcu_nodes in terms of gp_num value.  This fact allows us to
-	 * move the blocked_tasks[] array directly, element by element.
+	 * Move tasks up to root rcu_node.  Don't try to get fancy for
+	 * this corner-case operation -- just put this node's tasks
+	 * at the head of the root node's list, and update the root node's
+	 * ->gp_tasks and ->exp_tasks pointers to those of this node's,
+	 * if non-NULL.  This might result in waiting for more tasks than
+	 * absolutely necessary, but this is a good performance/complexity
+	 * tradeoff.
 	 */
 	if (rcu_preempted_readers(rnp))
 		retval |= RCU_OFL_TASKS_NORM_GP;
 	if (rcu_preempted_readers_exp(rnp))
 		retval |= RCU_OFL_TASKS_EXP_GP;
-	for (i = 0; i < 4; i++) {
-		lp = &rnp->blocked_tasks[i];
-		lp_root = &rnp_root->blocked_tasks[i];
-		while (!list_empty(lp)) {
-			tp = list_entry(lp->next, typeof(*tp), rcu_node_entry);
-			raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
-			list_del(&tp->rcu_node_entry);
-			tp->rcu_blocked_node = rnp_root;
-			list_add(&tp->rcu_node_entry, lp_root);
-			raw_spin_unlock(&rnp_root->lock); /* irqs remain disabled */
-		}
+	lp = &rnp->blkd_tasks;
+	lp_root = &rnp_root->blkd_tasks;
+	while (!list_empty(lp)) {
+		t = list_entry(lp->next, typeof(*t), rcu_node_entry);
+		raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
+		list_del(&t->rcu_node_entry);
+		t->rcu_blocked_node = rnp_root;
+		list_add(&t->rcu_node_entry, lp_root);
+		if (&t->rcu_node_entry == rnp->gp_tasks)
+			rnp_root->gp_tasks = rnp->gp_tasks;
+		if (&t->rcu_node_entry == rnp->exp_tasks)
+			rnp_root->exp_tasks = rnp->exp_tasks;
+		raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
 	}
+	rnp->gp_tasks = NULL;
+	rnp->exp_tasks = NULL;
 	return retval;
 }
 
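
A worked example of the conservatism the comment describes: let the
departing node's list be T2 then T1 (head to tail) with ->gp_tasks pointing
at T1 alone.  The loop peels tasks off the head and re-adds each at the
root's head, so the root ends up holding T1 then T2; when T1 moves, the
root's ->gp_tasks is pointed at it, and the root's blocking segment now
covers both tasks even though only T1 was blocking the grace period.  That
over-waiting is the accepted cost of keeping this rare offline path simple.
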
@@ -586,8 +619,7 @@ static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
  */
 static int rcu_preempted_readers_exp(struct rcu_node *rnp)
 {
-	return !list_empty(&rnp->blocked_tasks[2]) ||
-	       !list_empty(&rnp->blocked_tasks[3]);
+	return rnp->exp_tasks != NULL;
 }
 
 /*
@@ -647,12 +679,13 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
 static void
 sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
 {
-	int must_wait;
+	int must_wait = 0;
 
 	raw_spin_lock(&rnp->lock); /* irqs already disabled */
-	list_splice_init(&rnp->blocked_tasks[0], &rnp->blocked_tasks[2]);
-	list_splice_init(&rnp->blocked_tasks[1], &rnp->blocked_tasks[3]);
-	must_wait = rcu_preempted_readers_exp(rnp);
+	if (!list_empty(&rnp->blkd_tasks)) {
+		rnp->exp_tasks = rnp->blkd_tasks.next;
+		must_wait = 1;
+	}
 	raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
 	if (!must_wait)
 		rcu_report_exp_rnp(rsp, rnp);
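
The snapshot here parallels rcu_preempt_check_blocked_tasks(): once
synchronize_sched_expedited() has herded all preempted readers onto the
->blkd_tasks lists, every task queued at snapshot time must be waited on,
so ->exp_tasks is pointed at the list head and must_wait is raised; an
empty list lets the node report expedited quiescence immediately via
rcu_report_exp_rnp().
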
@@ -661,9 +694,7 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
 /*
  * Wait for an rcu-preempt grace period, but expedite it.  The basic idea
  * is to invoke synchronize_sched_expedited() to push all the tasks to
- * the ->blocked_tasks[] lists, move all entries from the first set of
- * ->blocked_tasks[] lists to the second set, and finally wait for this
- * second set to drain.
+ * the ->blkd_tasks lists and wait for these lists to drain.
  */
 void synchronize_rcu_expedited(void)
 {
@@ -695,7 +726,7 @@ void synchronize_rcu_expedited(void)
 	if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
 		goto unlock_mb_ret; /* Others did our work for us. */
 
-	/* force all RCU readers onto blocked_tasks[]. */
+	/* force all RCU readers onto ->blkd_tasks lists. */
 	synchronize_sched_expedited();
 
 	raw_spin_lock_irqsave(&rsp->onofflock, flags);
@@ -707,7 +738,7 @@ void synchronize_rcu_expedited(void)
 		raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
 	}
 
-	/* Snapshot current state of ->blocked_tasks[] lists. */
+	/* Snapshot current state of ->blkd_tasks lists. */
 	rcu_for_each_leaf_node(rsp, rnp)
 		sync_rcu_preempt_exp_init(rsp, rnp);
 	if (NUM_RCU_NODES > 1)
@@ -715,7 +746,7 @@ void synchronize_rcu_expedited(void)
 
 	raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
 
-	/* Wait for snapshotted ->blocked_tasks[] lists to drain. */
+	/* Wait for snapshotted ->blkd_tasks lists to drain. */
 	rnp = rcu_get_root(rsp);
 	wait_event(sync_rcu_preempt_exp_wq,
 		   sync_rcu_preempt_exp_done(rnp));
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index 4a21ca55ef7c..1cedf94e2c4f 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -161,7 +161,6 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
 {
 	unsigned long gpnum;
 	int level = 0;
-	int phase;
 	struct rcu_node *rnp;
 
 	gpnum = rsp->gpnum;
@@ -178,13 +177,11 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
 			seq_puts(m, "\n");
 			level = rnp->level;
 		}
-		phase = gpnum & 0x1;
-		seq_printf(m, "%lx/%lx %c%c>%c%c %d:%d ^%d ",
+		seq_printf(m, "%lx/%lx %c%c>%c %d:%d ^%d ",
 			   rnp->qsmask, rnp->qsmaskinit,
-			   "T."[list_empty(&rnp->blocked_tasks[phase])],
-			   "E."[list_empty(&rnp->blocked_tasks[phase + 2])],
-			   "T."[list_empty(&rnp->blocked_tasks[!phase])],
-			   "E."[list_empty(&rnp->blocked_tasks[!phase + 2])],
+			   ".G"[rnp->gp_tasks != NULL],
+			   ".E"[rnp->exp_tasks != NULL],
+			   ".T"[!list_empty(&rnp->blkd_tasks)],
 			   rnp->grplo, rnp->grphi, rnp->grpnum);
 	}
 	seq_puts(m, "\n");
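
One last idiom note: the ".G"[expr] construct in the new seq_printf() is
ordinary C string indexing, picking '.' when the boolean-valued expression
is 0 and the flag letter when it is 1.  The patch also reverses each
two-character string because it now indexes with a "non-NULL/non-empty"
condition instead of list_empty().  A standalone sketch, for illustration
only:

	#include <stdio.h>

	int main(void)
	{
		int gp_blocked = 1, exp_blocked = 0, any_blocked = 1;

		/* Same trick as the tracing code: index a 2-char string with 0 or 1. */
		printf("%c%c>%c\n",
		       ".G"[gp_blocked != 0],	/* 'G': a task blocks the normal GP */
		       ".E"[exp_blocked != 0],	/* '.': none blocks the expedited GP */
		       ".T"[any_blocked != 0]);	/* 'T': some task is queued */
		return 0;			/* prints "G.>T" */
	}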