author		Paul E. McKenney <paul.mckenney@linaro.org>	2010-11-30 00:56:39 -0500
committer	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2011-05-06 02:16:54 -0400
commit		12f5f524cafef3ab689929b118f2dfb8bf2be321 (patch)
tree		639473556b6edf9b79e0a18d5ba58f80eea76519 /kernel/rcutree_plugin.h
parent		e59fb3120becfb36b22ddb8bd27d065d3cdca499 (diff)
rcu: merge TREE_PREEMPT_RCU blocked_tasks[] lists
Combine the current TREE_PREEMPT_RCU ->blocked_tasks[] lists in the rcu_node structure into a single ->blkd_tasks list with ->gp_tasks and ->exp_tasks tail pointers. This is in preparation for RCU priority boosting, which will add a third dimension to the combinatorial explosion in the ->blocked_tasks[] case, but requires only a third pointer in the new ->blkd_tasks case. Also update the documentation to reflect the blocked_tasks[] merge.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
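For orientation, the sketch below shows the shape of the data-structure change this patch depends on. The structure itself lives in kernel/rcutree.h and is not part of the diff shown here (which is limited to kernel/rcutree_plugin.h), so the struct body is a condensed assumption with most fields elided; the ->blkd_tasks, ->gp_tasks, and ->exp_tasks names and the two pointer-test helpers are taken from this patch.

#include <linux/list.h>	/* struct list_head */

/* Condensed sketch only -- most rcu_node fields are omitted. */
struct rcu_node {
	/* Old layout (removed): four lists indexed by grace-period
	 * parity and normal-vs-expedited, ->blocked_tasks[0..3]. */

	/* New layout: one list of all tasks that blocked while in an
	 * RCU read-side critical section... */
	struct list_head blkd_tasks;
	/* ...plus pointers to the first entries (if any) that still
	 * block the current normal and expedited grace periods;
	 * NULL when no such entries exist. */
	struct list_head *gp_tasks;
	struct list_head *exp_tasks;
};

/* "Are blocked readers holding up this grace period?" then reduces
 * to a pointer test, as in the helpers updated by the diff below: */
static int rcu_preempted_readers(struct rcu_node *rnp)
{
	return rnp->gp_tasks != NULL;
}

static int rcu_preempted_readers_exp(struct rcu_node *rnp)
{
	return rnp->exp_tasks != NULL;
}

With this layout, RCU priority boosting needs only a third such pointer into the single ->blkd_tasks list, instead of doubling the number of per-node lists yet again.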
Diffstat (limited to 'kernel/rcutree_plugin.h')
-rw-r--r--	kernel/rcutree_plugin.h	163
1 file changed, 97 insertions(+), 66 deletions(-)
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 764b5fcc7c56..774f010a4619 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -130,12 +130,12 @@ static void rcu_preempt_qs(int cpu)
  * We have entered the scheduler, and the current task might soon be
  * context-switched away from. If this task is in an RCU read-side
  * critical section, we will no longer be able to rely on the CPU to
- * record that fact, so we enqueue the task on the appropriate entry
- * of the blocked_tasks[] array. The task will dequeue itself when
- * it exits the outermost enclosing RCU read-side critical section.
- * Therefore, the current grace period cannot be permitted to complete
- * until the blocked_tasks[] entry indexed by the low-order bit of
- * rnp->gpnum empties.
+ * record that fact, so we enqueue the task on the blkd_tasks list.
+ * The task will dequeue itself when it exits the outermost enclosing
+ * RCU read-side critical section. Therefore, the current grace period
+ * cannot be permitted to complete until the blkd_tasks list entries
+ * predating the current grace period drain, in other words, until
+ * rnp->gp_tasks becomes NULL.
  *
  * Caller must disable preemption.
  */
@@ -143,7 +143,6 @@ static void rcu_preempt_note_context_switch(int cpu)
 {
 	struct task_struct *t = current;
 	unsigned long flags;
-	int phase;
 	struct rcu_data *rdp;
 	struct rcu_node *rnp;
 
@@ -165,15 +164,26 @@ static void rcu_preempt_note_context_switch(int cpu)
 		 * (i.e., this CPU has not yet passed through a quiescent
 		 * state for the current grace period), then as long
 		 * as that task remains queued, the current grace period
-		 * cannot end.
+		 * cannot end. Note that there is some uncertainty as
+		 * to exactly when the current grace period started.
+		 * We take a conservative approach, which can result
+		 * in unnecessarily waiting on tasks that started very
+		 * slightly after the current grace period began. C'est
+		 * la vie!!!
 		 *
 		 * But first, note that the current CPU must still be
 		 * on line!
 		 */
 		WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0);
 		WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
-		phase = (rnp->gpnum + !(rnp->qsmask & rdp->grpmask)) & 0x1;
-		list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]);
+		if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) {
+			list_add(&t->rcu_node_entry, rnp->gp_tasks->prev);
+			rnp->gp_tasks = &t->rcu_node_entry;
+		} else {
+			list_add(&t->rcu_node_entry, &rnp->blkd_tasks);
+			if (rnp->qsmask & rdp->grpmask)
+				rnp->gp_tasks = &t->rcu_node_entry;
+		}
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 	}
 
@@ -210,10 +220,7 @@ EXPORT_SYMBOL_GPL(__rcu_read_lock);
  */
 static int rcu_preempted_readers(struct rcu_node *rnp)
 {
-	int phase = rnp->gpnum & 0x1;
-
-	return !list_empty(&rnp->blocked_tasks[phase]) ||
-	       !list_empty(&rnp->blocked_tasks[phase + 2]);
+	return rnp->gp_tasks != NULL;
 }
 
 /*
@@ -253,6 +260,21 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
 }
 
 /*
+ * Advance a ->blkd_tasks-list pointer to the next entry, instead
+ * returning NULL if at the end of the list.
+ */
+static struct list_head *rcu_next_node_entry(struct task_struct *t,
+					     struct rcu_node *rnp)
+{
+	struct list_head *np;
+
+	np = t->rcu_node_entry.next;
+	if (np == &rnp->blkd_tasks)
+		np = NULL;
+	return np;
+}
+
+/*
  * Handle special cases during rcu_read_unlock(), such as needing to
  * notify RCU core processing or task having blocked during the RCU
  * read-side critical section.
@@ -262,6 +284,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
 	int empty;
 	int empty_exp;
 	unsigned long flags;
+	struct list_head *np;
 	struct rcu_node *rnp;
 	int special;
 
@@ -305,7 +328,12 @@ static void rcu_read_unlock_special(struct task_struct *t)
 		empty = !rcu_preempted_readers(rnp);
 		empty_exp = !rcu_preempted_readers_exp(rnp);
 		smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
+		np = rcu_next_node_entry(t, rnp);
 		list_del_init(&t->rcu_node_entry);
+		if (&t->rcu_node_entry == rnp->gp_tasks)
+			rnp->gp_tasks = np;
+		if (&t->rcu_node_entry == rnp->exp_tasks)
+			rnp->exp_tasks = np;
 		t->rcu_blocked_node = NULL;
 
 		/*
@@ -361,18 +389,16 @@ EXPORT_SYMBOL_GPL(__rcu_read_unlock);
 static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
 {
 	unsigned long flags;
-	struct list_head *lp;
-	int phase;
 	struct task_struct *t;
 
-	if (rcu_preempted_readers(rnp)) {
-		raw_spin_lock_irqsave(&rnp->lock, flags);
-		phase = rnp->gpnum & 0x1;
-		lp = &rnp->blocked_tasks[phase];
-		list_for_each_entry(t, lp, rcu_node_entry)
-			sched_show_task(t);
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
-	}
+	if (!rcu_preempted_readers(rnp))
+		return;
+	raw_spin_lock_irqsave(&rnp->lock, flags);
+	t = list_entry(rnp->gp_tasks,
+		       struct task_struct, rcu_node_entry);
+	list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
+		sched_show_task(t);
+	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 }
 
 /*
@@ -402,16 +428,14 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp)
  */
 static void rcu_print_task_stall(struct rcu_node *rnp)
 {
-	struct list_head *lp;
-	int phase;
 	struct task_struct *t;
 
-	if (rcu_preempted_readers(rnp)) {
-		phase = rnp->gpnum & 0x1;
-		lp = &rnp->blocked_tasks[phase];
-		list_for_each_entry(t, lp, rcu_node_entry)
-			printk(" P%d", t->pid);
-	}
+	if (!rcu_preempted_readers(rnp))
+		return;
+	t = list_entry(rnp->gp_tasks,
+		       struct task_struct, rcu_node_entry);
+	list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
+		printk(" P%d", t->pid);
 }
 
 /*
@@ -430,10 +454,15 @@ static void rcu_preempt_stall_reset(void)
  * period that still has RCU readers blocked! This function must be
  * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock
  * must be held by the caller.
+ *
+ * Also, if there are blocked tasks on the list, they automatically
+ * block the newly created grace period, so set up ->gp_tasks accordingly.
  */
 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
 {
 	WARN_ON_ONCE(rcu_preempted_readers(rnp));
+	if (!list_empty(&rnp->blkd_tasks))
+		rnp->gp_tasks = rnp->blkd_tasks.next;
 	WARN_ON_ONCE(rnp->qsmask);
 }
 
@@ -457,45 +486,49 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
 				     struct rcu_node *rnp,
 				     struct rcu_data *rdp)
 {
-	int i;
 	struct list_head *lp;
 	struct list_head *lp_root;
 	int retval = 0;
 	struct rcu_node *rnp_root = rcu_get_root(rsp);
-	struct task_struct *tp;
+	struct task_struct *t;
 
 	if (rnp == rnp_root) {
 		WARN_ONCE(1, "Last CPU thought to be offlined?");
 		return 0; /* Shouldn't happen: at least one CPU online. */
 	}
-	WARN_ON_ONCE(rnp != rdp->mynode &&
-		     (!list_empty(&rnp->blocked_tasks[0]) ||
-		      !list_empty(&rnp->blocked_tasks[1]) ||
-		      !list_empty(&rnp->blocked_tasks[2]) ||
-		      !list_empty(&rnp->blocked_tasks[3])));
+
+	/* If we are on an internal node, complain bitterly. */
+	WARN_ON_ONCE(rnp != rdp->mynode);
 
 	/*
-	 * Move tasks up to root rcu_node. Rely on the fact that the
-	 * root rcu_node can be at most one ahead of the rest of the
-	 * rcu_nodes in terms of gp_num value. This fact allows us to
-	 * move the blocked_tasks[] array directly, element by element.
+	 * Move tasks up to root rcu_node. Don't try to get fancy for
+	 * this corner-case operation -- just put this node's tasks
+	 * at the head of the root node's list, and update the root node's
+	 * ->gp_tasks and ->exp_tasks pointers to those of this node's,
+	 * if non-NULL. This might result in waiting for more tasks than
+	 * absolutely necessary, but this is a good performance/complexity
+	 * tradeoff.
 	 */
 	if (rcu_preempted_readers(rnp))
 		retval |= RCU_OFL_TASKS_NORM_GP;
 	if (rcu_preempted_readers_exp(rnp))
 		retval |= RCU_OFL_TASKS_EXP_GP;
-	for (i = 0; i < 4; i++) {
-		lp = &rnp->blocked_tasks[i];
-		lp_root = &rnp_root->blocked_tasks[i];
-		while (!list_empty(lp)) {
-			tp = list_entry(lp->next, typeof(*tp), rcu_node_entry);
-			raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
-			list_del(&tp->rcu_node_entry);
-			tp->rcu_blocked_node = rnp_root;
-			list_add(&tp->rcu_node_entry, lp_root);
-			raw_spin_unlock(&rnp_root->lock); /* irqs remain disabled */
-		}
+	lp = &rnp->blkd_tasks;
+	lp_root = &rnp_root->blkd_tasks;
+	while (!list_empty(lp)) {
+		t = list_entry(lp->next, typeof(*t), rcu_node_entry);
+		raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
+		list_del(&t->rcu_node_entry);
+		t->rcu_blocked_node = rnp_root;
+		list_add(&t->rcu_node_entry, lp_root);
+		if (&t->rcu_node_entry == rnp->gp_tasks)
+			rnp_root->gp_tasks = rnp->gp_tasks;
+		if (&t->rcu_node_entry == rnp->exp_tasks)
+			rnp_root->exp_tasks = rnp->exp_tasks;
+		raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
 	}
+	rnp->gp_tasks = NULL;
+	rnp->exp_tasks = NULL;
 	return retval;
 }
 
@@ -586,8 +619,7 @@ static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
  */
 static int rcu_preempted_readers_exp(struct rcu_node *rnp)
 {
-	return !list_empty(&rnp->blocked_tasks[2]) ||
-	       !list_empty(&rnp->blocked_tasks[3]);
+	return rnp->exp_tasks != NULL;
 }
 
 /*
@@ -647,12 +679,13 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
 static void
 sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
 {
-	int must_wait;
+	int must_wait = 0;
 
 	raw_spin_lock(&rnp->lock); /* irqs already disabled */
-	list_splice_init(&rnp->blocked_tasks[0], &rnp->blocked_tasks[2]);
-	list_splice_init(&rnp->blocked_tasks[1], &rnp->blocked_tasks[3]);
-	must_wait = rcu_preempted_readers_exp(rnp);
+	if (!list_empty(&rnp->blkd_tasks)) {
+		rnp->exp_tasks = rnp->blkd_tasks.next;
+		must_wait = 1;
+	}
 	raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
 	if (!must_wait)
 		rcu_report_exp_rnp(rsp, rnp);
@@ -661,9 +694,7 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
 /*
  * Wait for an rcu-preempt grace period, but expedite it. The basic idea
  * is to invoke synchronize_sched_expedited() to push all the tasks to
- * the ->blocked_tasks[] lists, move all entries from the first set of
- * ->blocked_tasks[] lists to the second set, and finally wait for this
- * second set to drain.
+ * the ->blkd_tasks lists and wait for this list to drain.
  */
 void synchronize_rcu_expedited(void)
 {
@@ -695,7 +726,7 @@ void synchronize_rcu_expedited(void)
 	if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
 		goto unlock_mb_ret; /* Others did our work for us. */
 
-	/* force all RCU readers onto blocked_tasks[]. */
+	/* force all RCU readers onto ->blkd_tasks lists. */
 	synchronize_sched_expedited();
 
 	raw_spin_lock_irqsave(&rsp->onofflock, flags);
@@ -707,7 +738,7 @@ void synchronize_rcu_expedited(void)
 		raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
 	}
 
-	/* Snapshot current state of ->blocked_tasks[] lists. */
+	/* Snapshot current state of ->blkd_tasks lists. */
 	rcu_for_each_leaf_node(rsp, rnp)
 		sync_rcu_preempt_exp_init(rsp, rnp);
 	if (NUM_RCU_NODES > 1)
@@ -715,7 +746,7 @@ void synchronize_rcu_expedited(void)
 
 	raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
 
-	/* Wait for snapshotted ->blocked_tasks[] lists to drain. */
+	/* Wait for snapshotted ->blkd_tasks lists to drain. */
 	rnp = rcu_get_root(rsp);
 	wait_event(sync_rcu_preempt_exp_wq,
 		   sync_rcu_preempt_exp_done(rnp));