diff options
author | Paul E. McKenney <paul.mckenney@linaro.org> | 2010-11-30 00:56:39 -0500 |
---|---|---|
committer | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2011-05-06 02:16:54 -0400 |
commit | 12f5f524cafef3ab689929b118f2dfb8bf2be321 (patch) | |
tree | 639473556b6edf9b79e0a18d5ba58f80eea76519 /kernel/rcutree_plugin.h | |
parent | e59fb3120becfb36b22ddb8bd27d065d3cdca499 (diff) |
rcu: merge TREE_PREEPT_RCU blocked_tasks[] lists
Combine the current TREE_PREEMPT_RCU ->blocked_tasks[] lists in the
rcu_node structure into a single ->blkd_tasks list with ->gp_tasks
and ->exp_tasks tail pointers. This is in preparation for RCU priority
boosting, which will add a third dimension to the combinatorial explosion
in the ->blocked_tasks[] case, but simply a third pointer in the new
->blkd_tasks case.
Also update documentation to reflect blocked_tasks[] merge
Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Diffstat (limited to 'kernel/rcutree_plugin.h')
-rw-r--r-- | kernel/rcutree_plugin.h | 163 |
1 files changed, 97 insertions, 66 deletions
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 764b5fcc7c56..774f010a4619 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
@@ -130,12 +130,12 @@ static void rcu_preempt_qs(int cpu) | |||
130 | * We have entered the scheduler, and the current task might soon be | 130 | * We have entered the scheduler, and the current task might soon be |
131 | * context-switched away from. If this task is in an RCU read-side | 131 | * context-switched away from. If this task is in an RCU read-side |
132 | * critical section, we will no longer be able to rely on the CPU to | 132 | * critical section, we will no longer be able to rely on the CPU to |
133 | * record that fact, so we enqueue the task on the appropriate entry | 133 | * record that fact, so we enqueue the task on the blkd_tasks list. |
134 | * of the blocked_tasks[] array. The task will dequeue itself when | 134 | * The task will dequeue itself when it exits the outermost enclosing |
135 | * it exits the outermost enclosing RCU read-side critical section. | 135 | * RCU read-side critical section. Therefore, the current grace period |
136 | * Therefore, the current grace period cannot be permitted to complete | 136 | * cannot be permitted to complete until the blkd_tasks list entries |
137 | * until the blocked_tasks[] entry indexed by the low-order bit of | 137 | * predating the current grace period drain, in other words, until |
138 | * rnp->gpnum empties. | 138 | * rnp->gp_tasks becomes NULL. |
139 | * | 139 | * |
140 | * Caller must disable preemption. | 140 | * Caller must disable preemption. |
141 | */ | 141 | */ |
@@ -143,7 +143,6 @@ static void rcu_preempt_note_context_switch(int cpu) | |||
143 | { | 143 | { |
144 | struct task_struct *t = current; | 144 | struct task_struct *t = current; |
145 | unsigned long flags; | 145 | unsigned long flags; |
146 | int phase; | ||
147 | struct rcu_data *rdp; | 146 | struct rcu_data *rdp; |
148 | struct rcu_node *rnp; | 147 | struct rcu_node *rnp; |
149 | 148 | ||
@@ -165,15 +164,26 @@ static void rcu_preempt_note_context_switch(int cpu) | |||
165 | * (i.e., this CPU has not yet passed through a quiescent | 164 | * (i.e., this CPU has not yet passed through a quiescent |
166 | * state for the current grace period), then as long | 165 | * state for the current grace period), then as long |
167 | * as that task remains queued, the current grace period | 166 | * as that task remains queued, the current grace period |
168 | * cannot end. | 167 | * cannot end. Note that there is some uncertainty as |
168 | * to exactly when the current grace period started. | ||
169 | * We take a conservative approach, which can result | ||
170 | * in unnecessarily waiting on tasks that started very | ||
171 | * slightly after the current grace period began. C'est | ||
172 | * la vie!!! | ||
169 | * | 173 | * |
170 | * But first, note that the current CPU must still be | 174 | * But first, note that the current CPU must still be |
171 | * on line! | 175 | * on line! |
172 | */ | 176 | */ |
173 | WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0); | 177 | WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0); |
174 | WARN_ON_ONCE(!list_empty(&t->rcu_node_entry)); | 178 | WARN_ON_ONCE(!list_empty(&t->rcu_node_entry)); |
175 | phase = (rnp->gpnum + !(rnp->qsmask & rdp->grpmask)) & 0x1; | 179 | if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) { |
176 | list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]); | 180 | list_add(&t->rcu_node_entry, rnp->gp_tasks->prev); |
181 | rnp->gp_tasks = &t->rcu_node_entry; | ||
182 | } else { | ||
183 | list_add(&t->rcu_node_entry, &rnp->blkd_tasks); | ||
184 | if (rnp->qsmask & rdp->grpmask) | ||
185 | rnp->gp_tasks = &t->rcu_node_entry; | ||
186 | } | ||
177 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 187 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
178 | } | 188 | } |
179 | 189 | ||
@@ -210,10 +220,7 @@ EXPORT_SYMBOL_GPL(__rcu_read_lock); | |||
210 | */ | 220 | */ |
211 | static int rcu_preempted_readers(struct rcu_node *rnp) | 221 | static int rcu_preempted_readers(struct rcu_node *rnp) |
212 | { | 222 | { |
213 | int phase = rnp->gpnum & 0x1; | 223 | return rnp->gp_tasks != NULL; |
214 | |||
215 | return !list_empty(&rnp->blocked_tasks[phase]) || | ||
216 | !list_empty(&rnp->blocked_tasks[phase + 2]); | ||
217 | } | 224 | } |
218 | 225 | ||
219 | /* | 226 | /* |
@@ -253,6 +260,21 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) | |||
253 | } | 260 | } |
254 | 261 | ||
255 | /* | 262 | /* |
263 | * Advance a ->blkd_tasks-list pointer to the next entry, instead | ||
264 | * returning NULL if at the end of the list. | ||
265 | */ | ||
266 | static struct list_head *rcu_next_node_entry(struct task_struct *t, | ||
267 | struct rcu_node *rnp) | ||
268 | { | ||
269 | struct list_head *np; | ||
270 | |||
271 | np = t->rcu_node_entry.next; | ||
272 | if (np == &rnp->blkd_tasks) | ||
273 | np = NULL; | ||
274 | return np; | ||
275 | } | ||
276 | |||
277 | /* | ||
256 | * Handle special cases during rcu_read_unlock(), such as needing to | 278 | * Handle special cases during rcu_read_unlock(), such as needing to |
257 | * notify RCU core processing or task having blocked during the RCU | 279 | * notify RCU core processing or task having blocked during the RCU |
258 | * read-side critical section. | 280 | * read-side critical section. |
@@ -262,6 +284,7 @@ static void rcu_read_unlock_special(struct task_struct *t) | |||
262 | int empty; | 284 | int empty; |
263 | int empty_exp; | 285 | int empty_exp; |
264 | unsigned long flags; | 286 | unsigned long flags; |
287 | struct list_head *np; | ||
265 | struct rcu_node *rnp; | 288 | struct rcu_node *rnp; |
266 | int special; | 289 | int special; |
267 | 290 | ||
@@ -305,7 +328,12 @@ static void rcu_read_unlock_special(struct task_struct *t) | |||
305 | empty = !rcu_preempted_readers(rnp); | 328 | empty = !rcu_preempted_readers(rnp); |
306 | empty_exp = !rcu_preempted_readers_exp(rnp); | 329 | empty_exp = !rcu_preempted_readers_exp(rnp); |
307 | smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ | 330 | smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ |
331 | np = rcu_next_node_entry(t, rnp); | ||
308 | list_del_init(&t->rcu_node_entry); | 332 | list_del_init(&t->rcu_node_entry); |
333 | if (&t->rcu_node_entry == rnp->gp_tasks) | ||
334 | rnp->gp_tasks = np; | ||
335 | if (&t->rcu_node_entry == rnp->exp_tasks) | ||
336 | rnp->exp_tasks = np; | ||
309 | t->rcu_blocked_node = NULL; | 337 | t->rcu_blocked_node = NULL; |
310 | 338 | ||
311 | /* | 339 | /* |
@@ -361,18 +389,16 @@ EXPORT_SYMBOL_GPL(__rcu_read_unlock); | |||
361 | static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) | 389 | static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) |
362 | { | 390 | { |
363 | unsigned long flags; | 391 | unsigned long flags; |
364 | struct list_head *lp; | ||
365 | int phase; | ||
366 | struct task_struct *t; | 392 | struct task_struct *t; |
367 | 393 | ||
368 | if (rcu_preempted_readers(rnp)) { | 394 | if (!rcu_preempted_readers(rnp)) |
369 | raw_spin_lock_irqsave(&rnp->lock, flags); | 395 | return; |
370 | phase = rnp->gpnum & 0x1; | 396 | raw_spin_lock_irqsave(&rnp->lock, flags); |
371 | lp = &rnp->blocked_tasks[phase]; | 397 | t = list_entry(rnp->gp_tasks, |
372 | list_for_each_entry(t, lp, rcu_node_entry) | 398 | struct task_struct, rcu_node_entry); |
373 | sched_show_task(t); | 399 | list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) |
374 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 400 | sched_show_task(t); |
375 | } | 401 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
376 | } | 402 | } |
377 | 403 | ||
378 | /* | 404 | /* |
@@ -402,16 +428,14 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp) | |||
402 | */ | 428 | */ |
403 | static void rcu_print_task_stall(struct rcu_node *rnp) | 429 | static void rcu_print_task_stall(struct rcu_node *rnp) |
404 | { | 430 | { |
405 | struct list_head *lp; | ||
406 | int phase; | ||
407 | struct task_struct *t; | 431 | struct task_struct *t; |
408 | 432 | ||
409 | if (rcu_preempted_readers(rnp)) { | 433 | if (!rcu_preempted_readers(rnp)) |
410 | phase = rnp->gpnum & 0x1; | 434 | return; |
411 | lp = &rnp->blocked_tasks[phase]; | 435 | t = list_entry(rnp->gp_tasks, |
412 | list_for_each_entry(t, lp, rcu_node_entry) | 436 | struct task_struct, rcu_node_entry); |
413 | printk(" P%d", t->pid); | 437 | list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) |
414 | } | 438 | printk(" P%d", t->pid); |
415 | } | 439 | } |
416 | 440 | ||
417 | /* | 441 | /* |
@@ -430,10 +454,15 @@ static void rcu_preempt_stall_reset(void) | |||
430 | * period that still has RCU readers blocked! This function must be | 454 | * period that still has RCU readers blocked! This function must be |
431 | * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock | 455 | * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock |
432 | * must be held by the caller. | 456 | * must be held by the caller. |
457 | * | ||
458 | * Also, if there are blocked tasks on the list, they automatically | ||
459 | * block the newly created grace period, so set up ->gp_tasks accordingly. | ||
433 | */ | 460 | */ |
434 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) | 461 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) |
435 | { | 462 | { |
436 | WARN_ON_ONCE(rcu_preempted_readers(rnp)); | 463 | WARN_ON_ONCE(rcu_preempted_readers(rnp)); |
464 | if (!list_empty(&rnp->blkd_tasks)) | ||
465 | rnp->gp_tasks = rnp->blkd_tasks.next; | ||
437 | WARN_ON_ONCE(rnp->qsmask); | 466 | WARN_ON_ONCE(rnp->qsmask); |
438 | } | 467 | } |
439 | 468 | ||
@@ -457,45 +486,49 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, | |||
457 | struct rcu_node *rnp, | 486 | struct rcu_node *rnp, |
458 | struct rcu_data *rdp) | 487 | struct rcu_data *rdp) |
459 | { | 488 | { |
460 | int i; | ||
461 | struct list_head *lp; | 489 | struct list_head *lp; |
462 | struct list_head *lp_root; | 490 | struct list_head *lp_root; |
463 | int retval = 0; | 491 | int retval = 0; |
464 | struct rcu_node *rnp_root = rcu_get_root(rsp); | 492 | struct rcu_node *rnp_root = rcu_get_root(rsp); |
465 | struct task_struct *tp; | 493 | struct task_struct *t; |
466 | 494 | ||
467 | if (rnp == rnp_root) { | 495 | if (rnp == rnp_root) { |
468 | WARN_ONCE(1, "Last CPU thought to be offlined?"); | 496 | WARN_ONCE(1, "Last CPU thought to be offlined?"); |
469 | return 0; /* Shouldn't happen: at least one CPU online. */ | 497 | return 0; /* Shouldn't happen: at least one CPU online. */ |
470 | } | 498 | } |
471 | WARN_ON_ONCE(rnp != rdp->mynode && | 499 | |
472 | (!list_empty(&rnp->blocked_tasks[0]) || | 500 | /* If we are on an internal node, complain bitterly. */ |
473 | !list_empty(&rnp->blocked_tasks[1]) || | 501 | WARN_ON_ONCE(rnp != rdp->mynode); |
474 | !list_empty(&rnp->blocked_tasks[2]) || | ||
475 | !list_empty(&rnp->blocked_tasks[3]))); | ||
476 | 502 | ||
477 | /* | 503 | /* |
478 | * Move tasks up to root rcu_node. Rely on the fact that the | 504 | * Move tasks up to root rcu_node. Don't try to get fancy for |
479 | * root rcu_node can be at most one ahead of the rest of the | 505 | * this corner-case operation -- just put this node's tasks |
480 | * rcu_nodes in terms of gp_num value. This fact allows us to | 506 | * at the head of the root node's list, and update the root node's |
481 | * move the blocked_tasks[] array directly, element by element. | 507 | * ->gp_tasks and ->exp_tasks pointers to those of this node's, |
508 | * if non-NULL. This might result in waiting for more tasks than | ||
509 | * absolutely necessary, but this is a good performance/complexity | ||
510 | * tradeoff. | ||
482 | */ | 511 | */ |
483 | if (rcu_preempted_readers(rnp)) | 512 | if (rcu_preempted_readers(rnp)) |
484 | retval |= RCU_OFL_TASKS_NORM_GP; | 513 | retval |= RCU_OFL_TASKS_NORM_GP; |
485 | if (rcu_preempted_readers_exp(rnp)) | 514 | if (rcu_preempted_readers_exp(rnp)) |
486 | retval |= RCU_OFL_TASKS_EXP_GP; | 515 | retval |= RCU_OFL_TASKS_EXP_GP; |
487 | for (i = 0; i < 4; i++) { | 516 | lp = &rnp->blkd_tasks; |
488 | lp = &rnp->blocked_tasks[i]; | 517 | lp_root = &rnp_root->blkd_tasks; |
489 | lp_root = &rnp_root->blocked_tasks[i]; | 518 | while (!list_empty(lp)) { |
490 | while (!list_empty(lp)) { | 519 | t = list_entry(lp->next, typeof(*t), rcu_node_entry); |
491 | tp = list_entry(lp->next, typeof(*tp), rcu_node_entry); | 520 | raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ |
492 | raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ | 521 | list_del(&t->rcu_node_entry); |
493 | list_del(&tp->rcu_node_entry); | 522 | t->rcu_blocked_node = rnp_root; |
494 | tp->rcu_blocked_node = rnp_root; | 523 | list_add(&t->rcu_node_entry, lp_root); |
495 | list_add(&tp->rcu_node_entry, lp_root); | 524 | if (&t->rcu_node_entry == rnp->gp_tasks) |
496 | raw_spin_unlock(&rnp_root->lock); /* irqs remain disabled */ | 525 | rnp_root->gp_tasks = rnp->gp_tasks; |
497 | } | 526 | if (&t->rcu_node_entry == rnp->exp_tasks) |
527 | rnp_root->exp_tasks = rnp->exp_tasks; | ||
528 | raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */ | ||
498 | } | 529 | } |
530 | rnp->gp_tasks = NULL; | ||
531 | rnp->exp_tasks = NULL; | ||
499 | return retval; | 532 | return retval; |
500 | } | 533 | } |
501 | 534 | ||
@@ -586,8 +619,7 @@ static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex); | |||
586 | */ | 619 | */ |
587 | static int rcu_preempted_readers_exp(struct rcu_node *rnp) | 620 | static int rcu_preempted_readers_exp(struct rcu_node *rnp) |
588 | { | 621 | { |
589 | return !list_empty(&rnp->blocked_tasks[2]) || | 622 | return rnp->exp_tasks != NULL; |
590 | !list_empty(&rnp->blocked_tasks[3]); | ||
591 | } | 623 | } |
592 | 624 | ||
593 | /* | 625 | /* |
@@ -647,12 +679,13 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) | |||
647 | static void | 679 | static void |
648 | sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) | 680 | sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) |
649 | { | 681 | { |
650 | int must_wait; | 682 | int must_wait = 0; |
651 | 683 | ||
652 | raw_spin_lock(&rnp->lock); /* irqs already disabled */ | 684 | raw_spin_lock(&rnp->lock); /* irqs already disabled */ |
653 | list_splice_init(&rnp->blocked_tasks[0], &rnp->blocked_tasks[2]); | 685 | if (!list_empty(&rnp->blkd_tasks)) { |
654 | list_splice_init(&rnp->blocked_tasks[1], &rnp->blocked_tasks[3]); | 686 | rnp->exp_tasks = rnp->blkd_tasks.next; |
655 | must_wait = rcu_preempted_readers_exp(rnp); | 687 | must_wait = 1; |
688 | } | ||
656 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ | 689 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ |
657 | if (!must_wait) | 690 | if (!must_wait) |
658 | rcu_report_exp_rnp(rsp, rnp); | 691 | rcu_report_exp_rnp(rsp, rnp); |
@@ -661,9 +694,7 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) | |||
661 | /* | 694 | /* |
662 | * Wait for an rcu-preempt grace period, but expedite it. The basic idea | 695 | * Wait for an rcu-preempt grace period, but expedite it. The basic idea |
663 | * is to invoke synchronize_sched_expedited() to push all the tasks to | 696 | * is to invoke synchronize_sched_expedited() to push all the tasks to |
664 | * the ->blocked_tasks[] lists, move all entries from the first set of | 697 | * the ->blkd_tasks lists and wait for this list to drain. |
665 | * ->blocked_tasks[] lists to the second set, and finally wait for this | ||
666 | * second set to drain. | ||
667 | */ | 698 | */ |
668 | void synchronize_rcu_expedited(void) | 699 | void synchronize_rcu_expedited(void) |
669 | { | 700 | { |
@@ -695,7 +726,7 @@ void synchronize_rcu_expedited(void) | |||
695 | if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) | 726 | if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) |
696 | goto unlock_mb_ret; /* Others did our work for us. */ | 727 | goto unlock_mb_ret; /* Others did our work for us. */ |
697 | 728 | ||
698 | /* force all RCU readers onto blocked_tasks[]. */ | 729 | /* force all RCU readers onto ->blkd_tasks lists. */ |
699 | synchronize_sched_expedited(); | 730 | synchronize_sched_expedited(); |
700 | 731 | ||
701 | raw_spin_lock_irqsave(&rsp->onofflock, flags); | 732 | raw_spin_lock_irqsave(&rsp->onofflock, flags); |
@@ -707,7 +738,7 @@ void synchronize_rcu_expedited(void) | |||
707 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 738 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
708 | } | 739 | } |
709 | 740 | ||
710 | /* Snapshot current state of ->blocked_tasks[] lists. */ | 741 | /* Snapshot current state of ->blkd_tasks lists. */ |
711 | rcu_for_each_leaf_node(rsp, rnp) | 742 | rcu_for_each_leaf_node(rsp, rnp) |
712 | sync_rcu_preempt_exp_init(rsp, rnp); | 743 | sync_rcu_preempt_exp_init(rsp, rnp); |
713 | if (NUM_RCU_NODES > 1) | 744 | if (NUM_RCU_NODES > 1) |
@@ -715,7 +746,7 @@ void synchronize_rcu_expedited(void) | |||
715 | 746 | ||
716 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); | 747 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); |
717 | 748 | ||
718 | /* Wait for snapshotted ->blocked_tasks[] lists to drain. */ | 749 | /* Wait for snapshotted ->blkd_tasks lists to drain. */ |
719 | rnp = rcu_get_root(rsp); | 750 | rnp = rcu_get_root(rsp); |
720 | wait_event(sync_rcu_preempt_exp_wq, | 751 | wait_event(sync_rcu_preempt_exp_wq, |
721 | sync_rcu_preempt_exp_done(rnp)); | 752 | sync_rcu_preempt_exp_done(rnp)); |