author     Paul E. McKenney <paulmck@linux.vnet.ibm.com>  2015-02-13 13:52:28 -0500
committer  Paul E. McKenney <paulmck@linux.vnet.ibm.com>  2015-03-12 18:18:42 -0400
commit     8eb74b2b291e7bf6aa59fcb4e59f236382f00bf5 (patch)
tree       07abd44430c52bfe2f76ba62188b7958c3910c35
parent     999c286347538388170f919146d7cfa58689472e (diff)
rcu: Rework preemptible expedited bitmask handling
Currently, the rcu_node tree ->expmask bitmasks are initially set to reflect
the online CPUs.  This is pointless, because only the CPUs preempted within
RCU read-side critical sections by the preceding synchronize_sched_expedited()
need to be tracked.  This commit therefore instead sets up these bitmasks
based on the state of the ->blkd_tasks lists.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
-rw-r--r--  kernel/rcu/tree_plugin.h  98
1 file changed, 75 insertions(+), 23 deletions(-)
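As a rough illustration of the two-phase ->expmask setup described in the
commit message above, the following standalone sketch propagates a leaf's bit
up a tiny tree in phase 1 and then lets phase 2 either hand off to the
blocked-task list or clear the bit.  This is not the kernel code: struct
exp_node, its field names, the has_blocked flag, the two exp_init_phase*()
helpers, and the fixed two-leaf tree are all invented for this example, and
the locking, memory barriers, and upward reporting of the real implementation
are omitted.

/*
 * Minimal sketch of the two-phase expedited-bitmask setup.
 * All names here are hypothetical; see the patch below for the real code.
 */
#include <stdio.h>
#include <stdbool.h>

struct exp_node {
        struct exp_node *parent;
        unsigned long grpmask;  /* this node's bit in parent->expmask */
        unsigned long expmask;  /* which children still block the expedited GP */
        bool has_blocked;       /* stand-in for a non-empty ->blkd_tasks list */
};

/* Phase 1: if a leaf has blocked readers, set bits from it up to the root. */
static void exp_init_phase1(struct exp_node *leaf)
{
        struct exp_node *np;

        if (!leaf->has_blocked)
                return;                 /* nothing blocks here, leave mask clear */
        leaf->expmask = 1;              /* tell phase 2 this leaf needs work */
        for (np = leaf; np->parent; np = np->parent) {
                if (np->parent->expmask & np->grpmask)
                        break;          /* an earlier leaf already set this bit */
                np->parent->expmask |= np->grpmask;
        }
}

/*
 * Phase 2: leaves marked by phase 1 either hand off to their blocked-task
 * list or, if the readers finished in the meantime, report completion up.
 */
static void exp_init_phase2(struct exp_node *leaf)
{
        if (!leaf->expmask)
                return;                 /* phase 1 skipped this leaf */
        leaf->expmask = 0;
        if (leaf->has_blocked)
                printf("leaf %p: waiting on blocked readers\n", (void *)leaf);
        else
                printf("leaf %p: no readers left, report up the tree\n",
                       (void *)leaf);
}

int main(void)
{
        struct exp_node root = { .parent = NULL };
        struct exp_node leaf[2] = {
                { .parent = &root, .grpmask = 0x1, .has_blocked = true  },
                { .parent = &root, .grpmask = 0x2, .has_blocked = false },
        };
        int i;

        for (i = 0; i < 2; i++)
                exp_init_phase1(&leaf[i]);
        for (i = 0; i < 2; i++)
                exp_init_phase2(&leaf[i]);
        printf("root.expmask = 0x%lx\n", root.expmask); /* 0x1: only leaf 0 */
        return 0;
}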
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 79376e2461c9..a22721547442 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -626,9 +626,6 @@ static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
  * recursively up the tree. (Calm down, calm down, we do the recursion
  * iteratively!)
  *
- * Most callers will set the "wake" flag, but the task initiating the
- * expedited grace period need not wake itself.
- *
  * Caller must hold sync_rcu_preempt_exp_mutex.
  */
 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
@@ -663,26 +660,85 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
 
 /*
  * Snapshot the tasks blocking the newly started preemptible-RCU expedited
- * grace period for the specified rcu_node structure. If there are no such
- * tasks, report it up the rcu_node hierarchy.
+ * grace period for the specified rcu_node structure, phase 1. If there
+ * are such tasks, set the ->expmask bits up the rcu_node tree and also
+ * set the ->expmask bits on the leaf rcu_node structures to tell phase 2
+ * that work is needed here.
  *
- * Caller must hold sync_rcu_preempt_exp_mutex and must exclude
- * CPU hotplug operations.
+ * Caller must hold sync_rcu_preempt_exp_mutex.
  */
 static void
-sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
+sync_rcu_preempt_exp_init1(struct rcu_state *rsp, struct rcu_node *rnp)
 {
         unsigned long flags;
+        unsigned long mask;
+        struct rcu_node *rnp_up;
 
         raw_spin_lock_irqsave(&rnp->lock, flags);
         smp_mb__after_unlock_lock();
+        WARN_ON_ONCE(rnp->expmask);
+        WARN_ON_ONCE(rnp->exp_tasks);
         if (!rcu_preempt_has_tasks(rnp)) {
+                /* No blocked tasks, nothing to do. */
                 raw_spin_unlock_irqrestore(&rnp->lock, flags);
-                rcu_report_exp_rnp(rsp, rnp, false); /* No tasks, report. */
-        } else {
+                return;
+        }
+        /* Call for Phase 2 and propagate ->expmask bits up the tree. */
+        rnp->expmask = 1;
+        rnp_up = rnp;
+        while (rnp_up->parent) {
+                mask = rnp_up->grpmask;
+                rnp_up = rnp_up->parent;
+                if (rnp_up->expmask & mask)
+                        break;
+                raw_spin_lock(&rnp_up->lock); /* irqs already off */
+                smp_mb__after_unlock_lock();
+                rnp_up->expmask |= mask;
+                raw_spin_unlock(&rnp_up->lock); /* irqs still off */
+        }
+        raw_spin_unlock_irqrestore(&rnp->lock, flags);
+}
+
+/*
+ * Snapshot the tasks blocking the newly started preemptible-RCU expedited
+ * grace period for the specified rcu_node structure, phase 2. If the
+ * leaf rcu_node structure has its ->expmask field set, check for tasks.
+ * If there are some, clear ->expmask and set ->exp_tasks accordingly,
+ * then initiate RCU priority boosting. Otherwise, clear ->expmask and
+ * invoke rcu_report_exp_rnp() to clear out the upper-level ->expmask bits,
+ * enabling rcu_read_unlock_special() to do the bit-clearing.
+ *
+ * Caller must hold sync_rcu_preempt_exp_mutex.
+ */
+static void
+sync_rcu_preempt_exp_init2(struct rcu_state *rsp, struct rcu_node *rnp)
+{
+        unsigned long flags;
+
+        raw_spin_lock_irqsave(&rnp->lock, flags);
+        smp_mb__after_unlock_lock();
+        if (!rnp->expmask) {
+                /* Phase 1 didn't do anything, so Phase 2 doesn't either. */
+                raw_spin_unlock_irqrestore(&rnp->lock, flags);
+                return;
+        }
+
+        /* Phase 1 is over. */
+        rnp->expmask = 0;
+
+        /*
+         * If there are still blocked tasks, set up ->exp_tasks so that
+         * rcu_read_unlock_special() will wake us and then boost them.
+         */
+        if (rcu_preempt_has_tasks(rnp)) {
                 rnp->exp_tasks = rnp->blkd_tasks.next;
                 rcu_initiate_boost(rnp, flags); /* releases rnp->lock */
+                return;
         }
+
+        /* No longer any blocked tasks, so undo bit setting. */
+        raw_spin_unlock_irqrestore(&rnp->lock, flags);
+        rcu_report_exp_rnp(rsp, rnp, false);
 }
 
 /**
@@ -699,7 +755,6 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
  */
 void synchronize_rcu_expedited(void)
 {
-        unsigned long flags;
         struct rcu_node *rnp;
         struct rcu_state *rsp = &rcu_preempt_state;
         unsigned long snap;
@@ -750,19 +805,16 @@ void synchronize_rcu_expedited(void)
         /* force all RCU readers onto ->blkd_tasks lists. */
         synchronize_sched_expedited();
 
-        /* Initialize ->expmask for all non-leaf rcu_node structures. */
-        rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) {
-                raw_spin_lock_irqsave(&rnp->lock, flags);
-                smp_mb__after_unlock_lock();
-                rnp->expmask = rnp->qsmaskinit;
-                raw_spin_unlock_irqrestore(&rnp->lock, flags);
-        }
-
-        /* Snapshot current state of ->blkd_tasks lists. */
+        /*
+         * Snapshot current state of ->blkd_tasks lists into ->expmask.
+         * Phase 1 sets bits and phase 2 permits rcu_read_unlock_special()
+         * to start clearing them. Doing this in one phase leads to
+         * strange races between setting and clearing bits, so just say "no"!
+         */
+        rcu_for_each_leaf_node(rsp, rnp)
+                sync_rcu_preempt_exp_init1(rsp, rnp);
         rcu_for_each_leaf_node(rsp, rnp)
-                sync_rcu_preempt_exp_init(rsp, rnp);
-        if (NUM_RCU_NODES > 1)
-                sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp));
+                sync_rcu_preempt_exp_init2(rsp, rnp);
 
         put_online_cpus();
 
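A similarly rough sketch of the upward bit-clearing that the phase-2 comment
above delegates to rcu_report_exp_rnp() and rcu_read_unlock_special(): once a
node's mask becomes empty, its bit is cleared in its parent, and the clearing
propagates toward the root.  As with the sketch above, the struct, the field
names, report_exp_up(), and the absence of locking are inventions of this
illustration, not the kernel's data structures.

/* Minimal sketch of upward expedited-completion reporting; names hypothetical. */
#include <stdio.h>

struct exp_node {
        struct exp_node *parent;
        unsigned long grpmask;  /* this node's bit in parent->expmask */
        unsigned long expmask;  /* children (or tasks) still being waited on */
};

/* Report that @node no longer blocks the expedited grace period. */
static void report_exp_up(struct exp_node *node)
{
        while (node->expmask == 0 && node->parent) {
                node->parent->expmask &= ~node->grpmask;
                node = node->parent;
        }
        if (!node->parent && node->expmask == 0)
                printf("root mask empty: expedited grace period done\n");
}

int main(void)
{
        struct exp_node root = { .expmask = 0x3 };
        struct exp_node leaf0 = { .parent = &root, .grpmask = 0x1 };
        struct exp_node leaf1 = { .parent = &root, .grpmask = 0x2 };

        report_exp_up(&leaf0);  /* clears bit 0x1, root still waits on 0x2 */
        report_exp_up(&leaf1);  /* clears bit 0x2, root mask now empty */
        return 0;
}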