author	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2011-08-24 19:52:09 -0400
committer	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2011-09-29 00:38:49 -0400
commit	afe24b122eb6edb5f1cb942570ac8d766105c7fc (patch)
tree	c1f9e2fcbcf2d374f36ee3bfc45babf576cb6246 /kernel
parent	e90c53d3e238dd0b7b02964370e8fece1778df96 (diff)
rcu: Move propagation of ->completed from rcu_start_gp() to rcu_report_qs_rsp()
It is possible for the CPU that noted the end of the prior grace period to not need a new one, and therefore to decide to propagate ->completed throughout the rcu_node tree without starting another grace period. However, in so doing, it releases the root rcu_node structure's lock, which can allow some other CPU to start another grace period. The first CPU will be propagating ->completed in parallel with the second CPU initializing the rcu_node tree for the new grace period. In theory this is harmless, but in practice we need to keep things simple.

This commit therefore moves the propagation of ->completed to rcu_report_qs_rsp(), and refrains from marking the old grace period as having been completed until it has finished doing this. This prevents anyone from starting a new grace period concurrently with marking the old grace period as having been completed.

Of course, the optimization where a CPU needing a new grace period doesn't bother marking the old one completed is still in effect: In that case, the marking happens implicitly as part of initializing the new grace period.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
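For reference, a rough sketch of the interleaving this commit rules out, reconstructed from the commit message above (illustrative only; the CPU labels and exact ordering are assumptions, not part of the patch):

/*
 * CPU 0 noted the end of the old grace period and needs no new one;
 * CPU 1 needs a new grace period.  Before this commit:
 *
 *   CPU 0                                   CPU 1
 *   -----                                   -----
 *   raw_spin_unlock(&rnp->lock);
 *                                           acquires the root lock,
 *                                           starts a new grace period,
 *                                           initializes the rcu_node tree
 *   rcu_for_each_node_breadth_first(...)
 *       rnp->completed = rsp->completed;    <- runs in parallel with
 *                                              CPU 1's initialization
 *
 * Harmless in theory, but needless complexity in practice.
 */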
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/rcutree.c	71
1 file changed, 51 insertions(+), 20 deletions(-)
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index e75df0c93abd..e234eb92a177 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -842,28 +842,24 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
 	struct rcu_node *rnp = rcu_get_root(rsp);
 
 	if (!rcu_scheduler_fully_active ||
-	    !cpu_needs_another_gp(rsp, rdp) ||
-	    rsp->fqs_active) {
-		if (rcu_scheduler_fully_active &&
-		    cpu_needs_another_gp(rsp, rdp))
-			rsp->fqs_need_gp = 1;
-		if (rnp->completed == rsp->completed) {
-			raw_spin_unlock_irqrestore(&rnp->lock, flags);
-			return;
-		}
-		raw_spin_unlock(&rnp->lock);	 /* irqs remain disabled. */
+	    !cpu_needs_another_gp(rsp, rdp)) {
+		/*
+		 * Either the scheduler hasn't yet spawned the first
+		 * non-idle task or this CPU does not need another
+		 * grace period.  Either way, don't start a new grace
+		 * period.
+		 */
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		return;
+	}
 
+	if (rsp->fqs_active) {
 		/*
-		 * Propagate new ->completed value to rcu_node structures
-		 * so that other CPUs don't have to wait until the start
-		 * of the next grace period to process their callbacks.
+		 * This CPU needs a grace period, but force_quiescent_state()
+		 * is running.  Tell it to start one on this CPU's behalf.
 		 */
-		rcu_for_each_node_breadth_first(rsp, rnp) {
-			raw_spin_lock(&rnp->lock); /* irqs already disabled. */
-			rnp->completed = rsp->completed;
-			raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
-		}
-		local_irq_restore(flags);
+		rsp->fqs_need_gp = 1;
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 		return;
 	}
 
@@ -947,6 +943,8 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
 	__releases(rcu_get_root(rsp)->lock)
 {
 	unsigned long gp_duration;
+	struct rcu_node *rnp = rcu_get_root(rsp);
+	struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
 
 	WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
 
@@ -958,7 +956,40 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
 	gp_duration = jiffies - rsp->gp_start;
 	if (gp_duration > rsp->gp_max)
 		rsp->gp_max = gp_duration;
-	rsp->completed = rsp->gpnum;
+
+	/*
+	 * We know the grace period is complete, but to everyone else
+	 * it appears to still be ongoing.  But it is also the case
+	 * that to everyone else it looks like there is nothing that
+	 * they can do to advance the grace period.  It is therefore
+	 * safe for us to drop the lock in order to mark the grace
+	 * period as completed in all of the rcu_node structures.
+	 *
+	 * But if this CPU needs another grace period, it will take
+	 * care of this while initializing the next grace period.
+	 * We use RCU_WAIT_TAIL instead of the usual RCU_DONE_TAIL
+	 * because the callbacks have not yet been advanced: Those
+	 * callbacks are waiting on the grace period that just now
+	 * completed.
+	 */
+	if (*rdp->nxttail[RCU_WAIT_TAIL] == NULL) {
+		raw_spin_unlock(&rnp->lock);	 /* irqs remain disabled. */
+
+		/*
+		 * Propagate new ->completed value to rcu_node structures
+		 * so that other CPUs don't have to wait until the start
+		 * of the next grace period to process their callbacks.
+		 */
+		rcu_for_each_node_breadth_first(rsp, rnp) {
+			raw_spin_lock(&rnp->lock); /* irqs already disabled. */
+			rnp->completed = rsp->gpnum;
+			raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+		}
+		rnp = rcu_get_root(rsp);
+		raw_spin_lock(&rnp->lock); /* irqs already disabled. */
+	}
+
+	rsp->completed = rsp->gpnum;  /* Declare the grace period complete. */
 	trace_rcu_grace_period(rsp->name, rsp->completed, "end");
 	rsp->signaled = RCU_GP_IDLE;
 	rcu_start_gp(rsp, flags);  /* releases root node's rnp->lock. */
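
A note on the new *rdp->nxttail[RCU_WAIT_TAIL] == NULL test: TREE RCU keeps each CPU's callbacks on a single list segmented by the nxttail[] array of pointers-to-pointers, so an empty tail past RCU_WAIT_TAIL means every queued callback is covered by the grace period that just ended. A minimal compilable sketch of that check, assuming the segment names from kernel/rcutree.h of this era (the surrounding types are simplified for illustration and are not the kernel's definitions):

#include <stddef.h>

/* Toy model of TREE RCU's segmented per-CPU callback list. */
struct rcu_head {
	struct rcu_head *next;
};

/* Segment boundaries, named as in kernel/rcutree.h of this era. */
enum {
	RCU_DONE_TAIL,		/* ...callbacks ready to invoke */
	RCU_WAIT_TAIL,		/* ...waiting on the current grace period */
	RCU_NEXT_READY_TAIL,	/* ...waiting on the next grace period */
	RCU_NEXT_TAIL,		/* ...not yet assigned a grace period */
	RCU_NEXT_SIZE
};

struct rcu_data {
	struct rcu_head *nxtlist;			/* head of all queued callbacks */
	struct rcu_head **nxttail[RCU_NEXT_SIZE];	/* end of each segment */
};

/*
 * If nothing follows the RCU_WAIT_TAIL boundary, every callback on
 * this CPU is covered by the grace period that just ended, so no
 * further grace period is needed and it is safe to propagate
 * ->completed with the root lock dropped, as the patch does.
 */
static int no_further_gp_needed(const struct rcu_data *rdp)
{
	return *rdp->nxttail[RCU_WAIT_TAIL] == NULL;
}

If anything had been queued past RCU_WAIT_TAIL, this CPU would need another grace period anyway, and rcu_start_gp() would mark the old one completed implicitly while initializing the new one, which is the optimization the commit message preserves.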