author     Paul E. McKenney <paulmck@linux.vnet.ibm.com>   2011-08-24 19:52:09 -0400
committer  Paul E. McKenney <paulmck@linux.vnet.ibm.com>   2011-09-29 00:38:49 -0400
commit     afe24b122eb6edb5f1cb942570ac8d766105c7fc
tree       c1f9e2fcbcf2d374f36ee3bfc45babf576cb6246 /kernel
parent     e90c53d3e238dd0b7b02964370e8fece1778df96
rcu: Move propagation of ->completed from rcu_start_gp() to rcu_report_qs_rsp()
It is possible for the CPU that noted the end of the prior grace period
to not need a new one, and therefore to decide to propagate ->completed
throughout the rcu_node tree without starting another grace period.
However, in so doing, it releases the root rcu_node structure's lock,
which can allow some other CPU to start another grace period. The first
CPU will be propagating ->completed in parallel with the second CPU
initializing the rcu_node tree for the new grace period. In theory
this is harmless, but in practice we need to keep things simple.

This commit therefore moves the propagation of ->completed to
rcu_report_qs_rsp(), and refrains from marking the old grace period
as having been completed until it has finished doing this. This
prevents anyone from starting a new grace period concurrently with
marking the old grace period as having been completed.

Of course, the optimization where a CPU needing a new grace period
doesn't bother marking the old one completed is still in effect:
In that case, the marking happens implicitly as part of initializing
the new grace period.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
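Before the diff itself, here is a minimal user-space sketch of the ordering the patch enforces: the per-node ->completed values are published first, each under its own node lock, and only afterwards is the grace period declared complete globally, so no CPU can start a new grace period while the propagation is still in flight. This is an illustration only, not kernel code; the toy_* names and the flattened four-node "tree" are invented for the example, and the real code uses raw spinlocks, the rcu_node hierarchy, and the RCU_WAIT_TAIL check shown in the diff below.

#include <pthread.h>
#include <stdio.h>

#define NNODES 4

struct toy_node {
        pthread_mutex_t lock;
        unsigned long completed;
};

struct toy_state {
        struct toy_node node[NNODES];   /* node[0] plays the role of the root */
        unsigned long gpnum;            /* most recently started grace period */
        unsigned long completed;        /* most recently completed grace period */
};

/*
 * Mark grace period ->gpnum complete in every node, taking each node's
 * lock in turn, and only then declare it complete globally.  As long as
 * the global ->completed still lags ->gpnum, the grace period appears to
 * be in progress, so nobody starts a new one during the walk.
 */
static void toy_report_qs(struct toy_state *sp)
{
        int i;

        for (i = 0; i < NNODES; i++) {
                pthread_mutex_lock(&sp->node[i].lock);
                sp->node[i].completed = sp->gpnum;
                pthread_mutex_unlock(&sp->node[i].lock);
        }
        sp->completed = sp->gpnum;      /* declare the grace period complete */
}

int main(void)
{
        struct toy_state s = { .gpnum = 1 };
        int i;

        for (i = 0; i < NNODES; i++)
                pthread_mutex_init(&s.node[i].lock, NULL);
        toy_report_qs(&s);
        printf("gp %lu completed\n", s.completed);
        return 0;
}

Compile with cc -pthread; the program prints "gp 1 completed". The point of the sketch is only the ordering: per-node state first, global declaration last.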
Diffstat (limited to 'kernel')
-rw-r--r--   kernel/rcutree.c   71
1 files changed, 51 insertions, 20 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index e75df0c93abd..e234eb92a177 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -842,28 +842,24 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
         struct rcu_node *rnp = rcu_get_root(rsp);
 
         if (!rcu_scheduler_fully_active ||
-            !cpu_needs_another_gp(rsp, rdp) ||
-            rsp->fqs_active) {
-                if (rcu_scheduler_fully_active &&
-                    cpu_needs_another_gp(rsp, rdp))
-                        rsp->fqs_need_gp = 1;
-                if (rnp->completed == rsp->completed) {
-                        raw_spin_unlock_irqrestore(&rnp->lock, flags);
-                        return;
-                }
-                raw_spin_unlock(&rnp->lock);    /* irqs remain disabled. */
+            !cpu_needs_another_gp(rsp, rdp)) {
+                /*
+                 * Either the scheduler hasn't yet spawned the first
+                 * non-idle task or this CPU does not need another
+                 * grace period. Either way, don't start a new grace
+                 * period.
+                 */
+                raw_spin_unlock_irqrestore(&rnp->lock, flags);
+                return;
+        }
 
+        if (rsp->fqs_active) {
                 /*
-                 * Propagate new ->completed value to rcu_node structures
-                 * so that other CPUs don't have to wait until the start
-                 * of the next grace period to process their callbacks.
+                 * This CPU needs a grace period, but force_quiescent_state()
+                 * is running. Tell it to start one on this CPU's behalf.
                  */
-                rcu_for_each_node_breadth_first(rsp, rnp) {
-                        raw_spin_lock(&rnp->lock); /* irqs already disabled. */
-                        rnp->completed = rsp->completed;
-                        raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
-                }
-                local_irq_restore(flags);
+                rsp->fqs_need_gp = 1;
+                raw_spin_unlock_irqrestore(&rnp->lock, flags);
                 return;
         }
 
@@ -947,6 +943,8 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
         __releases(rcu_get_root(rsp)->lock)
 {
         unsigned long gp_duration;
+        struct rcu_node *rnp = rcu_get_root(rsp);
+        struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
 
         WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
 
@@ -958,7 +956,40 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
         gp_duration = jiffies - rsp->gp_start;
         if (gp_duration > rsp->gp_max)
                 rsp->gp_max = gp_duration;
-        rsp->completed = rsp->gpnum;
+
+        /*
+         * We know the grace period is complete, but to everyone else
+         * it appears to still be ongoing. But it is also the case
+         * that to everyone else it looks like there is nothing that
+         * they can do to advance the grace period. It is therefore
+         * safe for us to drop the lock in order to mark the grace
+         * period as completed in all of the rcu_node structures.
+         *
+         * But if this CPU needs another grace period, it will take
+         * care of this while initializing the next grace period.
+         * We use RCU_WAIT_TAIL instead of the usual RCU_DONE_TAIL
+         * because the callbacks have not yet been advanced: Those
+         * callbacks are waiting on the grace period that just now
+         * completed.
+         */
+        if (*rdp->nxttail[RCU_WAIT_TAIL] == NULL) {
+                raw_spin_unlock(&rnp->lock);    /* irqs remain disabled. */
+
+                /*
+                 * Propagate new ->completed value to rcu_node structures
+                 * so that other CPUs don't have to wait until the start
+                 * of the next grace period to process their callbacks.
+                 */
+                rcu_for_each_node_breadth_first(rsp, rnp) {
+                        raw_spin_lock(&rnp->lock); /* irqs already disabled. */
+                        rnp->completed = rsp->gpnum;
+                        raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+                }
+                rnp = rcu_get_root(rsp);
+                raw_spin_lock(&rnp->lock); /* irqs already disabled. */
+        }
+
+        rsp->completed = rsp->gpnum;  /* Declare the grace period complete. */
         trace_rcu_grace_period(rsp->name, rsp->completed, "end");
         rsp->signaled = RCU_GP_IDLE;
         rcu_start_gp(rsp, flags);  /* releases root node's rnp->lock. */
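The propagate-early path in the new rcu_report_qs_rsp() is taken only when *rdp->nxttail[RCU_WAIT_TAIL] == NULL, that is, when nothing is queued beyond the segment of callbacks waiting on the just-completed grace period, so this CPU will not be starting the next grace period itself. The following user-space toy is meant only to show how a tail-pointer check of that shape distinguishes "no further work queued" from "callbacks still need another grace period"; the toy_* names are invented, and the real rcu_data keeps four segments (RCU_DONE_TAIL through RCU_NEXT_TAIL) plus additional per-segment bookkeeping.

#include <stdio.h>
#include <stddef.h>

enum { TOY_DONE_TAIL, TOY_WAIT_TAIL, TOY_NEXT_TAIL, TOY_NR_TAILS };

struct toy_head {
        struct toy_head *next;
};

struct toy_cblist {
        struct toy_head *list;                  /* head of the whole callback list */
        struct toy_head **tail[TOY_NR_TAILS];   /* end of each segment */
};

static void toy_init(struct toy_cblist *cl)
{
        int i;

        cl->list = NULL;
        for (i = 0; i < TOY_NR_TAILS; i++)
                cl->tail[i] = &cl->list;        /* all segments start empty */
}

/* Queue a callback that must wait for a future grace period. */
static void toy_enqueue(struct toy_cblist *cl, struct toy_head *cb)
{
        cb->next = NULL;
        *cl->tail[TOY_NEXT_TAIL] = cb;
        cl->tail[TOY_NEXT_TAIL] = &cb->next;
}

int main(void)
{
        struct toy_cblist cl;
        struct toy_head cb;

        toy_init(&cl);
        /* Nothing queued beyond the WAIT segment: no new grace period needed. */
        printf("needs another GP? %s\n",
               *cl.tail[TOY_WAIT_TAIL] != NULL ? "yes" : "no");

        toy_enqueue(&cl, &cb);
        /* Now a callback sits past the WAIT segment's tail pointer. */
        printf("needs another GP? %s\n",
               *cl.tail[TOY_WAIT_TAIL] != NULL ? "yes" : "no");
        return 0;
}

In the toy, as in the real segmented list, an empty segment's tail pointer aliases the preceding segment's tail, so dereferencing tail[WAIT] sees the first callback queued anywhere past that segment.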