aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/rcutree.c
diff options
context:
space:
mode:
authorPaul E. McKenney <paulmck@linux.vnet.ibm.com>2012-06-20 20:07:14 -0400
committerPaul E. McKenney <paulmck@linux.vnet.ibm.com>2012-09-23 10:41:52 -0400
commitcabc49c1ff51baaf1958d501a7a616ce91245c93 (patch)
tree9d99237196fb45d01b38fb9235815b32fd1995d7 /kernel/rcutree.c
parent755609a9087fa983f567dc5452b2fa7b089b591f (diff)
rcu: Move RCU grace-period cleanup into kthread
As a first step towards allowing grace-period cleanup to be preemptible, this commit moves the RCU grace-period cleanup into the same kthread that is now used to initialize grace periods. This is needed to keep scheduling latency down to a dull roar. [ paulmck: Get rid of stray spin_lock_irqsave() calls. ] Reported-by: Mike Galbraith <mgalbraith@suse.de> Reported-by: Dimitri Sivanich <sivanich@sgi.com> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Diffstat (limited to 'kernel/rcutree.c')
-rw-r--r--kernel/rcutree.c112
1 file changed, 62 insertions, 50 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 781e5f0b7b17..52c3102dc5f7 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1032,6 +1032,7 @@ rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat
1032 */ 1032 */
1033static int __noreturn rcu_gp_kthread(void *arg) 1033static int __noreturn rcu_gp_kthread(void *arg)
1034{ 1034{
1035 unsigned long gp_duration;
1035 struct rcu_data *rdp; 1036 struct rcu_data *rdp;
1036 struct rcu_node *rnp; 1037 struct rcu_node *rnp;
1037 struct rcu_state *rsp = arg; 1038 struct rcu_state *rsp = arg;
@@ -1116,6 +1117,65 @@ static int __noreturn rcu_gp_kthread(void *arg)
1116 rsp->fqs_state = RCU_SIGNAL_INIT; 1117 rsp->fqs_state = RCU_SIGNAL_INIT;
1117 raw_spin_unlock_irq(&rnp->lock); 1118 raw_spin_unlock_irq(&rnp->lock);
1118 put_online_cpus(); 1119 put_online_cpus();
1120
1121 /* Handle grace-period end. */
1122 rnp = rcu_get_root(rsp);
1123 for (;;) {
1124 wait_event_interruptible(rsp->gp_wq,
1125 !ACCESS_ONCE(rnp->qsmask) &&
1126 !rcu_preempt_blocked_readers_cgp(rnp));
1127 if (!ACCESS_ONCE(rnp->qsmask) &&
1128 !rcu_preempt_blocked_readers_cgp(rnp))
1129 break;
1130 flush_signals(current);
1131 }
1132
1133 raw_spin_lock_irq(&rnp->lock);
1134 gp_duration = jiffies - rsp->gp_start;
1135 if (gp_duration > rsp->gp_max)
1136 rsp->gp_max = gp_duration;
1137
1138 /*
1139 * We know the grace period is complete, but to everyone else
1140 * it appears to still be ongoing. But it is also the case
1141 * that to everyone else it looks like there is nothing that
1142 * they can do to advance the grace period. It is therefore
1143 * safe for us to drop the lock in order to mark the grace
1144 * period as completed in all of the rcu_node structures.
1145 *
1146 * But if this CPU needs another grace period, it will take
1147 * care of this while initializing the next grace period.
1148 * We use RCU_WAIT_TAIL instead of the usual RCU_DONE_TAIL
1149 * because the callbacks have not yet been advanced: Those
1150 * callbacks are waiting on the grace period that just now
1151 * completed.
1152 */
1153 if (*rdp->nxttail[RCU_WAIT_TAIL] == NULL) {
1154 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
1155
1156 /*
1157 * Propagate new ->completed value to rcu_node
1158 * structures so that other CPUs don't have to
1159 * wait until the start of the next grace period
1160 * to process their callbacks.
1161 */
1162 rcu_for_each_node_breadth_first(rsp, rnp) {
1163 /* irqs already disabled. */
1164 raw_spin_lock(&rnp->lock);
1165 rnp->completed = rsp->gpnum;
1166 /* irqs remain disabled. */
1167 raw_spin_unlock(&rnp->lock);
1168 }
1169 rnp = rcu_get_root(rsp);
1170 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
1171 }
1172
1173 rsp->completed = rsp->gpnum; /* Declare grace period done. */
1174 trace_rcu_grace_period(rsp->name, rsp->completed, "end");
1175 rsp->fqs_state = RCU_GP_IDLE;
1176 if (cpu_needs_another_gp(rsp, rdp))
1177 rsp->gp_flags = 1;
1178 raw_spin_unlock_irq(&rnp->lock);
1119 } 1179 }
1120} 1180}
1121 1181
@@ -1162,57 +1222,9 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
1162static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) 1222static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
1163 __releases(rcu_get_root(rsp)->lock) 1223 __releases(rcu_get_root(rsp)->lock)
1164{ 1224{
1165 unsigned long gp_duration;
1166 struct rcu_node *rnp = rcu_get_root(rsp);
1167 struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
1168
1169 WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); 1225 WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
1170 1226 raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
1171 /* 1227 wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */
1172 * Ensure that all grace-period and pre-grace-period activity
1173 * is seen before the assignment to rsp->completed.
1174 */
1175 smp_mb(); /* See above block comment. */
1176 gp_duration = jiffies - rsp->gp_start;
1177 if (gp_duration > rsp->gp_max)
1178 rsp->gp_max = gp_duration;
1179
1180 /*
1181 * We know the grace period is complete, but to everyone else
1182 * it appears to still be ongoing. But it is also the case
1183 * that to everyone else it looks like there is nothing that
1184 * they can do to advance the grace period. It is therefore
1185 * safe for us to drop the lock in order to mark the grace
1186 * period as completed in all of the rcu_node structures.
1187 *
1188 * But if this CPU needs another grace period, it will take
1189 * care of this while initializing the next grace period.
1190 * We use RCU_WAIT_TAIL instead of the usual RCU_DONE_TAIL
1191 * because the callbacks have not yet been advanced: Those
1192 * callbacks are waiting on the grace period that just now
1193 * completed.
1194 */
1195 if (*rdp->nxttail[RCU_WAIT_TAIL] == NULL) {
1196 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
1197
1198 /*
1199 * Propagate new ->completed value to rcu_node structures
1200 * so that other CPUs don't have to wait until the start
1201 * of the next grace period to process their callbacks.
1202 */
1203 rcu_for_each_node_breadth_first(rsp, rnp) {
1204 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
1205 rnp->completed = rsp->gpnum;
1206 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
1207 }
1208 rnp = rcu_get_root(rsp);
1209 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
1210 }
1211
1212 rsp->completed = rsp->gpnum; /* Declare the grace period complete. */
1213 trace_rcu_grace_period(rsp->name, rsp->completed, "end");
1214 rsp->fqs_state = RCU_GP_IDLE;
1215 rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */
1216} 1228}
1217 1229
1218/* 1230/*