author		Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2012-06-20 20:07:14 -0400
committer	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2012-09-23 10:41:52 -0400
commit		cabc49c1ff51baaf1958d501a7a616ce91245c93 (patch)
tree		9d99237196fb45d01b38fb9235815b32fd1995d7 /kernel/rcutree.c
parent		755609a9087fa983f567dc5452b2fa7b089b591f (diff)
rcu: Move RCU grace-period cleanup into kthread
As a first step towards allowing grace-period cleanup to be preemptible,
this commit moves the RCU grace-period cleanup into the same kthread
that is now used to initialize grace periods. This is needed to keep
scheduling latency down to a dull roar.
[ paulmck: Get rid of stray spin_lock_irqsave() calls. ]
Reported-by: Mike Galbraith <mgalbraith@suse.de>
Reported-by: Dimitri Sivanich <sivanich@sgi.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Diffstat (limited to 'kernel/rcutree.c')
-rw-r--r--	kernel/rcutree.c	112
1 file changed, 62 insertions(+), 50 deletions(-)
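Before the diff itself, a minimal, self-contained sketch of the kthread wait-loop idiom that the new grace-period-end code below relies on. This is illustration only, not code from the patch: the demo_* names are invented, and ACCESS_ONCE() is the era's annotation for reads racing with updates.

#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(demo_wq);
static int demo_cond;	/* Set elsewhere, followed by wake_up(&demo_wq). */

static int demo_kthread(void *unused)
{
	for (;;) {
		/* Sleep until woken; may also return early on a signal. */
		wait_event_interruptible(demo_wq, ACCESS_ONCE(demo_cond));
		if (ACCESS_ONCE(demo_cond))
			break;			/* Condition really holds. */
		flush_signals(current);		/* Signal, not a real wakeup. */
	}
	/* The (now preemptible) cleanup work would run here. */
	return 0;
}

The recheck after wait_event_interruptible() is what makes signal-induced early returns harmless: the loop only exits once the condition is actually true.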
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 781e5f0b7b17..52c3102dc5f7 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1032,6 +1032,7 @@ rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat
  */
 static int __noreturn rcu_gp_kthread(void *arg)
 {
+	unsigned long gp_duration;
 	struct rcu_data *rdp;
 	struct rcu_node *rnp;
 	struct rcu_state *rsp = arg;
@@ -1116,6 +1117,65 @@ static int __noreturn rcu_gp_kthread(void *arg)
 		rsp->fqs_state = RCU_SIGNAL_INIT;
 		raw_spin_unlock_irq(&rnp->lock);
 		put_online_cpus();
+
+		/* Handle grace-period end. */
+		rnp = rcu_get_root(rsp);
+		for (;;) {
+			wait_event_interruptible(rsp->gp_wq,
+						 !ACCESS_ONCE(rnp->qsmask) &&
+						 !rcu_preempt_blocked_readers_cgp(rnp));
+			if (!ACCESS_ONCE(rnp->qsmask) &&
+			    !rcu_preempt_blocked_readers_cgp(rnp))
+				break;
+			flush_signals(current);
+		}
+
+		raw_spin_lock_irq(&rnp->lock);
+		gp_duration = jiffies - rsp->gp_start;
+		if (gp_duration > rsp->gp_max)
+			rsp->gp_max = gp_duration;
+
+		/*
+		 * We know the grace period is complete, but to everyone else
+		 * it appears to still be ongoing.  But it is also the case
+		 * that to everyone else it looks like there is nothing that
+		 * they can do to advance the grace period.  It is therefore
+		 * safe for us to drop the lock in order to mark the grace
+		 * period as completed in all of the rcu_node structures.
+		 *
+		 * But if this CPU needs another grace period, it will take
+		 * care of this while initializing the next grace period.
+		 * We use RCU_WAIT_TAIL instead of the usual RCU_DONE_TAIL
+		 * because the callbacks have not yet been advanced: Those
+		 * callbacks are waiting on the grace period that just now
+		 * completed.
+		 */
+		if (*rdp->nxttail[RCU_WAIT_TAIL] == NULL) {
+			raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+
+			/*
+			 * Propagate new ->completed value to rcu_node
+			 * structures so that other CPUs don't have to
+			 * wait until the start of the next grace period
+			 * to process their callbacks.
+			 */
+			rcu_for_each_node_breadth_first(rsp, rnp) {
+				/* irqs already disabled. */
+				raw_spin_lock(&rnp->lock);
+				rnp->completed = rsp->gpnum;
+				/* irqs remain disabled. */
+				raw_spin_unlock(&rnp->lock);
+			}
+			rnp = rcu_get_root(rsp);
+			raw_spin_lock(&rnp->lock); /* irqs already disabled. */
+		}
+
+		rsp->completed = rsp->gpnum; /* Declare grace period done. */
+		trace_rcu_grace_period(rsp->name, rsp->completed, "end");
+		rsp->fqs_state = RCU_GP_IDLE;
+		if (cpu_needs_another_gp(rsp, rdp))
+			rsp->gp_flags = 1;
+		raw_spin_unlock_irq(&rnp->lock);
 	}
 }
 
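The locking choreography added above is easier to follow outside diff form. A condensed paraphrase, for illustration only (identifiers as in the kernel source, comments mine, not a drop-in replacement):

raw_spin_lock_irq(&rnp->lock);			/* rnp is the root rcu_node. */
if (*rdp->nxttail[RCU_WAIT_TAIL] == NULL) {	/* No next GP needed by this
						   CPU (see comment above). */
	raw_spin_unlock(&rnp->lock);		/* irqs stay disabled. */
	rcu_for_each_node_breadth_first(rsp, rnp) {
		raw_spin_lock(&rnp->lock);
		rnp->completed = rsp->gpnum;	/* Publish GP end per node. */
		raw_spin_unlock(&rnp->lock);
	}
	rnp = rcu_get_root(rsp);
	raw_spin_lock(&rnp->lock);		/* Retake the root lock. */
}
rsp->completed = rsp->gpnum;			/* GP now globally complete. */
raw_spin_unlock_irq(&rnp->lock);

Dropping the root lock midway is safe because, at this point, only this kthread knows the grace period is complete, so no other CPU can act to advance it.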
@@ -1162,57 +1222,9 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
 static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
 	__releases(rcu_get_root(rsp)->lock)
 {
-	unsigned long gp_duration;
-	struct rcu_node *rnp = rcu_get_root(rsp);
-	struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
-
 	WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
-
-	/*
-	 * Ensure that all grace-period and pre-grace-period activity
-	 * is seen before the assignment to rsp->completed.
-	 */
-	smp_mb(); /* See above block comment. */
-	gp_duration = jiffies - rsp->gp_start;
-	if (gp_duration > rsp->gp_max)
-		rsp->gp_max = gp_duration;
-
-	/*
-	 * We know the grace period is complete, but to everyone else
-	 * it appears to still be ongoing.  But it is also the case
-	 * that to everyone else it looks like there is nothing that
-	 * they can do to advance the grace period.  It is therefore
-	 * safe for us to drop the lock in order to mark the grace
-	 * period as completed in all of the rcu_node structures.
-	 *
-	 * But if this CPU needs another grace period, it will take
-	 * care of this while initializing the next grace period.
-	 * We use RCU_WAIT_TAIL instead of the usual RCU_DONE_TAIL
-	 * because the callbacks have not yet been advanced: Those
-	 * callbacks are waiting on the grace period that just now
-	 * completed.
-	 */
-	if (*rdp->nxttail[RCU_WAIT_TAIL] == NULL) {
-		raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
-
-		/*
-		 * Propagate new ->completed value to rcu_node structures
-		 * so that other CPUs don't have to wait until the start
-		 * of the next grace period to process their callbacks.
-		 */
-		rcu_for_each_node_breadth_first(rsp, rnp) {
-			raw_spin_lock(&rnp->lock); /* irqs already disabled. */
-			rnp->completed = rsp->gpnum;
-			raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
-		}
-		rnp = rcu_get_root(rsp);
-		raw_spin_lock(&rnp->lock); /* irqs already disabled. */
-	}
-
-	rsp->completed = rsp->gpnum; /* Declare the grace period complete. */
-	trace_rcu_grace_period(rsp->name, rsp->completed, "end");
-	rsp->fqs_state = RCU_GP_IDLE;
-	rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */
+	raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
+	wake_up(&rsp->gp_wq);  /* Memory barrier implied by wake_up() path. */
 }
 
 /*
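For readability, here is rcu_report_qs_rsp() reassembled from the hunk above as it reads after this patch. All cleanup now runs in the kthread; the reporter merely drops the root lock and wakes the kthread, with the in-tree comment noting that the wake_up() path supplies an implied memory barrier where the removed smp_mb() used to order grace-period activity:

static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
	__releases(rcu_get_root(rsp)->lock)
{
	WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
	raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
	wake_up(&rsp->gp_wq);  /* Memory barrier implied by wake_up() path. */
}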