about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Paul E. McKenney <paul.mckenney@linaro.org> 2012-01-07 14:03:57 -0500
committer Paul E. McKenney <paulmck@linux.vnet.ibm.com> 2012-02-21 12:03:34 -0500
commit e5601400081651060a59bd1f45f2821bb8e97f95 (patch)
tree 391e7e0b08d8b642eca260950c7f052832411aa9
parent ae1f18e480c83d15539fb234a2dc5f0aa04cb119 (diff)
rcu: Simplify offline processing
Move ->qsmaskinit and blkd_tasks[] manipulation to the CPU_DYING
notifier. This simplifies the code by eliminating a potential deadlock
and by reducing the responsibilities of force_quiescent_state(). Also
rename functions to make their connection to the CPU-hotplug stages
explicit.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
-rw-r--r-- kernel/rcutree.c 160
-rw-r--r-- kernel/rcutree.h 4
-rw-r--r-- kernel/rcutree_plugin.h 25
3 files changed, 90 insertions, 99 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index acf2d67ad2f4..575f91d03f06 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -943,6 +943,10 @@ rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat
943 * in preparation for detecting the next grace period. The caller must hold 943 * in preparation for detecting the next grace period. The caller must hold
944 * the root node's ->lock, which is released before return. Hard irqs must 944 * the root node's ->lock, which is released before return. Hard irqs must
945 * be disabled. 945 * be disabled.
946 *
947 * Note that it is legal for a dying CPU (which is marked as offline) to
948 * invoke this function. This can happen when the dying CPU reports its
949 * quiescent state.
946 */ 950 */
947static void 951static void
948rcu_start_gp(struct rcu_state *rsp, unsigned long flags) 952rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
@@ -1245,118 +1249,101 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
1245 1249
1246/* 1250/*
1247 * Move a dying CPU's RCU callbacks to online CPU's callback list. 1251 * Move a dying CPU's RCU callbacks to online CPU's callback list.
1248 * Synchronization is not required because this function executes 1252 * Also record a quiescent state for this CPU for the current grace period.
1249 * in stop_machine() context. 1253 * Synchronization and interrupt disabling are not required because
1254 * this function executes in stop_machine() context. Therefore, cleanup
1255 * operations that might block must be done later from the CPU_DEAD
1256 * notifier.
1257 *
1258 * Note that the outgoing CPU's bit has already been cleared in the
1259 * cpu_online_mask. This allows us to randomly pick a callback
1260 * destination from the bits set in that mask.
1250 */ 1261 */
1251static void rcu_send_cbs_to_online(struct rcu_state *rsp) 1262static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
1252{ 1263{
1264 unsigned long flags;
1253 int i; 1265 int i;
1254 /* current DYING CPU is cleared in the cpu_online_mask */ 1266 unsigned long mask;
1267 int need_report;
1255 int receive_cpu = cpumask_any(cpu_online_mask); 1268 int receive_cpu = cpumask_any(cpu_online_mask);
1256 struct rcu_data *rdp = this_cpu_ptr(rsp->rda); 1269 struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
1257 struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu); 1270 struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu);
1271 struct rcu_node *rnp = rdp->mynode; /* For dying CPU. */
1272
1273 /* Move callbacks to some other CPU. */
1274 if (rdp->nxtlist != NULL) {
1275 *receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist;
1276 receive_rdp->nxttail[RCU_NEXT_TAIL] =
1277 rdp->nxttail[RCU_NEXT_TAIL];
1278 receive_rdp->qlen_lazy += rdp->qlen_lazy;
1279 receive_rdp->qlen += rdp->qlen;
1280 receive_rdp->n_cbs_adopted += rdp->qlen;
1281 rdp->n_cbs_orphaned += rdp->qlen;
1282
1283 rdp->nxtlist = NULL;
1284 for (i = 0; i < RCU_NEXT_SIZE; i++)
1285 rdp->nxttail[i] = &rdp->nxtlist;
1286 rdp->qlen_lazy = 0;
1287 rdp->qlen = 0;
1288 }
1258 1289
1259 if (rdp->nxtlist == NULL) 1290 /* Record a quiescent state for the dying CPU. */
1260 return; /* irqs disabled, so comparison is stable. */
1261
1262 *receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist;
1263 receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
1264 receive_rdp->qlen_lazy += rdp->qlen_lazy;
1265 receive_rdp->qlen += rdp->qlen;
1266 receive_rdp->n_cbs_adopted += rdp->qlen;
1267 rdp->n_cbs_orphaned += rdp->qlen;
1268
1269 rdp->nxtlist = NULL;
1270 for (i = 0; i < RCU_NEXT_SIZE; i++)
1271 rdp->nxttail[i] = &rdp->nxtlist;
1272 rdp->qlen_lazy = 0;
1273 rdp->qlen = 0;
1274}
1275
1276/*
1277 * Remove the outgoing CPU from the bitmasks in the rcu_node hierarchy
1278 * and move all callbacks from the outgoing CPU to the current one.
1279 * There can only be one CPU hotplug operation at a time, so no other
1280 * CPU can be attempting to update rcu_cpu_kthread_task.
1281 */
1282static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
1283{
1284 unsigned long flags;
1285 unsigned long mask;
1286 int need_report = 0;
1287 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
1288 struct rcu_node *rnp;
1289
1290 rcu_stop_cpu_kthread(cpu);
1291
1292 /* Exclude any attempts to start a new grace period. */
1293 raw_spin_lock_irqsave(&rsp->onofflock, flags);
1294
1295 /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */
1296 rnp = rdp->mynode; /* this is the outgoing CPU's rnp. */
1297 mask = rdp->grpmask; /* rnp->grplo is constant. */ 1291 mask = rdp->grpmask; /* rnp->grplo is constant. */
1292 trace_rcu_grace_period(rsp->name,
1293 rnp->gpnum + 1 - !!(rnp->qsmask & mask),
1294 "cpuofl");
1295 rcu_report_qs_rdp(smp_processor_id(), rsp, rdp, rsp->gpnum);
1296 /* Note that rcu_report_qs_rdp() might call trace_rcu_grace_period(). */
1297
1298 /*
1299 * Remove the dying CPU from the bitmasks in the rcu_node
1300 * hierarchy. Because we are in stop_machine() context, we
1301 * automatically exclude ->onofflock critical sections.
1302 */
1298 do { 1303 do {
1299 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 1304 raw_spin_lock_irqsave(&rnp->lock, flags);
1300 rnp->qsmaskinit &= ~mask; 1305 rnp->qsmaskinit &= ~mask;
1301 if (rnp->qsmaskinit != 0) { 1306 if (rnp->qsmaskinit != 0) {
1302 if (rnp != rdp->mynode) 1307 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1303 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
1304 else
1305 trace_rcu_grace_period(rsp->name,
1306 rnp->gpnum + 1 -
1307 !!(rnp->qsmask & mask),
1308 "cpuofl");
1309 break; 1308 break;
1310 } 1309 }
1311 if (rnp == rdp->mynode) { 1310 if (rnp == rdp->mynode) {
1312 trace_rcu_grace_period(rsp->name,
1313 rnp->gpnum + 1 -
1314 !!(rnp->qsmask & mask),
1315 "cpuofl");
1316 need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp); 1311 need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp);
1312 if (need_report & RCU_OFL_TASKS_NORM_GP)
1313 rcu_report_unblock_qs_rnp(rnp, flags);
1314 else
1315 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1316 if (need_report & RCU_OFL_TASKS_EXP_GP)
1317 rcu_report_exp_rnp(rsp, rnp, true);
1317 } else 1318 } else
1318 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 1319 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1319 mask = rnp->grpmask; 1320 mask = rnp->grpmask;
1320 rnp = rnp->parent; 1321 rnp = rnp->parent;
1321 } while (rnp != NULL); 1322 } while (rnp != NULL);
1322
1323 /*
1324 * We still hold the leaf rcu_node structure lock here, and
1325 * irqs are still disabled. The reason for this subterfuge is
1326 * because invoking rcu_report_unblock_qs_rnp() with ->onofflock
1327 * held leads to deadlock.
1328 */
1329 raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */
1330 rnp = rdp->mynode;
1331 if (need_report & RCU_OFL_TASKS_NORM_GP)
1332 rcu_report_unblock_qs_rnp(rnp, flags);
1333 else
1334 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1335 if (need_report & RCU_OFL_TASKS_EXP_GP)
1336 rcu_report_exp_rnp(rsp, rnp, true);
1337 rcu_node_kthread_setaffinity(rnp, -1);
1338} 1323}
1339 1324
1340/* 1325/*
1341 * Remove the specified CPU from the RCU hierarchy and move any pending 1326 * The CPU has been completely removed, and some other CPU is reporting
1342 * callbacks that it might have to the current CPU. This code assumes 1327 * this fact from process context. Do the remainder of the cleanup.
1343 * that at least one CPU in the system will remain running at all times. 1328 * There can only be one CPU hotplug operation at a time, so no other
1344 * Any attempt to offline -all- CPUs is likely to strand RCU callbacks. 1329 * CPU can be attempting to update rcu_cpu_kthread_task.
1345 */ 1330 */
1346static void rcu_offline_cpu(int cpu) 1331static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
1347{ 1332{
1348 __rcu_offline_cpu(cpu, &rcu_sched_state); 1333 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
1349 __rcu_offline_cpu(cpu, &rcu_bh_state); 1334 struct rcu_node *rnp = rdp->mynode;
1350 rcu_preempt_offline_cpu(cpu); 1335
1336 rcu_stop_cpu_kthread(cpu);
1337 rcu_node_kthread_setaffinity(rnp, -1);
1351} 1338}
1352 1339
1353#else /* #ifdef CONFIG_HOTPLUG_CPU */ 1340#else /* #ifdef CONFIG_HOTPLUG_CPU */
1354 1341
1355static void rcu_send_cbs_to_online(struct rcu_state *rsp) 1342static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
1356{ 1343{
1357} 1344}
1358 1345
1359static void rcu_offline_cpu(int cpu) 1346static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
1360{ 1347{
1361} 1348}
1362 1349
@@ -1725,6 +1712,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
1725 * a quiescent state betweentimes. 1712 * a quiescent state betweentimes.
1726 */ 1713 */
1727 local_irq_save(flags); 1714 local_irq_save(flags);
1715 WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
1728 rdp = this_cpu_ptr(rsp->rda); 1716 rdp = this_cpu_ptr(rsp->rda);
1729 1717
1730 /* Add the callback to our list. */ 1718 /* Add the callback to our list. */
@@ -2155,16 +2143,18 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
2155 * touch any data without introducing corruption. We send the 2143 * touch any data without introducing corruption. We send the
2156 * dying CPU's callbacks to an arbitrarily chosen online CPU. 2144 * dying CPU's callbacks to an arbitrarily chosen online CPU.
2157 */ 2145 */
2158 rcu_send_cbs_to_online(&rcu_bh_state); 2146 rcu_cleanup_dying_cpu(&rcu_bh_state);
2159 rcu_send_cbs_to_online(&rcu_sched_state); 2147 rcu_cleanup_dying_cpu(&rcu_sched_state);
2160 rcu_preempt_send_cbs_to_online(); 2148 rcu_preempt_cleanup_dying_cpu();
2161 rcu_cleanup_after_idle(cpu); 2149 rcu_cleanup_after_idle(cpu);
2162 break; 2150 break;
2163 case CPU_DEAD: 2151 case CPU_DEAD:
2164 case CPU_DEAD_FROZEN: 2152 case CPU_DEAD_FROZEN:
2165 case CPU_UP_CANCELED: 2153 case CPU_UP_CANCELED:
2166 case CPU_UP_CANCELED_FROZEN: 2154 case CPU_UP_CANCELED_FROZEN:
2167 rcu_offline_cpu(cpu); 2155 rcu_cleanup_dead_cpu(cpu, &rcu_bh_state);
2156 rcu_cleanup_dead_cpu(cpu, &rcu_sched_state);
2157 rcu_preempt_cleanup_dead_cpu(cpu);
2168 break; 2158 break;
2169 default: 2159 default:
2170 break; 2160 break;
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index af2af3cc5e65..05e03675439a 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -439,8 +439,8 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
439static int rcu_preempt_offline_tasks(struct rcu_state *rsp, 439static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
440 struct rcu_node *rnp, 440 struct rcu_node *rnp,
441 struct rcu_data *rdp); 441 struct rcu_data *rdp);
442static void rcu_preempt_offline_cpu(int cpu);
443#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 442#endif /* #ifdef CONFIG_HOTPLUG_CPU */
443static void rcu_preempt_cleanup_dead_cpu(int cpu);
444static void rcu_preempt_check_callbacks(int cpu); 444static void rcu_preempt_check_callbacks(int cpu);
445static void rcu_preempt_process_callbacks(void); 445static void rcu_preempt_process_callbacks(void);
446void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); 446void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
@@ -451,7 +451,7 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
451static int rcu_preempt_pending(int cpu); 451static int rcu_preempt_pending(int cpu);
452static int rcu_preempt_needs_cpu(int cpu); 452static int rcu_preempt_needs_cpu(int cpu);
453static void __cpuinit rcu_preempt_init_percpu_data(int cpu); 453static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
454static void rcu_preempt_send_cbs_to_online(void); 454static void rcu_preempt_cleanup_dying_cpu(void);
455static void __init __rcu_init_preempt(void); 455static void __init __rcu_init_preempt(void);
456static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); 456static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
457static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); 457static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 7adf232bb66b..eeb2cc6b8657 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -618,16 +618,16 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
618 return retval; 618 return retval;
619} 619}
620 620
621#endif /* #ifdef CONFIG_HOTPLUG_CPU */
622
621/* 623/*
622 * Do CPU-offline processing for preemptible RCU. 624 * Do CPU-offline processing for preemptible RCU.
623 */ 625 */
624static void rcu_preempt_offline_cpu(int cpu) 626static void rcu_preempt_cleanup_dead_cpu(int cpu)
625{ 627{
626 __rcu_offline_cpu(cpu, &rcu_preempt_state); 628 rcu_cleanup_dead_cpu(cpu, &rcu_preempt_state);
627} 629}
628 630
629#endif /* #ifdef CONFIG_HOTPLUG_CPU */
630
631/* 631/*
632 * Check for a quiescent state from the current CPU. When a task blocks, 632 * Check for a quiescent state from the current CPU. When a task blocks,
633 * the task is recorded in the corresponding CPU's rcu_node structure, 633 * the task is recorded in the corresponding CPU's rcu_node structure,
@@ -912,11 +912,12 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
912} 912}
913 913
914/* 914/*
915 * Move preemptible RCU's callbacks from dying CPU to other online CPU. 915 * Move preemptible RCU's callbacks from dying CPU to other online CPU
916 * and record a quiescent state.
916 */ 917 */
917static void rcu_preempt_send_cbs_to_online(void) 918static void rcu_preempt_cleanup_dying_cpu(void)
918{ 919{
919 rcu_send_cbs_to_online(&rcu_preempt_state); 920 rcu_cleanup_dying_cpu(&rcu_preempt_state);
920} 921}
921 922
922/* 923/*
@@ -1052,16 +1053,16 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
1052 return 0; 1053 return 0;
1053} 1054}
1054 1055
1056#endif /* #ifdef CONFIG_HOTPLUG_CPU */
1057
1055/* 1058/*
1056 * Because preemptible RCU does not exist, it never needs CPU-offline 1059 * Because preemptible RCU does not exist, it never needs CPU-offline
1057 * processing. 1060 * processing.
1058 */ 1061 */
1059static void rcu_preempt_offline_cpu(int cpu) 1062static void rcu_preempt_cleanup_dead_cpu(int cpu)
1060{ 1063{
1061} 1064}
1062 1065
1063#endif /* #ifdef CONFIG_HOTPLUG_CPU */
1064
1065/* 1066/*
1066 * Because preemptible RCU does not exist, it never has any callbacks 1067 * Because preemptible RCU does not exist, it never has any callbacks
1067 * to check. 1068 * to check.
@@ -1153,9 +1154,9 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
1153} 1154}
1154 1155
1155/* 1156/*
1156 * Because there is no preemptible RCU, there are no callbacks to move. 1157 * Because there is no preemptible RCU, there is no cleanup to do.
1157 */ 1158 */
1158static void rcu_preempt_send_cbs_to_online(void) 1159static void rcu_preempt_cleanup_dying_cpu(void)
1159{ 1160{
1160} 1161}
1161 1162