author     Paul E. McKenney <paul.mckenney@linaro.org>    2012-01-07 14:03:57 -0500
committer  Paul E. McKenney <paulmck@linux.vnet.ibm.com>  2012-02-21 12:03:34 -0500
commit     e5601400081651060a59bd1f45f2821bb8e97f95 (patch)
tree       391e7e0b08d8b642eca260950c7f052832411aa9 /kernel
parent     ae1f18e480c83d15539fb234a2dc5f0aa04cb119 (diff)
rcu: Simplify offline processing
Move ->qsmaskinit and blkd_tasks[] manipulation to the CPU_DYING
notifier. This simplifies the code by eliminating a potential
deadlock and by reducing the responsibilities of force_quiescent_state().
Also rename functions to make their connection to the CPU-hotplug
stages explicit.
Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
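For orientation, below is a condensed sketch (not part of the patch) of how the renamed hooks line up with the CPU-hotplug notifier stages after this change. The notifier scaffolding and the rcu_hotplug_sketch() name are illustrative only; the rcu_cleanup_*() calls mirror the rcu_cpu_notify() hunk in the diff that follows.

#include <linux/cpu.h>
#include <linux/notifier.h>

/* Sketch: CPU_DYING work runs in stop_machine() context and must not block;
 * CPU_DEAD work runs later from process context and may block. */
static int __cpuinit rcu_hotplug_sketch(struct notifier_block *self,
					unsigned long action, void *hcpu)
{
	int cpu = (long)hcpu;

	switch (action) {
	case CPU_DYING:
	case CPU_DYING_FROZEN:
		/* Move callbacks, report the dying CPU's quiescent state,
		 * and clear ->qsmaskinit bits. */
		rcu_cleanup_dying_cpu(&rcu_bh_state);
		rcu_cleanup_dying_cpu(&rcu_sched_state);
		rcu_preempt_cleanup_dying_cpu();
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		/* Blocking cleanup: stop the per-CPU kthread and reset
		 * the rcu_node kthread affinity. */
		rcu_cleanup_dead_cpu(cpu, &rcu_bh_state);
		rcu_cleanup_dead_cpu(cpu, &rcu_sched_state);
		rcu_preempt_cleanup_dead_cpu(cpu);
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}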
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/rcutree.c        | 160
-rw-r--r--  kernel/rcutree.h        |   4
-rw-r--r--  kernel/rcutree_plugin.h |  25
3 files changed, 90 insertions(+), 99 deletions(-)
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index acf2d67ad2f4..575f91d03f06 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -943,6 +943,10 @@ rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat
  * in preparation for detecting the next grace period. The caller must hold
  * the root node's ->lock, which is released before return. Hard irqs must
  * be disabled.
+ *
+ * Note that it is legal for a dying CPU (which is marked as offline) to
+ * invoke this function. This can happen when the dying CPU reports its
+ * quiescent state.
  */
 static void
 rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
@@ -1245,118 +1249,101 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
 
 /*
  * Move a dying CPU's RCU callbacks to online CPU's callback list.
- * Synchronization is not required because this function executes
- * in stop_machine() context.
+ * Also record a quiescent state for this CPU for the current grace period.
+ * Synchronization and interrupt disabling are not required because
+ * this function executes in stop_machine() context. Therefore, cleanup
+ * operations that might block must be done later from the CPU_DEAD
+ * notifier.
+ *
+ * Note that the outgoing CPU's bit has already been cleared in the
+ * cpu_online_mask. This allows us to randomly pick a callback
+ * destination from the bits set in that mask.
  */
-static void rcu_send_cbs_to_online(struct rcu_state *rsp)
+static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
 {
+	unsigned long flags;
 	int i;
-	/* current DYING CPU is cleared in the cpu_online_mask */
+	unsigned long mask;
+	int need_report;
 	int receive_cpu = cpumask_any(cpu_online_mask);
 	struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
 	struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu);
+	struct rcu_node *rnp = rdp->mynode;  /* For dying CPU. */
+
+	/* Move callbacks to some other CPU. */
+	if (rdp->nxtlist != NULL) {
+		*receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist;
+		receive_rdp->nxttail[RCU_NEXT_TAIL] =
+				rdp->nxttail[RCU_NEXT_TAIL];
+		receive_rdp->qlen_lazy += rdp->qlen_lazy;
+		receive_rdp->qlen += rdp->qlen;
+		receive_rdp->n_cbs_adopted += rdp->qlen;
+		rdp->n_cbs_orphaned += rdp->qlen;
+
+		rdp->nxtlist = NULL;
+		for (i = 0; i < RCU_NEXT_SIZE; i++)
+			rdp->nxttail[i] = &rdp->nxtlist;
+		rdp->qlen_lazy = 0;
+		rdp->qlen = 0;
+	}
 
-	if (rdp->nxtlist == NULL)
-		return; /* irqs disabled, so comparison is stable. */
-
-	*receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist;
-	receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
-	receive_rdp->qlen_lazy += rdp->qlen_lazy;
-	receive_rdp->qlen += rdp->qlen;
-	receive_rdp->n_cbs_adopted += rdp->qlen;
-	rdp->n_cbs_orphaned += rdp->qlen;
-
-	rdp->nxtlist = NULL;
-	for (i = 0; i < RCU_NEXT_SIZE; i++)
-		rdp->nxttail[i] = &rdp->nxtlist;
-	rdp->qlen_lazy = 0;
-	rdp->qlen = 0;
-}
-
-/*
- * Remove the outgoing CPU from the bitmasks in the rcu_node hierarchy
- * and move all callbacks from the outgoing CPU to the current one.
- * There can only be one CPU hotplug operation at a time, so no other
- * CPU can be attempting to update rcu_cpu_kthread_task.
- */
-static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
-{
-	unsigned long flags;
-	unsigned long mask;
-	int need_report = 0;
-	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
-	struct rcu_node *rnp;
-
-	rcu_stop_cpu_kthread(cpu);
-
-	/* Exclude any attempts to start a new grace period. */
-	raw_spin_lock_irqsave(&rsp->onofflock, flags);
-
-	/* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */
-	rnp = rdp->mynode;	/* this is the outgoing CPU's rnp. */
+	/* Record a quiescent state for the dying CPU. */
 	mask = rdp->grpmask;	/* rnp->grplo is constant. */
+	trace_rcu_grace_period(rsp->name,
+			       rnp->gpnum + 1 - !!(rnp->qsmask & mask),
+			       "cpuofl");
+	rcu_report_qs_rdp(smp_processor_id(), rsp, rdp, rsp->gpnum);
+	/* Note that rcu_report_qs_rdp() might call trace_rcu_grace_period(). */
+
+	/*
+	 * Remove the dying CPU from the bitmasks in the rcu_node
+	 * hierarchy. Because we are in stop_machine() context, we
+	 * automatically exclude ->onofflock critical sections.
+	 */
 	do {
-		raw_spin_lock(&rnp->lock);	/* irqs already disabled. */
+		raw_spin_lock_irqsave(&rnp->lock, flags);
 		rnp->qsmaskinit &= ~mask;
 		if (rnp->qsmaskinit != 0) {
-			if (rnp != rdp->mynode)
-				raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
-			else
-				trace_rcu_grace_period(rsp->name,
-						       rnp->gpnum + 1 -
-						       !!(rnp->qsmask & mask),
-						       "cpuofl");
+			raw_spin_unlock_irqrestore(&rnp->lock, flags);
 			break;
 		}
 		if (rnp == rdp->mynode) {
-			trace_rcu_grace_period(rsp->name,
-					       rnp->gpnum + 1 -
-					       !!(rnp->qsmask & mask),
-					       "cpuofl");
 			need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp);
+			if (need_report & RCU_OFL_TASKS_NORM_GP)
+				rcu_report_unblock_qs_rnp(rnp, flags);
+			else
+				raw_spin_unlock_irqrestore(&rnp->lock, flags);
+			if (need_report & RCU_OFL_TASKS_EXP_GP)
+				rcu_report_exp_rnp(rsp, rnp, true);
 		} else
-			raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+			raw_spin_unlock_irqrestore(&rnp->lock, flags);
 		mask = rnp->grpmask;
 		rnp = rnp->parent;
 	} while (rnp != NULL);
-
-	/*
-	 * We still hold the leaf rcu_node structure lock here, and
-	 * irqs are still disabled. The reason for this subterfuge is
-	 * because invoking rcu_report_unblock_qs_rnp() with ->onofflock
-	 * held leads to deadlock.
-	 */
-	raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */
-	rnp = rdp->mynode;
-	if (need_report & RCU_OFL_TASKS_NORM_GP)
-		rcu_report_unblock_qs_rnp(rnp, flags);
-	else
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
-	if (need_report & RCU_OFL_TASKS_EXP_GP)
-		rcu_report_exp_rnp(rsp, rnp, true);
-	rcu_node_kthread_setaffinity(rnp, -1);
 }
 
 /*
- * Remove the specified CPU from the RCU hierarchy and move any pending
- * callbacks that it might have to the current CPU. This code assumes
- * that at least one CPU in the system will remain running at all times.
- * Any attempt to offline -all- CPUs is likely to strand RCU callbacks.
+ * The CPU has been completely removed, and some other CPU is reporting
+ * this fact from process context. Do the remainder of the cleanup.
+ * There can only be one CPU hotplug operation at a time, so no other
+ * CPU can be attempting to update rcu_cpu_kthread_task.
  */
-static void rcu_offline_cpu(int cpu)
+static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 {
-	__rcu_offline_cpu(cpu, &rcu_sched_state);
-	__rcu_offline_cpu(cpu, &rcu_bh_state);
-	rcu_preempt_offline_cpu(cpu);
+	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
+	struct rcu_node *rnp = rdp->mynode;
+
+	rcu_stop_cpu_kthread(cpu);
+	rcu_node_kthread_setaffinity(rnp, -1);
 }
 
 #else /* #ifdef CONFIG_HOTPLUG_CPU */
 
-static void rcu_send_cbs_to_online(struct rcu_state *rsp)
+static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
 {
 }
 
-static void rcu_offline_cpu(int cpu)
+static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 {
 }
 
@@ -1725,6 +1712,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
 	 * a quiescent state betweentimes.
 	 */
 	local_irq_save(flags);
+	WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
 	rdp = this_cpu_ptr(rsp->rda);
 
 	/* Add the callback to our list. */
@@ -2155,16 +2143,18 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
 		 * touch any data without introducing corruption. We send the
 		 * dying CPU's callbacks to an arbitrarily chosen online CPU.
 		 */
-		rcu_send_cbs_to_online(&rcu_bh_state);
-		rcu_send_cbs_to_online(&rcu_sched_state);
-		rcu_preempt_send_cbs_to_online();
+		rcu_cleanup_dying_cpu(&rcu_bh_state);
+		rcu_cleanup_dying_cpu(&rcu_sched_state);
+		rcu_preempt_cleanup_dying_cpu();
 		rcu_cleanup_after_idle(cpu);
 		break;
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
 	case CPU_UP_CANCELED:
 	case CPU_UP_CANCELED_FROZEN:
-		rcu_offline_cpu(cpu);
+		rcu_cleanup_dead_cpu(cpu, &rcu_bh_state);
+		rcu_cleanup_dead_cpu(cpu, &rcu_sched_state);
+		rcu_preempt_cleanup_dead_cpu(cpu);
 		break;
 	default:
 		break;
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index af2af3cc5e65..05e03675439a 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -439,8 +439,8 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
 static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
				      struct rcu_node *rnp,
				      struct rcu_data *rdp);
-static void rcu_preempt_offline_cpu(int cpu);
 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
+static void rcu_preempt_cleanup_dead_cpu(int cpu);
 static void rcu_preempt_check_callbacks(int cpu);
 static void rcu_preempt_process_callbacks(void);
 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
@@ -451,7 +451,7 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
 static int rcu_preempt_pending(int cpu);
 static int rcu_preempt_needs_cpu(int cpu);
 static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
-static void rcu_preempt_send_cbs_to_online(void);
+static void rcu_preempt_cleanup_dying_cpu(void);
 static void __init __rcu_init_preempt(void);
 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 7adf232bb66b..eeb2cc6b8657 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -618,16 +618,16 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
 	return retval;
 }
 
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+
 /*
  * Do CPU-offline processing for preemptible RCU.
  */
-static void rcu_preempt_offline_cpu(int cpu)
+static void rcu_preempt_cleanup_dead_cpu(int cpu)
 {
-	__rcu_offline_cpu(cpu, &rcu_preempt_state);
+	rcu_cleanup_dead_cpu(cpu, &rcu_preempt_state);
 }
 
-#endif /* #ifdef CONFIG_HOTPLUG_CPU */
-
 /*
  * Check for a quiescent state from the current CPU. When a task blocks,
  * the task is recorded in the corresponding CPU's rcu_node structure,
@@ -912,11 +912,12 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
 }
 
 /*
- * Move preemptible RCU's callbacks from dying CPU to other online CPU.
+ * Move preemptible RCU's callbacks from dying CPU to other online CPU
+ * and record a quiescent state.
  */
-static void rcu_preempt_send_cbs_to_online(void)
+static void rcu_preempt_cleanup_dying_cpu(void)
 {
-	rcu_send_cbs_to_online(&rcu_preempt_state);
+	rcu_cleanup_dying_cpu(&rcu_preempt_state);
 }
 
 /*
@@ -1052,16 +1053,16 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
 	return 0;
 }
 
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+
 /*
  * Because preemptible RCU does not exist, it never needs CPU-offline
  * processing.
  */
-static void rcu_preempt_offline_cpu(int cpu)
+static void rcu_preempt_cleanup_dead_cpu(int cpu)
 {
 }
 
-#endif /* #ifdef CONFIG_HOTPLUG_CPU */
-
 /*
  * Because preemptible RCU does not exist, it never has any callbacks
  * to check.
@@ -1153,9 +1154,9 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
 }
 
 /*
- * Because there is no preemptible RCU, there are no callbacks to move.
+ * Because there is no preemptible RCU, there is no cleanup to do.
  */
-static void rcu_preempt_send_cbs_to_online(void)
+static void rcu_preempt_cleanup_dying_cpu(void)
 {
 }
 