author		Paul E. McKenney <paul.mckenney@linaro.org>	2012-01-30 20:02:47 -0500
committer	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2012-02-21 12:06:07 -0500
commit		2036d94a7b61ca5032ce90f2bda06afec0fe713e (patch)
tree		fc9f4b5ba1058e5a2fdf9ccd187766c90f5ae036 /kernel/rcutree.c
parent		c5fdcec927ee31fc96e92339c3a83ac6e0725289 (diff)
rcu: Rework detection of use of RCU by offline CPUs
Because newly offlined CPUs continue executing after completing the
CPU_DYING notifiers, they legitimately enter the scheduler and use
RCU while appearing to be offline. This calls for a more sophisticated
approach as follows:
1. RCU marks the CPU online during the CPU_UP_PREPARE phase.
2. RCU marks the CPU offline during the CPU_DEAD phase.
3. Diagnostics regarding use of read-side RCU by offline CPUs use
RCU's accounting rather than the cpu_online_map. (Note that
__call_rcu() still uses cpu_online_map to detect illegal
invocations within CPU_DYING notifiers.)
4. Offline CPUs are prevented from hanging the system by
force_quiescent_state(), which pays attention to cpu_online_map.
Some additional work (in a later commit) will be needed to
guarantee that force_quiescent_state() waits a full jiffy before
assuming that a CPU is offline, for example, when called from
idle entry. (This commit also makes the one-jiffy wait
explicit, since the old-style implicit wait can now be defeated
by RCU_FAST_NO_HZ and by rcutorture.)
This approach avoids the false positives encountered when attempting to
use more exact classification of CPU online/offline state.
Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
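[Editorial note] As a rough illustration of items 1 to 3 in the commit message, the standalone C model below contrasts the generic online map with RCU's own accounting. All names here (rcu_online_mask, rcu_cpu_usable(), the sim_* phase helpers) are made up for the example and are not kernel identifiers; the point is only the window during which a dying CPU is still usable from RCU's point of view.

#include <stdio.h>

static unsigned long cpu_online_map;   /* generic hotplug view of the CPU       */
static unsigned long rcu_online_mask;  /* RCU's own accounting (made-up name)   */

static void sim_cpu_up_prepare(int cpu) { rcu_online_mask |= 1UL << cpu; }
static void sim_cpu_online(int cpu)     { cpu_online_map  |= 1UL << cpu; }
static void sim_cpu_dying(int cpu)      { cpu_online_map  &= ~(1UL << cpu); }
static void sim_cpu_dead(int cpu)       { rcu_online_mask &= ~(1UL << cpu); }

/* Diagnostic patterned on the idea behind rcu_lockdep_current_cpu_online(). */
static int rcu_cpu_usable(int cpu)
{
	return !!(rcu_online_mask & (1UL << cpu));
}

int main(void)
{
	int cpu = 2;

	sim_cpu_up_prepare(cpu);   /* RCU marks the CPU online early...           */
	sim_cpu_online(cpu);
	sim_cpu_dying(cpu);        /* ...and still considers it online while the  */
	                           /* dying CPU continues to run the scheduler.   */
	printf("after CPU_DYING: generic=%d rcu=%d\n",
	       !!(cpu_online_map & (1UL << cpu)), rcu_cpu_usable(cpu));
	sim_cpu_dead(cpu);         /* only CPU_DEAD clears RCU's accounting.      */
	printf("after CPU_DEAD:  generic=%d rcu=%d\n",
	       !!(cpu_online_map & (1UL << cpu)), rcu_cpu_usable(cpu));
	return 0;
}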
Diffstat (limited to 'kernel/rcutree.c')
-rw-r--r--	kernel/rcutree.c	113
1 file changed, 67 insertions(+), 46 deletions(-)
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 05470d4caba3..708469a06860 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -320,25 +320,18 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
 static int rcu_implicit_offline_qs(struct rcu_data *rdp)
 {
 	/*
-	 * If the CPU is offline, it is in a quiescent state.  We can
-	 * trust its state not to change because interrupts are disabled.
+	 * If the CPU is offline for more than a jiffy, it is in a quiescent
+	 * state.  We can trust its state not to change because interrupts
+	 * are disabled.  The reason for the jiffy's worth of slack is to
+	 * handle CPUs initializing on the way up and finding their way
+	 * to the idle loop on the way down.
 	 */
-	if (cpu_is_offline(rdp->cpu)) {
+	if (cpu_is_offline(rdp->cpu) &&
+	    ULONG_CMP_LT(rdp->rsp->gp_start + 2, jiffies)) {
 		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl");
 		rdp->offline_fqs++;
 		return 1;
 	}
-
-	/*
-	 * The CPU is online, so send it a reschedule IPI.  This forces
-	 * it through the scheduler, and (inefficiently) also handles cases
-	 * where idle loops fail to inform RCU about the CPU being idle.
-	 */
-	if (rdp->cpu != smp_processor_id())
-		smp_send_reschedule(rdp->cpu);
-	else
-		set_need_resched();
-	rdp->resched_ipi++;
 	return 0;
 }
 
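[Editorial note] The new test relies on ULONG_CMP_LT() to compare jiffies values without being confused by counter wraparound, and the "+ 2" gives a CPU on its way up or down at least one full jiffy of slack after the grace period starts. The standalone snippet below illustrates the comparison; the ULONG_CMP_LT() definition is reproduced here from memory of include/linux/rcupdate.h, so treat it as an approximation and check the real header.

#include <limits.h>
#include <stdio.h>

/* Wraparound-safe "a is before b" for unsigned long counters such as jiffies. */
#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b))

int main(void)
{
	unsigned long gp_start = ULONG_MAX - 10;  /* grace period began just before wrap */
	unsigned long jiffies  = 5;               /* the counter has since wrapped       */

	/*
	 * rcu_implicit_offline_qs() only treats the CPU as offline once
	 * gp_start + 2 is before jiffies, i.e. at least a full jiffy has
	 * elapsed since the grace period started.
	 */
	printf("wraparound-safe: %d\n", ULONG_CMP_LT(gp_start + 2, jiffies)); /* 1 */
	printf("naive compare:   %d\n", (int)(gp_start + 2 < jiffies));       /* 0 */
	return 0;
}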
@@ -601,19 +594,33 @@ EXPORT_SYMBOL(rcu_is_cpu_idle);
  * this task being preempted, its old CPU being taken offline, resuming
  * on some other CPU, then determining that its old CPU is now offline.
  * It is OK to use RCU on an offline processor during initial boot, hence
- * the check for rcu_scheduler_fully_active.
+ * the check for rcu_scheduler_fully_active.  Note also that it is OK
+ * for a CPU coming online to use RCU for one jiffy prior to marking itself
+ * online in the cpu_online_mask.  Similarly, it is OK for a CPU going
+ * offline to continue to use RCU for one jiffy after marking itself
+ * offline in the cpu_online_mask.  This leniency is necessary given the
+ * non-atomic nature of the online and offline processing, for example,
+ * the fact that a CPU enters the scheduler after completing the CPU_DYING
+ * notifiers.
+ *
+ * This is also why RCU internally marks CPUs online during the
+ * CPU_UP_PREPARE phase and offline during the CPU_DEAD phase.
  *
  * Disable checking if in an NMI handler because we cannot safely report
  * errors from NMI handlers anyway.
  */
 bool rcu_lockdep_current_cpu_online(void)
 {
+	struct rcu_data *rdp;
+	struct rcu_node *rnp;
 	bool ret;
 
 	if (in_nmi())
 		return 1;
 	preempt_disable();
-	ret = cpu_online(smp_processor_id()) ||
+	rdp = &__get_cpu_var(rcu_sched_data);
+	rnp = rdp->mynode;
+	ret = (rdp->grpmask & rnp->qsmaskinit) ||
 	      !rcu_scheduler_fully_active;
 	preempt_enable();
 	return ret;
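[Editorial note] The reworked check consults RCU's own bookkeeping instead of cpu_online_mask: each CPU owns one bit (rdp->grpmask) in its leaf rcu_node's ->qsmaskinit, which this series sets at CPU_UP_PREPARE and clears at CPU_DEAD. Below is a minimal standalone model of that bit test; the structure and field names are simplified stand-ins, not the kernel's definitions.

#include <stdio.h>

struct node { unsigned long qsmaskinit; };                    /* stand-in for a leaf rcu_node  */
struct data { unsigned long grpmask; struct node *mynode; };  /* stand-in for per-CPU rcu_data */

/* grpmask is the CPU's bit within its leaf node, e.g. 1UL << (cpu - grplo). */
static int cpu_online_per_rcu(struct data *rdp)
{
	return !!(rdp->grpmask & rdp->mynode->qsmaskinit);
}

int main(void)
{
	struct node leaf = { .qsmaskinit = 0 };
	struct data cpu1 = { .grpmask = 1UL << 1, .mynode = &leaf };

	leaf.qsmaskinit |= cpu1.grpmask;   /* what CPU_UP_PREPARE does for RCU */
	printf("online per RCU: %d\n", cpu_online_per_rcu(&cpu1));  /* 1 */
	leaf.qsmaskinit &= ~cpu1.grpmask;  /* what CPU_DEAD does for RCU       */
	printf("online per RCU: %d\n", cpu_online_per_rcu(&cpu1));  /* 0 */
	return 0;
}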
@@ -1308,14 +1315,12 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
  */
 static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
 {
-	unsigned long flags;
 	int i;
 	unsigned long mask;
-	int need_report;
 	int receive_cpu = cpumask_any(cpu_online_mask);
 	struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
 	struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu);
-	struct rcu_node *rnp = rdp->mynode;  /* For dying CPU. */
+	RCU_TRACE(struct rcu_node *rnp = rdp->mynode); /* For dying CPU. */
 
 	/* First, adjust the counts. */
 	if (rdp->nxtlist != NULL) {
@@ -1381,32 +1386,6 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
 			       "cpuofl");
 	rcu_report_qs_rdp(smp_processor_id(), rsp, rdp, rsp->gpnum);
 	/* Note that rcu_report_qs_rdp() might call trace_rcu_grace_period(). */
-
-	/*
-	 * Remove the dying CPU from the bitmasks in the rcu_node
-	 * hierarchy.  Because we are in stop_machine() context, we
-	 * automatically exclude ->onofflock critical sections.
-	 */
-	do {
-		raw_spin_lock_irqsave(&rnp->lock, flags);
-		rnp->qsmaskinit &= ~mask;
-		if (rnp->qsmaskinit != 0) {
-			raw_spin_unlock_irqrestore(&rnp->lock, flags);
-			break;
-		}
-		if (rnp == rdp->mynode) {
-			need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp);
-			if (need_report & RCU_OFL_TASKS_NORM_GP)
-				rcu_report_unblock_qs_rnp(rnp, flags);
-			else
-				raw_spin_unlock_irqrestore(&rnp->lock, flags);
-			if (need_report & RCU_OFL_TASKS_EXP_GP)
-				rcu_report_exp_rnp(rsp, rnp, true);
-		} else
-			raw_spin_unlock_irqrestore(&rnp->lock, flags);
-		mask = rnp->grpmask;
-		rnp = rnp->parent;
-	} while (rnp != NULL);
 }
 
 /*
@@ -1417,11 +1396,53 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
  */
 static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 {
+	unsigned long flags;
+	unsigned long mask;
+	int need_report = 0;
 	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
-	struct rcu_node *rnp = rdp->mynode;
+	struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rnp. */
 
+	/* Adjust any no-longer-needed kthreads. */
 	rcu_stop_cpu_kthread(cpu);
 	rcu_node_kthread_setaffinity(rnp, -1);
+
+	/* Remove the dying CPU from the bitmasks in the rcu_node hierarchy. */
+
+	/* Exclude any attempts to start a new grace period. */
+	raw_spin_lock_irqsave(&rsp->onofflock, flags);
+
+	/* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */
+	mask = rdp->grpmask;	/* rnp->grplo is constant. */
+	do {
+		raw_spin_lock(&rnp->lock);	/* irqs already disabled. */
+		rnp->qsmaskinit &= ~mask;
+		if (rnp->qsmaskinit != 0) {
+			if (rnp != rdp->mynode)
+				raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+			break;
+		}
+		if (rnp == rdp->mynode)
+			need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp);
+		else
+			raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+		mask = rnp->grpmask;
+		rnp = rnp->parent;
+	} while (rnp != NULL);
+
+	/*
+	 * We still hold the leaf rcu_node structure lock here, and
+	 * irqs are still disabled.  The reason for this subterfuge is
+	 * because invoking rcu_report_unblock_qs_rnp() with ->onofflock
+	 * held leads to deadlock.
+	 */
+	raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */
+	rnp = rdp->mynode;
+	if (need_report & RCU_OFL_TASKS_NORM_GP)
+		rcu_report_unblock_qs_rnp(rnp, flags);
+	else
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	if (need_report & RCU_OFL_TASKS_EXP_GP)
+		rcu_report_exp_rnp(rsp, rnp, true);
 }
 
 #else /* #ifdef CONFIG_HOTPLUG_CPU */
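[Editorial note] The loop added to rcu_cleanup_dead_cpu() clears the outgoing CPU's bit in its leaf rcu_node and propagates the clearing toward the root only while nodes become completely empty. The standalone sketch below models just that walk on a two-level tree; locking, ->onofflock exclusion, and the preempt-RCU task handoff are all omitted, and the names are illustrative rather than the kernel's.

#include <stdio.h>

struct node {
	unsigned long qsmaskinit;  /* one bit per child (CPU or child node) */
	unsigned long grpmask;     /* this node's bit in its parent         */
	struct node *parent;
};

/* Clear a CPU's bit and keep clearing upward while nodes become empty. */
static void clear_cpu(struct node *leaf, unsigned long cpumask)
{
	struct node *rnp = leaf;
	unsigned long mask = cpumask;

	do {
		rnp->qsmaskinit &= ~mask;
		if (rnp->qsmaskinit != 0)
			break;           /* siblings remain: stop propagating */
		mask = rnp->grpmask;     /* node now empty: clear it in parent */
		rnp = rnp->parent;
	} while (rnp != NULL);
}

int main(void)
{
	struct node root = { .qsmaskinit = 0x3 };  /* two leaf groups below the root */
	struct node leaf = { .qsmaskinit = 0x3, .grpmask = 0x1, .parent = &root };

	clear_cpu(&leaf, 0x1);  /* CPU 0 goes away, CPU 1 remains on this leaf */
	printf("leaf=%lx root=%lx\n", leaf.qsmaskinit, root.qsmaskinit);  /* 2 3 */
	clear_cpu(&leaf, 0x2);  /* last CPU on this leaf goes away             */
	printf("leaf=%lx root=%lx\n", leaf.qsmaskinit, root.qsmaskinit);  /* 0 2 */
	return 0;
}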