author	Paul E. McKenney <paul.mckenney@linaro.org>	2012-01-30 20:02:47 -0500
committer	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2012-02-21 12:06:07 -0500
commit	2036d94a7b61ca5032ce90f2bda06afec0fe713e (patch)
tree	fc9f4b5ba1058e5a2fdf9ccd187766c90f5ae036 /kernel/rcutree.c
parent	c5fdcec927ee31fc96e92339c3a83ac6e0725289 (diff)
rcu: Rework detection of use of RCU by offline CPUs
Because newly offlined CPUs continue executing after completing the
CPU_DYING notifiers, they legitimately enter the scheduler and use RCU
while appearing to be offline.  This calls for a more sophisticated
approach as follows:

1.  RCU marks the CPU online during the CPU_UP_PREPARE phase.

2.  RCU marks the CPU offline during the CPU_DEAD phase.

3.  Diagnostics regarding use of read-side RCU by offline CPUs use
    RCU's accounting rather than the cpu_online_map.  (Note that
    __call_rcu() still uses cpu_online_map to detect illegal
    invocations within CPU_DYING notifiers.)

4.  Offline CPUs are prevented from hanging the system by
    force_quiescent_state(), which pays attention to cpu_online_map.
    Some additional work (in a later commit) will be needed to
    guarantee that force_quiescent_state() waits a full jiffy before
    assuming that a CPU is offline, for example, when called from
    idle entry.  (This commit also makes the one-jiffy wait explicit,
    since the old-style implicit wait can now be defeated by
    RCU_FAST_NO_HZ and by rcutorture.)

This approach avoids the false positives encountered when attempting to
use more exact classification of CPU online/offline state.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
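The one-jiffy leniency described in item 4 shows up in the diff below as
the new ULONG_CMP_LT(rdp->rsp->gp_start + 2, jiffies) test in
rcu_implicit_offline_qs().  The snippet below is a self-contained,
user-space sketch of that style of wraparound-safe comparison on an
unsigned counter; it only mirrors the idea behind ULONG_CMP_LT(), and
the helper name ulong_cmp_lt() and the sample values are invented for
the example.

#include <limits.h>
#include <stdio.h>

/*
 * Wraparound-safe "a < b" for free-running unsigned long counters such
 * as jiffies: the subtraction is done modulo ULONG_MAX + 1, so a
 * difference larger than half the range means that a is "behind" b
 * even if the counter has wrapped in between.
 */
static int ulong_cmp_lt(unsigned long a, unsigned long b)
{
        return ULONG_MAX / 2 < a - b;
}

int main(void)
{
        unsigned long gp_start = ULONG_MAX - 1; /* GP began just before wrap */
        unsigned long jiffies = 3UL;            /* counter has since wrapped  */

        /* True: more than two ticks have elapsed despite the wraparound. */
        if (ulong_cmp_lt(gp_start + 2, jiffies))
                printf("grace period is old enough to trust offline state\n");
        return 0;
}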
Diffstat (limited to 'kernel/rcutree.c')
-rw-r--r--	kernel/rcutree.c	113
1 file changed, 67 insertions(+), 46 deletions(-)
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 05470d4caba3..708469a06860 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -320,25 +320,18 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
 static int rcu_implicit_offline_qs(struct rcu_data *rdp)
 {
         /*
-         * If the CPU is offline, it is in a quiescent state.  We can
-         * trust its state not to change because interrupts are disabled.
+         * If the CPU is offline for more than a jiffy, it is in a quiescent
+         * state.  We can trust its state not to change because interrupts
+         * are disabled.  The reason for the jiffy's worth of slack is to
+         * handle CPUs initializing on the way up and finding their way
+         * to the idle loop on the way down.
          */
-        if (cpu_is_offline(rdp->cpu)) {
+        if (cpu_is_offline(rdp->cpu) &&
+            ULONG_CMP_LT(rdp->rsp->gp_start + 2, jiffies)) {
                 trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl");
                 rdp->offline_fqs++;
                 return 1;
         }
-
-        /*
-         * The CPU is online, so send it a reschedule IPI.  This forces
-         * it through the scheduler, and (inefficiently) also handles cases
-         * where idle loops fail to inform RCU about the CPU being idle.
-         */
-        if (rdp->cpu != smp_processor_id())
-                smp_send_reschedule(rdp->cpu);
-        else
-                set_need_resched();
-        rdp->resched_ipi++;
         return 0;
 }
 
@@ -601,19 +594,33 @@ EXPORT_SYMBOL(rcu_is_cpu_idle);
  * this task being preempted, its old CPU being taken offline, resuming
  * on some other CPU, then determining that its old CPU is now offline.
  * It is OK to use RCU on an offline processor during initial boot, hence
- * the check for rcu_scheduler_fully_active.
+ * the check for rcu_scheduler_fully_active.  Note also that it is OK
+ * for a CPU coming online to use RCU for one jiffy prior to marking itself
+ * online in the cpu_online_mask.  Similarly, it is OK for a CPU going
+ * offline to continue to use RCU for one jiffy after marking itself
+ * offline in the cpu_online_mask.  This leniency is necessary given the
+ * non-atomic nature of the online and offline processing, for example,
+ * the fact that a CPU enters the scheduler after completing the CPU_DYING
+ * notifiers.
+ *
+ * This is also why RCU internally marks CPUs online during the
+ * CPU_UP_PREPARE phase and offline during the CPU_DEAD phase.
  *
  * Disable checking if in an NMI handler because we cannot safely report
  * errors from NMI handlers anyway.
  */
 bool rcu_lockdep_current_cpu_online(void)
 {
+        struct rcu_data *rdp;
+        struct rcu_node *rnp;
         bool ret;
 
         if (in_nmi())
                 return 1;
         preempt_disable();
-        ret = cpu_online(smp_processor_id()) ||
+        rdp = &__get_cpu_var(rcu_sched_data);
+        rnp = rdp->mynode;
+        ret = (rdp->grpmask & rnp->qsmaskinit) ||
                !rcu_scheduler_fully_active;
         preempt_enable();
         return ret;
@@ -1308,14 +1315,12 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
  */
 static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
 {
-        unsigned long flags;
         int i;
         unsigned long mask;
-        int need_report;
         int receive_cpu = cpumask_any(cpu_online_mask);
         struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
         struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu);
-        struct rcu_node *rnp = rdp->mynode; /* For dying CPU. */
+        RCU_TRACE(struct rcu_node *rnp = rdp->mynode); /* For dying CPU. */
 
         /* First, adjust the counts. */
         if (rdp->nxtlist != NULL) {
@@ -1381,32 +1386,6 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
1381 "cpuofl"); 1386 "cpuofl");
1382 rcu_report_qs_rdp(smp_processor_id(), rsp, rdp, rsp->gpnum); 1387 rcu_report_qs_rdp(smp_processor_id(), rsp, rdp, rsp->gpnum);
1383 /* Note that rcu_report_qs_rdp() might call trace_rcu_grace_period(). */ 1388 /* Note that rcu_report_qs_rdp() might call trace_rcu_grace_period(). */
1384
1385 /*
1386 * Remove the dying CPU from the bitmasks in the rcu_node
1387 * hierarchy. Because we are in stop_machine() context, we
1388 * automatically exclude ->onofflock critical sections.
1389 */
1390 do {
1391 raw_spin_lock_irqsave(&rnp->lock, flags);
1392 rnp->qsmaskinit &= ~mask;
1393 if (rnp->qsmaskinit != 0) {
1394 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1395 break;
1396 }
1397 if (rnp == rdp->mynode) {
1398 need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp);
1399 if (need_report & RCU_OFL_TASKS_NORM_GP)
1400 rcu_report_unblock_qs_rnp(rnp, flags);
1401 else
1402 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1403 if (need_report & RCU_OFL_TASKS_EXP_GP)
1404 rcu_report_exp_rnp(rsp, rnp, true);
1405 } else
1406 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1407 mask = rnp->grpmask;
1408 rnp = rnp->parent;
1409 } while (rnp != NULL);
1410} 1389}
1411 1390
1412/* 1391/*
@@ -1417,11 +1396,53 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
  */
 static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 {
+        unsigned long flags;
+        unsigned long mask;
+        int need_report = 0;
         struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
-        struct rcu_node *rnp = rdp->mynode;
+        struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rnp. */
 
+        /* Adjust any no-longer-needed kthreads. */
         rcu_stop_cpu_kthread(cpu);
         rcu_node_kthread_setaffinity(rnp, -1);
+
+        /* Remove the dying CPU from the bitmasks in the rcu_node hierarchy. */
+
+        /* Exclude any attempts to start a new grace period. */
+        raw_spin_lock_irqsave(&rsp->onofflock, flags);
+
+        /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */
+        mask = rdp->grpmask;    /* rnp->grplo is constant. */
+        do {
+                raw_spin_lock(&rnp->lock);      /* irqs already disabled. */
+                rnp->qsmaskinit &= ~mask;
+                if (rnp->qsmaskinit != 0) {
+                        if (rnp != rdp->mynode)
+                                raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+                        break;
+                }
+                if (rnp == rdp->mynode)
+                        need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp);
+                else
+                        raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+                mask = rnp->grpmask;
+                rnp = rnp->parent;
+        } while (rnp != NULL);
+
+        /*
+         * We still hold the leaf rcu_node structure lock here, and
+         * irqs are still disabled.  The reason for this subterfuge is
+         * because invoking rcu_report_unblock_qs_rnp() with ->onofflock
+         * held leads to deadlock.
+         */
+        raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */
+        rnp = rdp->mynode;
+        if (need_report & RCU_OFL_TASKS_NORM_GP)
+                rcu_report_unblock_qs_rnp(rnp, flags);
+        else
+                raw_spin_unlock_irqrestore(&rnp->lock, flags);
+        if (need_report & RCU_OFL_TASKS_EXP_GP)
+                rcu_report_exp_rnp(rsp, rnp, true);
 }
 
 #else /* #ifdef CONFIG_HOTPLUG_CPU */
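For readers unfamiliar with the rcu_node hierarchy, the do/while loop
added to rcu_cleanup_dead_cpu() above walks from the outgoing CPU's
leaf toward the root, clearing the CPU's bit and stopping as soon as a
node still has other online children.  The following is a deliberately
simplified, stand-alone model of just that walk; the struct and
function names are invented for the example, and it omits the
->onofflock / rnp->lock locking and the rcu_preempt_offline_tasks()
handling that the real function performs.

#include <stdio.h>

/* Toy two-level stand-in for the rcu_node tree (names are made up). */
struct toy_node {
        unsigned long qsmaskinit;       /* bits for children still online   */
        unsigned long grpmask;          /* this node's bit within its parent */
        struct toy_node *parent;
};

static struct toy_node root = { 0x3, 0x0, NULL };       /* two leaf groups */
static struct toy_node leaf[2] = {
        { 0x3, 0x1, &root },    /* leaf covering CPUs 0-1 */
        { 0x3, 0x2, &root },    /* leaf covering CPUs 2-3 */
};

static void toy_cleanup_dead_cpu(int cpu)
{
        struct toy_node *rnp = &leaf[cpu / 2];
        unsigned long mask = 1UL << (cpu % 2);  /* CPU's bit within its leaf */

        do {
                rnp->qsmaskinit &= ~mask;
                if (rnp->qsmaskinit != 0)
                        break;          /* siblings remain: stop the walk    */
                mask = rnp->grpmask;    /* node now empty: clear it upstream */
                rnp = rnp->parent;
        } while (rnp != NULL);
}

int main(void)
{
        toy_cleanup_dead_cpu(0);
        toy_cleanup_dead_cpu(1);        /* leaf 0 empties, so root bit 0x1 clears */
        printf("root qsmaskinit = %#lx\n", root.qsmaskinit);   /* prints 0x2 */
        return 0;
}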