authorPaul E. McKenney <paulmck@linux.vnet.ibm.com>2014-10-31 15:56:16 -0400
committerPaul E. McKenney <paulmck@linux.vnet.ibm.com>2015-01-06 14:02:44 -0500
commitd19fb8d1f3f66cc342d30aa48f090c70afb753ed (patch)
tree4d84a0e35b659907b3c651a7841613afc3ad5011
parentb6a932d1d9840727eee619d455bdeeedaa205be9 (diff)
rcu: Don't migrate blocked tasks even if all corresponding CPUs offline
When the last CPU associated with a given leaf rcu_node structure goes offline, something must be done about the tasks queued on that rcu_node structure. Each of these tasks has been preempted on one of the leaf rcu_node structure's CPUs while in an RCU read-side critical section that it has not yet exited. Handling these tasks is the job of rcu_preempt_offline_tasks(), which migrates them from the leaf rcu_node structure to the root rcu_node structure.

Unfortunately, this migration has to be done one task at a time because each task's allegiance must be shifted from the original leaf rcu_node structure to the root, so that future attempts to deal with these tasks will acquire the root rcu_node structure's ->lock rather than that of the leaf. Worse yet, this migration must be done with interrupts disabled, which is not so good for realtime response, especially given that there is no bound on the number of tasks on a given rcu_node structure's list. (OK, OK, there is a bound, it is just that it is unreasonably large, especially on 64-bit systems.)

This was not considered a problem back when rcu_preempt_offline_tasks() was first written because realtime systems were assumed not to do CPU-hotplug operations while real-time applications were running. This assumption has proved of dubious validity given that people are starting to run multiple realtime applications on a single SMP system and that it is common practice to offline and then online a CPU before starting its real-time application in order to clear extraneous processing off of that CPU. So we now need CPU hotplug operations to avoid undue latencies.

This commit therefore avoids migrating these tasks, instead letting them be dequeued one by one from the original leaf rcu_node structure by rcu_read_unlock_special(). This means that the clearing of bits from the upper-level rcu_node structures must be deferred until the last such task has been dequeued, because otherwise subsequent grace periods won't wait on them. This commit has the beneficial side effect of simplifying the CPU-hotplug code for TREE_PREEMPT_RCU, especially in CONFIG_RCU_BOOST builds.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
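The deferred clearing described above relies on rcu_cleanup_dead_rnp(), introduced by the parent commit, which walks up the rcu_node tree once a leaf has neither online CPUs nor blocked tasks. A simplified sketch of that walk, with memory-ordering details elided (the real function lives in kernel/rcu/tree.c, and this is an approximation of it, not the literal code):

	/*
	 * Simplified sketch: propagate ->qsmaskinit bit clearing up the
	 * rcu_node tree once the leaf has no online CPUs and no blocked
	 * tasks.  Caller holds the leaf's ->lock with irqs disabled.
	 */
	static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
	{
		unsigned long mask;
		struct rcu_node *rnp = rnp_leaf;

		if (rnp->qsmaskinit || rcu_preempt_has_tasks(rnp))
			return;			/* CPUs or blocked tasks remain. */
		for (;;) {
			mask = rnp->grpmask;	/* This node's bit in its parent. */
			rnp = rnp->parent;
			if (!rnp)
				break;		/* Cleared all the way to the root. */
			raw_spin_lock(&rnp->lock); /* irqs already disabled. */
			rnp->qsmaskinit &= ~mask;
			if (rnp->qsmaskinit) {	/* Sibling subtrees still active? */
				raw_spin_unlock(&rnp->lock);
				return;
			}
			raw_spin_unlock(&rnp->lock); /* Continue up the tree. */
		}
	}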
-rw-r--r--  kernel/rcu/tree.c          20
-rw-r--r--  kernel/rcu/tree.h          18
-rw-r--r--  kernel/rcu/tree_plugin.h  126
3 files changed, 4 insertions(+), 160 deletions(-)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 6625a1b5d9a1..84f16cf05991 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2276,7 +2276,6 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
 static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 {
         unsigned long flags;
-        int need_report = 0;
         struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
         struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rdp & rnp. */
 
@@ -2295,25 +2294,10 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
         raw_spin_lock(&rnp->lock);      /* irqs already disabled. */
         smp_mb__after_unlock_lock();    /* Enforce GP memory-order guarantee. */
         rnp->qsmaskinit &= ~rdp->grpmask;
-        if (rnp->qsmaskinit == 0) {
-                need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp);
+        if (rnp->qsmaskinit == 0 && !rcu_preempt_has_tasks(rnp))
                 rcu_cleanup_dead_rnp(rnp);
-        }
-
-        /*
-         * We still hold the leaf rcu_node structure lock here, and
-         * irqs are still disabled.  The reason for this subterfuge is
-         * because invoking rcu_report_unblock_qs_rnp() with ->orphan_lock
-         * held leads to deadlock.
-         */
         raw_spin_unlock(&rsp->orphan_lock); /* irqs remain disabled. */
-        rnp = rdp->mynode;
-        if (need_report & RCU_OFL_TASKS_NORM_GP)
-                rcu_report_unblock_qs_rnp(rnp, flags);
-        else
-                raw_spin_unlock_irqrestore(&rnp->lock, flags);
-        if (need_report & RCU_OFL_TASKS_EXP_GP)
-                rcu_report_exp_rnp(rsp, rnp, true);
+        raw_spin_unlock_irqrestore(&rnp->lock, flags);
         WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL,
                   "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n",
                   cpu, rdp->qlen, rdp->nxtlist);
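The rewritten condition leans on rcu_preempt_has_tasks(), whose declaration appears in the tree.h hunk below. A minimal sketch of what such a helper presumably reduces to, assuming the leaf's blocked readers live on its ->blkd_tasks list (the non-preemptible stub would simply return false, since no readers can be blocked):

	/* Sketch of the preemptible-RCU helper; an assumption, not the
	 * literal committed code. */
	static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
	{
		return !list_empty(&rnp->blkd_tasks);
	}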
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 9315477b47d9..883ebc8e2b6e 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -514,13 +514,6 @@ extern struct list_head rcu_struct_flavors;
 #define for_each_rcu_flavor(rsp) \
         list_for_each_entry((rsp), &rcu_struct_flavors, flavors)
 
-/* Return values for rcu_preempt_offline_tasks(). */
-
-#define RCU_OFL_TASKS_NORM_GP   0x1             /* Tasks blocking normal */
-                                                /*  GP were moved to root. */
-#define RCU_OFL_TASKS_EXP_GP    0x2             /* Tasks blocking expedited */
-                                                /*  GP were moved to root. */
-
 /*
  * RCU implementation internal declarations:
  */
@@ -550,24 +543,13 @@ long rcu_batches_completed(void);
 static void rcu_preempt_note_context_switch(void);
 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp);
 #ifdef CONFIG_HOTPLUG_CPU
-static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp,
-                                      unsigned long flags);
 static bool rcu_preempt_has_tasks(struct rcu_node *rnp);
 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 static void rcu_print_detail_task_stall(struct rcu_state *rsp);
 static int rcu_print_task_stall(struct rcu_node *rnp);
 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
-#ifdef CONFIG_HOTPLUG_CPU
-static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
-                                     struct rcu_node *rnp,
-                                     struct rcu_data *rdp);
-#endif /* #ifdef CONFIG_HOTPLUG_CPU */
 static void rcu_preempt_check_callbacks(void);
 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
-#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PREEMPT_RCU)
-static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
-                               bool wake);
-#endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PREEMPT_RCU) */
 static void __init __rcu_init_preempt(void);
 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 8a2b84157d34..d594da48f4b4 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -103,6 +103,8 @@ RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu);
 static struct rcu_state *rcu_state_p = &rcu_preempt_state;
 
 static int rcu_preempted_readers_exp(struct rcu_node *rnp);
+static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
+                               bool wake);
 
 /*
  * Tell them what RCU they are running.
@@ -545,92 +547,6 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
 
 #ifdef CONFIG_HOTPLUG_CPU
 
-/*
- * Handle tasklist migration for case in which all CPUs covered by the
- * specified rcu_node have gone offline.  Move them up to the root
- * rcu_node.  The reason for not just moving them to the immediate
- * parent is to remove the need for rcu_read_unlock_special() to
- * make more than two attempts to acquire the target rcu_node's lock.
- * Returns true if there were tasks blocking the current RCU grace
- * period.
- *
- * Returns 1 if there was previously a task blocking the current grace
- * period on the specified rcu_node structure.
- *
- * The caller must hold rnp->lock with irqs disabled.
- */
-static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
-                                     struct rcu_node *rnp,
-                                     struct rcu_data *rdp)
-{
-        struct list_head *lp;
-        struct list_head *lp_root;
-        int retval = 0;
-        struct rcu_node *rnp_root = rcu_get_root(rsp);
-        struct task_struct *t;
-
-        if (rnp == rnp_root) {
-                WARN_ONCE(1, "Last CPU thought to be offlined?");
-                return 0;  /* Shouldn't happen: at least one CPU online. */
-        }
-
-        /* If we are on an internal node, complain bitterly. */
-        WARN_ON_ONCE(rnp != rdp->mynode);
-
-        /*
-         * Move tasks up to root rcu_node.  Don't try to get fancy for
-         * this corner-case operation -- just put this node's tasks
-         * at the head of the root node's list, and update the root node's
-         * ->gp_tasks and ->exp_tasks pointers to those of this node's,
-         * if non-NULL.  This might result in waiting for more tasks than
-         * absolutely necessary, but this is a good performance/complexity
-         * tradeoff.
-         */
-        if (rcu_preempt_blocked_readers_cgp(rnp) && rnp->qsmask == 0)
-                retval |= RCU_OFL_TASKS_NORM_GP;
-        if (rcu_preempted_readers_exp(rnp))
-                retval |= RCU_OFL_TASKS_EXP_GP;
-        lp = &rnp->blkd_tasks;
-        lp_root = &rnp_root->blkd_tasks;
-        while (!list_empty(lp)) {
-                t = list_entry(lp->next, typeof(*t), rcu_node_entry);
-                raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
-                smp_mb__after_unlock_lock();
-                list_del(&t->rcu_node_entry);
-                t->rcu_blocked_node = rnp_root;
-                list_add(&t->rcu_node_entry, lp_root);
-                if (&t->rcu_node_entry == rnp->gp_tasks)
-                        rnp_root->gp_tasks = rnp->gp_tasks;
-                if (&t->rcu_node_entry == rnp->exp_tasks)
-                        rnp_root->exp_tasks = rnp->exp_tasks;
-#ifdef CONFIG_RCU_BOOST
-                if (&t->rcu_node_entry == rnp->boost_tasks)
-                        rnp_root->boost_tasks = rnp->boost_tasks;
-#endif /* #ifdef CONFIG_RCU_BOOST */
-                raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
-        }
-
-        rnp->gp_tasks = NULL;
-        rnp->exp_tasks = NULL;
-#ifdef CONFIG_RCU_BOOST
-        rnp->boost_tasks = NULL;
-        /*
-         * In case root is being boosted and leaf was not.  Make sure
-         * that we boost the tasks blocking the current grace period
-         * in this case.
-         */
-        raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
-        smp_mb__after_unlock_lock();
-        if (rnp_root->boost_tasks != NULL &&
-            rnp_root->boost_tasks != rnp_root->gp_tasks &&
-            rnp_root->boost_tasks != rnp_root->exp_tasks)
-                rnp_root->boost_tasks = rnp_root->gp_tasks;
-        raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
-#endif /* #ifdef CONFIG_RCU_BOOST */
-
-        return retval;
-}
-
 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 
 /*
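With the migration loop above removed, each blocked task is instead dequeued from its original leaf by rcu_read_unlock_special(); once the last such task leaves an all-offline leaf, the deferred ->qsmaskinit clearing can proceed. A hypothetical fragment of that path (the actual hookup lands in a companion commit; names follow the kernel's, but this is a sketch, not the literal code):

	/* Hypothetical helper illustrating the dequeue-in-place path taken
	 * by rcu_read_unlock_special().  Caller holds rnp->lock, irqs off. */
	static void sketch_unlock_dequeue(struct task_struct *t,
	                                  struct rcu_node *rnp)
	{
		list_del_init(&t->rcu_node_entry); /* Leave original leaf's list. */
		t->rcu_blocked_node = NULL;
		/* Last blocked task gone from an all-offline leaf?  Then the
		 * deferred ->qsmaskinit clearing may finally propagate upward. */
		if (rnp->qsmaskinit == 0 && !rcu_preempt_has_tasks(rnp))
			rcu_cleanup_dead_rnp(rnp);
	}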
@@ -979,13 +895,6 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
 
 #ifdef CONFIG_HOTPLUG_CPU
 
-/* Because preemptible RCU does not exist, no quieting of tasks. */
-static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
-        __releases(rnp->lock)
-{
-        raw_spin_unlock_irqrestore(&rnp->lock, flags);
-}
-
 /*
  * Because there is no preemptible RCU, there can be no readers blocked.
  */
@@ -1023,23 +932,6 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
         WARN_ON_ONCE(rnp->qsmask);
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
-
-/*
- * Because preemptible RCU does not exist, it never needs to migrate
- * tasks that were blocked within RCU read-side critical sections, and
- * such non-existent tasks cannot possibly have been blocking the current
- * grace period.
- */
-static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
-                                     struct rcu_node *rnp,
-                                     struct rcu_data *rdp)
-{
-        return 0;
-}
-
-#endif /* #ifdef CONFIG_HOTPLUG_CPU */
-
 /*
  * Because preemptible RCU does not exist, it never has any callbacks
  * to check.
@@ -1058,20 +950,6 @@ void synchronize_rcu_expedited(void)
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 
-#ifdef CONFIG_HOTPLUG_CPU
-
-/*
- * Because preemptible RCU does not exist, there is never any need to
- * report on tasks preempted in RCU read-side critical sections during
- * expedited RCU grace periods.
- */
-static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
-                               bool wake)
-{
-}
-
-#endif /* #ifdef CONFIG_HOTPLUG_CPU */
-
 /*
  * Because preemptible RCU does not exist, rcu_barrier() is just
  * another name for rcu_barrier_sched().