author	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2009-08-27 17:58:16 -0400
committer	Ingo Molnar <mingo@elte.hu>	2009-08-29 09:34:39 -0400
commit	dd5d19bafd90d33043a4a14b2e2d98612caa293c (patch)
tree	c21d547124d277f00332bdb01c75f2f4f321de8c /kernel
parent	06e799764eb7c2e4640888d438c3524d756613e1 (diff)
rcu: Create rcutree plugins to handle hotplug CPU for multi-level trees
When offlining CPUs from a multi-level tree, there is the possibility of offlining the last CPU from a given node when there are preempted RCU read-side critical sections that started life on one of the CPUs on that node. In this case, the corresponding tasks will be enqueued via the task_struct's rcu_node_entry list_head onto one of the rcu_node's blocked_tasks[] lists. These tasks need to be moved somewhere else so that they will prevent the current grace period from ending. That somewhere is the root rcu_node.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: akpm@linux-foundation.org
Cc: mathieu.desnoyers@polymtl.ca
Cc: josht@linux.vnet.ibm.com
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
LKML-Reference: <20090827215816.GA30472@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
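[Editorial note: the subtle part of this change is how rcu_read_unlock_special() safely locks the rcu_node that a task is queued on while rcu_preempt_offline_tasks() may concurrently migrate that task to the root rcu_node. Below is a minimal user-space sketch of that lock-and-revalidate pattern, using pthread mutexes in place of kernel spinlocks; the names struct node, struct task, lock_blocked_node(), and migrate() are illustrative only and are not part of the patch.]

#include <pthread.h>

struct node {
	pthread_mutex_t lock;
};

struct task {
	struct node *blocked_node;	/* retargeted only with both node locks held */
};

/*
 * Acquire the lock of the node the task is currently queued on.  The
 * pointer may change under us before we get the lock, but re-checking
 * it after acquiring the lock makes the result stable: once we hold a
 * node's lock, no one can migrate the task away from that node.  (The
 * kernel relies on word-aligned pointer loads here; a strict C11
 * version would read the pointer with an atomic load.)
 */
static struct node *lock_blocked_node(struct task *t)
{
	struct node *np;

	for (;;) {
		np = t->blocked_node;		 /* snapshot the pointer */
		pthread_mutex_lock(&np->lock);
		if (np == t->blocked_node)	 /* still the same node? */
			return np;		 /* yes: lock covers the task */
		pthread_mutex_unlock(&np->lock); /* no: task migrated; retry */
	}
}

/* Retarget the task; the caller already holds the old node's lock. */
static void migrate(struct task *t, struct node *to)
{
	pthread_mutex_lock(&to->lock);
	t->blocked_node = to;
	pthread_mutex_unlock(&to->lock);
}

[In the patch itself, a task is moved at most once, and only to the root rcu_node rather than to an intermediate parent, which is what bounds the revalidation loop in rcu_read_unlock_special() to two passes.]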
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/rcutree.c	2
-rw-r--r--	kernel/rcutree_plugin.h	69
2 files changed, 66 insertions, 5 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index fee6316a8673..d903e2f2b840 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -81,6 +81,7 @@ struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state);
 DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
 
 extern long rcu_batches_completed_sched(void);
+static struct rcu_node *rcu_get_root(struct rcu_state *rsp);
 static void cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp,
 			  struct rcu_node *rnp, unsigned long flags);
 static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags);
@@ -876,6 +877,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
 			spin_unlock(&rnp->lock); /* irqs remain disabled. */
 			break;
 		}
+		rcu_preempt_offline_tasks(rsp, rnp);
 		mask = rnp->grpmask;
 		spin_unlock(&rnp->lock); /* irqs remain disabled. */
 		rnp = rnp->parent;
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 201334cdc200..04343bee646d 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -92,7 +92,7 @@ static void rcu_preempt_qs(int cpu)
 		rnp = rdp->mynode;
 		spin_lock(&rnp->lock);
 		t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
-		t->rcu_blocked_cpu = cpu;
+		t->rcu_blocked_node = (void *)rnp;
 
 		/*
 		 * If this CPU has already checked in, then this task
@@ -170,12 +170,21 @@ static void rcu_read_unlock_special(struct task_struct *t)
 	if (special & RCU_READ_UNLOCK_BLOCKED) {
 		t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;
 
-		/* Remove this task from the list it blocked on. */
-		rnp = rcu_preempt_state.rda[t->rcu_blocked_cpu]->mynode;
-		spin_lock(&rnp->lock);
+		/*
+		 * Remove this task from the list it blocked on.  The
+		 * task can migrate while we acquire the lock, but at
+		 * most one time.  So at most two passes through loop.
+		 */
+		for (;;) {
+			rnp = (struct rcu_node *)t->rcu_blocked_node;
+			spin_lock(&rnp->lock);
+			if (rnp == (struct rcu_node *)t->rcu_blocked_node)
+				break;
+			spin_unlock(&rnp->lock);
+		}
 		empty = list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]);
 		list_del_init(&t->rcu_node_entry);
-		t->rcu_blocked_cpu = -1;
+		t->rcu_blocked_node = NULL;
 
 		/*
 		 * If this was the last task on the current list, and if
@@ -262,6 +271,47 @@ static int rcu_preempted_readers(struct rcu_node *rnp)
 #ifdef CONFIG_HOTPLUG_CPU
 
 /*
+ * Handle tasklist migration for case in which all CPUs covered by the
+ * specified rcu_node have gone offline.  Move them up to the root
+ * rcu_node.  The reason for not just moving them to the immediate
+ * parent is to remove the need for rcu_read_unlock_special() to
+ * make more than two attempts to acquire the target rcu_node's lock.
+ *
+ * The caller must hold rnp->lock with irqs disabled.
+ */
+static void rcu_preempt_offline_tasks(struct rcu_state *rsp,
+				      struct rcu_node *rnp)
+{
+	int i;
+	struct list_head *lp;
+	struct list_head *lp_root;
+	struct rcu_node *rnp_root = rcu_get_root(rsp);
+	struct task_struct *tp;
+
+	if (rnp == rnp_root)
+		return;  /* Shouldn't happen: at least one CPU online. */
+
+	/*
+	 * Move tasks up to root rcu_node.  Rely on the fact that the
+	 * root rcu_node can be at most one ahead of the rest of the
+	 * rcu_nodes in terms of gp_num value.  This fact allows us to
+	 * move the blocked_tasks[] array directly, element by element.
+	 */
+	for (i = 0; i < 2; i++) {
+		lp = &rnp->blocked_tasks[i];
+		lp_root = &rnp_root->blocked_tasks[i];
+		while (!list_empty(lp)) {
+			tp = list_entry(lp->next, typeof(*tp), rcu_node_entry);
+			spin_lock(&rnp_root->lock); /* irqs already disabled */
+			list_del(&tp->rcu_node_entry);
+			tp->rcu_blocked_node = rnp_root;
+			list_add(&tp->rcu_node_entry, lp_root);
+			spin_unlock(&rnp_root->lock); /* irqs remain disabled */
+		}
+	}
+}
+
+/*
  * Do CPU-offline processing for preemptable RCU.
  */
 static void rcu_preempt_offline_cpu(int cpu)
@@ -410,6 +460,15 @@ static int rcu_preempted_readers(struct rcu_node *rnp)
 #ifdef CONFIG_HOTPLUG_CPU
 
 /*
+ * Because preemptable RCU does not exist, it never needs to migrate
+ * tasks that were blocked within RCU read-side critical sections.
+ */
+static void rcu_preempt_offline_tasks(struct rcu_state *rsp,
+				      struct rcu_node *rnp)
+{
+}
+
+/*
  * Because preemptable RCU does not exist, it never needs CPU-offline
  * processing.
  */