diff options
author | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2009-08-27 17:58:16 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-08-29 09:34:39 -0400 |
commit | dd5d19bafd90d33043a4a14b2e2d98612caa293c (patch) | |
tree | c21d547124d277f00332bdb01c75f2f4f321de8c /kernel | |
parent | 06e799764eb7c2e4640888d438c3524d756613e1 (diff) |
rcu: Create rcutree plugins to handle hotplug CPU for multi-level trees
When offlining CPUs from a multi-level rcu_node tree, it is possible
to offline the last CPU of a given rcu_node while tasks are still
preempted within RCU read-side critical sections that started on one
of that node's CPUs.
In this case, the corresponding tasks are still enqueued, via the
task_struct's rcu_node_entry list_head, on one of that rcu_node's
blocked_tasks[] lists. These tasks must be moved to another rcu_node
so that they continue to prevent the current grace period from
ending. This commit moves them to the root rcu_node.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: akpm@linux-foundation.org
Cc: mathieu.desnoyers@polymtl.ca
Cc: josht@linux.vnet.ibm.com
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
LKML-Reference: <20090827215816.GA30472@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/rcutree.c | 2 | ||||
-rw-r--r-- | kernel/rcutree_plugin.h | 69 |
2 files changed, 66 insertions, 5 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index fee6316a8673..d903e2f2b840 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
@@ -81,6 +81,7 @@ struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); | |||
81 | DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); | 81 | DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); |
82 | 82 | ||
83 | extern long rcu_batches_completed_sched(void); | 83 | extern long rcu_batches_completed_sched(void); |
84 | static struct rcu_node *rcu_get_root(struct rcu_state *rsp); | ||
84 | static void cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, | 85 | static void cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, |
85 | struct rcu_node *rnp, unsigned long flags); | 86 | struct rcu_node *rnp, unsigned long flags); |
86 | static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags); | 87 | static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags); |
@@ -876,6 +877,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) | |||
876 | spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 877 | spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
877 | break; | 878 | break; |
878 | } | 879 | } |
880 | rcu_preempt_offline_tasks(rsp, rnp); | ||
879 | mask = rnp->grpmask; | 881 | mask = rnp->grpmask; |
880 | spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 882 | spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
881 | rnp = rnp->parent; | 883 | rnp = rnp->parent; |
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 201334cdc200..04343bee646d 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
@@ -92,7 +92,7 @@ static void rcu_preempt_qs(int cpu) | |||
92 | rnp = rdp->mynode; | 92 | rnp = rdp->mynode; |
93 | spin_lock(&rnp->lock); | 93 | spin_lock(&rnp->lock); |
94 | t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; | 94 | t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; |
95 | t->rcu_blocked_cpu = cpu; | 95 | t->rcu_blocked_node = (void *)rnp; |
96 | 96 | ||
97 | /* | 97 | /* |
98 | * If this CPU has already checked in, then this task | 98 | * If this CPU has already checked in, then this task |
@@ -170,12 +170,21 @@ static void rcu_read_unlock_special(struct task_struct *t) | |||
170 | if (special & RCU_READ_UNLOCK_BLOCKED) { | 170 | if (special & RCU_READ_UNLOCK_BLOCKED) { |
171 | t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED; | 171 | t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED; |
172 | 172 | ||
173 | /* Remove this task from the list it blocked on. */ | 173 | /* |
174 | rnp = rcu_preempt_state.rda[t->rcu_blocked_cpu]->mynode; | 174 | * Remove this task from the list it blocked on. The |
175 | spin_lock(&rnp->lock); | 175 | * task can migrate while we acquire the lock, but at |
176 | * most one time. So at most two passes through loop. | ||
177 | */ | ||
178 | for (;;) { | ||
179 | rnp = (struct rcu_node *)t->rcu_blocked_node; | ||
180 | spin_lock(&rnp->lock); | ||
181 | if (rnp == (struct rcu_node *)t->rcu_blocked_node) | ||
182 | break; | ||
183 | spin_unlock(&rnp->lock); | ||
184 | } | ||
176 | empty = list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]); | 185 | empty = list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]); |
177 | list_del_init(&t->rcu_node_entry); | 186 | list_del_init(&t->rcu_node_entry); |
178 | t->rcu_blocked_cpu = -1; | 187 | t->rcu_blocked_node = NULL; |
179 | 188 | ||
180 | /* | 189 | /* |
181 | * If this was the last task on the current list, and if | 190 | * If this was the last task on the current list, and if |
@@ -262,6 +271,47 @@ static int rcu_preempted_readers(struct rcu_node *rnp) | |||
262 | #ifdef CONFIG_HOTPLUG_CPU | 271 | #ifdef CONFIG_HOTPLUG_CPU |
263 | 272 | ||
264 | /* | 273 | /* |
274 | * Handle tasklist migration for case in which all CPUs covered by the | ||
275 | * specified rcu_node have gone offline. Move them up to the root | ||
276 | * rcu_node. The reason for not just moving them to the immediate | ||
277 | * parent is to remove the need for rcu_read_unlock_special() to | ||
278 | * make more than two attempts to acquire the target rcu_node's lock. | ||
279 | * | ||
280 | * The caller must hold rnp->lock with irqs disabled. | ||
281 | */ | ||
282 | static void rcu_preempt_offline_tasks(struct rcu_state *rsp, | ||
283 | struct rcu_node *rnp) | ||
284 | { | ||
285 | int i; | ||
286 | struct list_head *lp; | ||
287 | struct list_head *lp_root; | ||
288 | struct rcu_node *rnp_root = rcu_get_root(rsp); | ||
289 | struct task_struct *tp; | ||
290 | |||
291 | if (rnp == rnp_root) | ||
292 | return; /* Shouldn't happen: at least one CPU online. */ | ||
293 | |||
294 | /* | ||
295 | * Move tasks up to root rcu_node. Rely on the fact that the | ||
296 | * root rcu_node can be at most one ahead of the rest of the | ||
297 | * rcu_nodes in terms of gp_num value. This fact allows us to | ||
298 | * move the blocked_tasks[] array directly, element by element. | ||
299 | */ | ||
300 | for (i = 0; i < 2; i++) { | ||
301 | lp = &rnp->blocked_tasks[i]; | ||
302 | lp_root = &rnp_root->blocked_tasks[i]; | ||
303 | while (!list_empty(lp)) { | ||
304 | tp = list_entry(lp->next, typeof(*tp), rcu_node_entry); | ||
305 | spin_lock(&rnp_root->lock); /* irqs already disabled */ | ||
306 | list_del(&tp->rcu_node_entry); | ||
307 | tp->rcu_blocked_node = rnp_root; | ||
308 | list_add(&tp->rcu_node_entry, lp_root); | ||
309 | spin_unlock(&rnp_root->lock); /* irqs remain disabled */ | ||
310 | } | ||
311 | } | ||
312 | } | ||
313 | |||
314 | /* | ||
265 | * Do CPU-offline processing for preemptable RCU. | 315 | * Do CPU-offline processing for preemptable RCU. |
266 | */ | 316 | */ |
267 | static void rcu_preempt_offline_cpu(int cpu) | 317 | static void rcu_preempt_offline_cpu(int cpu) |
@@ -410,6 +460,15 @@ static int rcu_preempted_readers(struct rcu_node *rnp) | |||
410 | #ifdef CONFIG_HOTPLUG_CPU | 460 | #ifdef CONFIG_HOTPLUG_CPU |
411 | 461 | ||
412 | /* | 462 | /* |
463 | * Because preemptable RCU does not exist, it never needs to migrate | ||
464 | * tasks that were blocked within RCU read-side critical sections. | ||
465 | */ | ||
466 | static void rcu_preempt_offline_tasks(struct rcu_state *rsp, | ||
467 | struct rcu_node *rnp) | ||
468 | { | ||
469 | } | ||
470 | |||
471 | /* | ||
413 | * Because preemptable RCU does not exist, it never needs CPU-offline | 472 | * Because preemptable RCU does not exist, it never needs CPU-offline |
414 | * processing. | 473 | * processing. |
415 | */ | 474 | */ |