Diffstat (limited to 'kernel/rcutree_plugin.h')
-rw-r--r-- | kernel/rcutree_plugin.h | 568
1 file changed, 447 insertions, 121 deletions
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index a3638710dc67..3f6559a5f5cd 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * Read-Copy Update mechanism for mutual exclusion (tree-based version) | 2 | * Read-Copy Update mechanism for mutual exclusion (tree-based version) |
3 | * Internal non-public definitions that provide either classic | 3 | * Internal non-public definitions that provide either classic |
4 | * or preemptable semantics. | 4 | * or preemptible semantics. |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify | 6 | * This program is free software; you can redistribute it and/or modify |
7 | * it under the terms of the GNU General Public License as published by | 7 | * it under the terms of the GNU General Public License as published by |
@@ -54,10 +54,6 @@ static void __init rcu_bootup_announce_oddness(void) | |||
54 | #ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE | 54 | #ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE |
55 | printk(KERN_INFO "\tRCU torture testing starts during boot.\n"); | 55 | printk(KERN_INFO "\tRCU torture testing starts during boot.\n"); |
56 | #endif | 56 | #endif |
57 | #ifndef CONFIG_RCU_CPU_STALL_DETECTOR | ||
58 | printk(KERN_INFO | ||
59 | "\tRCU-based detection of stalled CPUs is disabled.\n"); | ||
60 | #endif | ||
61 | #if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE) | 57 | #if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE) |
62 | printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n"); | 58 | printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n"); |
63 | #endif | 59 | #endif |
@@ -70,6 +66,7 @@ static void __init rcu_bootup_announce_oddness(void) | |||
70 | 66 | ||
71 | struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state); | 67 | struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state); |
72 | DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); | 68 | DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); |
69 | static struct rcu_state *rcu_state = &rcu_preempt_state; | ||
73 | 70 | ||
74 | static int rcu_preempted_readers_exp(struct rcu_node *rnp); | 71 | static int rcu_preempted_readers_exp(struct rcu_node *rnp); |
75 | 72 | ||
@@ -78,7 +75,7 @@ static int rcu_preempted_readers_exp(struct rcu_node *rnp); | |||
78 | */ | 75 | */ |
79 | static void __init rcu_bootup_announce(void) | 76 | static void __init rcu_bootup_announce(void) |
80 | { | 77 | { |
81 | printk(KERN_INFO "Preemptable hierarchical RCU implementation.\n"); | 78 | printk(KERN_INFO "Preemptible hierarchical RCU implementation.\n"); |
82 | rcu_bootup_announce_oddness(); | 79 | rcu_bootup_announce_oddness(); |
83 | } | 80 | } |
84 | 81 | ||
@@ -111,7 +108,7 @@ void rcu_force_quiescent_state(void) | |||
111 | EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); | 108 | EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); |
112 | 109 | ||
113 | /* | 110 | /* |
114 | * Record a preemptable-RCU quiescent state for the specified CPU. Note | 111 | * Record a preemptible-RCU quiescent state for the specified CPU. Note |
115 | * that this just means that the task currently running on the CPU is | 112 | * that this just means that the task currently running on the CPU is |
116 | * not in a quiescent state. There might be any number of tasks blocked | 113 | * not in a quiescent state. There might be any number of tasks blocked |
117 | * while in an RCU read-side critical section. | 114 | * while in an RCU read-side critical section. |
@@ -134,12 +131,12 @@ static void rcu_preempt_qs(int cpu) | |||
134 | * We have entered the scheduler, and the current task might soon be | 131 | * We have entered the scheduler, and the current task might soon be |
135 | * context-switched away from. If this task is in an RCU read-side | 132 | * context-switched away from. If this task is in an RCU read-side |
136 | * critical section, we will no longer be able to rely on the CPU to | 133 | * critical section, we will no longer be able to rely on the CPU to |
137 | * record that fact, so we enqueue the task on the appropriate entry | 134 | * record that fact, so we enqueue the task on the blkd_tasks list. |
138 | * of the blocked_tasks[] array. The task will dequeue itself when | 135 | * The task will dequeue itself when it exits the outermost enclosing |
139 | * it exits the outermost enclosing RCU read-side critical section. | 136 | * RCU read-side critical section. Therefore, the current grace period |
140 | * Therefore, the current grace period cannot be permitted to complete | 137 | * cannot be permitted to complete until the blkd_tasks list entries |
141 | * until the blocked_tasks[] entry indexed by the low-order bit of | 138 | * predating the current grace period drain, in other words, until |
142 | * rnp->gpnum empties. | 139 | * rnp->gp_tasks becomes NULL. |
143 | * | 140 | * |
144 | * Caller must disable preemption. | 141 | * Caller must disable preemption. |
145 | */ | 142 | */ |
@@ -147,7 +144,6 @@ static void rcu_preempt_note_context_switch(int cpu) | |||
147 | { | 144 | { |
148 | struct task_struct *t = current; | 145 | struct task_struct *t = current; |
149 | unsigned long flags; | 146 | unsigned long flags; |
150 | int phase; | ||
151 | struct rcu_data *rdp; | 147 | struct rcu_data *rdp; |
152 | struct rcu_node *rnp; | 148 | struct rcu_node *rnp; |
153 | 149 | ||
@@ -169,15 +165,30 @@ static void rcu_preempt_note_context_switch(int cpu) | |||
169 | * (i.e., this CPU has not yet passed through a quiescent | 165 | * (i.e., this CPU has not yet passed through a quiescent |
170 | * state for the current grace period), then as long | 166 | * state for the current grace period), then as long |
171 | * as that task remains queued, the current grace period | 167 | * as that task remains queued, the current grace period |
172 | * cannot end. | 168 | * cannot end. Note that there is some uncertainty as |
169 | * to exactly when the current grace period started. | ||
170 | * We take a conservative approach, which can result | ||
171 | * in unnecessarily waiting on tasks that started very | ||
172 | * slightly after the current grace period began. C'est | ||
173 | * la vie!!! | ||
173 | * | 174 | * |
174 | * But first, note that the current CPU must still be | 175 | * But first, note that the current CPU must still be |
175 | * on line! | 176 | * on line! |
176 | */ | 177 | */ |
177 | WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0); | 178 | WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0); |
178 | WARN_ON_ONCE(!list_empty(&t->rcu_node_entry)); | 179 | WARN_ON_ONCE(!list_empty(&t->rcu_node_entry)); |
179 | phase = (rnp->gpnum + !(rnp->qsmask & rdp->grpmask)) & 0x1; | 180 | if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) { |
180 | list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]); | 181 | list_add(&t->rcu_node_entry, rnp->gp_tasks->prev); |
182 | rnp->gp_tasks = &t->rcu_node_entry; | ||
183 | #ifdef CONFIG_RCU_BOOST | ||
184 | if (rnp->boost_tasks != NULL) | ||
185 | rnp->boost_tasks = rnp->gp_tasks; | ||
186 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
187 | } else { | ||
188 | list_add(&t->rcu_node_entry, &rnp->blkd_tasks); | ||
189 | if (rnp->qsmask & rdp->grpmask) | ||
190 | rnp->gp_tasks = &t->rcu_node_entry; | ||
191 | } | ||
181 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 192 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
182 | } | 193 | } |
183 | 194 | ||
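The two branches added here maintain a single ->blkd_tasks list on which the tasks blocking the current grace period form one contiguous segment beginning at *->gp_tasks: a newcomer that also blocks the grace period is inserted just ahead of *->gp_tasks and the pointer is pulled back to it, otherwise the newcomer goes to the head of the list. A minimal standalone sketch of that rule, in userspace C with a simplified list type (names and values are illustrative, not kernel code):

#include <stdio.h>
#include <stddef.h>

struct list_head { struct list_head *next, *prev; };

static void list_init(struct list_head *h) { h->next = h->prev = h; }

/* Kernel-style list_add(): insert @new right after @head. */
static void list_add(struct list_head *new, struct list_head *head)
{
    new->next = head->next;
    new->prev = head;
    head->next->prev = new;
    head->next = new;
}

struct task { struct list_head rcu_node_entry; int pid; };

int main(void)
{
    struct list_head blkd_tasks, *gp_tasks = NULL;
    struct task a = { .pid = 1 }, b = { .pid = 2 };
    int cpu_owes_qs = 1;            /* stands in for rnp->qsmask & rdp->grpmask */

    list_init(&blkd_tasks);

    /* First blocker: the list was empty, so it goes at the head and,
     * because the CPU still owes a quiescent state, becomes gp_tasks. */
    list_add(&a.rcu_node_entry, &blkd_tasks);
    if (cpu_owes_qs)
        gp_tasks = &a.rcu_node_entry;

    /* Second blocker while gp_tasks is set: insert just before *gp_tasks
     * and pull gp_tasks back so the newcomer joins the blocking segment. */
    list_add(&b.rcu_node_entry, gp_tasks->prev);
    gp_tasks = &b.rcu_node_entry;

    for (struct list_head *p = blkd_tasks.next; p != &blkd_tasks; p = p->next) {
        struct task *t = (struct task *)((char *)p -
                          offsetof(struct task, rcu_node_entry));
        printf("pid %d%s\n", t->pid, p == gp_tasks ? "  <- gp_tasks" : "");
    }
    return 0;
}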
@@ -196,7 +207,7 @@ static void rcu_preempt_note_context_switch(int cpu) | |||
196 | } | 207 | } |
197 | 208 | ||
198 | /* | 209 | /* |
199 | * Tree-preemptable RCU implementation for rcu_read_lock(). | 210 | * Tree-preemptible RCU implementation for rcu_read_lock(). |
200 | * Just increment ->rcu_read_lock_nesting, shared state will be updated | 211 | * Just increment ->rcu_read_lock_nesting, shared state will be updated |
201 | * if we block. | 212 | * if we block. |
202 | */ | 213 | */ |
@@ -212,12 +223,9 @@ EXPORT_SYMBOL_GPL(__rcu_read_lock); | |||
212 | * for the specified rcu_node structure. If the caller needs a reliable | 223 | * for the specified rcu_node structure. If the caller needs a reliable |
213 | * answer, it must hold the rcu_node's ->lock. | 224 | * answer, it must hold the rcu_node's ->lock. |
214 | */ | 225 | */ |
215 | static int rcu_preempted_readers(struct rcu_node *rnp) | 226 | static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) |
216 | { | 227 | { |
217 | int phase = rnp->gpnum & 0x1; | 228 | return rnp->gp_tasks != NULL; |
218 | |||
219 | return !list_empty(&rnp->blocked_tasks[phase]) || | ||
220 | !list_empty(&rnp->blocked_tasks[phase + 2]); | ||
221 | } | 229 | } |
222 | 230 | ||
223 | /* | 231 | /* |
@@ -233,7 +241,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) | |||
233 | unsigned long mask; | 241 | unsigned long mask; |
234 | struct rcu_node *rnp_p; | 242 | struct rcu_node *rnp_p; |
235 | 243 | ||
236 | if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) { | 244 | if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { |
237 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 245 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
238 | return; /* Still need more quiescent states! */ | 246 | return; /* Still need more quiescent states! */ |
239 | } | 247 | } |
@@ -257,6 +265,21 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) | |||
257 | } | 265 | } |
258 | 266 | ||
259 | /* | 267 | /* |
268 | * Advance a ->blkd_tasks-list pointer to the next entry, instead | ||
269 | * returning NULL if at the end of the list. | ||
270 | */ | ||
271 | static struct list_head *rcu_next_node_entry(struct task_struct *t, | ||
272 | struct rcu_node *rnp) | ||
273 | { | ||
274 | struct list_head *np; | ||
275 | |||
276 | np = t->rcu_node_entry.next; | ||
277 | if (np == &rnp->blkd_tasks) | ||
278 | np = NULL; | ||
279 | return np; | ||
280 | } | ||
281 | |||
282 | /* | ||
260 | * Handle special cases during rcu_read_unlock(), such as needing to | 283 | * Handle special cases during rcu_read_unlock(), such as needing to |
261 | * notify RCU core processing or task having blocked during the RCU | 284 | * notify RCU core processing or task having blocked during the RCU |
262 | * read-side critical section. | 285 | * read-side critical section. |
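This helper is what lets rcu_read_unlock_special(), updated in the hunks below, keep ->gp_tasks, ->exp_tasks, and ->boost_tasks valid when a task removes itself from ->blkd_tasks: any pointer that referenced the departing entry is advanced to its successor, or to NULL if it was the last entry. A minimal standalone sketch of that dequeue rule (userspace C, simplified list type, illustrative names only):

#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

static void list_add(struct list_head *new, struct list_head *head)
{
    new->next = head->next;
    new->prev = head;
    head->next->prev = new;
    head->next = new;
}

static void list_del_init(struct list_head *e)
{
    e->prev->next = e->next;
    e->next->prev = e->prev;
    e->next = e->prev = e;
}

/* Mirror of the new rcu_next_node_entry(): NULL once we hit the list head. */
static struct list_head *next_or_null(struct list_head *e, struct list_head *head)
{
    return e->next == head ? NULL : e->next;
}

int main(void)
{
    struct list_head blkd = { &blkd, &blkd };
    struct list_head a, b;
    struct list_head *gp_tasks, *np;

    list_add(&b, &blkd);            /* blkd -> b            */
    list_add(&a, &blkd);            /* blkd -> a -> b       */
    gp_tasks = &a;                  /* both block the current GP */

    /* Task "a" leaves its outermost RCU read-side critical section. */
    np = next_or_null(&a, &blkd);
    list_del_init(&a);
    if (gp_tasks == &a)
        gp_tasks = np;              /* now &b: GP still blocked by b */

    printf("gp_tasks %s\n", gp_tasks == &b ? "-> b" :
           gp_tasks == NULL ? "== NULL (GP may end)" : "?");
    return 0;
}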
@@ -266,6 +289,7 @@ static void rcu_read_unlock_special(struct task_struct *t) | |||
266 | int empty; | 289 | int empty; |
267 | int empty_exp; | 290 | int empty_exp; |
268 | unsigned long flags; | 291 | unsigned long flags; |
292 | struct list_head *np; | ||
269 | struct rcu_node *rnp; | 293 | struct rcu_node *rnp; |
270 | int special; | 294 | int special; |
271 | 295 | ||
@@ -306,10 +330,19 @@ static void rcu_read_unlock_special(struct task_struct *t) | |||
306 | break; | 330 | break; |
307 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 331 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
308 | } | 332 | } |
309 | empty = !rcu_preempted_readers(rnp); | 333 | empty = !rcu_preempt_blocked_readers_cgp(rnp); |
310 | empty_exp = !rcu_preempted_readers_exp(rnp); | 334 | empty_exp = !rcu_preempted_readers_exp(rnp); |
311 | smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ | 335 | smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ |
336 | np = rcu_next_node_entry(t, rnp); | ||
312 | list_del_init(&t->rcu_node_entry); | 337 | list_del_init(&t->rcu_node_entry); |
338 | if (&t->rcu_node_entry == rnp->gp_tasks) | ||
339 | rnp->gp_tasks = np; | ||
340 | if (&t->rcu_node_entry == rnp->exp_tasks) | ||
341 | rnp->exp_tasks = np; | ||
342 | #ifdef CONFIG_RCU_BOOST | ||
343 | if (&t->rcu_node_entry == rnp->boost_tasks) | ||
344 | rnp->boost_tasks = np; | ||
345 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
313 | t->rcu_blocked_node = NULL; | 346 | t->rcu_blocked_node = NULL; |
314 | 347 | ||
315 | /* | 348 | /* |
@@ -322,6 +355,15 @@ static void rcu_read_unlock_special(struct task_struct *t) | |||
322 | else | 355 | else |
323 | rcu_report_unblock_qs_rnp(rnp, flags); | 356 | rcu_report_unblock_qs_rnp(rnp, flags); |
324 | 357 | ||
358 | #ifdef CONFIG_RCU_BOOST | ||
359 | /* Unboost if we were boosted. */ | ||
360 | if (special & RCU_READ_UNLOCK_BOOSTED) { | ||
361 | t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED; | ||
362 | rt_mutex_unlock(t->rcu_boost_mutex); | ||
363 | t->rcu_boost_mutex = NULL; | ||
364 | } | ||
365 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
366 | |||
325 | /* | 367 | /* |
326 | * If this was the last task on the expedited lists, | 368 | * If this was the last task on the expedited lists, |
327 | * then we need to report up the rcu_node hierarchy. | 369 | * then we need to report up the rcu_node hierarchy. |
@@ -334,7 +376,7 @@ static void rcu_read_unlock_special(struct task_struct *t) | |||
334 | } | 376 | } |
335 | 377 | ||
336 | /* | 378 | /* |
337 | * Tree-preemptable RCU implementation for rcu_read_unlock(). | 379 | * Tree-preemptible RCU implementation for rcu_read_unlock(). |
338 | * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost | 380 | * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost |
339 | * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then | 381 | * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then |
340 | * invoke rcu_read_unlock_special() to clean up after a context switch | 382 | * invoke rcu_read_unlock_special() to clean up after a context switch |
@@ -356,8 +398,6 @@ void __rcu_read_unlock(void) | |||
356 | } | 398 | } |
357 | EXPORT_SYMBOL_GPL(__rcu_read_unlock); | 399 | EXPORT_SYMBOL_GPL(__rcu_read_unlock); |
358 | 400 | ||
359 | #ifdef CONFIG_RCU_CPU_STALL_DETECTOR | ||
360 | |||
361 | #ifdef CONFIG_RCU_CPU_STALL_VERBOSE | 401 | #ifdef CONFIG_RCU_CPU_STALL_VERBOSE |
362 | 402 | ||
363 | /* | 403 | /* |
@@ -367,18 +407,16 @@ EXPORT_SYMBOL_GPL(__rcu_read_unlock); | |||
367 | static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) | 407 | static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) |
368 | { | 408 | { |
369 | unsigned long flags; | 409 | unsigned long flags; |
370 | struct list_head *lp; | ||
371 | int phase; | ||
372 | struct task_struct *t; | 410 | struct task_struct *t; |
373 | 411 | ||
374 | if (rcu_preempted_readers(rnp)) { | 412 | if (!rcu_preempt_blocked_readers_cgp(rnp)) |
375 | raw_spin_lock_irqsave(&rnp->lock, flags); | 413 | return; |
376 | phase = rnp->gpnum & 0x1; | 414 | raw_spin_lock_irqsave(&rnp->lock, flags); |
377 | lp = &rnp->blocked_tasks[phase]; | 415 | t = list_entry(rnp->gp_tasks, |
378 | list_for_each_entry(t, lp, rcu_node_entry) | 416 | struct task_struct, rcu_node_entry); |
379 | sched_show_task(t); | 417 | list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) |
380 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 418 | sched_show_task(t); |
381 | } | 419 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
382 | } | 420 | } |
383 | 421 | ||
384 | /* | 422 | /* |
@@ -408,16 +446,14 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp) | |||
408 | */ | 446 | */ |
409 | static void rcu_print_task_stall(struct rcu_node *rnp) | 447 | static void rcu_print_task_stall(struct rcu_node *rnp) |
410 | { | 448 | { |
411 | struct list_head *lp; | ||
412 | int phase; | ||
413 | struct task_struct *t; | 449 | struct task_struct *t; |
414 | 450 | ||
415 | if (rcu_preempted_readers(rnp)) { | 451 | if (!rcu_preempt_blocked_readers_cgp(rnp)) |
416 | phase = rnp->gpnum & 0x1; | 452 | return; |
417 | lp = &rnp->blocked_tasks[phase]; | 453 | t = list_entry(rnp->gp_tasks, |
418 | list_for_each_entry(t, lp, rcu_node_entry) | 454 | struct task_struct, rcu_node_entry); |
419 | printk(" P%d", t->pid); | 455 | list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) |
420 | } | 456 | printk(" P%d", t->pid); |
421 | } | 457 | } |
422 | 458 | ||
423 | /* | 459 | /* |
@@ -430,18 +466,21 @@ static void rcu_preempt_stall_reset(void) | |||
430 | rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2; | 466 | rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2; |
431 | } | 467 | } |
432 | 468 | ||
433 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ | ||
434 | |||
435 | /* | 469 | /* |
436 | * Check that the list of blocked tasks for the newly completed grace | 470 | * Check that the list of blocked tasks for the newly completed grace |
437 | * period is in fact empty. It is a serious bug to complete a grace | 471 | * period is in fact empty. It is a serious bug to complete a grace |
438 | * period that still has RCU readers blocked! This function must be | 472 | * period that still has RCU readers blocked! This function must be |
439 | * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock | 473 | * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock |
440 | * must be held by the caller. | 474 | * must be held by the caller. |
475 | * | ||
476 | * Also, if there are blocked tasks on the list, they automatically | ||
477 | * block the newly created grace period, so set up ->gp_tasks accordingly. | ||
441 | */ | 478 | */ |
442 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) | 479 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) |
443 | { | 480 | { |
444 | WARN_ON_ONCE(rcu_preempted_readers(rnp)); | 481 | WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)); |
482 | if (!list_empty(&rnp->blkd_tasks)) | ||
483 | rnp->gp_tasks = rnp->blkd_tasks.next; | ||
445 | WARN_ON_ONCE(rnp->qsmask); | 484 | WARN_ON_ONCE(rnp->qsmask); |
446 | } | 485 | } |
447 | 486 | ||
@@ -465,50 +504,68 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, | |||
465 | struct rcu_node *rnp, | 504 | struct rcu_node *rnp, |
466 | struct rcu_data *rdp) | 505 | struct rcu_data *rdp) |
467 | { | 506 | { |
468 | int i; | ||
469 | struct list_head *lp; | 507 | struct list_head *lp; |
470 | struct list_head *lp_root; | 508 | struct list_head *lp_root; |
471 | int retval = 0; | 509 | int retval = 0; |
472 | struct rcu_node *rnp_root = rcu_get_root(rsp); | 510 | struct rcu_node *rnp_root = rcu_get_root(rsp); |
473 | struct task_struct *tp; | 511 | struct task_struct *t; |
474 | 512 | ||
475 | if (rnp == rnp_root) { | 513 | if (rnp == rnp_root) { |
476 | WARN_ONCE(1, "Last CPU thought to be offlined?"); | 514 | WARN_ONCE(1, "Last CPU thought to be offlined?"); |
477 | return 0; /* Shouldn't happen: at least one CPU online. */ | 515 | return 0; /* Shouldn't happen: at least one CPU online. */ |
478 | } | 516 | } |
479 | WARN_ON_ONCE(rnp != rdp->mynode && | 517 | |
480 | (!list_empty(&rnp->blocked_tasks[0]) || | 518 | /* If we are on an internal node, complain bitterly. */ |
481 | !list_empty(&rnp->blocked_tasks[1]) || | 519 | WARN_ON_ONCE(rnp != rdp->mynode); |
482 | !list_empty(&rnp->blocked_tasks[2]) || | ||
483 | !list_empty(&rnp->blocked_tasks[3]))); | ||
484 | 520 | ||
485 | /* | 521 | /* |
486 | * Move tasks up to root rcu_node. Rely on the fact that the | 522 | * Move tasks up to root rcu_node. Don't try to get fancy for |
487 | * root rcu_node can be at most one ahead of the rest of the | 523 | * this corner-case operation -- just put this node's tasks |
488 | * rcu_nodes in terms of gp_num value. This fact allows us to | 524 | * at the head of the root node's list, and update the root node's |
489 | * move the blocked_tasks[] array directly, element by element. | 525 | * ->gp_tasks and ->exp_tasks pointers to those of this node's, |
526 | * if non-NULL. This might result in waiting for more tasks than | ||
527 | * absolutely necessary, but this is a good performance/complexity | ||
528 | * tradeoff. | ||
490 | */ | 529 | */ |
491 | if (rcu_preempted_readers(rnp)) | 530 | if (rcu_preempt_blocked_readers_cgp(rnp)) |
492 | retval |= RCU_OFL_TASKS_NORM_GP; | 531 | retval |= RCU_OFL_TASKS_NORM_GP; |
493 | if (rcu_preempted_readers_exp(rnp)) | 532 | if (rcu_preempted_readers_exp(rnp)) |
494 | retval |= RCU_OFL_TASKS_EXP_GP; | 533 | retval |= RCU_OFL_TASKS_EXP_GP; |
495 | for (i = 0; i < 4; i++) { | 534 | lp = &rnp->blkd_tasks; |
496 | lp = &rnp->blocked_tasks[i]; | 535 | lp_root = &rnp_root->blkd_tasks; |
497 | lp_root = &rnp_root->blocked_tasks[i]; | 536 | while (!list_empty(lp)) { |
498 | while (!list_empty(lp)) { | 537 | t = list_entry(lp->next, typeof(*t), rcu_node_entry); |
499 | tp = list_entry(lp->next, typeof(*tp), rcu_node_entry); | 538 | raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ |
500 | raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ | 539 | list_del(&t->rcu_node_entry); |
501 | list_del(&tp->rcu_node_entry); | 540 | t->rcu_blocked_node = rnp_root; |
502 | tp->rcu_blocked_node = rnp_root; | 541 | list_add(&t->rcu_node_entry, lp_root); |
503 | list_add(&tp->rcu_node_entry, lp_root); | 542 | if (&t->rcu_node_entry == rnp->gp_tasks) |
504 | raw_spin_unlock(&rnp_root->lock); /* irqs remain disabled */ | 543 | rnp_root->gp_tasks = rnp->gp_tasks; |
505 | } | 544 | if (&t->rcu_node_entry == rnp->exp_tasks) |
545 | rnp_root->exp_tasks = rnp->exp_tasks; | ||
546 | #ifdef CONFIG_RCU_BOOST | ||
547 | if (&t->rcu_node_entry == rnp->boost_tasks) | ||
548 | rnp_root->boost_tasks = rnp->boost_tasks; | ||
549 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
550 | raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */ | ||
506 | } | 551 | } |
552 | |||
553 | #ifdef CONFIG_RCU_BOOST | ||
554 | /* In case root is being boosted and leaf is not. */ | ||
555 | raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ | ||
556 | if (rnp_root->boost_tasks != NULL && | ||
557 | rnp_root->boost_tasks != rnp_root->gp_tasks) | ||
558 | rnp_root->boost_tasks = rnp_root->gp_tasks; | ||
559 | raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */ | ||
560 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
561 | |||
562 | rnp->gp_tasks = NULL; | ||
563 | rnp->exp_tasks = NULL; | ||
507 | return retval; | 564 | return retval; |
508 | } | 565 | } |
509 | 566 | ||
510 | /* | 567 | /* |
511 | * Do CPU-offline processing for preemptable RCU. | 568 | * Do CPU-offline processing for preemptible RCU. |
512 | */ | 569 | */ |
513 | static void rcu_preempt_offline_cpu(int cpu) | 570 | static void rcu_preempt_offline_cpu(int cpu) |
514 | { | 571 | { |
@@ -537,7 +594,7 @@ static void rcu_preempt_check_callbacks(int cpu) | |||
537 | } | 594 | } |
538 | 595 | ||
539 | /* | 596 | /* |
540 | * Process callbacks for preemptable RCU. | 597 | * Process callbacks for preemptible RCU. |
541 | */ | 598 | */ |
542 | static void rcu_preempt_process_callbacks(void) | 599 | static void rcu_preempt_process_callbacks(void) |
543 | { | 600 | { |
@@ -546,7 +603,7 @@ static void rcu_preempt_process_callbacks(void) | |||
546 | } | 603 | } |
547 | 604 | ||
548 | /* | 605 | /* |
549 | * Queue a preemptable-RCU callback for invocation after a grace period. | 606 | * Queue a preemptible-RCU callback for invocation after a grace period. |
550 | */ | 607 | */ |
551 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | 608 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) |
552 | { | 609 | { |
@@ -594,8 +651,7 @@ static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex); | |||
594 | */ | 651 | */ |
595 | static int rcu_preempted_readers_exp(struct rcu_node *rnp) | 652 | static int rcu_preempted_readers_exp(struct rcu_node *rnp) |
596 | { | 653 | { |
597 | return !list_empty(&rnp->blocked_tasks[2]) || | 654 | return rnp->exp_tasks != NULL; |
598 | !list_empty(&rnp->blocked_tasks[3]); | ||
599 | } | 655 | } |
600 | 656 | ||
601 | /* | 657 | /* |
@@ -655,13 +711,17 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) | |||
655 | static void | 711 | static void |
656 | sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) | 712 | sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) |
657 | { | 713 | { |
658 | int must_wait; | 714 | unsigned long flags; |
715 | int must_wait = 0; | ||
659 | 716 | ||
660 | raw_spin_lock(&rnp->lock); /* irqs already disabled */ | 717 | raw_spin_lock_irqsave(&rnp->lock, flags); |
661 | list_splice_init(&rnp->blocked_tasks[0], &rnp->blocked_tasks[2]); | 718 | if (list_empty(&rnp->blkd_tasks)) |
662 | list_splice_init(&rnp->blocked_tasks[1], &rnp->blocked_tasks[3]); | 719 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
663 | must_wait = rcu_preempted_readers_exp(rnp); | 720 | else { |
664 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ | 721 | rnp->exp_tasks = rnp->blkd_tasks.next; |
722 | rcu_initiate_boost(rnp, flags); /* releases rnp->lock */ | ||
723 | must_wait = 1; | ||
724 | } | ||
665 | if (!must_wait) | 725 | if (!must_wait) |
666 | rcu_report_exp_rnp(rsp, rnp); | 726 | rcu_report_exp_rnp(rsp, rnp); |
667 | } | 727 | } |
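The rewritten sync_rcu_preempt_exp_init() snapshots everything currently queued on ->blkd_tasks simply by pointing ->exp_tasks at the first entry. A minimal standalone sketch (userspace C, simplified list type, illustrative names) of why a reader that blocks after the snapshot and is enqueued at the head of the list lands outside the segment that starts at *exp_tasks:

#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

static void list_add(struct list_head *new, struct list_head *head)
{
    new->next = head->next; new->prev = head;
    head->next->prev = new; head->next = new;
}

int main(void)
{
    struct list_head blkd = { &blkd, &blkd };
    struct list_head old_reader, late_reader;

    list_add(&old_reader, &blkd);                  /* blocked before the snapshot */
    struct list_head *exp_tasks =
        blkd.next == &blkd ? NULL : blkd.next;     /* snapshot: -> old_reader */

    list_add(&late_reader, &blkd);                 /* blocks after the snapshot */

    /* The expedited segment runs from *exp_tasks toward the tail. */
    int late_in_exp_segment = 0;
    for (struct list_head *p = exp_tasks; p && p != &blkd; p = p->next)
        if (p == &late_reader)
            late_in_exp_segment = 1;
    printf("late reader delays expedited GP: %s\n",
           late_in_exp_segment ? "yes" : "no");
    return 0;
}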
@@ -669,9 +729,7 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) | |||
669 | /* | 729 | /* |
670 | * Wait for an rcu-preempt grace period, but expedite it. The basic idea | 730 | * Wait for an rcu-preempt grace period, but expedite it. The basic idea |
671 | * is to invoke synchronize_sched_expedited() to push all the tasks to | 731 | * is to invoke synchronize_sched_expedited() to push all the tasks to |
672 | * the ->blocked_tasks[] lists, move all entries from the first set of | 732 | * the ->blkd_tasks lists and wait for this list to drain. |
673 | * ->blocked_tasks[] lists to the second set, and finally wait for this | ||
674 | * second set to drain. | ||
675 | */ | 733 | */ |
676 | void synchronize_rcu_expedited(void) | 734 | void synchronize_rcu_expedited(void) |
677 | { | 735 | { |
@@ -703,7 +761,7 @@ void synchronize_rcu_expedited(void) | |||
703 | if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) | 761 | if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) |
704 | goto unlock_mb_ret; /* Others did our work for us. */ | 762 | goto unlock_mb_ret; /* Others did our work for us. */ |
705 | 763 | ||
706 | /* force all RCU readers onto blocked_tasks[]. */ | 764 | /* force all RCU readers onto ->blkd_tasks lists. */ |
707 | synchronize_sched_expedited(); | 765 | synchronize_sched_expedited(); |
708 | 766 | ||
709 | raw_spin_lock_irqsave(&rsp->onofflock, flags); | 767 | raw_spin_lock_irqsave(&rsp->onofflock, flags); |
@@ -715,7 +773,7 @@ void synchronize_rcu_expedited(void) | |||
715 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 773 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
716 | } | 774 | } |
717 | 775 | ||
718 | /* Snapshot current state of ->blocked_tasks[] lists. */ | 776 | /* Snapshot current state of ->blkd_tasks lists. */ |
719 | rcu_for_each_leaf_node(rsp, rnp) | 777 | rcu_for_each_leaf_node(rsp, rnp) |
720 | sync_rcu_preempt_exp_init(rsp, rnp); | 778 | sync_rcu_preempt_exp_init(rsp, rnp); |
721 | if (NUM_RCU_NODES > 1) | 779 | if (NUM_RCU_NODES > 1) |
@@ -723,7 +781,7 @@ void synchronize_rcu_expedited(void) | |||
723 | 781 | ||
724 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); | 782 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); |
725 | 783 | ||
726 | /* Wait for snapshotted ->blocked_tasks[] lists to drain. */ | 784 | /* Wait for snapshotted ->blkd_tasks lists to drain. */ |
727 | rnp = rcu_get_root(rsp); | 785 | rnp = rcu_get_root(rsp); |
728 | wait_event(sync_rcu_preempt_exp_wq, | 786 | wait_event(sync_rcu_preempt_exp_wq, |
729 | sync_rcu_preempt_exp_done(rnp)); | 787 | sync_rcu_preempt_exp_done(rnp)); |
@@ -739,7 +797,7 @@ mb_ret: | |||
739 | EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); | 797 | EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); |
740 | 798 | ||
741 | /* | 799 | /* |
742 | * Check to see if there is any immediate preemptable-RCU-related work | 800 | * Check to see if there is any immediate preemptible-RCU-related work |
743 | * to be done. | 801 | * to be done. |
744 | */ | 802 | */ |
745 | static int rcu_preempt_pending(int cpu) | 803 | static int rcu_preempt_pending(int cpu) |
@@ -749,7 +807,7 @@ static int rcu_preempt_pending(int cpu) | |||
749 | } | 807 | } |
750 | 808 | ||
751 | /* | 809 | /* |
752 | * Does preemptable RCU need the CPU to stay out of dynticks mode? | 810 | * Does preemptible RCU need the CPU to stay out of dynticks mode? |
753 | */ | 811 | */ |
754 | static int rcu_preempt_needs_cpu(int cpu) | 812 | static int rcu_preempt_needs_cpu(int cpu) |
755 | { | 813 | { |
@@ -766,7 +824,7 @@ void rcu_barrier(void) | |||
766 | EXPORT_SYMBOL_GPL(rcu_barrier); | 824 | EXPORT_SYMBOL_GPL(rcu_barrier); |
767 | 825 | ||
768 | /* | 826 | /* |
769 | * Initialize preemptable RCU's per-CPU data. | 827 | * Initialize preemptible RCU's per-CPU data. |
770 | */ | 828 | */ |
771 | static void __cpuinit rcu_preempt_init_percpu_data(int cpu) | 829 | static void __cpuinit rcu_preempt_init_percpu_data(int cpu) |
772 | { | 830 | { |
@@ -774,7 +832,7 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu) | |||
774 | } | 832 | } |
775 | 833 | ||
776 | /* | 834 | /* |
777 | * Move preemptable RCU's callbacks from dying CPU to other online CPU. | 835 | * Move preemptible RCU's callbacks from dying CPU to other online CPU. |
778 | */ | 836 | */ |
779 | static void rcu_preempt_send_cbs_to_online(void) | 837 | static void rcu_preempt_send_cbs_to_online(void) |
780 | { | 838 | { |
@@ -782,7 +840,7 @@ static void rcu_preempt_send_cbs_to_online(void) | |||
782 | } | 840 | } |
783 | 841 | ||
784 | /* | 842 | /* |
785 | * Initialize preemptable RCU's state structures. | 843 | * Initialize preemptible RCU's state structures. |
786 | */ | 844 | */ |
787 | static void __init __rcu_init_preempt(void) | 845 | static void __init __rcu_init_preempt(void) |
788 | { | 846 | { |
@@ -790,7 +848,7 @@ static void __init __rcu_init_preempt(void) | |||
790 | } | 848 | } |
791 | 849 | ||
792 | /* | 850 | /* |
793 | * Check for a task exiting while in a preemptable-RCU read-side | 851 | * Check for a task exiting while in a preemptible-RCU read-side |
794 | * critical section, clean up if so. No need to issue warnings, | 852 | * critical section, clean up if so. No need to issue warnings, |
795 | * as debug_check_no_locks_held() already does this if lockdep | 853 | * as debug_check_no_locks_held() already does this if lockdep |
796 | * is enabled. | 854 | * is enabled. |
@@ -802,11 +860,13 @@ void exit_rcu(void) | |||
802 | if (t->rcu_read_lock_nesting == 0) | 860 | if (t->rcu_read_lock_nesting == 0) |
803 | return; | 861 | return; |
804 | t->rcu_read_lock_nesting = 1; | 862 | t->rcu_read_lock_nesting = 1; |
805 | rcu_read_unlock(); | 863 | __rcu_read_unlock(); |
806 | } | 864 | } |
807 | 865 | ||
808 | #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | 866 | #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ |
809 | 867 | ||
868 | static struct rcu_state *rcu_state = &rcu_sched_state; | ||
869 | |||
810 | /* | 870 | /* |
811 | * Tell them what RCU they are running. | 871 | * Tell them what RCU they are running. |
812 | */ | 872 | */ |
@@ -836,7 +896,7 @@ void rcu_force_quiescent_state(void) | |||
836 | EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); | 896 | EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); |
837 | 897 | ||
838 | /* | 898 | /* |
839 | * Because preemptable RCU does not exist, we never have to check for | 899 | * Because preemptible RCU does not exist, we never have to check for |
840 | * CPUs being in quiescent states. | 900 | * CPUs being in quiescent states. |
841 | */ | 901 | */ |
842 | static void rcu_preempt_note_context_switch(int cpu) | 902 | static void rcu_preempt_note_context_switch(int cpu) |
@@ -844,10 +904,10 @@ static void rcu_preempt_note_context_switch(int cpu) | |||
844 | } | 904 | } |
845 | 905 | ||
846 | /* | 906 | /* |
847 | * Because preemptable RCU does not exist, there are never any preempted | 907 | * Because preemptible RCU does not exist, there are never any preempted |
848 | * RCU readers. | 908 | * RCU readers. |
849 | */ | 909 | */ |
850 | static int rcu_preempted_readers(struct rcu_node *rnp) | 910 | static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) |
851 | { | 911 | { |
852 | return 0; | 912 | return 0; |
853 | } | 913 | } |
@@ -862,10 +922,8 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) | |||
862 | 922 | ||
863 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | 923 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ |
864 | 924 | ||
865 | #ifdef CONFIG_RCU_CPU_STALL_DETECTOR | ||
866 | |||
867 | /* | 925 | /* |
868 | * Because preemptable RCU does not exist, we never have to check for | 926 | * Because preemptible RCU does not exist, we never have to check for |
869 | * tasks blocked within RCU read-side critical sections. | 927 | * tasks blocked within RCU read-side critical sections. |
870 | */ | 928 | */ |
871 | static void rcu_print_detail_task_stall(struct rcu_state *rsp) | 929 | static void rcu_print_detail_task_stall(struct rcu_state *rsp) |
@@ -873,7 +931,7 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp) | |||
873 | } | 931 | } |
874 | 932 | ||
875 | /* | 933 | /* |
876 | * Because preemptable RCU does not exist, we never have to check for | 934 | * Because preemptible RCU does not exist, we never have to check for |
877 | * tasks blocked within RCU read-side critical sections. | 935 | * tasks blocked within RCU read-side critical sections. |
878 | */ | 936 | */ |
879 | static void rcu_print_task_stall(struct rcu_node *rnp) | 937 | static void rcu_print_task_stall(struct rcu_node *rnp) |
@@ -888,10 +946,8 @@ static void rcu_preempt_stall_reset(void) | |||
888 | { | 946 | { |
889 | } | 947 | } |
890 | 948 | ||
891 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ | ||
892 | |||
893 | /* | 949 | /* |
894 | * Because there is no preemptable RCU, there can be no readers blocked, | 950 | * Because there is no preemptible RCU, there can be no readers blocked, |
895 | * so there is no need to check for blocked tasks. So check only for | 951 | * so there is no need to check for blocked tasks. So check only for |
896 | * bogus qsmask values. | 952 | * bogus qsmask values. |
897 | */ | 953 | */ |
@@ -903,7 +959,7 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) | |||
903 | #ifdef CONFIG_HOTPLUG_CPU | 959 | #ifdef CONFIG_HOTPLUG_CPU |
904 | 960 | ||
905 | /* | 961 | /* |
906 | * Because preemptable RCU does not exist, it never needs to migrate | 962 | * Because preemptible RCU does not exist, it never needs to migrate |
907 | * tasks that were blocked within RCU read-side critical sections, and | 963 | * tasks that were blocked within RCU read-side critical sections, and |
908 | * such non-existent tasks cannot possibly have been blocking the current | 964 | * such non-existent tasks cannot possibly have been blocking the current |
909 | * grace period. | 965 | * grace period. |
@@ -916,7 +972,7 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, | |||
916 | } | 972 | } |
917 | 973 | ||
918 | /* | 974 | /* |
919 | * Because preemptable RCU does not exist, it never needs CPU-offline | 975 | * Because preemptible RCU does not exist, it never needs CPU-offline |
920 | * processing. | 976 | * processing. |
921 | */ | 977 | */ |
922 | static void rcu_preempt_offline_cpu(int cpu) | 978 | static void rcu_preempt_offline_cpu(int cpu) |
@@ -926,7 +982,7 @@ static void rcu_preempt_offline_cpu(int cpu) | |||
926 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | 982 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ |
927 | 983 | ||
928 | /* | 984 | /* |
929 | * Because preemptable RCU does not exist, it never has any callbacks | 985 | * Because preemptible RCU does not exist, it never has any callbacks |
930 | * to check. | 986 | * to check. |
931 | */ | 987 | */ |
932 | static void rcu_preempt_check_callbacks(int cpu) | 988 | static void rcu_preempt_check_callbacks(int cpu) |
@@ -934,7 +990,7 @@ static void rcu_preempt_check_callbacks(int cpu) | |||
934 | } | 990 | } |
935 | 991 | ||
936 | /* | 992 | /* |
937 | * Because preemptable RCU does not exist, it never has any callbacks | 993 | * Because preemptible RCU does not exist, it never has any callbacks |
938 | * to process. | 994 | * to process. |
939 | */ | 995 | */ |
940 | static void rcu_preempt_process_callbacks(void) | 996 | static void rcu_preempt_process_callbacks(void) |
@@ -943,7 +999,7 @@ static void rcu_preempt_process_callbacks(void) | |||
943 | 999 | ||
944 | /* | 1000 | /* |
945 | * Wait for an rcu-preempt grace period, but make it happen quickly. | 1001 | * Wait for an rcu-preempt grace period, but make it happen quickly. |
946 | * But because preemptable RCU does not exist, map to rcu-sched. | 1002 | * But because preemptible RCU does not exist, map to rcu-sched. |
947 | */ | 1003 | */ |
948 | void synchronize_rcu_expedited(void) | 1004 | void synchronize_rcu_expedited(void) |
949 | { | 1005 | { |
@@ -954,7 +1010,7 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); | |||
954 | #ifdef CONFIG_HOTPLUG_CPU | 1010 | #ifdef CONFIG_HOTPLUG_CPU |
955 | 1011 | ||
956 | /* | 1012 | /* |
957 | * Because preemptable RCU does not exist, there is never any need to | 1013 | * Because preemptible RCU does not exist, there is never any need to |
958 | * report on tasks preempted in RCU read-side critical sections during | 1014 | * report on tasks preempted in RCU read-side critical sections during |
959 | * expedited RCU grace periods. | 1015 | * expedited RCU grace periods. |
960 | */ | 1016 | */ |
@@ -966,7 +1022,7 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) | |||
966 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | 1022 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ |
967 | 1023 | ||
968 | /* | 1024 | /* |
969 | * Because preemptable RCU does not exist, it never has any work to do. | 1025 | * Because preemptible RCU does not exist, it never has any work to do. |
970 | */ | 1026 | */ |
971 | static int rcu_preempt_pending(int cpu) | 1027 | static int rcu_preempt_pending(int cpu) |
972 | { | 1028 | { |
@@ -974,7 +1030,7 @@ static int rcu_preempt_pending(int cpu) | |||
974 | } | 1030 | } |
975 | 1031 | ||
976 | /* | 1032 | /* |
977 | * Because preemptable RCU does not exist, it never needs any CPU. | 1033 | * Because preemptible RCU does not exist, it never needs any CPU. |
978 | */ | 1034 | */ |
979 | static int rcu_preempt_needs_cpu(int cpu) | 1035 | static int rcu_preempt_needs_cpu(int cpu) |
980 | { | 1036 | { |
@@ -982,7 +1038,7 @@ static int rcu_preempt_needs_cpu(int cpu) | |||
982 | } | 1038 | } |
983 | 1039 | ||
984 | /* | 1040 | /* |
985 | * Because preemptable RCU does not exist, rcu_barrier() is just | 1041 | * Because preemptible RCU does not exist, rcu_barrier() is just |
986 | * another name for rcu_barrier_sched(). | 1042 | * another name for rcu_barrier_sched(). |
987 | */ | 1043 | */ |
988 | void rcu_barrier(void) | 1044 | void rcu_barrier(void) |
@@ -992,7 +1048,7 @@ void rcu_barrier(void) | |||
992 | EXPORT_SYMBOL_GPL(rcu_barrier); | 1048 | EXPORT_SYMBOL_GPL(rcu_barrier); |
993 | 1049 | ||
994 | /* | 1050 | /* |
995 | * Because preemptable RCU does not exist, there is no per-CPU | 1051 | * Because preemptible RCU does not exist, there is no per-CPU |
996 | * data to initialize. | 1052 | * data to initialize. |
997 | */ | 1053 | */ |
998 | static void __cpuinit rcu_preempt_init_percpu_data(int cpu) | 1054 | static void __cpuinit rcu_preempt_init_percpu_data(int cpu) |
@@ -1000,14 +1056,14 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu) | |||
1000 | } | 1056 | } |
1001 | 1057 | ||
1002 | /* | 1058 | /* |
1003 | * Because there is no preemptable RCU, there are no callbacks to move. | 1059 | * Because there is no preemptible RCU, there are no callbacks to move. |
1004 | */ | 1060 | */ |
1005 | static void rcu_preempt_send_cbs_to_online(void) | 1061 | static void rcu_preempt_send_cbs_to_online(void) |
1006 | { | 1062 | { |
1007 | } | 1063 | } |
1008 | 1064 | ||
1009 | /* | 1065 | /* |
1010 | * Because preemptable RCU does not exist, it need not be initialized. | 1066 | * Because preemptible RCU does not exist, it need not be initialized. |
1011 | */ | 1067 | */ |
1012 | static void __init __rcu_init_preempt(void) | 1068 | static void __init __rcu_init_preempt(void) |
1013 | { | 1069 | { |
@@ -1015,6 +1071,276 @@ static void __init __rcu_init_preempt(void) | |||
1015 | 1071 | ||
1016 | #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ | 1072 | #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ |
1017 | 1073 | ||
1074 | #ifdef CONFIG_RCU_BOOST | ||
1075 | |||
1076 | #include "rtmutex_common.h" | ||
1077 | |||
1078 | #ifdef CONFIG_RCU_TRACE | ||
1079 | |||
1080 | static void rcu_initiate_boost_trace(struct rcu_node *rnp) | ||
1081 | { | ||
1082 | if (list_empty(&rnp->blkd_tasks)) | ||
1083 | rnp->n_balk_blkd_tasks++; | ||
1084 | else if (rnp->exp_tasks == NULL && rnp->gp_tasks == NULL) | ||
1085 | rnp->n_balk_exp_gp_tasks++; | ||
1086 | else if (rnp->gp_tasks != NULL && rnp->boost_tasks != NULL) | ||
1087 | rnp->n_balk_boost_tasks++; | ||
1088 | else if (rnp->gp_tasks != NULL && rnp->qsmask != 0) | ||
1089 | rnp->n_balk_notblocked++; | ||
1090 | else if (rnp->gp_tasks != NULL && | ||
1091 | ULONG_CMP_LT(jiffies, rnp->boost_time)) | ||
1092 | rnp->n_balk_notyet++; | ||
1093 | else | ||
1094 | rnp->n_balk_nos++; | ||
1095 | } | ||
1096 | |||
1097 | #else /* #ifdef CONFIG_RCU_TRACE */ | ||
1098 | |||
1099 | static void rcu_initiate_boost_trace(struct rcu_node *rnp) | ||
1100 | { | ||
1101 | } | ||
1102 | |||
1103 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ | ||
1104 | |||
1105 | /* | ||
1106 | * Carry out RCU priority boosting on the task indicated by ->exp_tasks | ||
1107 | * or ->boost_tasks, advancing the pointer to the next task in the | ||
1108 | * ->blkd_tasks list. | ||
1109 | * | ||
1110 | * Note that irqs must be enabled: boosting the task can block. | ||
1111 | * Returns 1 if there are more tasks needing to be boosted. | ||
1112 | */ | ||
1113 | static int rcu_boost(struct rcu_node *rnp) | ||
1114 | { | ||
1115 | unsigned long flags; | ||
1116 | struct rt_mutex mtx; | ||
1117 | struct task_struct *t; | ||
1118 | struct list_head *tb; | ||
1119 | |||
1120 | if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) | ||
1121 | return 0; /* Nothing left to boost. */ | ||
1122 | |||
1123 | raw_spin_lock_irqsave(&rnp->lock, flags); | ||
1124 | |||
1125 | /* | ||
1126 | * Recheck under the lock: all tasks in need of boosting | ||
1127 | * might exit their RCU read-side critical sections on their own. | ||
1128 | */ | ||
1129 | if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) { | ||
1130 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
1131 | return 0; | ||
1132 | } | ||
1133 | |||
1134 | /* | ||
1135 | * Preferentially boost tasks blocking expedited grace periods. | ||
1136 | * This cannot starve the normal grace periods because a second | ||
1137 | * expedited grace period must boost all blocked tasks, including | ||
1138 | * those blocking the pre-existing normal grace period. | ||
1139 | */ | ||
1140 | if (rnp->exp_tasks != NULL) { | ||
1141 | tb = rnp->exp_tasks; | ||
1142 | rnp->n_exp_boosts++; | ||
1143 | } else { | ||
1144 | tb = rnp->boost_tasks; | ||
1145 | rnp->n_normal_boosts++; | ||
1146 | } | ||
1147 | rnp->n_tasks_boosted++; | ||
1148 | |||
1149 | /* | ||
1150 | * We boost task t by manufacturing an rt_mutex that appears to | ||
1151 | * be held by task t. We leave a pointer to that rt_mutex where | ||
1152 | * task t can find it, and task t will release the mutex when it | ||
1153 | * exits its outermost RCU read-side critical section. Then | ||
1154 | * simply acquiring this artificial rt_mutex will boost task | ||
1155 | * t's priority. (Thanks to tglx for suggesting this approach!) | ||
1156 | * | ||
1157 | * Note that task t must acquire rnp->lock to remove itself from | ||
1158 | * the ->blkd_tasks list, which it will do from exit() if from | ||
1159 | * nowhere else. We therefore are guaranteed that task t will | ||
1160 | * stay around at least until we drop rnp->lock. Note that | ||
1161 | * rnp->lock also resolves races between our priority boosting | ||
1162 | * and task t's exiting its outermost RCU read-side critical | ||
1163 | * section. | ||
1164 | */ | ||
1165 | t = container_of(tb, struct task_struct, rcu_node_entry); | ||
1166 | rt_mutex_init_proxy_locked(&mtx, t); | ||
1167 | t->rcu_boost_mutex = &mtx; | ||
1168 | t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED; | ||
1169 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
1170 | rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */ | ||
1171 | rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ | ||
1172 | |||
1173 | return rnp->exp_tasks != NULL || rnp->boost_tasks != NULL; | ||
1174 | } | ||
1175 | |||
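There is no userspace counterpart to the proxy-lock trick described in the comment above, but the priority-inheritance half of it can be illustrated with a POSIX PI mutex. This is only a loose analogue under that assumption, not the kernel mechanism (compile with gcc -pthread; output text is illustrative):

#define _GNU_SOURCE
#include <pthread.h>
#include <stdio.h>

int main(void)
{
    pthread_mutexattr_t attr;
    pthread_mutex_t boost_mtx;

    pthread_mutexattr_init(&attr);
    /* PTHREAD_PRIO_INHERIT: while a thread holds this mutex, it runs at
     * least at the priority of the highest-priority thread blocked on it. */
    pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_INHERIT);
    pthread_mutex_init(&boost_mtx, &attr);

    /* In the patch the roles are: rt_mutex_init_proxy_locked() makes the
     * preempted reader the owner without it ever calling lock();
     * rcu_boost() then blocks on the mutex, lending its priority to the
     * reader; the reader releases the mutex in rcu_read_unlock_special()
     * when it leaves its outermost critical section, dropping the boost. */
    pthread_mutex_lock(&boost_mtx);
    pthread_mutex_unlock(&boost_mtx);

    pthread_mutex_destroy(&boost_mtx);
    pthread_mutexattr_destroy(&attr);
    puts("PI mutex exercised");
    return 0;
}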
1176 | /* | ||
1177 | * Timer handler to initiate waking up of boost kthreads that | ||
1178 | * have yielded the CPU due to excessive numbers of tasks to | ||
1179 | * boost. We wake up the per-rcu_node kthread, which in turn | ||
1180 | * will wake up the booster kthread. | ||
1181 | */ | ||
1182 | static void rcu_boost_kthread_timer(unsigned long arg) | ||
1183 | { | ||
1184 | invoke_rcu_node_kthread((struct rcu_node *)arg); | ||
1185 | } | ||
1186 | |||
1187 | /* | ||
1188 | * Priority-boosting kthread. One per leaf rcu_node and one for the | ||
1189 | * root rcu_node. | ||
1190 | */ | ||
1191 | static int rcu_boost_kthread(void *arg) | ||
1192 | { | ||
1193 | struct rcu_node *rnp = (struct rcu_node *)arg; | ||
1194 | int spincnt = 0; | ||
1195 | int more2boost; | ||
1196 | |||
1197 | for (;;) { | ||
1198 | rnp->boost_kthread_status = RCU_KTHREAD_WAITING; | ||
1199 | wait_event_interruptible(rnp->boost_wq, rnp->boost_tasks || | ||
1200 | rnp->exp_tasks); | ||
1201 | rnp->boost_kthread_status = RCU_KTHREAD_RUNNING; | ||
1202 | more2boost = rcu_boost(rnp); | ||
1203 | if (more2boost) | ||
1204 | spincnt++; | ||
1205 | else | ||
1206 | spincnt = 0; | ||
1207 | if (spincnt > 10) { | ||
1208 | rcu_yield(rcu_boost_kthread_timer, (unsigned long)rnp); | ||
1209 | spincnt = 0; | ||
1210 | } | ||
1211 | } | ||
1212 | /* NOTREACHED */ | ||
1213 | return 0; | ||
1214 | } | ||
1215 | |||
1216 | /* | ||
1217 | * Check to see if it is time to start boosting RCU readers that are | ||
1218 | * blocking the current grace period, and, if so, tell the per-rcu_node | ||
1219 | * kthread to start boosting them. If there is an expedited grace | ||
1220 | * period in progress, it is always time to boost. | ||
1221 | * | ||
1222 | * The caller must hold rnp->lock, which this function releases, | ||
1223 | * but irqs remain disabled. The ->boost_kthread_task is immortal, | ||
1224 | * so we don't need to worry about it going away. | ||
1225 | */ | ||
1226 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) | ||
1227 | { | ||
1228 | struct task_struct *t; | ||
1229 | |||
1230 | if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) { | ||
1231 | rnp->n_balk_exp_gp_tasks++; | ||
1232 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
1233 | return; | ||
1234 | } | ||
1235 | if (rnp->exp_tasks != NULL || | ||
1236 | (rnp->gp_tasks != NULL && | ||
1237 | rnp->boost_tasks == NULL && | ||
1238 | rnp->qsmask == 0 && | ||
1239 | ULONG_CMP_GE(jiffies, rnp->boost_time))) { | ||
1240 | if (rnp->exp_tasks == NULL) | ||
1241 | rnp->boost_tasks = rnp->gp_tasks; | ||
1242 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
1243 | t = rnp->boost_kthread_task; | ||
1244 | if (t != NULL) | ||
1245 | wake_up_process(t); | ||
1246 | } else { | ||
1247 | rcu_initiate_boost_trace(rnp); | ||
1248 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
1249 | } | ||
1250 | } | ||
1251 | |||
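The deferral test uses ULONG_CMP_GE() rather than a plain >= so that it keeps working across jiffies wraparound. A small worked example, assuming the macro body matches the kernel's wraparound-safe definition:

#include <limits.h>
#include <stdio.h>

#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b))

int main(void)
{
    unsigned long boost_time = ULONG_MAX - 5;   /* shortly before wraparound */
    unsigned long jiffies = 10;                 /* shortly after wraparound  */

    /* Numerically jiffies < boost_time, but in modular time it is later. */
    printf("plain >=     : %d\n", jiffies >= boost_time);            /* 0 */
    printf("ULONG_CMP_GE : %d\n", ULONG_CMP_GE(jiffies, boost_time)); /* 1 */
    return 0;
}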
1252 | /* | ||
1253 | * Set the affinity of the boost kthread. The CPU-hotplug locks are | ||
1254 | * held, so no one should be messing with the existence of the boost | ||
1255 | * kthread. | ||
1256 | */ | ||
1257 | static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, | ||
1258 | cpumask_var_t cm) | ||
1259 | { | ||
1260 | struct task_struct *t; | ||
1261 | |||
1262 | t = rnp->boost_kthread_task; | ||
1263 | if (t != NULL) | ||
1264 | set_cpus_allowed_ptr(rnp->boost_kthread_task, cm); | ||
1265 | } | ||
1266 | |||
1267 | #define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000) | ||
1268 | |||
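RCU_BOOST_DELAY_JIFFIES converts the configured boost delay from milliseconds to scheduler ticks, rounding up. A small worked example with assumed values (CONFIG_RCU_BOOST_DELAY=500 and HZ=250 are illustrative, not claims about defaults):

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
    int rcu_boost_delay_ms = 500;   /* stands in for CONFIG_RCU_BOOST_DELAY */
    int hz = 250;                   /* stands in for HZ */

    /* 500 ms at 250 ticks/s -> DIV_ROUND_UP(125000, 1000) = 125 jiffies. */
    printf("RCU_BOOST_DELAY_JIFFIES = %d\n",
           DIV_ROUND_UP(rcu_boost_delay_ms * hz, 1000));
    return 0;
}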
1269 | /* | ||
1270 | * Do priority-boost accounting for the start of a new grace period. | ||
1271 | */ | ||
1272 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) | ||
1273 | { | ||
1274 | rnp->boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES; | ||
1275 | } | ||
1276 | |||
1277 | /* | ||
1278 | * Initialize the RCU-boost waitqueue. | ||
1279 | */ | ||
1280 | static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp) | ||
1281 | { | ||
1282 | init_waitqueue_head(&rnp->boost_wq); | ||
1283 | } | ||
1284 | |||
1285 | /* | ||
1286 | * Create an RCU-boost kthread for the specified node if one does not | ||
1287 | * already exist. We only create this kthread for preemptible RCU. | ||
1288 | * Returns zero if all is well, a negated errno otherwise. | ||
1289 | */ | ||
1290 | static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | ||
1291 | struct rcu_node *rnp, | ||
1292 | int rnp_index) | ||
1293 | { | ||
1294 | unsigned long flags; | ||
1295 | struct sched_param sp; | ||
1296 | struct task_struct *t; | ||
1297 | |||
1298 | if (&rcu_preempt_state != rsp) | ||
1299 | return 0; | ||
1300 | if (rnp->boost_kthread_task != NULL) | ||
1301 | return 0; | ||
1302 | t = kthread_create(rcu_boost_kthread, (void *)rnp, | ||
1303 | "rcub%d", rnp_index); | ||
1304 | if (IS_ERR(t)) | ||
1305 | return PTR_ERR(t); | ||
1306 | raw_spin_lock_irqsave(&rnp->lock, flags); | ||
1307 | rnp->boost_kthread_task = t; | ||
1308 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
1309 | wake_up_process(t); | ||
1310 | sp.sched_priority = RCU_KTHREAD_PRIO; | ||
1311 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); | ||
1312 | return 0; | ||
1313 | } | ||
1314 | |||
1315 | #else /* #ifdef CONFIG_RCU_BOOST */ | ||
1316 | |||
1317 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) | ||
1318 | { | ||
1319 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | ||
1320 | } | ||
1321 | |||
1322 | static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, | ||
1323 | cpumask_var_t cm) | ||
1324 | { | ||
1325 | } | ||
1326 | |||
1327 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) | ||
1328 | { | ||
1329 | } | ||
1330 | |||
1331 | static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp) | ||
1332 | { | ||
1333 | } | ||
1334 | |||
1335 | static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | ||
1336 | struct rcu_node *rnp, | ||
1337 | int rnp_index) | ||
1338 | { | ||
1339 | return 0; | ||
1340 | } | ||
1341 | |||
1342 | #endif /* #else #ifdef CONFIG_RCU_BOOST */ | ||
1343 | |||
1018 | #ifndef CONFIG_SMP | 1344 | #ifndef CONFIG_SMP |
1019 | 1345 | ||
1020 | void synchronize_sched_expedited(void) | 1346 | void synchronize_sched_expedited(void) |
@@ -1187,8 +1513,8 @@ static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); | |||
1187 | * | 1513 | * |
1188 | * Because it is not legal to invoke rcu_process_callbacks() with irqs | 1514 | * Because it is not legal to invoke rcu_process_callbacks() with irqs |
1189 | * disabled, we do one pass of force_quiescent_state(), then do a | 1515 | * disabled, we do one pass of force_quiescent_state(), then do a |
1190 | * raise_softirq() to cause rcu_process_callbacks() to be invoked later. | 1516 | * invoke_rcu_cpu_kthread() to cause rcu_process_callbacks() to be invoked |
1191 | * The per-cpu rcu_dyntick_drain variable controls the sequencing. | 1517 | * later. The per-cpu rcu_dyntick_drain variable controls the sequencing. |
1192 | */ | 1518 | */ |
1193 | int rcu_needs_cpu(int cpu) | 1519 | int rcu_needs_cpu(int cpu) |
1194 | { | 1520 | { |
@@ -1239,7 +1565,7 @@ int rcu_needs_cpu(int cpu) | |||
1239 | 1565 | ||
1240 | /* If RCU callbacks are still pending, RCU still needs this CPU. */ | 1566 | /* If RCU callbacks are still pending, RCU still needs this CPU. */ |
1241 | if (c) | 1567 | if (c) |
1242 | raise_softirq(RCU_SOFTIRQ); | 1568 | invoke_rcu_cpu_kthread(); |
1243 | return c; | 1569 | return c; |
1244 | } | 1570 | } |
1245 | 1571 | ||