path: root/kernel/rcutree_plugin.h
Diffstat (limited to 'kernel/rcutree_plugin.h')
-rw-r--r--	kernel/rcutree_plugin.h	568
1 files changed, 447 insertions, 121 deletions
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index a3638710dc67..3f6559a5f5cd 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -1,7 +1,7 @@
 /*
  * Read-Copy Update mechanism for mutual exclusion (tree-based version)
  * Internal non-public definitions that provide either classic
- * or preemptable semantics.
+ * or preemptible semantics.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -54,10 +54,6 @@ static void __init rcu_bootup_announce_oddness(void)
 #ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE
 	printk(KERN_INFO "\tRCU torture testing starts during boot.\n");
 #endif
-#ifndef CONFIG_RCU_CPU_STALL_DETECTOR
-	printk(KERN_INFO
-	       "\tRCU-based detection of stalled CPUs is disabled.\n");
-#endif
 #if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE)
 	printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n");
 #endif
@@ -70,6 +66,7 @@ static void __init rcu_bootup_announce_oddness(void)
 
 struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state);
 DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
+static struct rcu_state *rcu_state = &rcu_preempt_state;
 
 static int rcu_preempted_readers_exp(struct rcu_node *rnp);
 
@@ -78,7 +75,7 @@ static int rcu_preempted_readers_exp(struct rcu_node *rnp);
  */
 static void __init rcu_bootup_announce(void)
 {
-	printk(KERN_INFO "Preemptable hierarchical RCU implementation.\n");
+	printk(KERN_INFO "Preemptible hierarchical RCU implementation.\n");
 	rcu_bootup_announce_oddness();
 }
 
@@ -111,7 +108,7 @@ void rcu_force_quiescent_state(void)
 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
 
 /*
- * Record a preemptable-RCU quiescent state for the specified CPU. Note
+ * Record a preemptible-RCU quiescent state for the specified CPU. Note
  * that this just means that the task currently running on the CPU is
  * not in a quiescent state. There might be any number of tasks blocked
  * while in an RCU read-side critical section.
@@ -134,12 +131,12 @@ static void rcu_preempt_qs(int cpu)
  * We have entered the scheduler, and the current task might soon be
  * context-switched away from. If this task is in an RCU read-side
  * critical section, we will no longer be able to rely on the CPU to
- * record that fact, so we enqueue the task on the appropriate entry
- * of the blocked_tasks[] array. The task will dequeue itself when
- * it exits the outermost enclosing RCU read-side critical section.
- * Therefore, the current grace period cannot be permitted to complete
- * until the blocked_tasks[] entry indexed by the low-order bit of
- * rnp->gpnum empties.
+ * record that fact, so we enqueue the task on the blkd_tasks list.
+ * The task will dequeue itself when it exits the outermost enclosing
+ * RCU read-side critical section. Therefore, the current grace period
+ * cannot be permitted to complete until the blkd_tasks list entries
+ * predating the current grace period drain, in other words, until
+ * rnp->gp_tasks becomes NULL.
  *
  * Caller must disable preemption.
  */
@@ -147,7 +144,6 @@ static void rcu_preempt_note_context_switch(int cpu)
 {
 	struct task_struct *t = current;
 	unsigned long flags;
-	int phase;
 	struct rcu_data *rdp;
 	struct rcu_node *rnp;
 
@@ -169,15 +165,30 @@ static void rcu_preempt_note_context_switch(int cpu)
 		 * (i.e., this CPU has not yet passed through a quiescent
 		 * state for the current grace period), then as long
 		 * as that task remains queued, the current grace period
-		 * cannot end.
+		 * cannot end. Note that there is some uncertainty as
+		 * to exactly when the current grace period started.
+		 * We take a conservative approach, which can result
+		 * in unnecessarily waiting on tasks that started very
+		 * slightly after the current grace period began. C'est
+		 * la vie!!!
 		 *
 		 * But first, note that the current CPU must still be
 		 * on line!
 		 */
 		WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0);
 		WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
-		phase = (rnp->gpnum + !(rnp->qsmask & rdp->grpmask)) & 0x1;
-		list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]);
+		if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) {
+			list_add(&t->rcu_node_entry, rnp->gp_tasks->prev);
+			rnp->gp_tasks = &t->rcu_node_entry;
+#ifdef CONFIG_RCU_BOOST
+			if (rnp->boost_tasks != NULL)
+				rnp->boost_tasks = rnp->gp_tasks;
+#endif /* #ifdef CONFIG_RCU_BOOST */
+		} else {
+			list_add(&t->rcu_node_entry, &rnp->blkd_tasks);
+			if (rnp->qsmask & rdp->grpmask)
+				rnp->gp_tasks = &t->rcu_node_entry;
+		}
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 	}
 
@@ -196,7 +207,7 @@ static void rcu_preempt_note_context_switch(int cpu)
 }
 
 /*
- * Tree-preemptable RCU implementation for rcu_read_lock().
+ * Tree-preemptible RCU implementation for rcu_read_lock().
  * Just increment ->rcu_read_lock_nesting, shared state will be updated
  * if we block.
  */
@@ -212,12 +223,9 @@ EXPORT_SYMBOL_GPL(__rcu_read_lock);
  * for the specified rcu_node structure. If the caller needs a reliable
  * answer, it must hold the rcu_node's ->lock.
  */
-static int rcu_preempted_readers(struct rcu_node *rnp)
+static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
 {
-	int phase = rnp->gpnum & 0x1;
-
-	return !list_empty(&rnp->blocked_tasks[phase]) ||
-	       !list_empty(&rnp->blocked_tasks[phase + 2]);
+	return rnp->gp_tasks != NULL;
 }
 
 /*
@@ -233,7 +241,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
 	unsigned long mask;
 	struct rcu_node *rnp_p;
 
-	if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) {
+	if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 		return; /* Still need more quiescent states! */
 	}
@@ -257,6 +265,21 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
 }
 
 /*
+ * Advance a ->blkd_tasks-list pointer to the next entry, instead
+ * returning NULL if at the end of the list.
+ */
+static struct list_head *rcu_next_node_entry(struct task_struct *t,
+					     struct rcu_node *rnp)
+{
+	struct list_head *np;
+
+	np = t->rcu_node_entry.next;
+	if (np == &rnp->blkd_tasks)
+		np = NULL;
+	return np;
+}
+
+/*
  * Handle special cases during rcu_read_unlock(), such as needing to
  * notify RCU core processing or task having blocked during the RCU
  * read-side critical section.
@@ -266,6 +289,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
 	int empty;
 	int empty_exp;
 	unsigned long flags;
+	struct list_head *np;
 	struct rcu_node *rnp;
 	int special;
 
@@ -306,10 +330,19 @@ static void rcu_read_unlock_special(struct task_struct *t)
 			break;
 		raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
 	}
-	empty = !rcu_preempted_readers(rnp);
+	empty = !rcu_preempt_blocked_readers_cgp(rnp);
 	empty_exp = !rcu_preempted_readers_exp(rnp);
 	smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
+	np = rcu_next_node_entry(t, rnp);
 	list_del_init(&t->rcu_node_entry);
+	if (&t->rcu_node_entry == rnp->gp_tasks)
+		rnp->gp_tasks = np;
+	if (&t->rcu_node_entry == rnp->exp_tasks)
+		rnp->exp_tasks = np;
+#ifdef CONFIG_RCU_BOOST
+	if (&t->rcu_node_entry == rnp->boost_tasks)
+		rnp->boost_tasks = np;
+#endif /* #ifdef CONFIG_RCU_BOOST */
 	t->rcu_blocked_node = NULL;
 
 	/*
@@ -322,6 +355,15 @@ static void rcu_read_unlock_special(struct task_struct *t)
 	else
 		rcu_report_unblock_qs_rnp(rnp, flags);
 
+#ifdef CONFIG_RCU_BOOST
+	/* Unboost if we were boosted. */
+	if (special & RCU_READ_UNLOCK_BOOSTED) {
+		t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED;
+		rt_mutex_unlock(t->rcu_boost_mutex);
+		t->rcu_boost_mutex = NULL;
+	}
+#endif /* #ifdef CONFIG_RCU_BOOST */
+
 	/*
 	 * If this was the last task on the expedited lists,
 	 * then we need to report up the rcu_node hierarchy.
@@ -334,7 +376,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
 }
 
 /*
- * Tree-preemptable RCU implementation for rcu_read_unlock().
+ * Tree-preemptible RCU implementation for rcu_read_unlock().
  * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
  * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
  * invoke rcu_read_unlock_special() to clean up after a context switch
@@ -356,8 +398,6 @@ void __rcu_read_unlock(void)
 }
 EXPORT_SYMBOL_GPL(__rcu_read_unlock);
 
-#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
-
 #ifdef CONFIG_RCU_CPU_STALL_VERBOSE
 
 /*
@@ -367,18 +407,16 @@ EXPORT_SYMBOL_GPL(__rcu_read_unlock);
 static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
 {
 	unsigned long flags;
-	struct list_head *lp;
-	int phase;
 	struct task_struct *t;
 
-	if (rcu_preempted_readers(rnp)) {
-		raw_spin_lock_irqsave(&rnp->lock, flags);
-		phase = rnp->gpnum & 0x1;
-		lp = &rnp->blocked_tasks[phase];
-		list_for_each_entry(t, lp, rcu_node_entry)
-			sched_show_task(t);
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
-	}
+	if (!rcu_preempt_blocked_readers_cgp(rnp))
+		return;
+	raw_spin_lock_irqsave(&rnp->lock, flags);
+	t = list_entry(rnp->gp_tasks,
+		       struct task_struct, rcu_node_entry);
+	list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
+		sched_show_task(t);
+	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 }
 
 /*
@@ -408,16 +446,14 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp)
  */
 static void rcu_print_task_stall(struct rcu_node *rnp)
 {
-	struct list_head *lp;
-	int phase;
 	struct task_struct *t;
 
-	if (rcu_preempted_readers(rnp)) {
-		phase = rnp->gpnum & 0x1;
-		lp = &rnp->blocked_tasks[phase];
-		list_for_each_entry(t, lp, rcu_node_entry)
-			printk(" P%d", t->pid);
-	}
+	if (!rcu_preempt_blocked_readers_cgp(rnp))
+		return;
+	t = list_entry(rnp->gp_tasks,
+		       struct task_struct, rcu_node_entry);
+	list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
+		printk(" P%d", t->pid);
 }
 
 /*
@@ -430,18 +466,21 @@ static void rcu_preempt_stall_reset(void)
 	rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2;
 }
 
-#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
-
 /*
  * Check that the list of blocked tasks for the newly completed grace
  * period is in fact empty. It is a serious bug to complete a grace
  * period that still has RCU readers blocked! This function must be
  * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock
  * must be held by the caller.
+ *
+ * Also, if there are blocked tasks on the list, they automatically
+ * block the newly created grace period, so set up ->gp_tasks accordingly.
  */
 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
 {
-	WARN_ON_ONCE(rcu_preempted_readers(rnp));
+	WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
+	if (!list_empty(&rnp->blkd_tasks))
+		rnp->gp_tasks = rnp->blkd_tasks.next;
 	WARN_ON_ONCE(rnp->qsmask);
 }
 
@@ -465,50 +504,68 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
 				     struct rcu_node *rnp,
 				     struct rcu_data *rdp)
 {
-	int i;
 	struct list_head *lp;
 	struct list_head *lp_root;
 	int retval = 0;
 	struct rcu_node *rnp_root = rcu_get_root(rsp);
-	struct task_struct *tp;
+	struct task_struct *t;
 
 	if (rnp == rnp_root) {
 		WARN_ONCE(1, "Last CPU thought to be offlined?");
 		return 0; /* Shouldn't happen: at least one CPU online. */
 	}
-	WARN_ON_ONCE(rnp != rdp->mynode &&
-		     (!list_empty(&rnp->blocked_tasks[0]) ||
-		      !list_empty(&rnp->blocked_tasks[1]) ||
-		      !list_empty(&rnp->blocked_tasks[2]) ||
-		      !list_empty(&rnp->blocked_tasks[3])));
+
+	/* If we are on an internal node, complain bitterly. */
+	WARN_ON_ONCE(rnp != rdp->mynode);
 
 	/*
-	 * Move tasks up to root rcu_node. Rely on the fact that the
-	 * root rcu_node can be at most one ahead of the rest of the
-	 * rcu_nodes in terms of gp_num value. This fact allows us to
-	 * move the blocked_tasks[] array directly, element by element.
+	 * Move tasks up to root rcu_node. Don't try to get fancy for
+	 * this corner-case operation -- just put this node's tasks
+	 * at the head of the root node's list, and update the root node's
+	 * ->gp_tasks and ->exp_tasks pointers to those of this node's,
+	 * if non-NULL. This might result in waiting for more tasks than
+	 * absolutely necessary, but this is a good performance/complexity
+	 * tradeoff.
	 */
-	if (rcu_preempted_readers(rnp))
+	if (rcu_preempt_blocked_readers_cgp(rnp))
 		retval |= RCU_OFL_TASKS_NORM_GP;
 	if (rcu_preempted_readers_exp(rnp))
 		retval |= RCU_OFL_TASKS_EXP_GP;
-	for (i = 0; i < 4; i++) {
-		lp = &rnp->blocked_tasks[i];
-		lp_root = &rnp_root->blocked_tasks[i];
-		while (!list_empty(lp)) {
-			tp = list_entry(lp->next, typeof(*tp), rcu_node_entry);
-			raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
-			list_del(&tp->rcu_node_entry);
-			tp->rcu_blocked_node = rnp_root;
-			list_add(&tp->rcu_node_entry, lp_root);
-			raw_spin_unlock(&rnp_root->lock); /* irqs remain disabled */
-		}
+	lp = &rnp->blkd_tasks;
+	lp_root = &rnp_root->blkd_tasks;
+	while (!list_empty(lp)) {
+		t = list_entry(lp->next, typeof(*t), rcu_node_entry);
+		raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
+		list_del(&t->rcu_node_entry);
+		t->rcu_blocked_node = rnp_root;
+		list_add(&t->rcu_node_entry, lp_root);
+		if (&t->rcu_node_entry == rnp->gp_tasks)
+			rnp_root->gp_tasks = rnp->gp_tasks;
+		if (&t->rcu_node_entry == rnp->exp_tasks)
+			rnp_root->exp_tasks = rnp->exp_tasks;
+#ifdef CONFIG_RCU_BOOST
+		if (&t->rcu_node_entry == rnp->boost_tasks)
+			rnp_root->boost_tasks = rnp->boost_tasks;
+#endif /* #ifdef CONFIG_RCU_BOOST */
+		raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
 	}
+
+#ifdef CONFIG_RCU_BOOST
+	/* In case root is being boosted and leaf is not. */
+	raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
+	if (rnp_root->boost_tasks != NULL &&
+	    rnp_root->boost_tasks != rnp_root->gp_tasks)
+		rnp_root->boost_tasks = rnp_root->gp_tasks;
+	raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
+#endif /* #ifdef CONFIG_RCU_BOOST */
+
+	rnp->gp_tasks = NULL;
+	rnp->exp_tasks = NULL;
 	return retval;
 }
 
 /*
- * Do CPU-offline processing for preemptable RCU.
+ * Do CPU-offline processing for preemptible RCU.
  */
 static void rcu_preempt_offline_cpu(int cpu)
 {
@@ -537,7 +594,7 @@ static void rcu_preempt_check_callbacks(int cpu)
 }
 
 /*
- * Process callbacks for preemptable RCU.
+ * Process callbacks for preemptible RCU.
 */
 static void rcu_preempt_process_callbacks(void)
 {
@@ -546,7 +603,7 @@ static void rcu_preempt_process_callbacks(void)
 }
 
 /*
- * Queue a preemptable-RCU callback for invocation after a grace period.
+ * Queue a preemptible-RCU callback for invocation after a grace period.
 */
 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 {
@@ -594,8 +651,7 @@ static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
 */
 static int rcu_preempted_readers_exp(struct rcu_node *rnp)
 {
-	return !list_empty(&rnp->blocked_tasks[2]) ||
-	       !list_empty(&rnp->blocked_tasks[3]);
+	return rnp->exp_tasks != NULL;
 }
 
 /*
@@ -655,13 +711,17 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
 static void
 sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
 {
-	int must_wait;
+	unsigned long flags;
+	int must_wait = 0;
 
-	raw_spin_lock(&rnp->lock); /* irqs already disabled */
-	list_splice_init(&rnp->blocked_tasks[0], &rnp->blocked_tasks[2]);
-	list_splice_init(&rnp->blocked_tasks[1], &rnp->blocked_tasks[3]);
-	must_wait = rcu_preempted_readers_exp(rnp);
-	raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
+	raw_spin_lock_irqsave(&rnp->lock, flags);
+	if (list_empty(&rnp->blkd_tasks))
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	else {
+		rnp->exp_tasks = rnp->blkd_tasks.next;
+		rcu_initiate_boost(rnp, flags); /* releases rnp->lock */
+		must_wait = 1;
+	}
 	if (!must_wait)
 		rcu_report_exp_rnp(rsp, rnp);
 }
@@ -669,9 +729,7 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
 /*
  * Wait for an rcu-preempt grace period, but expedite it. The basic idea
  * is to invoke synchronize_sched_expedited() to push all the tasks to
- * the ->blocked_tasks[] lists, move all entries from the first set of
- * ->blocked_tasks[] lists to the second set, and finally wait for this
- * second set to drain.
+ * the ->blkd_tasks lists and wait for this list to drain.
 */
 void synchronize_rcu_expedited(void)
 {
@@ -703,7 +761,7 @@ void synchronize_rcu_expedited(void)
 	if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
 		goto unlock_mb_ret; /* Others did our work for us. */
 
-	/* force all RCU readers onto blocked_tasks[]. */
+	/* force all RCU readers onto ->blkd_tasks lists. */
 	synchronize_sched_expedited();
 
 	raw_spin_lock_irqsave(&rsp->onofflock, flags);
@@ -715,7 +773,7 @@ void synchronize_rcu_expedited(void)
 		raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
 	}
 
-	/* Snapshot current state of ->blocked_tasks[] lists. */
+	/* Snapshot current state of ->blkd_tasks lists. */
 	rcu_for_each_leaf_node(rsp, rnp)
 		sync_rcu_preempt_exp_init(rsp, rnp);
 	if (NUM_RCU_NODES > 1)
@@ -723,7 +781,7 @@ void synchronize_rcu_expedited(void)
 
 	raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
 
-	/* Wait for snapshotted ->blocked_tasks[] lists to drain. */
+	/* Wait for snapshotted ->blkd_tasks lists to drain. */
 	rnp = rcu_get_root(rsp);
 	wait_event(sync_rcu_preempt_exp_wq,
 		   sync_rcu_preempt_exp_done(rnp));
@@ -739,7 +797,7 @@ mb_ret:
 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 
 /*
- * Check to see if there is any immediate preemptable-RCU-related work
+ * Check to see if there is any immediate preemptible-RCU-related work
  * to be done.
 */
 static int rcu_preempt_pending(int cpu)
@@ -749,7 +807,7 @@ static int rcu_preempt_pending(int cpu)
 }
 
 /*
- * Does preemptable RCU need the CPU to stay out of dynticks mode?
+ * Does preemptible RCU need the CPU to stay out of dynticks mode?
 */
 static int rcu_preempt_needs_cpu(int cpu)
 {
@@ -766,7 +824,7 @@ void rcu_barrier(void)
 EXPORT_SYMBOL_GPL(rcu_barrier);
 
 /*
- * Initialize preemptable RCU's per-CPU data.
+ * Initialize preemptible RCU's per-CPU data.
 */
 static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
 {
@@ -774,7 +832,7 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
 }
 
 /*
- * Move preemptable RCU's callbacks from dying CPU to other online CPU.
+ * Move preemptible RCU's callbacks from dying CPU to other online CPU.
 */
 static void rcu_preempt_send_cbs_to_online(void)
 {
@@ -782,7 +840,7 @@ static void rcu_preempt_send_cbs_to_online(void)
 }
 
 /*
- * Initialize preemptable RCU's state structures.
+ * Initialize preemptible RCU's state structures.
 */
 static void __init __rcu_init_preempt(void)
 {
@@ -790,7 +848,7 @@ static void __init __rcu_init_preempt(void)
 }
 
 /*
- * Check for a task exiting while in a preemptable-RCU read-side
+ * Check for a task exiting while in a preemptible-RCU read-side
  * critical section, clean up if so. No need to issue warnings,
  * as debug_check_no_locks_held() already does this if lockdep
  * is enabled.
@@ -802,11 +860,13 @@ void exit_rcu(void)
 	if (t->rcu_read_lock_nesting == 0)
 		return;
 	t->rcu_read_lock_nesting = 1;
-	rcu_read_unlock();
+	__rcu_read_unlock();
 }
 
 #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
 
+static struct rcu_state *rcu_state = &rcu_sched_state;
+
 /*
  * Tell them what RCU they are running.
 */
@@ -836,7 +896,7 @@ void rcu_force_quiescent_state(void)
 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
 
 /*
- * Because preemptable RCU does not exist, we never have to check for
+ * Because preemptible RCU does not exist, we never have to check for
 * CPUs being in quiescent states.
 */
 static void rcu_preempt_note_context_switch(int cpu)
@@ -844,10 +904,10 @@ static void rcu_preempt_note_context_switch(int cpu)
 }
 
 /*
- * Because preemptable RCU does not exist, there are never any preempted
+ * Because preemptible RCU does not exist, there are never any preempted
 * RCU readers.
 */
-static int rcu_preempted_readers(struct rcu_node *rnp)
+static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
 {
 	return 0;
 }
@@ -862,10 +922,8 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
 
 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 
-#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
-
 /*
- * Because preemptable RCU does not exist, we never have to check for
+ * Because preemptible RCU does not exist, we never have to check for
 * tasks blocked within RCU read-side critical sections.
 */
 static void rcu_print_detail_task_stall(struct rcu_state *rsp)
@@ -873,7 +931,7 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp)
 }
 
 /*
- * Because preemptable RCU does not exist, we never have to check for
+ * Because preemptible RCU does not exist, we never have to check for
 * tasks blocked within RCU read-side critical sections.
 */
 static void rcu_print_task_stall(struct rcu_node *rnp)
@@ -888,10 +946,8 @@ static void rcu_preempt_stall_reset(void)
 {
 }
 
-#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
-
 /*
- * Because there is no preemptable RCU, there can be no readers blocked,
+ * Because there is no preemptible RCU, there can be no readers blocked,
 * so there is no need to check for blocked tasks. So check only for
 * bogus qsmask values.
 */
@@ -903,7 +959,7 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
 #ifdef CONFIG_HOTPLUG_CPU
 
 /*
- * Because preemptable RCU does not exist, it never needs to migrate
+ * Because preemptible RCU does not exist, it never needs to migrate
 * tasks that were blocked within RCU read-side critical sections, and
 * such non-existent tasks cannot possibly have been blocking the current
 * grace period.
@@ -916,7 +972,7 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
 }
 
 /*
- * Because preemptable RCU does not exist, it never needs CPU-offline
+ * Because preemptible RCU does not exist, it never needs CPU-offline
 * processing.
 */
 static void rcu_preempt_offline_cpu(int cpu)
@@ -926,7 +982,7 @@ static void rcu_preempt_offline_cpu(int cpu)
 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 
 /*
- * Because preemptable RCU does not exist, it never has any callbacks
+ * Because preemptible RCU does not exist, it never has any callbacks
 * to check.
 */
 static void rcu_preempt_check_callbacks(int cpu)
@@ -934,7 +990,7 @@ static void rcu_preempt_check_callbacks(int cpu)
 }
 
 /*
- * Because preemptable RCU does not exist, it never has any callbacks
+ * Because preemptible RCU does not exist, it never has any callbacks
 * to process.
 */
 static void rcu_preempt_process_callbacks(void)
@@ -943,7 +999,7 @@ static void rcu_preempt_process_callbacks(void)
 
 /*
 * Wait for an rcu-preempt grace period, but make it happen quickly.
- * But because preemptable RCU does not exist, map to rcu-sched.
+ * But because preemptible RCU does not exist, map to rcu-sched.
 */
 void synchronize_rcu_expedited(void)
 {
@@ -954,7 +1010,7 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 #ifdef CONFIG_HOTPLUG_CPU
 
 /*
- * Because preemptable RCU does not exist, there is never any need to
+ * Because preemptible RCU does not exist, there is never any need to
 * report on tasks preempted in RCU read-side critical sections during
 * expedited RCU grace periods.
 */
@@ -966,7 +1022,7 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 
 /*
- * Because preemptable RCU does not exist, it never has any work to do.
+ * Because preemptible RCU does not exist, it never has any work to do.
 */
 static int rcu_preempt_pending(int cpu)
 {
@@ -974,7 +1030,7 @@ static int rcu_preempt_pending(int cpu)
 }
 
 /*
- * Because preemptable RCU does not exist, it never needs any CPU.
+ * Because preemptible RCU does not exist, it never needs any CPU.
 */
 static int rcu_preempt_needs_cpu(int cpu)
 {
@@ -982,7 +1038,7 @@ static int rcu_preempt_needs_cpu(int cpu)
 }
 
 /*
- * Because preemptable RCU does not exist, rcu_barrier() is just
+ * Because preemptible RCU does not exist, rcu_barrier() is just
 * another name for rcu_barrier_sched().
 */
 void rcu_barrier(void)
@@ -992,7 +1048,7 @@ void rcu_barrier(void)
 EXPORT_SYMBOL_GPL(rcu_barrier);
 
 /*
- * Because preemptable RCU does not exist, there is no per-CPU
+ * Because preemptible RCU does not exist, there is no per-CPU
 * data to initialize.
 */
 static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
@@ -1000,14 +1056,14 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
 }
 
 /*
- * Because there is no preemptable RCU, there are no callbacks to move.
+ * Because there is no preemptible RCU, there are no callbacks to move.
 */
 static void rcu_preempt_send_cbs_to_online(void)
 {
 }
 
 /*
- * Because preemptable RCU does not exist, it need not be initialized.
+ * Because preemptible RCU does not exist, it need not be initialized.
 */
 static void __init __rcu_init_preempt(void)
 {
@@ -1015,6 +1071,276 @@ static void __init __rcu_init_preempt(void)
 
 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
 
+#ifdef CONFIG_RCU_BOOST
+
+#include "rtmutex_common.h"
+
+#ifdef CONFIG_RCU_TRACE
+
+static void rcu_initiate_boost_trace(struct rcu_node *rnp)
+{
+	if (list_empty(&rnp->blkd_tasks))
+		rnp->n_balk_blkd_tasks++;
+	else if (rnp->exp_tasks == NULL && rnp->gp_tasks == NULL)
+		rnp->n_balk_exp_gp_tasks++;
+	else if (rnp->gp_tasks != NULL && rnp->boost_tasks != NULL)
+		rnp->n_balk_boost_tasks++;
+	else if (rnp->gp_tasks != NULL && rnp->qsmask != 0)
+		rnp->n_balk_notblocked++;
+	else if (rnp->gp_tasks != NULL &&
+		 ULONG_CMP_LT(jiffies, rnp->boost_time))
+		rnp->n_balk_notyet++;
+	else
+		rnp->n_balk_nos++;
+}
+
+#else /* #ifdef CONFIG_RCU_TRACE */
+
+static void rcu_initiate_boost_trace(struct rcu_node *rnp)
+{
+}
+
+#endif /* #else #ifdef CONFIG_RCU_TRACE */
+
+/*
+ * Carry out RCU priority boosting on the task indicated by ->exp_tasks
+ * or ->boost_tasks, advancing the pointer to the next task in the
+ * ->blkd_tasks list.
+ *
+ * Note that irqs must be enabled: boosting the task can block.
+ * Returns 1 if there are more tasks needing to be boosted.
+ */
+static int rcu_boost(struct rcu_node *rnp)
+{
+	unsigned long flags;
+	struct rt_mutex mtx;
+	struct task_struct *t;
+	struct list_head *tb;
+
+	if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL)
+		return 0; /* Nothing left to boost. */
+
+	raw_spin_lock_irqsave(&rnp->lock, flags);
+
+	/*
+	 * Recheck under the lock: all tasks in need of boosting
+	 * might exit their RCU read-side critical sections on their own.
+	 */
+	if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) {
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		return 0;
+	}
+
+	/*
+	 * Preferentially boost tasks blocking expedited grace periods.
+	 * This cannot starve the normal grace periods because a second
+	 * expedited grace period must boost all blocked tasks, including
+	 * those blocking the pre-existing normal grace period.
+	 */
+	if (rnp->exp_tasks != NULL) {
+		tb = rnp->exp_tasks;
+		rnp->n_exp_boosts++;
+	} else {
+		tb = rnp->boost_tasks;
+		rnp->n_normal_boosts++;
+	}
+	rnp->n_tasks_boosted++;
+
+	/*
+	 * We boost task t by manufacturing an rt_mutex that appears to
+	 * be held by task t. We leave a pointer to that rt_mutex where
+	 * task t can find it, and task t will release the mutex when it
+	 * exits its outermost RCU read-side critical section. Then
+	 * simply acquiring this artificial rt_mutex will boost task
+	 * t's priority. (Thanks to tglx for suggesting this approach!)
+	 *
+	 * Note that task t must acquire rnp->lock to remove itself from
+	 * the ->blkd_tasks list, which it will do from exit() if from
+	 * nowhere else. We therefore are guaranteed that task t will
+	 * stay around at least until we drop rnp->lock. Note that
+	 * rnp->lock also resolves races between our priority boosting
+	 * and task t's exiting its outermost RCU read-side critical
+	 * section.
+	 */
+	t = container_of(tb, struct task_struct, rcu_node_entry);
+	rt_mutex_init_proxy_locked(&mtx, t);
+	t->rcu_boost_mutex = &mtx;
+	t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED;
+	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */
+	rt_mutex_unlock(&mtx); /* Keep lockdep happy. */
+
+	return rnp->exp_tasks != NULL || rnp->boost_tasks != NULL;
+}
+
+/*
+ * Timer handler to initiate waking up of boost kthreads that
+ * have yielded the CPU due to excessive numbers of tasks to
+ * boost. We wake up the per-rcu_node kthread, which in turn
+ * will wake up the booster kthread.
+ */
+static void rcu_boost_kthread_timer(unsigned long arg)
+{
+	invoke_rcu_node_kthread((struct rcu_node *)arg);
+}
+
+/*
+ * Priority-boosting kthread. One per leaf rcu_node and one for the
+ * root rcu_node.
+ */
+static int rcu_boost_kthread(void *arg)
+{
+	struct rcu_node *rnp = (struct rcu_node *)arg;
+	int spincnt = 0;
+	int more2boost;
+
+	for (;;) {
+		rnp->boost_kthread_status = RCU_KTHREAD_WAITING;
+		wait_event_interruptible(rnp->boost_wq, rnp->boost_tasks ||
+					 rnp->exp_tasks);
+		rnp->boost_kthread_status = RCU_KTHREAD_RUNNING;
+		more2boost = rcu_boost(rnp);
+		if (more2boost)
+			spincnt++;
+		else
+			spincnt = 0;
+		if (spincnt > 10) {
+			rcu_yield(rcu_boost_kthread_timer, (unsigned long)rnp);
+			spincnt = 0;
+		}
+	}
+	/* NOTREACHED */
+	return 0;
+}
+
+/*
+ * Check to see if it is time to start boosting RCU readers that are
+ * blocking the current grace period, and, if so, tell the per-rcu_node
+ * kthread to start boosting them. If there is an expedited grace
+ * period in progress, it is always time to boost.
+ *
+ * The caller must hold rnp->lock, which this function releases,
+ * but irqs remain disabled. The ->boost_kthread_task is immortal,
+ * so we don't need to worry about it going away.
+ */
+static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
+{
+	struct task_struct *t;
+
+	if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) {
+		rnp->n_balk_exp_gp_tasks++;
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		return;
+	}
+	if (rnp->exp_tasks != NULL ||
+	    (rnp->gp_tasks != NULL &&
+	     rnp->boost_tasks == NULL &&
+	     rnp->qsmask == 0 &&
+	     ULONG_CMP_GE(jiffies, rnp->boost_time))) {
+		if (rnp->exp_tasks == NULL)
+			rnp->boost_tasks = rnp->gp_tasks;
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		t = rnp->boost_kthread_task;
+		if (t != NULL)
+			wake_up_process(t);
+	} else {
+		rcu_initiate_boost_trace(rnp);
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	}
+}
+
+/*
+ * Set the affinity of the boost kthread. The CPU-hotplug locks are
+ * held, so no one should be messing with the existence of the boost
+ * kthread.
+ */
+static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,
+					  cpumask_var_t cm)
+{
+	struct task_struct *t;
+
+	t = rnp->boost_kthread_task;
+	if (t != NULL)
+		set_cpus_allowed_ptr(rnp->boost_kthread_task, cm);
+}
+
+#define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
+
+/*
+ * Do priority-boost accounting for the start of a new grace period.
+ */
+static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
+{
+	rnp->boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES;
+}
+
+/*
+ * Initialize the RCU-boost waitqueue.
+ */
+static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp)
+{
+	init_waitqueue_head(&rnp->boost_wq);
+}
+
+/*
+ * Create an RCU-boost kthread for the specified node if one does not
+ * already exist. We only create this kthread for preemptible RCU.
+ * Returns zero if all is well, a negated errno otherwise.
+ */
+static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
+						 struct rcu_node *rnp,
+						 int rnp_index)
+{
+	unsigned long flags;
+	struct sched_param sp;
+	struct task_struct *t;
+
+	if (&rcu_preempt_state != rsp)
+		return 0;
+	if (rnp->boost_kthread_task != NULL)
+		return 0;
+	t = kthread_create(rcu_boost_kthread, (void *)rnp,
+			   "rcub%d", rnp_index);
+	if (IS_ERR(t))
+		return PTR_ERR(t);
+	raw_spin_lock_irqsave(&rnp->lock, flags);
+	rnp->boost_kthread_task = t;
+	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	wake_up_process(t);
+	sp.sched_priority = RCU_KTHREAD_PRIO;
+	sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
+	return 0;
+}
+
+#else /* #ifdef CONFIG_RCU_BOOST */
+
+static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
+{
+	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+}
+
+static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,
+					  cpumask_var_t cm)
+{
+}
+
+static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
+{
+}
+
+static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp)
+{
+}
+
+static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
+						 struct rcu_node *rnp,
+						 int rnp_index)
+{
+	return 0;
+}
+
+#endif /* #else #ifdef CONFIG_RCU_BOOST */
+
 #ifndef CONFIG_SMP
 
 void synchronize_sched_expedited(void)
@@ -1187,8 +1513,8 @@ static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff);
 *
 * Because it is not legal to invoke rcu_process_callbacks() with irqs
 * disabled, we do one pass of force_quiescent_state(), then do a
- * raise_softirq() to cause rcu_process_callbacks() to be invoked later.
- * The per-cpu rcu_dyntick_drain variable controls the sequencing.
+ * invoke_rcu_cpu_kthread() to cause rcu_process_callbacks() to be invoked
+ * later. The per-cpu rcu_dyntick_drain variable controls the sequencing.
 */
 int rcu_needs_cpu(int cpu)
 {
@@ -1239,7 +1565,7 @@ int rcu_needs_cpu(int cpu)
 
 	/* If RCU callbacks are still pending, RCU still needs this CPU. */
 	if (c)
-		raise_softirq(RCU_SOFTIRQ);
+		invoke_rcu_cpu_kthread();
 	return c;
 }
 
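The hunks above repeatedly manipulate the new ->blkd_tasks list and its ->gp_tasks/->exp_tasks boundary pointers. As a reading aid only, here is a minimal user-space C sketch of that idea; it is not kernel code and not part of the patch, and every name in it (struct blocked_list, gp_first, add_reader, remove_reader) is a simplified stand-in. A single list holds all blocked readers, a boundary pointer marks the first entry still blocking the current grace period, and removing an entry advances that pointer the same way rcu_read_unlock_special() does via rcu_next_node_entry().

/*
 * Minimal user-space illustration (NOT kernel code): one list of blocked
 * "tasks" plus a boundary pointer marking the first entry that still
 * blocks the current grace period, mirroring ->blkd_tasks and ->gp_tasks.
 */
#include <stdio.h>

struct node {			/* stand-in for a task's rcu_node_entry */
	int pid;
	struct node *prev, *next;
};

struct blocked_list {		/* stand-in for the relevant rcu_node fields */
	struct node head;	/* circular list header, like ->blkd_tasks */
	struct node *gp_first;	/* like ->gp_tasks: first entry blocking GP */
};

static void list_init(struct blocked_list *bl)
{
	bl->head.prev = bl->head.next = &bl->head;
	bl->gp_first = NULL;
}

/* Newly preempted reader: add at the head; it does not block the current GP. */
static void add_reader(struct blocked_list *bl, struct node *n)
{
	n->next = bl->head.next;
	n->prev = &bl->head;
	bl->head.next->prev = n;
	bl->head.next = n;
}

/* Like rcu_next_node_entry(): next entry, or NULL at the end of the list. */
static struct node *next_entry(struct blocked_list *bl, struct node *n)
{
	return n->next == &bl->head ? NULL : n->next;
}

/* Reader leaves its critical section: unlink it, advancing gp_first if needed. */
static void remove_reader(struct blocked_list *bl, struct node *n)
{
	if (bl->gp_first == n)
		bl->gp_first = next_entry(bl, n);
	n->prev->next = n->next;
	n->next->prev = n->prev;
}

int main(void)
{
	struct blocked_list bl;
	struct node a = { .pid = 1 }, b = { .pid = 2 }, c = { .pid = 3 };

	list_init(&bl);
	add_reader(&bl, &a);
	add_reader(&bl, &b);
	/* New grace period: everything already queued now blocks it,
	 * analogous to rcu_preempt_check_blocked_tasks() in the patch. */
	bl.gp_first = bl.head.next;
	add_reader(&bl, &c);	/* queued after GP start: does not block it */

	remove_reader(&bl, &b);	/* gp_first advances toward the tail */
	printf("first blocker: P%d\n", bl.gp_first ? bl.gp_first->pid : -1);
	remove_reader(&bl, &a);	/* the pre-existing blockers drain */
	printf("grace period blocked: %s\n", bl.gp_first ? "yes" : "no");
	return 0;
}

Compiled with any C99 compiler, the sketch prints the one remaining blocker and then reports that the grace period is no longer blocked once the pre-existing readers drain, mirroring the "until rnp->gp_tasks becomes NULL" condition described in the patch.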