Diffstat (limited to 'kernel/rcutree_plugin.h')
-rw-r--r--   kernel/rcutree_plugin.h | 512
1 file changed, 468 insertions(+), 44 deletions(-)
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index ef2a58c2b9d5..79b53bda8943 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -24,16 +24,19 @@
  * Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  */
 
+#include <linux/delay.h>
 
 #ifdef CONFIG_TREE_PREEMPT_RCU
 
 struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state);
 DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
 
+static int rcu_preempted_readers_exp(struct rcu_node *rnp);
+
 /*
  * Tell them what RCU they are running.
  */
-static inline void rcu_bootup_announce(void)
+static void __init rcu_bootup_announce(void)
 {
 	printk(KERN_INFO
 	       "Experimental preemptable hierarchical RCU implementation.\n");
@@ -59,6 +62,15 @@ long rcu_batches_completed(void)
 EXPORT_SYMBOL_GPL(rcu_batches_completed);
 
 /*
+ * Force a quiescent state for preemptible RCU.
+ */
+void rcu_force_quiescent_state(void)
+{
+	force_quiescent_state(&rcu_preempt_state, 0);
+}
+EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
+
+/*
  * Record a preemptable-RCU quiescent state for the specified CPU.  Note
  * that this just means that the task currently running on the CPU is
  * not in a quiescent state.  There might be any number of tasks blocked
@@ -67,7 +79,7 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed);
 static void rcu_preempt_qs(int cpu)
 {
 	struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
-	rdp->passed_quiesc_completed = rdp->completed;
+	rdp->passed_quiesc_completed = rdp->gpnum - 1;
 	barrier();
 	rdp->passed_quiesc = 1;
 }
@@ -99,7 +111,7 @@ static void rcu_preempt_note_context_switch(int cpu)
 		/* Possibly blocking in an RCU read-side critical section. */
 		rdp = rcu_preempt_state.rda[cpu];
 		rnp = rdp->mynode;
-		spin_lock_irqsave(&rnp->lock, flags);
+		raw_spin_lock_irqsave(&rnp->lock, flags);
 		t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
 		t->rcu_blocked_node = rnp;
 
@@ -120,7 +132,7 @@ static void rcu_preempt_note_context_switch(int cpu)
 		WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
 		phase = (rnp->gpnum + !(rnp->qsmask & rdp->grpmask)) & 0x1;
 		list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]);
-		spin_unlock_irqrestore(&rnp->lock, flags);
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 	}
 
 	/*
@@ -157,14 +169,58 @@ EXPORT_SYMBOL_GPL(__rcu_read_lock);
  */
 static int rcu_preempted_readers(struct rcu_node *rnp)
 {
-	return !list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]);
+	int phase = rnp->gpnum & 0x1;
+
+	return !list_empty(&rnp->blocked_tasks[phase]) ||
+	       !list_empty(&rnp->blocked_tasks[phase + 2]);
 }
 
+/*
+ * Record a quiescent state for all tasks that were previously queued
+ * on the specified rcu_node structure and that were blocking the current
+ * RCU grace period.  The caller must hold the specified rnp->lock with
+ * irqs disabled, and this lock is released upon return, but irqs remain
+ * disabled.
+ */
+static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
+	__releases(rnp->lock)
+{
+	unsigned long mask;
+	struct rcu_node *rnp_p;
+
+	if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) {
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		return;  /* Still need more quiescent states! */
+	}
+
+	rnp_p = rnp->parent;
+	if (rnp_p == NULL) {
+		/*
+		 * Either there is only one rcu_node in the tree,
+		 * or tasks were kicked up to root rcu_node due to
+		 * CPUs going offline.
+		 */
+		rcu_report_qs_rsp(&rcu_preempt_state, flags);
+		return;
+	}
+
+	/* Report up the rest of the hierarchy. */
+	mask = rnp->grpmask;
+	raw_spin_unlock(&rnp->lock);	/* irqs remain disabled. */
+	raw_spin_lock(&rnp_p->lock);	/* irqs already disabled. */
+	rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags);
+}
+
+/*
+ * Handle special cases during rcu_read_unlock(), such as needing to
+ * notify RCU core processing or task having blocked during the RCU
+ * read-side critical section.
+ */
 static void rcu_read_unlock_special(struct task_struct *t)
 {
 	int empty;
+	int empty_exp;
 	unsigned long flags;
-	unsigned long mask;
 	struct rcu_node *rnp;
 	int special;
 
@@ -201,42 +257,36 @@ static void rcu_read_unlock_special(struct task_struct *t)
 		 */
 		for (;;) {
 			rnp = t->rcu_blocked_node;
-			spin_lock(&rnp->lock);  /* irqs already disabled. */
+			raw_spin_lock(&rnp->lock);  /* irqs already disabled. */
 			if (rnp == t->rcu_blocked_node)
 				break;
-			spin_unlock(&rnp->lock);  /* irqs remain disabled. */
+			raw_spin_unlock(&rnp->lock);  /* irqs remain disabled. */
 		}
 		empty = !rcu_preempted_readers(rnp);
+		empty_exp = !rcu_preempted_readers_exp(rnp);
+		smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
 		list_del_init(&t->rcu_node_entry);
 		t->rcu_blocked_node = NULL;
 
 		/*
 		 * If this was the last task on the current list, and if
 		 * we aren't waiting on any CPUs, report the quiescent state.
-		 * Note that both cpu_quiet_msk_finish() and cpu_quiet_msk()
-		 * drop rnp->lock and restore irq.
+		 * Note that rcu_report_unblock_qs_rnp() releases rnp->lock.
 		 */
-		if (!empty && rnp->qsmask == 0 &&
-		    !rcu_preempted_readers(rnp)) {
-			struct rcu_node *rnp_p;
-
-			if (rnp->parent == NULL) {
-				/* Only one rcu_node in the tree. */
-				cpu_quiet_msk_finish(&rcu_preempt_state, flags);
-				return;
-			}
-			/* Report up the rest of the hierarchy. */
-			mask = rnp->grpmask;
-			spin_unlock_irqrestore(&rnp->lock, flags);
-			rnp_p = rnp->parent;
-			spin_lock_irqsave(&rnp_p->lock, flags);
-			WARN_ON_ONCE(rnp->qsmask);
-			cpu_quiet_msk(mask, &rcu_preempt_state, rnp_p, flags);
-			return;
-		}
-		spin_unlock(&rnp->lock);
+		if (empty)
+			raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		else
+			rcu_report_unblock_qs_rnp(rnp, flags);
+
+		/*
+		 * If this was the last task on the expedited lists,
+		 * then we need to report up the rcu_node hierarchy.
+		 */
+		if (!empty_exp && !rcu_preempted_readers_exp(rnp))
+			rcu_report_exp_rnp(&rcu_preempt_state, rnp);
+	} else {
+		local_irq_restore(flags);
 	}
-	local_irq_restore(flags);
 }
 
 /*
@@ -254,29 +304,73 @@ void __rcu_read_unlock(void)
 	if (--ACCESS_ONCE(t->rcu_read_lock_nesting) == 0 &&
 	    unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
 		rcu_read_unlock_special(t);
+#ifdef CONFIG_PROVE_LOCKING
+	WARN_ON_ONCE(ACCESS_ONCE(t->rcu_read_lock_nesting) < 0);
+#endif /* #ifdef CONFIG_PROVE_LOCKING */
 }
 EXPORT_SYMBOL_GPL(__rcu_read_unlock);
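For context (not part of the patch): a minimal reader-side sketch of the API that __rcu_read_lock()/__rcu_read_unlock() back. The struct my_data, global_ptr, and read_val() names are hypothetical.

    #include <linux/rcupdate.h>

    struct my_data {
    	int val;
    };
    static struct my_data *global_ptr;	/* RCU-protected pointer */

    static int read_val(void)
    {
    	struct my_data *p;
    	int val = -1;

    	rcu_read_lock();			/* enter read-side critical section */
    	p = rcu_dereference(global_ptr);	/* fetch RCU-protected pointer */
    	if (p)
    		val = p->val;
    	rcu_read_unlock();	/* outermost unlock may call rcu_read_unlock_special() */
    	return val;
    }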
 
 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR
 
+#ifdef CONFIG_RCU_CPU_STALL_VERBOSE
+
+/*
+ * Dump detailed information for all tasks blocking the current RCU
+ * grace period on the specified rcu_node structure.
+ */
+static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
+{
+	unsigned long flags;
+	struct list_head *lp;
+	int phase;
+	struct task_struct *t;
+
+	if (rcu_preempted_readers(rnp)) {
+		raw_spin_lock_irqsave(&rnp->lock, flags);
+		phase = rnp->gpnum & 0x1;
+		lp = &rnp->blocked_tasks[phase];
+		list_for_each_entry(t, lp, rcu_node_entry)
+			sched_show_task(t);
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	}
+}
+
+/*
+ * Dump detailed information for all tasks blocking the current RCU
+ * grace period.
+ */
+static void rcu_print_detail_task_stall(struct rcu_state *rsp)
+{
+	struct rcu_node *rnp = rcu_get_root(rsp);
+
+	rcu_print_detail_task_stall_rnp(rnp);
+	rcu_for_each_leaf_node(rsp, rnp)
+		rcu_print_detail_task_stall_rnp(rnp);
+}
+
+#else /* #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
+
+static void rcu_print_detail_task_stall(struct rcu_state *rsp)
+{
+}
+
+#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
+
 /*
  * Scan the current list of tasks blocked within RCU read-side critical
  * sections, printing out the tid of each.
  */
 static void rcu_print_task_stall(struct rcu_node *rnp)
 {
-	unsigned long flags;
 	struct list_head *lp;
 	int phase;
 	struct task_struct *t;
 
 	if (rcu_preempted_readers(rnp)) {
-		spin_lock_irqsave(&rnp->lock, flags);
 		phase = rnp->gpnum & 0x1;
 		lp = &rnp->blocked_tasks[phase];
 		list_for_each_entry(t, lp, rcu_node_entry)
 			printk(" P%d", t->pid);
-		spin_unlock_irqrestore(&rnp->lock, flags);
 	}
 }
 
@@ -303,6 +397,8 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
  * rcu_node.  The reason for not just moving them to the immediate
  * parent is to remove the need for rcu_read_unlock_special() to
  * make more than two attempts to acquire the target rcu_node's lock.
+ * Returns true if there were tasks blocking the current RCU grace
+ * period.
  *
  * Returns 1 if there was previously a task blocking the current grace
  * period on the specified rcu_node structure.
@@ -316,7 +412,7 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
 	int i;
 	struct list_head *lp;
 	struct list_head *lp_root;
-	int retval = rcu_preempted_readers(rnp);
+	int retval = 0;
 	struct rcu_node *rnp_root = rcu_get_root(rsp);
 	struct task_struct *tp;
 
@@ -326,7 +422,9 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
 	}
 	WARN_ON_ONCE(rnp != rdp->mynode &&
 		     (!list_empty(&rnp->blocked_tasks[0]) ||
-		      !list_empty(&rnp->blocked_tasks[1])));
+		      !list_empty(&rnp->blocked_tasks[1]) ||
+		      !list_empty(&rnp->blocked_tasks[2]) ||
+		      !list_empty(&rnp->blocked_tasks[3])));
 
 	/*
 	 * Move tasks up to root rcu_node.  Rely on the fact that the
@@ -334,19 +432,22 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
 	 * rcu_nodes in terms of gp_num value.  This fact allows us to
 	 * move the blocked_tasks[] array directly, element by element.
 	 */
-	for (i = 0; i < 2; i++) {
+	if (rcu_preempted_readers(rnp))
+		retval |= RCU_OFL_TASKS_NORM_GP;
+	if (rcu_preempted_readers_exp(rnp))
+		retval |= RCU_OFL_TASKS_EXP_GP;
+	for (i = 0; i < 4; i++) {
 		lp = &rnp->blocked_tasks[i];
 		lp_root = &rnp_root->blocked_tasks[i];
 		while (!list_empty(lp)) {
 			tp = list_entry(lp->next, typeof(*tp), rcu_node_entry);
-			spin_lock(&rnp_root->lock); /* irqs already disabled */
+			raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
 			list_del(&tp->rcu_node_entry);
 			tp->rcu_blocked_node = rnp_root;
 			list_add(&tp->rcu_node_entry, lp_root);
-			spin_unlock(&rnp_root->lock); /* irqs remain disabled */
+			raw_spin_unlock(&rnp_root->lock); /* irqs remain disabled */
 		}
 	}
-
 	return retval;
 }
 
@@ -398,14 +499,183 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 }
 EXPORT_SYMBOL_GPL(call_rcu);
 
+/**
+ * synchronize_rcu - wait until a grace period has elapsed.
+ *
+ * Control will return to the caller some time after a full grace
+ * period has elapsed, in other words after all currently executing RCU
+ * read-side critical sections have completed.  RCU read-side critical
+ * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
+ * and may be nested.
+ */
+void synchronize_rcu(void)
+{
+	struct rcu_synchronize rcu;
+
+	if (!rcu_scheduler_active)
+		return;
+
+	init_completion(&rcu.completion);
+	/* Will wake me after RCU finished. */
+	call_rcu(&rcu.head, wakeme_after_rcu);
+	/* Wait for it. */
+	wait_for_completion(&rcu.completion);
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu);
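For context (not part of the patch): a minimal updater-side sketch of how synchronize_rcu() is typically paired with rcu_assign_pointer(). It reuses the hypothetical my_data/global_ptr from the reader sketch above and assumes updates are serialized by the caller.

    #include <linux/slab.h>

    static void update_val(int new_val)
    {
    	struct my_data *new_p, *old_p;

    	new_p = kmalloc(sizeof(*new_p), GFP_KERNEL);
    	if (!new_p)
    		return;
    	new_p->val = new_val;

    	old_p = global_ptr;
    	rcu_assign_pointer(global_ptr, new_p);	/* publish the new version */
    	synchronize_rcu();	/* wait for all pre-existing readers to finish */
    	kfree(old_p);		/* no reader can still hold a reference */
    }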
+
+static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
+static long sync_rcu_preempt_exp_count;
+static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
+
+/*
+ * Return non-zero if there are any tasks in RCU read-side critical
+ * sections blocking the current preemptible-RCU expedited grace period.
+ * If there is no preemptible-RCU expedited grace period currently in
+ * progress, returns zero unconditionally.
+ */
+static int rcu_preempted_readers_exp(struct rcu_node *rnp)
+{
+	return !list_empty(&rnp->blocked_tasks[2]) ||
+	       !list_empty(&rnp->blocked_tasks[3]);
+}
+
+/*
+ * return non-zero if there is no RCU expedited grace period in progress
+ * for the specified rcu_node structure, in other words, if all CPUs and
+ * tasks covered by the specified rcu_node structure have done their bit
+ * for the current expedited grace period.  Works only for preemptible
+ * RCU -- other RCU implementation use other means.
+ *
+ * Caller must hold sync_rcu_preempt_exp_mutex.
+ */
+static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
+{
+	return !rcu_preempted_readers_exp(rnp) &&
+	       ACCESS_ONCE(rnp->expmask) == 0;
+}
+
 /*
- * Wait for an rcu-preempt grace period.  We are supposed to expedite the
- * grace period, but this is the crude slow compatability hack, so just
- * invoke synchronize_rcu().
+ * Report the exit from RCU read-side critical section for the last task
+ * that queued itself during or before the current expedited preemptible-RCU
+ * grace period.  This event is reported either to the rcu_node structure on
+ * which the task was queued or to one of that rcu_node structure's ancestors,
+ * recursively up the tree.  (Calm down, calm down, we do the recursion
+ * iteratively!)
+ *
+ * Caller must hold sync_rcu_preempt_exp_mutex.
+ */
+static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
+{
+	unsigned long flags;
+	unsigned long mask;
+
+	raw_spin_lock_irqsave(&rnp->lock, flags);
+	for (;;) {
+		if (!sync_rcu_preempt_exp_done(rnp))
+			break;
+		if (rnp->parent == NULL) {
+			wake_up(&sync_rcu_preempt_exp_wq);
+			break;
+		}
+		mask = rnp->grpmask;
+		raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
+		rnp = rnp->parent;
+		raw_spin_lock(&rnp->lock); /* irqs already disabled */
+		rnp->expmask &= ~mask;
+	}
+	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+}
+
+/*
+ * Snapshot the tasks blocking the newly started preemptible-RCU expedited
+ * grace period for the specified rcu_node structure.  If there are no such
+ * tasks, report it up the rcu_node hierarchy.
+ *
+ * Caller must hold sync_rcu_preempt_exp_mutex and rsp->onofflock.
+ */
+static void
+sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
+{
+	int must_wait;
+
+	raw_spin_lock(&rnp->lock); /* irqs already disabled */
+	list_splice_init(&rnp->blocked_tasks[0], &rnp->blocked_tasks[2]);
+	list_splice_init(&rnp->blocked_tasks[1], &rnp->blocked_tasks[3]);
+	must_wait = rcu_preempted_readers_exp(rnp);
+	raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
+	if (!must_wait)
+		rcu_report_exp_rnp(rsp, rnp);
+}
+
+/*
+ * Wait for an rcu-preempt grace period, but expedite it.  The basic idea
+ * is to invoke synchronize_sched_expedited() to push all the tasks to
+ * the ->blocked_tasks[] lists, move all entries from the first set of
+ * ->blocked_tasks[] lists to the second set, and finally wait for this
+ * second set to drain.
  */
 void synchronize_rcu_expedited(void)
 {
-	synchronize_rcu();
+	unsigned long flags;
+	struct rcu_node *rnp;
+	struct rcu_state *rsp = &rcu_preempt_state;
+	long snap;
+	int trycount = 0;
+
+	smp_mb(); /* Caller's modifications seen first by other CPUs. */
+	snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1;
+	smp_mb(); /* Above access cannot bleed into critical section. */
+
+	/*
+	 * Acquire lock, falling back to synchronize_rcu() if too many
+	 * lock-acquisition failures.  Of course, if someone does the
+	 * expedited grace period for us, just leave.
+	 */
+	while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) {
+		if (trycount++ < 10)
+			udelay(trycount * num_online_cpus());
+		else {
+			synchronize_rcu();
+			return;
+		}
+		if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
+			goto mb_ret; /* Others did our work for us. */
+	}
+	if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
+		goto unlock_mb_ret; /* Others did our work for us. */
+
+	/* force all RCU readers onto blocked_tasks[]. */
+	synchronize_sched_expedited();
+
+	raw_spin_lock_irqsave(&rsp->onofflock, flags);
+
+	/* Initialize ->expmask for all non-leaf rcu_node structures. */
+	rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) {
+		raw_spin_lock(&rnp->lock); /* irqs already disabled. */
+		rnp->expmask = rnp->qsmaskinit;
+		raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+	}
+
+	/* Snapshot current state of ->blocked_tasks[] lists. */
+	rcu_for_each_leaf_node(rsp, rnp)
+		sync_rcu_preempt_exp_init(rsp, rnp);
+	if (NUM_RCU_NODES > 1)
+		sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp));
+
+	raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
+
+	/* Wait for snapshotted ->blocked_tasks[] lists to drain. */
+	rnp = rcu_get_root(rsp);
+	wait_event(sync_rcu_preempt_exp_wq,
+		   sync_rcu_preempt_exp_done(rnp));
+
+	/* Clean up and exit. */
+	smp_mb(); /* ensure expedited GP seen before counter increment. */
+	ACCESS_ONCE(sync_rcu_preempt_exp_count)++;
+unlock_mb_ret:
+	mutex_unlock(&sync_rcu_preempt_exp_mutex);
+mb_ret:
+	smp_mb(); /* ensure subsequent action seen after grace period. */
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 
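For context (not part of the patch): synchronize_rcu_expedited() offers the same grace-period guarantee as synchronize_rcu() at higher CPU cost and much lower latency; a hypothetical latency-sensitive variant of the updater sketch above.

    /* Hypothetical: trade CPU overhead for a much shorter grace-period wait. */
    static void update_val_fast(struct my_data *new_p)
    {
    	struct my_data *old_p = global_ptr;

    	rcu_assign_pointer(global_ptr, new_p);
    	synchronize_rcu_expedited();	/* same guarantee as synchronize_rcu() */
    	kfree(old_p);
    }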
@@ -481,7 +751,7 @@ void exit_rcu(void)
 /*
  * Tell them what RCU they are running.
  */
-static inline void rcu_bootup_announce(void)
+static void __init rcu_bootup_announce(void)
 {
 	printk(KERN_INFO "Hierarchical RCU implementation.\n");
 }
@@ -496,6 +766,16 @@ long rcu_batches_completed(void)
 EXPORT_SYMBOL_GPL(rcu_batches_completed);
 
 /*
+ * Force a quiescent state for RCU, which, because there is no preemptible
+ * RCU, becomes the same as rcu-sched.
+ */
+void rcu_force_quiescent_state(void)
+{
+	rcu_sched_force_quiescent_state();
+}
+EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
+
+/*
  * Because preemptable RCU does not exist, we never have to check for
  * CPUs being in quiescent states.
  */
@@ -512,12 +792,30 @@ static int rcu_preempted_readers(struct rcu_node *rnp)
 	return 0;
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+
+/* Because preemptible RCU does not exist, no quieting of tasks. */
+static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
+{
+	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+}
+
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+
 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR
 
 /*
  * Because preemptable RCU does not exist, we never have to check for
  * tasks blocked within RCU read-side critical sections.
  */
+static void rcu_print_detail_task_stall(struct rcu_state *rsp)
+{
+}
+
+/*
+ * Because preemptable RCU does not exist, we never have to check for
+ * tasks blocked within RCU read-side critical sections.
+ */
 static void rcu_print_task_stall(struct rcu_node *rnp)
 {
 }
@@ -594,6 +892,20 @@ void synchronize_rcu_expedited(void)
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 
+#ifdef CONFIG_HOTPLUG_CPU
+
+/*
+ * Because preemptable RCU does not exist, there is never any need to
+ * report on tasks preempted in RCU read-side critical sections during
+ * expedited RCU grace periods.
+ */
+static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
+{
+	return;
+}
+
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+
 /*
  * Because preemptable RCU does not exist, it never has any work to do.
  */
@@ -643,3 +955,115 @@ static void __init __rcu_init_preempt(void)
 }
 
 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
+
+#if !defined(CONFIG_RCU_FAST_NO_HZ)
+
+/*
+ * Check to see if any future RCU-related work will need to be done
+ * by the current CPU, even if none need be done immediately, returning
+ * 1 if so.  This function is part of the RCU implementation; it is -not-
+ * an exported member of the RCU API.
+ *
+ * Because we have preemptible RCU, just check whether this CPU needs
+ * any flavor of RCU.  Do not chew up lots of CPU cycles with preemption
+ * disabled in a most-likely vain attempt to cause RCU not to need this CPU.
+ */
+int rcu_needs_cpu(int cpu)
+{
+	return rcu_needs_cpu_quick_check(cpu);
+}
+
+/*
+ * Check to see if we need to continue a callback-flush operations to
+ * allow the last CPU to enter dyntick-idle mode.  But fast dyntick-idle
+ * entry is not configured, so we never do need to.
+ */
+static void rcu_needs_cpu_flush(void)
+{
+}
+
+#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
+
+#define RCU_NEEDS_CPU_FLUSHES 5
+static DEFINE_PER_CPU(int, rcu_dyntick_drain);
+static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff);
+
+/*
+ * Check to see if any future RCU-related work will need to be done
+ * by the current CPU, even if none need be done immediately, returning
+ * 1 if so.  This function is part of the RCU implementation; it is -not-
+ * an exported member of the RCU API.
+ *
+ * Because we are not supporting preemptible RCU, attempt to accelerate
+ * any current grace periods so that RCU no longer needs this CPU, but
+ * only if all other CPUs are already in dynticks-idle mode.  This will
+ * allow the CPU cores to be powered down immediately, as opposed to after
+ * waiting many milliseconds for grace periods to elapse.
+ *
+ * Because it is not legal to invoke rcu_process_callbacks() with irqs
+ * disabled, we do one pass of force_quiescent_state(), then do a
+ * raise_softirq() to cause rcu_process_callbacks() to be invoked later.
+ * The per-cpu rcu_dyntick_drain variable controls the sequencing.
+ */
+int rcu_needs_cpu(int cpu)
+{
+	int c = 0;
+	int thatcpu;
+
+	/* Check for being in the holdoff period. */
+	if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies)
+		return rcu_needs_cpu_quick_check(cpu);
+
+	/* Don't bother unless we are the last non-dyntick-idle CPU. */
+	for_each_cpu_not(thatcpu, nohz_cpu_mask)
+		if (thatcpu != cpu) {
+			per_cpu(rcu_dyntick_drain, cpu) = 0;
+			per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
+			return rcu_needs_cpu_quick_check(cpu);
+		}
+
+	/* Check and update the rcu_dyntick_drain sequencing. */
+	if (per_cpu(rcu_dyntick_drain, cpu) <= 0) {
+		/* First time through, initialize the counter. */
+		per_cpu(rcu_dyntick_drain, cpu) = RCU_NEEDS_CPU_FLUSHES;
+	} else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) {
+		/* We have hit the limit, so time to give up. */
+		per_cpu(rcu_dyntick_holdoff, cpu) = jiffies;
+		return rcu_needs_cpu_quick_check(cpu);
+	}
+
+	/* Do one step pushing remaining RCU callbacks through. */
+	if (per_cpu(rcu_sched_data, cpu).nxtlist) {
+		rcu_sched_qs(cpu);
+		force_quiescent_state(&rcu_sched_state, 0);
+		c = c || per_cpu(rcu_sched_data, cpu).nxtlist;
+	}
+	if (per_cpu(rcu_bh_data, cpu).nxtlist) {
+		rcu_bh_qs(cpu);
+		force_quiescent_state(&rcu_bh_state, 0);
+		c = c || per_cpu(rcu_bh_data, cpu).nxtlist;
+	}
+
+	/* If RCU callbacks are still pending, RCU still needs this CPU. */
+	if (c)
+		raise_softirq(RCU_SOFTIRQ);
+	return c;
+}
+
+/*
+ * Check to see if we need to continue a callback-flush operations to
+ * allow the last CPU to enter dyntick-idle mode.
+ */
+static void rcu_needs_cpu_flush(void)
+{
+	int cpu = smp_processor_id();
+	unsigned long flags;
+
+	if (per_cpu(rcu_dyntick_drain, cpu) <= 0)
+		return;
+	local_irq_save(flags);
+	(void)rcu_needs_cpu(cpu);
+	local_irq_restore(flags);
+}
+
+#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */