author     Linus Torvalds <torvalds@linux-foundation.org>   2012-03-19 20:12:34 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2012-03-20 13:10:18 -0400
commit     5928a2b60cfdbad730f93696acab142d0b607280 (patch)
tree       49bb21c9219673e61bad7a7c9202c7f25f5fe1be /kernel
parent     5ed59af85077d28875a3a137b21933aaf1b4cd50 (diff)
parent     bdd4431c8d071491a68a65d9457996f222b5ecd3 (diff)
Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RCU changes for v3.4 from Ingo Molnar. The major features of this
series are:

 - making RCU more aggressive about entering dyntick-idle mode in order
   to improve energy efficiency
 - converting a few more call_rcu()s to kfree_rcu()s
 - applying a number of rcutree fixes and cleanups to rcutiny
 - removing CONFIG_SMP #ifdefs from treercu
 - allowing RCU CPU stall times to be set via sysfs
 - adding CPU-stall capability to rcutorture
 - adding more RCU-abuse diagnostics
 - updating documentation
 - fixing yet more issues located by the still-ongoing top-to-bottom
   inspection of RCU, this time with a special focus on the CPU-hotplug
   code path.

* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (48 commits)
  rcu: Stop spurious warnings from synchronize_sched_expedited
  rcu: Hold off RCU_FAST_NO_HZ after timer posted
  rcu: Eliminate softirq-mediated RCU_FAST_NO_HZ idle-entry loop
  rcu: Add RCU_NONIDLE() for idle-loop RCU read-side critical sections
  rcu: Allow nesting of rcu_idle_enter() and rcu_idle_exit()
  rcu: Remove redundant check for rcu_head misalignment
  PTR_ERR should be called before its argument is cleared.
  rcu: Convert WARN_ON_ONCE() in rcu_lock_acquire() to lockdep
  rcu: Trace only after NULL-pointer check
  rcu: Call out dangers of expedited RCU primitives
  rcu: Rework detection of use of RCU by offline CPUs
  lockdep: Add CPU-idle/offline warning to lockdep-RCU splat
  rcu: No interrupt disabling for rcu_prepare_for_idle()
  rcu: Move synchronize_sched_expedited() to rcutree.c
  rcu: Check for illegal use of RCU from offlined CPUs
  rcu: Update stall-warning documentation
  rcu: Add CPU-stall capability to rcutorture
  rcu: Make documentation give more realistic rcutorture duration
  rcutorture: Permit holding off CPU-hotplug operations during boot
  rcu: Print scheduling-clock information on RCU CPU stall-warning messages
  ...
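For reference, the kfree_rcu() conversions mentioned above replace call_rcu()
callbacks whose only job is to kfree() the enclosing structure. A minimal
before/after sketch (the struct and field names here are illustrative, not
taken from this series):

    /* Before: a callback that exists only to free its structure. */
    static void foo_free_cb(struct rcu_head *rhp)
    {
            struct foo *fp = container_of(rhp, struct foo, rcu);

            kfree(fp);
    }
    ...
            call_rcu(&fp->rcu, foo_free_cb);

    /* After: the RCU core performs the kfree() itself, and such callbacks
     * can be treated as "lazy" for energy-efficiency purposes. */
            kfree_rcu(fp, rcu);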
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/lockdep.c            8
-rw-r--r--  kernel/rcu.h               26
-rw-r--r--  kernel/rcupdate.c           5
-rw-r--r--  kernel/rcutiny.c           26
-rw-r--r--  kernel/rcutiny_plugin.h    77
-rw-r--r--  kernel/rcutorture.c        91
-rw-r--r--  kernel/rcutree.c          507
-rw-r--r--  kernel/rcutree.h           27
-rw-r--r--  kernel/rcutree_plugin.h   450
-rw-r--r--  kernel/rcutree_trace.c     12
-rw-r--r--  kernel/srcu.c              33
11 files changed, 893 insertions, 369 deletions
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 8889f7dd7c46..ea9ee4518c35 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -4176,7 +4176,13 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
4176 printk("-------------------------------\n"); 4176 printk("-------------------------------\n");
4177 printk("%s:%d %s!\n", file, line, s); 4177 printk("%s:%d %s!\n", file, line, s);
4178 printk("\nother info that might help us debug this:\n\n"); 4178 printk("\nother info that might help us debug this:\n\n");
4179 printk("\nrcu_scheduler_active = %d, debug_locks = %d\n", rcu_scheduler_active, debug_locks); 4179 printk("\n%srcu_scheduler_active = %d, debug_locks = %d\n",
4180 !rcu_lockdep_current_cpu_online()
4181 ? "RCU used illegally from offline CPU!\n"
4182 : rcu_is_cpu_idle()
4183 ? "RCU used illegally from idle CPU!\n"
4184 : "",
4185 rcu_scheduler_active, debug_locks);
4180 4186
4181 /* 4187 /*
4182 * If a CPU is in the RCU-free window in idle (ie: in the section 4188 * If a CPU is in the RCU-free window in idle (ie: in the section
diff --git a/kernel/rcu.h b/kernel/rcu.h
index aa88baab5f78..8ba99cdc6515 100644
--- a/kernel/rcu.h
+++ b/kernel/rcu.h
@@ -33,8 +33,27 @@
33 * Process-level increment to ->dynticks_nesting field. This allows for 33 * Process-level increment to ->dynticks_nesting field. This allows for
34 * architectures that use half-interrupts and half-exceptions from 34 * architectures that use half-interrupts and half-exceptions from
35 * process context. 35 * process context.
36 *
37 * DYNTICK_TASK_NEST_MASK defines a field of width DYNTICK_TASK_NEST_WIDTH
38 * that counts the number of process-based reasons why RCU cannot
39 * consider the corresponding CPU to be idle, and DYNTICK_TASK_NEST_VALUE
40 * is the value used to increment or decrement this field.
41 *
42 * The rest of the bits could in principle be used to count interrupts,
43 * but this would mean that a negative-one value in the interrupt
44 * field could incorrectly zero out the DYNTICK_TASK_NEST_MASK field.
45 * We therefore provide a two-bit guard field defined by DYNTICK_TASK_MASK
46 * that is set to DYNTICK_TASK_FLAG upon initial exit from idle.
47 * The DYNTICK_TASK_EXIT_IDLE value is thus the combined value used upon
48 * initial exit from idle.
36 */ 49 */
37#define DYNTICK_TASK_NESTING (LLONG_MAX / 2 - 1) 50#define DYNTICK_TASK_NEST_WIDTH 7
51#define DYNTICK_TASK_NEST_VALUE ((LLONG_MAX >> DYNTICK_TASK_NEST_WIDTH) + 1)
52#define DYNTICK_TASK_NEST_MASK (LLONG_MAX - DYNTICK_TASK_NEST_VALUE + 1)
53#define DYNTICK_TASK_FLAG ((DYNTICK_TASK_NEST_VALUE / 8) * 2)
54#define DYNTICK_TASK_MASK ((DYNTICK_TASK_NEST_VALUE / 8) * 3)
55#define DYNTICK_TASK_EXIT_IDLE (DYNTICK_TASK_NEST_VALUE + \
56 DYNTICK_TASK_FLAG)
38 57
39/* 58/*
40 * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally 59 * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally
@@ -50,7 +69,6 @@ extern struct debug_obj_descr rcuhead_debug_descr;
50 69
51static inline void debug_rcu_head_queue(struct rcu_head *head) 70static inline void debug_rcu_head_queue(struct rcu_head *head)
52{ 71{
53 WARN_ON_ONCE((unsigned long)head & 0x3);
54 debug_object_activate(head, &rcuhead_debug_descr); 72 debug_object_activate(head, &rcuhead_debug_descr);
55 debug_object_active_state(head, &rcuhead_debug_descr, 73 debug_object_active_state(head, &rcuhead_debug_descr,
56 STATE_RCU_HEAD_READY, 74 STATE_RCU_HEAD_READY,
@@ -76,16 +94,18 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head)
76 94
77extern void kfree(const void *); 95extern void kfree(const void *);
78 96
79static inline void __rcu_reclaim(char *rn, struct rcu_head *head) 97static inline bool __rcu_reclaim(char *rn, struct rcu_head *head)
80{ 98{
81 unsigned long offset = (unsigned long)head->func; 99 unsigned long offset = (unsigned long)head->func;
82 100
83 if (__is_kfree_rcu_offset(offset)) { 101 if (__is_kfree_rcu_offset(offset)) {
84 RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset)); 102 RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset));
85 kfree((void *)head - offset); 103 kfree((void *)head - offset);
104 return 1;
86 } else { 105 } else {
87 RCU_TRACE(trace_rcu_invoke_callback(rn, head)); 106 RCU_TRACE(trace_rcu_invoke_callback(rn, head));
88 head->func(head); 107 head->func(head);
108 return 0;
89 } 109 }
90} 110}
91 111
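The bool return value added to __rcu_reclaim() above lets callers distinguish
kfree_rcu()-style (lazy) callbacks from ordinary function callbacks while
draining a list. A rough sketch of such an invocation loop (simplified from
the callback-processing code; the helper name is illustrative):

    static void invoke_cb_list(char *rn, struct rcu_head *list,
                               int *count, int *count_lazy)
    {
            struct rcu_head *next;

            while (list) {
                    next = list->next;
                    debug_rcu_head_unqueue(list);
                    if (__rcu_reclaim(rn, list))
                            (*count_lazy)++;  /* was a kfree_rcu() callback */
                    (*count)++;
                    list = next;
            }
    }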
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 2bc4e135ff23..a86f1741cc27 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -88,6 +88,9 @@ EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
88 * section. 88 * section.
89 * 89 *
90 * Check debug_lockdep_rcu_enabled() to prevent false positives during boot. 90 * Check debug_lockdep_rcu_enabled() to prevent false positives during boot.
91 *
92 * Note that rcu_read_lock() is disallowed if the CPU is either idle or
93 * offline from an RCU perspective, so check for those as well.
91 */ 94 */
92int rcu_read_lock_bh_held(void) 95int rcu_read_lock_bh_held(void)
93{ 96{
@@ -95,6 +98,8 @@ int rcu_read_lock_bh_held(void)
95 return 1; 98 return 1;
96 if (rcu_is_cpu_idle()) 99 if (rcu_is_cpu_idle())
97 return 0; 100 return 0;
101 if (!rcu_lockdep_current_cpu_online())
102 return 0;
98 return in_softirq() || irqs_disabled(); 103 return in_softirq() || irqs_disabled();
99} 104}
100EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); 105EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
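rcu_read_lock_bh_held() is consumed by lockdep-checked accessors, so the
additional idle and offline tests above tighten checks of the following common
form (pointer and lock names are illustrative):

    /* Complain unless in an RCU-bh reader or holding the update-side lock. */
    p = rcu_dereference_check(gp->ptr,
                              rcu_read_lock_bh_held() ||
                              lockdep_is_held(&gp_lock));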
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 977296dca0a4..37a5444204d2 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -53,7 +53,7 @@ static void __call_rcu(struct rcu_head *head,
53 53
54#include "rcutiny_plugin.h" 54#include "rcutiny_plugin.h"
55 55
56static long long rcu_dynticks_nesting = DYNTICK_TASK_NESTING; 56static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
57 57
58/* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */ 58/* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */
59static void rcu_idle_enter_common(long long oldval) 59static void rcu_idle_enter_common(long long oldval)
@@ -88,10 +88,16 @@ void rcu_idle_enter(void)
88 88
89 local_irq_save(flags); 89 local_irq_save(flags);
90 oldval = rcu_dynticks_nesting; 90 oldval = rcu_dynticks_nesting;
91 rcu_dynticks_nesting = 0; 91 WARN_ON_ONCE((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) == 0);
92 if ((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) ==
93 DYNTICK_TASK_NEST_VALUE)
94 rcu_dynticks_nesting = 0;
95 else
96 rcu_dynticks_nesting -= DYNTICK_TASK_NEST_VALUE;
92 rcu_idle_enter_common(oldval); 97 rcu_idle_enter_common(oldval);
93 local_irq_restore(flags); 98 local_irq_restore(flags);
94} 99}
100EXPORT_SYMBOL_GPL(rcu_idle_enter);
95 101
96/* 102/*
97 * Exit an interrupt handler towards idle. 103 * Exit an interrupt handler towards idle.
@@ -140,11 +146,15 @@ void rcu_idle_exit(void)
140 146
141 local_irq_save(flags); 147 local_irq_save(flags);
142 oldval = rcu_dynticks_nesting; 148 oldval = rcu_dynticks_nesting;
143 WARN_ON_ONCE(oldval != 0); 149 WARN_ON_ONCE(rcu_dynticks_nesting < 0);
144 rcu_dynticks_nesting = DYNTICK_TASK_NESTING; 150 if (rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK)
151 rcu_dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
152 else
153 rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
145 rcu_idle_exit_common(oldval); 154 rcu_idle_exit_common(oldval);
146 local_irq_restore(flags); 155 local_irq_restore(flags);
147} 156}
157EXPORT_SYMBOL_GPL(rcu_idle_exit);
148 158
149/* 159/*
150 * Enter an interrupt handler, moving away from idle. 160 * Enter an interrupt handler, moving away from idle.
@@ -258,7 +268,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
258 268
259 /* If no RCU callbacks ready to invoke, just return. */ 269 /* If no RCU callbacks ready to invoke, just return. */
260 if (&rcp->rcucblist == rcp->donetail) { 270 if (&rcp->rcucblist == rcp->donetail) {
261 RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1)); 271 RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, 0, -1));
262 RCU_TRACE(trace_rcu_batch_end(rcp->name, 0, 272 RCU_TRACE(trace_rcu_batch_end(rcp->name, 0,
263 ACCESS_ONCE(rcp->rcucblist), 273 ACCESS_ONCE(rcp->rcucblist),
264 need_resched(), 274 need_resched(),
@@ -269,7 +279,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
269 279
270 /* Move the ready-to-invoke callbacks to a local list. */ 280 /* Move the ready-to-invoke callbacks to a local list. */
271 local_irq_save(flags); 281 local_irq_save(flags);
272 RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1)); 282 RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, rcp->qlen, -1));
273 list = rcp->rcucblist; 283 list = rcp->rcucblist;
274 rcp->rcucblist = *rcp->donetail; 284 rcp->rcucblist = *rcp->donetail;
275 *rcp->donetail = NULL; 285 *rcp->donetail = NULL;
@@ -319,6 +329,10 @@ static void rcu_process_callbacks(struct softirq_action *unused)
319 */ 329 */
320void synchronize_sched(void) 330void synchronize_sched(void)
321{ 331{
332 rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
333 !lock_is_held(&rcu_lock_map) &&
334 !lock_is_held(&rcu_sched_lock_map),
335 "Illegal synchronize_sched() in RCU read-side critical section");
322 cond_resched(); 336 cond_resched();
323} 337}
324EXPORT_SYMBOL_GPL(synchronize_sched); 338EXPORT_SYMBOL_GPL(synchronize_sched);
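The rcu_lockdep_assert() added to synchronize_sched() here (and to
synchronize_rcu() in rcutiny_plugin.h below) targets misuse of the following
shape, which previously went undiagnosed; a deliberately buggy sketch with
illustrative names:

    rcu_read_lock();
    p = rcu_dereference(gp);
    synchronize_rcu();      /* BUG: waits for a grace period that cannot
                             * complete while this reader is still running;
                             * now flagged by the lockdep-RCU assertion. */
    do_something_with(p);
    rcu_read_unlock();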
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index 9cb1ae4aabdd..22ecea0dfb62 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -132,6 +132,7 @@ static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = {
132 RCU_TRACE(.rcb.name = "rcu_preempt") 132 RCU_TRACE(.rcb.name = "rcu_preempt")
133}; 133};
134 134
135static void rcu_read_unlock_special(struct task_struct *t);
135static int rcu_preempted_readers_exp(void); 136static int rcu_preempted_readers_exp(void);
136static void rcu_report_exp_done(void); 137static void rcu_report_exp_done(void);
137 138
@@ -146,6 +147,16 @@ static int rcu_cpu_blocking_cur_gp(void)
146/* 147/*
147 * Check for a running RCU reader. Because there is only one CPU, 148 * Check for a running RCU reader. Because there is only one CPU,
148 * there can be but one running RCU reader at a time. ;-) 149 * there can be but one running RCU reader at a time. ;-)
150 *
151 * Returns zero if there are no running readers. Returns a positive
152 * number if there is at least one reader within its RCU read-side
153 * critical section. Returns a negative number if an outermost reader
154 * is in the midst of exiting from its RCU read-side critical section
155 *
156 * Returns zero if there are no running readers. Returns a positive
157 * number if there is at least one reader within its RCU read-side
158 * critical section. Returns a negative number if an outermost reader
159 * is in the midst of exiting from its RCU read-side critical section.
149 */ 160 */
150static int rcu_preempt_running_reader(void) 161static int rcu_preempt_running_reader(void)
151{ 162{
@@ -307,7 +318,6 @@ static int rcu_boost(void)
307 t = container_of(tb, struct task_struct, rcu_node_entry); 318 t = container_of(tb, struct task_struct, rcu_node_entry);
308 rt_mutex_init_proxy_locked(&mtx, t); 319 rt_mutex_init_proxy_locked(&mtx, t);
309 t->rcu_boost_mutex = &mtx; 320 t->rcu_boost_mutex = &mtx;
310 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED;
311 raw_local_irq_restore(flags); 321 raw_local_irq_restore(flags);
312 rt_mutex_lock(&mtx); 322 rt_mutex_lock(&mtx);
313 rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ 323 rt_mutex_unlock(&mtx); /* Keep lockdep happy. */
@@ -475,7 +485,7 @@ void rcu_preempt_note_context_switch(void)
475 unsigned long flags; 485 unsigned long flags;
476 486
477 local_irq_save(flags); /* must exclude scheduler_tick(). */ 487 local_irq_save(flags); /* must exclude scheduler_tick(). */
478 if (rcu_preempt_running_reader() && 488 if (rcu_preempt_running_reader() > 0 &&
479 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { 489 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
480 490
481 /* Possibly blocking in an RCU read-side critical section. */ 491 /* Possibly blocking in an RCU read-side critical section. */
@@ -494,6 +504,13 @@ void rcu_preempt_note_context_switch(void)
494 list_add(&t->rcu_node_entry, &rcu_preempt_ctrlblk.blkd_tasks); 504 list_add(&t->rcu_node_entry, &rcu_preempt_ctrlblk.blkd_tasks);
495 if (rcu_cpu_blocking_cur_gp()) 505 if (rcu_cpu_blocking_cur_gp())
496 rcu_preempt_ctrlblk.gp_tasks = &t->rcu_node_entry; 506 rcu_preempt_ctrlblk.gp_tasks = &t->rcu_node_entry;
507 } else if (rcu_preempt_running_reader() < 0 &&
508 t->rcu_read_unlock_special) {
509 /*
510 * Complete exit from RCU read-side critical section on
511 * behalf of preempted instance of __rcu_read_unlock().
512 */
513 rcu_read_unlock_special(t);
497 } 514 }
498 515
499 /* 516 /*
@@ -526,12 +543,15 @@ EXPORT_SYMBOL_GPL(__rcu_read_lock);
526 * notify RCU core processing or task having blocked during the RCU 543 * notify RCU core processing or task having blocked during the RCU
527 * read-side critical section. 544 * read-side critical section.
528 */ 545 */
529static void rcu_read_unlock_special(struct task_struct *t) 546static noinline void rcu_read_unlock_special(struct task_struct *t)
530{ 547{
531 int empty; 548 int empty;
532 int empty_exp; 549 int empty_exp;
533 unsigned long flags; 550 unsigned long flags;
534 struct list_head *np; 551 struct list_head *np;
552#ifdef CONFIG_RCU_BOOST
553 struct rt_mutex *rbmp = NULL;
554#endif /* #ifdef CONFIG_RCU_BOOST */
535 int special; 555 int special;
536 556
537 /* 557 /*
@@ -552,7 +572,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
552 rcu_preempt_cpu_qs(); 572 rcu_preempt_cpu_qs();
553 573
554 /* Hardware IRQ handlers cannot block. */ 574 /* Hardware IRQ handlers cannot block. */
555 if (in_irq()) { 575 if (in_irq() || in_serving_softirq()) {
556 local_irq_restore(flags); 576 local_irq_restore(flags);
557 return; 577 return;
558 } 578 }
@@ -597,10 +617,10 @@ static void rcu_read_unlock_special(struct task_struct *t)
597 } 617 }
598#ifdef CONFIG_RCU_BOOST 618#ifdef CONFIG_RCU_BOOST
599 /* Unboost self if was boosted. */ 619 /* Unboost self if was boosted. */
600 if (special & RCU_READ_UNLOCK_BOOSTED) { 620 if (t->rcu_boost_mutex != NULL) {
601 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED; 621 rbmp = t->rcu_boost_mutex;
602 rt_mutex_unlock(t->rcu_boost_mutex);
603 t->rcu_boost_mutex = NULL; 622 t->rcu_boost_mutex = NULL;
623 rt_mutex_unlock(rbmp);
604 } 624 }
605#endif /* #ifdef CONFIG_RCU_BOOST */ 625#endif /* #ifdef CONFIG_RCU_BOOST */
606 local_irq_restore(flags); 626 local_irq_restore(flags);
@@ -618,13 +638,22 @@ void __rcu_read_unlock(void)
618 struct task_struct *t = current; 638 struct task_struct *t = current;
619 639
620 barrier(); /* needed if we ever invoke rcu_read_unlock in rcutiny.c */ 640 barrier(); /* needed if we ever invoke rcu_read_unlock in rcutiny.c */
621 --t->rcu_read_lock_nesting; 641 if (t->rcu_read_lock_nesting != 1)
622 barrier(); /* decrement before load of ->rcu_read_unlock_special */ 642 --t->rcu_read_lock_nesting;
623 if (t->rcu_read_lock_nesting == 0 && 643 else {
624 unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) 644 t->rcu_read_lock_nesting = INT_MIN;
625 rcu_read_unlock_special(t); 645 barrier(); /* assign before ->rcu_read_unlock_special load */
646 if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
647 rcu_read_unlock_special(t);
648 barrier(); /* ->rcu_read_unlock_special load before assign */
649 t->rcu_read_lock_nesting = 0;
650 }
626#ifdef CONFIG_PROVE_LOCKING 651#ifdef CONFIG_PROVE_LOCKING
627 WARN_ON_ONCE(t->rcu_read_lock_nesting < 0); 652 {
653 int rrln = ACCESS_ONCE(t->rcu_read_lock_nesting);
654
655 WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
656 }
628#endif /* #ifdef CONFIG_PROVE_LOCKING */ 657#endif /* #ifdef CONFIG_PROVE_LOCKING */
629} 658}
630EXPORT_SYMBOL_GPL(__rcu_read_unlock); 659EXPORT_SYMBOL_GPL(__rcu_read_unlock);
@@ -649,7 +678,7 @@ static void rcu_preempt_check_callbacks(void)
649 invoke_rcu_callbacks(); 678 invoke_rcu_callbacks();
650 if (rcu_preempt_gp_in_progress() && 679 if (rcu_preempt_gp_in_progress() &&
651 rcu_cpu_blocking_cur_gp() && 680 rcu_cpu_blocking_cur_gp() &&
652 rcu_preempt_running_reader()) 681 rcu_preempt_running_reader() > 0)
653 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; 682 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
654} 683}
655 684
@@ -706,6 +735,11 @@ EXPORT_SYMBOL_GPL(call_rcu);
706 */ 735 */
707void synchronize_rcu(void) 736void synchronize_rcu(void)
708{ 737{
738 rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
739 !lock_is_held(&rcu_lock_map) &&
740 !lock_is_held(&rcu_sched_lock_map),
741 "Illegal synchronize_rcu() in RCU read-side critical section");
742
709#ifdef CONFIG_DEBUG_LOCK_ALLOC 743#ifdef CONFIG_DEBUG_LOCK_ALLOC
710 if (!rcu_scheduler_active) 744 if (!rcu_scheduler_active)
711 return; 745 return;
@@ -882,7 +916,8 @@ static void rcu_preempt_process_callbacks(void)
882static void invoke_rcu_callbacks(void) 916static void invoke_rcu_callbacks(void)
883{ 917{
884 have_rcu_kthread_work = 1; 918 have_rcu_kthread_work = 1;
885 wake_up(&rcu_kthread_wq); 919 if (rcu_kthread_task != NULL)
920 wake_up(&rcu_kthread_wq);
886} 921}
887 922
888#ifdef CONFIG_RCU_TRACE 923#ifdef CONFIG_RCU_TRACE
@@ -943,12 +978,16 @@ early_initcall(rcu_spawn_kthreads);
943 978
944#else /* #ifdef CONFIG_RCU_BOOST */ 979#else /* #ifdef CONFIG_RCU_BOOST */
945 980
981/* Hold off callback invocation until early_initcall() time. */
982static int rcu_scheduler_fully_active __read_mostly;
983
946/* 984/*
947 * Start up softirq processing of callbacks. 985 * Start up softirq processing of callbacks.
948 */ 986 */
949void invoke_rcu_callbacks(void) 987void invoke_rcu_callbacks(void)
950{ 988{
951 raise_softirq(RCU_SOFTIRQ); 989 if (rcu_scheduler_fully_active)
990 raise_softirq(RCU_SOFTIRQ);
952} 991}
953 992
954#ifdef CONFIG_RCU_TRACE 993#ifdef CONFIG_RCU_TRACE
@@ -963,10 +1002,14 @@ static bool rcu_is_callbacks_kthread(void)
963 1002
964#endif /* #ifdef CONFIG_RCU_TRACE */ 1003#endif /* #ifdef CONFIG_RCU_TRACE */
965 1004
966void rcu_init(void) 1005static int __init rcu_scheduler_really_started(void)
967{ 1006{
1007 rcu_scheduler_fully_active = 1;
968 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); 1008 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
1009 raise_softirq(RCU_SOFTIRQ); /* Invoke any callbacks from early boot. */
1010 return 0;
969} 1011}
1012early_initcall(rcu_scheduler_really_started);
970 1013
971#endif /* #else #ifdef CONFIG_RCU_BOOST */ 1014#endif /* #else #ifdef CONFIG_RCU_BOOST */
972 1015
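The INT_MIN handling in __rcu_read_unlock() above encodes the reader state in
a single per-task counter; an annotated restatement of the outermost-unlock
path (comments added for exposition, not new kernel code):

    /* t->rcu_read_lock_nesting:
     *   > 0      task is inside an RCU read-side critical section
     *   == 0     no reader is running
     *   INT_MIN  outermost reader is exiting; rcu_read_unlock_special()
     *            processing may still be needed, and
     *            rcu_preempt_running_reader() reports this as negative.
     */
    if (t->rcu_read_lock_nesting != 1) {
            --t->rcu_read_lock_nesting;             /* nested unlock */
    } else {
            t->rcu_read_lock_nesting = INT_MIN;     /* mark outermost exit */
            barrier();      /* assign before ->rcu_read_unlock_special load */
            if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
                    rcu_read_unlock_special(t);
            barrier();      /* ->rcu_read_unlock_special load before assign */
            t->rcu_read_lock_nesting = 0;           /* fully exited */
    }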
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index a58ac285fc69..a89b381a8c6e 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -65,7 +65,10 @@ static int fqs_duration; /* Duration of bursts (us), 0 to disable. */
65static int fqs_holdoff; /* Hold time within burst (us). */ 65static int fqs_holdoff; /* Hold time within burst (us). */
66static int fqs_stutter = 3; /* Wait time between bursts (s). */ 66static int fqs_stutter = 3; /* Wait time between bursts (s). */
67static int onoff_interval; /* Wait time between CPU hotplugs, 0=disable. */ 67static int onoff_interval; /* Wait time between CPU hotplugs, 0=disable. */
68static int onoff_holdoff; /* Seconds after boot before CPU hotplugs. */
68static int shutdown_secs; /* Shutdown time (s). <=0 for no shutdown. */ 69static int shutdown_secs; /* Shutdown time (s). <=0 for no shutdown. */
70static int stall_cpu; /* CPU-stall duration (s). 0 for no stall. */
71static int stall_cpu_holdoff = 10; /* Time to wait until stall (s). */
69static int test_boost = 1; /* Test RCU prio boost: 0=no, 1=maybe, 2=yes. */ 72static int test_boost = 1; /* Test RCU prio boost: 0=no, 1=maybe, 2=yes. */
70static int test_boost_interval = 7; /* Interval between boost tests, seconds. */ 73static int test_boost_interval = 7; /* Interval between boost tests, seconds. */
71static int test_boost_duration = 4; /* Duration of each boost test, seconds. */ 74static int test_boost_duration = 4; /* Duration of each boost test, seconds. */
@@ -95,8 +98,14 @@ module_param(fqs_stutter, int, 0444);
95MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)"); 98MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)");
96module_param(onoff_interval, int, 0444); 99module_param(onoff_interval, int, 0444);
97MODULE_PARM_DESC(onoff_interval, "Time between CPU hotplugs (s), 0=disable"); 100MODULE_PARM_DESC(onoff_interval, "Time between CPU hotplugs (s), 0=disable");
101module_param(onoff_holdoff, int, 0444);
102MODULE_PARM_DESC(onoff_holdoff, "Time after boot before CPU hotplugs (s)");
98module_param(shutdown_secs, int, 0444); 103module_param(shutdown_secs, int, 0444);
99MODULE_PARM_DESC(shutdown_secs, "Shutdown time (s), zero to disable."); 104MODULE_PARM_DESC(shutdown_secs, "Shutdown time (s), zero to disable.");
105module_param(stall_cpu, int, 0444);
106MODULE_PARM_DESC(stall_cpu, "Stall duration (s), zero to disable.");
107module_param(stall_cpu_holdoff, int, 0444);
108MODULE_PARM_DESC(stall_cpu_holdoff, "Time to wait before starting stall (s).");
100module_param(test_boost, int, 0444); 109module_param(test_boost, int, 0444);
101MODULE_PARM_DESC(test_boost, "Test RCU prio boost: 0=no, 1=maybe, 2=yes."); 110MODULE_PARM_DESC(test_boost, "Test RCU prio boost: 0=no, 1=maybe, 2=yes.");
102module_param(test_boost_interval, int, 0444); 111module_param(test_boost_interval, int, 0444);
@@ -129,6 +138,7 @@ static struct task_struct *shutdown_task;
129#ifdef CONFIG_HOTPLUG_CPU 138#ifdef CONFIG_HOTPLUG_CPU
130static struct task_struct *onoff_task; 139static struct task_struct *onoff_task;
131#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 140#endif /* #ifdef CONFIG_HOTPLUG_CPU */
141static struct task_struct *stall_task;
132 142
133#define RCU_TORTURE_PIPE_LEN 10 143#define RCU_TORTURE_PIPE_LEN 10
134 144
@@ -990,12 +1000,12 @@ static void rcu_torture_timer(unsigned long unused)
990 rcu_read_lock_bh_held() || 1000 rcu_read_lock_bh_held() ||
991 rcu_read_lock_sched_held() || 1001 rcu_read_lock_sched_held() ||
992 srcu_read_lock_held(&srcu_ctl)); 1002 srcu_read_lock_held(&srcu_ctl));
993 do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu);
994 if (p == NULL) { 1003 if (p == NULL) {
995 /* Leave because rcu_torture_writer is not yet underway */ 1004 /* Leave because rcu_torture_writer is not yet underway */
996 cur_ops->readunlock(idx); 1005 cur_ops->readunlock(idx);
997 return; 1006 return;
998 } 1007 }
1008 do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu);
999 if (p->rtort_mbtest == 0) 1009 if (p->rtort_mbtest == 0)
1000 atomic_inc(&n_rcu_torture_mberror); 1010 atomic_inc(&n_rcu_torture_mberror);
1001 spin_lock(&rand_lock); 1011 spin_lock(&rand_lock);
@@ -1053,13 +1063,13 @@ rcu_torture_reader(void *arg)
1053 rcu_read_lock_bh_held() || 1063 rcu_read_lock_bh_held() ||
1054 rcu_read_lock_sched_held() || 1064 rcu_read_lock_sched_held() ||
1055 srcu_read_lock_held(&srcu_ctl)); 1065 srcu_read_lock_held(&srcu_ctl));
1056 do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu);
1057 if (p == NULL) { 1066 if (p == NULL) {
1058 /* Wait for rcu_torture_writer to get underway */ 1067 /* Wait for rcu_torture_writer to get underway */
1059 cur_ops->readunlock(idx); 1068 cur_ops->readunlock(idx);
1060 schedule_timeout_interruptible(HZ); 1069 schedule_timeout_interruptible(HZ);
1061 continue; 1070 continue;
1062 } 1071 }
1072 do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu);
1063 if (p->rtort_mbtest == 0) 1073 if (p->rtort_mbtest == 0)
1064 atomic_inc(&n_rcu_torture_mberror); 1074 atomic_inc(&n_rcu_torture_mberror);
1065 cur_ops->read_delay(&rand); 1075 cur_ops->read_delay(&rand);
@@ -1300,13 +1310,13 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, char *tag)
1300 "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d " 1310 "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d "
1301 "test_boost=%d/%d test_boost_interval=%d " 1311 "test_boost=%d/%d test_boost_interval=%d "
1302 "test_boost_duration=%d shutdown_secs=%d " 1312 "test_boost_duration=%d shutdown_secs=%d "
1303 "onoff_interval=%d\n", 1313 "onoff_interval=%d onoff_holdoff=%d\n",
1304 torture_type, tag, nrealreaders, nfakewriters, 1314 torture_type, tag, nrealreaders, nfakewriters,
1305 stat_interval, verbose, test_no_idle_hz, shuffle_interval, 1315 stat_interval, verbose, test_no_idle_hz, shuffle_interval,
1306 stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter, 1316 stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter,
1307 test_boost, cur_ops->can_boost, 1317 test_boost, cur_ops->can_boost,
1308 test_boost_interval, test_boost_duration, shutdown_secs, 1318 test_boost_interval, test_boost_duration, shutdown_secs,
1309 onoff_interval); 1319 onoff_interval, onoff_holdoff);
1310} 1320}
1311 1321
1312static struct notifier_block rcutorture_shutdown_nb = { 1322static struct notifier_block rcutorture_shutdown_nb = {
@@ -1410,6 +1420,11 @@ rcu_torture_onoff(void *arg)
1410 for_each_online_cpu(cpu) 1420 for_each_online_cpu(cpu)
1411 maxcpu = cpu; 1421 maxcpu = cpu;
1412 WARN_ON(maxcpu < 0); 1422 WARN_ON(maxcpu < 0);
1423 if (onoff_holdoff > 0) {
1424 VERBOSE_PRINTK_STRING("rcu_torture_onoff begin holdoff");
1425 schedule_timeout_interruptible(onoff_holdoff * HZ);
1426 VERBOSE_PRINTK_STRING("rcu_torture_onoff end holdoff");
1427 }
1413 while (!kthread_should_stop()) { 1428 while (!kthread_should_stop()) {
1414 cpu = (rcu_random(&rand) >> 4) % (maxcpu + 1); 1429 cpu = (rcu_random(&rand) >> 4) % (maxcpu + 1);
1415 if (cpu_online(cpu) && cpu_is_hotpluggable(cpu)) { 1430 if (cpu_online(cpu) && cpu_is_hotpluggable(cpu)) {
@@ -1450,12 +1465,15 @@ rcu_torture_onoff(void *arg)
1450static int __cpuinit 1465static int __cpuinit
1451rcu_torture_onoff_init(void) 1466rcu_torture_onoff_init(void)
1452{ 1467{
1468 int ret;
1469
1453 if (onoff_interval <= 0) 1470 if (onoff_interval <= 0)
1454 return 0; 1471 return 0;
1455 onoff_task = kthread_run(rcu_torture_onoff, NULL, "rcu_torture_onoff"); 1472 onoff_task = kthread_run(rcu_torture_onoff, NULL, "rcu_torture_onoff");
1456 if (IS_ERR(onoff_task)) { 1473 if (IS_ERR(onoff_task)) {
1474 ret = PTR_ERR(onoff_task);
1457 onoff_task = NULL; 1475 onoff_task = NULL;
1458 return PTR_ERR(onoff_task); 1476 return ret;
1459 } 1477 }
1460 return 0; 1478 return 0;
1461} 1479}
@@ -1481,6 +1499,63 @@ static void rcu_torture_onoff_cleanup(void)
1481 1499
1482#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */ 1500#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
1483 1501
1502/*
1503 * CPU-stall kthread. It waits as specified by stall_cpu_holdoff, then
1504 * induces a CPU stall for the time specified by stall_cpu.
1505 */
1506static int __cpuinit rcu_torture_stall(void *args)
1507{
1508 unsigned long stop_at;
1509
1510 VERBOSE_PRINTK_STRING("rcu_torture_stall task started");
1511 if (stall_cpu_holdoff > 0) {
1512 VERBOSE_PRINTK_STRING("rcu_torture_stall begin holdoff");
1513 schedule_timeout_interruptible(stall_cpu_holdoff * HZ);
1514 VERBOSE_PRINTK_STRING("rcu_torture_stall end holdoff");
1515 }
1516 if (!kthread_should_stop()) {
1517 stop_at = get_seconds() + stall_cpu;
1518 /* RCU CPU stall is expected behavior in following code. */
1519 printk(KERN_ALERT "rcu_torture_stall start.\n");
1520 rcu_read_lock();
1521 preempt_disable();
1522 while (ULONG_CMP_LT(get_seconds(), stop_at))
1523 continue; /* Induce RCU CPU stall warning. */
1524 preempt_enable();
1525 rcu_read_unlock();
1526 printk(KERN_ALERT "rcu_torture_stall end.\n");
1527 }
1528 rcutorture_shutdown_absorb("rcu_torture_stall");
1529 while (!kthread_should_stop())
1530 schedule_timeout_interruptible(10 * HZ);
1531 return 0;
1532}
1533
1534/* Spawn CPU-stall kthread, if stall_cpu specified. */
1535static int __init rcu_torture_stall_init(void)
1536{
1537 int ret;
1538
1539 if (stall_cpu <= 0)
1540 return 0;
1541 stall_task = kthread_run(rcu_torture_stall, NULL, "rcu_torture_stall");
1542 if (IS_ERR(stall_task)) {
1543 ret = PTR_ERR(stall_task);
1544 stall_task = NULL;
1545 return ret;
1546 }
1547 return 0;
1548}
1549
1550/* Clean up after the CPU-stall kthread, if one was spawned. */
1551static void rcu_torture_stall_cleanup(void)
1552{
1553 if (stall_task == NULL)
1554 return;
1555 VERBOSE_PRINTK_STRING("Stopping rcu_torture_stall_task.");
1556 kthread_stop(stall_task);
1557}
1558
1484static int rcutorture_cpu_notify(struct notifier_block *self, 1559static int rcutorture_cpu_notify(struct notifier_block *self,
1485 unsigned long action, void *hcpu) 1560 unsigned long action, void *hcpu)
1486{ 1561{
@@ -1523,6 +1598,7 @@ rcu_torture_cleanup(void)
1523 fullstop = FULLSTOP_RMMOD; 1598 fullstop = FULLSTOP_RMMOD;
1524 mutex_unlock(&fullstop_mutex); 1599 mutex_unlock(&fullstop_mutex);
1525 unregister_reboot_notifier(&rcutorture_shutdown_nb); 1600 unregister_reboot_notifier(&rcutorture_shutdown_nb);
1601 rcu_torture_stall_cleanup();
1526 if (stutter_task) { 1602 if (stutter_task) {
1527 VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task"); 1603 VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task");
1528 kthread_stop(stutter_task); 1604 kthread_stop(stutter_task);
@@ -1602,6 +1678,10 @@ rcu_torture_cleanup(void)
1602 cur_ops->cleanup(); 1678 cur_ops->cleanup();
1603 if (atomic_read(&n_rcu_torture_error)) 1679 if (atomic_read(&n_rcu_torture_error))
1604 rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE"); 1680 rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE");
1681 else if (n_online_successes != n_online_attempts ||
1682 n_offline_successes != n_offline_attempts)
1683 rcu_torture_print_module_parms(cur_ops,
1684 "End of test: RCU_HOTPLUG");
1605 else 1685 else
1606 rcu_torture_print_module_parms(cur_ops, "End of test: SUCCESS"); 1686 rcu_torture_print_module_parms(cur_ops, "End of test: SUCCESS");
1607} 1687}
@@ -1819,6 +1899,7 @@ rcu_torture_init(void)
1819 } 1899 }
1820 rcu_torture_onoff_init(); 1900 rcu_torture_onoff_init();
1821 register_reboot_notifier(&rcutorture_shutdown_nb); 1901 register_reboot_notifier(&rcutorture_shutdown_nb);
1902 rcu_torture_stall_init();
1822 rcutorture_record_test_transition(); 1903 rcutorture_record_test_transition();
1823 mutex_unlock(&fullstop_mutex); 1904 mutex_unlock(&fullstop_mutex);
1824 return 0; 1905 return 0;
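With the new module parameters in place, a stall test can be requested at
module-load time, for example (parameter names as defined above, values chosen
arbitrarily):

    modprobe rcutorture stall_cpu=30 stall_cpu_holdoff=60 onoff_interval=3 onoff_holdoff=30

which holds off both the CPU-hotplug stress and the induced stall until well
after boot.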
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 6c4a6722abfd..1050d6d3922c 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -50,6 +50,8 @@
50#include <linux/wait.h> 50#include <linux/wait.h>
51#include <linux/kthread.h> 51#include <linux/kthread.h>
52#include <linux/prefetch.h> 52#include <linux/prefetch.h>
53#include <linux/delay.h>
54#include <linux/stop_machine.h>
53 55
54#include "rcutree.h" 56#include "rcutree.h"
55#include <trace/events/rcu.h> 57#include <trace/events/rcu.h>
@@ -196,7 +198,7 @@ void rcu_note_context_switch(int cpu)
196EXPORT_SYMBOL_GPL(rcu_note_context_switch); 198EXPORT_SYMBOL_GPL(rcu_note_context_switch);
197 199
198DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { 200DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
199 .dynticks_nesting = DYNTICK_TASK_NESTING, 201 .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
200 .dynticks = ATOMIC_INIT(1), 202 .dynticks = ATOMIC_INIT(1),
201}; 203};
202 204
@@ -208,8 +210,11 @@ module_param(blimit, int, 0);
208module_param(qhimark, int, 0); 210module_param(qhimark, int, 0);
209module_param(qlowmark, int, 0); 211module_param(qlowmark, int, 0);
210 212
211int rcu_cpu_stall_suppress __read_mostly; 213int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
214int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
215
212module_param(rcu_cpu_stall_suppress, int, 0644); 216module_param(rcu_cpu_stall_suppress, int, 0644);
217module_param(rcu_cpu_stall_timeout, int, 0644);
213 218
214static void force_quiescent_state(struct rcu_state *rsp, int relaxed); 219static void force_quiescent_state(struct rcu_state *rsp, int relaxed);
215static int rcu_pending(int cpu); 220static int rcu_pending(int cpu);
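Because rcu_cpu_stall_timeout is registered with mode 0644, the stall-warning
timeout becomes runtime-tunable through sysfs, as noted in the pull request;
with rcutree built in, this would typically be:

    echo 60 > /sys/module/rcutree/parameters/rcu_cpu_stall_timeout

with jiffies_till_stall_check() below clamping the value to the 3..300 second
range.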
@@ -301,8 +306,6 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
301 return &rsp->node[0]; 306 return &rsp->node[0];
302} 307}
303 308
304#ifdef CONFIG_SMP
305
306/* 309/*
307 * If the specified CPU is offline, tell the caller that it is in 310 * If the specified CPU is offline, tell the caller that it is in
308 * a quiescent state. Otherwise, whack it with a reschedule IPI. 311 * a quiescent state. Otherwise, whack it with a reschedule IPI.
@@ -317,30 +320,21 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
317static int rcu_implicit_offline_qs(struct rcu_data *rdp) 320static int rcu_implicit_offline_qs(struct rcu_data *rdp)
318{ 321{
319 /* 322 /*
320 * If the CPU is offline, it is in a quiescent state. We can 323 * If the CPU is offline for more than a jiffy, it is in a quiescent
321 * trust its state not to change because interrupts are disabled. 324 * state. We can trust its state not to change because interrupts
325 * are disabled. The reason for the jiffy's worth of slack is to
326 * handle CPUs initializing on the way up and finding their way
327 * to the idle loop on the way down.
322 */ 328 */
323 if (cpu_is_offline(rdp->cpu)) { 329 if (cpu_is_offline(rdp->cpu) &&
330 ULONG_CMP_LT(rdp->rsp->gp_start + 2, jiffies)) {
324 trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl"); 331 trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl");
325 rdp->offline_fqs++; 332 rdp->offline_fqs++;
326 return 1; 333 return 1;
327 } 334 }
328
329 /*
330 * The CPU is online, so send it a reschedule IPI. This forces
331 * it through the scheduler, and (inefficiently) also handles cases
332 * where idle loops fail to inform RCU about the CPU being idle.
333 */
334 if (rdp->cpu != smp_processor_id())
335 smp_send_reschedule(rdp->cpu);
336 else
337 set_need_resched();
338 rdp->resched_ipi++;
339 return 0; 335 return 0;
340} 336}
341 337
342#endif /* #ifdef CONFIG_SMP */
343
344/* 338/*
345 * rcu_idle_enter_common - inform RCU that current CPU is moving towards idle 339 * rcu_idle_enter_common - inform RCU that current CPU is moving towards idle
346 * 340 *
@@ -366,6 +360,17 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)
366 atomic_inc(&rdtp->dynticks); 360 atomic_inc(&rdtp->dynticks);
367 smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */ 361 smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */
368 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); 362 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
363
364 /*
365 * The idle task is not permitted to enter the idle loop while
366 * in an RCU read-side critical section.
367 */
368 rcu_lockdep_assert(!lock_is_held(&rcu_lock_map),
369 "Illegal idle entry in RCU read-side critical section.");
370 rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map),
371 "Illegal idle entry in RCU-bh read-side critical section.");
372 rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map),
373 "Illegal idle entry in RCU-sched read-side critical section.");
369} 374}
370 375
371/** 376/**
@@ -389,10 +394,15 @@ void rcu_idle_enter(void)
389 local_irq_save(flags); 394 local_irq_save(flags);
390 rdtp = &__get_cpu_var(rcu_dynticks); 395 rdtp = &__get_cpu_var(rcu_dynticks);
391 oldval = rdtp->dynticks_nesting; 396 oldval = rdtp->dynticks_nesting;
392 rdtp->dynticks_nesting = 0; 397 WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0);
398 if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE)
399 rdtp->dynticks_nesting = 0;
400 else
401 rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE;
393 rcu_idle_enter_common(rdtp, oldval); 402 rcu_idle_enter_common(rdtp, oldval);
394 local_irq_restore(flags); 403 local_irq_restore(flags);
395} 404}
405EXPORT_SYMBOL_GPL(rcu_idle_enter);
396 406
397/** 407/**
398 * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle 408 * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle
@@ -462,7 +472,7 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
462 * Exit idle mode, in other words, -enter- the mode in which RCU 472 * Exit idle mode, in other words, -enter- the mode in which RCU
463 * read-side critical sections can occur. 473 * read-side critical sections can occur.
464 * 474 *
465 * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NESTING to 475 * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NEST to
466 * allow for the possibility of usermode upcalls messing up our count 476 * allow for the possibility of usermode upcalls messing up our count
467 * of interrupt nesting level during the busy period that is just 477 * of interrupt nesting level during the busy period that is just
468 * now starting. 478 * now starting.
@@ -476,11 +486,15 @@ void rcu_idle_exit(void)
476 local_irq_save(flags); 486 local_irq_save(flags);
477 rdtp = &__get_cpu_var(rcu_dynticks); 487 rdtp = &__get_cpu_var(rcu_dynticks);
478 oldval = rdtp->dynticks_nesting; 488 oldval = rdtp->dynticks_nesting;
479 WARN_ON_ONCE(oldval != 0); 489 WARN_ON_ONCE(oldval < 0);
480 rdtp->dynticks_nesting = DYNTICK_TASK_NESTING; 490 if (oldval & DYNTICK_TASK_NEST_MASK)
491 rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
492 else
493 rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
481 rcu_idle_exit_common(rdtp, oldval); 494 rcu_idle_exit_common(rdtp, oldval);
482 local_irq_restore(flags); 495 local_irq_restore(flags);
483} 496}
497EXPORT_SYMBOL_GPL(rcu_idle_exit);
484 498
485/** 499/**
486 * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle 500 * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
@@ -581,6 +595,49 @@ int rcu_is_cpu_idle(void)
581} 595}
582EXPORT_SYMBOL(rcu_is_cpu_idle); 596EXPORT_SYMBOL(rcu_is_cpu_idle);
583 597
598#ifdef CONFIG_HOTPLUG_CPU
599
600/*
601 * Is the current CPU online? Disable preemption to avoid false positives
602 * that could otherwise happen due to the current CPU number being sampled,
603 * this task being preempted, its old CPU being taken offline, resuming
604 * on some other CPU, then determining that its old CPU is now offline.
605 * It is OK to use RCU on an offline processor during initial boot, hence
606 * the check for rcu_scheduler_fully_active. Note also that it is OK
607 * for a CPU coming online to use RCU for one jiffy prior to marking itself
608 * online in the cpu_online_mask. Similarly, it is OK for a CPU going
609 * offline to continue to use RCU for one jiffy after marking itself
610 * offline in the cpu_online_mask. This leniency is necessary given the
611 * non-atomic nature of the online and offline processing, for example,
612 * the fact that a CPU enters the scheduler after completing the CPU_DYING
613 * notifiers.
614 *
615 * This is also why RCU internally marks CPUs online during the
616 * CPU_UP_PREPARE phase and offline during the CPU_DEAD phase.
617 *
618 * Disable checking if in an NMI handler because we cannot safely report
619 * errors from NMI handlers anyway.
620 */
621bool rcu_lockdep_current_cpu_online(void)
622{
623 struct rcu_data *rdp;
624 struct rcu_node *rnp;
625 bool ret;
626
627 if (in_nmi())
628 return 1;
629 preempt_disable();
630 rdp = &__get_cpu_var(rcu_sched_data);
631 rnp = rdp->mynode;
632 ret = (rdp->grpmask & rnp->qsmaskinit) ||
633 !rcu_scheduler_fully_active;
634 preempt_enable();
635 return ret;
636}
637EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
638
639#endif /* #ifdef CONFIG_HOTPLUG_CPU */
640
584#endif /* #ifdef CONFIG_PROVE_RCU */ 641#endif /* #ifdef CONFIG_PROVE_RCU */
585 642
586/** 643/**
@@ -595,8 +652,6 @@ int rcu_is_cpu_rrupt_from_idle(void)
595 return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1; 652 return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1;
596} 653}
597 654
598#ifdef CONFIG_SMP
599
600/* 655/*
601 * Snapshot the specified CPU's dynticks counter so that we can later 656 * Snapshot the specified CPU's dynticks counter so that we can later
602 * credit them with an implicit quiescent state. Return 1 if this CPU 657 * credit them with an implicit quiescent state. Return 1 if this CPU
@@ -640,12 +695,28 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
640 return rcu_implicit_offline_qs(rdp); 695 return rcu_implicit_offline_qs(rdp);
641} 696}
642 697
643#endif /* #ifdef CONFIG_SMP */ 698static int jiffies_till_stall_check(void)
699{
700 int till_stall_check = ACCESS_ONCE(rcu_cpu_stall_timeout);
701
702 /*
703 * Limit check must be consistent with the Kconfig limits
704 * for CONFIG_RCU_CPU_STALL_TIMEOUT.
705 */
706 if (till_stall_check < 3) {
707 ACCESS_ONCE(rcu_cpu_stall_timeout) = 3;
708 till_stall_check = 3;
709 } else if (till_stall_check > 300) {
710 ACCESS_ONCE(rcu_cpu_stall_timeout) = 300;
711 till_stall_check = 300;
712 }
713 return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
714}
644 715
645static void record_gp_stall_check_time(struct rcu_state *rsp) 716static void record_gp_stall_check_time(struct rcu_state *rsp)
646{ 717{
647 rsp->gp_start = jiffies; 718 rsp->gp_start = jiffies;
648 rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK; 719 rsp->jiffies_stall = jiffies + jiffies_till_stall_check();
649} 720}
650 721
651static void print_other_cpu_stall(struct rcu_state *rsp) 722static void print_other_cpu_stall(struct rcu_state *rsp)
@@ -664,13 +735,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
664 raw_spin_unlock_irqrestore(&rnp->lock, flags); 735 raw_spin_unlock_irqrestore(&rnp->lock, flags);
665 return; 736 return;
666 } 737 }
667 rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK; 738 rsp->jiffies_stall = jiffies + 3 * jiffies_till_stall_check() + 3;
668
669 /*
670 * Now rat on any tasks that got kicked up to the root rcu_node
671 * due to CPU offlining.
672 */
673 ndetected = rcu_print_task_stall(rnp);
674 raw_spin_unlock_irqrestore(&rnp->lock, flags); 739 raw_spin_unlock_irqrestore(&rnp->lock, flags);
675 740
676 /* 741 /*
@@ -678,8 +743,9 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
678 * See Documentation/RCU/stallwarn.txt for info on how to debug 743 * See Documentation/RCU/stallwarn.txt for info on how to debug
679 * RCU CPU stall warnings. 744 * RCU CPU stall warnings.
680 */ 745 */
681 printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks: {", 746 printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks:",
682 rsp->name); 747 rsp->name);
748 print_cpu_stall_info_begin();
683 rcu_for_each_leaf_node(rsp, rnp) { 749 rcu_for_each_leaf_node(rsp, rnp) {
684 raw_spin_lock_irqsave(&rnp->lock, flags); 750 raw_spin_lock_irqsave(&rnp->lock, flags);
685 ndetected += rcu_print_task_stall(rnp); 751 ndetected += rcu_print_task_stall(rnp);
@@ -688,11 +754,22 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
688 continue; 754 continue;
689 for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) 755 for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
690 if (rnp->qsmask & (1UL << cpu)) { 756 if (rnp->qsmask & (1UL << cpu)) {
691 printk(" %d", rnp->grplo + cpu); 757 print_cpu_stall_info(rsp, rnp->grplo + cpu);
692 ndetected++; 758 ndetected++;
693 } 759 }
694 } 760 }
695 printk("} (detected by %d, t=%ld jiffies)\n", 761
762 /*
763 * Now rat on any tasks that got kicked up to the root rcu_node
764 * due to CPU offlining.
765 */
766 rnp = rcu_get_root(rsp);
767 raw_spin_lock_irqsave(&rnp->lock, flags);
768 ndetected = rcu_print_task_stall(rnp);
769 raw_spin_unlock_irqrestore(&rnp->lock, flags);
770
771 print_cpu_stall_info_end();
772 printk(KERN_CONT "(detected by %d, t=%ld jiffies)\n",
696 smp_processor_id(), (long)(jiffies - rsp->gp_start)); 773 smp_processor_id(), (long)(jiffies - rsp->gp_start));
697 if (ndetected == 0) 774 if (ndetected == 0)
698 printk(KERN_ERR "INFO: Stall ended before state dump start\n"); 775 printk(KERN_ERR "INFO: Stall ended before state dump start\n");
@@ -716,15 +793,18 @@ static void print_cpu_stall(struct rcu_state *rsp)
716 * See Documentation/RCU/stallwarn.txt for info on how to debug 793 * See Documentation/RCU/stallwarn.txt for info on how to debug
717 * RCU CPU stall warnings. 794 * RCU CPU stall warnings.
718 */ 795 */
719 printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n", 796 printk(KERN_ERR "INFO: %s self-detected stall on CPU", rsp->name);
720 rsp->name, smp_processor_id(), jiffies - rsp->gp_start); 797 print_cpu_stall_info_begin();
798 print_cpu_stall_info(rsp, smp_processor_id());
799 print_cpu_stall_info_end();
800 printk(KERN_CONT " (t=%lu jiffies)\n", jiffies - rsp->gp_start);
721 if (!trigger_all_cpu_backtrace()) 801 if (!trigger_all_cpu_backtrace())
722 dump_stack(); 802 dump_stack();
723 803
724 raw_spin_lock_irqsave(&rnp->lock, flags); 804 raw_spin_lock_irqsave(&rnp->lock, flags);
725 if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall)) 805 if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall))
726 rsp->jiffies_stall = 806 rsp->jiffies_stall = jiffies +
727 jiffies + RCU_SECONDS_TILL_STALL_RECHECK; 807 3 * jiffies_till_stall_check() + 3;
728 raw_spin_unlock_irqrestore(&rnp->lock, flags); 808 raw_spin_unlock_irqrestore(&rnp->lock, flags);
729 809
730 set_need_resched(); /* kick ourselves to get things going. */ 810 set_need_resched(); /* kick ourselves to get things going. */
@@ -807,6 +887,7 @@ static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct
807 rdp->passed_quiesce = 0; 887 rdp->passed_quiesce = 0;
808 } else 888 } else
809 rdp->qs_pending = 0; 889 rdp->qs_pending = 0;
890 zero_cpu_stall_ticks(rdp);
810 } 891 }
811} 892}
812 893
@@ -943,6 +1024,10 @@ rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat
943 * in preparation for detecting the next grace period. The caller must hold 1024 * in preparation for detecting the next grace period. The caller must hold
944 * the root node's ->lock, which is released before return. Hard irqs must 1025 * the root node's ->lock, which is released before return. Hard irqs must
945 * be disabled. 1026 * be disabled.
1027 *
1028 * Note that it is legal for a dying CPU (which is marked as offline) to
1029 * invoke this function. This can happen when the dying CPU reports its
1030 * quiescent state.
946 */ 1031 */
947static void 1032static void
948rcu_start_gp(struct rcu_state *rsp, unsigned long flags) 1033rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
@@ -980,26 +1065,8 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
980 rsp->fqs_state = RCU_GP_INIT; /* Hold off force_quiescent_state. */ 1065 rsp->fqs_state = RCU_GP_INIT; /* Hold off force_quiescent_state. */
981 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; 1066 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
982 record_gp_stall_check_time(rsp); 1067 record_gp_stall_check_time(rsp);
983
984 /* Special-case the common single-level case. */
985 if (NUM_RCU_NODES == 1) {
986 rcu_preempt_check_blocked_tasks(rnp);
987 rnp->qsmask = rnp->qsmaskinit;
988 rnp->gpnum = rsp->gpnum;
989 rnp->completed = rsp->completed;
990 rsp->fqs_state = RCU_SIGNAL_INIT; /* force_quiescent_state OK */
991 rcu_start_gp_per_cpu(rsp, rnp, rdp);
992 rcu_preempt_boost_start_gp(rnp);
993 trace_rcu_grace_period_init(rsp->name, rnp->gpnum,
994 rnp->level, rnp->grplo,
995 rnp->grphi, rnp->qsmask);
996 raw_spin_unlock_irqrestore(&rnp->lock, flags);
997 return;
998 }
999
1000 raw_spin_unlock(&rnp->lock); /* leave irqs disabled. */ 1068 raw_spin_unlock(&rnp->lock); /* leave irqs disabled. */
1001 1069
1002
1003 /* Exclude any concurrent CPU-hotplug operations. */ 1070 /* Exclude any concurrent CPU-hotplug operations. */
1004 raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */ 1071 raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */
1005 1072
@@ -1245,53 +1312,115 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
1245 1312
1246/* 1313/*
1247 * Move a dying CPU's RCU callbacks to online CPU's callback list. 1314 * Move a dying CPU's RCU callbacks to online CPU's callback list.
1248 * Synchronization is not required because this function executes 1315 * Also record a quiescent state for this CPU for the current grace period.
1249 * in stop_machine() context. 1316 * Synchronization and interrupt disabling are not required because
1317 * this function executes in stop_machine() context. Therefore, cleanup
1318 * operations that might block must be done later from the CPU_DEAD
1319 * notifier.
1320 *
1321 * Note that the outgoing CPU's bit has already been cleared in the
1322 * cpu_online_mask. This allows us to randomly pick a callback
1323 * destination from the bits set in that mask.
1250 */ 1324 */
1251static void rcu_send_cbs_to_online(struct rcu_state *rsp) 1325static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
1252{ 1326{
1253 int i; 1327 int i;
1254 /* current DYING CPU is cleared in the cpu_online_mask */ 1328 unsigned long mask;
1255 int receive_cpu = cpumask_any(cpu_online_mask); 1329 int receive_cpu = cpumask_any(cpu_online_mask);
1256 struct rcu_data *rdp = this_cpu_ptr(rsp->rda); 1330 struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
1257 struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu); 1331 struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu);
1332 RCU_TRACE(struct rcu_node *rnp = rdp->mynode); /* For dying CPU. */
1333
1334 /* First, adjust the counts. */
1335 if (rdp->nxtlist != NULL) {
1336 receive_rdp->qlen_lazy += rdp->qlen_lazy;
1337 receive_rdp->qlen += rdp->qlen;
1338 rdp->qlen_lazy = 0;
1339 rdp->qlen = 0;
1340 }
1258 1341
1259 if (rdp->nxtlist == NULL) 1342 /*
1260 return; /* irqs disabled, so comparison is stable. */ 1343 * Next, move ready-to-invoke callbacks to be invoked on some
1344 * other CPU. These will not be required to pass through another
1345 * grace period: They are done, regardless of CPU.
1346 */
1347 if (rdp->nxtlist != NULL &&
1348 rdp->nxttail[RCU_DONE_TAIL] != &rdp->nxtlist) {
1349 struct rcu_head *oldhead;
1350 struct rcu_head **oldtail;
1351 struct rcu_head **newtail;
1352
1353 oldhead = rdp->nxtlist;
1354 oldtail = receive_rdp->nxttail[RCU_DONE_TAIL];
1355 rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
1356 *rdp->nxttail[RCU_DONE_TAIL] = *oldtail;
1357 *receive_rdp->nxttail[RCU_DONE_TAIL] = oldhead;
1358 newtail = rdp->nxttail[RCU_DONE_TAIL];
1359 for (i = RCU_DONE_TAIL; i < RCU_NEXT_SIZE; i++) {
1360 if (receive_rdp->nxttail[i] == oldtail)
1361 receive_rdp->nxttail[i] = newtail;
1362 if (rdp->nxttail[i] == newtail)
1363 rdp->nxttail[i] = &rdp->nxtlist;
1364 }
1365 }
1261 1366
1262 *receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist; 1367 /*
1263 receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; 1368 * Finally, put the rest of the callbacks at the end of the list.
1264 receive_rdp->qlen += rdp->qlen; 1369 * The ones that made it partway through get to start over: We
1265 receive_rdp->n_cbs_adopted += rdp->qlen; 1370 * cannot assume that grace periods are synchronized across CPUs.
1266 rdp->n_cbs_orphaned += rdp->qlen; 1371 * (We could splice RCU_WAIT_TAIL into RCU_NEXT_READY_TAIL, but
1372 * this does not seem compelling. Not yet, anyway.)
1373 */
1374 if (rdp->nxtlist != NULL) {
1375 *receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist;
1376 receive_rdp->nxttail[RCU_NEXT_TAIL] =
1377 rdp->nxttail[RCU_NEXT_TAIL];
1378 receive_rdp->n_cbs_adopted += rdp->qlen;
1379 rdp->n_cbs_orphaned += rdp->qlen;
1380
1381 rdp->nxtlist = NULL;
1382 for (i = 0; i < RCU_NEXT_SIZE; i++)
1383 rdp->nxttail[i] = &rdp->nxtlist;
1384 }
1267 1385
1268 rdp->nxtlist = NULL; 1386 /*
1269 for (i = 0; i < RCU_NEXT_SIZE; i++) 1387 * Record a quiescent state for the dying CPU. This is safe
1270 rdp->nxttail[i] = &rdp->nxtlist; 1388 * only because we have already cleared out the callbacks.
1271 rdp->qlen = 0; 1389 * (Otherwise, the RCU core might try to schedule the invocation
1390 * of callbacks on this now-offline CPU, which would be bad.)
1391 */
1392 mask = rdp->grpmask; /* rnp->grplo is constant. */
1393 trace_rcu_grace_period(rsp->name,
1394 rnp->gpnum + 1 - !!(rnp->qsmask & mask),
1395 "cpuofl");
1396 rcu_report_qs_rdp(smp_processor_id(), rsp, rdp, rsp->gpnum);
1397 /* Note that rcu_report_qs_rdp() might call trace_rcu_grace_period(). */
1272} 1398}
1273 1399
1274/* 1400/*
1275 * Remove the outgoing CPU from the bitmasks in the rcu_node hierarchy 1401 * The CPU has been completely removed, and some other CPU is reporting
1276 * and move all callbacks from the outgoing CPU to the current one. 1402 * this fact from process context. Do the remainder of the cleanup.
1277 * There can only be one CPU hotplug operation at a time, so no other 1403 * There can only be one CPU hotplug operation at a time, so no other
1278 * CPU can be attempting to update rcu_cpu_kthread_task. 1404 * CPU can be attempting to update rcu_cpu_kthread_task.
1279 */ 1405 */
1280static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) 1406static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
1281{ 1407{
1282 unsigned long flags; 1408 unsigned long flags;
1283 unsigned long mask; 1409 unsigned long mask;
1284 int need_report = 0; 1410 int need_report = 0;
1285 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 1411 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
1286 struct rcu_node *rnp; 1412 struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rnp. */
1287 1413
1414 /* Adjust any no-longer-needed kthreads. */
1288 rcu_stop_cpu_kthread(cpu); 1415 rcu_stop_cpu_kthread(cpu);
1416 rcu_node_kthread_setaffinity(rnp, -1);
1417
1418 /* Remove the dying CPU from the bitmasks in the rcu_node hierarchy. */
1289 1419
1290 /* Exclude any attempts to start a new grace period. */ 1420 /* Exclude any attempts to start a new grace period. */
1291 raw_spin_lock_irqsave(&rsp->onofflock, flags); 1421 raw_spin_lock_irqsave(&rsp->onofflock, flags);
1292 1422
1293 /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */ 1423 /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */
1294 rnp = rdp->mynode; /* this is the outgoing CPU's rnp. */
1295 mask = rdp->grpmask; /* rnp->grplo is constant. */ 1424 mask = rdp->grpmask; /* rnp->grplo is constant. */
1296 do { 1425 do {
1297 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 1426 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
@@ -1299,20 +1428,11 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
1299 if (rnp->qsmaskinit != 0) { 1428 if (rnp->qsmaskinit != 0) {
1300 if (rnp != rdp->mynode) 1429 if (rnp != rdp->mynode)
1301 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 1430 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
1302 else
1303 trace_rcu_grace_period(rsp->name,
1304 rnp->gpnum + 1 -
1305 !!(rnp->qsmask & mask),
1306 "cpuofl");
1307 break; 1431 break;
1308 } 1432 }
1309 if (rnp == rdp->mynode) { 1433 if (rnp == rdp->mynode)
1310 trace_rcu_grace_period(rsp->name,
1311 rnp->gpnum + 1 -
1312 !!(rnp->qsmask & mask),
1313 "cpuofl");
1314 need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp); 1434 need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp);
1315 } else 1435 else
1316 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 1436 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
1317 mask = rnp->grpmask; 1437 mask = rnp->grpmask;
1318 rnp = rnp->parent; 1438 rnp = rnp->parent;
@@ -1332,29 +1452,15 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
1332 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1452 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1333 if (need_report & RCU_OFL_TASKS_EXP_GP) 1453 if (need_report & RCU_OFL_TASKS_EXP_GP)
1334 rcu_report_exp_rnp(rsp, rnp, true); 1454 rcu_report_exp_rnp(rsp, rnp, true);
1335 rcu_node_kthread_setaffinity(rnp, -1);
1336}
1337
1338/*
1339 * Remove the specified CPU from the RCU hierarchy and move any pending
1340 * callbacks that it might have to the current CPU. This code assumes
1341 * that at least one CPU in the system will remain running at all times.
1342 * Any attempt to offline -all- CPUs is likely to strand RCU callbacks.
1343 */
1344static void rcu_offline_cpu(int cpu)
1345{
1346 __rcu_offline_cpu(cpu, &rcu_sched_state);
1347 __rcu_offline_cpu(cpu, &rcu_bh_state);
1348 rcu_preempt_offline_cpu(cpu);
1349} 1455}
1350 1456
1351#else /* #ifdef CONFIG_HOTPLUG_CPU */ 1457#else /* #ifdef CONFIG_HOTPLUG_CPU */
1352 1458
1353static void rcu_send_cbs_to_online(struct rcu_state *rsp) 1459static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
1354{ 1460{
1355} 1461}
1356 1462
1357static void rcu_offline_cpu(int cpu) 1463static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
1358{ 1464{
1359} 1465}
1360 1466
@@ -1368,11 +1474,11 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1368{ 1474{
1369 unsigned long flags; 1475 unsigned long flags;
1370 struct rcu_head *next, *list, **tail; 1476 struct rcu_head *next, *list, **tail;
1371 int bl, count; 1477 int bl, count, count_lazy;
1372 1478
1373 /* If no callbacks are ready, just return.*/ 1479 /* If no callbacks are ready, just return.*/
1374 if (!cpu_has_callbacks_ready_to_invoke(rdp)) { 1480 if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
1375 trace_rcu_batch_start(rsp->name, 0, 0); 1481 trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, 0);
1376 trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist), 1482 trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist),
1377 need_resched(), is_idle_task(current), 1483 need_resched(), is_idle_task(current),
1378 rcu_is_callbacks_kthread()); 1484 rcu_is_callbacks_kthread());
@@ -1384,8 +1490,9 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1384 * races with call_rcu() from interrupt handlers. 1490 * races with call_rcu() from interrupt handlers.
1385 */ 1491 */
1386 local_irq_save(flags); 1492 local_irq_save(flags);
1493 WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
1387 bl = rdp->blimit; 1494 bl = rdp->blimit;
1388 trace_rcu_batch_start(rsp->name, rdp->qlen, bl); 1495 trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, bl);
1389 list = rdp->nxtlist; 1496 list = rdp->nxtlist;
1390 rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; 1497 rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
1391 *rdp->nxttail[RCU_DONE_TAIL] = NULL; 1498 *rdp->nxttail[RCU_DONE_TAIL] = NULL;
@@ -1396,12 +1503,13 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1396 local_irq_restore(flags); 1503 local_irq_restore(flags);
1397 1504
1398 /* Invoke callbacks. */ 1505 /* Invoke callbacks. */
1399 count = 0; 1506 count = count_lazy = 0;
1400 while (list) { 1507 while (list) {
1401 next = list->next; 1508 next = list->next;
1402 prefetch(next); 1509 prefetch(next);
1403 debug_rcu_head_unqueue(list); 1510 debug_rcu_head_unqueue(list);
1404 __rcu_reclaim(rsp->name, list); 1511 if (__rcu_reclaim(rsp->name, list))
1512 count_lazy++;
1405 list = next; 1513 list = next;
1406 /* Stop only if limit reached and CPU has something to do. */ 1514 /* Stop only if limit reached and CPU has something to do. */
1407 if (++count >= bl && 1515 if (++count >= bl &&
@@ -1416,6 +1524,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1416 rcu_is_callbacks_kthread()); 1524 rcu_is_callbacks_kthread());
1417 1525
1418 /* Update count, and requeue any remaining callbacks. */ 1526 /* Update count, and requeue any remaining callbacks. */
1527 rdp->qlen_lazy -= count_lazy;
1419 rdp->qlen -= count; 1528 rdp->qlen -= count;
1420 rdp->n_cbs_invoked += count; 1529 rdp->n_cbs_invoked += count;
1421 if (list != NULL) { 1530 if (list != NULL) {
@@ -1458,6 +1567,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1458void rcu_check_callbacks(int cpu, int user) 1567void rcu_check_callbacks(int cpu, int user)
1459{ 1568{
1460 trace_rcu_utilization("Start scheduler-tick"); 1569 trace_rcu_utilization("Start scheduler-tick");
1570 increment_cpu_stall_ticks();
1461 if (user || rcu_is_cpu_rrupt_from_idle()) { 1571 if (user || rcu_is_cpu_rrupt_from_idle()) {
1462 1572
1463 /* 1573 /*
@@ -1492,8 +1602,6 @@ void rcu_check_callbacks(int cpu, int user)
1492 trace_rcu_utilization("End scheduler-tick"); 1602 trace_rcu_utilization("End scheduler-tick");
1493} 1603}
1494 1604
1495#ifdef CONFIG_SMP
1496
1497/* 1605/*
1498 * Scan the leaf rcu_node structures, processing dyntick state for any that 1606 * Scan the leaf rcu_node structures, processing dyntick state for any that
1499 * have not yet encountered a quiescent state, using the function specified. 1607 * have not yet encountered a quiescent state, using the function specified.
@@ -1616,15 +1724,6 @@ unlock_fqs_ret:
1616 trace_rcu_utilization("End fqs"); 1724 trace_rcu_utilization("End fqs");
1617} 1725}
1618 1726
1619#else /* #ifdef CONFIG_SMP */
1620
1621static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
1622{
1623 set_need_resched();
1624}
1625
1626#endif /* #else #ifdef CONFIG_SMP */
1627
1628/* 1727/*
1629 * This does the RCU core processing work for the specified rcu_state 1728 * This does the RCU core processing work for the specified rcu_state
1630 * and rcu_data structures. This may be called only from the CPU to 1729 * and rcu_data structures. This may be called only from the CPU to
@@ -1702,11 +1801,12 @@ static void invoke_rcu_core(void)
1702 1801
1703static void 1802static void
1704__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), 1803__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
1705 struct rcu_state *rsp) 1804 struct rcu_state *rsp, bool lazy)
1706{ 1805{
1707 unsigned long flags; 1806 unsigned long flags;
1708 struct rcu_data *rdp; 1807 struct rcu_data *rdp;
1709 1808
1809 WARN_ON_ONCE((unsigned long)head & 0x3); /* Misaligned rcu_head! */
1710 debug_rcu_head_queue(head); 1810 debug_rcu_head_queue(head);
1711 head->func = func; 1811 head->func = func;
1712 head->next = NULL; 1812 head->next = NULL;
@@ -1720,18 +1820,21 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
1720 * a quiescent state betweentimes. 1820 * a quiescent state betweentimes.
1721 */ 1821 */
1722 local_irq_save(flags); 1822 local_irq_save(flags);
1823 WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
1723 rdp = this_cpu_ptr(rsp->rda); 1824 rdp = this_cpu_ptr(rsp->rda);
1724 1825
1725 /* Add the callback to our list. */ 1826 /* Add the callback to our list. */
1726 *rdp->nxttail[RCU_NEXT_TAIL] = head; 1827 *rdp->nxttail[RCU_NEXT_TAIL] = head;
1727 rdp->nxttail[RCU_NEXT_TAIL] = &head->next; 1828 rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
1728 rdp->qlen++; 1829 rdp->qlen++;
1830 if (lazy)
1831 rdp->qlen_lazy++;
1729 1832
1730 if (__is_kfree_rcu_offset((unsigned long)func)) 1833 if (__is_kfree_rcu_offset((unsigned long)func))
1731 trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func, 1834 trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func,
1732 rdp->qlen); 1835 rdp->qlen_lazy, rdp->qlen);
1733 else 1836 else
1734 trace_rcu_callback(rsp->name, head, rdp->qlen); 1837 trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen);
1735 1838
1736 /* If interrupts were disabled, don't dive into RCU core. */ 1839 /* If interrupts were disabled, don't dive into RCU core. */
1737 if (irqs_disabled_flags(flags)) { 1840 if (irqs_disabled_flags(flags)) {
@@ -1778,16 +1881,16 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
1778 */ 1881 */
1779void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) 1882void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
1780{ 1883{
1781 __call_rcu(head, func, &rcu_sched_state); 1884 __call_rcu(head, func, &rcu_sched_state, 0);
1782} 1885}
1783EXPORT_SYMBOL_GPL(call_rcu_sched); 1886EXPORT_SYMBOL_GPL(call_rcu_sched);
1784 1887
1785/* 1888/*
1786 * Queue an RCU for invocation after a quicker grace period. 1889 * Queue an RCU callback for invocation after a quicker grace period.
1787 */ 1890 */
1788void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) 1891void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
1789{ 1892{
1790 __call_rcu(head, func, &rcu_bh_state); 1893 __call_rcu(head, func, &rcu_bh_state, 0);
1791} 1894}
1792EXPORT_SYMBOL_GPL(call_rcu_bh); 1895EXPORT_SYMBOL_GPL(call_rcu_bh);
1793 1896
@@ -1816,6 +1919,10 @@ EXPORT_SYMBOL_GPL(call_rcu_bh);
1816 */ 1919 */
1817void synchronize_sched(void) 1920void synchronize_sched(void)
1818{ 1921{
1922 rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
1923 !lock_is_held(&rcu_lock_map) &&
1924 !lock_is_held(&rcu_sched_lock_map),
1925 "Illegal synchronize_sched() in RCU-sched read-side critical section");
1819 if (rcu_blocking_is_gp()) 1926 if (rcu_blocking_is_gp())
1820 return; 1927 return;
1821 wait_rcu_gp(call_rcu_sched); 1928 wait_rcu_gp(call_rcu_sched);
@@ -1833,12 +1940,137 @@ EXPORT_SYMBOL_GPL(synchronize_sched);
1833 */ 1940 */
1834void synchronize_rcu_bh(void) 1941void synchronize_rcu_bh(void)
1835{ 1942{
1943 rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
1944 !lock_is_held(&rcu_lock_map) &&
1945 !lock_is_held(&rcu_sched_lock_map),
1946 "Illegal synchronize_rcu_bh() in RCU-bh read-side critical section");
1836 if (rcu_blocking_is_gp()) 1947 if (rcu_blocking_is_gp())
1837 return; 1948 return;
1838 wait_rcu_gp(call_rcu_bh); 1949 wait_rcu_gp(call_rcu_bh);
1839} 1950}
1840EXPORT_SYMBOL_GPL(synchronize_rcu_bh); 1951EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
1841 1952
1953static atomic_t sync_sched_expedited_started = ATOMIC_INIT(0);
1954static atomic_t sync_sched_expedited_done = ATOMIC_INIT(0);
1955
1956static int synchronize_sched_expedited_cpu_stop(void *data)
1957{
1958 /*
1959 * There must be a full memory barrier on each affected CPU
1960 * between the time that try_stop_cpus() is called and the
1961 * time that it returns.
1962 *
1963 * In the current initial implementation of cpu_stop, the
1964 * above condition is already met when the control reaches
1965 * this point and the following smp_mb() is not strictly
1966 * necessary. Do smp_mb() anyway for documentation and
1967 * robustness against future implementation changes.
1968 */
1969 smp_mb(); /* See above comment block. */
1970 return 0;
1971}
1972
1973/**
1974 * synchronize_sched_expedited - Brute-force RCU-sched grace period
1975 *
1976 * Wait for an RCU-sched grace period to elapse, but use a "big hammer"
1977 * approach to force the grace period to end quickly. This consumes
1978 * significant time on all CPUs and is unfriendly to real-time workloads,
1979 * so is thus not recommended for any sort of common-case code. In fact,
1980 * if you are using synchronize_sched_expedited() in a loop, please
1981 * restructure your code to batch your updates, and then use a single
1982 * synchronize_sched() instead.
1983 *
1984 * Note that it is illegal to call this function while holding any lock
1985 * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal
1986 * to call this function from a CPU-hotplug notifier. Failing to observe
1987 * these restriction will result in deadlock.
1988 *
1989 * This implementation can be thought of as an application of ticket
1990 * locking to RCU, with sync_sched_expedited_started and
1991 * sync_sched_expedited_done taking on the roles of the halves
1992 * of the ticket-lock word. Each task atomically increments
1993 * sync_sched_expedited_started upon entry, snapshotting the old value,
1994 * then attempts to stop all the CPUs. If this succeeds, then each
1995 * CPU will have executed a context switch, resulting in an RCU-sched
1996 * grace period. We are then done, so we use atomic_cmpxchg() to
1997 * update sync_sched_expedited_done to match our snapshot -- but
1998 * only if someone else has not already advanced past our snapshot.
1999 *
2000 * On the other hand, if try_stop_cpus() fails, we check the value
2001 * of sync_sched_expedited_done. If it has advanced past our
2002 * initial snapshot, then someone else must have forced a grace period
2003 * some time after we took our snapshot. In this case, our work is
2004 * done for us, and we can simply return. Otherwise, we try again,
2005 * but keep our initial snapshot for purposes of checking for someone
2006 * doing our work for us.
2007 *
2008 * If we fail too many times in a row, we fall back to synchronize_sched().
2009 */
2010void synchronize_sched_expedited(void)
2011{
2012 int firstsnap, s, snap, trycount = 0;
2013
2014 /* Note that atomic_inc_return() implies full memory barrier. */
2015 firstsnap = snap = atomic_inc_return(&sync_sched_expedited_started);
2016 get_online_cpus();
2017 WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id()));
2018
2019 /*
2020 * Each pass through the following loop attempts to force a
2021 * context switch on each CPU.
2022 */
2023 while (try_stop_cpus(cpu_online_mask,
2024 synchronize_sched_expedited_cpu_stop,
2025 NULL) == -EAGAIN) {
2026 put_online_cpus();
2027
2028 /* No joy, try again later. Or just synchronize_sched(). */
2029 if (trycount++ < 10)
2030 udelay(trycount * num_online_cpus());
2031 else {
2032 synchronize_sched();
2033 return;
2034 }
2035
2036 /* Check to see if someone else did our work for us. */
2037 s = atomic_read(&sync_sched_expedited_done);
2038 if (UINT_CMP_GE((unsigned)s, (unsigned)firstsnap)) {
2039 smp_mb(); /* ensure test happens before caller kfree */
2040 return;
2041 }
2042
2043 /*
2044 * Refetching sync_sched_expedited_started allows later
2045 * callers to piggyback on our grace period. We subtract
2046 * 1 to get the same token that the last incrementer got.
2047 * We retry after they started, so our grace period works
2048 * for them, and they started after our first try, so their
2049 * grace period works for us.
2050 */
2051 get_online_cpus();
2052 snap = atomic_read(&sync_sched_expedited_started);
2053 smp_mb(); /* ensure read is before try_stop_cpus(). */
2054 }
2055
2056 /*
2057 * Everyone up to our most recent fetch is covered by our grace
2058 * period. Update the counter, but only if our work is still
2059 * relevant -- which it won't be if someone who started later
2060 * than we did beat us to the punch.
2061 */
2062 do {
2063 s = atomic_read(&sync_sched_expedited_done);
2064 if (UINT_CMP_GE((unsigned)s, (unsigned)snap)) {
2065 smp_mb(); /* ensure test happens before caller kfree */
2066 break;
2067 }
2068 } while (atomic_cmpxchg(&sync_sched_expedited_done, s, snap) != s);
2069
2070 put_online_cpus();
2071}
2072EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
2073
1842/* 2074/*
1843 * Check to see if there is any immediate RCU-related work to be done 2075 * Check to see if there is any immediate RCU-related work to be done
1844 * by the current CPU, for the specified type of RCU, returning 1 if so. 2076 * by the current CPU, for the specified type of RCU, returning 1 if so.
@@ -1932,7 +2164,7 @@ static int rcu_cpu_has_callbacks(int cpu)
1932 /* RCU callbacks either ready or pending? */ 2164 /* RCU callbacks either ready or pending? */
1933 return per_cpu(rcu_sched_data, cpu).nxtlist || 2165 return per_cpu(rcu_sched_data, cpu).nxtlist ||
1934 per_cpu(rcu_bh_data, cpu).nxtlist || 2166 per_cpu(rcu_bh_data, cpu).nxtlist ||
1935 rcu_preempt_needs_cpu(cpu); 2167 rcu_preempt_cpu_has_callbacks(cpu);
1936} 2168}
1937 2169
1938static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; 2170static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
@@ -2027,9 +2259,10 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
2027 rdp->nxtlist = NULL; 2259 rdp->nxtlist = NULL;
2028 for (i = 0; i < RCU_NEXT_SIZE; i++) 2260 for (i = 0; i < RCU_NEXT_SIZE; i++)
2029 rdp->nxttail[i] = &rdp->nxtlist; 2261 rdp->nxttail[i] = &rdp->nxtlist;
2262 rdp->qlen_lazy = 0;
2030 rdp->qlen = 0; 2263 rdp->qlen = 0;
2031 rdp->dynticks = &per_cpu(rcu_dynticks, cpu); 2264 rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
2032 WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_NESTING); 2265 WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
2033 WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); 2266 WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
2034 rdp->cpu = cpu; 2267 rdp->cpu = cpu;
2035 rdp->rsp = rsp; 2268 rdp->rsp = rsp;
@@ -2057,7 +2290,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
2057 rdp->qlen_last_fqs_check = 0; 2290 rdp->qlen_last_fqs_check = 0;
2058 rdp->n_force_qs_snap = rsp->n_force_qs; 2291 rdp->n_force_qs_snap = rsp->n_force_qs;
2059 rdp->blimit = blimit; 2292 rdp->blimit = blimit;
2060 rdp->dynticks->dynticks_nesting = DYNTICK_TASK_NESTING; 2293 rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
2061 atomic_set(&rdp->dynticks->dynticks, 2294 atomic_set(&rdp->dynticks->dynticks,
2062 (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1); 2295 (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
2063 rcu_prepare_for_idle_init(cpu); 2296 rcu_prepare_for_idle_init(cpu);
@@ -2139,16 +2372,18 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
2139 * touch any data without introducing corruption. We send the 2372 * touch any data without introducing corruption. We send the
2140 * dying CPU's callbacks to an arbitrarily chosen online CPU. 2373 * dying CPU's callbacks to an arbitrarily chosen online CPU.
2141 */ 2374 */
2142 rcu_send_cbs_to_online(&rcu_bh_state); 2375 rcu_cleanup_dying_cpu(&rcu_bh_state);
2143 rcu_send_cbs_to_online(&rcu_sched_state); 2376 rcu_cleanup_dying_cpu(&rcu_sched_state);
2144 rcu_preempt_send_cbs_to_online(); 2377 rcu_preempt_cleanup_dying_cpu();
2145 rcu_cleanup_after_idle(cpu); 2378 rcu_cleanup_after_idle(cpu);
2146 break; 2379 break;
2147 case CPU_DEAD: 2380 case CPU_DEAD:
2148 case CPU_DEAD_FROZEN: 2381 case CPU_DEAD_FROZEN:
2149 case CPU_UP_CANCELED: 2382 case CPU_UP_CANCELED:
2150 case CPU_UP_CANCELED_FROZEN: 2383 case CPU_UP_CANCELED_FROZEN:
2151 rcu_offline_cpu(cpu); 2384 rcu_cleanup_dead_cpu(cpu, &rcu_bh_state);
2385 rcu_cleanup_dead_cpu(cpu, &rcu_sched_state);
2386 rcu_preempt_cleanup_dead_cpu(cpu);
2152 break; 2387 break;
2153 default: 2388 default:
2154 break; 2389 break;
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index fddff92d6676..cdd1be0a4072 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -239,6 +239,12 @@ struct rcu_data {
239 bool preemptible; /* Preemptible RCU? */ 239 bool preemptible; /* Preemptible RCU? */
240 struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ 240 struct rcu_node *mynode; /* This CPU's leaf of hierarchy */
241 unsigned long grpmask; /* Mask to apply to leaf qsmask. */ 241 unsigned long grpmask; /* Mask to apply to leaf qsmask. */
242#ifdef CONFIG_RCU_CPU_STALL_INFO
243 unsigned long ticks_this_gp; /* The number of scheduling-clock */
244 /* ticks this CPU has handled */
245 /* during and after the last grace */
246 /* period it is aware of. */
247#endif /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
242 248
243 /* 2) batch handling */ 249 /* 2) batch handling */
244 /* 250 /*
@@ -265,7 +271,8 @@ struct rcu_data {
265 */ 271 */
266 struct rcu_head *nxtlist; 272 struct rcu_head *nxtlist;
267 struct rcu_head **nxttail[RCU_NEXT_SIZE]; 273 struct rcu_head **nxttail[RCU_NEXT_SIZE];
268 long qlen; /* # of queued callbacks */ 274 long qlen_lazy; /* # of lazy queued callbacks */
275 long qlen; /* # of queued callbacks, incl lazy */
269 long qlen_last_fqs_check; 276 long qlen_last_fqs_check;
270 /* qlen at last check for QS forcing */ 277 /* qlen at last check for QS forcing */
271 unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */ 278 unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */
@@ -282,7 +289,6 @@ struct rcu_data {
282 /* 4) reasons this CPU needed to be kicked by force_quiescent_state */ 289 /* 4) reasons this CPU needed to be kicked by force_quiescent_state */
283 unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */ 290 unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */
284 unsigned long offline_fqs; /* Kicked due to being offline. */ 291 unsigned long offline_fqs; /* Kicked due to being offline. */
285 unsigned long resched_ipi; /* Sent a resched IPI. */
286 292
287 /* 5) __rcu_pending() statistics. */ 293 /* 5) __rcu_pending() statistics. */
288 unsigned long n_rcu_pending; /* rcu_pending() calls since boot. */ 294 unsigned long n_rcu_pending; /* rcu_pending() calls since boot. */
@@ -313,12 +319,6 @@ struct rcu_data {
313#else 319#else
314#define RCU_STALL_DELAY_DELTA 0 320#define RCU_STALL_DELAY_DELTA 0
315#endif 321#endif
316
317#define RCU_SECONDS_TILL_STALL_CHECK (CONFIG_RCU_CPU_STALL_TIMEOUT * HZ + \
318 RCU_STALL_DELAY_DELTA)
319 /* for rsp->jiffies_stall */
320#define RCU_SECONDS_TILL_STALL_RECHECK (3 * RCU_SECONDS_TILL_STALL_CHECK + 30)
321 /* for rsp->jiffies_stall */
322#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ 322#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */
323 /* to take at least one */ 323 /* to take at least one */
324 /* scheduling clock irq */ 324 /* scheduling clock irq */
@@ -438,8 +438,8 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
438static int rcu_preempt_offline_tasks(struct rcu_state *rsp, 438static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
439 struct rcu_node *rnp, 439 struct rcu_node *rnp,
440 struct rcu_data *rdp); 440 struct rcu_data *rdp);
441static void rcu_preempt_offline_cpu(int cpu);
442#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 441#endif /* #ifdef CONFIG_HOTPLUG_CPU */
442static void rcu_preempt_cleanup_dead_cpu(int cpu);
443static void rcu_preempt_check_callbacks(int cpu); 443static void rcu_preempt_check_callbacks(int cpu);
444static void rcu_preempt_process_callbacks(void); 444static void rcu_preempt_process_callbacks(void);
445void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); 445void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
@@ -448,9 +448,9 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
448 bool wake); 448 bool wake);
449#endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */ 449#endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */
450static int rcu_preempt_pending(int cpu); 450static int rcu_preempt_pending(int cpu);
451static int rcu_preempt_needs_cpu(int cpu); 451static int rcu_preempt_cpu_has_callbacks(int cpu);
452static void __cpuinit rcu_preempt_init_percpu_data(int cpu); 452static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
453static void rcu_preempt_send_cbs_to_online(void); 453static void rcu_preempt_cleanup_dying_cpu(void);
454static void __init __rcu_init_preempt(void); 454static void __init __rcu_init_preempt(void);
455static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); 455static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
456static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); 456static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
@@ -471,5 +471,10 @@ static void __cpuinit rcu_prepare_kthreads(int cpu);
471static void rcu_prepare_for_idle_init(int cpu); 471static void rcu_prepare_for_idle_init(int cpu);
472static void rcu_cleanup_after_idle(int cpu); 472static void rcu_cleanup_after_idle(int cpu);
473static void rcu_prepare_for_idle(int cpu); 473static void rcu_prepare_for_idle(int cpu);
474static void print_cpu_stall_info_begin(void);
475static void print_cpu_stall_info(struct rcu_state *rsp, int cpu);
476static void print_cpu_stall_info_end(void);
477static void zero_cpu_stall_ticks(struct rcu_data *rdp);
478static void increment_cpu_stall_ticks(void);
474 479
475#endif /* #ifndef RCU_TREE_NONCORE */ 480#endif /* #ifndef RCU_TREE_NONCORE */
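
The new qlen_lazy/qlen pair declared above counts lazy callbacks separately from the total, so the idle code can tell whether a CPU has only kfree-style work queued. The sketch below is a toy userspace model of that split counting, assuming invented struct and function names; it mirrors the bookkeeping idea, not the kernel's callback lists.

/* Toy model of lazy vs. total callback counting (illustrative only). */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct cb {
	struct cb *next;
	bool lazy;		/* e.g. queued via a kfree-style helper */
};

struct cb_queue {
	struct cb *head, **tail;
	long qlen;		/* all queued callbacks, including lazy */
	long qlen_lazy;		/* lazy callbacks only */
};

static void cb_enqueue(struct cb_queue *q, struct cb *cb, bool lazy)
{
	cb->next = NULL;
	cb->lazy = lazy;
	*q->tail = cb;
	q->tail = &cb->next;
	q->qlen++;
	if (lazy)
		q->qlen_lazy++;
}

/* A CPU with only lazy callbacks queued can afford to sleep longer. */
static bool has_nonlazy(const struct cb_queue *q)
{
	return q->qlen != q->qlen_lazy;
}

int main(void)
{
	struct cb_queue q = { .tail = &q.head };
	struct cb a, b;

	cb_enqueue(&q, &a, true);
	cb_enqueue(&q, &b, false);
	printf("qlen=%ld lazy=%ld nonlazy=%d\n",
	       q.qlen, q.qlen_lazy, has_nonlazy(&q));
	return 0;
}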
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 8bb35d73e1f9..c023464816be 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -25,7 +25,6 @@
25 */ 25 */
26 26
27#include <linux/delay.h> 27#include <linux/delay.h>
28#include <linux/stop_machine.h>
29 28
30#define RCU_KTHREAD_PRIO 1 29#define RCU_KTHREAD_PRIO 1
31 30
@@ -63,7 +62,10 @@ static void __init rcu_bootup_announce_oddness(void)
63 printk(KERN_INFO "\tRCU torture testing starts during boot.\n"); 62 printk(KERN_INFO "\tRCU torture testing starts during boot.\n");
64#endif 63#endif
65#if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE) 64#if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE)
66 printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n"); 65 printk(KERN_INFO "\tDump stacks of tasks blocking RCU-preempt GP.\n");
66#endif
67#if defined(CONFIG_RCU_CPU_STALL_INFO)
68 printk(KERN_INFO "\tAdditional per-CPU info printed with stalls.\n");
67#endif 69#endif
68#if NUM_RCU_LVL_4 != 0 70#if NUM_RCU_LVL_4 != 0
69 printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n"); 71 printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n");
@@ -490,6 +492,31 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp)
490 492
491#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */ 493#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */
492 494
495#ifdef CONFIG_RCU_CPU_STALL_INFO
496
497static void rcu_print_task_stall_begin(struct rcu_node *rnp)
498{
499 printk(KERN_ERR "\tTasks blocked on level-%d rcu_node (CPUs %d-%d):",
500 rnp->level, rnp->grplo, rnp->grphi);
501}
502
503static void rcu_print_task_stall_end(void)
504{
505 printk(KERN_CONT "\n");
506}
507
508#else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
509
510static void rcu_print_task_stall_begin(struct rcu_node *rnp)
511{
512}
513
514static void rcu_print_task_stall_end(void)
515{
516}
517
518#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */
519
493/* 520/*
494 * Scan the current list of tasks blocked within RCU read-side critical 521 * Scan the current list of tasks blocked within RCU read-side critical
495 * sections, printing out the tid of each. 522 * sections, printing out the tid of each.
@@ -501,12 +528,14 @@ static int rcu_print_task_stall(struct rcu_node *rnp)
501 528
502 if (!rcu_preempt_blocked_readers_cgp(rnp)) 529 if (!rcu_preempt_blocked_readers_cgp(rnp))
503 return 0; 530 return 0;
531 rcu_print_task_stall_begin(rnp);
504 t = list_entry(rnp->gp_tasks, 532 t = list_entry(rnp->gp_tasks,
505 struct task_struct, rcu_node_entry); 533 struct task_struct, rcu_node_entry);
506 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { 534 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
507 printk(" P%d", t->pid); 535 printk(KERN_CONT " P%d", t->pid);
508 ndetected++; 536 ndetected++;
509 } 537 }
538 rcu_print_task_stall_end();
510 return ndetected; 539 return ndetected;
511} 540}
512 541
@@ -581,7 +610,7 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
581 * absolutely necessary, but this is a good performance/complexity 610 * absolutely necessary, but this is a good performance/complexity
582 * tradeoff. 611 * tradeoff.
583 */ 612 */
584 if (rcu_preempt_blocked_readers_cgp(rnp)) 613 if (rcu_preempt_blocked_readers_cgp(rnp) && rnp->qsmask == 0)
585 retval |= RCU_OFL_TASKS_NORM_GP; 614 retval |= RCU_OFL_TASKS_NORM_GP;
586 if (rcu_preempted_readers_exp(rnp)) 615 if (rcu_preempted_readers_exp(rnp))
587 retval |= RCU_OFL_TASKS_EXP_GP; 616 retval |= RCU_OFL_TASKS_EXP_GP;
@@ -618,16 +647,16 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
618 return retval; 647 return retval;
619} 648}
620 649
650#endif /* #ifdef CONFIG_HOTPLUG_CPU */
651
621/* 652/*
622 * Do CPU-offline processing for preemptible RCU. 653 * Do CPU-offline processing for preemptible RCU.
623 */ 654 */
624static void rcu_preempt_offline_cpu(int cpu) 655static void rcu_preempt_cleanup_dead_cpu(int cpu)
625{ 656{
626 __rcu_offline_cpu(cpu, &rcu_preempt_state); 657 rcu_cleanup_dead_cpu(cpu, &rcu_preempt_state);
627} 658}
628 659
629#endif /* #ifdef CONFIG_HOTPLUG_CPU */
630
631/* 660/*
632 * Check for a quiescent state from the current CPU. When a task blocks, 661 * Check for a quiescent state from the current CPU. When a task blocks,
633 * the task is recorded in the corresponding CPU's rcu_node structure, 662 * the task is recorded in the corresponding CPU's rcu_node structure,
@@ -671,10 +700,24 @@ static void rcu_preempt_do_callbacks(void)
671 */ 700 */
672void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) 701void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
673{ 702{
674 __call_rcu(head, func, &rcu_preempt_state); 703 __call_rcu(head, func, &rcu_preempt_state, 0);
675} 704}
676EXPORT_SYMBOL_GPL(call_rcu); 705EXPORT_SYMBOL_GPL(call_rcu);
677 706
707/*
708 * Queue an RCU callback for lazy invocation after a grace period.
709 * This will likely be later named something like "call_rcu_lazy()",
710 * but this change will require some way of tagging the lazy RCU
711 * callbacks in the list of pending callbacks. Until then, this
712 * function may only be called from __kfree_rcu().
713 */
714void kfree_call_rcu(struct rcu_head *head,
715 void (*func)(struct rcu_head *rcu))
716{
717 __call_rcu(head, func, &rcu_preempt_state, 1);
718}
719EXPORT_SYMBOL_GPL(kfree_call_rcu);
720
678/** 721/**
679 * synchronize_rcu - wait until a grace period has elapsed. 722 * synchronize_rcu - wait until a grace period has elapsed.
680 * 723 *
@@ -688,6 +731,10 @@ EXPORT_SYMBOL_GPL(call_rcu);
688 */ 731 */
689void synchronize_rcu(void) 732void synchronize_rcu(void)
690{ 733{
734 rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
735 !lock_is_held(&rcu_lock_map) &&
736 !lock_is_held(&rcu_sched_lock_map),
737 "Illegal synchronize_rcu() in RCU read-side critical section");
691 if (!rcu_scheduler_active) 738 if (!rcu_scheduler_active)
692 return; 739 return;
693 wait_rcu_gp(call_rcu); 740 wait_rcu_gp(call_rcu);
@@ -788,10 +835,22 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
788 rcu_report_exp_rnp(rsp, rnp, false); /* Don't wake self. */ 835 rcu_report_exp_rnp(rsp, rnp, false); /* Don't wake self. */
789} 836}
790 837
791/* 838/**
792 * Wait for an rcu-preempt grace period, but expedite it. The basic idea 839 * synchronize_rcu_expedited - Brute-force RCU grace period
793 * is to invoke synchronize_sched_expedited() to push all the tasks to 840 *
794 * the ->blkd_tasks lists and wait for this list to drain. 841 * Wait for an RCU-preempt grace period, but expedite it. The basic
842 * idea is to invoke synchronize_sched_expedited() to push all the tasks to
843 * the ->blkd_tasks lists and wait for this list to drain. This consumes
844 * significant time on all CPUs and is unfriendly to real-time workloads,
845 * so is thus not recommended for any sort of common-case code.
846 * In fact, if you are using synchronize_rcu_expedited() in a loop,
 847 * please restructure your code to batch your updates, and then use a
848 * single synchronize_rcu() instead.
849 *
850 * Note that it is illegal to call this function while holding any lock
851 * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal
852 * to call this function from a CPU-hotplug notifier. Failing to observe
 853 * these restrictions will result in deadlock.
795 */ 854 */
796void synchronize_rcu_expedited(void) 855void synchronize_rcu_expedited(void)
797{ 856{
@@ -869,9 +928,9 @@ static int rcu_preempt_pending(int cpu)
869} 928}
870 929
871/* 930/*
872 * Does preemptible RCU need the CPU to stay out of dynticks mode? 931 * Does preemptible RCU have callbacks on this CPU?
873 */ 932 */
874static int rcu_preempt_needs_cpu(int cpu) 933static int rcu_preempt_cpu_has_callbacks(int cpu)
875{ 934{
876 return !!per_cpu(rcu_preempt_data, cpu).nxtlist; 935 return !!per_cpu(rcu_preempt_data, cpu).nxtlist;
877} 936}
@@ -894,11 +953,12 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
894} 953}
895 954
896/* 955/*
897 * Move preemptible RCU's callbacks from dying CPU to other online CPU. 956 * Move preemptible RCU's callbacks from dying CPU to other online CPU
957 * and record a quiescent state.
898 */ 958 */
899static void rcu_preempt_send_cbs_to_online(void) 959static void rcu_preempt_cleanup_dying_cpu(void)
900{ 960{
901 rcu_send_cbs_to_online(&rcu_preempt_state); 961 rcu_cleanup_dying_cpu(&rcu_preempt_state);
902} 962}
903 963
904/* 964/*
@@ -1034,16 +1094,16 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
1034 return 0; 1094 return 0;
1035} 1095}
1036 1096
1097#endif /* #ifdef CONFIG_HOTPLUG_CPU */
1098
1037/* 1099/*
1038 * Because preemptible RCU does not exist, it never needs CPU-offline 1100 * Because preemptible RCU does not exist, it never needs CPU-offline
1039 * processing. 1101 * processing.
1040 */ 1102 */
1041static void rcu_preempt_offline_cpu(int cpu) 1103static void rcu_preempt_cleanup_dead_cpu(int cpu)
1042{ 1104{
1043} 1105}
1044 1106
1045#endif /* #ifdef CONFIG_HOTPLUG_CPU */
1046
1047/* 1107/*
1048 * Because preemptible RCU does not exist, it never has any callbacks 1108 * Because preemptible RCU does not exist, it never has any callbacks
1049 * to check. 1109 * to check.
@@ -1061,6 +1121,22 @@ static void rcu_preempt_process_callbacks(void)
1061} 1121}
1062 1122
1063/* 1123/*
1124 * Queue an RCU callback for lazy invocation after a grace period.
1125 * This will likely be later named something like "call_rcu_lazy()",
1126 * but this change will require some way of tagging the lazy RCU
1127 * callbacks in the list of pending callbacks. Until then, this
1128 * function may only be called from __kfree_rcu().
1129 *
1130 * Because there is no preemptible RCU, we use RCU-sched instead.
1131 */
1132void kfree_call_rcu(struct rcu_head *head,
1133 void (*func)(struct rcu_head *rcu))
1134{
1135 __call_rcu(head, func, &rcu_sched_state, 1);
1136}
1137EXPORT_SYMBOL_GPL(kfree_call_rcu);
1138
1139/*
1064 * Wait for an rcu-preempt grace period, but make it happen quickly. 1140 * Wait for an rcu-preempt grace period, but make it happen quickly.
1065 * But because preemptible RCU does not exist, map to rcu-sched. 1141 * But because preemptible RCU does not exist, map to rcu-sched.
1066 */ 1142 */
@@ -1093,9 +1169,9 @@ static int rcu_preempt_pending(int cpu)
1093} 1169}
1094 1170
1095/* 1171/*
1096 * Because preemptible RCU does not exist, it never needs any CPU. 1172 * Because preemptible RCU does not exist, it never has callbacks
1097 */ 1173 */
1098static int rcu_preempt_needs_cpu(int cpu) 1174static int rcu_preempt_cpu_has_callbacks(int cpu)
1099{ 1175{
1100 return 0; 1176 return 0;
1101} 1177}
@@ -1119,9 +1195,9 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
1119} 1195}
1120 1196
1121/* 1197/*
1122 * Because there is no preemptible RCU, there are no callbacks to move. 1198 * Because there is no preemptible RCU, there is no cleanup to do.
1123 */ 1199 */
1124static void rcu_preempt_send_cbs_to_online(void) 1200static void rcu_preempt_cleanup_dying_cpu(void)
1125{ 1201{
1126} 1202}
1127 1203
@@ -1823,132 +1899,6 @@ static void __cpuinit rcu_prepare_kthreads(int cpu)
1823 1899
1824#endif /* #else #ifdef CONFIG_RCU_BOOST */ 1900#endif /* #else #ifdef CONFIG_RCU_BOOST */
1825 1901
1826#ifndef CONFIG_SMP
1827
1828void synchronize_sched_expedited(void)
1829{
1830 cond_resched();
1831}
1832EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
1833
1834#else /* #ifndef CONFIG_SMP */
1835
1836static atomic_t sync_sched_expedited_started = ATOMIC_INIT(0);
1837static atomic_t sync_sched_expedited_done = ATOMIC_INIT(0);
1838
1839static int synchronize_sched_expedited_cpu_stop(void *data)
1840{
1841 /*
1842 * There must be a full memory barrier on each affected CPU
1843 * between the time that try_stop_cpus() is called and the
1844 * time that it returns.
1845 *
1846 * In the current initial implementation of cpu_stop, the
1847 * above condition is already met when the control reaches
1848 * this point and the following smp_mb() is not strictly
1849 * necessary. Do smp_mb() anyway for documentation and
1850 * robustness against future implementation changes.
1851 */
1852 smp_mb(); /* See above comment block. */
1853 return 0;
1854}
1855
1856/*
1857 * Wait for an rcu-sched grace period to elapse, but use "big hammer"
1858 * approach to force grace period to end quickly. This consumes
1859 * significant time on all CPUs, and is thus not recommended for
1860 * any sort of common-case code.
1861 *
1862 * Note that it is illegal to call this function while holding any
1863 * lock that is acquired by a CPU-hotplug notifier. Failing to
1864 * observe this restriction will result in deadlock.
1865 *
1866 * This implementation can be thought of as an application of ticket
1867 * locking to RCU, with sync_sched_expedited_started and
1868 * sync_sched_expedited_done taking on the roles of the halves
1869 * of the ticket-lock word. Each task atomically increments
1870 * sync_sched_expedited_started upon entry, snapshotting the old value,
1871 * then attempts to stop all the CPUs. If this succeeds, then each
1872 * CPU will have executed a context switch, resulting in an RCU-sched
1873 * grace period. We are then done, so we use atomic_cmpxchg() to
1874 * update sync_sched_expedited_done to match our snapshot -- but
1875 * only if someone else has not already advanced past our snapshot.
1876 *
1877 * On the other hand, if try_stop_cpus() fails, we check the value
1878 * of sync_sched_expedited_done. If it has advanced past our
1879 * initial snapshot, then someone else must have forced a grace period
1880 * some time after we took our snapshot. In this case, our work is
1881 * done for us, and we can simply return. Otherwise, we try again,
1882 * but keep our initial snapshot for purposes of checking for someone
1883 * doing our work for us.
1884 *
1885 * If we fail too many times in a row, we fall back to synchronize_sched().
1886 */
1887void synchronize_sched_expedited(void)
1888{
1889 int firstsnap, s, snap, trycount = 0;
1890
1891 /* Note that atomic_inc_return() implies full memory barrier. */
1892 firstsnap = snap = atomic_inc_return(&sync_sched_expedited_started);
1893 get_online_cpus();
1894
1895 /*
1896 * Each pass through the following loop attempts to force a
1897 * context switch on each CPU.
1898 */
1899 while (try_stop_cpus(cpu_online_mask,
1900 synchronize_sched_expedited_cpu_stop,
1901 NULL) == -EAGAIN) {
1902 put_online_cpus();
1903
1904 /* No joy, try again later. Or just synchronize_sched(). */
1905 if (trycount++ < 10)
1906 udelay(trycount * num_online_cpus());
1907 else {
1908 synchronize_sched();
1909 return;
1910 }
1911
1912 /* Check to see if someone else did our work for us. */
1913 s = atomic_read(&sync_sched_expedited_done);
1914 if (UINT_CMP_GE((unsigned)s, (unsigned)firstsnap)) {
1915 smp_mb(); /* ensure test happens before caller kfree */
1916 return;
1917 }
1918
1919 /*
1920 * Refetching sync_sched_expedited_started allows later
1921 * callers to piggyback on our grace period. We subtract
1922 * 1 to get the same token that the last incrementer got.
1923 * We retry after they started, so our grace period works
1924 * for them, and they started after our first try, so their
1925 * grace period works for us.
1926 */
1927 get_online_cpus();
1928 snap = atomic_read(&sync_sched_expedited_started);
1929 smp_mb(); /* ensure read is before try_stop_cpus(). */
1930 }
1931
1932 /*
1933 * Everyone up to our most recent fetch is covered by our grace
1934 * period. Update the counter, but only if our work is still
1935 * relevant -- which it won't be if someone who started later
1936 * than we did beat us to the punch.
1937 */
1938 do {
1939 s = atomic_read(&sync_sched_expedited_done);
1940 if (UINT_CMP_GE((unsigned)s, (unsigned)snap)) {
1941 smp_mb(); /* ensure test happens before caller kfree */
1942 break;
1943 }
1944 } while (atomic_cmpxchg(&sync_sched_expedited_done, s, snap) != s);
1945
1946 put_online_cpus();
1947}
1948EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
1949
1950#endif /* #else #ifndef CONFIG_SMP */
1951
1952#if !defined(CONFIG_RCU_FAST_NO_HZ) 1902#if !defined(CONFIG_RCU_FAST_NO_HZ)
1953 1903
1954/* 1904/*
@@ -1981,7 +1931,7 @@ static void rcu_cleanup_after_idle(int cpu)
1981} 1931}
1982 1932
1983/* 1933/*
1984 * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=y, 1934 * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n,
1985 * is nothing. 1935 * is nothing.
1986 */ 1936 */
1987static void rcu_prepare_for_idle(int cpu) 1937static void rcu_prepare_for_idle(int cpu)
@@ -2015,6 +1965,9 @@ static void rcu_prepare_for_idle(int cpu)
2015 * number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your 1965 * number, be warned: Setting RCU_IDLE_GP_DELAY too high can hang your
2016 * system. And if you are -that- concerned about energy efficiency, 1966 * system. And if you are -that- concerned about energy efficiency,
2017 * just power the system down and be done with it! 1967 * just power the system down and be done with it!
1968 * RCU_IDLE_LAZY_GP_DELAY gives the number of jiffies that a CPU is
1969 * permitted to sleep in dyntick-idle mode with only lazy RCU
1970 * callbacks pending. Setting this too high can OOM your system.
2018 * 1971 *
2019 * The values below work well in practice. If future workloads require 1972 * The values below work well in practice. If future workloads require
2020 * adjustment, they can be converted into kernel config parameters, though 1973 * adjustment, they can be converted into kernel config parameters, though
@@ -2023,11 +1976,13 @@ static void rcu_prepare_for_idle(int cpu)
2023#define RCU_IDLE_FLUSHES 5 /* Number of dyntick-idle tries. */ 1976#define RCU_IDLE_FLUSHES 5 /* Number of dyntick-idle tries. */
2024#define RCU_IDLE_OPT_FLUSHES 3 /* Optional dyntick-idle tries. */ 1977#define RCU_IDLE_OPT_FLUSHES 3 /* Optional dyntick-idle tries. */
2025#define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. */ 1978#define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. */
1979#define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */
2026 1980
2027static DEFINE_PER_CPU(int, rcu_dyntick_drain); 1981static DEFINE_PER_CPU(int, rcu_dyntick_drain);
2028static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); 1982static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff);
2029static DEFINE_PER_CPU(struct hrtimer, rcu_idle_gp_timer); 1983static DEFINE_PER_CPU(struct hrtimer, rcu_idle_gp_timer);
2030static ktime_t rcu_idle_gp_wait; 1984static ktime_t rcu_idle_gp_wait; /* If some non-lazy callbacks. */
1985static ktime_t rcu_idle_lazy_gp_wait; /* If only lazy callbacks. */
2031 1986
2032/* 1987/*
2033 * Allow the CPU to enter dyntick-idle mode if either: (1) There are no 1988 * Allow the CPU to enter dyntick-idle mode if either: (1) There are no
@@ -2048,6 +2003,48 @@ int rcu_needs_cpu(int cpu)
2048} 2003}
2049 2004
2050/* 2005/*
2006 * Does the specified flavor of RCU have non-lazy callbacks pending on
2007 * the specified CPU? Both RCU flavor and CPU are specified by the
2008 * rcu_data structure.
2009 */
2010static bool __rcu_cpu_has_nonlazy_callbacks(struct rcu_data *rdp)
2011{
2012 return rdp->qlen != rdp->qlen_lazy;
2013}
2014
2015#ifdef CONFIG_TREE_PREEMPT_RCU
2016
2017/*
2018 * Are there non-lazy RCU-preempt callbacks? (There cannot be if there
2019 * is no RCU-preempt in the kernel.)
2020 */
2021static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu)
2022{
2023 struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
2024
2025 return __rcu_cpu_has_nonlazy_callbacks(rdp);
2026}
2027
2028#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
2029
2030static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu)
2031{
2032 return 0;
2033}
2034
2035#endif /* else #ifdef CONFIG_TREE_PREEMPT_RCU */
2036
2037/*
2038 * Does any flavor of RCU have non-lazy callbacks on the specified CPU?
2039 */
2040static bool rcu_cpu_has_nonlazy_callbacks(int cpu)
2041{
2042 return __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_sched_data, cpu)) ||
2043 __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_bh_data, cpu)) ||
2044 rcu_preempt_cpu_has_nonlazy_callbacks(cpu);
2045}
2046
2047/*
2051 * Timer handler used to force CPU to start pushing its remaining RCU 2048 * Timer handler used to force CPU to start pushing its remaining RCU
2052 * callbacks in the case where it entered dyntick-idle mode with callbacks 2049 * callbacks in the case where it entered dyntick-idle mode with callbacks
 2053 * pending. The handler doesn't really need to do anything because the 2050
@@ -2074,6 +2071,8 @@ static void rcu_prepare_for_idle_init(int cpu)
2074 unsigned int upj = jiffies_to_usecs(RCU_IDLE_GP_DELAY); 2071 unsigned int upj = jiffies_to_usecs(RCU_IDLE_GP_DELAY);
2075 2072
2076 rcu_idle_gp_wait = ns_to_ktime(upj * (u64)1000); 2073 rcu_idle_gp_wait = ns_to_ktime(upj * (u64)1000);
2074 upj = jiffies_to_usecs(RCU_IDLE_LAZY_GP_DELAY);
2075 rcu_idle_lazy_gp_wait = ns_to_ktime(upj * (u64)1000);
2077 firsttime = 0; 2076 firsttime = 0;
2078 } 2077 }
2079} 2078}
@@ -2109,10 +2108,6 @@ static void rcu_cleanup_after_idle(int cpu)
2109 */ 2108 */
2110static void rcu_prepare_for_idle(int cpu) 2109static void rcu_prepare_for_idle(int cpu)
2111{ 2110{
2112 unsigned long flags;
2113
2114 local_irq_save(flags);
2115
2116 /* 2111 /*
2117 * If there are no callbacks on this CPU, enter dyntick-idle mode. 2112 * If there are no callbacks on this CPU, enter dyntick-idle mode.
2118 * Also reset state to avoid prejudicing later attempts. 2113 * Also reset state to avoid prejudicing later attempts.
@@ -2120,7 +2115,6 @@ static void rcu_prepare_for_idle(int cpu)
2120 if (!rcu_cpu_has_callbacks(cpu)) { 2115 if (!rcu_cpu_has_callbacks(cpu)) {
2121 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; 2116 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
2122 per_cpu(rcu_dyntick_drain, cpu) = 0; 2117 per_cpu(rcu_dyntick_drain, cpu) = 0;
2123 local_irq_restore(flags);
2124 trace_rcu_prep_idle("No callbacks"); 2118 trace_rcu_prep_idle("No callbacks");
2125 return; 2119 return;
2126 } 2120 }
@@ -2130,7 +2124,6 @@ static void rcu_prepare_for_idle(int cpu)
2130 * refrained from disabling the scheduling-clock tick. 2124 * refrained from disabling the scheduling-clock tick.
2131 */ 2125 */
2132 if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies) { 2126 if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies) {
2133 local_irq_restore(flags);
2134 trace_rcu_prep_idle("In holdoff"); 2127 trace_rcu_prep_idle("In holdoff");
2135 return; 2128 return;
2136 } 2129 }
@@ -2140,18 +2133,22 @@ static void rcu_prepare_for_idle(int cpu)
2140 /* First time through, initialize the counter. */ 2133 /* First time through, initialize the counter. */
2141 per_cpu(rcu_dyntick_drain, cpu) = RCU_IDLE_FLUSHES; 2134 per_cpu(rcu_dyntick_drain, cpu) = RCU_IDLE_FLUSHES;
2142 } else if (per_cpu(rcu_dyntick_drain, cpu) <= RCU_IDLE_OPT_FLUSHES && 2135 } else if (per_cpu(rcu_dyntick_drain, cpu) <= RCU_IDLE_OPT_FLUSHES &&
2143 !rcu_pending(cpu)) { 2136 !rcu_pending(cpu) &&
2137 !local_softirq_pending()) {
2144 /* Can we go dyntick-idle despite still having callbacks? */ 2138 /* Can we go dyntick-idle despite still having callbacks? */
2145 trace_rcu_prep_idle("Dyntick with callbacks"); 2139 trace_rcu_prep_idle("Dyntick with callbacks");
2146 per_cpu(rcu_dyntick_drain, cpu) = 0; 2140 per_cpu(rcu_dyntick_drain, cpu) = 0;
2147 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; 2141 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies;
2148 hrtimer_start(&per_cpu(rcu_idle_gp_timer, cpu), 2142 if (rcu_cpu_has_nonlazy_callbacks(cpu))
2149 rcu_idle_gp_wait, HRTIMER_MODE_REL); 2143 hrtimer_start(&per_cpu(rcu_idle_gp_timer, cpu),
2144 rcu_idle_gp_wait, HRTIMER_MODE_REL);
2145 else
2146 hrtimer_start(&per_cpu(rcu_idle_gp_timer, cpu),
2147 rcu_idle_lazy_gp_wait, HRTIMER_MODE_REL);
2150 return; /* Nothing more to do immediately. */ 2148 return; /* Nothing more to do immediately. */
2151 } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) { 2149 } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) {
2152 /* We have hit the limit, so time to give up. */ 2150 /* We have hit the limit, so time to give up. */
2153 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; 2151 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies;
2154 local_irq_restore(flags);
2155 trace_rcu_prep_idle("Begin holdoff"); 2152 trace_rcu_prep_idle("Begin holdoff");
2156 invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */ 2153 invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */
2157 return; 2154 return;
@@ -2163,23 +2160,17 @@ static void rcu_prepare_for_idle(int cpu)
2163 */ 2160 */
2164#ifdef CONFIG_TREE_PREEMPT_RCU 2161#ifdef CONFIG_TREE_PREEMPT_RCU
2165 if (per_cpu(rcu_preempt_data, cpu).nxtlist) { 2162 if (per_cpu(rcu_preempt_data, cpu).nxtlist) {
2166 local_irq_restore(flags);
2167 rcu_preempt_qs(cpu); 2163 rcu_preempt_qs(cpu);
2168 force_quiescent_state(&rcu_preempt_state, 0); 2164 force_quiescent_state(&rcu_preempt_state, 0);
2169 local_irq_save(flags);
2170 } 2165 }
2171#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 2166#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
2172 if (per_cpu(rcu_sched_data, cpu).nxtlist) { 2167 if (per_cpu(rcu_sched_data, cpu).nxtlist) {
2173 local_irq_restore(flags);
2174 rcu_sched_qs(cpu); 2168 rcu_sched_qs(cpu);
2175 force_quiescent_state(&rcu_sched_state, 0); 2169 force_quiescent_state(&rcu_sched_state, 0);
2176 local_irq_save(flags);
2177 } 2170 }
2178 if (per_cpu(rcu_bh_data, cpu).nxtlist) { 2171 if (per_cpu(rcu_bh_data, cpu).nxtlist) {
2179 local_irq_restore(flags);
2180 rcu_bh_qs(cpu); 2172 rcu_bh_qs(cpu);
2181 force_quiescent_state(&rcu_bh_state, 0); 2173 force_quiescent_state(&rcu_bh_state, 0);
2182 local_irq_save(flags);
2183 } 2174 }
2184 2175
2185 /* 2176 /*
@@ -2187,13 +2178,124 @@ static void rcu_prepare_for_idle(int cpu)
2187 * So try forcing the callbacks through the grace period. 2178 * So try forcing the callbacks through the grace period.
2188 */ 2179 */
2189 if (rcu_cpu_has_callbacks(cpu)) { 2180 if (rcu_cpu_has_callbacks(cpu)) {
2190 local_irq_restore(flags);
2191 trace_rcu_prep_idle("More callbacks"); 2181 trace_rcu_prep_idle("More callbacks");
2192 invoke_rcu_core(); 2182 invoke_rcu_core();
2193 } else { 2183 } else
2194 local_irq_restore(flags);
2195 trace_rcu_prep_idle("Callbacks drained"); 2184 trace_rcu_prep_idle("Callbacks drained");
2196 }
2197} 2185}
2198 2186
2199#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ 2187#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
2188
2189#ifdef CONFIG_RCU_CPU_STALL_INFO
2190
2191#ifdef CONFIG_RCU_FAST_NO_HZ
2192
2193static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
2194{
2195 struct hrtimer *hrtp = &per_cpu(rcu_idle_gp_timer, cpu);
2196
2197 sprintf(cp, "drain=%d %c timer=%lld",
2198 per_cpu(rcu_dyntick_drain, cpu),
2199 per_cpu(rcu_dyntick_holdoff, cpu) == jiffies ? 'H' : '.',
2200 hrtimer_active(hrtp)
2201 ? ktime_to_us(hrtimer_get_remaining(hrtp))
2202 : -1);
2203}
2204
2205#else /* #ifdef CONFIG_RCU_FAST_NO_HZ */
2206
2207static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
2208{
2209}
2210
2211#endif /* #else #ifdef CONFIG_RCU_FAST_NO_HZ */
2212
2213/* Initiate the stall-info list. */
2214static void print_cpu_stall_info_begin(void)
2215{
2216 printk(KERN_CONT "\n");
2217}
2218
2219/*
2220 * Print out diagnostic information for the specified stalled CPU.
2221 *
2222 * If the specified CPU is aware of the current RCU grace period
2223 * (flavor specified by rsp), then print the number of scheduling
2224 * clock interrupts the CPU has taken during the time that it has
2225 * been aware. Otherwise, print the number of RCU grace periods
2226 * that this CPU is ignorant of, for example, "1" if the CPU was
2227 * aware of the previous grace period.
2228 *
2229 * Also print out idle and (if CONFIG_RCU_FAST_NO_HZ) idle-entry info.
2230 */
2231static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
2232{
2233 char fast_no_hz[72];
2234 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
2235 struct rcu_dynticks *rdtp = rdp->dynticks;
2236 char *ticks_title;
2237 unsigned long ticks_value;
2238
2239 if (rsp->gpnum == rdp->gpnum) {
2240 ticks_title = "ticks this GP";
2241 ticks_value = rdp->ticks_this_gp;
2242 } else {
2243 ticks_title = "GPs behind";
2244 ticks_value = rsp->gpnum - rdp->gpnum;
2245 }
2246 print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
2247 printk(KERN_ERR "\t%d: (%lu %s) idle=%03x/%llx/%d %s\n",
2248 cpu, ticks_value, ticks_title,
2249 atomic_read(&rdtp->dynticks) & 0xfff,
2250 rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting,
2251 fast_no_hz);
2252}
2253
2254/* Terminate the stall-info list. */
2255static void print_cpu_stall_info_end(void)
2256{
2257 printk(KERN_ERR "\t");
2258}
2259
2260/* Zero ->ticks_this_gp for all flavors of RCU. */
2261static void zero_cpu_stall_ticks(struct rcu_data *rdp)
2262{
2263 rdp->ticks_this_gp = 0;
2264}
2265
2266/* Increment ->ticks_this_gp for all flavors of RCU. */
2267static void increment_cpu_stall_ticks(void)
2268{
2269 __get_cpu_var(rcu_sched_data).ticks_this_gp++;
2270 __get_cpu_var(rcu_bh_data).ticks_this_gp++;
2271#ifdef CONFIG_TREE_PREEMPT_RCU
2272 __get_cpu_var(rcu_preempt_data).ticks_this_gp++;
2273#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
2274}
2275
2276#else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
2277
2278static void print_cpu_stall_info_begin(void)
2279{
2280 printk(KERN_CONT " {");
2281}
2282
2283static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
2284{
2285 printk(KERN_CONT " %d", cpu);
2286}
2287
2288static void print_cpu_stall_info_end(void)
2289{
2290 printk(KERN_CONT "} ");
2291}
2292
2293static void zero_cpu_stall_ticks(struct rcu_data *rdp)
2294{
2295}
2296
2297static void increment_cpu_stall_ticks(void)
2298{
2299}
2300
2301#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */
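
The plugin changes above let an idle CPU with only lazy callbacks arm a much longer timer (RCU_IDLE_LAZY_GP_DELAY) than one with non-lazy callbacks pending (RCU_IDLE_GP_DELAY). The fragment below is only a sketch of that decision, using an assumed HZ value and invented helper names; the real code arms an hrtimer with precomputed ktime values.

/* Sketch of the idle-timer choice (illustrative; HZ value is assumed). */
#include <stdbool.h>
#include <stdio.h>

#define HZ			1000		/* assumed tick rate */
#define IDLE_GP_DELAY		6		/* ~one grace period, in jiffies */
#define IDLE_LAZY_GP_DELAY	(6 * HZ)	/* ~six seconds, in jiffies */

/* Pick how long an idle CPU may sleep before flushing its callbacks. */
static long idle_delay_jiffies(bool has_nonlazy_callbacks)
{
	return has_nonlazy_callbacks ? IDLE_GP_DELAY : IDLE_LAZY_GP_DELAY;
}

int main(void)
{
	printf("non-lazy pending: %ld jiffies\n", idle_delay_jiffies(true));
	printf("only lazy pending: %ld jiffies\n", idle_delay_jiffies(false));
	return 0;
}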
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index 654cfe67f0d1..ed459edeff43 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -72,9 +72,9 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
72 rdp->dynticks->dynticks_nesting, 72 rdp->dynticks->dynticks_nesting,
73 rdp->dynticks->dynticks_nmi_nesting, 73 rdp->dynticks->dynticks_nmi_nesting,
74 rdp->dynticks_fqs); 74 rdp->dynticks_fqs);
75 seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi); 75 seq_printf(m, " of=%lu", rdp->offline_fqs);
76 seq_printf(m, " ql=%ld qs=%c%c%c%c", 76 seq_printf(m, " ql=%ld/%ld qs=%c%c%c%c",
77 rdp->qlen, 77 rdp->qlen_lazy, rdp->qlen,
78 ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] != 78 ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] !=
79 rdp->nxttail[RCU_NEXT_TAIL]], 79 rdp->nxttail[RCU_NEXT_TAIL]],
80 ".R"[rdp->nxttail[RCU_WAIT_TAIL] != 80 ".R"[rdp->nxttail[RCU_WAIT_TAIL] !=
@@ -144,8 +144,8 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
144 rdp->dynticks->dynticks_nesting, 144 rdp->dynticks->dynticks_nesting,
145 rdp->dynticks->dynticks_nmi_nesting, 145 rdp->dynticks->dynticks_nmi_nesting,
146 rdp->dynticks_fqs); 146 rdp->dynticks_fqs);
147 seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi); 147 seq_printf(m, ",%lu", rdp->offline_fqs);
148 seq_printf(m, ",%ld,\"%c%c%c%c\"", rdp->qlen, 148 seq_printf(m, ",%ld,%ld,\"%c%c%c%c\"", rdp->qlen_lazy, rdp->qlen,
149 ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] != 149 ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] !=
150 rdp->nxttail[RCU_NEXT_TAIL]], 150 rdp->nxttail[RCU_NEXT_TAIL]],
151 ".R"[rdp->nxttail[RCU_WAIT_TAIL] != 151 ".R"[rdp->nxttail[RCU_WAIT_TAIL] !=
@@ -168,7 +168,7 @@ static int show_rcudata_csv(struct seq_file *m, void *unused)
168{ 168{
169 seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\","); 169 seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\",");
170 seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); 170 seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\",");
171 seq_puts(m, "\"of\",\"ri\",\"ql\",\"qs\""); 171 seq_puts(m, "\"of\",\"qll\",\"ql\",\"qs\"");
172#ifdef CONFIG_RCU_BOOST 172#ifdef CONFIG_RCU_BOOST
173 seq_puts(m, "\"kt\",\"ktl\""); 173 seq_puts(m, "\"kt\",\"ktl\"");
174#endif /* #ifdef CONFIG_RCU_BOOST */ 174#endif /* #ifdef CONFIG_RCU_BOOST */
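
After this change the CSV header emitted by show_rcudata_csv() (with CONFIG_RCU_BOOST=n, and ignoring anything the function appends after the lines shown in this hunk) becomes the line below: the "ri" (resched_ipi) column is gone, and the new "qll" column carries rdp->qlen_lazy next to the existing "ql" (rdp->qlen):

	"CPU","Online?","c","g","pq","pgp","pq","dt","dt nesting","dt NMI nesting","df","of","qll","ql","qs"
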
diff --git a/kernel/srcu.c b/kernel/srcu.c
index 0febf61e1aa3..ba35f3a4a1f4 100644
--- a/kernel/srcu.c
+++ b/kernel/srcu.c
@@ -172,6 +172,12 @@ static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void))
172{ 172{
173 int idx; 173 int idx;
174 174
175 rcu_lockdep_assert(!lock_is_held(&sp->dep_map) &&
176 !lock_is_held(&rcu_bh_lock_map) &&
177 !lock_is_held(&rcu_lock_map) &&
178 !lock_is_held(&rcu_sched_lock_map),
179 "Illegal synchronize_srcu() in same-type SRCU (or RCU) read-side critical section");
180
175 idx = sp->completed; 181 idx = sp->completed;
176 mutex_lock(&sp->mutex); 182 mutex_lock(&sp->mutex);
177 183
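
The rcu_lockdep_assert() added above flags update-side waits issued from inside a same-type read-side critical section. A minimal, hypothetical example of the misuse it is meant to catch (my_srcu and broken_updater() are invented for illustration; with CONFIG_PROVE_RCU the new assertion prints the quoted lockdep-RCU splat, making the self-deadlock diagnosable):

	#include <linux/srcu.h>

	static struct srcu_struct my_srcu;	/* hypothetical; init_srcu_struct() run elsewhere */

	static void broken_updater(void)
	{
		int idx;

		idx = srcu_read_lock(&my_srcu);
		/*
		 * Waiting for an SRCU grace period while still inside a
		 * read-side critical section of the same srcu_struct can
		 * never complete.
		 */
		synchronize_srcu(&my_srcu);
		srcu_read_unlock(&my_srcu, idx);
	}

The same assertion also fires if __synchronize_srcu() is reached from within an rcu, rcu_bh, or rcu_sched read-side critical section, since those lock maps are checked as well.
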
@@ -280,19 +286,26 @@ void synchronize_srcu(struct srcu_struct *sp)
280EXPORT_SYMBOL_GPL(synchronize_srcu); 286EXPORT_SYMBOL_GPL(synchronize_srcu);
281 287
282/** 288/**
283 * synchronize_srcu_expedited - like synchronize_srcu, but less patient 289 * synchronize_srcu_expedited - Brute-force SRCU grace period
284 * @sp: srcu_struct with which to synchronize. 290 * @sp: srcu_struct with which to synchronize.
285 * 291 *
286 * Flip the completed counter, and wait for the old count to drain to zero. 292 * Wait for an SRCU grace period to elapse, but use a "big hammer"
287 * As with classic RCU, the updater must use some separate means of 293 * approach to force the grace period to end quickly. This consumes
288 * synchronizing concurrent updates. Can block; must be called from 294 * significant time on all CPUs and is unfriendly to real-time workloads,
289 * process context. 295 * and is thus not recommended for any sort of common-case code. In fact,
296 * if you are using synchronize_srcu_expedited() in a loop, please
297 * restructure your code to batch your updates, and then use a single
298 * synchronize_srcu() instead.
290 * 299 *
291 * Note that it is illegal to call synchronize_srcu_expedited() 300 * Note that it is illegal to call this function while holding any lock
292 * from the corresponding SRCU read-side critical section; doing so 301 * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal
293 * will result in deadlock. However, it is perfectly legal to call 302 * to call this function from a CPU-hotplug notifier. Failing to observe
294 * synchronize_srcu_expedited() on one srcu_struct from some other 303 * these restrictions will result in deadlock. It is also illegal to call
295 * srcu_struct's read-side critical section. 304 * synchronize_srcu_expedited() from the corresponding SRCU read-side
305 * critical section; doing so will result in deadlock. However, it is
306 * perfectly legal to call synchronize_srcu_expedited() on one srcu_struct
307 * from some other srcu_struct's read-side critical section, as long as
308 * the resulting graph of srcu_structs is acyclic.
296 */ 309 */
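
A hypothetical sketch of the batching advice in the comment above (struct foo, prune_foos(), should_remove(), and the fixed-size victims[] array are all invented; update-side locking is omitted for brevity): unlink every doomed element first, then let one ordinary grace period cover the whole batch instead of paying for an expedited grace period per removal.

	#include <linux/kernel.h>
	#include <linux/list.h>
	#include <linux/slab.h>
	#include <linux/srcu.h>

	struct foo {
		struct list_head node;
		/* ... payload ... */
	};

	static bool should_remove(struct foo *p);	/* hypothetical predicate */

	static void prune_foos(struct list_head *head, struct srcu_struct *sp)
	{
		struct foo *victims[16];	/* hypothetical bound on removals */
		struct foo *p, *tmp;
		int i, n = 0;

		/* Unlink the doomed elements; no grace-period waits in the loop. */
		list_for_each_entry_safe(p, tmp, head, node) {
			if (n < ARRAY_SIZE(victims) && should_remove(p)) {
				list_del_rcu(&p->node);
				victims[n++] = p;
			}
		}

		/* One non-expedited grace period covers all of the removals above. */
		synchronize_srcu(sp);

		for (i = 0; i < n; i++)
			kfree(victims[i]);
	}
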
297void synchronize_srcu_expedited(struct srcu_struct *sp) 310void synchronize_srcu_expedited(struct srcu_struct *sp)
298{ 311{