about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Documentation/RCU/trace.txt 4
-rw-r--r-- include/linux/hardirq.h 21
-rw-r--r-- include/linux/rcupdate.h 21
-rw-r--r-- include/linux/tick.h 11
-rw-r--r-- include/trace/events/rcu.h 10
-rw-r--r-- kernel/rcutiny.c 124
-rw-r--r-- kernel/rcutree.c 229
-rw-r--r-- kernel/rcutree.h 15
-rw-r--r-- kernel/rcutree_trace.c 10
-rw-r--r-- kernel/time/tick-sched.c 6
10 files changed, 297 insertions, 154 deletions
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
index aaf65f6c6cd..49587abfc2f 100644
--- a/Documentation/RCU/trace.txt
+++ b/Documentation/RCU/trace.txt
@@ -105,14 +105,10 @@ o "dt" is the current value of the dyntick counter that is incremented
105 or one greater than the interrupt-nesting depth otherwise. 105 or one greater than the interrupt-nesting depth otherwise.
106 The number after the second "/" is the NMI nesting depth. 106 The number after the second "/" is the NMI nesting depth.
107 107
108 This field is displayed only for CONFIG_NO_HZ kernels.
109
110o "df" is the number of times that some other CPU has forced a 108o "df" is the number of times that some other CPU has forced a
111 quiescent state on behalf of this CPU due to this CPU being in 109 quiescent state on behalf of this CPU due to this CPU being in
112 dynticks-idle state. 110 dynticks-idle state.
113 111
114 This field is displayed only for CONFIG_NO_HZ kernels.
115
116o "of" is the number of times that some other CPU has forced a 112o "of" is the number of times that some other CPU has forced a
117 quiescent state on behalf of this CPU due to this CPU being 113 quiescent state on behalf of this CPU due to this CPU being
118 offline. In a perfect world, this might never happen, but it 114 offline. In a perfect world, this might never happen, but it
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index f743883f769..bb7f3097185 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -139,20 +139,7 @@ static inline void account_system_vtime(struct task_struct *tsk)
139extern void account_system_vtime(struct task_struct *tsk); 139extern void account_system_vtime(struct task_struct *tsk);
140#endif 140#endif
141 141
142#if defined(CONFIG_NO_HZ)
143#if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) 142#if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU)
144extern void rcu_enter_nohz(void);
145extern void rcu_exit_nohz(void);
146
147static inline void rcu_irq_enter(void)
148{
149 rcu_exit_nohz();
150}
151
152static inline void rcu_irq_exit(void)
153{
154 rcu_enter_nohz();
155}
156 143
157static inline void rcu_nmi_enter(void) 144static inline void rcu_nmi_enter(void)
158{ 145{
@@ -163,17 +150,9 @@ static inline void rcu_nmi_exit(void)
163} 150}
164 151
165#else 152#else
166extern void rcu_irq_enter(void);
167extern void rcu_irq_exit(void);
168extern void rcu_nmi_enter(void); 153extern void rcu_nmi_enter(void);
169extern void rcu_nmi_exit(void); 154extern void rcu_nmi_exit(void);
170#endif 155#endif
171#else
172# define rcu_irq_enter() do { } while (0)
173# define rcu_irq_exit() do { } while (0)
174# define rcu_nmi_enter() do { } while (0)
175# define rcu_nmi_exit() do { } while (0)
176#endif /* #if defined(CONFIG_NO_HZ) */
177 156
178/* 157/*
179 * It is safe to do non-atomic ops on ->hardirq_context, 158 * It is safe to do non-atomic ops on ->hardirq_context,
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 2cf4226ade7..cd1ad4b04c6 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -177,23 +177,10 @@ extern void rcu_sched_qs(int cpu);
177extern void rcu_bh_qs(int cpu); 177extern void rcu_bh_qs(int cpu);
178extern void rcu_check_callbacks(int cpu, int user); 178extern void rcu_check_callbacks(int cpu, int user);
179struct notifier_block; 179struct notifier_block;
180 180extern void rcu_idle_enter(void);
181#ifdef CONFIG_NO_HZ 181extern void rcu_idle_exit(void);
182 182extern void rcu_irq_enter(void);
183extern void rcu_enter_nohz(void); 183extern void rcu_irq_exit(void);
184extern void rcu_exit_nohz(void);
185
186#else /* #ifdef CONFIG_NO_HZ */
187
188static inline void rcu_enter_nohz(void)
189{
190}
191
192static inline void rcu_exit_nohz(void)
193{
194}
195
196#endif /* #else #ifdef CONFIG_NO_HZ */
197 184
198/* 185/*
199 * Infrastructure to implement the synchronize_() primitives in 186 * Infrastructure to implement the synchronize_() primitives in
diff --git a/include/linux/tick.h b/include/linux/tick.h
index b232ccc0ee2..ca40838fdfb 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -127,8 +127,15 @@ extern ktime_t tick_nohz_get_sleep_length(void);
127extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); 127extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
128extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); 128extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
129# else 129# else
130static inline void tick_nohz_stop_sched_tick(int inidle) { } 130static inline void tick_nohz_stop_sched_tick(int inidle)
131static inline void tick_nohz_restart_sched_tick(void) { } 131{
132 if (inidle)
133 rcu_idle_enter();
134}
135static inline void tick_nohz_restart_sched_tick(void)
136{
137 rcu_idle_exit();
138}
132static inline ktime_t tick_nohz_get_sleep_length(void) 139static inline ktime_t tick_nohz_get_sleep_length(void)
133{ 140{
134 ktime_t len = { .tv64 = NSEC_PER_SEC/HZ }; 141 ktime_t len = { .tv64 = NSEC_PER_SEC/HZ };
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index 669fbd62ec2..e5771804c50 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -246,19 +246,21 @@ TRACE_EVENT(rcu_fqs,
246 */ 246 */
247TRACE_EVENT(rcu_dyntick, 247TRACE_EVENT(rcu_dyntick,
248 248
249 TP_PROTO(char *polarity), 249 TP_PROTO(char *polarity, int nesting),
250 250
251 TP_ARGS(polarity), 251 TP_ARGS(polarity, nesting),
252 252
253 TP_STRUCT__entry( 253 TP_STRUCT__entry(
254 __field(char *, polarity) 254 __field(char *, polarity)
255 __field(int, nesting)
255 ), 256 ),
256 257
257 TP_fast_assign( 258 TP_fast_assign(
258 __entry->polarity = polarity; 259 __entry->polarity = polarity;
260 __entry->nesting = nesting;
259 ), 261 ),
260 262
261 TP_printk("%s", __entry->polarity) 263 TP_printk("%s %d", __entry->polarity, __entry->nesting)
262); 264);
263 265
264/* 266/*
@@ -443,7 +445,7 @@ TRACE_EVENT(rcu_batch_end,
443#define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0) 445#define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0)
444#define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks) do { } while (0) 446#define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks) do { } while (0)
445#define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0) 447#define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0)
446#define trace_rcu_dyntick(polarity) do { } while (0) 448#define trace_rcu_dyntick(polarity, nesting) do { } while (0)
447#define trace_rcu_callback(rcuname, rhp, qlen) do { } while (0) 449#define trace_rcu_callback(rcuname, rhp, qlen) do { } while (0)
448#define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen) do { } while (0) 450#define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen) do { } while (0)
449#define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0) 451#define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0)
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 636af6d9c6e..3ab77bdc90c 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -53,31 +53,122 @@ static void __call_rcu(struct rcu_head *head,
53 53
54#include "rcutiny_plugin.h" 54#include "rcutiny_plugin.h"
55 55
56#ifdef CONFIG_NO_HZ 56static long long rcu_dynticks_nesting = LLONG_MAX / 2;
57 57
58static long rcu_dynticks_nesting = 1; 58/* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */
59static void rcu_idle_enter_common(void)
60{
61 if (rcu_dynticks_nesting) {
62 RCU_TRACE(trace_rcu_dyntick("--=", rcu_dynticks_nesting));
63 return;
64 }
65 RCU_TRACE(trace_rcu_dyntick("Start", rcu_dynticks_nesting));
66 if (!idle_cpu(smp_processor_id())) {
67 WARN_ON_ONCE(1); /* must be idle task! */
68 RCU_TRACE(trace_rcu_dyntick("Error on entry: not idle task",
69 rcu_dynticks_nesting));
70 ftrace_dump(DUMP_ALL);
71 }
72 rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */
73}
59 74
60/* 75/*
61 * Enter dynticks-idle mode, which is an extended quiescent state 76 * Enter idle, which is an extended quiescent state if we have fully
62 * if we have fully entered that mode (i.e., if the new value of 77 * entered that mode (i.e., if the new value of dynticks_nesting is zero).
63 * dynticks_nesting is zero).
64 */ 78 */
65void rcu_enter_nohz(void) 79void rcu_idle_enter(void)
66{ 80{
67 if (--rcu_dynticks_nesting == 0) 81 unsigned long flags;
68 rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */ 82
83 local_irq_save(flags);
84 rcu_dynticks_nesting = 0;
85 rcu_idle_enter_common();
86 local_irq_restore(flags);
69} 87}
70 88
71/* 89/*
72 * Exit dynticks-idle mode, so that we are no longer in an extended 90 * Exit an interrupt handler towards idle.
73 * quiescent state. 91 */
92void rcu_irq_exit(void)
93{
94 unsigned long flags;
95
96 local_irq_save(flags);
97 rcu_dynticks_nesting--;
98 WARN_ON_ONCE(rcu_dynticks_nesting < 0);
99 rcu_idle_enter_common();
100 local_irq_restore(flags);
101}
102
103/* Common code for rcu_idle_exit() and rcu_irq_enter(), see kernel/rcutree.c. */
104static void rcu_idle_exit_common(long long oldval)
105{
106 if (oldval) {
107 RCU_TRACE(trace_rcu_dyntick("++=", rcu_dynticks_nesting));
108 return;
109 }
110 RCU_TRACE(trace_rcu_dyntick("End", oldval));
111 if (!idle_cpu(smp_processor_id())) {
112 WARN_ON_ONCE(1); /* must be idle task! */
113 RCU_TRACE(trace_rcu_dyntick("Error on exit: not idle task",
114 oldval));
115 ftrace_dump(DUMP_ALL);
116 }
117}
118
119/*
120 * Exit idle, so that we are no longer in an extended quiescent state.
74 */ 121 */
75void rcu_exit_nohz(void) 122void rcu_idle_exit(void)
76{ 123{
124 unsigned long flags;
125 long long oldval;
126
127 local_irq_save(flags);
128 oldval = rcu_dynticks_nesting;
129 WARN_ON_ONCE(oldval != 0);
130 rcu_dynticks_nesting = LLONG_MAX / 2;
131 rcu_idle_exit_common(oldval);
132 local_irq_restore(flags);
133}
134
135/*
136 * Enter an interrupt handler, moving away from idle.
137 */
138void rcu_irq_enter(void)
139{
140 unsigned long flags;
141 long long oldval;
142
143 local_irq_save(flags);
144 oldval = rcu_dynticks_nesting;
77 rcu_dynticks_nesting++; 145 rcu_dynticks_nesting++;
146 WARN_ON_ONCE(rcu_dynticks_nesting == 0);
147 rcu_idle_exit_common(oldval);
148 local_irq_restore(flags);
149}
150
151#ifdef CONFIG_PROVE_RCU
152
153/*
154 * Test whether RCU thinks that the current CPU is idle.
155 */
156int rcu_is_cpu_idle(void)
157{
158 return !rcu_dynticks_nesting;
78} 159}
79 160
80#endif /* #ifdef CONFIG_NO_HZ */ 161#endif /* #ifdef CONFIG_PROVE_RCU */
162
163/*
164 * Test whether the current CPU was interrupted from idle. Nested
165 * interrupts don't count, we must be running at the first interrupt
166 * level.
167 */
168int rcu_is_cpu_rrupt_from_idle(void)
169{
170 return rcu_dynticks_nesting <= 0;
171}
81 172
82/* 173/*
83 * Helper function for rcu_sched_qs() and rcu_bh_qs(). 174 * Helper function for rcu_sched_qs() and rcu_bh_qs().
@@ -126,14 +217,13 @@ void rcu_bh_qs(int cpu)
126 217
127/* 218/*
128 * Check to see if the scheduling-clock interrupt came from an extended 219 * Check to see if the scheduling-clock interrupt came from an extended
129 * quiescent state, and, if so, tell RCU about it. 220 * quiescent state, and, if so, tell RCU about it. This function must
221 * be called from hardirq context. It is normally called from the
222 * scheduling-clock interrupt.
130 */ 223 */
131void rcu_check_callbacks(int cpu, int user) 224void rcu_check_callbacks(int cpu, int user)
132{ 225{
133 if (user || 226 if (user || rcu_is_cpu_rrupt_from_idle())
134 (idle_cpu(cpu) &&
135 !in_softirq() &&
136 hardirq_count() <= (1 << HARDIRQ_SHIFT)))
137 rcu_sched_qs(cpu); 227 rcu_sched_qs(cpu);
138 else if (!in_softirq()) 228 else if (!in_softirq())
139 rcu_bh_qs(cpu); 229 rcu_bh_qs(cpu);
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 5d0b55a3a8c..1c40326724f 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -195,12 +195,10 @@ void rcu_note_context_switch(int cpu)
195} 195}
196EXPORT_SYMBOL_GPL(rcu_note_context_switch); 196EXPORT_SYMBOL_GPL(rcu_note_context_switch);
197 197
198#ifdef CONFIG_NO_HZ
199DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { 198DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
200 .dynticks_nesting = 1, 199 .dynticks_nesting = LLONG_MAX / 2,
201 .dynticks = ATOMIC_INIT(1), 200 .dynticks = ATOMIC_INIT(1),
202}; 201};
203#endif /* #ifdef CONFIG_NO_HZ */
204 202
205static int blimit = 10; /* Maximum callbacks per rcu_do_batch. */ 203static int blimit = 10; /* Maximum callbacks per rcu_do_batch. */
206static int qhimark = 10000; /* If this many pending, ignore blimit. */ 204static int qhimark = 10000; /* If this many pending, ignore blimit. */
@@ -328,11 +326,11 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp)
328 return 1; 326 return 1;
329 } 327 }
330 328
331 /* If preemptible RCU, no point in sending reschedule IPI. */ 329 /*
332 if (rdp->preemptible) 330 * The CPU is online, so send it a reschedule IPI. This forces
333 return 0; 331 * it through the scheduler, and (inefficiently) also handles cases
334 332 * where idle loops fail to inform RCU about the CPU being idle.
335 /* The CPU is online, so send it a reschedule IPI. */ 333 */
336 if (rdp->cpu != smp_processor_id()) 334 if (rdp->cpu != smp_processor_id())
337 smp_send_reschedule(rdp->cpu); 335 smp_send_reschedule(rdp->cpu);
338 else 336 else
@@ -343,51 +341,97 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp)
343 341
344#endif /* #ifdef CONFIG_SMP */ 342#endif /* #ifdef CONFIG_SMP */
345 343
346#ifdef CONFIG_NO_HZ 344/*
345 * rcu_idle_enter_common - inform RCU that current CPU is moving towards idle
346 *
347 * If the new value of the ->dynticks_nesting counter now is zero,
348 * we really have entered idle, and must do the appropriate accounting.
349 * The caller must have disabled interrupts.
350 */
351static void rcu_idle_enter_common(struct rcu_dynticks *rdtp)
352{
353 if (rdtp->dynticks_nesting) {
354 trace_rcu_dyntick("--=", rdtp->dynticks_nesting);
355 return;
356 }
357 trace_rcu_dyntick("Start", rdtp->dynticks_nesting);
358 if (!idle_cpu(smp_processor_id())) {
359 WARN_ON_ONCE(1); /* must be idle task! */
360 trace_rcu_dyntick("Error on entry: not idle task",
361 rdtp->dynticks_nesting);
362 ftrace_dump(DUMP_ALL);
363 }
364 /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
365 smp_mb__before_atomic_inc(); /* See above. */
366 atomic_inc(&rdtp->dynticks);
367 smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */
368 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
369}
347 370
348/** 371/**
349 * rcu_enter_nohz - inform RCU that current CPU is entering nohz 372 * rcu_idle_enter - inform RCU that current CPU is entering idle
350 * 373 *
351 * Enter nohz mode, in other words, -leave- the mode in which RCU 374 * Enter idle mode, in other words, -leave- the mode in which RCU
352 * read-side critical sections can occur. (Though RCU read-side 375 * read-side critical sections can occur. (Though RCU read-side
353 * critical sections can occur in irq handlers in nohz mode, a possibility 376 * critical sections can occur in irq handlers in idle, a possibility
354 * handled by rcu_irq_enter() and rcu_irq_exit()). 377 * handled by irq_enter() and irq_exit().)
378 *
379 * We crowbar the ->dynticks_nesting field to zero to allow for
380 * the possibility of usermode upcalls having messed up our count
381 * of interrupt nesting level during the prior busy period.
355 */ 382 */
356void rcu_enter_nohz(void) 383void rcu_idle_enter(void)
357{ 384{
358 unsigned long flags; 385 unsigned long flags;
359 struct rcu_dynticks *rdtp; 386 struct rcu_dynticks *rdtp;
360 387
361 local_irq_save(flags); 388 local_irq_save(flags);
362 rdtp = &__get_cpu_var(rcu_dynticks); 389 rdtp = &__get_cpu_var(rcu_dynticks);
363 if (--rdtp->dynticks_nesting) { 390 rdtp->dynticks_nesting = 0;
364 local_irq_restore(flags); 391 rcu_idle_enter_common(rdtp);
365 return;
366 }
367 trace_rcu_dyntick("Start");
368 /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
369 smp_mb__before_atomic_inc(); /* See above. */
370 atomic_inc(&rdtp->dynticks);
371 smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */
372 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
373 local_irq_restore(flags); 392 local_irq_restore(flags);
374} 393}
375 394
376/* 395/**
377 * rcu_exit_nohz - inform RCU that current CPU is leaving nohz 396 * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle
397 *
398 * Exit from an interrupt handler, which might possibly result in entering
399 * idle mode, in other words, leaving the mode in which read-side critical
400 * sections can occur.
378 * 401 *
379 * Exit nohz mode, in other words, -enter- the mode in which RCU 402 * This code assumes that the idle loop never does anything that might
380 * read-side critical sections normally occur. 403 * result in unbalanced calls to irq_enter() and irq_exit(). If your
404 * architecture violates this assumption, RCU will give you what you
405 * deserve, good and hard. But very infrequently and irreproducibly.
406 *
407 * Use things like work queues to work around this limitation.
408 *
409 * You have been warned.
381 */ 410 */
382void rcu_exit_nohz(void) 411void rcu_irq_exit(void)
383{ 412{
384 unsigned long flags; 413 unsigned long flags;
385 struct rcu_dynticks *rdtp; 414 struct rcu_dynticks *rdtp;
386 415
387 local_irq_save(flags); 416 local_irq_save(flags);
388 rdtp = &__get_cpu_var(rcu_dynticks); 417 rdtp = &__get_cpu_var(rcu_dynticks);
389 if (rdtp->dynticks_nesting++) { 418 rdtp->dynticks_nesting--;
390 local_irq_restore(flags); 419 WARN_ON_ONCE(rdtp->dynticks_nesting < 0);
420 rcu_idle_enter_common(rdtp);
421 local_irq_restore(flags);
422}
423
424/*
425 * rcu_idle_exit_common - inform RCU that current CPU is moving away from idle
426 *
427 * If the ->dynticks_nesting counter was previously zero,
428 * we really have exited idle, and must do the appropriate accounting.
429 * The caller must have disabled interrupts.
430 */
431static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
432{
433 if (oldval) {
434 trace_rcu_dyntick("++=", rdtp->dynticks_nesting);
391 return; 435 return;
392 } 436 }
393 smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */ 437 smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */
@@ -395,7 +439,71 @@ void rcu_exit_nohz(void)
395 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ 439 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
396 smp_mb__after_atomic_inc(); /* See above. */ 440 smp_mb__after_atomic_inc(); /* See above. */
397 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); 441 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
398 trace_rcu_dyntick("End"); 442 trace_rcu_dyntick("End", oldval);
443 if (!idle_cpu(smp_processor_id())) {
444 WARN_ON_ONCE(1); /* must be idle task! */
445 trace_rcu_dyntick("Error on exit: not idle task", oldval);
446 ftrace_dump(DUMP_ALL);
447 }
448}
449
450/**
451 * rcu_idle_exit - inform RCU that current CPU is leaving idle
452 *
453 * Exit idle mode, in other words, -enter- the mode in which RCU
454 * read-side critical sections can occur.
455 *
456 * We crowbar the ->dynticks_nesting field to LLONG_MAX/2 to allow for
457 * the possibility of usermode upcalls messing up our count
458 * of interrupt nesting level during the busy period that is just
459 * now starting.
460 */
461void rcu_idle_exit(void)
462{
463 unsigned long flags;
464 struct rcu_dynticks *rdtp;
465 long long oldval;
466
467 local_irq_save(flags);
468 rdtp = &__get_cpu_var(rcu_dynticks);
469 oldval = rdtp->dynticks_nesting;
470 WARN_ON_ONCE(oldval != 0);
471 rdtp->dynticks_nesting = LLONG_MAX / 2;
472 rcu_idle_exit_common(rdtp, oldval);
473 local_irq_restore(flags);
474}
475
476/**
477 * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
478 *
479 * Enter an interrupt handler, which might possibly result in exiting
480 * idle mode, in other words, entering the mode in which read-side critical
481 * sections can occur.
482 *
483 * Note that the Linux kernel is fully capable of entering an interrupt
484 * handler that it never exits, for example when doing upcalls to
485 * user mode! This code assumes that the idle loop never does upcalls to
486 * user mode. If your architecture does do upcalls from the idle loop (or
487 * does anything else that results in unbalanced calls to the irq_enter()
488 * and irq_exit() functions), RCU will give you what you deserve, good
489 * and hard. But very infrequently and irreproducibly.
490 *
491 * Use things like work queues to work around this limitation.
492 *
493 * You have been warned.
494 */
495void rcu_irq_enter(void)
496{
497 unsigned long flags;
498 struct rcu_dynticks *rdtp;
499 long long oldval;
500
501 local_irq_save(flags);
502 rdtp = &__get_cpu_var(rcu_dynticks);
503 oldval = rdtp->dynticks_nesting;
504 rdtp->dynticks_nesting++;
505 WARN_ON_ONCE(rdtp->dynticks_nesting == 0);
506 rcu_idle_exit_common(rdtp, oldval);
399 local_irq_restore(flags); 507 local_irq_restore(flags);
400} 508}
401 509
@@ -442,27 +550,32 @@ void rcu_nmi_exit(void)
442 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); 550 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
443} 551}
444 552
553#ifdef CONFIG_PROVE_RCU
554
445/** 555/**
446 * rcu_irq_enter - inform RCU of entry to hard irq context 556 * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle
447 * 557 *
448 * If the CPU was idle with dynamic ticks active, this updates the 558 * If the current CPU is in its idle loop and is neither in an interrupt
449 * rdtp->dynticks to let the RCU handling know that the CPU is active. 559 * nor NMI handler, return true. The caller must have at least disabled
560 * preemption.
450 */ 561 */
451void rcu_irq_enter(void) 562int rcu_is_cpu_idle(void)
452{ 563{
453 rcu_exit_nohz(); 564 return (atomic_read(&__get_cpu_var(rcu_dynticks).dynticks) & 0x1) == 0;
454} 565}
455 566
567#endif /* #ifdef CONFIG_PROVE_RCU */
568
456/** 569/**
457 * rcu_irq_exit - inform RCU of exit from hard irq context 570 * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
458 * 571 *
459 * If the CPU was idle with dynamic ticks active, update the rdp->dynticks 572 * If the current CPU is idle or running at a first-level (not nested)
460 * to put let the RCU handling be aware that the CPU is going back to idle 573 * interrupt from idle, return true. The caller must have at least
461 * with no ticks. 574 * disabled preemption.
462 */ 575 */
463void rcu_irq_exit(void) 576int rcu_is_cpu_rrupt_from_idle(void)
464{ 577{
465 rcu_enter_nohz(); 578 return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1;
466} 579}
467 580
468#ifdef CONFIG_SMP 581#ifdef CONFIG_SMP
@@ -512,24 +625,6 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
512 625
513#endif /* #ifdef CONFIG_SMP */ 626#endif /* #ifdef CONFIG_SMP */
514 627
515#else /* #ifdef CONFIG_NO_HZ */
516
517#ifdef CONFIG_SMP
518
519static int dyntick_save_progress_counter(struct rcu_data *rdp)
520{
521 return 0;
522}
523
524static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
525{
526 return rcu_implicit_offline_qs(rdp);
527}
528
529#endif /* #ifdef CONFIG_SMP */
530
531#endif /* #else #ifdef CONFIG_NO_HZ */
532
533int rcu_cpu_stall_suppress __read_mostly; 628int rcu_cpu_stall_suppress __read_mostly;
534 629
535static void record_gp_stall_check_time(struct rcu_state *rsp) 630static void record_gp_stall_check_time(struct rcu_state *rsp)
@@ -1334,16 +1429,14 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1334 * (user mode or idle loop for rcu, non-softirq execution for rcu_bh). 1429 * (user mode or idle loop for rcu, non-softirq execution for rcu_bh).
1335 * Also schedule RCU core processing. 1430 * Also schedule RCU core processing.
1336 * 1431 *
1337 * This function must be called with hardirqs disabled. It is normally 1432 * This function must be called from hardirq context. It is normally
1338 * invoked from the scheduling-clock interrupt. If rcu_pending returns 1433 * invoked from the scheduling-clock interrupt. If rcu_pending returns
1339 * false, there is no point in invoking rcu_check_callbacks(). 1434 * false, there is no point in invoking rcu_check_callbacks().
1340 */ 1435 */
1341void rcu_check_callbacks(int cpu, int user) 1436void rcu_check_callbacks(int cpu, int user)
1342{ 1437{
1343 trace_rcu_utilization("Start scheduler-tick"); 1438 trace_rcu_utilization("Start scheduler-tick");
1344 if (user || 1439 if (user || rcu_is_cpu_rrupt_from_idle()) {
1345 (idle_cpu(cpu) && rcu_scheduler_active &&
1346 !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
1347 1440
1348 /* 1441 /*
1349 * Get here if this CPU took its interrupt from user 1442 * Get here if this CPU took its interrupt from user
@@ -1913,9 +2006,9 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
1913 for (i = 0; i < RCU_NEXT_SIZE; i++) 2006 for (i = 0; i < RCU_NEXT_SIZE; i++)
1914 rdp->nxttail[i] = &rdp->nxtlist; 2007 rdp->nxttail[i] = &rdp->nxtlist;
1915 rdp->qlen = 0; 2008 rdp->qlen = 0;
1916#ifdef CONFIG_NO_HZ
1917 rdp->dynticks = &per_cpu(rcu_dynticks, cpu); 2009 rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
1918#endif /* #ifdef CONFIG_NO_HZ */ 2010 WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != LLONG_MAX / 2);
2011 WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
1919 rdp->cpu = cpu; 2012 rdp->cpu = cpu;
1920 rdp->rsp = rsp; 2013 rdp->rsp = rsp;
1921 raw_spin_unlock_irqrestore(&rnp->lock, flags); 2014 raw_spin_unlock_irqrestore(&rnp->lock, flags);
@@ -1942,6 +2035,8 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
1942 rdp->qlen_last_fqs_check = 0; 2035 rdp->qlen_last_fqs_check = 0;
1943 rdp->n_force_qs_snap = rsp->n_force_qs; 2036 rdp->n_force_qs_snap = rsp->n_force_qs;
1944 rdp->blimit = blimit; 2037 rdp->blimit = blimit;
2038 WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != LLONG_MAX / 2);
2039 WARN_ON_ONCE((atomic_read(&rdp->dynticks->dynticks) & 0x1) != 1);
1945 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 2040 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
1946 2041
1947 /* 2042 /*
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 517f2f89a29..0963fa1541a 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -84,9 +84,10 @@
84 * Dynticks per-CPU state. 84 * Dynticks per-CPU state.
85 */ 85 */
86struct rcu_dynticks { 86struct rcu_dynticks {
87 int dynticks_nesting; /* Track irq/process nesting level. */ 87 long long dynticks_nesting; /* Track irq/process nesting level. */
88 int dynticks_nmi_nesting; /* Track NMI nesting level. */ 88 /* Process level is worth LLONG_MAX/2. */
89 atomic_t dynticks; /* Even value for dynticks-idle, else odd. */ 89 int dynticks_nmi_nesting; /* Track NMI nesting level. */
90 atomic_t dynticks; /* Even value for idle, else odd. */
90}; 91};
91 92
92/* RCU's kthread states for tracing. */ 93/* RCU's kthread states for tracing. */
@@ -274,16 +275,12 @@ struct rcu_data {
274 /* did other CPU force QS recently? */ 275 /* did other CPU force QS recently? */
275 long blimit; /* Upper limit on a processed batch */ 276 long blimit; /* Upper limit on a processed batch */
276 277
277#ifdef CONFIG_NO_HZ
278 /* 3) dynticks interface. */ 278 /* 3) dynticks interface. */
279 struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */ 279 struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */
280 int dynticks_snap; /* Per-GP tracking for dynticks. */ 280 int dynticks_snap; /* Per-GP tracking for dynticks. */
281#endif /* #ifdef CONFIG_NO_HZ */
282 281
283 /* 4) reasons this CPU needed to be kicked by force_quiescent_state */ 282 /* 4) reasons this CPU needed to be kicked by force_quiescent_state */
284#ifdef CONFIG_NO_HZ
285 unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */ 283 unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */
286#endif /* #ifdef CONFIG_NO_HZ */
287 unsigned long offline_fqs; /* Kicked due to being offline. */ 284 unsigned long offline_fqs; /* Kicked due to being offline. */
288 unsigned long resched_ipi; /* Sent a resched IPI. */ 285 unsigned long resched_ipi; /* Sent a resched IPI. */
289 286
@@ -307,11 +304,7 @@ struct rcu_data {
307#define RCU_GP_INIT 1 /* Grace period being initialized. */ 304#define RCU_GP_INIT 1 /* Grace period being initialized. */
308#define RCU_SAVE_DYNTICK 2 /* Need to scan dyntick state. */ 305#define RCU_SAVE_DYNTICK 2 /* Need to scan dyntick state. */
309#define RCU_FORCE_QS 3 /* Need to force quiescent state. */ 306#define RCU_FORCE_QS 3 /* Need to force quiescent state. */
310#ifdef CONFIG_NO_HZ
311#define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK 307#define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK
312#else /* #ifdef CONFIG_NO_HZ */
313#define RCU_SIGNAL_INIT RCU_FORCE_QS
314#endif /* #else #ifdef CONFIG_NO_HZ */
315 308
316#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ 309#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */
317 310
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index 59c7bee4ce0..654cfe67f0d 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -67,13 +67,11 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
67 rdp->completed, rdp->gpnum, 67 rdp->completed, rdp->gpnum,
68 rdp->passed_quiesce, rdp->passed_quiesce_gpnum, 68 rdp->passed_quiesce, rdp->passed_quiesce_gpnum,
69 rdp->qs_pending); 69 rdp->qs_pending);
70#ifdef CONFIG_NO_HZ 70 seq_printf(m, " dt=%d/%llx/%d df=%lu",
71 seq_printf(m, " dt=%d/%d/%d df=%lu",
72 atomic_read(&rdp->dynticks->dynticks), 71 atomic_read(&rdp->dynticks->dynticks),
73 rdp->dynticks->dynticks_nesting, 72 rdp->dynticks->dynticks_nesting,
74 rdp->dynticks->dynticks_nmi_nesting, 73 rdp->dynticks->dynticks_nmi_nesting,
75 rdp->dynticks_fqs); 74 rdp->dynticks_fqs);
76#endif /* #ifdef CONFIG_NO_HZ */
77 seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi); 75 seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi);
78 seq_printf(m, " ql=%ld qs=%c%c%c%c", 76 seq_printf(m, " ql=%ld qs=%c%c%c%c",
79 rdp->qlen, 77 rdp->qlen,
@@ -141,13 +139,11 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
141 rdp->completed, rdp->gpnum, 139 rdp->completed, rdp->gpnum,
142 rdp->passed_quiesce, rdp->passed_quiesce_gpnum, 140 rdp->passed_quiesce, rdp->passed_quiesce_gpnum,
143 rdp->qs_pending); 141 rdp->qs_pending);
144#ifdef CONFIG_NO_HZ 142 seq_printf(m, ",%d,%llx,%d,%lu",
145 seq_printf(m, ",%d,%d,%d,%lu",
146 atomic_read(&rdp->dynticks->dynticks), 143 atomic_read(&rdp->dynticks->dynticks),
147 rdp->dynticks->dynticks_nesting, 144 rdp->dynticks->dynticks_nesting,
148 rdp->dynticks->dynticks_nmi_nesting, 145 rdp->dynticks->dynticks_nmi_nesting,
149 rdp->dynticks_fqs); 146 rdp->dynticks_fqs);
150#endif /* #ifdef CONFIG_NO_HZ */
151 seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi); 147 seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi);
152 seq_printf(m, ",%ld,\"%c%c%c%c\"", rdp->qlen, 148 seq_printf(m, ",%ld,\"%c%c%c%c\"", rdp->qlen,
153 ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] != 149 ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] !=
@@ -171,9 +167,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
171static int show_rcudata_csv(struct seq_file *m, void *unused) 167static int show_rcudata_csv(struct seq_file *m, void *unused)
172{ 168{
173 seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\","); 169 seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\",");
174#ifdef CONFIG_NO_HZ
175 seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); 170 seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\",");
176#endif /* #ifdef CONFIG_NO_HZ */
177 seq_puts(m, "\"of\",\"ri\",\"ql\",\"qs\""); 171 seq_puts(m, "\"of\",\"ri\",\"ql\",\"qs\"");
178#ifdef CONFIG_RCU_BOOST 172#ifdef CONFIG_RCU_BOOST
179 seq_puts(m, "\"kt\",\"ktl\""); 173 seq_puts(m, "\"kt\",\"ktl\"");
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 40420644d0b..5d9d23665f1 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -434,7 +434,6 @@ void tick_nohz_stop_sched_tick(int inidle)
434 ts->idle_tick = hrtimer_get_expires(&ts->sched_timer); 434 ts->idle_tick = hrtimer_get_expires(&ts->sched_timer);
435 ts->tick_stopped = 1; 435 ts->tick_stopped = 1;
436 ts->idle_jiffies = last_jiffies; 436 ts->idle_jiffies = last_jiffies;
437 rcu_enter_nohz();
438 } 437 }
439 438
440 ts->idle_sleeps++; 439 ts->idle_sleeps++;
@@ -473,6 +472,8 @@ out:
473 ts->last_jiffies = last_jiffies; 472 ts->last_jiffies = last_jiffies;
474 ts->sleep_length = ktime_sub(dev->next_event, now); 473 ts->sleep_length = ktime_sub(dev->next_event, now);
475end: 474end:
475 if (inidle)
476 rcu_idle_enter();
476 local_irq_restore(flags); 477 local_irq_restore(flags);
477} 478}
478 479
@@ -529,6 +530,7 @@ void tick_nohz_restart_sched_tick(void)
529 ktime_t now; 530 ktime_t now;
530 531
531 local_irq_disable(); 532 local_irq_disable();
533 rcu_idle_exit();
532 if (ts->idle_active || (ts->inidle && ts->tick_stopped)) 534 if (ts->idle_active || (ts->inidle && ts->tick_stopped))
533 now = ktime_get(); 535 now = ktime_get();
534 536
@@ -543,8 +545,6 @@ void tick_nohz_restart_sched_tick(void)
543 545
544 ts->inidle = 0; 546 ts->inidle = 0;
545 547
546 rcu_exit_nohz();
547
548 /* Update jiffies first */ 548 /* Update jiffies first */
549 select_nohz_load_balancer(0); 549 select_nohz_load_balancer(0);
550 tick_do_update_jiffies64(now); 550 tick_do_update_jiffies64(now);