author:    Paul E. McKenney <paul.mckenney@linaro.org>    2011-09-30 15:10:22 -0400
committer: Paul E. McKenney <paulmck@linux.vnet.ibm.com>  2011-12-11 13:31:24 -0500
commit:    9b2e4f1880b789be1f24f9684f7a54b90310b5c0
tree:      1fa922e0616e298837a7079cb49118188a58186c /kernel
parent:    b804cb9e91c6c304959c69d4f9daeef4ffdba71c
rcu: Track idleness independent of idle tasks
Earlier versions of RCU used the scheduling-clock tick to detect idleness by checking for the idle task, but handled idleness differently for CONFIG_NO_HZ=y. But there are now a number of uses of RCU read-side critical sections in the idle task, for example, for tracing. A more fine-grained detection of idleness is therefore required.

This commit presses the old dyntick-idle code into full-time service, so that rcu_idle_enter(), previously known as rcu_enter_nohz(), is always invoked at the beginning of an idle-loop iteration. Similarly, rcu_idle_exit(), previously known as rcu_exit_nohz(), is always invoked at the end of an idle-loop iteration. This allows the idle task to use RCU everywhere except between consecutive rcu_idle_enter() and rcu_idle_exit() calls, in turn allowing architecture maintainers to specify exactly where in the idle loop RCU may be used.

Because some of the userspace upcall uses can result in what looks to RCU like half of an interrupt, it is not possible to expect that the irq_enter() and irq_exit() hooks will give exact counts. This commit therefore expands the ->dynticks_nesting counter to 64 bits and uses two separate bitfields to count process/idle transitions and interrupt entry/exit transitions. It is presumed that userspace upcalls do not happen in the idle loop or from usermode execution (though usermode might do a system call that results in an upcall). The counter is hard-reset on each process/idle transition, which keeps the interrupt entry/exit error from accumulating, and the 64-bit width of the ->dynticks_nesting counter avoids overflow.

This commit also adds warnings if a non-idle task asks RCU to enter idle state; these checks will need some adjustment before applying Frederic's OS-jitter patches (http://lkml.org/lkml/2011/10/7/246). In addition, validation of ->dynticks and ->dynticks_nesting is added.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
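The counter discipline described above is small enough to demonstrate outside the kernel. The sketch below is illustrative only, not the kernel implementation: names such as idle_enter() and irq_enter_() are made up for this example, and the real code additionally pairs the nesting counter with the atomic even/odd ->dynticks value. It shows why hard-resetting the counter on each process/idle transition keeps any irq_enter()/irq_exit() miscount from accumulating.

/*
 * Userspace sketch of the ->dynticks_nesting discipline (illustrative only).
 * Process/idle transitions crowbar the counter; interrupt entry/exit only
 * increments/decrements it; "idle from RCU's point of view" means zero.
 */
#include <assert.h>
#include <limits.h>
#include <stdio.h>

static long long dynticks_nesting = LLONG_MAX / 2;      /* non-idle at boot */

static void idle_enter(void) { dynticks_nesting = 0; }  /* hard reset to 0 */
static void idle_exit(void)  { dynticks_nesting = LLONG_MAX / 2; }
static void irq_enter_(void) { dynticks_nesting++; }
static void irq_exit_(void)  { dynticks_nesting--; }

static int cpu_is_idle(void) { return dynticks_nesting == 0; }

int main(void)
{
        idle_enter();                   /* idle-loop iteration begins */
        assert(cpu_is_idle());          /* extended quiescent state */

        irq_enter_();                   /* interrupt taken from idle */
        assert(!cpu_is_idle());         /* RCU usable inside the handler */
        irq_exit_();
        assert(cpu_is_idle());

        idle_exit();                    /* leaving idle: the reset absorbs */
        assert(!cpu_is_idle());         /* any miscount from upcalls */

        printf("counter discipline holds\n");
        return 0;
}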
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/rcutiny.c            124
-rw-r--r--  kernel/rcutree.c            229
-rw-r--r--  kernel/rcutree.h             15
-rw-r--r--  kernel/rcutree_trace.c       10
-rw-r--r--  kernel/time/tick-sched.c      6
5 files changed, 278 insertions(+), 106 deletions(-)
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 636af6d9c6e5..3ab77bdc90c4 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -53,31 +53,122 @@ static void __call_rcu(struct rcu_head *head,
 
 #include "rcutiny_plugin.h"
 
-#ifdef CONFIG_NO_HZ
+static long long rcu_dynticks_nesting = LLONG_MAX / 2;
 
-static long rcu_dynticks_nesting = 1;
+/* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */
+static void rcu_idle_enter_common(void)
+{
+        if (rcu_dynticks_nesting) {
+                RCU_TRACE(trace_rcu_dyntick("--=", rcu_dynticks_nesting));
+                return;
+        }
+        RCU_TRACE(trace_rcu_dyntick("Start", rcu_dynticks_nesting));
+        if (!idle_cpu(smp_processor_id())) {
+                WARN_ON_ONCE(1);        /* must be idle task! */
+                RCU_TRACE(trace_rcu_dyntick("Error on entry: not idle task",
+                                            rcu_dynticks_nesting));
+                ftrace_dump(DUMP_ALL);
+        }
+        rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */
+}
 
 /*
- * Enter dynticks-idle mode, which is an extended quiescent state
- * if we have fully entered that mode (i.e., if the new value of
- * dynticks_nesting is zero).
+ * Enter idle, which is an extended quiescent state if we have fully
+ * entered that mode (i.e., if the new value of dynticks_nesting is zero).
  */
-void rcu_enter_nohz(void)
+void rcu_idle_enter(void)
 {
-        if (--rcu_dynticks_nesting == 0)
-                rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */
+        unsigned long flags;
+
+        local_irq_save(flags);
+        rcu_dynticks_nesting = 0;
+        rcu_idle_enter_common();
+        local_irq_restore(flags);
 }
 
 /*
- * Exit dynticks-idle mode, so that we are no longer in an extended
- * quiescent state.
+ * Exit an interrupt handler towards idle.
+ */
+void rcu_irq_exit(void)
+{
+        unsigned long flags;
+
+        local_irq_save(flags);
+        rcu_dynticks_nesting--;
+        WARN_ON_ONCE(rcu_dynticks_nesting < 0);
+        rcu_idle_enter_common();
+        local_irq_restore(flags);
+}
+
+/* Common code for rcu_idle_exit() and rcu_irq_enter(), see kernel/rcutree.c. */
+static void rcu_idle_exit_common(long long oldval)
+{
+        if (oldval) {
+                RCU_TRACE(trace_rcu_dyntick("++=", rcu_dynticks_nesting));
+                return;
+        }
+        RCU_TRACE(trace_rcu_dyntick("End", oldval));
+        if (!idle_cpu(smp_processor_id())) {
+                WARN_ON_ONCE(1);        /* must be idle task! */
+                RCU_TRACE(trace_rcu_dyntick("Error on exit: not idle task",
+                                            oldval));
+                ftrace_dump(DUMP_ALL);
+        }
+}
+
+/*
+ * Exit idle, so that we are no longer in an extended quiescent state.
  */
-void rcu_exit_nohz(void)
+void rcu_idle_exit(void)
 {
+        unsigned long flags;
+        long long oldval;
+
+        local_irq_save(flags);
+        oldval = rcu_dynticks_nesting;
+        WARN_ON_ONCE(oldval != 0);
+        rcu_dynticks_nesting = LLONG_MAX / 2;
+        rcu_idle_exit_common(oldval);
+        local_irq_restore(flags);
+}
+
+/*
+ * Enter an interrupt handler, moving away from idle.
+ */
+void rcu_irq_enter(void)
+{
+        unsigned long flags;
+        long long oldval;
+
+        local_irq_save(flags);
+        oldval = rcu_dynticks_nesting;
         rcu_dynticks_nesting++;
+        WARN_ON_ONCE(rcu_dynticks_nesting == 0);
+        rcu_idle_exit_common(oldval);
+        local_irq_restore(flags);
+}
+
+#ifdef CONFIG_PROVE_RCU
+
+/*
+ * Test whether RCU thinks that the current CPU is idle.
+ */
+int rcu_is_cpu_idle(void)
+{
+        return !rcu_dynticks_nesting;
 }
 
-#endif /* #ifdef CONFIG_NO_HZ */
+#endif /* #ifdef CONFIG_PROVE_RCU */
+
+/*
+ * Test whether the current CPU was interrupted from idle.  Nested
+ * interrupts don't count, we must be running at the first interrupt
+ * level.
+ */
+int rcu_is_cpu_rrupt_from_idle(void)
+{
+        return rcu_dynticks_nesting <= 0;
+}
 
 /*
  * Helper function for rcu_sched_qs() and rcu_bh_qs().
@@ -126,14 +217,13 @@ void rcu_bh_qs(int cpu)
 
 /*
  * Check to see if the scheduling-clock interrupt came from an extended
- * quiescent state, and, if so, tell RCU about it.
+ * quiescent state, and, if so, tell RCU about it.  This function must
+ * be called from hardirq context.  It is normally called from the
+ * scheduling-clock interrupt.
  */
 void rcu_check_callbacks(int cpu, int user)
 {
-        if (user ||
-            (idle_cpu(cpu) &&
-             !in_softirq() &&
-             hardirq_count() <= (1 << HARDIRQ_SHIFT)))
+        if (user || rcu_is_cpu_rrupt_from_idle())
                 rcu_sched_qs(cpu);
         else if (!in_softirq())
                 rcu_bh_qs(cpu);
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 5d0b55a3a8c0..1c40326724f6 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -195,12 +195,10 @@ void rcu_note_context_switch(int cpu)
 }
 EXPORT_SYMBOL_GPL(rcu_note_context_switch);
 
-#ifdef CONFIG_NO_HZ
 DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
-        .dynticks_nesting = 1,
+        .dynticks_nesting = LLONG_MAX / 2,
         .dynticks = ATOMIC_INIT(1),
 };
-#endif /* #ifdef CONFIG_NO_HZ */
 
 static int blimit = 10;         /* Maximum callbacks per rcu_do_batch. */
 static int qhimark = 10000;     /* If this many pending, ignore blimit. */
@@ -328,11 +326,11 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp)
                 return 1;
         }
 
-        /* If preemptible RCU, no point in sending reschedule IPI. */
-        if (rdp->preemptible)
-                return 0;
-
-        /* The CPU is online, so send it a reschedule IPI. */
+        /*
+         * The CPU is online, so send it a reschedule IPI.  This forces
+         * it through the scheduler, and (inefficiently) also handles cases
+         * where idle loops fail to inform RCU about the CPU being idle.
+         */
         if (rdp->cpu != smp_processor_id())
                 smp_send_reschedule(rdp->cpu);
         else
@@ -343,51 +341,97 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp)
 
 #endif /* #ifdef CONFIG_SMP */
 
-#ifdef CONFIG_NO_HZ
+/*
+ * rcu_idle_enter_common - inform RCU that current CPU is moving towards idle
+ *
+ * If the new value of the ->dynticks_nesting counter now is zero,
+ * we really have entered idle, and must do the appropriate accounting.
+ * The caller must have disabled interrupts.
+ */
+static void rcu_idle_enter_common(struct rcu_dynticks *rdtp)
+{
+        if (rdtp->dynticks_nesting) {
+                trace_rcu_dyntick("--=", rdtp->dynticks_nesting);
+                return;
+        }
+        trace_rcu_dyntick("Start", rdtp->dynticks_nesting);
+        if (!idle_cpu(smp_processor_id())) {
+                WARN_ON_ONCE(1);        /* must be idle task! */
+                trace_rcu_dyntick("Error on entry: not idle task",
+                                  rdtp->dynticks_nesting);
+                ftrace_dump(DUMP_ALL);
+        }
+        /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
+        smp_mb__before_atomic_inc();  /* See above. */
+        atomic_inc(&rdtp->dynticks);
+        smp_mb__after_atomic_inc();  /* Force ordering with next sojourn. */
+        WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
+}
 
 /**
- * rcu_enter_nohz - inform RCU that current CPU is entering nohz
+ * rcu_idle_enter - inform RCU that current CPU is entering idle
  *
- * Enter nohz mode, in other words, -leave- the mode in which RCU
+ * Enter idle mode, in other words, -leave- the mode in which RCU
  * read-side critical sections can occur.  (Though RCU read-side
- * critical sections can occur in irq handlers in nohz mode, a possibility
- * handled by rcu_irq_enter() and rcu_irq_exit()).
+ * critical sections can occur in irq handlers in idle, a possibility
+ * handled by irq_enter() and irq_exit().)
+ *
+ * We crowbar the ->dynticks_nesting field to zero to allow for
+ * the possibility of usermode upcalls having messed up our count
+ * of interrupt nesting level during the prior busy period.
  */
-void rcu_enter_nohz(void)
+void rcu_idle_enter(void)
 {
         unsigned long flags;
         struct rcu_dynticks *rdtp;
 
         local_irq_save(flags);
         rdtp = &__get_cpu_var(rcu_dynticks);
-        if (--rdtp->dynticks_nesting) {
-                local_irq_restore(flags);
-                return;
-        }
-        trace_rcu_dyntick("Start");
-        /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
-        smp_mb__before_atomic_inc();  /* See above. */
-        atomic_inc(&rdtp->dynticks);
-        smp_mb__after_atomic_inc();  /* Force ordering with next sojourn. */
-        WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
+        rdtp->dynticks_nesting = 0;
+        rcu_idle_enter_common(rdtp);
         local_irq_restore(flags);
 }
 
-/*
- * rcu_exit_nohz - inform RCU that current CPU is leaving nohz
+/**
+ * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle
+ *
+ * Exit from an interrupt handler, which might possibly result in entering
+ * idle mode, in other words, leaving the mode in which read-side critical
+ * sections can occur.
  *
- * Exit nohz mode, in other words, -enter- the mode in which RCU
- * read-side critical sections normally occur.
+ * This code assumes that the idle loop never does anything that might
+ * result in unbalanced calls to irq_enter() and irq_exit().  If your
+ * architecture violates this assumption, RCU will give you what you
+ * deserve, good and hard.  But very infrequently and irreproducibly.
+ *
+ * Use things like work queues to work around this limitation.
+ *
+ * You have been warned.
  */
-void rcu_exit_nohz(void)
+void rcu_irq_exit(void)
 {
         unsigned long flags;
         struct rcu_dynticks *rdtp;
 
         local_irq_save(flags);
         rdtp = &__get_cpu_var(rcu_dynticks);
-        if (rdtp->dynticks_nesting++) {
-                local_irq_restore(flags);
+        rdtp->dynticks_nesting--;
+        WARN_ON_ONCE(rdtp->dynticks_nesting < 0);
+        rcu_idle_enter_common(rdtp);
+        local_irq_restore(flags);
+}
+
+/*
+ * rcu_idle_exit_common - inform RCU that current CPU is moving away from idle
+ *
+ * If the new value of the ->dynticks_nesting counter was previously zero,
+ * we really have exited idle, and must do the appropriate accounting.
+ * The caller must have disabled interrupts.
+ */
+static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
+{
+        if (oldval) {
+                trace_rcu_dyntick("++=", rdtp->dynticks_nesting);
                 return;
         }
         smp_mb__before_atomic_inc();  /* Force ordering w/previous sojourn. */
@@ -395,7 +439,71 @@ void rcu_exit_nohz(void)
         /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
         smp_mb__after_atomic_inc();  /* See above. */
         WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
-        trace_rcu_dyntick("End");
+        trace_rcu_dyntick("End", oldval);
+        if (!idle_cpu(smp_processor_id())) {
+                WARN_ON_ONCE(1);        /* must be idle task! */
+                trace_rcu_dyntick("Error on exit: not idle task", oldval);
+                ftrace_dump(DUMP_ALL);
+        }
+}
+
+/**
+ * rcu_idle_exit - inform RCU that current CPU is leaving idle
+ *
+ * Exit idle mode, in other words, -enter- the mode in which RCU
+ * read-side critical sections can occur.
+ *
+ * We crowbar the ->dynticks_nesting field to LLONG_MAX/2 to allow for
+ * the possibility of usermode upcalls messing up our count
+ * of interrupt nesting level during the busy period that is just
+ * now starting.
+ */
+void rcu_idle_exit(void)
+{
+        unsigned long flags;
+        struct rcu_dynticks *rdtp;
+        long long oldval;
+
+        local_irq_save(flags);
+        rdtp = &__get_cpu_var(rcu_dynticks);
+        oldval = rdtp->dynticks_nesting;
+        WARN_ON_ONCE(oldval != 0);
+        rdtp->dynticks_nesting = LLONG_MAX / 2;
+        rcu_idle_exit_common(rdtp, oldval);
+        local_irq_restore(flags);
+}
+
+/**
+ * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
+ *
+ * Enter an interrupt handler, which might possibly result in exiting
+ * idle mode, in other words, entering the mode in which read-side critical
+ * sections can occur.
+ *
+ * Note that the Linux kernel is fully capable of entering an interrupt
+ * handler that it never exits, for example when doing upcalls to
+ * user mode!  This code assumes that the idle loop never does upcalls to
+ * user mode.  If your architecture does do upcalls from the idle loop (or
+ * does anything else that results in unbalanced calls to the irq_enter()
+ * and irq_exit() functions), RCU will give you what you deserve, good
+ * and hard.  But very infrequently and irreproducibly.
+ *
+ * Use things like work queues to work around this limitation.
+ *
+ * You have been warned.
+ */
+void rcu_irq_enter(void)
+{
+        unsigned long flags;
+        struct rcu_dynticks *rdtp;
+        long long oldval;
+
+        local_irq_save(flags);
+        rdtp = &__get_cpu_var(rcu_dynticks);
+        oldval = rdtp->dynticks_nesting;
+        rdtp->dynticks_nesting++;
+        WARN_ON_ONCE(rdtp->dynticks_nesting == 0);
+        rcu_idle_exit_common(rdtp, oldval);
         local_irq_restore(flags);
 }
 
@@ -442,27 +550,32 @@ void rcu_nmi_exit(void)
         WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
 }
 
+#ifdef CONFIG_PROVE_RCU
+
 /**
- * rcu_irq_enter - inform RCU of entry to hard irq context
+ * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle
  *
- * If the CPU was idle with dynamic ticks active, this updates the
- * rdtp->dynticks to let the RCU handling know that the CPU is active.
+ * If the current CPU is in its idle loop and is neither in an interrupt
+ * or NMI handler, return true.  The caller must have at least disabled
+ * preemption.
  */
-void rcu_irq_enter(void)
+int rcu_is_cpu_idle(void)
 {
-        rcu_exit_nohz();
+        return (atomic_read(&__get_cpu_var(rcu_dynticks).dynticks) & 0x1) == 0;
 }
 
+#endif /* #ifdef CONFIG_PROVE_RCU */
+
 /**
- * rcu_irq_exit - inform RCU of exit from hard irq context
+ * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
  *
- * If the CPU was idle with dynamic ticks active, update the rdp->dynticks
- * to put let the RCU handling be aware that the CPU is going back to idle
- * with no ticks.
+ * If the current CPU is idle or running at a first-level (not nested)
+ * interrupt from idle, return true.  The caller must have at least
+ * disabled preemption.
  */
-void rcu_irq_exit(void)
+int rcu_is_cpu_rrupt_from_idle(void)
 {
-        rcu_enter_nohz();
+        return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1;
 }
 
 #ifdef CONFIG_SMP
@@ -512,24 +625,6 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 
 #endif /* #ifdef CONFIG_SMP */
 
-#else /* #ifdef CONFIG_NO_HZ */
-
-#ifdef CONFIG_SMP
-
-static int dyntick_save_progress_counter(struct rcu_data *rdp)
-{
-        return 0;
-}
-
-static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
-{
-        return rcu_implicit_offline_qs(rdp);
-}
-
-#endif /* #ifdef CONFIG_SMP */
-
-#endif /* #else #ifdef CONFIG_NO_HZ */
-
 int rcu_cpu_stall_suppress __read_mostly;
 
 static void record_gp_stall_check_time(struct rcu_state *rsp)
@@ -1334,16 +1429,14 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
  * (user mode or idle loop for rcu, non-softirq execution for rcu_bh).
  * Also schedule RCU core processing.
  *
- * This function must be called with hardirqs disabled.  It is normally
+ * This function must be called from hardirq context.  It is normally
  * invoked from the scheduling-clock interrupt.  If rcu_pending returns
  * false, there is no point in invoking rcu_check_callbacks().
  */
 void rcu_check_callbacks(int cpu, int user)
 {
         trace_rcu_utilization("Start scheduler-tick");
-        if (user ||
-            (idle_cpu(cpu) && rcu_scheduler_active &&
-             !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
+        if (user || rcu_is_cpu_rrupt_from_idle()) {
 
                 /*
                  * Get here if this CPU took its interrupt from user
@@ -1913,9 +2006,9 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
         for (i = 0; i < RCU_NEXT_SIZE; i++)
                 rdp->nxttail[i] = &rdp->nxtlist;
         rdp->qlen = 0;
-#ifdef CONFIG_NO_HZ
         rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
-#endif /* #ifdef CONFIG_NO_HZ */
+        WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != LLONG_MAX / 2);
+        WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
         rdp->cpu = cpu;
         rdp->rsp = rsp;
         raw_spin_unlock_irqrestore(&rnp->lock, flags);
@@ -1942,6 +2035,8 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
         rdp->qlen_last_fqs_check = 0;
         rdp->n_force_qs_snap = rsp->n_force_qs;
         rdp->blimit = blimit;
+        WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != LLONG_MAX / 2);
+        WARN_ON_ONCE((atomic_read(&rdp->dynticks->dynticks) & 0x1) != 1);
         raw_spin_unlock(&rnp->lock);            /* irqs remain disabled. */
 
         /*
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 517f2f89a293..0963fa1541ac 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -84,9 +84,10 @@
  * Dynticks per-CPU state.
  */
 struct rcu_dynticks {
-        int dynticks_nesting;     /* Track irq/process nesting level. */
-        int dynticks_nmi_nesting; /* Track NMI nesting level. */
-        atomic_t dynticks;        /* Even value for dynticks-idle, else odd. */
+        long long dynticks_nesting; /* Track irq/process nesting level. */
+                                    /* Process level is worth LLONG_MAX/2. */
+        int dynticks_nmi_nesting;   /* Track NMI nesting level. */
+        atomic_t dynticks;          /* Even value for idle, else odd. */
 };
 
 /* RCU's kthread states for tracing. */
@@ -274,16 +275,12 @@ struct rcu_data {
                                         /* did other CPU force QS recently? */
         long blimit;                    /* Upper limit on a processed batch */
 
-#ifdef CONFIG_NO_HZ
         /* 3) dynticks interface. */
         struct rcu_dynticks *dynticks;  /* Shared per-CPU dynticks state. */
         int dynticks_snap;              /* Per-GP tracking for dynticks. */
-#endif /* #ifdef CONFIG_NO_HZ */
 
         /* 4) reasons this CPU needed to be kicked by force_quiescent_state */
-#ifdef CONFIG_NO_HZ
         unsigned long dynticks_fqs;     /* Kicked due to dynticks idle. */
-#endif /* #ifdef CONFIG_NO_HZ */
         unsigned long offline_fqs;      /* Kicked due to being offline. */
         unsigned long resched_ipi;      /* Sent a resched IPI. */
 
@@ -307,11 +304,7 @@ struct rcu_data {
 #define RCU_GP_INIT             1       /* Grace period being initialized. */
 #define RCU_SAVE_DYNTICK        2       /* Need to scan dyntick state. */
 #define RCU_FORCE_QS            3       /* Need to force quiescent state. */
-#ifdef CONFIG_NO_HZ
 #define RCU_SIGNAL_INIT         RCU_SAVE_DYNTICK
-#else /* #ifdef CONFIG_NO_HZ */
-#define RCU_SIGNAL_INIT         RCU_FORCE_QS
-#endif /* #else #ifdef CONFIG_NO_HZ */
 
 #define RCU_JIFFIES_TILL_FORCE_QS        3      /* for rsp->jiffies_force_qs */
 
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index 59c7bee4ce0f..654cfe67f0d1 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -67,13 +67,11 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
                    rdp->completed, rdp->gpnum,
                    rdp->passed_quiesce, rdp->passed_quiesce_gpnum,
                    rdp->qs_pending);
-#ifdef CONFIG_NO_HZ
-        seq_printf(m, " dt=%d/%d/%d df=%lu",
+        seq_printf(m, " dt=%d/%llx/%d df=%lu",
                    atomic_read(&rdp->dynticks->dynticks),
                    rdp->dynticks->dynticks_nesting,
                    rdp->dynticks->dynticks_nmi_nesting,
                    rdp->dynticks_fqs);
-#endif /* #ifdef CONFIG_NO_HZ */
         seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi);
         seq_printf(m, " ql=%ld qs=%c%c%c%c",
                    rdp->qlen,
@@ -141,13 +139,11 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
                    rdp->completed, rdp->gpnum,
                    rdp->passed_quiesce, rdp->passed_quiesce_gpnum,
                    rdp->qs_pending);
-#ifdef CONFIG_NO_HZ
-        seq_printf(m, ",%d,%d,%d,%lu",
+        seq_printf(m, ",%d,%llx,%d,%lu",
                    atomic_read(&rdp->dynticks->dynticks),
                    rdp->dynticks->dynticks_nesting,
                    rdp->dynticks->dynticks_nmi_nesting,
                    rdp->dynticks_fqs);
-#endif /* #ifdef CONFIG_NO_HZ */
         seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi);
         seq_printf(m, ",%ld,\"%c%c%c%c\"", rdp->qlen,
                    ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] !=
@@ -171,9 +167,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
 static int show_rcudata_csv(struct seq_file *m, void *unused)
 {
         seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\",");
-#ifdef CONFIG_NO_HZ
         seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\",");
-#endif /* #ifdef CONFIG_NO_HZ */
         seq_puts(m, "\"of\",\"ri\",\"ql\",\"qs\"");
 #ifdef CONFIG_RCU_BOOST
         seq_puts(m, "\"kt\",\"ktl\"");
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 40420644d0ba..5d9d23665f12 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -434,7 +434,6 @@ void tick_nohz_stop_sched_tick(int inidle)
                 ts->idle_tick = hrtimer_get_expires(&ts->sched_timer);
                 ts->tick_stopped = 1;
                 ts->idle_jiffies = last_jiffies;
-                rcu_enter_nohz();
         }
 
         ts->idle_sleeps++;
@@ -473,6 +472,8 @@ out:
         ts->last_jiffies = last_jiffies;
         ts->sleep_length = ktime_sub(dev->next_event, now);
 end:
+        if (inidle)
+                rcu_idle_enter();
         local_irq_restore(flags);
 }
 
@@ -529,6 +530,7 @@ void tick_nohz_restart_sched_tick(void)
         ktime_t now;
 
         local_irq_disable();
+        rcu_idle_exit();
         if (ts->idle_active || (ts->inidle && ts->tick_stopped))
                 now = ktime_get();
 
@@ -543,8 +545,6 @@ void tick_nohz_restart_sched_tick(void)
 
         ts->inidle = 0;
 
-        rcu_exit_nohz();
-
         /* Update jiffies first */
         select_nohz_load_balancer(0);
         tick_do_update_jiffies64(now);