author    Paul E. McKenney <paulmck@linux.vnet.ibm.com>  2013-06-21 19:37:22 -0400
committer Paul E. McKenney <paulmck@linux.vnet.ibm.com>  2013-08-31 17:43:50 -0400
commit    0edd1b1784cbdad55aca2c1293be018f53c0ab1d (patch)
tree      61e17002ce447f0042a65429cfa33c6462f872a1
parent    217af2a2ffbfc1498d1cf3a89fa478b5632df8f7 (diff)
nohz_full: Add full-system-idle state machine
This commit adds the state machine that takes the per-CPU idle data as
input and produces a full-system-idle indication as output.  This state
machine is driven out of RCU's quiescent-state-forcing mechanism, which
invokes rcu_sysidle_check_cpu() to collect per-CPU idle state and then
rcu_sysidle_report() to drive the state machine.

The full-system-idle state is sampled using rcu_sys_is_idle(), which
also drives the state machine if RCU is idle (and does so by forcing
RCU to become non-idle).  This function returns true if all but the
timekeeping CPU (tick_do_timer_cpu) are idle and have been idle long
enough to avoid memory contention on the full_sysidle_state state
variable.  The rcu_sysidle_force_exit() function may be called
externally to reset the state machine back into non-idle state.

For large systems the state machine is driven out of RCU's
force-quiescent-state logic, which provides good scalability at the
price of millisecond-scale latencies on the transition to
full-system-idle state.  This is not so good for battery-powered
systems, which are usually small enough that they don't need to care
about scalability, but which do care deeply about energy efficiency.
Small systems therefore drive the state machine directly out of the
idle-entry code.  The number of CPUs in a "small" system is defined by
a new NO_HZ_FULL_SYSIDLE_SMALL Kconfig parameter, which defaults to 8.
Note that this is a build-time definition.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
[ paulmck: Use true and false for boolean constants per Lai Jiangshan. ]
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
[ paulmck: Simplify logic and provide better comments for memory barriers,
  based on review comments and questions by Lai Jiangshan. ]
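For orientation, here is a minimal user-space C sketch of the five-state
progression described above (NOT -> SHORT -> LONG -> FULL -> FULL_NOTED).
It models full_sysidle_state with C11 atomics and compresses the
jiffies-based dwell times into loop iterations; sysidle_advance() and
sysidle_cancel() are hypothetical stand-ins for this illustration and do
not appear in the patch itself.

#include <stdatomic.h>
#include <stdio.h>

/* Mirror of the patch's state encoding. */
#define RCU_SYSIDLE_NOT		0	/* Some CPU is not idle. */
#define RCU_SYSIDLE_SHORT	1	/* All CPUs idle for brief period. */
#define RCU_SYSIDLE_LONG	2	/* All CPUs idle for long enough. */
#define RCU_SYSIDLE_FULL	3	/* All CPUs idle, system fully idle. */
#define RCU_SYSIDLE_FULL_NOTED	4	/* Timekeeping CPU noted full idle. */

static atomic_int full_sysidle_state = RCU_SYSIDLE_NOT;

/* Hypothetical stand-in for one scan that found every CPU idle. */
static void sysidle_advance(void)
{
	int s = atomic_load(&full_sysidle_state);
	int expected = s;

	switch (s) {
	case RCU_SYSIDLE_NOT:
		/* First all-idle scan: note a short idle period. */
		atomic_store(&full_sysidle_state, RCU_SYSIDLE_SHORT);
		break;
	case RCU_SYSIDLE_SHORT:
	case RCU_SYSIDLE_LONG:
		/*
		 * Advance one state; compare-exchange failure means a
		 * newly non-idle CPU raced with us, and it must win.
		 */
		atomic_compare_exchange_strong(&full_sysidle_state,
					       &expected, s + 1);
		break;
	default:
		break;	/* FULL/FULL_NOTED are the timekeeping CPU's job. */
	}
}

/* Hypothetical stand-in for any CPU going non-idle. */
static void sysidle_cancel(void)
{
	atomic_store(&full_sysidle_state, RCU_SYSIDLE_NOT);
}

int main(void)
{
	int i;

	/* Three all-idle scans walk NOT -> SHORT -> LONG -> FULL. */
	for (i = 0; i < 3; i++)
		sysidle_advance();
	printf("after three idle scans: %d\n",
	       atomic_load(&full_sysidle_state));	/* 3 == FULL */

	/* Any wakeup resets the machine. */
	sysidle_cancel();
	printf("after wakeup: %d\n",
	       atomic_load(&full_sysidle_state));	/* 0 == NOT */
	return 0;
}

The advance path uses a compare-and-swap while the cancel path uses a
plain store, mirroring the patch's policy that a CPU going non-idle
always wins any race against the idle-advance path.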
-rw-r--r--	include/linux/rcupdate.h	 18
-rw-r--r--	kernel/rcutree.c	 16
-rw-r--r--	kernel/rcutree.h	  5
-rw-r--r--	kernel/rcutree_plugin.h	296
-rw-r--r--	kernel/time/Kconfig	 27
5 files changed, 355 insertions(+), 7 deletions(-)
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 30bea9c25735..f1f1bc39346b 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -1011,4 +1011,22 @@ static inline bool rcu_is_nocb_cpu(int cpu) { return false; }
 #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
 
 
+/* Only for use by adaptive-ticks code. */
+#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
+extern bool rcu_sys_is_idle(void);
+extern void rcu_sysidle_force_exit(void);
+#else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
+
+static inline bool rcu_sys_is_idle(void)
+{
+	return false;
+}
+
+static inline void rcu_sysidle_force_exit(void)
+{
+}
+
+#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
+
+
 #endif /* __LINUX_RCUPDATE_H */
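As a sketch of how a caller might use this new interface: the
adaptive-ticks timekeeping path (expected to arrive later in this
series) would poll rcu_sys_is_idle() from the timekeeping CPU and call
rcu_sysidle_force_exit() before doing anything beyond taking
scheduling-clock interrupts.  The following user-space illustration
stubs out both functions; it is a hypothetical usage sketch, not code
from this patch:

#include <stdbool.h>
#include <stdio.h>

/* Stubs standing in for the rcupdate.h interface (illustration only). */
static bool system_fully_idle;

static bool rcu_sys_is_idle(void)
{
	return system_fully_idle;
}

static void rcu_sysidle_force_exit(void)
{
	system_fully_idle = false;	/* back to non-idle state */
}

int main(void)
{
	system_fully_idle = true;	/* pretend all other CPUs went idle */

	/* Timekeeping CPU polls: if fully idle, its tick could stop. */
	if (rcu_sys_is_idle())
		printf("all non-timekeeping CPUs idle; tick could stop\n");

	/* About to do real work, so force the state machine out of idle. */
	rcu_sysidle_force_exit();
	printf("fully idle now? %s\n", rcu_sys_is_idle() ? "yes" : "no");
	return 0;
}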
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 7b5be56d95ae..eca70f4469c1 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -734,6 +734,7 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp,
 					 bool *isidle, unsigned long *maxj)
 {
 	rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
+	rcu_sysidle_check_cpu(rdp, isidle, maxj);
 	return (rdp->dynticks_snap & 0x1) == 0;
 }
 
@@ -1373,11 +1374,17 @@ int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
 	rsp->n_force_qs++;
 	if (fqs_state == RCU_SAVE_DYNTICK) {
 		/* Collect dyntick-idle snapshots. */
+		if (is_sysidle_rcu_state(rsp)) {
+			isidle = 1;
+			maxj = jiffies - ULONG_MAX / 4;
+		}
 		force_qs_rnp(rsp, dyntick_save_progress_counter,
 			     &isidle, &maxj);
+		rcu_sysidle_report_gp(rsp, isidle, maxj);
 		fqs_state = RCU_FORCE_QS;
 	} else {
 		/* Handle dyntick-idle and offline CPUs. */
+		isidle = 0;
 		force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj);
 	}
 	/* Clear flag to prevent immediate re-entry. */
@@ -2103,9 +2110,12 @@ static void force_qs_rnp(struct rcu_state *rsp,
 		cpu = rnp->grplo;
 		bit = 1;
 		for (; cpu <= rnp->grphi; cpu++, bit <<= 1) {
-			if ((rnp->qsmask & bit) != 0 &&
-			    f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj))
-				mask |= bit;
+			if ((rnp->qsmask & bit) != 0) {
+				if ((rnp->qsmaskinit & bit) != 0)
+					*isidle = 0;
+				if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj))
+					mask |= bit;
+			}
 		}
 		if (mask != 0) {
 
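The force-quiescent-state scan above threads two accumulators through
every per-CPU check: *isidle is effectively the logical AND of "this
CPU is idle" across the scan, and *maxj tracks the most recent
idle-entry timestamp seen so far, telling the state machine how long
the whole system has been idle.  A minimal sketch of that reduction
pattern follows; the per-CPU samples are made-up data, and it ignores
the kernel's wraparound-safe ULONG_CMP_LT() comparison for simplicity:

#include <stdbool.h>
#include <stdio.h>

/* Per-CPU sample: idle flag plus idle-entry time (hypothetical data). */
struct cpu_sample {
	bool idle;
	unsigned long idle_since;	/* jiffies-style timestamp */
};

/*
 * Same accumulator shape as the fqs scan: AND the idle flags and
 * track the latest (largest) idle-entry timestamp.
 */
static void check_cpu(const struct cpu_sample *s, bool *isidle,
		      unsigned long *maxj)
{
	if (!*isidle)
		return;			/* Someone already reported non-idle. */
	if (!s->idle) {
		*isidle = false;	/* This CPU is not idle. */
		return;
	}
	if (*maxj < s->idle_since)	/* kernel uses ULONG_CMP_LT() */
		*maxj = s->idle_since;
}

int main(void)
{
	struct cpu_sample cpus[] = {
		{ true, 1000 }, { true, 1040 }, { true, 1010 },
	};
	bool isidle = true;
	unsigned long maxj = 0;
	unsigned int i;

	for (i = 0; i < sizeof(cpus) / sizeof(cpus[0]); i++)
		check_cpu(&cpus[i], &isidle, &maxj);
	/* All idle; the most recent idle entry was at jiffy 1040. */
	printf("isidle=%d maxj=%lu\n", isidle, maxj);
	return 0;
}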
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 9dd8b177f1ac..6fd3659cf01a 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -555,6 +555,11 @@ static void rcu_kick_nohz_cpu(int cpu);
 static bool init_nocb_callback_list(struct rcu_data *rdp);
 static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq);
 static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq);
+static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
+				  unsigned long *maxj);
+static bool is_sysidle_rcu_state(struct rcu_state *rsp);
+static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
+				  unsigned long maxj);
 static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp);
 
 #endif /* #ifndef RCU_TREE_NONCORE */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index a7419ceb19ad..45ebba747af4 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -28,7 +28,7 @@
 #include <linux/gfp.h>
 #include <linux/oom.h>
 #include <linux/smpboot.h>
-#include <linux/tick.h>
+#include "time/tick-internal.h"
 
 #define RCU_KTHREAD_PRIO 1
 
@@ -2382,12 +2382,12 @@ static void rcu_kick_nohz_cpu(int cpu)
  * most active flavor of RCU.
  */
 #ifdef CONFIG_PREEMPT_RCU
-static struct rcu_state __maybe_unused *rcu_sysidle_state = &rcu_preempt_state;
+static struct rcu_state *rcu_sysidle_state = &rcu_preempt_state;
 #else /* #ifdef CONFIG_PREEMPT_RCU */
-static struct rcu_state __maybe_unused *rcu_sysidle_state = &rcu_sched_state;
+static struct rcu_state *rcu_sysidle_state = &rcu_sched_state;
 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
 
-static int __maybe_unused full_sysidle_state; /* Current system-idle state. */
+static int full_sysidle_state;		/* Current system-idle state. */
 #define RCU_SYSIDLE_NOT		0	/* Some CPU is not idle. */
 #define RCU_SYSIDLE_SHORT	1	/* All CPUs idle for brief period. */
 #define RCU_SYSIDLE_LONG	2	/* All CPUs idle for long enough. */
@@ -2431,6 +2431,38 @@ static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq)
 }
 
 /*
+ * Unconditionally force exit from full system-idle state.  This is
+ * invoked when a normal CPU exits idle, but must be called separately
+ * for the timekeeping CPU (tick_do_timer_cpu).  The reason for this
+ * is that the timekeeping CPU is permitted to take scheduling-clock
+ * interrupts while the system is in system-idle state, and of course
+ * rcu_sysidle_exit() has no way of distinguishing a scheduling-clock
+ * interrupt from any other type of interrupt.
+ */
+void rcu_sysidle_force_exit(void)
+{
+	int oldstate = ACCESS_ONCE(full_sysidle_state);
+	int newoldstate;
+
+	/*
+	 * Each pass through the following loop attempts to exit full
+	 * system-idle state.  If contention proves to be a problem,
+	 * a trylock-based contention tree could be used here.
+	 */
+	while (oldstate > RCU_SYSIDLE_SHORT) {
+		newoldstate = cmpxchg(&full_sysidle_state,
+				      oldstate, RCU_SYSIDLE_NOT);
+		if (oldstate == newoldstate &&
+		    oldstate == RCU_SYSIDLE_FULL_NOTED) {
+			rcu_kick_nohz_cpu(tick_do_timer_cpu);
+			return; /* We cleared it, done! */
+		}
+		oldstate = newoldstate;
+	}
+	smp_mb(); /* Order initial oldstate fetch vs. later non-idle work. */
+}
+
+/*
  * Invoked to note entry to irq or task transition from idle.  Note that
  * usermode execution does -not- count as idle here!  The caller must
  * have disabled interrupts.
@@ -2463,6 +2495,247 @@ static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq)
 	atomic_inc(&rdtp->dynticks_idle);
 	smp_mb__after_atomic_inc();
 	WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks_idle) & 0x1));
+
+	/*
+	 * If we are the timekeeping CPU, we are permitted to be non-idle
+	 * during a system-idle state.  This must be the case, because
+	 * the timekeeping CPU has to take scheduling-clock interrupts
+	 * during the time that the system is transitioning to full
+	 * system-idle state.  This means that the timekeeping CPU must
+	 * invoke rcu_sysidle_force_exit() directly if it does anything
+	 * more than take a scheduling-clock interrupt.
+	 */
+	if (smp_processor_id() == tick_do_timer_cpu)
+		return;
+
+	/* Update system-idle state: We are clearly no longer fully idle! */
+	rcu_sysidle_force_exit();
+}
+
+/*
+ * Check to see if the current CPU is idle.  Note that usermode execution
+ * does not count as idle.  The caller must have disabled interrupts.
+ */
+static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
+				  unsigned long *maxj)
+{
+	int cur;
+	unsigned long j;
+	struct rcu_dynticks *rdtp = rdp->dynticks;
+
+	/*
+	 * If some other CPU has already reported non-idle, if this is
+	 * not the flavor of RCU that tracks sysidle state, or if this
+	 * is an offline or the timekeeping CPU, nothing to do.
+	 */
+	if (!*isidle || rdp->rsp != rcu_sysidle_state ||
+	    cpu_is_offline(rdp->cpu) || rdp->cpu == tick_do_timer_cpu)
+		return;
+	/* WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu); */
+
+	/* Pick up current idle and NMI-nesting counter and check. */
+	cur = atomic_read(&rdtp->dynticks_idle);
+	if (cur & 0x1) {
+		*isidle = false; /* We are not idle! */
+		return;
+	}
+	smp_mb(); /* Read counters before timestamps. */
+
+	/* Pick up timestamps. */
+	j = ACCESS_ONCE(rdtp->dynticks_idle_jiffies);
+	/* If this CPU entered idle more recently, update maxj timestamp. */
+	if (ULONG_CMP_LT(*maxj, j))
+		*maxj = j;
+}
+
+/*
+ * Is this the flavor of RCU that is handling full-system idle?
+ */
+static bool is_sysidle_rcu_state(struct rcu_state *rsp)
+{
+	return rsp == rcu_sysidle_state;
+}
+
+/*
+ * Return a delay in jiffies based on the number of CPUs, rcu_node
+ * leaf fanout, and jiffies tick rate.  The idea is to allow larger
+ * systems more time to transition to full-idle state in order to
+ * avoid the cache thrashing that otherwise occurs on the state variable.
+ * Really small systems (less than a couple of tens of CPUs) should
+ * instead use a single global atomically incremented counter, and later
+ * versions of this will automatically reconfigure themselves accordingly.
+ */
+static unsigned long rcu_sysidle_delay(void)
+{
+	if (nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL)
+		return 0;
+	return DIV_ROUND_UP(nr_cpu_ids * HZ, rcu_fanout_leaf * 1000);
+}
+
+/*
+ * Advance the full-system-idle state.  This is invoked when all of
+ * the non-timekeeping CPUs are idle.
+ */
+static void rcu_sysidle(unsigned long j)
+{
+	/* Check the current state. */
+	switch (ACCESS_ONCE(full_sysidle_state)) {
+	case RCU_SYSIDLE_NOT:
+
+		/* First time all are idle, so note a short idle period. */
+		ACCESS_ONCE(full_sysidle_state) = RCU_SYSIDLE_SHORT;
+		break;
+
+	case RCU_SYSIDLE_SHORT:
+
+		/*
+		 * Idle for a bit, time to advance to next state?
+		 * cmpxchg failure means race with non-idle, let them win.
+		 */
+		if (ULONG_CMP_GE(jiffies, j + rcu_sysidle_delay()))
+			(void)cmpxchg(&full_sysidle_state,
+				      RCU_SYSIDLE_SHORT, RCU_SYSIDLE_LONG);
+		break;
+
+	case RCU_SYSIDLE_LONG:
+
+		/*
+		 * Do an additional check pass before advancing to full.
+		 * cmpxchg failure means race with non-idle, let them win.
+		 */
+		if (ULONG_CMP_GE(jiffies, j + rcu_sysidle_delay()))
+			(void)cmpxchg(&full_sysidle_state,
+				      RCU_SYSIDLE_LONG, RCU_SYSIDLE_FULL);
+		break;
+
+	default:
+		break;
+	}
+}
+
+/*
+ * Found a non-idle non-timekeeping CPU, so kick the system-idle state
+ * back to the beginning.
+ */
+static void rcu_sysidle_cancel(void)
+{
+	smp_mb();
+	ACCESS_ONCE(full_sysidle_state) = RCU_SYSIDLE_NOT;
+}
+
+/*
+ * Update the sysidle state based on the results of a force-quiescent-state
+ * scan of the CPUs' dyntick-idle state.
+ */
+static void rcu_sysidle_report(struct rcu_state *rsp, int isidle,
+			       unsigned long maxj, bool gpkt)
+{
+	if (rsp != rcu_sysidle_state)
+		return;  /* Wrong flavor, ignore. */
+	if (gpkt && nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL)
+		return;  /* Running state machine from timekeeping CPU. */
+	if (isidle)
+		rcu_sysidle(maxj);    /* More idle! */
+	else
+		rcu_sysidle_cancel(); /* Idle is over. */
+}
+
+/*
+ * Wrapper for rcu_sysidle_report() when called from the grace-period
+ * kthread's context.
+ */
+static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
+				  unsigned long maxj)
+{
+	rcu_sysidle_report(rsp, isidle, maxj, true);
+}
+
+/* Callback and function for forcing an RCU grace period. */
+struct rcu_sysidle_head {
+	struct rcu_head rh;
+	int inuse;
+};
+
+static void rcu_sysidle_cb(struct rcu_head *rhp)
+{
+	struct rcu_sysidle_head *rshp;
+
+	/*
+	 * The following memory barrier is needed to replace the
+	 * memory barriers that would normally be in the memory
+	 * allocator.
+	 */
+	smp_mb();  /* grace period precedes setting inuse. */
+
+	rshp = container_of(rhp, struct rcu_sysidle_head, rh);
+	ACCESS_ONCE(rshp->inuse) = 0;
+}
+
+/*
+ * Check to see if the system is fully idle, other than the timekeeping CPU.
+ * The caller must have disabled interrupts.
+ */
+bool rcu_sys_is_idle(void)
+{
+	static struct rcu_sysidle_head rsh;
+	int rss = ACCESS_ONCE(full_sysidle_state);
+
+	if (WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu))
+		return false;
+
+	/* Handle small-system case by doing a full scan of CPUs. */
+	if (nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL) {
+		int oldrss = rss - 1;
+
+		/*
+		 * One pass to advance to each state up to _FULL.
+		 * Give up if any pass fails to advance the state.
+		 */
+		while (rss < RCU_SYSIDLE_FULL && oldrss < rss) {
+			int cpu;
+			bool isidle = true;
+			unsigned long maxj = jiffies - ULONG_MAX / 4;
+			struct rcu_data *rdp;
+
+			/* Scan all the CPUs looking for nonidle CPUs. */
+			for_each_possible_cpu(cpu) {
+				rdp = per_cpu_ptr(rcu_sysidle_state->rda, cpu);
+				rcu_sysidle_check_cpu(rdp, &isidle, &maxj);
+				if (!isidle)
+					break;
+			}
+			rcu_sysidle_report(rcu_sysidle_state,
+					   isidle, maxj, false);
+			oldrss = rss;
+			rss = ACCESS_ONCE(full_sysidle_state);
+		}
+	}
+
+	/* If this is the first observation of an idle period, record it. */
+	if (rss == RCU_SYSIDLE_FULL) {
+		rss = cmpxchg(&full_sysidle_state,
+			      RCU_SYSIDLE_FULL, RCU_SYSIDLE_FULL_NOTED);
+		return rss == RCU_SYSIDLE_FULL;
+	}
+
+	smp_mb(); /* ensure rss load happens before later caller actions. */
+
+	/* If already fully idle, tell the caller (in case of races). */
+	if (rss == RCU_SYSIDLE_FULL_NOTED)
+		return true;
+
+	/*
+	 * If we aren't there yet, and a grace period is not in flight,
+	 * initiate a grace period.  Either way, tell the caller that
+	 * we are not there yet.  We use an xchg() rather than an assignment
+	 * to make up for the memory barriers that would otherwise be
+	 * provided by the memory allocator.
+	 */
+	if (nr_cpu_ids > CONFIG_NO_HZ_FULL_SYSIDLE_SMALL &&
+	    !rcu_gp_in_progress(rcu_sysidle_state) &&
+	    !rsh.inuse && xchg(&rsh.inuse, 1) == 0)
+		call_rcu(&rsh.rh, rcu_sysidle_cb);
+	return false;
 }
 
 /*
@@ -2483,6 +2756,21 @@ static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq)
 {
 }
 
+static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
+				  unsigned long *maxj)
+{
+}
+
+static bool is_sysidle_rcu_state(struct rcu_state *rsp)
+{
+	return false;
+}
+
+static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
+				  unsigned long maxj)
+{
+}
+
 static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp)
 {
 }
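To make rcu_sysidle_delay() concrete: with the default
NO_HZ_FULL_SYSIDLE_SMALL=8, a system of at most 8 CPUs advances the
state machine with no added delay, while (for example) a 4096-CPU
system with rcu_fanout_leaf=16 and HZ=1000 waits
DIV_ROUND_UP(4096 * 1000, 16 * 1000) = 256 jiffies, about 256 ms,
between state advances, which is the millisecond-scale latency noted
in the commit log.  A stand-alone sketch of the same arithmetic
follows; the CPU counts, fanout, and HZ values are illustrative
assumptions, not taken from any particular machine:

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

/* Kconfig default from this patch. */
#define NO_HZ_FULL_SYSIDLE_SMALL	8

static unsigned long sysidle_delay(unsigned long nr_cpu_ids,
				   unsigned long rcu_fanout_leaf,
				   unsigned long hz)
{
	/* Small systems advance the state machine with no extra delay. */
	if (nr_cpu_ids <= NO_HZ_FULL_SYSIDLE_SMALL)
		return 0;
	/* Larger systems wait longer to limit state-variable contention. */
	return DIV_ROUND_UP(nr_cpu_ids * hz, rcu_fanout_leaf * 1000);
}

int main(void)
{
	printf("%lu\n", sysidle_delay(8, 16, 1000));	/* 0: small system */
	printf("%lu\n", sysidle_delay(64, 16, 1000));	/* 4 jiffies */
	printf("%lu\n", sysidle_delay(4096, 16, 1000));	/* 256 jiffies */
	return 0;
}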
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index c7d2fd67799e..3381f098070f 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -157,6 +157,33 @@ config NO_HZ_FULL_SYSIDLE
 
 	  Say N if you are unsure.
 
+config NO_HZ_FULL_SYSIDLE_SMALL
+	int "Number of CPUs above which large-system approach is used"
+	depends on NO_HZ_FULL_SYSIDLE
+	range 1 NR_CPUS
+	default 8
+	help
+	  The full-system idle detection mechanism takes a lazy approach
+	  on large systems, as is required to attain decent scalability.
+	  However, on smaller systems, scalability is not anywhere near as
+	  large a concern as is energy efficiency.  The sysidle subsystem
+	  therefore uses a fast but non-scalable algorithm for small
+	  systems and a lazier but scalable algorithm for large systems.
+	  This Kconfig parameter defines the number of CPUs in the largest
+	  system that will be considered to be "small".
+
+	  The default value will be fine in most cases.  Battery-powered
+	  systems that (1) enable NO_HZ_FULL_SYSIDLE, (2) have larger
+	  numbers of CPUs, and (3) are suffering from battery-lifetime
+	  problems due to long sysidle latencies might wish to experiment
+	  with larger values for this Kconfig parameter.  On the other
+	  hand, they might be even better served by disabling NO_HZ_FULL
+	  entirely, given that NO_HZ_FULL is intended for HPC and
+	  real-time workloads that at present do not tend to be run on
+	  battery-powered systems.
+
+	  Take the default if you are unsure.
+
 config NO_HZ
 	bool "Old Idle dynticks config"
 	depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS