Diffstat (limited to 'kernel/time')
 kernel/time/Kconfig       | 51
 kernel/time/ntp.c         |  6
 kernel/time/tick-sched.c  | 61
 kernel/time/timekeeping.c |  2
 4 files changed, 83 insertions(+), 37 deletions(-)
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 70f27e89012b..2b62fe86f9ec 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -105,7 +105,6 @@ config NO_HZ_FULL
 	select RCU_USER_QS
 	select RCU_NOCB_CPU
 	select VIRT_CPU_ACCOUNTING_GEN
-	select CONTEXT_TRACKING_FORCE
 	select IRQ_WORK
 	help
 	 Adaptively try to shutdown the tick whenever possible, even when
@@ -134,6 +133,56 @@ config NO_HZ_FULL_ALL
 	 Note the boot CPU will still be kept outside the range to
 	 handle the timekeeping duty.
 
+config NO_HZ_FULL_SYSIDLE
+	bool "Detect full-system idle state for full dynticks system"
+	depends on NO_HZ_FULL
+	default n
+	help
+	 At least one CPU must keep the scheduling-clock tick running for
+	 timekeeping purposes whenever there is a non-idle CPU, where
+	 "non-idle" also includes dynticks CPUs as long as they are
+	 running non-idle tasks. Because the underlying adaptive-tick
+	 support cannot distinguish between all CPUs being idle and
+	 all CPUs each running a single task in dynticks mode, the
+	 underlying support simply ensures that there is always a CPU
+	 handling the scheduling-clock tick, whether or not all CPUs
+	 are idle. This Kconfig option enables scalable detection of
+	 the all-CPUs-idle state, thus allowing the scheduling-clock
+	 tick to be disabled when all CPUs are idle. Note that scalable
+	 detection of the all-CPUs-idle state means that larger systems
+	 will be slower to declare the all-CPUs-idle state.
+
+	 Say Y if you would like to help debug all-CPUs-idle detection.
+
+	 Say N if you are unsure.
+
+config NO_HZ_FULL_SYSIDLE_SMALL
+	int "Number of CPUs above which large-system approach is used"
+	depends on NO_HZ_FULL_SYSIDLE
+	range 1 NR_CPUS
+	default 8
+	help
+	 The full-system idle detection mechanism takes a lazy approach
+	 on large systems, as is required to attain decent scalability.
+	 However, on smaller systems, scalability is not anywhere near as
+	 large a concern as is energy efficiency. The sysidle subsystem
+	 therefore uses a fast but non-scalable algorithm for small
+	 systems and a lazier but scalable algorithm for large systems.
+	 This Kconfig parameter defines the number of CPUs in the largest
+	 system that will be considered to be "small".
+
+	 The default value will be fine in most cases. Battery-powered
+	 systems that (1) enable NO_HZ_FULL_SYSIDLE, (2) have larger
+	 numbers of CPUs, and (3) are suffering from battery-lifetime
+	 problems due to long sysidle latencies might wish to experiment
+	 with larger values for this Kconfig parameter. On the other
+	 hand, they might be even better served by disabling NO_HZ_FULL
+	 entirely, given that NO_HZ_FULL is intended for HPC and
+	 real-time workloads that at present do not tend to be run on
+	 battery-powered systems.
+
+	 Take the default if you are unsure.
+
 config NO_HZ
 	bool "Old Idle dynticks config"
 	depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS
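
The two new options only take effect on a full-dynticks kernel, since both depend (directly or transitively) on NO_HZ_FULL. An illustrative .config fragment enabling sysidle detection with the documented default cutoff of 8 CPUs (the values shown are examples, not a recommendation):

CONFIG_NO_HZ_FULL=y
CONFIG_NO_HZ_FULL_SYSIDLE=y
CONFIG_NO_HZ_FULL_SYSIDLE_SMALL=8
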
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 8f5b3b98577b..bb2215174f05 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -516,13 +516,13 @@ static void sync_cmos_clock(struct work_struct *work)
 		schedule_delayed_work(&sync_cmos_work, timespec_to_jiffies(&next));
 }
 
-static void notify_cmos_timer(void)
+void ntp_notify_cmos_timer(void)
 {
 	schedule_delayed_work(&sync_cmos_work, 0);
 }
 
 #else
-static inline void notify_cmos_timer(void) { }
+void ntp_notify_cmos_timer(void) { }
 #endif
 
 
@@ -687,8 +687,6 @@ int __do_adjtimex(struct timex *txc, struct timespec *ts, s32 *time_tai)
 	if (!(time_status & STA_NANO))
 		txc->time.tv_usec /= NSEC_PER_USEC;
 
-	notify_cmos_timer();
-
 	return result;
 }
 
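
Since ntp_notify_cmos_timer() is now called from timekeeping.c (see the timekeeping.c hunk below) rather than from within ntp.c, the rename implies a declaration in a header shared by both files. That header is not part of this diff; presumably it gains something like:

/* Presumed companion change in a shared header (not shown in this diff). */
extern void ntp_notify_cmos_timer(void);
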
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index e8a1516cc0a3..3612fc77f834 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -23,6 +23,7 @@
 #include <linux/irq_work.h>
 #include <linux/posix-timers.h>
 #include <linux/perf_event.h>
+#include <linux/context_tracking.h>
 
 #include <asm/irq_regs.h>
 
@@ -148,8 +149,8 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
 }
 
 #ifdef CONFIG_NO_HZ_FULL
-static cpumask_var_t nohz_full_mask;
-bool have_nohz_full_mask;
+cpumask_var_t tick_nohz_full_mask;
+bool tick_nohz_full_running;
 
 static bool can_stop_full_tick(void)
 {
@@ -182,7 +183,7 @@ static bool can_stop_full_tick(void)
 		 * Don't allow the user to think they can get
 		 * full NO_HZ with this machine.
 		 */
-		WARN_ONCE(have_nohz_full_mask,
+		WARN_ONCE(tick_nohz_full_running,
 			  "NO_HZ FULL will not work with unstable sched clock");
 		return false;
 	}
@@ -197,7 +198,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now);
  * Re-evaluate the need for the tick on the current CPU
  * and restart it if necessary.
  */
-void tick_nohz_full_check(void)
+void __tick_nohz_full_check(void)
 {
 	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
 
@@ -211,7 +212,7 @@ void tick_nohz_full_check(void)
 
 static void nohz_full_kick_work_func(struct irq_work *work)
 {
-	tick_nohz_full_check();
+	__tick_nohz_full_check();
 }
 
 static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
@@ -230,7 +231,7 @@ void tick_nohz_full_kick(void)
 
 static void nohz_full_kick_ipi(void *info)
 {
-	tick_nohz_full_check();
+	__tick_nohz_full_check();
 }
 
 /*
@@ -239,12 +240,13 @@ static void nohz_full_kick_ipi(void *info)
  */
 void tick_nohz_full_kick_all(void)
 {
-	if (!have_nohz_full_mask)
+	if (!tick_nohz_full_running)
 		return;
 
 	preempt_disable();
-	smp_call_function_many(nohz_full_mask,
+	smp_call_function_many(tick_nohz_full_mask,
 			       nohz_full_kick_ipi, NULL, false);
+	tick_nohz_full_kick();
 	preempt_enable();
 }
 
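
The added tick_nohz_full_kick() call is what reaches the local CPU: smp_call_function_many() runs the IPI handler only on the other CPUs in the mask, never on the caller, so without it a full-dynticks CPU invoking tick_nohz_full_kick_all() would never re-evaluate its own tick. A commented sketch of the resulting pattern:

	/* Remote full-dynticks CPUs are kicked by IPI ... */
	preempt_disable();
	smp_call_function_many(tick_nohz_full_mask,
			       nohz_full_kick_ipi, NULL, false);
	/* ... while the calling CPU, which smp_call_function_many()
	 * deliberately skips, queues its own irq_work instead.
	 */
	tick_nohz_full_kick();
	preempt_enable();
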
@@ -253,7 +255,7 @@ void tick_nohz_full_kick_all(void)
  * It might need the tick due to per task/process properties:
  * perf events, posix cpu timers, ...
  */
-void tick_nohz_task_switch(struct task_struct *tsk)
+void __tick_nohz_task_switch(struct task_struct *tsk)
 {
 	unsigned long flags;
 
@@ -269,31 +271,23 @@ out:
 	local_irq_restore(flags);
 }
 
-int tick_nohz_full_cpu(int cpu)
-{
-	if (!have_nohz_full_mask)
-		return 0;
-
-	return cpumask_test_cpu(cpu, nohz_full_mask);
-}
-
 /* Parse the boot-time nohz CPU list from the kernel parameters. */
 static int __init tick_nohz_full_setup(char *str)
 {
 	int cpu;
 
-	alloc_bootmem_cpumask_var(&nohz_full_mask);
-	if (cpulist_parse(str, nohz_full_mask) < 0) {
+	alloc_bootmem_cpumask_var(&tick_nohz_full_mask);
+	if (cpulist_parse(str, tick_nohz_full_mask) < 0) {
 		pr_warning("NOHZ: Incorrect nohz_full cpumask\n");
 		return 1;
 	}
 
 	cpu = smp_processor_id();
-	if (cpumask_test_cpu(cpu, nohz_full_mask)) {
+	if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
 		pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu);
-		cpumask_clear_cpu(cpu, nohz_full_mask);
+		cpumask_clear_cpu(cpu, tick_nohz_full_mask);
 	}
-	have_nohz_full_mask = true;
+	tick_nohz_full_running = true;
 
 	return 1;
 }
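
Note that tick_nohz_full_cpu() is removed here but not from the kernel: with tick_nohz_full_mask and tick_nohz_full_running now non-static, the check can move to a header as an inline built on the tick_nohz_full_enabled() predicate that can_stop_idle_tick() uses below. The header is outside this diff, so the following is only a plausible sketch of those inlines:

/* Plausible header-side replacements (assumed, not shown in this diff). */
extern cpumask_var_t tick_nohz_full_mask;
extern bool tick_nohz_full_running;

static inline bool tick_nohz_full_enabled(void)
{
	/* The real predicate may also consult context tracking. */
	return tick_nohz_full_running;
}

static inline bool tick_nohz_full_cpu(int cpu)
{
	if (!tick_nohz_full_enabled())
		return false;
	return cpumask_test_cpu(cpu, tick_nohz_full_mask);
}
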
@@ -311,7 +305,7 @@ static int tick_nohz_cpu_down_callback(struct notifier_block *nfb,
 	 * If we handle the timekeeping duty for full dynticks CPUs,
 	 * we can't safely shutdown that CPU.
 	 */
-	if (have_nohz_full_mask && tick_do_timer_cpu == cpu)
+	if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
 		return NOTIFY_BAD;
 	break;
 }
@@ -330,31 +324,34 @@ static int tick_nohz_init_all(void)
 	int err = -1;
 
 #ifdef CONFIG_NO_HZ_FULL_ALL
-	if (!alloc_cpumask_var(&nohz_full_mask, GFP_KERNEL)) {
+	if (!alloc_cpumask_var(&tick_nohz_full_mask, GFP_KERNEL)) {
 		pr_err("NO_HZ: Can't allocate full dynticks cpumask\n");
 		return err;
 	}
 	err = 0;
-	cpumask_setall(nohz_full_mask);
-	cpumask_clear_cpu(smp_processor_id(), nohz_full_mask);
-	have_nohz_full_mask = true;
+	cpumask_setall(tick_nohz_full_mask);
+	cpumask_clear_cpu(smp_processor_id(), tick_nohz_full_mask);
+	tick_nohz_full_running = true;
 #endif
 	return err;
 }
 
 void __init tick_nohz_init(void)
 {
-	if (!have_nohz_full_mask) {
+	int cpu;
+
+	if (!tick_nohz_full_running) {
 		if (tick_nohz_init_all() < 0)
 			return;
 	}
 
+	for_each_cpu(cpu, tick_nohz_full_mask)
+		context_tracking_cpu_set(cpu);
+
 	cpu_notifier(tick_nohz_cpu_down_callback, 0);
-	cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask);
+	cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), tick_nohz_full_mask);
 	pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf);
 }
-#else
-#define have_nohz_full_mask (0)
 #endif
 
 /*
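
tick_nohz_init() now switches on context tracking for every CPU in the full-dynticks set, which is why <linux/context_tracking.h> is included above and why the CONTEXT_TRACKING_FORCE select could be dropped from Kconfig: tracking is enabled per CPU at boot rather than forced on for all CPUs. As a rough, assumed sketch, context_tracking_cpu_set() amounts to marking the CPU's tracking state active:

/* Assumed shape of context_tracking_cpu_set(); the real code lives in
 * kernel/context_tracking.c and is not part of this diff.
 */
void context_tracking_cpu_set(int cpu)
{
	if (!per_cpu(context_tracking.active, cpu))
		per_cpu(context_tracking.active, cpu) = true;
}
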
@@ -732,7 +729,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
 		return false;
 	}
 
-	if (have_nohz_full_mask) {
+	if (tick_nohz_full_enabled()) {
 		/*
 		 * Keep the tick alive to guarantee timekeeping progression
 		 * if there are full dynticks CPUs around
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 48b9fffabdc2..947ba25a95a0 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1703,6 +1703,8 @@ int do_adjtimex(struct timex *txc)
 	write_seqcount_end(&timekeeper_seq);
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
+	ntp_notify_cmos_timer();
+
 	return ret;
 }
 
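
Taken together with the ntp.c hunks above, the effect is an ordering change: the CMOS-sync delayed work is no longer queued from inside __do_adjtimex() but from do_adjtimex(), after timekeeper_seq and timekeeper_lock have been released. A sketch of the resulting tail of do_adjtimex(), abridged to the lines relevant here:

int do_adjtimex(struct timex *txc)
{
	/* ... validate txc, take the timekeeper locks, call __do_adjtimex() ... */
	write_seqcount_end(&timekeeper_seq);
	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);

	/* Queue sync_cmos_work only after the locks are dropped. */
	ntp_notify_cmos_timer();

	return ret;
}
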