diff options
Diffstat (limited to 'kernel/time')
| -rw-r--r-- | kernel/time/Kconfig | 51 | ||||
| -rw-r--r-- | kernel/time/ntp.c | 6 | ||||
| -rw-r--r-- | kernel/time/tick-sched.c | 61 | ||||
| -rw-r--r-- | kernel/time/timekeeping.c | 2 |
4 files changed, 83 insertions, 37 deletions
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 70f27e89012b..2b62fe86f9ec 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig | |||
| @@ -105,7 +105,6 @@ config NO_HZ_FULL | |||
| 105 | select RCU_USER_QS | 105 | select RCU_USER_QS |
| 106 | select RCU_NOCB_CPU | 106 | select RCU_NOCB_CPU |
| 107 | select VIRT_CPU_ACCOUNTING_GEN | 107 | select VIRT_CPU_ACCOUNTING_GEN |
| 108 | select CONTEXT_TRACKING_FORCE | ||
| 109 | select IRQ_WORK | 108 | select IRQ_WORK |
| 110 | help | 109 | help |
| 111 | Adaptively try to shut down the tick whenever possible, even when | 110 | Adaptively try to shut down the tick whenever possible, even when |
| @@ -134,6 +133,56 @@ config NO_HZ_FULL_ALL | |||
| 134 | Note the boot CPU will still be kept outside the range to | 133 | Note the boot CPU will still be kept outside the range to |
| 135 | handle the timekeeping duty. | 134 | handle the timekeeping duty. |
| 136 | 135 | ||
| 136 | config NO_HZ_FULL_SYSIDLE | ||
| 137 | bool "Detect full-system idle state for full dynticks system" | ||
| 138 | depends on NO_HZ_FULL | ||
| 139 | default n | ||
| 140 | help | ||
| 141 | At least one CPU must keep the scheduling-clock tick running for | ||
| 142 | timekeeping purposes whenever there is a non-idle CPU, where | ||
| 143 | "non-idle" also includes dynticks CPUs as long as they are | ||
| 144 | running non-idle tasks. Because the underlying adaptive-tick | ||
| 145 | support cannot distinguish between all CPUs being idle and | ||
| 146 | all CPUs each running a single task in dynticks mode, the | ||
| 147 | underlying support simply ensures that there is always a CPU | ||
| 148 | handling the scheduling-clock tick, whether or not all CPUs | ||
| 149 | are idle. This Kconfig option enables scalable detection of | ||
| 150 | the all-CPUs-idle state, thus allowing the scheduling-clock | ||
| 151 | tick to be disabled when all CPUs are idle. Note that scalable | ||
| 152 | detection of the all-CPUs-idle state means that larger systems | ||
| 153 | will be slower to declare the all-CPUs-idle state. | ||
| 154 | |||
| 155 | Say Y if you would like to help debug all-CPUs-idle detection. | ||
| 156 | |||
| 157 | Say N if you are unsure. | ||
| 158 | |||
| 159 | config NO_HZ_FULL_SYSIDLE_SMALL | ||
| 160 | int "Number of CPUs above which large-system approach is used" | ||
| 161 | depends on NO_HZ_FULL_SYSIDLE | ||
| 162 | range 1 NR_CPUS | ||
| 163 | default 8 | ||
| 164 | help | ||
| 165 | The full-system idle detection mechanism takes a lazy approach | ||
| 166 | on large systems, as is required to attain decent scalability. | ||
| 167 | However, on smaller systems, scalability is not anywhere near as | ||
| 168 | large a concern as is energy efficiency. The sysidle subsystem | ||
| 169 | therefore uses a fast but non-scalable algorithm for small | ||
| 170 | systems and a lazier but scalable algorithm for large systems. | ||
| 171 | This Kconfig parameter defines the number of CPUs in the largest | ||
| 172 | system that will be considered to be "small". | ||
| 173 | |||
| 174 | The default value will be fine in most cases. Battery-powered | ||
| 175 | systems that (1) enable NO_HZ_FULL_SYSIDLE, (2) have larger | ||
| 176 | numbers of CPUs, and (3) are suffering from battery-lifetime | ||
| 177 | problems due to long sysidle latencies might wish to experiment | ||
| 178 | with larger values for this Kconfig parameter. On the other | ||
| 179 | hand, they might be even better served by disabling NO_HZ_FULL | ||
| 180 | entirely, given that NO_HZ_FULL is intended for HPC and | ||
| 181 | real-time workloads that at present do not tend to be run on | ||
| 182 | battery-powered systems. | ||
| 183 | |||
| 184 | Take the default if you are unsure. | ||
| 185 | |||
| 137 | config NO_HZ | 186 | config NO_HZ |
| 138 | bool "Old Idle dynticks config" | 187 | bool "Old Idle dynticks config" |
| 139 | depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS | 188 | depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS |
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 8f5b3b98577b..bb2215174f05 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c | |||
| @@ -516,13 +516,13 @@ static void sync_cmos_clock(struct work_struct *work) | |||
| 516 | schedule_delayed_work(&sync_cmos_work, timespec_to_jiffies(&next)); | 516 | schedule_delayed_work(&sync_cmos_work, timespec_to_jiffies(&next)); |
| 517 | } | 517 | } |
| 518 | 518 | ||
| 519 | static void notify_cmos_timer(void) | 519 | void ntp_notify_cmos_timer(void) |
| 520 | { | 520 | { |
| 521 | schedule_delayed_work(&sync_cmos_work, 0); | 521 | schedule_delayed_work(&sync_cmos_work, 0); |
| 522 | } | 522 | } |
| 523 | 523 | ||
| 524 | #else | 524 | #else |
| 525 | static inline void notify_cmos_timer(void) { } | 525 | void ntp_notify_cmos_timer(void) { } |
| 526 | #endif | 526 | #endif |
| 527 | 527 | ||
| 528 | 528 | ||
| @@ -687,8 +687,6 @@ int __do_adjtimex(struct timex *txc, struct timespec *ts, s32 *time_tai) | |||
| 687 | if (!(time_status & STA_NANO)) | 687 | if (!(time_status & STA_NANO)) |
| 688 | txc->time.tv_usec /= NSEC_PER_USEC; | 688 | txc->time.tv_usec /= NSEC_PER_USEC; |
| 689 | 689 | ||
| 690 | notify_cmos_timer(); | ||
| 691 | |||
| 692 | return result; | 690 | return result; |
| 693 | } | 691 | } |
| 694 | 692 | ||
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index e8a1516cc0a3..3612fc77f834 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
| @@ -23,6 +23,7 @@ | |||
| 23 | #include <linux/irq_work.h> | 23 | #include <linux/irq_work.h> |
| 24 | #include <linux/posix-timers.h> | 24 | #include <linux/posix-timers.h> |
| 25 | #include <linux/perf_event.h> | 25 | #include <linux/perf_event.h> |
| 26 | #include <linux/context_tracking.h> | ||
| 26 | 27 | ||
| 27 | #include <asm/irq_regs.h> | 28 | #include <asm/irq_regs.h> |
| 28 | 29 | ||
| @@ -148,8 +149,8 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) | |||
| 148 | } | 149 | } |
| 149 | 150 | ||
| 150 | #ifdef CONFIG_NO_HZ_FULL | 151 | #ifdef CONFIG_NO_HZ_FULL |
| 151 | static cpumask_var_t nohz_full_mask; | 152 | cpumask_var_t tick_nohz_full_mask; |
| 152 | bool have_nohz_full_mask; | 153 | bool tick_nohz_full_running; |
| 153 | 154 | ||
| 154 | static bool can_stop_full_tick(void) | 155 | static bool can_stop_full_tick(void) |
| 155 | { | 156 | { |
| @@ -182,7 +183,7 @@ static bool can_stop_full_tick(void) | |||
| 182 | * Don't allow the user to think they can get | 183 | * Don't allow the user to think they can get |
| 183 | * full NO_HZ with this machine. | 184 | * full NO_HZ with this machine. |
| 184 | */ | 185 | */ |
| 185 | WARN_ONCE(have_nohz_full_mask, | 186 | WARN_ONCE(tick_nohz_full_running, |
| 186 | "NO_HZ FULL will not work with unstable sched clock"); | 187 | "NO_HZ FULL will not work with unstable sched clock"); |
| 187 | return false; | 188 | return false; |
| 188 | } | 189 | } |
| @@ -197,7 +198,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now); | |||
| 197 | * Re-evaluate the need for the tick on the current CPU | 198 | * Re-evaluate the need for the tick on the current CPU |
| 198 | * and restart it if necessary. | 199 | * and restart it if necessary. |
| 199 | */ | 200 | */ |
| 200 | void tick_nohz_full_check(void) | 201 | void __tick_nohz_full_check(void) |
| 201 | { | 202 | { |
| 202 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 203 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
| 203 | 204 | ||
| @@ -211,7 +212,7 @@ void tick_nohz_full_check(void) | |||
| 211 | 212 | ||
| 212 | static void nohz_full_kick_work_func(struct irq_work *work) | 213 | static void nohz_full_kick_work_func(struct irq_work *work) |
| 213 | { | 214 | { |
| 214 | tick_nohz_full_check(); | 215 | __tick_nohz_full_check(); |
| 215 | } | 216 | } |
| 216 | 217 | ||
| 217 | static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { | 218 | static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { |
| @@ -230,7 +231,7 @@ void tick_nohz_full_kick(void) | |||
| 230 | 231 | ||
| 231 | static void nohz_full_kick_ipi(void *info) | 232 | static void nohz_full_kick_ipi(void *info) |
| 232 | { | 233 | { |
| 233 | tick_nohz_full_check(); | 234 | __tick_nohz_full_check(); |
| 234 | } | 235 | } |
| 235 | 236 | ||
| 236 | /* | 237 | /* |
| @@ -239,12 +240,13 @@ static void nohz_full_kick_ipi(void *info) | |||
| 239 | */ | 240 | */ |
| 240 | void tick_nohz_full_kick_all(void) | 241 | void tick_nohz_full_kick_all(void) |
| 241 | { | 242 | { |
| 242 | if (!have_nohz_full_mask) | 243 | if (!tick_nohz_full_running) |
| 243 | return; | 244 | return; |
| 244 | 245 | ||
| 245 | preempt_disable(); | 246 | preempt_disable(); |
| 246 | smp_call_function_many(nohz_full_mask, | 247 | smp_call_function_many(tick_nohz_full_mask, |
| 247 | nohz_full_kick_ipi, NULL, false); | 248 | nohz_full_kick_ipi, NULL, false); |
| 249 | tick_nohz_full_kick(); | ||
| 248 | preempt_enable(); | 250 | preempt_enable(); |
| 249 | } | 251 | } |
| 250 | 252 | ||
| @@ -253,7 +255,7 @@ void tick_nohz_full_kick_all(void) | |||
| 253 | * It might need the tick due to per task/process properties: | 255 | * It might need the tick due to per task/process properties: |
| 254 | * perf events, posix cpu timers, ... | 256 | * perf events, posix cpu timers, ... |
| 255 | */ | 257 | */ |
| 256 | void tick_nohz_task_switch(struct task_struct *tsk) | 258 | void __tick_nohz_task_switch(struct task_struct *tsk) |
| 257 | { | 259 | { |
| 258 | unsigned long flags; | 260 | unsigned long flags; |
| 259 | 261 | ||
| @@ -269,31 +271,23 @@ out: | |||
| 269 | local_irq_restore(flags); | 271 | local_irq_restore(flags); |
| 270 | } | 272 | } |
| 271 | 273 | ||
| 272 | int tick_nohz_full_cpu(int cpu) | ||
| 273 | { | ||
| 274 | if (!have_nohz_full_mask) | ||
| 275 | return 0; | ||
| 276 | |||
| 277 | return cpumask_test_cpu(cpu, nohz_full_mask); | ||
| 278 | } | ||
| 279 | |||
| 280 | /* Parse the boot-time nohz CPU list from the kernel parameters. */ | 274 | /* Parse the boot-time nohz CPU list from the kernel parameters. */ |
| 281 | static int __init tick_nohz_full_setup(char *str) | 275 | static int __init tick_nohz_full_setup(char *str) |
| 282 | { | 276 | { |
| 283 | int cpu; | 277 | int cpu; |
| 284 | 278 | ||
| 285 | alloc_bootmem_cpumask_var(&nohz_full_mask); | 279 | alloc_bootmem_cpumask_var(&tick_nohz_full_mask); |
| 286 | if (cpulist_parse(str, nohz_full_mask) < 0) { | 280 | if (cpulist_parse(str, tick_nohz_full_mask) < 0) { |
| 287 | pr_warning("NOHZ: Incorrect nohz_full cpumask\n"); | 281 | pr_warning("NOHZ: Incorrect nohz_full cpumask\n"); |
| 288 | return 1; | 282 | return 1; |
| 289 | } | 283 | } |
| 290 | 284 | ||
| 291 | cpu = smp_processor_id(); | 285 | cpu = smp_processor_id(); |
| 292 | if (cpumask_test_cpu(cpu, nohz_full_mask)) { | 286 | if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) { |
| 293 | pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu); | 287 | pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu); |
| 294 | cpumask_clear_cpu(cpu, nohz_full_mask); | 288 | cpumask_clear_cpu(cpu, tick_nohz_full_mask); |
| 295 | } | 289 | } |
| 296 | have_nohz_full_mask = true; | 290 | tick_nohz_full_running = true; |
| 297 | 291 | ||
| 298 | return 1; | 292 | return 1; |
| 299 | } | 293 | } |
| @@ -311,7 +305,7 @@ static int tick_nohz_cpu_down_callback(struct notifier_block *nfb, | |||
| 311 | * If we handle the timekeeping duty for full dynticks CPUs, | 305 | * If we handle the timekeeping duty for full dynticks CPUs, |
| 312 | * we can't safely shut down that CPU. | 306 | * we can't safely shut down that CPU. |
| 313 | */ | 307 | */ |
| 314 | if (have_nohz_full_mask && tick_do_timer_cpu == cpu) | 308 | if (tick_nohz_full_running && tick_do_timer_cpu == cpu) |
| 315 | return NOTIFY_BAD; | 309 | return NOTIFY_BAD; |
| 316 | break; | 310 | break; |
| 317 | } | 311 | } |
| @@ -330,31 +324,34 @@ static int tick_nohz_init_all(void) | |||
| 330 | int err = -1; | 324 | int err = -1; |
| 331 | 325 | ||
| 332 | #ifdef CONFIG_NO_HZ_FULL_ALL | 326 | #ifdef CONFIG_NO_HZ_FULL_ALL |
| 333 | if (!alloc_cpumask_var(&nohz_full_mask, GFP_KERNEL)) { | 327 | if (!alloc_cpumask_var(&tick_nohz_full_mask, GFP_KERNEL)) { |
| 334 | pr_err("NO_HZ: Can't allocate full dynticks cpumask\n"); | 328 | pr_err("NO_HZ: Can't allocate full dynticks cpumask\n"); |
| 335 | return err; | 329 | return err; |
| 336 | } | 330 | } |
| 337 | err = 0; | 331 | err = 0; |
| 338 | cpumask_setall(nohz_full_mask); | 332 | cpumask_setall(tick_nohz_full_mask); |
| 339 | cpumask_clear_cpu(smp_processor_id(), nohz_full_mask); | 333 | cpumask_clear_cpu(smp_processor_id(), tick_nohz_full_mask); |
| 340 | have_nohz_full_mask = true; | 334 | tick_nohz_full_running = true; |
| 341 | #endif | 335 | #endif |
| 342 | return err; | 336 | return err; |
| 343 | } | 337 | } |
| 344 | 338 | ||
| 345 | void __init tick_nohz_init(void) | 339 | void __init tick_nohz_init(void) |
| 346 | { | 340 | { |
| 347 | if (!have_nohz_full_mask) { | 341 | int cpu; |
| 342 | |||
| 343 | if (!tick_nohz_full_running) { | ||
| 348 | if (tick_nohz_init_all() < 0) | 344 | if (tick_nohz_init_all() < 0) |
| 349 | return; | 345 | return; |
| 350 | } | 346 | } |
| 351 | 347 | ||
| 348 | for_each_cpu(cpu, tick_nohz_full_mask) | ||
| 349 | context_tracking_cpu_set(cpu); | ||
| 350 | |||
| 352 | cpu_notifier(tick_nohz_cpu_down_callback, 0); | 351 | cpu_notifier(tick_nohz_cpu_down_callback, 0); |
| 353 | cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask); | 352 | cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), tick_nohz_full_mask); |
| 354 | pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); | 353 | pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); |
| 355 | } | 354 | } |
| 356 | #else | ||
| 357 | #define have_nohz_full_mask (0) | ||
| 358 | #endif | 355 | #endif |
| 359 | 356 | ||
| 360 | /* | 357 | /* |
| @@ -732,7 +729,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) | |||
| 732 | return false; | 729 | return false; |
| 733 | } | 730 | } |
| 734 | 731 | ||
| 735 | if (have_nohz_full_mask) { | 732 | if (tick_nohz_full_enabled()) { |
| 736 | /* | 733 | /* |
| 737 | * Keep the tick alive to guarantee timekeeping progression | 734 | * Keep the tick alive to guarantee timekeeping progression |
| 738 | * if there are full dynticks CPUs around | 735 | * if there are full dynticks CPUs around |
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 48b9fffabdc2..947ba25a95a0 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
| @@ -1703,6 +1703,8 @@ int do_adjtimex(struct timex *txc) | |||
| 1703 | write_seqcount_end(&timekeeper_seq); | 1703 | write_seqcount_end(&timekeeper_seq); |
| 1704 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); | 1704 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); |
| 1705 | 1705 | ||
| 1706 | ntp_notify_cmos_timer(); | ||
| 1707 | |||
| 1706 | return ret; | 1708 | return ret; |
| 1707 | } | 1709 | } |
| 1708 | 1710 | ||
