diff options
author | Frederic Weisbecker <fweisbec@gmail.com> | 2012-12-18 12:24:35 -0500 |
---|---|---|
committer | Frederic Weisbecker <fweisbec@gmail.com> | 2013-03-21 10:55:45 -0400 |
commit | a382bf934449ddeb625167537ae81daa0211b477 (patch) | |
tree | 6ee5becb6b01c952d8fb7d561e10c1e2967d7b99 /kernel/time | |
parent | a831881be220358a1d28c5d95d69449fb6d623ca (diff) |
nohz: Assign timekeeping duty to a CPU outside the full dynticks range
This way the full nohz CPUs can safely run with the tick
stopped with a guarantee that somebody else is taking
care of the jiffies and GTOD progression.
Once the duty is attributed to a CPU, it won't change. Also, that
CPU can't enter dyntick-idle mode or be hot-unplugged.
This may later be improved from a power consumption POV. At
least we should be able to share the duty amongst all CPUs
outside the full dynticks range. Then the duty could even be
shared with full dynticks CPUs when those can't stop their
tick for any reason.
But let's start with that very simple approach first.
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Geoff Levand <geoff@infradead.org>
Cc: Gilad Ben Yossef <gilad@benyossef.com>
Cc: Hakan Akkan <hakanakkan@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Kevin Hilman <khilman@linaro.org>
Cc: Li Zhong <zhong@linux.vnet.ibm.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
[fix have_nohz_full_mask offcase]
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Diffstat (limited to 'kernel/time')
-rw-r--r-- | kernel/time/tick-broadcast.c | 3 | ||||
-rw-r--r-- | kernel/time/tick-common.c | 5 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 47 |
3 files changed, 51 insertions, 4 deletions
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 2fb8cb88df8d..8a6875cc1879 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c | |||
@@ -573,7 +573,8 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc) | |||
573 | bc->event_handler = tick_handle_oneshot_broadcast; | 573 | bc->event_handler = tick_handle_oneshot_broadcast; |
574 | 574 | ||
575 | /* Take the do_timer update */ | 575 | /* Take the do_timer update */ |
576 | tick_do_timer_cpu = cpu; | 576 | if (!tick_nohz_extended_cpu(cpu)) |
577 | tick_do_timer_cpu = cpu; | ||
577 | 578 | ||
578 | /* | 579 | /* |
579 | * We must be careful here. There might be other CPUs | 580 | * We must be careful here. There might be other CPUs |
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index b1600a6973f4..b7dc0cbdb59b 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c | |||
@@ -163,7 +163,10 @@ static void tick_setup_device(struct tick_device *td, | |||
163 | * this cpu: | 163 | * this cpu: |
164 | */ | 164 | */ |
165 | if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) { | 165 | if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) { |
166 | tick_do_timer_cpu = cpu; | 166 | if (!tick_nohz_extended_cpu(cpu)) |
167 | tick_do_timer_cpu = cpu; | ||
168 | else | ||
169 | tick_do_timer_cpu = TICK_DO_TIMER_NONE; | ||
167 | tick_next_period = ktime_get(); | 170 | tick_next_period = ktime_get(); |
168 | tick_period = ktime_set(0, NSEC_PER_SEC / HZ); | 171 | tick_period = ktime_set(0, NSEC_PER_SEC / HZ); |
169 | } | 172 | } |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 79c275f08b7d..57bb3fe5aaa3 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -112,7 +112,8 @@ static void tick_sched_do_timer(ktime_t now) | |||
112 | * this duty, then the jiffies update is still serialized by | 112 | * this duty, then the jiffies update is still serialized by |
113 | * jiffies_lock. | 113 | * jiffies_lock. |
114 | */ | 114 | */ |
115 | if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) | 115 | if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE) |
116 | && !tick_nohz_extended_cpu(cpu)) | ||
116 | tick_do_timer_cpu = cpu; | 117 | tick_do_timer_cpu = cpu; |
117 | #endif | 118 | #endif |
118 | 119 | ||
@@ -166,6 +167,25 @@ static int __init tick_nohz_extended_setup(char *str) | |||
166 | } | 167 | } |
167 | __setup("nohz_extended=", tick_nohz_extended_setup); | 168 | __setup("nohz_extended=", tick_nohz_extended_setup); |
168 | 169 | ||
170 | static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb, | ||
171 | unsigned long action, | ||
172 | void *hcpu) | ||
173 | { | ||
174 | unsigned int cpu = (unsigned long)hcpu; | ||
175 | |||
176 | switch (action & ~CPU_TASKS_FROZEN) { | ||
177 | case CPU_DOWN_PREPARE: | ||
178 | /* | ||
179 | * If we handle the timekeeping duty for full dynticks CPUs, | ||
180 | * we can't safely shutdown that CPU. | ||
181 | */ | ||
182 | if (have_nohz_extended_mask && tick_do_timer_cpu == cpu) | ||
183 | return -EINVAL; | ||
184 | break; | ||
185 | } | ||
186 | return NOTIFY_OK; | ||
187 | } | ||
188 | |||
169 | static int __init init_tick_nohz_extended(void) | 189 | static int __init init_tick_nohz_extended(void) |
170 | { | 190 | { |
171 | cpumask_var_t online_nohz; | 191 | cpumask_var_t online_nohz; |
@@ -174,6 +194,8 @@ static int __init init_tick_nohz_extended(void) | |||
174 | if (!have_nohz_extended_mask) | 194 | if (!have_nohz_extended_mask) |
175 | return 0; | 195 | return 0; |
176 | 196 | ||
197 | cpu_notifier(tick_nohz_cpu_down_callback, 0); | ||
198 | |||
177 | if (!zalloc_cpumask_var(&online_nohz, GFP_KERNEL)) { | 199 | if (!zalloc_cpumask_var(&online_nohz, GFP_KERNEL)) { |
178 | pr_warning("NO_HZ: Not enough memory to check extended nohz mask\n"); | 200 | pr_warning("NO_HZ: Not enough memory to check extended nohz mask\n"); |
179 | return -ENOMEM; | 201 | return -ENOMEM; |
@@ -188,11 +210,17 @@ static int __init init_tick_nohz_extended(void) | |||
188 | /* Ensure we keep a CPU outside the dynticks range for timekeeping */ | 210 | /* Ensure we keep a CPU outside the dynticks range for timekeeping */ |
189 | cpumask_and(online_nohz, cpu_online_mask, nohz_extended_mask); | 211 | cpumask_and(online_nohz, cpu_online_mask, nohz_extended_mask); |
190 | if (cpumask_equal(online_nohz, cpu_online_mask)) { | 212 | if (cpumask_equal(online_nohz, cpu_online_mask)) { |
191 | cpu = cpumask_any(cpu_online_mask); | ||
192 | pr_warning("NO_HZ: Must keep at least one online CPU " | 213 | pr_warning("NO_HZ: Must keep at least one online CPU " |
193 | "out of nohz_extended range\n"); | 214 | "out of nohz_extended range\n"); |
215 | /* | ||
216 | * We know the current CPU doesn't have its tick stopped. | ||
217 | * Let's use it for the timekeeping duty. | ||
218 | */ | ||
219 | preempt_disable(); | ||
220 | cpu = smp_processor_id(); | ||
194 | pr_warning("NO_HZ: Clearing %d from nohz_extended range\n", cpu); | 221 | pr_warning("NO_HZ: Clearing %d from nohz_extended range\n", cpu); |
195 | cpumask_clear_cpu(cpu, nohz_extended_mask); | 222 | cpumask_clear_cpu(cpu, nohz_extended_mask); |
223 | preempt_enable(); | ||
196 | } | 224 | } |
197 | put_online_cpus(); | 225 | put_online_cpus(); |
198 | free_cpumask_var(online_nohz); | 226 | free_cpumask_var(online_nohz); |
@@ -551,6 +579,21 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) | |||
551 | return false; | 579 | return false; |
552 | } | 580 | } |
553 | 581 | ||
582 | if (have_nohz_extended_mask) { | ||
583 | /* | ||
584 | * Keep the tick alive to guarantee timekeeping progression | ||
585 | * if there are full dynticks CPUs around | ||
586 | */ | ||
587 | if (tick_do_timer_cpu == cpu) | ||
588 | return false; | ||
589 | /* | ||
590 | * Boot safety: make sure the timekeeping duty has been | ||
591 | * assigned before entering dyntick-idle mode, | ||
592 | */ | ||
593 | if (tick_do_timer_cpu == TICK_DO_TIMER_NONE) | ||
594 | return false; | ||
595 | } | ||
596 | |||
554 | return true; | 597 | return true; |
555 | } | 598 | } |
556 | 599 | ||