author    Arun R Bharadwaj <arun@linux.vnet.ibm.com>  2009-04-16 02:46:41 -0400
committer Thomas Gleixner <tglx@linutronix.de>       2009-05-13 10:52:42 -0400
commit    eea08f32adb3f97553d49a4f79a119833036000a (patch)
tree      4e6af5185309d7abe49a8fa19634ea38582381e4 /kernel
parent    cd1bb94b4a0531e8211a3774f17de831f8285f76 (diff)
timers: Logic to move non pinned timers
* Arun R Bharadwaj <arun@linux.vnet.ibm.com> [2009-04-16 12:11:36]:

This patch migrates all non-pinned timers and hrtimers from the idle CPUs
to the current idle load balancer. Timers firing on busy CPUs are not
migrated.

When migrating an hrtimer, care must be taken that the migration does not
introduce latency. We therefore compare the expiry of the hrtimer with the
next timer interrupt on the target cpu and migrate the hrtimer only if it
expires *after* the next interrupt on the target cpu. To support this, a
clockevents_get_next_event() helper function is added which returns the
next_event of the target cpu's clock_event_device.

[ tglx: cleanups and simplifications ]

Signed-off-by: Arun R Bharadwaj <arun@linux.vnet.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
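For orientation, here is a minimal standalone sketch of the latency check
described above, condensed from the hrtimer hunk below. The function name
hrtimer_can_migrate() is hypothetical, and the sketch omits the clock-base
offset adjustment the real code makes under CONFIG_HIGH_RES_TIMERS; the
helpers it calls (hrtimer_get_expires(), clockevents_get_next_event(),
ktime_sub()) are the ones the patch itself uses:

    /*
     * Hypothetical condensation of the check in switch_hrtimer_base():
     * migrate only when the timer expires after the next event already
     * programmed on the target cpu, since we cannot reprogram remote
     * clock event hardware and would otherwise make the timer fire late.
     */
    static int hrtimer_can_migrate(struct hrtimer *timer, int target_cpu)
    {
            ktime_t expires = hrtimer_get_expires(timer);
            ktime_t next = clockevents_get_next_event(target_cpu);

            return ktime_sub(expires, next).tv64 >= 0;
    }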
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/hrtimer.c           | 51 ++++++++++++++++++++++++++++++++++++++++++++--
-rw-r--r--  kernel/sched.c             |  5 +++++
-rw-r--r--  kernel/time/clockevents.c  | 12 ++++++++++++
-rw-r--r--  kernel/timer.c             | 17 +++++++++++++---
4 files changed, 80 insertions(+), 5 deletions(-)
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index c71bcd54924..b675a67c9ac 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -43,6 +43,8 @@
 #include <linux/seq_file.h>
 #include <linux/err.h>
 #include <linux/debugobjects.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
 
 #include <asm/uaccess.h>
 
@@ -198,8 +200,19 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
 {
 	struct hrtimer_clock_base *new_base;
 	struct hrtimer_cpu_base *new_cpu_base;
+	int cpu, preferred_cpu = -1;
+
+	cpu = smp_processor_id();
+#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
+	if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
+		preferred_cpu = get_nohz_load_balancer();
+		if (preferred_cpu >= 0)
+			cpu = preferred_cpu;
+	}
+#endif
 
-	new_cpu_base = &__get_cpu_var(hrtimer_bases);
+again:
+	new_cpu_base = &per_cpu(hrtimer_bases, cpu);
 	new_base = &new_cpu_base->clock_base[base->index];
 
 	if (base != new_base) {
@@ -219,6 +232,40 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
 		timer->base = NULL;
 		spin_unlock(&base->cpu_base->lock);
 		spin_lock(&new_base->cpu_base->lock);
+
+		/* Optimized away for NOHZ=n SMP=n */
+		if (cpu == preferred_cpu) {
+			/* Calculate clock monotonic expiry time */
+#ifdef CONFIG_HIGH_RES_TIMERS
+			ktime_t expires = ktime_sub(hrtimer_get_expires(timer),
+						    new_base->offset);
+#else
+			ktime_t expires = hrtimer_get_expires(timer);
+#endif
+
+			/*
+			 * Get the next event on target cpu from the
+			 * clock events layer.
+			 * This covers the highres=off nohz=on case as well.
+			 */
+			ktime_t next = clockevents_get_next_event(cpu);
+
+			ktime_t delta = ktime_sub(expires, next);
+
+			/*
+			 * We do not migrate the timer when it is expiring
+			 * before the next event on the target cpu because
+			 * we cannot reprogram the target cpu hardware and
+			 * we would cause it to fire late.
+			 */
+			if (delta.tv64 < 0) {
+				cpu = smp_processor_id();
+				spin_unlock(&new_base->cpu_base->lock);
+				spin_lock(&base->cpu_base->lock);
+				timer->base = base;
+				goto again;
+			}
+		}
 		timer->base = new_base;
 	}
 	return new_base;
@@ -236,7 +283,7 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
 	return base;
 }
 
-# define switch_hrtimer_base(t, b)	(b)
+# define switch_hrtimer_base(t, b, p)	(b)
 
 #endif /* !CONFIG_SMP */
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 7f1dd56af86..9fe3774a0fd 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4244,6 +4244,11 @@ static struct {
 	.load_balancer = ATOMIC_INIT(-1),
 };
 
+int get_nohz_load_balancer(void)
+{
+	return atomic_read(&nohz.load_balancer);
+}
+
 /*
  * This routine will try to nominate the ilb (idle load balancing)
  * owner among the cpus whose ticks are stopped. ilb owner will do the idle
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index d13be216a79..ab20ded013b 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -18,6 +18,7 @@
 #include <linux/notifier.h>
 #include <linux/smp.h>
 #include <linux/sysdev.h>
+#include <linux/tick.h>
 
 /* The registered clock event devices */
 static LIST_HEAD(clockevent_devices);
@@ -251,4 +252,15 @@ void clockevents_notify(unsigned long reason, void *arg)
 	spin_unlock(&clockevents_lock);
 }
 EXPORT_SYMBOL_GPL(clockevents_notify);
+
+ktime_t clockevents_get_next_event(int cpu)
+{
+	struct tick_device *td;
+	struct clock_event_device *dev;
+
+	td = &per_cpu(tick_cpu_device, cpu);
+	dev = td->evtdev;
+
+	return dev->next_event;
+}
 #endif
diff --git a/kernel/timer.c b/kernel/timer.c
index 3424dfd11d5..3f841db5edf 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -37,6 +37,7 @@
 #include <linux/delay.h>
 #include <linux/tick.h>
 #include <linux/kallsyms.h>
+#include <linux/sched.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -609,9 +610,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
 {
 	struct tvec_base *base, *new_base;
 	unsigned long flags;
-	int ret;
-
-	ret = 0;
+	int ret = 0 , cpu;
 
 	timer_stats_timer_set_start_info(timer);
 	BUG_ON(!timer->function);
@@ -630,6 +629,18 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
 
 	new_base = __get_cpu_var(tvec_bases);
 
+	cpu = smp_processor_id();
+
+#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
+	if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
+		int preferred_cpu = get_nohz_load_balancer();
+
+		if (preferred_cpu >= 0)
+			cpu = preferred_cpu;
+	}
+#endif
+	new_base = per_cpu(tvec_bases, cpu);
+
 	if (base != new_base) {
 		/*
 		 * We are trying to schedule the timer on the local CPU.
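
Usage note: whether a timer is eligible for migration at all is decided by
its caller via the `pinned` argument threaded through __mod_timer() and
switch_hrtimer_base(). As an assumption based on the pinned-timer framework
patches in the same series (they are not part of this commit), callers would
choose between the migratable and pinned paths roughly like this:

    /*
     * Sketch assuming mod_timer_pinned() and the HRTIMER_MODE_*_PINNED
     * modes from the same patch series are available; not this commit.
     */
    mod_timer(&my_timer, jiffies + HZ);         /* may be migrated */
    mod_timer_pinned(&my_timer, jiffies + HZ);  /* stays on this cpu */

    hrtimer_start(&my_hrtimer, ktime_set(1, 0),
                  HRTIMER_MODE_REL);            /* may be migrated */
    hrtimer_start(&my_hrtimer, ktime_set(1, 0),
                  HRTIMER_MODE_REL_PINNED);     /* stays on this cpu */

Migration is additionally gated at run time by get_sysctl_timer_migration(),
i.e. the kernel.timer_migration sysctl, and only takes effect when the cpu
arming the timer is idle.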