author	Arun R Bharadwaj <arun@linux.vnet.ibm.com>	2009-04-16 02:46:41 -0400
committer	Thomas Gleixner <tglx@linutronix.de>	2009-05-13 10:52:42 -0400
commit	eea08f32adb3f97553d49a4f79a119833036000a (patch)
tree	4e6af5185309d7abe49a8fa19634ea38582381e4
parent	cd1bb94b4a0531e8211a3774f17de831f8285f76 (diff)
timers: Logic to move non pinned timers
* Arun R Bharadwaj <arun@linux.vnet.ibm.com> [2009-04-16 12:11:36]:

This patch migrates all non pinned timers and hrtimers to the current
idle load balancer, from all the idle CPUs. Timers firing on busy CPUs
are not migrated.

While migrating hrtimers, care must be taken to check whether migrating
an hrtimer would introduce extra latency. We therefore compare the expiry
of the hrtimer with the next timer interrupt on the target cpu and
migrate the hrtimer only if it expires *after* the next interrupt on the
target cpu. A clockevents_get_next_event() helper function is added to
return the next_event on the target cpu's clock_event_device.

[ tglx: cleanups and simplifications ]

Signed-off-by: Arun R Bharadwaj <arun@linux.vnet.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
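As a reading aid, the migration decision at the heart of the patch can be
condensed into a minimal sketch. can_migrate_hrtimer() is a hypothetical
helper invented here for illustration; in the patch the logic lives inline
in switch_hrtimer_base(), and the CONFIG_HIGH_RES_TIMERS base-offset
adjustment is omitted for brevity:

        /*
         * Illustrative only: a non-pinned hrtimer queued from an idle CPU
         * may move to the idle load balancer's CPU, but only if it expires
         * after the event already programmed there. The remote
         * clock_event_device cannot be reprogrammed from this CPU, so an
         * earlier expiry would fire late.
         */
        static int can_migrate_hrtimer(struct hrtimer *timer, int target_cpu)
        {
                ktime_t expires = hrtimer_get_expires(timer);
                ktime_t next = clockevents_get_next_event(target_cpu);

                /* delta < 0: timer fires before the target's next event */
                return ktime_sub(expires, next).tv64 >= 0;
        }

Note that get_sysctl_timer_migration() gates the whole path: with
CONFIG_SCHED_DEBUG=n it hard-wires migration on by returning 1, otherwise
it reads the sysctl_timer_migration knob introduced by the parent commit.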
-rw-r--r--	include/linux/clockchips.h	9
-rw-r--r--	include/linux/sched.h	12
-rw-r--r--	kernel/hrtimer.c	51
-rw-r--r--	kernel/sched.c	5
-rw-r--r--	kernel/time/clockevents.c	12
-rw-r--r--	kernel/timer.c	17
6 files changed, 101 insertions(+), 5 deletions(-)
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 3a1dbba4d3ae..20a100fe2b4f 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -143,3 +143,12 @@ extern void clockevents_notify(unsigned long reason, void *arg);
 #endif
 
 #endif
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+extern ktime_t clockevents_get_next_event(int cpu);
+#else
+static inline ktime_t clockevents_get_next_event(int cpu)
+{
+        return (ktime_t) { .tv64 = KTIME_MAX };
+}
+#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 618504010400..311dec123974 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -257,6 +257,7 @@ extern void task_rq_unlock_wait(struct task_struct *p);
 extern cpumask_var_t nohz_cpu_mask;
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
 extern int select_nohz_load_balancer(int cpu);
+extern int get_nohz_load_balancer(void);
 #else
 static inline int select_nohz_load_balancer(int cpu)
 {
@@ -1772,6 +1773,17 @@ int sched_nr_latency_handler(struct ctl_table *table, int write,
                struct file *file, void __user *buffer, size_t *length,
                loff_t *ppos);
 #endif
+#ifdef CONFIG_SCHED_DEBUG
+static inline unsigned int get_sysctl_timer_migration(void)
+{
+        return sysctl_timer_migration;
+}
+#else
+static inline unsigned int get_sysctl_timer_migration(void)
+{
+        return 1;
+}
+#endif
 extern unsigned int sysctl_sched_rt_period;
 extern int sysctl_sched_rt_runtime;
 
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index c71bcd549241..b675a67c9ac3 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -43,6 +43,8 @@
 #include <linux/seq_file.h>
 #include <linux/err.h>
 #include <linux/debugobjects.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
 
 #include <asm/uaccess.h>
 
@@ -198,8 +200,19 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
 {
         struct hrtimer_clock_base *new_base;
         struct hrtimer_cpu_base *new_cpu_base;
+        int cpu, preferred_cpu = -1;
+
+        cpu = smp_processor_id();
+#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
+        if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
+                preferred_cpu = get_nohz_load_balancer();
+                if (preferred_cpu >= 0)
+                        cpu = preferred_cpu;
+        }
+#endif
 
-        new_cpu_base = &__get_cpu_var(hrtimer_bases);
+again:
+        new_cpu_base = &per_cpu(hrtimer_bases, cpu);
         new_base = &new_cpu_base->clock_base[base->index];
 
         if (base != new_base) {
@@ -219,6 +232,40 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
                 timer->base = NULL;
                 spin_unlock(&base->cpu_base->lock);
                 spin_lock(&new_base->cpu_base->lock);
+
+                /* Optimized away for NOHZ=n SMP=n */
+                if (cpu == preferred_cpu) {
+                        /* Calculate clock monotonic expiry time */
+#ifdef CONFIG_HIGH_RES_TIMERS
+                        ktime_t expires = ktime_sub(hrtimer_get_expires(timer),
+                                                        new_base->offset);
+#else
+                        ktime_t expires = hrtimer_get_expires(timer);
+#endif
+
+                        /*
+                         * Get the next event on target cpu from the
+                         * clock events layer.
+                         * This covers the highres=off nohz=on case as well.
+                         */
+                        ktime_t next = clockevents_get_next_event(cpu);
+
+                        ktime_t delta = ktime_sub(expires, next);
+
+                        /*
+                         * We do not migrate the timer when it is expiring
+                         * before the next event on the target cpu because
+                         * we cannot reprogram the target cpu hardware and
+                         * we would cause it to fire late.
+                         */
+                        if (delta.tv64 < 0) {
+                                cpu = smp_processor_id();
+                                spin_unlock(&new_base->cpu_base->lock);
+                                spin_lock(&base->cpu_base->lock);
+                                timer->base = base;
+                                goto again;
+                        }
+                }
                 timer->base = new_base;
         }
         return new_base;
@@ -236,7 +283,7 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
         return base;
 }
 
-# define switch_hrtimer_base(t, b)      (b)
+# define switch_hrtimer_base(t, b, p)   (b)
 
 #endif /* !CONFIG_SMP */
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 7f1dd56af863..9fe3774a0fd3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4244,6 +4244,11 @@ static struct {
         .load_balancer = ATOMIC_INIT(-1),
 };
 
+int get_nohz_load_balancer(void)
+{
+        return atomic_read(&nohz.load_balancer);
+}
+
 /*
  * This routine will try to nominate the ilb (idle load balancing)
  * owner among the cpus whose ticks are stopped. ilb owner will do the idle
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index d13be216a790..ab20ded013bd 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -18,6 +18,7 @@
 #include <linux/notifier.h>
 #include <linux/smp.h>
 #include <linux/sysdev.h>
+#include <linux/tick.h>
 
 /* The registered clock event devices */
 static LIST_HEAD(clockevent_devices);
@@ -251,4 +252,15 @@ void clockevents_notify(unsigned long reason, void *arg)
         spin_unlock(&clockevents_lock);
 }
 EXPORT_SYMBOL_GPL(clockevents_notify);
+
+ktime_t clockevents_get_next_event(int cpu)
+{
+        struct tick_device *td;
+        struct clock_event_device *dev;
+
+        td = &per_cpu(tick_cpu_device, cpu);
+        dev = td->evtdev;
+
+        return dev->next_event;
+}
 #endif
diff --git a/kernel/timer.c b/kernel/timer.c
index 3424dfd11d50..3f841db5edf9 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -37,6 +37,7 @@
 #include <linux/delay.h>
 #include <linux/tick.h>
 #include <linux/kallsyms.h>
+#include <linux/sched.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -609,9 +610,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
 {
         struct tvec_base *base, *new_base;
         unsigned long flags;
-        int ret;
-
-        ret = 0;
+        int ret = 0 , cpu;
 
         timer_stats_timer_set_start_info(timer);
         BUG_ON(!timer->function);
@@ -630,6 +629,18 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
 
         new_base = __get_cpu_var(tvec_bases);
 
+        cpu = smp_processor_id();
+
+#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
+        if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
+                int preferred_cpu = get_nohz_load_balancer();
+
+                if (preferred_cpu >= 0)
+                        cpu = preferred_cpu;
+        }
+#endif
+        new_base = per_cpu(tvec_bases, cpu);
+
         if (base != new_base) {
                 /*
                  * We are trying to schedule the timer on the local CPU.