-rw-r--r--  include/linux/clockchips.h |  9
-rw-r--r--  include/linux/sched.h      | 12
-rw-r--r--  kernel/hrtimer.c           | 51
-rw-r--r--  kernel/sched.c             |  5
-rw-r--r--  kernel/time/clockevents.c  | 12
-rw-r--r--  kernel/timer.c             | 17
6 files changed, 101 insertions(+), 5 deletions(-)
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 3a1dbba4d3ae..20a100fe2b4f 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -143,3 +143,12 @@ extern void clockevents_notify(unsigned long reason, void *arg);
 #endif
 
 #endif
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+extern ktime_t clockevents_get_next_event(int cpu);
+#else
+static inline ktime_t clockevents_get_next_event(int cpu)
+{
+	return (ktime_t) { .tv64 = KTIME_MAX };
+}
+#endif
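
A note on the !GENERIC_CLOCKEVENTS stub above: configurations without generic clockevents also lack NO_HZ, so the stub mainly exists to keep callers compiling; if it were ever consulted, KTIME_MAX makes the caller's expires - next comparison negative for any realistic expiry, which blocks migration. A minimal userspace sketch of that arithmetic, with plain int64_t standing in for ktime_t (not kernel code):

#include <stdio.h>
#include <stdint.h>

#define KTIME_MAX INT64_MAX	/* as in <linux/ktime.h> */

int main(void)
{
	int64_t expires = 1000000000;	/* expiry one second out, in ns */
	int64_t next = KTIME_MAX;	/* the stub's "no information" answer */
	int64_t delta = expires - next;	/* mirrors ktime_sub(expires, next) */

	/* switch_hrtimer_base() below refuses to migrate when delta < 0 */
	printf("migrate to target cpu? %s\n", delta < 0 ? "no" : "yes");
	return 0;
}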
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 618504010400..311dec123974 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -257,6 +257,7 @@ extern void task_rq_unlock_wait(struct task_struct *p);
 extern cpumask_var_t nohz_cpu_mask;
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
 extern int select_nohz_load_balancer(int cpu);
+extern int get_nohz_load_balancer(void);
 #else
 static inline int select_nohz_load_balancer(int cpu)
 {
@@ -1772,6 +1773,17 @@ int sched_nr_latency_handler(struct ctl_table *table, int write,
 		struct file *file, void __user *buffer, size_t *length,
 		loff_t *ppos);
 #endif
+#ifdef CONFIG_SCHED_DEBUG
+static inline unsigned int get_sysctl_timer_migration(void)
+{
+	return sysctl_timer_migration;
+}
+#else
+static inline unsigned int get_sysctl_timer_migration(void)
+{
+	return 1;
+}
+#endif
 extern unsigned int sysctl_sched_rt_period;
 extern int sysctl_sched_rt_runtime;
 
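
The two sched.h additions feed a single gate: a timer is only steered away from the current CPU when the caller did not pin it, the timer_migration sysctl allows it (hard-wired to 1 without CONFIG_SCHED_DEBUG), the current CPU is idle, and an idle-load-balancer CPU has been nominated. A hypothetical condensation of the condition both call sites open-code (the helper is ours; the callees are from this patch):

/* Hypothetical helper; __mod_timer() and switch_hrtimer_base()
 * open-code this condition rather than share it. */
static int timer_migration_wanted(int this_cpu, int pinned)
{
	return !pinned &&			/* caller did not pin the timer */
	       get_sysctl_timer_migration() &&	/* sysctl knob, 1 if !SCHED_DEBUG */
	       idle_cpu(this_cpu) &&		/* only migrate off an idle cpu */
	       get_nohz_load_balancer() >= 0;	/* and an ilb owner exists */
}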
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index c71bcd549241..b675a67c9ac3 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -43,6 +43,8 @@
 #include <linux/seq_file.h>
 #include <linux/err.h>
 #include <linux/debugobjects.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
 
 #include <asm/uaccess.h>
 
@@ -198,8 +200,19 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
 {
 	struct hrtimer_clock_base *new_base;
 	struct hrtimer_cpu_base *new_cpu_base;
+	int cpu, preferred_cpu = -1;
+
+	cpu = smp_processor_id();
+#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
+	if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
+		preferred_cpu = get_nohz_load_balancer();
+		if (preferred_cpu >= 0)
+			cpu = preferred_cpu;
+	}
+#endif
 
-	new_cpu_base = &__get_cpu_var(hrtimer_bases);
+again:
+	new_cpu_base = &per_cpu(hrtimer_bases, cpu);
 	new_base = &new_cpu_base->clock_base[base->index];
 
 	if (base != new_base) {
@@ -219,6 +232,40 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
 		timer->base = NULL;
 		spin_unlock(&base->cpu_base->lock);
 		spin_lock(&new_base->cpu_base->lock);
+
+		/* Optimized away for NOHZ=n SMP=n */
+		if (cpu == preferred_cpu) {
+			/* Calculate clock monotonic expiry time */
+#ifdef CONFIG_HIGH_RES_TIMERS
+			ktime_t expires = ktime_sub(hrtimer_get_expires(timer),
+							new_base->offset);
+#else
+			ktime_t expires = hrtimer_get_expires(timer);
+#endif
+
+			/*
+			 * Get the next event on target cpu from the
+			 * clock events layer.
+			 * This covers the highres=off nohz=on case as well.
+			 */
+			ktime_t next = clockevents_get_next_event(cpu);
+
+			ktime_t delta = ktime_sub(expires, next);
+
+			/*
+			 * We do not migrate the timer when it is expiring
+			 * before the next event on the target cpu because
+			 * we cannot reprogram the target cpu hardware and
+			 * we would cause it to fire late.
+			 */
+			if (delta.tv64 < 0) {
+				cpu = smp_processor_id();
+				spin_unlock(&new_base->cpu_base->lock);
+				spin_lock(&base->cpu_base->lock);
+				timer->base = base;
+				goto again;
+			}
+		}
 		timer->base = new_base;
 	}
 	return new_base;
@@ -236,7 +283,7 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
 	return base;
 }
 
-# define switch_hrtimer_base(t, b)	(b)
+# define switch_hrtimer_base(t, b, p)	(b)
 
 #endif /* !CONFIG_SMP */
 
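
The heart of the hrtimer change is the veto inside the base switch: an idle nohz target has its event device programmed for some future next event, and a timer expiring before that point cannot fire on time because the remote CPU's hardware cannot be reprogrammed from here. When the veto hits, the code drops the new base lock, re-takes the old one, restores timer->base, and retries against the local CPU via goto again; falling back to smp_processor_id() is always safe since the local event device can be reprogrammed directly. A condensed restatement of the check, pulled into a hypothetical helper (the name is ours, not the patch's):

/* Sketch only: the migration veto from switch_hrtimer_base(). */
static int expires_before_next_event(int cpu, ktime_t expires)
{
	ktime_t next = clockevents_get_next_event(cpu);

	/* negative delta: the target would fire this timer late */
	return ktime_sub(expires, next).tv64 < 0;
}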
diff --git a/kernel/sched.c b/kernel/sched.c
index 7f1dd56af863..9fe3774a0fd3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4244,6 +4244,11 @@ static struct {
 	.load_balancer = ATOMIC_INIT(-1),
 };
 
+int get_nohz_load_balancer(void)
+{
+	return atomic_read(&nohz.load_balancer);
+}
+
 /*
  * This routine will try to nominate the ilb (idle load balancing)
  * owner among the cpus whose ticks are stopped. ilb owner will do the idle
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index d13be216a790..ab20ded013bd 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -18,6 +18,7 @@
 #include <linux/notifier.h>
 #include <linux/smp.h>
 #include <linux/sysdev.h>
+#include <linux/tick.h>
 
 /* The registered clock event devices */
 static LIST_HEAD(clockevent_devices);
@@ -251,4 +252,15 @@ void clockevents_notify(unsigned long reason, void *arg)
 	spin_unlock(&clockevents_lock);
 }
 EXPORT_SYMBOL_GPL(clockevents_notify);
+
+ktime_t clockevents_get_next_event(int cpu)
+{
+	struct tick_device *td;
+	struct clock_event_device *dev;
+
+	td = &per_cpu(tick_cpu_device, cpu);
+	dev = td->evtdev;
+
+	return dev->next_event;
+}
 #endif
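
clockevents_get_next_event() simply reports the expiry the target CPU's tick device is already programmed for. That value is kept in CLOCK_MONOTONIC time, which is presumably why the hrtimer hunk converts its expiry with new_base->offset under CONFIG_HIGH_RES_TIMERS before comparing. A sketch of that conversion as a hypothetical helper (types from kernel/hrtimer.c; the helper itself is ours):

/* Sketch: normalize a clock-base expiry to CLOCK_MONOTONIC before
 * comparing it with dev->next_event, as the hrtimer hunk does. */
static ktime_t expires_monotonic(struct hrtimer *timer,
				 struct hrtimer_clock_base *base)
{
#ifdef CONFIG_HIGH_RES_TIMERS
	/* base->offset is the base clock's offset from CLOCK_MONOTONIC */
	return ktime_sub(hrtimer_get_expires(timer), base->offset);
#else
	return hrtimer_get_expires(timer);
#endif
}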
diff --git a/kernel/timer.c b/kernel/timer.c
index 3424dfd11d50..3f841db5edf9 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -37,6 +37,7 @@
 #include <linux/delay.h>
 #include <linux/tick.h>
 #include <linux/kallsyms.h>
+#include <linux/sched.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -609,9 +610,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
 {
 	struct tvec_base *base, *new_base;
 	unsigned long flags;
-	int ret;
-
-	ret = 0;
+	int ret = 0 , cpu;
 
 	timer_stats_timer_set_start_info(timer);
 	BUG_ON(!timer->function);
@@ -630,6 +629,18 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
 
 	new_base = __get_cpu_var(tvec_bases);
 
+	cpu = smp_processor_id();
+
+#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
+	if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
+		int preferred_cpu = get_nohz_load_balancer();
+
+		if (preferred_cpu >= 0)
+			cpu = preferred_cpu;
+	}
+#endif
+	new_base = per_cpu(tvec_bases, cpu);
+
 	if (base != new_base) {
 		/*
 		 * We are trying to schedule the timer on the local CPU.
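
Note the asymmetry with the hrtimer path: once a preferred CPU exists, __mod_timer() switches bases unconditionally; there is no next-event veto here. For callers, the opt-out is the pinned argument threaded through this function. A usage sketch, assuming the mod_timer_pinned() wrapper from the companion pinned-timers patch (the timer names are illustrative):

static void rearm(struct timer_list *poll_timer,
		  struct timer_list *percpu_timer)
{
	/* migratable: may now be queued on the idle-load-balancer cpu */
	mod_timer(poll_timer, jiffies + HZ);

	/* pinned: always queued on the current cpu's base */
	mod_timer_pinned(percpu_timer, jiffies + 1);
}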