aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2009-06-15 13:06:19 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2009-06-15 13:06:19 -0400
commit 19035e5b5d1e3127b4925d86f6a77964f91f2c3c (patch)
tree c9e7e9073970176a5b0970da715cb6430c3c9069 /kernel
parent f9db6e095115f9411b9647bdb9d81fe11f3d8b54 (diff)
parent eea08f32adb3f97553d49a4f79a119833036000a (diff)
Merge branch 'timers-for-linus-migration' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'timers-for-linus-migration' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  timers: Logic to move non pinned timers
  timers: /proc/sys sysctl hook to enable timer migration
  timers: Identifying the existing pinned timers
  timers: Framework for identifying pinned timers
  timers: allow deferrable timers for intervals tv2-tv5 to be deferred

Fix up conflicts in kernel/sched.c and kernel/timer.c manually
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/hrtimer.c 58
-rw-r--r-- kernel/sched.c 11
-rw-r--r-- kernel/sysctl.c 8
-rw-r--r-- kernel/time/clockevents.c 12
-rw-r--r-- kernel/time/tick-sched.c 7
-rw-r--r-- kernel/timer.c 51
-rw-r--r-- kernel/trace/trace_sysprof.c 3
7 files changed, 132 insertions, 18 deletions
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index cb8a15c19583..b675a67c9ac3 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -43,6 +43,8 @@
43#include <linux/seq_file.h> 43#include <linux/seq_file.h>
44#include <linux/err.h> 44#include <linux/err.h>
45#include <linux/debugobjects.h> 45#include <linux/debugobjects.h>
46#include <linux/sched.h>
47#include <linux/timer.h>
46 48
47#include <asm/uaccess.h> 49#include <asm/uaccess.h>
48 50
@@ -193,12 +195,24 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
193 * Switch the timer base to the current CPU when possible. 195 * Switch the timer base to the current CPU when possible.
194 */ 196 */
195static inline struct hrtimer_clock_base * 197static inline struct hrtimer_clock_base *
196switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base) 198switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
199 int pinned)
197{ 200{
198 struct hrtimer_clock_base *new_base; 201 struct hrtimer_clock_base *new_base;
199 struct hrtimer_cpu_base *new_cpu_base; 202 struct hrtimer_cpu_base *new_cpu_base;
203 int cpu, preferred_cpu = -1;
204
205 cpu = smp_processor_id();
206#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
207 if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
208 preferred_cpu = get_nohz_load_balancer();
209 if (preferred_cpu >= 0)
210 cpu = preferred_cpu;
211 }
212#endif
200 213
201 new_cpu_base = &__get_cpu_var(hrtimer_bases); 214again:
215 new_cpu_base = &per_cpu(hrtimer_bases, cpu);
202 new_base = &new_cpu_base->clock_base[base->index]; 216 new_base = &new_cpu_base->clock_base[base->index];
203 217
204 if (base != new_base) { 218 if (base != new_base) {
@@ -218,6 +232,40 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base)
218 timer->base = NULL; 232 timer->base = NULL;
219 spin_unlock(&base->cpu_base->lock); 233 spin_unlock(&base->cpu_base->lock);
220 spin_lock(&new_base->cpu_base->lock); 234 spin_lock(&new_base->cpu_base->lock);
235
236 /* Optimized away for NOHZ=n SMP=n */
237 if (cpu == preferred_cpu) {
238 /* Calculate clock monotonic expiry time */
239#ifdef CONFIG_HIGH_RES_TIMERS
240 ktime_t expires = ktime_sub(hrtimer_get_expires(timer),
241 new_base->offset);
242#else
243 ktime_t expires = hrtimer_get_expires(timer);
244#endif
245
246 /*
247 * Get the next event on target cpu from the
248 * clock events layer.
249 * This covers the highres=off nohz=on case as well.
250 */
251 ktime_t next = clockevents_get_next_event(cpu);
252
253 ktime_t delta = ktime_sub(expires, next);
254
255 /*
256 * We do not migrate the timer when it is expiring
257 * before the next event on the target cpu because
258 * we cannot reprogram the target cpu hardware and
259 * we would cause it to fire late.
260 */
261 if (delta.tv64 < 0) {
262 cpu = smp_processor_id();
263 spin_unlock(&new_base->cpu_base->lock);
264 spin_lock(&base->cpu_base->lock);
265 timer->base = base;
266 goto again;
267 }
268 }
221 timer->base = new_base; 269 timer->base = new_base;
222 } 270 }
223 return new_base; 271 return new_base;
@@ -235,7 +283,7 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
235 return base; 283 return base;
236} 284}
237 285
238# define switch_hrtimer_base(t, b) (b) 286# define switch_hrtimer_base(t, b, p) (b)
239 287
240#endif /* !CONFIG_SMP */ 288#endif /* !CONFIG_SMP */
241 289
@@ -907,9 +955,9 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
907 ret = remove_hrtimer(timer, base); 955 ret = remove_hrtimer(timer, base);
908 956
909 /* Switch the timer base, if necessary: */ 957 /* Switch the timer base, if necessary: */
910 new_base = switch_hrtimer_base(timer, base); 958 new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
911 959
912 if (mode == HRTIMER_MODE_REL) { 960 if (mode & HRTIMER_MODE_REL) {
913 tim = ktime_add_safe(tim, new_base->get_time()); 961 tim = ktime_add_safe(tim, new_base->get_time());
914 /* 962 /*
915 * CONFIG_TIME_LOW_RES is a temporary way for architectures 963 * CONFIG_TIME_LOW_RES is a temporary way for architectures
diff --git a/kernel/sched.c b/kernel/sched.c
index 8ec9d13140be..8fb88a906aaa 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -240,7 +240,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
240 hard = hrtimer_get_expires(&rt_b->rt_period_timer); 240 hard = hrtimer_get_expires(&rt_b->rt_period_timer);
241 delta = ktime_to_ns(ktime_sub(hard, soft)); 241 delta = ktime_to_ns(ktime_sub(hard, soft));
242 __hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta, 242 __hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta,
243 HRTIMER_MODE_ABS, 0); 243 HRTIMER_MODE_ABS_PINNED, 0);
244 } 244 }
245 spin_unlock(&rt_b->rt_runtime_lock); 245 spin_unlock(&rt_b->rt_runtime_lock);
246} 246}
@@ -1155,7 +1155,7 @@ static __init void init_hrtick(void)
1155static void hrtick_start(struct rq *rq, u64 delay) 1155static void hrtick_start(struct rq *rq, u64 delay)
1156{ 1156{
1157 __hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0, 1157 __hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0,
1158 HRTIMER_MODE_REL, 0); 1158 HRTIMER_MODE_REL_PINNED, 0);
1159} 1159}
1160 1160
1161static inline void init_hrtick(void) 1161static inline void init_hrtick(void)
@@ -4397,6 +4397,11 @@ static struct {
4397 .load_balancer = ATOMIC_INIT(-1), 4397 .load_balancer = ATOMIC_INIT(-1),
4398}; 4398};
4399 4399
4400int get_nohz_load_balancer(void)
4401{
4402 return atomic_read(&nohz.load_balancer);
4403}
4404
4400#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) 4405#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
4401/** 4406/**
4402 * lowest_flag_domain - Return lowest sched_domain containing flag. 4407 * lowest_flag_domain - Return lowest sched_domain containing flag.
@@ -9029,6 +9034,8 @@ void __init sched_init_smp(void)
9029} 9034}
9030#endif /* CONFIG_SMP */ 9035#endif /* CONFIG_SMP */
9031 9036
9037const_debug unsigned int sysctl_timer_migration = 1;
9038
9032int in_sched_functions(unsigned long addr) 9039int in_sched_functions(unsigned long addr)
9033{ 9040{
9034 return in_lock_functions(addr) || 9041 return in_lock_functions(addr) ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ce664f98e3fb..0e51a35a4486 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -328,6 +328,14 @@ static struct ctl_table kern_table[] = {
328 .mode = 0644, 328 .mode = 0644,
329 .proc_handler = &proc_dointvec, 329 .proc_handler = &proc_dointvec,
330 }, 330 },
331 {
332 .ctl_name = CTL_UNNUMBERED,
333 .procname = "timer_migration",
334 .data = &sysctl_timer_migration,
335 .maxlen = sizeof(unsigned int),
336 .mode = 0644,
337 .proc_handler = &proc_dointvec,
338 },
331#endif 339#endif
332 { 340 {
333 .ctl_name = CTL_UNNUMBERED, 341 .ctl_name = CTL_UNNUMBERED,
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 3948fa644a2d..1ad6dd461119 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -18,6 +18,7 @@
18#include <linux/notifier.h> 18#include <linux/notifier.h>
19#include <linux/smp.h> 19#include <linux/smp.h>
20#include <linux/sysdev.h> 20#include <linux/sysdev.h>
21#include <linux/tick.h>
21 22
22/* The registered clock event devices */ 23/* The registered clock event devices */
23static LIST_HEAD(clockevent_devices); 24static LIST_HEAD(clockevent_devices);
@@ -253,4 +254,15 @@ void clockevents_notify(unsigned long reason, void *arg)
253 spin_unlock(&clockevents_lock); 254 spin_unlock(&clockevents_lock);
254} 255}
255EXPORT_SYMBOL_GPL(clockevents_notify); 256EXPORT_SYMBOL_GPL(clockevents_notify);
257
258ktime_t clockevents_get_next_event(int cpu)
259{
260 struct tick_device *td;
261 struct clock_event_device *dev;
262
263 td = &per_cpu(tick_cpu_device, cpu);
264 dev = td->evtdev;
265
266 return dev->next_event;
267}
256#endif 268#endif
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index d3f1ef4d5cbe..2aff39c6f10c 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -349,7 +349,7 @@ void tick_nohz_stop_sched_tick(int inidle)
349 349
350 if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { 350 if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
351 hrtimer_start(&ts->sched_timer, expires, 351 hrtimer_start(&ts->sched_timer, expires,
352 HRTIMER_MODE_ABS); 352 HRTIMER_MODE_ABS_PINNED);
353 /* Check, if the timer was already in the past */ 353 /* Check, if the timer was already in the past */
354 if (hrtimer_active(&ts->sched_timer)) 354 if (hrtimer_active(&ts->sched_timer))
355 goto out; 355 goto out;
@@ -395,7 +395,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
395 395
396 if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { 396 if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
397 hrtimer_start_expires(&ts->sched_timer, 397 hrtimer_start_expires(&ts->sched_timer,
398 HRTIMER_MODE_ABS); 398 HRTIMER_MODE_ABS_PINNED);
399 /* Check, if the timer was already in the past */ 399 /* Check, if the timer was already in the past */
400 if (hrtimer_active(&ts->sched_timer)) 400 if (hrtimer_active(&ts->sched_timer))
401 break; 401 break;
@@ -698,7 +698,8 @@ void tick_setup_sched_timer(void)
698 698
699 for (;;) { 699 for (;;) {
700 hrtimer_forward(&ts->sched_timer, now, tick_period); 700 hrtimer_forward(&ts->sched_timer, now, tick_period);
701 hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS); 701 hrtimer_start_expires(&ts->sched_timer,
702 HRTIMER_MODE_ABS_PINNED);
702 /* Check, if the timer was already in the past */ 703 /* Check, if the timer was already in the past */
703 if (hrtimer_active(&ts->sched_timer)) 704 if (hrtimer_active(&ts->sched_timer))
704 break; 705 break;
diff --git a/kernel/timer.c b/kernel/timer.c
index faf2db897de4..54d3912f8cad 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -38,6 +38,7 @@
38#include <linux/tick.h> 38#include <linux/tick.h>
39#include <linux/kallsyms.h> 39#include <linux/kallsyms.h>
40#include <linux/perf_counter.h> 40#include <linux/perf_counter.h>
41#include <linux/sched.h>
41 42
42#include <asm/uaccess.h> 43#include <asm/uaccess.h>
43#include <asm/unistd.h> 44#include <asm/unistd.h>
@@ -605,13 +606,12 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer,
605} 606}
606 607
607static inline int 608static inline int
608__mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) 609__mod_timer(struct timer_list *timer, unsigned long expires,
610 bool pending_only, int pinned)
609{ 611{
610 struct tvec_base *base, *new_base; 612 struct tvec_base *base, *new_base;
611 unsigned long flags; 613 unsigned long flags;
612 int ret; 614 int ret = 0 , cpu;
613
614 ret = 0;
615 615
616 timer_stats_timer_set_start_info(timer); 616 timer_stats_timer_set_start_info(timer);
617 BUG_ON(!timer->function); 617 BUG_ON(!timer->function);
@@ -630,6 +630,18 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
630 630
631 new_base = __get_cpu_var(tvec_bases); 631 new_base = __get_cpu_var(tvec_bases);
632 632
633 cpu = smp_processor_id();
634
635#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
636 if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
637 int preferred_cpu = get_nohz_load_balancer();
638
639 if (preferred_cpu >= 0)
640 cpu = preferred_cpu;
641 }
642#endif
643 new_base = per_cpu(tvec_bases, cpu);
644
633 if (base != new_base) { 645 if (base != new_base) {
634 /* 646 /*
635 * We are trying to schedule the timer on the local CPU. 647 * We are trying to schedule the timer on the local CPU.
@@ -669,7 +681,7 @@ out_unlock:
669 */ 681 */
670int mod_timer_pending(struct timer_list *timer, unsigned long expires) 682int mod_timer_pending(struct timer_list *timer, unsigned long expires)
671{ 683{
672 return __mod_timer(timer, expires, true); 684 return __mod_timer(timer, expires, true, TIMER_NOT_PINNED);
673} 685}
674EXPORT_SYMBOL(mod_timer_pending); 686EXPORT_SYMBOL(mod_timer_pending);
675 687
@@ -703,11 +715,33 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
703 if (timer->expires == expires && timer_pending(timer)) 715 if (timer->expires == expires && timer_pending(timer))
704 return 1; 716 return 1;
705 717
706 return __mod_timer(timer, expires, false); 718 return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
707} 719}
708EXPORT_SYMBOL(mod_timer); 720EXPORT_SYMBOL(mod_timer);
709 721
710/** 722/**
723 * mod_timer_pinned - modify a timer's timeout
724 * @timer: the timer to be modified
725 * @expires: new timeout in jiffies
726 *
727 * mod_timer_pinned() is a way to update the expire field of an
728 * active timer (if the timer is inactive it will be activated)
729 * and not allow the timer to be migrated to a different CPU.
730 *
731 * mod_timer_pinned(timer, expires) is equivalent to:
732 *
733 * del_timer(timer); timer->expires = expires; add_timer(timer);
734 */
735int mod_timer_pinned(struct timer_list *timer, unsigned long expires)
736{
737 if (timer->expires == expires && timer_pending(timer))
738 return 1;
739
740 return __mod_timer(timer, expires, false, TIMER_PINNED);
741}
742EXPORT_SYMBOL(mod_timer_pinned);
743
744/**
711 * add_timer - start a timer 745 * add_timer - start a timer
712 * @timer: the timer to be added 746 * @timer: the timer to be added
713 * 747 *
@@ -1017,6 +1051,9 @@ cascade:
1017 index = slot = timer_jiffies & TVN_MASK; 1051 index = slot = timer_jiffies & TVN_MASK;
1018 do { 1052 do {
1019 list_for_each_entry(nte, varp->vec + slot, entry) { 1053 list_for_each_entry(nte, varp->vec + slot, entry) {
1054 if (tbase_get_deferrable(nte->base))
1055 continue;
1056
1020 found = 1; 1057 found = 1;
1021 if (time_before(nte->expires, expires)) 1058 if (time_before(nte->expires, expires))
1022 expires = nte->expires; 1059 expires = nte->expires;
@@ -1307,7 +1344,7 @@ signed long __sched schedule_timeout(signed long timeout)
1307 expire = timeout + jiffies; 1344 expire = timeout + jiffies;
1308 1345
1309 setup_timer_on_stack(&timer, process_timeout, (unsigned long)current); 1346 setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
1310 __mod_timer(&timer, expire, false); 1347 __mod_timer(&timer, expire, false, TIMER_NOT_PINNED);
1311 schedule(); 1348 schedule();
1312 del_singleshot_timer_sync(&timer); 1349 del_singleshot_timer_sync(&timer);
1313 1350
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index e04b76cc238a..f6693969287d 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -203,7 +203,8 @@ static void start_stack_timer(void *unused)
203 hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 203 hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
204 hrtimer->function = stack_trace_timer_fn; 204 hrtimer->function = stack_trace_timer_fn;
205 205
206 hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL); 206 hrtimer_start(hrtimer, ns_to_ktime(sample_period),
207 HRTIMER_MODE_REL_PINNED);
207} 208}
208 209
209static void start_stack_timers(void) 210static void start_stack_timers(void)