diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-15 13:06:19 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-15 13:06:19 -0400 |
commit | 19035e5b5d1e3127b4925d86f6a77964f91f2c3c (patch) | |
tree | c9e7e9073970176a5b0970da715cb6430c3c9069 /kernel | |
parent | f9db6e095115f9411b9647bdb9d81fe11f3d8b54 (diff) | |
parent | eea08f32adb3f97553d49a4f79a119833036000a (diff) |
Merge branch 'timers-for-linus-migration' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'timers-for-linus-migration' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  timers: Logic to move non pinned timers
  timers: /proc/sys sysctl hook to enable timer migration
  timers: Identifying the existing pinned timers
  timers: Framework for identifying pinned timers
  timers: allow deferrable timers for intervals tv2-tv5 to be deferred

Fix up conflicts in kernel/sched.c and kernel/timer.c manually
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/hrtimer.c | 58 | ||||
-rw-r--r-- | kernel/sched.c | 11 | ||||
-rw-r--r-- | kernel/sysctl.c | 8 | ||||
-rw-r--r-- | kernel/time/clockevents.c | 12 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 7 | ||||
-rw-r--r-- | kernel/timer.c | 51 | ||||
-rw-r--r-- | kernel/trace/trace_sysprof.c | 3 |
7 files changed, 132 insertions, 18 deletions
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index cb8a15c19583..b675a67c9ac3 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c | |||
@@ -43,6 +43,8 @@ | |||
43 | #include <linux/seq_file.h> | 43 | #include <linux/seq_file.h> |
44 | #include <linux/err.h> | 44 | #include <linux/err.h> |
45 | #include <linux/debugobjects.h> | 45 | #include <linux/debugobjects.h> |
46 | #include <linux/sched.h> | ||
47 | #include <linux/timer.h> | ||
46 | 48 | ||
47 | #include <asm/uaccess.h> | 49 | #include <asm/uaccess.h> |
48 | 50 | ||
@@ -193,12 +195,24 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, | |||
193 | * Switch the timer base to the current CPU when possible. | 195 | * Switch the timer base to the current CPU when possible. |
194 | */ | 196 | */ |
195 | static inline struct hrtimer_clock_base * | 197 | static inline struct hrtimer_clock_base * |
196 | switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base) | 198 | switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, |
199 | int pinned) | ||
197 | { | 200 | { |
198 | struct hrtimer_clock_base *new_base; | 201 | struct hrtimer_clock_base *new_base; |
199 | struct hrtimer_cpu_base *new_cpu_base; | 202 | struct hrtimer_cpu_base *new_cpu_base; |
203 | int cpu, preferred_cpu = -1; | ||
204 | |||
205 | cpu = smp_processor_id(); | ||
206 | #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) | ||
207 | if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) { | ||
208 | preferred_cpu = get_nohz_load_balancer(); | ||
209 | if (preferred_cpu >= 0) | ||
210 | cpu = preferred_cpu; | ||
211 | } | ||
212 | #endif | ||
200 | 213 | ||
201 | new_cpu_base = &__get_cpu_var(hrtimer_bases); | 214 | again: |
215 | new_cpu_base = &per_cpu(hrtimer_bases, cpu); | ||
202 | new_base = &new_cpu_base->clock_base[base->index]; | 216 | new_base = &new_cpu_base->clock_base[base->index]; |
203 | 217 | ||
204 | if (base != new_base) { | 218 | if (base != new_base) { |
@@ -218,6 +232,40 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base) | |||
218 | timer->base = NULL; | 232 | timer->base = NULL; |
219 | spin_unlock(&base->cpu_base->lock); | 233 | spin_unlock(&base->cpu_base->lock); |
220 | spin_lock(&new_base->cpu_base->lock); | 234 | spin_lock(&new_base->cpu_base->lock); |
235 | |||
236 | /* Optimized away for NOHZ=n SMP=n */ | ||
237 | if (cpu == preferred_cpu) { | ||
238 | /* Calculate clock monotonic expiry time */ | ||
239 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
240 | ktime_t expires = ktime_sub(hrtimer_get_expires(timer), | ||
241 | new_base->offset); | ||
242 | #else | ||
243 | ktime_t expires = hrtimer_get_expires(timer); | ||
244 | #endif | ||
245 | |||
246 | /* | ||
247 | * Get the next event on target cpu from the | ||
248 | * clock events layer. | ||
249 | * This covers the highres=off nohz=on case as well. | ||
250 | */ | ||
251 | ktime_t next = clockevents_get_next_event(cpu); | ||
252 | |||
253 | ktime_t delta = ktime_sub(expires, next); | ||
254 | |||
255 | /* | ||
256 | * We do not migrate the timer when it is expiring | ||
257 | * before the next event on the target cpu because | ||
258 | * we cannot reprogram the target cpu hardware and | ||
259 | * we would cause it to fire late. | ||
260 | */ | ||
261 | if (delta.tv64 < 0) { | ||
262 | cpu = smp_processor_id(); | ||
263 | spin_unlock(&new_base->cpu_base->lock); | ||
264 | spin_lock(&base->cpu_base->lock); | ||
265 | timer->base = base; | ||
266 | goto again; | ||
267 | } | ||
268 | } | ||
221 | timer->base = new_base; | 269 | timer->base = new_base; |
222 | } | 270 | } |
223 | return new_base; | 271 | return new_base; |
@@ -235,7 +283,7 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) | |||
235 | return base; | 283 | return base; |
236 | } | 284 | } |
237 | 285 | ||
238 | # define switch_hrtimer_base(t, b) (b) | 286 | # define switch_hrtimer_base(t, b, p) (b) |
239 | 287 | ||
240 | #endif /* !CONFIG_SMP */ | 288 | #endif /* !CONFIG_SMP */ |
241 | 289 | ||
@@ -907,9 +955,9 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, | |||
907 | ret = remove_hrtimer(timer, base); | 955 | ret = remove_hrtimer(timer, base); |
908 | 956 | ||
909 | /* Switch the timer base, if necessary: */ | 957 | /* Switch the timer base, if necessary: */ |
910 | new_base = switch_hrtimer_base(timer, base); | 958 | new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); |
911 | 959 | ||
912 | if (mode == HRTIMER_MODE_REL) { | 960 | if (mode & HRTIMER_MODE_REL) { |
913 | tim = ktime_add_safe(tim, new_base->get_time()); | 961 | tim = ktime_add_safe(tim, new_base->get_time()); |
914 | /* | 962 | /* |
915 | * CONFIG_TIME_LOW_RES is a temporary way for architectures | 963 | * CONFIG_TIME_LOW_RES is a temporary way for architectures |
diff --git a/kernel/sched.c b/kernel/sched.c index 8ec9d13140be..8fb88a906aaa 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -240,7 +240,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b) | |||
240 | hard = hrtimer_get_expires(&rt_b->rt_period_timer); | 240 | hard = hrtimer_get_expires(&rt_b->rt_period_timer); |
241 | delta = ktime_to_ns(ktime_sub(hard, soft)); | 241 | delta = ktime_to_ns(ktime_sub(hard, soft)); |
242 | __hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta, | 242 | __hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta, |
243 | HRTIMER_MODE_ABS, 0); | 243 | HRTIMER_MODE_ABS_PINNED, 0); |
244 | } | 244 | } |
245 | spin_unlock(&rt_b->rt_runtime_lock); | 245 | spin_unlock(&rt_b->rt_runtime_lock); |
246 | } | 246 | } |
@@ -1155,7 +1155,7 @@ static __init void init_hrtick(void) | |||
1155 | static void hrtick_start(struct rq *rq, u64 delay) | 1155 | static void hrtick_start(struct rq *rq, u64 delay) |
1156 | { | 1156 | { |
1157 | __hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0, | 1157 | __hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0, |
1158 | HRTIMER_MODE_REL, 0); | 1158 | HRTIMER_MODE_REL_PINNED, 0); |
1159 | } | 1159 | } |
1160 | 1160 | ||
1161 | static inline void init_hrtick(void) | 1161 | static inline void init_hrtick(void) |
@@ -4397,6 +4397,11 @@ static struct { | |||
4397 | .load_balancer = ATOMIC_INIT(-1), | 4397 | .load_balancer = ATOMIC_INIT(-1), |
4398 | }; | 4398 | }; |
4399 | 4399 | ||
4400 | int get_nohz_load_balancer(void) | ||
4401 | { | ||
4402 | return atomic_read(&nohz.load_balancer); | ||
4403 | } | ||
4404 | |||
4400 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) | 4405 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) |
4401 | /** | 4406 | /** |
4402 | * lowest_flag_domain - Return lowest sched_domain containing flag. | 4407 | * lowest_flag_domain - Return lowest sched_domain containing flag. |
@@ -9029,6 +9034,8 @@ void __init sched_init_smp(void) | |||
9029 | } | 9034 | } |
9030 | #endif /* CONFIG_SMP */ | 9035 | #endif /* CONFIG_SMP */ |
9031 | 9036 | ||
9037 | const_debug unsigned int sysctl_timer_migration = 1; | ||
9038 | |||
9032 | int in_sched_functions(unsigned long addr) | 9039 | int in_sched_functions(unsigned long addr) |
9033 | { | 9040 | { |
9034 | return in_lock_functions(addr) || | 9041 | return in_lock_functions(addr) || |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ce664f98e3fb..0e51a35a4486 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -328,6 +328,14 @@ static struct ctl_table kern_table[] = { | |||
328 | .mode = 0644, | 328 | .mode = 0644, |
329 | .proc_handler = &proc_dointvec, | 329 | .proc_handler = &proc_dointvec, |
330 | }, | 330 | }, |
331 | { | ||
332 | .ctl_name = CTL_UNNUMBERED, | ||
333 | .procname = "timer_migration", | ||
334 | .data = &sysctl_timer_migration, | ||
335 | .maxlen = sizeof(unsigned int), | ||
336 | .mode = 0644, | ||
337 | .proc_handler = &proc_dointvec, | ||
338 | }, | ||
331 | #endif | 339 | #endif |
332 | { | 340 | { |
333 | .ctl_name = CTL_UNNUMBERED, | 341 | .ctl_name = CTL_UNNUMBERED, |
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 3948fa644a2d..1ad6dd461119 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/notifier.h> | 18 | #include <linux/notifier.h> |
19 | #include <linux/smp.h> | 19 | #include <linux/smp.h> |
20 | #include <linux/sysdev.h> | 20 | #include <linux/sysdev.h> |
21 | #include <linux/tick.h> | ||
21 | 22 | ||
22 | /* The registered clock event devices */ | 23 | /* The registered clock event devices */ |
23 | static LIST_HEAD(clockevent_devices); | 24 | static LIST_HEAD(clockevent_devices); |
@@ -253,4 +254,15 @@ void clockevents_notify(unsigned long reason, void *arg) | |||
253 | spin_unlock(&clockevents_lock); | 254 | spin_unlock(&clockevents_lock); |
254 | } | 255 | } |
255 | EXPORT_SYMBOL_GPL(clockevents_notify); | 256 | EXPORT_SYMBOL_GPL(clockevents_notify); |
257 | |||
258 | ktime_t clockevents_get_next_event(int cpu) | ||
259 | { | ||
260 | struct tick_device *td; | ||
261 | struct clock_event_device *dev; | ||
262 | |||
263 | td = &per_cpu(tick_cpu_device, cpu); | ||
264 | dev = td->evtdev; | ||
265 | |||
266 | return dev->next_event; | ||
267 | } | ||
256 | #endif | 268 | #endif |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index d3f1ef4d5cbe..2aff39c6f10c 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -349,7 +349,7 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
349 | 349 | ||
350 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { | 350 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { |
351 | hrtimer_start(&ts->sched_timer, expires, | 351 | hrtimer_start(&ts->sched_timer, expires, |
352 | HRTIMER_MODE_ABS); | 352 | HRTIMER_MODE_ABS_PINNED); |
353 | /* Check, if the timer was already in the past */ | 353 | /* Check, if the timer was already in the past */ |
354 | if (hrtimer_active(&ts->sched_timer)) | 354 | if (hrtimer_active(&ts->sched_timer)) |
355 | goto out; | 355 | goto out; |
@@ -395,7 +395,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) | |||
395 | 395 | ||
396 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { | 396 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { |
397 | hrtimer_start_expires(&ts->sched_timer, | 397 | hrtimer_start_expires(&ts->sched_timer, |
398 | HRTIMER_MODE_ABS); | 398 | HRTIMER_MODE_ABS_PINNED); |
399 | /* Check, if the timer was already in the past */ | 399 | /* Check, if the timer was already in the past */ |
400 | if (hrtimer_active(&ts->sched_timer)) | 400 | if (hrtimer_active(&ts->sched_timer)) |
401 | break; | 401 | break; |
@@ -698,7 +698,8 @@ void tick_setup_sched_timer(void) | |||
698 | 698 | ||
699 | for (;;) { | 699 | for (;;) { |
700 | hrtimer_forward(&ts->sched_timer, now, tick_period); | 700 | hrtimer_forward(&ts->sched_timer, now, tick_period); |
701 | hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS); | 701 | hrtimer_start_expires(&ts->sched_timer, |
702 | HRTIMER_MODE_ABS_PINNED); | ||
702 | /* Check, if the timer was already in the past */ | 703 | /* Check, if the timer was already in the past */ |
703 | if (hrtimer_active(&ts->sched_timer)) | 704 | if (hrtimer_active(&ts->sched_timer)) |
704 | break; | 705 | break; |
diff --git a/kernel/timer.c b/kernel/timer.c index faf2db897de4..54d3912f8cad 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include <linux/tick.h> | 38 | #include <linux/tick.h> |
39 | #include <linux/kallsyms.h> | 39 | #include <linux/kallsyms.h> |
40 | #include <linux/perf_counter.h> | 40 | #include <linux/perf_counter.h> |
41 | #include <linux/sched.h> | ||
41 | 42 | ||
42 | #include <asm/uaccess.h> | 43 | #include <asm/uaccess.h> |
43 | #include <asm/unistd.h> | 44 | #include <asm/unistd.h> |
@@ -605,13 +606,12 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer, | |||
605 | } | 606 | } |
606 | 607 | ||
607 | static inline int | 608 | static inline int |
608 | __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) | 609 | __mod_timer(struct timer_list *timer, unsigned long expires, |
610 | bool pending_only, int pinned) | ||
609 | { | 611 | { |
610 | struct tvec_base *base, *new_base; | 612 | struct tvec_base *base, *new_base; |
611 | unsigned long flags; | 613 | unsigned long flags; |
612 | int ret; | 614 | int ret = 0 , cpu; |
613 | |||
614 | ret = 0; | ||
615 | 615 | ||
616 | timer_stats_timer_set_start_info(timer); | 616 | timer_stats_timer_set_start_info(timer); |
617 | BUG_ON(!timer->function); | 617 | BUG_ON(!timer->function); |
@@ -630,6 +630,18 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) | |||
630 | 630 | ||
631 | new_base = __get_cpu_var(tvec_bases); | 631 | new_base = __get_cpu_var(tvec_bases); |
632 | 632 | ||
633 | cpu = smp_processor_id(); | ||
634 | |||
635 | #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) | ||
636 | if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) { | ||
637 | int preferred_cpu = get_nohz_load_balancer(); | ||
638 | |||
639 | if (preferred_cpu >= 0) | ||
640 | cpu = preferred_cpu; | ||
641 | } | ||
642 | #endif | ||
643 | new_base = per_cpu(tvec_bases, cpu); | ||
644 | |||
633 | if (base != new_base) { | 645 | if (base != new_base) { |
634 | /* | 646 | /* |
635 | * We are trying to schedule the timer on the local CPU. | 647 | * We are trying to schedule the timer on the local CPU. |
@@ -669,7 +681,7 @@ out_unlock: | |||
669 | */ | 681 | */ |
670 | int mod_timer_pending(struct timer_list *timer, unsigned long expires) | 682 | int mod_timer_pending(struct timer_list *timer, unsigned long expires) |
671 | { | 683 | { |
672 | return __mod_timer(timer, expires, true); | 684 | return __mod_timer(timer, expires, true, TIMER_NOT_PINNED); |
673 | } | 685 | } |
674 | EXPORT_SYMBOL(mod_timer_pending); | 686 | EXPORT_SYMBOL(mod_timer_pending); |
675 | 687 | ||
@@ -703,11 +715,33 @@ int mod_timer(struct timer_list *timer, unsigned long expires) | |||
703 | if (timer->expires == expires && timer_pending(timer)) | 715 | if (timer->expires == expires && timer_pending(timer)) |
704 | return 1; | 716 | return 1; |
705 | 717 | ||
706 | return __mod_timer(timer, expires, false); | 718 | return __mod_timer(timer, expires, false, TIMER_NOT_PINNED); |
707 | } | 719 | } |
708 | EXPORT_SYMBOL(mod_timer); | 720 | EXPORT_SYMBOL(mod_timer); |
709 | 721 | ||
710 | /** | 722 | /** |
723 | * mod_timer_pinned - modify a timer's timeout | ||
724 | * @timer: the timer to be modified | ||
725 | * @expires: new timeout in jiffies | ||
726 | * | ||
727 | * mod_timer_pinned() is a way to update the expire field of an | ||
728 | * active timer (if the timer is inactive it will be activated) | ||
729 | * and not allow the timer to be migrated to a different CPU. | ||
730 | * | ||
731 | * mod_timer_pinned(timer, expires) is equivalent to: | ||
732 | * | ||
733 | * del_timer(timer); timer->expires = expires; add_timer(timer); | ||
734 | */ | ||
735 | int mod_timer_pinned(struct timer_list *timer, unsigned long expires) | ||
736 | { | ||
737 | if (timer->expires == expires && timer_pending(timer)) | ||
738 | return 1; | ||
739 | |||
740 | return __mod_timer(timer, expires, false, TIMER_PINNED); | ||
741 | } | ||
742 | EXPORT_SYMBOL(mod_timer_pinned); | ||
743 | |||
744 | /** | ||
711 | * add_timer - start a timer | 745 | * add_timer - start a timer |
712 | * @timer: the timer to be added | 746 | * @timer: the timer to be added |
713 | * | 747 | * |
@@ -1017,6 +1051,9 @@ cascade: | |||
1017 | index = slot = timer_jiffies & TVN_MASK; | 1051 | index = slot = timer_jiffies & TVN_MASK; |
1018 | do { | 1052 | do { |
1019 | list_for_each_entry(nte, varp->vec + slot, entry) { | 1053 | list_for_each_entry(nte, varp->vec + slot, entry) { |
1054 | if (tbase_get_deferrable(nte->base)) | ||
1055 | continue; | ||
1056 | |||
1020 | found = 1; | 1057 | found = 1; |
1021 | if (time_before(nte->expires, expires)) | 1058 | if (time_before(nte->expires, expires)) |
1022 | expires = nte->expires; | 1059 | expires = nte->expires; |
@@ -1307,7 +1344,7 @@ signed long __sched schedule_timeout(signed long timeout) | |||
1307 | expire = timeout + jiffies; | 1344 | expire = timeout + jiffies; |
1308 | 1345 | ||
1309 | setup_timer_on_stack(&timer, process_timeout, (unsigned long)current); | 1346 | setup_timer_on_stack(&timer, process_timeout, (unsigned long)current); |
1310 | __mod_timer(&timer, expire, false); | 1347 | __mod_timer(&timer, expire, false, TIMER_NOT_PINNED); |
1311 | schedule(); | 1348 | schedule(); |
1312 | del_singleshot_timer_sync(&timer); | 1349 | del_singleshot_timer_sync(&timer); |
1313 | 1350 | ||
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index e04b76cc238a..f6693969287d 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c | |||
@@ -203,7 +203,8 @@ static void start_stack_timer(void *unused) | |||
203 | hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 203 | hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
204 | hrtimer->function = stack_trace_timer_fn; | 204 | hrtimer->function = stack_trace_timer_fn; |
205 | 205 | ||
206 | hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL); | 206 | hrtimer_start(hrtimer, ns_to_ktime(sample_period), |
207 | HRTIMER_MODE_REL_PINNED); | ||
207 | } | 208 | } |
208 | 209 | ||
209 | static void start_stack_timers(void) | 210 | static void start_stack_timers(void) |