diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-08 22:27:08 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-08 22:27:08 -0500 |
commit | 60d8ce2cd6c283132928c11f3fd57ff4187287e0 (patch) | |
tree | 36d08a2ead7a7d8c3c081d484215ccca00bf6aab /kernel | |
parent | 849e8dea099aafa56db9e74b580b0d858b956533 (diff) | |
parent | feae3203d711db0a9965300ee6d592257fdaae4f (diff) |
Merge branch 'timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
timers, init: Limit the number of per cpu calibration bootup messages
posix-cpu-timers: optimize and document timer_create callback
clockevents: Add missing include to pacify sparse
x86: vmiclock: Fix printk format
x86: Fix printk format due to variable type change
sparc: fix printk for change of variable type
clocksource/events: Fix fallout of generic code changes
nohz: Allow 32-bit machines to sleep for more than 2.15 seconds
nohz: Track last do_timer() cpu
nohz: Prevent clocksource wrapping during idle
nohz: Type cast printk argument
mips: Use generic mult/shift factor calculation for clocks
clocksource: Provide a generic mult/shift factor calculation
clockevents: Use u32 for mult and shift factors
nohz: Introduce arch_needs_cpu
nohz: Reuse ktime in sub-functions of tick_check_idle.
time: Remove xtime_cache
time: Implement logarithmic time accumulation
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cpu.c | 5 | ||||
-rw-r--r-- | kernel/hrtimer.c | 3 | ||||
-rw-r--r-- | kernel/posix-cpu-timers.c | 5 | ||||
-rw-r--r-- | kernel/time.c | 1 | ||||
-rw-r--r-- | kernel/time/clockevents.c | 13 | ||||
-rw-r--r-- | kernel/time/clocksource.c | 97 | ||||
-rw-r--r-- | kernel/time/tick-oneshot.c | 4 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 141 | ||||
-rw-r--r-- | kernel/time/timekeeping.c | 119 | ||||
-rw-r--r-- | kernel/time/timer_list.c | 10 |
10 files changed, 279 insertions, 119 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c index 6ba0f1ecb212..7c4e2713df0a 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
@@ -392,10 +392,9 @@ int disable_nonboot_cpus(void) | |||
392 | if (cpu == first_cpu) | 392 | if (cpu == first_cpu) |
393 | continue; | 393 | continue; |
394 | error = _cpu_down(cpu, 1); | 394 | error = _cpu_down(cpu, 1); |
395 | if (!error) { | 395 | if (!error) |
396 | cpumask_set_cpu(cpu, frozen_cpus); | 396 | cpumask_set_cpu(cpu, frozen_cpus); |
397 | printk("CPU%d is down\n", cpu); | 397 | else { |
398 | } else { | ||
399 | printk(KERN_ERR "Error taking CPU%d down: %d\n", | 398 | printk(KERN_ERR "Error taking CPU%d down: %d\n", |
400 | cpu, error); | 399 | cpu, error); |
401 | break; | 400 | break; |
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 3e1c36e7998f..ede527708123 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c | |||
@@ -1238,7 +1238,8 @@ hrtimer_interrupt_hanging(struct clock_event_device *dev, | |||
1238 | force_clock_reprogram = 1; | 1238 | force_clock_reprogram = 1; |
1239 | dev->min_delta_ns = (unsigned long)try_time.tv64 * 3; | 1239 | dev->min_delta_ns = (unsigned long)try_time.tv64 * 3; |
1240 | printk(KERN_WARNING "hrtimer: interrupt too slow, " | 1240 | printk(KERN_WARNING "hrtimer: interrupt too slow, " |
1241 | "forcing clock min delta to %lu ns\n", dev->min_delta_ns); | 1241 | "forcing clock min delta to %llu ns\n", |
1242 | (unsigned long long) dev->min_delta_ns); | ||
1242 | } | 1243 | } |
1243 | /* | 1244 | /* |
1244 | * High resolution timer interrupt | 1245 | * High resolution timer interrupt |
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 5c9dc228747b..438ff4523513 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c | |||
@@ -384,7 +384,8 @@ int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) | |||
384 | 384 | ||
385 | /* | 385 | /* |
386 | * Validate the clockid_t for a new CPU-clock timer, and initialize the timer. | 386 | * Validate the clockid_t for a new CPU-clock timer, and initialize the timer. |
387 | * This is called from sys_timer_create with the new timer already locked. | 387 | * This is called from sys_timer_create() and do_cpu_nanosleep() with the |
388 | * new timer already all-zeros initialized. | ||
388 | */ | 389 | */ |
389 | int posix_cpu_timer_create(struct k_itimer *new_timer) | 390 | int posix_cpu_timer_create(struct k_itimer *new_timer) |
390 | { | 391 | { |
@@ -396,8 +397,6 @@ int posix_cpu_timer_create(struct k_itimer *new_timer) | |||
396 | return -EINVAL; | 397 | return -EINVAL; |
397 | 398 | ||
398 | INIT_LIST_HEAD(&new_timer->it.cpu.entry); | 399 | INIT_LIST_HEAD(&new_timer->it.cpu.entry); |
399 | new_timer->it.cpu.incr.sched = 0; | ||
400 | new_timer->it.cpu.expires.sched = 0; | ||
401 | 400 | ||
402 | read_lock(&tasklist_lock); | 401 | read_lock(&tasklist_lock); |
403 | if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) { | 402 | if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) { |
diff --git a/kernel/time.c b/kernel/time.c index 804798005d19..c6324d96009e 100644 --- a/kernel/time.c +++ b/kernel/time.c | |||
@@ -136,7 +136,6 @@ static inline void warp_clock(void) | |||
136 | write_seqlock_irq(&xtime_lock); | 136 | write_seqlock_irq(&xtime_lock); |
137 | wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60; | 137 | wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60; |
138 | xtime.tv_sec += sys_tz.tz_minuteswest * 60; | 138 | xtime.tv_sec += sys_tz.tz_minuteswest * 60; |
139 | update_xtime_cache(0); | ||
140 | write_sequnlock_irq(&xtime_lock); | 139 | write_sequnlock_irq(&xtime_lock); |
141 | clock_was_set(); | 140 | clock_was_set(); |
142 | } | 141 | } |
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 620b58abdc32..20a8920029ee 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c | |||
@@ -20,6 +20,8 @@ | |||
20 | #include <linux/sysdev.h> | 20 | #include <linux/sysdev.h> |
21 | #include <linux/tick.h> | 21 | #include <linux/tick.h> |
22 | 22 | ||
23 | #include "tick-internal.h" | ||
24 | |||
23 | /* The registered clock event devices */ | 25 | /* The registered clock event devices */ |
24 | static LIST_HEAD(clockevent_devices); | 26 | static LIST_HEAD(clockevent_devices); |
25 | static LIST_HEAD(clockevents_released); | 27 | static LIST_HEAD(clockevents_released); |
@@ -37,10 +39,9 @@ static DEFINE_SPINLOCK(clockevents_lock); | |||
37 | * | 39 | * |
38 | * Math helper, returns latch value converted to nanoseconds (bound checked) | 40 | * Math helper, returns latch value converted to nanoseconds (bound checked) |
39 | */ | 41 | */ |
40 | unsigned long clockevent_delta2ns(unsigned long latch, | 42 | u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt) |
41 | struct clock_event_device *evt) | ||
42 | { | 43 | { |
43 | u64 clc = ((u64) latch << evt->shift); | 44 | u64 clc = (u64) latch << evt->shift; |
44 | 45 | ||
45 | if (unlikely(!evt->mult)) { | 46 | if (unlikely(!evt->mult)) { |
46 | evt->mult = 1; | 47 | evt->mult = 1; |
@@ -50,10 +51,10 @@ unsigned long clockevent_delta2ns(unsigned long latch, | |||
50 | do_div(clc, evt->mult); | 51 | do_div(clc, evt->mult); |
51 | if (clc < 1000) | 52 | if (clc < 1000) |
52 | clc = 1000; | 53 | clc = 1000; |
53 | if (clc > LONG_MAX) | 54 | if (clc > KTIME_MAX) |
54 | clc = LONG_MAX; | 55 | clc = KTIME_MAX; |
55 | 56 | ||
56 | return (unsigned long) clc; | 57 | return clc; |
57 | } | 58 | } |
58 | EXPORT_SYMBOL_GPL(clockevent_delta2ns); | 59 | EXPORT_SYMBOL_GPL(clockevent_delta2ns); |
59 | 60 | ||
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 4a310906b3e8..d422c7b2236b 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c | |||
@@ -107,6 +107,59 @@ u64 timecounter_cyc2time(struct timecounter *tc, | |||
107 | } | 107 | } |
108 | EXPORT_SYMBOL_GPL(timecounter_cyc2time); | 108 | EXPORT_SYMBOL_GPL(timecounter_cyc2time); |
109 | 109 | ||
110 | /** | ||
111 | * clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks | ||
112 | * @mult: pointer to mult variable | ||
113 | * @shift: pointer to shift variable | ||
114 | * @from: frequency to convert from | ||
115 | * @to: frequency to convert to | ||
116 | * @minsec: guaranteed runtime conversion range in seconds | ||
117 | * | ||
118 | * The function evaluates the shift/mult pair for the scaled math | ||
119 | * operations of clocksources and clockevents. | ||
120 | * | ||
121 | * @to and @from are frequency values in HZ. For clock sources @to is | ||
122 | * NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock | ||
123 | * event @to is the counter frequency and @from is NSEC_PER_SEC. | ||
124 | * | ||
125 | * The @minsec conversion range argument controls the time frame in | ||
126 | * seconds which must be covered by the runtime conversion with the | ||
127 | * calculated mult and shift factors. This guarantees that no 64bit | ||
128 | * overflow happens when the input value of the conversion is | ||
129 | * multiplied with the calculated mult factor. Larger ranges may | ||
130 | * reduce the conversion accuracy by choosing smaller mult and shift | ||
131 | * factors. | ||
132 | */ | ||
133 | void | ||
134 | clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec) | ||
135 | { | ||
136 | u64 tmp; | ||
137 | u32 sft, sftacc= 32; | ||
138 | |||
139 | /* | ||
140 | * Calculate the shift factor which is limiting the conversion | ||
141 | * range: | ||
142 | */ | ||
143 | tmp = ((u64)minsec * from) >> 32; | ||
144 | while (tmp) { | ||
145 | tmp >>=1; | ||
146 | sftacc--; | ||
147 | } | ||
148 | |||
149 | /* | ||
150 | * Find the conversion shift/mult pair which has the best | ||
151 | * accuracy and fits the maxsec conversion range: | ||
152 | */ | ||
153 | for (sft = 32; sft > 0; sft--) { | ||
154 | tmp = (u64) to << sft; | ||
155 | do_div(tmp, from); | ||
156 | if ((tmp >> sftacc) == 0) | ||
157 | break; | ||
158 | } | ||
159 | *mult = tmp; | ||
160 | *shift = sft; | ||
161 | } | ||
162 | |||
110 | /*[Clocksource internal variables]--------- | 163 | /*[Clocksource internal variables]--------- |
111 | * curr_clocksource: | 164 | * curr_clocksource: |
112 | * currently selected clocksource. | 165 | * currently selected clocksource. |
@@ -413,6 +466,47 @@ void clocksource_touch_watchdog(void) | |||
413 | clocksource_resume_watchdog(); | 466 | clocksource_resume_watchdog(); |
414 | } | 467 | } |
415 | 468 | ||
469 | /** | ||
470 | * clocksource_max_deferment - Returns max time the clocksource can be deferred | ||
471 | * @cs: Pointer to clocksource | ||
472 | * | ||
473 | */ | ||
474 | static u64 clocksource_max_deferment(struct clocksource *cs) | ||
475 | { | ||
476 | u64 max_nsecs, max_cycles; | ||
477 | |||
478 | /* | ||
479 | * Calculate the maximum number of cycles that we can pass to the | ||
480 | * cyc2ns function without overflowing a 64-bit signed result. The | ||
481 | * maximum number of cycles is equal to ULLONG_MAX/cs->mult which | ||
482 | * is equivalent to the below. | ||
483 | * max_cycles < (2^63)/cs->mult | ||
484 | * max_cycles < 2^(log2((2^63)/cs->mult)) | ||
485 | * max_cycles < 2^(log2(2^63) - log2(cs->mult)) | ||
486 | * max_cycles < 2^(63 - log2(cs->mult)) | ||
487 | * max_cycles < 1 << (63 - log2(cs->mult)) | ||
488 | * Please note that we add 1 to the result of the log2 to account for | ||
489 | * any rounding errors, ensure the above inequality is satisfied and | ||
490 | * no overflow will occur. | ||
491 | */ | ||
492 | max_cycles = 1ULL << (63 - (ilog2(cs->mult) + 1)); | ||
493 | |||
494 | /* | ||
495 | * The actual maximum number of cycles we can defer the clocksource is | ||
496 | * determined by the minimum of max_cycles and cs->mask. | ||
497 | */ | ||
498 | max_cycles = min_t(u64, max_cycles, (u64) cs->mask); | ||
499 | max_nsecs = clocksource_cyc2ns(max_cycles, cs->mult, cs->shift); | ||
500 | |||
501 | /* | ||
502 | * To ensure that the clocksource does not wrap whilst we are idle, | ||
503 | * limit the time the clocksource can be deferred by 12.5%. Please | ||
504 | * note a margin of 12.5% is used because this can be computed with | ||
505 | * a shift, versus say 10% which would require division. | ||
506 | */ | ||
507 | return max_nsecs - (max_nsecs >> 5); | ||
508 | } | ||
509 | |||
416 | #ifdef CONFIG_GENERIC_TIME | 510 | #ifdef CONFIG_GENERIC_TIME |
417 | 511 | ||
418 | /** | 512 | /** |
@@ -511,6 +605,9 @@ static void clocksource_enqueue(struct clocksource *cs) | |||
511 | */ | 605 | */ |
512 | int clocksource_register(struct clocksource *cs) | 606 | int clocksource_register(struct clocksource *cs) |
513 | { | 607 | { |
608 | /* calculate max idle time permitted for this clocksource */ | ||
609 | cs->max_idle_ns = clocksource_max_deferment(cs); | ||
610 | |||
514 | mutex_lock(&clocksource_mutex); | 611 | mutex_lock(&clocksource_mutex); |
515 | clocksource_enqueue(cs); | 612 | clocksource_enqueue(cs); |
516 | clocksource_select(); | 613 | clocksource_select(); |
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index a96c0e2b89cf..0a8a213016f0 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c | |||
@@ -50,9 +50,9 @@ int tick_dev_program_event(struct clock_event_device *dev, ktime_t expires, | |||
50 | dev->min_delta_ns += dev->min_delta_ns >> 1; | 50 | dev->min_delta_ns += dev->min_delta_ns >> 1; |
51 | 51 | ||
52 | printk(KERN_WARNING | 52 | printk(KERN_WARNING |
53 | "CE: %s increasing min_delta_ns to %lu nsec\n", | 53 | "CE: %s increasing min_delta_ns to %llu nsec\n", |
54 | dev->name ? dev->name : "?", | 54 | dev->name ? dev->name : "?", |
55 | dev->min_delta_ns << 1); | 55 | (unsigned long long) dev->min_delta_ns << 1); |
56 | 56 | ||
57 | i = 0; | 57 | i = 0; |
58 | } | 58 | } |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 89aed5933ed4..f992762d7f51 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -134,18 +134,13 @@ __setup("nohz=", setup_tick_nohz); | |||
134 | * value. We do this unconditionally on any cpu, as we don't know whether the | 134 | * value. We do this unconditionally on any cpu, as we don't know whether the |
135 | * cpu, which has the update task assigned is in a long sleep. | 135 | * cpu, which has the update task assigned is in a long sleep. |
136 | */ | 136 | */ |
137 | static void tick_nohz_update_jiffies(void) | 137 | static void tick_nohz_update_jiffies(ktime_t now) |
138 | { | 138 | { |
139 | int cpu = smp_processor_id(); | 139 | int cpu = smp_processor_id(); |
140 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 140 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
141 | unsigned long flags; | 141 | unsigned long flags; |
142 | ktime_t now; | ||
143 | |||
144 | if (!ts->tick_stopped) | ||
145 | return; | ||
146 | 142 | ||
147 | cpumask_clear_cpu(cpu, nohz_cpu_mask); | 143 | cpumask_clear_cpu(cpu, nohz_cpu_mask); |
148 | now = ktime_get(); | ||
149 | ts->idle_waketime = now; | 144 | ts->idle_waketime = now; |
150 | 145 | ||
151 | local_irq_save(flags); | 146 | local_irq_save(flags); |
@@ -155,20 +150,17 @@ static void tick_nohz_update_jiffies(void) | |||
155 | touch_softlockup_watchdog(); | 150 | touch_softlockup_watchdog(); |
156 | } | 151 | } |
157 | 152 | ||
158 | static void tick_nohz_stop_idle(int cpu) | 153 | static void tick_nohz_stop_idle(int cpu, ktime_t now) |
159 | { | 154 | { |
160 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 155 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
156 | ktime_t delta; | ||
161 | 157 | ||
162 | if (ts->idle_active) { | 158 | delta = ktime_sub(now, ts->idle_entrytime); |
163 | ktime_t now, delta; | 159 | ts->idle_lastupdate = now; |
164 | now = ktime_get(); | 160 | ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); |
165 | delta = ktime_sub(now, ts->idle_entrytime); | 161 | ts->idle_active = 0; |
166 | ts->idle_lastupdate = now; | ||
167 | ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); | ||
168 | ts->idle_active = 0; | ||
169 | 162 | ||
170 | sched_clock_idle_wakeup_event(0); | 163 | sched_clock_idle_wakeup_event(0); |
171 | } | ||
172 | } | 164 | } |
173 | 165 | ||
174 | static ktime_t tick_nohz_start_idle(struct tick_sched *ts) | 166 | static ktime_t tick_nohz_start_idle(struct tick_sched *ts) |
@@ -216,6 +208,7 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
216 | struct tick_sched *ts; | 208 | struct tick_sched *ts; |
217 | ktime_t last_update, expires, now; | 209 | ktime_t last_update, expires, now; |
218 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; | 210 | struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; |
211 | u64 time_delta; | ||
219 | int cpu; | 212 | int cpu; |
220 | 213 | ||
221 | local_irq_save(flags); | 214 | local_irq_save(flags); |
@@ -263,7 +256,7 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
263 | 256 | ||
264 | if (ratelimit < 10) { | 257 | if (ratelimit < 10) { |
265 | printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", | 258 | printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", |
266 | local_softirq_pending()); | 259 | (unsigned int) local_softirq_pending()); |
267 | ratelimit++; | 260 | ratelimit++; |
268 | } | 261 | } |
269 | goto end; | 262 | goto end; |
@@ -275,14 +268,18 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
275 | seq = read_seqbegin(&xtime_lock); | 268 | seq = read_seqbegin(&xtime_lock); |
276 | last_update = last_jiffies_update; | 269 | last_update = last_jiffies_update; |
277 | last_jiffies = jiffies; | 270 | last_jiffies = jiffies; |
271 | time_delta = timekeeping_max_deferment(); | ||
278 | } while (read_seqretry(&xtime_lock, seq)); | 272 | } while (read_seqretry(&xtime_lock, seq)); |
279 | 273 | ||
280 | /* Get the next timer wheel timer */ | 274 | if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) || |
281 | next_jiffies = get_next_timer_interrupt(last_jiffies); | 275 | arch_needs_cpu(cpu)) { |
282 | delta_jiffies = next_jiffies - last_jiffies; | 276 | next_jiffies = last_jiffies + 1; |
283 | |||
284 | if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu)) | ||
285 | delta_jiffies = 1; | 277 | delta_jiffies = 1; |
278 | } else { | ||
279 | /* Get the next timer wheel timer */ | ||
280 | next_jiffies = get_next_timer_interrupt(last_jiffies); | ||
281 | delta_jiffies = next_jiffies - last_jiffies; | ||
282 | } | ||
286 | /* | 283 | /* |
287 | * Do not stop the tick, if we are only one off | 284 | * Do not stop the tick, if we are only one off |
288 | * or if the cpu is required for rcu | 285 | * or if the cpu is required for rcu |
@@ -294,22 +291,51 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
294 | if ((long)delta_jiffies >= 1) { | 291 | if ((long)delta_jiffies >= 1) { |
295 | 292 | ||
296 | /* | 293 | /* |
297 | * calculate the expiry time for the next timer wheel | ||
298 | * timer | ||
299 | */ | ||
300 | expires = ktime_add_ns(last_update, tick_period.tv64 * | ||
301 | delta_jiffies); | ||
302 | |||
303 | /* | ||
304 | * If this cpu is the one which updates jiffies, then | 294 | * If this cpu is the one which updates jiffies, then |
305 | * give up the assignment and let it be taken by the | 295 | * give up the assignment and let it be taken by the |
306 | * cpu which runs the tick timer next, which might be | 296 | * cpu which runs the tick timer next, which might be |
307 | * this cpu as well. If we don't drop this here the | 297 | * this cpu as well. If we don't drop this here the |
308 | * jiffies might be stale and do_timer() never | 298 | * jiffies might be stale and do_timer() never |
309 | * invoked. | 299 | * invoked. Keep track of the fact that it was the one |
300 | * which had the do_timer() duty last. If this cpu is | ||
301 | * the one which had the do_timer() duty last, we | ||
302 | * limit the sleep time to the timekeeping | ||
303 | * max_deferment value which we retrieved | ||
304 | * above. Otherwise we can sleep as long as we want. | ||
310 | */ | 305 | */ |
311 | if (cpu == tick_do_timer_cpu) | 306 | if (cpu == tick_do_timer_cpu) { |
312 | tick_do_timer_cpu = TICK_DO_TIMER_NONE; | 307 | tick_do_timer_cpu = TICK_DO_TIMER_NONE; |
308 | ts->do_timer_last = 1; | ||
309 | } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) { | ||
310 | time_delta = KTIME_MAX; | ||
311 | ts->do_timer_last = 0; | ||
312 | } else if (!ts->do_timer_last) { | ||
313 | time_delta = KTIME_MAX; | ||
314 | } | ||
315 | |||
316 | /* | ||
317 | * calculate the expiry time for the next timer wheel | ||
318 | * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals | ||
319 | * that there is no timer pending or at least extremely | ||
320 | * far into the future (12 days for HZ=1000). In this | ||
321 | * case we set the expiry to the end of time. | ||
322 | */ | ||
323 | if (likely(delta_jiffies < NEXT_TIMER_MAX_DELTA)) { | ||
324 | /* | ||
325 | * Calculate the time delta for the next timer event. | ||
326 | * If the time delta exceeds the maximum time delta | ||
327 | * permitted by the current clocksource then adjust | ||
328 | * the time delta accordingly to ensure the | ||
329 | * clocksource does not wrap. | ||
330 | */ | ||
331 | time_delta = min_t(u64, time_delta, | ||
332 | tick_period.tv64 * delta_jiffies); | ||
333 | } | ||
334 | |||
335 | if (time_delta < KTIME_MAX) | ||
336 | expires = ktime_add_ns(last_update, time_delta); | ||
337 | else | ||
338 | expires.tv64 = KTIME_MAX; | ||
313 | 339 | ||
314 | if (delta_jiffies > 1) | 340 | if (delta_jiffies > 1) |
315 | cpumask_set_cpu(cpu, nohz_cpu_mask); | 341 | cpumask_set_cpu(cpu, nohz_cpu_mask); |
@@ -342,22 +368,19 @@ void tick_nohz_stop_sched_tick(int inidle) | |||
342 | 368 | ||
343 | ts->idle_sleeps++; | 369 | ts->idle_sleeps++; |
344 | 370 | ||
371 | /* Mark expires */ | ||
372 | ts->idle_expires = expires; | ||
373 | |||
345 | /* | 374 | /* |
346 | * delta_jiffies >= NEXT_TIMER_MAX_DELTA signals that | 375 | * If the expiration time == KTIME_MAX, then |
347 | * there is no timer pending or at least extremly far | 376 | * in this case we simply stop the tick timer. |
348 | * into the future (12 days for HZ=1000). In this case | ||
349 | * we simply stop the tick timer: | ||
350 | */ | 377 | */ |
351 | if (unlikely(delta_jiffies >= NEXT_TIMER_MAX_DELTA)) { | 378 | if (unlikely(expires.tv64 == KTIME_MAX)) { |
352 | ts->idle_expires.tv64 = KTIME_MAX; | ||
353 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) | 379 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) |
354 | hrtimer_cancel(&ts->sched_timer); | 380 | hrtimer_cancel(&ts->sched_timer); |
355 | goto out; | 381 | goto out; |
356 | } | 382 | } |
357 | 383 | ||
358 | /* Mark expiries */ | ||
359 | ts->idle_expires = expires; | ||
360 | |||
361 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { | 384 | if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { |
362 | hrtimer_start(&ts->sched_timer, expires, | 385 | hrtimer_start(&ts->sched_timer, expires, |
363 | HRTIMER_MODE_ABS_PINNED); | 386 | HRTIMER_MODE_ABS_PINNED); |
@@ -436,7 +459,11 @@ void tick_nohz_restart_sched_tick(void) | |||
436 | ktime_t now; | 459 | ktime_t now; |
437 | 460 | ||
438 | local_irq_disable(); | 461 | local_irq_disable(); |
439 | tick_nohz_stop_idle(cpu); | 462 | if (ts->idle_active || (ts->inidle && ts->tick_stopped)) |
463 | now = ktime_get(); | ||
464 | |||
465 | if (ts->idle_active) | ||
466 | tick_nohz_stop_idle(cpu, now); | ||
440 | 467 | ||
441 | if (!ts->inidle || !ts->tick_stopped) { | 468 | if (!ts->inidle || !ts->tick_stopped) { |
442 | ts->inidle = 0; | 469 | ts->inidle = 0; |
@@ -450,7 +477,6 @@ void tick_nohz_restart_sched_tick(void) | |||
450 | 477 | ||
451 | /* Update jiffies first */ | 478 | /* Update jiffies first */ |
452 | select_nohz_load_balancer(0); | 479 | select_nohz_load_balancer(0); |
453 | now = ktime_get(); | ||
454 | tick_do_update_jiffies64(now); | 480 | tick_do_update_jiffies64(now); |
455 | cpumask_clear_cpu(cpu, nohz_cpu_mask); | 481 | cpumask_clear_cpu(cpu, nohz_cpu_mask); |
456 | 482 | ||
@@ -584,22 +610,18 @@ static void tick_nohz_switch_to_nohz(void) | |||
584 | * timer and do not touch the other magic bits which need to be done | 610 | * timer and do not touch the other magic bits which need to be done |
585 | * when idle is left. | 611 | * when idle is left. |
586 | */ | 612 | */ |
587 | static void tick_nohz_kick_tick(int cpu) | 613 | static void tick_nohz_kick_tick(int cpu, ktime_t now) |
588 | { | 614 | { |
589 | #if 0 | 615 | #if 0 |
590 | /* Switch back to 2.6.27 behaviour */ | 616 | /* Switch back to 2.6.27 behaviour */ |
591 | 617 | ||
592 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 618 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
593 | ktime_t delta, now; | 619 | ktime_t delta; |
594 | |||
595 | if (!ts->tick_stopped) | ||
596 | return; | ||
597 | 620 | ||
598 | /* | 621 | /* |
599 | * Do not touch the tick device, when the next expiry is either | 622 | * Do not touch the tick device, when the next expiry is either |
600 | * already reached or less/equal than the tick period. | 623 | * already reached or less/equal than the tick period. |
601 | */ | 624 | */ |
602 | now = ktime_get(); | ||
603 | delta = ktime_sub(hrtimer_get_expires(&ts->sched_timer), now); | 625 | delta = ktime_sub(hrtimer_get_expires(&ts->sched_timer), now); |
604 | if (delta.tv64 <= tick_period.tv64) | 626 | if (delta.tv64 <= tick_period.tv64) |
605 | return; | 627 | return; |
@@ -608,9 +630,26 @@ static void tick_nohz_kick_tick(int cpu) | |||
608 | #endif | 630 | #endif |
609 | } | 631 | } |
610 | 632 | ||
633 | static inline void tick_check_nohz(int cpu) | ||
634 | { | ||
635 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | ||
636 | ktime_t now; | ||
637 | |||
638 | if (!ts->idle_active && !ts->tick_stopped) | ||
639 | return; | ||
640 | now = ktime_get(); | ||
641 | if (ts->idle_active) | ||
642 | tick_nohz_stop_idle(cpu, now); | ||
643 | if (ts->tick_stopped) { | ||
644 | tick_nohz_update_jiffies(now); | ||
645 | tick_nohz_kick_tick(cpu, now); | ||
646 | } | ||
647 | } | ||
648 | |||
611 | #else | 649 | #else |
612 | 650 | ||
613 | static inline void tick_nohz_switch_to_nohz(void) { } | 651 | static inline void tick_nohz_switch_to_nohz(void) { } |
652 | static inline void tick_check_nohz(int cpu) { } | ||
614 | 653 | ||
615 | #endif /* NO_HZ */ | 654 | #endif /* NO_HZ */ |
616 | 655 | ||
@@ -620,11 +659,7 @@ static inline void tick_nohz_switch_to_nohz(void) { } | |||
620 | void tick_check_idle(int cpu) | 659 | void tick_check_idle(int cpu) |
621 | { | 660 | { |
622 | tick_check_oneshot_broadcast(cpu); | 661 | tick_check_oneshot_broadcast(cpu); |
623 | #ifdef CONFIG_NO_HZ | 662 | tick_check_nohz(cpu); |
624 | tick_nohz_stop_idle(cpu); | ||
625 | tick_nohz_update_jiffies(); | ||
626 | tick_nohz_kick_tick(cpu); | ||
627 | #endif | ||
628 | } | 663 | } |
629 | 664 | ||
630 | /* | 665 | /* |
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index c3a4e2907eaa..d1aebd73b191 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
@@ -165,13 +165,6 @@ struct timespec raw_time; | |||
165 | /* flag for if timekeeping is suspended */ | 165 | /* flag for if timekeeping is suspended */ |
166 | int __read_mostly timekeeping_suspended; | 166 | int __read_mostly timekeeping_suspended; |
167 | 167 | ||
168 | static struct timespec xtime_cache __attribute__ ((aligned (16))); | ||
169 | void update_xtime_cache(u64 nsec) | ||
170 | { | ||
171 | xtime_cache = xtime; | ||
172 | timespec_add_ns(&xtime_cache, nsec); | ||
173 | } | ||
174 | |||
175 | /* must hold xtime_lock */ | 168 | /* must hold xtime_lock */ |
176 | void timekeeping_leap_insert(int leapsecond) | 169 | void timekeeping_leap_insert(int leapsecond) |
177 | { | 170 | { |
@@ -332,8 +325,6 @@ int do_settimeofday(struct timespec *tv) | |||
332 | 325 | ||
333 | xtime = *tv; | 326 | xtime = *tv; |
334 | 327 | ||
335 | update_xtime_cache(0); | ||
336 | |||
337 | timekeeper.ntp_error = 0; | 328 | timekeeper.ntp_error = 0; |
338 | ntp_clear(); | 329 | ntp_clear(); |
339 | 330 | ||
@@ -488,6 +479,17 @@ int timekeeping_valid_for_hres(void) | |||
488 | } | 479 | } |
489 | 480 | ||
490 | /** | 481 | /** |
482 | * timekeeping_max_deferment - Returns max time the clocksource can be deferred | ||
483 | * | ||
484 | * Caller must observe xtime_lock via read_seqbegin/read_seqretry to | ||
485 | * ensure that the clocksource does not change! | ||
486 | */ | ||
487 | u64 timekeeping_max_deferment(void) | ||
488 | { | ||
489 | return timekeeper.clock->max_idle_ns; | ||
490 | } | ||
491 | |||
492 | /** | ||
491 | * read_persistent_clock - Return time from the persistent clock. | 493 | * read_persistent_clock - Return time from the persistent clock. |
492 | * | 494 | * |
493 | * Weak dummy function for arches that do not yet support it. | 495 | * Weak dummy function for arches that do not yet support it. |
@@ -548,7 +550,6 @@ void __init timekeeping_init(void) | |||
548 | } | 550 | } |
549 | set_normalized_timespec(&wall_to_monotonic, | 551 | set_normalized_timespec(&wall_to_monotonic, |
550 | -boot.tv_sec, -boot.tv_nsec); | 552 | -boot.tv_sec, -boot.tv_nsec); |
551 | update_xtime_cache(0); | ||
552 | total_sleep_time.tv_sec = 0; | 553 | total_sleep_time.tv_sec = 0; |
553 | total_sleep_time.tv_nsec = 0; | 554 | total_sleep_time.tv_nsec = 0; |
554 | write_sequnlock_irqrestore(&xtime_lock, flags); | 555 | write_sequnlock_irqrestore(&xtime_lock, flags); |
@@ -582,7 +583,6 @@ static int timekeeping_resume(struct sys_device *dev) | |||
582 | wall_to_monotonic = timespec_sub(wall_to_monotonic, ts); | 583 | wall_to_monotonic = timespec_sub(wall_to_monotonic, ts); |
583 | total_sleep_time = timespec_add_safe(total_sleep_time, ts); | 584 | total_sleep_time = timespec_add_safe(total_sleep_time, ts); |
584 | } | 585 | } |
585 | update_xtime_cache(0); | ||
586 | /* re-base the last cycle value */ | 586 | /* re-base the last cycle value */ |
587 | timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); | 587 | timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); |
588 | timekeeper.ntp_error = 0; | 588 | timekeeper.ntp_error = 0; |
@@ -723,6 +723,49 @@ static void timekeeping_adjust(s64 offset) | |||
723 | } | 723 | } |
724 | 724 | ||
725 | /** | 725 | /** |
726 | * logarithmic_accumulation - shifted accumulation of cycles | ||
727 | * | ||
728 | * This function accumulates a shifted interval of cycles into | ||
729 | * a shifted interval of nanoseconds. Allows for O(log) accumulation | ||
730 | * loop. | ||
731 | * | ||
732 | * Returns the unconsumed cycles. | ||
733 | */ | ||
734 | static cycle_t logarithmic_accumulation(cycle_t offset, int shift) | ||
735 | { | ||
736 | u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift; | ||
737 | |||
738 | /* If the offset is smaller than a shifted interval, do nothing */ | ||
739 | if (offset < timekeeper.cycle_interval<<shift) | ||
740 | return offset; | ||
741 | |||
742 | /* Accumulate one shifted interval */ | ||
743 | offset -= timekeeper.cycle_interval << shift; | ||
744 | timekeeper.clock->cycle_last += timekeeper.cycle_interval << shift; | ||
745 | |||
746 | timekeeper.xtime_nsec += timekeeper.xtime_interval << shift; | ||
747 | while (timekeeper.xtime_nsec >= nsecps) { | ||
748 | timekeeper.xtime_nsec -= nsecps; | ||
749 | xtime.tv_sec++; | ||
750 | second_overflow(); | ||
751 | } | ||
752 | |||
753 | /* Accumulate into raw time */ | ||
754 | raw_time.tv_nsec += timekeeper.raw_interval << shift;; | ||
755 | while (raw_time.tv_nsec >= NSEC_PER_SEC) { | ||
756 | raw_time.tv_nsec -= NSEC_PER_SEC; | ||
757 | raw_time.tv_sec++; | ||
758 | } | ||
759 | |||
760 | /* Accumulate error between NTP and clock interval */ | ||
761 | timekeeper.ntp_error += tick_length << shift; | ||
762 | timekeeper.ntp_error -= timekeeper.xtime_interval << | ||
763 | (timekeeper.ntp_error_shift + shift); | ||
764 | |||
765 | return offset; | ||
766 | } | ||
767 | |||
768 | /** | ||
726 | * update_wall_time - Uses the current clocksource to increment the wall time | 769 | * update_wall_time - Uses the current clocksource to increment the wall time |
727 | * | 770 | * |
728 | * Called from the timer interrupt, must hold a write on xtime_lock. | 771 | * Called from the timer interrupt, must hold a write on xtime_lock. |
@@ -731,7 +774,7 @@ void update_wall_time(void) | |||
731 | { | 774 | { |
732 | struct clocksource *clock; | 775 | struct clocksource *clock; |
733 | cycle_t offset; | 776 | cycle_t offset; |
734 | u64 nsecs; | 777 | int shift = 0, maxshift; |
735 | 778 | ||
736 | /* Make sure we're fully resumed: */ | 779 | /* Make sure we're fully resumed: */ |
737 | if (unlikely(timekeeping_suspended)) | 780 | if (unlikely(timekeeping_suspended)) |
@@ -745,33 +788,22 @@ void update_wall_time(void) | |||
745 | #endif | 788 | #endif |
746 | timekeeper.xtime_nsec = (s64)xtime.tv_nsec << timekeeper.shift; | 789 | timekeeper.xtime_nsec = (s64)xtime.tv_nsec << timekeeper.shift; |
747 | 790 | ||
748 | /* normally this loop will run just once, however in the | 791 | /* |
749 | * case of lost or late ticks, it will accumulate correctly. | 792 | * With NO_HZ we may have to accumulate many cycle_intervals |
793 | * (think "ticks") worth of time at once. To do this efficiently, | ||
794 | * we calculate the largest doubling multiple of cycle_intervals | ||
795 | * that is smaller than the offset. We then accumulate that | ||
796 | * chunk in one go, and then try to consume the next smaller | ||
797 | * doubled multiple. | ||
750 | */ | 798 | */ |
799 | shift = ilog2(offset) - ilog2(timekeeper.cycle_interval); | ||
800 | shift = max(0, shift); | ||
801 | /* Bound shift to one less than what overflows tick_length */ | ||
802 | maxshift = (8*sizeof(tick_length) - (ilog2(tick_length)+1)) - 1; | ||
803 | shift = min(shift, maxshift); | ||
751 | while (offset >= timekeeper.cycle_interval) { | 804 | while (offset >= timekeeper.cycle_interval) { |
752 | u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift; | 805 | offset = logarithmic_accumulation(offset, shift); |
753 | 806 | shift--; | |
754 | /* accumulate one interval */ | ||
755 | offset -= timekeeper.cycle_interval; | ||
756 | clock->cycle_last += timekeeper.cycle_interval; | ||
757 | |||
758 | timekeeper.xtime_nsec += timekeeper.xtime_interval; | ||
759 | if (timekeeper.xtime_nsec >= nsecps) { | ||
760 | timekeeper.xtime_nsec -= nsecps; | ||
761 | xtime.tv_sec++; | ||
762 | second_overflow(); | ||
763 | } | ||
764 | |||
765 | raw_time.tv_nsec += timekeeper.raw_interval; | ||
766 | if (raw_time.tv_nsec >= NSEC_PER_SEC) { | ||
767 | raw_time.tv_nsec -= NSEC_PER_SEC; | ||
768 | raw_time.tv_sec++; | ||
769 | } | ||
770 | |||
771 | /* accumulate error between NTP and clock interval */ | ||
772 | timekeeper.ntp_error += tick_length; | ||
773 | timekeeper.ntp_error -= timekeeper.xtime_interval << | ||
774 | timekeeper.ntp_error_shift; | ||
775 | } | 807 | } |
776 | 808 | ||
777 | /* correct the clock when NTP error is too big */ | 809 | /* correct the clock when NTP error is too big */ |
@@ -807,9 +839,6 @@ void update_wall_time(void) | |||
807 | timekeeper.ntp_error += timekeeper.xtime_nsec << | 839 | timekeeper.ntp_error += timekeeper.xtime_nsec << |
808 | timekeeper.ntp_error_shift; | 840 | timekeeper.ntp_error_shift; |
809 | 841 | ||
810 | nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift); | ||
811 | update_xtime_cache(nsecs); | ||
812 | |||
813 | /* check to see if there is a new clocksource to use */ | 842 | /* check to see if there is a new clocksource to use */ |
814 | update_vsyscall(&xtime, timekeeper.clock); | 843 | update_vsyscall(&xtime, timekeeper.clock); |
815 | } | 844 | } |
@@ -846,13 +875,13 @@ void monotonic_to_bootbased(struct timespec *ts) | |||
846 | 875 | ||
847 | unsigned long get_seconds(void) | 876 | unsigned long get_seconds(void) |
848 | { | 877 | { |
849 | return xtime_cache.tv_sec; | 878 | return xtime.tv_sec; |
850 | } | 879 | } |
851 | EXPORT_SYMBOL(get_seconds); | 880 | EXPORT_SYMBOL(get_seconds); |
852 | 881 | ||
853 | struct timespec __current_kernel_time(void) | 882 | struct timespec __current_kernel_time(void) |
854 | { | 883 | { |
855 | return xtime_cache; | 884 | return xtime; |
856 | } | 885 | } |
857 | 886 | ||
858 | struct timespec current_kernel_time(void) | 887 | struct timespec current_kernel_time(void) |
@@ -862,8 +891,7 @@ struct timespec current_kernel_time(void) | |||
862 | 891 | ||
863 | do { | 892 | do { |
864 | seq = read_seqbegin(&xtime_lock); | 893 | seq = read_seqbegin(&xtime_lock); |
865 | 894 | now = xtime; | |
866 | now = xtime_cache; | ||
867 | } while (read_seqretry(&xtime_lock, seq)); | 895 | } while (read_seqretry(&xtime_lock, seq)); |
868 | 896 | ||
869 | return now; | 897 | return now; |
@@ -877,8 +905,7 @@ struct timespec get_monotonic_coarse(void) | |||
877 | 905 | ||
878 | do { | 906 | do { |
879 | seq = read_seqbegin(&xtime_lock); | 907 | seq = read_seqbegin(&xtime_lock); |
880 | 908 | now = xtime; | |
881 | now = xtime_cache; | ||
882 | mono = wall_to_monotonic; | 909 | mono = wall_to_monotonic; |
883 | } while (read_seqretry(&xtime_lock, seq)); | 910 | } while (read_seqretry(&xtime_lock, seq)); |
884 | 911 | ||
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 1b5b7aa2fdfd..665c76edbf17 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c | |||
@@ -204,10 +204,12 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) | |||
204 | return; | 204 | return; |
205 | } | 205 | } |
206 | SEQ_printf(m, "%s\n", dev->name); | 206 | SEQ_printf(m, "%s\n", dev->name); |
207 | SEQ_printf(m, " max_delta_ns: %lu\n", dev->max_delta_ns); | 207 | SEQ_printf(m, " max_delta_ns: %llu\n", |
208 | SEQ_printf(m, " min_delta_ns: %lu\n", dev->min_delta_ns); | 208 | (unsigned long long) dev->max_delta_ns); |
209 | SEQ_printf(m, " mult: %lu\n", dev->mult); | 209 | SEQ_printf(m, " min_delta_ns: %llu\n", |
210 | SEQ_printf(m, " shift: %d\n", dev->shift); | 210 | (unsigned long long) dev->min_delta_ns); |
211 | SEQ_printf(m, " mult: %u\n", dev->mult); | ||
212 | SEQ_printf(m, " shift: %u\n", dev->shift); | ||
211 | SEQ_printf(m, " mode: %d\n", dev->mode); | 213 | SEQ_printf(m, " mode: %d\n", dev->mode); |
212 | SEQ_printf(m, " next_event: %Ld nsecs\n", | 214 | SEQ_printf(m, " next_event: %Ld nsecs\n", |
213 | (unsigned long long) ktime_to_ns(dev->next_event)); | 215 | (unsigned long long) ktime_to_ns(dev->next_event)); |