 include/linux/clocksource.h |  2
 include/linux/time.h        |  1
 kernel/time/clocksource.c   | 44
 kernel/time/tick-sched.c    | 52
 kernel/time/timekeeping.c   | 11
 5 files changed, 96 insertions(+), 14 deletions(-)
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index f57f88250526..279c5478e8a6 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -151,6 +151,7 @@ extern u64 timecounter_cyc2time(struct timecounter *tc,
  * subtraction of non 64 bit counters
  * @mult: cycle to nanosecond multiplier
  * @shift: cycle to nanosecond divisor (power of two)
+ * @max_idle_ns: max idle time permitted by the clocksource (nsecs)
  * @flags: flags describing special properties
  * @vread: vsyscall based read
  * @resume: resume function for the clocksource, if necessary
@@ -168,6 +169,7 @@ struct clocksource {
 	cycle_t mask;
 	u32 mult;
 	u32 shift;
+	u64 max_idle_ns;
 	unsigned long flags;
 	cycle_t (*vread)(void);
 	void (*resume)(void);
diff --git a/include/linux/time.h b/include/linux/time.h
index fe04e5ef6a59..6e026e45a179 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -148,6 +148,7 @@ extern void monotonic_to_bootbased(struct timespec *ts);
 
 extern struct timespec timespec_trunc(struct timespec t, unsigned gran);
 extern int timekeeping_valid_for_hres(void);
+extern u64 timekeeping_max_deferment(void);
 extern void update_wall_time(void);
 extern void update_xtime_cache(u64 nsec);
 extern void timekeeping_leap_insert(int leapsecond);
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 407c0894ef37..b65b242f04dd 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -469,6 +469,47 @@ void clocksource_touch_watchdog(void)
 #ifdef CONFIG_GENERIC_TIME
 
 /**
+ * clocksource_max_deferment - Returns max time the clocksource can be deferred
+ * @cs: Pointer to clocksource
+ *
+ */
+static u64 clocksource_max_deferment(struct clocksource *cs)
+{
+	u64 max_nsecs, max_cycles;
+
+	/*
+	 * Calculate the maximum number of cycles that we can pass to the
+	 * cyc2ns function without overflowing a 64-bit signed result. The
+	 * maximum number of cycles is equal to ULLONG_MAX/cs->mult which
+	 * is equivalent to the below.
+	 * max_cycles < (2^63)/cs->mult
+	 * max_cycles < 2^(log2((2^63)/cs->mult))
+	 * max_cycles < 2^(log2(2^63) - log2(cs->mult))
+	 * max_cycles < 2^(63 - log2(cs->mult))
+	 * max_cycles < 1 << (63 - log2(cs->mult))
+	 * Please note that we add 1 to the result of the log2 to account for
+	 * any rounding errors, ensure the above inequality is satisfied and
+	 * no overflow will occur.
+	 */
+	max_cycles = 1ULL << (63 - (ilog2(cs->mult) + 1));
+
+	/*
+	 * The actual maximum number of cycles we can defer the clocksource is
+	 * determined by the minimum of max_cycles and cs->mask.
+	 */
+	max_cycles = min_t(u64, max_cycles, (u64) cs->mask);
+	max_nsecs = clocksource_cyc2ns(max_cycles, cs->mult, cs->shift);
+
+	/*
+	 * To ensure that the clocksource does not wrap whilst we are idle,
+	 * limit the time the clocksource can be deferred by 12.5%. Please
+	 * note a margin of 12.5% is used because this can be computed with
+	 * a shift, versus say 10% which would require division.
+	 */
+	return max_nsecs - (max_nsecs >> 3);
+}
+
+/**
  * clocksource_select - Select the best clocksource available
  *
  * Private function. Must hold clocksource_mutex when called.
@@ -564,6 +605,9 @@ static void clocksource_enqueue(struct clocksource *cs)
  */
 int clocksource_register(struct clocksource *cs)
 {
+	/* calculate max idle time permitted for this clocksource */
+	cs->max_idle_ns = clocksource_max_deferment(cs);
+
 	mutex_lock(&clocksource_mutex);
 	clocksource_enqueue(cs);
 	clocksource_select();
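
To see the arithmetic in clocksource_max_deferment() concretely, here is a minimal user-space sketch of the same calculation. The mult/shift/mask values are illustrative stand-ins (not taken from any real clocksource), and ilog2()/clocksource_cyc2ns() are reimplemented as plain C helpers:

#include <stdint.h>
#include <stdio.h>

/* Stand-in for clocksource_cyc2ns(): ns = (cycles * mult) >> shift */
static uint64_t cyc2ns(uint64_t cycles, uint32_t mult, uint32_t shift)
{
	return (cycles * mult) >> shift;
}

/* Stand-in for the kernel's ilog2(): floor(log2(v)) */
static int ilog2_u32(uint32_t v)
{
	int log = -1;

	while (v) {
		v >>= 1;
		log++;
	}
	return log;
}

int main(void)
{
	/* Illustrative clocksource: a 32-bit counter with mult = 2^22 and
	 * shift = 22, i.e. roughly one nanosecond per cycle. */
	uint32_t mult = 1u << 22;
	uint32_t shift = 22;
	uint64_t mask = 0xffffffffULL;
	uint64_t max_cycles, max_nsecs;

	/* max_cycles < 2^(63 - log2(mult)); the +1 absorbs log2 rounding */
	max_cycles = 1ULL << (63 - (ilog2_u32(mult) + 1));

	/* The counter also cannot run past its own wrap point */
	if (max_cycles > mask)
		max_cycles = mask;

	max_nsecs = cyc2ns(max_cycles, mult, shift);

	/* Keep the 12.5% safety margin; one shift, no division needed */
	max_nsecs -= max_nsecs >> 3;

	printf("max idle: %llu ns (~%.2f s)\n",
	       (unsigned long long)max_nsecs, max_nsecs / 1e9);
	return 0;
}

For these sample values the 2^40-cycle overflow bound exceeds the 32-bit counter mask, so the mask dominates: the counter wraps after about 4.3 seconds, and the margin trims the permitted idle time to roughly 3.76 seconds.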
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index c65ba0faa98f..a80b4644fe6b 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -208,6 +208,7 @@ void tick_nohz_stop_sched_tick(int inidle)
 	struct tick_sched *ts;
 	ktime_t last_update, expires, now;
 	struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
+	u64 time_delta;
 	int cpu;
 
 	local_irq_save(flags);
@@ -262,6 +263,17 @@ void tick_nohz_stop_sched_tick(int inidle)
 		seq = read_seqbegin(&xtime_lock);
 		last_update = last_jiffies_update;
 		last_jiffies = jiffies;
+
+		/*
+		 * On SMP we really should only care for the CPU which
+		 * has the do_timer duty assigned. All other CPUs can
+		 * sleep as long as they want.
+		 */
+		if (cpu == tick_do_timer_cpu ||
+		    tick_do_timer_cpu == TICK_DO_TIMER_NONE)
+			time_delta = timekeeping_max_deferment();
+		else
+			time_delta = KTIME_MAX;
 	} while (read_seqretry(&xtime_lock, seq));
 
 	if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
@@ -284,11 +296,26 @@ void tick_nohz_stop_sched_tick(int inidle)
 	if ((long)delta_jiffies >= 1) {
 
 		/*
 		 * calculate the expiry time for the next timer wheel
-		 * timer
-		 */
-		expires = ktime_add_ns(last_update, tick_period.tv64 *
-				   delta_jiffies);
+		 * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals
+		 * that there is no timer pending or at least extremely
+		 * far into the future (12 days for HZ=1000). In this
+		 * case we set the expiry to the end of time.
+		 */
+		if (likely(delta_jiffies < NEXT_TIMER_MAX_DELTA)) {
+			/*
+			 * Calculate the time delta for the next timer event.
+			 * If the time delta exceeds the maximum time delta
+			 * permitted by the current clocksource then adjust
+			 * the time delta accordingly to ensure the
+			 * clocksource does not wrap.
+			 */
+			time_delta = min_t(u64, time_delta,
+					   tick_period.tv64 * delta_jiffies);
+			expires = ktime_add_ns(last_update, time_delta);
+		} else {
+			expires.tv64 = KTIME_MAX;
+		}
 
 		/*
 		 * If this cpu is the one which updates jiffies, then
@@ -332,22 +359,19 @@ void tick_nohz_stop_sched_tick(int inidle)
 
 	ts->idle_sleeps++;
 
+	/* Mark expires */
+	ts->idle_expires = expires;
+
 	/*
-	 * delta_jiffies >= NEXT_TIMER_MAX_DELTA signals that
-	 * there is no timer pending or at least extremly far
-	 * into the future (12 days for HZ=1000). In this case
-	 * we simply stop the tick timer:
+	 * If the expiration time == KTIME_MAX, then
+	 * in this case we simply stop the tick timer.
 	 */
-	if (unlikely(delta_jiffies >= NEXT_TIMER_MAX_DELTA)) {
-		ts->idle_expires.tv64 = KTIME_MAX;
+	if (unlikely(expires.tv64 == KTIME_MAX)) {
 		if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
 			hrtimer_cancel(&ts->sched_timer);
 		goto out;
 	}
 
-	/* Mark expiries */
-	ts->idle_expires = expires;
-
 	if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
 		hrtimer_start(&ts->sched_timer, expires,
 			      HRTIMER_MODE_ABS_PINNED);
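
The tick-sched change boils down to one decision: the next wakeup is the earlier of "next timer wheel event" and "clocksource wrap limit", or the end of time when no timer is pending. A hedged user-space distillation follows; the kernel's ktime_t plumbing is reduced to plain 64-bit nanoseconds, and NEXT_TIMER_MAX_DELTA is a stand-in matching the ~2^30-jiffies limit that the "12 days for HZ=1000" comment implies:

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC	1000000000ULL
#define HZ		1000ULL
#define TICK_PERIOD_NS	(NSEC_PER_SEC / HZ)
#define KTIME_MAX	((int64_t)~(1ULL << 63))
/* Stand-in value; "no timer pending" threshold in jiffies */
#define NEXT_TIMER_MAX_DELTA	((1UL << 30) - 1)

static uint64_t min_u64(uint64_t a, uint64_t b)
{
	return a < b ? a : b;
}

/*
 * Next wakeup in absolute nanoseconds: the next timer wheel event,
 * clamped so the idle period never exceeds what the clocksource can
 * measure without wrapping, or "end of time" if no timer is pending.
 */
static int64_t next_expiry_ns(int64_t last_update_ns,
			      unsigned long delta_jiffies,
			      uint64_t time_delta)
{
	if (delta_jiffies >= NEXT_TIMER_MAX_DELTA)
		return KTIME_MAX;

	time_delta = min_u64(time_delta, TICK_PERIOD_NS * delta_jiffies);
	return last_update_ns + (int64_t)time_delta;
}

int main(void)
{
	/* The next timer is 10 minutes out, but the clocksource would wrap
	 * after ~4 s of idle: the wakeup is clamped to the wrap limit. */
	int64_t expires = next_expiry_ns(0, 600 * HZ, 4 * NSEC_PER_SEC);

	printf("expires at %lld ns\n", (long long)expires);
	return 0;
}

This also shows why only the do_timer CPU needs the clamp: a CPU that passes KTIME_MAX as time_delta (as the patch does for all other CPUs) sleeps purely until its next timer event.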
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 96b3f0dfa5dc..5d4d4239a0aa 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -478,6 +478,17 @@ int timekeeping_valid_for_hres(void)
 }
 
 /**
+ * timekeeping_max_deferment - Returns max time the clocksource can be deferred
+ *
+ * Caller must observe xtime_lock via read_seqbegin/read_seqretry to
+ * ensure that the clocksource does not change!
+ */
+u64 timekeeping_max_deferment(void)
+{
+	return timekeeper.clock->max_idle_ns;
+}
+
+/**
 * read_persistent_clock - Return time from the persistent clock.
 *
 * Weak dummy function for arches that do not yet support it.
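
timekeeping_max_deferment() itself is just a field read; the interesting part is the locking contract in its kernel-doc. Below is a minimal user-space analogue of the seqlock read side that callers like tick_nohz_stop_sched_tick() must use. The seqcount is a simplified C11-atomics model of the kernel's xtime_lock, and the max_idle_ns value is hypothetical:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified seqlock read side: an even count means no writer is
 * active; readers retry if the count changed while they were reading. */
struct seqcount {
	atomic_uint seq;
};

static unsigned read_seqbegin(struct seqcount *s)
{
	unsigned seq;

	/* Spin until no write is in progress (count is even) */
	do {
		seq = atomic_load_explicit(&s->seq, memory_order_acquire);
	} while (seq & 1);
	return seq;
}

static int read_seqretry(struct seqcount *s, unsigned start)
{
	/* A changed count means a writer ran: the snapshot is stale */
	return atomic_load_explicit(&s->seq, memory_order_acquire) != start;
}

/* Stand-ins for the timekeeper state; the value is hypothetical */
static struct seqcount xtime_seq;
static uint64_t cur_max_idle_ns = 3758096384ULL;

static uint64_t timekeeping_max_deferment(void)
{
	return cur_max_idle_ns;
}

int main(void)
{
	unsigned seq;
	uint64_t time_delta;

	/* Same pattern the tick-sched hunk uses: sample under the
	 * sequence count so the clocksource cannot change mid-read */
	do {
		seq = read_seqbegin(&xtime_seq);
		time_delta = timekeeping_max_deferment();
	} while (read_seqretry(&xtime_seq, seq));

	printf("max deferment: %llu ns\n", (unsigned long long)time_delta);
	return 0;
}

The retry loop is what makes the unlocked field read safe: if a clocksource switch bumps the sequence count between begin and retry, the stale max_idle_ns snapshot is simply discarded and re-read.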