 include/linux/clocksource.h |  2 ++
 include/linux/time.h        |  1 +
 kernel/time/clocksource.c   | 44 ++++++++++++++++++++++++
 kernel/time/tick-sched.c    | 52 +++++++++++++++++---------
 kernel/time/timekeeping.c   | 11 +++++++
 5 files changed, 96 insertions(+), 14 deletions(-)
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index f57f88250526..279c5478e8a6 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -151,6 +151,7 @@ extern u64 timecounter_cyc2time(struct timecounter *tc,
  *			subtraction of non 64 bit counters
  * @mult:		cycle to nanosecond multiplier
  * @shift:		cycle to nanosecond divisor (power of two)
+ * @max_idle_ns:	max idle time permitted by the clocksource (nsecs)
  * @flags:		flags describing special properties
  * @vread:		vsyscall based read
  * @resume:		resume function for the clocksource, if necessary
@@ -168,6 +169,7 @@ struct clocksource {
 	cycle_t mask;
 	u32 mult;
 	u32 shift;
+	u64 max_idle_ns;
 	unsigned long flags;
 	cycle_t (*vread)(void);
 	void (*resume)(void);
diff --git a/include/linux/time.h b/include/linux/time.h
index fe04e5ef6a59..6e026e45a179 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -148,6 +148,7 @@ extern void monotonic_to_bootbased(struct timespec *ts);
 
 extern struct timespec timespec_trunc(struct timespec t, unsigned gran);
 extern int timekeeping_valid_for_hres(void);
+extern u64 timekeeping_max_deferment(void);
 extern void update_wall_time(void);
 extern void update_xtime_cache(u64 nsec);
 extern void timekeeping_leap_insert(int leapsecond);
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 407c0894ef37..b65b242f04dd 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -469,6 +469,47 @@ void clocksource_touch_watchdog(void)
 #ifdef CONFIG_GENERIC_TIME
 
 /**
+ * clocksource_max_deferment - Returns max time the clocksource can be deferred
+ * @cs:		Pointer to clocksource
+ *
+ */
+static u64 clocksource_max_deferment(struct clocksource *cs)
+{
+	u64 max_nsecs, max_cycles;
+
+	/*
+	 * Calculate the maximum number of cycles that we can pass to the
+	 * cyc2ns function without overflowing a 64-bit signed result. The
+	 * maximum number of cycles is equal to LLONG_MAX/cs->mult, which
+	 * is equivalent to the below.
+	 * max_cycles < (2^63)/cs->mult
+	 * max_cycles < 2^(log2((2^63)/cs->mult))
+	 * max_cycles < 2^(log2(2^63) - log2(cs->mult))
+	 * max_cycles < 2^(63 - log2(cs->mult))
+	 * max_cycles < 1 << (63 - log2(cs->mult))
+	 * Please note that we add 1 to the result of the log2 to account for
+	 * any rounding errors, ensure the above inequality is satisfied and
+	 * no overflow will occur.
+	 */
+	max_cycles = 1ULL << (63 - (ilog2(cs->mult) + 1));
+
+	/*
+	 * The actual maximum number of cycles we can defer the clocksource is
+	 * determined by the minimum of max_cycles and cs->mask.
+	 */
+	max_cycles = min_t(u64, max_cycles, (u64) cs->mask);
+	max_nsecs = clocksource_cyc2ns(max_cycles, cs->mult, cs->shift);
+
+	/*
+	 * To ensure that the clocksource does not wrap whilst we are idle,
+	 * limit the time the clocksource can be deferred by 1/32 (~3%).
+	 * Please note a power-of-two fraction is used because it can be
+	 * computed with a shift, versus say 10% which would require division.
+	 */
+	return max_nsecs - (max_nsecs >> 5);
+}
+
+/**
  * clocksource_select - Select the best clocksource available
  *
  * Private function. Must hold clocksource_mutex when called.
@@ -564,6 +605,9 @@ static void clocksource_enqueue(struct clocksource *cs)
  */
 int clocksource_register(struct clocksource *cs)
 {
+	/* calculate max idle time permitted for this clocksource */
+	cs->max_idle_ns = clocksource_max_deferment(cs);
+
 	mutex_lock(&clocksource_mutex);
 	clocksource_enqueue(cs);
 	clocksource_select();
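
To make the arithmetic in clocksource_max_deferment() concrete, it can be replayed outside the kernel. The stand-alone C sketch below is illustrative only: ilog2() and clocksource_cyc2ns() are replaced by minimal user-space equivalents, and the mult/shift/mask values are hypothetical, not taken from any real clocksource.

#include <stdio.h>
#include <stdint.h>

/* User-space stand-ins for the kernel helpers (assumptions, not the
 * kernel implementations): ilog2() is floor(log2(v)), and
 * clocksource_cyc2ns() is (cycles * mult) >> shift. */
static int ilog2_u32(uint32_t v)
{
	int l = -1;

	while (v) {
		v >>= 1;
		l++;
	}
	return l;
}

static uint64_t cyc2ns(uint64_t cycles, uint32_t mult, uint32_t shift)
{
	return (cycles * mult) >> shift;
}

static uint64_t max_deferment(uint32_t mult, uint32_t shift, uint64_t mask)
{
	/* Same bound as the patch: keep cycles * mult below 2^63. */
	uint64_t max_cycles = 1ULL << (63 - (ilog2_u32(mult) + 1));
	uint64_t max_nsecs;

	/* The counter also wraps at its mask. */
	if (max_cycles > mask)
		max_cycles = mask;
	max_nsecs = cyc2ns(max_cycles, mult, shift);

	/* Back off by max_nsecs / 32, as the >> 5 above does. */
	return max_nsecs - (max_nsecs >> 5);
}

int main(void)
{
	/* Hypothetical 1 MHz, 32-bit counter: mult/shift chosen so one
	 * cycle converts to 1000 ns ((1 * (1000 << 20)) >> 20 == 1000). */
	uint32_t mult = 1000u << 20, shift = 20;
	uint64_t mask = 0xffffffffULL;

	printf("max idle: %llu ns\n",
	       (unsigned long long)max_deferment(mult, shift, mask));
	return 0;
}

For this made-up counter the wrap time is about 4295 seconds, so the sketch prints roughly 4160 seconds: the wrap time minus the 1/32 safety margin applied by the return statement above.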
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index c65ba0faa98f..a80b4644fe6b 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -208,6 +208,7 @@ void tick_nohz_stop_sched_tick(int inidle)
 	struct tick_sched *ts;
 	ktime_t last_update, expires, now;
 	struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
+	u64 time_delta;
 	int cpu;
 
 	local_irq_save(flags);
@@ -262,6 +263,17 @@ void tick_nohz_stop_sched_tick(int inidle)
 		seq = read_seqbegin(&xtime_lock);
 		last_update = last_jiffies_update;
 		last_jiffies = jiffies;
+
+		/*
+		 * On SMP we really should only care for the CPU which
+		 * has the do_timer duty assigned. All other CPUs can
+		 * sleep as long as they want.
+		 */
+		if (cpu == tick_do_timer_cpu ||
+		    tick_do_timer_cpu == TICK_DO_TIMER_NONE)
+			time_delta = timekeeping_max_deferment();
+		else
+			time_delta = KTIME_MAX;
 	} while (read_seqretry(&xtime_lock, seq));
 
 	if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
@@ -284,11 +296,26 @@ void tick_nohz_stop_sched_tick(int inidle)
 	if ((long)delta_jiffies >= 1) {
 
 		/*
-		 * calculate the expiry time for the next timer wheel
-		 * timer
-		 */
-		expires = ktime_add_ns(last_update, tick_period.tv64 *
-				   delta_jiffies);
+		 * calculate the expiry time for the next timer wheel
+		 * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals
+		 * that there is no timer pending or at least extremely
+		 * far into the future (12 days for HZ=1000). In this
+		 * case we set the expiry to the end of time.
+		 */
+		if (likely(delta_jiffies < NEXT_TIMER_MAX_DELTA)) {
+			/*
+			 * Calculate the time delta for the next timer event.
+			 * If the time delta exceeds the maximum time delta
+			 * permitted by the current clocksource then adjust
+			 * the time delta accordingly to ensure the
+			 * clocksource does not wrap.
+			 */
+			time_delta = min_t(u64, time_delta,
+					   tick_period.tv64 * delta_jiffies);
+			expires = ktime_add_ns(last_update, time_delta);
+		} else {
+			expires.tv64 = KTIME_MAX;
+		}
 
 		/*
 		 * If this cpu is the one which updates jiffies, then
@@ -332,22 +359,19 @@ void tick_nohz_stop_sched_tick(int inidle)
 
 		ts->idle_sleeps++;
 
+		/* Mark expires */
+		ts->idle_expires = expires;
+
 		/*
-		 * delta_jiffies >= NEXT_TIMER_MAX_DELTA signals that
-		 * there is no timer pending or at least extremly far
-		 * into the future (12 days for HZ=1000). In this case
-		 * we simply stop the tick timer:
+		 * If the expiration time == KTIME_MAX, then
+		 * in this case we simply stop the tick timer.
 		 */
-		if (unlikely(delta_jiffies >= NEXT_TIMER_MAX_DELTA)) {
-			ts->idle_expires.tv64 = KTIME_MAX;
+		if (unlikely(expires.tv64 == KTIME_MAX)) {
 			if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
 				hrtimer_cancel(&ts->sched_timer);
 			goto out;
 		}
 
-		/* Mark expiries */
-		ts->idle_expires = expires;
-
 		if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
 			hrtimer_start(&ts->sched_timer, expires,
 				      HRTIMER_MODE_ABS_PINNED);
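
In effect the tick path now picks its sleep length as the minimum of two deadlines: the next timer-wheel event and the clocksource deferment limit, with KTIME_MAX standing in for "no limit" on CPUs that do not carry the do_timer duty. The following user-space sketch of that selection logic is hedged: all names and numbers are illustrative, and only the min()/KTIME_MAX structure mirrors the patch.

#include <stdio.h>
#include <stdint.h>

#define KTIME_MAX INT64_MAX	/* "sleep forever", as in the kernel */

/* Deadline selection as in tick_nohz_stop_sched_tick(): the CPU with
 * do_timer duty must wake before the clocksource wraps; any other CPU
 * may sleep until its next timer. Values are ns from now. */
static int64_t pick_sleep_ns(int is_do_timer_cpu, int64_t next_timer_ns,
			     int64_t max_defer_ns)
{
	int64_t limit = is_do_timer_cpu ? max_defer_ns : KTIME_MAX;

	return next_timer_ns < limit ? next_timer_ns : limit;
}

int main(void)
{
	int64_t next_timer = 10LL * 1000000000LL;	/* timer in 10 s */
	int64_t max_defer  =  4LL * 1000000000LL;	/* wrap limit ~4 s */

	/* The do_timer CPU is clamped to the clocksource limit ... */
	printf("do_timer cpu sleeps %lld ns\n",
	       (long long)pick_sleep_ns(1, next_timer, max_defer));
	/* ... while any other CPU sleeps until its timer fires. */
	printf("other cpu sleeps    %lld ns\n",
	       (long long)pick_sleep_ns(0, next_timer, max_defer));
	return 0;
}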
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 96b3f0dfa5dc..5d4d4239a0aa 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -478,6 +478,17 @@ int timekeeping_valid_for_hres(void)
 }
 
 /**
+ * timekeeping_max_deferment - Returns max time the clocksource can be deferred
+ *
+ * Caller must observe xtime_lock via read_seqbegin/read_seqretry to
+ * ensure that the clocksource does not change!
+ */
+u64 timekeeping_max_deferment(void)
+{
+	return timekeeper.clock->max_idle_ns;
+}
+
+/**
  * read_persistent_clock - Return time from the persistent clock.
  *
  * Weak dummy function for arches that do not yet support it.
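
The locking rule in the new kernel-doc is the usual seqlock read-side contract, and the tick-sched.c hunk above follows it. As a minimal sketch of what a caller is expected to look like (kernel-style C, assuming the xtime_lock seqlock of this era):

	unsigned long seq;
	u64 max_defer;

	do {
		seq = read_seqbegin(&xtime_lock);
		/* reads here see a consistent timekeeper.clock */
		max_defer = timekeeping_max_deferment();
	} while (read_seqretry(&xtime_lock, seq));

If a writer grabs xtime_lock between read_seqbegin() and read_seqretry(), the loop retries, so max_defer can never reflect a clocksource that was swapped out mid-read.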