about | summary | refs | log | tree | commit | diff | stats
path: root/kernel/time
diff options
context:
space:
mode:
author Jonathan Herman <hermanjl@cs.unc.edu> 2013-01-17 16:15:55 -0500
committer Jonathan Herman <hermanjl@cs.unc.edu> 2013-01-17 16:15:55 -0500
commit 8dea78da5cee153b8af9c07a2745f6c55057fe12 (patch)
tree a8f4d49d63b1ecc92f2fddceba0655b2472c5bd9 /kernel/time
parent 406089d01562f1e2bf9f089fd7637009ebaad589 (diff)
Patched in Tegra support.
Diffstat (limited to 'kernel/time')
-rw-r--r-- kernel/time/Kconfig 64
-rw-r--r-- kernel/time/Makefile 4
-rw-r--r-- kernel/time/alarmtimer.c 342
-rw-r--r-- kernel/time/clockevents.c 157
-rw-r--r-- kernel/time/clocksource.c 89
-rw-r--r-- kernel/time/jiffies.c 40
-rw-r--r-- kernel/time/ntp.c 201
-rw-r--r-- kernel/time/posix-clock.c 1
-rw-r--r-- kernel/time/tick-broadcast.c 13
-rw-r--r-- kernel/time/tick-common.c 12
-rw-r--r-- kernel/time/tick-internal.h 3
-rw-r--r-- kernel/time/tick-oneshot.c 77
-rw-r--r-- kernel/time/tick-sched.c 519
-rw-r--r-- kernel/time/timekeeping.c 963
-rw-r--r-- kernel/time/timer_list.c 4
-rw-r--r-- kernel/time/timer_stats.c 6
16 files changed, 961 insertions, 1534 deletions
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 8601f0db126..f06a8a36564 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -1,63 +1,6 @@
1# 1#
2# Timer subsystem related configuration options 2# Timer subsystem related configuration options
3# 3#
4
5# Options selectable by arch Kconfig
6
7# Watchdog function for clocksources to detect instabilities
8config CLOCKSOURCE_WATCHDOG
9 bool
10
11# Architecture has extra clocksource data
12config ARCH_CLOCKSOURCE_DATA
13 bool
14
15# Timekeeping vsyscall support
16config GENERIC_TIME_VSYSCALL
17 bool
18
19# Timekeeping vsyscall support
20config GENERIC_TIME_VSYSCALL_OLD
21 bool
22
23# ktime_t scalar 64bit nsec representation
24config KTIME_SCALAR
25 bool
26
27# Old style timekeeping
28config ARCH_USES_GETTIMEOFFSET
29 bool
30
31# The generic clock events infrastructure
32config GENERIC_CLOCKEVENTS
33 bool
34
35# Migration helper. Builds, but does not invoke
36config GENERIC_CLOCKEVENTS_BUILD
37 bool
38 default y
39 depends on GENERIC_CLOCKEVENTS
40
41# Clockevents broadcasting infrastructure
42config GENERIC_CLOCKEVENTS_BROADCAST
43 bool
44 depends on GENERIC_CLOCKEVENTS
45
46# Automatically adjust the min. reprogramming time for
47# clock event device
48config GENERIC_CLOCKEVENTS_MIN_ADJUST
49 bool
50
51# Generic update of CMOS clock
52config GENERIC_CMOS_UPDATE
53 bool
54
55if GENERIC_CLOCKEVENTS
56menu "Timers subsystem"
57
58# Core internal switch. Selected by NO_HZ / HIGH_RES_TIMERS. This is
59# only related to the tick functionality. Oneshot clockevent devices
60# are supported independ of this.
61config TICK_ONESHOT 4config TICK_ONESHOT
62 bool 5 bool
63 6
@@ -79,5 +22,8 @@ config HIGH_RES_TIMERS
79 hardware is not capable then this option only increases 22 hardware is not capable then this option only increases
80 the size of the kernel image. 23 the size of the kernel image.
81 24
82endmenu 25config GENERIC_CLOCKEVENTS_BUILD
83endif 26 bool
27 default y
28 depends on GENERIC_CLOCKEVENTS || GENERIC_CLOCKEVENTS_MIGR
29
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index ff7d9d2ab50..cae2ad7491b 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -1,5 +1,5 @@
1obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o 1obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o
2obj-y += timeconv.o posix-clock.o alarmtimer.o 2obj-y += timeconv.o posix-clock.o #alarmtimer.o
3 3
4obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o 4obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o
5obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o 5obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index f11d83b1294..8b70c76910a 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -37,6 +37,7 @@
37static struct alarm_base { 37static struct alarm_base {
38 spinlock_t lock; 38 spinlock_t lock;
39 struct timerqueue_head timerqueue; 39 struct timerqueue_head timerqueue;
40 struct hrtimer timer;
40 ktime_t (*gettime)(void); 41 ktime_t (*gettime)(void);
41 clockid_t base_clockid; 42 clockid_t base_clockid;
42} alarm_bases[ALARM_NUMTYPE]; 43} alarm_bases[ALARM_NUMTYPE];
@@ -45,8 +46,6 @@ static struct alarm_base {
45static ktime_t freezer_delta; 46static ktime_t freezer_delta;
46static DEFINE_SPINLOCK(freezer_delta_lock); 47static DEFINE_SPINLOCK(freezer_delta_lock);
47 48
48static struct wakeup_source *ws;
49
50#ifdef CONFIG_RTC_CLASS 49#ifdef CONFIG_RTC_CLASS
51/* rtc timer and device for setting alarm wakeups at suspend */ 50/* rtc timer and device for setting alarm wakeups at suspend */
52static struct rtc_timer rtctimer; 51static struct rtc_timer rtctimer;
@@ -54,112 +53,108 @@ static struct rtc_device *rtcdev;
54static DEFINE_SPINLOCK(rtcdev_lock); 53static DEFINE_SPINLOCK(rtcdev_lock);
55 54
56/** 55/**
57 * alarmtimer_get_rtcdev - Return selected rtcdevice 56 * has_wakealarm - check rtc device has wakealarm ability
57 * @dev: current device
58 * @name_ptr: name to be returned
58 * 59 *
59 * This function returns the rtc device to use for wakealarms. 60 * This helper function checks to see if the rtc device can wake
60 * If one has not already been chosen, it checks to see if a 61 * from suspend.
61 * functional rtc device is available.
62 */ 62 */
63struct rtc_device *alarmtimer_get_rtcdev(void) 63static int has_wakealarm(struct device *dev, void *name_ptr)
64{ 64{
65 unsigned long flags; 65 struct rtc_device *candidate = to_rtc_device(dev);
66 struct rtc_device *ret;
67 66
68 spin_lock_irqsave(&rtcdev_lock, flags); 67 if (!candidate->ops->set_alarm)
69 ret = rtcdev; 68 return 0;
70 spin_unlock_irqrestore(&rtcdev_lock, flags); 69 if (!device_may_wakeup(candidate->dev.parent))
70 return 0;
71 71
72 return ret; 72 *(const char **)name_ptr = dev_name(dev);
73 return 1;
73} 74}
74 75
75 76/**
76static int alarmtimer_rtc_add_device(struct device *dev, 77 * alarmtimer_get_rtcdev - Return selected rtcdevice
77 struct class_interface *class_intf) 78 *
79 * This function returns the rtc device to use for wakealarms.
80 * If one has not already been chosen, it checks to see if a
81 * functional rtc device is available.
82 */
83static struct rtc_device *alarmtimer_get_rtcdev(void)
78{ 84{
85 struct device *dev;
86 char *str;
79 unsigned long flags; 87 unsigned long flags;
80 struct rtc_device *rtc = to_rtc_device(dev); 88 struct rtc_device *ret;
81
82 if (rtcdev)
83 return -EBUSY;
84
85 if (!rtc->ops->set_alarm)
86 return -1;
87 if (!device_may_wakeup(rtc->dev.parent))
88 return -1;
89 89
90 spin_lock_irqsave(&rtcdev_lock, flags); 90 spin_lock_irqsave(&rtcdev_lock, flags);
91 if (!rtcdev) { 91 if (!rtcdev) {
92 rtcdev = rtc; 92 /* Find an rtc device and init the rtc_timer */
93 /* hold a reference so it doesn't go away */ 93 dev = class_find_device(rtc_class, NULL, &str, has_wakealarm);
94 get_device(dev); 94 /* If we have a device then str is valid. See has_wakealarm() */
95 if (dev) {
96 rtcdev = rtc_class_open(str);
97 /*
98 * Drop the reference we got in class_find_device,
99 * rtc_open takes its own.
100 */
101 put_device(dev);
102 rtc_timer_init(&rtctimer, NULL, NULL);
103 }
95 } 104 }
105 ret = rtcdev;
96 spin_unlock_irqrestore(&rtcdev_lock, flags); 106 spin_unlock_irqrestore(&rtcdev_lock, flags);
97 return 0;
98}
99
100static inline void alarmtimer_rtc_timer_init(void)
101{
102 rtc_timer_init(&rtctimer, NULL, NULL);
103}
104
105static struct class_interface alarmtimer_rtc_interface = {
106 .add_dev = &alarmtimer_rtc_add_device,
107};
108 107
109static int alarmtimer_rtc_interface_setup(void) 108 return ret;
110{
111 alarmtimer_rtc_interface.class = rtc_class;
112 return class_interface_register(&alarmtimer_rtc_interface);
113}
114static void alarmtimer_rtc_interface_remove(void)
115{
116 class_interface_unregister(&alarmtimer_rtc_interface);
117} 109}
118#else 110#else
119struct rtc_device *alarmtimer_get_rtcdev(void) 111#define alarmtimer_get_rtcdev() (0)
120{ 112#define rtcdev (0)
121 return NULL;
122}
123#define rtcdev (NULL)
124static inline int alarmtimer_rtc_interface_setup(void) { return 0; }
125static inline void alarmtimer_rtc_interface_remove(void) { }
126static inline void alarmtimer_rtc_timer_init(void) { }
127#endif 113#endif
128 114
115
129/** 116/**
130 * alarmtimer_enqueue - Adds an alarm timer to an alarm_base timerqueue 117 * alarmtimer_enqueue - Adds an alarm timer to an alarm_base timerqueue
131 * @base: pointer to the base where the timer is being run 118 * @base: pointer to the base where the timer is being run
132 * @alarm: pointer to alarm being enqueued. 119 * @alarm: pointer to alarm being enqueued.
133 * 120 *
134 * Adds alarm to a alarm_base timerqueue 121 * Adds alarm to a alarm_base timerqueue and if necessary sets
122 * an hrtimer to run.
135 * 123 *
136 * Must hold base->lock when calling. 124 * Must hold base->lock when calling.
137 */ 125 */
138static void alarmtimer_enqueue(struct alarm_base *base, struct alarm *alarm) 126static void alarmtimer_enqueue(struct alarm_base *base, struct alarm *alarm)
139{ 127{
140 if (alarm->state & ALARMTIMER_STATE_ENQUEUED)
141 timerqueue_del(&base->timerqueue, &alarm->node);
142
143 timerqueue_add(&base->timerqueue, &alarm->node); 128 timerqueue_add(&base->timerqueue, &alarm->node);
144 alarm->state |= ALARMTIMER_STATE_ENQUEUED; 129 if (&alarm->node == timerqueue_getnext(&base->timerqueue)) {
130 hrtimer_try_to_cancel(&base->timer);
131 hrtimer_start(&base->timer, alarm->node.expires,
132 HRTIMER_MODE_ABS);
133 }
145} 134}
146 135
147/** 136/**
148 * alarmtimer_dequeue - Removes an alarm timer from an alarm_base timerqueue 137 * alarmtimer_remove - Removes an alarm timer from an alarm_base timerqueue
149 * @base: pointer to the base where the timer is running 138 * @base: pointer to the base where the timer is running
150 * @alarm: pointer to alarm being removed 139 * @alarm: pointer to alarm being removed
151 * 140 *
152 * Removes alarm to a alarm_base timerqueue 141 * Removes alarm to a alarm_base timerqueue and if necessary sets
142 * a new timer to run.
153 * 143 *
154 * Must hold base->lock when calling. 144 * Must hold base->lock when calling.
155 */ 145 */
156static void alarmtimer_dequeue(struct alarm_base *base, struct alarm *alarm) 146static void alarmtimer_remove(struct alarm_base *base, struct alarm *alarm)
157{ 147{
158 if (!(alarm->state & ALARMTIMER_STATE_ENQUEUED)) 148 struct timerqueue_node *next = timerqueue_getnext(&base->timerqueue);
159 return;
160 149
161 timerqueue_del(&base->timerqueue, &alarm->node); 150 timerqueue_del(&base->timerqueue, &alarm->node);
162 alarm->state &= ~ALARMTIMER_STATE_ENQUEUED; 151 if (next == &alarm->node) {
152 hrtimer_try_to_cancel(&base->timer);
153 next = timerqueue_getnext(&base->timerqueue);
154 if (!next)
155 return;
156 hrtimer_start(&base->timer, next->expires, HRTIMER_MODE_ABS);
157 }
163} 158}
164 159
165 160
@@ -174,23 +169,39 @@ static void alarmtimer_dequeue(struct alarm_base *base, struct alarm *alarm)
174 */ 169 */
175static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) 170static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer)
176{ 171{
177 struct alarm *alarm = container_of(timer, struct alarm, timer); 172 struct alarm_base *base = container_of(timer, struct alarm_base, timer);
178 struct alarm_base *base = &alarm_bases[alarm->type]; 173 struct timerqueue_node *next;
179 unsigned long flags; 174 unsigned long flags;
175 ktime_t now;
180 int ret = HRTIMER_NORESTART; 176 int ret = HRTIMER_NORESTART;
181 int restart = ALARMTIMER_NORESTART;
182 177
183 spin_lock_irqsave(&base->lock, flags); 178 spin_lock_irqsave(&base->lock, flags);
184 alarmtimer_dequeue(base, alarm); 179 now = base->gettime();
185 spin_unlock_irqrestore(&base->lock, flags); 180 while ((next = timerqueue_getnext(&base->timerqueue))) {
181 struct alarm *alarm;
182 ktime_t expired = next->expires;
186 183
187 if (alarm->function) 184 if (expired.tv64 > now.tv64)
188 restart = alarm->function(alarm, base->gettime()); 185 break;
189 186
190 spin_lock_irqsave(&base->lock, flags); 187 alarm = container_of(next, struct alarm, node);
191 if (restart != ALARMTIMER_NORESTART) { 188
192 hrtimer_set_expires(&alarm->timer, alarm->node.expires); 189 timerqueue_del(&base->timerqueue, &alarm->node);
193 alarmtimer_enqueue(base, alarm); 190 alarm->enabled = 0;
191 /* Re-add periodic timers */
192 if (alarm->period.tv64) {
193 alarm->node.expires = ktime_add(expired, alarm->period);
194 timerqueue_add(&base->timerqueue, &alarm->node);
195 alarm->enabled = 1;
196 }
197 spin_unlock_irqrestore(&base->lock, flags);
198 if (alarm->function)
199 alarm->function(alarm);
200 spin_lock_irqsave(&base->lock, flags);
201 }
202
203 if (next) {
204 hrtimer_set_expires(&base->timer, next->expires);
194 ret = HRTIMER_RESTART; 205 ret = HRTIMER_RESTART;
195 } 206 }
196 spin_unlock_irqrestore(&base->lock, flags); 207 spin_unlock_irqrestore(&base->lock, flags);
@@ -217,14 +228,13 @@ static int alarmtimer_suspend(struct device *dev)
217 unsigned long flags; 228 unsigned long flags;
218 struct rtc_device *rtc; 229 struct rtc_device *rtc;
219 int i; 230 int i;
220 int ret;
221 231
222 spin_lock_irqsave(&freezer_delta_lock, flags); 232 spin_lock_irqsave(&freezer_delta_lock, flags);
223 min = freezer_delta; 233 min = freezer_delta;
224 freezer_delta = ktime_set(0, 0); 234 freezer_delta = ktime_set(0, 0);
225 spin_unlock_irqrestore(&freezer_delta_lock, flags); 235 spin_unlock_irqrestore(&freezer_delta_lock, flags);
226 236
227 rtc = alarmtimer_get_rtcdev(); 237 rtc = rtcdev;
228 /* If we have no rtcdev, just return */ 238 /* If we have no rtcdev, just return */
229 if (!rtc) 239 if (!rtc)
230 return 0; 240 return 0;
@@ -247,10 +257,8 @@ static int alarmtimer_suspend(struct device *dev)
247 if (min.tv64 == 0) 257 if (min.tv64 == 0)
248 return 0; 258 return 0;
249 259
250 if (ktime_to_ns(min) < 2 * NSEC_PER_SEC) { 260 /* XXX - Should we enforce a minimum sleep time? */
251 __pm_wakeup_event(ws, 2 * MSEC_PER_SEC); 261 WARN_ON(min.tv64 < NSEC_PER_SEC);
252 return -EBUSY;
253 }
254 262
255 /* Setup an rtc timer to fire that far in the future */ 263 /* Setup an rtc timer to fire that far in the future */
256 rtc_timer_cancel(rtc, &rtctimer); 264 rtc_timer_cancel(rtc, &rtctimer);
@@ -258,11 +266,9 @@ static int alarmtimer_suspend(struct device *dev)
258 now = rtc_tm_to_ktime(tm); 266 now = rtc_tm_to_ktime(tm);
259 now = ktime_add(now, min); 267 now = ktime_add(now, min);
260 268
261 /* Set alarm, if in the past reject suspend briefly to handle */ 269 rtc_timer_start(rtc, &rtctimer, now, ktime_set(0, 0));
262 ret = rtc_timer_start(rtc, &rtctimer, now, ktime_set(0, 0)); 270
263 if (ret < 0) 271 return 0;
264 __pm_wakeup_event(ws, MSEC_PER_SEC);
265 return ret;
266} 272}
267#else 273#else
268static int alarmtimer_suspend(struct device *dev) 274static int alarmtimer_suspend(struct device *dev)
@@ -293,110 +299,53 @@ static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type)
293 * @function: callback that is run when the alarm fires 299 * @function: callback that is run when the alarm fires
294 */ 300 */
295void alarm_init(struct alarm *alarm, enum alarmtimer_type type, 301void alarm_init(struct alarm *alarm, enum alarmtimer_type type,
296 enum alarmtimer_restart (*function)(struct alarm *, ktime_t)) 302 void (*function)(struct alarm *))
297{ 303{
298 timerqueue_init(&alarm->node); 304 timerqueue_init(&alarm->node);
299 hrtimer_init(&alarm->timer, alarm_bases[type].base_clockid, 305 alarm->period = ktime_set(0, 0);
300 HRTIMER_MODE_ABS);
301 alarm->timer.function = alarmtimer_fired;
302 alarm->function = function; 306 alarm->function = function;
303 alarm->type = type; 307 alarm->type = type;
304 alarm->state = ALARMTIMER_STATE_INACTIVE; 308 alarm->enabled = 0;
305} 309}
306 310
307/** 311/**
308 * alarm_start - Sets an alarm to fire 312 * alarm_start - Sets an alarm to fire
309 * @alarm: ptr to alarm to set 313 * @alarm: ptr to alarm to set
310 * @start: time to run the alarm 314 * @start: time to run the alarm
315 * @period: period at which the alarm will recur
311 */ 316 */
312int alarm_start(struct alarm *alarm, ktime_t start) 317void alarm_start(struct alarm *alarm, ktime_t start, ktime_t period)
313{ 318{
314 struct alarm_base *base = &alarm_bases[alarm->type]; 319 struct alarm_base *base = &alarm_bases[alarm->type];
315 unsigned long flags; 320 unsigned long flags;
316 int ret;
317 321
318 spin_lock_irqsave(&base->lock, flags); 322 spin_lock_irqsave(&base->lock, flags);
323 if (alarm->enabled)
324 alarmtimer_remove(base, alarm);
319 alarm->node.expires = start; 325 alarm->node.expires = start;
326 alarm->period = period;
320 alarmtimer_enqueue(base, alarm); 327 alarmtimer_enqueue(base, alarm);
321 ret = hrtimer_start(&alarm->timer, alarm->node.expires, 328 alarm->enabled = 1;
322 HRTIMER_MODE_ABS);
323 spin_unlock_irqrestore(&base->lock, flags); 329 spin_unlock_irqrestore(&base->lock, flags);
324 return ret;
325} 330}
326 331
327/** 332/**
328 * alarm_try_to_cancel - Tries to cancel an alarm timer 333 * alarm_cancel - Tries to cancel an alarm timer
329 * @alarm: ptr to alarm to be canceled 334 * @alarm: ptr to alarm to be canceled
330 *
331 * Returns 1 if the timer was canceled, 0 if it was not running,
332 * and -1 if the callback was running
333 */ 335 */
334int alarm_try_to_cancel(struct alarm *alarm) 336void alarm_cancel(struct alarm *alarm)
335{ 337{
336 struct alarm_base *base = &alarm_bases[alarm->type]; 338 struct alarm_base *base = &alarm_bases[alarm->type];
337 unsigned long flags; 339 unsigned long flags;
338 int ret;
339 340
340 spin_lock_irqsave(&base->lock, flags); 341 spin_lock_irqsave(&base->lock, flags);
341 ret = hrtimer_try_to_cancel(&alarm->timer); 342 if (alarm->enabled)
342 if (ret >= 0) 343 alarmtimer_remove(base, alarm);
343 alarmtimer_dequeue(base, alarm); 344 alarm->enabled = 0;
344 spin_unlock_irqrestore(&base->lock, flags); 345 spin_unlock_irqrestore(&base->lock, flags);
345 return ret;
346}
347
348
349/**
350 * alarm_cancel - Spins trying to cancel an alarm timer until it is done
351 * @alarm: ptr to alarm to be canceled
352 *
353 * Returns 1 if the timer was canceled, 0 if it was not active.
354 */
355int alarm_cancel(struct alarm *alarm)
356{
357 for (;;) {
358 int ret = alarm_try_to_cancel(alarm);
359 if (ret >= 0)
360 return ret;
361 cpu_relax();
362 }
363}
364
365
366u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval)
367{
368 u64 overrun = 1;
369 ktime_t delta;
370
371 delta = ktime_sub(now, alarm->node.expires);
372
373 if (delta.tv64 < 0)
374 return 0;
375
376 if (unlikely(delta.tv64 >= interval.tv64)) {
377 s64 incr = ktime_to_ns(interval);
378
379 overrun = ktime_divns(delta, incr);
380
381 alarm->node.expires = ktime_add_ns(alarm->node.expires,
382 incr*overrun);
383
384 if (alarm->node.expires.tv64 > now.tv64)
385 return overrun;
386 /*
387 * This (and the ktime_add() below) is the
388 * correction for exact:
389 */
390 overrun++;
391 }
392
393 alarm->node.expires = ktime_add(alarm->node.expires, interval);
394 return overrun;
395} 346}
396 347
397 348
398
399
400/** 349/**
401 * clock2alarm - helper that converts from clockid to alarmtypes 350 * clock2alarm - helper that converts from clockid to alarmtypes
402 * @clockid: clockid. 351 * @clockid: clockid.
@@ -416,21 +365,12 @@ static enum alarmtimer_type clock2alarm(clockid_t clockid)
416 * 365 *
417 * Posix timer callback for expired alarm timers. 366 * Posix timer callback for expired alarm timers.
418 */ 367 */
419static enum alarmtimer_restart alarm_handle_timer(struct alarm *alarm, 368static void alarm_handle_timer(struct alarm *alarm)
420 ktime_t now)
421{ 369{
422 struct k_itimer *ptr = container_of(alarm, struct k_itimer, 370 struct k_itimer *ptr = container_of(alarm, struct k_itimer,
423 it.alarm.alarmtimer); 371 it.alarmtimer);
424 if (posix_timer_event(ptr, 0) != 0) 372 if (posix_timer_event(ptr, 0) != 0)
425 ptr->it_overrun++; 373 ptr->it_overrun++;
426
427 /* Re-add periodic timers */
428 if (ptr->it.alarm.interval.tv64) {
429 ptr->it_overrun += alarm_forward(alarm, now,
430 ptr->it.alarm.interval);
431 return ALARMTIMER_RESTART;
432 }
433 return ALARMTIMER_NORESTART;
434} 374}
435 375
436/** 376/**
@@ -487,7 +427,7 @@ static int alarm_timer_create(struct k_itimer *new_timer)
487 427
488 type = clock2alarm(new_timer->it_clock); 428 type = clock2alarm(new_timer->it_clock);
489 base = &alarm_bases[type]; 429 base = &alarm_bases[type];
490 alarm_init(&new_timer->it.alarm.alarmtimer, type, alarm_handle_timer); 430 alarm_init(&new_timer->it.alarmtimer, type, alarm_handle_timer);
491 return 0; 431 return 0;
492} 432}
493 433
@@ -504,9 +444,9 @@ static void alarm_timer_get(struct k_itimer *timr,
504 memset(cur_setting, 0, sizeof(struct itimerspec)); 444 memset(cur_setting, 0, sizeof(struct itimerspec));
505 445
506 cur_setting->it_interval = 446 cur_setting->it_interval =
507 ktime_to_timespec(timr->it.alarm.interval); 447 ktime_to_timespec(timr->it.alarmtimer.period);
508 cur_setting->it_value = 448 cur_setting->it_value =
509 ktime_to_timespec(timr->it.alarm.alarmtimer.node.expires); 449 ktime_to_timespec(timr->it.alarmtimer.node.expires);
510 return; 450 return;
511} 451}
512 452
@@ -521,9 +461,7 @@ static int alarm_timer_del(struct k_itimer *timr)
521 if (!rtcdev) 461 if (!rtcdev)
522 return -ENOTSUPP; 462 return -ENOTSUPP;
523 463
524 if (alarm_try_to_cancel(&timr->it.alarm.alarmtimer) < 0) 464 alarm_cancel(&timr->it.alarmtimer);
525 return TIMER_RETRY;
526
527 return 0; 465 return 0;
528} 466}
529 467
@@ -543,17 +481,25 @@ static int alarm_timer_set(struct k_itimer *timr, int flags,
543 if (!rtcdev) 481 if (!rtcdev)
544 return -ENOTSUPP; 482 return -ENOTSUPP;
545 483
484 /*
485 * XXX HACK! Currently we can DOS a system if the interval
486 * period on alarmtimers is too small. Cap the interval here
487 * to 100us and solve this properly in a future patch! -jstultz
488 */
489 if ((new_setting->it_interval.tv_sec == 0) &&
490 (new_setting->it_interval.tv_nsec < 100000))
491 new_setting->it_interval.tv_nsec = 100000;
492
546 if (old_setting) 493 if (old_setting)
547 alarm_timer_get(timr, old_setting); 494 alarm_timer_get(timr, old_setting);
548 495
549 /* If the timer was already set, cancel it */ 496 /* If the timer was already set, cancel it */
550 if (alarm_try_to_cancel(&timr->it.alarm.alarmtimer) < 0) 497 alarm_cancel(&timr->it.alarmtimer);
551 return TIMER_RETRY;
552 498
553 /* start the timer */ 499 /* start the timer */
554 timr->it.alarm.interval = timespec_to_ktime(new_setting->it_interval); 500 alarm_start(&timr->it.alarmtimer,
555 alarm_start(&timr->it.alarm.alarmtimer, 501 timespec_to_ktime(new_setting->it_value),
556 timespec_to_ktime(new_setting->it_value)); 502 timespec_to_ktime(new_setting->it_interval));
557 return 0; 503 return 0;
558} 504}
559 505
@@ -563,15 +509,13 @@ static int alarm_timer_set(struct k_itimer *timr, int flags,
563 * 509 *
564 * Wakes up the task that set the alarmtimer 510 * Wakes up the task that set the alarmtimer
565 */ 511 */
566static enum alarmtimer_restart alarmtimer_nsleep_wakeup(struct alarm *alarm, 512static void alarmtimer_nsleep_wakeup(struct alarm *alarm)
567 ktime_t now)
568{ 513{
569 struct task_struct *task = (struct task_struct *)alarm->data; 514 struct task_struct *task = (struct task_struct *)alarm->data;
570 515
571 alarm->data = NULL; 516 alarm->data = NULL;
572 if (task) 517 if (task)
573 wake_up_process(task); 518 wake_up_process(task);
574 return ALARMTIMER_NORESTART;
575} 519}
576 520
577/** 521/**
@@ -586,7 +530,7 @@ static int alarmtimer_do_nsleep(struct alarm *alarm, ktime_t absexp)
586 alarm->data = (void *)current; 530 alarm->data = (void *)current;
587 do { 531 do {
588 set_current_state(TASK_INTERRUPTIBLE); 532 set_current_state(TASK_INTERRUPTIBLE);
589 alarm_start(alarm, absexp); 533 alarm_start(alarm, absexp, ktime_set(0, 0));
590 if (likely(alarm->data)) 534 if (likely(alarm->data))
591 schedule(); 535 schedule();
592 536
@@ -747,7 +691,6 @@ static struct platform_driver alarmtimer_driver = {
747 */ 691 */
748static int __init alarmtimer_init(void) 692static int __init alarmtimer_init(void)
749{ 693{
750 struct platform_device *pdev;
751 int error = 0; 694 int error = 0;
752 int i; 695 int i;
753 struct k_clock alarm_clock = { 696 struct k_clock alarm_clock = {
@@ -760,8 +703,6 @@ static int __init alarmtimer_init(void)
760 .nsleep = alarm_timer_nsleep, 703 .nsleep = alarm_timer_nsleep,
761 }; 704 };
762 705
763 alarmtimer_rtc_timer_init();
764
765 posix_timers_register_clock(CLOCK_REALTIME_ALARM, &alarm_clock); 706 posix_timers_register_clock(CLOCK_REALTIME_ALARM, &alarm_clock);
766 posix_timers_register_clock(CLOCK_BOOTTIME_ALARM, &alarm_clock); 707 posix_timers_register_clock(CLOCK_BOOTTIME_ALARM, &alarm_clock);
767 708
@@ -773,28 +714,15 @@ static int __init alarmtimer_init(void)
773 for (i = 0; i < ALARM_NUMTYPE; i++) { 714 for (i = 0; i < ALARM_NUMTYPE; i++) {
774 timerqueue_init_head(&alarm_bases[i].timerqueue); 715 timerqueue_init_head(&alarm_bases[i].timerqueue);
775 spin_lock_init(&alarm_bases[i].lock); 716 spin_lock_init(&alarm_bases[i].lock);
717 hrtimer_init(&alarm_bases[i].timer,
718 alarm_bases[i].base_clockid,
719 HRTIMER_MODE_ABS);
720 alarm_bases[i].timer.function = alarmtimer_fired;
776 } 721 }
777
778 error = alarmtimer_rtc_interface_setup();
779 if (error)
780 return error;
781
782 error = platform_driver_register(&alarmtimer_driver); 722 error = platform_driver_register(&alarmtimer_driver);
783 if (error) 723 platform_device_register_simple("alarmtimer", -1, NULL, 0);
784 goto out_if;
785
786 pdev = platform_device_register_simple("alarmtimer", -1, NULL, 0);
787 if (IS_ERR(pdev)) {
788 error = PTR_ERR(pdev);
789 goto out_drv;
790 }
791 ws = wakeup_source_register("alarmtimer");
792 return 0;
793 724
794out_drv:
795 platform_driver_unregister(&alarmtimer_driver);
796out_if:
797 alarmtimer_rtc_interface_remove();
798 return error; 725 return error;
799} 726}
800device_initcall(alarmtimer_init); 727device_initcall(alarmtimer_init);
728
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 30b6de0d977..e4c699dfa4e 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -17,6 +17,7 @@
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/notifier.h> 18#include <linux/notifier.h>
19#include <linux/smp.h> 19#include <linux/smp.h>
20#include <linux/sysdev.h>
20 21
21#include "tick-internal.h" 22#include "tick-internal.h"
22 23
@@ -93,143 +94,42 @@ void clockevents_shutdown(struct clock_event_device *dev)
93 dev->next_event.tv64 = KTIME_MAX; 94 dev->next_event.tv64 = KTIME_MAX;
94} 95}
95 96
96#ifdef CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST
97
98/* Limit min_delta to a jiffie */
99#define MIN_DELTA_LIMIT (NSEC_PER_SEC / HZ)
100
101/**
102 * clockevents_increase_min_delta - raise minimum delta of a clock event device
103 * @dev: device to increase the minimum delta
104 *
105 * Returns 0 on success, -ETIME when the minimum delta reached the limit.
106 */
107static int clockevents_increase_min_delta(struct clock_event_device *dev)
108{
109 /* Nothing to do if we already reached the limit */
110 if (dev->min_delta_ns >= MIN_DELTA_LIMIT) {
111 printk(KERN_WARNING "CE: Reprogramming failure. Giving up\n");
112 dev->next_event.tv64 = KTIME_MAX;
113 return -ETIME;
114 }
115
116 if (dev->min_delta_ns < 5000)
117 dev->min_delta_ns = 5000;
118 else
119 dev->min_delta_ns += dev->min_delta_ns >> 1;
120
121 if (dev->min_delta_ns > MIN_DELTA_LIMIT)
122 dev->min_delta_ns = MIN_DELTA_LIMIT;
123
124 printk(KERN_WARNING "CE: %s increased min_delta_ns to %llu nsec\n",
125 dev->name ? dev->name : "?",
126 (unsigned long long) dev->min_delta_ns);
127 return 0;
128}
129
130/**
131 * clockevents_program_min_delta - Set clock event device to the minimum delay.
132 * @dev: device to program
133 *
134 * Returns 0 on success, -ETIME when the retry loop failed.
135 */
136static int clockevents_program_min_delta(struct clock_event_device *dev)
137{
138 unsigned long long clc;
139 int64_t delta;
140 int i;
141
142 for (i = 0;;) {
143 delta = dev->min_delta_ns;
144 dev->next_event = ktime_add_ns(ktime_get(), delta);
145
146 if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN)
147 return 0;
148
149 dev->retries++;
150 clc = ((unsigned long long) delta * dev->mult) >> dev->shift;
151 if (dev->set_next_event((unsigned long) clc, dev) == 0)
152 return 0;
153
154 if (++i > 2) {
155 /*
156 * We tried 3 times to program the device with the
157 * given min_delta_ns. Try to increase the minimum
158 * delta, if that fails as well get out of here.
159 */
160 if (clockevents_increase_min_delta(dev))
161 return -ETIME;
162 i = 0;
163 }
164 }
165}
166
167#else /* CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST */
168
169/**
170 * clockevents_program_min_delta - Set clock event device to the minimum delay.
171 * @dev: device to program
172 *
173 * Returns 0 on success, -ETIME when the retry loop failed.
174 */
175static int clockevents_program_min_delta(struct clock_event_device *dev)
176{
177 unsigned long long clc;
178 int64_t delta;
179
180 delta = dev->min_delta_ns;
181 dev->next_event = ktime_add_ns(ktime_get(), delta);
182
183 if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN)
184 return 0;
185
186 dev->retries++;
187 clc = ((unsigned long long) delta * dev->mult) >> dev->shift;
188 return dev->set_next_event((unsigned long) clc, dev);
189}
190
191#endif /* CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST */
192
193/** 97/**
194 * clockevents_program_event - Reprogram the clock event device. 98 * clockevents_program_event - Reprogram the clock event device.
195 * @dev: device to program
196 * @expires: absolute expiry time (monotonic clock) 99 * @expires: absolute expiry time (monotonic clock)
197 * @force: program minimum delay if expires can not be set
198 * 100 *
199 * Returns 0 on success, -ETIME when the event is in the past. 101 * Returns 0 on success, -ETIME when the event is in the past.
200 */ 102 */
201int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, 103int clockevents_program_event(struct clock_event_device *dev, ktime_t expires,
202 bool force) 104 ktime_t now)
203{ 105{
204 unsigned long long clc; 106 unsigned long long clc;
205 int64_t delta; 107 int64_t delta;
206 int rc;
207 108
208 if (unlikely(expires.tv64 < 0)) { 109 if (unlikely(expires.tv64 < 0)) {
209 WARN_ON_ONCE(1); 110 WARN_ON_ONCE(1);
210 return -ETIME; 111 return -ETIME;
211 } 112 }
212 113
114 delta = ktime_to_ns(ktime_sub(expires, now));
115
116 if (delta <= 0)
117 return -ETIME;
118
213 dev->next_event = expires; 119 dev->next_event = expires;
214 120
215 if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN) 121 if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN)
216 return 0; 122 return 0;
217 123
218 /* Shortcut for clockevent devices that can deal with ktime. */ 124 if (delta > dev->max_delta_ns)
219 if (dev->features & CLOCK_EVT_FEAT_KTIME) 125 delta = dev->max_delta_ns;
220 return dev->set_next_ktime(expires, dev); 126 if (delta < dev->min_delta_ns)
221 127 delta = dev->min_delta_ns;
222 delta = ktime_to_ns(ktime_sub(expires, ktime_get()));
223 if (delta <= 0)
224 return force ? clockevents_program_min_delta(dev) : -ETIME;
225
226 delta = min(delta, (int64_t) dev->max_delta_ns);
227 delta = max(delta, (int64_t) dev->min_delta_ns);
228 128
229 clc = ((unsigned long long) delta * dev->mult) >> dev->shift; 129 clc = delta * dev->mult;
230 rc = dev->set_next_event((unsigned long) clc, dev); 130 clc >>= dev->shift;
231 131
232 return (rc && force) ? clockevents_program_min_delta(dev) : rc; 132 return dev->set_next_event((unsigned long) clc, dev);
233} 133}
234 134
235/** 135/**
@@ -297,7 +197,8 @@ void clockevents_register_device(struct clock_event_device *dev)
297} 197}
298EXPORT_SYMBOL_GPL(clockevents_register_device); 198EXPORT_SYMBOL_GPL(clockevents_register_device);
299 199
300void clockevents_config(struct clock_event_device *dev, u32 freq) 200static void clockevents_config(struct clock_event_device *dev,
201 u32 freq)
301{ 202{
302 u64 sec; 203 u64 sec;
303 204
@@ -357,7 +258,7 @@ int clockevents_update_freq(struct clock_event_device *dev, u32 freq)
357 if (dev->mode != CLOCK_EVT_MODE_ONESHOT) 258 if (dev->mode != CLOCK_EVT_MODE_ONESHOT)
358 return 0; 259 return 0;
359 260
360 return clockevents_program_event(dev, dev->next_event, false); 261 return clockevents_program_event(dev, dev->next_event, ktime_get());
361} 262}
362 263
363/* 264/*
@@ -397,30 +298,6 @@ void clockevents_exchange_device(struct clock_event_device *old,
397 local_irq_restore(flags); 298 local_irq_restore(flags);
398} 299}
399 300
400/**
401 * clockevents_suspend - suspend clock devices
402 */
403void clockevents_suspend(void)
404{
405 struct clock_event_device *dev;
406
407 list_for_each_entry_reverse(dev, &clockevent_devices, list)
408 if (dev->suspend)
409 dev->suspend(dev);
410}
411
412/**
413 * clockevents_resume - resume clock devices
414 */
415void clockevents_resume(void)
416{
417 struct clock_event_device *dev;
418
419 list_for_each_entry(dev, &clockevent_devices, list)
420 if (dev->resume)
421 dev->resume(dev);
422}
423
424#ifdef CONFIG_GENERIC_CLOCKEVENTS 301#ifdef CONFIG_GENERIC_CLOCKEVENTS
425/** 302/**
426 * clockevents_notify - notification about relevant events 303 * clockevents_notify - notification about relevant events
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index c9583382141..8f77da18fef 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -23,8 +23,8 @@
23 * o Allow clocksource drivers to be unregistered 23 * o Allow clocksource drivers to be unregistered
24 */ 24 */
25 25
26#include <linux/device.h>
27#include <linux/clocksource.h> 26#include <linux/clocksource.h>
27#include <linux/sysdev.h>
28#include <linux/init.h> 28#include <linux/init.h>
29#include <linux/module.h> 29#include <linux/module.h>
30#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */ 30#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
@@ -186,7 +186,6 @@ static struct timer_list watchdog_timer;
186static DECLARE_WORK(watchdog_work, clocksource_watchdog_work); 186static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
187static DEFINE_SPINLOCK(watchdog_lock); 187static DEFINE_SPINLOCK(watchdog_lock);
188static int watchdog_running; 188static int watchdog_running;
189static atomic_t watchdog_reset_pending;
190 189
191static int clocksource_watchdog_kthread(void *data); 190static int clocksource_watchdog_kthread(void *data);
192static void __clocksource_change_rating(struct clocksource *cs, int rating); 191static void __clocksource_change_rating(struct clocksource *cs, int rating);
@@ -248,14 +247,12 @@ static void clocksource_watchdog(unsigned long data)
248 struct clocksource *cs; 247 struct clocksource *cs;
249 cycle_t csnow, wdnow; 248 cycle_t csnow, wdnow;
250 int64_t wd_nsec, cs_nsec; 249 int64_t wd_nsec, cs_nsec;
251 int next_cpu, reset_pending; 250 int next_cpu;
252 251
253 spin_lock(&watchdog_lock); 252 spin_lock(&watchdog_lock);
254 if (!watchdog_running) 253 if (!watchdog_running)
255 goto out; 254 goto out;
256 255
257 reset_pending = atomic_read(&watchdog_reset_pending);
258
259 list_for_each_entry(cs, &watchdog_list, wd_list) { 256 list_for_each_entry(cs, &watchdog_list, wd_list) {
260 257
261 /* Clocksource already marked unstable? */ 258 /* Clocksource already marked unstable? */
@@ -271,8 +268,7 @@ static void clocksource_watchdog(unsigned long data)
271 local_irq_enable(); 268 local_irq_enable();
272 269
273 /* Clocksource initialized ? */ 270 /* Clocksource initialized ? */
274 if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) || 271 if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) {
275 atomic_read(&watchdog_reset_pending)) {
276 cs->flags |= CLOCK_SOURCE_WATCHDOG; 272 cs->flags |= CLOCK_SOURCE_WATCHDOG;
277 cs->wd_last = wdnow; 273 cs->wd_last = wdnow;
278 cs->cs_last = csnow; 274 cs->cs_last = csnow;
@@ -287,11 +283,8 @@ static void clocksource_watchdog(unsigned long data)
287 cs->cs_last = csnow; 283 cs->cs_last = csnow;
288 cs->wd_last = wdnow; 284 cs->wd_last = wdnow;
289 285
290 if (atomic_read(&watchdog_reset_pending))
291 continue;
292
293 /* Check the deviation from the watchdog clocksource. */ 286 /* Check the deviation from the watchdog clocksource. */
294 if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) { 287 if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) {
295 clocksource_unstable(cs, cs_nsec - wd_nsec); 288 clocksource_unstable(cs, cs_nsec - wd_nsec);
296 continue; 289 continue;
297 } 290 }
@@ -310,13 +303,6 @@ static void clocksource_watchdog(unsigned long data)
310 } 303 }
311 304
312 /* 305 /*
313 * We only clear the watchdog_reset_pending, when we did a
314 * full cycle through all clocksources.
315 */
316 if (reset_pending)
317 atomic_dec(&watchdog_reset_pending);
318
319 /*
320 * Cycle through CPUs to check if the CPUs stay synchronized 306 * Cycle through CPUs to check if the CPUs stay synchronized
321 * to each other. 307 * to each other.
322 */ 308 */
@@ -358,7 +344,23 @@ static inline void clocksource_reset_watchdog(void)
358 344
359static void clocksource_resume_watchdog(void) 345static void clocksource_resume_watchdog(void)
360{ 346{
361 atomic_inc(&watchdog_reset_pending); 347 unsigned long flags;
348
349 /*
350 * We use trylock here to avoid a potential dead lock when
351 * kgdb calls this code after the kernel has been stopped with
352 * watchdog_lock held. When watchdog_lock is held we just
353 * return and accept, that the watchdog might trigger and mark
354 * the monitored clock source (usually TSC) unstable.
355 *
356 * This does not affect the other caller clocksource_resume()
357 * because at this point the kernel is UP, interrupts are
358 * disabled and nothing can hold watchdog_lock.
359 */
360 if (!spin_trylock_irqsave(&watchdog_lock, flags))
361 return;
362 clocksource_reset_watchdog();
363 spin_unlock_irqrestore(&watchdog_lock, flags);
362} 364}
363 365
364static void clocksource_enqueue_watchdog(struct clocksource *cs) 366static void clocksource_enqueue_watchdog(struct clocksource *cs)
@@ -500,7 +502,7 @@ static u32 clocksource_max_adjustment(struct clocksource *cs)
500{ 502{
501 u64 ret; 503 u64 ret;
502 /* 504 /*
503 * We won't try to correct for more than 11% adjustments (110,000 ppm), 505 * We won't try to correct for more then 11% adjustments (110,000 ppm),
504 */ 506 */
505 ret = (u64)cs->mult * 11; 507 ret = (u64)cs->mult * 11;
506 do_div(ret,100); 508 do_div(ret,100);
@@ -647,7 +649,7 @@ static void clocksource_enqueue(struct clocksource *cs)
647 649
648/** 650/**
649 * __clocksource_updatefreq_scale - Used update clocksource with new freq 651 * __clocksource_updatefreq_scale - Used update clocksource with new freq
650 * @cs: clocksource to be registered 652 * @t: clocksource to be registered
651 * @scale: Scale factor multiplied against freq to get clocksource hz 653 * @scale: Scale factor multiplied against freq to get clocksource hz
652 * @freq: clocksource frequency (cycles per second) divided by scale 654 * @freq: clocksource frequency (cycles per second) divided by scale
653 * 655 *
@@ -699,7 +701,7 @@ EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale);
699 701
700/** 702/**
701 * __clocksource_register_scale - Used to install new clocksources 703 * __clocksource_register_scale - Used to install new clocksources
702 * @cs: clocksource to be registered 704 * @t: clocksource to be registered
703 * @scale: Scale factor multiplied against freq to get clocksource hz 705 * @scale: Scale factor multiplied against freq to get clocksource hz
704 * @freq: clocksource frequency (cycles per second) divided by scale 706 * @freq: clocksource frequency (cycles per second) divided by scale
705 * 707 *
@@ -727,7 +729,7 @@ EXPORT_SYMBOL_GPL(__clocksource_register_scale);
727 729
728/** 730/**
729 * clocksource_register - Used to install new clocksources 731 * clocksource_register - Used to install new clocksources
730 * @cs: clocksource to be registered 732 * @t: clocksource to be registered
731 * 733 *
732 * Returns -EBUSY if registration fails, zero otherwise. 734 * Returns -EBUSY if registration fails, zero otherwise.
733 */ 735 */
@@ -761,8 +763,6 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating)
761 763
762/** 764/**
763 * clocksource_change_rating - Change the rating of a registered clocksource 765 * clocksource_change_rating - Change the rating of a registered clocksource
764 * @cs: clocksource to be changed
765 * @rating: new rating
766 */ 766 */
767void clocksource_change_rating(struct clocksource *cs, int rating) 767void clocksource_change_rating(struct clocksource *cs, int rating)
768{ 768{
@@ -774,7 +774,6 @@ EXPORT_SYMBOL(clocksource_change_rating);
774 774
775/** 775/**
776 * clocksource_unregister - remove a registered clocksource 776 * clocksource_unregister - remove a registered clocksource
777 * @cs: clocksource to be unregistered
778 */ 777 */
779void clocksource_unregister(struct clocksource *cs) 778void clocksource_unregister(struct clocksource *cs)
780{ 779{
@@ -790,14 +789,13 @@ EXPORT_SYMBOL(clocksource_unregister);
790/** 789/**
791 * sysfs_show_current_clocksources - sysfs interface for current clocksource 790 * sysfs_show_current_clocksources - sysfs interface for current clocksource
792 * @dev: unused 791 * @dev: unused
793 * @attr: unused
794 * @buf: char buffer to be filled with clocksource list 792 * @buf: char buffer to be filled with clocksource list
795 * 793 *
796 * Provides sysfs interface for listing current clocksource. 794 * Provides sysfs interface for listing current clocksource.
797 */ 795 */
798static ssize_t 796static ssize_t
799sysfs_show_current_clocksources(struct device *dev, 797sysfs_show_current_clocksources(struct sys_device *dev,
800 struct device_attribute *attr, char *buf) 798 struct sysdev_attribute *attr, char *buf)
801{ 799{
802 ssize_t count = 0; 800 ssize_t count = 0;
803 801
@@ -811,15 +809,14 @@ sysfs_show_current_clocksources(struct device *dev,
811/** 809/**
812 * sysfs_override_clocksource - interface for manually overriding clocksource 810 * sysfs_override_clocksource - interface for manually overriding clocksource
813 * @dev: unused 811 * @dev: unused
814 * @attr: unused
815 * @buf: name of override clocksource 812 * @buf: name of override clocksource
816 * @count: length of buffer 813 * @count: length of buffer
817 * 814 *
818 * Takes input from sysfs interface for manually overriding the default 815 * Takes input from sysfs interface for manually overriding the default
819 * clocksource selection. 816 * clocksource selection.
820 */ 817 */
821static ssize_t sysfs_override_clocksource(struct device *dev, 818static ssize_t sysfs_override_clocksource(struct sys_device *dev,
822 struct device_attribute *attr, 819 struct sysdev_attribute *attr,
823 const char *buf, size_t count) 820 const char *buf, size_t count)
824{ 821{
825 size_t ret = count; 822 size_t ret = count;
@@ -847,14 +844,13 @@ static ssize_t sysfs_override_clocksource(struct device *dev,
847/** 844/**
848 * sysfs_show_available_clocksources - sysfs interface for listing clocksource 845 * sysfs_show_available_clocksources - sysfs interface for listing clocksource
849 * @dev: unused 846 * @dev: unused
850 * @attr: unused
851 * @buf: char buffer to be filled with clocksource list 847 * @buf: char buffer to be filled with clocksource list
852 * 848 *
853 * Provides sysfs interface for listing registered clocksources 849 * Provides sysfs interface for listing registered clocksources
854 */ 850 */
855static ssize_t 851static ssize_t
856sysfs_show_available_clocksources(struct device *dev, 852sysfs_show_available_clocksources(struct sys_device *dev,
857 struct device_attribute *attr, 853 struct sysdev_attribute *attr,
858 char *buf) 854 char *buf)
859{ 855{
860 struct clocksource *src; 856 struct clocksource *src;
@@ -883,36 +879,35 @@ sysfs_show_available_clocksources(struct device *dev,
883/* 879/*
884 * Sysfs setup bits: 880 * Sysfs setup bits:
885 */ 881 */
886static DEVICE_ATTR(current_clocksource, 0644, sysfs_show_current_clocksources, 882static SYSDEV_ATTR(current_clocksource, 0644, sysfs_show_current_clocksources,
887 sysfs_override_clocksource); 883 sysfs_override_clocksource);
888 884
889static DEVICE_ATTR(available_clocksource, 0444, 885static SYSDEV_ATTR(available_clocksource, 0444,
890 sysfs_show_available_clocksources, NULL); 886 sysfs_show_available_clocksources, NULL);
891 887
892static struct bus_type clocksource_subsys = { 888static struct sysdev_class clocksource_sysclass = {
893 .name = "clocksource", 889 .name = "clocksource",
894 .dev_name = "clocksource",
895}; 890};
896 891
897static struct device device_clocksource = { 892static struct sys_device device_clocksource = {
898 .id = 0, 893 .id = 0,
899 .bus = &clocksource_subsys, 894 .cls = &clocksource_sysclass,
900}; 895};
901 896
902static int __init init_clocksource_sysfs(void) 897static int __init init_clocksource_sysfs(void)
903{ 898{
904 int error = subsys_system_register(&clocksource_subsys, NULL); 899 int error = sysdev_class_register(&clocksource_sysclass);
905 900
906 if (!error) 901 if (!error)
907 error = device_register(&device_clocksource); 902 error = sysdev_register(&device_clocksource);
908 if (!error) 903 if (!error)
909 error = device_create_file( 904 error = sysdev_create_file(
910 &device_clocksource, 905 &device_clocksource,
911 &dev_attr_current_clocksource); 906 &attr_current_clocksource);
912 if (!error) 907 if (!error)
913 error = device_create_file( 908 error = sysdev_create_file(
914 &device_clocksource, 909 &device_clocksource,
915 &dev_attr_available_clocksource); 910 &attr_available_clocksource);
916 return error; 911 return error;
917} 912}
918 913
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 7a925ba456f..a470154e040 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -37,7 +37,7 @@
37 * requested HZ value. It is also not recommended 37 * requested HZ value. It is also not recommended
38 * for "tick-less" systems. 38 * for "tick-less" systems.
39 */ 39 */
40#define NSEC_PER_JIFFY ((NSEC_PER_SEC+HZ/2)/HZ) 40#define NSEC_PER_JIFFY ((u32)((((u64)NSEC_PER_SEC)<<8)/ACTHZ))
41 41
42/* Since jiffies uses a simple NSEC_PER_JIFFY multiplier 42/* Since jiffies uses a simple NSEC_PER_JIFFY multiplier
43 * conversion, the .shift value could be zero. However 43 * conversion, the .shift value could be zero. However
@@ -58,7 +58,7 @@ static cycle_t jiffies_read(struct clocksource *cs)
58 return (cycle_t) jiffies; 58 return (cycle_t) jiffies;
59} 59}
60 60
61static struct clocksource clocksource_jiffies = { 61struct clocksource clocksource_jiffies = {
62 .name = "jiffies", 62 .name = "jiffies",
63 .rating = 1, /* lowest valid rating*/ 63 .rating = 1, /* lowest valid rating*/
64 .read = jiffies_read, 64 .read = jiffies_read,
@@ -67,8 +67,6 @@ static struct clocksource clocksource_jiffies = {
67 .shift = JIFFIES_SHIFT, 67 .shift = JIFFIES_SHIFT,
68}; 68};
69 69
70__cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock);
71
72#if (BITS_PER_LONG < 64) 70#if (BITS_PER_LONG < 64)
73u64 get_jiffies_64(void) 71u64 get_jiffies_64(void)
74{ 72{
@@ -76,9 +74,9 @@ u64 get_jiffies_64(void)
76 u64 ret; 74 u64 ret;
77 75
78 do { 76 do {
79 seq = read_seqbegin(&jiffies_lock); 77 seq = read_seqbegin(&xtime_lock);
80 ret = jiffies_64; 78 ret = jiffies_64;
81 } while (read_seqretry(&jiffies_lock, seq)); 79 } while (read_seqretry(&xtime_lock, seq));
82 return ret; 80 return ret;
83} 81}
84EXPORT_SYMBOL(get_jiffies_64); 82EXPORT_SYMBOL(get_jiffies_64);
@@ -97,33 +95,3 @@ struct clocksource * __init __weak clocksource_default_clock(void)
97{ 95{
98 return &clocksource_jiffies; 96 return &clocksource_jiffies;
99} 97}
100
101struct clocksource refined_jiffies;
102
103int register_refined_jiffies(long cycles_per_second)
104{
105 u64 nsec_per_tick, shift_hz;
106 long cycles_per_tick;
107
108
109
110 refined_jiffies = clocksource_jiffies;
111 refined_jiffies.name = "refined-jiffies";
112 refined_jiffies.rating++;
113
114 /* Calc cycles per tick */
115 cycles_per_tick = (cycles_per_second + HZ/2)/HZ;
116 /* shift_hz stores hz<<8 for extra accuracy */
117 shift_hz = (u64)cycles_per_second << 8;
118 shift_hz += cycles_per_tick/2;
119 do_div(shift_hz, cycles_per_tick);
120 /* Calculate nsec_per_tick using shift_hz */
121 nsec_per_tick = (u64)NSEC_PER_SEC << 8;
122 nsec_per_tick += (u32)shift_hz/2;
123 do_div(nsec_per_tick, (u32)shift_hz);
124
125 refined_jiffies.mult = ((u32)nsec_per_tick) << JIFFIES_SHIFT;
126
127 clocksource_register(&refined_jiffies);
128 return 0;
129}
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 24174b4d669..f6117a4c7cb 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -22,18 +22,17 @@
22 * NTP timekeeping variables: 22 * NTP timekeeping variables:
23 */ 23 */
24 24
25DEFINE_SPINLOCK(ntp_lock);
26
27
28/* USER_HZ period (usecs): */ 25/* USER_HZ period (usecs): */
29unsigned long tick_usec = TICK_USEC; 26unsigned long tick_usec = TICK_USEC;
30 27
31/* SHIFTED_HZ period (nsecs): */ 28/* ACTHZ period (nsecs): */
32unsigned long tick_nsec; 29unsigned long tick_nsec;
33 30
34static u64 tick_length; 31u64 tick_length;
35static u64 tick_length_base; 32static u64 tick_length_base;
36 33
34static struct hrtimer leap_timer;
35
37#define MAX_TICKADJ 500LL /* usecs */ 36#define MAX_TICKADJ 500LL /* usecs */
38#define MAX_TICKADJ_SCALED \ 37#define MAX_TICKADJ_SCALED \
39 (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) 38 (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ)
@@ -50,7 +49,7 @@ static u64 tick_length_base;
50static int time_state = TIME_OK; 49static int time_state = TIME_OK;
51 50
52/* clock status bits: */ 51/* clock status bits: */
53static int time_status = STA_UNSYNC; 52int time_status = STA_UNSYNC;
54 53
55/* TAI offset (secs): */ 54/* TAI offset (secs): */
56static long time_tai; 55static long time_tai;
@@ -134,7 +133,7 @@ static inline void pps_reset_freq_interval(void)
134/** 133/**
135 * pps_clear - Clears the PPS state variables 134 * pps_clear - Clears the PPS state variables
136 * 135 *
137 * Must be called while holding a write on the ntp_lock 136 * Must be called while holding a write on the xtime_lock
138 */ 137 */
139static inline void pps_clear(void) 138static inline void pps_clear(void)
140{ 139{
@@ -150,7 +149,7 @@ static inline void pps_clear(void)
150 * the last PPS signal. When it reaches 0, indicate that PPS signal is 149 * the last PPS signal. When it reaches 0, indicate that PPS signal is
151 * missing. 150 * missing.
152 * 151 *
153 * Must be called while holding a write on the ntp_lock 152 * Must be called while holding a write on the xtime_lock
154 */ 153 */
155static inline void pps_dec_valid(void) 154static inline void pps_dec_valid(void)
156{ 155{
@@ -234,17 +233,6 @@ static inline void pps_fill_timex(struct timex *txc)
234 233
235#endif /* CONFIG_NTP_PPS */ 234#endif /* CONFIG_NTP_PPS */
236 235
237
238/**
239 * ntp_synced - Returns 1 if the NTP status is not UNSYNC
240 *
241 */
242static inline int ntp_synced(void)
243{
244 return !(time_status & STA_UNSYNC);
245}
246
247
248/* 236/*
249 * NTP methods: 237 * NTP methods:
250 */ 238 */
@@ -287,7 +275,7 @@ static inline s64 ntp_update_offset_fll(s64 offset64, long secs)
287 275
288 time_status |= STA_MODE; 276 time_status |= STA_MODE;
289 277
290 return div64_long(offset64 << (NTP_SCALE_SHIFT - SHIFT_FLL), secs); 278 return div_s64(offset64 << (NTP_SCALE_SHIFT - SHIFT_FLL), secs);
291} 279}
292 280
293static void ntp_update_offset(long offset) 281static void ntp_update_offset(long offset)
@@ -342,13 +330,11 @@ static void ntp_update_offset(long offset)
342 330
343/** 331/**
344 * ntp_clear - Clears the NTP state variables 332 * ntp_clear - Clears the NTP state variables
333 *
334 * Must be called while holding a write on the xtime_lock
345 */ 335 */
346void ntp_clear(void) 336void ntp_clear(void)
347{ 337{
348 unsigned long flags;
349
350 spin_lock_irqsave(&ntp_lock, flags);
351
352 time_adjust = 0; /* stop active adjtime() */ 338 time_adjust = 0; /* stop active adjtime() */
353 time_status |= STA_UNSYNC; 339 time_status |= STA_UNSYNC;
354 time_maxerror = NTP_PHASE_LIMIT; 340 time_maxerror = NTP_PHASE_LIMIT;
@@ -361,85 +347,63 @@ void ntp_clear(void)
361 347
362 /* Clear PPS state variables */ 348 /* Clear PPS state variables */
363 pps_clear(); 349 pps_clear();
364 spin_unlock_irqrestore(&ntp_lock, flags);
365
366}
367
368
369u64 ntp_tick_length(void)
370{
371 unsigned long flags;
372 s64 ret;
373
374 spin_lock_irqsave(&ntp_lock, flags);
375 ret = tick_length;
376 spin_unlock_irqrestore(&ntp_lock, flags);
377 return ret;
378} 350}
379 351
380
381/* 352/*
382 * this routine handles the overflow of the microsecond field 353 * Leap second processing. If in leap-insert state at the end of the
383 * 354 * day, the system clock is set back one second; if in leap-delete
384 * The tricky bits of code to handle the accurate clock support 355 * state, the system clock is set ahead one second.
385 * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
386 * They were originally developed for SUN and DEC kernels.
387 * All the kudos should go to Dave for this stuff.
388 *
389 * Also handles leap second processing, and returns leap offset
390 */ 356 */
391int second_overflow(unsigned long secs) 357static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
392{ 358{
393 s64 delta; 359 enum hrtimer_restart res = HRTIMER_NORESTART;
394 int leap = 0;
395 unsigned long flags;
396 360
397 spin_lock_irqsave(&ntp_lock, flags); 361 write_seqlock(&xtime_lock);
398 362
399 /*
400 * Leap second processing. If in leap-insert state at the end of the
401 * day, the system clock is set back one second; if in leap-delete
402 * state, the system clock is set ahead one second.
403 */
404 switch (time_state) { 363 switch (time_state) {
405 case TIME_OK: 364 case TIME_OK:
406 if (time_status & STA_INS)
407 time_state = TIME_INS;
408 else if (time_status & STA_DEL)
409 time_state = TIME_DEL;
410 break; 365 break;
411 case TIME_INS: 366 case TIME_INS:
412 if (!(time_status & STA_INS)) 367 timekeeping_leap_insert(-1);
413 time_state = TIME_OK; 368 time_state = TIME_OOP;
414 else if (secs % 86400 == 0) { 369 printk(KERN_NOTICE
415 leap = -1; 370 "Clock: inserting leap second 23:59:60 UTC\n");
416 time_state = TIME_OOP; 371 hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC);
417 time_tai++; 372 res = HRTIMER_RESTART;
418 printk(KERN_NOTICE
419 "Clock: inserting leap second 23:59:60 UTC\n");
420 }
421 break; 373 break;
422 case TIME_DEL: 374 case TIME_DEL:
423 if (!(time_status & STA_DEL)) 375 timekeeping_leap_insert(1);
424 time_state = TIME_OK; 376 time_tai--;
425 else if ((secs + 1) % 86400 == 0) { 377 time_state = TIME_WAIT;
426 leap = 1; 378 printk(KERN_NOTICE
427 time_tai--; 379 "Clock: deleting leap second 23:59:59 UTC\n");
428 time_state = TIME_WAIT;
429 printk(KERN_NOTICE
430 "Clock: deleting leap second 23:59:59 UTC\n");
431 }
432 break; 380 break;
433 case TIME_OOP: 381 case TIME_OOP:
382 time_tai++;
434 time_state = TIME_WAIT; 383 time_state = TIME_WAIT;
435 break; 384 /* fall through */
436
437 case TIME_WAIT: 385 case TIME_WAIT:
438 if (!(time_status & (STA_INS | STA_DEL))) 386 if (!(time_status & (STA_INS | STA_DEL)))
439 time_state = TIME_OK; 387 time_state = TIME_OK;
440 break; 388 break;
441 } 389 }
442 390
391 write_sequnlock(&xtime_lock);
392
393 return res;
394}
395
396/*
397 * this routine handles the overflow of the microsecond field
398 *
399 * The tricky bits of code to handle the accurate clock support
400 * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
401 * They were originally developed for SUN and DEC kernels.
402 * All the kudos should go to Dave for this stuff.
403 */
404void second_overflow(void)
405{
406 s64 delta;
443 407
444 /* Bump the maxerror field */ 408 /* Bump the maxerror field */
445 time_maxerror += MAXFREQ / NSEC_PER_USEC; 409 time_maxerror += MAXFREQ / NSEC_PER_USEC;
@@ -459,32 +423,30 @@ int second_overflow(unsigned long secs)
459 pps_dec_valid(); 423 pps_dec_valid();
460 424
461 if (!time_adjust) 425 if (!time_adjust)
462 goto out; 426 return;
463 427
464 if (time_adjust > MAX_TICKADJ) { 428 if (time_adjust > MAX_TICKADJ) {
465 time_adjust -= MAX_TICKADJ; 429 time_adjust -= MAX_TICKADJ;
466 tick_length += MAX_TICKADJ_SCALED; 430 tick_length += MAX_TICKADJ_SCALED;
467 goto out; 431 return;
468 } 432 }
469 433
470 if (time_adjust < -MAX_TICKADJ) { 434 if (time_adjust < -MAX_TICKADJ) {
471 time_adjust += MAX_TICKADJ; 435 time_adjust += MAX_TICKADJ;
472 tick_length -= MAX_TICKADJ_SCALED; 436 tick_length -= MAX_TICKADJ_SCALED;
473 goto out; 437 return;
474 } 438 }
475 439
476 tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ) 440 tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ)
477 << NTP_SCALE_SHIFT; 441 << NTP_SCALE_SHIFT;
478 time_adjust = 0; 442 time_adjust = 0;
479
480out:
481 spin_unlock_irqrestore(&ntp_lock, flags);
482
483 return leap;
484} 443}
485 444
486#ifdef CONFIG_GENERIC_CMOS_UPDATE 445#ifdef CONFIG_GENERIC_CMOS_UPDATE
487 446
447/* Disable the cmos update - used by virtualization and embedded */
448int no_sync_cmos_clock __read_mostly;
449
488static void sync_cmos_clock(struct work_struct *work); 450static void sync_cmos_clock(struct work_struct *work);
489 451
490static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock); 452static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock);
@@ -531,13 +493,35 @@ static void sync_cmos_clock(struct work_struct *work)
531 493
532static void notify_cmos_timer(void) 494static void notify_cmos_timer(void)
533{ 495{
534 schedule_delayed_work(&sync_cmos_work, 0); 496 if (!no_sync_cmos_clock)
497 schedule_delayed_work(&sync_cmos_work, 0);
535} 498}
536 499
537#else 500#else
538static inline void notify_cmos_timer(void) { } 501static inline void notify_cmos_timer(void) { }
539#endif 502#endif
540 503
504/*
505 * Start the leap seconds timer:
506 */
507static inline void ntp_start_leap_timer(struct timespec *ts)
508{
509 long now = ts->tv_sec;
510
511 if (time_status & STA_INS) {
512 time_state = TIME_INS;
513 now += 86400 - now % 86400;
514 hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS);
515
516 return;
517 }
518
519 if (time_status & STA_DEL) {
520 time_state = TIME_DEL;
521 now += 86400 - (now + 1) % 86400;
522 hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS);
523 }
524}
541 525
542/* 526/*
543 * Propagate a new txc->status value into the NTP state: 527 * Propagate a new txc->status value into the NTP state:
@@ -561,10 +545,26 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts)
561 /* only set allowed bits */ 545 /* only set allowed bits */
562 time_status &= STA_RONLY; 546 time_status &= STA_RONLY;
563 time_status |= txc->status & ~STA_RONLY; 547 time_status |= txc->status & ~STA_RONLY;
564}
565 548
549 switch (time_state) {
550 case TIME_OK:
551 ntp_start_leap_timer(ts);
552 break;
553 case TIME_INS:
554 case TIME_DEL:
555 time_state = TIME_OK;
556 ntp_start_leap_timer(ts);
557 case TIME_WAIT:
558 if (!(time_status & (STA_INS | STA_DEL)))
559 time_state = TIME_OK;
560 break;
561 case TIME_OOP:
562 hrtimer_restart(&leap_timer);
563 break;
564 }
565}
566/* 566/*
567 * Called with ntp_lock held, so we can access and modify 567 * Called with the xtime lock held, so we can access and modify
568 * all the global NTP state: 568 * all the global NTP state:
569 */ 569 */
570static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts) 570static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts)
@@ -643,6 +643,9 @@ int do_adjtimex(struct timex *txc)
643 (txc->tick < 900000/USER_HZ || 643 (txc->tick < 900000/USER_HZ ||
644 txc->tick > 1100000/USER_HZ)) 644 txc->tick > 1100000/USER_HZ))
645 return -EINVAL; 645 return -EINVAL;
646
647 if (txc->modes & ADJ_STATUS && time_state != TIME_OK)
648 hrtimer_cancel(&leap_timer);
646 } 649 }
647 650
648 if (txc->modes & ADJ_SETOFFSET) { 651 if (txc->modes & ADJ_SETOFFSET) {
@@ -660,7 +663,7 @@ int do_adjtimex(struct timex *txc)
660 663
661 getnstimeofday(&ts); 664 getnstimeofday(&ts);
662 665
663 spin_lock_irq(&ntp_lock); 666 write_seqlock_irq(&xtime_lock);
664 667
665 if (txc->modes & ADJ_ADJTIME) { 668 if (txc->modes & ADJ_ADJTIME) {
666 long save_adjust = time_adjust; 669 long save_adjust = time_adjust;
@@ -702,7 +705,7 @@ int do_adjtimex(struct timex *txc)
702 /* fill PPS status fields */ 705 /* fill PPS status fields */
703 pps_fill_timex(txc); 706 pps_fill_timex(txc);
704 707
705 spin_unlock_irq(&ntp_lock); 708 write_sequnlock_irq(&xtime_lock);
706 709
707 txc->time.tv_sec = ts.tv_sec; 710 txc->time.tv_sec = ts.tv_sec;
708 txc->time.tv_usec = ts.tv_nsec; 711 txc->time.tv_usec = ts.tv_nsec;
@@ -900,7 +903,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
900 903
901 pts_norm = pps_normalize_ts(*phase_ts); 904 pts_norm = pps_normalize_ts(*phase_ts);
902 905
903 spin_lock_irqsave(&ntp_lock, flags); 906 write_seqlock_irqsave(&xtime_lock, flags);
904 907
905 /* clear the error bits, they will be set again if needed */ 908 /* clear the error bits, they will be set again if needed */
906 time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR); 909 time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR);
@@ -913,7 +916,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
913 * just start the frequency interval */ 916 * just start the frequency interval */
914 if (unlikely(pps_fbase.tv_sec == 0)) { 917 if (unlikely(pps_fbase.tv_sec == 0)) {
915 pps_fbase = *raw_ts; 918 pps_fbase = *raw_ts;
916 spin_unlock_irqrestore(&ntp_lock, flags); 919 write_sequnlock_irqrestore(&xtime_lock, flags);
917 return; 920 return;
918 } 921 }
919 922
@@ -928,7 +931,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
928 time_status |= STA_PPSJITTER; 931 time_status |= STA_PPSJITTER;
929 /* restart the frequency calibration interval */ 932 /* restart the frequency calibration interval */
930 pps_fbase = *raw_ts; 933 pps_fbase = *raw_ts;
931 spin_unlock_irqrestore(&ntp_lock, flags); 934 write_sequnlock_irqrestore(&xtime_lock, flags);
932 pr_err("hardpps: PPSJITTER: bad pulse\n"); 935 pr_err("hardpps: PPSJITTER: bad pulse\n");
933 return; 936 return;
934 } 937 }
@@ -945,7 +948,7 @@ void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
945 948
946 hardpps_update_phase(pts_norm.nsec); 949 hardpps_update_phase(pts_norm.nsec);
947 950
948 spin_unlock_irqrestore(&ntp_lock, flags); 951 write_sequnlock_irqrestore(&xtime_lock, flags);
949} 952}
950EXPORT_SYMBOL(hardpps); 953EXPORT_SYMBOL(hardpps);
951 954
@@ -964,4 +967,6 @@ __setup("ntp_tick_adj=", ntp_tick_adj_setup);
964void __init ntp_init(void) 967void __init ntp_init(void)
965{ 968{
966 ntp_clear(); 969 ntp_clear();
970 hrtimer_init(&leap_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
971 leap_timer.function = ntp_leap_second;
967} 972}
diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c
index ce033c7aa2e..c340ca658f3 100644
--- a/kernel/time/posix-clock.c
+++ b/kernel/time/posix-clock.c
@@ -18,7 +18,6 @@
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */ 19 */
20#include <linux/device.h> 20#include <linux/device.h>
21#include <linux/export.h>
22#include <linux/file.h> 21#include <linux/file.h>
23#include <linux/posix-clock.h> 22#include <linux/posix-clock.h>
24#include <linux/slab.h> 23#include <linux/slab.h>
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index f113755695e..7a90d021b79 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -194,7 +194,7 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
194 for (next = dev->next_event; ;) { 194 for (next = dev->next_event; ;) {
195 next = ktime_add(next, tick_period); 195 next = ktime_add(next, tick_period);
196 196
197 if (!clockevents_program_event(dev, next, false)) 197 if (!clockevents_program_event(dev, next, ktime_get()))
198 return; 198 return;
199 tick_do_periodic_broadcast(); 199 tick_do_periodic_broadcast();
200 } 200 }
@@ -346,8 +346,7 @@ int tick_resume_broadcast(void)
346 tick_get_broadcast_mask()); 346 tick_get_broadcast_mask());
347 break; 347 break;
348 case TICKDEV_MODE_ONESHOT: 348 case TICKDEV_MODE_ONESHOT:
349 if (!cpumask_empty(tick_get_broadcast_mask())) 349 broadcast = tick_resume_broadcast_oneshot(bc);
350 broadcast = tick_resume_broadcast_oneshot(bc);
351 break; 350 break;
352 } 351 }
353 } 352 }
@@ -374,10 +373,7 @@ static int tick_broadcast_set_event(ktime_t expires, int force)
374{ 373{
375 struct clock_event_device *bc = tick_broadcast_device.evtdev; 374 struct clock_event_device *bc = tick_broadcast_device.evtdev;
376 375
377 if (bc->mode != CLOCK_EVT_MODE_ONESHOT) 376 return tick_dev_program_event(bc, expires, force);
378 clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
379
380 return clockevents_program_event(bc, expires, force);
381} 377}
382 378
383int tick_resume_broadcast_oneshot(struct clock_event_device *bc) 379int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
@@ -535,6 +531,7 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
535 int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC; 531 int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC;
536 532
537 bc->event_handler = tick_handle_oneshot_broadcast; 533 bc->event_handler = tick_handle_oneshot_broadcast;
534 clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
538 535
539 /* Take the do_timer update */ 536 /* Take the do_timer update */
540 tick_do_timer_cpu = cpu; 537 tick_do_timer_cpu = cpu;
@@ -552,7 +549,6 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
552 to_cpumask(tmpmask)); 549 to_cpumask(tmpmask));
553 550
554 if (was_periodic && !cpumask_empty(to_cpumask(tmpmask))) { 551 if (was_periodic && !cpumask_empty(to_cpumask(tmpmask))) {
555 clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
556 tick_broadcast_init_next_event(to_cpumask(tmpmask), 552 tick_broadcast_init_next_event(to_cpumask(tmpmask),
557 tick_next_period); 553 tick_next_period);
558 tick_broadcast_set_event(tick_next_period, 1); 554 tick_broadcast_set_event(tick_next_period, 1);
@@ -584,7 +580,6 @@ void tick_broadcast_switch_to_oneshot(void)
584 bc = tick_broadcast_device.evtdev; 580 bc = tick_broadcast_device.evtdev;
585 if (bc) 581 if (bc)
586 tick_broadcast_setup_oneshot(bc); 582 tick_broadcast_setup_oneshot(bc);
587
588 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); 583 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
589} 584}
590 585
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index b1600a6973f..119528de823 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -63,13 +63,13 @@ int tick_is_oneshot_available(void)
63static void tick_periodic(int cpu) 63static void tick_periodic(int cpu)
64{ 64{
65 if (tick_do_timer_cpu == cpu) { 65 if (tick_do_timer_cpu == cpu) {
66 write_seqlock(&jiffies_lock); 66 write_seqlock(&xtime_lock);
67 67
68 /* Keep track of the next tick event */ 68 /* Keep track of the next tick event */
69 tick_next_period = ktime_add(tick_next_period, tick_period); 69 tick_next_period = ktime_add(tick_next_period, tick_period);
70 70
71 do_timer(1); 71 do_timer(1);
72 write_sequnlock(&jiffies_lock); 72 write_sequnlock(&xtime_lock);
73 } 73 }
74 74
75 update_process_times(user_mode(get_irq_regs())); 75 update_process_times(user_mode(get_irq_regs()));
@@ -94,7 +94,7 @@ void tick_handle_periodic(struct clock_event_device *dev)
94 */ 94 */
95 next = ktime_add(dev->next_event, tick_period); 95 next = ktime_add(dev->next_event, tick_period);
96 for (;;) { 96 for (;;) {
97 if (!clockevents_program_event(dev, next, false)) 97 if (!clockevents_program_event(dev, next, ktime_get()))
98 return; 98 return;
99 /* 99 /*
100 * Have to be careful here. If we're in oneshot mode, 100 * Have to be careful here. If we're in oneshot mode,
@@ -130,14 +130,14 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
130 ktime_t next; 130 ktime_t next;
131 131
132 do { 132 do {
133 seq = read_seqbegin(&jiffies_lock); 133 seq = read_seqbegin(&xtime_lock);
134 next = tick_next_period; 134 next = tick_next_period;
135 } while (read_seqretry(&jiffies_lock, seq)); 135 } while (read_seqretry(&xtime_lock, seq));
136 136
137 clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); 137 clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
138 138
139 for (;;) { 139 for (;;) {
140 if (!clockevents_program_event(dev, next, false)) 140 if (!clockevents_program_event(dev, next, ktime_get()))
141 return; 141 return;
142 next = ktime_add(next, tick_period); 142 next = ktime_add(next, tick_period);
143 } 143 }
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index cf3e59ed6dc..1009b06d6f8 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -26,6 +26,8 @@ extern void clockevents_shutdown(struct clock_event_device *dev);
26extern void tick_setup_oneshot(struct clock_event_device *newdev, 26extern void tick_setup_oneshot(struct clock_event_device *newdev,
27 void (*handler)(struct clock_event_device *), 27 void (*handler)(struct clock_event_device *),
28 ktime_t nextevt); 28 ktime_t nextevt);
29extern int tick_dev_program_event(struct clock_event_device *dev,
30 ktime_t expires, int force);
29extern int tick_program_event(ktime_t expires, int force); 31extern int tick_program_event(ktime_t expires, int force);
30extern void tick_oneshot_notify(void); 32extern void tick_oneshot_notify(void);
31extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)); 33extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *));
@@ -141,3 +143,4 @@ static inline int tick_device_is_functional(struct clock_event_device *dev)
141#endif 143#endif
142 144
143extern void do_timer(unsigned long ticks); 145extern void do_timer(unsigned long ticks);
146extern seqlock_t xtime_lock;
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index 824109060a3..2d04411a5f0 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -21,6 +21,74 @@
21 21
22#include "tick-internal.h" 22#include "tick-internal.h"
23 23
24/* Limit min_delta to a jiffie */
25#define MIN_DELTA_LIMIT (NSEC_PER_SEC / HZ)
26
27static int tick_increase_min_delta(struct clock_event_device *dev)
28{
29 /* Nothing to do if we already reached the limit */
30 if (dev->min_delta_ns >= MIN_DELTA_LIMIT)
31 return -ETIME;
32
33 if (dev->min_delta_ns < 5000)
34 dev->min_delta_ns = 5000;
35 else
36 dev->min_delta_ns += dev->min_delta_ns >> 1;
37
38 if (dev->min_delta_ns > MIN_DELTA_LIMIT)
39 dev->min_delta_ns = MIN_DELTA_LIMIT;
40
41 printk(KERN_WARNING "CE: %s increased min_delta_ns to %llu nsec\n",
42 dev->name ? dev->name : "?",
43 (unsigned long long) dev->min_delta_ns);
44 return 0;
45}
46
47/**
48 * tick_program_event internal worker function
49 */
50int tick_dev_program_event(struct clock_event_device *dev, ktime_t expires,
51 int force)
52{
53 ktime_t now = ktime_get();
54 int i;
55
56 for (i = 0;;) {
57 int ret = clockevents_program_event(dev, expires, now);
58
59 if (!ret || !force)
60 return ret;
61
62 dev->retries++;
63 /*
64 * We tried 3 times to program the device with the given
65 * min_delta_ns. If that's not working then we increase it
66 * and emit a warning.
67 */
68 if (++i > 2) {
69 /* Increase the min. delta and try again */
70 if (tick_increase_min_delta(dev)) {
71 /*
72 * Get out of the loop if min_delta_ns
73 * hit the limit already. That's
74 * better than staying here forever.
75 *
76 * We clear next_event so we have a
77 * chance that the box survives.
78 */
79 printk(KERN_WARNING
80 "CE: Reprogramming failure. Giving up\n");
81 dev->next_event.tv64 = KTIME_MAX;
82 return -ETIME;
83 }
84 i = 0;
85 }
86
87 now = ktime_get();
88 expires = ktime_add_ns(now, dev->min_delta_ns);
89 }
90}
91
24/** 92/**
25 * tick_program_event 93 * tick_program_event
26 */ 94 */
@@ -28,7 +96,7 @@ int tick_program_event(ktime_t expires, int force)
28{ 96{
29 struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); 97 struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
30 98
31 return clockevents_program_event(dev, expires, force); 99 return tick_dev_program_event(dev, expires, force);
32} 100}
33 101
34/** 102/**
@@ -36,10 +104,11 @@ int tick_program_event(ktime_t expires, int force)
36 */ 104 */
37void tick_resume_oneshot(void) 105void tick_resume_oneshot(void)
38{ 106{
39 struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); 107 struct tick_device *td = &__get_cpu_var(tick_cpu_device);
108 struct clock_event_device *dev = td->evtdev;
40 109
41 clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); 110 clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
42 clockevents_program_event(dev, ktime_get(), true); 111 tick_program_event(ktime_get(), 1);
43} 112}
44 113
45/** 114/**
@@ -51,7 +120,7 @@ void tick_setup_oneshot(struct clock_event_device *newdev,
51{ 120{
52 newdev->event_handler = handler; 121 newdev->event_handler = handler;
53 clockevents_set_mode(newdev, CLOCK_EVT_MODE_ONESHOT); 122 clockevents_set_mode(newdev, CLOCK_EVT_MODE_ONESHOT);
54 clockevents_program_event(newdev, next_event, true); 123 tick_dev_program_event(newdev, next_event, 1);
55} 124}
56 125
57/** 126/**
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index d58e552d9fd..d5097c44b40 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -31,7 +31,7 @@
31static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); 31static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
32 32
33/* 33/*
34 * The time, when the last jiffy update happened. Protected by jiffies_lock. 34 * The time, when the last jiffy update happened. Protected by xtime_lock.
35 */ 35 */
36static ktime_t last_jiffies_update; 36static ktime_t last_jiffies_update;
37 37
@@ -49,14 +49,14 @@ static void tick_do_update_jiffies64(ktime_t now)
49 ktime_t delta; 49 ktime_t delta;
50 50
51 /* 51 /*
52 * Do a quick check without holding jiffies_lock: 52 * Do a quick check without holding xtime_lock:
53 */ 53 */
54 delta = ktime_sub(now, last_jiffies_update); 54 delta = ktime_sub(now, last_jiffies_update);
55 if (delta.tv64 < tick_period.tv64) 55 if (delta.tv64 < tick_period.tv64)
56 return; 56 return;
57 57
58 /* Reevalute with jiffies_lock held */ 58 /* Reevalute with xtime_lock held */
59 write_seqlock(&jiffies_lock); 59 write_seqlock(&xtime_lock);
60 60
61 delta = ktime_sub(now, last_jiffies_update); 61 delta = ktime_sub(now, last_jiffies_update);
62 if (delta.tv64 >= tick_period.tv64) { 62 if (delta.tv64 >= tick_period.tv64) {
@@ -79,7 +79,7 @@ static void tick_do_update_jiffies64(ktime_t now)
79 /* Keep the tick_next_period variable up to date */ 79 /* Keep the tick_next_period variable up to date */
80 tick_next_period = ktime_add(last_jiffies_update, tick_period); 80 tick_next_period = ktime_add(last_jiffies_update, tick_period);
81 } 81 }
82 write_sequnlock(&jiffies_lock); 82 write_sequnlock(&xtime_lock);
83} 83}
84 84
85/* 85/*
@@ -89,58 +89,15 @@ static ktime_t tick_init_jiffy_update(void)
89{ 89{
90 ktime_t period; 90 ktime_t period;
91 91
92 write_seqlock(&jiffies_lock); 92 write_seqlock(&xtime_lock);
93 /* Did we start the jiffies update yet ? */ 93 /* Did we start the jiffies update yet ? */
94 if (last_jiffies_update.tv64 == 0) 94 if (last_jiffies_update.tv64 == 0)
95 last_jiffies_update = tick_next_period; 95 last_jiffies_update = tick_next_period;
96 period = last_jiffies_update; 96 period = last_jiffies_update;
97 write_sequnlock(&jiffies_lock); 97 write_sequnlock(&xtime_lock);
98 return period; 98 return period;
99} 99}
100 100
101
102static void tick_sched_do_timer(ktime_t now)
103{
104 int cpu = smp_processor_id();
105
106#ifdef CONFIG_NO_HZ
107 /*
108 * Check if the do_timer duty was dropped. We don't care about
109 * concurrency: This happens only when the cpu in charge went
110 * into a long sleep. If two cpus happen to assign themself to
111 * this duty, then the jiffies update is still serialized by
112 * jiffies_lock.
113 */
114 if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
115 tick_do_timer_cpu = cpu;
116#endif
117
118 /* Check, if the jiffies need an update */
119 if (tick_do_timer_cpu == cpu)
120 tick_do_update_jiffies64(now);
121}
122
123static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
124{
125#ifdef CONFIG_NO_HZ
126 /*
127 * When we are idle and the tick is stopped, we have to touch
128 * the watchdog as we might not schedule for a really long
129 * time. This happens on complete idle SMP systems while
130 * waiting on the login prompt. We also increment the "start of
131 * idle" jiffy stamp so the idle accounting adjustment we do
132 * when we go busy again does not account too much ticks.
133 */
134 if (ts->tick_stopped) {
135 touch_softlockup_watchdog();
136 if (is_idle_task(current))
137 ts->idle_jiffies++;
138 }
139#endif
140 update_process_times(user_mode(regs));
141 profile_tick(CPU_PROFILING);
142}
143
144/* 101/*
145 * NOHZ - aka dynamic tick functionality 102 * NOHZ - aka dynamic tick functionality
146 */ 103 */
@@ -148,7 +105,7 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
148/* 105/*
149 * NO HZ enabled ? 106 * NO HZ enabled ?
150 */ 107 */
151int tick_nohz_enabled __read_mostly = 1; 108static int tick_nohz_enabled __read_mostly = 1;
152 109
153/* 110/*
154 * Enable / Disable tickless mode 111 * Enable / Disable tickless mode
@@ -182,6 +139,7 @@ static void tick_nohz_update_jiffies(ktime_t now)
182 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); 139 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
183 unsigned long flags; 140 unsigned long flags;
184 141
142 cpumask_clear_cpu(cpu, nohz_cpu_mask);
185 ts->idle_waketime = now; 143 ts->idle_waketime = now;
186 144
187 local_irq_save(flags); 145 local_irq_save(flags);
@@ -201,10 +159,9 @@ update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_upda
201 159
202 if (ts->idle_active) { 160 if (ts->idle_active) {
203 delta = ktime_sub(now, ts->idle_entrytime); 161 delta = ktime_sub(now, ts->idle_entrytime);
162 ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
204 if (nr_iowait_cpu(cpu) > 0) 163 if (nr_iowait_cpu(cpu) > 0)
205 ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta); 164 ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta);
206 else
207 ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
208 ts->idle_entrytime = now; 165 ts->idle_entrytime = now;
209 } 166 }
210 167
@@ -225,7 +182,11 @@ static void tick_nohz_stop_idle(int cpu, ktime_t now)
225 182
226static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts) 183static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts)
227{ 184{
228 ktime_t now = ktime_get(); 185 ktime_t now;
186
187 now = ktime_get();
188
189 update_ts_time_stats(cpu, ts, now, NULL);
229 190
230 ts->idle_entrytime = now; 191 ts->idle_entrytime = now;
231 ts->idle_active = 1; 192 ts->idle_active = 1;
@@ -236,11 +197,11 @@ static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts)
236/** 197/**
237 * get_cpu_idle_time_us - get the total idle time of a cpu 198 * get_cpu_idle_time_us - get the total idle time of a cpu
238 * @cpu: CPU number to query 199 * @cpu: CPU number to query
239 * @last_update_time: variable to store update time in. Do not update 200 * @last_update_time: variable to store update time in
240 * counters if NULL.
241 * 201 *
242 * Return the cummulative idle time (since boot) for a given 202 * Return the cummulative idle time (since boot) for a given
243 * CPU, in microseconds. 203 * CPU, in microseconds. The idle time returned includes
204 * the iowait time (unlike what "top" and co report).
244 * 205 *
245 * This time is measured via accounting rather than sampling, 206 * This time is measured via accounting rather than sampling,
246 * and is as accurate as ktime_get() is. 207 * and is as accurate as ktime_get() is.
@@ -250,35 +211,20 @@ static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts)
250u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) 211u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
251{ 212{
252 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); 213 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
253 ktime_t now, idle;
254 214
255 if (!tick_nohz_enabled) 215 if (!tick_nohz_enabled)
256 return -1; 216 return -1;
257 217
258 now = ktime_get(); 218 update_ts_time_stats(cpu, ts, ktime_get(), last_update_time);
259 if (last_update_time) {
260 update_ts_time_stats(cpu, ts, now, last_update_time);
261 idle = ts->idle_sleeptime;
262 } else {
263 if (ts->idle_active && !nr_iowait_cpu(cpu)) {
264 ktime_t delta = ktime_sub(now, ts->idle_entrytime);
265
266 idle = ktime_add(ts->idle_sleeptime, delta);
267 } else {
268 idle = ts->idle_sleeptime;
269 }
270 }
271
272 return ktime_to_us(idle);
273 219
220 return ktime_to_us(ts->idle_sleeptime);
274} 221}
275EXPORT_SYMBOL_GPL(get_cpu_idle_time_us); 222EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);
276 223
277/** 224/*
278 * get_cpu_iowait_time_us - get the total iowait time of a cpu 225 * get_cpu_iowait_time_us - get the total iowait time of a cpu
279 * @cpu: CPU number to query 226 * @cpu: CPU number to query
280 * @last_update_time: variable to store update time in. Do not update 227 * @last_update_time: variable to store update time in
281 * counters if NULL.
282 * 228 *
283 * Return the cummulative iowait time (since boot) for a given 229 * Return the cummulative iowait time (since boot) for a given
284 * CPU, in microseconds. 230 * CPU, in microseconds.
@@ -291,47 +237,93 @@ EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);
291u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) 237u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
292{ 238{
293 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); 239 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
294 ktime_t now, iowait;
295 240
296 if (!tick_nohz_enabled) 241 if (!tick_nohz_enabled)
297 return -1; 242 return -1;
298 243
299 now = ktime_get(); 244 update_ts_time_stats(cpu, ts, ktime_get(), last_update_time);
300 if (last_update_time) {
301 update_ts_time_stats(cpu, ts, now, last_update_time);
302 iowait = ts->iowait_sleeptime;
303 } else {
304 if (ts->idle_active && nr_iowait_cpu(cpu) > 0) {
305 ktime_t delta = ktime_sub(now, ts->idle_entrytime);
306
307 iowait = ktime_add(ts->iowait_sleeptime, delta);
308 } else {
309 iowait = ts->iowait_sleeptime;
310 }
311 }
312 245
313 return ktime_to_us(iowait); 246 return ktime_to_us(ts->iowait_sleeptime);
314} 247}
315EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); 248EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
316 249
317static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, 250/**
318 ktime_t now, int cpu) 251 * tick_nohz_stop_sched_tick - stop the idle tick from the idle task
252 *
253 * When the next event is more than a tick into the future, stop the idle tick
254 * Called either from the idle loop or from irq_exit() when an idle period was
255 * just interrupted by an interrupt which did not cause a reschedule.
256 */
257void tick_nohz_stop_sched_tick(int inidle)
319{ 258{
320 unsigned long seq, last_jiffies, next_jiffies, delta_jiffies; 259 unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
321 ktime_t last_update, expires, ret = { .tv64 = 0 }; 260 struct tick_sched *ts;
322 unsigned long rcu_delta_jiffies; 261 ktime_t last_update, expires, now;
323 struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; 262 struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
324 u64 time_delta; 263 u64 time_delta;
264 int cpu;
265
266 local_irq_save(flags);
267
268 cpu = smp_processor_id();
269 ts = &per_cpu(tick_cpu_sched, cpu);
270
271 /*
272 * Call to tick_nohz_start_idle stops the last_update_time from being
273 * updated. Thus, it must not be called in the event we are called from
274 * irq_exit() with the prior state different than idle.
275 */
276 if (!inidle && !ts->inidle)
277 goto end;
278
279 /*
280 * Set ts->inidle unconditionally. Even if the system did not
281 * switch to NOHZ mode the cpu frequency governers rely on the
282 * update of the idle time accounting in tick_nohz_start_idle().
283 */
284 ts->inidle = 1;
325 285
286 now = tick_nohz_start_idle(cpu, ts);
287
288 /*
289 * If this cpu is offline and it is the one which updates
290 * jiffies, then give up the assignment and let it be taken by
291 * the cpu which runs the tick timer next. If we don't drop
292 * this here the jiffies might be stale and do_timer() never
293 * invoked.
294 */
295 if (unlikely(!cpu_online(cpu))) {
296 if (cpu == tick_do_timer_cpu)
297 tick_do_timer_cpu = TICK_DO_TIMER_NONE;
298 }
299
300 if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
301 goto end;
302
303 if (need_resched())
304 goto end;
305
306 if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
307 static int ratelimit;
308
309 if (ratelimit < 10) {
310 printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
311 (unsigned int) local_softirq_pending());
312 ratelimit++;
313 }
314 goto end;
315 }
316
317 ts->idle_calls++;
326 /* Read jiffies and the time when jiffies were updated last */ 318 /* Read jiffies and the time when jiffies were updated last */
327 do { 319 do {
328 seq = read_seqbegin(&jiffies_lock); 320 seq = read_seqbegin(&xtime_lock);
329 last_update = last_jiffies_update; 321 last_update = last_jiffies_update;
330 last_jiffies = jiffies; 322 last_jiffies = jiffies;
331 time_delta = timekeeping_max_deferment(); 323 time_delta = timekeeping_max_deferment();
332 } while (read_seqretry(&jiffies_lock, seq)); 324 } while (read_seqretry(&xtime_lock, seq));
333 325
334 if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) || 326 if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
335 arch_needs_cpu(cpu)) { 327 arch_needs_cpu(cpu)) {
336 next_jiffies = last_jiffies + 1; 328 next_jiffies = last_jiffies + 1;
337 delta_jiffies = 1; 329 delta_jiffies = 1;
@@ -339,10 +331,6 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
339 /* Get the next timer wheel timer */ 331 /* Get the next timer wheel timer */
340 next_jiffies = get_next_timer_interrupt(last_jiffies); 332 next_jiffies = get_next_timer_interrupt(last_jiffies);
341 delta_jiffies = next_jiffies - last_jiffies; 333 delta_jiffies = next_jiffies - last_jiffies;
342 if (rcu_delta_jiffies < delta_jiffies) {
343 next_jiffies = last_jiffies + rcu_delta_jiffies;
344 delta_jiffies = rcu_delta_jiffies;
345 }
346 } 334 }
347 /* 335 /*
348 * Do not stop the tick, if we are only one off 336 * Do not stop the tick, if we are only one off
@@ -401,12 +389,13 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
401 else 389 else
402 expires.tv64 = KTIME_MAX; 390 expires.tv64 = KTIME_MAX;
403 391
392 if (delta_jiffies > 1)
393 cpumask_set_cpu(cpu, nohz_cpu_mask);
394
404 /* Skip reprogram of event if its not changed */ 395 /* Skip reprogram of event if its not changed */
405 if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) 396 if (ts->tick_stopped && ktime_equal(expires, dev->next_event))
406 goto out; 397 goto out;
407 398
408 ret = expires;
409
410 /* 399 /*
411 * nohz_stop_sched_tick can be called several times before 400 * nohz_stop_sched_tick can be called several times before
412 * the nohz_restart_sched_tick is called. This happens when 401 * the nohz_restart_sched_tick is called. This happens when
@@ -415,13 +404,19 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
415 * the scheduler tick in nohz_restart_sched_tick. 404 * the scheduler tick in nohz_restart_sched_tick.
416 */ 405 */
417 if (!ts->tick_stopped) { 406 if (!ts->tick_stopped) {
418 nohz_balance_enter_idle(cpu); 407 select_nohz_load_balancer(1);
419 calc_load_enter_idle();
420 408
421 ts->last_tick = hrtimer_get_expires(&ts->sched_timer); 409 ts->idle_tick = hrtimer_get_expires(&ts->sched_timer);
422 ts->tick_stopped = 1; 410 ts->tick_stopped = 1;
411 ts->idle_jiffies = last_jiffies;
412 rcu_enter_nohz();
423 } 413 }
424 414
415 ts->idle_sleeps++;
416
417 /* Mark expires */
418 ts->idle_expires = expires;
419
425 /* 420 /*
426 * If the expiration time == KTIME_MAX, then 421 * If the expiration time == KTIME_MAX, then
427 * in this case we simply stop the tick timer. 422 * in this case we simply stop the tick timer.
@@ -446,132 +441,15 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
446 * softirq. 441 * softirq.
447 */ 442 */
448 tick_do_update_jiffies64(ktime_get()); 443 tick_do_update_jiffies64(ktime_get());
444 cpumask_clear_cpu(cpu, nohz_cpu_mask);
449 } 445 }
450 raise_softirq_irqoff(TIMER_SOFTIRQ); 446 raise_softirq_irqoff(TIMER_SOFTIRQ);
451out: 447out:
452 ts->next_jiffies = next_jiffies; 448 ts->next_jiffies = next_jiffies;
453 ts->last_jiffies = last_jiffies; 449 ts->last_jiffies = last_jiffies;
454 ts->sleep_length = ktime_sub(dev->next_event, now); 450 ts->sleep_length = ktime_sub(dev->next_event, now);
455 451end:
456 return ret; 452 local_irq_restore(flags);
457}
458
459static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
460{
461 /*
462 * If this cpu is offline and it is the one which updates
463 * jiffies, then give up the assignment and let it be taken by
464 * the cpu which runs the tick timer next. If we don't drop
465 * this here the jiffies might be stale and do_timer() never
466 * invoked.
467 */
468 if (unlikely(!cpu_online(cpu))) {
469 if (cpu == tick_do_timer_cpu)
470 tick_do_timer_cpu = TICK_DO_TIMER_NONE;
471 }
472
473 if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
474 return false;
475
476 if (need_resched())
477 return false;
478
479 if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
480 static int ratelimit;
481
482 if (ratelimit < 10 &&
483 (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) {
484 printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
485 (unsigned int) local_softirq_pending());
486 ratelimit++;
487 }
488 return false;
489 }
490
491 return true;
492}
493
494static void __tick_nohz_idle_enter(struct tick_sched *ts)
495{
496 ktime_t now, expires;
497 int cpu = smp_processor_id();
498
499 now = tick_nohz_start_idle(cpu, ts);
500
501 if (can_stop_idle_tick(cpu, ts)) {
502 int was_stopped = ts->tick_stopped;
503
504 ts->idle_calls++;
505
506 expires = tick_nohz_stop_sched_tick(ts, now, cpu);
507 if (expires.tv64 > 0LL) {
508 ts->idle_sleeps++;
509 ts->idle_expires = expires;
510 }
511
512 if (!was_stopped && ts->tick_stopped)
513 ts->idle_jiffies = ts->last_jiffies;
514 }
515}
516
517/**
518 * tick_nohz_idle_enter - stop the idle tick from the idle task
519 *
520 * When the next event is more than a tick into the future, stop the idle tick
521 * Called when we start the idle loop.
522 *
523 * The arch is responsible of calling:
524 *
525 * - rcu_idle_enter() after its last use of RCU before the CPU is put
526 * to sleep.
527 * - rcu_idle_exit() before the first use of RCU after the CPU is woken up.
528 */
529void tick_nohz_idle_enter(void)
530{
531 struct tick_sched *ts;
532
533 WARN_ON_ONCE(irqs_disabled());
534
535 /*
536 * Update the idle state in the scheduler domain hierarchy
537 * when tick_nohz_stop_sched_tick() is called from the idle loop.
538 * State will be updated to busy during the first busy tick after
539 * exiting idle.
540 */
541 set_cpu_sd_state_idle();
542
543 local_irq_disable();
544
545 ts = &__get_cpu_var(tick_cpu_sched);
546 /*
547 * set ts->inidle unconditionally. even if the system did not
548 * switch to nohz mode the cpu frequency governers rely on the
549 * update of the idle time accounting in tick_nohz_start_idle().
550 */
551 ts->inidle = 1;
552 __tick_nohz_idle_enter(ts);
553
554 local_irq_enable();
555}
556
557/**
558 * tick_nohz_irq_exit - update next tick event from interrupt exit
559 *
560 * When an interrupt fires while we are idle and it doesn't cause
561 * a reschedule, it may still add, modify or delete a timer, enqueue
562 * an RCU callback, etc...
563 * So we need to re-calculate and reprogram the next tick event.
564 */
565void tick_nohz_irq_exit(void)
566{
567 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
568
569 if (!ts->inidle)
570 return;
571
572 /* Cancel the timer because CPU already waken up from the C-states*/
573 menu_hrtimer_cancel();
574 __tick_nohz_idle_enter(ts);
575} 453}
576 454
577/** 455/**
@@ -589,7 +467,7 @@ ktime_t tick_nohz_get_sleep_length(void)
589static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) 467static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
590{ 468{
591 hrtimer_cancel(&ts->sched_timer); 469 hrtimer_cancel(&ts->sched_timer);
592 hrtimer_set_expires(&ts->sched_timer, ts->last_tick); 470 hrtimer_set_expires(&ts->sched_timer, ts->idle_tick);
593 471
594 while (1) { 472 while (1) {
595 /* Forward the time to expire in the future */ 473 /* Forward the time to expire in the future */
@@ -606,33 +484,49 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
606 hrtimer_get_expires(&ts->sched_timer), 0)) 484 hrtimer_get_expires(&ts->sched_timer), 0))
607 break; 485 break;
608 } 486 }
609 /* Reread time and update jiffies */ 487 /* Update jiffies and reread time */
610 now = ktime_get();
611 tick_do_update_jiffies64(now); 488 tick_do_update_jiffies64(now);
489 now = ktime_get();
612 } 490 }
613} 491}
614 492
615static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) 493/**
494 * tick_nohz_restart_sched_tick - restart the idle tick from the idle task
495 *
496 * Restart the idle tick when the CPU is woken up from idle
497 */
498void tick_nohz_restart_sched_tick(void)
616{ 499{
617 /* Update jiffies first */ 500 int cpu = smp_processor_id();
618 tick_do_update_jiffies64(now); 501 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
619 update_cpu_load_nohz(); 502#ifndef CONFIG_VIRT_CPU_ACCOUNTING
503 unsigned long ticks;
504#endif
505 ktime_t now;
620 506
621 calc_load_exit_idle(); 507 local_irq_disable();
622 touch_softlockup_watchdog(); 508 if (ts->idle_active || (ts->inidle && ts->tick_stopped))
623 /* 509 now = ktime_get();
624 * Cancel the scheduled timer and restore the tick
625 */
626 ts->tick_stopped = 0;
627 ts->idle_exittime = now;
628 510
629 tick_nohz_restart(ts, now); 511 if (ts->idle_active)
630} 512 tick_nohz_stop_idle(cpu, now);
513
514 if (!ts->inidle || !ts->tick_stopped) {
515 ts->inidle = 0;
516 local_irq_enable();
517 return;
518 }
519
520 ts->inidle = 0;
521
522 rcu_exit_nohz();
523
524 /* Update jiffies first */
525 select_nohz_load_balancer(0);
526 tick_do_update_jiffies64(now);
527 cpumask_clear_cpu(cpu, nohz_cpu_mask);
631 528
632static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
633{
634#ifndef CONFIG_VIRT_CPU_ACCOUNTING 529#ifndef CONFIG_VIRT_CPU_ACCOUNTING
635 unsigned long ticks;
636 /* 530 /*
637 * We stopped the tick in idle. Update process times would miss the 531 * We stopped the tick in idle. Update process times would miss the
638 * time we slept as update_process_times does only a 1 tick 532 * time we slept as update_process_times does only a 1 tick
@@ -645,39 +539,15 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
645 if (ticks && ticks < LONG_MAX) 539 if (ticks && ticks < LONG_MAX)
646 account_idle_ticks(ticks); 540 account_idle_ticks(ticks);
647#endif 541#endif
648}
649 542
650/** 543 touch_softlockup_watchdog();
651 * tick_nohz_idle_exit - restart the idle tick from the idle task 544 /*
652 * 545 * Cancel the scheduled timer and restore the tick
653 * Restart the idle tick when the CPU is woken up from idle 546 */
654 * This also exit the RCU extended quiescent state. The CPU 547 ts->tick_stopped = 0;
655 * can use RCU again after this function is called. 548 ts->idle_exittime = now;
656 */
657void tick_nohz_idle_exit(void)
658{
659 int cpu = smp_processor_id();
660 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
661 ktime_t now;
662
663 local_irq_disable();
664
665 WARN_ON_ONCE(!ts->inidle);
666
667 ts->inidle = 0;
668
669 /* Cancel the timer because CPU already waken up from the C-states*/
670 menu_hrtimer_cancel();
671 if (ts->idle_active || ts->tick_stopped)
672 now = ktime_get();
673
674 if (ts->idle_active)
675 tick_nohz_stop_idle(cpu, now);
676 549
677 if (ts->tick_stopped) { 550 tick_nohz_restart(ts, now);
678 tick_nohz_restart_sched_tick(ts, now);
679 tick_nohz_account_idle_ticks(ts);
680 }
681 551
682 local_irq_enable(); 552 local_irq_enable();
683} 553}
@@ -695,12 +565,40 @@ static void tick_nohz_handler(struct clock_event_device *dev)
695{ 565{
696 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); 566 struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
697 struct pt_regs *regs = get_irq_regs(); 567 struct pt_regs *regs = get_irq_regs();
568 int cpu = smp_processor_id();
698 ktime_t now = ktime_get(); 569 ktime_t now = ktime_get();
699 570
700 dev->next_event.tv64 = KTIME_MAX; 571 dev->next_event.tv64 = KTIME_MAX;
701 572
702 tick_sched_do_timer(now); 573 /*
703 tick_sched_handle(ts, regs); 574 * Check if the do_timer duty was dropped. We don't care about
575 * concurrency: This happens only when the cpu in charge went
576 * into a long sleep. If two cpus happen to assign themself to
577 * this duty, then the jiffies update is still serialized by
578 * xtime_lock.
579 */
580 if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
581 tick_do_timer_cpu = cpu;
582
583 /* Check, if the jiffies need an update */
584 if (tick_do_timer_cpu == cpu)
585 tick_do_update_jiffies64(now);
586
587 /*
588 * When we are idle and the tick is stopped, we have to touch
589 * the watchdog as we might not schedule for a really long
590 * time. This happens on complete idle SMP systems while
591 * waiting on the login prompt. We also increment the "start
592 * of idle" jiffy stamp so the idle accounting adjustment we
593 * do when we go busy again does not account too much ticks.
594 */
595 if (ts->tick_stopped) {
596 touch_softlockup_watchdog();
597 ts->idle_jiffies++;
598 }
599
600 update_process_times(user_mode(regs));
601 profile_tick(CPU_PROFILING);
704 602
705 while (tick_nohz_reprogram(ts, now)) { 603 while (tick_nohz_reprogram(ts, now)) {
706 now = ktime_get(); 604 now = ktime_get();
@@ -742,6 +640,8 @@ static void tick_nohz_switch_to_nohz(void)
742 next = ktime_add(next, tick_period); 640 next = ktime_add(next, tick_period);
743 } 641 }
744 local_irq_enable(); 642 local_irq_enable();
643
644 printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id());
745} 645}
746 646
747/* 647/*
@@ -813,7 +713,7 @@ void tick_check_idle(int cpu)
813#ifdef CONFIG_HIGH_RES_TIMERS 713#ifdef CONFIG_HIGH_RES_TIMERS
814/* 714/*
815 * We rearm the timer until we get disabled by the idle code. 715 * We rearm the timer until we get disabled by the idle code.
816 * Called with interrupts disabled. 716 * Called with interrupts disabled and timer->base->cpu_base->lock held.
817 */ 717 */
818static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) 718static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
819{ 719{
@@ -821,31 +721,50 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
821 container_of(timer, struct tick_sched, sched_timer); 721 container_of(timer, struct tick_sched, sched_timer);
822 struct pt_regs *regs = get_irq_regs(); 722 struct pt_regs *regs = get_irq_regs();
823 ktime_t now = ktime_get(); 723 ktime_t now = ktime_get();
724 int cpu = smp_processor_id();
824 725
825 tick_sched_do_timer(now); 726#ifdef CONFIG_NO_HZ
727 /*
728 * Check if the do_timer duty was dropped. We don't care about
729 * concurrency: This happens only when the cpu in charge went
730 * into a long sleep. If two cpus happen to assign themself to
731 * this duty, then the jiffies update is still serialized by
732 * xtime_lock.
733 */
734 if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
735 tick_do_timer_cpu = cpu;
736#endif
737
738 /* Check, if the jiffies need an update */
739 if (tick_do_timer_cpu == cpu)
740 tick_do_update_jiffies64(now);
826 741
827 /* 742 /*
828 * Do not call, when we are not in irq context and have 743 * Do not call, when we are not in irq context and have
829 * no valid regs pointer 744 * no valid regs pointer
830 */ 745 */
831 if (regs) 746 if (regs) {
832 tick_sched_handle(ts, regs); 747 /*
748 * When we are idle and the tick is stopped, we have to touch
749 * the watchdog as we might not schedule for a really long
750 * time. This happens on complete idle SMP systems while
751 * waiting on the login prompt. We also increment the "start of
752 * idle" jiffy stamp so the idle accounting adjustment we do
753 * when we go busy again does not account too much ticks.
754 */
755 if (ts->tick_stopped) {
756 touch_softlockup_watchdog();
757 ts->idle_jiffies++;
758 }
759 update_process_times(user_mode(regs));
760 profile_tick(CPU_PROFILING);
761 }
833 762
834 hrtimer_forward(timer, now, tick_period); 763 hrtimer_forward(timer, now, tick_period);
835 764
836 return HRTIMER_RESTART; 765 return HRTIMER_RESTART;
837} 766}
838 767
839static int sched_skew_tick;
840
841static int __init skew_tick(char *str)
842{
843 get_option(&str, &sched_skew_tick);
844
845 return 0;
846}
847early_param("skew_tick", skew_tick);
848
849/** 768/**
850 * tick_setup_sched_timer - setup the tick emulation timer 769 * tick_setup_sched_timer - setup the tick emulation timer
851 */ 770 */
@@ -863,14 +782,6 @@ void tick_setup_sched_timer(void)
863 /* Get the next period (per cpu) */ 782 /* Get the next period (per cpu) */
864 hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); 783 hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
865 784
866 /* Offset the tick to avert jiffies_lock contention. */
867 if (sched_skew_tick) {
868 u64 offset = ktime_to_ns(tick_period) >> 1;
869 do_div(offset, num_possible_cpus());
870 offset *= smp_processor_id();
871 hrtimer_add_expires_ns(&ts->sched_timer, offset);
872 }
873
874 for (;;) { 785 for (;;) {
875 hrtimer_forward(&ts->sched_timer, now, tick_period); 786 hrtimer_forward(&ts->sched_timer, now, tick_period);
876 hrtimer_start_expires(&ts->sched_timer, 787 hrtimer_start_expires(&ts->sched_timer,
@@ -882,8 +793,10 @@ void tick_setup_sched_timer(void)
882 } 793 }
883 794
884#ifdef CONFIG_NO_HZ 795#ifdef CONFIG_NO_HZ
885 if (tick_nohz_enabled) 796 if (tick_nohz_enabled) {
886 ts->nohz_mode = NOHZ_MODE_HIGHRES; 797 ts->nohz_mode = NOHZ_MODE_HIGHRES;
798 printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id());
799 }
887#endif 800#endif
888} 801}
889#endif /* HIGH_RES_TIMERS */ 802#endif /* HIGH_RES_TIMERS */
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index cbc6acb0db3..6f9798bf240 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -8,7 +8,6 @@
8 * 8 *
9 */ 9 */
10 10
11#include <linux/timekeeper_internal.h>
12#include <linux/module.h> 11#include <linux/module.h>
13#include <linux/interrupt.h> 12#include <linux/interrupt.h>
14#include <linux/percpu.h> 13#include <linux/percpu.h>
@@ -21,60 +20,37 @@
21#include <linux/time.h> 20#include <linux/time.h>
22#include <linux/tick.h> 21#include <linux/tick.h>
23#include <linux/stop_machine.h> 22#include <linux/stop_machine.h>
24#include <linux/pvclock_gtod.h>
25 23
24/* Structure holding internal timekeeping values. */
25struct timekeeper {
26 /* Current clocksource used for timekeeping. */
27 struct clocksource *clock;
28 /* The shift value of the current clocksource. */
29 int shift;
30
31 /* Number of clock cycles in one NTP interval. */
32 cycle_t cycle_interval;
33 /* Number of clock shifted nano seconds in one NTP interval. */
34 u64 xtime_interval;
35 /* shifted nano seconds left over when rounding cycle_interval */
36 s64 xtime_remainder;
37 /* Raw nano seconds accumulated per NTP interval. */
38 u32 raw_interval;
39
40 /* Clock shifted nano seconds remainder not stored in xtime.tv_nsec. */
41 u64 xtime_nsec;
42 /* Difference between accumulated time and NTP time in ntp
43 * shifted nano seconds. */
44 s64 ntp_error;
45 /* Shift conversion between clock shifted nano seconds and
46 * ntp shifted nano seconds. */
47 int ntp_error_shift;
48 /* NTP adjusted clock multiplier */
49 u32 mult;
50};
26 51
27static struct timekeeper timekeeper; 52static struct timekeeper timekeeper;
28 53
29/* flag for if timekeeping is suspended */
30int __read_mostly timekeeping_suspended;
31
32static inline void tk_normalize_xtime(struct timekeeper *tk)
33{
34 while (tk->xtime_nsec >= ((u64)NSEC_PER_SEC << tk->shift)) {
35 tk->xtime_nsec -= (u64)NSEC_PER_SEC << tk->shift;
36 tk->xtime_sec++;
37 }
38}
39
40static void tk_set_xtime(struct timekeeper *tk, const struct timespec *ts)
41{
42 tk->xtime_sec = ts->tv_sec;
43 tk->xtime_nsec = (u64)ts->tv_nsec << tk->shift;
44}
45
46static void tk_xtime_add(struct timekeeper *tk, const struct timespec *ts)
47{
48 tk->xtime_sec += ts->tv_sec;
49 tk->xtime_nsec += (u64)ts->tv_nsec << tk->shift;
50 tk_normalize_xtime(tk);
51}
52
53static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec wtm)
54{
55 struct timespec tmp;
56
57 /*
58 * Verify consistency of: offset_real = -wall_to_monotonic
59 * before modifying anything
60 */
61 set_normalized_timespec(&tmp, -tk->wall_to_monotonic.tv_sec,
62 -tk->wall_to_monotonic.tv_nsec);
63 WARN_ON_ONCE(tk->offs_real.tv64 != timespec_to_ktime(tmp).tv64);
64 tk->wall_to_monotonic = wtm;
65 set_normalized_timespec(&tmp, -wtm.tv_sec, -wtm.tv_nsec);
66 tk->offs_real = timespec_to_ktime(tmp);
67}
68
69static void tk_set_sleep_time(struct timekeeper *tk, struct timespec t)
70{
71 /* Verify consistency before modifying */
72 WARN_ON_ONCE(tk->offs_boot.tv64 != timespec_to_ktime(tk->total_sleep_time).tv64);
73
74 tk->total_sleep_time = t;
75 tk->offs_boot = timespec_to_ktime(t);
76}
77
78/** 54/**
79 * timekeeper_setup_internals - Set up internals to use clocksource clock. 55 * timekeeper_setup_internals - Set up internals to use clocksource clock.
80 * 56 *
@@ -85,14 +61,12 @@ static void tk_set_sleep_time(struct timekeeper *tk, struct timespec t)
85 * 61 *
86 * Unless you're the timekeeping code, you should not be using this! 62 * Unless you're the timekeeping code, you should not be using this!
87 */ 63 */
88static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) 64static void timekeeper_setup_internals(struct clocksource *clock)
89{ 65{
90 cycle_t interval; 66 cycle_t interval;
91 u64 tmp, ntpinterval; 67 u64 tmp, ntpinterval;
92 struct clocksource *old_clock;
93 68
94 old_clock = tk->clock; 69 timekeeper.clock = clock;
95 tk->clock = clock;
96 clock->cycle_last = clock->read(clock); 70 clock->cycle_last = clock->read(clock);
97 71
98 /* Do the ns -> cycle conversion first, using original mult */ 72 /* Do the ns -> cycle conversion first, using original mult */
@@ -105,133 +79,103 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
105 tmp = 1; 79 tmp = 1;
106 80
107 interval = (cycle_t) tmp; 81 interval = (cycle_t) tmp;
108 tk->cycle_interval = interval; 82 timekeeper.cycle_interval = interval;
109 83
110 /* Go back from cycles -> shifted ns */ 84 /* Go back from cycles -> shifted ns */
111 tk->xtime_interval = (u64) interval * clock->mult; 85 timekeeper.xtime_interval = (u64) interval * clock->mult;
112 tk->xtime_remainder = ntpinterval - tk->xtime_interval; 86 timekeeper.xtime_remainder = ntpinterval - timekeeper.xtime_interval;
113 tk->raw_interval = 87 timekeeper.raw_interval =
114 ((u64) interval * clock->mult) >> clock->shift; 88 ((u64) interval * clock->mult) >> clock->shift;
115 89
116 /* if changing clocks, convert xtime_nsec shift units */ 90 timekeeper.xtime_nsec = 0;
117 if (old_clock) { 91 timekeeper.shift = clock->shift;
118 int shift_change = clock->shift - old_clock->shift;
119 if (shift_change < 0)
120 tk->xtime_nsec >>= -shift_change;
121 else
122 tk->xtime_nsec <<= shift_change;
123 }
124 tk->shift = clock->shift;
125 92
126 tk->ntp_error = 0; 93 timekeeper.ntp_error = 0;
127 tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; 94 timekeeper.ntp_error_shift = NTP_SCALE_SHIFT - clock->shift;
128 95
129 /* 96 /*
130 * The timekeeper keeps its own mult values for the currently 97 * The timekeeper keeps its own mult values for the currently
131 * active clocksource. These value will be adjusted via NTP 98 * active clocksource. These value will be adjusted via NTP
132 * to counteract clock drifting. 99 * to counteract clock drifting.
133 */ 100 */
134 tk->mult = clock->mult; 101 timekeeper.mult = clock->mult;
135} 102}
136 103
137/* Timekeeper helper functions. */ 104/* Timekeeper helper functions. */
138static inline s64 timekeeping_get_ns(struct timekeeper *tk) 105static inline s64 timekeeping_get_ns(void)
139{ 106{
140 cycle_t cycle_now, cycle_delta; 107 cycle_t cycle_now, cycle_delta;
141 struct clocksource *clock; 108 struct clocksource *clock;
142 s64 nsec;
143 109
144 /* read clocksource: */ 110 /* read clocksource: */
145 clock = tk->clock; 111 clock = timekeeper.clock;
146 cycle_now = clock->read(clock); 112 cycle_now = clock->read(clock);
147 113
148 /* calculate the delta since the last update_wall_time: */ 114 /* calculate the delta since the last update_wall_time: */
149 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; 115 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
150 116
151 nsec = cycle_delta * tk->mult + tk->xtime_nsec; 117 /* return delta convert to nanoseconds using ntp adjusted mult. */
152 nsec >>= tk->shift; 118 return clocksource_cyc2ns(cycle_delta, timekeeper.mult,
153 119 timekeeper.shift);
154 /* If arch requires, add in gettimeoffset() */
155 return nsec + arch_gettimeoffset();
156} 120}
157 121
158static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) 122static inline s64 timekeeping_get_ns_raw(void)
159{ 123{
160 cycle_t cycle_now, cycle_delta; 124 cycle_t cycle_now, cycle_delta;
161 struct clocksource *clock; 125 struct clocksource *clock;
162 s64 nsec;
163 126
164 /* read clocksource: */ 127 /* read clocksource: */
165 clock = tk->clock; 128 clock = timekeeper.clock;
166 cycle_now = clock->read(clock); 129 cycle_now = clock->read(clock);
167 130
168 /* calculate the delta since the last update_wall_time: */ 131 /* calculate the delta since the last update_wall_time: */
169 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; 132 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
170 133
171 /* convert delta to nanoseconds. */ 134 /* return delta convert to nanoseconds using ntp adjusted mult. */
172 nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); 135 return clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift);
173
174 /* If arch requires, add in gettimeoffset() */
175 return nsec + arch_gettimeoffset();
176}
177
178static RAW_NOTIFIER_HEAD(pvclock_gtod_chain);
179
180static void update_pvclock_gtod(struct timekeeper *tk)
181{
182 raw_notifier_call_chain(&pvclock_gtod_chain, 0, tk);
183} 136}
184 137
185/** 138/*
186 * pvclock_gtod_register_notifier - register a pvclock timedata update listener 139 * This read-write spinlock protects us from races in SMP while
187 * 140 * playing with xtime.
188 * Must hold write on timekeeper.lock
189 */ 141 */
190int pvclock_gtod_register_notifier(struct notifier_block *nb) 142__cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
191{
192 struct timekeeper *tk = &timekeeper;
193 unsigned long flags;
194 int ret;
195
196 write_seqlock_irqsave(&tk->lock, flags);
197 ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb);
198 /* update timekeeping data */
199 update_pvclock_gtod(tk);
200 write_sequnlock_irqrestore(&tk->lock, flags);
201 143
202 return ret;
203}
204EXPORT_SYMBOL_GPL(pvclock_gtod_register_notifier);
205 144
206/** 145/*
207 * pvclock_gtod_unregister_notifier - unregister a pvclock 146 * The current time
208 * timedata update listener 147 * wall_to_monotonic is what we need to add to xtime (or xtime corrected
148 * for sub jiffie times) to get to monotonic time. Monotonic is pegged
149 * at zero at system boot time, so wall_to_monotonic will be negative,
150 * however, we will ALWAYS keep the tv_nsec part positive so we can use
151 * the usual normalization.
209 * 152 *
210 * Must hold write on timekeeper.lock 153 * wall_to_monotonic is moved after resume from suspend for the monotonic
154 * time not to jump. We need to add total_sleep_time to wall_to_monotonic
155 * to get the real boot based time offset.
156 *
157 * - wall_to_monotonic is no longer the boot time, getboottime must be
158 * used instead.
211 */ 159 */
212int pvclock_gtod_unregister_notifier(struct notifier_block *nb) 160static struct timespec xtime __attribute__ ((aligned (16)));
213{ 161static struct timespec wall_to_monotonic __attribute__ ((aligned (16)));
214 struct timekeeper *tk = &timekeeper; 162static struct timespec total_sleep_time;
215 unsigned long flags;
216 int ret;
217 163
218 write_seqlock_irqsave(&tk->lock, flags); 164/*
219 ret = raw_notifier_chain_unregister(&pvclock_gtod_chain, nb); 165 * The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock.
220 write_sequnlock_irqrestore(&tk->lock, flags); 166 */
167static struct timespec raw_time;
221 168
222 return ret; 169/* flag for if timekeeping is suspended */
223} 170int __read_mostly timekeeping_suspended;
224EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);
225 171
226/* must hold write on timekeeper.lock */ 172/* must hold xtime_lock */
227static void timekeeping_update(struct timekeeper *tk, bool clearntp) 173void timekeeping_leap_insert(int leapsecond)
228{ 174{
229 if (clearntp) { 175 xtime.tv_sec += leapsecond;
230 tk->ntp_error = 0; 176 wall_to_monotonic.tv_sec -= leapsecond;
231 ntp_clear(); 177 update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
232 } 178 timekeeper.mult);
233 update_vsyscall(tk);
234 update_pvclock_gtod(tk);
235} 179}
236 180
237/** 181/**
@@ -241,26 +185,27 @@ static void timekeeping_update(struct timekeeper *tk, bool clearntp)
241 * update_wall_time(). This is useful before significant clock changes, 185 * update_wall_time(). This is useful before significant clock changes,
242 * as it avoids having to deal with this time offset explicitly. 186 * as it avoids having to deal with this time offset explicitly.
243 */ 187 */
244static void timekeeping_forward_now(struct timekeeper *tk) 188static void timekeeping_forward_now(void)
245{ 189{
246 cycle_t cycle_now, cycle_delta; 190 cycle_t cycle_now, cycle_delta;
247 struct clocksource *clock; 191 struct clocksource *clock;
248 s64 nsec; 192 s64 nsec;
249 193
250 clock = tk->clock; 194 clock = timekeeper.clock;
251 cycle_now = clock->read(clock); 195 cycle_now = clock->read(clock);
252 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; 196 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
253 clock->cycle_last = cycle_now; 197 clock->cycle_last = cycle_now;
254 198
255 tk->xtime_nsec += cycle_delta * tk->mult; 199 nsec = clocksource_cyc2ns(cycle_delta, timekeeper.mult,
200 timekeeper.shift);
256 201
257 /* If arch requires, add in gettimeoffset() */ 202 /* If arch requires, add in gettimeoffset() */
258 tk->xtime_nsec += (u64)arch_gettimeoffset() << tk->shift; 203 nsec += arch_gettimeoffset();
259 204
260 tk_normalize_xtime(tk); 205 timespec_add_ns(&xtime, nsec);
261 206
262 nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); 207 nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift);
263 timespec_add_ns(&tk->raw_time, nsec); 208 timespec_add_ns(&raw_time, nsec);
264} 209}
265 210
266/** 211/**
@@ -271,39 +216,43 @@ static void timekeeping_forward_now(struct timekeeper *tk)
271 */ 216 */
272void getnstimeofday(struct timespec *ts) 217void getnstimeofday(struct timespec *ts)
273{ 218{
274 struct timekeeper *tk = &timekeeper;
275 unsigned long seq; 219 unsigned long seq;
276 s64 nsecs = 0; 220 s64 nsecs;
277 221
278 WARN_ON(timekeeping_suspended); 222 WARN_ON(timekeeping_suspended);
279 223
280 do { 224 do {
281 seq = read_seqbegin(&tk->lock); 225 seq = read_seqbegin(&xtime_lock);
282 226
283 ts->tv_sec = tk->xtime_sec; 227 *ts = xtime;
284 nsecs = timekeeping_get_ns(tk); 228 nsecs = timekeeping_get_ns();
285 229
286 } while (read_seqretry(&tk->lock, seq)); 230 /* If arch requires, add in gettimeoffset() */
231 nsecs += arch_gettimeoffset();
232
233 } while (read_seqretry(&xtime_lock, seq));
287 234
288 ts->tv_nsec = 0;
289 timespec_add_ns(ts, nsecs); 235 timespec_add_ns(ts, nsecs);
290} 236}
237
291EXPORT_SYMBOL(getnstimeofday); 238EXPORT_SYMBOL(getnstimeofday);
292 239
293ktime_t ktime_get(void) 240ktime_t ktime_get(void)
294{ 241{
295 struct timekeeper *tk = &timekeeper;
296 unsigned int seq; 242 unsigned int seq;
297 s64 secs, nsecs; 243 s64 secs, nsecs;
298 244
299 WARN_ON(timekeeping_suspended); 245 WARN_ON(timekeeping_suspended);
300 246
301 do { 247 do {
302 seq = read_seqbegin(&tk->lock); 248 seq = read_seqbegin(&xtime_lock);
303 secs = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; 249 secs = xtime.tv_sec + wall_to_monotonic.tv_sec;
304 nsecs = timekeeping_get_ns(tk) + tk->wall_to_monotonic.tv_nsec; 250 nsecs = xtime.tv_nsec + wall_to_monotonic.tv_nsec;
305 251 nsecs += timekeeping_get_ns();
306 } while (read_seqretry(&tk->lock, seq)); 252 /* If arch requires, add in gettimeoffset() */
253 nsecs += arch_gettimeoffset();
254
255 } while (read_seqretry(&xtime_lock, seq));
307 /* 256 /*
308 * Use ktime_set/ktime_add_ns to create a proper ktime on 257 * Use ktime_set/ktime_add_ns to create a proper ktime on
309 * 32-bit architectures without CONFIG_KTIME_SCALAR. 258 * 32-bit architectures without CONFIG_KTIME_SCALAR.
@@ -322,24 +271,24 @@ EXPORT_SYMBOL_GPL(ktime_get);
322 */ 271 */
323void ktime_get_ts(struct timespec *ts) 272void ktime_get_ts(struct timespec *ts)
324{ 273{
325 struct timekeeper *tk = &timekeeper;
326 struct timespec tomono; 274 struct timespec tomono;
327 s64 nsec;
328 unsigned int seq; 275 unsigned int seq;
276 s64 nsecs;
329 277
330 WARN_ON(timekeeping_suspended); 278 WARN_ON(timekeeping_suspended);
331 279
332 do { 280 do {
333 seq = read_seqbegin(&tk->lock); 281 seq = read_seqbegin(&xtime_lock);
334 ts->tv_sec = tk->xtime_sec; 282 *ts = xtime;
335 nsec = timekeeping_get_ns(tk); 283 tomono = wall_to_monotonic;
336 tomono = tk->wall_to_monotonic; 284 nsecs = timekeeping_get_ns();
285 /* If arch requires, add in gettimeoffset() */
286 nsecs += arch_gettimeoffset();
337 287
338 } while (read_seqretry(&tk->lock, seq)); 288 } while (read_seqretry(&xtime_lock, seq));
339 289
340 ts->tv_sec += tomono.tv_sec; 290 set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec,
341 ts->tv_nsec = 0; 291 ts->tv_nsec + tomono.tv_nsec + nsecs);
342 timespec_add_ns(ts, nsec + tomono.tv_nsec);
343} 292}
344EXPORT_SYMBOL_GPL(ktime_get_ts); 293EXPORT_SYMBOL_GPL(ktime_get_ts);
345 294
@@ -356,23 +305,28 @@ EXPORT_SYMBOL_GPL(ktime_get_ts);
356 */ 305 */
357void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) 306void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
358{ 307{
359 struct timekeeper *tk = &timekeeper;
360 unsigned long seq; 308 unsigned long seq;
361 s64 nsecs_raw, nsecs_real; 309 s64 nsecs_raw, nsecs_real;
362 310
363 WARN_ON_ONCE(timekeeping_suspended); 311 WARN_ON_ONCE(timekeeping_suspended);
364 312
365 do { 313 do {
366 seq = read_seqbegin(&tk->lock); 314 u32 arch_offset;
367 315
368 *ts_raw = tk->raw_time; 316 seq = read_seqbegin(&xtime_lock);
369 ts_real->tv_sec = tk->xtime_sec;
370 ts_real->tv_nsec = 0;
371 317
372 nsecs_raw = timekeeping_get_ns_raw(tk); 318 *ts_raw = raw_time;
373 nsecs_real = timekeeping_get_ns(tk); 319 *ts_real = xtime;
374 320
375 } while (read_seqretry(&tk->lock, seq)); 321 nsecs_raw = timekeeping_get_ns_raw();
322 nsecs_real = timekeeping_get_ns();
323
324 /* If arch requires, add in gettimeoffset() */
325 arch_offset = arch_gettimeoffset();
326 nsecs_raw += arch_offset;
327 nsecs_real += arch_offset;
328
329 } while (read_seqretry(&xtime_lock, seq));
376 330
377 timespec_add_ns(ts_raw, nsecs_raw); 331 timespec_add_ns(ts_raw, nsecs_raw);
378 timespec_add_ns(ts_real, nsecs_real); 332 timespec_add_ns(ts_real, nsecs_real);
@@ -395,8 +349,8 @@ void do_gettimeofday(struct timeval *tv)
395 tv->tv_sec = now.tv_sec; 349 tv->tv_sec = now.tv_sec;
396 tv->tv_usec = now.tv_nsec/1000; 350 tv->tv_usec = now.tv_nsec/1000;
397} 351}
398EXPORT_SYMBOL(do_gettimeofday);
399 352
353EXPORT_SYMBOL(do_gettimeofday);
400/** 354/**
401 * do_settimeofday - Sets the time of day 355 * do_settimeofday - Sets the time of day
402 * @tv: pointer to the timespec variable containing the new time 356 * @tv: pointer to the timespec variable containing the new time
@@ -405,36 +359,39 @@ EXPORT_SYMBOL(do_gettimeofday);
405 */ 359 */
406int do_settimeofday(const struct timespec *tv) 360int do_settimeofday(const struct timespec *tv)
407{ 361{
408 struct timekeeper *tk = &timekeeper; 362 struct timespec ts_delta;
409 struct timespec ts_delta, xt;
410 unsigned long flags; 363 unsigned long flags;
411 364
412 if (!timespec_valid_strict(tv)) 365 if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
413 return -EINVAL; 366 return -EINVAL;
414 367
415 write_seqlock_irqsave(&tk->lock, flags); 368 write_seqlock_irqsave(&xtime_lock, flags);
416 369
417 timekeeping_forward_now(tk); 370 timekeeping_forward_now();
418 371
419 xt = tk_xtime(tk); 372 ts_delta.tv_sec = tv->tv_sec - xtime.tv_sec;
420 ts_delta.tv_sec = tv->tv_sec - xt.tv_sec; 373 ts_delta.tv_nsec = tv->tv_nsec - xtime.tv_nsec;
421 ts_delta.tv_nsec = tv->tv_nsec - xt.tv_nsec; 374 wall_to_monotonic = timespec_sub(wall_to_monotonic, ts_delta);
422 375
423 tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, ts_delta)); 376 xtime = *tv;
424 377
425 tk_set_xtime(tk, tv); 378 timekeeper.ntp_error = 0;
379 ntp_clear();
426 380
427 timekeeping_update(tk, true); 381 update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
382 timekeeper.mult);
428 383
429 write_sequnlock_irqrestore(&tk->lock, flags); 384 write_sequnlock_irqrestore(&xtime_lock, flags);
430 385
431 /* signal hrtimers about time change */ 386 /* signal hrtimers about time change */
432 clock_was_set(); 387 clock_was_set();
433 388
434 return 0; 389 return 0;
435} 390}
391
436EXPORT_SYMBOL(do_settimeofday); 392EXPORT_SYMBOL(do_settimeofday);
437 393
394
438/** 395/**
439 * timekeeping_inject_offset - Adds or subtracts from the current time. 396 * timekeeping_inject_offset - Adds or subtracts from the current time.
440 * @tv: pointer to the timespec variable containing the offset 397 * @tv: pointer to the timespec variable containing the offset
@@ -443,37 +400,30 @@ EXPORT_SYMBOL(do_settimeofday);
443 */ 400 */
444int timekeeping_inject_offset(struct timespec *ts) 401int timekeeping_inject_offset(struct timespec *ts)
445{ 402{
446 struct timekeeper *tk = &timekeeper;
447 unsigned long flags; 403 unsigned long flags;
448 struct timespec tmp;
449 int ret = 0;
450 404
451 if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC) 405 if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
452 return -EINVAL; 406 return -EINVAL;
453 407
454 write_seqlock_irqsave(&tk->lock, flags); 408 write_seqlock_irqsave(&xtime_lock, flags);
455 409
456 timekeeping_forward_now(tk); 410 timekeeping_forward_now();
457 411
458 /* Make sure the proposed value is valid */ 412 xtime = timespec_add(xtime, *ts);
459 tmp = timespec_add(tk_xtime(tk), *ts); 413 wall_to_monotonic = timespec_sub(wall_to_monotonic, *ts);
460 if (!timespec_valid_strict(&tmp)) {
461 ret = -EINVAL;
462 goto error;
463 }
464 414
465 tk_xtime_add(tk, ts); 415 timekeeper.ntp_error = 0;
466 tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts)); 416 ntp_clear();
467 417
468error: /* even if we error out, we forwarded the time, so call update */ 418 update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
469 timekeeping_update(tk, true); 419 timekeeper.mult);
470 420
471 write_sequnlock_irqrestore(&tk->lock, flags); 421 write_sequnlock_irqrestore(&xtime_lock, flags);
472 422
473 /* signal hrtimers about time change */ 423 /* signal hrtimers about time change */
474 clock_was_set(); 424 clock_was_set();
475 425
476 return ret; 426 return 0;
477} 427}
478EXPORT_SYMBOL(timekeeping_inject_offset); 428EXPORT_SYMBOL(timekeeping_inject_offset);
479 429
@@ -484,25 +434,17 @@ EXPORT_SYMBOL(timekeeping_inject_offset);
484 */ 434 */
485static int change_clocksource(void *data) 435static int change_clocksource(void *data)
486{ 436{
487 struct timekeeper *tk = &timekeeper;
488 struct clocksource *new, *old; 437 struct clocksource *new, *old;
489 unsigned long flags;
490 438
491 new = (struct clocksource *) data; 439 new = (struct clocksource *) data;
492 440
493 write_seqlock_irqsave(&tk->lock, flags); 441 timekeeping_forward_now();
494
495 timekeeping_forward_now(tk);
496 if (!new->enable || new->enable(new) == 0) { 442 if (!new->enable || new->enable(new) == 0) {
497 old = tk->clock; 443 old = timekeeper.clock;
498 tk_setup_internals(tk, new); 444 timekeeper_setup_internals(new);
499 if (old->disable) 445 if (old->disable)
500 old->disable(old); 446 old->disable(old);
501 } 447 }
502 timekeeping_update(tk, true);
503
504 write_sequnlock_irqrestore(&tk->lock, flags);
505
506 return 0; 448 return 0;
507} 449}
508 450
@@ -515,9 +457,7 @@ static int change_clocksource(void *data)
515 */ 457 */
516void timekeeping_notify(struct clocksource *clock) 458void timekeeping_notify(struct clocksource *clock)
517{ 459{
518 struct timekeeper *tk = &timekeeper; 460 if (timekeeper.clock == clock)
519
520 if (tk->clock == clock)
521 return; 461 return;
522 stop_machine(change_clocksource, clock, NULL); 462 stop_machine(change_clocksource, clock, NULL);
523 tick_clock_notify(); 463 tick_clock_notify();
@@ -546,57 +486,48 @@ EXPORT_SYMBOL_GPL(ktime_get_real);
546 */ 486 */
547void getrawmonotonic(struct timespec *ts) 487void getrawmonotonic(struct timespec *ts)
548{ 488{
549 struct timekeeper *tk = &timekeeper;
550 unsigned long seq; 489 unsigned long seq;
551 s64 nsecs; 490 s64 nsecs;
552 491
553 do { 492 do {
554 seq = read_seqbegin(&tk->lock); 493 seq = read_seqbegin(&xtime_lock);
555 nsecs = timekeeping_get_ns_raw(tk); 494 nsecs = timekeeping_get_ns_raw();
556 *ts = tk->raw_time; 495 *ts = raw_time;
557 496
558 } while (read_seqretry(&tk->lock, seq)); 497 } while (read_seqretry(&xtime_lock, seq));
559 498
560 timespec_add_ns(ts, nsecs); 499 timespec_add_ns(ts, nsecs);
561} 500}
562EXPORT_SYMBOL(getrawmonotonic); 501EXPORT_SYMBOL(getrawmonotonic);
563 502
503
564/** 504/**
565 * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres 505 * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres
566 */ 506 */
567int timekeeping_valid_for_hres(void) 507int timekeeping_valid_for_hres(void)
568{ 508{
569 struct timekeeper *tk = &timekeeper;
570 unsigned long seq; 509 unsigned long seq;
571 int ret; 510 int ret;
572 511
573 do { 512 do {
574 seq = read_seqbegin(&tk->lock); 513 seq = read_seqbegin(&xtime_lock);
575 514
576 ret = tk->clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; 515 ret = timekeeper.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
577 516
578 } while (read_seqretry(&tk->lock, seq)); 517 } while (read_seqretry(&xtime_lock, seq));
579 518
580 return ret; 519 return ret;
581} 520}
582 521
583/** 522/**
584 * timekeeping_max_deferment - Returns max time the clocksource can be deferred 523 * timekeeping_max_deferment - Returns max time the clocksource can be deferred
524 *
525 * Caller must observe xtime_lock via read_seqbegin/read_seqretry to
526 * ensure that the clocksource does not change!
585 */ 527 */
586u64 timekeeping_max_deferment(void) 528u64 timekeeping_max_deferment(void)
587{ 529{
588 struct timekeeper *tk = &timekeeper; 530 return timekeeper.clock->max_idle_ns;
589 unsigned long seq;
590 u64 ret;
591
592 do {
593 seq = read_seqbegin(&tk->lock);
594
595 ret = tk->clock->max_idle_ns;
596
597 } while (read_seqretry(&tk->lock, seq));
598
599 return ret;
600} 531}
601 532
602/** 533/**
@@ -634,51 +565,35 @@ void __attribute__((weak)) read_boot_clock(struct timespec *ts)
634 */ 565 */
635void __init timekeeping_init(void) 566void __init timekeeping_init(void)
636{ 567{
637 struct timekeeper *tk = &timekeeper;
638 struct clocksource *clock; 568 struct clocksource *clock;
639 unsigned long flags; 569 unsigned long flags;
640 struct timespec now, boot, tmp; 570 struct timespec now, boot;
641 571
642 read_persistent_clock(&now); 572 read_persistent_clock(&now);
643 if (!timespec_valid_strict(&now)) {
644 pr_warn("WARNING: Persistent clock returned invalid value!\n"
645 " Check your CMOS/BIOS settings.\n");
646 now.tv_sec = 0;
647 now.tv_nsec = 0;
648 }
649
650 read_boot_clock(&boot); 573 read_boot_clock(&boot);
651 if (!timespec_valid_strict(&boot)) {
652 pr_warn("WARNING: Boot clock returned invalid value!\n"
653 " Check your CMOS/BIOS settings.\n");
654 boot.tv_sec = 0;
655 boot.tv_nsec = 0;
656 }
657 574
658 seqlock_init(&tk->lock); 575 write_seqlock_irqsave(&xtime_lock, flags);
659 576
660 ntp_init(); 577 ntp_init();
661 578
662 write_seqlock_irqsave(&tk->lock, flags);
663 clock = clocksource_default_clock(); 579 clock = clocksource_default_clock();
664 if (clock->enable) 580 if (clock->enable)
665 clock->enable(clock); 581 clock->enable(clock);
666 tk_setup_internals(tk, clock); 582 timekeeper_setup_internals(clock);
667 583
668 tk_set_xtime(tk, &now); 584 xtime.tv_sec = now.tv_sec;
669 tk->raw_time.tv_sec = 0; 585 xtime.tv_nsec = now.tv_nsec;
670 tk->raw_time.tv_nsec = 0; 586 raw_time.tv_sec = 0;
671 if (boot.tv_sec == 0 && boot.tv_nsec == 0) 587 raw_time.tv_nsec = 0;
672 boot = tk_xtime(tk); 588 if (boot.tv_sec == 0 && boot.tv_nsec == 0) {
673 589 boot.tv_sec = xtime.tv_sec;
674 set_normalized_timespec(&tmp, -boot.tv_sec, -boot.tv_nsec); 590 boot.tv_nsec = xtime.tv_nsec;
675 tk_set_wall_to_mono(tk, tmp); 591 }
676 592 set_normalized_timespec(&wall_to_monotonic,
677 tmp.tv_sec = 0; 593 -boot.tv_sec, -boot.tv_nsec);
678 tmp.tv_nsec = 0; 594 total_sleep_time.tv_sec = 0;
679 tk_set_sleep_time(tk, tmp); 595 total_sleep_time.tv_nsec = 0;
680 596 write_sequnlock_irqrestore(&xtime_lock, flags);
681 write_sequnlock_irqrestore(&tk->lock, flags);
682} 597}
683 598
684/* time in seconds when suspend began */ 599/* time in seconds when suspend began */
@@ -691,19 +606,20 @@ static struct timespec timekeeping_suspend_time;
691 * Takes a timespec offset measuring a suspend interval and properly 606 * Takes a timespec offset measuring a suspend interval and properly
692 * adds the sleep offset to the timekeeping variables. 607 * adds the sleep offset to the timekeeping variables.
693 */ 608 */
694static void __timekeeping_inject_sleeptime(struct timekeeper *tk, 609static void __timekeeping_inject_sleeptime(struct timespec *delta)
695 struct timespec *delta)
696{ 610{
697 if (!timespec_valid_strict(delta)) { 611 if (!timespec_valid(delta)) {
698 printk(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid " 612 printk(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid "
699 "sleep delta value!\n"); 613 "sleep delta value!\n");
700 return; 614 return;
701 } 615 }
702 tk_xtime_add(tk, delta); 616
703 tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *delta)); 617 xtime = timespec_add(xtime, *delta);
704 tk_set_sleep_time(tk, timespec_add(tk->total_sleep_time, *delta)); 618 wall_to_monotonic = timespec_sub(wall_to_monotonic, *delta);
619 total_sleep_time = timespec_add(total_sleep_time, *delta);
705} 620}
706 621
622
707/** 623/**
708 * timekeeping_inject_sleeptime - Adds suspend interval to timeekeeping values 624 * timekeeping_inject_sleeptime - Adds suspend interval to timeekeeping values
709 * @delta: pointer to a timespec delta value 625 * @delta: pointer to a timespec delta value
@@ -716,7 +632,6 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
716 */ 632 */
717void timekeeping_inject_sleeptime(struct timespec *delta) 633void timekeeping_inject_sleeptime(struct timespec *delta)
718{ 634{
719 struct timekeeper *tk = &timekeeper;
720 unsigned long flags; 635 unsigned long flags;
721 struct timespec ts; 636 struct timespec ts;
722 637
@@ -725,20 +640,23 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
725 if (!(ts.tv_sec == 0 && ts.tv_nsec == 0)) 640 if (!(ts.tv_sec == 0 && ts.tv_nsec == 0))
726 return; 641 return;
727 642
728 write_seqlock_irqsave(&tk->lock, flags); 643 write_seqlock_irqsave(&xtime_lock, flags);
729 644 timekeeping_forward_now();
730 timekeeping_forward_now(tk);
731 645
732 __timekeeping_inject_sleeptime(tk, delta); 646 __timekeeping_inject_sleeptime(delta);
733 647
734 timekeeping_update(tk, true); 648 timekeeper.ntp_error = 0;
649 ntp_clear();
650 update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
651 timekeeper.mult);
735 652
736 write_sequnlock_irqrestore(&tk->lock, flags); 653 write_sequnlock_irqrestore(&xtime_lock, flags);
737 654
738 /* signal hrtimers about time change */ 655 /* signal hrtimers about time change */
739 clock_was_set(); 656 clock_was_set();
740} 657}
741 658
659
742/** 660/**
743 * timekeeping_resume - Resumes the generic timekeeping subsystem. 661 * timekeeping_resume - Resumes the generic timekeeping subsystem.
744 * 662 *
@@ -748,27 +666,24 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
748 */ 666 */
749static void timekeeping_resume(void) 667static void timekeeping_resume(void)
750{ 668{
751 struct timekeeper *tk = &timekeeper;
752 unsigned long flags; 669 unsigned long flags;
753 struct timespec ts; 670 struct timespec ts;
754 671
755 read_persistent_clock(&ts); 672 read_persistent_clock(&ts);
756 673
757 clockevents_resume();
758 clocksource_resume(); 674 clocksource_resume();
759 675
760 write_seqlock_irqsave(&tk->lock, flags); 676 write_seqlock_irqsave(&xtime_lock, flags);
761 677
762 if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) { 678 if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) {
763 ts = timespec_sub(ts, timekeeping_suspend_time); 679 ts = timespec_sub(ts, timekeeping_suspend_time);
764 __timekeeping_inject_sleeptime(tk, &ts); 680 __timekeeping_inject_sleeptime(&ts);
765 } 681 }
766 /* re-base the last cycle value */ 682 /* re-base the last cycle value */
767 tk->clock->cycle_last = tk->clock->read(tk->clock); 683 timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
768 tk->ntp_error = 0; 684 timekeeper.ntp_error = 0;
769 timekeeping_suspended = 0; 685 timekeeping_suspended = 0;
770 timekeeping_update(tk, false); 686 write_sequnlock_irqrestore(&xtime_lock, flags);
771 write_sequnlock_irqrestore(&tk->lock, flags);
772 687
773 touch_softlockup_watchdog(); 688 touch_softlockup_watchdog();
774 689
@@ -780,15 +695,14 @@ static void timekeeping_resume(void)
780 695
781static int timekeeping_suspend(void) 696static int timekeeping_suspend(void)
782{ 697{
783 struct timekeeper *tk = &timekeeper;
784 unsigned long flags; 698 unsigned long flags;
785 struct timespec delta, delta_delta; 699 struct timespec delta, delta_delta;
786 static struct timespec old_delta; 700 static struct timespec old_delta;
787 701
788 read_persistent_clock(&timekeeping_suspend_time); 702 read_persistent_clock(&timekeeping_suspend_time);
789 703
790 write_seqlock_irqsave(&tk->lock, flags); 704 write_seqlock_irqsave(&xtime_lock, flags);
791 timekeeping_forward_now(tk); 705 timekeeping_forward_now();
792 timekeeping_suspended = 1; 706 timekeeping_suspended = 1;
793 707
794 /* 708 /*
@@ -797,7 +711,7 @@ static int timekeeping_suspend(void)
797 * try to compensate so the difference in system time 711 * try to compensate so the difference in system time
798 * and persistent_clock time stays close to constant. 712 * and persistent_clock time stays close to constant.
799 */ 713 */
800 delta = timespec_sub(tk_xtime(tk), timekeeping_suspend_time); 714 delta = timespec_sub(xtime, timekeeping_suspend_time);
801 delta_delta = timespec_sub(delta, old_delta); 715 delta_delta = timespec_sub(delta, old_delta);
802 if (abs(delta_delta.tv_sec) >= 2) { 716 if (abs(delta_delta.tv_sec) >= 2) {
803 /* 717 /*
@@ -810,11 +724,10 @@ static int timekeeping_suspend(void)
810 timekeeping_suspend_time = 724 timekeeping_suspend_time =
811 timespec_add(timekeeping_suspend_time, delta_delta); 725 timespec_add(timekeeping_suspend_time, delta_delta);
812 } 726 }
813 write_sequnlock_irqrestore(&tk->lock, flags); 727 write_sequnlock_irqrestore(&xtime_lock, flags);
814 728
815 clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); 729 clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
816 clocksource_suspend(); 730 clocksource_suspend();
817 clockevents_suspend();
818 731
819 return 0; 732 return 0;
820} 733}
@@ -837,8 +750,7 @@ device_initcall(timekeeping_init_ops);
837 * If the error is already larger, we look ahead even further 750 * If the error is already larger, we look ahead even further
838 * to compensate for late or lost adjustments. 751 * to compensate for late or lost adjustments.
839 */ 752 */
840static __always_inline int timekeeping_bigadjust(struct timekeeper *tk, 753static __always_inline int timekeeping_bigadjust(s64 error, s64 *interval,
841 s64 error, s64 *interval,
842 s64 *offset) 754 s64 *offset)
843{ 755{
844 s64 tick_error, i; 756 s64 tick_error, i;
@@ -854,7 +766,7 @@ static __always_inline int timekeeping_bigadjust(struct timekeeper *tk,
854 * here. This is tuned so that an error of about 1 msec is adjusted 766 * here. This is tuned so that an error of about 1 msec is adjusted
855 * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks). 767 * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks).
856 */ 768 */
857 error2 = tk->ntp_error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ); 769 error2 = timekeeper.ntp_error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ);
858 error2 = abs(error2); 770 error2 = abs(error2);
859 for (look_ahead = 0; error2 > 0; look_ahead++) 771 for (look_ahead = 0; error2 > 0; look_ahead++)
860 error2 >>= 2; 772 error2 >>= 2;
@@ -863,8 +775,8 @@ static __always_inline int timekeeping_bigadjust(struct timekeeper *tk,
863 * Now calculate the error in (1 << look_ahead) ticks, but first 775 * Now calculate the error in (1 << look_ahead) ticks, but first
864 * remove the single look ahead already included in the error. 776 * remove the single look ahead already included in the error.
865 */ 777 */
866 tick_error = ntp_tick_length() >> (tk->ntp_error_shift + 1); 778 tick_error = tick_length >> (timekeeper.ntp_error_shift + 1);
867 tick_error -= tk->xtime_interval >> 1; 779 tick_error -= timekeeper.xtime_interval >> 1;
868 error = ((error - tick_error) >> look_ahead) + tick_error; 780 error = ((error - tick_error) >> look_ahead) + tick_error;
869 781
870 /* Finally calculate the adjustment shift value. */ 782 /* Finally calculate the adjustment shift value. */
@@ -889,181 +801,43 @@ static __always_inline int timekeeping_bigadjust(struct timekeeper *tk,
889 * this is optimized for the most common adjustments of -1,0,1, 801 * this is optimized for the most common adjustments of -1,0,1,
890 * for other values we can do a bit more work. 802 * for other values we can do a bit more work.
891 */ 803 */
892static void timekeeping_adjust(struct timekeeper *tk, s64 offset) 804static void timekeeping_adjust(s64 offset)
893{ 805{
894 s64 error, interval = tk->cycle_interval; 806 s64 error, interval = timekeeper.cycle_interval;
895 int adj; 807 int adj;
896 808
897 /* 809 error = timekeeper.ntp_error >> (timekeeper.ntp_error_shift - 1);
898 * The point of this is to check if the error is greater than half
899 * an interval.
900 *
901 * First we shift it down from NTP_SHIFT to clocksource->shifted nsecs.
902 *
903 * Note we subtract one in the shift, so that error is really error*2.
904 * This "saves" dividing(shifting) interval twice, but keeps the
905 * (error > interval) comparison as still measuring if error is
906 * larger than half an interval.
907 *
908 * Note: It does not "save" on aggravation when reading the code.
909 */
910 error = tk->ntp_error >> (tk->ntp_error_shift - 1);
911 if (error > interval) { 810 if (error > interval) {
912 /*
913 * We now divide error by 4(via shift), which checks if
914 * the error is greater than twice the interval.
915 * If it is greater, we need a bigadjust, if its smaller,
916 * we can adjust by 1.
917 */
918 error >>= 2; 811 error >>= 2;
919 /*
920 * XXX - In update_wall_time, we round up to the next
921 * nanosecond, and store the amount rounded up into
922 * the error. This causes the likely below to be unlikely.
923 *
924 * The proper fix is to avoid rounding up by using
925 * the high precision tk->xtime_nsec instead of
926 * xtime.tv_nsec everywhere. Fixing this will take some
927 * time.
928 */
929 if (likely(error <= interval)) 812 if (likely(error <= interval))
930 adj = 1; 813 adj = 1;
931 else 814 else
932 adj = timekeeping_bigadjust(tk, error, &interval, &offset); 815 adj = timekeeping_bigadjust(error, &interval, &offset);
933 } else { 816 } else if (error < -interval) {
934 if (error < -interval) { 817 error >>= 2;
935 /* See comment above, this is just switched for the negative */ 818 if (likely(error >= -interval)) {
936 error >>= 2; 819 adj = -1;
937 if (likely(error >= -interval)) { 820 interval = -interval;
938 adj = -1; 821 offset = -offset;
939 interval = -interval; 822 } else
940 offset = -offset; 823 adj = timekeeping_bigadjust(error, &interval, &offset);
941 } else { 824 } else
942 adj = timekeeping_bigadjust(tk, error, &interval, &offset); 825 return;
943 }
944 } else {
945 goto out_adjust;
946 }
947 }
948
949 if (unlikely(tk->clock->maxadj &&
950 (tk->mult + adj > tk->clock->mult + tk->clock->maxadj))) {
951 printk_once(KERN_WARNING
952 "Adjusting %s more than 11%% (%ld vs %ld)\n",
953 tk->clock->name, (long)tk->mult + adj,
954 (long)tk->clock->mult + tk->clock->maxadj);
955 }
956 /*
957 * So the following can be confusing.
958 *
959 * To keep things simple, lets assume adj == 1 for now.
960 *
961 * When adj != 1, remember that the interval and offset values
962 * have been appropriately scaled so the math is the same.
963 *
964 * The basic idea here is that we're increasing the multiplier
965 * by one, this causes the xtime_interval to be incremented by
966 * one cycle_interval. This is because:
967 * xtime_interval = cycle_interval * mult
968 * So if mult is being incremented by one:
969 * xtime_interval = cycle_interval * (mult + 1)
970 * Its the same as:
971 * xtime_interval = (cycle_interval * mult) + cycle_interval
972 * Which can be shortened to:
973 * xtime_interval += cycle_interval
974 *
975 * So offset stores the non-accumulated cycles. Thus the current
976 * time (in shifted nanoseconds) is:
977 * now = (offset * adj) + xtime_nsec
978 * Now, even though we're adjusting the clock frequency, we have
979 * to keep time consistent. In other words, we can't jump back
980 * in time, and we also want to avoid jumping forward in time.
981 *
982 * So given the same offset value, we need the time to be the same
983 * both before and after the freq adjustment.
984 * now = (offset * adj_1) + xtime_nsec_1
985 * now = (offset * adj_2) + xtime_nsec_2
986 * So:
987 * (offset * adj_1) + xtime_nsec_1 =
988 * (offset * adj_2) + xtime_nsec_2
989 * And we know:
990 * adj_2 = adj_1 + 1
991 * So:
992 * (offset * adj_1) + xtime_nsec_1 =
993 * (offset * (adj_1+1)) + xtime_nsec_2
994 * (offset * adj_1) + xtime_nsec_1 =
995 * (offset * adj_1) + offset + xtime_nsec_2
996 * Canceling the sides:
997 * xtime_nsec_1 = offset + xtime_nsec_2
998 * Which gives us:
999 * xtime_nsec_2 = xtime_nsec_1 - offset
1000 * Which simplfies to:
1001 * xtime_nsec -= offset
1002 *
1003 * XXX - TODO: Doc ntp_error calculation.
1004 */
1005 tk->mult += adj;
1006 tk->xtime_interval += interval;
1007 tk->xtime_nsec -= offset;
1008 tk->ntp_error -= (interval - offset) << tk->ntp_error_shift;
1009
1010out_adjust:
1011 /*
1012 * It may be possible that when we entered this function, xtime_nsec
1013 * was very small. Further, if we're slightly speeding the clocksource
1014 * in the code above, its possible the required corrective factor to
1015 * xtime_nsec could cause it to underflow.
1016 *
1017 * Now, since we already accumulated the second, cannot simply roll
1018 * the accumulated second back, since the NTP subsystem has been
1019 * notified via second_overflow. So instead we push xtime_nsec forward
1020 * by the amount we underflowed, and add that amount into the error.
1021 *
1022 * We'll correct this error next time through this function, when
1023 * xtime_nsec is not as small.
1024 */
1025 if (unlikely((s64)tk->xtime_nsec < 0)) {
1026 s64 neg = -(s64)tk->xtime_nsec;
1027 tk->xtime_nsec = 0;
1028 tk->ntp_error += neg << tk->ntp_error_shift;
1029 }
1030 826
827 WARN_ONCE(timekeeper.clock->maxadj &&
828 (timekeeper.mult + adj > timekeeper.clock->mult +
829 timekeeper.clock->maxadj),
830 "Adjusting %s more then 11%% (%ld vs %ld)\n",
831 timekeeper.clock->name, (long)timekeeper.mult + adj,
832 (long)timekeeper.clock->mult +
833 timekeeper.clock->maxadj);
834 timekeeper.mult += adj;
835 timekeeper.xtime_interval += interval;
836 timekeeper.xtime_nsec -= offset;
837 timekeeper.ntp_error -= (interval - offset) <<
838 timekeeper.ntp_error_shift;
1031} 839}
1032 840
1033/**
1034 * accumulate_nsecs_to_secs - Accumulates nsecs into secs
1035 *
1036 * Helper function that accumulates a the nsecs greater then a second
1037 * from the xtime_nsec field to the xtime_secs field.
1038 * It also calls into the NTP code to handle leapsecond processing.
1039 *
1040 */
1041static inline void accumulate_nsecs_to_secs(struct timekeeper *tk)
1042{
1043 u64 nsecps = (u64)NSEC_PER_SEC << tk->shift;
1044
1045 while (tk->xtime_nsec >= nsecps) {
1046 int leap;
1047
1048 tk->xtime_nsec -= nsecps;
1049 tk->xtime_sec++;
1050
1051 /* Figure out if its a leap sec and apply if needed */
1052 leap = second_overflow(tk->xtime_sec);
1053 if (unlikely(leap)) {
1054 struct timespec ts;
1055
1056 tk->xtime_sec += leap;
1057
1058 ts.tv_sec = leap;
1059 ts.tv_nsec = 0;
1060 tk_set_wall_to_mono(tk,
1061 timespec_sub(tk->wall_to_monotonic, ts));
1062
1063 clock_was_set_delayed();
1064 }
1065 }
1066}
1067 841
1068/** 842/**
1069 * logarithmic_accumulation - shifted accumulation of cycles 843 * logarithmic_accumulation - shifted accumulation of cycles
@@ -1074,136 +848,137 @@ static inline void accumulate_nsecs_to_secs(struct timekeeper *tk)
1074 * 848 *
1075 * Returns the unconsumed cycles. 849 * Returns the unconsumed cycles.
1076 */ 850 */
1077static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, 851static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
1078 u32 shift)
1079{ 852{
853 u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift;
1080 u64 raw_nsecs; 854 u64 raw_nsecs;
1081 855
1082 /* If the offset is smaller then a shifted interval, do nothing */ 856 /* If the offset is smaller then a shifted interval, do nothing */
1083 if (offset < tk->cycle_interval<<shift) 857 if (offset < timekeeper.cycle_interval<<shift)
1084 return offset; 858 return offset;
1085 859
1086 /* Accumulate one shifted interval */ 860 /* Accumulate one shifted interval */
1087 offset -= tk->cycle_interval << shift; 861 offset -= timekeeper.cycle_interval << shift;
1088 tk->clock->cycle_last += tk->cycle_interval << shift; 862 timekeeper.clock->cycle_last += timekeeper.cycle_interval << shift;
1089 863
1090 tk->xtime_nsec += tk->xtime_interval << shift; 864 timekeeper.xtime_nsec += timekeeper.xtime_interval << shift;
1091 accumulate_nsecs_to_secs(tk); 865 while (timekeeper.xtime_nsec >= nsecps) {
866 timekeeper.xtime_nsec -= nsecps;
867 xtime.tv_sec++;
868 second_overflow();
869 }
1092 870
1093 /* Accumulate raw time */ 871 /* Accumulate raw time */
1094 raw_nsecs = (u64)tk->raw_interval << shift; 872 raw_nsecs = timekeeper.raw_interval << shift;
1095 raw_nsecs += tk->raw_time.tv_nsec; 873 raw_nsecs += raw_time.tv_nsec;
1096 if (raw_nsecs >= NSEC_PER_SEC) { 874 if (raw_nsecs >= NSEC_PER_SEC) {
1097 u64 raw_secs = raw_nsecs; 875 u64 raw_secs = raw_nsecs;
1098 raw_nsecs = do_div(raw_secs, NSEC_PER_SEC); 876 raw_nsecs = do_div(raw_secs, NSEC_PER_SEC);
1099 tk->raw_time.tv_sec += raw_secs; 877 raw_time.tv_sec += raw_secs;
1100 } 878 }
1101 tk->raw_time.tv_nsec = raw_nsecs; 879 raw_time.tv_nsec = raw_nsecs;
1102 880
1103 /* Accumulate error between NTP and clock interval */ 881 /* Accumulate error between NTP and clock interval */
1104 tk->ntp_error += ntp_tick_length() << shift; 882 timekeeper.ntp_error += tick_length << shift;
1105 tk->ntp_error -= (tk->xtime_interval + tk->xtime_remainder) << 883 timekeeper.ntp_error -=
1106 (tk->ntp_error_shift + shift); 884 (timekeeper.xtime_interval + timekeeper.xtime_remainder) <<
885 (timekeeper.ntp_error_shift + shift);
1107 886
1108 return offset; 887 return offset;
1109} 888}
1110 889
1111#ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD
1112static inline void old_vsyscall_fixup(struct timekeeper *tk)
1113{
1114 s64 remainder;
1115
1116 /*
1117 * Store only full nanoseconds into xtime_nsec after rounding
1118 * it up and add the remainder to the error difference.
1119 * XXX - This is necessary to avoid small 1ns inconsistnecies caused
1120 * by truncating the remainder in vsyscalls. However, it causes
1121 * additional work to be done in timekeeping_adjust(). Once
1122 * the vsyscall implementations are converted to use xtime_nsec
1123 * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD
1124 * users are removed, this can be killed.
1125 */
1126 remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1);
1127 tk->xtime_nsec -= remainder;
1128 tk->xtime_nsec += 1ULL << tk->shift;
1129 tk->ntp_error += remainder << tk->ntp_error_shift;
1130
1131}
1132#else
1133#define old_vsyscall_fixup(tk)
1134#endif
1135
1136
1137 890
1138/** 891/**
1139 * update_wall_time - Uses the current clocksource to increment the wall time 892 * update_wall_time - Uses the current clocksource to increment the wall time
1140 * 893 *
894 * Called from the timer interrupt, must hold a write on xtime_lock.
1141 */ 895 */
1142static void update_wall_time(void) 896static void update_wall_time(void)
1143{ 897{
1144 struct clocksource *clock; 898 struct clocksource *clock;
1145 struct timekeeper *tk = &timekeeper;
1146 cycle_t offset; 899 cycle_t offset;
1147 int shift = 0, maxshift; 900 int shift = 0, maxshift;
1148 unsigned long flags;
1149
1150 write_seqlock_irqsave(&tk->lock, flags);
1151 901
1152 /* Make sure we're fully resumed: */ 902 /* Make sure we're fully resumed: */
1153 if (unlikely(timekeeping_suspended)) 903 if (unlikely(timekeeping_suspended))
1154 goto out; 904 return;
1155 905
1156 clock = tk->clock; 906 clock = timekeeper.clock;
1157 907
1158#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET 908#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
1159 offset = tk->cycle_interval; 909 offset = timekeeper.cycle_interval;
1160#else 910#else
1161 offset = (clock->read(clock) - clock->cycle_last) & clock->mask; 911 offset = (clock->read(clock) - clock->cycle_last) & clock->mask;
1162#endif 912#endif
1163 913 timekeeper.xtime_nsec = (s64)xtime.tv_nsec << timekeeper.shift;
1164 /* Check if there's really nothing to do */
1165 if (offset < tk->cycle_interval)
1166 goto out;
1167 914
1168 /* 915 /*
1169 * With NO_HZ we may have to accumulate many cycle_intervals 916 * With NO_HZ we may have to accumulate many cycle_intervals
1170 * (think "ticks") worth of time at once. To do this efficiently, 917 * (think "ticks") worth of time at once. To do this efficiently,
1171 * we calculate the largest doubling multiple of cycle_intervals 918 * we calculate the largest doubling multiple of cycle_intervals
1172 * that is smaller than the offset. We then accumulate that 919 * that is smaller then the offset. We then accumulate that
1173 * chunk in one go, and then try to consume the next smaller 920 * chunk in one go, and then try to consume the next smaller
1174 * doubled multiple. 921 * doubled multiple.
1175 */ 922 */
1176 shift = ilog2(offset) - ilog2(tk->cycle_interval); 923 shift = ilog2(offset) - ilog2(timekeeper.cycle_interval);
1177 shift = max(0, shift); 924 shift = max(0, shift);
1178 /* Bound shift to one less than what overflows tick_length */ 925 /* Bound shift to one less then what overflows tick_length */
1179 maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1; 926 maxshift = (8*sizeof(tick_length) - (ilog2(tick_length)+1)) - 1;
1180 shift = min(shift, maxshift); 927 shift = min(shift, maxshift);
1181 while (offset >= tk->cycle_interval) { 928 while (offset >= timekeeper.cycle_interval) {
1182 offset = logarithmic_accumulation(tk, offset, shift); 929 offset = logarithmic_accumulation(offset, shift);
1183 if (offset < tk->cycle_interval<<shift) 930 if(offset < timekeeper.cycle_interval<<shift)
1184 shift--; 931 shift--;
1185 } 932 }
1186 933
1187 /* correct the clock when NTP error is too big */ 934 /* correct the clock when NTP error is too big */
1188 timekeeping_adjust(tk, offset); 935 timekeeping_adjust(offset);
1189 936
1190 /* 937 /*
1191 * XXX This can be killed once everyone converts 938 * Since in the loop above, we accumulate any amount of time
1192 * to the new update_vsyscall. 939 * in xtime_nsec over a second into xtime.tv_sec, its possible for
940 * xtime_nsec to be fairly small after the loop. Further, if we're
941 * slightly speeding the clocksource up in timekeeping_adjust(),
942 * its possible the required corrective factor to xtime_nsec could
943 * cause it to underflow.
944 *
945 * Now, we cannot simply roll the accumulated second back, since
946 * the NTP subsystem has been notified via second_overflow. So
947 * instead we push xtime_nsec forward by the amount we underflowed,
948 * and add that amount into the error.
949 *
950 * We'll correct this error next time through this function, when
951 * xtime_nsec is not as small.
1193 */ 952 */
1194 old_vsyscall_fixup(tk); 953 if (unlikely((s64)timekeeper.xtime_nsec < 0)) {
954 s64 neg = -(s64)timekeeper.xtime_nsec;
955 timekeeper.xtime_nsec = 0;
956 timekeeper.ntp_error += neg << timekeeper.ntp_error_shift;
957 }
958
1195 959
1196 /* 960 /*
1197 * Finally, make sure that after the rounding 961 * Store full nanoseconds into xtime after rounding it up and
1198 * xtime_nsec isn't larger than NSEC_PER_SEC 962 * add the remainder to the error difference.
1199 */ 963 */
1200 accumulate_nsecs_to_secs(tk); 964 xtime.tv_nsec = ((s64) timekeeper.xtime_nsec >> timekeeper.shift) + 1;
965 timekeeper.xtime_nsec -= (s64) xtime.tv_nsec << timekeeper.shift;
966 timekeeper.ntp_error += timekeeper.xtime_nsec <<
967 timekeeper.ntp_error_shift;
1201 968
1202 timekeeping_update(tk, false); 969 /*
1203 970 * Finally, make sure that after the rounding
1204out: 971 * xtime.tv_nsec isn't larger then NSEC_PER_SEC
1205 write_sequnlock_irqrestore(&tk->lock, flags); 972 */
973 if (unlikely(xtime.tv_nsec >= NSEC_PER_SEC)) {
974 xtime.tv_nsec -= NSEC_PER_SEC;
975 xtime.tv_sec++;
976 second_overflow();
977 }
1206 978
979 /* check to see if there is a new clocksource to use */
980 update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
981 timekeeper.mult);
1207} 982}
1208 983
1209/** 984/**
@@ -1219,18 +994,16 @@ out:
1219 */ 994 */
1220void getboottime(struct timespec *ts) 995void getboottime(struct timespec *ts)
1221{ 996{
1222 struct timekeeper *tk = &timekeeper;
1223 struct timespec boottime = { 997 struct timespec boottime = {
1224 .tv_sec = tk->wall_to_monotonic.tv_sec + 998 .tv_sec = wall_to_monotonic.tv_sec + total_sleep_time.tv_sec,
1225 tk->total_sleep_time.tv_sec, 999 .tv_nsec = wall_to_monotonic.tv_nsec + total_sleep_time.tv_nsec
1226 .tv_nsec = tk->wall_to_monotonic.tv_nsec +
1227 tk->total_sleep_time.tv_nsec
1228 }; 1000 };
1229 1001
1230 set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec); 1002 set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec);
1231} 1003}
1232EXPORT_SYMBOL_GPL(getboottime); 1004EXPORT_SYMBOL_GPL(getboottime);
1233 1005
1006
1234/** 1007/**
1235 * get_monotonic_boottime - Returns monotonic time since boot 1008 * get_monotonic_boottime - Returns monotonic time since boot
1236 * @ts: pointer to the timespec to be set 1009 * @ts: pointer to the timespec to be set
@@ -1242,25 +1015,23 @@ EXPORT_SYMBOL_GPL(getboottime);
1242 */ 1015 */
1243void get_monotonic_boottime(struct timespec *ts) 1016void get_monotonic_boottime(struct timespec *ts)
1244{ 1017{
1245 struct timekeeper *tk = &timekeeper;
1246 struct timespec tomono, sleep; 1018 struct timespec tomono, sleep;
1247 s64 nsec;
1248 unsigned int seq; 1019 unsigned int seq;
1020 s64 nsecs;
1249 1021
1250 WARN_ON(timekeeping_suspended); 1022 WARN_ON(timekeeping_suspended);
1251 1023
1252 do { 1024 do {
1253 seq = read_seqbegin(&tk->lock); 1025 seq = read_seqbegin(&xtime_lock);
1254 ts->tv_sec = tk->xtime_sec; 1026 *ts = xtime;
1255 nsec = timekeeping_get_ns(tk); 1027 tomono = wall_to_monotonic;
1256 tomono = tk->wall_to_monotonic; 1028 sleep = total_sleep_time;
1257 sleep = tk->total_sleep_time; 1029 nsecs = timekeeping_get_ns();
1258 1030
1259 } while (read_seqretry(&tk->lock, seq)); 1031 } while (read_seqretry(&xtime_lock, seq));
1260 1032
1261 ts->tv_sec += tomono.tv_sec + sleep.tv_sec; 1033 set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec + sleep.tv_sec,
1262 ts->tv_nsec = 0; 1034 ts->tv_nsec + tomono.tv_nsec + sleep.tv_nsec + nsecs);
1263 timespec_add_ns(ts, nsec + tomono.tv_nsec + sleep.tv_nsec);
1264} 1035}
1265EXPORT_SYMBOL_GPL(get_monotonic_boottime); 1036EXPORT_SYMBOL_GPL(get_monotonic_boottime);
1266 1037
@@ -1287,38 +1058,31 @@ EXPORT_SYMBOL_GPL(ktime_get_boottime);
1287 */ 1058 */
1288void monotonic_to_bootbased(struct timespec *ts) 1059void monotonic_to_bootbased(struct timespec *ts)
1289{ 1060{
1290 struct timekeeper *tk = &timekeeper; 1061 *ts = timespec_add(*ts, total_sleep_time);
1291
1292 *ts = timespec_add(*ts, tk->total_sleep_time);
1293} 1062}
1294EXPORT_SYMBOL_GPL(monotonic_to_bootbased); 1063EXPORT_SYMBOL_GPL(monotonic_to_bootbased);
1295 1064
1296unsigned long get_seconds(void) 1065unsigned long get_seconds(void)
1297{ 1066{
1298 struct timekeeper *tk = &timekeeper; 1067 return xtime.tv_sec;
1299
1300 return tk->xtime_sec;
1301} 1068}
1302EXPORT_SYMBOL(get_seconds); 1069EXPORT_SYMBOL(get_seconds);
1303 1070
1304struct timespec __current_kernel_time(void) 1071struct timespec __current_kernel_time(void)
1305{ 1072{
1306 struct timekeeper *tk = &timekeeper; 1073 return xtime;
1307
1308 return tk_xtime(tk);
1309} 1074}
1310 1075
1311struct timespec current_kernel_time(void) 1076struct timespec current_kernel_time(void)
1312{ 1077{
1313 struct timekeeper *tk = &timekeeper;
1314 struct timespec now; 1078 struct timespec now;
1315 unsigned long seq; 1079 unsigned long seq;
1316 1080
1317 do { 1081 do {
1318 seq = read_seqbegin(&tk->lock); 1082 seq = read_seqbegin(&xtime_lock);
1319 1083
1320 now = tk_xtime(tk); 1084 now = xtime;
1321 } while (read_seqretry(&tk->lock, seq)); 1085 } while (read_seqretry(&xtime_lock, seq));
1322 1086
1323 return now; 1087 return now;
1324} 1088}
@@ -1326,16 +1090,15 @@ EXPORT_SYMBOL(current_kernel_time);
1326 1090
1327struct timespec get_monotonic_coarse(void) 1091struct timespec get_monotonic_coarse(void)
1328{ 1092{
1329 struct timekeeper *tk = &timekeeper;
1330 struct timespec now, mono; 1093 struct timespec now, mono;
1331 unsigned long seq; 1094 unsigned long seq;
1332 1095
1333 do { 1096 do {
1334 seq = read_seqbegin(&tk->lock); 1097 seq = read_seqbegin(&xtime_lock);
1335 1098
1336 now = tk_xtime(tk); 1099 now = xtime;
1337 mono = tk->wall_to_monotonic; 1100 mono = wall_to_monotonic;
1338 } while (read_seqretry(&tk->lock, seq)); 1101 } while (read_seqretry(&xtime_lock, seq));
1339 1102
1340 set_normalized_timespec(&now, now.tv_sec + mono.tv_sec, 1103 set_normalized_timespec(&now, now.tv_sec + mono.tv_sec,
1341 now.tv_nsec + mono.tv_nsec); 1104 now.tv_nsec + mono.tv_nsec);
@@ -1343,7 +1106,9 @@ struct timespec get_monotonic_coarse(void)
1343} 1106}
1344 1107
1345/* 1108/*
1346 * Must hold jiffies_lock 1109 * The 64-bit jiffies value is not atomic - you MUST NOT read it
1110 * without sampling the sequence number in xtime_lock.
1111 * jiffies is defined in the linker script...
1347 */ 1112 */
1348void do_timer(unsigned long ticks) 1113void do_timer(unsigned long ticks)
1349{ 1114{
@@ -1362,66 +1127,30 @@ void do_timer(unsigned long ticks)
1362void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim, 1127void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
1363 struct timespec *wtom, struct timespec *sleep) 1128 struct timespec *wtom, struct timespec *sleep)
1364{ 1129{
1365 struct timekeeper *tk = &timekeeper;
1366 unsigned long seq; 1130 unsigned long seq;
1367 1131
1368 do { 1132 do {
1369 seq = read_seqbegin(&tk->lock); 1133 seq = read_seqbegin(&xtime_lock);
1370 *xtim = tk_xtime(tk); 1134 *xtim = xtime;
1371 *wtom = tk->wall_to_monotonic; 1135 *wtom = wall_to_monotonic;
1372 *sleep = tk->total_sleep_time; 1136 *sleep = total_sleep_time;
1373 } while (read_seqretry(&tk->lock, seq)); 1137 } while (read_seqretry(&xtime_lock, seq));
1374}
1375
1376#ifdef CONFIG_HIGH_RES_TIMERS
1377/**
1378 * ktime_get_update_offsets - hrtimer helper
1379 * @offs_real: pointer to storage for monotonic -> realtime offset
1380 * @offs_boot: pointer to storage for monotonic -> boottime offset
1381 *
1382 * Returns current monotonic time and updates the offsets
1383 * Called from hrtimer_interrupt() or retrigger_next_event()
1384 */
1385ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot)
1386{
1387 struct timekeeper *tk = &timekeeper;
1388 ktime_t now;
1389 unsigned int seq;
1390 u64 secs, nsecs;
1391
1392 do {
1393 seq = read_seqbegin(&tk->lock);
1394
1395 secs = tk->xtime_sec;
1396 nsecs = timekeeping_get_ns(tk);
1397
1398 *offs_real = tk->offs_real;
1399 *offs_boot = tk->offs_boot;
1400 } while (read_seqretry(&tk->lock, seq));
1401
1402 now = ktime_add_ns(ktime_set(secs, 0), nsecs);
1403 now = ktime_sub(now, *offs_real);
1404 return now;
1405} 1138}
1406#endif
1407 1139
1408/** 1140/**
1409 * ktime_get_monotonic_offset() - get wall_to_monotonic in ktime_t format 1141 * ktime_get_monotonic_offset() - get wall_to_monotonic in ktime_t format
1410 */ 1142 */
1411ktime_t ktime_get_monotonic_offset(void) 1143ktime_t ktime_get_monotonic_offset(void)
1412{ 1144{
1413 struct timekeeper *tk = &timekeeper;
1414 unsigned long seq; 1145 unsigned long seq;
1415 struct timespec wtom; 1146 struct timespec wtom;
1416 1147
1417 do { 1148 do {
1418 seq = read_seqbegin(&tk->lock); 1149 seq = read_seqbegin(&xtime_lock);
1419 wtom = tk->wall_to_monotonic; 1150 wtom = wall_to_monotonic;
1420 } while (read_seqretry(&tk->lock, seq)); 1151 } while (read_seqretry(&xtime_lock, seq));
1421
1422 return timespec_to_ktime(wtom); 1152 return timespec_to_ktime(wtom);
1423} 1153}
1424EXPORT_SYMBOL_GPL(ktime_get_monotonic_offset);
1425 1154
1426/** 1155/**
1427 * xtime_update() - advances the timekeeping infrastructure 1156 * xtime_update() - advances the timekeeping infrastructure
@@ -1431,7 +1160,7 @@ EXPORT_SYMBOL_GPL(ktime_get_monotonic_offset);
1431 */ 1160 */
1432void xtime_update(unsigned long ticks) 1161void xtime_update(unsigned long ticks)
1433{ 1162{
1434 write_seqlock(&jiffies_lock); 1163 write_seqlock(&xtime_lock);
1435 do_timer(ticks); 1164 do_timer(ticks);
1436 write_sequnlock(&jiffies_lock); 1165 write_sequnlock(&xtime_lock);
1437} 1166}
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index af5a7e9f164..3258455549f 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -167,7 +167,7 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
167 { 167 {
168 struct tick_sched *ts = tick_get_tick_sched(cpu); 168 struct tick_sched *ts = tick_get_tick_sched(cpu);
169 P(nohz_mode); 169 P(nohz_mode);
170 P_ns(last_tick); 170 P_ns(idle_tick);
171 P(tick_stopped); 171 P(tick_stopped);
172 P(idle_jiffies); 172 P(idle_jiffies);
173 P(idle_calls); 173 P(idle_calls);
@@ -259,7 +259,7 @@ static int timer_list_show(struct seq_file *m, void *v)
259 u64 now = ktime_to_ns(ktime_get()); 259 u64 now = ktime_to_ns(ktime_get());
260 int cpu; 260 int cpu;
261 261
262 SEQ_printf(m, "Timer List Version: v0.7\n"); 262 SEQ_printf(m, "Timer List Version: v0.6\n");
263 SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); 263 SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
264 SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); 264 SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
265 265
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c
index 0b537f27b55..a5d0a3a85dd 100644
--- a/kernel/time/timer_stats.c
+++ b/kernel/time/timer_stats.c
@@ -81,7 +81,7 @@ struct entry {
81/* 81/*
82 * Spinlock protecting the tables - not taken during lookup: 82 * Spinlock protecting the tables - not taken during lookup:
83 */ 83 */
84static DEFINE_RAW_SPINLOCK(table_lock); 84static DEFINE_SPINLOCK(table_lock);
85 85
86/* 86/*
87 * Per-CPU lookup locks for fast hash lookup: 87 * Per-CPU lookup locks for fast hash lookup:
@@ -188,7 +188,7 @@ static struct entry *tstat_lookup(struct entry *entry, char *comm)
188 prev = NULL; 188 prev = NULL;
189 curr = *head; 189 curr = *head;
190 190
191 raw_spin_lock(&table_lock); 191 spin_lock(&table_lock);
192 /* 192 /*
193 * Make sure we have not raced with another CPU: 193 * Make sure we have not raced with another CPU:
194 */ 194 */
@@ -215,7 +215,7 @@ static struct entry *tstat_lookup(struct entry *entry, char *comm)
215 *head = curr; 215 *head = curr;
216 } 216 }
217 out_unlock: 217 out_unlock:
218 raw_spin_unlock(&table_lock); 218 spin_unlock(&table_lock);
219 219
220 return curr; 220 return curr;
221} 221}