Diffstat (limited to 'kernel/time')
 kernel/time/Makefile         |    2
 kernel/time/clockevents.c    |   32
 kernel/time/clocksource.c    |  619
 kernel/time/jiffies.c        |    8
 kernel/time/ntp.c            |    7
 kernel/time/tick-broadcast.c |    9
 kernel/time/tick-common.c    |   12
 kernel/time/tick-oneshot.c   |   21
 kernel/time/tick-sched.c     |  169
 kernel/time/timecompare.c    |    6
 kernel/time/timeconv.c       |  127
 kernel/time/timekeeping.c    |  616
 kernel/time/timer_list.c     |   14
 kernel/time/timer_stats.c    |   18
 14 files changed, 1183 insertions, 477 deletions
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index 0b0a6366c9d4..ee266620b06c 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -1,4 +1,4 @@
-obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o
+obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o timeconv.o
 
 obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o
 obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index d13be216a790..20a8920029ee 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -18,6 +18,9 @@
 #include <linux/notifier.h>
 #include <linux/smp.h>
 #include <linux/sysdev.h>
+#include <linux/tick.h>
+
+#include "tick-internal.h"
 
 /* The registered clock event devices */
 static LIST_HEAD(clockevent_devices);
@@ -36,10 +39,9 @@ static DEFINE_SPINLOCK(clockevents_lock);
  *
  * Math helper, returns latch value converted to nanoseconds (bound checked)
  */
-unsigned long clockevent_delta2ns(unsigned long latch,
-				  struct clock_event_device *evt)
+u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt)
 {
-	u64 clc = ((u64) latch << evt->shift);
+	u64 clc = (u64) latch << evt->shift;
 
 	if (unlikely(!evt->mult)) {
 		evt->mult = 1;
@@ -49,11 +51,12 @@ unsigned long clockevent_delta2ns(unsigned long latch,
 	do_div(clc, evt->mult);
 	if (clc < 1000)
 		clc = 1000;
-	if (clc > LONG_MAX)
-		clc = LONG_MAX;
+	if (clc > KTIME_MAX)
+		clc = KTIME_MAX;
 
-	return (unsigned long) clc;
+	return clc;
 }
+EXPORT_SYMBOL_GPL(clockevent_delta2ns);
 
 /**
  * clockevents_set_mode - set the operating mode of a clock event device
@@ -135,11 +138,12 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires,
  */
 int clockevents_register_notifier(struct notifier_block *nb)
 {
+	unsigned long flags;
 	int ret;
 
-	spin_lock(&clockevents_lock);
+	spin_lock_irqsave(&clockevents_lock, flags);
 	ret = raw_notifier_chain_register(&clockevents_chain, nb);
-	spin_unlock(&clockevents_lock);
+	spin_unlock_irqrestore(&clockevents_lock, flags);
 
 	return ret;
 }
@@ -176,17 +180,20 @@ static void clockevents_notify_released(void)
  */
 void clockevents_register_device(struct clock_event_device *dev)
 {
+	unsigned long flags;
+
 	BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
 	BUG_ON(!dev->cpumask);
 
-	spin_lock(&clockevents_lock);
+	spin_lock_irqsave(&clockevents_lock, flags);
 
 	list_add(&dev->list, &clockevent_devices);
 	clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev);
 	clockevents_notify_released();
 
-	spin_unlock(&clockevents_lock);
+	spin_unlock_irqrestore(&clockevents_lock, flags);
 }
+EXPORT_SYMBOL_GPL(clockevents_register_device);
 
 /*
  * Noop handler when we shut down an event device
@@ -232,8 +239,9 @@ void clockevents_exchange_device(struct clock_event_device *old,
 void clockevents_notify(unsigned long reason, void *arg)
 {
 	struct list_head *node, *tmp;
+	unsigned long flags;
 
-	spin_lock(&clockevents_lock);
+	spin_lock_irqsave(&clockevents_lock, flags);
 	clockevents_do_notify(reason, arg);
 
 	switch (reason) {
@@ -248,7 +256,7 @@ void clockevents_notify(unsigned long reason, void *arg)
 	default:
 		break;
 	}
-	spin_unlock(&clockevents_lock);
+	spin_unlock_irqrestore(&clockevents_lock, flags);
 }
 EXPORT_SYMBOL_GPL(clockevents_notify);
 #endif
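For readers following the clockevent_delta2ns() change above, here is a minimal user-space sketch of the same latch-to-nanoseconds math, now that the return type is u64. The mult/shift values are invented for a hypothetical 10 MHz event device; real drivers compute their own.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical scaling factors for a 10 MHz clock event device:
 * cycles = ns * mult >> shift, so mult ~= (f / 1e9) * 2^32. */
#define EVT_MULT	42949673u
#define EVT_SHIFT	32

/* Same math as clockevent_delta2ns(): ns = (latch << shift) / mult,
 * with a 1000 ns lower bound (the KTIME_MAX clamp is omitted here). */
static uint64_t delta2ns(unsigned long latch)
{
	uint64_t clc = (uint64_t)latch << EVT_SHIFT;

	clc /= EVT_MULT;
	return clc < 1000 ? 1000 : clc;
}

int main(void)
{
	/* 10,000 device ticks at 10 MHz should come out near 1 ms. */
	printf("%llu ns\n", (unsigned long long)delta2ns(10000));
	return 0;
}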
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index c46c931a7fe7..d422c7b2236b 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -21,7 +21,6 @@
  *
  * TODO WishList:
  *   o Allow clocksource drivers to be unregistered
- *   o get rid of clocksource_jiffies extern
  */
 
 #include <linux/clocksource.h>
@@ -30,6 +29,7 @@
 #include <linux/module.h>
 #include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
 #include <linux/tick.h>
+#include <linux/kthread.h>
 
 void timecounter_init(struct timecounter *tc,
 		      const struct cyclecounter *cc,
@@ -39,7 +39,7 @@ void timecounter_init(struct timecounter *tc,
 	tc->cycle_last = cc->read(cc);
 	tc->nsec = start_tstamp;
 }
-EXPORT_SYMBOL(timecounter_init);
+EXPORT_SYMBOL_GPL(timecounter_init);
 
 /**
  * timecounter_read_delta - get nanoseconds since last call of this function
@@ -83,7 +83,7 @@ u64 timecounter_read(struct timecounter *tc)
 
 	return nsec;
 }
-EXPORT_SYMBOL(timecounter_read);
+EXPORT_SYMBOL_GPL(timecounter_read);
 
 u64 timecounter_cyc2time(struct timecounter *tc,
 			 cycle_t cycle_tstamp)
@@ -105,52 +105,90 @@ u64 timecounter_cyc2time(struct timecounter *tc,
105 105
106 return nsec; 106 return nsec;
107} 107}
108EXPORT_SYMBOL(timecounter_cyc2time); 108EXPORT_SYMBOL_GPL(timecounter_cyc2time);
109 109
110/* XXX - Would like a better way for initializing curr_clocksource */ 110/**
111extern struct clocksource clocksource_jiffies; 111 * clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks
112 * @mult: pointer to mult variable
113 * @shift: pointer to shift variable
114 * @from: frequency to convert from
115 * @to: frequency to convert to
116 * @minsec: guaranteed runtime conversion range in seconds
117 *
118 * The function evaluates the shift/mult pair for the scaled math
119 * operations of clocksources and clockevents.
120 *
121 * @to and @from are frequency values in HZ. For clock sources @to is
122 * NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock
123 * event @to is the counter frequency and @from is NSEC_PER_SEC.
124 *
125 * The @minsec conversion range argument controls the time frame in
126 * seconds which must be covered by the runtime conversion with the
127 * calculated mult and shift factors. This guarantees that no 64bit
128 * overflow happens when the input value of the conversion is
129 * multiplied with the calculated mult factor. Larger ranges may
130 * reduce the conversion accuracy by chosing smaller mult and shift
131 * factors.
132 */
133void
134clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec)
135{
136 u64 tmp;
137 u32 sft, sftacc= 32;
138
139 /*
140 * Calculate the shift factor which is limiting the conversion
141 * range:
142 */
143 tmp = ((u64)minsec * from) >> 32;
144 while (tmp) {
145 tmp >>=1;
146 sftacc--;
147 }
148
149 /*
150 * Find the conversion shift/mult pair which has the best
151 * accuracy and fits the maxsec conversion range:
152 */
153 for (sft = 32; sft > 0; sft--) {
154 tmp = (u64) to << sft;
155 do_div(tmp, from);
156 if ((tmp >> sftacc) == 0)
157 break;
158 }
159 *mult = tmp;
160 *shift = sft;
161}
112 162
113/*[Clocksource internal variables]--------- 163/*[Clocksource internal variables]---------
114 * curr_clocksource: 164 * curr_clocksource:
115 * currently selected clocksource. Initialized to clocksource_jiffies. 165 * currently selected clocksource.
116 * next_clocksource:
117 * pending next selected clocksource.
118 * clocksource_list: 166 * clocksource_list:
119 * linked list with the registered clocksources 167 * linked list with the registered clocksources
120 * clocksource_lock: 168 * clocksource_mutex:
121 * protects manipulations to curr_clocksource and next_clocksource 169 * protects manipulations to curr_clocksource and the clocksource_list
122 * and the clocksource_list
123 * override_name: 170 * override_name:
124 * Name of the user-specified clocksource. 171 * Name of the user-specified clocksource.
125 */ 172 */
126static struct clocksource *curr_clocksource = &clocksource_jiffies; 173static struct clocksource *curr_clocksource;
127static struct clocksource *next_clocksource;
128static struct clocksource *clocksource_override;
129static LIST_HEAD(clocksource_list); 174static LIST_HEAD(clocksource_list);
130static DEFINE_SPINLOCK(clocksource_lock); 175static DEFINE_MUTEX(clocksource_mutex);
131static char override_name[32]; 176static char override_name[32];
132static int finished_booting; 177static int finished_booting;
133 178
134/* clocksource_done_booting - Called near the end of core bootup
135 *
136 * Hack to avoid lots of clocksource churn at boot time.
137 * We use fs_initcall because we want this to start before
138 * device_initcall but after subsys_initcall.
139 */
140static int __init clocksource_done_booting(void)
141{
142 finished_booting = 1;
143 return 0;
144}
145fs_initcall(clocksource_done_booting);
146
147#ifdef CONFIG_CLOCKSOURCE_WATCHDOG 179#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
180static void clocksource_watchdog_work(struct work_struct *work);
181
148static LIST_HEAD(watchdog_list); 182static LIST_HEAD(watchdog_list);
149static struct clocksource *watchdog; 183static struct clocksource *watchdog;
150static struct timer_list watchdog_timer; 184static struct timer_list watchdog_timer;
185static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
151static DEFINE_SPINLOCK(watchdog_lock); 186static DEFINE_SPINLOCK(watchdog_lock);
152static cycle_t watchdog_last; 187static cycle_t watchdog_last;
153static unsigned long watchdog_resumed; 188static int watchdog_running;
189
190static int clocksource_watchdog_kthread(void *data);
191static void __clocksource_change_rating(struct clocksource *cs, int rating);
154 192
155/* 193/*
156 * Interval: 0.5sec Threshold: 0.0625s 194 * Interval: 0.5sec Threshold: 0.0625s
@@ -158,135 +196,249 @@ static unsigned long watchdog_resumed;
158#define WATCHDOG_INTERVAL (HZ >> 1) 196#define WATCHDOG_INTERVAL (HZ >> 1)
159#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4) 197#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)
160 198
161static void clocksource_ratewd(struct clocksource *cs, int64_t delta) 199static void clocksource_watchdog_work(struct work_struct *work)
162{ 200{
163 if (delta > -WATCHDOG_THRESHOLD && delta < WATCHDOG_THRESHOLD) 201 /*
164 return; 202 * If kthread_run fails the next watchdog scan over the
203 * watchdog_list will find the unstable clock again.
204 */
205 kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog");
206}
165 207
208static void __clocksource_unstable(struct clocksource *cs)
209{
210 cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG);
211 cs->flags |= CLOCK_SOURCE_UNSTABLE;
212 if (finished_booting)
213 schedule_work(&watchdog_work);
214}
215
216static void clocksource_unstable(struct clocksource *cs, int64_t delta)
217{
166 printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", 218 printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n",
167 cs->name, delta); 219 cs->name, delta);
168 cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG); 220 __clocksource_unstable(cs);
169 clocksource_change_rating(cs, 0); 221}
170 list_del(&cs->wd_list); 222
223/**
224 * clocksource_mark_unstable - mark clocksource unstable via watchdog
225 * @cs: clocksource to be marked unstable
226 *
227 * This function is called instead of clocksource_change_rating from
228 * cpu hotplug code to avoid a deadlock between the clocksource mutex
229 * and the cpu hotplug mutex. It defers the update of the clocksource
230 * to the watchdog thread.
231 */
232void clocksource_mark_unstable(struct clocksource *cs)
233{
234 unsigned long flags;
235
236 spin_lock_irqsave(&watchdog_lock, flags);
237 if (!(cs->flags & CLOCK_SOURCE_UNSTABLE)) {
238 if (list_empty(&cs->wd_list))
239 list_add(&cs->wd_list, &watchdog_list);
240 __clocksource_unstable(cs);
241 }
242 spin_unlock_irqrestore(&watchdog_lock, flags);
171} 243}
172 244
173static void clocksource_watchdog(unsigned long data) 245static void clocksource_watchdog(unsigned long data)
174{ 246{
175 struct clocksource *cs, *tmp; 247 struct clocksource *cs;
176 cycle_t csnow, wdnow; 248 cycle_t csnow, wdnow;
177 int64_t wd_nsec, cs_nsec; 249 int64_t wd_nsec, cs_nsec;
178 int resumed; 250 int next_cpu;
179 251
180 spin_lock(&watchdog_lock); 252 spin_lock(&watchdog_lock);
253 if (!watchdog_running)
254 goto out;
181 255
182 resumed = test_and_clear_bit(0, &watchdog_resumed); 256 wdnow = watchdog->read(watchdog);
183 257 wd_nsec = clocksource_cyc2ns((wdnow - watchdog_last) & watchdog->mask,
184 wdnow = watchdog->read(); 258 watchdog->mult, watchdog->shift);
185 wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask);
186 watchdog_last = wdnow; 259 watchdog_last = wdnow;
187 260
188 list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) { 261 list_for_each_entry(cs, &watchdog_list, wd_list) {
189 csnow = cs->read();
190 262
191 if (unlikely(resumed)) { 263 /* Clocksource already marked unstable? */
192 cs->wd_last = csnow; 264 if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
265 if (finished_booting)
266 schedule_work(&watchdog_work);
193 continue; 267 continue;
194 } 268 }
195 269
196 /* Initialized ? */ 270 csnow = cs->read(cs);
271
272 /* Clocksource initialized ? */
197 if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) { 273 if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) {
198 if ((cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
199 (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
200 cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
201 /*
202 * We just marked the clocksource as
203 * highres-capable, notify the rest of the
204 * system as well so that we transition
205 * into high-res mode:
206 */
207 tick_clock_notify();
208 }
209 cs->flags |= CLOCK_SOURCE_WATCHDOG; 274 cs->flags |= CLOCK_SOURCE_WATCHDOG;
210 cs->wd_last = csnow; 275 cs->wd_last = csnow;
211 } else { 276 continue;
212 cs_nsec = cyc2ns(cs, (csnow - cs->wd_last) & cs->mask);
213 cs->wd_last = csnow;
214 /* Check the delta. Might remove from the list ! */
215 clocksource_ratewd(cs, cs_nsec - wd_nsec);
216 } 277 }
217 }
218 278
219 if (!list_empty(&watchdog_list)) { 279 /* Check the deviation from the watchdog clocksource. */
220 /* 280 cs_nsec = clocksource_cyc2ns((csnow - cs->wd_last) &
221 * Cycle through CPUs to check if the CPUs stay 281 cs->mask, cs->mult, cs->shift);
222 * synchronized to each other. 282 cs->wd_last = csnow;
223 */ 283 if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) {
224 int next_cpu = cpumask_next(raw_smp_processor_id(), 284 clocksource_unstable(cs, cs_nsec - wd_nsec);
225 cpu_online_mask); 285 continue;
286 }
226 287
227 if (next_cpu >= nr_cpu_ids) 288 if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
228 next_cpu = cpumask_first(cpu_online_mask); 289 (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
229 watchdog_timer.expires += WATCHDOG_INTERVAL; 290 (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
230 add_timer_on(&watchdog_timer, next_cpu); 291 cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
292 /*
293 * We just marked the clocksource as highres-capable,
294 * notify the rest of the system as well so that we
295 * transition into high-res mode:
296 */
297 tick_clock_notify();
298 }
231 } 299 }
300
301 /*
302 * Cycle through CPUs to check if the CPUs stay synchronized
303 * to each other.
304 */
305 next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
306 if (next_cpu >= nr_cpu_ids)
307 next_cpu = cpumask_first(cpu_online_mask);
308 watchdog_timer.expires += WATCHDOG_INTERVAL;
309 add_timer_on(&watchdog_timer, next_cpu);
310out:
232 spin_unlock(&watchdog_lock); 311 spin_unlock(&watchdog_lock);
233} 312}
313
314static inline void clocksource_start_watchdog(void)
315{
316 if (watchdog_running || !watchdog || list_empty(&watchdog_list))
317 return;
318 init_timer(&watchdog_timer);
319 watchdog_timer.function = clocksource_watchdog;
320 watchdog_last = watchdog->read(watchdog);
321 watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
322 add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask));
323 watchdog_running = 1;
324}
325
326static inline void clocksource_stop_watchdog(void)
327{
328 if (!watchdog_running || (watchdog && !list_empty(&watchdog_list)))
329 return;
330 del_timer(&watchdog_timer);
331 watchdog_running = 0;
332}
333
334static inline void clocksource_reset_watchdog(void)
335{
336 struct clocksource *cs;
337
338 list_for_each_entry(cs, &watchdog_list, wd_list)
339 cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
340}
341
234static void clocksource_resume_watchdog(void) 342static void clocksource_resume_watchdog(void)
235{ 343{
236 set_bit(0, &watchdog_resumed); 344 unsigned long flags;
345
346 spin_lock_irqsave(&watchdog_lock, flags);
347 clocksource_reset_watchdog();
348 spin_unlock_irqrestore(&watchdog_lock, flags);
237} 349}
238 350
239static void clocksource_check_watchdog(struct clocksource *cs) 351static void clocksource_enqueue_watchdog(struct clocksource *cs)
240{ 352{
241 struct clocksource *cse;
242 unsigned long flags; 353 unsigned long flags;
243 354
244 spin_lock_irqsave(&watchdog_lock, flags); 355 spin_lock_irqsave(&watchdog_lock, flags);
245 if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) { 356 if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
246 int started = !list_empty(&watchdog_list); 357 /* cs is a clocksource to be watched. */
247
248 list_add(&cs->wd_list, &watchdog_list); 358 list_add(&cs->wd_list, &watchdog_list);
249 if (!started && watchdog) { 359 cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
250 watchdog_last = watchdog->read();
251 watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
252 add_timer_on(&watchdog_timer,
253 cpumask_first(cpu_online_mask));
254 }
255 } else { 360 } else {
361 /* cs is a watchdog. */
256 if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) 362 if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
257 cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; 363 cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
258 364 /* Pick the best watchdog. */
259 if (!watchdog || cs->rating > watchdog->rating) { 365 if (!watchdog || cs->rating > watchdog->rating) {
260 if (watchdog)
261 del_timer(&watchdog_timer);
262 watchdog = cs; 366 watchdog = cs;
263 init_timer(&watchdog_timer);
264 watchdog_timer.function = clocksource_watchdog;
265
266 /* Reset watchdog cycles */ 367 /* Reset watchdog cycles */
267 list_for_each_entry(cse, &watchdog_list, wd_list) 368 clocksource_reset_watchdog();
268 cse->flags &= ~CLOCK_SOURCE_WATCHDOG; 369 }
269 /* Start if list is not empty */ 370 }
270 if (!list_empty(&watchdog_list)) { 371 /* Check if the watchdog timer needs to be started. */
271 watchdog_last = watchdog->read(); 372 clocksource_start_watchdog();
272 watchdog_timer.expires = 373 spin_unlock_irqrestore(&watchdog_lock, flags);
273 jiffies + WATCHDOG_INTERVAL; 374}
274 add_timer_on(&watchdog_timer, 375
275 cpumask_first(cpu_online_mask)); 376static void clocksource_dequeue_watchdog(struct clocksource *cs)
276 } 377{
378 struct clocksource *tmp;
379 unsigned long flags;
380
381 spin_lock_irqsave(&watchdog_lock, flags);
382 if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
383 /* cs is a watched clocksource. */
384 list_del_init(&cs->wd_list);
385 } else if (cs == watchdog) {
386 /* Reset watchdog cycles */
387 clocksource_reset_watchdog();
388 /* Current watchdog is removed. Find an alternative. */
389 watchdog = NULL;
390 list_for_each_entry(tmp, &clocksource_list, list) {
391 if (tmp == cs || tmp->flags & CLOCK_SOURCE_MUST_VERIFY)
392 continue;
393 if (!watchdog || tmp->rating > watchdog->rating)
394 watchdog = tmp;
277 } 395 }
278 } 396 }
397 cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
398 /* Check if the watchdog timer needs to be stopped. */
399 clocksource_stop_watchdog();
400 spin_unlock_irqrestore(&watchdog_lock, flags);
401}
402
403static int clocksource_watchdog_kthread(void *data)
404{
405 struct clocksource *cs, *tmp;
406 unsigned long flags;
407 LIST_HEAD(unstable);
408
409 mutex_lock(&clocksource_mutex);
410 spin_lock_irqsave(&watchdog_lock, flags);
411 list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list)
412 if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
413 list_del_init(&cs->wd_list);
414 list_add(&cs->wd_list, &unstable);
415 }
416 /* Check if the watchdog timer needs to be stopped. */
417 clocksource_stop_watchdog();
279 spin_unlock_irqrestore(&watchdog_lock, flags); 418 spin_unlock_irqrestore(&watchdog_lock, flags);
419
420 /* Needs to be done outside of watchdog lock */
421 list_for_each_entry_safe(cs, tmp, &unstable, wd_list) {
422 list_del_init(&cs->wd_list);
423 __clocksource_change_rating(cs, 0);
424 }
425 mutex_unlock(&clocksource_mutex);
426 return 0;
280} 427}
281#else 428
282static void clocksource_check_watchdog(struct clocksource *cs) 429#else /* CONFIG_CLOCKSOURCE_WATCHDOG */
430
431static void clocksource_enqueue_watchdog(struct clocksource *cs)
283{ 432{
284 if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) 433 if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
285 cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; 434 cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
286} 435}
287 436
437static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
288static inline void clocksource_resume_watchdog(void) { } 438static inline void clocksource_resume_watchdog(void) { }
289#endif 439static inline int clocksource_watchdog_kthread(void *data) { return 0; }
440
441#endif /* CONFIG_CLOCKSOURCE_WATCHDOG */
290 442
291/** 443/**
292 * clocksource_resume - resume the clocksource(s) 444 * clocksource_resume - resume the clocksource(s)
@@ -294,18 +446,12 @@ static inline void clocksource_resume_watchdog(void) { }
294void clocksource_resume(void) 446void clocksource_resume(void)
295{ 447{
296 struct clocksource *cs; 448 struct clocksource *cs;
297 unsigned long flags;
298
299 spin_lock_irqsave(&clocksource_lock, flags);
300 449
301 list_for_each_entry(cs, &clocksource_list, list) { 450 list_for_each_entry(cs, &clocksource_list, list)
302 if (cs->resume) 451 if (cs->resume)
303 cs->resume(); 452 cs->resume();
304 }
305 453
306 clocksource_resume_watchdog(); 454 clocksource_resume_watchdog();
307
308 spin_unlock_irqrestore(&clocksource_lock, flags);
309} 455}
310 456
311/** 457/**
@@ -321,74 +467,134 @@ void clocksource_touch_watchdog(void)
321} 467}
322 468
323/** 469/**
324 * clocksource_get_next - Returns the selected clocksource 470 * clocksource_max_deferment - Returns max time the clocksource can be deferred
471 * @cs: Pointer to clocksource
325 * 472 *
326 */ 473 */
327struct clocksource *clocksource_get_next(void) 474static u64 clocksource_max_deferment(struct clocksource *cs)
328{ 475{
329 unsigned long flags; 476 u64 max_nsecs, max_cycles;
330 477
331 spin_lock_irqsave(&clocksource_lock, flags); 478 /*
332 if (next_clocksource && finished_booting) { 479 * Calculate the maximum number of cycles that we can pass to the
333 curr_clocksource = next_clocksource; 480 * cyc2ns function without overflowing a 64-bit signed result. The
334 next_clocksource = NULL; 481 * maximum number of cycles is equal to ULLONG_MAX/cs->mult which
335 } 482 * is equivalent to the below.
336 spin_unlock_irqrestore(&clocksource_lock, flags); 483 * max_cycles < (2^63)/cs->mult
484 * max_cycles < 2^(log2((2^63)/cs->mult))
485 * max_cycles < 2^(log2(2^63) - log2(cs->mult))
486 * max_cycles < 2^(63 - log2(cs->mult))
487 * max_cycles < 1 << (63 - log2(cs->mult))
488 * Please note that we add 1 to the result of the log2 to account for
489 * any rounding errors, ensure the above inequality is satisfied and
490 * no overflow will occur.
491 */
492 max_cycles = 1ULL << (63 - (ilog2(cs->mult) + 1));
337 493
338 return curr_clocksource; 494 /*
495 * The actual maximum number of cycles we can defer the clocksource is
496 * determined by the minimum of max_cycles and cs->mask.
497 */
498 max_cycles = min_t(u64, max_cycles, (u64) cs->mask);
499 max_nsecs = clocksource_cyc2ns(max_cycles, cs->mult, cs->shift);
500
501 /*
502 * To ensure that the clocksource does not wrap whilst we are idle,
503 * limit the time the clocksource can be deferred by 12.5%. Please
504 * note a margin of 12.5% is used because this can be computed with
505 * a shift, versus say 10% which would require division.
506 */
507 return max_nsecs - (max_nsecs >> 5);
339} 508}
340 509
510#ifdef CONFIG_GENERIC_TIME
511
341/** 512/**
342 * select_clocksource - Selects the best registered clocksource. 513 * clocksource_select - Select the best clocksource available
343 * 514 *
344 * Private function. Must hold clocksource_lock when called. 515 * Private function. Must hold clocksource_mutex when called.
345 * 516 *
346 * Select the clocksource with the best rating, or the clocksource, 517 * Select the clocksource with the best rating, or the clocksource,
347 * which is selected by userspace override. 518 * which is selected by userspace override.
348 */ 519 */
349static struct clocksource *select_clocksource(void) 520static void clocksource_select(void)
350{ 521{
351 struct clocksource *next; 522 struct clocksource *best, *cs;
352 523
353 if (list_empty(&clocksource_list)) 524 if (!finished_booting || list_empty(&clocksource_list))
354 return NULL; 525 return;
526 /* First clocksource on the list has the best rating. */
527 best = list_first_entry(&clocksource_list, struct clocksource, list);
528 /* Check for the override clocksource. */
529 list_for_each_entry(cs, &clocksource_list, list) {
530 if (strcmp(cs->name, override_name) != 0)
531 continue;
532 /*
533 * Check to make sure we don't switch to a non-highres
534 * capable clocksource if the tick code is in oneshot
535 * mode (highres or nohz)
536 */
537 if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
538 tick_oneshot_mode_active()) {
539 /* Override clocksource cannot be used. */
540 printk(KERN_WARNING "Override clocksource %s is not "
541 "HRT compatible. Cannot switch while in "
542 "HRT/NOHZ mode\n", cs->name);
543 override_name[0] = 0;
544 } else
545 /* Override clocksource can be used. */
546 best = cs;
547 break;
548 }
549 if (curr_clocksource != best) {
550 printk(KERN_INFO "Switching to clocksource %s\n", best->name);
551 curr_clocksource = best;
552 timekeeping_notify(curr_clocksource);
553 }
554}
355 555
356 if (clocksource_override) 556#else /* CONFIG_GENERIC_TIME */
357 next = clocksource_override;
358 else
359 next = list_entry(clocksource_list.next, struct clocksource,
360 list);
361 557
362 if (next == curr_clocksource) 558static inline void clocksource_select(void) { }
363 return NULL;
364 559
365 return next; 560#endif
366}
367 561
368/* 562/*
369 * Enqueue the clocksource sorted by rating 563 * clocksource_done_booting - Called near the end of core bootup
564 *
565 * Hack to avoid lots of clocksource churn at boot time.
566 * We use fs_initcall because we want this to start before
567 * device_initcall but after subsys_initcall.
370 */ 568 */
371static int clocksource_enqueue(struct clocksource *c) 569static int __init clocksource_done_booting(void)
372{ 570{
373 struct list_head *tmp, *entry = &clocksource_list; 571 finished_booting = 1;
374 572
375 list_for_each(tmp, &clocksource_list) { 573 /*
376 struct clocksource *cs; 574 * Run the watchdog first to eliminate unstable clock sources
575 */
576 clocksource_watchdog_kthread(NULL);
377 577
378 cs = list_entry(tmp, struct clocksource, list); 578 mutex_lock(&clocksource_mutex);
379 if (cs == c) 579 clocksource_select();
380 return -EBUSY; 580 mutex_unlock(&clocksource_mutex);
381 /* Keep track of the place, where to insert */ 581 return 0;
382 if (cs->rating >= c->rating) 582}
383 entry = tmp; 583fs_initcall(clocksource_done_booting);
384 }
385 list_add(&c->list, entry);
386 584
387 if (strlen(c->name) == strlen(override_name) && 585/*
388 !strcmp(c->name, override_name)) 586 * Enqueue the clocksource sorted by rating
389 clocksource_override = c; 587 */
588static void clocksource_enqueue(struct clocksource *cs)
589{
590 struct list_head *entry = &clocksource_list;
591 struct clocksource *tmp;
390 592
391 return 0; 593 list_for_each_entry(tmp, &clocksource_list, list)
594 /* Keep track of the place, where to insert */
595 if (tmp->rating >= cs->rating)
596 entry = &tmp->list;
597 list_add(&cs->list, entry);
392} 598}
393 599
394/** 600/**
@@ -397,55 +603,51 @@ static int clocksource_enqueue(struct clocksource *c)
397 * 603 *
398 * Returns -EBUSY if registration fails, zero otherwise. 604 * Returns -EBUSY if registration fails, zero otherwise.
399 */ 605 */
400int clocksource_register(struct clocksource *c) 606int clocksource_register(struct clocksource *cs)
401{ 607{
402 unsigned long flags; 608 /* calculate max idle time permitted for this clocksource */
403 int ret; 609 cs->max_idle_ns = clocksource_max_deferment(cs);
404 610
405 /* save mult_orig on registration */ 611 mutex_lock(&clocksource_mutex);
406 c->mult_orig = c->mult; 612 clocksource_enqueue(cs);
407 613 clocksource_select();
408 spin_lock_irqsave(&clocksource_lock, flags); 614 clocksource_enqueue_watchdog(cs);
409 ret = clocksource_enqueue(c); 615 mutex_unlock(&clocksource_mutex);
410 if (!ret) 616 return 0;
411 next_clocksource = select_clocksource();
412 spin_unlock_irqrestore(&clocksource_lock, flags);
413 if (!ret)
414 clocksource_check_watchdog(c);
415 return ret;
416} 617}
417EXPORT_SYMBOL(clocksource_register); 618EXPORT_SYMBOL(clocksource_register);
418 619
620static void __clocksource_change_rating(struct clocksource *cs, int rating)
621{
622 list_del(&cs->list);
623 cs->rating = rating;
624 clocksource_enqueue(cs);
625 clocksource_select();
626}
627
419/** 628/**
420 * clocksource_change_rating - Change the rating of a registered clocksource 629 * clocksource_change_rating - Change the rating of a registered clocksource
421 *
422 */ 630 */
423void clocksource_change_rating(struct clocksource *cs, int rating) 631void clocksource_change_rating(struct clocksource *cs, int rating)
424{ 632{
425 unsigned long flags; 633 mutex_lock(&clocksource_mutex);
426 634 __clocksource_change_rating(cs, rating);
427 spin_lock_irqsave(&clocksource_lock, flags); 635 mutex_unlock(&clocksource_mutex);
428 list_del(&cs->list);
429 cs->rating = rating;
430 clocksource_enqueue(cs);
431 next_clocksource = select_clocksource();
432 spin_unlock_irqrestore(&clocksource_lock, flags);
433} 636}
637EXPORT_SYMBOL(clocksource_change_rating);
434 638
435/** 639/**
436 * clocksource_unregister - remove a registered clocksource 640 * clocksource_unregister - remove a registered clocksource
437 */ 641 */
438void clocksource_unregister(struct clocksource *cs) 642void clocksource_unregister(struct clocksource *cs)
439{ 643{
440 unsigned long flags; 644 mutex_lock(&clocksource_mutex);
441 645 clocksource_dequeue_watchdog(cs);
442 spin_lock_irqsave(&clocksource_lock, flags);
443 list_del(&cs->list); 646 list_del(&cs->list);
444 if (clocksource_override == cs) 647 clocksource_select();
445 clocksource_override = NULL; 648 mutex_unlock(&clocksource_mutex);
446 next_clocksource = select_clocksource();
447 spin_unlock_irqrestore(&clocksource_lock, flags);
448} 649}
650EXPORT_SYMBOL(clocksource_unregister);
449 651
450#ifdef CONFIG_SYSFS 652#ifdef CONFIG_SYSFS
451/** 653/**
@@ -461,9 +663,9 @@ sysfs_show_current_clocksources(struct sys_device *dev,
 {
 	ssize_t count = 0;
 
-	spin_lock_irq(&clocksource_lock);
+	mutex_lock(&clocksource_mutex);
 	count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name);
-	spin_unlock_irq(&clocksource_lock);
+	mutex_unlock(&clocksource_mutex);
 
 	return count;
 }
@@ -481,9 +683,7 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
 					  struct sysdev_attribute *attr,
 					  const char *buf, size_t count)
 {
-	struct clocksource *ovr = NULL;
 	size_t ret = count;
-	int len;
 
 	/* strings from sysfs write are not 0 terminated! */
 	if (count >= sizeof(override_name))
@@ -493,32 +693,14 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
 	if (buf[count-1] == '\n')
 		count--;
 
-	spin_lock_irq(&clocksource_lock);
+	mutex_lock(&clocksource_mutex);
 
 	if (count > 0)
 		memcpy(override_name, buf, count);
 	override_name[count] = 0;
+	clocksource_select();
 
-	len = strlen(override_name);
-	if (len) {
-		struct clocksource *cs;
-
-		ovr = clocksource_override;
-		/* try to select it: */
-		list_for_each_entry(cs, &clocksource_list, list) {
-			if (strlen(cs->name) == len &&
-			    !strcmp(cs->name, override_name))
-				ovr = cs;
-		}
-	}
-
-	/* Reselect, when the override name has changed */
-	if (ovr != clocksource_override) {
-		clocksource_override = ovr;
-		next_clocksource = select_clocksource();
-	}
-
-	spin_unlock_irq(&clocksource_lock);
+	mutex_unlock(&clocksource_mutex);
 
 	return ret;
 }
@@ -538,13 +720,19 @@ sysfs_show_available_clocksources(struct sys_device *dev,
538 struct clocksource *src; 720 struct clocksource *src;
539 ssize_t count = 0; 721 ssize_t count = 0;
540 722
541 spin_lock_irq(&clocksource_lock); 723 mutex_lock(&clocksource_mutex);
542 list_for_each_entry(src, &clocksource_list, list) { 724 list_for_each_entry(src, &clocksource_list, list) {
543 count += snprintf(buf + count, 725 /*
726 * Don't show non-HRES clocksource if the tick code is
727 * in one shot mode (highres=on or nohz=on)
728 */
729 if (!tick_oneshot_mode_active() ||
730 (src->flags & CLOCK_SOURCE_VALID_FOR_HRES))
731 count += snprintf(buf + count,
544 max((ssize_t)PAGE_SIZE - count, (ssize_t)0), 732 max((ssize_t)PAGE_SIZE - count, (ssize_t)0),
545 "%s ", src->name); 733 "%s ", src->name);
546 } 734 }
547 spin_unlock_irq(&clocksource_lock); 735 mutex_unlock(&clocksource_mutex);
548 736
549 count += snprintf(buf + count, 737 count += snprintf(buf + count,
550 max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n"); 738 max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n");
@@ -599,11 +787,10 @@ device_initcall(init_clocksource_sysfs);
  */
 static int __init boot_override_clocksource(char* str)
 {
-	unsigned long flags;
-	spin_lock_irqsave(&clocksource_lock, flags);
+	mutex_lock(&clocksource_mutex);
 	if (str)
 		strlcpy(override_name, str, sizeof(override_name));
-	spin_unlock_irqrestore(&clocksource_lock, flags);
+	mutex_unlock(&clocksource_mutex);
 	return 1;
 }
 
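The clocksource.c changes above add clocks_calc_mult_shift() and clocksource_max_deferment(); the following stand-alone sketch reruns both calculations for an assumed 10 MHz counter with a 32-bit mask, purely to illustrate the arithmetic (the frequency, mask and 600 s conversion range are made up).

#include <stdint.h>
#include <stdio.h>

/* Same algorithm as the new clocks_calc_mult_shift(): pick the largest
 * shift whose mult still fits the requested conversion range. */
static void calc_mult_shift(uint32_t *mult, uint32_t *shift,
			    uint32_t from, uint32_t to, uint32_t maxsec)
{
	uint64_t tmp;
	uint32_t sft, sftacc = 32;

	tmp = ((uint64_t)maxsec * from) >> 32;
	while (tmp) {
		tmp >>= 1;
		sftacc--;
	}
	for (sft = 32; sft > 0; sft--) {
		tmp = ((uint64_t)to << sft) / from;
		if ((tmp >> sftacc) == 0)
			break;
	}
	*mult = tmp;
	*shift = sft;
}

int main(void)
{
	uint32_t mult, shift;
	uint64_t mask = 0xffffffffULL, max_cycles, max_nsecs;
	int ilog2_mult = 0;

	/* Illustrative 10 MHz clocksource, converting cycles to nanoseconds. */
	calc_mult_shift(&mult, &shift, 10000000, 1000000000, 600);
	printf("mult=%u shift=%u\n", mult, shift);

	/* Rough equivalent of clocksource_max_deferment(): limit cycles so
	 * that cycles * mult cannot overflow 63 bits, then cap at the mask
	 * and keep a 12.5 percent safety margin. */
	while (ilog2_mult < 31 && (1u << (ilog2_mult + 1)) <= mult)
		ilog2_mult++;
	max_cycles = 1ULL << (63 - (ilog2_mult + 1));
	if (max_cycles > mask)
		max_cycles = mask;
	max_nsecs = (max_cycles * mult) >> shift;
	printf("max idle ~ %llu ns\n",
	       (unsigned long long)(max_nsecs - (max_nsecs >> 5)));
	return 0;
}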
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 06f197560f3b..5404a8456909 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -50,7 +50,7 @@
  */
 #define JIFFIES_SHIFT	8
 
-static cycle_t jiffies_read(void)
+static cycle_t jiffies_read(struct clocksource *cs)
 {
 	return (cycle_t) jiffies;
 }
@@ -61,7 +61,6 @@ struct clocksource clocksource_jiffies = {
 	.read		= jiffies_read,
 	.mask		= 0xffffffff, /*32bits*/
 	.mult		= NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
-	.mult_orig	= NSEC_PER_JIFFY << JIFFIES_SHIFT,
 	.shift		= JIFFIES_SHIFT,
 };
 
@@ -71,3 +70,8 @@ static int __init init_jiffies_clocksource(void)
 }
 
 core_initcall(init_jiffies_clocksource);
+
+struct clocksource * __init __weak clocksource_default_clock(void)
+{
+	return &clocksource_jiffies;
+}
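As a quick sanity check of the jiffies clocksource scaling shown above (shift = 8, mult = NSEC_PER_JIFFY << 8), the sketch below converts one jiffy worth of "cycles" and gets exactly NSEC_PER_JIFFY back; HZ = 1000 is only an assumed configuration.

#include <stdint.h>
#include <stdio.h>

#define HZ		1000			/* assumed, for illustration */
#define NSEC_PER_SEC	1000000000ULL
#define NSEC_PER_JIFFY	(NSEC_PER_SEC / HZ)
#define JIFFIES_SHIFT	8

int main(void)
{
	uint64_t mult = NSEC_PER_JIFFY << JIFFIES_SHIFT;

	/* clocksource_cyc2ns(1, mult, shift) for the jiffies clocksource. */
	printf("%llu ns per jiffy\n",
	       (unsigned long long)((1 * mult) >> JIFFIES_SHIFT));
	return 0;
}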
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 7fc64375ff43..4800f933910e 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -194,8 +194,7 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
 	case TIME_OK:
 		break;
 	case TIME_INS:
-		xtime.tv_sec--;
-		wall_to_monotonic.tv_sec++;
+		timekeeping_leap_insert(-1);
 		time_state = TIME_OOP;
 		printk(KERN_NOTICE
 			"Clock: inserting leap second 23:59:60 UTC\n");
@@ -203,9 +202,8 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
 		res = HRTIMER_RESTART;
 		break;
 	case TIME_DEL:
-		xtime.tv_sec++;
+		timekeeping_leap_insert(1);
 		time_tai--;
-		wall_to_monotonic.tv_sec--;
 		time_state = TIME_WAIT;
 		printk(KERN_NOTICE
 			"Clock: deleting leap second 23:59:59 UTC\n");
@@ -219,7 +217,6 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
 		time_state = TIME_OK;
 		break;
 	}
-	update_vsyscall(&xtime, clock);
 
 	write_sequnlock(&xtime_lock);
 
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 118a3b3b3f9a..c2ec25087a35 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -27,7 +27,7 @@
  * timer stops in C3 state.
  */
 
-struct tick_device tick_broadcast_device;
+static struct tick_device tick_broadcast_device;
 /* FIXME: Use cpumask_var_t. */
 static DECLARE_BITMAP(tick_broadcast_mask, NR_CPUS);
 static DECLARE_BITMAP(tmpmask, NR_CPUS);
@@ -205,11 +205,11 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
  * Powerstate information: The system enters/leaves a state, where
  * affected devices might stop
  */
-static void tick_do_broadcast_on_off(void *why)
+static void tick_do_broadcast_on_off(unsigned long *reason)
 {
 	struct clock_event_device *bc, *dev;
 	struct tick_device *td;
-	unsigned long flags, *reason = why;
+	unsigned long flags;
 	int cpu, bc_stopped;
 
 	spin_lock_irqsave(&tick_broadcast_lock, flags);
@@ -276,8 +276,7 @@ void tick_broadcast_on_off(unsigned long reason, int *oncpu)
 		printk(KERN_ERR "tick-broadcast: ignoring broadcast for "
 		       "offline CPU #%d\n", *oncpu);
 	else
-		smp_call_function_single(*oncpu, tick_do_broadcast_on_off,
-					 &reason, 1);
+		tick_do_broadcast_on_off(&reason);
 }
 
 /*
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 21a5ca849514..83c4417b6a3c 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -93,7 +93,17 @@ void tick_handle_periodic(struct clock_event_device *dev)
 	for (;;) {
 		if (!clockevents_program_event(dev, next, ktime_get()))
 			return;
-		tick_periodic(cpu);
+		/*
+		 * Have to be careful here. If we're in oneshot mode,
+		 * before we call tick_periodic() in a loop, we need
+		 * to be sure we're using a real hardware clocksource.
+		 * Otherwise we could get trapped in an infinite loop,
+		 * as tick_periodic() increments jiffies, which then
+		 * will increment time, possibly causing the loop to
+		 * trigger again and again.
+		 */
+		if (timekeeping_valid_for_hres())
+			tick_periodic(cpu);
 		next = ktime_add(next, tick_period);
 	}
 }
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index 2e8de678e767..0a8a213016f0 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -50,9 +50,9 @@ int tick_dev_program_event(struct clock_event_device *dev, ktime_t expires,
 		dev->min_delta_ns += dev->min_delta_ns >> 1;
 
 		printk(KERN_WARNING
-		       "CE: %s increasing min_delta_ns to %lu nsec\n",
+		       "CE: %s increasing min_delta_ns to %llu nsec\n",
 		       dev->name ? dev->name : "?",
-		       dev->min_delta_ns << 1);
+		       (unsigned long long) dev->min_delta_ns << 1);
 
 		i = 0;
 	}
@@ -128,6 +128,23 @@ int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *))
 	return 0;
 }
 
+/**
+ * tick_oneshot_mode_active - check whether the system is in oneshot mode
+ *
+ * returns 1 when either nohz or highres are enabled. otherwise 0.
+ */
+int tick_oneshot_mode_active(void)
+{
+	unsigned long flags;
+	int ret;
+
+	local_irq_save(flags);
+	ret = __get_cpu_var(tick_cpu_device).mode == TICKDEV_MODE_ONESHOT;
+	local_irq_restore(flags);
+
+	return ret;
+}
+
 #ifdef CONFIG_HIGH_RES_TIMERS
 /**
  * tick_init_highres - switch to high resolution mode
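The retry path in tick_dev_program_event() above grows min_delta_ns by 50% each time programming fails; the toy loop below just prints that growth, starting from an assumed 1000 ns floor.

#include <stdio.h>

int main(void)
{
	unsigned long long min_delta_ns = 1000;	/* assumed starting value */
	int attempt;

	/* Same update as the hunk above: min_delta_ns += min_delta_ns >> 1 */
	for (attempt = 1; attempt <= 5; attempt++) {
		min_delta_ns += min_delta_ns >> 1;
		printf("attempt %d: min_delta_ns = %llu\n",
		       attempt, min_delta_ns);
	}
	return 0;
}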
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index d3f1ef4d5cbe..f992762d7f51 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -134,18 +134,13 @@ __setup("nohz=", setup_tick_nohz);
134 * value. We do this unconditionally on any cpu, as we don't know whether the 134 * value. We do this unconditionally on any cpu, as we don't know whether the
135 * cpu, which has the update task assigned is in a long sleep. 135 * cpu, which has the update task assigned is in a long sleep.
136 */ 136 */
137static void tick_nohz_update_jiffies(void) 137static void tick_nohz_update_jiffies(ktime_t now)
138{ 138{
139 int cpu = smp_processor_id(); 139 int cpu = smp_processor_id();
140 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); 140 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
141 unsigned long flags; 141 unsigned long flags;
142 ktime_t now;
143
144 if (!ts->tick_stopped)
145 return;
146 142
147 cpumask_clear_cpu(cpu, nohz_cpu_mask); 143 cpumask_clear_cpu(cpu, nohz_cpu_mask);
148 now = ktime_get();
149 ts->idle_waketime = now; 144 ts->idle_waketime = now;
150 145
151 local_irq_save(flags); 146 local_irq_save(flags);
@@ -155,20 +150,17 @@ static void tick_nohz_update_jiffies(void)
155 touch_softlockup_watchdog(); 150 touch_softlockup_watchdog();
156} 151}
157 152
158static void tick_nohz_stop_idle(int cpu) 153static void tick_nohz_stop_idle(int cpu, ktime_t now)
159{ 154{
160 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); 155 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
156 ktime_t delta;
161 157
162 if (ts->idle_active) { 158 delta = ktime_sub(now, ts->idle_entrytime);
163 ktime_t now, delta; 159 ts->idle_lastupdate = now;
164 now = ktime_get(); 160 ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
165 delta = ktime_sub(now, ts->idle_entrytime); 161 ts->idle_active = 0;
166 ts->idle_lastupdate = now;
167 ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
168 ts->idle_active = 0;
169 162
170 sched_clock_idle_wakeup_event(0); 163 sched_clock_idle_wakeup_event(0);
171 }
172} 164}
173 165
174static ktime_t tick_nohz_start_idle(struct tick_sched *ts) 166static ktime_t tick_nohz_start_idle(struct tick_sched *ts)
@@ -216,12 +208,29 @@ void tick_nohz_stop_sched_tick(int inidle)
216 struct tick_sched *ts; 208 struct tick_sched *ts;
217 ktime_t last_update, expires, now; 209 ktime_t last_update, expires, now;
218 struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; 210 struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
211 u64 time_delta;
219 int cpu; 212 int cpu;
220 213
221 local_irq_save(flags); 214 local_irq_save(flags);
222 215
223 cpu = smp_processor_id(); 216 cpu = smp_processor_id();
224 ts = &per_cpu(tick_cpu_sched, cpu); 217 ts = &per_cpu(tick_cpu_sched, cpu);
218
219 /*
220 * Call to tick_nohz_start_idle stops the last_update_time from being
221 * updated. Thus, it must not be called in the event we are called from
222 * irq_exit() with the prior state different than idle.
223 */
224 if (!inidle && !ts->inidle)
225 goto end;
226
227 /*
228 * Set ts->inidle unconditionally. Even if the system did not
229 * switch to NOHZ mode the cpu frequency governers rely on the
230 * update of the idle time accounting in tick_nohz_start_idle().
231 */
232 ts->inidle = 1;
233
225 now = tick_nohz_start_idle(ts); 234 now = tick_nohz_start_idle(ts);
226 235
227 /* 236 /*
@@ -239,11 +248,6 @@ void tick_nohz_stop_sched_tick(int inidle)
239 if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) 248 if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
240 goto end; 249 goto end;
241 250
242 if (!inidle && !ts->inidle)
243 goto end;
244
245 ts->inidle = 1;
246
247 if (need_resched()) 251 if (need_resched())
248 goto end; 252 goto end;
249 253
@@ -252,7 +256,7 @@ void tick_nohz_stop_sched_tick(int inidle)
 
 		if (ratelimit < 10) {
 			printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n",
-			       local_softirq_pending());
+			       (unsigned int) local_softirq_pending());
 			ratelimit++;
 		}
 		goto end;
@@ -264,14 +268,18 @@ void tick_nohz_stop_sched_tick(int inidle)
264 seq = read_seqbegin(&xtime_lock); 268 seq = read_seqbegin(&xtime_lock);
265 last_update = last_jiffies_update; 269 last_update = last_jiffies_update;
266 last_jiffies = jiffies; 270 last_jiffies = jiffies;
271 time_delta = timekeeping_max_deferment();
267 } while (read_seqretry(&xtime_lock, seq)); 272 } while (read_seqretry(&xtime_lock, seq));
268 273
269 /* Get the next timer wheel timer */ 274 if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
270 next_jiffies = get_next_timer_interrupt(last_jiffies); 275 arch_needs_cpu(cpu)) {
271 delta_jiffies = next_jiffies - last_jiffies; 276 next_jiffies = last_jiffies + 1;
272
273 if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu))
274 delta_jiffies = 1; 277 delta_jiffies = 1;
278 } else {
279 /* Get the next timer wheel timer */
280 next_jiffies = get_next_timer_interrupt(last_jiffies);
281 delta_jiffies = next_jiffies - last_jiffies;
282 }
275 /* 283 /*
276 * Do not stop the tick, if we are only one off 284 * Do not stop the tick, if we are only one off
277 * or if the cpu is required for rcu 285 * or if the cpu is required for rcu
@@ -283,22 +291,51 @@ void tick_nohz_stop_sched_tick(int inidle)
283 if ((long)delta_jiffies >= 1) { 291 if ((long)delta_jiffies >= 1) {
284 292
285 /* 293 /*
286 * calculate the expiry time for the next timer wheel
287 * timer
288 */
289 expires = ktime_add_ns(last_update, tick_period.tv64 *
290 delta_jiffies);
291
292 /*
293 * If this cpu is the one which updates jiffies, then 294 * If this cpu is the one which updates jiffies, then
294 * give up the assignment and let it be taken by the 295 * give up the assignment and let it be taken by the
295 * cpu which runs the tick timer next, which might be 296 * cpu which runs the tick timer next, which might be
296 * this cpu as well. If we don't drop this here the 297 * this cpu as well. If we don't drop this here the
297 * jiffies might be stale and do_timer() never 298 * jiffies might be stale and do_timer() never
298 * invoked. 299 * invoked. Keep track of the fact that it was the one
300 * which had the do_timer() duty last. If this cpu is
301 * the one which had the do_timer() duty last, we
302 * limit the sleep time to the timekeeping
303 * max_deferement value which we retrieved
304 * above. Otherwise we can sleep as long as we want.
299 */ 305 */
300 if (cpu == tick_do_timer_cpu) 306 if (cpu == tick_do_timer_cpu) {
301 tick_do_timer_cpu = TICK_DO_TIMER_NONE; 307 tick_do_timer_cpu = TICK_DO_TIMER_NONE;
308 ts->do_timer_last = 1;
309 } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
310 time_delta = KTIME_MAX;
311 ts->do_timer_last = 0;
312 } else if (!ts->do_timer_last) {
313 time_delta = KTIME_MAX;
314 }
315
316 /*
317 * calculate the expiry time for the next timer wheel
318 * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals
319 * that there is no timer pending or at least extremely
320 * far into the future (12 days for HZ=1000). In this
321 * case we set the expiry to the end of time.
322 */
323 if (likely(delta_jiffies < NEXT_TIMER_MAX_DELTA)) {
324 /*
325 * Calculate the time delta for the next timer event.
326 * If the time delta exceeds the maximum time delta
327 * permitted by the current clocksource then adjust
328 * the time delta accordingly to ensure the
329 * clocksource does not wrap.
330 */
331 time_delta = min_t(u64, time_delta,
332 tick_period.tv64 * delta_jiffies);
333 }
334
335 if (time_delta < KTIME_MAX)
336 expires = ktime_add_ns(last_update, time_delta);
337 else
338 expires.tv64 = KTIME_MAX;
302 339
303 if (delta_jiffies > 1) 340 if (delta_jiffies > 1)
304 cpumask_set_cpu(cpu, nohz_cpu_mask); 341 cpumask_set_cpu(cpu, nohz_cpu_mask);
@@ -331,25 +368,22 @@ void tick_nohz_stop_sched_tick(int inidle)
331 368
332 ts->idle_sleeps++; 369 ts->idle_sleeps++;
333 370
371 /* Mark expires */
372 ts->idle_expires = expires;
373
334 /* 374 /*
335 * delta_jiffies >= NEXT_TIMER_MAX_DELTA signals that 375 * If the expiration time == KTIME_MAX, then
336 * there is no timer pending or at least extremly far 376 * in this case we simply stop the tick timer.
337 * into the future (12 days for HZ=1000). In this case
338 * we simply stop the tick timer:
339 */ 377 */
340 if (unlikely(delta_jiffies >= NEXT_TIMER_MAX_DELTA)) { 378 if (unlikely(expires.tv64 == KTIME_MAX)) {
341 ts->idle_expires.tv64 = KTIME_MAX;
342 if (ts->nohz_mode == NOHZ_MODE_HIGHRES) 379 if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
343 hrtimer_cancel(&ts->sched_timer); 380 hrtimer_cancel(&ts->sched_timer);
344 goto out; 381 goto out;
345 } 382 }
346 383
347 /* Mark expiries */
348 ts->idle_expires = expires;
349
350 if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { 384 if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
351 hrtimer_start(&ts->sched_timer, expires, 385 hrtimer_start(&ts->sched_timer, expires,
352 HRTIMER_MODE_ABS); 386 HRTIMER_MODE_ABS_PINNED);
353 /* Check, if the timer was already in the past */ 387 /* Check, if the timer was already in the past */
354 if (hrtimer_active(&ts->sched_timer)) 388 if (hrtimer_active(&ts->sched_timer))
355 goto out; 389 goto out;
@@ -395,7 +429,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
 
 	if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
 		hrtimer_start_expires(&ts->sched_timer,
-				      HRTIMER_MODE_ABS);
+				      HRTIMER_MODE_ABS_PINNED);
 		/* Check, if the timer was already in the past */
 		if (hrtimer_active(&ts->sched_timer))
 			break;
@@ -425,7 +459,11 @@ void tick_nohz_restart_sched_tick(void)
425 ktime_t now; 459 ktime_t now;
426 460
427 local_irq_disable(); 461 local_irq_disable();
428 tick_nohz_stop_idle(cpu); 462 if (ts->idle_active || (ts->inidle && ts->tick_stopped))
463 now = ktime_get();
464
465 if (ts->idle_active)
466 tick_nohz_stop_idle(cpu, now);
429 467
430 if (!ts->inidle || !ts->tick_stopped) { 468 if (!ts->inidle || !ts->tick_stopped) {
431 ts->inidle = 0; 469 ts->inidle = 0;
@@ -439,7 +477,6 @@ void tick_nohz_restart_sched_tick(void)
 
 	/* Update jiffies first */
 	select_nohz_load_balancer(0);
-	now = ktime_get();
 	tick_do_update_jiffies64(now);
 	cpumask_clear_cpu(cpu, nohz_cpu_mask);
 
@@ -573,22 +610,18 @@ static void tick_nohz_switch_to_nohz(void)
573 * timer and do not touch the other magic bits which need to be done 610 * timer and do not touch the other magic bits which need to be done
574 * when idle is left. 611 * when idle is left.
575 */ 612 */
576static void tick_nohz_kick_tick(int cpu) 613static void tick_nohz_kick_tick(int cpu, ktime_t now)
577{ 614{
578#if 0 615#if 0
579 /* Switch back to 2.6.27 behaviour */ 616 /* Switch back to 2.6.27 behaviour */
580 617
581 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); 618 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
582 ktime_t delta, now; 619 ktime_t delta;
583
584 if (!ts->tick_stopped)
585 return;
586 620
587 /* 621 /*
588 * Do not touch the tick device, when the next expiry is either 622 * Do not touch the tick device, when the next expiry is either
589 * already reached or less/equal than the tick period. 623 * already reached or less/equal than the tick period.
590 */ 624 */
591 now = ktime_get();
592 delta = ktime_sub(hrtimer_get_expires(&ts->sched_timer), now); 625 delta = ktime_sub(hrtimer_get_expires(&ts->sched_timer), now);
593 if (delta.tv64 <= tick_period.tv64) 626 if (delta.tv64 <= tick_period.tv64)
594 return; 627 return;
@@ -597,9 +630,26 @@ static void tick_nohz_kick_tick(int cpu)
597#endif 630#endif
598} 631}
599 632
633static inline void tick_check_nohz(int cpu)
634{
635 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
636 ktime_t now;
637
638 if (!ts->idle_active && !ts->tick_stopped)
639 return;
640 now = ktime_get();
641 if (ts->idle_active)
642 tick_nohz_stop_idle(cpu, now);
643 if (ts->tick_stopped) {
644 tick_nohz_update_jiffies(now);
645 tick_nohz_kick_tick(cpu, now);
646 }
647}
648
600#else 649#else
601 650
602static inline void tick_nohz_switch_to_nohz(void) { } 651static inline void tick_nohz_switch_to_nohz(void) { }
652static inline void tick_check_nohz(int cpu) { }
603 653
604#endif /* NO_HZ */ 654#endif /* NO_HZ */
605 655
@@ -609,11 +659,7 @@ static inline void tick_nohz_switch_to_nohz(void) { }
609void tick_check_idle(int cpu) 659void tick_check_idle(int cpu)
610{ 660{
611 tick_check_oneshot_broadcast(cpu); 661 tick_check_oneshot_broadcast(cpu);
612#ifdef CONFIG_NO_HZ 662 tick_check_nohz(cpu);
613 tick_nohz_stop_idle(cpu);
614 tick_nohz_update_jiffies();
615 tick_nohz_kick_tick(cpu);
616#endif
617} 663}
618 664
619/* 665/*
@@ -698,7 +744,8 @@ void tick_setup_sched_timer(void)
698 744
699 for (;;) { 745 for (;;) {
700 hrtimer_forward(&ts->sched_timer, now, tick_period); 746 hrtimer_forward(&ts->sched_timer, now, tick_period);
701 hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS); 747 hrtimer_start_expires(&ts->sched_timer,
748 HRTIMER_MODE_ABS_PINNED);
702 /* Check, if the timer was already in the past */ 749 /* Check, if the timer was already in the past */
703 if (hrtimer_active(&ts->sched_timer)) 750 if (hrtimer_active(&ts->sched_timer))
704 break; 751 break;
diff --git a/kernel/time/timecompare.c b/kernel/time/timecompare.c
index 71e7f1a19156..96ff643a5a59 100644
--- a/kernel/time/timecompare.c
+++ b/kernel/time/timecompare.c
@@ -40,7 +40,7 @@ ktime_t timecompare_transform(struct timecompare *sync,
40 40
41 return ns_to_ktime(nsec); 41 return ns_to_ktime(nsec);
42} 42}
43EXPORT_SYMBOL(timecompare_transform); 43EXPORT_SYMBOL_GPL(timecompare_transform);
44 44
45int timecompare_offset(struct timecompare *sync, 45int timecompare_offset(struct timecompare *sync,
46 s64 *offset, 46 s64 *offset,
@@ -131,7 +131,7 @@ int timecompare_offset(struct timecompare *sync,
131 131
132 return used; 132 return used;
133} 133}
134EXPORT_SYMBOL(timecompare_offset); 134EXPORT_SYMBOL_GPL(timecompare_offset);
135 135
136void __timecompare_update(struct timecompare *sync, 136void __timecompare_update(struct timecompare *sync,
137 u64 source_tstamp) 137 u64 source_tstamp)
@@ -188,4 +188,4 @@ void __timecompare_update(struct timecompare *sync,
188 } 188 }
189 } 189 }
190} 190}
191EXPORT_SYMBOL(__timecompare_update); 191EXPORT_SYMBOL_GPL(__timecompare_update);
diff --git a/kernel/time/timeconv.c b/kernel/time/timeconv.c
new file mode 100644
index 000000000000..86628e755f38
--- /dev/null
+++ b/kernel/time/timeconv.c
@@ -0,0 +1,127 @@
1/*
2 * Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc.
3 * This file is part of the GNU C Library.
4 * Contributed by Paul Eggert (eggert@twinsun.com).
5 *
6 * The GNU C Library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public License as
8 * published by the Free Software Foundation; either version 2 of the
9 * License, or (at your option) any later version.
10 *
11 * The GNU C Library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public
17 * License along with the GNU C Library; see the file COPYING.LIB. If not,
18 * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 02111-1307, USA.
20 */
21
22/*
23 * Converts the calendar time to broken-down time representation
24 * Based on code from glibc-2.6
25 *
26 * 2009-7-14:
27 * Moved from glibc-2.6 to kernel by Zhaolei<zhaolei@cn.fujitsu.com>
28 */
29
30#include <linux/time.h>
31#include <linux/module.h>
32
33/*
34 * Nonzero if YEAR is a leap year (every 4 years,
35 * except every 100th isn't, and every 400th is).
36 */
37static int __isleap(long year)
38{
39 return (year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0);
40}
41
42/* do a mathdiv for long type */
43static long math_div(long a, long b)
44{
45 return a / b - (a % b < 0);
46}
47
48/* How many leap years between y1 and y2, y1 must be less than or equal to y2 */
49static long leaps_between(long y1, long y2)
50{
51 long leaps1 = math_div(y1 - 1, 4) - math_div(y1 - 1, 100)
52 + math_div(y1 - 1, 400);
53 long leaps2 = math_div(y2 - 1, 4) - math_div(y2 - 1, 100)
54 + math_div(y2 - 1, 400);
55 return leaps2 - leaps1;
56}
57
58/* How many days come before each month (0-12). */
59static const unsigned short __mon_yday[2][13] = {
60 /* Normal years. */
61 {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
62 /* Leap years. */
63 {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}
64};
65
66#define SECS_PER_HOUR (60 * 60)
67#define SECS_PER_DAY (SECS_PER_HOUR * 24)
68
69/**
70 * time_to_tm - converts the calendar time to local broken-down time
71 *
72 * @totalsecs: the number of seconds elapsed since 00:00:00 on January 1, 1970,
73 * Coordinated Universal Time (UTC).
74 * @offset: offset seconds to add to totalsecs.
75 * @result: pointer to struct tm variable to receive broken-down time
76 */
77void time_to_tm(time_t totalsecs, int offset, struct tm *result)
78{
79 long days, rem, y;
80 const unsigned short *ip;
81
82 days = totalsecs / SECS_PER_DAY;
83 rem = totalsecs % SECS_PER_DAY;
84 rem += offset;
85 while (rem < 0) {
86 rem += SECS_PER_DAY;
87 --days;
88 }
89 while (rem >= SECS_PER_DAY) {
90 rem -= SECS_PER_DAY;
91 ++days;
92 }
93
94 result->tm_hour = rem / SECS_PER_HOUR;
95 rem %= SECS_PER_HOUR;
96 result->tm_min = rem / 60;
97 result->tm_sec = rem % 60;
98
99 /* January 1, 1970 was a Thursday. */
100 result->tm_wday = (4 + days) % 7;
101 if (result->tm_wday < 0)
102 result->tm_wday += 7;
103
104 y = 1970;
105
106 while (days < 0 || days >= (__isleap(y) ? 366 : 365)) {
107 /* Guess a corrected year, assuming 365 days per year. */
108 long yg = y + math_div(days, 365);
109
110 /* Adjust DAYS and Y to match the guessed year. */
111 days -= (yg - y) * 365 + leaps_between(y, yg);
112 y = yg;
113 }
114
115 result->tm_year = y - 1900;
116
117 result->tm_yday = days;
118
119 ip = __mon_yday[__isleap(y)];
120 for (y = 11; days < ip[y]; y--)
121 continue;
122 days -= ip[y];
123
124 result->tm_mon = y;
125 result->tm_mday = days + 1;
126}
127EXPORT_SYMBOL(time_to_tm);
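
As a quick illustration of the conversion above (not part of the patch): for totalsecs = 1234567890 and offset = 0, days = 14288 and rem = 84690, which gives 23:31:30; the year loop settles on y = 2009 with 43 days left, so tm_mon = 1 (February) and tm_mday = 13, i.e. Friday, 13 February 2009 UTC. A minimal in-kernel usage sketch follows, assuming only the time_to_tm() prototype added here; example_print_utc() is a hypothetical helper, not an existing kernel function:

#include <linux/kernel.h>
#include <linux/time.h>

static void example_print_utc(time_t now)
{
	struct tm tm;

	/* offset 0 keeps the result in UTC; a non-zero offset shifts it */
	time_to_tm(now, 0, &tm);
	printk(KERN_INFO "%04ld-%02d-%02d %02d:%02d:%02d UTC\n",
	       tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
	       tm.tm_hour, tm.tm_min, tm.tm_sec);
}
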
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 900f1b6598d1..af4135f05825 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -13,16 +13,127 @@
13#include <linux/percpu.h> 13#include <linux/percpu.h>
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/mm.h> 15#include <linux/mm.h>
16#include <linux/sched.h>
16#include <linux/sysdev.h> 17#include <linux/sysdev.h>
17#include <linux/clocksource.h> 18#include <linux/clocksource.h>
18#include <linux/jiffies.h> 19#include <linux/jiffies.h>
19#include <linux/time.h> 20#include <linux/time.h>
20#include <linux/tick.h> 21#include <linux/tick.h>
22#include <linux/stop_machine.h>
23
24/* Structure holding internal timekeeping values. */
25struct timekeeper {
26 /* Current clocksource used for timekeeping. */
27 struct clocksource *clock;
28 /* The shift value of the current clocksource. */
29 int shift;
30
31 /* Number of clock cycles in one NTP interval. */
32 cycle_t cycle_interval;
33 /* Number of clock shifted nano seconds in one NTP interval. */
34 u64 xtime_interval;
35 /* Raw nano seconds accumulated per NTP interval. */
36 u32 raw_interval;
37
38 /* Clock shifted nano seconds remainder not stored in xtime.tv_nsec. */
39 u64 xtime_nsec;
40 /* Difference between accumulated time and NTP time in ntp
41 * shifted nano seconds. */
42 s64 ntp_error;
43 /* Shift conversion between clock shifted nano seconds and
44 * ntp shifted nano seconds. */
45 int ntp_error_shift;
46 /* NTP adjusted clock multiplier */
47 u32 mult;
48};
49
50struct timekeeper timekeeper;
51
52/**
53 * timekeeper_setup_internals - Set up internals to use clocksource clock.
54 *
55 * @clock: Pointer to clocksource.
56 *
57 * Calculates a fixed cycle/nsec interval for a given clocksource/adjustment
58 * pair and interval request.
59 *
60 * Unless you're the timekeeping code, you should not be using this!
61 */
62static void timekeeper_setup_internals(struct clocksource *clock)
63{
64 cycle_t interval;
65 u64 tmp;
66
67 timekeeper.clock = clock;
68 clock->cycle_last = clock->read(clock);
69
70 /* Do the ns -> cycle conversion first, using original mult */
71 tmp = NTP_INTERVAL_LENGTH;
72 tmp <<= clock->shift;
73 tmp += clock->mult/2;
74 do_div(tmp, clock->mult);
75 if (tmp == 0)
76 tmp = 1;
77
78 interval = (cycle_t) tmp;
79 timekeeper.cycle_interval = interval;
80
81 /* Go back from cycles -> shifted ns */
82 timekeeper.xtime_interval = (u64) interval * clock->mult;
83 timekeeper.raw_interval =
84 ((u64) interval * clock->mult) >> clock->shift;
85
86 timekeeper.xtime_nsec = 0;
87 timekeeper.shift = clock->shift;
88
89 timekeeper.ntp_error = 0;
90 timekeeper.ntp_error_shift = NTP_SCALE_SHIFT - clock->shift;
91
92 /*
93 * The timekeeper keeps its own mult value for the currently
94 * active clocksource. This value will be adjusted via NTP
95 * to counteract clock drifting.
96 */
97 timekeeper.mult = clock->mult;
98}
99
100/* Timekeeper helper functions. */
101static inline s64 timekeeping_get_ns(void)
102{
103 cycle_t cycle_now, cycle_delta;
104 struct clocksource *clock;
21 105
106 /* read clocksource: */
107 clock = timekeeper.clock;
108 cycle_now = clock->read(clock);
109
110 /* calculate the delta since the last update_wall_time: */
111 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
112
113 /* return delta converted to nanoseconds using the NTP-adjusted mult. */
114 return clocksource_cyc2ns(cycle_delta, timekeeper.mult,
115 timekeeper.shift);
116}
117
118static inline s64 timekeeping_get_ns_raw(void)
119{
120 cycle_t cycle_now, cycle_delta;
121 struct clocksource *clock;
122
123 /* read clocksource: */
124 clock = timekeeper.clock;
125 cycle_now = clock->read(clock);
126
127 /* calculate the delta since the last update_wall_time: */
128 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
129
130 /* return delta converted to nanoseconds using the clock's unadjusted mult and shift. */
131 return clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift);
132}
22 133
23/* 134/*
24 * This read-write spinlock protects us from races in SMP while 135 * This read-write spinlock protects us from races in SMP while
25 * playing with xtime and avenrun. 136 * playing with xtime.
26 */ 137 */
27__cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock); 138__cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
28 139
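
As a worked example of the interval setup in timekeeper_setup_internals() above, the following standalone userspace sketch redoes the same rounding arithmetic for an assumed 1 MHz clocksource with shift = 20 and a 1 ms NTP interval (HZ = 1000); all of these numbers are illustrative, not values taken from the patch:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t mult = 1000u << 20;		/* 1000 ns per cycle, pre-shifted */
	unsigned int shift = 20;
	uint64_t ntp_interval_ns = 1000000;	/* 1 ms NTP interval */

	/* ns -> cycles with rounding, mirroring the kernel calculation */
	uint64_t tmp = (ntp_interval_ns << shift) + mult / 2;
	uint64_t cycle_interval = tmp / mult;			/* 1000 cycles */
	uint64_t xtime_interval = cycle_interval * mult;	/* shifted ns per interval */
	uint64_t raw_interval = xtime_interval >> shift;	/* 1000000 ns */

	printf("cycles=%llu shifted_ns=%llu raw_ns=%llu\n",
	       (unsigned long long)cycle_interval,
	       (unsigned long long)xtime_interval,
	       (unsigned long long)raw_interval);
	return 0;
}
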
@@ -44,43 +155,54 @@ __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
44 */ 155 */
45struct timespec xtime __attribute__ ((aligned (16))); 156struct timespec xtime __attribute__ ((aligned (16)));
46struct timespec wall_to_monotonic __attribute__ ((aligned (16))); 157struct timespec wall_to_monotonic __attribute__ ((aligned (16)));
47static unsigned long total_sleep_time; /* seconds */ 158static struct timespec total_sleep_time;
159
160/*
161 * The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock.
162 */
163struct timespec raw_time;
48 164
49/* flag for if timekeeping is suspended */ 165/* flag for if timekeeping is suspended */
50int __read_mostly timekeeping_suspended; 166int __read_mostly timekeeping_suspended;
51 167
52static struct timespec xtime_cache __attribute__ ((aligned (16))); 168/* must hold xtime_lock */
53void update_xtime_cache(u64 nsec) 169void timekeeping_leap_insert(int leapsecond)
54{ 170{
55 xtime_cache = xtime; 171 xtime.tv_sec += leapsecond;
56 timespec_add_ns(&xtime_cache, nsec); 172 wall_to_monotonic.tv_sec -= leapsecond;
173 update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
57} 174}
58 175
59struct clocksource *clock;
60
61
62#ifdef CONFIG_GENERIC_TIME 176#ifdef CONFIG_GENERIC_TIME
177
63/** 178/**
64 * clocksource_forward_now - update clock to the current time 179 * timekeeping_forward_now - update clock to the current time
65 * 180 *
66 * Forward the current clock to update its state since the last call to 181 * Forward the current clock to update its state since the last call to
67 * update_wall_time(). This is useful before significant clock changes, 182 * update_wall_time(). This is useful before significant clock changes,
68 * as it avoids having to deal with this time offset explicitly. 183 * as it avoids having to deal with this time offset explicitly.
69 */ 184 */
70static void clocksource_forward_now(void) 185static void timekeeping_forward_now(void)
71{ 186{
72 cycle_t cycle_now, cycle_delta; 187 cycle_t cycle_now, cycle_delta;
188 struct clocksource *clock;
73 s64 nsec; 189 s64 nsec;
74 190
75 cycle_now = clocksource_read(clock); 191 clock = timekeeper.clock;
192 cycle_now = clock->read(clock);
76 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; 193 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
77 clock->cycle_last = cycle_now; 194 clock->cycle_last = cycle_now;
78 195
79 nsec = cyc2ns(clock, cycle_delta); 196 nsec = clocksource_cyc2ns(cycle_delta, timekeeper.mult,
197 timekeeper.shift);
198
199 /* If arch requires, add in gettimeoffset() */
200 nsec += arch_gettimeoffset();
201
80 timespec_add_ns(&xtime, nsec); 202 timespec_add_ns(&xtime, nsec);
81 203
82 nsec = ((s64)cycle_delta * clock->mult_orig) >> clock->shift; 204 nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift);
83 clock->raw_time.tv_nsec += nsec; 205 timespec_add_ns(&raw_time, nsec);
84} 206}
85 207
86/** 208/**
@@ -91,7 +213,6 @@ static void clocksource_forward_now(void)
91 */ 213 */
92void getnstimeofday(struct timespec *ts) 214void getnstimeofday(struct timespec *ts)
93{ 215{
94 cycle_t cycle_now, cycle_delta;
95 unsigned long seq; 216 unsigned long seq;
96 s64 nsecs; 217 s64 nsecs;
97 218
@@ -101,15 +222,10 @@ void getnstimeofday(struct timespec *ts)
101 seq = read_seqbegin(&xtime_lock); 222 seq = read_seqbegin(&xtime_lock);
102 223
103 *ts = xtime; 224 *ts = xtime;
225 nsecs = timekeeping_get_ns();
104 226
105 /* read clocksource: */ 227 /* If arch requires, add in gettimeoffset() */
106 cycle_now = clocksource_read(clock); 228 nsecs += arch_gettimeoffset();
107
108 /* calculate the delta since the last update_wall_time: */
109 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
110
111 /* convert to nanoseconds: */
112 nsecs = cyc2ns(clock, cycle_delta);
113 229
114 } while (read_seqretry(&xtime_lock, seq)); 230 } while (read_seqretry(&xtime_lock, seq));
115 231
@@ -118,6 +234,57 @@ void getnstimeofday(struct timespec *ts)
118 234
119EXPORT_SYMBOL(getnstimeofday); 235EXPORT_SYMBOL(getnstimeofday);
120 236
237ktime_t ktime_get(void)
238{
239 unsigned int seq;
240 s64 secs, nsecs;
241
242 WARN_ON(timekeeping_suspended);
243
244 do {
245 seq = read_seqbegin(&xtime_lock);
246 secs = xtime.tv_sec + wall_to_monotonic.tv_sec;
247 nsecs = xtime.tv_nsec + wall_to_monotonic.tv_nsec;
248 nsecs += timekeeping_get_ns();
249
250 } while (read_seqretry(&xtime_lock, seq));
251 /*
252 * Use ktime_set/ktime_add_ns to create a proper ktime on
253 * 32-bit architectures without CONFIG_KTIME_SCALAR.
254 */
255 return ktime_add_ns(ktime_set(secs, 0), nsecs);
256}
257EXPORT_SYMBOL_GPL(ktime_get);
258
259/**
260 * ktime_get_ts - get the monotonic clock in timespec format
261 * @ts: pointer to timespec variable
262 *
263 * The function calculates the monotonic clock from the realtime
264 * clock and the wall_to_monotonic offset and stores the result
265 * in normalized timespec format in the variable pointed to by @ts.
266 */
267void ktime_get_ts(struct timespec *ts)
268{
269 struct timespec tomono;
270 unsigned int seq;
271 s64 nsecs;
272
273 WARN_ON(timekeeping_suspended);
274
275 do {
276 seq = read_seqbegin(&xtime_lock);
277 *ts = xtime;
278 tomono = wall_to_monotonic;
279 nsecs = timekeeping_get_ns();
280
281 } while (read_seqretry(&xtime_lock, seq));
282
283 set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec,
284 ts->tv_nsec + tomono.tv_nsec + nsecs);
285}
286EXPORT_SYMBOL_GPL(ktime_get_ts);
287
121/** 288/**
122 * do_gettimeofday - Returns the time of day in a timeval 289 * do_gettimeofday - Returns the time of day in a timeval
123 * @tv: pointer to the timeval to be set 290 * @tv: pointer to the timeval to be set
@@ -150,7 +317,7 @@ int do_settimeofday(struct timespec *tv)
150 317
151 write_seqlock_irqsave(&xtime_lock, flags); 318 write_seqlock_irqsave(&xtime_lock, flags);
152 319
153 clocksource_forward_now(); 320 timekeeping_forward_now();
154 321
155 ts_delta.tv_sec = tv->tv_sec - xtime.tv_sec; 322 ts_delta.tv_sec = tv->tv_sec - xtime.tv_sec;
156 ts_delta.tv_nsec = tv->tv_nsec - xtime.tv_nsec; 323 ts_delta.tv_nsec = tv->tv_nsec - xtime.tv_nsec;
@@ -158,12 +325,10 @@ int do_settimeofday(struct timespec *tv)
158 325
159 xtime = *tv; 326 xtime = *tv;
160 327
161 update_xtime_cache(0); 328 timekeeper.ntp_error = 0;
162
163 clock->error = 0;
164 ntp_clear(); 329 ntp_clear();
165 330
166 update_vsyscall(&xtime, clock); 331 update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
167 332
168 write_sequnlock_irqrestore(&xtime_lock, flags); 333 write_sequnlock_irqrestore(&xtime_lock, flags);
169 334
@@ -180,39 +345,97 @@ EXPORT_SYMBOL(do_settimeofday);
180 * 345 *
181 * Accumulates current time interval and initializes new clocksource 346 * Accumulates current time interval and initializes new clocksource
182 */ 347 */
183static void change_clocksource(void) 348static int change_clocksource(void *data)
184{ 349{
185 struct clocksource *new; 350 struct clocksource *new, *old;
351
352 new = (struct clocksource *) data;
186 353
187 new = clocksource_get_next(); 354 timekeeping_forward_now();
355 if (!new->enable || new->enable(new) == 0) {
356 old = timekeeper.clock;
357 timekeeper_setup_internals(new);
358 if (old->disable)
359 old->disable(old);
360 }
361 return 0;
362}
188 363
189 if (clock == new) 364/**
365 * timekeeping_notify - Install a new clock source
366 * @clock: pointer to the clock source
367 *
368 * This function is called from clocksource.c after a new, better clock
369 * source has been registered. The caller holds the clocksource_mutex.
370 */
371void timekeeping_notify(struct clocksource *clock)
372{
373 if (timekeeper.clock == clock)
190 return; 374 return;
375 stop_machine(change_clocksource, clock, NULL);
376 tick_clock_notify();
377}
191 378
192 clocksource_forward_now(); 379#else /* GENERIC_TIME */
193 380
194 new->raw_time = clock->raw_time; 381static inline void timekeeping_forward_now(void) { }
195 382
196 clock = new; 383/**
197 clock->cycle_last = 0; 384 * ktime_get - get the monotonic time in ktime_t format
198 clock->cycle_last = clocksource_read(new); 385 *
199 clock->error = 0; 386 * returns the time in ktime_t format
200 clock->xtime_nsec = 0; 387 */
201 clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); 388ktime_t ktime_get(void)
389{
390 struct timespec now;
202 391
203 tick_clock_notify(); 392 ktime_get_ts(&now);
204 393
205 /* 394 return timespec_to_ktime(now);
206 * We're holding xtime lock and waking up klogd would deadlock
207 * us on enqueue. So no printing!
208 printk(KERN_INFO "Time: %s clocksource has been installed.\n",
209 clock->name);
210 */
211} 395}
212#else 396EXPORT_SYMBOL_GPL(ktime_get);
213static inline void clocksource_forward_now(void) { } 397
214static inline void change_clocksource(void) { } 398/**
215#endif 399 * ktime_get_ts - get the monotonic clock in timespec format
400 * @ts: pointer to timespec variable
401 *
402 * The function calculates the monotonic clock from the realtime
403 * clock and the wall_to_monotonic offset and stores the result
404 * in normalized timespec format in the variable pointed to by @ts.
405 */
406void ktime_get_ts(struct timespec *ts)
407{
408 struct timespec tomono;
409 unsigned long seq;
410
411 do {
412 seq = read_seqbegin(&xtime_lock);
413 getnstimeofday(ts);
414 tomono = wall_to_monotonic;
415
416 } while (read_seqretry(&xtime_lock, seq));
417
418 set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec,
419 ts->tv_nsec + tomono.tv_nsec);
420}
421EXPORT_SYMBOL_GPL(ktime_get_ts);
422
423#endif /* !GENERIC_TIME */
424
425/**
426 * ktime_get_real - get the real (wall-) time in ktime_t format
427 *
428 * returns the time in ktime_t format
429 */
430ktime_t ktime_get_real(void)
431{
432 struct timespec now;
433
434 getnstimeofday(&now);
435
436 return timespec_to_ktime(now);
437}
438EXPORT_SYMBOL_GPL(ktime_get_real);
216 439
217/** 440/**
218 * getrawmonotonic - Returns the raw monotonic time in a timespec 441 * getrawmonotonic - Returns the raw monotonic time in a timespec
@@ -224,21 +447,11 @@ void getrawmonotonic(struct timespec *ts)
224{ 447{
225 unsigned long seq; 448 unsigned long seq;
226 s64 nsecs; 449 s64 nsecs;
227 cycle_t cycle_now, cycle_delta;
228 450
229 do { 451 do {
230 seq = read_seqbegin(&xtime_lock); 452 seq = read_seqbegin(&xtime_lock);
231 453 nsecs = timekeeping_get_ns_raw();
232 /* read clocksource: */ 454 *ts = raw_time;
233 cycle_now = clocksource_read(clock);
234
235 /* calculate the delta since the last update_wall_time: */
236 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
237
238 /* convert to nanoseconds: */
239 nsecs = ((s64)cycle_delta * clock->mult_orig) >> clock->shift;
240
241 *ts = clock->raw_time;
242 455
243 } while (read_seqretry(&xtime_lock, seq)); 456 } while (read_seqretry(&xtime_lock, seq));
244 457
@@ -258,7 +471,7 @@ int timekeeping_valid_for_hres(void)
258 do { 471 do {
259 seq = read_seqbegin(&xtime_lock); 472 seq = read_seqbegin(&xtime_lock);
260 473
261 ret = clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; 474 ret = timekeeper.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
262 475
263 } while (read_seqretry(&xtime_lock, seq)); 476 } while (read_seqretry(&xtime_lock, seq));
264 477
@@ -266,17 +479,44 @@ int timekeeping_valid_for_hres(void)
266} 479}
267 480
268/** 481/**
269 * read_persistent_clock - Return time in seconds from the persistent clock. 482 * timekeeping_max_deferment - Returns max time the clocksource can be deferred
483 *
484 * Caller must observe xtime_lock via read_seqbegin/read_seqretry to
485 * ensure that the clocksource does not change!
486 */
487u64 timekeeping_max_deferment(void)
488{
489 return timekeeper.clock->max_idle_ns;
490}
491
492/**
493 * read_persistent_clock - Return time from the persistent clock.
270 * 494 *
271 * Weak dummy function for arches that do not yet support it. 495 * Weak dummy function for arches that do not yet support it.
272 * Returns seconds from epoch using the battery backed persistent clock. 496 * Reads the time from the battery backed persistent clock.
273 * Returns zero if unsupported. 497 * Returns a timespec with tv_sec=0 and tv_nsec=0 if unsupported.
274 * 498 *
275 * XXX - Do be sure to remove it once all arches implement it. 499 * XXX - Do be sure to remove it once all arches implement it.
276 */ 500 */
277unsigned long __attribute__((weak)) read_persistent_clock(void) 501void __attribute__((weak)) read_persistent_clock(struct timespec *ts)
278{ 502{
279 return 0; 503 ts->tv_sec = 0;
504 ts->tv_nsec = 0;
505}
506
507/**
508 * read_boot_clock - Return time of the system start.
509 *
510 * Weak dummy function for arches that do not yet support it.
511 * Function to read the exact time the system has been started.
512 * Returns a timespec with tv_sec=0 and tv_nsec=0 if unsupported.
513 *
514 * XXX - Do be sure to remove it once all arches implement it.
515 */
516void __attribute__((weak)) read_boot_clock(struct timespec *ts)
517{
518 ts->tv_sec = 0;
519 ts->tv_nsec = 0;
280} 520}
281 521
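
For reference, a hypothetical architecture override of the weak read_persistent_clock() above could look like the sketch below; my_rtc_read_seconds() is an invented placeholder for whatever battery-backed counter a platform actually provides, not an existing kernel API:

#include <linux/time.h>

/* hypothetical platform helper returning seconds since the epoch */
extern unsigned long my_rtc_read_seconds(void);

void read_persistent_clock(struct timespec *ts)
{
	/* only whole seconds are available from this (fictional) RTC */
	ts->tv_sec = my_rtc_read_seconds();
	ts->tv_nsec = 0;
}
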
282/* 522/*
@@ -284,28 +524,39 @@ unsigned long __attribute__((weak)) read_persistent_clock(void)
284 */ 524 */
285void __init timekeeping_init(void) 525void __init timekeeping_init(void)
286{ 526{
527 struct clocksource *clock;
287 unsigned long flags; 528 unsigned long flags;
288 unsigned long sec = read_persistent_clock(); 529 struct timespec now, boot;
530
531 read_persistent_clock(&now);
532 read_boot_clock(&boot);
289 533
290 write_seqlock_irqsave(&xtime_lock, flags); 534 write_seqlock_irqsave(&xtime_lock, flags);
291 535
292 ntp_init(); 536 ntp_init();
293 537
294 clock = clocksource_get_next(); 538 clock = clocksource_default_clock();
295 clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); 539 if (clock->enable)
296 clock->cycle_last = clocksource_read(clock); 540 clock->enable(clock);
297 541 timekeeper_setup_internals(clock);
298 xtime.tv_sec = sec; 542
299 xtime.tv_nsec = 0; 543 xtime.tv_sec = now.tv_sec;
544 xtime.tv_nsec = now.tv_nsec;
545 raw_time.tv_sec = 0;
546 raw_time.tv_nsec = 0;
547 if (boot.tv_sec == 0 && boot.tv_nsec == 0) {
548 boot.tv_sec = xtime.tv_sec;
549 boot.tv_nsec = xtime.tv_nsec;
550 }
300 set_normalized_timespec(&wall_to_monotonic, 551 set_normalized_timespec(&wall_to_monotonic,
301 -xtime.tv_sec, -xtime.tv_nsec); 552 -boot.tv_sec, -boot.tv_nsec);
302 update_xtime_cache(0); 553 total_sleep_time.tv_sec = 0;
303 total_sleep_time = 0; 554 total_sleep_time.tv_nsec = 0;
304 write_sequnlock_irqrestore(&xtime_lock, flags); 555 write_sequnlock_irqrestore(&xtime_lock, flags);
305} 556}
306 557
307/* time in seconds when suspend began */ 558/* time in seconds when suspend began */
308static unsigned long timekeeping_suspend_time; 559static struct timespec timekeeping_suspend_time;
309 560
310/** 561/**
311 * timekeeping_resume - Resumes the generic timekeeping subsystem. 562 * timekeeping_resume - Resumes the generic timekeeping subsystem.
@@ -318,24 +569,23 @@ static unsigned long timekeeping_suspend_time;
318static int timekeeping_resume(struct sys_device *dev) 569static int timekeeping_resume(struct sys_device *dev)
319{ 570{
320 unsigned long flags; 571 unsigned long flags;
321 unsigned long now = read_persistent_clock(); 572 struct timespec ts;
573
574 read_persistent_clock(&ts);
322 575
323 clocksource_resume(); 576 clocksource_resume();
324 577
325 write_seqlock_irqsave(&xtime_lock, flags); 578 write_seqlock_irqsave(&xtime_lock, flags);
326 579
327 if (now && (now > timekeeping_suspend_time)) { 580 if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) {
328 unsigned long sleep_length = now - timekeeping_suspend_time; 581 ts = timespec_sub(ts, timekeeping_suspend_time);
329 582 xtime = timespec_add_safe(xtime, ts);
330 xtime.tv_sec += sleep_length; 583 wall_to_monotonic = timespec_sub(wall_to_monotonic, ts);
331 wall_to_monotonic.tv_sec -= sleep_length; 584 total_sleep_time = timespec_add_safe(total_sleep_time, ts);
332 total_sleep_time += sleep_length;
333 } 585 }
334 update_xtime_cache(0);
335 /* re-base the last cycle value */ 586 /* re-base the last cycle value */
336 clock->cycle_last = 0; 587 timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
337 clock->cycle_last = clocksource_read(clock); 588 timekeeper.ntp_error = 0;
338 clock->error = 0;
339 timekeeping_suspended = 0; 589 timekeeping_suspended = 0;
340 write_sequnlock_irqrestore(&xtime_lock, flags); 590 write_sequnlock_irqrestore(&xtime_lock, flags);
341 591
@@ -353,10 +603,10 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state)
353{ 603{
354 unsigned long flags; 604 unsigned long flags;
355 605
356 timekeeping_suspend_time = read_persistent_clock(); 606 read_persistent_clock(&timekeeping_suspend_time);
357 607
358 write_seqlock_irqsave(&xtime_lock, flags); 608 write_seqlock_irqsave(&xtime_lock, flags);
359 clocksource_forward_now(); 609 timekeeping_forward_now();
360 timekeeping_suspended = 1; 610 timekeeping_suspended = 1;
361 write_sequnlock_irqrestore(&xtime_lock, flags); 611 write_sequnlock_irqrestore(&xtime_lock, flags);
362 612
@@ -391,7 +641,7 @@ device_initcall(timekeeping_init_device);
391 * If the error is already larger, we look ahead even further 641 * If the error is already larger, we look ahead even further
392 * to compensate for late or lost adjustments. 642 * to compensate for late or lost adjustments.
393 */ 643 */
394static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, 644static __always_inline int timekeeping_bigadjust(s64 error, s64 *interval,
395 s64 *offset) 645 s64 *offset)
396{ 646{
397 s64 tick_error, i; 647 s64 tick_error, i;
@@ -407,7 +657,7 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval,
407 * here. This is tuned so that an error of about 1 msec is adjusted 657 * here. This is tuned so that an error of about 1 msec is adjusted
408 * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks). 658 * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks).
409 */ 659 */
410 error2 = clock->error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ); 660 error2 = timekeeper.ntp_error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ);
411 error2 = abs(error2); 661 error2 = abs(error2);
412 for (look_ahead = 0; error2 > 0; look_ahead++) 662 for (look_ahead = 0; error2 > 0; look_ahead++)
413 error2 >>= 2; 663 error2 >>= 2;
@@ -416,8 +666,8 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval,
416 * Now calculate the error in (1 << look_ahead) ticks, but first 666 * Now calculate the error in (1 << look_ahead) ticks, but first
417 * remove the single look ahead already included in the error. 667 * remove the single look ahead already included in the error.
418 */ 668 */
419 tick_error = tick_length >> (NTP_SCALE_SHIFT - clock->shift + 1); 669 tick_error = tick_length >> (timekeeper.ntp_error_shift + 1);
420 tick_error -= clock->xtime_interval >> 1; 670 tick_error -= timekeeper.xtime_interval >> 1;
421 error = ((error - tick_error) >> look_ahead) + tick_error; 671 error = ((error - tick_error) >> look_ahead) + tick_error;
422 672
423 /* Finally calculate the adjustment shift value. */ 673 /* Finally calculate the adjustment shift value. */
@@ -442,18 +692,18 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval,
442 * this is optimized for the most common adjustments of -1,0,1, 692 * this is optimized for the most common adjustments of -1,0,1,
443 * for other values we can do a bit more work. 693 * for other values we can do a bit more work.
444 */ 694 */
445static void clocksource_adjust(s64 offset) 695static void timekeeping_adjust(s64 offset)
446{ 696{
447 s64 error, interval = clock->cycle_interval; 697 s64 error, interval = timekeeper.cycle_interval;
448 int adj; 698 int adj;
449 699
450 error = clock->error >> (NTP_SCALE_SHIFT - clock->shift - 1); 700 error = timekeeper.ntp_error >> (timekeeper.ntp_error_shift - 1);
451 if (error > interval) { 701 if (error > interval) {
452 error >>= 2; 702 error >>= 2;
453 if (likely(error <= interval)) 703 if (likely(error <= interval))
454 adj = 1; 704 adj = 1;
455 else 705 else
456 adj = clocksource_bigadjust(error, &interval, &offset); 706 adj = timekeeping_bigadjust(error, &interval, &offset);
457 } else if (error < -interval) { 707 } else if (error < -interval) {
458 error >>= 2; 708 error >>= 2;
459 if (likely(error >= -interval)) { 709 if (likely(error >= -interval)) {
@@ -461,15 +711,58 @@ static void clocksource_adjust(s64 offset)
461 interval = -interval; 711 interval = -interval;
462 offset = -offset; 712 offset = -offset;
463 } else 713 } else
464 adj = clocksource_bigadjust(error, &interval, &offset); 714 adj = timekeeping_bigadjust(error, &interval, &offset);
465 } else 715 } else
466 return; 716 return;
467 717
468 clock->mult += adj; 718 timekeeper.mult += adj;
469 clock->xtime_interval += interval; 719 timekeeper.xtime_interval += interval;
470 clock->xtime_nsec -= offset; 720 timekeeper.xtime_nsec -= offset;
471 clock->error -= (interval - offset) << 721 timekeeper.ntp_error -= (interval - offset) <<
472 (NTP_SCALE_SHIFT - clock->shift); 722 timekeeper.ntp_error_shift;
723}
724
725/**
726 * logarithmic_accumulation - shifted accumulation of cycles
727 *
728 * This functions accumulates a shifted interval of cycles into
729 * into a shifted interval nanoseconds. Allows for O(log) accumulation
730 * loop.
731 *
732 * Returns the unconsumed cycles.
733 */
734static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
735{
736 u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift;
737
738 /* If the offset is smaller than a shifted interval, do nothing */
739 if (offset < timekeeper.cycle_interval << shift)
740 return offset;
741
742 /* Accumulate one shifted interval */
743 offset -= timekeeper.cycle_interval << shift;
744 timekeeper.clock->cycle_last += timekeeper.cycle_interval << shift;
745
746 timekeeper.xtime_nsec += timekeeper.xtime_interval << shift;
747 while (timekeeper.xtime_nsec >= nsecps) {
748 timekeeper.xtime_nsec -= nsecps;
749 xtime.tv_sec++;
750 second_overflow();
751 }
752
753 /* Accumulate into raw time */
754 raw_time.tv_nsec += timekeeper.raw_interval << shift;
755 while (raw_time.tv_nsec >= NSEC_PER_SEC) {
756 raw_time.tv_nsec -= NSEC_PER_SEC;
757 raw_time.tv_sec++;
758 }
759
760 /* Accumulate error between NTP and clock interval */
761 timekeeper.ntp_error += tick_length << shift;
762 timekeeper.ntp_error -= timekeeper.xtime_interval <<
763 (timekeeper.ntp_error_shift + shift);
764
765 return offset;
473} 766}
474 767
475/** 768/**
@@ -479,53 +772,48 @@ static void clocksource_adjust(s64 offset)
479 */ 772 */
480void update_wall_time(void) 773void update_wall_time(void)
481{ 774{
775 struct clocksource *clock;
482 cycle_t offset; 776 cycle_t offset;
777 int shift = 0, maxshift;
483 778
484 /* Make sure we're fully resumed: */ 779 /* Make sure we're fully resumed: */
485 if (unlikely(timekeeping_suspended)) 780 if (unlikely(timekeeping_suspended))
486 return; 781 return;
487 782
783 clock = timekeeper.clock;
488#ifdef CONFIG_GENERIC_TIME 784#ifdef CONFIG_GENERIC_TIME
489 offset = (clocksource_read(clock) - clock->cycle_last) & clock->mask; 785 offset = (clock->read(clock) - clock->cycle_last) & clock->mask;
490#else 786#else
491 offset = clock->cycle_interval; 787 offset = timekeeper.cycle_interval;
492#endif 788#endif
493 clock->xtime_nsec = (s64)xtime.tv_nsec << clock->shift; 789 timekeeper.xtime_nsec = (s64)xtime.tv_nsec << timekeeper.shift;
494 790
495 /* normally this loop will run just once, however in the 791 /*
496 * case of lost or late ticks, it will accumulate correctly. 792 * With NO_HZ we may have to accumulate many cycle_intervals
793 * (think "ticks") worth of time at once. To do this efficiently,
794 * we calculate the largest doubling multiple of cycle_intervals
795 * that is smaller than the offset. We then accumulate that
796 * chunk in one go, and then try to consume the next smaller
797 * doubled multiple.
497 */ 798 */
498 while (offset >= clock->cycle_interval) { 799 shift = ilog2(offset) - ilog2(timekeeper.cycle_interval);
499 /* accumulate one interval */ 800 shift = max(0, shift);
500 offset -= clock->cycle_interval; 801 /* Bound shift to one less then what overflows tick_length */
501 clock->cycle_last += clock->cycle_interval; 802 maxshift = (8*sizeof(tick_length) - (ilog2(tick_length)+1)) - 1;
502 803 shift = min(shift, maxshift);
503 clock->xtime_nsec += clock->xtime_interval; 804 while (offset >= timekeeper.cycle_interval) {
504 if (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) { 805 offset = logarithmic_accumulation(offset, shift);
505 clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift; 806 shift--;
506 xtime.tv_sec++;
507 second_overflow();
508 }
509
510 clock->raw_time.tv_nsec += clock->raw_interval;
511 if (clock->raw_time.tv_nsec >= NSEC_PER_SEC) {
512 clock->raw_time.tv_nsec -= NSEC_PER_SEC;
513 clock->raw_time.tv_sec++;
514 }
515
516 /* accumulate error between NTP and clock interval */
517 clock->error += tick_length;
518 clock->error -= clock->xtime_interval << (NTP_SCALE_SHIFT - clock->shift);
519 } 807 }
520 808
521 /* correct the clock when NTP error is too big */ 809 /* correct the clock when NTP error is too big */
522 clocksource_adjust(offset); 810 timekeeping_adjust(offset);
523 811
524 /* 812 /*
525 * Since in the loop above, we accumulate any amount of time 813 * Since in the loop above, we accumulate any amount of time
526 * in xtime_nsec over a second into xtime.tv_sec, it's possible for 814 * in xtime_nsec over a second into xtime.tv_sec, it's possible for
527 * xtime_nsec to be fairly small after the loop. Further, if we're 815 * xtime_nsec to be fairly small after the loop. Further, if we're
528 * slightly speeding the clocksource up in clocksource_adjust(), 816 * slightly speeding the clocksource up in timekeeping_adjust(),
529 * it's possible the required corrective factor to xtime_nsec could 817 * it's possible the required corrective factor to xtime_nsec could
530 * cause it to underflow. 818 * cause it to underflow.
531 * 819 *
@@ -537,24 +825,22 @@ void update_wall_time(void)
537 * We'll correct this error next time through this function, when 825 * We'll correct this error next time through this function, when
538 * xtime_nsec is not as small. 826 * xtime_nsec is not as small.
539 */ 827 */
540 if (unlikely((s64)clock->xtime_nsec < 0)) { 828 if (unlikely((s64)timekeeper.xtime_nsec < 0)) {
541 s64 neg = -(s64)clock->xtime_nsec; 829 s64 neg = -(s64)timekeeper.xtime_nsec;
542 clock->xtime_nsec = 0; 830 timekeeper.xtime_nsec = 0;
543 clock->error += neg << (NTP_SCALE_SHIFT - clock->shift); 831 timekeeper.ntp_error += neg << timekeeper.ntp_error_shift;
544 } 832 }
545 833
546 /* store full nanoseconds into xtime after rounding it up and 834 /* store full nanoseconds into xtime after rounding it up and
547 * add the remainder to the error difference. 835 * add the remainder to the error difference.
548 */ 836 */
549 xtime.tv_nsec = ((s64)clock->xtime_nsec >> clock->shift) + 1; 837 xtime.tv_nsec = ((s64) timekeeper.xtime_nsec >> timekeeper.shift) + 1;
550 clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift; 838 timekeeper.xtime_nsec -= (s64) xtime.tv_nsec << timekeeper.shift;
551 clock->error += clock->xtime_nsec << (NTP_SCALE_SHIFT - clock->shift); 839 timekeeper.ntp_error += timekeeper.xtime_nsec <<
552 840 timekeeper.ntp_error_shift;
553 update_xtime_cache(cyc2ns(clock, offset));
554 841
555 /* check to see if there is a new clocksource to use */ 842 /* check to see if there is a new clocksource to use */
556 change_clocksource(); 843 update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
557 update_vsyscall(&xtime, clock);
558} 844}
559 845
560/** 846/**
@@ -570,9 +856,12 @@ void update_wall_time(void)
570 */ 856 */
571void getboottime(struct timespec *ts) 857void getboottime(struct timespec *ts)
572{ 858{
573 set_normalized_timespec(ts, 859 struct timespec boottime = {
574 - (wall_to_monotonic.tv_sec + total_sleep_time), 860 .tv_sec = wall_to_monotonic.tv_sec + total_sleep_time.tv_sec,
575 - wall_to_monotonic.tv_nsec); 861 .tv_nsec = wall_to_monotonic.tv_nsec + total_sleep_time.tv_nsec
862 };
863
864 set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec);
576} 865}
577 866
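
To make the sign convention in getboottime() above concrete (the numbers are made up for illustration): if the machine booted at wall-clock second 1000 and has since spent 50 seconds suspended, wall_to_monotonic.tv_sec is -1050 (set to -1000 at boot and decremented by the sleep length on resume) while total_sleep_time.tv_sec is 50; their sum is -1000, and negating it recovers the boot time of 1000 seconds.
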
578/** 867/**
@@ -581,15 +870,19 @@ void getboottime(struct timespec *ts)
581 */ 870 */
582void monotonic_to_bootbased(struct timespec *ts) 871void monotonic_to_bootbased(struct timespec *ts)
583{ 872{
584 ts->tv_sec += total_sleep_time; 873 *ts = timespec_add_safe(*ts, total_sleep_time);
585} 874}
586 875
587unsigned long get_seconds(void) 876unsigned long get_seconds(void)
588{ 877{
589 return xtime_cache.tv_sec; 878 return xtime.tv_sec;
590} 879}
591EXPORT_SYMBOL(get_seconds); 880EXPORT_SYMBOL(get_seconds);
592 881
882struct timespec __current_kernel_time(void)
883{
884 return xtime;
885}
593 886
594struct timespec current_kernel_time(void) 887struct timespec current_kernel_time(void)
595{ 888{
@@ -598,10 +891,25 @@ struct timespec current_kernel_time(void)
598 891
599 do { 892 do {
600 seq = read_seqbegin(&xtime_lock); 893 seq = read_seqbegin(&xtime_lock);
601 894 now = xtime;
602 now = xtime_cache;
603 } while (read_seqretry(&xtime_lock, seq)); 895 } while (read_seqretry(&xtime_lock, seq));
604 896
605 return now; 897 return now;
606} 898}
607EXPORT_SYMBOL(current_kernel_time); 899EXPORT_SYMBOL(current_kernel_time);
900
901struct timespec get_monotonic_coarse(void)
902{
903 struct timespec now, mono;
904 unsigned long seq;
905
906 do {
907 seq = read_seqbegin(&xtime_lock);
908 now = xtime;
909 mono = wall_to_monotonic;
910 } while (read_seqretry(&xtime_lock, seq));
911
912 set_normalized_timespec(&now, now.tv_sec + mono.tv_sec,
913 now.tv_nsec + mono.tv_nsec);
914 return now;
915}
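
To illustrate the O(log) catch-up strategy of logarithmic_accumulation()/update_wall_time() above, here is a standalone userspace sketch with made-up numbers (1000 cycles per tick, an offset of 11500 cycles): the starting shift is ilog2(11500) - ilog2(1000) = 13 - 9 = 4, so the loop works down through chunk sizes of 16, 8, 4, 2 and 1 intervals, consuming 8, 2 and 1 of them (the 16- and 4-interval chunks are too big) and leaving 500 cycles for the next call.

#include <stdint.h>
#include <stdio.h>

/* minimal ilog2 for the sketch; the argument is assumed non-zero */
static int ilog2_u64(uint64_t v)
{
	int l = -1;

	while (v) {
		v >>= 1;
		l++;
	}
	return l;
}

int main(void)
{
	uint64_t cycle_interval = 1000;	/* illustrative: cycles per tick */
	uint64_t offset = 11500;	/* ~11.5 ticks worth of cycles to catch up */
	int shift = ilog2_u64(offset) - ilog2_u64(cycle_interval);

	if (shift < 0)
		shift = 0;

	while (offset >= cycle_interval) {
		uint64_t chunk = cycle_interval << shift;

		/* mirrors logarithmic_accumulation(): skip chunks that are too big */
		if (offset >= chunk) {
			offset -= chunk;
			printf("accumulated %llu cycles (shift %d), %llu left\n",
			       (unsigned long long)chunk, shift,
			       (unsigned long long)offset);
		}
		shift--;
	}
	return 0;
}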
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index a999b92a1277..665c76edbf17 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -204,10 +204,12 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
204 return; 204 return;
205 } 205 }
206 SEQ_printf(m, "%s\n", dev->name); 206 SEQ_printf(m, "%s\n", dev->name);
207 SEQ_printf(m, " max_delta_ns: %lu\n", dev->max_delta_ns); 207 SEQ_printf(m, " max_delta_ns: %llu\n",
208 SEQ_printf(m, " min_delta_ns: %lu\n", dev->min_delta_ns); 208 (unsigned long long) dev->max_delta_ns);
209 SEQ_printf(m, " mult: %lu\n", dev->mult); 209 SEQ_printf(m, " min_delta_ns: %llu\n",
210 SEQ_printf(m, " shift: %d\n", dev->shift); 210 (unsigned long long) dev->min_delta_ns);
211 SEQ_printf(m, " mult: %u\n", dev->mult);
212 SEQ_printf(m, " shift: %u\n", dev->shift);
211 SEQ_printf(m, " mode: %d\n", dev->mode); 213 SEQ_printf(m, " mode: %d\n", dev->mode);
212 SEQ_printf(m, " next_event: %Ld nsecs\n", 214 SEQ_printf(m, " next_event: %Ld nsecs\n",
213 (unsigned long long) ktime_to_ns(dev->next_event)); 215 (unsigned long long) ktime_to_ns(dev->next_event));
@@ -275,7 +277,7 @@ static int timer_list_open(struct inode *inode, struct file *filp)
275 return single_open(filp, timer_list_show, NULL); 277 return single_open(filp, timer_list_show, NULL);
276} 278}
277 279
278static struct file_operations timer_list_fops = { 280static const struct file_operations timer_list_fops = {
279 .open = timer_list_open, 281 .open = timer_list_open,
280 .read = seq_read, 282 .read = seq_read,
281 .llseek = seq_lseek, 283 .llseek = seq_lseek,
@@ -286,7 +288,7 @@ static int __init init_timer_list_procfs(void)
286{ 288{
287 struct proc_dir_entry *pe; 289 struct proc_dir_entry *pe;
288 290
289 pe = proc_create("timer_list", 0644, NULL, &timer_list_fops); 291 pe = proc_create("timer_list", 0444, NULL, &timer_list_fops);
290 if (!pe) 292 if (!pe)
291 return -ENOMEM; 293 return -ENOMEM;
292 return 0; 294 return 0;
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c
index c994530d166d..ee5681f8d7ec 100644
--- a/kernel/time/timer_stats.c
+++ b/kernel/time/timer_stats.c
@@ -96,7 +96,7 @@ static DEFINE_MUTEX(show_mutex);
96/* 96/*
97 * Collection status, active/inactive: 97 * Collection status, active/inactive:
98 */ 98 */
99static int __read_mostly active; 99int __read_mostly timer_stats_active;
100 100
101/* 101/*
102 * Beginning/end timestamps of measurement: 102 * Beginning/end timestamps of measurement:
@@ -242,7 +242,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
242 struct entry *entry, input; 242 struct entry *entry, input;
243 unsigned long flags; 243 unsigned long flags;
244 244
245 if (likely(!active)) 245 if (likely(!timer_stats_active))
246 return; 246 return;
247 247
248 lock = &per_cpu(lookup_lock, raw_smp_processor_id()); 248 lock = &per_cpu(lookup_lock, raw_smp_processor_id());
@@ -254,7 +254,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
254 input.timer_flag = timer_flag; 254 input.timer_flag = timer_flag;
255 255
256 spin_lock_irqsave(lock, flags); 256 spin_lock_irqsave(lock, flags);
257 if (!active) 257 if (!timer_stats_active)
258 goto out_unlock; 258 goto out_unlock;
259 259
260 entry = tstat_lookup(&input, comm); 260 entry = tstat_lookup(&input, comm);
@@ -290,7 +290,7 @@ static int tstats_show(struct seq_file *m, void *v)
290 /* 290 /*
291 * If still active then calculate up to now: 291 * If still active then calculate up to now:
292 */ 292 */
293 if (active) 293 if (timer_stats_active)
294 time_stop = ktime_get(); 294 time_stop = ktime_get();
295 295
296 time = ktime_sub(time_stop, time_start); 296 time = ktime_sub(time_stop, time_start);
@@ -368,18 +368,18 @@ static ssize_t tstats_write(struct file *file, const char __user *buf,
368 mutex_lock(&show_mutex); 368 mutex_lock(&show_mutex);
369 switch (ctl[0]) { 369 switch (ctl[0]) {
370 case '0': 370 case '0':
371 if (active) { 371 if (timer_stats_active) {
372 active = 0; 372 timer_stats_active = 0;
373 time_stop = ktime_get(); 373 time_stop = ktime_get();
374 sync_access(); 374 sync_access();
375 } 375 }
376 break; 376 break;
377 case '1': 377 case '1':
378 if (!active) { 378 if (!timer_stats_active) {
379 reset_entries(); 379 reset_entries();
380 time_start = ktime_get(); 380 time_start = ktime_get();
381 smp_mb(); 381 smp_mb();
382 active = 1; 382 timer_stats_active = 1;
383 } 383 }
384 break; 384 break;
385 default: 385 default:
@@ -395,7 +395,7 @@ static int tstats_open(struct inode *inode, struct file *filp)
395 return single_open(filp, tstats_show, NULL); 395 return single_open(filp, tstats_show, NULL);
396} 396}
397 397
398static struct file_operations tstats_fops = { 398static const struct file_operations tstats_fops = {
399 .open = tstats_open, 399 .open = tstats_open,
400 .read = seq_read, 400 .read = seq_read,
401 .write = tstats_write, 401 .write = tstats_write,