aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2009-09-18 12:15:24 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2009-09-18 12:15:24 -0400
commit a03fdb7612874834d6847107198712d18b5242c7 (patch)
tree 9ae81170509fd8b1c23d1b7e8edfa7a2203ffce3
parent 202c4675c55ddf6b443c7e057d2dff6b42ef71aa (diff)
parent 12e09337fe238981cb0c87543306e23775d1a143 (diff)
Merge branch 'timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (34 commits)
  time: Prevent 32 bit overflow with set_normalized_timespec()
  clocksource: Delay clocksource down rating to late boot
  clocksource: clocksource_select must be called with mutex locked
  clocksource: Resolve cpu hotplug dead lock with TSC unstable, fix crash
  timers: Drop a function prototype
  clocksource: Resolve cpu hotplug dead lock with TSC unstable
  timer.c: Fix S/390 comments
  timekeeping: Fix invalid getboottime() value
  timekeeping: Fix up read_persistent_clock() breakage on sh
  timekeeping: Increase granularity of read_persistent_clock(), build fix
  time: Introduce CLOCK_REALTIME_COARSE
  x86: Do not unregister PIT clocksource on PIT oneshot setup/shutdown
  clocksource: Avoid clocksource watchdog circular locking dependency
  clocksource: Protect the watchdog rating changes with clocksource_mutex
  clocksource: Call clocksource_change_rating() outside of watchdog_lock
  timekeeping: Introduce read_boot_clock
  timekeeping: Increase granularity of read_persistent_clock()
  timekeeping: Update clocksource with stop_machine
  timekeeping: Add timekeeper read_clock helper functions
  timekeeping: Move NTP adjusted clock multiplier to struct timekeeper
  ...

Fix trivial conflict due to MIPS lemote -> loongson renaming.
-rw-r--r-- arch/arm/plat-omap/common.c 7
-rw-r--r-- arch/m68knommu/kernel/time.c 5
-rw-r--r-- arch/mips/dec/time.c 5
-rw-r--r-- arch/mips/lasat/ds1603.c 5
-rw-r--r-- arch/mips/lasat/sysctl.c 8
-rw-r--r-- arch/mips/loongson/common/time.c 5
-rw-r--r-- arch/mips/mti-malta/malta-time.c 5
-rw-r--r-- arch/mips/pmc-sierra/yosemite/setup.c 5
-rw-r--r-- arch/mips/sibyte/swarm/setup.c 15
-rw-r--r-- arch/mips/sni/time.c 5
-rw-r--r-- arch/powerpc/kernel/time.c 19
-rw-r--r-- arch/s390/kernel/time.c 38
-rw-r--r-- arch/sh/kernel/time.c 6
-rw-r--r-- arch/x86/include/asm/vgtod.h 1
-rw-r--r-- arch/x86/kernel/i8253.c 19
-rw-r--r-- arch/x86/kernel/rtc.c 5
-rw-r--r-- arch/x86/kernel/tsc.c 14
-rw-r--r-- arch/x86/kernel/vsyscall_64.c 1
-rw-r--r-- arch/x86/vdso/vclock_gettime.c 39
-rw-r--r-- arch/xtensa/kernel/time.c 5
-rw-r--r-- include/linux/clocksource.h 122
-rw-r--r-- include/linux/hrtimer.h 2
-rw-r--r-- include/linux/time.h 10
-rw-r--r-- include/linux/timer.h 5
-rw-r--r-- kernel/hrtimer.c 57
-rw-r--r-- kernel/posix-timers.c 35
-rw-r--r-- kernel/time.c 9
-rw-r--r-- kernel/time/clocksource.c 529
-rw-r--r-- kernel/time/jiffies.c 6
-rw-r--r-- kernel/time/ntp.c 7
-rw-r--r-- kernel/time/timekeeping.c 535
-rw-r--r-- kernel/timer.c 28
32 files changed, 917 insertions, 640 deletions
diff --git a/arch/arm/plat-omap/common.c b/arch/arm/plat-omap/common.c
index ebcf006406f..95587b6c025 100644
--- a/arch/arm/plat-omap/common.c
+++ b/arch/arm/plat-omap/common.c
@@ -253,11 +253,8 @@ static struct clocksource clocksource_32k = {
253 */ 253 */
254unsigned long long sched_clock(void) 254unsigned long long sched_clock(void)
255{ 255{
256 unsigned long long ret; 256 return clocksource_cyc2ns(clocksource_32k.read(&clocksource_32k),
257 257 clocksource_32k.mult, clocksource_32k.shift);
258 ret = (unsigned long long)clocksource_32k.read(&clocksource_32k);
259 ret = (ret * clocksource_32k.mult_orig) >> clocksource_32k.shift;
260 return ret;
261} 258}
262 259
263static int __init omap_init_clocksource_32k(void) 260static int __init omap_init_clocksource_32k(void)
diff --git a/arch/m68knommu/kernel/time.c b/arch/m68knommu/kernel/time.c
index c2aa717de08..a90acf5b0cd 100644
--- a/arch/m68knommu/kernel/time.c
+++ b/arch/m68knommu/kernel/time.c
@@ -72,9 +72,10 @@ static unsigned long read_rtc_mmss(void)
72 return mktime(year, mon, day, hour, min, sec); 72 return mktime(year, mon, day, hour, min, sec);
73} 73}
74 74
75unsigned long read_persistent_clock(void) 75void read_persistent_clock(struct timespec *ts)
76{ 76{
77 return read_rtc_mmss(); 77 ts->tv_sec = read_rtc_mmss();
78 ts->tv_nsec = 0;
78} 79}
79 80
80int update_persistent_clock(struct timespec now) 81int update_persistent_clock(struct timespec now)
diff --git a/arch/mips/dec/time.c b/arch/mips/dec/time.c
index 463136e6685..02f505f23c3 100644
--- a/arch/mips/dec/time.c
+++ b/arch/mips/dec/time.c
@@ -18,7 +18,7 @@
18#include <asm/dec/ioasic.h> 18#include <asm/dec/ioasic.h>
19#include <asm/dec/machtype.h> 19#include <asm/dec/machtype.h>
20 20
21unsigned long read_persistent_clock(void) 21void read_persistent_clock(struct timespec *ts)
22{ 22{
23 unsigned int year, mon, day, hour, min, sec, real_year; 23 unsigned int year, mon, day, hour, min, sec, real_year;
24 unsigned long flags; 24 unsigned long flags;
@@ -53,7 +53,8 @@ unsigned long read_persistent_clock(void)
53 53
54 year += real_year - 72 + 2000; 54 year += real_year - 72 + 2000;
55 55
56 return mktime(year, mon, day, hour, min, sec); 56 ts->tv_sec = mktime(year, mon, day, hour, min, sec);
57 ts->tv_nsec = 0;
57} 58}
58 59
59/* 60/*
diff --git a/arch/mips/lasat/ds1603.c b/arch/mips/lasat/ds1603.c
index 52cb1436a12..c6fd96ff118 100644
--- a/arch/mips/lasat/ds1603.c
+++ b/arch/mips/lasat/ds1603.c
@@ -135,7 +135,7 @@ static void rtc_end_op(void)
135 lasat_ndelay(1000); 135 lasat_ndelay(1000);
136} 136}
137 137
138unsigned long read_persistent_clock(void) 138void read_persistent_clock(struct timespec *ts)
139{ 139{
140 unsigned long word; 140 unsigned long word;
141 unsigned long flags; 141 unsigned long flags;
@@ -147,7 +147,8 @@ unsigned long read_persistent_clock(void)
147 rtc_end_op(); 147 rtc_end_op();
148 spin_unlock_irqrestore(&rtc_lock, flags); 148 spin_unlock_irqrestore(&rtc_lock, flags);
149 149
150 return word; 150 ts->tv_sec = word;
151 ts->tv_nsec = 0;
151} 152}
152 153
153int rtc_mips_set_mmss(unsigned long time) 154int rtc_mips_set_mmss(unsigned long time)
diff --git a/arch/mips/lasat/sysctl.c b/arch/mips/lasat/sysctl.c
index 8f88886feb1..3f04d4c406b 100644
--- a/arch/mips/lasat/sysctl.c
+++ b/arch/mips/lasat/sysctl.c
@@ -92,10 +92,12 @@ static int rtctmp;
92int proc_dolasatrtc(ctl_table *table, int write, struct file *filp, 92int proc_dolasatrtc(ctl_table *table, int write, struct file *filp,
93 void *buffer, size_t *lenp, loff_t *ppos) 93 void *buffer, size_t *lenp, loff_t *ppos)
94{ 94{
95 struct timespec ts;
95 int r; 96 int r;
96 97
97 if (!write) { 98 if (!write) {
98 rtctmp = read_persistent_clock(); 99 read_persistent_clock(&ts);
100 rtctmp = ts.tv_sec;
99 /* check for time < 0 and set to 0 */ 101 /* check for time < 0 and set to 0 */
100 if (rtctmp < 0) 102 if (rtctmp < 0)
101 rtctmp = 0; 103 rtctmp = 0;
@@ -134,9 +136,11 @@ int sysctl_lasat_rtc(ctl_table *table,
134 void *oldval, size_t *oldlenp, 136 void *oldval, size_t *oldlenp,
135 void *newval, size_t newlen) 137 void *newval, size_t newlen)
136{ 138{
139 struct timespec ts;
137 int r; 140 int r;
138 141
139 rtctmp = read_persistent_clock(); 142 read_persistent_clock(&ts);
143 rtctmp = ts.tv_sec;
140 if (rtctmp < 0) 144 if (rtctmp < 0)
141 rtctmp = 0; 145 rtctmp = 0;
142 r = sysctl_intvec(table, oldval, oldlenp, newval, newlen); 146 r = sysctl_intvec(table, oldval, oldlenp, newval, newlen);
diff --git a/arch/mips/loongson/common/time.c b/arch/mips/loongson/common/time.c
index b13d1717465..0edbef32b86 100644
--- a/arch/mips/loongson/common/time.c
+++ b/arch/mips/loongson/common/time.c
@@ -21,7 +21,8 @@ void __init plat_time_init(void)
21 mips_hpt_frequency = cpu_clock_freq / 2; 21 mips_hpt_frequency = cpu_clock_freq / 2;
22} 22}
23 23
24unsigned long read_persistent_clock(void) 24void read_persistent_clock(struct timespec *ts)
25{ 25{
26 return mc146818_get_cmos_time(); 26 ts->tv_sec = mc146818_get_cmos_time();
27 ts->tv_nsec = 0;
27} 28}
diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c
index 0b97d47691f..3c6f190aa61 100644
--- a/arch/mips/mti-malta/malta-time.c
+++ b/arch/mips/mti-malta/malta-time.c
@@ -100,9 +100,10 @@ static unsigned int __init estimate_cpu_frequency(void)
100 return count; 100 return count;
101} 101}
102 102
103unsigned long read_persistent_clock(void) 103void read_persistent_clock(struct timespec *ts)
104{ 104{
105 return mc146818_get_cmos_time(); 105 ts->tv_sec = mc146818_get_cmos_time();
106 ts->tv_nsec = 0;
106} 107}
107 108
108static void __init plat_perf_setup(void) 109static void __init plat_perf_setup(void)
diff --git a/arch/mips/pmc-sierra/yosemite/setup.c b/arch/mips/pmc-sierra/yosemite/setup.c
index 2d3c0dca275..3498ac9c35a 100644
--- a/arch/mips/pmc-sierra/yosemite/setup.c
+++ b/arch/mips/pmc-sierra/yosemite/setup.c
@@ -70,7 +70,7 @@ void __init bus_error_init(void)
70} 70}
71 71
72 72
73unsigned long read_persistent_clock(void) 73void read_persistent_clock(struct timespec *ts)
74{ 74{
75 unsigned int year, month, day, hour, min, sec; 75 unsigned int year, month, day, hour, min, sec;
76 unsigned long flags; 76 unsigned long flags;
@@ -92,7 +92,8 @@ unsigned long read_persistent_clock(void)
92 m48t37_base->control = 0x00; 92 m48t37_base->control = 0x00;
93 spin_unlock_irqrestore(&rtc_lock, flags); 93 spin_unlock_irqrestore(&rtc_lock, flags);
94 94
95 return mktime(year, month, day, hour, min, sec); 95 ts->tv_sec = mktime(year, month, day, hour, min, sec);
96 ts->tv_nsec = 0;
96} 97}
97 98
98int rtc_mips_set_time(unsigned long tim) 99int rtc_mips_set_time(unsigned long tim)
diff --git a/arch/mips/sibyte/swarm/setup.c b/arch/mips/sibyte/swarm/setup.c
index 672e45d495a..623ffc933c4 100644
--- a/arch/mips/sibyte/swarm/setup.c
+++ b/arch/mips/sibyte/swarm/setup.c
@@ -87,19 +87,26 @@ enum swarm_rtc_type {
87 87
88enum swarm_rtc_type swarm_rtc_type; 88enum swarm_rtc_type swarm_rtc_type;
89 89
90unsigned long read_persistent_clock(void) 90void read_persistent_clock(struct timespec *ts)
91{ 91{
92 unsigned long sec;
93
92 switch (swarm_rtc_type) { 94 switch (swarm_rtc_type) {
93 case RTC_XICOR: 95 case RTC_XICOR:
94 return xicor_get_time(); 96 sec = xicor_get_time();
97 break;
95 98
96 case RTC_M4LT81: 99 case RTC_M4LT81:
97 return m41t81_get_time(); 100 sec = m41t81_get_time();
101 break;
98 102
99 case RTC_NONE: 103 case RTC_NONE:
100 default: 104 default:
101 return mktime(2000, 1, 1, 0, 0, 0); 105 sec = mktime(2000, 1, 1, 0, 0, 0);
106 break;
102 } 107 }
108 ts->tv_sec = sec;
109 ts->tv_nsec = 0;
103} 110}
104 111
105int rtc_mips_set_time(unsigned long sec) 112int rtc_mips_set_time(unsigned long sec)
diff --git a/arch/mips/sni/time.c b/arch/mips/sni/time.c
index 0d9ec1a5c24..62df6a598e0 100644
--- a/arch/mips/sni/time.c
+++ b/arch/mips/sni/time.c
@@ -182,7 +182,8 @@ void __init plat_time_init(void)
182 setup_pit_timer(); 182 setup_pit_timer();
183} 183}
184 184
185unsigned long read_persistent_clock(void) 185void read_persistent_clock(struct timespec *ts)
186{ 186{
187 return -1; 187 ts->tv_sec = -1;
188 ts->tv_nsec = 0;
188} 189}
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index a180b4f9a4f..465e498bcb3 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -774,11 +774,12 @@ int update_persistent_clock(struct timespec now)
774 return ppc_md.set_rtc_time(&tm); 774 return ppc_md.set_rtc_time(&tm);
775} 775}
776 776
777unsigned long read_persistent_clock(void) 777void read_persistent_clock(struct timespec *ts)
778{ 778{
779 struct rtc_time tm; 779 struct rtc_time tm;
780 static int first = 1; 780 static int first = 1;
781 781
782 ts->tv_nsec = 0;
782 /* XXX this is a litle fragile but will work okay in the short term */ 783 /* XXX this is a litle fragile but will work okay in the short term */
783 if (first) { 784 if (first) {
784 first = 0; 785 first = 0;
@@ -786,14 +787,18 @@ unsigned long read_persistent_clock(void)
786 timezone_offset = ppc_md.time_init(); 787 timezone_offset = ppc_md.time_init();
787 788
788 /* get_boot_time() isn't guaranteed to be safe to call late */ 789 /* get_boot_time() isn't guaranteed to be safe to call late */
789 if (ppc_md.get_boot_time) 790 if (ppc_md.get_boot_time) {
790 return ppc_md.get_boot_time() -timezone_offset; 791 ts->tv_sec = ppc_md.get_boot_time() - timezone_offset;
792 return;
793 }
794 }
795 if (!ppc_md.get_rtc_time) {
796 ts->tv_sec = 0;
797 return;
791 } 798 }
792 if (!ppc_md.get_rtc_time)
793 return 0;
794 ppc_md.get_rtc_time(&tm); 799 ppc_md.get_rtc_time(&tm);
795 return mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday, 800 ts->tv_sec = mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday,
796 tm.tm_hour, tm.tm_min, tm.tm_sec); 801 tm.tm_hour, tm.tm_min, tm.tm_sec);
797} 802}
798 803
799/* clocksource code */ 804/* clocksource code */
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index e3dc28b8075..34162a0b2ca 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -184,12 +184,14 @@ static void timing_alert_interrupt(__u16 code)
184static void etr_reset(void); 184static void etr_reset(void);
185static void stp_reset(void); 185static void stp_reset(void);
186 186
187unsigned long read_persistent_clock(void) 187void read_persistent_clock(struct timespec *ts)
188{ 188{
189 struct timespec ts; 189 tod_to_timeval(get_clock() - TOD_UNIX_EPOCH, ts);
190}
190 191
191 tod_to_timeval(get_clock() - TOD_UNIX_EPOCH, &ts); 192void read_boot_clock(struct timespec *ts)
192 return ts.tv_sec; 193{
194 tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, ts);
193} 195}
194 196
195static cycle_t read_tod_clock(struct clocksource *cs) 197static cycle_t read_tod_clock(struct clocksource *cs)
@@ -207,6 +209,10 @@ static struct clocksource clocksource_tod = {
207 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 209 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
208}; 210};
209 211
212struct clocksource * __init clocksource_default_clock(void)
213{
214 return &clocksource_tod;
215}
210 216
211void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) 217void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
212{ 218{
@@ -244,10 +250,6 @@ void update_vsyscall_tz(void)
244 */ 250 */
245void __init time_init(void) 251void __init time_init(void)
246{ 252{
247 struct timespec ts;
248 unsigned long flags;
249 cycle_t now;
250
251 /* Reset time synchronization interfaces. */ 253 /* Reset time synchronization interfaces. */
252 etr_reset(); 254 etr_reset();
253 stp_reset(); 255 stp_reset();
@@ -263,26 +265,6 @@ void __init time_init(void)
263 if (clocksource_register(&clocksource_tod) != 0) 265 if (clocksource_register(&clocksource_tod) != 0)
264 panic("Could not register TOD clock source"); 266 panic("Could not register TOD clock source");
265 267
266 /*
267 * The TOD clock is an accurate clock. The xtime should be
268 * initialized in a way that the difference between TOD and
269 * xtime is reasonably small. Too bad that timekeeping_init
270 * sets xtime.tv_nsec to zero. In addition the clock source
271 * change from the jiffies clock source to the TOD clock
272 * source add another error of up to 1/HZ second. The same
273 * function sets wall_to_monotonic to a value that is too
274 * small for /proc/uptime to be accurate.
275 * Reset xtime and wall_to_monotonic to sane values.
276 */
277 write_seqlock_irqsave(&xtime_lock, flags);
278 now = get_clock();
279 tod_to_timeval(now - TOD_UNIX_EPOCH, &xtime);
280 clocksource_tod.cycle_last = now;
281 clocksource_tod.raw_time = xtime;
282 tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, &ts);
283 set_normalized_timespec(&wall_to_monotonic, -ts.tv_sec, -ts.tv_nsec);
284 write_sequnlock_irqrestore(&xtime_lock, flags);
285
286 /* Enable TOD clock interrupts on the boot cpu. */ 268 /* Enable TOD clock interrupts on the boot cpu. */
287 init_cpu_timer(); 269 init_cpu_timer();
288 270
diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c
index 9b352a1e3fb..0e0e8581cf7 100644
--- a/arch/sh/kernel/time.c
+++ b/arch/sh/kernel/time.c
@@ -39,11 +39,9 @@ void (*rtc_sh_get_time)(struct timespec *) = null_rtc_get_time;
39int (*rtc_sh_set_time)(const time_t) = null_rtc_set_time; 39int (*rtc_sh_set_time)(const time_t) = null_rtc_set_time;
40 40
41#ifdef CONFIG_GENERIC_CMOS_UPDATE 41#ifdef CONFIG_GENERIC_CMOS_UPDATE
42unsigned long read_persistent_clock(void) 42void read_persistent_clock(struct timespec *ts)
43{ 43{
44 struct timespec tv; 44 rtc_sh_get_time(ts);
45 rtc_sh_get_time(&tv);
46 return tv.tv_sec;
47} 45}
48 46
49int update_persistent_clock(struct timespec now) 47int update_persistent_clock(struct timespec now)
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index dc27a69e5d2..3d61e204826 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -21,6 +21,7 @@ struct vsyscall_gtod_data {
21 u32 shift; 21 u32 shift;
22 } clock; 22 } clock;
23 struct timespec wall_to_monotonic; 23 struct timespec wall_to_monotonic;
24 struct timespec wall_time_coarse;
24}; 25};
25extern struct vsyscall_gtod_data __vsyscall_gtod_data 26extern struct vsyscall_gtod_data __vsyscall_gtod_data
26__section_vsyscall_gtod_data; 27__section_vsyscall_gtod_data;
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index 5cf36c053ac..23c167925a5 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -19,12 +19,6 @@
19DEFINE_SPINLOCK(i8253_lock); 19DEFINE_SPINLOCK(i8253_lock);
20EXPORT_SYMBOL(i8253_lock); 20EXPORT_SYMBOL(i8253_lock);
21 21
22#ifdef CONFIG_X86_32
23static void pit_disable_clocksource(void);
24#else
25static inline void pit_disable_clocksource(void) { }
26#endif
27
28/* 22/*
29 * HPET replaces the PIT, when enabled. So we need to know, which of 23 * HPET replaces the PIT, when enabled. So we need to know, which of
30 * the two timers is used 24 * the two timers is used
@@ -57,12 +51,10 @@ static void init_pit_timer(enum clock_event_mode mode,
57 outb_pit(0, PIT_CH0); 51 outb_pit(0, PIT_CH0);
58 outb_pit(0, PIT_CH0); 52 outb_pit(0, PIT_CH0);
59 } 53 }
60 pit_disable_clocksource();
61 break; 54 break;
62 55
63 case CLOCK_EVT_MODE_ONESHOT: 56 case CLOCK_EVT_MODE_ONESHOT:
64 /* One shot setup */ 57 /* One shot setup */
65 pit_disable_clocksource();
66 outb_pit(0x38, PIT_MODE); 58 outb_pit(0x38, PIT_MODE);
67 break; 59 break;
68 60
@@ -200,17 +192,6 @@ static struct clocksource pit_cs = {
200 .shift = 20, 192 .shift = 20,
201}; 193};
202 194
203static void pit_disable_clocksource(void)
204{
205 /*
206 * Use mult to check whether it is registered or not
207 */
208 if (pit_cs.mult) {
209 clocksource_unregister(&pit_cs);
210 pit_cs.mult = 0;
211 }
212}
213
214static int __init init_pit_clocksource(void) 195static int __init init_pit_clocksource(void)
215{ 196{
216 /* 197 /*
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 5d465b207e7..bf67dcb4a44 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -178,7 +178,7 @@ static int set_rtc_mmss(unsigned long nowtime)
178} 178}
179 179
180/* not static: needed by APM */ 180/* not static: needed by APM */
181unsigned long read_persistent_clock(void) 181void read_persistent_clock(struct timespec *ts)
182{ 182{
183 unsigned long retval, flags; 183 unsigned long retval, flags;
184 184
@@ -186,7 +186,8 @@ unsigned long read_persistent_clock(void)
186 retval = get_wallclock(); 186 retval = get_wallclock();
187 spin_unlock_irqrestore(&rtc_lock, flags); 187 spin_unlock_irqrestore(&rtc_lock, flags);
188 188
189 return retval; 189 ts->tv_sec = retval;
190 ts->tv_nsec = 0;
190} 191}
191 192
192int update_persistent_clock(struct timespec now) 193int update_persistent_clock(struct timespec now)
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 71f4368b357..fc3672a303d 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -744,10 +744,16 @@ static cycle_t __vsyscall_fn vread_tsc(void)
744} 744}
745#endif 745#endif
746 746
747static void resume_tsc(void)
748{
749 clocksource_tsc.cycle_last = 0;
750}
751
747static struct clocksource clocksource_tsc = { 752static struct clocksource clocksource_tsc = {
748 .name = "tsc", 753 .name = "tsc",
749 .rating = 300, 754 .rating = 300,
750 .read = read_tsc, 755 .read = read_tsc,
756 .resume = resume_tsc,
751 .mask = CLOCKSOURCE_MASK(64), 757 .mask = CLOCKSOURCE_MASK(64),
752 .shift = 22, 758 .shift = 22,
753 .flags = CLOCK_SOURCE_IS_CONTINUOUS | 759 .flags = CLOCK_SOURCE_IS_CONTINUOUS |
@@ -761,12 +767,14 @@ void mark_tsc_unstable(char *reason)
761{ 767{
762 if (!tsc_unstable) { 768 if (!tsc_unstable) {
763 tsc_unstable = 1; 769 tsc_unstable = 1;
764 printk("Marking TSC unstable due to %s\n", reason); 770 printk(KERN_INFO "Marking TSC unstable due to %s\n", reason);
765 /* Change only the rating, when not registered */ 771 /* Change only the rating, when not registered */
766 if (clocksource_tsc.mult) 772 if (clocksource_tsc.mult)
767 clocksource_change_rating(&clocksource_tsc, 0); 773 clocksource_mark_unstable(&clocksource_tsc);
768 else 774 else {
775 clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE;
769 clocksource_tsc.rating = 0; 776 clocksource_tsc.rating = 0;
777 }
770 } 778 }
771} 779}
772 780
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 25ee06a80aa..cf53a78e2dc 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -87,6 +87,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
87 vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; 87 vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
88 vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; 88 vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
89 vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic; 89 vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic;
90 vsyscall_gtod_data.wall_time_coarse = __current_kernel_time();
90 write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); 91 write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
91} 92}
92 93
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 6a40b78b46a..ee55754cc3c 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -86,14 +86,47 @@ notrace static noinline int do_monotonic(struct timespec *ts)
86 return 0; 86 return 0;
87} 87}
88 88
89notrace static noinline int do_realtime_coarse(struct timespec *ts)
90{
91 unsigned long seq;
92 do {
93 seq = read_seqbegin(&gtod->lock);
94 ts->tv_sec = gtod->wall_time_coarse.tv_sec;
95 ts->tv_nsec = gtod->wall_time_coarse.tv_nsec;
96 } while (unlikely(read_seqretry(&gtod->lock, seq)));
97 return 0;
98}
99
100notrace static noinline int do_monotonic_coarse(struct timespec *ts)
101{
102 unsigned long seq, ns, secs;
103 do {
104 seq = read_seqbegin(&gtod->lock);
105 secs = gtod->wall_time_coarse.tv_sec;
106 ns = gtod->wall_time_coarse.tv_nsec;
107 secs += gtod->wall_to_monotonic.tv_sec;
108 ns += gtod->wall_to_monotonic.tv_nsec;
109 } while (unlikely(read_seqretry(&gtod->lock, seq)));
110 vset_normalized_timespec(ts, secs, ns);
111 return 0;
112}
113
89notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) 114notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
90{ 115{
91 if (likely(gtod->sysctl_enabled && gtod->clock.vread)) 116 if (likely(gtod->sysctl_enabled))
92 switch (clock) { 117 switch (clock) {
93 case CLOCK_REALTIME: 118 case CLOCK_REALTIME:
94 return do_realtime(ts); 119 if (likely(gtod->clock.vread))
120 return do_realtime(ts);
121 break;
95 case CLOCK_MONOTONIC: 122 case CLOCK_MONOTONIC:
96 return do_monotonic(ts); 123 if (likely(gtod->clock.vread))
124 return do_monotonic(ts);
125 break;
126 case CLOCK_REALTIME_COARSE:
127 return do_realtime_coarse(ts);
128 case CLOCK_MONOTONIC_COARSE:
129 return do_monotonic_coarse(ts);
97 } 130 }
98 return vdso_fallback_gettime(clock, ts); 131 return vdso_fallback_gettime(clock, ts);
99} 132}
diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c
index 8848120d291..19085ff0484 100644
--- a/arch/xtensa/kernel/time.c
+++ b/arch/xtensa/kernel/time.c
@@ -59,9 +59,8 @@ static struct irqaction timer_irqaction = {
59 59
60void __init time_init(void) 60void __init time_init(void)
61{ 61{
62 xtime.tv_nsec = 0; 62 /* FIXME: xtime&wall_to_monotonic are set in timekeeping_init. */
63 xtime.tv_sec = read_persistent_clock(); 63 read_persistent_clock(&xtime);
64
65 set_normalized_timespec(&wall_to_monotonic, 64 set_normalized_timespec(&wall_to_monotonic,
66 -xtime.tv_sec, -xtime.tv_nsec); 65 -xtime.tv_sec, -xtime.tv_nsec);
67 66
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 1219be4fb42..83d2fbd81b9 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -14,6 +14,7 @@
14#include <linux/list.h> 14#include <linux/list.h>
15#include <linux/cache.h> 15#include <linux/cache.h>
16#include <linux/timer.h> 16#include <linux/timer.h>
17#include <linux/init.h>
17#include <asm/div64.h> 18#include <asm/div64.h>
18#include <asm/io.h> 19#include <asm/io.h>
19 20
@@ -148,14 +149,11 @@ extern u64 timecounter_cyc2time(struct timecounter *tc,
148 * @disable: optional function to disable the clocksource 149 * @disable: optional function to disable the clocksource
149 * @mask: bitmask for two's complement 150 * @mask: bitmask for two's complement
150 * subtraction of non 64 bit counters 151 * subtraction of non 64 bit counters
151 * @mult: cycle to nanosecond multiplier (adjusted by NTP) 152 * @mult: cycle to nanosecond multiplier
152 * @mult_orig: cycle to nanosecond multiplier (unadjusted by NTP)
153 * @shift: cycle to nanosecond divisor (power of two) 153 * @shift: cycle to nanosecond divisor (power of two)
154 * @flags: flags describing special properties 154 * @flags: flags describing special properties
155 * @vread: vsyscall based read 155 * @vread: vsyscall based read
156 * @resume: resume function for the clocksource, if necessary 156 * @resume: resume function for the clocksource, if necessary
157 * @cycle_interval: Used internally by timekeeping core, please ignore.
158 * @xtime_interval: Used internally by timekeeping core, please ignore.
159 */ 157 */
160struct clocksource { 158struct clocksource {
161 /* 159 /*
@@ -169,7 +167,6 @@ struct clocksource {
169 void (*disable)(struct clocksource *cs); 167 void (*disable)(struct clocksource *cs);
170 cycle_t mask; 168 cycle_t mask;
171 u32 mult; 169 u32 mult;
172 u32 mult_orig;
173 u32 shift; 170 u32 shift;
174 unsigned long flags; 171 unsigned long flags;
175 cycle_t (*vread)(void); 172 cycle_t (*vread)(void);
@@ -181,19 +178,12 @@ struct clocksource {
181#define CLKSRC_FSYS_MMIO_SET(mmio, addr) do { } while (0) 178#define CLKSRC_FSYS_MMIO_SET(mmio, addr) do { } while (0)
182#endif 179#endif
183 180
184 /* timekeeping specific data, ignore */
185 cycle_t cycle_interval;
186 u64 xtime_interval;
187 u32 raw_interval;
188 /* 181 /*
189 * Second part is written at each timer interrupt 182 * Second part is written at each timer interrupt
190 * Keep it in a different cache line to dirty no 183 * Keep it in a different cache line to dirty no
191 * more than one cache line. 184 * more than one cache line.
192 */ 185 */
193 cycle_t cycle_last ____cacheline_aligned_in_smp; 186 cycle_t cycle_last ____cacheline_aligned_in_smp;
194 u64 xtime_nsec;
195 s64 error;
196 struct timespec raw_time;
197 187
198#ifdef CONFIG_CLOCKSOURCE_WATCHDOG 188#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
199 /* Watchdog related data, used by the framework */ 189 /* Watchdog related data, used by the framework */
@@ -202,8 +192,6 @@ struct clocksource {
202#endif 192#endif
203}; 193};
204 194
205extern struct clocksource *clock; /* current clocksource */
206
207/* 195/*
208 * Clock source flags bits:: 196 * Clock source flags bits::
209 */ 197 */
@@ -212,6 +200,7 @@ extern struct clocksource *clock; /* current clocksource */
212 200
213#define CLOCK_SOURCE_WATCHDOG 0x10 201#define CLOCK_SOURCE_WATCHDOG 0x10
214#define CLOCK_SOURCE_VALID_FOR_HRES 0x20 202#define CLOCK_SOURCE_VALID_FOR_HRES 0x20
203#define CLOCK_SOURCE_UNSTABLE 0x40
215 204
216/* simplify initialization of mask field */ 205/* simplify initialization of mask field */
217#define CLOCKSOURCE_MASK(bits) (cycle_t)((bits) < 64 ? ((1ULL<<(bits))-1) : -1) 206#define CLOCKSOURCE_MASK(bits) (cycle_t)((bits) < 64 ? ((1ULL<<(bits))-1) : -1)
@@ -268,108 +257,15 @@ static inline u32 clocksource_hz2mult(u32 hz, u32 shift_constant)
268} 257}
269 258
270/** 259/**
271 * clocksource_read: - Access the clocksource's current cycle value 260 * clocksource_cyc2ns - converts clocksource cycles to nanoseconds
272 * @cs: pointer to clocksource being read
273 *
274 * Uses the clocksource to return the current cycle_t value
275 */
276static inline cycle_t clocksource_read(struct clocksource *cs)
277{
278 return cs->read(cs);
279}
280
281/**
282 * clocksource_enable: - enable clocksource
283 * @cs: pointer to clocksource
284 *
285 * Enables the specified clocksource. The clocksource callback
286 * function should start up the hardware and setup mult and field
287 * members of struct clocksource to reflect hardware capabilities.
288 */
289static inline int clocksource_enable(struct clocksource *cs)
290{
291 int ret = 0;
292
293 if (cs->enable)
294 ret = cs->enable(cs);
295
296 /*
297 * The frequency may have changed while the clocksource
298 * was disabled. If so the code in ->enable() must update
299 * the mult value to reflect the new frequency. Make sure
300 * mult_orig follows this change.
301 */
302 cs->mult_orig = cs->mult;
303
304 return ret;
305}
306
307/**
308 * clocksource_disable: - disable clocksource
309 * @cs: pointer to clocksource
310 *
311 * Disables the specified clocksource. The clocksource callback
312 * function should power down the now unused hardware block to
313 * save power.
314 */
315static inline void clocksource_disable(struct clocksource *cs)
316{
317 /*
318 * Save mult_orig in mult so clocksource_enable() can
319 * restore the value regardless if ->enable() updates
320 * the value of mult or not.
321 */
322 cs->mult = cs->mult_orig;
323
324 if (cs->disable)
325 cs->disable(cs);
326}
327
328/**
329 * cyc2ns - converts clocksource cycles to nanoseconds
330 * @cs: Pointer to clocksource
331 * @cycles: Cycles
332 * 261 *
333 * Uses the clocksource and ntp ajdustment to convert cycle_ts to nanoseconds. 262 * Converts cycles to nanoseconds, using the given mult and shift.
334 * 263 *
335 * XXX - This could use some mult_lxl_ll() asm optimization 264 * XXX - This could use some mult_lxl_ll() asm optimization
336 */ 265 */
337static inline s64 cyc2ns(struct clocksource *cs, cycle_t cycles) 266static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift)
338{
339 u64 ret = (u64)cycles;
340 ret = (ret * cs->mult) >> cs->shift;
341 return ret;
342}
343
344/**
345 * clocksource_calculate_interval - Calculates a clocksource interval struct
346 *
347 * @c: Pointer to clocksource.
348 * @length_nsec: Desired interval length in nanoseconds.
349 *
350 * Calculates a fixed cycle/nsec interval for a given clocksource/adjustment
351 * pair and interval request.
352 *
353 * Unless you're the timekeeping code, you should not be using this!
354 */
355static inline void clocksource_calculate_interval(struct clocksource *c,
356 unsigned long length_nsec)
357{ 267{
358 u64 tmp; 268 return ((u64) cycles * mult) >> shift;
359
360 /* Do the ns -> cycle conversion first, using original mult */
361 tmp = length_nsec;
362 tmp <<= c->shift;
363 tmp += c->mult_orig/2;
364 do_div(tmp, c->mult_orig);
365
366 c->cycle_interval = (cycle_t)tmp;
367 if (c->cycle_interval == 0)
368 c->cycle_interval = 1;
369
370 /* Go back from cycles -> shifted ns, this time use ntp adjused mult */
371 c->xtime_interval = (u64)c->cycle_interval * c->mult;
372 c->raw_interval = ((u64)c->cycle_interval * c->mult_orig) >> c->shift;
373} 269}
374 270
375 271
@@ -380,6 +276,8 @@ extern void clocksource_touch_watchdog(void);
380extern struct clocksource* clocksource_get_next(void); 276extern struct clocksource* clocksource_get_next(void);
381extern void clocksource_change_rating(struct clocksource *cs, int rating); 277extern void clocksource_change_rating(struct clocksource *cs, int rating);
382extern void clocksource_resume(void); 278extern void clocksource_resume(void);
279extern struct clocksource * __init __weak clocksource_default_clock(void);
280extern void clocksource_mark_unstable(struct clocksource *cs);
383 281
384#ifdef CONFIG_GENERIC_TIME_VSYSCALL 282#ifdef CONFIG_GENERIC_TIME_VSYSCALL
385extern void update_vsyscall(struct timespec *ts, struct clocksource *c); 283extern void update_vsyscall(struct timespec *ts, struct clocksource *c);
@@ -394,4 +292,6 @@ static inline void update_vsyscall_tz(void)
394} 292}
395#endif 293#endif
396 294
295extern void timekeeping_notify(struct clocksource *clock);
296
397#endif /* _LINUX_CLOCKSOURCE_H */ 297#endif /* _LINUX_CLOCKSOURCE_H */
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 4759917adc7..ff037f0b1b4 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -91,7 +91,6 @@ enum hrtimer_restart {
91 * @function: timer expiry callback function 91 * @function: timer expiry callback function
92 * @base: pointer to the timer base (per cpu and per clock) 92 * @base: pointer to the timer base (per cpu and per clock)
93 * @state: state information (See bit values above) 93 * @state: state information (See bit values above)
94 * @cb_entry: list head to enqueue an expired timer into the callback list
95 * @start_site: timer statistics field to store the site where the timer 94 * @start_site: timer statistics field to store the site where the timer
96 * was started 95 * was started
97 * @start_comm: timer statistics field to store the name of the process which 96 * @start_comm: timer statistics field to store the name of the process which
@@ -108,7 +107,6 @@ struct hrtimer {
108 enum hrtimer_restart (*function)(struct hrtimer *); 107 enum hrtimer_restart (*function)(struct hrtimer *);
109 struct hrtimer_clock_base *base; 108 struct hrtimer_clock_base *base;
110 unsigned long state; 109 unsigned long state;
111 struct list_head cb_entry;
112#ifdef CONFIG_TIMER_STATS 110#ifdef CONFIG_TIMER_STATS
113 int start_pid; 111 int start_pid;
114 void *start_site; 112 void *start_site;
diff --git a/include/linux/time.h b/include/linux/time.h
index ea16c1a01d5..56787c09334 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -75,7 +75,7 @@ extern unsigned long mktime(const unsigned int year, const unsigned int mon,
75 const unsigned int day, const unsigned int hour, 75 const unsigned int day, const unsigned int hour,
76 const unsigned int min, const unsigned int sec); 76 const unsigned int min, const unsigned int sec);
77 77
78extern void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec); 78extern void set_normalized_timespec(struct timespec *ts, time_t sec, s64 nsec);
79extern struct timespec timespec_add_safe(const struct timespec lhs, 79extern struct timespec timespec_add_safe(const struct timespec lhs,
80 const struct timespec rhs); 80 const struct timespec rhs);
81 81
@@ -101,7 +101,8 @@ extern struct timespec xtime;
101extern struct timespec wall_to_monotonic; 101extern struct timespec wall_to_monotonic;
102extern seqlock_t xtime_lock; 102extern seqlock_t xtime_lock;
103 103
104extern unsigned long read_persistent_clock(void); 104extern void read_persistent_clock(struct timespec *ts);
105extern void read_boot_clock(struct timespec *ts);
105extern int update_persistent_clock(struct timespec now); 106extern int update_persistent_clock(struct timespec now);
106extern int no_sync_cmos_clock __read_mostly; 107extern int no_sync_cmos_clock __read_mostly;
107void timekeeping_init(void); 108void timekeeping_init(void);
@@ -109,6 +110,8 @@ extern int timekeeping_suspended;
109 110
110unsigned long get_seconds(void); 111unsigned long get_seconds(void);
111struct timespec current_kernel_time(void); 112struct timespec current_kernel_time(void);
113struct timespec __current_kernel_time(void); /* does not hold xtime_lock */
114struct timespec get_monotonic_coarse(void);
112 115
113#define CURRENT_TIME (current_kernel_time()) 116#define CURRENT_TIME (current_kernel_time())
114#define CURRENT_TIME_SEC ((struct timespec) { get_seconds(), 0 }) 117#define CURRENT_TIME_SEC ((struct timespec) { get_seconds(), 0 })
@@ -147,6 +150,7 @@ extern struct timespec timespec_trunc(struct timespec t, unsigned gran);
147extern int timekeeping_valid_for_hres(void); 150extern int timekeeping_valid_for_hres(void);
148extern void update_wall_time(void); 151extern void update_wall_time(void);
149extern void update_xtime_cache(u64 nsec); 152extern void update_xtime_cache(u64 nsec);
153extern void timekeeping_leap_insert(int leapsecond);
150 154
151struct tms; 155struct tms;
152extern void do_sys_times(struct tms *); 156extern void do_sys_times(struct tms *);
@@ -241,6 +245,8 @@ struct itimerval {
241#define CLOCK_PROCESS_CPUTIME_ID 2 245#define CLOCK_PROCESS_CPUTIME_ID 2
242#define CLOCK_THREAD_CPUTIME_ID 3 246#define CLOCK_THREAD_CPUTIME_ID 3
243#define CLOCK_MONOTONIC_RAW 4 247#define CLOCK_MONOTONIC_RAW 4
248#define CLOCK_REALTIME_COARSE 5
249#define CLOCK_MONOTONIC_COARSE 6
244 250
245/* 251/*
246 * The IDs of various hardware clocks: 252 * The IDs of various hardware clocks:
diff --git a/include/linux/timer.h b/include/linux/timer.h
index be62ec2ebea..a2d1eb6cb3f 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -175,11 +175,6 @@ extern int mod_timer_pinned(struct timer_list *timer, unsigned long expires);
175 175
176/* 176/*
177 * Return when the next timer-wheel timeout occurs (in absolute jiffies), 177 * Return when the next timer-wheel timeout occurs (in absolute jiffies),
178 * locks the timer base:
179 */
180extern unsigned long next_timer_interrupt(void);
181/*
182 * Return when the next timer-wheel timeout occurs (in absolute jiffies),
183 * locks the timer base and does the comparison against the given 178 * locks the timer base and does the comparison against the given
184 * jiffie. 179 * jiffie.
185 */ 180 */
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 05071bf6a37..c03f221fee4 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -48,37 +48,6 @@
48 48
49#include <asm/uaccess.h> 49#include <asm/uaccess.h>
50 50
51/**
52 * ktime_get - get the monotonic time in ktime_t format
53 *
54 * returns the time in ktime_t format
55 */
56ktime_t ktime_get(void)
57{
58 struct timespec now;
59
60 ktime_get_ts(&now);
61
62 return timespec_to_ktime(now);
63}
64EXPORT_SYMBOL_GPL(ktime_get);
65
66/**
67 * ktime_get_real - get the real (wall-) time in ktime_t format
68 *
69 * returns the time in ktime_t format
70 */
71ktime_t ktime_get_real(void)
72{
73 struct timespec now;
74
75 getnstimeofday(&now);
76
77 return timespec_to_ktime(now);
78}
79
80EXPORT_SYMBOL_GPL(ktime_get_real);
81
82/* 51/*
83 * The timer bases: 52 * The timer bases:
84 * 53 *
@@ -106,31 +75,6 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
106 } 75 }
107}; 76};
108 77
109/**
110 * ktime_get_ts - get the monotonic clock in timespec format
111 * @ts: pointer to timespec variable
112 *
113 * The function calculates the monotonic clock from the realtime
114 * clock and the wall_to_monotonic offset and stores the result
115 * in normalized timespec format in the variable pointed to by @ts.
116 */
117void ktime_get_ts(struct timespec *ts)
118{
119 struct timespec tomono;
120 unsigned long seq;
121
122 do {
123 seq = read_seqbegin(&xtime_lock);
124 getnstimeofday(ts);
125 tomono = wall_to_monotonic;
126
127 } while (read_seqretry(&xtime_lock, seq));
128
129 set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec,
130 ts->tv_nsec + tomono.tv_nsec);
131}
132EXPORT_SYMBOL_GPL(ktime_get_ts);
133
134/* 78/*
135 * Get the coarse grained time at the softirq based on xtime and 79 * Get the coarse grained time at the softirq based on xtime and
136 * wall_to_monotonic. 80 * wall_to_monotonic.
@@ -1155,7 +1099,6 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
1155 clock_id = CLOCK_MONOTONIC; 1099 clock_id = CLOCK_MONOTONIC;
1156 1100
1157 timer->base = &cpu_base->clock_base[clock_id]; 1101 timer->base = &cpu_base->clock_base[clock_id];
1158 INIT_LIST_HEAD(&timer->cb_entry);
1159 hrtimer_init_timer_hres(timer); 1102 hrtimer_init_timer_hres(timer);
1160 1103
1161#ifdef CONFIG_TIMER_STATS 1104#ifdef CONFIG_TIMER_STATS
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index d089d052c4a..495440779ce 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -242,6 +242,25 @@ static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec *tp)
242 return 0; 242 return 0;
243} 243}
244 244
245
246static int posix_get_realtime_coarse(clockid_t which_clock, struct timespec *tp)
247{
248 *tp = current_kernel_time();
249 return 0;
250}
251
252static int posix_get_monotonic_coarse(clockid_t which_clock,
253 struct timespec *tp)
254{
255 *tp = get_monotonic_coarse();
256 return 0;
257}
258
259int posix_get_coarse_res(const clockid_t which_clock, struct timespec *tp)
260{
261 *tp = ktime_to_timespec(KTIME_LOW_RES);
262 return 0;
263}
245/* 264/*
246 * Initialize everything, well, just everything in Posix clocks/timers ;) 265 * Initialize everything, well, just everything in Posix clocks/timers ;)
247 */ 266 */
@@ -262,10 +281,26 @@ static __init int init_posix_timers(void)
262 .timer_create = no_timer_create, 281 .timer_create = no_timer_create,
263 .nsleep = no_nsleep, 282 .nsleep = no_nsleep,
264 }; 283 };
284 struct k_clock clock_realtime_coarse = {
285 .clock_getres = posix_get_coarse_res,
286 .clock_get = posix_get_realtime_coarse,
287 .clock_set = do_posix_clock_nosettime,
288 .timer_create = no_timer_create,
289 .nsleep = no_nsleep,
290 };
291 struct k_clock clock_monotonic_coarse = {
292 .clock_getres = posix_get_coarse_res,
293 .clock_get = posix_get_monotonic_coarse,
294 .clock_set = do_posix_clock_nosettime,
295 .timer_create = no_timer_create,
296 .nsleep = no_nsleep,
297 };
265 298
266 register_posix_clock(CLOCK_REALTIME, &clock_realtime); 299 register_posix_clock(CLOCK_REALTIME, &clock_realtime);
267 register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic); 300 register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic);
268 register_posix_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw); 301 register_posix_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw);
302 register_posix_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse);
303 register_posix_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse);
269 304
270 posix_timers_cache = kmem_cache_create("posix_timers_cache", 305 posix_timers_cache = kmem_cache_create("posix_timers_cache",
271 sizeof (struct k_itimer), 0, SLAB_PANIC, 306 sizeof (struct k_itimer), 0, SLAB_PANIC,
diff --git a/kernel/time.c b/kernel/time.c
index 29511943871..2e2e469a7fe 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -370,13 +370,20 @@ EXPORT_SYMBOL(mktime);
370 * 0 <= tv_nsec < NSEC_PER_SEC 370 * 0 <= tv_nsec < NSEC_PER_SEC
371 * For negative values only the tv_sec field is negative ! 371 * For negative values only the tv_sec field is negative !
372 */ 372 */
373void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec) 373void set_normalized_timespec(struct timespec *ts, time_t sec, s64 nsec)
374{ 374{
375 while (nsec >= NSEC_PER_SEC) { 375 while (nsec >= NSEC_PER_SEC) {
376 /*
377 * The following asm() prevents the compiler from
378 * optimising this loop into a modulo operation. See
379 * also __iter_div_u64_rem() in include/linux/time.h
380 */
381 asm("" : "+rm"(nsec));
376 nsec -= NSEC_PER_SEC; 382 nsec -= NSEC_PER_SEC;
377 ++sec; 383 ++sec;
378 } 384 }
379 while (nsec < 0) { 385 while (nsec < 0) {
386 asm("" : "+rm"(nsec));
380 nsec += NSEC_PER_SEC; 387 nsec += NSEC_PER_SEC;
381 --sec; 388 --sec;
382 } 389 }
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 7466cb81125..09113347d32 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -21,7 +21,6 @@
21 * 21 *
22 * TODO WishList: 22 * TODO WishList:
23 * o Allow clocksource drivers to be unregistered 23 * o Allow clocksource drivers to be unregistered
24 * o get rid of clocksource_jiffies extern
25 */ 24 */
26 25
27#include <linux/clocksource.h> 26#include <linux/clocksource.h>
@@ -30,6 +29,7 @@
30#include <linux/module.h> 29#include <linux/module.h>
31#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */ 30#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
32#include <linux/tick.h> 31#include <linux/tick.h>
32#include <linux/kthread.h>
33 33
34void timecounter_init(struct timecounter *tc, 34void timecounter_init(struct timecounter *tc,
35 const struct cyclecounter *cc, 35 const struct cyclecounter *cc,
@@ -107,50 +107,35 @@ u64 timecounter_cyc2time(struct timecounter *tc,
107} 107}
108EXPORT_SYMBOL(timecounter_cyc2time); 108EXPORT_SYMBOL(timecounter_cyc2time);
109 109
110/* XXX - Would like a better way for initializing curr_clocksource */
111extern struct clocksource clocksource_jiffies;
112
113/*[Clocksource internal variables]--------- 110/*[Clocksource internal variables]---------
114 * curr_clocksource: 111 * curr_clocksource:
115 * currently selected clocksource. Initialized to clocksource_jiffies. 112 * currently selected clocksource.
116 * next_clocksource:
117 * pending next selected clocksource.
118 * clocksource_list: 113 * clocksource_list:
119 * linked list with the registered clocksources 114 * linked list with the registered clocksources
120 * clocksource_lock: 115 * clocksource_mutex:
121 * protects manipulations to curr_clocksource and next_clocksource 116 * protects manipulations to curr_clocksource and the clocksource_list
122 * and the clocksource_list
123 * override_name: 117 * override_name:
124 * Name of the user-specified clocksource. 118 * Name of the user-specified clocksource.
125 */ 119 */
126static struct clocksource *curr_clocksource = &clocksource_jiffies; 120static struct clocksource *curr_clocksource;
127static struct clocksource *next_clocksource;
128static struct clocksource *clocksource_override;
129static LIST_HEAD(clocksource_list); 121static LIST_HEAD(clocksource_list);
130static DEFINE_SPINLOCK(clocksource_lock); 122static DEFINE_MUTEX(clocksource_mutex);
131static char override_name[32]; 123static char override_name[32];
132static int finished_booting; 124static int finished_booting;
133 125
134/* clocksource_done_booting - Called near the end of core bootup
135 *
136 * Hack to avoid lots of clocksource churn at boot time.
137 * We use fs_initcall because we want this to start before
138 * device_initcall but after subsys_initcall.
139 */
140static int __init clocksource_done_booting(void)
141{
142 finished_booting = 1;
143 return 0;
144}
145fs_initcall(clocksource_done_booting);
146
147#ifdef CONFIG_CLOCKSOURCE_WATCHDOG 126#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
127static void clocksource_watchdog_work(struct work_struct *work);
128
148static LIST_HEAD(watchdog_list); 129static LIST_HEAD(watchdog_list);
149static struct clocksource *watchdog; 130static struct clocksource *watchdog;
150static struct timer_list watchdog_timer; 131static struct timer_list watchdog_timer;
132static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
151static DEFINE_SPINLOCK(watchdog_lock); 133static DEFINE_SPINLOCK(watchdog_lock);
152static cycle_t watchdog_last; 134static cycle_t watchdog_last;
153static unsigned long watchdog_resumed; 135static int watchdog_running;
136
137static int clocksource_watchdog_kthread(void *data);
138static void __clocksource_change_rating(struct clocksource *cs, int rating);
154 139
155/* 140/*
156 * Interval: 0.5sec Threshold: 0.0625s 141 * Interval: 0.5sec Threshold: 0.0625s
@@ -158,135 +143,249 @@ static unsigned long watchdog_resumed;
158#define WATCHDOG_INTERVAL (HZ >> 1) 143#define WATCHDOG_INTERVAL (HZ >> 1)
159#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4) 144#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)
160 145
161static void clocksource_ratewd(struct clocksource *cs, int64_t delta) 146static void clocksource_watchdog_work(struct work_struct *work)
162{ 147{
163 if (delta > -WATCHDOG_THRESHOLD && delta < WATCHDOG_THRESHOLD) 148 /*
164 return; 149 * If kthread_run fails the next watchdog scan over the
150 * watchdog_list will find the unstable clock again.
151 */
152 kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog");
153}
165 154
155static void __clocksource_unstable(struct clocksource *cs)
156{
157 cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG);
158 cs->flags |= CLOCK_SOURCE_UNSTABLE;
159 if (finished_booting)
160 schedule_work(&watchdog_work);
161}
162
163static void clocksource_unstable(struct clocksource *cs, int64_t delta)
164{
166 printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", 165 printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n",
167 cs->name, delta); 166 cs->name, delta);
168 cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG); 167 __clocksource_unstable(cs);
169 clocksource_change_rating(cs, 0); 168}
170 list_del(&cs->wd_list); 169
170/**
171 * clocksource_mark_unstable - mark clocksource unstable via watchdog
172 * @cs: clocksource to be marked unstable
173 *
174 * This function is called instead of clocksource_change_rating from
175 * cpu hotplug code to avoid a deadlock between the clocksource mutex
176 * and the cpu hotplug mutex. It defers the update of the clocksource
177 * to the watchdog thread.
178 */
179void clocksource_mark_unstable(struct clocksource *cs)
180{
181 unsigned long flags;
182
183 spin_lock_irqsave(&watchdog_lock, flags);
184 if (!(cs->flags & CLOCK_SOURCE_UNSTABLE)) {
185 if (list_empty(&cs->wd_list))
186 list_add(&cs->wd_list, &watchdog_list);
187 __clocksource_unstable(cs);
188 }
189 spin_unlock_irqrestore(&watchdog_lock, flags);
171} 190}
172 191
173static void clocksource_watchdog(unsigned long data) 192static void clocksource_watchdog(unsigned long data)
174{ 193{
175 struct clocksource *cs, *tmp; 194 struct clocksource *cs;
176 cycle_t csnow, wdnow; 195 cycle_t csnow, wdnow;
177 int64_t wd_nsec, cs_nsec; 196 int64_t wd_nsec, cs_nsec;
178 int resumed; 197 int next_cpu;
179 198
180 spin_lock(&watchdog_lock); 199 spin_lock(&watchdog_lock);
181 200 if (!watchdog_running)
182 resumed = test_and_clear_bit(0, &watchdog_resumed); 201 goto out;
183 202
184 wdnow = watchdog->read(watchdog); 203 wdnow = watchdog->read(watchdog);
185 wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask); 204 wd_nsec = clocksource_cyc2ns((wdnow - watchdog_last) & watchdog->mask,
205 watchdog->mult, watchdog->shift);
186 watchdog_last = wdnow; 206 watchdog_last = wdnow;
187 207
188 list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) { 208 list_for_each_entry(cs, &watchdog_list, wd_list) {
189 csnow = cs->read(cs);
190 209
191 if (unlikely(resumed)) { 210 /* Clocksource already marked unstable? */
192 cs->wd_last = csnow; 211 if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
212 if (finished_booting)
213 schedule_work(&watchdog_work);
193 continue; 214 continue;
194 } 215 }
195 216
196 /* Initialized ? */ 217 csnow = cs->read(cs);
218
219 /* Clocksource initialized ? */
197 if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) { 220 if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) {
198 if ((cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
199 (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
200 cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
201 /*
202 * We just marked the clocksource as
203 * highres-capable, notify the rest of the
204 * system as well so that we transition
205 * into high-res mode:
206 */
207 tick_clock_notify();
208 }
209 cs->flags |= CLOCK_SOURCE_WATCHDOG; 221 cs->flags |= CLOCK_SOURCE_WATCHDOG;
210 cs->wd_last = csnow; 222 cs->wd_last = csnow;
211 } else { 223 continue;
212 cs_nsec = cyc2ns(cs, (csnow - cs->wd_last) & cs->mask);
213 cs->wd_last = csnow;
214 /* Check the delta. Might remove from the list ! */
215 clocksource_ratewd(cs, cs_nsec - wd_nsec);
216 } 224 }
217 }
218 225
219 if (!list_empty(&watchdog_list)) { 226 /* Check the deviation from the watchdog clocksource. */
220 /* 227 cs_nsec = clocksource_cyc2ns((csnow - cs->wd_last) &
221 * Cycle through CPUs to check if the CPUs stay 228 cs->mask, cs->mult, cs->shift);
222 * synchronized to each other. 229 cs->wd_last = csnow;
223 */ 230 if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) {
224 int next_cpu = cpumask_next(raw_smp_processor_id(), 231 clocksource_unstable(cs, cs_nsec - wd_nsec);
225 cpu_online_mask); 232 continue;
233 }
226 234
227 if (next_cpu >= nr_cpu_ids) 235 if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
228 next_cpu = cpumask_first(cpu_online_mask); 236 (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
229 watchdog_timer.expires += WATCHDOG_INTERVAL; 237 (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
230 add_timer_on(&watchdog_timer, next_cpu); 238 cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
239 /*
240 * We just marked the clocksource as highres-capable,
241 * notify the rest of the system as well so that we
242 * transition into high-res mode:
243 */
244 tick_clock_notify();
245 }
231 } 246 }
247
248 /*
249 * Cycle through CPUs to check if the CPUs stay synchronized
250 * to each other.
251 */
252 next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
253 if (next_cpu >= nr_cpu_ids)
254 next_cpu = cpumask_first(cpu_online_mask);
255 watchdog_timer.expires += WATCHDOG_INTERVAL;
256 add_timer_on(&watchdog_timer, next_cpu);
257out:
232 spin_unlock(&watchdog_lock); 258 spin_unlock(&watchdog_lock);
233} 259}
260
261static inline void clocksource_start_watchdog(void)
262{
263 if (watchdog_running || !watchdog || list_empty(&watchdog_list))
264 return;
265 init_timer(&watchdog_timer);
266 watchdog_timer.function = clocksource_watchdog;
267 watchdog_last = watchdog->read(watchdog);
268 watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
269 add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask));
270 watchdog_running = 1;
271}
272
273static inline void clocksource_stop_watchdog(void)
274{
275 if (!watchdog_running || (watchdog && !list_empty(&watchdog_list)))
276 return;
277 del_timer(&watchdog_timer);
278 watchdog_running = 0;
279}
280
281static inline void clocksource_reset_watchdog(void)
282{
283 struct clocksource *cs;
284
285 list_for_each_entry(cs, &watchdog_list, wd_list)
286 cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
287}
288
234static void clocksource_resume_watchdog(void) 289static void clocksource_resume_watchdog(void)
235{ 290{
236 set_bit(0, &watchdog_resumed); 291 unsigned long flags;
292
293 spin_lock_irqsave(&watchdog_lock, flags);
294 clocksource_reset_watchdog();
295 spin_unlock_irqrestore(&watchdog_lock, flags);
237} 296}
238 297
239static void clocksource_check_watchdog(struct clocksource *cs) 298static void clocksource_enqueue_watchdog(struct clocksource *cs)
240{ 299{
241 struct clocksource *cse;
242 unsigned long flags; 300 unsigned long flags;
243 301
244 spin_lock_irqsave(&watchdog_lock, flags); 302 spin_lock_irqsave(&watchdog_lock, flags);
245 if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) { 303 if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
246 int started = !list_empty(&watchdog_list); 304 /* cs is a clocksource to be watched. */
247
248 list_add(&cs->wd_list, &watchdog_list); 305 list_add(&cs->wd_list, &watchdog_list);
249 if (!started && watchdog) { 306 cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
250 watchdog_last = watchdog->read(watchdog);
251 watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
252 add_timer_on(&watchdog_timer,
253 cpumask_first(cpu_online_mask));
254 }
255 } else { 307 } else {
308 /* cs is a watchdog. */
256 if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) 309 if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
257 cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; 310 cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
258 311 /* Pick the best watchdog. */
259 if (!watchdog || cs->rating > watchdog->rating) { 312 if (!watchdog || cs->rating > watchdog->rating) {
260 if (watchdog)
261 del_timer(&watchdog_timer);
262 watchdog = cs; 313 watchdog = cs;
263 init_timer(&watchdog_timer);
264 watchdog_timer.function = clocksource_watchdog;
265
266 /* Reset watchdog cycles */ 314 /* Reset watchdog cycles */
267 list_for_each_entry(cse, &watchdog_list, wd_list) 315 clocksource_reset_watchdog();
268 cse->flags &= ~CLOCK_SOURCE_WATCHDOG; 316 }
269 /* Start if list is not empty */ 317 }
270 if (!list_empty(&watchdog_list)) { 318 /* Check if the watchdog timer needs to be started. */
271 watchdog_last = watchdog->read(watchdog); 319 clocksource_start_watchdog();
272 watchdog_timer.expires = 320 spin_unlock_irqrestore(&watchdog_lock, flags);
273 jiffies + WATCHDOG_INTERVAL; 321}
274 add_timer_on(&watchdog_timer, 322
275 cpumask_first(cpu_online_mask)); 323static void clocksource_dequeue_watchdog(struct clocksource *cs)
276 } 324{
325 struct clocksource *tmp;
326 unsigned long flags;
327
328 spin_lock_irqsave(&watchdog_lock, flags);
329 if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
330 /* cs is a watched clocksource. */
331 list_del_init(&cs->wd_list);
332 } else if (cs == watchdog) {
333 /* Reset watchdog cycles */
334 clocksource_reset_watchdog();
335 /* Current watchdog is removed. Find an alternative. */
336 watchdog = NULL;
337 list_for_each_entry(tmp, &clocksource_list, list) {
338 if (tmp == cs || tmp->flags & CLOCK_SOURCE_MUST_VERIFY)
339 continue;
340 if (!watchdog || tmp->rating > watchdog->rating)
341 watchdog = tmp;
277 } 342 }
278 } 343 }
344 cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
345 /* Check if the watchdog timer needs to be stopped. */
346 clocksource_stop_watchdog();
279 spin_unlock_irqrestore(&watchdog_lock, flags); 347 spin_unlock_irqrestore(&watchdog_lock, flags);
280} 348}
281#else 349
282static void clocksource_check_watchdog(struct clocksource *cs) 350static int clocksource_watchdog_kthread(void *data)
351{
352 struct clocksource *cs, *tmp;
353 unsigned long flags;
354 LIST_HEAD(unstable);
355
356 mutex_lock(&clocksource_mutex);
357 spin_lock_irqsave(&watchdog_lock, flags);
358 list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list)
359 if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
360 list_del_init(&cs->wd_list);
361 list_add(&cs->wd_list, &unstable);
362 }
363 /* Check if the watchdog timer needs to be stopped. */
364 clocksource_stop_watchdog();
365 spin_unlock_irqrestore(&watchdog_lock, flags);
366
367 /* Needs to be done outside of watchdog lock */
368 list_for_each_entry_safe(cs, tmp, &unstable, wd_list) {
369 list_del_init(&cs->wd_list);
370 __clocksource_change_rating(cs, 0);
371 }
372 mutex_unlock(&clocksource_mutex);
373 return 0;
374}
375
376#else /* CONFIG_CLOCKSOURCE_WATCHDOG */
377
378static void clocksource_enqueue_watchdog(struct clocksource *cs)
283{ 379{
284 if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) 380 if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
285 cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES; 381 cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
286} 382}
287 383
384static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
288static inline void clocksource_resume_watchdog(void) { } 385static inline void clocksource_resume_watchdog(void) { }
289#endif 386static inline int clocksource_watchdog_kthread(void *data) { return 0; }
387
388#endif /* CONFIG_CLOCKSOURCE_WATCHDOG */
290 389
291/** 390/**
292 * clocksource_resume - resume the clocksource(s) 391 * clocksource_resume - resume the clocksource(s)
@@ -294,18 +393,16 @@ static inline void clocksource_resume_watchdog(void) { }
294void clocksource_resume(void) 393void clocksource_resume(void)
295{ 394{
296 struct clocksource *cs; 395 struct clocksource *cs;
297 unsigned long flags;
298 396
299 spin_lock_irqsave(&clocksource_lock, flags); 397 mutex_lock(&clocksource_mutex);
300 398
301 list_for_each_entry(cs, &clocksource_list, list) { 399 list_for_each_entry(cs, &clocksource_list, list)
302 if (cs->resume) 400 if (cs->resume)
303 cs->resume(); 401 cs->resume();
304 }
305 402
306 clocksource_resume_watchdog(); 403 clocksource_resume_watchdog();
307 404
308 spin_unlock_irqrestore(&clocksource_lock, flags); 405 mutex_unlock(&clocksource_mutex);
309} 406}
310 407
311/** 408/**
@@ -320,75 +417,94 @@ void clocksource_touch_watchdog(void)
320 clocksource_resume_watchdog(); 417 clocksource_resume_watchdog();
321} 418}
322 419
420#ifdef CONFIG_GENERIC_TIME
421
323/** 422/**
324 * clocksource_get_next - Returns the selected clocksource 423 * clocksource_select - Select the best clocksource available
424 *
425 * Private function. Must hold clocksource_mutex when called.
325 * 426 *
427 * Select the clocksource with the best rating, or the clocksource,
428 * which is selected by userspace override.
326 */ 429 */
327struct clocksource *clocksource_get_next(void) 430static void clocksource_select(void)
328{ 431{
329 unsigned long flags; 432 struct clocksource *best, *cs;
330 433
331 spin_lock_irqsave(&clocksource_lock, flags); 434 if (!finished_booting || list_empty(&clocksource_list))
332 if (next_clocksource && finished_booting) { 435 return;
333 curr_clocksource = next_clocksource; 436 /* First clocksource on the list has the best rating. */
334 next_clocksource = NULL; 437 best = list_first_entry(&clocksource_list, struct clocksource, list);
438 /* Check for the override clocksource. */
439 list_for_each_entry(cs, &clocksource_list, list) {
440 if (strcmp(cs->name, override_name) != 0)
441 continue;
442 /*
443 * Check to make sure we don't switch to a non-highres
444 * capable clocksource if the tick code is in oneshot
445 * mode (highres or nohz)
446 */
447 if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
448 tick_oneshot_mode_active()) {
449 /* Override clocksource cannot be used. */
450 printk(KERN_WARNING "Override clocksource %s is not "
451 "HRT compatible. Cannot switch while in "
452 "HRT/NOHZ mode\n", cs->name);
453 override_name[0] = 0;
454 } else
455 /* Override clocksource can be used. */
456 best = cs;
457 break;
458 }
459 if (curr_clocksource != best) {
460 printk(KERN_INFO "Switching to clocksource %s\n", best->name);
461 curr_clocksource = best;
462 timekeeping_notify(curr_clocksource);
335 } 463 }
336 spin_unlock_irqrestore(&clocksource_lock, flags);
337
338 return curr_clocksource;
339} 464}
340 465
341/** 466#else /* CONFIG_GENERIC_TIME */
342 * select_clocksource - Selects the best registered clocksource. 467
343 * 468static inline void clocksource_select(void) { }
344 * Private function. Must hold clocksource_lock when called. 469
470#endif
471
472/*
473 * clocksource_done_booting - Called near the end of core bootup
345 * 474 *
346 * Select the clocksource with the best rating, or the clocksource, 475 * Hack to avoid lots of clocksource churn at boot time.
347 * which is selected by userspace override. 476 * We use fs_initcall because we want this to start before
477 * device_initcall but after subsys_initcall.
348 */ 478 */
349static struct clocksource *select_clocksource(void) 479static int __init clocksource_done_booting(void)
350{ 480{
351 struct clocksource *next; 481 finished_booting = 1;
352
353 if (list_empty(&clocksource_list))
354 return NULL;
355
356 if (clocksource_override)
357 next = clocksource_override;
358 else
359 next = list_entry(clocksource_list.next, struct clocksource,
360 list);
361 482
362 if (next == curr_clocksource) 483 /*
363 return NULL; 484 * Run the watchdog first to eliminate unstable clock sources
485 */
486 clocksource_watchdog_kthread(NULL);
364 487
365 return next; 488 mutex_lock(&clocksource_mutex);
489 clocksource_select();
490 mutex_unlock(&clocksource_mutex);
491 return 0;
366} 492}
493fs_initcall(clocksource_done_booting);
367 494
368/* 495/*
369 * Enqueue the clocksource sorted by rating 496 * Enqueue the clocksource sorted by rating
370 */ 497 */
371static int clocksource_enqueue(struct clocksource *c) 498static void clocksource_enqueue(struct clocksource *cs)
372{ 499{
373 struct list_head *tmp, *entry = &clocksource_list; 500 struct list_head *entry = &clocksource_list;
501 struct clocksource *tmp;
374 502
375 list_for_each(tmp, &clocksource_list) { 503 list_for_each_entry(tmp, &clocksource_list, list)
376 struct clocksource *cs;
377
378 cs = list_entry(tmp, struct clocksource, list);
379 if (cs == c)
380 return -EBUSY;
381 /* Keep track of the place, where to insert */ 504 /* Keep track of the place, where to insert */
382 if (cs->rating >= c->rating) 505 if (tmp->rating >= cs->rating)
383 entry = tmp; 506 entry = &tmp->list;
384 } 507 list_add(&cs->list, entry);
385 list_add(&c->list, entry);
386
387 if (strlen(c->name) == strlen(override_name) &&
388 !strcmp(c->name, override_name))
389 clocksource_override = c;
390
391 return 0;
392} 508}
393 509
394/** 510/**
@@ -397,52 +513,48 @@ static int clocksource_enqueue(struct clocksource *c)
397 * 513 *
398 * Returns -EBUSY if registration fails, zero otherwise. 514 * Returns -EBUSY if registration fails, zero otherwise.
399 */ 515 */
400int clocksource_register(struct clocksource *c) 516int clocksource_register(struct clocksource *cs)
401{ 517{
402 unsigned long flags; 518 mutex_lock(&clocksource_mutex);
403 int ret; 519 clocksource_enqueue(cs);
404 520 clocksource_select();
405 spin_lock_irqsave(&clocksource_lock, flags); 521 clocksource_enqueue_watchdog(cs);
406 ret = clocksource_enqueue(c); 522 mutex_unlock(&clocksource_mutex);
407 if (!ret) 523 return 0;
408 next_clocksource = select_clocksource();
409 spin_unlock_irqrestore(&clocksource_lock, flags);
410 if (!ret)
411 clocksource_check_watchdog(c);
412 return ret;
413} 524}
414EXPORT_SYMBOL(clocksource_register); 525EXPORT_SYMBOL(clocksource_register);
415 526
527static void __clocksource_change_rating(struct clocksource *cs, int rating)
528{
529 list_del(&cs->list);
530 cs->rating = rating;
531 clocksource_enqueue(cs);
532 clocksource_select();
533}
534
416/** 535/**
417 * clocksource_change_rating - Change the rating of a registered clocksource 536 * clocksource_change_rating - Change the rating of a registered clocksource
418 *
419 */ 537 */
420void clocksource_change_rating(struct clocksource *cs, int rating) 538void clocksource_change_rating(struct clocksource *cs, int rating)
421{ 539{
422 unsigned long flags; 540 mutex_lock(&clocksource_mutex);
423 541 __clocksource_change_rating(cs, rating);
424 spin_lock_irqsave(&clocksource_lock, flags); 542 mutex_unlock(&clocksource_mutex);
425 list_del(&cs->list);
426 cs->rating = rating;
427 clocksource_enqueue(cs);
428 next_clocksource = select_clocksource();
429 spin_unlock_irqrestore(&clocksource_lock, flags);
430} 543}
544EXPORT_SYMBOL(clocksource_change_rating);
431 545
432/** 546/**
433 * clocksource_unregister - remove a registered clocksource 547 * clocksource_unregister - remove a registered clocksource
434 */ 548 */
435void clocksource_unregister(struct clocksource *cs) 549void clocksource_unregister(struct clocksource *cs)
436{ 550{
437 unsigned long flags; 551 mutex_lock(&clocksource_mutex);
438 552 clocksource_dequeue_watchdog(cs);
439 spin_lock_irqsave(&clocksource_lock, flags);
440 list_del(&cs->list); 553 list_del(&cs->list);
441 if (clocksource_override == cs) 554 clocksource_select();
442 clocksource_override = NULL; 555 mutex_unlock(&clocksource_mutex);
443 next_clocksource = select_clocksource();
444 spin_unlock_irqrestore(&clocksource_lock, flags);
445} 556}
557EXPORT_SYMBOL(clocksource_unregister);
446 558
447#ifdef CONFIG_SYSFS 559#ifdef CONFIG_SYSFS
448/** 560/**
@@ -458,9 +570,9 @@ sysfs_show_current_clocksources(struct sys_device *dev,
458{ 570{
459 ssize_t count = 0; 571 ssize_t count = 0;
460 572
461 spin_lock_irq(&clocksource_lock); 573 mutex_lock(&clocksource_mutex);
462 count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name); 574 count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name);
463 spin_unlock_irq(&clocksource_lock); 575 mutex_unlock(&clocksource_mutex);
464 576
465 return count; 577 return count;
466} 578}
@@ -478,9 +590,7 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
478 struct sysdev_attribute *attr, 590 struct sysdev_attribute *attr,
479 const char *buf, size_t count) 591 const char *buf, size_t count)
480{ 592{
481 struct clocksource *ovr = NULL;
482 size_t ret = count; 593 size_t ret = count;
483 int len;
484 594
485 /* strings from sysfs write are not 0 terminated! */ 595 /* strings from sysfs write are not 0 terminated! */
486 if (count >= sizeof(override_name)) 596 if (count >= sizeof(override_name))
@@ -490,44 +600,14 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
490 if (buf[count-1] == '\n') 600 if (buf[count-1] == '\n')
491 count--; 601 count--;
492 602
493 spin_lock_irq(&clocksource_lock); 603 mutex_lock(&clocksource_mutex);
494 604
495 if (count > 0) 605 if (count > 0)
496 memcpy(override_name, buf, count); 606 memcpy(override_name, buf, count);
497 override_name[count] = 0; 607 override_name[count] = 0;
608 clocksource_select();
498 609
499 len = strlen(override_name); 610 mutex_unlock(&clocksource_mutex);
500 if (len) {
501 struct clocksource *cs;
502
503 ovr = clocksource_override;
504 /* try to select it: */
505 list_for_each_entry(cs, &clocksource_list, list) {
506 if (strlen(cs->name) == len &&
507 !strcmp(cs->name, override_name))
508 ovr = cs;
509 }
510 }
511
512 /*
513 * Check to make sure we don't switch to a non-highres capable
514 * clocksource if the tick code is in oneshot mode (highres or nohz)
515 */
516 if (tick_oneshot_mode_active() && ovr &&
517 !(ovr->flags & CLOCK_SOURCE_VALID_FOR_HRES)) {
518 printk(KERN_WARNING "%s clocksource is not HRT compatible. "
519 "Cannot switch while in HRT/NOHZ mode\n", ovr->name);
520 ovr = NULL;
521 override_name[0] = 0;
522 }
523
524 /* Reselect, when the override name has changed */
525 if (ovr != clocksource_override) {
526 clocksource_override = ovr;
527 next_clocksource = select_clocksource();
528 }
529
530 spin_unlock_irq(&clocksource_lock);
531 611
532 return ret; 612 return ret;
533} 613}
@@ -547,7 +627,7 @@ sysfs_show_available_clocksources(struct sys_device *dev,
547 struct clocksource *src; 627 struct clocksource *src;
548 ssize_t count = 0; 628 ssize_t count = 0;
549 629
550 spin_lock_irq(&clocksource_lock); 630 mutex_lock(&clocksource_mutex);
551 list_for_each_entry(src, &clocksource_list, list) { 631 list_for_each_entry(src, &clocksource_list, list) {
552 /* 632 /*
553 * Don't show non-HRES clocksource if the tick code is 633 * Don't show non-HRES clocksource if the tick code is
@@ -559,7 +639,7 @@ sysfs_show_available_clocksources(struct sys_device *dev,
559 max((ssize_t)PAGE_SIZE - count, (ssize_t)0), 639 max((ssize_t)PAGE_SIZE - count, (ssize_t)0),
560 "%s ", src->name); 640 "%s ", src->name);
561 } 641 }
562 spin_unlock_irq(&clocksource_lock); 642 mutex_unlock(&clocksource_mutex);
563 643
564 count += snprintf(buf + count, 644 count += snprintf(buf + count,
565 max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n"); 645 max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n");
@@ -614,11 +694,10 @@ device_initcall(init_clocksource_sysfs);
614 */ 694 */
615static int __init boot_override_clocksource(char* str) 695static int __init boot_override_clocksource(char* str)
616{ 696{
617 unsigned long flags; 697 mutex_lock(&clocksource_mutex);
618 spin_lock_irqsave(&clocksource_lock, flags);
619 if (str) 698 if (str)
620 strlcpy(override_name, str, sizeof(override_name)); 699 strlcpy(override_name, str, sizeof(override_name));
621 spin_unlock_irqrestore(&clocksource_lock, flags); 700 mutex_unlock(&clocksource_mutex);
622 return 1; 701 return 1;
623} 702}
624 703
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index c3f6c30816e..5404a845690 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -61,7 +61,6 @@ struct clocksource clocksource_jiffies = {
61 .read = jiffies_read, 61 .read = jiffies_read,
62 .mask = 0xffffffff, /*32bits*/ 62 .mask = 0xffffffff, /*32bits*/
63 .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */ 63 .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
64 .mult_orig = NSEC_PER_JIFFY << JIFFIES_SHIFT,
65 .shift = JIFFIES_SHIFT, 64 .shift = JIFFIES_SHIFT,
66}; 65};
67 66
@@ -71,3 +70,8 @@ static int __init init_jiffies_clocksource(void)
71} 70}
72 71
73core_initcall(init_jiffies_clocksource); 72core_initcall(init_jiffies_clocksource);
73
74struct clocksource * __init __weak clocksource_default_clock(void)
75{
76 return &clocksource_jiffies;
77}
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 7fc64375ff4..4800f933910 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -194,8 +194,7 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
194 case TIME_OK: 194 case TIME_OK:
195 break; 195 break;
196 case TIME_INS: 196 case TIME_INS:
197 xtime.tv_sec--; 197 timekeeping_leap_insert(-1);
198 wall_to_monotonic.tv_sec++;
199 time_state = TIME_OOP; 198 time_state = TIME_OOP;
200 printk(KERN_NOTICE 199 printk(KERN_NOTICE
201 "Clock: inserting leap second 23:59:60 UTC\n"); 200 "Clock: inserting leap second 23:59:60 UTC\n");
@@ -203,9 +202,8 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
203 res = HRTIMER_RESTART; 202 res = HRTIMER_RESTART;
204 break; 203 break;
205 case TIME_DEL: 204 case TIME_DEL:
206 xtime.tv_sec++; 205 timekeeping_leap_insert(1);
207 time_tai--; 206 time_tai--;
208 wall_to_monotonic.tv_sec--;
209 time_state = TIME_WAIT; 207 time_state = TIME_WAIT;
210 printk(KERN_NOTICE 208 printk(KERN_NOTICE
211 "Clock: deleting leap second 23:59:59 UTC\n"); 209 "Clock: deleting leap second 23:59:59 UTC\n");
@@ -219,7 +217,6 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
219 time_state = TIME_OK; 217 time_state = TIME_OK;
220 break; 218 break;
221 } 219 }
222 update_vsyscall(&xtime, clock);
223 220
224 write_sequnlock(&xtime_lock); 221 write_sequnlock(&xtime_lock);
225 222
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index e8c77d9c633..fb0f46fa1ec 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -18,7 +18,117 @@
18#include <linux/jiffies.h> 18#include <linux/jiffies.h>
19#include <linux/time.h> 19#include <linux/time.h>
20#include <linux/tick.h> 20#include <linux/tick.h>
21#include <linux/stop_machine.h>
22
23/* Structure holding internal timekeeping values. */
24struct timekeeper {
25 /* Current clocksource used for timekeeping. */
26 struct clocksource *clock;
27 /* The shift value of the current clocksource. */
28 int shift;
29
30 /* Number of clock cycles in one NTP interval. */
31 cycle_t cycle_interval;
32 /* Number of clock shifted nano seconds in one NTP interval. */
33 u64 xtime_interval;
34 /* Raw nano seconds accumulated per NTP interval. */
35 u32 raw_interval;
36
37 /* Clock shifted nano seconds remainder not stored in xtime.tv_nsec. */
38 u64 xtime_nsec;
39 /* Difference between accumulated time and NTP time in ntp
40 * shifted nano seconds. */
41 s64 ntp_error;
42 /* Shift conversion between clock shifted nano seconds and
43 * ntp shifted nano seconds. */
44 int ntp_error_shift;
45 /* NTP adjusted clock multiplier */
46 u32 mult;
47};
48
49struct timekeeper timekeeper;
50
51/**
52 * timekeeper_setup_internals - Set up internals to use clocksource clock.
53 *
54 * @clock: Pointer to clocksource.
55 *
56 * Calculates a fixed cycle/nsec interval for a given clocksource/adjustment
57 * pair and interval request.
58 *
59 * Unless you're the timekeeping code, you should not be using this!
60 */
61static void timekeeper_setup_internals(struct clocksource *clock)
62{
63 cycle_t interval;
64 u64 tmp;
65
66 timekeeper.clock = clock;
67 clock->cycle_last = clock->read(clock);
21 68
69 /* Do the ns -> cycle conversion first, using original mult */
70 tmp = NTP_INTERVAL_LENGTH;
71 tmp <<= clock->shift;
72 tmp += clock->mult/2;
73 do_div(tmp, clock->mult);
74 if (tmp == 0)
75 tmp = 1;
76
77 interval = (cycle_t) tmp;
78 timekeeper.cycle_interval = interval;
79
80 /* Go back from cycles -> shifted ns */
81 timekeeper.xtime_interval = (u64) interval * clock->mult;
82 timekeeper.raw_interval =
83 ((u64) interval * clock->mult) >> clock->shift;
84
85 timekeeper.xtime_nsec = 0;
86 timekeeper.shift = clock->shift;
87
88 timekeeper.ntp_error = 0;
89 timekeeper.ntp_error_shift = NTP_SCALE_SHIFT - clock->shift;
90
91 /*
92 * The timekeeper keeps its own mult values for the currently
93 * active clocksource. These values will be adjusted via NTP
94 * to counteract clock drifting.
95 */
96 timekeeper.mult = clock->mult;
97}
98
99/* Timekeeper helper functions. */
100static inline s64 timekeeping_get_ns(void)
101{
102 cycle_t cycle_now, cycle_delta;
103 struct clocksource *clock;
104
105 /* read clocksource: */
106 clock = timekeeper.clock;
107 cycle_now = clock->read(clock);
108
109 /* calculate the delta since the last update_wall_time: */
110 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
111
112 /* return the delta converted to nanoseconds using the NTP adjusted mult. */
113 return clocksource_cyc2ns(cycle_delta, timekeeper.mult,
114 timekeeper.shift);
115}
116
117static inline s64 timekeeping_get_ns_raw(void)
118{
119 cycle_t cycle_now, cycle_delta;
120 struct clocksource *clock;
121
122 /* read clocksource: */
123 clock = timekeeper.clock;
124 cycle_now = clock->read(clock);
125
126 /* calculate the delta since the last update_wall_time: */
127 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
128
129 /* return the delta converted to nanoseconds using the clocksource's own (not NTP adjusted) mult. */
130 return clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift);
131}
22 132
23/* 133/*
24 * This read-write spinlock protects us from races in SMP while 134 * This read-write spinlock protects us from races in SMP while
@@ -44,7 +154,12 @@ __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
44 */ 154 */
45struct timespec xtime __attribute__ ((aligned (16))); 155struct timespec xtime __attribute__ ((aligned (16)));
46struct timespec wall_to_monotonic __attribute__ ((aligned (16))); 156struct timespec wall_to_monotonic __attribute__ ((aligned (16)));
47static unsigned long total_sleep_time; /* seconds */ 157static struct timespec total_sleep_time;
158
159/*
160 * The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock.
161 */
162struct timespec raw_time;
48 163
49/* flag for if timekeeping is suspended */ 164/* flag for if timekeeping is suspended */
50int __read_mostly timekeeping_suspended; 165int __read_mostly timekeeping_suspended;
@@ -56,35 +171,44 @@ void update_xtime_cache(u64 nsec)
56 timespec_add_ns(&xtime_cache, nsec); 171 timespec_add_ns(&xtime_cache, nsec);
57} 172}
58 173
59struct clocksource *clock; 174/* must hold xtime_lock */
60 175void timekeeping_leap_insert(int leapsecond)
176{
177 xtime.tv_sec += leapsecond;
178 wall_to_monotonic.tv_sec -= leapsecond;
179 update_vsyscall(&xtime, timekeeper.clock);
180}
61 181
62#ifdef CONFIG_GENERIC_TIME 182#ifdef CONFIG_GENERIC_TIME
183
63/** 184/**
64 * clocksource_forward_now - update clock to the current time 185 * timekeeping_forward_now - update clock to the current time
65 * 186 *
66 * Forward the current clock to update its state since the last call to 187 * Forward the current clock to update its state since the last call to
67 * update_wall_time(). This is useful before significant clock changes, 188 * update_wall_time(). This is useful before significant clock changes,
68 * as it avoids having to deal with this time offset explicitly. 189 * as it avoids having to deal with this time offset explicitly.
69 */ 190 */
70static void clocksource_forward_now(void) 191static void timekeeping_forward_now(void)
71{ 192{
72 cycle_t cycle_now, cycle_delta; 193 cycle_t cycle_now, cycle_delta;
194 struct clocksource *clock;
73 s64 nsec; 195 s64 nsec;
74 196
75 cycle_now = clocksource_read(clock); 197 clock = timekeeper.clock;
198 cycle_now = clock->read(clock);
76 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; 199 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
77 clock->cycle_last = cycle_now; 200 clock->cycle_last = cycle_now;
78 201
79 nsec = cyc2ns(clock, cycle_delta); 202 nsec = clocksource_cyc2ns(cycle_delta, timekeeper.mult,
203 timekeeper.shift);
80 204
81 /* If arch requires, add in gettimeoffset() */ 205 /* If arch requires, add in gettimeoffset() */
82 nsec += arch_gettimeoffset(); 206 nsec += arch_gettimeoffset();
83 207
84 timespec_add_ns(&xtime, nsec); 208 timespec_add_ns(&xtime, nsec);
85 209
86 nsec = ((s64)cycle_delta * clock->mult_orig) >> clock->shift; 210 nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift);
87 clock->raw_time.tv_nsec += nsec; 211 timespec_add_ns(&raw_time, nsec);
88} 212}
89 213
90/** 214/**
@@ -95,7 +219,6 @@ static void clocksource_forward_now(void)
95 */ 219 */
96void getnstimeofday(struct timespec *ts) 220void getnstimeofday(struct timespec *ts)
97{ 221{
98 cycle_t cycle_now, cycle_delta;
99 unsigned long seq; 222 unsigned long seq;
100 s64 nsecs; 223 s64 nsecs;
101 224
@@ -105,15 +228,7 @@ void getnstimeofday(struct timespec *ts)
105 seq = read_seqbegin(&xtime_lock); 228 seq = read_seqbegin(&xtime_lock);
106 229
107 *ts = xtime; 230 *ts = xtime;
108 231 nsecs = timekeeping_get_ns();
109 /* read clocksource: */
110 cycle_now = clocksource_read(clock);
111
112 /* calculate the delta since the last update_wall_time: */
113 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
114
115 /* convert to nanoseconds: */
116 nsecs = cyc2ns(clock, cycle_delta);
117 232
118 /* If arch requires, add in gettimeoffset() */ 233 /* If arch requires, add in gettimeoffset() */
119 nsecs += arch_gettimeoffset(); 234 nsecs += arch_gettimeoffset();
@@ -125,6 +240,57 @@ void getnstimeofday(struct timespec *ts)
125 240
126EXPORT_SYMBOL(getnstimeofday); 241EXPORT_SYMBOL(getnstimeofday);
127 242
243ktime_t ktime_get(void)
244{
245 unsigned int seq;
246 s64 secs, nsecs;
247
248 WARN_ON(timekeeping_suspended);
249
250 do {
251 seq = read_seqbegin(&xtime_lock);
252 secs = xtime.tv_sec + wall_to_monotonic.tv_sec;
253 nsecs = xtime.tv_nsec + wall_to_monotonic.tv_nsec;
254 nsecs += timekeeping_get_ns();
255
256 } while (read_seqretry(&xtime_lock, seq));
257 /*
258 * Use ktime_set/ktime_add_ns to create a proper ktime on
259 * 32-bit architectures without CONFIG_KTIME_SCALAR.
260 */
261 return ktime_add_ns(ktime_set(secs, 0), nsecs);
262}
263EXPORT_SYMBOL_GPL(ktime_get);
264
265/**
266 * ktime_get_ts - get the monotonic clock in timespec format
267 * @ts: pointer to timespec variable
268 *
269 * The function calculates the monotonic clock from the realtime
270 * clock and the wall_to_monotonic offset and stores the result
271 * in normalized timespec format in the variable pointed to by @ts.
272 */
273void ktime_get_ts(struct timespec *ts)
274{
275 struct timespec tomono;
276 unsigned int seq;
277 s64 nsecs;
278
279 WARN_ON(timekeeping_suspended);
280
281 do {
282 seq = read_seqbegin(&xtime_lock);
283 *ts = xtime;
284 tomono = wall_to_monotonic;
285 nsecs = timekeeping_get_ns();
286
287 } while (read_seqretry(&xtime_lock, seq));
288
289 set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec,
290 ts->tv_nsec + tomono.tv_nsec + nsecs);
291}
292EXPORT_SYMBOL_GPL(ktime_get_ts);
293
128/** 294/**
129 * do_gettimeofday - Returns the time of day in a timeval 295 * do_gettimeofday - Returns the time of day in a timeval
130 * @tv: pointer to the timeval to be set 296 * @tv: pointer to the timeval to be set
@@ -157,7 +323,7 @@ int do_settimeofday(struct timespec *tv)
157 323
158 write_seqlock_irqsave(&xtime_lock, flags); 324 write_seqlock_irqsave(&xtime_lock, flags);
159 325
160 clocksource_forward_now(); 326 timekeeping_forward_now();
161 327
162 ts_delta.tv_sec = tv->tv_sec - xtime.tv_sec; 328 ts_delta.tv_sec = tv->tv_sec - xtime.tv_sec;
163 ts_delta.tv_nsec = tv->tv_nsec - xtime.tv_nsec; 329 ts_delta.tv_nsec = tv->tv_nsec - xtime.tv_nsec;
@@ -167,10 +333,10 @@ int do_settimeofday(struct timespec *tv)
167 333
168 update_xtime_cache(0); 334 update_xtime_cache(0);
169 335
170 clock->error = 0; 336 timekeeper.ntp_error = 0;
171 ntp_clear(); 337 ntp_clear();
172 338
173 update_vsyscall(&xtime, clock); 339 update_vsyscall(&xtime, timekeeper.clock);
174 340
175 write_sequnlock_irqrestore(&xtime_lock, flags); 341 write_sequnlock_irqrestore(&xtime_lock, flags);
176 342
@@ -187,44 +353,97 @@ EXPORT_SYMBOL(do_settimeofday);
187 * 353 *
188 * Accumulates current time interval and initializes new clocksource 354 * Accumulates current time interval and initializes new clocksource
189 */ 355 */
190static void change_clocksource(void) 356static int change_clocksource(void *data)
191{ 357{
192 struct clocksource *new, *old; 358 struct clocksource *new, *old;
193 359
194 new = clocksource_get_next(); 360 new = (struct clocksource *) data;
361
362 timekeeping_forward_now();
363 if (!new->enable || new->enable(new) == 0) {
364 old = timekeeper.clock;
365 timekeeper_setup_internals(new);
366 if (old->disable)
367 old->disable(old);
368 }
369 return 0;
370}
195 371
196 if (clock == new) 372/**
373 * timekeeping_notify - Install a new clock source
374 * @clock: pointer to the clock source
375 *
376 * This function is called from clocksource.c after a new, better clock
377 * source has been registered. The caller holds the clocksource_mutex.
378 */
379void timekeeping_notify(struct clocksource *clock)
380{
381 if (timekeeper.clock == clock)
197 return; 382 return;
383 stop_machine(change_clocksource, clock, NULL);
384 tick_clock_notify();
385}
198 386
199 clocksource_forward_now(); 387#else /* GENERIC_TIME */
200 388
201 if (clocksource_enable(new)) 389static inline void timekeeping_forward_now(void) { }
202 return;
203 390
204 new->raw_time = clock->raw_time; 391/**
205 old = clock; 392 * ktime_get - get the monotonic time in ktime_t format
206 clock = new; 393 *
207 clocksource_disable(old); 394 * returns the time in ktime_t format
395 */
396ktime_t ktime_get(void)
397{
398 struct timespec now;
208 399
209 clock->cycle_last = 0; 400 ktime_get_ts(&now);
210 clock->cycle_last = clocksource_read(clock);
211 clock->error = 0;
212 clock->xtime_nsec = 0;
213 clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
214 401
215 tick_clock_notify(); 402 return timespec_to_ktime(now);
403}
404EXPORT_SYMBOL_GPL(ktime_get);
216 405
217 /* 406/**
218 * We're holding xtime lock and waking up klogd would deadlock 407 * ktime_get_ts - get the monotonic clock in timespec format
219 * us on enqueue. So no printing! 408 * @ts: pointer to timespec variable
220 printk(KERN_INFO "Time: %s clocksource has been installed.\n", 409 *
221 clock->name); 410 * The function calculates the monotonic clock from the realtime
222 */ 411 * clock and the wall_to_monotonic offset and stores the result
412 * in normalized timespec format in the variable pointed to by @ts.
413 */
414void ktime_get_ts(struct timespec *ts)
415{
416 struct timespec tomono;
417 unsigned long seq;
418
419 do {
420 seq = read_seqbegin(&xtime_lock);
421 getnstimeofday(ts);
422 tomono = wall_to_monotonic;
423
424 } while (read_seqretry(&xtime_lock, seq));
425
426 set_normalized_timespec(ts, ts->tv_sec + tomono.tv_sec,
427 ts->tv_nsec + tomono.tv_nsec);
223} 428}
224#else 429EXPORT_SYMBOL_GPL(ktime_get_ts);
225static inline void clocksource_forward_now(void) { } 430
226static inline void change_clocksource(void) { } 431#endif /* !GENERIC_TIME */
227#endif 432
433/**
434 * ktime_get_real - get the real (wall-) time in ktime_t format
435 *
436 * returns the time in ktime_t format
437 */
438ktime_t ktime_get_real(void)
439{
440 struct timespec now;
441
442 getnstimeofday(&now);
443
444 return timespec_to_ktime(now);
445}
446EXPORT_SYMBOL_GPL(ktime_get_real);
228 447
229/** 448/**
230 * getrawmonotonic - Returns the raw monotonic time in a timespec 449 * getrawmonotonic - Returns the raw monotonic time in a timespec
@@ -236,21 +455,11 @@ void getrawmonotonic(struct timespec *ts)
236{ 455{
237 unsigned long seq; 456 unsigned long seq;
238 s64 nsecs; 457 s64 nsecs;
239 cycle_t cycle_now, cycle_delta;
240 458
241 do { 459 do {
242 seq = read_seqbegin(&xtime_lock); 460 seq = read_seqbegin(&xtime_lock);
243 461 nsecs = timekeeping_get_ns_raw();
244 /* read clocksource: */ 462 *ts = raw_time;
245 cycle_now = clocksource_read(clock);
246
247 /* calculate the delta since the last update_wall_time: */
248 cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
249
250 /* convert to nanoseconds: */
251 nsecs = ((s64)cycle_delta * clock->mult_orig) >> clock->shift;
252
253 *ts = clock->raw_time;
254 463
255 } while (read_seqretry(&xtime_lock, seq)); 464 } while (read_seqretry(&xtime_lock, seq));
256 465
@@ -270,7 +479,7 @@ int timekeeping_valid_for_hres(void)
270 do { 479 do {
271 seq = read_seqbegin(&xtime_lock); 480 seq = read_seqbegin(&xtime_lock);
272 481
273 ret = clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; 482 ret = timekeeper.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
274 483
275 } while (read_seqretry(&xtime_lock, seq)); 484 } while (read_seqretry(&xtime_lock, seq));
276 485
@@ -278,17 +487,33 @@ int timekeeping_valid_for_hres(void)
278} 487}
279 488
280/** 489/**
281 * read_persistent_clock - Return time in seconds from the persistent clock. 490 * read_persistent_clock - Return time from the persistent clock.
282 * 491 *
283 * Weak dummy function for arches that do not yet support it. 492 * Weak dummy function for arches that do not yet support it.
284 * Returns seconds from epoch using the battery backed persistent clock. 493 * Reads the time from the battery backed persistent clock.
285 * Returns zero if unsupported. 494 * Returns a timespec with tv_sec=0 and tv_nsec=0 if unsupported.
286 * 495 *
287 * XXX - Do be sure to remove it once all arches implement it. 496 * XXX - Do be sure to remove it once all arches implement it.
288 */ 497 */
289unsigned long __attribute__((weak)) read_persistent_clock(void) 498void __attribute__((weak)) read_persistent_clock(struct timespec *ts)
290{ 499{
291 return 0; 500 ts->tv_sec = 0;
501 ts->tv_nsec = 0;
502}
503
504/**
505 * read_boot_clock - Return time of the system start.
506 *
507 * Weak dummy function for arches that do not yet support it.
508 * Function to read the exact time the system has been started.
509 * Returns a timespec with tv_sec=0 and tv_nsec=0 if unsupported.
510 *
511 * XXX - Do be sure to remove it once all arches implement it.
512 */
513void __attribute__((weak)) read_boot_clock(struct timespec *ts)
514{
515 ts->tv_sec = 0;
516 ts->tv_nsec = 0;
292} 517}
293 518
294/* 519/*
@@ -296,29 +521,40 @@ unsigned long __attribute__((weak)) read_persistent_clock(void)
296 */ 521 */
297void __init timekeeping_init(void) 522void __init timekeeping_init(void)
298{ 523{
524 struct clocksource *clock;
299 unsigned long flags; 525 unsigned long flags;
300 unsigned long sec = read_persistent_clock(); 526 struct timespec now, boot;
527
528 read_persistent_clock(&now);
529 read_boot_clock(&boot);
301 530
302 write_seqlock_irqsave(&xtime_lock, flags); 531 write_seqlock_irqsave(&xtime_lock, flags);
303 532
304 ntp_init(); 533 ntp_init();
305 534
306 clock = clocksource_get_next(); 535 clock = clocksource_default_clock();
307 clocksource_enable(clock); 536 if (clock->enable)
308 clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); 537 clock->enable(clock);
309 clock->cycle_last = clocksource_read(clock); 538 timekeeper_setup_internals(clock);
310 539
311 xtime.tv_sec = sec; 540 xtime.tv_sec = now.tv_sec;
312 xtime.tv_nsec = 0; 541 xtime.tv_nsec = now.tv_nsec;
542 raw_time.tv_sec = 0;
543 raw_time.tv_nsec = 0;
544 if (boot.tv_sec == 0 && boot.tv_nsec == 0) {
545 boot.tv_sec = xtime.tv_sec;
546 boot.tv_nsec = xtime.tv_nsec;
547 }
313 set_normalized_timespec(&wall_to_monotonic, 548 set_normalized_timespec(&wall_to_monotonic,
314 -xtime.tv_sec, -xtime.tv_nsec); 549 -boot.tv_sec, -boot.tv_nsec);
315 update_xtime_cache(0); 550 update_xtime_cache(0);
316 total_sleep_time = 0; 551 total_sleep_time.tv_sec = 0;
552 total_sleep_time.tv_nsec = 0;
317 write_sequnlock_irqrestore(&xtime_lock, flags); 553 write_sequnlock_irqrestore(&xtime_lock, flags);
318} 554}
319 555
320/* time in seconds when suspend began */ 556/* time in seconds when suspend began */
321static unsigned long timekeeping_suspend_time; 557static struct timespec timekeeping_suspend_time;
322 558
323/** 559/**
324 * timekeeping_resume - Resumes the generic timekeeping subsystem. 560 * timekeeping_resume - Resumes the generic timekeeping subsystem.
@@ -331,24 +567,24 @@ static unsigned long timekeeping_suspend_time;
331static int timekeeping_resume(struct sys_device *dev) 567static int timekeeping_resume(struct sys_device *dev)
332{ 568{
333 unsigned long flags; 569 unsigned long flags;
334 unsigned long now = read_persistent_clock(); 570 struct timespec ts;
571
572 read_persistent_clock(&ts);
335 573
336 clocksource_resume(); 574 clocksource_resume();
337 575
338 write_seqlock_irqsave(&xtime_lock, flags); 576 write_seqlock_irqsave(&xtime_lock, flags);
339 577
340 if (now && (now > timekeeping_suspend_time)) { 578 if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) {
341 unsigned long sleep_length = now - timekeeping_suspend_time; 579 ts = timespec_sub(ts, timekeeping_suspend_time);
342 580 xtime = timespec_add_safe(xtime, ts);
343 xtime.tv_sec += sleep_length; 581 wall_to_monotonic = timespec_sub(wall_to_monotonic, ts);
344 wall_to_monotonic.tv_sec -= sleep_length; 582 total_sleep_time = timespec_add_safe(total_sleep_time, ts);
345 total_sleep_time += sleep_length;
346 } 583 }
347 update_xtime_cache(0); 584 update_xtime_cache(0);
348 /* re-base the last cycle value */ 585 /* re-base the last cycle value */
349 clock->cycle_last = 0; 586 timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
350 clock->cycle_last = clocksource_read(clock); 587 timekeeper.ntp_error = 0;
351 clock->error = 0;
352 timekeeping_suspended = 0; 588 timekeeping_suspended = 0;
353 write_sequnlock_irqrestore(&xtime_lock, flags); 589 write_sequnlock_irqrestore(&xtime_lock, flags);
354 590
@@ -366,10 +602,10 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state)
366{ 602{
367 unsigned long flags; 603 unsigned long flags;
368 604
369 timekeeping_suspend_time = read_persistent_clock(); 605 read_persistent_clock(&timekeeping_suspend_time);
370 606
371 write_seqlock_irqsave(&xtime_lock, flags); 607 write_seqlock_irqsave(&xtime_lock, flags);
372 clocksource_forward_now(); 608 timekeeping_forward_now();
373 timekeeping_suspended = 1; 609 timekeeping_suspended = 1;
374 write_sequnlock_irqrestore(&xtime_lock, flags); 610 write_sequnlock_irqrestore(&xtime_lock, flags);
375 611
@@ -404,7 +640,7 @@ device_initcall(timekeeping_init_device);
404 * If the error is already larger, we look ahead even further 640 * If the error is already larger, we look ahead even further
405 * to compensate for late or lost adjustments. 641 * to compensate for late or lost adjustments.
406 */ 642 */
407static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, 643static __always_inline int timekeeping_bigadjust(s64 error, s64 *interval,
408 s64 *offset) 644 s64 *offset)
409{ 645{
410 s64 tick_error, i; 646 s64 tick_error, i;
@@ -420,7 +656,7 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval,
420 * here. This is tuned so that an error of about 1 msec is adjusted 656 * here. This is tuned so that an error of about 1 msec is adjusted
421 * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks). 657 * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks).
422 */ 658 */
423 error2 = clock->error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ); 659 error2 = timekeeper.ntp_error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ);
424 error2 = abs(error2); 660 error2 = abs(error2);
425 for (look_ahead = 0; error2 > 0; look_ahead++) 661 for (look_ahead = 0; error2 > 0; look_ahead++)
426 error2 >>= 2; 662 error2 >>= 2;
@@ -429,8 +665,8 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval,
429 * Now calculate the error in (1 << look_ahead) ticks, but first 665 * Now calculate the error in (1 << look_ahead) ticks, but first
430 * remove the single look ahead already included in the error. 666 * remove the single look ahead already included in the error.
431 */ 667 */
432 tick_error = tick_length >> (NTP_SCALE_SHIFT - clock->shift + 1); 668 tick_error = tick_length >> (timekeeper.ntp_error_shift + 1);
433 tick_error -= clock->xtime_interval >> 1; 669 tick_error -= timekeeper.xtime_interval >> 1;
434 error = ((error - tick_error) >> look_ahead) + tick_error; 670 error = ((error - tick_error) >> look_ahead) + tick_error;
435 671
436 /* Finally calculate the adjustment shift value. */ 672 /* Finally calculate the adjustment shift value. */
@@ -455,18 +691,18 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval,
455 * this is optimized for the most common adjustments of -1,0,1, 691 * this is optimized for the most common adjustments of -1,0,1,
456 * for other values we can do a bit more work. 692 * for other values we can do a bit more work.
457 */ 693 */
458static void clocksource_adjust(s64 offset) 694static void timekeeping_adjust(s64 offset)
459{ 695{
460 s64 error, interval = clock->cycle_interval; 696 s64 error, interval = timekeeper.cycle_interval;
461 int adj; 697 int adj;
462 698
463 error = clock->error >> (NTP_SCALE_SHIFT - clock->shift - 1); 699 error = timekeeper.ntp_error >> (timekeeper.ntp_error_shift - 1);
464 if (error > interval) { 700 if (error > interval) {
465 error >>= 2; 701 error >>= 2;
466 if (likely(error <= interval)) 702 if (likely(error <= interval))
467 adj = 1; 703 adj = 1;
468 else 704 else
469 adj = clocksource_bigadjust(error, &interval, &offset); 705 adj = timekeeping_bigadjust(error, &interval, &offset);
470 } else if (error < -interval) { 706 } else if (error < -interval) {
471 error >>= 2; 707 error >>= 2;
472 if (likely(error >= -interval)) { 708 if (likely(error >= -interval)) {
@@ -474,15 +710,15 @@ static void clocksource_adjust(s64 offset)
474 interval = -interval; 710 interval = -interval;
475 offset = -offset; 711 offset = -offset;
476 } else 712 } else
477 adj = clocksource_bigadjust(error, &interval, &offset); 713 adj = timekeeping_bigadjust(error, &interval, &offset);
478 } else 714 } else
479 return; 715 return;
480 716
481 clock->mult += adj; 717 timekeeper.mult += adj;
482 clock->xtime_interval += interval; 718 timekeeper.xtime_interval += interval;
483 clock->xtime_nsec -= offset; 719 timekeeper.xtime_nsec -= offset;
484 clock->error -= (interval - offset) << 720 timekeeper.ntp_error -= (interval - offset) <<
485 (NTP_SCALE_SHIFT - clock->shift); 721 timekeeper.ntp_error_shift;
486} 722}
487 723
488/** 724/**
@@ -492,53 +728,59 @@ static void clocksource_adjust(s64 offset)
492 */ 728 */
493void update_wall_time(void) 729void update_wall_time(void)
494{ 730{
731 struct clocksource *clock;
495 cycle_t offset; 732 cycle_t offset;
733 u64 nsecs;
496 734
497 /* Make sure we're fully resumed: */ 735 /* Make sure we're fully resumed: */
498 if (unlikely(timekeeping_suspended)) 736 if (unlikely(timekeeping_suspended))
499 return; 737 return;
500 738
739 clock = timekeeper.clock;
501#ifdef CONFIG_GENERIC_TIME 740#ifdef CONFIG_GENERIC_TIME
502 offset = (clocksource_read(clock) - clock->cycle_last) & clock->mask; 741 offset = (clock->read(clock) - clock->cycle_last) & clock->mask;
503#else 742#else
504 offset = clock->cycle_interval; 743 offset = timekeeper.cycle_interval;
505#endif 744#endif
506 clock->xtime_nsec = (s64)xtime.tv_nsec << clock->shift; 745 timekeeper.xtime_nsec = (s64)xtime.tv_nsec << timekeeper.shift;
507 746
508 /* normally this loop will run just once, however in the 747 /* normally this loop will run just once, however in the
509 * case of lost or late ticks, it will accumulate correctly. 748 * case of lost or late ticks, it will accumulate correctly.
510 */ 749 */
511 while (offset >= clock->cycle_interval) { 750 while (offset >= timekeeper.cycle_interval) {
751 u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift;
752
512 /* accumulate one interval */ 753 /* accumulate one interval */
513 offset -= clock->cycle_interval; 754 offset -= timekeeper.cycle_interval;
514 clock->cycle_last += clock->cycle_interval; 755 clock->cycle_last += timekeeper.cycle_interval;
515 756
516 clock->xtime_nsec += clock->xtime_interval; 757 timekeeper.xtime_nsec += timekeeper.xtime_interval;
517 if (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) { 758 if (timekeeper.xtime_nsec >= nsecps) {
518 clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift; 759 timekeeper.xtime_nsec -= nsecps;
519 xtime.tv_sec++; 760 xtime.tv_sec++;
520 second_overflow(); 761 second_overflow();
521 } 762 }
522 763
523 clock->raw_time.tv_nsec += clock->raw_interval; 764 raw_time.tv_nsec += timekeeper.raw_interval;
524 if (clock->raw_time.tv_nsec >= NSEC_PER_SEC) { 765 if (raw_time.tv_nsec >= NSEC_PER_SEC) {
525 clock->raw_time.tv_nsec -= NSEC_PER_SEC; 766 raw_time.tv_nsec -= NSEC_PER_SEC;
526 clock->raw_time.tv_sec++; 767 raw_time.tv_sec++;
527 } 768 }
528 769
529 /* accumulate error between NTP and clock interval */ 770 /* accumulate error between NTP and clock interval */
530 clock->error += tick_length; 771 timekeeper.ntp_error += tick_length;
531 clock->error -= clock->xtime_interval << (NTP_SCALE_SHIFT - clock->shift); 772 timekeeper.ntp_error -= timekeeper.xtime_interval <<
773 timekeeper.ntp_error_shift;
532 } 774 }
533 775
534 /* correct the clock when NTP error is too big */ 776 /* correct the clock when NTP error is too big */
535 clocksource_adjust(offset); 777 timekeeping_adjust(offset);
536 778
537 /* 779 /*
538 * Since in the loop above, we accumulate any amount of time 780 * Since in the loop above, we accumulate any amount of time
539 * in xtime_nsec over a second into xtime.tv_sec, its possible for 781 * in xtime_nsec over a second into xtime.tv_sec, its possible for
540 * xtime_nsec to be fairly small after the loop. Further, if we're 782 * xtime_nsec to be fairly small after the loop. Further, if we're
541 * slightly speeding the clocksource up in clocksource_adjust(), 783 * slightly speeding the clocksource up in timekeeping_adjust(),
542 * its possible the required corrective factor to xtime_nsec could 784 * its possible the required corrective factor to xtime_nsec could
543 * cause it to underflow. 785 * cause it to underflow.
544 * 786 *
@@ -550,24 +792,25 @@ void update_wall_time(void)
550 * We'll correct this error next time through this function, when 792 * We'll correct this error next time through this function, when
551 * xtime_nsec is not as small. 793 * xtime_nsec is not as small.
552 */ 794 */
553 if (unlikely((s64)clock->xtime_nsec < 0)) { 795 if (unlikely((s64)timekeeper.xtime_nsec < 0)) {
554 s64 neg = -(s64)clock->xtime_nsec; 796 s64 neg = -(s64)timekeeper.xtime_nsec;
555 clock->xtime_nsec = 0; 797 timekeeper.xtime_nsec = 0;
556 clock->error += neg << (NTP_SCALE_SHIFT - clock->shift); 798 timekeeper.ntp_error += neg << timekeeper.ntp_error_shift;
557 } 799 }
558 800
559 /* store full nanoseconds into xtime after rounding it up and 801 /* store full nanoseconds into xtime after rounding it up and
560 * add the remainder to the error difference. 802 * add the remainder to the error difference.
561 */ 803 */
562 xtime.tv_nsec = ((s64)clock->xtime_nsec >> clock->shift) + 1; 804 xtime.tv_nsec = ((s64) timekeeper.xtime_nsec >> timekeeper.shift) + 1;
563 clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift; 805 timekeeper.xtime_nsec -= (s64) xtime.tv_nsec << timekeeper.shift;
564 clock->error += clock->xtime_nsec << (NTP_SCALE_SHIFT - clock->shift); 806 timekeeper.ntp_error += timekeeper.xtime_nsec <<
807 timekeeper.ntp_error_shift;
565 808
566 update_xtime_cache(cyc2ns(clock, offset)); 809 nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift);
810 update_xtime_cache(nsecs);
567 811
568 /* check to see if there is a new clocksource to use */ 812 /* check to see if there is a new clocksource to use */
569 change_clocksource(); 813 update_vsyscall(&xtime, timekeeper.clock);
570 update_vsyscall(&xtime, clock);
571} 814}
572 815
573/** 816/**
@@ -583,9 +826,12 @@ void update_wall_time(void)
583 */ 826 */
584void getboottime(struct timespec *ts) 827void getboottime(struct timespec *ts)
585{ 828{
586 set_normalized_timespec(ts, 829 struct timespec boottime = {
587 - (wall_to_monotonic.tv_sec + total_sleep_time), 830 .tv_sec = wall_to_monotonic.tv_sec + total_sleep_time.tv_sec,
588 - wall_to_monotonic.tv_nsec); 831 .tv_nsec = wall_to_monotonic.tv_nsec + total_sleep_time.tv_nsec
832 };
833
834 set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec);
589} 835}
590 836
591/** 837/**
@@ -594,7 +840,7 @@ void getboottime(struct timespec *ts)
594 */ 840 */
595void monotonic_to_bootbased(struct timespec *ts) 841void monotonic_to_bootbased(struct timespec *ts)
596{ 842{
597 ts->tv_sec += total_sleep_time; 843 *ts = timespec_add_safe(*ts, total_sleep_time);
598} 844}
599 845
600unsigned long get_seconds(void) 846unsigned long get_seconds(void)
@@ -603,6 +849,10 @@ unsigned long get_seconds(void)
603} 849}
604EXPORT_SYMBOL(get_seconds); 850EXPORT_SYMBOL(get_seconds);
605 851
852struct timespec __current_kernel_time(void)
853{
854 return xtime_cache;
855}
606 856
607struct timespec current_kernel_time(void) 857struct timespec current_kernel_time(void)
608{ 858{
@@ -618,3 +868,20 @@ struct timespec current_kernel_time(void)
618 return now; 868 return now;
619} 869}
620EXPORT_SYMBOL(current_kernel_time); 870EXPORT_SYMBOL(current_kernel_time);
871
872struct timespec get_monotonic_coarse(void)
873{
874 struct timespec now, mono;
875 unsigned long seq;
876
877 do {
878 seq = read_seqbegin(&xtime_lock);
879
880 now = xtime_cache;
881 mono = wall_to_monotonic;
882 } while (read_seqretry(&xtime_lock, seq));
883
884 set_normalized_timespec(&now, now.tv_sec + mono.tv_sec,
885 now.tv_nsec + mono.tv_nsec);
886 return now;
887}
diff --git a/kernel/timer.c b/kernel/timer.c
index a3d25f41501..bbb51074680 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -72,6 +72,7 @@ struct tvec_base {
72 spinlock_t lock; 72 spinlock_t lock;
73 struct timer_list *running_timer; 73 struct timer_list *running_timer;
74 unsigned long timer_jiffies; 74 unsigned long timer_jiffies;
75 unsigned long next_timer;
75 struct tvec_root tv1; 76 struct tvec_root tv1;
76 struct tvec tv2; 77 struct tvec tv2;
77 struct tvec tv3; 78 struct tvec tv3;
@@ -622,6 +623,9 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
622 623
623 if (timer_pending(timer)) { 624 if (timer_pending(timer)) {
624 detach_timer(timer, 0); 625 detach_timer(timer, 0);
626 if (timer->expires == base->next_timer &&
627 !tbase_get_deferrable(timer->base))
628 base->next_timer = base->timer_jiffies;
625 ret = 1; 629 ret = 1;
626 } else { 630 } else {
627 if (pending_only) 631 if (pending_only)
@@ -663,6 +667,9 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
663 } 667 }
664 668
665 timer->expires = expires; 669 timer->expires = expires;
670 if (time_before(timer->expires, base->next_timer) &&
671 !tbase_get_deferrable(timer->base))
672 base->next_timer = timer->expires;
666 internal_add_timer(base, timer); 673 internal_add_timer(base, timer);
667 674
668out_unlock: 675out_unlock:
@@ -781,6 +788,9 @@ void add_timer_on(struct timer_list *timer, int cpu)
781 spin_lock_irqsave(&base->lock, flags); 788 spin_lock_irqsave(&base->lock, flags);
782 timer_set_base(timer, base); 789 timer_set_base(timer, base);
783 debug_timer_activate(timer); 790 debug_timer_activate(timer);
791 if (time_before(timer->expires, base->next_timer) &&
792 !tbase_get_deferrable(timer->base))
793 base->next_timer = timer->expires;
784 internal_add_timer(base, timer); 794 internal_add_timer(base, timer);
785 /* 795 /*
786 * Check whether the other CPU is idle and needs to be 796 * Check whether the other CPU is idle and needs to be
@@ -817,6 +827,9 @@ int del_timer(struct timer_list *timer)
817 base = lock_timer_base(timer, &flags); 827 base = lock_timer_base(timer, &flags);
818 if (timer_pending(timer)) { 828 if (timer_pending(timer)) {
819 detach_timer(timer, 1); 829 detach_timer(timer, 1);
830 if (timer->expires == base->next_timer &&
831 !tbase_get_deferrable(timer->base))
832 base->next_timer = base->timer_jiffies;
820 ret = 1; 833 ret = 1;
821 } 834 }
822 spin_unlock_irqrestore(&base->lock, flags); 835 spin_unlock_irqrestore(&base->lock, flags);
@@ -850,6 +863,9 @@ int try_to_del_timer_sync(struct timer_list *timer)
850 ret = 0; 863 ret = 0;
851 if (timer_pending(timer)) { 864 if (timer_pending(timer)) {
852 detach_timer(timer, 1); 865 detach_timer(timer, 1);
866 if (timer->expires == base->next_timer &&
867 !tbase_get_deferrable(timer->base))
868 base->next_timer = base->timer_jiffies;
853 ret = 1; 869 ret = 1;
854 } 870 }
855out: 871out:
@@ -1007,8 +1023,8 @@ static inline void __run_timers(struct tvec_base *base)
1007#ifdef CONFIG_NO_HZ 1023#ifdef CONFIG_NO_HZ
1008/* 1024/*
1009 * Find out when the next timer event is due to happen. This 1025 * Find out when the next timer event is due to happen. This
1010 * is used on S/390 to stop all activity when a cpus is idle. 1026 * is used on S/390 to stop all activity when a CPU is idle.
1011 * This functions needs to be called disabled. 1027 * This function needs to be called with interrupts disabled.
1012 */ 1028 */
1013static unsigned long __next_timer_interrupt(struct tvec_base *base) 1029static unsigned long __next_timer_interrupt(struct tvec_base *base)
1014{ 1030{
@@ -1134,7 +1150,9 @@ unsigned long get_next_timer_interrupt(unsigned long now)
1134 unsigned long expires; 1150 unsigned long expires;
1135 1151
1136 spin_lock(&base->lock); 1152 spin_lock(&base->lock);
1137 expires = __next_timer_interrupt(base); 1153 if (time_before_eq(base->next_timer, base->timer_jiffies))
1154 base->next_timer = __next_timer_interrupt(base);
1155 expires = base->next_timer;
1138 spin_unlock(&base->lock); 1156 spin_unlock(&base->lock);
1139 1157
1140 if (time_before_eq(expires, now)) 1158 if (time_before_eq(expires, now))
@@ -1522,6 +1540,7 @@ static int __cpuinit init_timers_cpu(int cpu)
1522 INIT_LIST_HEAD(base->tv1.vec + j); 1540 INIT_LIST_HEAD(base->tv1.vec + j);
1523 1541
1524 base->timer_jiffies = jiffies; 1542 base->timer_jiffies = jiffies;
1543 base->next_timer = base->timer_jiffies;
1525 return 0; 1544 return 0;
1526} 1545}
1527 1546
@@ -1534,6 +1553,9 @@ static void migrate_timer_list(struct tvec_base *new_base, struct list_head *hea
1534 timer = list_first_entry(head, struct timer_list, entry); 1553 timer = list_first_entry(head, struct timer_list, entry);
1535 detach_timer(timer, 0); 1554 detach_timer(timer, 0);
1536 timer_set_base(timer, new_base); 1555 timer_set_base(timer, new_base);
1556 if (time_before(timer->expires, new_base->next_timer) &&
1557 !tbase_get_deferrable(timer->base))
1558 new_base->next_timer = timer->expires;
1537 internal_add_timer(new_base, timer); 1559 internal_add_timer(new_base, timer);
1538 } 1560 }
1539} 1561}