Diffstat (limited to 'kernel/time/timekeeping.c')

 kernel/time/timekeeping.c | 183 +++++++++++++++++++++++------------------------
 1 file changed, 89 insertions(+), 94 deletions(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index d3b91e75cecd..cbc6acb0db3f 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -8,6 +8,7 @@
  *
  */
 
+#include <linux/timekeeper_internal.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/percpu.h>
@@ -20,71 +21,11 @@
 #include <linux/time.h>
 #include <linux/tick.h>
 #include <linux/stop_machine.h>
+#include <linux/pvclock_gtod.h>
 
-/* Structure holding internal timekeeping values. */
-struct timekeeper {
-        /* Current clocksource used for timekeeping. */
-        struct clocksource *clock;
-        /* NTP adjusted clock multiplier */
-        u32 mult;
-        /* The shift value of the current clocksource. */
-        u32 shift;
-        /* Number of clock cycles in one NTP interval. */
-        cycle_t cycle_interval;
-        /* Number of clock shifted nano seconds in one NTP interval. */
-        u64 xtime_interval;
-        /* shifted nano seconds left over when rounding cycle_interval */
-        s64 xtime_remainder;
-        /* Raw nano seconds accumulated per NTP interval. */
-        u32 raw_interval;
-
-        /* Current CLOCK_REALTIME time in seconds */
-        u64 xtime_sec;
-        /* Clock shifted nano seconds */
-        u64 xtime_nsec;
-
-        /* Difference between accumulated time and NTP time in ntp
-         * shifted nano seconds. */
-        s64 ntp_error;
-        /* Shift conversion between clock shifted nano seconds and
-         * ntp shifted nano seconds. */
-        u32 ntp_error_shift;
-
-        /*
-         * wall_to_monotonic is what we need to add to xtime (or xtime corrected
-         * for sub jiffie times) to get to monotonic time. Monotonic is pegged
-         * at zero at system boot time, so wall_to_monotonic will be negative,
-         * however, we will ALWAYS keep the tv_nsec part positive so we can use
-         * the usual normalization.
-         *
-         * wall_to_monotonic is moved after resume from suspend for the
-         * monotonic time not to jump. We need to add total_sleep_time to
-         * wall_to_monotonic to get the real boot based time offset.
-         *
-         * - wall_to_monotonic is no longer the boot time, getboottime must be
-         * used instead.
-         */
-        struct timespec wall_to_monotonic;
-        /* Offset clock monotonic -> clock realtime */
-        ktime_t offs_real;
-        /* time spent in suspend */
-        struct timespec total_sleep_time;
-        /* Offset clock monotonic -> clock boottime */
-        ktime_t offs_boot;
-        /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */
-        struct timespec raw_time;
-        /* Seqlock for all timekeeper values */
-        seqlock_t lock;
-};
 
 static struct timekeeper timekeeper;
 
-/*
- * This read-write spinlock protects us from races in SMP while
- * playing with xtime.
- */
-__cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
-
 /* flag for if timekeeping is suspended */
 int __read_mostly timekeeping_suspended;
 
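[Note: the block comment removed above captures the offset relationships this file maintains: adding wall_to_monotonic to xtime yields CLOCK_MONOTONIC, and adding total_sleep_time on top of that yields the boot-based time. A minimal sketch of that arithmetic, using the stock timespec_add() helper from include/linux/time.h with free-standing variables; nothing here is code from this diff:

        /* CLOCK_MONOTONIC = CLOCK_REALTIME (xtime) + wall_to_monotonic */
        struct timespec mono = timespec_add(xtime, wall_to_monotonic);
        /* CLOCK_BOOTTIME = CLOCK_MONOTONIC + total_sleep_time */
        struct timespec boot = timespec_add(mono, total_sleep_time);
]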
@@ -96,15 +37,6 @@ static inline void tk_normalize_xtime(struct timekeeper *tk)
         }
 }
 
-static struct timespec tk_xtime(struct timekeeper *tk)
-{
-        struct timespec ts;
-
-        ts.tv_sec = tk->xtime_sec;
-        ts.tv_nsec = (long)(tk->xtime_nsec >> tk->shift);
-        return ts;
-}
-
 static void tk_set_xtime(struct timekeeper *tk, const struct timespec *ts)
 {
         tk->xtime_sec = ts->tv_sec;
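[Note: tk_xtime(), removed here alongside struct timekeeper, also documents the shifted-nanosecond representation: xtime_nsec holds nanoseconds scaled by 2^shift, so whole nanoseconds come back out with a right shift. A toy illustration with made-up values, not code from this diff:

        u32 shift = 8;                           /* example clocksource shift */
        u64 xtime_nsec = 123456ULL << shift;     /* 123456 ns in shifted units */
        long nsec = (long)(xtime_nsec >> shift); /* recovers 123456 ns */
]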
@@ -243,17 +175,63 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
         return nsec + arch_gettimeoffset();
 }
 
+static RAW_NOTIFIER_HEAD(pvclock_gtod_chain);
+
+static void update_pvclock_gtod(struct timekeeper *tk)
+{
+        raw_notifier_call_chain(&pvclock_gtod_chain, 0, tk);
+}
+
+/**
+ * pvclock_gtod_register_notifier - register a pvclock timedata update listener
+ *
+ * Must hold write on timekeeper.lock
+ */
+int pvclock_gtod_register_notifier(struct notifier_block *nb)
+{
+        struct timekeeper *tk = &timekeeper;
+        unsigned long flags;
+        int ret;
+
+        write_seqlock_irqsave(&tk->lock, flags);
+        ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb);
+        /* update timekeeping data */
+        update_pvclock_gtod(tk);
+        write_sequnlock_irqrestore(&tk->lock, flags);
+
+        return ret;
+}
+EXPORT_SYMBOL_GPL(pvclock_gtod_register_notifier);
+
+/**
+ * pvclock_gtod_unregister_notifier - unregister a pvclock
+ * timedata update listener
+ *
+ * Must hold write on timekeeper.lock
+ */
+int pvclock_gtod_unregister_notifier(struct notifier_block *nb)
+{
+        struct timekeeper *tk = &timekeeper;
+        unsigned long flags;
+        int ret;
+
+        write_seqlock_irqsave(&tk->lock, flags);
+        ret = raw_notifier_chain_unregister(&pvclock_gtod_chain, nb);
+        write_sequnlock_irqrestore(&tk->lock, flags);
+
+        return ret;
+}
+EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);
+
 /* must hold write on timekeeper.lock */
 static void timekeeping_update(struct timekeeper *tk, bool clearntp)
 {
-        struct timespec xt;
-
         if (clearntp) {
                 tk->ntp_error = 0;
                 ntp_clear();
         }
-        xt = tk_xtime(tk);
-        update_vsyscall(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult);
+        update_vsyscall(tk);
+        update_pvclock_gtod(tk);
 }
 
 /**
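[Note: a consumer of the new chain registers a notifier_block and receives the timekeeper as the callback's data pointer, since raw_notifier_call_chain() above passes tk as the third argument. A hypothetical listener, sketched for illustration only; the names gtod_notify/gtod_nb are not from this diff:

        static int gtod_notify(struct notifier_block *nb,
                               unsigned long unused, void *priv)
        {
                struct timekeeper *tk = priv;   /* passed by the chain */
                /* ... snapshot whatever fields the pvclock consumer needs ... */
                return NOTIFY_OK;
        }

        static struct notifier_block gtod_nb = {
                .notifier_call = gtod_notify,
        };

        /* in init code: */
        pvclock_gtod_register_notifier(&gtod_nb);
]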
@@ -776,6 +754,7 @@ static void timekeeping_resume(void)
 
         read_persistent_clock(&ts);
 
+        clockevents_resume();
         clocksource_resume();
 
         write_seqlock_irqsave(&tk->lock, flags);
@@ -835,6 +814,7 @@ static int timekeeping_suspend(void)
 
         clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
         clocksource_suspend();
+        clockevents_suspend();
 
         return 0;
 }
@@ -1111,7 +1091,7 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
         accumulate_nsecs_to_secs(tk);
 
         /* Accumulate raw time */
-        raw_nsecs = tk->raw_interval << shift;
+        raw_nsecs = (u64)tk->raw_interval << shift;
         raw_nsecs += tk->raw_time.tv_nsec;
         if (raw_nsecs >= NSEC_PER_SEC) {
                 u64 raw_secs = raw_nsecs;
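[Note: the added (u64) cast matters because raw_interval is a u32: without it the left shift is performed in 32-bit arithmetic and can wrap before the result is widened for the assignment. With example values, assuming shift = 9:

        u32 raw_interval = 10000000;          /* ~10 ms per NTP interval */
        u64 wrapped = raw_interval << 9;      /* 5120000000 wraps mod 2^32 -> 825032704 */
        u64 correct = (u64)raw_interval << 9; /* widened first -> 5120000000 */
]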
@@ -1128,6 +1108,33 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
         return offset;
 }
 
+#ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD
+static inline void old_vsyscall_fixup(struct timekeeper *tk)
+{
+        s64 remainder;
+
+        /*
+         * Store only full nanoseconds into xtime_nsec after rounding
+         * it up and add the remainder to the error difference.
+         * XXX - This is necessary to avoid small 1ns inconsistencies caused
+         * by truncating the remainder in vsyscalls. However, it causes
+         * additional work to be done in timekeeping_adjust(). Once
+         * the vsyscall implementations are converted to use xtime_nsec
+         * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD
+         * users are removed, this can be killed.
+         */
+        remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1);
+        tk->xtime_nsec -= remainder;
+        tk->xtime_nsec += 1ULL << tk->shift;
+        tk->ntp_error += remainder << tk->ntp_error_shift;
+
+}
+#else
+#define old_vsyscall_fixup(tk)
+#endif
+
+
+
 /**
  * update_wall_time - Uses the current clocksource to increment the wall time
  *
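[Note: the fixup factored out above rounds xtime_nsec up to the next whole nanosecond and books the rounding error into ntp_error, where timekeeping_adjust() corrects it later. Worked through with an example shift of 8: xtime_nsec = 0x1234 is 18.2 ns in shifted units; remainder = 0x1234 & 0xff = 0x34; subtracting the remainder and adding 1 << 8 leaves 0x1300, exactly 19 ns; and 0x34 << ntp_error_shift is accumulated into ntp_error.]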
@@ -1139,7 +1146,6 @@ static void update_wall_time(void)
         cycle_t offset;
         int shift = 0, maxshift;
         unsigned long flags;
-        s64 remainder;
 
         write_seqlock_irqsave(&tk->lock, flags);
 
@@ -1181,20 +1187,11 @@ static void update_wall_time(void)
         /* correct the clock when NTP error is too big */
         timekeeping_adjust(tk, offset);
 
-
         /*
-         * Store only full nanoseconds into xtime_nsec after rounding
-         * it up and add the remainder to the error difference.
-         * XXX - This is necessary to avoid small 1ns inconsistencies caused
-         * by truncating the remainder in vsyscalls. However, it causes
-         * additional work to be done in timekeeping_adjust(). Once
-         * the vsyscall implementations are converted to use xtime_nsec
-         * (shifted nanoseconds), this can be killed.
+         * XXX This can be killed once everyone converts
+         * to the new update_vsyscall.
          */
-        remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1);
-        tk->xtime_nsec -= remainder;
-        tk->xtime_nsec += 1ULL << tk->shift;
-        tk->ntp_error += remainder << tk->ntp_error_shift;
+        old_vsyscall_fixup(tk);
 
         /*
          * Finally, make sure that after the rounding
@@ -1346,9 +1343,7 @@ struct timespec get_monotonic_coarse(void)
 }
 
 /*
- * The 64-bit jiffies value is not atomic - you MUST NOT read it
- * without sampling the sequence number in xtime_lock.
- * jiffies is defined in the linker script...
+ * Must hold jiffies_lock
  */
 void do_timer(unsigned long ticks)
 {
@@ -1436,7 +1431,7 @@ EXPORT_SYMBOL_GPL(ktime_get_monotonic_offset);
  */
 void xtime_update(unsigned long ticks)
 {
-        write_seqlock(&xtime_lock);
+        write_seqlock(&jiffies_lock);
         do_timer(ticks);
-        write_sequnlock(&xtime_lock);
+        write_sequnlock(&jiffies_lock);
 }
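[Note: with xtime_lock gone, jiffies_64 is protected by jiffies_lock; writers such as xtime_update() take the write side as shown above, while readers sample the sequence counter. The reader side follows the usual seqlock pattern, roughly as in kernel/time/jiffies.c of this era:

        u64 get_jiffies_64(void)
        {
                unsigned long seq;
                u64 ret;

                do {
                        seq = read_seqbegin(&jiffies_lock);
                        ret = jiffies_64;
                } while (read_seqretry(&jiffies_lock, seq));
                return ret;
        }
]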