 include/linux/timekeeping.h |   2 ++
 kernel/time/timekeeping.c   | 124 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 126 insertions(+), 0 deletions(-)
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 58ad7eff83ff..1caa6b04fdc5 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -164,6 +164,8 @@ static inline u64 ktime_get_raw_ns(void)
 	return ktime_to_ns(ktime_get_raw());
 }
 
+extern u64 ktime_get_mono_fast_ns(void);
+
 /*
  * Timespec interfaces utilizing the ktime based ones
  */
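The header change simply exports the new accessor. The point of the fast path is that, unlike ktime_get(), it never waits for an in-flight update to complete, so an NMI that interrupts the update side on the same CPU cannot live-lock the reader. A minimal, hypothetical caller sketch (my_nmi_timestamp() is an illustrative name, not kernel API; only ktime_get_mono_fast_ns() comes from this patch):

	/*
	 * Hypothetical example, not part of this patch: timestamping
	 * from NMI context.
	 */
	static u64 my_nmi_timestamp(void)
	{
		/*
		 * Cannot spin indefinitely: an interrupted updater cannot
		 * advance the sequence count, so the retry loop terminates.
		 * At worst the result is a few nanoseconds off across a
		 * concurrent timekeeper update.
		 */
		return ktime_get_mono_fast_ns();
	}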
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index dee23c9d6c21..8980fb722fc5 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -44,6 +44,22 @@ static struct {
 static DEFINE_RAW_SPINLOCK(timekeeper_lock);
 static struct timekeeper shadow_timekeeper;
 
+/**
+ * struct tk_fast - NMI safe timekeeper
+ * @seq:	Sequence counter for protecting updates. The lowest bit
+ *		is the index for the tk_read_base array
+ * @base:	tk_read_base array. Access is indexed by the lowest bit of
+ *		@seq.
+ *
+ * See @update_fast_timekeeper() below.
+ */
+struct tk_fast {
+	seqcount_t		seq;
+	struct tk_read_base	base[2];
+};
+
+static struct tk_fast tk_fast_mono ____cacheline_aligned;
+
 /* flag for if timekeeping is suspended */
 int __read_mostly timekeeping_suspended;
 
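Note the ____cacheline_aligned on tk_fast_mono: the reader touches @seq plus one of the two read bases on every call, so aligning the structure keeps that hot data on as few cache lines as possible and avoids false sharing with adjacent variables.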
@@ -210,6 +226,112 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
 	return nsec + arch_gettimeoffset();
 }
 
+/**
+ * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
+ * @tk:	The timekeeper from which we take the update
+ *
+ * We want to use this from any context including NMI and tracing /
+ * instrumenting the timekeeping code itself.
+ *
+ * So we handle this differently from the other timekeeping accessor
+ * functions, which retry when the sequence count has changed. The
+ * update side does:
+ *
+ *	smp_wmb();	<- Ensure that the last base[1] update is visible
+ *	tkf->seq++;
+ *	smp_wmb();	<- Ensure that the seqcount update is visible
+ *	update(tkf->base[0], tk);
+ *	smp_wmb();	<- Ensure that the base[0] update is visible
+ *	tkf->seq++;
+ *	smp_wmb();	<- Ensure that the seqcount update is visible
+ *	update(tkf->base[1], tk);
+ *
+ * The reader side does:
+ *
+ *	do {
+ *		seq = tkf->seq;
+ *		smp_rmb();
+ *		idx = seq & 0x01;
+ *		now = now(tkf->base[idx]);
+ *		smp_rmb();
+ *	} while (seq != tkf->seq);
+ *
+ * As long as we update base[0] readers are forced off to
+ * base[1]. Once base[0] is updated readers are redirected to base[0]
+ * and the base[1] update takes place.
+ *
+ * So if an NMI hits the update of base[0] then it will use base[1]
+ * which is still consistent. In the worst case this can result in a
+ * slightly wrong timestamp (a few nanoseconds). See
+ * @ktime_get_mono_fast_ns.
+ */
+static void update_fast_timekeeper(struct timekeeper *tk)
+{
+	struct tk_read_base *base = tk_fast_mono.base;
+
+	/* Force readers off to base[1] */
+	raw_write_seqcount_latch(&tk_fast_mono.seq);
+
+	/* Update base[0] */
+	memcpy(base, &tk->tkr, sizeof(*base));
+
+	/* Force readers back to base[0] */
+	raw_write_seqcount_latch(&tk_fast_mono.seq);
+
+	/* Update base[1] */
+	memcpy(base + 1, base, sizeof(*base));
+}
+
+/**
+ * ktime_get_mono_fast_ns - Fast NMI safe access to clock monotonic
+ *
+ * This timestamp is not guaranteed to be monotonic across an update.
+ * The timestamp is calculated by:
+ *
+ *	now = base_mono + clock_delta * slope
+ *
+ * So if the update lowers the slope, readers that are forced onto the
+ * not yet updated second array are still using the old steeper slope.
+ *
+ *	tmono
+ *	^
+ *	|    o  n
+ *	|   o n
+ *	|  u
+ *	| o
+ *	|o
+ *	|12345678---> reader order
+ *
+ *	o = old slope
+ *	u = update
+ *	n = new slope
+ *
+ * So reader 6 will observe time going backwards versus reader 5.
+ *
+ * While other CPUs are likely to be able to observe that, the only way
+ * to observe it on the local CPU is when an NMI hits in the middle of
+ * the update. Timestamps taken from that NMI context might be ahead
+ * of the following timestamps. Callers need to be aware of that and
+ * deal with it.
+ */
+u64 notrace ktime_get_mono_fast_ns(void)
+{
+	struct tk_read_base *tkr;
+	unsigned int seq;
+	u64 now;
+
+	do {
+		seq = raw_read_seqcount(&tk_fast_mono.seq);
+		tkr = tk_fast_mono.base + (seq & 0x01);
+		now = ktime_to_ns(tkr->base_mono) + timekeeping_get_ns(tkr);
+
+	} while (read_seqcount_retry(&tk_fast_mono.seq, seq));
+	return now;
+}
+EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);
+
 #ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD
 
 static inline void update_vsyscall(struct timekeeper *tk)
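The two raw_write_seqcount_latch() calls above implement the write-side barrier scheme spelled out in the update_fast_timekeeper() comment. For reference, at the time of this patch that helper reduces to roughly the following (a sketch of the seqlock.h primitive, quoted from memory, not part of this diff):

	static inline void raw_write_seqcount_latch(seqcount_t *s)
	{
		smp_wmb();	/* prior stores before incrementing "sequence" */
		s->sequence++;
		smp_wmb();	/* increment "sequence" before following stores */
	}

Each call flips the low bit of the sequence counter, which is what moves readers between base[0] and base[1].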
@@ -325,6 +447,8 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
 	if (action & TK_MIRROR)
 		memcpy(&shadow_timekeeper, &tk_core.timekeeper,
 		       sizeof(tk_core.timekeeper));
+
+	update_fast_timekeeper(tk);
 }
 
 /**
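With this final hunk every timekeeping_update() call also refreshes the fast timekeeper. The update side is serialized by timekeeper_lock (timekeeping_update() runs with it held), so there is exactly one writer at a time and the raw_ seqcount primitives, which skip lockdep tracking, need no further serialization.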