 include/linux/timekeeping.h |   2 ++
 kernel/time/timekeeping.c   | 124 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 126 insertions(+), 0 deletions(-)
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 58ad7eff83ff..1caa6b04fdc5 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -164,6 +164,8 @@ static inline u64 ktime_get_raw_ns(void)
 	return ktime_to_ns(ktime_get_raw());
 }
 
+extern u64 ktime_get_mono_fast_ns(void);
+
 /*
  * Timespec interfaces utilizing the ktime based ones
  */
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index dee23c9d6c21..8980fb722fc5 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -44,6 +44,22 @@ static struct {
 static DEFINE_RAW_SPINLOCK(timekeeper_lock);
 static struct timekeeper shadow_timekeeper;
 
+/**
+ * struct tk_fast - NMI safe timekeeper
+ * @seq:	Sequence counter for protecting updates. The lowest bit
+ *		is the index for the tk_read_base array
+ * @base:	tk_read_base array. Access is indexed by the lowest bit of
+ *		@seq.
+ *
+ * See @update_fast_timekeeper() below.
+ */
+struct tk_fast {
+	seqcount_t		seq;
+	struct tk_read_base	base[2];
+};
+
+static struct tk_fast tk_fast_mono ____cacheline_aligned;
+
 /* flag for if timekeeping is suspended */
 int __read_mostly timekeeping_suspended;
 
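The struct tk_fast kernel-doc above hinges on one convention: the lowest bit of the sequence counter selects which of the two tk_read_base copies a reader may use. A minimal user-space sketch of that indexing rule, with made-up names (fast_clock, pick_base) used purely for illustration:

```c
#include <stdio.h>

/*
 * Illustrative stand-in for the two-copy layout described above: an even
 * sequence count steers readers to copy 0, an odd count to copy 1, so the
 * copy currently being rewritten is never the one handed to readers.
 */
struct fast_clock {
	unsigned int seq;
	long long    base[2];
};

static long long pick_base(const struct fast_clock *fc)
{
	return fc->base[fc->seq & 0x01];	/* lowest bit selects the copy */
}

int main(void)
{
	struct fast_clock fc = { .seq = 0, .base = { 100, 200 } };

	printf("%lld\n", pick_base(&fc));	/* seq even -> base[0] -> 100 */
	fc.seq++;
	printf("%lld\n", pick_base(&fc));	/* seq odd  -> base[1] -> 200 */
	return 0;
}
```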
@@ -210,6 +226,112 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
 	return nsec + arch_gettimeoffset();
 }
 
+/**
+ * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
+ * @tk:		The timekeeper from which we take the update
+ * @tkf:	The fast timekeeper to update
+ * @tbase:	The time base for the fast timekeeper (mono/raw)
+ *
+ * We want to use this from any context, including NMI and tracing /
+ * instrumenting the timekeeping code itself.
+ *
+ * So we handle this differently from the other timekeeping accessor
+ * functions, which retry when the sequence count has changed. The
+ * update side does:
+ *
+ *	smp_wmb();	<- Ensure that the last base[1] update is visible
+ *	tkf->seq++;
+ *	smp_wmb();	<- Ensure that the seqcount update is visible
+ *	update(tkf->base[0], tk);
+ *	smp_wmb();	<- Ensure that the base[0] update is visible
+ *	tkf->seq++;
+ *	smp_wmb();	<- Ensure that the seqcount update is visible
+ *	update(tkf->base[1], tk);
+ *
+ * The reader side does:
+ *
+ *	do {
+ *		seq = tkf->seq;
+ *		smp_rmb();
+ *		idx = seq & 0x01;
+ *		now = now(tkf->base[idx]);
+ *		smp_rmb();
+ *	} while (seq != tkf->seq)
+ *
+ * As long as we update base[0], readers are forced off to
+ * base[1]. Once base[0] is updated, readers are redirected to base[0]
+ * and the base[1] update takes place.
+ *
+ * So if an NMI hits the update of base[0], it will use base[1],
+ * which is still consistent. In the worst case this can result in a
+ * slightly wrong timestamp (a few nanoseconds). See
+ * @ktime_get_mono_fast_ns.
+ */
+static void update_fast_timekeeper(struct timekeeper *tk)
+{
+	struct tk_read_base *base = tk_fast_mono.base;
+
+	/* Force readers off to base[1] */
+	raw_write_seqcount_latch(&tk_fast_mono.seq);
+
+	/* Update base[0] */
+	memcpy(base, &tk->tkr, sizeof(*base));
+
+	/* Force readers back to base[0] */
+	raw_write_seqcount_latch(&tk_fast_mono.seq);
+
+	/* Update base[1] */
+	memcpy(base + 1, base, sizeof(*base));
+}
+
+/**
+ * ktime_get_mono_fast_ns - Fast NMI safe access to clock monotonic
+ *
+ * This timestamp is not guaranteed to be monotonic across an update.
+ * The timestamp is calculated by:
+ *
+ *	now = base_mono + clock_delta * slope
+ *
+ * So if the update lowers the slope, readers who are forced to the
+ * not yet updated second array are still using the old steeper slope.
+ *
+ * tmono
+ * ^
+ * |    o  n
+ * |   o n
+ * |  u
+ * | o
+ * |o
+ * |12345678---> reader order
+ *
+ * o = old slope
+ * u = update
+ * n = new slope
+ *
+ * So reader 6 will observe time going backwards versus reader 5.
+ *
+ * While other CPUs are likely to be able to observe that, the only
+ * way for a CPU-local observation is when an NMI hits in the middle
+ * of the update. Timestamps taken from that NMI context might be
+ * ahead of the following timestamps. Callers need to be aware of
+ * that and deal with it.
+ */
+u64 notrace ktime_get_mono_fast_ns(void)
+{
+	struct tk_read_base *tkr;
+	unsigned int seq;
+	u64 now;
+
+	do {
+		seq = raw_read_seqcount(&tk_fast_mono.seq);
+		tkr = tk_fast_mono.base + (seq & 0x01);
+		now = ktime_to_ns(tkr->base_mono) + timekeeping_get_ns(tkr);
+
+	} while (read_seqcount_retry(&tk_fast_mono.seq, seq));
+	return now;
+}
+EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);
+
 #ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD
 
 static inline void update_vsyscall(struct timekeeper *tk)
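The update/reader ordering documented in the hunk above is the seqcount latch pattern: the writer bumps the sequence count around each copy it rewrites, so concurrent readers are always steered to the copy that is not being touched, and a reread of the counter discards the rare torn read. The stand-alone C11 sketch below mimics that ordering with explicit fences; the names (fast_clock, fast_update, fast_read) and the use of <stdatomic.h> are assumptions for illustration, not the kernel's raw_write_seqcount_latch()/raw_read_seqcount() implementation.

```c
#include <stdatomic.h>
#include <stdio.h>

struct snapshot {
	long long base;		/* stands in for base_mono, mult, etc. */
	long long mult;
};

struct fast_clock {
	atomic_uint	seq;
	struct snapshot	copy[2];
};

/* Writer: mirrors the documented sequence. Each seq increment flips the
 * copy readers are directed to before the other copy is rewritten. */
static void fast_update(struct fast_clock *fc, const struct snapshot *s)
{
	unsigned int seq = atomic_load_explicit(&fc->seq, memory_order_relaxed);

	atomic_thread_fence(memory_order_seq_cst);	/* last copy[1] write visible first */
	atomic_store_explicit(&fc->seq, ++seq, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* seq visible before copy[0] is touched */
	fc->copy[0] = *s;
	atomic_thread_fence(memory_order_seq_cst);	/* copy[0] write visible first */
	atomic_store_explicit(&fc->seq, ++seq, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* seq visible before copy[1] is touched */
	fc->copy[1] = *s;
}

/* Reader: pick the copy the low bit of seq points at, then reread seq
 * and retry if an update overlapped the read. */
static struct snapshot fast_read(struct fast_clock *fc)
{
	struct snapshot s;
	unsigned int seq;

	do {
		seq = atomic_load_explicit(&fc->seq, memory_order_relaxed);
		atomic_thread_fence(memory_order_seq_cst);
		s = fc->copy[seq & 0x01];
		atomic_thread_fence(memory_order_seq_cst);
	} while (seq != atomic_load_explicit(&fc->seq, memory_order_relaxed));

	return s;
}

int main(void)
{
	struct fast_clock fc = { 0 };
	struct snapshot s = { .base = 1000, .mult = 1 };

	fast_update(&fc, &s);
	printf("base=%lld mult=%lld\n", fast_read(&fc).base, fast_read(&fc).mult);
	return 0;
}
```

Like the kernel version, the sketch relies on the retry loop to discard any torn read of the copy that was being rewritten; a reader that interrupts the writer between the two increments (the NMI case described above) simply lands on the copy that is not being modified.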
@@ -325,6 +447,8 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
 	if (action & TK_MIRROR)
 		memcpy(&shadow_timekeeper, &tk_core.timekeeper,
 		       sizeof(tk_core.timekeeper));
+
+	update_fast_timekeeper(tk);
 }
 
 /**