path: root/kernel/time/timekeeping.c
author		John Stultz <john.stultz@linaro.org>	2017-06-08 19:44:20 -0400
committer	Thomas Gleixner <tglx@linutronix.de>	2017-06-20 04:41:50 -0400
commit		ceea5e3771ed2378668455fa21861bead7504df5 (patch)
tree		3b6fa6a5f0623c27d57c75e7fb6f755e265a23ee /kernel/time/timekeeping.c
parent		41f1830f5a7af77cf5c86359aba3cbd706687e52 (diff)
time: Fix clock->read(clock) race around clocksource changes
In tests which exercise switching of clocksources, a NULL pointer
dereference can be observed on ARM64 platforms in the clocksource
read() function:

	u64 clocksource_mmio_readl_down(struct clocksource *c)
	{
		return ~(u64)readl_relaxed(to_mmio_clksrc(c)->reg) & c->mask;
	}

This is called from the core timekeeping code via:

	cycle_now = tkr->read(tkr->clock);

tkr->read is the cached tkr->clock->read() function pointer. When the
clocksource is changed, tkr->clock and tkr->read are updated
sequentially. The code above results in a sequential load operation of
tkr->read and tkr->clock as well.

If the store to tkr->clock hits between the loads of tkr->read and
tkr->clock, then the old read() function is called with the new clock
pointer. As a consequence the read() function dereferences a different
data structure and the resulting 'reg' pointer can point anywhere,
including NULL.

This problem was introduced when the timekeeping code was switched over
to use struct tk_read_base. Before that, it was theoretically possible
as well when the compiler decided to reload clock in the code sequence:

	now = tk->clock->read(tk->clock);

Add a helper function which avoids the issue by reading
tk_read_base->clock once into a local variable clk and then issuing the
read function via clk->read(clk). This guarantees that the read()
function always gets the proper clocksource pointer handed in.

Since there is now no use for the tkr.read pointer, this patch also
removes it, and to address stopping the fast timekeeper during
suspend/resume, it introduces a dummy clocksource to use rather than
just a dummy read function.

Signed-off-by: John Stultz <john.stultz@linaro.org>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Stephen Boyd <stephen.boyd@linaro.org>
Cc: stable <stable@vger.kernel.org>
Cc: Miroslav Lichvar <mlichvar@redhat.com>
Cc: Daniel Mentz <danielmentz@google.com>
Link: http://lkml.kernel.org/r/1496965462-20003-2-git-send-email-john.stultz@linaro.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
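To make the ordering hazard concrete, here is a minimal userspace sketch
of the fixed pattern. It is illustrative only: the structs are cut-down
stand-ins for the kernel's struct clocksource and struct tk_read_base,
and a C11 atomic load stands in for the kernel's READ_ONCE():

	#include <stdatomic.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Cut-down stand-ins for the kernel structures (illustrative only). */
	struct clocksource {
		uint64_t (*read)(struct clocksource *cs);
		uint64_t mask;
	};

	struct tk_read_base {
		struct clocksource *_Atomic clock; /* replaced on clocksource switch */
	};

	static uint64_t mmio_read(struct clocksource *cs)
	{
		return 42 & cs->mask; /* stands in for an MMIO counter read */
	}

	static struct clocksource mmio_clock = { .read = mmio_read, .mask = ~0ULL };

	/*
	 * Load the clocksource pointer exactly once, then call through that
	 * snapshot, so read() is always paired with the clocksource it
	 * belongs to even if another CPU replaces tkr->clock concurrently.
	 */
	static uint64_t tk_clock_read(struct tk_read_base *tkr)
	{
		struct clocksource *clk =
			atomic_load_explicit(&tkr->clock, memory_order_relaxed);

		return clk->read(clk);
	}

	int main(void)
	{
		struct tk_read_base tkr = { .clock = &mmio_clock };

		printf("cycles: %llu\n", (unsigned long long)tk_clock_read(&tkr));
		return 0;
	}

The racy variant loads the function pointer and the clock pointer
separately; the single snapshot load is the whole fix.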
Diffstat (limited to 'kernel/time/timekeeping.c')
-rw-r--r--	kernel/time/timekeeping.c	52
1 file changed, 36 insertions(+), 16 deletions(-)
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 9652bc57fd09..eff94cb8e89e 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -118,6 +118,26 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
 	tk->offs_boot = ktime_add(tk->offs_boot, delta);
 }
 
+/*
+ * tk_clock_read - atomic clocksource read() helper
+ *
+ * This helper is necessary to use in the read paths because, while the
+ * seqlock ensures we don't return a bad value while structures are updated,
+ * it doesn't protect from potential crashes. There is the possibility that
+ * the tkr's clocksource may change between the read reference, and the
+ * clock reference passed to the read function. This can cause crashes if
+ * the wrong clocksource is passed to the wrong read function.
+ * This isn't necessary to use when holding the timekeeper_lock or doing
+ * a read of the fast-timekeeper tkrs (which is protected by its own locking
+ * and update logic).
+ */
+static inline u64 tk_clock_read(struct tk_read_base *tkr)
+{
+	struct clocksource *clock = READ_ONCE(tkr->clock);
+
+	return clock->read(clock);
+}
+
 #ifdef CONFIG_DEBUG_TIMEKEEPING
 #define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */
 
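The helper above closes the window left open by the old two-load
sequence. An illustrative interleaving (assumed for exposition, not
taken from a trace) of how the removed pattern could misfire:

	/*
	 *  CPU0 (reader)                    CPU1 (clocksource switch)
	 *  -------------                    -------------------------
	 *  read = tkr->read;
	 *                                   tkr->clock = new_clock;
	 *                                   tkr->read  = new_clock->read;
	 *  clock = tkr->clock;
	 *  read(clock);   old read() runs on the new clocksource, so
	 *                 to_mmio_clksrc(clock)->reg can be garbage or NULL
	 */

READ_ONCE() only has to guarantee a single, untorn pointer load that is
then paired with the call; if an update races, the seqcount retry loops
in the callers re-read and restore consistency.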
@@ -175,7 +195,7 @@ static inline u64 timekeeping_get_delta(struct tk_read_base *tkr)
 	 */
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
-		now = tkr->read(tkr->clock);
+		now = tk_clock_read(tkr);
 		last = tkr->cycle_last;
 		mask = tkr->mask;
 		max = tkr->clock->max_cycles;
@@ -209,7 +229,7 @@ static inline u64 timekeeping_get_delta(struct tk_read_base *tkr)
 	u64 cycle_now, delta;
 
 	/* read clocksource */
-	cycle_now = tkr->read(tkr->clock);
+	cycle_now = tk_clock_read(tkr);
 
 	/* calculate the delta since the last update_wall_time */
 	delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
@@ -238,12 +258,10 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
 	++tk->cs_was_changed_seq;
 	old_clock = tk->tkr_mono.clock;
 	tk->tkr_mono.clock = clock;
-	tk->tkr_mono.read = clock->read;
 	tk->tkr_mono.mask = clock->mask;
-	tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock);
+	tk->tkr_mono.cycle_last = tk_clock_read(&tk->tkr_mono);
 
 	tk->tkr_raw.clock = clock;
-	tk->tkr_raw.read = clock->read;
 	tk->tkr_raw.mask = clock->mask;
 	tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;
 
@@ -404,7 +422,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
 
 		now += timekeeping_delta_to_ns(tkr,
 				clocksource_delta(
-					tkr->read(tkr->clock),
+					tk_clock_read(tkr),
 					tkr->cycle_last,
 					tkr->mask));
 	} while (read_seqcount_retry(&tkf->seq, seq));
@@ -461,6 +479,10 @@ static u64 dummy_clock_read(struct clocksource *cs)
 	return cycles_at_suspend;
 }
 
+static struct clocksource dummy_clock = {
+	.read = dummy_clock_read,
+};
+
 /**
  * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource.
  * @tk: Timekeeper to snapshot.
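With tkr->read gone, the suspend path below can no longer park the fast
timekeepers on a bare read() pointer; it parks them on this dummy
clocksource instead. Condensed from this hunk and the next one (all
identifiers are the ones visible in the surrounding code), the
suspend-time sequence becomes:

	memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
	cycles_at_suspend = tk_clock_read(tkr);	/* snapshot the counter */
	tkr_dummy.clock = &dummy_clock;		/* read() now returns the snapshot */
	update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);

Since tk_clock_read() resolves everything through tkr->clock, swapping
in dummy_clock is enough to make every subsequent fast-timekeeper read
return the frozen cycles_at_suspend value until resume.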
@@ -477,13 +499,13 @@ static void halt_fast_timekeeper(struct timekeeper *tk)
 	struct tk_read_base *tkr = &tk->tkr_mono;
 
 	memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
-	cycles_at_suspend = tkr->read(tkr->clock);
-	tkr_dummy.read = dummy_clock_read;
+	cycles_at_suspend = tk_clock_read(tkr);
+	tkr_dummy.clock = &dummy_clock;
 	update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);
 
 	tkr = &tk->tkr_raw;
 	memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
-	tkr_dummy.read = dummy_clock_read;
+	tkr_dummy.clock = &dummy_clock;
 	update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
 }
 
@@ -649,11 +671,10 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
  */
 static void timekeeping_forward_now(struct timekeeper *tk)
 {
-	struct clocksource *clock = tk->tkr_mono.clock;
 	u64 cycle_now, delta;
 	u64 nsec;
 
-	cycle_now = tk->tkr_mono.read(clock);
+	cycle_now = tk_clock_read(&tk->tkr_mono);
 	delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
 	tk->tkr_mono.cycle_last = cycle_now;
 	tk->tkr_raw.cycle_last = cycle_now;
@@ -929,8 +950,7 @@ void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
 
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
-
-		now = tk->tkr_mono.read(tk->tkr_mono.clock);
+		now = tk_clock_read(&tk->tkr_mono);
 		systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq;
 		systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
 		base_real = ktime_add(tk->tkr_mono.base,
@@ -1108,7 +1128,7 @@ int get_device_system_crosststamp(int (*get_time_fn)
 		 * Check whether the system counter value provided by the
 		 * device driver is on the current timekeeping interval.
 		 */
-		now = tk->tkr_mono.read(tk->tkr_mono.clock);
+		now = tk_clock_read(&tk->tkr_mono);
 		interval_start = tk->tkr_mono.cycle_last;
 		if (!cycle_between(interval_start, cycles, now)) {
 			clock_was_set_seq = tk->clock_was_set_seq;
@@ -1629,7 +1649,7 @@ void timekeeping_resume(void)
 	 * The less preferred source will only be tried if there is no better
 	 * usable source. The rtc part is handled separately in rtc core code.
 	 */
-	cycle_now = tk->tkr_mono.read(clock);
+	cycle_now = tk_clock_read(&tk->tkr_mono);
 	if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
 		cycle_now > tk->tkr_mono.cycle_last) {
 		u64 nsec, cyc_delta;
@@ -2030,7 +2050,7 @@ void update_wall_time(void)
 #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
 	offset = real_tk->cycle_interval;
 #else
-	offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock),
+	offset = clocksource_delta(tk_clock_read(&tk->tkr_mono),
 				   tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
 #endif
 