author    Ingo Molnar <mingo@kernel.org>    2015-03-27 05:09:21 -0400
committer Ingo Molnar <mingo@kernel.org>    2015-03-27 05:09:21 -0400
commit    4e6d7c2aa95158315902647963b359b32da5c295 (patch)
tree      5141f79302e1e653cde53bab6a981a1b7bfa47b0
parent    3c435c1e472ba344ee25f795f4807d4457e61f6c (diff)
parent    fe5fba05b46c791c95a9f34228ac495f81f72fc0 (diff)
Merge branch 'timers/core' into perf/timer, to apply dependent patch
An upcoming patch will depend on tai_ns() and NMI-safe ktime_get_raw_fast(), so merge timers/core here in a separate topic branch until it's all cooked and timers/core is merged upstream.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--  arch/arm/plat-omap/counter_32k.c       2
-rw-r--r--  arch/arm64/kernel/vdso.c              10
-rw-r--r--  arch/s390/kernel/time.c               20
-rw-r--r--  arch/sparc/kernel/time_32.c            6
-rw-r--r--  arch/tile/kernel/time.c               24
-rw-r--r--  arch/x86/kernel/vsyscall_gtod.c       24
-rw-r--r--  arch/x86/kvm/x86.c                    14
-rw-r--r--  drivers/clocksource/em_sti.c           2
-rw-r--r--  drivers/clocksource/sh_cmt.c           2
-rw-r--r--  drivers/clocksource/sh_tmu.c           2
-rw-r--r--  include/linux/clockchips.h            21
-rw-r--r--  include/linux/clocksource.h           25
-rw-r--r--  include/linux/timekeeper_internal.h   16
-rw-r--r--  include/linux/timekeeping.h            6
-rw-r--r--  kernel/time/clockevents.c             88
-rw-r--r--  kernel/time/clocksource.c            170
-rw-r--r--  kernel/time/jiffies.c                  5
-rw-r--r--  kernel/time/sched_clock.c            236
-rw-r--r--  kernel/time/timekeeping.c            345
-rw-r--r--  kernel/time/timer_list.c              32
-rw-r--r--  lib/Kconfig.debug                     13
21 files changed, 703 insertions(+), 360 deletions(-)
diff --git a/arch/arm/plat-omap/counter_32k.c b/arch/arm/plat-omap/counter_32k.c
index 61b4d705c267..43cf74561cfd 100644
--- a/arch/arm/plat-omap/counter_32k.c
+++ b/arch/arm/plat-omap/counter_32k.c
@@ -103,7 +103,7 @@ int __init omap_init_clocksource_32k(void __iomem *vbase)
103 103
104 /* 104 /*
105 * 120000 rough estimate from the calculations in 105 * 120000 rough estimate from the calculations in
106 * __clocksource_updatefreq_scale. 106 * __clocksource_update_freq_scale.
107 */ 107 */
108 clocks_calc_mult_shift(&persistent_mult, &persistent_shift, 108 clocks_calc_mult_shift(&persistent_mult, &persistent_shift,
109 32768, NSEC_PER_SEC, 120000); 109 32768, NSEC_PER_SEC, 120000);
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 32aeea083d93..ec37ab3f524f 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -200,7 +200,7 @@ up_fail:
200void update_vsyscall(struct timekeeper *tk) 200void update_vsyscall(struct timekeeper *tk)
201{ 201{
202 struct timespec xtime_coarse; 202 struct timespec xtime_coarse;
203 u32 use_syscall = strcmp(tk->tkr.clock->name, "arch_sys_counter"); 203 u32 use_syscall = strcmp(tk->tkr_mono.clock->name, "arch_sys_counter");
204 204
205 ++vdso_data->tb_seq_count; 205 ++vdso_data->tb_seq_count;
206 smp_wmb(); 206 smp_wmb();
@@ -213,11 +213,11 @@ void update_vsyscall(struct timekeeper *tk)
213 vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; 213 vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec;
214 214
215 if (!use_syscall) { 215 if (!use_syscall) {
216 vdso_data->cs_cycle_last = tk->tkr.cycle_last; 216 vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last;
217 vdso_data->xtime_clock_sec = tk->xtime_sec; 217 vdso_data->xtime_clock_sec = tk->xtime_sec;
218 vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec; 218 vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
219 vdso_data->cs_mult = tk->tkr.mult; 219 vdso_data->cs_mult = tk->tkr_mono.mult;
220 vdso_data->cs_shift = tk->tkr.shift; 220 vdso_data->cs_shift = tk->tkr_mono.shift;
221 } 221 }
222 222
223 smp_wmb(); 223 smp_wmb();
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 20660dddb2d6..170ddd2018b3 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -215,20 +215,20 @@ void update_vsyscall(struct timekeeper *tk)
215{ 215{
216 u64 nsecps; 216 u64 nsecps;
217 217
218 if (tk->tkr.clock != &clocksource_tod) 218 if (tk->tkr_mono.clock != &clocksource_tod)
219 return; 219 return;
220 220
221 /* Make userspace gettimeofday spin until we're done. */ 221 /* Make userspace gettimeofday spin until we're done. */
222 ++vdso_data->tb_update_count; 222 ++vdso_data->tb_update_count;
223 smp_wmb(); 223 smp_wmb();
224 vdso_data->xtime_tod_stamp = tk->tkr.cycle_last; 224 vdso_data->xtime_tod_stamp = tk->tkr_mono.cycle_last;
225 vdso_data->xtime_clock_sec = tk->xtime_sec; 225 vdso_data->xtime_clock_sec = tk->xtime_sec;
226 vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec; 226 vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
227 vdso_data->wtom_clock_sec = 227 vdso_data->wtom_clock_sec =
228 tk->xtime_sec + tk->wall_to_monotonic.tv_sec; 228 tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
229 vdso_data->wtom_clock_nsec = tk->tkr.xtime_nsec + 229 vdso_data->wtom_clock_nsec = tk->tkr_mono.xtime_nsec +
230 + ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr.shift); 230 + ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift);
231 nsecps = (u64) NSEC_PER_SEC << tk->tkr.shift; 231 nsecps = (u64) NSEC_PER_SEC << tk->tkr_mono.shift;
232 while (vdso_data->wtom_clock_nsec >= nsecps) { 232 while (vdso_data->wtom_clock_nsec >= nsecps) {
233 vdso_data->wtom_clock_nsec -= nsecps; 233 vdso_data->wtom_clock_nsec -= nsecps;
234 vdso_data->wtom_clock_sec++; 234 vdso_data->wtom_clock_sec++;
@@ -236,7 +236,7 @@ void update_vsyscall(struct timekeeper *tk)
236 236
237 vdso_data->xtime_coarse_sec = tk->xtime_sec; 237 vdso_data->xtime_coarse_sec = tk->xtime_sec;
238 vdso_data->xtime_coarse_nsec = 238 vdso_data->xtime_coarse_nsec =
239 (long)(tk->tkr.xtime_nsec >> tk->tkr.shift); 239 (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
240 vdso_data->wtom_coarse_sec = 240 vdso_data->wtom_coarse_sec =
241 vdso_data->xtime_coarse_sec + tk->wall_to_monotonic.tv_sec; 241 vdso_data->xtime_coarse_sec + tk->wall_to_monotonic.tv_sec;
242 vdso_data->wtom_coarse_nsec = 242 vdso_data->wtom_coarse_nsec =
@@ -246,8 +246,8 @@ void update_vsyscall(struct timekeeper *tk)
246 vdso_data->wtom_coarse_sec++; 246 vdso_data->wtom_coarse_sec++;
247 } 247 }
248 248
249 vdso_data->tk_mult = tk->tkr.mult; 249 vdso_data->tk_mult = tk->tkr_mono.mult;
250 vdso_data->tk_shift = tk->tkr.shift; 250 vdso_data->tk_shift = tk->tkr_mono.shift;
251 smp_wmb(); 251 smp_wmb();
252 ++vdso_data->tb_update_count; 252 ++vdso_data->tb_update_count;
253} 253}
@@ -283,7 +283,7 @@ void __init time_init(void)
283 if (register_external_irq(EXT_IRQ_TIMING_ALERT, timing_alert_interrupt)) 283 if (register_external_irq(EXT_IRQ_TIMING_ALERT, timing_alert_interrupt))
284 panic("Couldn't request external interrupt 0x1406"); 284 panic("Couldn't request external interrupt 0x1406");
285 285
286 if (clocksource_register(&clocksource_tod) != 0) 286 if (__clocksource_register(&clocksource_tod) != 0)
287 panic("Could not register TOD clock source"); 287 panic("Could not register TOD clock source");
288 288
289 /* Enable TOD clock interrupts on the boot cpu. */ 289 /* Enable TOD clock interrupts on the boot cpu. */
diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c
index 2f80d23a0a44..18147a5523d9 100644
--- a/arch/sparc/kernel/time_32.c
+++ b/arch/sparc/kernel/time_32.c
@@ -181,17 +181,13 @@ static struct clocksource timer_cs = {
181 .rating = 100, 181 .rating = 100,
182 .read = timer_cs_read, 182 .read = timer_cs_read,
183 .mask = CLOCKSOURCE_MASK(64), 183 .mask = CLOCKSOURCE_MASK(64),
184 .shift = 2,
185 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 184 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
186}; 185};
187 186
188static __init int setup_timer_cs(void) 187static __init int setup_timer_cs(void)
189{ 188{
190 timer_cs_enabled = 1; 189 timer_cs_enabled = 1;
191 timer_cs.mult = clocksource_hz2mult(sparc_config.clock_rate, 190 return clocksource_register_hz(&timer_cs, sparc_config.clock_rate);
192 timer_cs.shift);
193
194 return clocksource_register(&timer_cs);
195} 191}
196 192
197#ifdef CONFIG_SMP 193#ifdef CONFIG_SMP
diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c
index d412b0856c0a..00178ecf9aea 100644
--- a/arch/tile/kernel/time.c
+++ b/arch/tile/kernel/time.c
@@ -257,34 +257,34 @@ void update_vsyscall_tz(void)
257 257
258void update_vsyscall(struct timekeeper *tk) 258void update_vsyscall(struct timekeeper *tk)
259{ 259{
260 if (tk->tkr.clock != &cycle_counter_cs) 260 if (tk->tkr_mono.clock != &cycle_counter_cs)
261 return; 261 return;
262 262
263 write_seqcount_begin(&vdso_data->tb_seq); 263 write_seqcount_begin(&vdso_data->tb_seq);
264 264
265 vdso_data->cycle_last = tk->tkr.cycle_last; 265 vdso_data->cycle_last = tk->tkr_mono.cycle_last;
266 vdso_data->mask = tk->tkr.mask; 266 vdso_data->mask = tk->tkr_mono.mask;
267 vdso_data->mult = tk->tkr.mult; 267 vdso_data->mult = tk->tkr_mono.mult;
268 vdso_data->shift = tk->tkr.shift; 268 vdso_data->shift = tk->tkr_mono.shift;
269 269
270 vdso_data->wall_time_sec = tk->xtime_sec; 270 vdso_data->wall_time_sec = tk->xtime_sec;
271 vdso_data->wall_time_snsec = tk->tkr.xtime_nsec; 271 vdso_data->wall_time_snsec = tk->tkr_mono.xtime_nsec;
272 272
273 vdso_data->monotonic_time_sec = tk->xtime_sec 273 vdso_data->monotonic_time_sec = tk->xtime_sec
274 + tk->wall_to_monotonic.tv_sec; 274 + tk->wall_to_monotonic.tv_sec;
275 vdso_data->monotonic_time_snsec = tk->tkr.xtime_nsec 275 vdso_data->monotonic_time_snsec = tk->tkr_mono.xtime_nsec
276 + ((u64)tk->wall_to_monotonic.tv_nsec 276 + ((u64)tk->wall_to_monotonic.tv_nsec
277 << tk->tkr.shift); 277 << tk->tkr_mono.shift);
278 while (vdso_data->monotonic_time_snsec >= 278 while (vdso_data->monotonic_time_snsec >=
279 (((u64)NSEC_PER_SEC) << tk->tkr.shift)) { 279 (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
280 vdso_data->monotonic_time_snsec -= 280 vdso_data->monotonic_time_snsec -=
281 ((u64)NSEC_PER_SEC) << tk->tkr.shift; 281 ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
282 vdso_data->monotonic_time_sec++; 282 vdso_data->monotonic_time_sec++;
283 } 283 }
284 284
285 vdso_data->wall_time_coarse_sec = tk->xtime_sec; 285 vdso_data->wall_time_coarse_sec = tk->xtime_sec;
286 vdso_data->wall_time_coarse_nsec = (long)(tk->tkr.xtime_nsec >> 286 vdso_data->wall_time_coarse_nsec = (long)(tk->tkr_mono.xtime_nsec >>
287 tk->tkr.shift); 287 tk->tkr_mono.shift);
288 288
289 vdso_data->monotonic_time_coarse_sec = 289 vdso_data->monotonic_time_coarse_sec =
290 vdso_data->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec; 290 vdso_data->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c
index c7d791f32b98..51e330416995 100644
--- a/arch/x86/kernel/vsyscall_gtod.c
+++ b/arch/x86/kernel/vsyscall_gtod.c
@@ -31,30 +31,30 @@ void update_vsyscall(struct timekeeper *tk)
31 gtod_write_begin(vdata); 31 gtod_write_begin(vdata);
32 32
33 /* copy vsyscall data */ 33 /* copy vsyscall data */
34 vdata->vclock_mode = tk->tkr.clock->archdata.vclock_mode; 34 vdata->vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
35 vdata->cycle_last = tk->tkr.cycle_last; 35 vdata->cycle_last = tk->tkr_mono.cycle_last;
36 vdata->mask = tk->tkr.mask; 36 vdata->mask = tk->tkr_mono.mask;
37 vdata->mult = tk->tkr.mult; 37 vdata->mult = tk->tkr_mono.mult;
38 vdata->shift = tk->tkr.shift; 38 vdata->shift = tk->tkr_mono.shift;
39 39
40 vdata->wall_time_sec = tk->xtime_sec; 40 vdata->wall_time_sec = tk->xtime_sec;
41 vdata->wall_time_snsec = tk->tkr.xtime_nsec; 41 vdata->wall_time_snsec = tk->tkr_mono.xtime_nsec;
42 42
43 vdata->monotonic_time_sec = tk->xtime_sec 43 vdata->monotonic_time_sec = tk->xtime_sec
44 + tk->wall_to_monotonic.tv_sec; 44 + tk->wall_to_monotonic.tv_sec;
45 vdata->monotonic_time_snsec = tk->tkr.xtime_nsec 45 vdata->monotonic_time_snsec = tk->tkr_mono.xtime_nsec
46 + ((u64)tk->wall_to_monotonic.tv_nsec 46 + ((u64)tk->wall_to_monotonic.tv_nsec
47 << tk->tkr.shift); 47 << tk->tkr_mono.shift);
48 while (vdata->monotonic_time_snsec >= 48 while (vdata->monotonic_time_snsec >=
49 (((u64)NSEC_PER_SEC) << tk->tkr.shift)) { 49 (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
50 vdata->monotonic_time_snsec -= 50 vdata->monotonic_time_snsec -=
51 ((u64)NSEC_PER_SEC) << tk->tkr.shift; 51 ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
52 vdata->monotonic_time_sec++; 52 vdata->monotonic_time_sec++;
53 } 53 }
54 54
55 vdata->wall_time_coarse_sec = tk->xtime_sec; 55 vdata->wall_time_coarse_sec = tk->xtime_sec;
56 vdata->wall_time_coarse_nsec = (long)(tk->tkr.xtime_nsec >> 56 vdata->wall_time_coarse_nsec = (long)(tk->tkr_mono.xtime_nsec >>
57 tk->tkr.shift); 57 tk->tkr_mono.shift);
58 58
59 vdata->monotonic_time_coarse_sec = 59 vdata->monotonic_time_coarse_sec =
60 vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec; 60 vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 32bf19ef3115..0ee725f1896d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1070,19 +1070,19 @@ static void update_pvclock_gtod(struct timekeeper *tk)
1070 struct pvclock_gtod_data *vdata = &pvclock_gtod_data; 1070 struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
1071 u64 boot_ns; 1071 u64 boot_ns;
1072 1072
1073 boot_ns = ktime_to_ns(ktime_add(tk->tkr.base_mono, tk->offs_boot)); 1073 boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot));
1074 1074
1075 write_seqcount_begin(&vdata->seq); 1075 write_seqcount_begin(&vdata->seq);
1076 1076
1077 /* copy pvclock gtod data */ 1077 /* copy pvclock gtod data */
1078 vdata->clock.vclock_mode = tk->tkr.clock->archdata.vclock_mode; 1078 vdata->clock.vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
1079 vdata->clock.cycle_last = tk->tkr.cycle_last; 1079 vdata->clock.cycle_last = tk->tkr_mono.cycle_last;
1080 vdata->clock.mask = tk->tkr.mask; 1080 vdata->clock.mask = tk->tkr_mono.mask;
1081 vdata->clock.mult = tk->tkr.mult; 1081 vdata->clock.mult = tk->tkr_mono.mult;
1082 vdata->clock.shift = tk->tkr.shift; 1082 vdata->clock.shift = tk->tkr_mono.shift;
1083 1083
1084 vdata->boot_ns = boot_ns; 1084 vdata->boot_ns = boot_ns;
1085 vdata->nsec_base = tk->tkr.xtime_nsec; 1085 vdata->nsec_base = tk->tkr_mono.xtime_nsec;
1086 1086
1087 write_seqcount_end(&vdata->seq); 1087 write_seqcount_end(&vdata->seq);
1088} 1088}
diff --git a/drivers/clocksource/em_sti.c b/drivers/clocksource/em_sti.c
index d0a7bd66b8b9..dc3c6ee04aaa 100644
--- a/drivers/clocksource/em_sti.c
+++ b/drivers/clocksource/em_sti.c
@@ -210,7 +210,7 @@ static int em_sti_clocksource_enable(struct clocksource *cs)
210 210
211 ret = em_sti_start(p, USER_CLOCKSOURCE); 211 ret = em_sti_start(p, USER_CLOCKSOURCE);
212 if (!ret) 212 if (!ret)
213 __clocksource_updatefreq_hz(cs, p->rate); 213 __clocksource_update_freq_hz(cs, p->rate);
214 return ret; 214 return ret;
215} 215}
216 216
diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c
index 2bd13b53b727..b8ff3c64cc45 100644
--- a/drivers/clocksource/sh_cmt.c
+++ b/drivers/clocksource/sh_cmt.c
@@ -641,7 +641,7 @@ static int sh_cmt_clocksource_enable(struct clocksource *cs)
641 641
642 ret = sh_cmt_start(ch, FLAG_CLOCKSOURCE); 642 ret = sh_cmt_start(ch, FLAG_CLOCKSOURCE);
643 if (!ret) { 643 if (!ret) {
644 __clocksource_updatefreq_hz(cs, ch->rate); 644 __clocksource_update_freq_hz(cs, ch->rate);
645 ch->cs_enabled = true; 645 ch->cs_enabled = true;
646 } 646 }
647 return ret; 647 return ret;
diff --git a/drivers/clocksource/sh_tmu.c b/drivers/clocksource/sh_tmu.c
index f150ca82bfaf..b6b8fa3cd211 100644
--- a/drivers/clocksource/sh_tmu.c
+++ b/drivers/clocksource/sh_tmu.c
@@ -272,7 +272,7 @@ static int sh_tmu_clocksource_enable(struct clocksource *cs)
272 272
273 ret = sh_tmu_enable(ch); 273 ret = sh_tmu_enable(ch);
274 if (!ret) { 274 if (!ret) {
275 __clocksource_updatefreq_hz(cs, ch->rate); 275 __clocksource_update_freq_hz(cs, ch->rate);
276 ch->cs_enabled = true; 276 ch->cs_enabled = true;
277 } 277 }
278 278
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 2e4cb67f6e56..59af26b54d15 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -39,6 +39,8 @@ enum clock_event_mode {
39 CLOCK_EVT_MODE_PERIODIC, 39 CLOCK_EVT_MODE_PERIODIC,
40 CLOCK_EVT_MODE_ONESHOT, 40 CLOCK_EVT_MODE_ONESHOT,
41 CLOCK_EVT_MODE_RESUME, 41 CLOCK_EVT_MODE_RESUME,
42
43 /* Legacy ->set_mode() callback doesn't support below modes */
42}; 44};
43 45
44/* 46/*
@@ -81,7 +83,11 @@ enum clock_event_mode {
81 * @mode: operating mode assigned by the management code 83 * @mode: operating mode assigned by the management code
82 * @features: features 84 * @features: features
83 * @retries: number of forced programming retries 85 * @retries: number of forced programming retries
84 * @set_mode: set mode function 86 * @set_mode: legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME.
87 * @set_mode_periodic: switch mode to periodic, if !set_mode
88 * @set_mode_oneshot: switch mode to oneshot, if !set_mode
89 * @set_mode_shutdown: switch mode to shutdown, if !set_mode
90 * @set_mode_resume: resume clkevt device, if !set_mode
85 * @broadcast: function to broadcast events 91 * @broadcast: function to broadcast events
86 * @min_delta_ticks: minimum delta value in ticks stored for reconfiguration 92 * @min_delta_ticks: minimum delta value in ticks stored for reconfiguration
87 * @max_delta_ticks: maximum delta value in ticks stored for reconfiguration 93 * @max_delta_ticks: maximum delta value in ticks stored for reconfiguration
@@ -108,9 +114,20 @@ struct clock_event_device {
108 unsigned int features; 114 unsigned int features;
109 unsigned long retries; 115 unsigned long retries;
110 116
111 void (*broadcast)(const struct cpumask *mask); 117 /*
118 * Mode transition callback(s): Only one of the two groups should be
119 * defined:
120 * - set_mode(), only for modes <= CLOCK_EVT_MODE_RESUME.
121 * - set_mode_{shutdown|periodic|oneshot|resume}().
122 */
112 void (*set_mode)(enum clock_event_mode mode, 123 void (*set_mode)(enum clock_event_mode mode,
113 struct clock_event_device *); 124 struct clock_event_device *);
125 int (*set_mode_periodic)(struct clock_event_device *);
126 int (*set_mode_oneshot)(struct clock_event_device *);
127 int (*set_mode_shutdown)(struct clock_event_device *);
128 int (*set_mode_resume)(struct clock_event_device *);
129
130 void (*broadcast)(const struct cpumask *mask);
114 void (*suspend)(struct clock_event_device *); 131 void (*suspend)(struct clock_event_device *);
115 void (*resume)(struct clock_event_device *); 132 void (*resume)(struct clock_event_device *);
116 unsigned long min_delta_ticks; 133 unsigned long min_delta_ticks;
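For drivers, the practical effect of the new hooks above is that a clock event device can fill in set_mode_shutdown/periodic/oneshot/resume instead of the legacy multiplexed set_mode(). A minimal sketch of such a driver follows; all foo_* names, the rating and the (empty) register programming are made up for illustration:

#include <linux/clockchips.h>

/* Illustrative only: every foo_* identifier is hypothetical. */
static int foo_shutdown(struct clock_event_device *ced)
{
	/* stop the hardware timer */
	return 0;
}

static int foo_set_periodic(struct clock_event_device *ced)
{
	/* program the hardware for a periodic tick */
	return 0;
}

static int foo_set_oneshot(struct clock_event_device *ced)
{
	/* switch the hardware to one-shot mode */
	return 0;
}

static int foo_set_next_event(unsigned long delta, struct clock_event_device *ced)
{
	/* arm the comparator 'delta' ticks from now */
	return 0;
}

static struct clock_event_device foo_clockevent = {
	.name			= "foo-timer",
	.features		= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
	.rating			= 300,
	.set_mode_shutdown	= foo_shutdown,
	.set_mode_periodic	= foo_set_periodic,
	.set_mode_oneshot	= foo_set_oneshot,
	/* .set_mode_resume is optional */
	.set_next_event		= foo_set_next_event,
};

Note that the clockevents_sanity_check() added below in kernel/time/clockevents.c rejects devices that mix the two callback styles, or that advertise CLOCK_EVT_FEAT_PERIODIC/ONESHOT without the matching set_mode_* hook.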
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 9c78d15d33e4..135509821c39 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -56,6 +56,7 @@ struct module;
56 * @shift: cycle to nanosecond divisor (power of two) 56 * @shift: cycle to nanosecond divisor (power of two)
57 * @max_idle_ns: max idle time permitted by the clocksource (nsecs) 57 * @max_idle_ns: max idle time permitted by the clocksource (nsecs)
58 * @maxadj: maximum adjustment value to mult (~11%) 58 * @maxadj: maximum adjustment value to mult (~11%)
59 * @max_cycles: maximum safe cycle value which won't overflow on multiplication
59 * @flags: flags describing special properties 60 * @flags: flags describing special properties
60 * @archdata: arch-specific data 61 * @archdata: arch-specific data
61 * @suspend: suspend function for the clocksource, if necessary 62 * @suspend: suspend function for the clocksource, if necessary
@@ -76,7 +77,7 @@ struct clocksource {
76#ifdef CONFIG_ARCH_CLOCKSOURCE_DATA 77#ifdef CONFIG_ARCH_CLOCKSOURCE_DATA
77 struct arch_clocksource_data archdata; 78 struct arch_clocksource_data archdata;
78#endif 79#endif
79 80 u64 max_cycles;
80 const char *name; 81 const char *name;
81 struct list_head list; 82 struct list_head list;
82 int rating; 83 int rating;
@@ -178,7 +179,6 @@ static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift)
178} 179}
179 180
180 181
181extern int clocksource_register(struct clocksource*);
182extern int clocksource_unregister(struct clocksource*); 182extern int clocksource_unregister(struct clocksource*);
183extern void clocksource_touch_watchdog(void); 183extern void clocksource_touch_watchdog(void);
184extern struct clocksource* clocksource_get_next(void); 184extern struct clocksource* clocksource_get_next(void);
@@ -189,7 +189,7 @@ extern struct clocksource * __init clocksource_default_clock(void);
189extern void clocksource_mark_unstable(struct clocksource *cs); 189extern void clocksource_mark_unstable(struct clocksource *cs);
190 190
191extern u64 191extern u64
192clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask); 192clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cycles);
193extern void 193extern void
194clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec); 194clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec);
195 195
@@ -200,7 +200,16 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec);
200extern int 200extern int
201__clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq); 201__clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq);
202extern void 202extern void
203__clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq); 203__clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq);
204
205/*
206 * Don't call this unless you are a default clocksource
207 * (AKA: jiffies) and absolutely have to.
208 */
209static inline int __clocksource_register(struct clocksource *cs)
210{
211 return __clocksource_register_scale(cs, 1, 0);
212}
204 213
205static inline int clocksource_register_hz(struct clocksource *cs, u32 hz) 214static inline int clocksource_register_hz(struct clocksource *cs, u32 hz)
206{ 215{
@@ -212,14 +221,14 @@ static inline int clocksource_register_khz(struct clocksource *cs, u32 khz)
212 return __clocksource_register_scale(cs, 1000, khz); 221 return __clocksource_register_scale(cs, 1000, khz);
213} 222}
214 223
215static inline void __clocksource_updatefreq_hz(struct clocksource *cs, u32 hz) 224static inline void __clocksource_update_freq_hz(struct clocksource *cs, u32 hz)
216{ 225{
217 __clocksource_updatefreq_scale(cs, 1, hz); 226 __clocksource_update_freq_scale(cs, 1, hz);
218} 227}
219 228
220static inline void __clocksource_updatefreq_khz(struct clocksource *cs, u32 khz) 229static inline void __clocksource_update_freq_khz(struct clocksource *cs, u32 khz)
221{ 230{
222 __clocksource_updatefreq_scale(cs, 1000, khz); 231 __clocksource_update_freq_scale(cs, 1000, khz);
223} 232}
224 233
225 234
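With clocksource_register() removed (see the sparc32 conversion above), drivers are steered towards the frequency-based registration helpers, which let the core derive mult/shift and the new max_cycles limit. A hedged sketch with a made-up MMIO counter; the foo_* names and the 24 MHz rate are assumptions:

#include <linux/clocksource.h>
#include <linux/io.h>

static void __iomem *foo_counter_base;	/* assumed to be ioremap()ed elsewhere */

static cycle_t foo_cs_read(struct clocksource *cs)
{
	return (cycle_t)readl_relaxed(foo_counter_base);
}

static struct clocksource foo_cs = {
	.name	= "foo-counter",
	.rating	= 300,
	.read	= foo_cs_read,
	.mask	= CLOCKSOURCE_MASK(32),
	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
};

static int __init foo_cs_init(void)
{
	/* the core computes mult/shift, maxadj, max_idle_ns and max_cycles */
	return clocksource_register_hz(&foo_cs, 24000000);
}

Only self-defining clocksources such as jiffies should use the new __clocksource_register() wrapper, which passes freq == 0 and keeps the hand-rolled mult/shift.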
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 05af9a334893..fb86963859c7 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -16,16 +16,16 @@
16 * @read: Read function of @clock 16 * @read: Read function of @clock
17 * @mask: Bitmask for two's complement subtraction of non 64bit clocks 17 * @mask: Bitmask for two's complement subtraction of non 64bit clocks
18 * @cycle_last: @clock cycle value at last update 18 * @cycle_last: @clock cycle value at last update
19 * @mult: NTP adjusted multiplier for scaled math conversion 19 * @mult: (NTP adjusted) multiplier for scaled math conversion
20 * @shift: Shift value for scaled math conversion 20 * @shift: Shift value for scaled math conversion
21 * @xtime_nsec: Shifted (fractional) nano seconds offset for readout 21 * @xtime_nsec: Shifted (fractional) nano seconds offset for readout
22 * @base_mono: ktime_t (nanoseconds) base time for readout 22 * @base: ktime_t (nanoseconds) base time for readout
23 * 23 *
24 * This struct has size 56 byte on 64 bit. Together with a seqcount it 24 * This struct has size 56 byte on 64 bit. Together with a seqcount it
25 * occupies a single 64byte cache line. 25 * occupies a single 64byte cache line.
26 * 26 *
27 * The struct is separate from struct timekeeper as it is also used 27 * The struct is separate from struct timekeeper as it is also used
28 * for a fast NMI safe accessor to clock monotonic. 28 * for a fast NMI safe accessors.
29 */ 29 */
30struct tk_read_base { 30struct tk_read_base {
31 struct clocksource *clock; 31 struct clocksource *clock;
@@ -35,12 +35,13 @@ struct tk_read_base {
35 u32 mult; 35 u32 mult;
36 u32 shift; 36 u32 shift;
37 u64 xtime_nsec; 37 u64 xtime_nsec;
38 ktime_t base_mono; 38 ktime_t base;
39}; 39};
40 40
41/** 41/**
42 * struct timekeeper - Structure holding internal timekeeping values. 42 * struct timekeeper - Structure holding internal timekeeping values.
43 * @tkr: The readout base structure 43 * @tkr_mono: The readout base structure for CLOCK_MONOTONIC
44 * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW
44 * @xtime_sec: Current CLOCK_REALTIME time in seconds 45 * @xtime_sec: Current CLOCK_REALTIME time in seconds
45 * @ktime_sec: Current CLOCK_MONOTONIC time in seconds 46 * @ktime_sec: Current CLOCK_MONOTONIC time in seconds
46 * @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset 47 * @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset
@@ -48,7 +49,6 @@ struct tk_read_base {
48 * @offs_boot: Offset clock monotonic -> clock boottime 49 * @offs_boot: Offset clock monotonic -> clock boottime
49 * @offs_tai: Offset clock monotonic -> clock tai 50 * @offs_tai: Offset clock monotonic -> clock tai
50 * @tai_offset: The current UTC to TAI offset in seconds 51 * @tai_offset: The current UTC to TAI offset in seconds
51 * @base_raw: Monotonic raw base time in ktime_t format
52 * @raw_time: Monotonic raw base time in timespec64 format 52 * @raw_time: Monotonic raw base time in timespec64 format
53 * @cycle_interval: Number of clock cycles in one NTP interval 53 * @cycle_interval: Number of clock cycles in one NTP interval
54 * @xtime_interval: Number of clock shifted nano seconds in one NTP 54 * @xtime_interval: Number of clock shifted nano seconds in one NTP
@@ -76,7 +76,8 @@ struct tk_read_base {
76 * used instead. 76 * used instead.
77 */ 77 */
78struct timekeeper { 78struct timekeeper {
79 struct tk_read_base tkr; 79 struct tk_read_base tkr_mono;
80 struct tk_read_base tkr_raw;
80 u64 xtime_sec; 81 u64 xtime_sec;
81 unsigned long ktime_sec; 82 unsigned long ktime_sec;
82 struct timespec64 wall_to_monotonic; 83 struct timespec64 wall_to_monotonic;
@@ -84,7 +85,6 @@ struct timekeeper {
84 ktime_t offs_boot; 85 ktime_t offs_boot;
85 ktime_t offs_tai; 86 ktime_t offs_tai;
86 s32 tai_offset; 87 s32 tai_offset;
87 ktime_t base_raw;
88 struct timespec64 raw_time; 88 struct timespec64 raw_time;
89 89
90 /* The following members are for timekeeping internal use */ 90 /* The following members are for timekeeping internal use */
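The tkr_mono/tkr_raw split means every reader picks one tk_read_base and performs the same cycles-to-nanoseconds conversion against it. The sketch below is a simplified restatement of that conversion, not the in-tree helpers; it ignores arch_gettimeoffset() and the surrounding seqcount retry loop:

#include <linux/timekeeper_internal.h>

/* Simplified: how a tk_read_base turns a counter read into nanoseconds. */
static u64 tkr_sketch_get_ns(struct tk_read_base *tkr)
{
	cycle_t now = tkr->read(tkr->clock);
	cycle_t delta = (now - tkr->cycle_last) & tkr->mask;

	return ktime_to_ns(tkr->base) +
	       ((delta * tkr->mult + tkr->xtime_nsec) >> tkr->shift);
}

CLOCK_MONOTONIC readers use tk->tkr_mono, CLOCK_MONOTONIC_RAW readers use tk->tkr_raw, and the fast/NMI-safe paths keep latched per-clock copies of these bases (tk_fast_mono and the new tk_fast_raw in kernel/time/timekeeping.c below).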
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 3eaae4754275..5047b83483d6 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -214,12 +214,18 @@ static inline u64 ktime_get_boot_ns(void)
214 return ktime_to_ns(ktime_get_boottime()); 214 return ktime_to_ns(ktime_get_boottime());
215} 215}
216 216
217static inline u64 ktime_get_tai_ns(void)
218{
219 return ktime_to_ns(ktime_get_clocktai());
220}
221
217static inline u64 ktime_get_raw_ns(void) 222static inline u64 ktime_get_raw_ns(void)
218{ 223{
219 return ktime_to_ns(ktime_get_raw()); 224 return ktime_to_ns(ktime_get_raw());
220} 225}
221 226
222extern u64 ktime_get_mono_fast_ns(void); 227extern u64 ktime_get_mono_fast_ns(void);
228extern u64 ktime_get_raw_fast_ns(void);
223 229
224/* 230/*
225 * Timespec interfaces utilizing the ktime based ones 231 * Timespec interfaces utilizing the ktime based ones
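These are the two accessors the merge commit message refers to: ktime_get_tai_ns() for CLOCK_TAI timestamps and ktime_get_raw_fast_ns() as an NMI-safe monotonic-raw clock. A minimal usage sketch; the my_* wrapper is hypothetical:

#include <linux/timekeeping.h>

/* Hypothetical helper: timestamp an event with both clocks. */
static void my_stamp_event(u64 *tai_ns, u64 *raw_ns)
{
	*tai_ns = ktime_get_tai_ns();		/* CLOCK_TAI, in nanoseconds */
	*raw_ns = ktime_get_raw_fast_ns();	/* NMI-safe CLOCK_MONOTONIC_RAW */
}

ktime_get_raw_fast_ns() only reads the latched fast-timekeeper copies, so it is usable from NMI context, with the same small-jump-across-update caveats as ktime_get_mono_fast_ns().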
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 55449909f114..489642b08d64 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -94,6 +94,57 @@ u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt)
94} 94}
95EXPORT_SYMBOL_GPL(clockevent_delta2ns); 95EXPORT_SYMBOL_GPL(clockevent_delta2ns);
96 96
97static int __clockevents_set_mode(struct clock_event_device *dev,
98 enum clock_event_mode mode)
99{
100 /* Transition with legacy set_mode() callback */
101 if (dev->set_mode) {
102 /* Legacy callback doesn't support new modes */
103 if (mode > CLOCK_EVT_MODE_RESUME)
104 return -ENOSYS;
105 dev->set_mode(mode, dev);
106 return 0;
107 }
108
109 if (dev->features & CLOCK_EVT_FEAT_DUMMY)
110 return 0;
111
112 /* Transition with new mode-specific callbacks */
113 switch (mode) {
114 case CLOCK_EVT_MODE_UNUSED:
115 /*
116 * This is an internal state, which is guaranteed to go from
117 * SHUTDOWN to UNUSED. No driver interaction required.
118 */
119 return 0;
120
121 case CLOCK_EVT_MODE_SHUTDOWN:
122 return dev->set_mode_shutdown(dev);
123
124 case CLOCK_EVT_MODE_PERIODIC:
125 /* Core internal bug */
126 if (!(dev->features & CLOCK_EVT_FEAT_PERIODIC))
127 return -ENOSYS;
128 return dev->set_mode_periodic(dev);
129
130 case CLOCK_EVT_MODE_ONESHOT:
131 /* Core internal bug */
132 if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT))
133 return -ENOSYS;
134 return dev->set_mode_oneshot(dev);
135
136 case CLOCK_EVT_MODE_RESUME:
137 /* Optional callback */
138 if (dev->set_mode_resume)
139 return dev->set_mode_resume(dev);
140 else
141 return 0;
142
143 default:
144 return -ENOSYS;
145 }
146}
147
97/** 148/**
98 * clockevents_set_mode - set the operating mode of a clock event device 149 * clockevents_set_mode - set the operating mode of a clock event device
99 * @dev: device to modify 150 * @dev: device to modify
@@ -105,7 +156,9 @@ void clockevents_set_mode(struct clock_event_device *dev,
105 enum clock_event_mode mode) 156 enum clock_event_mode mode)
106{ 157{
107 if (dev->mode != mode) { 158 if (dev->mode != mode) {
108 dev->set_mode(mode, dev); 159 if (__clockevents_set_mode(dev, mode))
160 return;
161
109 dev->mode = mode; 162 dev->mode = mode;
110 163
111 /* 164 /*
@@ -373,6 +426,35 @@ int clockevents_unbind_device(struct clock_event_device *ced, int cpu)
373} 426}
374EXPORT_SYMBOL_GPL(clockevents_unbind); 427EXPORT_SYMBOL_GPL(clockevents_unbind);
375 428
429/* Sanity check of mode transition callbacks */
430static int clockevents_sanity_check(struct clock_event_device *dev)
431{
432 /* Legacy set_mode() callback */
433 if (dev->set_mode) {
434 /* We shouldn't be supporting new modes now */
435 WARN_ON(dev->set_mode_periodic || dev->set_mode_oneshot ||
436 dev->set_mode_shutdown || dev->set_mode_resume);
437 return 0;
438 }
439
440 if (dev->features & CLOCK_EVT_FEAT_DUMMY)
441 return 0;
442
443 /* New mode-specific callbacks */
444 if (!dev->set_mode_shutdown)
445 return -EINVAL;
446
447 if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
448 !dev->set_mode_periodic)
449 return -EINVAL;
450
451 if ((dev->features & CLOCK_EVT_FEAT_ONESHOT) &&
452 !dev->set_mode_oneshot)
453 return -EINVAL;
454
455 return 0;
456}
457
376/** 458/**
377 * clockevents_register_device - register a clock event device 459 * clockevents_register_device - register a clock event device
378 * @dev: device to register 460 * @dev: device to register
@@ -382,6 +464,8 @@ void clockevents_register_device(struct clock_event_device *dev)
382 unsigned long flags; 464 unsigned long flags;
383 465
384 BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); 466 BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
467 BUG_ON(clockevents_sanity_check(dev));
468
385 if (!dev->cpumask) { 469 if (!dev->cpumask) {
386 WARN_ON(num_possible_cpus() > 1); 470 WARN_ON(num_possible_cpus() > 1);
387 dev->cpumask = cpumask_of(smp_processor_id()); 471 dev->cpumask = cpumask_of(smp_processor_id());
@@ -449,7 +533,7 @@ int __clockevents_update_freq(struct clock_event_device *dev, u32 freq)
449 return clockevents_program_event(dev, dev->next_event, false); 533 return clockevents_program_event(dev, dev->next_event, false);
450 534
451 if (dev->mode == CLOCK_EVT_MODE_PERIODIC) 535 if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
452 dev->set_mode(CLOCK_EVT_MODE_PERIODIC, dev); 536 return __clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC);
453 537
454 return 0; 538 return 0;
455} 539}
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 4892352f0e49..c3be3c71bbad 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -142,13 +142,6 @@ static void __clocksource_unstable(struct clocksource *cs)
142 schedule_work(&watchdog_work); 142 schedule_work(&watchdog_work);
143} 143}
144 144
145static void clocksource_unstable(struct clocksource *cs, int64_t delta)
146{
147 printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n",
148 cs->name, delta);
149 __clocksource_unstable(cs);
150}
151
152/** 145/**
153 * clocksource_mark_unstable - mark clocksource unstable via watchdog 146 * clocksource_mark_unstable - mark clocksource unstable via watchdog
154 * @cs: clocksource to be marked unstable 147 * @cs: clocksource to be marked unstable
@@ -174,7 +167,7 @@ void clocksource_mark_unstable(struct clocksource *cs)
174static void clocksource_watchdog(unsigned long data) 167static void clocksource_watchdog(unsigned long data)
175{ 168{
176 struct clocksource *cs; 169 struct clocksource *cs;
177 cycle_t csnow, wdnow, delta; 170 cycle_t csnow, wdnow, cslast, wdlast, delta;
178 int64_t wd_nsec, cs_nsec; 171 int64_t wd_nsec, cs_nsec;
179 int next_cpu, reset_pending; 172 int next_cpu, reset_pending;
180 173
@@ -213,6 +206,8 @@ static void clocksource_watchdog(unsigned long data)
213 206
214 delta = clocksource_delta(csnow, cs->cs_last, cs->mask); 207 delta = clocksource_delta(csnow, cs->cs_last, cs->mask);
215 cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift); 208 cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
209 wdlast = cs->wd_last; /* save these in case we print them */
210 cslast = cs->cs_last;
216 cs->cs_last = csnow; 211 cs->cs_last = csnow;
217 cs->wd_last = wdnow; 212 cs->wd_last = wdnow;
218 213
@@ -221,7 +216,12 @@ static void clocksource_watchdog(unsigned long data)
221 216
222 /* Check the deviation from the watchdog clocksource. */ 217 /* Check the deviation from the watchdog clocksource. */
223 if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) { 218 if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) {
224 clocksource_unstable(cs, cs_nsec - wd_nsec); 219 pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable, because the skew is too large:\n", cs->name);
220 pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n",
221 watchdog->name, wdnow, wdlast, watchdog->mask);
222 pr_warn(" '%s' cs_now: %llx cs_last: %llx mask: %llx\n",
223 cs->name, csnow, cslast, cs->mask);
224 __clocksource_unstable(cs);
225 continue; 225 continue;
226 } 226 }
227 227
@@ -469,26 +469,22 @@ static u32 clocksource_max_adjustment(struct clocksource *cs)
469 * @shift: cycle to nanosecond divisor (power of two) 469 * @shift: cycle to nanosecond divisor (power of two)
470 * @maxadj: maximum adjustment value to mult (~11%) 470 * @maxadj: maximum adjustment value to mult (~11%)
471 * @mask: bitmask for two's complement subtraction of non 64 bit counters 471 * @mask: bitmask for two's complement subtraction of non 64 bit counters
472 * @max_cyc: maximum cycle value before potential overflow (does not include
473 * any safety margin)
474 *
475 * NOTE: This function includes a safety margin of 50%, so that bad clock values
476 * can be detected.
472 */ 477 */
473u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask) 478u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cyc)
474{ 479{
475 u64 max_nsecs, max_cycles; 480 u64 max_nsecs, max_cycles;
476 481
477 /* 482 /*
478 * Calculate the maximum number of cycles that we can pass to the 483 * Calculate the maximum number of cycles that we can pass to the
479 * cyc2ns function without overflowing a 64-bit signed result. The 484 * cyc2ns() function without overflowing a 64-bit result.
480 * maximum number of cycles is equal to ULLONG_MAX/(mult+maxadj)
481 * which is equivalent to the below.
482 * max_cycles < (2^63)/(mult + maxadj)
483 * max_cycles < 2^(log2((2^63)/(mult + maxadj)))
484 * max_cycles < 2^(log2(2^63) - log2(mult + maxadj))
485 * max_cycles < 2^(63 - log2(mult + maxadj))
486 * max_cycles < 1 << (63 - log2(mult + maxadj))
487 * Please note that we add 1 to the result of the log2 to account for
488 * any rounding errors, ensure the above inequality is satisfied and
489 * no overflow will occur.
490 */ 485 */
491 max_cycles = 1ULL << (63 - (ilog2(mult + maxadj) + 1)); 486 max_cycles = ULLONG_MAX;
487 do_div(max_cycles, mult+maxadj);
492 488
493 /* 489 /*
494 * The actual maximum number of cycles we can defer the clocksource is 490 * The actual maximum number of cycles we can defer the clocksource is
@@ -499,27 +495,26 @@ u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask)
499 max_cycles = min(max_cycles, mask); 495 max_cycles = min(max_cycles, mask);
500 max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift); 496 max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift);
501 497
498 /* return the max_cycles value as well if requested */
499 if (max_cyc)
500 *max_cyc = max_cycles;
501
502 /* Return 50% of the actual maximum, so we can detect bad values */
503 max_nsecs >>= 1;
504
502 return max_nsecs; 505 return max_nsecs;
503} 506}
504 507
505/** 508/**
506 * clocksource_max_deferment - Returns max time the clocksource can be deferred 509 * clocksource_update_max_deferment - Updates the clocksource max_idle_ns & max_cycles
507 * @cs: Pointer to clocksource 510 * @cs: Pointer to clocksource to be updated
508 * 511 *
509 */ 512 */
510static u64 clocksource_max_deferment(struct clocksource *cs) 513static inline void clocksource_update_max_deferment(struct clocksource *cs)
511{ 514{
512 u64 max_nsecs; 515 cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift,
513 516 cs->maxadj, cs->mask,
514 max_nsecs = clocks_calc_max_nsecs(cs->mult, cs->shift, cs->maxadj, 517 &cs->max_cycles);
515 cs->mask);
516 /*
517 * To ensure that the clocksource does not wrap whilst we are idle,
518 * limit the time the clocksource can be deferred by 12.5%. Please
519 * note a margin of 12.5% is used because this can be computed with
520 * a shift, versus say 10% which would require division.
521 */
522 return max_nsecs - (max_nsecs >> 3);
523} 518}
524 519
525#ifndef CONFIG_ARCH_USES_GETTIMEOFFSET 520#ifndef CONFIG_ARCH_USES_GETTIMEOFFSET
@@ -648,7 +643,7 @@ static void clocksource_enqueue(struct clocksource *cs)
648} 643}
649 644
650/** 645/**
651 * __clocksource_updatefreq_scale - Used update clocksource with new freq 646 * __clocksource_update_freq_scale - Used update clocksource with new freq
652 * @cs: clocksource to be registered 647 * @cs: clocksource to be registered
653 * @scale: Scale factor multiplied against freq to get clocksource hz 648 * @scale: Scale factor multiplied against freq to get clocksource hz
654 * @freq: clocksource frequency (cycles per second) divided by scale 649 * @freq: clocksource frequency (cycles per second) divided by scale
@@ -656,48 +651,64 @@ static void clocksource_enqueue(struct clocksource *cs)
656 * This should only be called from the clocksource->enable() method. 651 * This should only be called from the clocksource->enable() method.
657 * 652 *
658 * This *SHOULD NOT* be called directly! Please use the 653 * This *SHOULD NOT* be called directly! Please use the
659 * clocksource_updatefreq_hz() or clocksource_updatefreq_khz helper functions. 654 * __clocksource_update_freq_hz() or __clocksource_update_freq_khz() helper
655 * functions.
660 */ 656 */
661void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq) 657void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq)
662{ 658{
663 u64 sec; 659 u64 sec;
660
664 /* 661 /*
665 * Calc the maximum number of seconds which we can run before 662 * Default clocksources are *special* and self-define their mult/shift.
666 * wrapping around. For clocksources which have a mask > 32bit 663 * But, you're not special, so you should specify a freq value.
667 * we need to limit the max sleep time to have a good
668 * conversion precision. 10 minutes is still a reasonable
669 * amount. That results in a shift value of 24 for a
670 * clocksource with mask >= 40bit and f >= 4GHz. That maps to
671 * ~ 0.06ppm granularity for NTP. We apply the same 12.5%
672 * margin as we do in clocksource_max_deferment()
673 */ 664 */
674 sec = (cs->mask - (cs->mask >> 3)); 665 if (freq) {
675 do_div(sec, freq); 666 /*
676 do_div(sec, scale); 667 * Calc the maximum number of seconds which we can run before
677 if (!sec) 668 * wrapping around. For clocksources which have a mask > 32-bit
678 sec = 1; 669 * we need to limit the max sleep time to have a good
679 else if (sec > 600 && cs->mask > UINT_MAX) 670 * conversion precision. 10 minutes is still a reasonable
680 sec = 600; 671 * amount. That results in a shift value of 24 for a
681 672 * clocksource with mask >= 40-bit and f >= 4GHz. That maps to
682 clocks_calc_mult_shift(&cs->mult, &cs->shift, freq, 673 * ~ 0.06ppm granularity for NTP.
683 NSEC_PER_SEC / scale, sec * scale); 674 */
684 675 sec = cs->mask;
676 do_div(sec, freq);
677 do_div(sec, scale);
678 if (!sec)
679 sec = 1;
680 else if (sec > 600 && cs->mask > UINT_MAX)
681 sec = 600;
682
683 clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
684 NSEC_PER_SEC / scale, sec * scale);
685 }
685 /* 686 /*
686 * for clocksources that have large mults, to avoid overflow. 687 * Ensure clocksources that have large 'mult' values don't overflow
687 * Since mult may be adjusted by ntp, add an safety extra margin 688 * when adjusted.
688 *
689 */ 689 */
690 cs->maxadj = clocksource_max_adjustment(cs); 690 cs->maxadj = clocksource_max_adjustment(cs);
691 while ((cs->mult + cs->maxadj < cs->mult) 691 while (freq && ((cs->mult + cs->maxadj < cs->mult)
692 || (cs->mult - cs->maxadj > cs->mult)) { 692 || (cs->mult - cs->maxadj > cs->mult))) {
693 cs->mult >>= 1; 693 cs->mult >>= 1;
694 cs->shift--; 694 cs->shift--;
695 cs->maxadj = clocksource_max_adjustment(cs); 695 cs->maxadj = clocksource_max_adjustment(cs);
696 } 696 }
697 697
698 cs->max_idle_ns = clocksource_max_deferment(cs); 698 /*
699 * Only warn for *special* clocksources that self-define
700 * their mult/shift values and don't specify a freq.
701 */
702 WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
703 "timekeeping: Clocksource %s might overflow on 11%% adjustment\n",
704 cs->name);
705
706 clocksource_update_max_deferment(cs);
707
708 pr_info("clocksource %s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n",
709 cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns);
699} 710}
700EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale); 711EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale);
701 712
702/** 713/**
703 * __clocksource_register_scale - Used to install new clocksources 714 * __clocksource_register_scale - Used to install new clocksources
@@ -714,7 +725,7 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
714{ 725{
715 726
716 /* Initialize mult/shift and max_idle_ns */ 727 /* Initialize mult/shift and max_idle_ns */
717 __clocksource_updatefreq_scale(cs, scale, freq); 728 __clocksource_update_freq_scale(cs, scale, freq);
718 729
719 /* Add clocksource to the clocksource list */ 730 /* Add clocksource to the clocksource list */
720 mutex_lock(&clocksource_mutex); 731 mutex_lock(&clocksource_mutex);
@@ -726,33 +737,6 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
726} 737}
727EXPORT_SYMBOL_GPL(__clocksource_register_scale); 738EXPORT_SYMBOL_GPL(__clocksource_register_scale);
728 739
729
730/**
731 * clocksource_register - Used to install new clocksources
732 * @cs: clocksource to be registered
733 *
734 * Returns -EBUSY if registration fails, zero otherwise.
735 */
736int clocksource_register(struct clocksource *cs)
737{
738 /* calculate max adjustment for given mult/shift */
739 cs->maxadj = clocksource_max_adjustment(cs);
740 WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
741 "Clocksource %s might overflow on 11%% adjustment\n",
742 cs->name);
743
744 /* calculate max idle time permitted for this clocksource */
745 cs->max_idle_ns = clocksource_max_deferment(cs);
746
747 mutex_lock(&clocksource_mutex);
748 clocksource_enqueue(cs);
749 clocksource_enqueue_watchdog(cs);
750 clocksource_select();
751 mutex_unlock(&clocksource_mutex);
752 return 0;
753}
754EXPORT_SYMBOL(clocksource_register);
755
756static void __clocksource_change_rating(struct clocksource *cs, int rating) 740static void __clocksource_change_rating(struct clocksource *cs, int rating)
757{ 741{
758 list_del(&cs->list); 742 list_del(&cs->list);
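To see what the reworked clocks_calc_max_nsecs() produces, here is a standalone user-space re-implementation of the same math with made-up parameters (a ~1 GHz counter with mult = 2^30, shift = 30, a 56-bit mask and an ~11% maxadj); the values are illustrative, not taken from any real clocksource:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t mult = 1ULL << 30, shift = 30;		/* ~1 ns per cycle */
	uint64_t maxadj = mult * 11 / 100;		/* ~11%, like clocksource_max_adjustment() */
	uint64_t mask = (1ULL << 56) - 1;

	/* largest cycle count that cannot overflow 64 bits in cyc2ns() */
	uint64_t max_cycles = UINT64_MAX / (mult + maxadj);
	if (max_cycles > mask)
		max_cycles = mask;

	/* cyc2ns() with the worst-case (smallest) adjusted multiplier */
	uint64_t max_nsecs = (max_cycles * (mult - maxadj)) >> shift;
	max_nsecs >>= 1;				/* 50% margin for bad-value detection */

	printf("max_cycles=%llu max_idle_ns=%llu (~%.1f s)\n",
	       (unsigned long long)max_cycles,
	       (unsigned long long)max_nsecs, max_nsecs / 1e9);
	return 0;
}

For these example numbers the halved max_idle_ns comes out at roughly 7 seconds; the clocksource additionally stores the un-halved cycle count in the new max_cycles field, which the new timekeeping debug checks (see the lib/Kconfig.debug entry in the diffstat) use to flag suspicious cycle deltas.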
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index a6a5bf53e86d..c4bb518725b5 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -71,6 +71,7 @@ static struct clocksource clocksource_jiffies = {
71 .mask = 0xffffffff, /*32bits*/ 71 .mask = 0xffffffff, /*32bits*/
72 .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */ 72 .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
73 .shift = JIFFIES_SHIFT, 73 .shift = JIFFIES_SHIFT,
74 .max_cycles = 10,
74}; 75};
75 76
76__cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock); 77__cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock);
@@ -94,7 +95,7 @@ EXPORT_SYMBOL(jiffies);
94 95
95static int __init init_jiffies_clocksource(void) 96static int __init init_jiffies_clocksource(void)
96{ 97{
97 return clocksource_register(&clocksource_jiffies); 98 return __clocksource_register(&clocksource_jiffies);
98} 99}
99 100
100core_initcall(init_jiffies_clocksource); 101core_initcall(init_jiffies_clocksource);
@@ -130,6 +131,6 @@ int register_refined_jiffies(long cycles_per_second)
130 131
131 refined_jiffies.mult = ((u32)nsec_per_tick) << JIFFIES_SHIFT; 132 refined_jiffies.mult = ((u32)nsec_per_tick) << JIFFIES_SHIFT;
132 133
133 clocksource_register(&refined_jiffies); 134 __clocksource_register(&refined_jiffies);
134 return 0; 135 return 0;
135} 136}
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 01d2d15aa662..a26036d37a38 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * sched_clock.c: support for extending counters to full 64-bit ns counter 2 * sched_clock.c: Generic sched_clock() support, to extend low level
3 * hardware time counters to full 64-bit ns values.
3 * 4 *
4 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 6 * it under the terms of the GNU General Public License version 2 as
@@ -18,15 +19,53 @@
18#include <linux/seqlock.h> 19#include <linux/seqlock.h>
19#include <linux/bitops.h> 20#include <linux/bitops.h>
20 21
21struct clock_data { 22/**
22 ktime_t wrap_kt; 23 * struct clock_read_data - data required to read from sched_clock()
24 *
25 * @epoch_ns: sched_clock() value at last update
26 * @epoch_cyc: Clock cycle value at last update.
27 * @sched_clock_mask: Bitmask for two's complement subtraction of non 64bit
28 * clocks.
29 * @read_sched_clock: Current clock source (or dummy source when suspended).
30 * @mult: Multipler for scaled math conversion.
31 * @shift: Shift value for scaled math conversion.
32 *
33 * Care must be taken when updating this structure; it is read by
34 * some very hot code paths. It occupies <=40 bytes and, when combined
35 * with the seqcount used to synchronize access, comfortably fits into
36 * a 64 byte cache line.
37 */
38struct clock_read_data {
23 u64 epoch_ns; 39 u64 epoch_ns;
24 u64 epoch_cyc; 40 u64 epoch_cyc;
25 seqcount_t seq; 41 u64 sched_clock_mask;
26 unsigned long rate; 42 u64 (*read_sched_clock)(void);
27 u32 mult; 43 u32 mult;
28 u32 shift; 44 u32 shift;
29 bool suspended; 45};
46
47/**
48 * struct clock_data - all data needed for sched_clock() (including
49 * registration of a new clock source)
50 *
51 * @seq: Sequence counter for protecting updates. The lowest
52 * bit is the index for @read_data.
53 * @read_data: Data required to read from sched_clock.
54 * @wrap_kt: Duration for which clock can run before wrapping.
55 * @rate: Tick rate of the registered clock.
56 * @actual_read_sched_clock: Registered hardware level clock read function.
57 *
58 * The ordering of this structure has been chosen to optimize cache
59 * performance. In particular 'seq' and 'read_data[0]' (combined) should fit
60 * into a single 64-byte cache line.
61 */
62struct clock_data {
63 seqcount_t seq;
64 struct clock_read_data read_data[2];
65 ktime_t wrap_kt;
66 unsigned long rate;
67
68 u64 (*actual_read_sched_clock)(void);
30}; 69};
31 70
32static struct hrtimer sched_clock_timer; 71static struct hrtimer sched_clock_timer;
@@ -34,12 +73,6 @@ static int irqtime = -1;
34 73
35core_param(irqtime, irqtime, int, 0400); 74core_param(irqtime, irqtime, int, 0400);
36 75
37static struct clock_data cd = {
38 .mult = NSEC_PER_SEC / HZ,
39};
40
41static u64 __read_mostly sched_clock_mask;
42
43static u64 notrace jiffy_sched_clock_read(void) 76static u64 notrace jiffy_sched_clock_read(void)
44{ 77{
45 /* 78 /*
@@ -49,7 +82,11 @@ static u64 notrace jiffy_sched_clock_read(void)
49 return (u64)(jiffies - INITIAL_JIFFIES); 82 return (u64)(jiffies - INITIAL_JIFFIES);
50} 83}
51 84
52static u64 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read; 85static struct clock_data cd ____cacheline_aligned = {
86 .read_data[0] = { .mult = NSEC_PER_SEC / HZ,
87 .read_sched_clock = jiffy_sched_clock_read, },
88 .actual_read_sched_clock = jiffy_sched_clock_read,
89};
53 90
54static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift) 91static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
55{ 92{
@@ -58,111 +95,136 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
58 95
59unsigned long long notrace sched_clock(void) 96unsigned long long notrace sched_clock(void)
60{ 97{
61 u64 epoch_ns; 98 u64 cyc, res;
62 u64 epoch_cyc;
63 u64 cyc;
64 unsigned long seq; 99 unsigned long seq;
65 100 struct clock_read_data *rd;
66 if (cd.suspended)
67 return cd.epoch_ns;
68 101
69 do { 102 do {
70 seq = raw_read_seqcount_begin(&cd.seq); 103 seq = raw_read_seqcount(&cd.seq);
71 epoch_cyc = cd.epoch_cyc; 104 rd = cd.read_data + (seq & 1);
72 epoch_ns = cd.epoch_ns; 105
106 cyc = (rd->read_sched_clock() - rd->epoch_cyc) &
107 rd->sched_clock_mask;
108 res = rd->epoch_ns + cyc_to_ns(cyc, rd->mult, rd->shift);
73 } while (read_seqcount_retry(&cd.seq, seq)); 109 } while (read_seqcount_retry(&cd.seq, seq));
74 110
75 cyc = read_sched_clock(); 111 return res;
76 cyc = (cyc - epoch_cyc) & sched_clock_mask; 112}
77 return epoch_ns + cyc_to_ns(cyc, cd.mult, cd.shift); 113
114/*
115 * Updating the data required to read the clock.
116 *
117 * sched_clock() will never observe mis-matched data even if called from
118 * an NMI. We do this by maintaining an odd/even copy of the data and
119 * steering sched_clock() to one or the other using a sequence counter.
120 * In order to preserve the data cache profile of sched_clock() as much
121 * as possible the system reverts back to the even copy when the update
122 * completes; the odd copy is used *only* during an update.
123 */
124static void update_clock_read_data(struct clock_read_data *rd)
125{
126 /* update the backup (odd) copy with the new data */
127 cd.read_data[1] = *rd;
128
129 /* steer readers towards the odd copy */
130 raw_write_seqcount_latch(&cd.seq);
131
132 /* now its safe for us to update the normal (even) copy */
133 cd.read_data[0] = *rd;
134
135 /* switch readers back to the even copy */
136 raw_write_seqcount_latch(&cd.seq);
78} 137}
79 138
80/* 139/*
81 * Atomically update the sched_clock epoch. 140 * Atomically update the sched_clock() epoch.
82 */ 141 */
83static void notrace update_sched_clock(void) 142static void update_sched_clock(void)
84{ 143{
85 unsigned long flags;
86 u64 cyc; 144 u64 cyc;
87 u64 ns; 145 u64 ns;
146 struct clock_read_data rd;
147
148 rd = cd.read_data[0];
149
150 cyc = cd.actual_read_sched_clock();
151 ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift);
152
153 rd.epoch_ns = ns;
154 rd.epoch_cyc = cyc;
88 155
89 cyc = read_sched_clock(); 156 update_clock_read_data(&rd);
90 ns = cd.epoch_ns +
91 cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
92 cd.mult, cd.shift);
93
94 raw_local_irq_save(flags);
95 raw_write_seqcount_begin(&cd.seq);
96 cd.epoch_ns = ns;
97 cd.epoch_cyc = cyc;
98 raw_write_seqcount_end(&cd.seq);
99 raw_local_irq_restore(flags);
100} 157}
101 158
102static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt) 159static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt)
103{ 160{
104 update_sched_clock(); 161 update_sched_clock();
105 hrtimer_forward_now(hrt, cd.wrap_kt); 162 hrtimer_forward_now(hrt, cd.wrap_kt);
163
106 return HRTIMER_RESTART; 164 return HRTIMER_RESTART;
107} 165}
108 166
109void __init sched_clock_register(u64 (*read)(void), int bits, 167void __init
110 unsigned long rate) 168sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
111{ 169{
112 u64 res, wrap, new_mask, new_epoch, cyc, ns; 170 u64 res, wrap, new_mask, new_epoch, cyc, ns;
113 u32 new_mult, new_shift; 171 u32 new_mult, new_shift;
114 ktime_t new_wrap_kt;
115 unsigned long r; 172 unsigned long r;
116 char r_unit; 173 char r_unit;
174 struct clock_read_data rd;
117 175
118 if (cd.rate > rate) 176 if (cd.rate > rate)
119 return; 177 return;
120 178
121 WARN_ON(!irqs_disabled()); 179 WARN_ON(!irqs_disabled());
122 180
123 /* calculate the mult/shift to convert counter ticks to ns. */ 181 /* Calculate the mult/shift to convert counter ticks to ns. */
124 clocks_calc_mult_shift(&new_mult, &new_shift, rate, NSEC_PER_SEC, 3600); 182 clocks_calc_mult_shift(&new_mult, &new_shift, rate, NSEC_PER_SEC, 3600);
125 183
126 new_mask = CLOCKSOURCE_MASK(bits); 184 new_mask = CLOCKSOURCE_MASK(bits);
185 cd.rate = rate;
186
187 /* Calculate how many nanosecs until we risk wrapping */
188 wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask, NULL);
189 cd.wrap_kt = ns_to_ktime(wrap);
127 190
128 /* calculate how many ns until we wrap */ 191 rd = cd.read_data[0];
129 wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask);
130 new_wrap_kt = ns_to_ktime(wrap - (wrap >> 3));
131 192
132 /* update epoch for new counter and update epoch_ns from old counter*/ 193 /* Update epoch for new counter and update 'epoch_ns' from old counter*/
133 new_epoch = read(); 194 new_epoch = read();
134 cyc = read_sched_clock(); 195 cyc = cd.actual_read_sched_clock();
135 ns = cd.epoch_ns + cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask, 196 ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift);
136 cd.mult, cd.shift); 197 cd.actual_read_sched_clock = read;
137 198
138 raw_write_seqcount_begin(&cd.seq); 199 rd.read_sched_clock = read;
139 read_sched_clock = read; 200 rd.sched_clock_mask = new_mask;
140 sched_clock_mask = new_mask; 201 rd.mult = new_mult;
141 cd.rate = rate; 202 rd.shift = new_shift;
142 cd.wrap_kt = new_wrap_kt; 203 rd.epoch_cyc = new_epoch;
143 cd.mult = new_mult; 204 rd.epoch_ns = ns;
144 cd.shift = new_shift; 205
145 cd.epoch_cyc = new_epoch; 206 update_clock_read_data(&rd);
146 cd.epoch_ns = ns;
147 raw_write_seqcount_end(&cd.seq);
148 207
149 r = rate; 208 r = rate;
150 if (r >= 4000000) { 209 if (r >= 4000000) {
151 r /= 1000000; 210 r /= 1000000;
152 r_unit = 'M'; 211 r_unit = 'M';
153 } else if (r >= 1000) { 212 } else {
154 r /= 1000; 213 if (r >= 1000) {
155 r_unit = 'k'; 214 r /= 1000;
156 } else 215 r_unit = 'k';
157 r_unit = ' '; 216 } else {
158 217 r_unit = ' ';
159 /* calculate the ns resolution of this counter */ 218 }
219 }
220
221 /* Calculate the ns resolution of this counter */
160 res = cyc_to_ns(1ULL, new_mult, new_shift); 222 res = cyc_to_ns(1ULL, new_mult, new_shift);
161 223
162 pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lluns\n", 224 pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lluns\n",
163 bits, r, r_unit, res, wrap); 225 bits, r, r_unit, res, wrap);
164 226
165 /* Enable IRQ time accounting if we have a fast enough sched_clock */ 227 /* Enable IRQ time accounting if we have a fast enough sched_clock() */
166 if (irqtime > 0 || (irqtime == -1 && rate >= 1000000)) 228 if (irqtime > 0 || (irqtime == -1 && rate >= 1000000))
167 enable_sched_clock_irqtime(); 229 enable_sched_clock_irqtime();
168 230
@@ -172,10 +234,10 @@ void __init sched_clock_register(u64 (*read)(void), int bits,
172void __init sched_clock_postinit(void) 234void __init sched_clock_postinit(void)
173{ 235{
174 /* 236 /*
175 * If no sched_clock function has been provided at that point, 237 * If no sched_clock() function has been provided at that point,
176 * make it the final one. 238 * make it the final one.

177 */ 239 */
178 if (read_sched_clock == jiffy_sched_clock_read) 240 if (cd.actual_read_sched_clock == jiffy_sched_clock_read)
179 sched_clock_register(jiffy_sched_clock_read, BITS_PER_LONG, HZ); 241 sched_clock_register(jiffy_sched_clock_read, BITS_PER_LONG, HZ);
180 242
181 update_sched_clock(); 243 update_sched_clock();
@@ -189,29 +251,53 @@ void __init sched_clock_postinit(void)
189 hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); 251 hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
190} 252}
191 253
254/*
255 * Clock read function for use when the clock is suspended.
256 *
257 * This function makes it appear to sched_clock() as if the clock
258 * stopped counting at its last update.
259 *
260 * This function must only be called from the critical
261 * section in sched_clock(). It relies on the read_seqcount_retry()
262 * at the end of the critical section to be sure we observe the
263 * correct copy of 'epoch_cyc'.
264 */
265static u64 notrace suspended_sched_clock_read(void)
266{
267 unsigned long seq = raw_read_seqcount(&cd.seq);
268
269 return cd.read_data[seq & 1].epoch_cyc;
270}
271
192static int sched_clock_suspend(void) 272static int sched_clock_suspend(void)
193{ 273{
274 struct clock_read_data *rd = &cd.read_data[0];
275
194 update_sched_clock(); 276 update_sched_clock();
195 hrtimer_cancel(&sched_clock_timer); 277 hrtimer_cancel(&sched_clock_timer);
196 cd.suspended = true; 278 rd->read_sched_clock = suspended_sched_clock_read;
279
197 return 0; 280 return 0;
198} 281}
199 282
200static void sched_clock_resume(void) 283static void sched_clock_resume(void)
201{ 284{
202 cd.epoch_cyc = read_sched_clock(); 285 struct clock_read_data *rd = &cd.read_data[0];
286
287 rd->epoch_cyc = cd.actual_read_sched_clock();
203 hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); 288 hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
204 cd.suspended = false; 289 rd->read_sched_clock = cd.actual_read_sched_clock;
205} 290}
206 291
207static struct syscore_ops sched_clock_ops = { 292static struct syscore_ops sched_clock_ops = {
208 .suspend = sched_clock_suspend, 293 .suspend = sched_clock_suspend,
209 .resume = sched_clock_resume, 294 .resume = sched_clock_resume,
210}; 295};
211 296
212static int __init sched_clock_syscore_init(void) 297static int __init sched_clock_syscore_init(void)
213{ 298{
214 register_syscore_ops(&sched_clock_ops); 299 register_syscore_ops(&sched_clock_ops);
300
215 return 0; 301 return 0;
216} 302}
217device_initcall(sched_clock_syscore_init); 303device_initcall(sched_clock_syscore_init);
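The double-buffered cd.read_data[] that update_clock_read_data() and suspended_sched_clock_read() operate on exists so that sched_clock() itself stays NMI-safe: readers use the copy selected by the low bit of the sequence counter and retry if an update raced with them. The reader side is not part of the hunks shown here; the following is a rough reconstruction of what it looks like under this scheme, a sketch rather than the literal source.

unsigned long long notrace sched_clock(void)
{
	u64 cyc, res;
	unsigned long seq;
	struct clock_read_data *rd;

	do {
		seq = raw_read_seqcount(&cd.seq);
		rd  = cd.read_data + (seq & 1);	/* latched copy for this seq */

		cyc = (rd->read_sched_clock() - rd->epoch_cyc) &
		      rd->sched_clock_mask;
		res = rd->epoch_ns + cyc_to_ns(cyc, rd->mult, rd->shift);
	} while (read_seqcount_retry(&cd.seq, seq));

	return res;
}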
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 91db94136c10..c3fcff06d30a 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -59,6 +59,7 @@ struct tk_fast {
59}; 59};
60 60
61static struct tk_fast tk_fast_mono ____cacheline_aligned; 61static struct tk_fast tk_fast_mono ____cacheline_aligned;
62static struct tk_fast tk_fast_raw ____cacheline_aligned;
62 63
63/* flag for if timekeeping is suspended */ 64/* flag for if timekeeping is suspended */
64int __read_mostly timekeeping_suspended; 65int __read_mostly timekeeping_suspended;
@@ -68,8 +69,8 @@ bool __read_mostly persistent_clock_exist = false;
68 69
69static inline void tk_normalize_xtime(struct timekeeper *tk) 70static inline void tk_normalize_xtime(struct timekeeper *tk)
70{ 71{
71 while (tk->tkr.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr.shift)) { 72 while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) {
72 tk->tkr.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr.shift; 73 tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
73 tk->xtime_sec++; 74 tk->xtime_sec++;
74 } 75 }
75} 76}
@@ -79,20 +80,20 @@ static inline struct timespec64 tk_xtime(struct timekeeper *tk)
79 struct timespec64 ts; 80 struct timespec64 ts;
80 81
81 ts.tv_sec = tk->xtime_sec; 82 ts.tv_sec = tk->xtime_sec;
82 ts.tv_nsec = (long)(tk->tkr.xtime_nsec >> tk->tkr.shift); 83 ts.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
83 return ts; 84 return ts;
84} 85}
85 86
86static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts) 87static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts)
87{ 88{
88 tk->xtime_sec = ts->tv_sec; 89 tk->xtime_sec = ts->tv_sec;
89 tk->tkr.xtime_nsec = (u64)ts->tv_nsec << tk->tkr.shift; 90 tk->tkr_mono.xtime_nsec = (u64)ts->tv_nsec << tk->tkr_mono.shift;
90} 91}
91 92
92static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts) 93static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts)
93{ 94{
94 tk->xtime_sec += ts->tv_sec; 95 tk->xtime_sec += ts->tv_sec;
95 tk->tkr.xtime_nsec += (u64)ts->tv_nsec << tk->tkr.shift; 96 tk->tkr_mono.xtime_nsec += (u64)ts->tv_nsec << tk->tkr_mono.shift;
96 tk_normalize_xtime(tk); 97 tk_normalize_xtime(tk);
97} 98}
98 99
@@ -118,6 +119,117 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
118 tk->offs_boot = ktime_add(tk->offs_boot, delta); 119 tk->offs_boot = ktime_add(tk->offs_boot, delta);
119} 120}
120 121
122#ifdef CONFIG_DEBUG_TIMEKEEPING
123#define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */
124/*
125 * These simple flag variables are managed
126 * without locks, which is racy, but ok since
127 * we don't really care about being super
128 * precise about how many events were seen,
129 * just that a problem was observed.
130 */
131static int timekeeping_underflow_seen;
132static int timekeeping_overflow_seen;
133
134/* last_warning is only modified under the timekeeping lock */
135static long timekeeping_last_warning;
136
137static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
138{
139
140 cycle_t max_cycles = tk->tkr_mono.clock->max_cycles;
141 const char *name = tk->tkr_mono.clock->name;
142
143 if (offset > max_cycles) {
144 printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n",
145 offset, name, max_cycles);
146 printk_deferred(" timekeeping: Your kernel is sick, but tries to cope by capping time updates\n");
147 } else {
148 if (offset > (max_cycles >> 1)) {
149 printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the '%s' clock's 50%% safety margin (%lld)\n",
150 offset, name, max_cycles >> 1);
151 printk_deferred(" timekeeping: Your kernel is still fine, but is feeling a bit nervous\n");
152 }
153 }
154
155 if (timekeeping_underflow_seen) {
156 if (jiffies - timekeeping_last_warning > WARNING_FREQ) {
157 printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name);
158 printk_deferred(" Please report this, consider using a different clocksource, if possible.\n");
159 printk_deferred(" Your kernel is probably still fine.\n");
160 timekeeping_last_warning = jiffies;
161 }
162 timekeeping_underflow_seen = 0;
163 }
164
165 if (timekeeping_overflow_seen) {
166 if (jiffies - timekeeping_last_warning > WARNING_FREQ) {
167 printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name);
168 printk_deferred(" Please report this, consider using a different clocksource, if possible.\n");
169 printk_deferred(" Your kernel is probably still fine.\n");
170 timekeeping_last_warning = jiffies;
171 }
172 timekeeping_overflow_seen = 0;
173 }
174}
175
176static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
177{
178 cycle_t now, last, mask, max, delta;
179 unsigned int seq;
180
181 /*
182 * Since we're called holding a seqlock, the data may shift
183 * under us while we're doing the calculation. This can cause
184 * false positives, since we'd note a problem but throw the
185 * results away. So nest another seqlock here to atomically
186 * grab the points we are checking with.
187 */
188 do {
189 seq = read_seqcount_begin(&tk_core.seq);
190 now = tkr->read(tkr->clock);
191 last = tkr->cycle_last;
192 mask = tkr->mask;
193 max = tkr->clock->max_cycles;
194 } while (read_seqcount_retry(&tk_core.seq, seq));
195
196 delta = clocksource_delta(now, last, mask);
197
198 /*
199 * Try to catch underflows by checking if we are seeing small
200 * mask-relative negative values.
201 */
202 if (unlikely((~delta & mask) < (mask >> 3))) {
203 timekeeping_underflow_seen = 1;
204 delta = 0;
205 }
206
207 /* Cap delta value to the max_cycles value to avoid mult overflows */
208 if (unlikely(delta > max)) {
209 timekeeping_overflow_seen = 1;
210 delta = tkr->clock->max_cycles;
211 }
212
213 return delta;
214}
215#else
216static inline void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
217{
218}
219static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
220{
221 cycle_t cycle_now, delta;
222
223 /* read clocksource */
224 cycle_now = tkr->read(tkr->clock);
225
226 /* calculate the delta since the last update_wall_time */
227 delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
228
229 return delta;
230}
231#endif
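The underflow heuristic above leans on modular arithmetic: with a wrapping counter, a 'now' sample that has slipped slightly behind cycle_last produces a masked delta just under the mask, so (~delta & mask) comes out tiny. A standalone userspace illustration of that test follows; the 32-bit mask and the sample values are made up, and the helper mirrors only the default (non-validating) form of the kernel's clocksource_delta().

#include <stdint.h>
#include <stdio.h>

/* The modular delta: (now - last) reduced onto the counter's width. */
static uint64_t clocksource_delta(uint64_t now, uint64_t last, uint64_t mask)
{
	return (now - last) & mask;
}

int main(void)
{
	uint64_t mask = (1ULL << 32) - 1;	/* pretend 32-bit clocksource */
	uint64_t last = 1000000;
	uint64_t now  = last - 5;		/* counter appears to have gone backwards */
	uint64_t delta = clocksource_delta(now, last, mask);

	/* delta == 0xfffffffb: huge, but within mask >> 3 of wrapping back to 0 */
	if ((~delta & mask) < (mask >> 3))
		printf("underflow suspected, delta would be capped to 0\n");
	return 0;
}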
232
121/** 233/**
122 * tk_setup_internals - Set up internals to use clocksource clock. 234 * tk_setup_internals - Set up internals to use clocksource clock.
123 * 235 *
@@ -135,11 +247,16 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
135 u64 tmp, ntpinterval; 247 u64 tmp, ntpinterval;
136 struct clocksource *old_clock; 248 struct clocksource *old_clock;
137 249
138 old_clock = tk->tkr.clock; 250 old_clock = tk->tkr_mono.clock;
139 tk->tkr.clock = clock; 251 tk->tkr_mono.clock = clock;
140 tk->tkr.read = clock->read; 252 tk->tkr_mono.read = clock->read;
141 tk->tkr.mask = clock->mask; 253 tk->tkr_mono.mask = clock->mask;
142 tk->tkr.cycle_last = tk->tkr.read(clock); 254 tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock);
255
256 tk->tkr_raw.clock = clock;
257 tk->tkr_raw.read = clock->read;
258 tk->tkr_raw.mask = clock->mask;
259 tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;
143 260
144 /* Do the ns -> cycle conversion first, using original mult */ 261 /* Do the ns -> cycle conversion first, using original mult */
145 tmp = NTP_INTERVAL_LENGTH; 262 tmp = NTP_INTERVAL_LENGTH;
@@ -163,11 +280,14 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
163 if (old_clock) { 280 if (old_clock) {
164 int shift_change = clock->shift - old_clock->shift; 281 int shift_change = clock->shift - old_clock->shift;
165 if (shift_change < 0) 282 if (shift_change < 0)
166 tk->tkr.xtime_nsec >>= -shift_change; 283 tk->tkr_mono.xtime_nsec >>= -shift_change;
167 else 284 else
168 tk->tkr.xtime_nsec <<= shift_change; 285 tk->tkr_mono.xtime_nsec <<= shift_change;
169 } 286 }
170 tk->tkr.shift = clock->shift; 287 tk->tkr_raw.xtime_nsec = 0;
288
289 tk->tkr_mono.shift = clock->shift;
290 tk->tkr_raw.shift = clock->shift;
171 291
172 tk->ntp_error = 0; 292 tk->ntp_error = 0;
173 tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; 293 tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift;
@@ -178,7 +298,8 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
178 * active clocksource. These values will be adjusted via NTP 298
179 * to counteract clock drifting. 299 * to counteract clock drifting.
180 */ 300 */
181 tk->tkr.mult = clock->mult; 301 tk->tkr_mono.mult = clock->mult;
302 tk->tkr_raw.mult = clock->mult;
182 tk->ntp_err_mult = 0; 303 tk->ntp_err_mult = 0;
183} 304}
184 305
@@ -193,14 +314,10 @@ static inline u32 arch_gettimeoffset(void) { return 0; }
193 314
194static inline s64 timekeeping_get_ns(struct tk_read_base *tkr) 315static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
195{ 316{
196 cycle_t cycle_now, delta; 317 cycle_t delta;
197 s64 nsec; 318 s64 nsec;
198 319
199 /* read clocksource: */ 320 delta = timekeeping_get_delta(tkr);
200 cycle_now = tkr->read(tkr->clock);
201
202 /* calculate the delta since the last update_wall_time: */
203 delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
204 321
205 nsec = delta * tkr->mult + tkr->xtime_nsec; 322 nsec = delta * tkr->mult + tkr->xtime_nsec;
206 nsec >>= tkr->shift; 323 nsec >>= tkr->shift;
@@ -209,25 +326,6 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
209 return nsec + arch_gettimeoffset(); 326 return nsec + arch_gettimeoffset();
210} 327}
211 328
212static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
213{
214 struct clocksource *clock = tk->tkr.clock;
215 cycle_t cycle_now, delta;
216 s64 nsec;
217
218 /* read clocksource: */
219 cycle_now = tk->tkr.read(clock);
220
221 /* calculate the delta since the last update_wall_time: */
222 delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask);
223
224 /* convert delta to nanoseconds. */
225 nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift);
226
227 /* If arch requires, add in get_arch_timeoffset() */
228 return nsec + arch_gettimeoffset();
229}
230
231/** 329/**
232 * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper. 330 * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
233 * @tkr: Timekeeping readout base from which we take the update 331 * @tkr: Timekeeping readout base from which we take the update
@@ -267,18 +365,18 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
267 * slightly wrong timestamp (a few nanoseconds). See 365 * slightly wrong timestamp (a few nanoseconds). See
268 * @ktime_get_mono_fast_ns. 366 * @ktime_get_mono_fast_ns.
269 */ 367 */
270static void update_fast_timekeeper(struct tk_read_base *tkr) 368static void update_fast_timekeeper(struct tk_read_base *tkr, struct tk_fast *tkf)
271{ 369{
272 struct tk_read_base *base = tk_fast_mono.base; 370 struct tk_read_base *base = tkf->base;
273 371
274 /* Force readers off to base[1] */ 372 /* Force readers off to base[1] */
275 raw_write_seqcount_latch(&tk_fast_mono.seq); 373 raw_write_seqcount_latch(&tkf->seq);
276 374
277 /* Update base[0] */ 375 /* Update base[0] */
278 memcpy(base, tkr, sizeof(*base)); 376 memcpy(base, tkr, sizeof(*base));
279 377
280 /* Force readers back to base[0] */ 378 /* Force readers back to base[0] */
281 raw_write_seqcount_latch(&tk_fast_mono.seq); 379 raw_write_seqcount_latch(&tkf->seq);
282 380
283 /* Update base[1] */ 381 /* Update base[1] */
284 memcpy(base + 1, base, sizeof(*base)); 382 memcpy(base + 1, base, sizeof(*base));
@@ -316,22 +414,33 @@ static void update_fast_timekeeper(struct tk_read_base *tkr)
316 * of the following timestamps. Callers need to be aware of that and 414 * of the following timestamps. Callers need to be aware of that and
317 * deal with it. 415 * deal with it.
318 */ 416 */
319u64 notrace ktime_get_mono_fast_ns(void) 417static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
320{ 418{
321 struct tk_read_base *tkr; 419 struct tk_read_base *tkr;
322 unsigned int seq; 420 unsigned int seq;
323 u64 now; 421 u64 now;
324 422
325 do { 423 do {
326 seq = raw_read_seqcount(&tk_fast_mono.seq); 424 seq = raw_read_seqcount(&tkf->seq);
327 tkr = tk_fast_mono.base + (seq & 0x01); 425 tkr = tkf->base + (seq & 0x01);
328 now = ktime_to_ns(tkr->base_mono) + timekeeping_get_ns(tkr); 426 now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr);
427 } while (read_seqcount_retry(&tkf->seq, seq));
329 428
330 } while (read_seqcount_retry(&tk_fast_mono.seq, seq));
331 return now; 429 return now;
332} 430}
431
432u64 ktime_get_mono_fast_ns(void)
433{
434 return __ktime_get_fast_ns(&tk_fast_mono);
435}
333EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns); 436EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);
334 437
438u64 ktime_get_raw_fast_ns(void)
439{
440 return __ktime_get_fast_ns(&tk_fast_raw);
441}
442EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
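ktime_get_mono_fast_ns() and the new ktime_get_raw_fast_ns() are for callers that cannot take tk_core.seq as ordinary readers, e.g. NMI handlers or tracing hooks that may fire in the middle of a timekeeper update. A hypothetical caller is sketched below; the function and variable names (example_nmi_timestamp, nmi_last_stamp) are invented for illustration.

#include <linux/timekeeping.h>
#include <linux/types.h>

static u64 nmi_last_stamp;	/* hypothetical per-event timestamp */

/* Imagined NMI-context hook that wants a timestamp. */
static void example_nmi_timestamp(void)
{
	/*
	 * Safe here: the fast accessors only perform a latched seqcount
	 * read, so they can interrupt update_fast_timekeeper() without
	 * blocking or seeing torn data -- at worst the timestamp is a few
	 * nanoseconds stale, as the comment above notes.
	 */
	nmi_last_stamp = ktime_get_raw_fast_ns();
}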
443
335/* Suspend-time cycles value for halted fast timekeeper. */ 444/* Suspend-time cycles value for halted fast timekeeper. */
336static cycle_t cycles_at_suspend; 445static cycle_t cycles_at_suspend;
337 446
@@ -353,12 +462,17 @@ static cycle_t dummy_clock_read(struct clocksource *cs)
353static void halt_fast_timekeeper(struct timekeeper *tk) 462static void halt_fast_timekeeper(struct timekeeper *tk)
354{ 463{
355 static struct tk_read_base tkr_dummy; 464 static struct tk_read_base tkr_dummy;
356 struct tk_read_base *tkr = &tk->tkr; 465 struct tk_read_base *tkr = &tk->tkr_mono;
357 466
358 memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); 467 memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
359 cycles_at_suspend = tkr->read(tkr->clock); 468 cycles_at_suspend = tkr->read(tkr->clock);
360 tkr_dummy.read = dummy_clock_read; 469 tkr_dummy.read = dummy_clock_read;
361 update_fast_timekeeper(&tkr_dummy); 470 update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);
471
472 tkr = &tk->tkr_raw;
473 memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
474 tkr_dummy.read = dummy_clock_read;
475 update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
362} 476}
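halt_fast_timekeeper() swaps dummy_clock_read() into both fast timekeepers. Its body is not shown in these hunks, but given cycles_at_suspend above it is presumably just a replay of the cycle count sampled at suspend, along these lines, so the fast accessors keep returning the pre-suspend timestamp instead of touching hardware that may already be powered down.

/* Rough reconstruction -- not part of the hunks shown here. */
static cycle_t dummy_clock_read(struct clocksource *cs)
{
	return cycles_at_suspend;
}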
363 477
364#ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD 478#ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD
@@ -369,8 +483,8 @@ static inline void update_vsyscall(struct timekeeper *tk)
369 483
370 xt = timespec64_to_timespec(tk_xtime(tk)); 484 xt = timespec64_to_timespec(tk_xtime(tk));
371 wm = timespec64_to_timespec(tk->wall_to_monotonic); 485 wm = timespec64_to_timespec(tk->wall_to_monotonic);
372 update_vsyscall_old(&xt, &wm, tk->tkr.clock, tk->tkr.mult, 486 update_vsyscall_old(&xt, &wm, tk->tkr_mono.clock, tk->tkr_mono.mult,
373 tk->tkr.cycle_last); 487 tk->tkr_mono.cycle_last);
374} 488}
375 489
376static inline void old_vsyscall_fixup(struct timekeeper *tk) 490static inline void old_vsyscall_fixup(struct timekeeper *tk)
@@ -387,11 +501,11 @@ static inline void old_vsyscall_fixup(struct timekeeper *tk)
387 * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD 501 * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD
388 * users are removed, this can be killed. 502 * users are removed, this can be killed.
389 */ 503 */
390 remainder = tk->tkr.xtime_nsec & ((1ULL << tk->tkr.shift) - 1); 504 remainder = tk->tkr_mono.xtime_nsec & ((1ULL << tk->tkr_mono.shift) - 1);
391 tk->tkr.xtime_nsec -= remainder; 505 tk->tkr_mono.xtime_nsec -= remainder;
392 tk->tkr.xtime_nsec += 1ULL << tk->tkr.shift; 506 tk->tkr_mono.xtime_nsec += 1ULL << tk->tkr_mono.shift;
393 tk->ntp_error += remainder << tk->ntp_error_shift; 507 tk->ntp_error += remainder << tk->ntp_error_shift;
394 tk->ntp_error -= (1ULL << tk->tkr.shift) << tk->ntp_error_shift; 508 tk->ntp_error -= (1ULL << tk->tkr_mono.shift) << tk->ntp_error_shift;
395} 509}
396#else 510#else
397#define old_vsyscall_fixup(tk) 511#define old_vsyscall_fixup(tk)
@@ -456,17 +570,17 @@ static inline void tk_update_ktime_data(struct timekeeper *tk)
456 */ 570 */
457 seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec); 571 seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec);
458 nsec = (u32) tk->wall_to_monotonic.tv_nsec; 572 nsec = (u32) tk->wall_to_monotonic.tv_nsec;
459 tk->tkr.base_mono = ns_to_ktime(seconds * NSEC_PER_SEC + nsec); 573 tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
460 574
461 /* Update the monotonic raw base */ 575 /* Update the monotonic raw base */
462 tk->base_raw = timespec64_to_ktime(tk->raw_time); 576 tk->tkr_raw.base = timespec64_to_ktime(tk->raw_time);
463 577
464 /* 578 /*
465 * The sum of the nanoseconds portions of xtime and 579 * The sum of the nanoseconds portions of xtime and
466 * wall_to_monotonic can be greater/equal one second. Take 580 * wall_to_monotonic can be greater/equal one second. Take
467 * this into account before updating tk->ktime_sec. 581 * this into account before updating tk->ktime_sec.
468 */ 582 */
469 nsec += (u32)(tk->tkr.xtime_nsec >> tk->tkr.shift); 583 nsec += (u32)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
470 if (nsec >= NSEC_PER_SEC) 584 if (nsec >= NSEC_PER_SEC)
471 seconds++; 585 seconds++;
472 tk->ktime_sec = seconds; 586 tk->ktime_sec = seconds;
@@ -489,7 +603,8 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
489 memcpy(&shadow_timekeeper, &tk_core.timekeeper, 603 memcpy(&shadow_timekeeper, &tk_core.timekeeper,
490 sizeof(tk_core.timekeeper)); 604 sizeof(tk_core.timekeeper));
491 605
492 update_fast_timekeeper(&tk->tkr); 606 update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono);
607 update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw);
493} 608}
494 609
495/** 610/**
@@ -501,22 +616,23 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
501 */ 616 */
502static void timekeeping_forward_now(struct timekeeper *tk) 617static void timekeeping_forward_now(struct timekeeper *tk)
503{ 618{
504 struct clocksource *clock = tk->tkr.clock; 619 struct clocksource *clock = tk->tkr_mono.clock;
505 cycle_t cycle_now, delta; 620 cycle_t cycle_now, delta;
506 s64 nsec; 621 s64 nsec;
507 622
508 cycle_now = tk->tkr.read(clock); 623 cycle_now = tk->tkr_mono.read(clock);
509 delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask); 624 delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
510 tk->tkr.cycle_last = cycle_now; 625 tk->tkr_mono.cycle_last = cycle_now;
626 tk->tkr_raw.cycle_last = cycle_now;
511 627
512 tk->tkr.xtime_nsec += delta * tk->tkr.mult; 628 tk->tkr_mono.xtime_nsec += delta * tk->tkr_mono.mult;
513 629
514 /* If arch requires, add in get_arch_timeoffset() */ 630 /* If arch requires, add in get_arch_timeoffset() */
515 tk->tkr.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr.shift; 631 tk->tkr_mono.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_mono.shift;
516 632
517 tk_normalize_xtime(tk); 633 tk_normalize_xtime(tk);
518 634
519 nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift); 635 nsec = clocksource_cyc2ns(delta, tk->tkr_raw.mult, tk->tkr_raw.shift);
520 timespec64_add_ns(&tk->raw_time, nsec); 636 timespec64_add_ns(&tk->raw_time, nsec);
521} 637}
522 638
@@ -537,7 +653,7 @@ int __getnstimeofday64(struct timespec64 *ts)
537 seq = read_seqcount_begin(&tk_core.seq); 653 seq = read_seqcount_begin(&tk_core.seq);
538 654
539 ts->tv_sec = tk->xtime_sec; 655 ts->tv_sec = tk->xtime_sec;
540 nsecs = timekeeping_get_ns(&tk->tkr); 656 nsecs = timekeeping_get_ns(&tk->tkr_mono);
541 657
542 } while (read_seqcount_retry(&tk_core.seq, seq)); 658 } while (read_seqcount_retry(&tk_core.seq, seq));
543 659
@@ -577,8 +693,8 @@ ktime_t ktime_get(void)
577 693
578 do { 694 do {
579 seq = read_seqcount_begin(&tk_core.seq); 695 seq = read_seqcount_begin(&tk_core.seq);
580 base = tk->tkr.base_mono; 696 base = tk->tkr_mono.base;
581 nsecs = timekeeping_get_ns(&tk->tkr); 697 nsecs = timekeeping_get_ns(&tk->tkr_mono);
582 698
583 } while (read_seqcount_retry(&tk_core.seq, seq)); 699 } while (read_seqcount_retry(&tk_core.seq, seq));
584 700
@@ -603,8 +719,8 @@ ktime_t ktime_get_with_offset(enum tk_offsets offs)
603 719
604 do { 720 do {
605 seq = read_seqcount_begin(&tk_core.seq); 721 seq = read_seqcount_begin(&tk_core.seq);
606 base = ktime_add(tk->tkr.base_mono, *offset); 722 base = ktime_add(tk->tkr_mono.base, *offset);
607 nsecs = timekeeping_get_ns(&tk->tkr); 723 nsecs = timekeeping_get_ns(&tk->tkr_mono);
608 724
609 } while (read_seqcount_retry(&tk_core.seq, seq)); 725 } while (read_seqcount_retry(&tk_core.seq, seq));
610 726
@@ -645,8 +761,8 @@ ktime_t ktime_get_raw(void)
645 761
646 do { 762 do {
647 seq = read_seqcount_begin(&tk_core.seq); 763 seq = read_seqcount_begin(&tk_core.seq);
648 base = tk->base_raw; 764 base = tk->tkr_raw.base;
649 nsecs = timekeeping_get_ns_raw(tk); 765 nsecs = timekeeping_get_ns(&tk->tkr_raw);
650 766
651 } while (read_seqcount_retry(&tk_core.seq, seq)); 767 } while (read_seqcount_retry(&tk_core.seq, seq));
652 768
@@ -674,7 +790,7 @@ void ktime_get_ts64(struct timespec64 *ts)
674 do { 790 do {
675 seq = read_seqcount_begin(&tk_core.seq); 791 seq = read_seqcount_begin(&tk_core.seq);
676 ts->tv_sec = tk->xtime_sec; 792 ts->tv_sec = tk->xtime_sec;
677 nsec = timekeeping_get_ns(&tk->tkr); 793 nsec = timekeeping_get_ns(&tk->tkr_mono);
678 tomono = tk->wall_to_monotonic; 794 tomono = tk->wall_to_monotonic;
679 795
680 } while (read_seqcount_retry(&tk_core.seq, seq)); 796 } while (read_seqcount_retry(&tk_core.seq, seq));
@@ -759,8 +875,8 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
759 ts_real->tv_sec = tk->xtime_sec; 875 ts_real->tv_sec = tk->xtime_sec;
760 ts_real->tv_nsec = 0; 876 ts_real->tv_nsec = 0;
761 877
762 nsecs_raw = timekeeping_get_ns_raw(tk); 878 nsecs_raw = timekeeping_get_ns(&tk->tkr_raw);
763 nsecs_real = timekeeping_get_ns(&tk->tkr); 879 nsecs_real = timekeeping_get_ns(&tk->tkr_mono);
764 880
765 } while (read_seqcount_retry(&tk_core.seq, seq)); 881 } while (read_seqcount_retry(&tk_core.seq, seq));
766 882
@@ -943,7 +1059,7 @@ static int change_clocksource(void *data)
943 */ 1059 */
944 if (try_module_get(new->owner)) { 1060 if (try_module_get(new->owner)) {
945 if (!new->enable || new->enable(new) == 0) { 1061 if (!new->enable || new->enable(new) == 0) {
946 old = tk->tkr.clock; 1062 old = tk->tkr_mono.clock;
947 tk_setup_internals(tk, new); 1063 tk_setup_internals(tk, new);
948 if (old->disable) 1064 if (old->disable)
949 old->disable(old); 1065 old->disable(old);
@@ -971,11 +1087,11 @@ int timekeeping_notify(struct clocksource *clock)
971{ 1087{
972 struct timekeeper *tk = &tk_core.timekeeper; 1088 struct timekeeper *tk = &tk_core.timekeeper;
973 1089
974 if (tk->tkr.clock == clock) 1090 if (tk->tkr_mono.clock == clock)
975 return 0; 1091 return 0;
976 stop_machine(change_clocksource, clock, NULL); 1092 stop_machine(change_clocksource, clock, NULL);
977 tick_clock_notify(); 1093 tick_clock_notify();
978 return tk->tkr.clock == clock ? 0 : -1; 1094 return tk->tkr_mono.clock == clock ? 0 : -1;
979} 1095}
980 1096
981/** 1097/**
@@ -993,7 +1109,7 @@ void getrawmonotonic64(struct timespec64 *ts)
993 1109
994 do { 1110 do {
995 seq = read_seqcount_begin(&tk_core.seq); 1111 seq = read_seqcount_begin(&tk_core.seq);
996 nsecs = timekeeping_get_ns_raw(tk); 1112 nsecs = timekeeping_get_ns(&tk->tkr_raw);
997 ts64 = tk->raw_time; 1113 ts64 = tk->raw_time;
998 1114
999 } while (read_seqcount_retry(&tk_core.seq, seq)); 1115 } while (read_seqcount_retry(&tk_core.seq, seq));
@@ -1016,7 +1132,7 @@ int timekeeping_valid_for_hres(void)
1016 do { 1132 do {
1017 seq = read_seqcount_begin(&tk_core.seq); 1133 seq = read_seqcount_begin(&tk_core.seq);
1018 1134
1019 ret = tk->tkr.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; 1135 ret = tk->tkr_mono.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
1020 1136
1021 } while (read_seqcount_retry(&tk_core.seq, seq)); 1137 } while (read_seqcount_retry(&tk_core.seq, seq));
1022 1138
@@ -1035,7 +1151,7 @@ u64 timekeeping_max_deferment(void)
1035 do { 1151 do {
1036 seq = read_seqcount_begin(&tk_core.seq); 1152 seq = read_seqcount_begin(&tk_core.seq);
1037 1153
1038 ret = tk->tkr.clock->max_idle_ns; 1154 ret = tk->tkr_mono.clock->max_idle_ns;
1039 1155
1040 } while (read_seqcount_retry(&tk_core.seq, seq)); 1156 } while (read_seqcount_retry(&tk_core.seq, seq));
1041 1157
@@ -1114,7 +1230,6 @@ void __init timekeeping_init(void)
1114 tk_set_xtime(tk, &now); 1230 tk_set_xtime(tk, &now);
1115 tk->raw_time.tv_sec = 0; 1231 tk->raw_time.tv_sec = 0;
1116 tk->raw_time.tv_nsec = 0; 1232 tk->raw_time.tv_nsec = 0;
1117 tk->base_raw.tv64 = 0;
1118 if (boot.tv_sec == 0 && boot.tv_nsec == 0) 1233 if (boot.tv_sec == 0 && boot.tv_nsec == 0)
1119 boot = tk_xtime(tk); 1234 boot = tk_xtime(tk);
1120 1235
@@ -1200,7 +1315,7 @@ void timekeeping_inject_sleeptime64(struct timespec64 *delta)
1200void timekeeping_resume(void) 1315void timekeeping_resume(void)
1201{ 1316{
1202 struct timekeeper *tk = &tk_core.timekeeper; 1317 struct timekeeper *tk = &tk_core.timekeeper;
1203 struct clocksource *clock = tk->tkr.clock; 1318 struct clocksource *clock = tk->tkr_mono.clock;
1204 unsigned long flags; 1319 unsigned long flags;
1205 struct timespec64 ts_new, ts_delta; 1320 struct timespec64 ts_new, ts_delta;
1206 struct timespec tmp; 1321 struct timespec tmp;
@@ -1228,16 +1343,16 @@ void timekeeping_resume(void)
1228 * The less preferred source will only be tried if there is no better 1343 * The less preferred source will only be tried if there is no better
1229 * usable source. The rtc part is handled separately in rtc core code. 1344 * usable source. The rtc part is handled separately in rtc core code.
1230 */ 1345 */
1231 cycle_now = tk->tkr.read(clock); 1346 cycle_now = tk->tkr_mono.read(clock);
1232 if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) && 1347 if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
1233 cycle_now > tk->tkr.cycle_last) { 1348 cycle_now > tk->tkr_mono.cycle_last) {
1234 u64 num, max = ULLONG_MAX; 1349 u64 num, max = ULLONG_MAX;
1235 u32 mult = clock->mult; 1350 u32 mult = clock->mult;
1236 u32 shift = clock->shift; 1351 u32 shift = clock->shift;
1237 s64 nsec = 0; 1352 s64 nsec = 0;
1238 1353
1239 cycle_delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, 1354 cycle_delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last,
1240 tk->tkr.mask); 1355 tk->tkr_mono.mask);
1241 1356
1242 /* 1357 /*
1243 * "cycle_delta * mult" may cause 64-bit overflow, if the 1358
@@ -1263,7 +1378,9 @@ void timekeeping_resume(void)
1263 __timekeeping_inject_sleeptime(tk, &ts_delta); 1378 __timekeeping_inject_sleeptime(tk, &ts_delta);
1264 1379
1265 /* Re-base the last cycle value */ 1380 /* Re-base the last cycle value */
1266 tk->tkr.cycle_last = cycle_now; 1381 tk->tkr_mono.cycle_last = cycle_now;
1382 tk->tkr_raw.cycle_last = cycle_now;
1383
1267 tk->ntp_error = 0; 1384 tk->ntp_error = 0;
1268 timekeeping_suspended = 0; 1385 timekeeping_suspended = 0;
1269 timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); 1386 timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
@@ -1416,15 +1533,15 @@ static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk,
1416 * 1533 *
1417 * XXX - TODO: Doc ntp_error calculation. 1534 * XXX - TODO: Doc ntp_error calculation.
1418 */ 1535 */
1419 if ((mult_adj > 0) && (tk->tkr.mult + mult_adj < mult_adj)) { 1536 if ((mult_adj > 0) && (tk->tkr_mono.mult + mult_adj < mult_adj)) {
1420 /* NTP adjustment caused clocksource mult overflow */ 1537 /* NTP adjustment caused clocksource mult overflow */
1421 WARN_ON_ONCE(1); 1538 WARN_ON_ONCE(1);
1422 return; 1539 return;
1423 } 1540 }
1424 1541
1425 tk->tkr.mult += mult_adj; 1542 tk->tkr_mono.mult += mult_adj;
1426 tk->xtime_interval += interval; 1543 tk->xtime_interval += interval;
1427 tk->tkr.xtime_nsec -= offset; 1544 tk->tkr_mono.xtime_nsec -= offset;
1428 tk->ntp_error -= (interval - offset) << tk->ntp_error_shift; 1545 tk->ntp_error -= (interval - offset) << tk->ntp_error_shift;
1429} 1546}
1430 1547
@@ -1486,13 +1603,13 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
1486 tk->ntp_err_mult = 0; 1603 tk->ntp_err_mult = 0;
1487 } 1604 }
1488 1605
1489 if (unlikely(tk->tkr.clock->maxadj && 1606 if (unlikely(tk->tkr_mono.clock->maxadj &&
1490 (abs(tk->tkr.mult - tk->tkr.clock->mult) 1607 (abs(tk->tkr_mono.mult - tk->tkr_mono.clock->mult)
1491 > tk->tkr.clock->maxadj))) { 1608 > tk->tkr_mono.clock->maxadj))) {
1492 printk_once(KERN_WARNING 1609 printk_once(KERN_WARNING
1493 "Adjusting %s more than 11%% (%ld vs %ld)\n", 1610 "Adjusting %s more than 11%% (%ld vs %ld)\n",
1494 tk->tkr.clock->name, (long)tk->tkr.mult, 1611 tk->tkr_mono.clock->name, (long)tk->tkr_mono.mult,
1495 (long)tk->tkr.clock->mult + tk->tkr.clock->maxadj); 1612 (long)tk->tkr_mono.clock->mult + tk->tkr_mono.clock->maxadj);
1496 } 1613 }
1497 1614
1498 /* 1615 /*
@@ -1509,9 +1626,9 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
1509 * We'll correct this error next time through this function, when 1626 * We'll correct this error next time through this function, when
1510 * xtime_nsec is not as small. 1627 * xtime_nsec is not as small.
1511 */ 1628 */
1512 if (unlikely((s64)tk->tkr.xtime_nsec < 0)) { 1629 if (unlikely((s64)tk->tkr_mono.xtime_nsec < 0)) {
1513 s64 neg = -(s64)tk->tkr.xtime_nsec; 1630 s64 neg = -(s64)tk->tkr_mono.xtime_nsec;
1514 tk->tkr.xtime_nsec = 0; 1631 tk->tkr_mono.xtime_nsec = 0;
1515 tk->ntp_error += neg << tk->ntp_error_shift; 1632 tk->ntp_error += neg << tk->ntp_error_shift;
1516 } 1633 }
1517} 1634}
@@ -1526,13 +1643,13 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
1526 */ 1643 */
1527static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk) 1644static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
1528{ 1645{
1529 u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr.shift; 1646 u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
1530 unsigned int clock_set = 0; 1647 unsigned int clock_set = 0;
1531 1648
1532 while (tk->tkr.xtime_nsec >= nsecps) { 1649 while (tk->tkr_mono.xtime_nsec >= nsecps) {
1533 int leap; 1650 int leap;
1534 1651
1535 tk->tkr.xtime_nsec -= nsecps; 1652 tk->tkr_mono.xtime_nsec -= nsecps;
1536 tk->xtime_sec++; 1653 tk->xtime_sec++;
1537 1654
1538 /* Figure out if it's a leap sec and apply if needed */ 1655
@@ -1577,9 +1694,10 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
1577 1694
1578 /* Accumulate one shifted interval */ 1695 /* Accumulate one shifted interval */
1579 offset -= interval; 1696 offset -= interval;
1580 tk->tkr.cycle_last += interval; 1697 tk->tkr_mono.cycle_last += interval;
1698 tk->tkr_raw.cycle_last += interval;
1581 1699
1582 tk->tkr.xtime_nsec += tk->xtime_interval << shift; 1700 tk->tkr_mono.xtime_nsec += tk->xtime_interval << shift;
1583 *clock_set |= accumulate_nsecs_to_secs(tk); 1701 *clock_set |= accumulate_nsecs_to_secs(tk);
1584 1702
1585 /* Accumulate raw time */ 1703 /* Accumulate raw time */
@@ -1622,14 +1740,17 @@ void update_wall_time(void)
1622#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET 1740#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
1623 offset = real_tk->cycle_interval; 1741 offset = real_tk->cycle_interval;
1624#else 1742#else
1625 offset = clocksource_delta(tk->tkr.read(tk->tkr.clock), 1743 offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock),
1626 tk->tkr.cycle_last, tk->tkr.mask); 1744 tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
1627#endif 1745#endif
1628 1746
1629 /* Check if there's really nothing to do */ 1747 /* Check if there's really nothing to do */
1630 if (offset < real_tk->cycle_interval) 1748 if (offset < real_tk->cycle_interval)
1631 goto out; 1749 goto out;
1632 1750
1751 /* Do some additional sanity checking */
1752 timekeeping_check_update(real_tk, offset);
1753
1633 /* 1754 /*
1634 * With NO_HZ we may have to accumulate many cycle_intervals 1755 * With NO_HZ we may have to accumulate many cycle_intervals
1635 * (think "ticks") worth of time at once. To do this efficiently, 1756 * (think "ticks") worth of time at once. To do this efficiently,
@@ -1784,8 +1905,8 @@ ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real, ktime_t *offs_boot,
1784 do { 1905 do {
1785 seq = read_seqcount_begin(&tk_core.seq); 1906 seq = read_seqcount_begin(&tk_core.seq);
1786 1907
1787 base = tk->tkr.base_mono; 1908 base = tk->tkr_mono.base;
1788 nsecs = tk->tkr.xtime_nsec >> tk->tkr.shift; 1909 nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
1789 1910
1790 *offs_real = tk->offs_real; 1911 *offs_real = tk->offs_real;
1791 *offs_boot = tk->offs_boot; 1912 *offs_boot = tk->offs_boot;
@@ -1816,8 +1937,8 @@ ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, ktime_t *offs_boot,
1816 do { 1937 do {
1817 seq = read_seqcount_begin(&tk_core.seq); 1938 seq = read_seqcount_begin(&tk_core.seq);
1818 1939
1819 base = tk->tkr.base_mono; 1940 base = tk->tkr_mono.base;
1820 nsecs = timekeeping_get_ns(&tk->tkr); 1941 nsecs = timekeeping_get_ns(&tk->tkr_mono);
1821 1942
1822 *offs_real = tk->offs_real; 1943 *offs_real = tk->offs_real;
1823 *offs_boot = tk->offs_boot; 1944 *offs_boot = tk->offs_boot;
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 61ed862cdd37..2cfd19485824 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -228,9 +228,35 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
228 print_name_offset(m, dev->set_next_event); 228 print_name_offset(m, dev->set_next_event);
229 SEQ_printf(m, "\n"); 229 SEQ_printf(m, "\n");
230 230
231 SEQ_printf(m, " set_mode: "); 231 if (dev->set_mode) {
232 print_name_offset(m, dev->set_mode); 232 SEQ_printf(m, " set_mode: ");
233 SEQ_printf(m, "\n"); 233 print_name_offset(m, dev->set_mode);
234 SEQ_printf(m, "\n");
235 } else {
236 if (dev->set_mode_shutdown) {
237 SEQ_printf(m, " shutdown: ");
238 print_name_offset(m, dev->set_mode_shutdown);
239 SEQ_printf(m, "\n");
240 }
241
242 if (dev->set_mode_periodic) {
243 SEQ_printf(m, " periodic: ");
244 print_name_offset(m, dev->set_mode_periodic);
245 SEQ_printf(m, "\n");
246 }
247
248 if (dev->set_mode_oneshot) {
249 SEQ_printf(m, " oneshot: ");
250 print_name_offset(m, dev->set_mode_oneshot);
251 SEQ_printf(m, "\n");
252 }
253
254 if (dev->set_mode_resume) {
255 SEQ_printf(m, " resume: ");
256 print_name_offset(m, dev->set_mode_resume);
257 SEQ_printf(m, "\n");
258 }
259 }
234 260
235 SEQ_printf(m, " event_handler: "); 261 SEQ_printf(m, " event_handler: ");
236 print_name_offset(m, dev->event_handler); 262 print_name_offset(m, dev->event_handler);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c5cefb3c009c..36b6fa88ce5b 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -865,6 +865,19 @@ config SCHED_STACK_END_CHECK
865 data corruption or a sporadic crash at a later stage once the region 865 data corruption or a sporadic crash at a later stage once the region
866 is examined. The runtime overhead introduced is minimal. 866 is examined. The runtime overhead introduced is minimal.
867 867
868config DEBUG_TIMEKEEPING
869 bool "Enable extra timekeeping sanity checking"
870 help
871 This option will enable additional timekeeping sanity checks
872 which may be helpful when diagnosing issues where timekeeping
873 problems are suspected.
874
875 This may include checks in the timekeeping hotpaths, so this
876 option may have a (very small) performance impact on some
877 workloads.
878
879 If unsure, say N.
880
868config TIMER_STATS 881config TIMER_STATS
869 bool "Collect kernel timers statistics" 882 bool "Collect kernel timers statistics"
870 depends on DEBUG_KERNEL && PROC_FS 883 depends on DEBUG_KERNEL && PROC_FS