path: root/kernel/sched/clock.c
author	Peter Zijlstra <peterz@infradead.org>	2016-12-15 07:36:17 -0500
committer	Ingo Molnar <mingo@kernel.org>	2017-01-14 05:30:00 -0500
commit	5680d8094ffa9e5cfc81afdd865027ee6417c263 (patch)
tree	70c19d42265db8d72c3d5bfd4877951fc3587488 /kernel/sched/clock.c
parent	9881b024b7d7671f6a014091bc96506b89081802 (diff)
sched/clock: Provide better clock continuity
When switching between the unstable and stable variants it is
currently possible that clock discontinuities occur. While these will
mostly be 'small', attempt to do better.

As observed on my IVB-EP, sched_clock() is ~1.5s ahead of the
ktime_get_ns() based timeline at the point of switchover
(sched_clock_init_late()) after SMP bringup.

Equally, when the TSC is later found to be unstable -- typically
because SMM tries to hide its SMI latencies by mucking with the TSC --
we want to avoid large jumps.

Since the clocksource watchdog reports the issue after the fact we
cannot exactly fix up time, but since SMI latencies are typically
small (~10ns range), the discontinuity is mainly due to drift between
sched_clock() and ktime_get_ns() (which on my desktop is ~79s over
24 days).

I dislike this patch because it adds overhead to the good case in
favour of dealing with badness. But given the widespread failure of
TSC stability this is worth it.

Note that in case the TSC makes drastic jumps after SMP bringup we're
still hosed. There's just not much we can do in that case without
stupid overhead.

If we were to somehow expose tsc_clocksource_reliable (which is hard
because this code is also used on ia64 and parisc) we could avoid
some of the newly introduced overhead.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
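[Editor's note] The offset scheme described in the message can be modelled in a few lines of user-space C. This is only an illustrative sketch, not kernel code: raw_ns()/gtod_ns(), mark_stable(), mark_unstable(), clock_stable() and clock_unstable() are invented names standing in for sched_clock(), ktime_get_ns() and the transition helpers in the diff below, the fixed ~1.5s skew is taken from the IVB-EP observation above, and the per-CPU scd->tick_* snapshots are simplified away.

#include <stdio.h>
#include <stdint.h>

/* Invariant the patch maintains: gtod + gtod_offset == raw + raw_offset. */
static uint64_t raw_offset, gtod_offset;

/* Stand-ins for sched_clock() and ktime_get_ns(): the raw clock runs ~1.5s ahead. */
static uint64_t raw_ns(uint64_t t)  { return t + 1500000000ULL; }
static uint64_t gtod_ns(uint64_t t) { return t; }

/*
 * unstable->stable (cf. __set_sched_clock_stable()): pick raw_offset so the
 * raw-based timeline continues where the gtod-based one left off. The u64
 * wraparound on subtraction is intentional, as with the offsets in the patch.
 */
static void mark_stable(uint64_t now)
{
	raw_offset = (gtod_ns(now) + gtod_offset) - raw_ns(now);
}

/*
 * stable->unstable (cf. __clear_sched_clock_stable()): pick gtod_offset so the
 * gtod-based timeline continues where the raw-based one left off.
 */
static void mark_unstable(uint64_t now)
{
	gtod_offset = (raw_ns(now) + raw_offset) - gtod_ns(now);
}

static uint64_t clock_stable(uint64_t t)   { return raw_ns(t) + raw_offset; }   /* stable path of sched_clock_cpu() */
static uint64_t clock_unstable(uint64_t t) { return gtod_ns(t) + gtod_offset; } /* gtod-based fallback, simplified */

int main(void)
{
	uint64_t t = 10ULL * 1000000000ULL;		/* pretend: 10s after boot */

	printf("unstable clock just before switch: %llu\n",
	       (unsigned long long)clock_unstable(t));
	mark_stable(t);					/* sched_clock_init_late() */
	printf("stable clock just after switch:    %llu\n",
	       (unsigned long long)clock_stable(t));

	t += 24ULL * 24 * 3600 * 1000000000ULL;		/* much later, watchdog fires */
	printf("stable clock at watchdog:          %llu\n",
	       (unsigned long long)clock_stable(t));
	mark_unstable(t);
	printf("unstable clock after fallback:     %llu\n",
	       (unsigned long long)clock_unstable(t));
	return 0;
}

Both transitions print the same timeline value before and after the switch: the wraparound in the u64 offsets cancels exactly when they are added back, which is what lets the patch keep the clock continuous without ever adjusting the underlying clocks themselves.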
Diffstat (limited to 'kernel/sched/clock.c')
-rw-r--r--	kernel/sched/clock.c	99
1 file changed, 65 insertions(+), 34 deletions(-)
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index b3466d4e0cc2..7713b2b53f61 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -86,6 +86,30 @@ void sched_clock_init(void)
 static DEFINE_STATIC_KEY_FALSE(__sched_clock_stable);
 static int __sched_clock_stable_early;
 
+/*
+ * We want: ktime_get_ns() + gtod_offset == sched_clock() + raw_offset
+ */
+static __read_mostly u64 raw_offset;
+static __read_mostly u64 gtod_offset;
+
+struct sched_clock_data {
+	u64			tick_raw;
+	u64			tick_gtod;
+	u64			clock;
+};
+
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data);
+
+static inline struct sched_clock_data *this_scd(void)
+{
+	return this_cpu_ptr(&sched_clock_data);
+}
+
+static inline struct sched_clock_data *cpu_sdc(int cpu)
+{
+	return &per_cpu(sched_clock_data, cpu);
+}
+
 int sched_clock_stable(void)
 {
 	return static_branch_likely(&__sched_clock_stable);
@@ -93,6 +117,17 @@ int sched_clock_stable(void)
 
 static void __set_sched_clock_stable(void)
 {
+	struct sched_clock_data *scd = this_scd();
+
+	/*
+	 * Attempt to make the (initial) unstable->stable transition continuous.
+	 */
+	raw_offset = (scd->tick_gtod + gtod_offset) - (scd->tick_raw);
+
+	printk(KERN_INFO "sched_clock: Marking stable (%lld, %lld)->(%lld, %lld)\n",
+			scd->tick_gtod, gtod_offset,
+			scd->tick_raw,  raw_offset);
+
 	static_branch_enable(&__sched_clock_stable);
 	tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE);
 }
@@ -117,7 +152,23 @@ void set_sched_clock_stable(void)
 
 static void __clear_sched_clock_stable(struct work_struct *work)
 {
-	/* XXX worry about clock continuity */
+	struct sched_clock_data *scd = this_scd();
+
+	/*
+	 * Attempt to make the stable->unstable transition continuous.
+	 *
+	 * Trouble is, this is typically called from the TSC watchdog
+	 * timer, which is late per definition. This means the tick
+	 * values can already be screwy.
+	 *
+	 * Still do what we can.
+	 */
+	gtod_offset = (scd->tick_raw + raw_offset) - (scd->tick_gtod);
+
+	printk(KERN_INFO "sched_clock: Marking unstable (%lld, %lld)<-(%lld, %lld)\n",
+			scd->tick_gtod, gtod_offset,
+			scd->tick_raw,  raw_offset);
+
 	static_branch_disable(&__sched_clock_stable);
 	tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE);
 }
@@ -134,28 +185,9 @@ void clear_sched_clock_stable(void)
 	schedule_work(&sched_clock_work);
 }
 
-struct sched_clock_data {
-	u64			tick_raw;
-	u64			tick_gtod;
-	u64			clock;
-};
-
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data);
-
-static inline struct sched_clock_data *this_scd(void)
-{
-	return this_cpu_ptr(&sched_clock_data);
-}
-
-static inline struct sched_clock_data *cpu_sdc(int cpu)
-{
-	return &per_cpu(sched_clock_data, cpu);
-}
-
 void sched_clock_init_late(void)
 {
 	sched_clock_running = 2;
-
 	/*
 	 * Ensure that it is impossible to not do a static_key update.
 	 *
@@ -210,7 +242,7 @@ again:
 	 *		      scd->tick_gtod + TICK_NSEC);
 	 */
 
-	clock = scd->tick_gtod + delta;
+	clock = scd->tick_gtod + gtod_offset + delta;
 	min_clock = wrap_max(scd->tick_gtod, old_clock);
 	max_clock = wrap_max(old_clock, scd->tick_gtod + TICK_NSEC);
 
@@ -296,7 +328,7 @@ u64 sched_clock_cpu(int cpu)
 	u64 clock;
 
 	if (sched_clock_stable())
-		return sched_clock();
+		return sched_clock() + raw_offset;
 
 	if (unlikely(!sched_clock_running))
 		return 0ull;
@@ -317,23 +349,22 @@ EXPORT_SYMBOL_GPL(sched_clock_cpu);
 void sched_clock_tick(void)
 {
 	struct sched_clock_data *scd;
-	u64 now, now_gtod;
-
-	if (sched_clock_stable())
-		return;
-
-	if (unlikely(!sched_clock_running))
-		return;
 
 	WARN_ON_ONCE(!irqs_disabled());
 
+	/*
+	 * Update these values even if sched_clock_stable(), because it can
+	 * become unstable at any point in time at which point we need some
+	 * values to fall back on.
+	 *
+	 * XXX arguably we can skip this if we expose tsc_clocksource_reliable
+	 */
 	scd = this_scd();
-	now_gtod = ktime_to_ns(ktime_get());
-	now = sched_clock();
+	scd->tick_raw  = sched_clock();
+	scd->tick_gtod = ktime_get_ns();
 
-	scd->tick_raw = now;
-	scd->tick_gtod = now_gtod;
-	sched_clock_local(scd);
+	if (!sched_clock_stable() && likely(sched_clock_running))
+		sched_clock_local(scd);
 }
 
 /*