Diffstat (limited to 'kernel/sched_clock.c')
-rw-r--r--  kernel/sched_clock.c  237
1 file changed, 152 insertions(+), 85 deletions(-)
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index e1d16c9a7680..9d8af0b3fb64 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -10,19 +10,55 @@
  * Ingo Molnar <mingo@redhat.com>
  * Guillaume Chazarain <guichaz@gmail.com>
  *
- * Create a semi stable clock from a mixture of other events, including:
- *  - gtod
+ *
+ * What:
+ *
+ * cpu_clock(i) provides a fast (execution time) high resolution
+ * clock with bounded drift between CPUs. The value of cpu_clock(i)
+ * is monotonic for constant i. The timestamp returned is in nanoseconds.
+ *
+ * ######################### BIG FAT WARNING ##########################
+ * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
+ * # go backwards !!                                                  #
+ * ####################################################################
+ *
+ * There is no strict promise about the base, although it tends to start
+ * at 0 on boot (but people really shouldn't rely on that).
+ *
+ * cpu_clock(i)       -- can be used from any context, including NMI.
+ * sched_clock_cpu(i) -- must be used with local IRQs disabled (implied by NMI)
+ * local_clock()      -- is cpu_clock() on the current cpu.
+ *
+ * How:
+ *
+ * The implementation either uses sched_clock() when
+ * !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK, which means in that case the
+ * sched_clock() is assumed to provide these properties (mostly it means
+ * the architecture provides a globally synchronized highres time source).
+ *
+ * Otherwise it tries to create a semi stable clock from a mixture of other
+ * clocks, including:
+ *
+ *  - GTOD (clock monotonic)
  *  - sched_clock()
  *  - explicit idle events
  *
- * We use gtod as base and the unstable clock deltas. The deltas are filtered,
- * making it monotonic and keeping it within an expected window.
+ * We use GTOD as base and use sched_clock() deltas to improve resolution. The
+ * deltas are filtered to provide monotonicity and to keep the clock within an
+ * expected window.
  *
  * Furthermore, explicit sleep and wakeup hooks allow us to account for time
  * that is otherwise invisible (TSC gets stopped).
  *
- * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
- * consistent between cpus (never more than 2 jiffies difference).
+ *
+ * Notes:
+ *
+ * The !IRQ-safety of sched_clock() and sched_clock_cpu() comes from things
+ * like cpufreq interrupts that can change the base clock (TSC) multiplier
+ * and cause funny jumps in time -- although the filtering provided by
+ * sched_clock_cpu() should mitigate serious artifacts, we cannot rely on it
+ * in general, since for !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK we fully rely on
+ * sched_clock().
  */
 #include <linux/spinlock.h>
 #include <linux/hardirq.h>
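
Editorial note: the new header comment above describes the filtering scheme in words: GTOD sampled at the last tick is the base, the sched_clock() delta since that tick adds resolution, and the result is clamped so it can neither move backwards nor run more than one tick ahead of the base. The following standalone C sketch shows that clamp in isolation; it is not kernel code, the name filter_clock and the 1 ms TICK_NSEC value are illustrative assumptions, and the kernel's wrap_max()/wrap_min() wraparound handling and the cmpxchg retry are deliberately left out.

/*
 * Standalone sketch (not kernel code) of the window filter described
 * above: take the GTOD value sampled at the last tick as a base, add
 * the raw clock delta since that tick, then clamp the result so it
 * never drops below the last reported value and never runs more than
 * one tick length ahead of the GTOD base.
 */
#include <stdint.h>
#include <stdio.h>

#define TICK_NSEC 1000000ULL            /* assume a 1 ms tick for the example */

struct clock_state {
        uint64_t tick_raw;              /* raw clock value at the last tick */
        uint64_t tick_gtod;             /* GTOD (ns) at the last tick */
        uint64_t clock;                 /* last value handed out */
};

static uint64_t filter_clock(struct clock_state *s, uint64_t raw_now)
{
        uint64_t delta = raw_now >= s->tick_raw ? raw_now - s->tick_raw : 0;
        uint64_t clock = s->tick_gtod + delta;

        /* min: never below the GTOD base or the last value we returned */
        uint64_t min_clock = s->tick_gtod > s->clock ? s->tick_gtod : s->clock;
        /* max: never more than one tick ahead of the GTOD base */
        uint64_t max_clock = s->tick_gtod + TICK_NSEC;
        if (s->clock > max_clock)
                max_clock = s->clock;

        if (clock < min_clock)
                clock = min_clock;
        if (clock > max_clock)
                clock = max_clock;

        s->clock = clock;
        return clock;
}

int main(void)
{
        struct clock_state s = {
                .tick_raw = 1000, .tick_gtod = 5000000, .clock = 5000000,
        };

        /* a sane raw delta lands inside the window... */
        printf("%llu\n", (unsigned long long)filter_clock(&s, 1500));
        /* ...a wild raw jump is capped at tick_gtod + TICK_NSEC */
        printf("%llu\n", (unsigned long long)filter_clock(&s, 900000000));
        return 0;
}

Run on its own, the second call shows a wild raw value being capped at tick_gtod + TICK_NSEC, which is exactly the "window to filter crazy TSC values" the code comments below refer to.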
@@ -41,20 +77,14 @@ unsigned long long __attribute__((weak)) sched_clock(void)
         return (unsigned long long)(jiffies - INITIAL_JIFFIES)
                                         * (NSEC_PER_SEC / HZ);
 }
+EXPORT_SYMBOL_GPL(sched_clock);
 
-static __read_mostly int sched_clock_running;
+__read_mostly int sched_clock_running;
 
 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
 __read_mostly int sched_clock_stable;
 
 struct sched_clock_data {
-        /*
-         * Raw spinlock - this is a special case: this might be called
-         * from within instrumentation code so we dont want to do any
-         * instrumentation ourselves.
-         */
-        raw_spinlock_t          lock;
-
         u64                     tick_raw;
         u64                     tick_gtod;
         u64                     clock;
@@ -80,7 +110,6 @@ void sched_clock_init(void)
         for_each_possible_cpu(cpu) {
                 struct sched_clock_data *scd = cpu_sdc(cpu);
 
-                scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
                 scd->tick_raw = 0;
                 scd->tick_gtod = ktime_now;
                 scd->clock = ktime_now;
@@ -109,14 +138,19 @@ static inline u64 wrap_max(u64 x, u64 y)
  *  - filter out backward motion
  *  - use the GTOD tick value to create a window to filter crazy TSC values
  */
-static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now)
+static u64 sched_clock_local(struct sched_clock_data *scd)
 {
-        s64 delta = now - scd->tick_raw;
-        u64 clock, min_clock, max_clock;
+        u64 now, clock, old_clock, min_clock, max_clock;
+        s64 delta;
 
+again:
+        now = sched_clock();
+        delta = now - scd->tick_raw;
         if (unlikely(delta < 0))
                 delta = 0;
 
+        old_clock = scd->clock;
+
         /*
          * scd->clock = clamp(scd->tick_gtod + delta,
          *                    max(scd->tick_gtod, scd->clock),
@@ -124,84 +158,78 @@ static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now)
          */
 
         clock = scd->tick_gtod + delta;
-        min_clock = wrap_max(scd->tick_gtod, scd->clock);
-        max_clock = wrap_max(scd->clock, scd->tick_gtod + TICK_NSEC);
+        min_clock = wrap_max(scd->tick_gtod, old_clock);
+        max_clock = wrap_max(old_clock, scd->tick_gtod + TICK_NSEC);
 
         clock = wrap_max(clock, min_clock);
         clock = wrap_min(clock, max_clock);
 
-        scd->clock = clock;
+        if (cmpxchg64(&scd->clock, old_clock, clock) != old_clock)
+                goto again;
 
-        return scd->clock;
+        return clock;
 }
 
-static void lock_double_clock(struct sched_clock_data *data1,
-                              struct sched_clock_data *data2)
+static u64 sched_clock_remote(struct sched_clock_data *scd)
 {
-        if (data1 < data2) {
-                __raw_spin_lock(&data1->lock);
-                __raw_spin_lock(&data2->lock);
+        struct sched_clock_data *my_scd = this_scd();
+        u64 this_clock, remote_clock;
+        u64 *ptr, old_val, val;
+
+        sched_clock_local(my_scd);
+again:
+        this_clock = my_scd->clock;
+        remote_clock = scd->clock;
+
+        /*
+         * Use the opportunity that we have both locks
+         * taken to couple the two clocks: we take the
+         * larger time as the latest time for both
+         * runqueues. (this creates monotonic movement)
+         */
+        if (likely((s64)(remote_clock - this_clock) < 0)) {
+                ptr = &scd->clock;
+                old_val = remote_clock;
+                val = this_clock;
         } else {
-                __raw_spin_lock(&data2->lock);
-                __raw_spin_lock(&data1->lock);
+                /*
+                 * Should be rare, but possible:
+                 */
+                ptr = &my_scd->clock;
+                old_val = this_clock;
+                val = remote_clock;
         }
+
+        if (cmpxchg64(ptr, old_val, val) != old_val)
+                goto again;
+
+        return val;
 }
 
+/*
+ * Similar to cpu_clock(), but requires local IRQs to be disabled.
+ *
+ * See cpu_clock().
+ */
 u64 sched_clock_cpu(int cpu)
 {
-        u64 now, clock, this_clock, remote_clock;
         struct sched_clock_data *scd;
+        u64 clock;
+
+        WARN_ON_ONCE(!irqs_disabled());
 
         if (sched_clock_stable)
                 return sched_clock();
 
-        scd = cpu_sdc(cpu);
-
-        /*
-         * Normally this is not called in NMI context - but if it is,
-         * trying to do any locking here is totally lethal.
-         */
-        if (unlikely(in_nmi()))
-                return scd->clock;
-
         if (unlikely(!sched_clock_running))
                 return 0ull;
 
-        WARN_ON_ONCE(!irqs_disabled());
-        now = sched_clock();
-
-        if (cpu != raw_smp_processor_id()) {
-                struct sched_clock_data *my_scd = this_scd();
-
-                lock_double_clock(scd, my_scd);
-
-                this_clock = __update_sched_clock(my_scd, now);
-                remote_clock = scd->clock;
-
-                /*
-                 * Use the opportunity that we have both locks
-                 * taken to couple the two clocks: we take the
-                 * larger time as the latest time for both
-                 * runqueues. (this creates monotonic movement)
-                 */
-                if (likely((s64)(remote_clock - this_clock) < 0)) {
-                        clock = this_clock;
-                        scd->clock = clock;
-                } else {
-                        /*
-                         * Should be rare, but possible:
-                         */
-                        clock = remote_clock;
-                        my_scd->clock = remote_clock;
-                }
-
-                __raw_spin_unlock(&my_scd->lock);
-        } else {
-                __raw_spin_lock(&scd->lock);
-                clock = __update_sched_clock(scd, now);
-        }
+        scd = cpu_sdc(cpu);
 
-        __raw_spin_unlock(&scd->lock);
+        if (cpu != smp_processor_id())
+                clock = sched_clock_remote(scd);
+        else
+                clock = sched_clock_local(scd);
 
         return clock;
 }
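
Editorial note: the hunk above replaces the per-cpu raw spinlock with cmpxchg64(). Both sched_clock_local() and sched_clock_remote() compute a candidate value from a snapshot and only publish it if the snapshot is still the current value, retrying otherwise, which is what makes the path safe to enter from NMI context. Below is a minimal userspace sketch of that publish-or-retry pattern, with C11 atomics standing in for cmpxchg64(); the time source and the monotonic filter are simplified placeholders, not the kernel's logic.

/*
 * Standalone sketch of the lock-free update pattern used above:
 * snapshot the published value, compute a new (monotonic) candidate,
 * and publish it with a compare-and-swap; if another context published
 * in the meantime, start over.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint64_t published_clock;

/* hypothetical raw time source, standing in for sched_clock() */
static uint64_t read_raw_clock(void)
{
        static uint64_t fake = 1000;
        return fake += 7;
}

static uint64_t update_clock(void)
{
        uint64_t old_val, new_val;

        do {
                old_val = atomic_load(&published_clock);
                new_val = read_raw_clock();
                if (new_val < old_val)          /* filter out backward motion */
                        new_val = old_val;
                /*
                 * The compare-and-swap fails if the published value changed
                 * since the snapshot; the loop then retries, which is the
                 * same role cmpxchg64() + "goto again" plays in the kernel.
                 */
        } while (!atomic_compare_exchange_strong(&published_clock,
                                                 &old_val, new_val));

        return new_val;
}

int main(void)
{
        for (int i = 0; i < 3; i++)
                printf("%llu\n", (unsigned long long)update_clock());
        return 0;
}

The trade-off is the same one the patch makes: a rare retry loop instead of a lock that an NMI arriving at the wrong moment could deadlock on.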
@@ -223,11 +251,9 @@ void sched_clock_tick(void)
         now_gtod = ktime_to_ns(ktime_get());
         now = sched_clock();
 
-        __raw_spin_lock(&scd->lock);
         scd->tick_raw = now;
         scd->tick_gtod = now_gtod;
-        __update_sched_clock(scd, now);
-        __raw_spin_unlock(&scd->lock);
+        sched_clock_local(scd);
 }
 
 /*
@@ -252,6 +278,47 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
 }
 EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
 
+/*
+ * As outlined at the top, provides a fast, high resolution, nanosecond
+ * time source that is monotonic per cpu argument and has bounded drift
+ * between cpus.
+ *
+ * ######################### BIG FAT WARNING ##########################
+ * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
+ * # go backwards !!                                                  #
+ * ####################################################################
+ */
+u64 cpu_clock(int cpu)
+{
+        u64 clock;
+        unsigned long flags;
+
+        local_irq_save(flags);
+        clock = sched_clock_cpu(cpu);
+        local_irq_restore(flags);
+
+        return clock;
+}
+
+/*
+ * Similar to cpu_clock() for the current cpu. Time will only be observed
+ * to be monotonic if care is taken to only compare timestamps taken on the
+ * same CPU.
+ *
+ * See cpu_clock().
+ */
+u64 local_clock(void)
+{
+        u64 clock;
+        unsigned long flags;
+
+        local_irq_save(flags);
+        clock = sched_clock_cpu(smp_processor_id());
+        local_irq_restore(flags);
+
+        return clock;
+}
+
 #else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
 
 void sched_clock_init(void)
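
Editorial note: the cpu_clock() and local_clock() wrappers added above simply disable local interrupts around sched_clock_cpu(), so callers get the documented guarantees without caring about context. Below is a hypothetical caller, not part of this patch; it assumes the declarations are exported through <linux/sched.h> alongside the existing cpu_clock() prototype, and do_some_work() is only a stand-in for the code being timed. Both timestamps are taken on the same CPU (preemption disabled around a short section), which is the condition the local_clock() comment attaches to monotonicity.

#include <linux/kernel.h>       /* pr_info() */
#include <linux/preempt.h>      /* preempt_disable()/preempt_enable() */
#include <linux/sched.h>        /* local_clock(), cpu_clock() -- assumed location */

/* placeholder for whatever short piece of code is being timed */
static void do_some_work(void)
{
}

static void time_some_work(void)
{
        u64 t0, t1;

        preempt_disable();      /* keep both reads on the same CPU */
        t0 = local_clock();
        do_some_work();
        t1 = local_clock();
        preempt_enable();

        pr_info("work took %llu ns\n", (unsigned long long)(t1 - t0));
}

Comparing the two local_clock() values is meaningful because they come from the same CPU-local clock; comparing values taken on different CPUs could go backwards, as the BIG FAT WARNING above says.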
@@ -267,17 +334,17 @@ u64 sched_clock_cpu(int cpu)
         return sched_clock();
 }
 
-#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
+u64 cpu_clock(int cpu)
+{
+        return sched_clock_cpu(cpu);
+}
 
-unsigned long long cpu_clock(int cpu)
+u64 local_clock(void)
 {
-        unsigned long long clock;
-        unsigned long flags;
+        return sched_clock_cpu(0);
+}
 
-        local_irq_save(flags);
-        clock = sched_clock_cpu(cpu);
-        local_irq_restore(flags);
+#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
 
-        return clock;
-}
 EXPORT_SYMBOL_GPL(cpu_clock);
+EXPORT_SYMBOL_GPL(local_clock);