Diffstat (limited to 'kernel/sched_clock.c')
-rw-r--r--	kernel/sched_clock.c	237
1 file changed, 152 insertions(+), 85 deletions(-)
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index e1d16c9a7680..9d8af0b3fb64 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -10,19 +10,55 @@
  * Ingo Molnar <mingo@redhat.com>
  * Guillaume Chazarain <guichaz@gmail.com>
  *
- * Create a semi stable clock from a mixture of other events, including:
- *  - gtod
+ *
+ * What:
+ *
+ * cpu_clock(i) provides a fast (execution time) high resolution
+ * clock with bounded drift between CPUs. The value of cpu_clock(i)
+ * is monotonic for constant i. The timestamp returned is in nanoseconds.
+ *
+ * ######################### BIG FAT WARNING ##########################
+ * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
+ * # go backwards !!                                                  #
+ * ####################################################################
+ *
+ * There is no strict promise about the base, although it tends to start
+ * at 0 on boot (but people really shouldn't rely on that).
+ *
+ * cpu_clock(i) -- can be used from any context, including NMI.
+ * sched_clock_cpu(i) -- must be used with local IRQs disabled (implied by NMI)
+ * local_clock() -- is cpu_clock() on the current cpu.
+ *
+ * How:
+ *
+ * The implementation either uses sched_clock() directly when
+ * !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK, in which case sched_clock() is
+ * assumed to provide these properties (mostly it means the architecture
+ * provides a globally synchronized highres time source).
+ *
+ * Otherwise it tries to create a semi stable clock from a mixture of other
+ * clocks, including:
+ *
+ *  - GTOD (clock monotonic)
  *  - sched_clock()
  *  - explicit idle events
  *
- * We use gtod as base and the unstable clock deltas. The deltas are filtered,
- * making it monotonic and keeping it within an expected window.
+ * We use GTOD as base and use sched_clock() deltas to improve resolution. The
+ * deltas are filtered to provide monotonicity and to keep it within an
+ * expected window.
  *
  * Furthermore, explicit sleep and wakeup hooks allow us to account for time
  * that is otherwise invisible (TSC gets stopped).
  *
- * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
- * consistent between cpus (never more than 2 jiffies difference).
+ *
+ * Notes:
+ *
+ * The !IRQ-safety of sched_clock() and sched_clock_cpu() comes from things
+ * like cpufreq interrupts that can change the base clock (TSC) multiplier
+ * and cause funny jumps in time -- although the filtering provided by
+ * sched_clock_cpu() should mitigate serious artifacts, we cannot rely on it
+ * in general since for !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK we fully rely on
+ * sched_clock().
  */
 #include <linux/spinlock.h>
 #include <linux/hardirq.h>
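
For reference, a minimal usage sketch of the contract spelled out in the new header comment. The caller and critical_section() below are hypothetical and not part of this patch; the point is that both timestamps come from the same cpu, so the per-cpu monotonicity promise applies.

/* Hypothetical caller, for illustration only -- not part of this patch. */
static u64 time_critical_section(void)
{
	u64 t0, t1;

	preempt_disable();		/* keep both reads on the same cpu */
	t0 = local_clock();
	critical_section();		/* hypothetical work being timed */
	t1 = local_clock();
	preempt_enable();

	return t1 - t0;			/* nanoseconds; a same-cpu delta is monotonic */
}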
@@ -41,20 +77,14 @@ unsigned long long __attribute__((weak)) sched_clock(void)
 	return (unsigned long long)(jiffies - INITIAL_JIFFIES)
 					* (NSEC_PER_SEC / HZ);
 }
+EXPORT_SYMBOL_GPL(sched_clock);
 
-static __read_mostly int sched_clock_running;
+__read_mostly int sched_clock_running;
 
 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
 __read_mostly int sched_clock_stable;
 
 struct sched_clock_data {
-	/*
-	 * Raw spinlock - this is a special case: this might be called
-	 * from within instrumentation code so we dont want to do any
-	 * instrumentation ourselves.
-	 */
-	raw_spinlock_t lock;
-
 	u64			tick_raw;
 	u64			tick_gtod;
 	u64			clock;
@@ -80,7 +110,6 @@ void sched_clock_init(void)
 	for_each_possible_cpu(cpu) {
 		struct sched_clock_data *scd = cpu_sdc(cpu);
 
-		scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
 		scd->tick_raw = 0;
 		scd->tick_gtod = ktime_now;
 		scd->clock = ktime_now;
@@ -109,14 +138,19 @@ static inline u64 wrap_max(u64 x, u64 y)
  *  - filter out backward motion
  *  - use the GTOD tick value to create a window to filter crazy TSC values
  */
-static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now)
+static u64 sched_clock_local(struct sched_clock_data *scd)
 {
-	s64 delta = now - scd->tick_raw;
-	u64 clock, min_clock, max_clock;
+	u64 now, clock, old_clock, min_clock, max_clock;
+	s64 delta;
 
+again:
+	now = sched_clock();
+	delta = now - scd->tick_raw;
 	if (unlikely(delta < 0))
 		delta = 0;
 
+	old_clock = scd->clock;
+
 	/*
 	 * scd->clock = clamp(scd->tick_gtod + delta,
 	 *		      max(scd->tick_gtod, scd->clock),
@@ -124,84 +158,78 @@ static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now)
  */
 
 	clock = scd->tick_gtod + delta;
-	min_clock = wrap_max(scd->tick_gtod, scd->clock);
-	max_clock = wrap_max(scd->clock, scd->tick_gtod + TICK_NSEC);
+	min_clock = wrap_max(scd->tick_gtod, old_clock);
+	max_clock = wrap_max(old_clock, scd->tick_gtod + TICK_NSEC);
 
 	clock = wrap_max(clock, min_clock);
 	clock = wrap_min(clock, max_clock);
 
-	scd->clock = clock;
+	if (cmpxchg64(&scd->clock, old_clock, clock) != old_clock)
+		goto again;
 
-	return scd->clock;
+	return clock;
 }
 
-static void lock_double_clock(struct sched_clock_data *data1,
-			      struct sched_clock_data *data2)
+static u64 sched_clock_remote(struct sched_clock_data *scd)
 {
-	if (data1 < data2) {
-		__raw_spin_lock(&data1->lock);
-		__raw_spin_lock(&data2->lock);
+	struct sched_clock_data *my_scd = this_scd();
+	u64 this_clock, remote_clock;
+	u64 *ptr, old_val, val;
+
+	sched_clock_local(my_scd);
+again:
+	this_clock = my_scd->clock;
+	remote_clock = scd->clock;
+
+	/*
+	 * Use the opportunity that we have both clock values
+	 * in hand to couple the two clocks: we take the
+	 * larger time as the latest time for both
+	 * runqueues. (this creates monotonic movement)
+	 */
+	if (likely((s64)(remote_clock - this_clock) < 0)) {
+		ptr = &scd->clock;
+		old_val = remote_clock;
+		val = this_clock;
 	} else {
-		__raw_spin_lock(&data2->lock);
-		__raw_spin_lock(&data1->lock);
+		/*
+		 * Should be rare, but possible:
+		 */
+		ptr = &my_scd->clock;
+		old_val = this_clock;
+		val = remote_clock;
 	}
+
+	if (cmpxchg64(ptr, old_val, val) != old_val)
+		goto again;
+
+	return val;
 }
 
+/*
+ * Similar to cpu_clock(), but requires local IRQs to be disabled.
+ *
+ * See cpu_clock().
+ */
 u64 sched_clock_cpu(int cpu)
 {
-	u64 now, clock, this_clock, remote_clock;
 	struct sched_clock_data *scd;
+	u64 clock;
+
+	WARN_ON_ONCE(!irqs_disabled());
 
 	if (sched_clock_stable)
 		return sched_clock();
 
-	scd = cpu_sdc(cpu);
-
-	/*
-	 * Normally this is not called in NMI context - but if it is,
-	 * trying to do any locking here is totally lethal.
-	 */
-	if (unlikely(in_nmi()))
-		return scd->clock;
-
 	if (unlikely(!sched_clock_running))
 		return 0ull;
 
-	WARN_ON_ONCE(!irqs_disabled());
-	now = sched_clock();
-
-	if (cpu != raw_smp_processor_id()) {
-		struct sched_clock_data *my_scd = this_scd();
-
-		lock_double_clock(scd, my_scd);
-
-		this_clock = __update_sched_clock(my_scd, now);
-		remote_clock = scd->clock;
-
-		/*
-		 * Use the opportunity that we have both locks
-		 * taken to couple the two clocks: we take the
-		 * larger time as the latest time for both
-		 * runqueues. (this creates monotonic movement)
-		 */
-		if (likely((s64)(remote_clock - this_clock) < 0)) {
-			clock = this_clock;
-			scd->clock = clock;
-		} else {
-			/*
-			 * Should be rare, but possible:
-			 */
-			clock = remote_clock;
-			my_scd->clock = remote_clock;
-		}
-
-		__raw_spin_unlock(&my_scd->lock);
-	} else {
-		__raw_spin_lock(&scd->lock);
-		clock = __update_sched_clock(scd, now);
-	}
+	scd = cpu_sdc(cpu);
 
-	__raw_spin_unlock(&scd->lock);
+	if (cpu != smp_processor_id())
+		clock = sched_clock_remote(scd);
+	else
+		clock = sched_clock_local(scd);
 
 	return clock;
 }
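
The core of the change is the lockless update: instead of taking scd->lock, the filtered value is published with cmpxchg64() and the computation is retried if the per-cpu value changed underneath us (for instance from an interrupt on the same cpu). The stand-alone sketch below may help when reading sched_clock_local() above; it uses C11 atomics in place of cmpxchg64() and made-up tick values, and is an illustration of the technique, not kernel code.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define TICK_NSEC 1000000ULL	/* stand-in for the kernel constant at HZ=1000 */

/* wrap-safe min/max, mirroring wrap_max()/wrap_min() in sched_clock.c */
static uint64_t wrap_max(uint64_t x, uint64_t y)
{
	return (int64_t)(x - y) > 0 ? x : y;
}

static uint64_t wrap_min(uint64_t x, uint64_t y)
{
	return (int64_t)(x - y) < 0 ? x : y;
}

struct clock_data {
	uint64_t tick_raw;		/* raw clock at the last tick */
	uint64_t tick_gtod;		/* GTOD at the last tick */
	_Atomic uint64_t clock;		/* last published filtered value */
};

/*
 * Same shape as sched_clock_local(): clamp the candidate value into
 * [max(tick_gtod, old), max(old, tick_gtod + TICK_NSEC)] and publish it
 * with a compare-and-swap; retry if someone else published a value in
 * the meantime (an IRQ/NMI on the same cpu, in the kernel case).
 */
static uint64_t clock_local(struct clock_data *scd, uint64_t now)
{
	uint64_t old_clock, clock, min_clock, max_clock;
	int64_t delta;

again:
	delta = (int64_t)(now - scd->tick_raw);
	if (delta < 0)
		delta = 0;

	old_clock = atomic_load(&scd->clock);

	clock = scd->tick_gtod + (uint64_t)delta;
	min_clock = wrap_max(scd->tick_gtod, old_clock);
	max_clock = wrap_max(old_clock, scd->tick_gtod + TICK_NSEC);

	clock = wrap_max(clock, min_clock);
	clock = wrap_min(clock, max_clock);

	if (!atomic_compare_exchange_strong(&scd->clock, &old_clock, clock))
		goto again;

	return clock;
}

int main(void)
{
	struct clock_data scd = { .tick_raw = 100, .tick_gtod = 5000 };

	atomic_init(&scd.clock, 5000);

	/* a raw delta of 400ns past the tick lands inside the window: prints 5400 */
	printf("%llu\n", (unsigned long long)clock_local(&scd, 500));
	/* a crazy raw value is capped at tick_gtod + TICK_NSEC: prints 1005000 */
	printf("%llu\n", (unsigned long long)clock_local(&scd, 100 + 10 * TICK_NSEC));
	return 0;
}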
@@ -223,11 +251,9 @@ void sched_clock_tick(void)
 	now_gtod = ktime_to_ns(ktime_get());
 	now = sched_clock();
 
-	__raw_spin_lock(&scd->lock);
 	scd->tick_raw = now;
 	scd->tick_gtod = now_gtod;
-	__update_sched_clock(scd, now);
-	__raw_spin_unlock(&scd->lock);
+	sched_clock_local(scd);
 }
 
 /*
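
Continuing the same stand-alone sketch, the tick path above simply re-anchors the filtering window: it refreshes tick_raw/tick_gtod and runs the same filter, with no lock required. A hypothetical equivalent:

/* Hypothetical companion to clock_local() above: re-anchor the window at a tick. */
static void clock_tick(struct clock_data *scd, uint64_t raw, uint64_t gtod)
{
	scd->tick_raw = raw;	/* raw clock at this tick */
	scd->tick_gtod = gtod;	/* GTOD at this tick */
	clock_local(scd, raw);	/* pull scd->clock into the new window */
}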
@@ -252,6 +278,47 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
 }
 EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
 
+/*
+ * As outlined at the top, provides a fast, high resolution, nanosecond
+ * time source that is monotonic per cpu argument and has bounded drift
+ * between cpus.
+ *
+ * ######################### BIG FAT WARNING ##########################
+ * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
+ * # go backwards !!                                                  #
+ * ####################################################################
+ */
+u64 cpu_clock(int cpu)
+{
+	u64 clock;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	clock = sched_clock_cpu(cpu);
+	local_irq_restore(flags);
+
+	return clock;
+}
+
+/*
+ * Similar to cpu_clock() for the current cpu. Time will only be observed
+ * to be monotonic if care is taken to only compare timestamps taken on the
+ * same CPU.
+ *
+ * See cpu_clock().
+ */
+u64 local_clock(void)
+{
+	u64 clock;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	clock = sched_clock_cpu(smp_processor_id());
+	local_irq_restore(flags);
+
+	return clock;
+}
+
 #else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
 
 void sched_clock_init(void)
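
As a concrete reading of the warning repeated above: a delta taken across two different cpu arguments must be treated as signed, because it can legitimately come out negative even though each clock is monotonic on its own cpu. A hypothetical helper, not part of this patch:

/* Hypothetical helper, for illustration only -- not part of this patch. */
static s64 cross_cpu_delta(int i, int j)
{
	return (s64)(cpu_clock(j) - cpu_clock(i));	/* may be negative for i != j */
}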
@@ -267,17 +334,17 @@ u64 sched_clock_cpu(int cpu)
 	return sched_clock();
 }
 
-#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
+u64 cpu_clock(int cpu)
+{
+	return sched_clock_cpu(cpu);
+}
 
-unsigned long long cpu_clock(int cpu)
+u64 local_clock(void)
 {
-	unsigned long long clock;
-	unsigned long flags;
+	return sched_clock_cpu(0);
+}
 
-	local_irq_save(flags);
-	clock = sched_clock_cpu(cpu);
-	local_irq_restore(flags);
+#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
 
-	return clock;
-}
 EXPORT_SYMBOL_GPL(cpu_clock);
+EXPORT_SYMBOL_GPL(local_clock);