author:    Peter Zijlstra <a.p.zijlstra@chello.nl>  2010-05-25 04:48:51 -0400
committer: Ingo Molnar <mingo@elte.hu>              2010-06-09 04:34:49 -0400
commit:    c676329abb2b8359d9a5d734dec0c81779823fd6
tree:      b6c33715176221a87100228399c2a6f5049e44ea
parent:    95ae3c59fa8ad616c73745e21154b5af0fb10168
sched_clock: Add local_clock() API and improve documentation
For people who otherwise get to write: cpu_clock(smp_processor_id()),
there is now: local_clock().
Also, as per suggestion from Andrew, provide some documentation on
the various clock interfaces, and minimize the unsigned long long vs
u64 mess.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jens Axboe <jaxboe@fusionio.com>
LKML-Reference: <1275052414.1645.52.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
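The conversion is mechanical at each call site. As a hedged illustration (the helper below is hypothetical, not part of the patch), this is what the change looks like for a caller:

```c
#include <linux/sched.h>	/* cpu_clock(), local_clock() */
#include <linux/smp.h>		/* smp_processor_id() */

/* Hypothetical timestamp helper, before this patch: the caller names
 * the current CPU explicitly just to read its own clock. */
static u64 stamp_before(void)
{
	return cpu_clock(smp_processor_id());
}

/* The same helper after this patch: local_clock() expresses the intent
 * directly and drops the smp_processor_id() plumbing. */
static u64 stamp_after(void)
{
	return local_clock();
}
```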
 arch/parisc/kernel/ftrace.c |  4
 include/linux/sched.h       | 37
 kernel/lockdep.c            |  2
 kernel/perf_event.c         |  2
 kernel/rcutorture.c         |  3
 kernel/sched.c              |  2
 kernel/sched_clock.c        | 95
 kernel/trace/trace_clock.c  |  2
 8 files changed, 113 insertions(+), 34 deletions(-)
diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c
index 9877372ffdba..5beb97bafbb1 100644
--- a/arch/parisc/kernel/ftrace.c
+++ b/arch/parisc/kernel/ftrace.c
@@ -82,7 +82,7 @@ unsigned long ftrace_return_to_handler(unsigned long retval0,
 	unsigned long ret;
 
 	pop_return_trace(&trace, &ret);
-	trace.rettime = cpu_clock(raw_smp_processor_id());
+	trace.rettime = local_clock();
 	ftrace_graph_return(&trace);
 
 	if (unlikely(!ret)) {
@@ -126,7 +126,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 		return;
 	}
 
-	calltime = cpu_clock(raw_smp_processor_id());
+	calltime = local_clock();
 
 	if (push_return_trace(old, calltime,
 				self_addr, &trace.depth) == -EBUSY) {
diff --git a/include/linux/sched.h b/include/linux/sched.h
index edc3dd168d87..c2d4316a04bb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1791,20 +1791,23 @@ static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
 #endif
 
 /*
- * Architectures can set this to 1 if they have specified
- * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
- * but then during bootup it turns out that sched_clock()
- * is reliable after all:
+ * Do not use outside of architecture code which knows its limitations.
+ *
+ * sched_clock() has no promise of monotonicity or bounded drift between
+ * CPUs; using it (which you should not) requires disabling IRQs.
+ *
+ * Please use one of the three interfaces below.
 */
-#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
-extern int sched_clock_stable;
-#endif
-
-/* ftrace calls sched_clock() directly */
 extern unsigned long long notrace sched_clock(void);
+/*
+ * See the comment in kernel/sched_clock.c
+ */
+extern u64 cpu_clock(int cpu);
+extern u64 local_clock(void);
+extern u64 sched_clock_cpu(int cpu);
+
 
 extern void sched_clock_init(void);
-extern u64 sched_clock_cpu(int cpu);
 
 #ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
 static inline void sched_clock_tick(void)
@@ -1819,17 +1822,19 @@ static inline void sched_clock_idle_wakeup_event(u64 delta_ns)
 {
 }
 #else
+/*
+ * Architectures can set this to 1 if they have specified
+ * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
+ * but then during bootup it turns out that sched_clock()
+ * is reliable after all:
+ */
+extern int sched_clock_stable;
+
 extern void sched_clock_tick(void);
 extern void sched_clock_idle_sleep_event(void);
 extern void sched_clock_idle_wakeup_event(u64 delta_ns);
 #endif
 
-/*
- * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
- * clock constructed from sched_clock():
- */
-extern unsigned long long cpu_clock(int cpu);
-
 extern unsigned long long
 task_sched_runtime(struct task_struct *task);
 extern unsigned long long thread_group_sched_runtime(struct task_struct *task);
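The comments added above distinguish three sanctioned interfaces by their calling-context requirements. A minimal sketch of choosing between them (the function below is illustrative, not from the patch):

```c
#include <linux/sched.h>	/* cpu_clock(), local_clock(), sched_clock_cpu() */
#include <linux/irqflags.h>	/* local_irq_save()/local_irq_restore() */
#include <linux/smp.h>		/* smp_processor_id() */

static u64 clock_interface_examples(void)
{
	unsigned long flags;
	u64 a, b, c;

	/* cpu_clock(i): usable from any context, including NMI. */
	a = cpu_clock(raw_smp_processor_id());

	/* local_clock(): cpu_clock() for whatever CPU we are running on. */
	b = local_clock();

	/* sched_clock_cpu(i): the caller must disable local IRQs itself. */
	local_irq_save(flags);
	c = sched_clock_cpu(smp_processor_id());
	local_irq_restore(flags);

	return a + b + c;	/* keep the compiler quiet; values are arbitrary */
}
```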
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 54286798c37b..f2852a510232 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -146,7 +146,7 @@ static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS],
 
 static inline u64 lockstat_clock(void)
 {
-	return cpu_clock(smp_processor_id());
+	return local_clock();
 }
 
 static int lock_point(unsigned long points[], unsigned long ip)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 31d6afe92594..109c5ec88933 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -214,7 +214,7 @@ static void perf_unpin_context(struct perf_event_context *ctx)
 
 static inline u64 perf_clock(void)
 {
-	return cpu_clock(raw_smp_processor_id());
+	return local_clock();
 }
 
 /*
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 6535ac8bc6a5..2e2726d790b9 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -239,8 +239,7 @@ static unsigned long
 rcu_random(struct rcu_random_state *rrsp)
 {
 	if (--rrsp->rrs_count < 0) {
-		rrsp->rrs_state +=
-			(unsigned long)cpu_clock(raw_smp_processor_id());
+		rrsp->rrs_state += (unsigned long)local_clock();
 		rrsp->rrs_count = RCU_RANDOM_REFRESH;
 	}
 	rrsp->rrs_state = rrsp->rrs_state * RCU_RANDOM_MULT + RCU_RANDOM_ADD;
diff --git a/kernel/sched.c b/kernel/sched.c
index 8f351c56567f..3abd8f780dae 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1647,7 +1647,7 @@ static void update_shares(struct sched_domain *sd)
 	if (root_task_group_empty())
 		return;
 
-	now = cpu_clock(raw_smp_processor_id());
+	now = local_clock();
 	elapsed = now - sd->last_update;
 
 	if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) {
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index 906a0f718cb3..52f1a149bfb1 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -10,19 +10,55 @@
  *  Ingo Molnar <mingo@redhat.com>
  *  Guillaume Chazarain <guichaz@gmail.com>
  *
- * Create a semi stable clock from a mixture of other events, including:
- *  - gtod
+ *
+ * What:
+ *
+ * cpu_clock(i) provides a fast (execution time) high resolution
+ * clock with bounded drift between CPUs. The value of cpu_clock(i)
+ * is monotonic for constant i. The timestamp returned is in nanoseconds.
+ *
+ * ######################### BIG FAT WARNING ##########################
+ * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
+ * # go backwards !!                                                  #
+ * ####################################################################
+ *
+ * There is no strict promise about the base, although it tends to start
+ * at 0 on boot (but people really shouldn't rely on that).
+ *
+ * cpu_clock(i)       -- can be used from any context, including NMI.
+ * sched_clock_cpu(i) -- must be used with local IRQs disabled (implied by NMI)
+ * local_clock()      -- is cpu_clock() on the current cpu.
+ *
+ * How:
+ *
+ * The implementation either uses sched_clock() when
+ * !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK, in which case sched_clock() is
+ * assumed to provide these properties (mostly it means the architecture
+ * provides a globally synchronized highres time source).
+ *
+ * Otherwise it tries to create a semi stable clock from a mixture of other
+ * clocks, including:
+ *
+ *  - GTOD (clock monotonic)
  *  - sched_clock()
  *  - explicit idle events
  *
- * We use gtod as base and the unstable clock deltas. The deltas are filtered,
- * making it monotonic and keeping it within an expected window.
+ * We use GTOD as base and use sched_clock() deltas to improve resolution. The
+ * deltas are filtered to provide monotonicity and keep the clock within an
+ * expected window.
  *
  * Furthermore, explicit sleep and wakeup hooks allow us to account for time
  * that is otherwise invisible (TSC gets stopped).
  *
- * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
- * consistent between cpus (never more than 2 jiffies difference).
+ *
+ * Notes:
+ *
+ * The !IRQ-safety of sched_clock() and sched_clock_cpu() comes from things
+ * like cpufreq interrupts that can change the base clock (TSC) multiplier
+ * and cause funny jumps in time -- although the filtering provided by
+ * sched_clock_cpu() should mitigate serious artifacts, we cannot rely on it
+ * in general since for !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK we fully rely on
+ * sched_clock().
 */
 #include <linux/spinlock.h>
 #include <linux/hardirq.h>
@@ -170,6 +206,11 @@ again:
 	return val;
 }
 
+/*
+ * Similar to cpu_clock(), but requires local IRQs to be disabled.
+ *
+ * See cpu_clock().
+ */
 u64 sched_clock_cpu(int cpu)
 {
 	struct sched_clock_data *scd;
@@ -237,9 +278,19 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
 }
 EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
 
-unsigned long long cpu_clock(int cpu)
+/*
+ * As outlined at the top, provides a fast, high resolution, nanosecond
+ * time source that is monotonic per cpu argument and has bounded drift
+ * between cpus.
+ *
+ * ######################### BIG FAT WARNING ##########################
+ * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
+ * # go backwards !!                                                  #
+ * ####################################################################
+ */
+u64 cpu_clock(int cpu)
 {
-	unsigned long long clock;
+	u64 clock;
 	unsigned long flags;
 
 	local_irq_save(flags);
@@ -249,6 +300,25 @@ unsigned long long cpu_clock(int cpu)
 	return clock;
 }
 
+/*
+ * Similar to cpu_clock() for the current cpu. Time will only be observed
+ * to be monotonic if care is taken to only compare timestamps taken on the
+ * same CPU.
+ *
+ * See cpu_clock().
+ */
+u64 local_clock(void)
+{
+	u64 clock;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	clock = sched_clock_cpu(smp_processor_id());
+	local_irq_restore(flags);
+
+	return clock;
+}
+
 #else	/* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
 
 void sched_clock_init(void)
@@ -264,12 +334,17 @@ u64 sched_clock_cpu(int cpu)
 	return sched_clock();
 }
 
-
-unsigned long long cpu_clock(int cpu)
+u64 cpu_clock(int cpu)
 {
 	return sched_clock_cpu(cpu);
 }
 
+u64 local_clock(void)
+{
+	return sched_clock_cpu(0);
+}
+
 #endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
 
 EXPORT_SYMBOL_GPL(cpu_clock);
+EXPORT_SYMBOL_GPL(local_clock);
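The BIG FAT WARNING repeated above is the property most callers trip over. A sketch of the pattern it forbids next to a safe one (hypothetical code, not part of the commit):

```c
#include <linux/sched.h>

/* WRONG: the two timestamps come from different CPUs' clocks, which
 * only have bounded drift, so this "delta" can be negative even for
 * correctly ordered events -- exactly what the warning is about. */
static s64 bad_cross_cpu_delta(int i, int j)
{
	return (s64)(cpu_clock(j) - cpu_clock(i));
}

/* OK: both timestamps come from the same CPU's clock, which is
 * documented to be monotonic for a constant cpu argument. Per the
 * local_clock() comment above, this holds only if we do not migrate
 * between the two reads. */
static u64 good_same_cpu_delta(void)
{
	u64 t0 = local_clock();
	/* ... work ... */
	return local_clock() - t0;
}
```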
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 9d589d8dcd1a..1723e2b8c589 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -56,7 +56,7 @@ u64 notrace trace_clock_local(void)
  */
 u64 notrace trace_clock(void)
 {
-	return cpu_clock(raw_smp_processor_id());
+	return local_clock();
 }
 
 
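For the unstable case, the sched_clock.c header comment only sketches the mechanism: GTOD supplies the base, sched_clock() deltas add resolution, and the result is filtered into an expected window. A deliberately simplified model of that filtering (illustrative only; the real code in kernel/sched_clock.c keeps this state per-cpu and updates it locklessly):

```c
#include <linux/jiffies.h>	/* TICK_NSEC */
#include <linux/types.h>

/* Simplified model of the filtering described in the header comment.
 * tick_gtod/tick_raw are the GTOD and sched_clock() values sampled at
 * the last tick; prev_clock is the last value returned on this CPU.
 * Names mirror the kernel's, but the logic is deliberately reduced. */
static u64 clock_filter(u64 tick_gtod, u64 tick_raw, u64 now_raw, u64 prev_clock)
{
	u64 delta = now_raw - tick_raw;		/* unstable delta since the tick */
	u64 clock = tick_gtod + delta;		/* GTOD base, sched_clock() resolution */
	u64 min_clock = tick_gtod;		/* not before the last tick ... */
	u64 max_clock = tick_gtod + TICK_NSEC;	/* ... nor past the next one */

	if (clock < min_clock)
		clock = min_clock;
	if (clock > max_clock)
		clock = max_clock;
	if (clock < prev_clock)			/* per-cpu monotonicity */
		clock = prev_clock;

	return clock;
}
```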
