diff options
-rw-r--r-- | arch/x86/include/asm/timer.h | 59 | ||||
-rw-r--r-- | arch/x86/kernel/tsc.c | 112 |
2 files changed, 61 insertions, 110 deletions
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index 10a78c037910..b4c667693a21 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -13,66 +13,7 @@ extern int recalibrate_cpu_khz(void); | |||
13 | 13 | ||
14 | extern int no_timer_check; | 14 | extern int no_timer_check; |
15 | 15 | ||
16 | /* Accelerators for sched_clock() | ||
17 | * convert from cycles(64bits) => nanoseconds (64bits) | ||
18 | * basic equation: | ||
19 | * ns = cycles / (freq / ns_per_sec) | ||
20 | * ns = cycles * (ns_per_sec / freq) | ||
21 | * ns = cycles * (10^9 / (cpu_khz * 10^3)) | ||
22 | * ns = cycles * (10^6 / cpu_khz) | ||
23 | * | ||
24 | * Then we use scaling math (suggested by george@mvista.com) to get: | ||
25 | * ns = cycles * (10^6 * SC / cpu_khz) / SC | ||
26 | * ns = cycles * cyc2ns_scale / SC | ||
27 | * | ||
28 | * And since SC is a constant power of two, we can convert the div | ||
29 | * into a shift. | ||
30 | * | ||
31 | * We can use khz divisor instead of mhz to keep a better precision, since | ||
32 | * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. | ||
33 | * (mathieu.desnoyers@polymtl.ca) | ||
34 | * | ||
35 | * -johnstul@us.ibm.com "math is hard, lets go shopping!" | ||
36 | * | ||
37 | * In: | ||
38 | * | ||
39 | * ns = cycles * cyc2ns_scale / SC | ||
40 | * | ||
41 | * Although we may still have enough bits to store the value of ns, | ||
42 | * in some cases, we may not have enough bits to store cycles * cyc2ns_scale, | ||
43 | * leading to an incorrect result. | ||
44 | * | ||
45 | * To avoid this, we can decompose 'cycles' into quotient and remainder | ||
46 | * of division by SC. Then, | ||
47 | * | ||
48 | * ns = (quot * SC + rem) * cyc2ns_scale / SC | ||
49 | * = quot * cyc2ns_scale + (rem * cyc2ns_scale) / SC | ||
50 | * | ||
51 | * - sqazi@google.com | ||
52 | */ | ||
53 | |||
54 | DECLARE_PER_CPU(unsigned long, cyc2ns); | 16 | DECLARE_PER_CPU(unsigned long, cyc2ns); |
55 | DECLARE_PER_CPU(unsigned long long, cyc2ns_offset); | 17 | DECLARE_PER_CPU(unsigned long long, cyc2ns_offset); |
56 | 18 | ||
57 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ | ||
58 | |||
59 | static inline unsigned long long __cycles_2_ns(unsigned long long cyc) | ||
60 | { | ||
61 | unsigned long long ns = this_cpu_read(cyc2ns_offset); | ||
62 | ns += mul_u64_u32_shr(cyc, this_cpu_read(cyc2ns), CYC2NS_SCALE_FACTOR); | ||
63 | return ns; | ||
64 | } | ||
65 | |||
66 | static inline unsigned long long cycles_2_ns(unsigned long long cyc) | ||
67 | { | ||
68 | unsigned long long ns; | ||
69 | unsigned long flags; | ||
70 | |||
71 | local_irq_save(flags); | ||
72 | ns = __cycles_2_ns(cyc); | ||
73 | local_irq_restore(flags); | ||
74 | |||
75 | return ns; | ||
76 | } | ||
77 | |||
78 | #endif /* _ASM_X86_TIMER_H */ | 19 | #endif /* _ASM_X86_TIMER_H */ |
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 930e5d48f560..b4a04ac1d7aa 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -38,6 +38,66 @@ static int __read_mostly tsc_unstable; | |||
38 | static int __read_mostly tsc_disabled = -1; | 38 | static int __read_mostly tsc_disabled = -1; |
39 | 39 | ||
40 | int tsc_clocksource_reliable; | 40 | int tsc_clocksource_reliable; |
41 | |||
42 | /* Accelerators for sched_clock() | ||
43 | * convert from cycles(64bits) => nanoseconds (64bits) | ||
44 | * basic equation: | ||
45 | * ns = cycles / (freq / ns_per_sec) | ||
46 | * ns = cycles * (ns_per_sec / freq) | ||
47 | * ns = cycles * (10^9 / (cpu_khz * 10^3)) | ||
48 | * ns = cycles * (10^6 / cpu_khz) | ||
49 | * | ||
50 | * Then we use scaling math (suggested by george@mvista.com) to get: | ||
51 | * ns = cycles * (10^6 * SC / cpu_khz) / SC | ||
52 | * ns = cycles * cyc2ns_scale / SC | ||
53 | * | ||
54 | * And since SC is a constant power of two, we can convert the div | ||
55 | * into a shift. | ||
56 | * | ||
57 | * We can use khz divisor instead of mhz to keep a better precision, since | ||
58 | * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. | ||
59 | * (mathieu.desnoyers@polymtl.ca) | ||
60 | * | ||
61 | * -johnstul@us.ibm.com "math is hard, lets go shopping!" | ||
62 | */ | ||
63 | |||
64 | DEFINE_PER_CPU(unsigned long, cyc2ns); | ||
65 | DEFINE_PER_CPU(unsigned long long, cyc2ns_offset); | ||
66 | |||
67 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ | ||
68 | |||
69 | static inline unsigned long long cycles_2_ns(unsigned long long cyc) | ||
70 | { | ||
71 | unsigned long long ns = this_cpu_read(cyc2ns_offset); | ||
72 | ns += mul_u64_u32_shr(cyc, this_cpu_read(cyc2ns), CYC2NS_SCALE_FACTOR); | ||
73 | return ns; | ||
74 | } | ||
75 | |||
76 | static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) | ||
77 | { | ||
78 | unsigned long long tsc_now, ns_now, *offset; | ||
79 | unsigned long flags, *scale; | ||
80 | |||
81 | local_irq_save(flags); | ||
82 | sched_clock_idle_sleep_event(); | ||
83 | |||
84 | scale = &per_cpu(cyc2ns, cpu); | ||
85 | offset = &per_cpu(cyc2ns_offset, cpu); | ||
86 | |||
87 | rdtscll(tsc_now); | ||
88 | ns_now = cycles_2_ns(tsc_now); | ||
89 | |||
90 | if (cpu_khz) { | ||
91 | *scale = ((NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) + | ||
92 | cpu_khz / 2) / cpu_khz; | ||
93 | *offset = ns_now - mult_frac(tsc_now, *scale, | ||
94 | (1UL << CYC2NS_SCALE_FACTOR)); | ||
95 | } | ||
96 | |||
97 | sched_clock_idle_wakeup_event(0); | ||
98 | local_irq_restore(flags); | ||
99 | } | ||
100 | |||
41 | /* | 101 | /* |
42 | * Scheduler clock - returns current time in nanosec units. | 102 | * Scheduler clock - returns current time in nanosec units. |
43 | */ | 103 | */ |
@@ -62,7 +122,7 @@ u64 native_sched_clock(void) | |||
62 | rdtscll(this_offset); | 122 | rdtscll(this_offset); |
63 | 123 | ||
64 | /* return the value in ns */ | 124 | /* return the value in ns */ |
65 | return __cycles_2_ns(this_offset); | 125 | return cycles_2_ns(this_offset); |
66 | } | 126 | } |
67 | 127 | ||
68 | /* We need to define a real function for sched_clock, to override the | 128 | /* We need to define a real function for sched_clock, to override the |
@@ -589,56 +649,6 @@ int recalibrate_cpu_khz(void) | |||
589 | EXPORT_SYMBOL(recalibrate_cpu_khz); | 649 | EXPORT_SYMBOL(recalibrate_cpu_khz); |
590 | 650 | ||
591 | 651 | ||
592 | /* Accelerators for sched_clock() | ||
593 | * convert from cycles(64bits) => nanoseconds (64bits) | ||
594 | * basic equation: | ||
595 | * ns = cycles / (freq / ns_per_sec) | ||
596 | * ns = cycles * (ns_per_sec / freq) | ||
597 | * ns = cycles * (10^9 / (cpu_khz * 10^3)) | ||
598 | * ns = cycles * (10^6 / cpu_khz) | ||
599 | * | ||
600 | * Then we use scaling math (suggested by george@mvista.com) to get: | ||
601 | * ns = cycles * (10^6 * SC / cpu_khz) / SC | ||
602 | * ns = cycles * cyc2ns_scale / SC | ||
603 | * | ||
604 | * And since SC is a constant power of two, we can convert the div | ||
605 | * into a shift. | ||
606 | * | ||
607 | * We can use khz divisor instead of mhz to keep a better precision, since | ||
608 | * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. | ||
609 | * (mathieu.desnoyers@polymtl.ca) | ||
610 | * | ||
611 | * -johnstul@us.ibm.com "math is hard, lets go shopping!" | ||
612 | */ | ||
613 | |||
614 | DEFINE_PER_CPU(unsigned long, cyc2ns); | ||
615 | DEFINE_PER_CPU(unsigned long long, cyc2ns_offset); | ||
616 | |||
617 | static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) | ||
618 | { | ||
619 | unsigned long long tsc_now, ns_now, *offset; | ||
620 | unsigned long flags, *scale; | ||
621 | |||
622 | local_irq_save(flags); | ||
623 | sched_clock_idle_sleep_event(); | ||
624 | |||
625 | scale = &per_cpu(cyc2ns, cpu); | ||
626 | offset = &per_cpu(cyc2ns_offset, cpu); | ||
627 | |||
628 | rdtscll(tsc_now); | ||
629 | ns_now = __cycles_2_ns(tsc_now); | ||
630 | |||
631 | if (cpu_khz) { | ||
632 | *scale = ((NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) + | ||
633 | cpu_khz / 2) / cpu_khz; | ||
634 | *offset = ns_now - mult_frac(tsc_now, *scale, | ||
635 | (1UL << CYC2NS_SCALE_FACTOR)); | ||
636 | } | ||
637 | |||
638 | sched_clock_idle_wakeup_event(0); | ||
639 | local_irq_restore(flags); | ||
640 | } | ||
641 | |||
642 | static unsigned long long cyc2ns_suspend; | 652 | static unsigned long long cyc2ns_suspend; |
643 | 653 | ||
644 | void tsc_save_sched_clock_state(void) | 654 | void tsc_save_sched_clock_state(void) |