 arch/x86/include/asm/timer.h |  59 --------------------------
 arch/x86/kernel/tsc.c        | 112 ++++++++++++++++-------------
 2 files changed, 61 insertions(+), 110 deletions(-)
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index 10a78c037910..b4c667693a21 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -13,66 +13,7 @@ extern int recalibrate_cpu_khz(void);
 
 extern int no_timer_check;
 
-/* Accelerators for sched_clock()
- * convert from cycles(64bits) => nanoseconds (64bits)
- * basic equation:
- *		ns = cycles / (freq / ns_per_sec)
- *		ns = cycles * (ns_per_sec / freq)
- *		ns = cycles * (10^9 / (cpu_khz * 10^3))
- *		ns = cycles * (10^6 / cpu_khz)
- *
- *	Then we use scaling math (suggested by george@mvista.com) to get:
- *		ns = cycles * (10^6 * SC / cpu_khz) / SC
- *		ns = cycles * cyc2ns_scale / SC
- *
- *	And since SC is a constant power of two, we can convert the div
- * into a shift.
- *
- * We can use khz divisor instead of mhz to keep a better precision, since
- * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
- * (mathieu.desnoyers@polymtl.ca)
- *
- *		-johnstul@us.ibm.com "math is hard, lets go shopping!"
- *
- * In:
- *
- * ns = cycles * cyc2ns_scale / SC
- *
- * Although we may still have enough bits to store the value of ns,
- * in some cases, we may not have enough bits to store cycles * cyc2ns_scale,
- * leading to an incorrect result.
- *
- * To avoid this, we can decompose 'cycles' into quotient and remainder
- * of division by SC.  Then,
- *
- * ns = (quot * SC + rem) * cyc2ns_scale / SC
- *    = quot * cyc2ns_scale + (rem * cyc2ns_scale) / SC
- *
- *		- sqazi@google.com
- */
-
 DECLARE_PER_CPU(unsigned long, cyc2ns);
 DECLARE_PER_CPU(unsigned long long, cyc2ns_offset);
 
-#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
-
-static inline unsigned long long __cycles_2_ns(unsigned long long cyc)
-{
-	unsigned long long ns = this_cpu_read(cyc2ns_offset);
-	ns += mul_u64_u32_shr(cyc, this_cpu_read(cyc2ns), CYC2NS_SCALE_FACTOR);
-	return ns;
-}
-
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
-{
-	unsigned long long ns;
-	unsigned long flags;
-
-	local_irq_save(flags);
-	ns = __cycles_2_ns(cyc);
-	local_irq_restore(flags);
-
-	return ns;
-}
-
 #endif /* _ASM_X86_TIMER_H */
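
Side note, not part of the patch: the fixed-point conversion described in the
comment block being moved can be sanity-checked with a standalone userspace
sketch. The helper names scale_for() and cyc2ns() below are made up for
illustration; the quotient/remainder split is the overflow-avoidance trick
the old timer.h comment credits to sqazi@google.com, which the kernel code
gets from mul_u64_u32_shr().

    #include <stdio.h>
    #include <stdint.h>

    #define SC_FACTOR 10	/* same idea as CYC2NS_SCALE_FACTOR: SC = 2^10 */

    /*
     * cyc2ns_scale = 10^6 * 2^SC_FACTOR / cpu_khz, rounded to nearest;
     * per the comment above, the result fits in 32 bits for any sane khz
     */
    static uint32_t scale_for(uint64_t cpu_khz)
    {
            return ((1000000ULL << SC_FACTOR) + cpu_khz / 2) / cpu_khz;
    }

    /*
     * ns = cycles * scale >> SC_FACTOR, computed as
     * quot * scale + (rem * scale >> SC_FACTOR) so the intermediate
     * cycles * scale (2^SC_FACTOR times larger than the result) never
     * has to fit in 64 bits
     */
    static uint64_t cyc2ns(uint64_t cycles, uint32_t scale)
    {
            uint64_t quot = cycles >> SC_FACTOR;
            uint64_t rem  = cycles & ((1ULL << SC_FACTOR) - 1);

            return quot * scale + ((rem * scale) >> SC_FACTOR);
    }

    int main(void)
    {
            uint32_t scale = scale_for(2000000);	/* 2 GHz -> scale == 512 */

            /* 2e9 cycles at 2 GHz is one second: expect 1000000000 ns */
            printf("scale=%u ns=%llu\n", scale,
                   (unsigned long long)cyc2ns(2000000000ULL, scale));
            return 0;
    }

With SC = 2^10 and cpu_khz = 2000000, scale works out to 512 and the
conversion reduces to cycles / 2, i.e. 0.5 ns per cycle on a 2 GHz TSC.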
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 930e5d48f560..b4a04ac1d7aa 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -38,6 +38,66 @@ static int __read_mostly tsc_unstable;
 static int __read_mostly tsc_disabled = -1;
 
 int tsc_clocksource_reliable;
+
+/* Accelerators for sched_clock()
+ * convert from cycles(64bits) => nanoseconds (64bits)
+ * basic equation:
+ *		ns = cycles / (freq / ns_per_sec)
+ *		ns = cycles * (ns_per_sec / freq)
+ *		ns = cycles * (10^9 / (cpu_khz * 10^3))
+ *		ns = cycles * (10^6 / cpu_khz)
+ *
+ *	Then we use scaling math (suggested by george@mvista.com) to get:
+ *		ns = cycles * (10^6 * SC / cpu_khz) / SC
+ *		ns = cycles * cyc2ns_scale / SC
+ *
+ *	And since SC is a constant power of two, we can convert the div
+ * into a shift.
+ *
+ * We can use khz divisor instead of mhz to keep a better precision, since
+ * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
+ * (mathieu.desnoyers@polymtl.ca)
+ *
+ *		-johnstul@us.ibm.com "math is hard, lets go shopping!"
+ */
+
+DEFINE_PER_CPU(unsigned long, cyc2ns);
+DEFINE_PER_CPU(unsigned long long, cyc2ns_offset);
+
+#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
+
+static inline unsigned long long cycles_2_ns(unsigned long long cyc)
+{
+	unsigned long long ns = this_cpu_read(cyc2ns_offset);
+	ns += mul_u64_u32_shr(cyc, this_cpu_read(cyc2ns), CYC2NS_SCALE_FACTOR);
+	return ns;
+}
+
+static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
+{
+	unsigned long long tsc_now, ns_now, *offset;
+	unsigned long flags, *scale;
+
+	local_irq_save(flags);
+	sched_clock_idle_sleep_event();
+
+	scale = &per_cpu(cyc2ns, cpu);
+	offset = &per_cpu(cyc2ns_offset, cpu);
+
+	rdtscll(tsc_now);
+	ns_now = cycles_2_ns(tsc_now);
+
+	if (cpu_khz) {
+		*scale = ((NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) +
+			cpu_khz / 2) / cpu_khz;
+		*offset = ns_now - mult_frac(tsc_now, *scale,
+					     (1UL << CYC2NS_SCALE_FACTOR));
+	}
+
+	sched_clock_idle_wakeup_event(0);
+	local_irq_restore(flags);
+}
+
 /*
  * Scheduler clock - returns current time in nanosec units.
  */
@@ -62,7 +122,7 @@ u64 native_sched_clock(void)
 	rdtscll(this_offset);
 
 	/* return the value in ns */
-	return __cycles_2_ns(this_offset);
+	return cycles_2_ns(this_offset);
 }
 
 /* We need to define a real function for sched_clock, to override the
@@ -589,56 +649,6 @@ int recalibrate_cpu_khz(void)
 EXPORT_SYMBOL(recalibrate_cpu_khz);
 
 
-/* Accelerators for sched_clock()
- * convert from cycles(64bits) => nanoseconds (64bits)
- * basic equation:
- *		ns = cycles / (freq / ns_per_sec)
- *		ns = cycles * (ns_per_sec / freq)
- *		ns = cycles * (10^9 / (cpu_khz * 10^3))
- *		ns = cycles * (10^6 / cpu_khz)
- *
- *	Then we use scaling math (suggested by george@mvista.com) to get:
- *		ns = cycles * (10^6 * SC / cpu_khz) / SC
- *		ns = cycles * cyc2ns_scale / SC
- *
- *	And since SC is a constant power of two, we can convert the div
- * into a shift.
- *
- * We can use khz divisor instead of mhz to keep a better precision, since
- * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
- * (mathieu.desnoyers@polymtl.ca)
- *
- *		-johnstul@us.ibm.com "math is hard, lets go shopping!"
- */
-
-DEFINE_PER_CPU(unsigned long, cyc2ns);
-DEFINE_PER_CPU(unsigned long long, cyc2ns_offset);
-
-static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
-{
-	unsigned long long tsc_now, ns_now, *offset;
-	unsigned long flags, *scale;
-
-	local_irq_save(flags);
-	sched_clock_idle_sleep_event();
-
-	scale = &per_cpu(cyc2ns, cpu);
-	offset = &per_cpu(cyc2ns_offset, cpu);
-
-	rdtscll(tsc_now);
-	ns_now = __cycles_2_ns(tsc_now);
-
-	if (cpu_khz) {
-		*scale = ((NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) +
-			cpu_khz / 2) / cpu_khz;
-		*offset = ns_now - mult_frac(tsc_now, *scale,
-					     (1UL << CYC2NS_SCALE_FACTOR));
-	}
-
-	sched_clock_idle_wakeup_event(0);
-	local_irq_restore(flags);
-}
-
 static unsigned long long cyc2ns_suspend;
 
 void tsc_save_sched_clock_state(void)
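
Side note, not part of the patch: a worked example of the arithmetic in
set_cyc2ns_scale(), assuming a 2 GHz TSC (cpu_khz = 2000000) and invented
sample values for tsc_now and ns_now:

    /*
     * scale  = ((NSEC_PER_MSEC << 10) + cpu_khz / 2) / cpu_khz
     *        = (1000000 * 1024 + 1000000) / 2000000
     *        = 1025000000 / 2000000 = 512
     *
     * with tsc_now = 4000000000 and ns_now = 5000000000 (the sched_clock
     * value computed with the previous scale/offset pair):
     *
     * offset = ns_now - (tsc_now * scale >> 10)
     *        = 5000000000 - (4000000000 * 512 / 1024)
     *        = 5000000000 - 2000000000 = 3000000000
     */

Reading ns_now with the old parameters and folding it into the new offset is
what keeps sched_clock() continuous across a rescale (e.g. when cpufreq
changes the TSC frequency): immediately after the update, cycles_2_ns(tsc_now)
still returns exactly ns_now.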