Diffstat (limited to 'arch/x86_64/kernel/tsc.c')
-rw-r--r--  arch/x86_64/kernel/tsc.c  | 212
1 file changed, 212 insertions, 0 deletions
diff --git a/arch/x86_64/kernel/tsc.c b/arch/x86_64/kernel/tsc.c
new file mode 100644
index 000000000000..2dbac15ab1f0
--- /dev/null
+++ b/arch/x86_64/kernel/tsc.c
@@ -0,0 +1,212 @@
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/clocksource.h>
#include <linux/time.h>
#include <linux/acpi.h>
#include <linux/cpufreq.h>

#include <asm/timex.h>

int notsc __initdata = 0;

unsigned int cpu_khz;		/* TSC clocks / usec, not used here */
EXPORT_SYMBOL(cpu_khz);

/*
 * do_gettimeoffset() returns microseconds since last timer interrupt was
 * triggered by hardware. A memory read of HPET is slower than a register read
 * of TSC, but much more reliable. It's also synchronized to the timer
 * interrupt. Note that do_gettimeoffset() may return more than hpet_tick, if a
 * timer interrupt has happened already, but vxtime.trigger wasn't updated yet.
 * This is not a problem, because jiffies hasn't updated either. They are bound
 * together by xtime_lock.
 */

unsigned int do_gettimeoffset_tsc(void)
{
        unsigned long t;
        unsigned long x;
        t = get_cycles_sync();
        if (t < vxtime.last_tsc)
                t = vxtime.last_tsc; /* hack */
        x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> US_SCALE;
        return x;
}

static unsigned int cyc2ns_scale __read_mostly;

void set_cyc2ns_scale(unsigned long khz)
{
        cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / khz;
}

unsigned long long cycles_2_ns(unsigned long long cyc)
{
        return (cyc * cyc2ns_scale) >> NS_SCALE;
}

unsigned long long sched_clock(void)
{
        unsigned long a = 0;

        /* Could do CPU core sync here. Opteron can execute rdtsc speculatively,
         * which means it is not completely exact and may not be monotonic
         * between CPUs. But the errors should be too small to matter for
         * scheduling purposes.
         */

        rdtscll(a);
        return cycles_2_ns(a);
}

#ifdef CONFIG_CPU_FREQ

/* Frequency scaling support. Adjust the TSC-based timer when the CPU
 * frequency changes.
 *
 * RED-PEN: On SMP we assume all CPUs run with the same frequency. It's
 * not that important because current Opteron setups do not support
 * scaling on SMP anyway.
 *
 * Should fix up last_tsc too. Currently gettimeofday in the
 * first tick after the change will be slightly wrong.
 */

#include <linux/workqueue.h>

static unsigned int cpufreq_delayed_issched = 0;
static unsigned int cpufreq_init = 0;
static struct work_struct cpufreq_delayed_get_work;

static void handle_cpufreq_delayed_get(struct work_struct *v)
{
        unsigned int cpu;
        for_each_online_cpu(cpu) {
                cpufreq_get(cpu);
        }
        cpufreq_delayed_issched = 0;
}

/* If we notice lost ticks, schedule a call to cpufreq_get(); it verifies
 * that the CPU frequency the timing core thinks the CPU is running at
 * is still correct.
 */
void cpufreq_delayed_get(void)
{
        static int warned;
        if (cpufreq_init && !cpufreq_delayed_issched) {
                cpufreq_delayed_issched = 1;
                if (!warned) {
                        warned = 1;
                        printk(KERN_DEBUG "Losing some ticks... "
                               "checking if CPU frequency changed.\n");
                }
                schedule_work(&cpufreq_delayed_get_work);
        }
}

static unsigned int ref_freq = 0;
static unsigned long loops_per_jiffy_ref = 0;

static unsigned long cpu_khz_ref = 0;

static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
                                 void *data)
{
        struct cpufreq_freqs *freq = data;
        unsigned long *lpj, dummy;

        if (cpu_has(&cpu_data[freq->cpu], X86_FEATURE_CONSTANT_TSC))
                return 0;

        lpj = &dummy;
        if (!(freq->flags & CPUFREQ_CONST_LOOPS))
#ifdef CONFIG_SMP
                lpj = &cpu_data[freq->cpu].loops_per_jiffy;
#else
                lpj = &boot_cpu_data.loops_per_jiffy;
#endif

        if (!ref_freq) {
                ref_freq = freq->old;
                loops_per_jiffy_ref = *lpj;
                cpu_khz_ref = cpu_khz;
        }
        if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
            (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
            (val == CPUFREQ_RESUMECHANGE)) {
                *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);

                cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
                if (!(freq->flags & CPUFREQ_CONST_LOOPS))
                        vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
        }

        set_cyc2ns_scale(cpu_khz_ref);

        return 0;
}

static struct notifier_block time_cpufreq_notifier_block = {
        .notifier_call = time_cpufreq_notifier
};

static int __init cpufreq_tsc(void)
{
        INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get);
        if (!cpufreq_register_notifier(&time_cpufreq_notifier_block,
                                       CPUFREQ_TRANSITION_NOTIFIER))
                cpufreq_init = 1;
        return 0;
}

core_initcall(cpufreq_tsc);

#endif

static int tsc_unstable = 0;

void mark_tsc_unstable(void)
{
        tsc_unstable = 1;
}
EXPORT_SYMBOL_GPL(mark_tsc_unstable);

/*
 * Make an educated guess if the TSC is trustworthy and synchronized
 * over all CPUs.
 */
__cpuinit int unsynchronized_tsc(void)
{
        if (tsc_unstable)
                return 1;

#ifdef CONFIG_SMP
        if (apic_is_clustered_box())
                return 1;
#endif
        /* Most Intel systems have synchronized TSCs, except for
           multi-node systems. */
        if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
#ifdef CONFIG_ACPI
                /* But TSC doesn't tick in C3 so don't use it there */
                if (acpi_gbl_FADT.header.length > 0 &&
                    acpi_gbl_FADT.C3latency < 1000)
                        return 1;
#endif
                return 0;
        }

        /* Assume multi socket systems are not synchronized */
        return num_present_cpus() > 1;
}

int __init notsc_setup(char *s)
{
        notsc = 1;
        return 1;
}

__setup("notsc", notsc_setup);
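
The microsecond offset computed by do_gettimeoffset_tsc() is plain fixed-point arithmetic: the cycle delta since the last tick is multiplied by vxtime.tsc_quot, which this file sets to (USEC_PER_MSEC << US_SCALE) / cpu_khz in the cpufreq notifier, then shifted back down by US_SCALE. A minimal userspace sketch of that math, assuming US_SCALE is 32 as in the x86_64 headers of this era and using a made-up 2.2 GHz cpu_khz:

/* Sketch of the do_gettimeoffset_tsc() fixed-point math in userspace.
 * US_SCALE = 32 is assumed; the 2.2 GHz cpu_khz value is illustrative.
 */
#include <stdio.h>

#define US_SCALE        32
#define USEC_PER_MSEC   1000ULL

int main(void)
{
        unsigned long long cpu_khz = 2200000;   /* assumed 2.2 GHz CPU */
        unsigned long long tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;

        /* cpu_khz cycles elapse per millisecond, so a delta of cpu_khz
         * cycles since the last tick should map back to ~1000 usec. */
        unsigned long long delta = cpu_khz;
        unsigned long long usec = (delta * tsc_quot) >> US_SCALE;

        printf("tsc_quot=%llu offset=%llu usec\n", tsc_quot, usec);
        return 0;
}

With these numbers the result is 999 rather than 1000 microseconds because tsc_quot is truncated to an integer; the error stays far below what the between-tick interpolation needs.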
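
set_cyc2ns_scale() and cycles_2_ns() use the same trick for sched_clock(): the nanoseconds-per-cycle ratio is stored premultiplied by 2^NS_SCALE so each call needs only a multiply and a shift, never a division. A small sketch, assuming NS_SCALE is 10 as in the headers of this period; the frequency is again invented:

/* Userspace sketch of the cycles_2_ns() conversion.  NS_SCALE = 10 is
 * assumed; the 2.2 GHz frequency is an example value, not from the patch.
 */
#include <stdio.h>

#define NS_SCALE        10
#define NSEC_PER_MSEC   1000000ULL

static unsigned long long cyc2ns_scale;

static void set_scale(unsigned long khz)
{
        /* ns per cycle, kept as a fixed-point integer scaled by 2^NS_SCALE */
        cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / khz;
}

static unsigned long long cycles_to_ns(unsigned long long cyc)
{
        return (cyc * cyc2ns_scale) >> NS_SCALE;
}

int main(void)
{
        set_scale(2200000);                     /* assumed cpu_khz */
        /* one second worth of cycles at 2.2 GHz */
        printf("%llu ns\n", cycles_to_ns(2200000000ULL));
        return 0;
}

With these values one second of cycles converts to roughly 999 million nanoseconds; the ~0.1% error comes from truncating the scale factor to an integer.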
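
When the frequency changes, time_cpufreq_notifier() rescales loops_per_jiffy and cpu_khz from the values recorded at the reference frequency. cpufreq_scale(old, div, mult) is, in effect, old * mult / div (the in-kernel helper additionally guards the intermediate product against 32-bit overflow); the sketch below models it that way with invented reference numbers:

/* Sketch of the proportional rescaling done in time_cpufreq_notifier().
 * cpufreq_scale(old, div, mult) is modelled as old * mult / div; all the
 * reference numbers below are invented for the example.
 */
#include <stdio.h>

static unsigned long scale(unsigned long old, unsigned int div, unsigned int mult)
{
        return (unsigned long)(((unsigned long long)old * mult) / div);
}

int main(void)
{
        unsigned long cpu_khz_ref = 2200000;    /* calibrated at 2.2 GHz */
        unsigned long lpj_ref = 4400000;        /* loops_per_jiffy at that speed */
        unsigned int ref_freq = 2200000;        /* kHz */
        unsigned int new_freq = 1100000;        /* CPU throttled to 1.1 GHz */

        printf("cpu_khz: %lu -> %lu\n",
               cpu_khz_ref, scale(cpu_khz_ref, ref_freq, new_freq));
        printf("lpj:     %lu -> %lu\n",
               lpj_ref, scale(lpj_ref, ref_freq, new_freq));
        return 0;
}

Halving the frequency halves both values, after which the notifier recomputes vxtime.tsc_quot from the new cpu_khz so the gettimeofday interpolation keeps the right microseconds-per-cycle factor.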