author	John Stultz <johnstul@us.ibm.com>	2007-02-16 04:28:19 -0500
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-02-16 11:14:00 -0500
commit	c37e7bb5d2ce36ef377caabfced0b132bb1bf6a7 (patch)
tree	3cc175f6922e7921bad5e588dd3046db1a67f996 /arch/x86_64/kernel
parent	2d0c87c3bc49c60ab5bbac401fb1ef37ff10bbe2 (diff)
[PATCH] time: x86_64: split x86_64/kernel/time.c up
In preparation for the x86_64 generic time conversion, this patch splits
the TSC- and HPET-related code out of arch/x86_64/kernel/time.c into new
hpet.c and tsc.c files.

[akpm@osdl.org: fix printk timestamps]
[akpm@osdl.org: cleanup]
Signed-off-by: John Stultz <johnstul@us.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Andi Kleen <ak@muc.de>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'arch/x86_64/kernel')
-rw-r--r--	arch/x86_64/kernel/Makefile	2
-rw-r--r--	arch/x86_64/kernel/hpet.c	453
-rw-r--r--	arch/x86_64/kernel/time.c	664
-rw-r--r--	arch/x86_64/kernel/tsc.c	212
4 files changed, 670 insertions(+), 661 deletions(-)
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index 6465eee6d920..bb47e86f3d02 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -8,7 +8,7 @@ obj-y	:= process.o signal.o entry.o traps.o irq.o \
 		ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \
 		x8664_ksyms.o i387.o syscall.o vsyscall.o \
 		setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
-		pci-dma.o pci-nommu.o alternative.o
+		pci-dma.o pci-nommu.o alternative.o hpet.o tsc.o
 
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-$(CONFIG_X86_MCE)		+= mce.o therm_throt.o
diff --git a/arch/x86_64/kernel/hpet.c b/arch/x86_64/kernel/hpet.c
new file mode 100644
index 000000000000..08ab37c966c2
--- /dev/null
+++ b/arch/x86_64/kernel/hpet.c
@@ -0,0 +1,453 @@
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/mc146818rtc.h>
+#include <linux/time.h>
+#include <linux/clocksource.h>
+#include <linux/ioport.h>
+#include <linux/acpi.h>
+#include <linux/hpet.h>
+#include <asm/pgtable.h>
+#include <asm/vsyscall.h>
+#include <asm/timex.h>
+#include <asm/hpet.h>
+
+int nohpet __initdata;
+
+unsigned long hpet_address;
+unsigned long hpet_period;	/* fsecs / HPET clock */
+unsigned long hpet_tick;	/* HPET clocks / interrupt */
+
+int hpet_use_timer;		/* Use counter of hpet for time keeping,
+				 * otherwise PIT
+				 */
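+
+/*
+ * Sketch of the math below, assuming the vxtime setup done in time.c:
+ * vxtime.quot holds microseconds-per-HPET-tick scaled by 2^US_SCALE, and
+ * vxtime.last is the counter value at the last timer interrupt, so
+ * (delta * quot) >> US_SCALE yields microseconds since that interrupt.
+ */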
+unsigned int do_gettimeoffset_hpet(void)
+{
+	/* cap counter read to one tick to avoid inconsistencies */
+	unsigned long counter = hpet_readl(HPET_COUNTER) - vxtime.last;
+	return (min(counter,hpet_tick) * vxtime.quot) >> US_SCALE;
+}
+
+#ifdef CONFIG_HPET
+static __init int late_hpet_init(void)
+{
+	struct hpet_data hd;
+	unsigned int ntimer;
+
+	if (!hpet_address)
+		return 0;
+
+	memset(&hd, 0, sizeof(hd));
+
+	ntimer = hpet_readl(HPET_ID);
+	ntimer = (ntimer & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT;
+	ntimer++;
+
+	/*
+	 * Register with driver.
+	 * Timer0 and Timer1 are used by the platform.
+	 */
+	hd.hd_phys_address = hpet_address;
+	hd.hd_address = (void __iomem *)fix_to_virt(FIX_HPET_BASE);
+	hd.hd_nirqs = ntimer;
+	hd.hd_flags = HPET_DATA_PLATFORM;
+	hpet_reserve_timer(&hd, 0);
+#ifdef CONFIG_HPET_EMULATE_RTC
+	hpet_reserve_timer(&hd, 1);
+#endif
+	hd.hd_irq[0] = HPET_LEGACY_8254;
+	hd.hd_irq[1] = HPET_LEGACY_RTC;
+	if (ntimer > 2) {
+		struct hpet *hpet;
+		struct hpet_timer *timer;
+		int i;
+
+		hpet = (struct hpet *) fix_to_virt(FIX_HPET_BASE);
+		timer = &hpet->hpet_timers[2];
+		for (i = 2; i < ntimer; timer++, i++)
+			hd.hd_irq[i] = (timer->hpet_config &
+					Tn_INT_ROUTE_CNF_MASK) >>
+				Tn_INT_ROUTE_CNF_SHIFT;
+	}
+
+	hpet_alloc(&hd);
+	return 0;
+}
+fs_initcall(late_hpet_init);
+#endif
+
+int hpet_timer_stop_set_go(unsigned long tick)
+{
+	unsigned int cfg;
+
+	/*
+	 * Stop the timers and reset the main counter.
+	 */
+	cfg = hpet_readl(HPET_CFG);
+	cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY);
+	hpet_writel(cfg, HPET_CFG);
+	hpet_writel(0, HPET_COUNTER);
+	hpet_writel(0, HPET_COUNTER + 4);
+
+	/*
+	 * Set up timer 0, as periodic with first interrupt to happen at
+	 * hpet_tick, and period also hpet_tick.
+	 */
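+	/*
+	 * Note on the double write below: with HPET_TN_SETVAL set, the first
+	 * write to T0_CMP programs the comparator and the second programs the
+	 * period accumulator (the HPET spec's periodic-mode convention).
+	 */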
+	if (hpet_use_timer) {
+		hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL |
+			    HPET_TN_32BIT, HPET_T0_CFG);
+		hpet_writel(hpet_tick, HPET_T0_CMP); /* next interrupt */
+		hpet_writel(hpet_tick, HPET_T0_CMP); /* period */
+		cfg |= HPET_CFG_LEGACY;
+	}
+
+	/*
+	 * Go!
+	 */
+	cfg |= HPET_CFG_ENABLE;
+	hpet_writel(cfg, HPET_CFG);
+
+	return 0;
+}
+
+int hpet_arch_init(void)
+{
+	unsigned int id;
+
+	if (!hpet_address)
+		return -1;
+	set_fixmap_nocache(FIX_HPET_BASE, hpet_address);
+	__set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE);
+
+	/*
+	 * Read the period, compute tick and quotient.
+	 */
+	id = hpet_readl(HPET_ID);
+
+	if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER))
+		return -1;
+
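+	/*
+	 * Sanity check: the period register is in femtoseconds per HPET tick,
+	 * so the bounds below accept HPET frequencies between 10 MHz and
+	 * 10 GHz.
+	 */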
+	hpet_period = hpet_readl(HPET_PERIOD);
+	if (hpet_period < 100000 || hpet_period > 100000000)
+		return -1;
+
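+	/*
+	 * FSEC_PER_TICK is femtoseconds per timer interrupt, so this works
+	 * out to HPET ticks per interrupt; adding hpet_period / 2 before the
+	 * divide rounds to the nearest integer instead of truncating.
+	 */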
+	hpet_tick = (FSEC_PER_TICK + hpet_period / 2) / hpet_period;
+
+	hpet_use_timer = (id & HPET_ID_LEGSUP);
+
+	return hpet_timer_stop_set_go(hpet_tick);
+}
+
+int hpet_reenable(void)
+{
+	return hpet_timer_stop_set_go(hpet_tick);
+}
+
+/*
+ * calibrate_tsc() calibrates the processor TSC in a very simple way,
+ * comparing it to the HPET timer of known frequency.
+ */
+
+#define TICK_COUNT 100000000
+#define TICK_MIN   5000
+
+/*
+ * Some platforms take periodic SMI interrupts with 5ms duration. Make sure
+ * none occurs between the reads of the HPET & TSC.
+ */
+static void __init read_hpet_tsc(int *hpet, int *tsc)
+{
+	int tsc1, tsc2, hpet1;
+
+	do {
+		tsc1 = get_cycles_sync();
+		hpet1 = hpet_readl(HPET_COUNTER);
+		tsc2 = get_cycles_sync();
+	} while (tsc2 - tsc1 > TICK_MIN);
+	*hpet = hpet1;
+	*tsc = tsc2;
+}
+
+unsigned int __init hpet_calibrate_tsc(void)
+{
+	int tsc_start, hpet_start;
+	int tsc_now, hpet_now;
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	read_hpet_tsc(&hpet_start, &tsc_start);
+
+	do {
+		local_irq_disable();
+		read_hpet_tsc(&hpet_now, &tsc_now);
+		local_irq_restore(flags);
+	} while ((tsc_now - tsc_start) < TICK_COUNT &&
+		 (hpet_now - hpet_start) < TICK_COUNT);
+
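+	/*
+	 * Units check on the return value: hpet_period is fs per HPET tick,
+	 * so the divisor is elapsed time in units of 1000 fs (picoseconds).
+	 * TSC cycles times 10^9, divided by picoseconds, gives cycles per
+	 * millisecond, i.e. the kHz value callers expect.
+	 */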
+	return (tsc_now - tsc_start) * 1000000000L
+		/ ((hpet_now - hpet_start) * hpet_period / 1000);
+}
+
+#ifdef CONFIG_HPET_EMULATE_RTC
+/* HPET in LegacyReplacement mode eats up the RTC interrupt line. When HPET
+ * is enabled, we support RTC interrupt functionality in software.
+ * RTC has 3 kinds of interrupts:
+ * 1) Update Interrupt - generate an interrupt, every second, when the RTC
+ *    clock is updated
+ * 2) Alarm Interrupt - generate an interrupt at a specific time of day
+ * 3) Periodic Interrupt - generate periodic interrupt, with frequencies
+ *    2Hz-8192Hz (2Hz-64Hz for non-root user) (all freqs in powers of 2)
+ * (1) and (2) above are implemented using polling at a frequency of
+ * 64 Hz. The exact frequency is a tradeoff between accuracy and interrupt
+ * overhead. (DEFAULT_RTC_INT_FREQ)
+ * For (3), we use interrupts at 64 Hz or the user-specified periodic
+ * frequency, whichever is higher.
+ */
+#include <linux/rtc.h>
+
+#define DEFAULT_RTC_INT_FREQ	64
+#define RTC_NUM_INTS		1
+
+static unsigned long UIE_on;
+static unsigned long prev_update_sec;
+
+static unsigned long AIE_on;
+static struct rtc_time alarm_time;
+
+static unsigned long PIE_on;
+static unsigned long PIE_freq = DEFAULT_RTC_INT_FREQ;
+static unsigned long PIE_count;
+
+static unsigned long hpet_rtc_int_freq;	/* RTC interrupt frequency */
+static unsigned int hpet_t1_cmp;		/* cached comparator register */
+
+int is_hpet_enabled(void)
+{
+	return hpet_address != 0;
+}
+
+/*
+ * Timer 1 is used for RTC; we do not use the periodic interrupt feature,
+ * even if HPET supports periodic interrupts on Timer 1.
+ * The reason is that to set up a periodic interrupt in HPET, we would need
+ * to stop the main counter, and doing that every time someone disables or
+ * enables the RTC would adversely affect the main kernel timer running on
+ * Timer 0. So, for the time being, simulate the periodic interrupt in
+ * software.
+ *
+ * hpet_rtc_timer_init() is called for the first interrupt; on subsequent
+ * interrupts, reinitialization happens through hpet_rtc_timer_reinit().
+ */
+int hpet_rtc_timer_init(void)
+{
+	unsigned int cfg, cnt;
+	unsigned long flags;
+
+	if (!is_hpet_enabled())
+		return 0;
+	/*
+	 * Set up timer 1's comparator and enable its interrupt.
+	 */
+	if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ))
+		hpet_rtc_int_freq = PIE_freq;
+	else
+		hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
+
+	local_irq_save(flags);
+
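+	/*
+	 * hpet_tick is HPET ticks per kernel tick (1/HZ s), so hpet_tick * HZ
+	 * is HPET ticks per second; dividing by the RTC interrupt frequency
+	 * gives the HPET ticks between two emulated RTC interrupts.
+	 */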
+	cnt = hpet_readl(HPET_COUNTER);
+	cnt += ((hpet_tick * HZ) / hpet_rtc_int_freq);
+	hpet_writel(cnt, HPET_T1_CMP);
+	hpet_t1_cmp = cnt;
+
+	cfg = hpet_readl(HPET_T1_CFG);
+	cfg &= ~HPET_TN_PERIODIC;
+	cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
+	hpet_writel(cfg, HPET_T1_CFG);
+
+	local_irq_restore(flags);
+
+	return 1;
+}
+
+static void hpet_rtc_timer_reinit(void)
+{
+	unsigned int cfg, cnt, ticks_per_int, lost_ints;
+
+	if (unlikely(!(PIE_on | AIE_on | UIE_on))) {
+		cfg = hpet_readl(HPET_T1_CFG);
+		cfg &= ~HPET_TN_ENABLE;
+		hpet_writel(cfg, HPET_T1_CFG);
+		return;
+	}
+
+	if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ))
+		hpet_rtc_int_freq = PIE_freq;
+	else
+		hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
+
+	/* It is more accurate to use the comparator value than current count. */
+	ticks_per_int = hpet_tick * HZ / hpet_rtc_int_freq;
+	hpet_t1_cmp += ticks_per_int;
+	hpet_writel(hpet_t1_cmp, HPET_T1_CMP);
+
+	/*
+	 * If the interrupt handler was delayed too long, the write above tries
+	 * to schedule the next interrupt in the past and the hardware would
+	 * not interrupt until the counter had wrapped around.
+	 * So we have to check that the comparator wasn't set to a past time.
+	 */
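+	/*
+	 * The (int) cast makes the comparison wraparound-safe: if cnt has
+	 * passed hpet_t1_cmp, the unsigned difference interpreted as a signed
+	 * int is positive, even across a 32-bit counter rollover.
+	 */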
+	cnt = hpet_readl(HPET_COUNTER);
+	if (unlikely((int)(cnt - hpet_t1_cmp) > 0)) {
+		lost_ints = (cnt - hpet_t1_cmp) / ticks_per_int + 1;
+		/* Make sure that, even with the time needed to execute
+		 * this code, the next scheduled interrupt has been moved
+		 * back to the future: */
+		lost_ints++;
+
+		hpet_t1_cmp += lost_ints * ticks_per_int;
+		hpet_writel(hpet_t1_cmp, HPET_T1_CMP);
+
+		if (PIE_on)
+			PIE_count += lost_ints;
+
+		if (printk_ratelimit())
+			printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n",
+			       hpet_rtc_int_freq);
+	}
+}
+
+/*
+ * The functions below are called from the rtc driver.
+ * Return 0 if HPET is not being used.
+ * Otherwise do the necessary changes and return 1.
+ */
+int hpet_mask_rtc_irq_bit(unsigned long bit_mask)
+{
+	if (!is_hpet_enabled())
+		return 0;
+
+	if (bit_mask & RTC_UIE)
+		UIE_on = 0;
+	if (bit_mask & RTC_PIE)
+		PIE_on = 0;
+	if (bit_mask & RTC_AIE)
+		AIE_on = 0;
+
+	return 1;
+}
+
+int hpet_set_rtc_irq_bit(unsigned long bit_mask)
+{
+	int timer_init_reqd = 0;
+
+	if (!is_hpet_enabled())
+		return 0;
+
+	if (!(PIE_on | AIE_on | UIE_on))
+		timer_init_reqd = 1;
+
+	if (bit_mask & RTC_UIE)
+		UIE_on = 1;
+	if (bit_mask & RTC_PIE) {
+		PIE_on = 1;
+		PIE_count = 0;
+	}
+	if (bit_mask & RTC_AIE)
+		AIE_on = 1;
+
+	if (timer_init_reqd)
+		hpet_rtc_timer_init();
+
+	return 1;
+}
+
+int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec)
+{
+	if (!is_hpet_enabled())
+		return 0;
+
+	alarm_time.tm_hour = hrs;
+	alarm_time.tm_min = min;
+	alarm_time.tm_sec = sec;
+
+	return 1;
+}
+
+int hpet_set_periodic_freq(unsigned long freq)
+{
+	if (!is_hpet_enabled())
+		return 0;
+
+	PIE_freq = freq;
+	PIE_count = 0;
+
+	return 1;
+}
+
+int hpet_rtc_dropped_irq(void)
+{
+	if (!is_hpet_enabled())
+		return 0;
+
+	return 1;
+}
+
+irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+	struct rtc_time curr_time;
+	unsigned long rtc_int_flag = 0;
+	int call_rtc_interrupt = 0;
+
+	hpet_rtc_timer_reinit();
+
+	if (UIE_on | AIE_on)
+		rtc_get_rtc_time(&curr_time);
+	if (UIE_on) {
+		if (curr_time.tm_sec != prev_update_sec) {
+			/* Set update int info, call real rtc int routine */
+			call_rtc_interrupt = 1;
+			rtc_int_flag = RTC_UF;
+			prev_update_sec = curr_time.tm_sec;
+		}
+	}
+	if (PIE_on) {
+		PIE_count++;
+		if (PIE_count >= hpet_rtc_int_freq / PIE_freq) {
+			/* Set periodic int info, call real rtc int routine */
+			call_rtc_interrupt = 1;
+			rtc_int_flag |= RTC_PF;
+			PIE_count = 0;
+		}
+	}
+	if (AIE_on) {
+		if ((curr_time.tm_sec == alarm_time.tm_sec) &&
+		    (curr_time.tm_min == alarm_time.tm_min) &&
+		    (curr_time.tm_hour == alarm_time.tm_hour)) {
+			/* Set alarm int info, call real rtc int routine */
+			call_rtc_interrupt = 1;
+			rtc_int_flag |= RTC_AF;
+		}
+	}
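+	/*
+	 * rtc_int_flag mimics the layout the RTC driver's handler expects:
+	 * the RTC_UF/RTC_PF/RTC_AF event bits plus RTC_IRQF in the low byte,
+	 * with the number of interrupts being reported in bits 8-15.
+	 */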
+	if (call_rtc_interrupt) {
+		rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8));
+		rtc_interrupt(rtc_int_flag, dev_id);
+	}
+	return IRQ_HANDLED;
+}
+#endif
+
+static int __init nohpet_setup(char *s)
+{
+	nohpet = 1;
+	return 1;
+}
+
+__setup("nohpet", nohpet_setup);
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 76857add2f51..216fc9d74247 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -42,9 +42,10 @@
 #include <linux/cpufreq.h>
 #include <linux/hpet.h>
 #include <asm/apic.h>
+#include <asm/hpet.h>
 
 #ifdef CONFIG_CPU_FREQ
-static void cpufreq_delayed_get(void);
+extern void cpufreq_delayed_get(void);
 #endif
 extern void i8254_timer_resume(void);
 extern int using_apic_timer;
@@ -55,22 +56,6 @@ DEFINE_SPINLOCK(rtc_lock);
 EXPORT_SYMBOL(rtc_lock);
 DEFINE_SPINLOCK(i8253_lock);
 
-int nohpet __initdata = 0;
-static int notsc __initdata = 0;
-
-#define USEC_PER_TICK (USEC_PER_SEC / HZ)
-#define NSEC_PER_TICK (NSEC_PER_SEC / HZ)
-#define FSEC_PER_TICK (FSEC_PER_SEC / HZ)
-
-#define NS_SCALE	10 /* 2^10, carefully chosen */
-#define US_SCALE	32 /* 2^32, arbitralrily chosen */
-
-unsigned int cpu_khz;			/* TSC clocks / usec, not used here */
-EXPORT_SYMBOL(cpu_khz);
-unsigned long hpet_address;
-static unsigned long hpet_period;	/* fsecs / HPET clock */
-unsigned long hpet_tick;		/* HPET clocks / interrupt */
-int hpet_use_timer;	/* Use counter of hpet for time keeping, otherwise PIT */
 unsigned long vxtime_hz = PIT_TICK_RATE;
 int report_lost_ticks;			/* command line option */
 unsigned long long monotonic_base;
@@ -81,34 +66,6 @@ volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
 struct timespec __xtime __section_xtime;
 struct timezone __sys_tz __section_sys_tz;
 
-/*
- * do_gettimeoffset() returns microseconds since last timer interrupt was
- * triggered by hardware. A memory read of HPET is slower than a register read
- * of TSC, but much more reliable. It's also synchronized to the timer
- * interrupt. Note that do_gettimeoffset() may return more than hpet_tick, if a
- * timer interrupt has happened already, but vxtime.trigger wasn't updated yet.
- * This is not a problem, because jiffies hasn't updated either. They are bound
- * together by xtime_lock.
- */
-
-static inline unsigned int do_gettimeoffset_tsc(void)
-{
-	unsigned long t;
-	unsigned long x;
-	t = get_cycles_sync();
-	if (t < vxtime.last_tsc)
-		t = vxtime.last_tsc; /* hack */
-	x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> US_SCALE;
-	return x;
-}
-
-static inline unsigned int do_gettimeoffset_hpet(void)
-{
-	/* cap counter read to one tick to avoid inconsistencies */
-	unsigned long counter = hpet_readl(HPET_COUNTER) - vxtime.last;
-	return (min(counter,hpet_tick) * vxtime.quot) >> US_SCALE;
-}
-
 unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc;
 
 /*
@@ -272,7 +229,7 @@ static void set_rtc_mmss(unsigned long nowtime)
  * Note: This function is required to return accurate
  * time even in the absence of multiple timer ticks.
  */
-static inline unsigned long long cycles_2_ns(unsigned long long cyc);
+extern unsigned long long cycles_2_ns(unsigned long long cyc);
 unsigned long long monotonic_clock(void)
 {
 	unsigned long seq;
@@ -462,40 +419,6 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static unsigned int cyc2ns_scale __read_mostly;
-
-static inline void set_cyc2ns_scale(unsigned long cpu_khz)
-{
-	cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / cpu_khz;
-}
-
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
-{
-	return (cyc * cyc2ns_scale) >> NS_SCALE;
-}
-
-unsigned long long sched_clock(void)
-{
-	unsigned long a = 0;
-
-#if 0
-	/* Don't do a HPET read here. Using TSC always is much faster
-	   and HPET may not be mapped yet when the scheduler first runs.
-	   Disadvantage is a small drift between CPUs in some configurations,
-	   but that should be tolerable. */
-	if (__vxtime.mode == VXTIME_HPET)
-		return (hpet_readl(HPET_COUNTER) * vxtime.quot) >> US_SCALE;
-#endif
-
-	/* Could do CPU core sync here. Opteron can execute rdtsc speculatively,
-	   which means it is not completely exact and may not be monotonous between
-	   CPUs. But the errors should be too small to matter for scheduling
-	   purposes. */
-
-	rdtscll(a);
-	return cycles_2_ns(a);
-}
-
 static unsigned long get_cmos_time(void)
 {
 	unsigned int year, mon, day, hour, min, sec;
499static unsigned long get_cmos_time(void) 422static unsigned long get_cmos_time(void)
500{ 423{
501 unsigned int year, mon, day, hour, min, sec; 424 unsigned int year, mon, day, hour, min, sec;
@@ -547,164 +470,6 @@ static unsigned long get_cmos_time(void)
 	return mktime(year, mon, day, hour, min, sec);
 }
 
-#ifdef CONFIG_CPU_FREQ
-
-/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
-   changes.
-
-   RED-PEN: On SMP we assume all CPUs run with the same frequency.  It's
-   not that important because current Opteron setups do not support
-   scaling on SMP anyroads.
-
-   Should fix up last_tsc too. Currently gettimeofday in the
-   first tick after the change will be slightly wrong. */
-
-#include <linux/workqueue.h>
-
-static unsigned int cpufreq_delayed_issched = 0;
-static unsigned int cpufreq_init = 0;
-static struct work_struct cpufreq_delayed_get_work;
-
-static void handle_cpufreq_delayed_get(struct work_struct *v)
-{
-	unsigned int cpu;
-	for_each_online_cpu(cpu) {
-		cpufreq_get(cpu);
-	}
-	cpufreq_delayed_issched = 0;
-}
-
-/* if we notice lost ticks, schedule a call to cpufreq_get() as it tries
- * to verify the CPU frequency the timing core thinks the CPU is running
- * at is still correct.
- */
-static void cpufreq_delayed_get(void)
-{
-	static int warned;
-	if (cpufreq_init && !cpufreq_delayed_issched) {
-		cpufreq_delayed_issched = 1;
-		if (!warned) {
-			warned = 1;
-			printk(KERN_DEBUG
-		"Losing some ticks... checking if CPU frequency changed.\n");
-		}
-		schedule_work(&cpufreq_delayed_get_work);
-	}
-}
-
-static unsigned int ref_freq = 0;
-static unsigned long loops_per_jiffy_ref = 0;
-
-static unsigned long cpu_khz_ref = 0;
-
-static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
-				 void *data)
-{
-	struct cpufreq_freqs *freq = data;
-	unsigned long *lpj, dummy;
-
-	if (cpu_has(&cpu_data[freq->cpu], X86_FEATURE_CONSTANT_TSC))
-		return 0;
-
-	lpj = &dummy;
-	if (!(freq->flags & CPUFREQ_CONST_LOOPS))
-#ifdef CONFIG_SMP
-		lpj = &cpu_data[freq->cpu].loops_per_jiffy;
-#else
-		lpj = &boot_cpu_data.loops_per_jiffy;
-#endif
-
-	if (!ref_freq) {
-		ref_freq = freq->old;
-		loops_per_jiffy_ref = *lpj;
-		cpu_khz_ref = cpu_khz;
-	}
-	if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
-	    (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
-	    (val == CPUFREQ_RESUMECHANGE)) {
-		*lpj =
-		cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
-
-		cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
-		if (!(freq->flags & CPUFREQ_CONST_LOOPS))
-			vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
-	}
-
-	set_cyc2ns_scale(cpu_khz_ref);
-
-	return 0;
-}
-
-static struct notifier_block time_cpufreq_notifier_block = {
-	.notifier_call = time_cpufreq_notifier
-};
-
-static int __init cpufreq_tsc(void)
-{
-	INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get);
-	if (!cpufreq_register_notifier(&time_cpufreq_notifier_block,
-				       CPUFREQ_TRANSITION_NOTIFIER))
-		cpufreq_init = 1;
-	return 0;
-}
-
-core_initcall(cpufreq_tsc);
-
-#endif
-
-/*
- * calibrate_tsc() calibrates the processor TSC in a very simple way, comparing
- * it to the HPET timer of known frequency.
- */
-
-#define TICK_COUNT 100000000
-#define TICK_MIN   5000
-#define MAX_READ_RETRIES 5
-
-/*
- * Some platforms take periodic SMI interrupts with 5ms duration. Make sure none
- * occurs between the reads of the hpet & TSC.
- */
-static void __init read_hpet_tsc(int *hpet, int *tsc)
-{
-	int tsc1, tsc2, hpet1, retries = 0;
-	static int msg;
-
-	do {
-		tsc1 = get_cycles_sync();
-		hpet1 = hpet_readl(HPET_COUNTER);
-		tsc2 = get_cycles_sync();
-	} while (tsc2 - tsc1 > TICK_MIN && retries++ < MAX_READ_RETRIES);
-	if (retries >= MAX_READ_RETRIES && !msg++)
-		printk(KERN_WARNING
-		       "hpet.c: exceeded max retries to read HPET & TSC\n");
-	*hpet = hpet1;
-	*tsc = tsc2;
-}
-
-
-static unsigned int __init hpet_calibrate_tsc(void)
-{
-	int tsc_start, hpet_start;
-	int tsc_now, hpet_now;
-	unsigned long flags;
-
-	local_irq_save(flags);
-	local_irq_disable();
-
-	read_hpet_tsc(&hpet_start, &tsc_start);
-
-	do {
-		local_irq_disable();
-		read_hpet_tsc(&hpet_now, &tsc_now);
-		local_irq_restore(flags);
-	} while ((tsc_now - tsc_start) < TICK_COUNT &&
-		 (hpet_now - hpet_start) < TICK_COUNT);
-
-	return (tsc_now - tsc_start) * 1000000000L
-		/ ((hpet_now - hpet_start) * hpet_period / 1000);
-}
-
 
 /*
  * pit_calibrate_tsc() uses the speaker output (channel 2) of
@@ -735,124 +500,6 @@ static unsigned int __init pit_calibrate_tsc(void)
 	return (end - start) / 50;
 }
 
-#ifdef CONFIG_HPET
-static __init int late_hpet_init(void)
-{
-	struct hpet_data hd;
-	unsigned int ntimer;
-
-	if (!hpet_address)
-		return 0;
-
-	memset(&hd, 0, sizeof (hd));
-
-	ntimer = hpet_readl(HPET_ID);
-	ntimer = (ntimer & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT;
-	ntimer++;
-
-	/*
-	 * Register with driver.
-	 * Timer0 and Timer1 is used by platform.
-	 */
-	hd.hd_phys_address = hpet_address;
-	hd.hd_address = (void __iomem *)fix_to_virt(FIX_HPET_BASE);
-	hd.hd_nirqs = ntimer;
-	hd.hd_flags = HPET_DATA_PLATFORM;
-	hpet_reserve_timer(&hd, 0);
-#ifdef CONFIG_HPET_EMULATE_RTC
-	hpet_reserve_timer(&hd, 1);
-#endif
-	hd.hd_irq[0] = HPET_LEGACY_8254;
-	hd.hd_irq[1] = HPET_LEGACY_RTC;
-	if (ntimer > 2) {
-		struct hpet *hpet;
-		struct hpet_timer *timer;
-		int i;
-
-		hpet = (struct hpet *) fix_to_virt(FIX_HPET_BASE);
-		timer = &hpet->hpet_timers[2];
-		for (i = 2; i < ntimer; timer++, i++)
-			hd.hd_irq[i] = (timer->hpet_config &
-					Tn_INT_ROUTE_CNF_MASK) >>
-				Tn_INT_ROUTE_CNF_SHIFT;
-
-	}
-
-	hpet_alloc(&hd);
-	return 0;
-}
-fs_initcall(late_hpet_init);
-#endif
-
-static int hpet_timer_stop_set_go(unsigned long tick)
-{
-	unsigned int cfg;
-
-/*
- * Stop the timers and reset the main counter.
- */
-
-	cfg = hpet_readl(HPET_CFG);
-	cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY);
-	hpet_writel(cfg, HPET_CFG);
-	hpet_writel(0, HPET_COUNTER);
-	hpet_writel(0, HPET_COUNTER + 4);
-
-/*
- * Set up timer 0, as periodic with first interrupt to happen at hpet_tick,
- * and period also hpet_tick.
- */
-	if (hpet_use_timer) {
-		hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL |
-		    HPET_TN_32BIT, HPET_T0_CFG);
-		hpet_writel(hpet_tick, HPET_T0_CMP); /* next interrupt */
-		hpet_writel(hpet_tick, HPET_T0_CMP); /* period */
-		cfg |= HPET_CFG_LEGACY;
-	}
-/*
- * Go!
- */
-
-	cfg |= HPET_CFG_ENABLE;
-	hpet_writel(cfg, HPET_CFG);
-
-	return 0;
-}
-
-static int hpet_init(void)
-{
-	unsigned int id;
-
-	if (!hpet_address)
-		return -1;
-	set_fixmap_nocache(FIX_HPET_BASE, hpet_address);
-	__set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE);
-
-/*
- * Read the period, compute tick and quotient.
- */
-
-	id = hpet_readl(HPET_ID);
-
-	if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER))
-		return -1;
-
-	hpet_period = hpet_readl(HPET_PERIOD);
-	if (hpet_period < 100000 || hpet_period > 100000000)
-		return -1;
-
-	hpet_tick = (FSEC_PER_TICK + hpet_period / 2) / hpet_period;
-
-	hpet_use_timer = (id & HPET_ID_LEGSUP);
-
-	return hpet_timer_stop_set_go(hpet_tick);
-}
-
-static int hpet_reenable(void)
-{
-	return hpet_timer_stop_set_go(hpet_tick);
-}
-
 #define PIT_MODE 0x43
 #define PIT_CH0  0x40
 
@@ -910,7 +557,7 @@ void __init time_init(void)
 	set_normalized_timespec(&wall_to_monotonic,
 				-xtime.tv_sec, -xtime.tv_nsec);
 
-	if (!hpet_init())
+	if (!hpet_arch_init())
 		vxtime_hz = (FSEC_PER_SEC + hpet_period / 2) / hpet_period;
 	else
 		hpet_address = 0;
@@ -945,42 +592,6 @@ void __init time_init(void)
 #endif
 }
 
-static int tsc_unstable = 0;
-
-void mark_tsc_unstable(void)
-{
-	tsc_unstable = 1;
-}
-EXPORT_SYMBOL_GPL(mark_tsc_unstable);
-
-/*
- * Make an educated guess if the TSC is trustworthy and synchronized
- * over all CPUs.
- */
-__cpuinit int unsynchronized_tsc(void)
-{
-	if (tsc_unstable)
-		return 1;
-
-#ifdef CONFIG_SMP
-	if (apic_is_clustered_box())
-		return 1;
-#endif
-	/* Most intel systems have synchronized TSCs except for
-	   multi node systems */
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
-#ifdef CONFIG_ACPI
-		/* But TSC doesn't tick in C3 so don't use it there */
-		if (acpi_gbl_FADT.header.length > 0 && acpi_gbl_FADT.C3latency < 1000)
-			return 1;
-#endif
-		return 0;
-	}
-
-	/* Assume multi socket systems are not synchronized */
-	return num_present_cpus() > 1;
-}
-
 /*
  * Decide what mode gettimeofday should use.
  */
@@ -1116,270 +727,3 @@ static int time_init_device(void)
 }
 
 device_initcall(time_init_device);
-
-#ifdef CONFIG_HPET_EMULATE_RTC
-/* HPET in LegacyReplacement Mode eats up RTC interrupt line. When, HPET
- * is enabled, we support RTC interrupt functionality in software.
- * RTC has 3 kinds of interrupts:
- * 1) Update Interrupt - generate an interrupt, every sec, when RTC clock
- *	is updated
- * 2) Alarm Interrupt - generate an interrupt at a specific time of day
- * 3) Periodic Interrupt - generate periodic interrupt, with frequencies
- *	2Hz-8192Hz (2Hz-64Hz for non-root user) (all freqs in powers of 2)
- * (1) and (2) above are implemented using polling at a frequency of
- * 64 Hz. The exact frequency is a tradeoff between accuracy and interrupt
- * overhead. (DEFAULT_RTC_INT_FREQ)
- * For (3), we use interrupts at 64Hz or user specified periodic
- * frequency, whichever is higher.
- */
-#include <linux/rtc.h>
-
-#define DEFAULT_RTC_INT_FREQ	64
-#define RTC_NUM_INTS		1
-
-static unsigned long UIE_on;
-static unsigned long prev_update_sec;
-
-static unsigned long AIE_on;
-static struct rtc_time alarm_time;
-
-static unsigned long PIE_on;
-static unsigned long PIE_freq = DEFAULT_RTC_INT_FREQ;
-static unsigned long PIE_count;
-
-static unsigned long hpet_rtc_int_freq;	/* RTC interrupt frequency */
-static unsigned int hpet_t1_cmp;		/* cached comparator register */
-
-int is_hpet_enabled(void)
-{
-	return hpet_address != 0;
-}
-
-/*
- * Timer 1 for RTC, we do not use periodic interrupt feature,
- * even if HPET supports periodic interrupts on Timer 1.
- * The reason being, to set up a periodic interrupt in HPET, we need to
- * stop the main counter. And if we do that everytime someone diables/enables
- * RTC, we will have adverse effect on main kernel timer running on Timer 0.
- * So, for the time being, simulate the periodic interrupt in software.
- *
- * hpet_rtc_timer_init() is called for the first time and during subsequent
- * interuppts reinit happens through hpet_rtc_timer_reinit().
- */
-int hpet_rtc_timer_init(void)
-{
-	unsigned int cfg, cnt;
-	unsigned long flags;
-
-	if (!is_hpet_enabled())
-		return 0;
-	/*
-	 * Set the counter 1 and enable the interrupts.
-	 */
-	if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ))
-		hpet_rtc_int_freq = PIE_freq;
-	else
-		hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
-
-	local_irq_save(flags);
-
-	cnt = hpet_readl(HPET_COUNTER);
-	cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq);
-	hpet_writel(cnt, HPET_T1_CMP);
-	hpet_t1_cmp = cnt;
-
-	cfg = hpet_readl(HPET_T1_CFG);
-	cfg &= ~HPET_TN_PERIODIC;
-	cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
-	hpet_writel(cfg, HPET_T1_CFG);
-
-	local_irq_restore(flags);
-
-	return 1;
-}
-
-static void hpet_rtc_timer_reinit(void)
-{
-	unsigned int cfg, cnt, ticks_per_int, lost_ints;
-
-	if (unlikely(!(PIE_on | AIE_on | UIE_on))) {
-		cfg = hpet_readl(HPET_T1_CFG);
-		cfg &= ~HPET_TN_ENABLE;
-		hpet_writel(cfg, HPET_T1_CFG);
-		return;
-	}
-
-	if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ))
-		hpet_rtc_int_freq = PIE_freq;
-	else
-		hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
-
-	/* It is more accurate to use the comparator value than current count.*/
-	ticks_per_int = hpet_tick * HZ / hpet_rtc_int_freq;
-	hpet_t1_cmp += ticks_per_int;
-	hpet_writel(hpet_t1_cmp, HPET_T1_CMP);
-
-	/*
-	 * If the interrupt handler was delayed too long, the write above tries
-	 * to schedule the next interrupt in the past and the hardware would
-	 * not interrupt until the counter had wrapped around.
-	 * So we have to check that the comparator wasn't set to a past time.
-	 */
-	cnt = hpet_readl(HPET_COUNTER);
-	if (unlikely((int)(cnt - hpet_t1_cmp) > 0)) {
-		lost_ints = (cnt - hpet_t1_cmp) / ticks_per_int + 1;
-		/* Make sure that, even with the time needed to execute
-		 * this code, the next scheduled interrupt has been moved
-		 * back to the future: */
-		lost_ints++;
-
-		hpet_t1_cmp += lost_ints * ticks_per_int;
-		hpet_writel(hpet_t1_cmp, HPET_T1_CMP);
-
-		if (PIE_on)
-			PIE_count += lost_ints;
-
-		if (printk_ratelimit())
-			printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n",
-			       hpet_rtc_int_freq);
-	}
-}
-
-/*
- * The functions below are called from rtc driver.
- * Return 0 if HPET is not being used.
- * Otherwise do the necessary changes and return 1.
- */
-int hpet_mask_rtc_irq_bit(unsigned long bit_mask)
-{
-	if (!is_hpet_enabled())
-		return 0;
-
-	if (bit_mask & RTC_UIE)
-		UIE_on = 0;
-	if (bit_mask & RTC_PIE)
-		PIE_on = 0;
-	if (bit_mask & RTC_AIE)
-		AIE_on = 0;
-
-	return 1;
-}
-
-int hpet_set_rtc_irq_bit(unsigned long bit_mask)
-{
-	int timer_init_reqd = 0;
-
-	if (!is_hpet_enabled())
-		return 0;
-
-	if (!(PIE_on | AIE_on | UIE_on))
-		timer_init_reqd = 1;
-
-	if (bit_mask & RTC_UIE) {
-		UIE_on = 1;
-	}
-	if (bit_mask & RTC_PIE) {
-		PIE_on = 1;
-		PIE_count = 0;
-	}
-	if (bit_mask & RTC_AIE) {
-		AIE_on = 1;
-	}
-
-	if (timer_init_reqd)
-		hpet_rtc_timer_init();
-
-	return 1;
-}
-
-int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec)
-{
-	if (!is_hpet_enabled())
-		return 0;
-
-	alarm_time.tm_hour = hrs;
-	alarm_time.tm_min = min;
-	alarm_time.tm_sec = sec;
-
-	return 1;
-}
-
-int hpet_set_periodic_freq(unsigned long freq)
-{
-	if (!is_hpet_enabled())
-		return 0;
-
-	PIE_freq = freq;
-	PIE_count = 0;
-
-	return 1;
-}
-
-int hpet_rtc_dropped_irq(void)
-{
-	if (!is_hpet_enabled())
-		return 0;
-
-	return 1;
-}
-
-irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs)
-{
-	struct rtc_time curr_time;
-	unsigned long rtc_int_flag = 0;
-	int call_rtc_interrupt = 0;
-
-	hpet_rtc_timer_reinit();
-
-	if (UIE_on | AIE_on) {
-		rtc_get_rtc_time(&curr_time);
-	}
-	if (UIE_on) {
-		if (curr_time.tm_sec != prev_update_sec) {
-			/* Set update int info, call real rtc int routine */
-			call_rtc_interrupt = 1;
-			rtc_int_flag = RTC_UF;
-			prev_update_sec = curr_time.tm_sec;
-		}
-	}
-	if (PIE_on) {
-		PIE_count++;
-		if (PIE_count >= hpet_rtc_int_freq/PIE_freq) {
-			/* Set periodic int info, call real rtc int routine */
-			call_rtc_interrupt = 1;
-			rtc_int_flag |= RTC_PF;
-			PIE_count = 0;
-		}
-	}
-	if (AIE_on) {
-		if ((curr_time.tm_sec == alarm_time.tm_sec) &&
-		    (curr_time.tm_min == alarm_time.tm_min) &&
-		    (curr_time.tm_hour == alarm_time.tm_hour)) {
-			/* Set alarm int info, call real rtc int routine */
-			call_rtc_interrupt = 1;
-			rtc_int_flag |= RTC_AF;
-		}
-	}
-	if (call_rtc_interrupt) {
-		rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8));
-		rtc_interrupt(rtc_int_flag, dev_id);
-	}
-	return IRQ_HANDLED;
-}
-#endif
-
-static int __init nohpet_setup(char *s)
-{
-	nohpet = 1;
-	return 1;
-}
-
-__setup("nohpet", nohpet_setup);
-
-int __init notsc_setup(char *s)
-{
-	notsc = 1;
-	return 1;
-}
-
-__setup("notsc", notsc_setup);
diff --git a/arch/x86_64/kernel/tsc.c b/arch/x86_64/kernel/tsc.c
new file mode 100644
index 000000000000..2dbac15ab1f0
--- /dev/null
+++ b/arch/x86_64/kernel/tsc.c
@@ -0,0 +1,212 @@
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/clocksource.h>
+#include <linux/time.h>
+#include <linux/acpi.h>
+#include <linux/cpufreq.h>
+
+#include <asm/timex.h>
+
+int notsc __initdata = 0;
+
+unsigned int cpu_khz;		/* TSC clocks / usec, not used here */
+EXPORT_SYMBOL(cpu_khz);
+
+/*
+ * do_gettimeoffset() returns microseconds since the last timer interrupt was
+ * triggered by hardware. A memory read of the HPET is slower than a register
+ * read of the TSC, but much more reliable. It's also synchronized to the
+ * timer interrupt. Note that do_gettimeoffset() may return more than
+ * hpet_tick if a timer interrupt has happened already, but vxtime.trigger
+ * wasn't updated yet. This is not a problem, because jiffies hasn't updated
+ * either. They are bound together by xtime_lock.
+ */
+
+unsigned int do_gettimeoffset_tsc(void)
+{
+	unsigned long t;
+	unsigned long x;
+	t = get_cycles_sync();
+	if (t < vxtime.last_tsc)
+		t = vxtime.last_tsc; /* hack */
+	x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> US_SCALE;
+	return x;
+}
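+
+/*
+ * Sketch of the fixed-point math above: vxtime.tsc_quot is
+ * microseconds-per-TSC-cycle scaled by 2^US_SCALE (it is set from
+ * (USEC_PER_MSEC << US_SCALE) / cpu_khz in the cpufreq notifier below), so
+ * multiplying the cycle delta by it and shifting right by US_SCALE yields
+ * microseconds.
+ */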
+
+static unsigned int cyc2ns_scale __read_mostly;
+
+void set_cyc2ns_scale(unsigned long khz)
+{
+	cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / khz;
+}
+
+unsigned long long cycles_2_ns(unsigned long long cyc)
+{
+	return (cyc * cyc2ns_scale) >> NS_SCALE;
+}
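+
+/*
+ * Same fixed-point idea as above: cyc2ns_scale is
+ * (ns per millisecond << NS_SCALE) / (cycles per millisecond), i.e.
+ * nanoseconds-per-cycle scaled by 2^NS_SCALE, so multiplying by it and
+ * shifting right by NS_SCALE converts a cycle count to nanoseconds while
+ * staying in integer arithmetic.
+ */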
+
+unsigned long long sched_clock(void)
+{
+	unsigned long a = 0;
+
+	/* Could do CPU core sync here. Opteron can execute rdtsc speculatively,
+	 * which means it is not completely exact and may not be monotonic
+	 * between CPUs. But the errors should be too small to matter for
+	 * scheduling purposes.
+	 */
+
+	rdtscll(a);
+	return cycles_2_ns(a);
+}
+
+#ifdef CONFIG_CPU_FREQ
+
+/* Frequency scaling support. Adjust the TSC-based timer when the cpu
+ * frequency changes.
+ *
+ * RED-PEN: On SMP we assume all CPUs run with the same frequency. It's
+ * not that important because current Opteron setups do not support
+ * scaling on SMP anyway.
+ *
+ * Should fix up last_tsc too. Currently gettimeofday in the
+ * first tick after the change will be slightly wrong.
+ */
+
+#include <linux/workqueue.h>
+
+static unsigned int cpufreq_delayed_issched = 0;
+static unsigned int cpufreq_init = 0;
+static struct work_struct cpufreq_delayed_get_work;
+
+static void handle_cpufreq_delayed_get(struct work_struct *v)
+{
+	unsigned int cpu;
+	for_each_online_cpu(cpu) {
+		cpufreq_get(cpu);
+	}
+	cpufreq_delayed_issched = 0;
+}
+
+/* If we notice lost ticks, schedule a call to cpufreq_get(); it verifies
+ * that the CPU frequency the timing core thinks the CPU is running at is
+ * still correct.
+ */
+void cpufreq_delayed_get(void)
+{
+	static int warned;
+	if (cpufreq_init && !cpufreq_delayed_issched) {
+		cpufreq_delayed_issched = 1;
+		if (!warned) {
+			warned = 1;
+			printk(KERN_DEBUG "Losing some ticks... "
+			       "checking if CPU frequency changed.\n");
+		}
+		schedule_work(&cpufreq_delayed_get_work);
+	}
+}
+
+static unsigned int ref_freq = 0;
+static unsigned long loops_per_jiffy_ref = 0;
+
+static unsigned long cpu_khz_ref = 0;
+
+static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
+				 void *data)
+{
+	struct cpufreq_freqs *freq = data;
+	unsigned long *lpj, dummy;
+
+	if (cpu_has(&cpu_data[freq->cpu], X86_FEATURE_CONSTANT_TSC))
+		return 0;
+
+	lpj = &dummy;
+	if (!(freq->flags & CPUFREQ_CONST_LOOPS))
+#ifdef CONFIG_SMP
+		lpj = &cpu_data[freq->cpu].loops_per_jiffy;
+#else
+		lpj = &boot_cpu_data.loops_per_jiffy;
+#endif
+
+	if (!ref_freq) {
+		ref_freq = freq->old;
+		loops_per_jiffy_ref = *lpj;
+		cpu_khz_ref = cpu_khz;
+	}
+	if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
+	    (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
+	    (val == CPUFREQ_RESUMECHANGE)) {
+		*lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
+
+		cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
+		if (!(freq->flags & CPUFREQ_CONST_LOOPS))
+			vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
+	}
+
+	set_cyc2ns_scale(cpu_khz_ref);
+
+	return 0;
+}
+
+static struct notifier_block time_cpufreq_notifier_block = {
+	.notifier_call = time_cpufreq_notifier
+};
+
+static int __init cpufreq_tsc(void)
+{
+	INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get);
+	if (!cpufreq_register_notifier(&time_cpufreq_notifier_block,
+				       CPUFREQ_TRANSITION_NOTIFIER))
+		cpufreq_init = 1;
+	return 0;
+}
+
+core_initcall(cpufreq_tsc);
+
+#endif
+
+static int tsc_unstable = 0;
+
+void mark_tsc_unstable(void)
+{
+	tsc_unstable = 1;
+}
+EXPORT_SYMBOL_GPL(mark_tsc_unstable);
+
+/*
+ * Make an educated guess whether the TSC is trustworthy and synchronized
+ * across all CPUs.
+ */
+__cpuinit int unsynchronized_tsc(void)
+{
+	if (tsc_unstable)
+		return 1;
+
+#ifdef CONFIG_SMP
+	if (apic_is_clustered_box())
+		return 1;
+#endif
+	/* Most Intel systems have synchronized TSCs, except for
+	   multi-node systems */
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
+#ifdef CONFIG_ACPI
+		/* But the TSC doesn't tick in C3, so don't use it there */
+		if (acpi_gbl_FADT.header.length > 0 && acpi_gbl_FADT.C3latency < 1000)
+			return 1;
+#endif
+		return 0;
+	}
+
+	/* Assume multi-socket systems are not synchronized */
+	return num_present_cpus() > 1;
+}
+
+int __init notsc_setup(char *s)
+{
+	notsc = 1;
+	return 1;
+}
+
+__setup("notsc", notsc_setup);