Diffstat (limited to 'arch/i386/kernel')
-rw-r--r--  arch/i386/kernel/Makefile                   |    3
-rw-r--r--  arch/i386/kernel/acpi/boot.c                |   30
-rw-r--r--  arch/i386/kernel/apic.c                     | 1629
-rw-r--r--  arch/i386/kernel/apm.c                      |   44
-rw-r--r--  arch/i386/kernel/cpu/cpufreq/Kconfig        |    9
-rw-r--r--  arch/i386/kernel/cpu/cpufreq/Makefile       |    1
-rw-r--r--  arch/i386/kernel/cpu/cpufreq/e_powersaver.c |  334
-rw-r--r--  arch/i386/kernel/cpu/cpufreq/longhaul.c     |  359
-rw-r--r--  arch/i386/kernel/cpu/cpufreq/longhaul.h     |  153
-rw-r--r--  arch/i386/kernel/cpu/cpufreq/powernow-k8.c  |    6
-rw-r--r--  arch/i386/kernel/hpet.c                     |  498
-rw-r--r--  arch/i386/kernel/i8253.c                    |   96
-rw-r--r--  arch/i386/kernel/i8259.c                    |    7
-rw-r--r--  arch/i386/kernel/io_apic.c                  |   10
-rw-r--r--  arch/i386/kernel/irq.c                      |   22
-rw-r--r--  arch/i386/kernel/nmi.c                      |    9
-rw-r--r--  arch/i386/kernel/process.c                  |    3
-rw-r--r--  arch/i386/kernel/smpboot.c                  |  187
-rw-r--r--  arch/i386/kernel/time.c                     |  124
-rw-r--r--  arch/i386/kernel/time_hpet.c                |  497
-rw-r--r--  arch/i386/kernel/tsc.c                      |  169
-rw-r--r--  arch/i386/kernel/tsc_sync.c                 |    1
-rw-r--r--  arch/i386/kernel/vmitime.c                  |    2
23 files changed, 2117 insertions(+), 2076 deletions(-)
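For orientation before the diff: the apic.c changes below convert the i386 local APIC timer to the clockevents framework. What follows is a minimal sketch of that registration pattern, written against the 2.6.21-era <linux/clockchips.h> API as it appears in the hunks; the "demo" device name and the callback bodies are illustrative placeholders, not the kernel's actual implementation.

#include <linux/clockchips.h>
#include <linux/cpumask.h>
#include <linux/smp.h>

/* Placeholder callbacks; the real ones program the APIC timer registers. */
static int demo_next_event(unsigned long delta, struct clock_event_device *evt)
{
	/* arm the hardware to fire 'delta' clock ticks from now */
	return 0;
}

static void demo_set_mode(enum clock_event_mode mode,
			  struct clock_event_device *evt)
{
	/* switch the hardware between periodic, oneshot and shutdown */
}

static struct clock_event_device demo_clockevent = {
	.name		= "demo",
	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
	.shift		= 32,	/* .mult is filled in after calibration */
	.rating		= 100,
	.irq		= -1,
	.set_mode	= demo_set_mode,
	.set_next_event	= demo_next_event,
};

static void __init demo_timer_init(void)
{
	demo_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
	clockevents_register_device(&demo_clockevent);
}

Once registered, the framework drives the device through the callbacks, which is exactly how lapic_clockevent is wired up in the apic.c hunk below.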
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index cbe4e601885c..4ae3dcf1d2f0 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -18,7 +18,7 @@ obj-$(CONFIG_X86_MSR) += msr.o
 obj-$(CONFIG_X86_CPUID) += cpuid.o
 obj-$(CONFIG_MICROCODE) += microcode.o
 obj-$(CONFIG_APM) += apm.o
-obj-$(CONFIG_X86_SMP) += smp.o smpboot.o
+obj-$(CONFIG_X86_SMP) += smp.o smpboot.o tsc_sync.o
 obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
 obj-$(CONFIG_X86_MPPARSE) += mpparse.o
 obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
@@ -32,7 +32,6 @@ obj-$(CONFIG_KPROBES) += kprobes.o
 obj-$(CONFIG_MODULES) += module.o
 obj-y += sysenter.o vsyscall.o
 obj-$(CONFIG_ACPI_SRAT) += srat.o
-obj-$(CONFIG_HPET_TIMER) += time_hpet.o
 obj-$(CONFIG_EFI) += efi.o efi_stub.o
 obj-$(CONFIG_DOUBLEFAULT) += doublefault.o
 obj-$(CONFIG_VM86) += vm86.o
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
index e94aff6888ca..e5eb97a910ed 100644
--- a/arch/i386/kernel/acpi/boot.c
+++ b/arch/i386/kernel/acpi/boot.c
@@ -25,6 +25,7 @@
 
 #include <linux/init.h>
 #include <linux/acpi.h>
+#include <linux/acpi_pmtmr.h>
 #include <linux/efi.h>
 #include <linux/cpumask.h>
 #include <linux/module.h>
@@ -615,6 +616,7 @@ static int __init acpi_parse_sbf(struct acpi_table_header *table)
 }
 
 #ifdef CONFIG_HPET_TIMER
+#include <asm/hpet.h>
 
 static int __init acpi_parse_hpet(struct acpi_table_header *table)
 {
@@ -645,24 +647,11 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table)
 		hpet_res->end = (1 * 1024) - 1;
 	}
 
-#ifdef CONFIG_X86_64
-	vxtime.hpet_address = hpet_tbl->address.address;
-
+	hpet_address = hpet_tbl->address.address;
 	printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
-	       hpet_tbl->id, vxtime.hpet_address);
-
-	res_start = vxtime.hpet_address;
-#else /* X86 */
-	{
-		extern unsigned long hpet_address;
+	       hpet_tbl->id, hpet_address);
 
-		hpet_address = hpet_tbl->address.address;
-		printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
-		       hpet_tbl->id, hpet_address);
-
-		res_start = hpet_address;
-	}
-#endif /* X86 */
+	res_start = hpet_address;
 
 	if (hpet_res) {
 		hpet_res->start = res_start;
@@ -676,10 +665,6 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table)
 #define acpi_parse_hpet NULL
 #endif
 
-#ifdef CONFIG_X86_PM_TIMER
-extern u32 pmtmr_ioport;
-#endif
-
 static int __init acpi_parse_fadt(struct acpi_table_header *table)
 {
 
@@ -865,10 +850,9 @@ static inline int acpi_parse_madt_ioapic_entries(void)
 static void __init acpi_process_madt(void)
 {
 #ifdef CONFIG_X86_LOCAL_APIC
-	int count, error;
+	int error;
 
-	count = acpi_table_parse(ACPI_SIG_MADT, acpi_parse_madt);
-	if (count >= 1) {
+	if (!acpi_table_parse(ACPI_SIG_MADT, acpi_parse_madt)) {
 
 		/*
 		 * Parse MADT LAPIC entries
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index f4159e0a7ae9..9655c233e6f1 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -25,6 +25,8 @@
 #include <linux/kernel_stat.h>
 #include <linux/sysdev.h>
 #include <linux/cpu.h>
+#include <linux/clockchips.h>
+#include <linux/acpi_pmtmr.h>
 #include <linux/module.h>
 
 #include <asm/atomic.h>
@@ -45,128 +47,549 @@
 #include "io_ports.h"
 
 /*
- * cpu_mask that denotes the CPUs that needs timer interrupt coming in as
- * IPIs in place of local APIC timers
+ * Sanity check
  */
-static cpumask_t timer_bcast_ipi;
+#if (SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F
+# error SPURIOUS_APIC_VECTOR definition error
+#endif
 
 /*
  * Knob to control our willingness to enable the local APIC.
+ *
+ * -1=force-disable, +1=force-enable
  */
-static int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
-
-static inline void lapic_disable(void)
-{
-	enable_local_apic = -1;
-	clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
-}
+static int enable_local_apic __initdata = 0;
 
-static inline void lapic_enable(void)
-{
-	enable_local_apic = 1;
-}
+/* Local APIC timer verification ok */
+static int local_apic_timer_verify_ok;
 
 /*
- * Debug level
+ * Debug level, exported for io_apic.c
  */
 int apic_verbosity;
 
+static unsigned int calibration_result;
 
+static int lapic_next_event(unsigned long delta,
+			    struct clock_event_device *evt);
+static void lapic_timer_setup(enum clock_event_mode mode,
+			      struct clock_event_device *evt);
+static void lapic_timer_broadcast(cpumask_t mask);
 static void apic_pm_activate(void);
 
+/*
+ * The local apic timer can be used for any function which is CPU local.
+ */
+static struct clock_event_device lapic_clockevent = {
+	.name		= "lapic",
+	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
+			  | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY,
+	.shift		= 32,
+	.set_mode	= lapic_timer_setup,
+	.set_next_event	= lapic_next_event,
+	.broadcast	= lapic_timer_broadcast,
+	.rating		= 100,
+	.irq		= -1,
+};
+static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
+
+/* Local APIC was disabled by the BIOS and enabled by the kernel */
+static int enabled_via_apicbase;
+
+/*
+ * Get the LAPIC version
+ */
+static inline int lapic_get_version(void)
+{
+	return GET_APIC_VERSION(apic_read(APIC_LVR));
+}
+
+/*
+ * Check, if the APIC is integrated or a separate chip
+ */
+static inline int lapic_is_integrated(void)
+{
+	return APIC_INTEGRATED(lapic_get_version());
+}
+
+/*
+ * Check, whether this is a modern or a first generation APIC
+ */
 static int modern_apic(void)
 {
-	unsigned int lvr, version;
 	/* AMD systems use old APIC versions, so check the CPU */
 	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
 	    boot_cpu_data.x86 >= 0xf)
 		return 1;
-	lvr = apic_read(APIC_LVR);
-	version = GET_APIC_VERSION(lvr);
-	return version >= 0x14;
+	return lapic_get_version() >= 0x14;
+}
+
+/**
+ * enable_NMI_through_LVT0 - enable NMI through local vector table 0
+ */
+void enable_NMI_through_LVT0 (void * dummy)
+{
+	unsigned int v = APIC_DM_NMI;
+
+	/* Level triggered for 82489DX */
+	if (!lapic_is_integrated())
+		v |= APIC_LVT_LEVEL_TRIGGER;
+	apic_write_around(APIC_LVT0, v);
+}
+
+/**
+ * get_physical_broadcast - Get number of physical broadcast IDs
+ */
+int get_physical_broadcast(void)
+{
+	return modern_apic() ? 0xff : 0xf;
+}
+
+/**
+ * lapic_get_maxlvt - get the maximum number of local vector table entries
+ */
+int lapic_get_maxlvt(void)
+{
+	unsigned int v = apic_read(APIC_LVR);
+
+	/* 82489DXs do not report # of LVT entries. */
+	return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
 }
 
 /*
- * 'what should we do if we get a hw irq event on an illegal vector'.
- * each architecture has to answer this themselves.
+ * Local APIC timer
  */
-void ack_bad_irq(unsigned int irq)
+
+/* Clock divisor is set to 16 */
+#define APIC_DIVISOR 16
+
+/*
+ * This function sets up the local APIC timer, with a timeout of
+ * 'clocks' APIC bus clock. During calibration we actually call
+ * this function twice on the boot CPU, once with a bogus timeout
+ * value, second time for real. The other (noncalibrating) CPUs
+ * call this function only once, with the real, calibrated value.
+ *
+ * We do reads before writes even if unnecessary, to get around the
+ * P5 APIC double write bug.
+ */
+static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
 {
-	printk("unexpected IRQ trap at vector %02x\n", irq);
+	unsigned int lvtt_value, tmp_value;
+
+	lvtt_value = LOCAL_TIMER_VECTOR;
+	if (!oneshot)
+		lvtt_value |= APIC_LVT_TIMER_PERIODIC;
+	if (!lapic_is_integrated())
+		lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
+
+	if (!irqen)
+		lvtt_value |= APIC_LVT_MASKED;
+
+	apic_write_around(APIC_LVTT, lvtt_value);
+
 	/*
-	 * Currently unexpected vectors happen only on SMP and APIC.
-	 * We _must_ ack these because every local APIC has only N
-	 * irq slots per priority level, and a 'hanging, unacked' IRQ
-	 * holds up an irq slot - in excessive cases (when multiple
-	 * unexpected vectors occur) that might lock up the APIC
-	 * completely.
-	 * But only ack when the APIC is enabled -AK
+	 * Divide PICLK by 16
 	 */
-	if (cpu_has_apic)
-		ack_APIC_irq();
+	tmp_value = apic_read(APIC_TDCR);
+	apic_write_around(APIC_TDCR, (tmp_value
+				& ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE))
+				| APIC_TDR_DIV_16);
+
+	if (!oneshot)
+		apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
 }
 
-void __init apic_intr_init(void)
+/*
+ * Program the next event, relative to now
+ */
+static int lapic_next_event(unsigned long delta,
+			    struct clock_event_device *evt)
+{
+	apic_write_around(APIC_TMICT, delta);
+	return 0;
+}
+
+/*
+ * Setup the lapic timer in periodic or oneshot mode
+ */
+static void lapic_timer_setup(enum clock_event_mode mode,
+			      struct clock_event_device *evt)
+{
+	unsigned long flags;
+	unsigned int v;
+
+	/* Lapic used for broadcast ? */
+	if (!local_apic_timer_verify_ok)
+		return;
+
+	local_irq_save(flags);
+
+	switch (mode) {
+	case CLOCK_EVT_MODE_PERIODIC:
+	case CLOCK_EVT_MODE_ONESHOT:
+		__setup_APIC_LVTT(calibration_result,
+				  mode != CLOCK_EVT_MODE_PERIODIC, 1);
+		break;
+	case CLOCK_EVT_MODE_UNUSED:
+	case CLOCK_EVT_MODE_SHUTDOWN:
+		v = apic_read(APIC_LVTT);
+		v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
+		apic_write_around(APIC_LVTT, v);
+		break;
+	}
+
+	local_irq_restore(flags);
+}
+
+/*
+ * Local APIC timer broadcast function
+ */
+static void lapic_timer_broadcast(cpumask_t mask)
 {
 #ifdef CONFIG_SMP
-	smp_intr_init();
+	send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
 #endif
-	/* self generated IPI for local APIC timer */
-	set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
+}
 
-	/* IPI vectors for APIC spurious and error interrupts */
-	set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
-	set_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
+/*
+ * Setup the local APIC timer for this CPU. Copy the initialized values
+ * of the boot CPU and register the clock event in the framework.
+ */
+static void __devinit setup_APIC_timer(void)
+{
+	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
 
-	/* thermal monitor LVT interrupt */
-#ifdef CONFIG_X86_MCE_P4THERMAL
-	set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
-#endif
+	memcpy(levt, &lapic_clockevent, sizeof(*levt));
+	levt->cpumask = cpumask_of_cpu(smp_processor_id());
+
+	clockevents_register_device(levt);
 }
 
-/* Using APIC to generate smp_local_timer_interrupt? */
-int using_apic_timer __read_mostly = 0;
+/*
+ * In this function we calibrate APIC bus clocks to the external timer.
+ *
+ * We want to do the calibration only once since we want to have local timer
+ * irqs synchronous. CPUs connected by the same APIC bus have the very same bus
+ * frequency.
+ *
+ * This was previously done by reading the PIT/HPET and waiting for a wrap
+ * around to find out, that a tick has elapsed. I have a box, where the PIT
+ * readout is broken, so it never gets out of the wait loop again. This was
+ * also reported by others.
+ *
+ * Monitoring the jiffies value is inaccurate and the clockevents
+ * infrastructure allows us to do a simple substitution of the interrupt
+ * handler.
+ *
+ * The calibration routine also uses the pm_timer when possible, as the PIT
+ * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes
+ * back to normal later in the boot process).
+ */
 
-static int enabled_via_apicbase;
+#define LAPIC_CAL_LOOPS		(HZ/10)
 
-void enable_NMI_through_LVT0 (void * dummy)
+static __initdata volatile int lapic_cal_loops = -1;
+static __initdata long lapic_cal_t1, lapic_cal_t2;
+static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2;
+static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
+static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;
+
+/*
+ * Temporary interrupt handler.
+ */
+static void __init lapic_cal_handler(struct clock_event_device *dev)
 {
-	unsigned int v, ver;
+	unsigned long long tsc = 0;
+	long tapic = apic_read(APIC_TMCCT);
+	unsigned long pm = acpi_pm_read_early();
 
-	ver = apic_read(APIC_LVR);
-	ver = GET_APIC_VERSION(ver);
-	v = APIC_DM_NMI;		/* unmask and set to NMI */
-	if (!APIC_INTEGRATED(ver))	/* 82489DX */
-		v |= APIC_LVT_LEVEL_TRIGGER;
-	apic_write_around(APIC_LVT0, v);
+	if (cpu_has_tsc)
+		rdtscll(tsc);
+
+	switch (lapic_cal_loops++) {
+	case 0:
+		lapic_cal_t1 = tapic;
+		lapic_cal_tsc1 = tsc;
+		lapic_cal_pm1 = pm;
+		lapic_cal_j1 = jiffies;
+		break;
+
+	case LAPIC_CAL_LOOPS:
+		lapic_cal_t2 = tapic;
+		lapic_cal_tsc2 = tsc;
+		if (pm < lapic_cal_pm1)
+			pm += ACPI_PM_OVRRUN;
+		lapic_cal_pm2 = pm;
+		lapic_cal_j2 = jiffies;
+		break;
+	}
 }
 
-int get_physical_broadcast(void)
+/*
+ * Setup the boot APIC
+ *
+ * Calibrate and verify the result.
+ */
+void __init setup_boot_APIC_clock(void)
 {
-	if (modern_apic())
-		return 0xff;
-	else
-		return 0xf;
+	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
+	const long pm_100ms = PMTMR_TICKS_PER_SEC/10;
+	const long pm_thresh = pm_100ms/100;
+	void (*real_handler)(struct clock_event_device *dev);
+	unsigned long deltaj;
+	long delta, deltapm;
+
+	apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
+		    "calibrating APIC timer ...\n");
+
+	local_irq_disable();
+
+	/* Replace the global interrupt handler */
+	real_handler = global_clock_event->event_handler;
+	global_clock_event->event_handler = lapic_cal_handler;
+
+	/*
+	 * Setup the APIC counter to 1e9. There is no way the lapic
+	 * can underflow in the 100ms detection time frame
+	 */
+	__setup_APIC_LVTT(1000000000, 0, 0);
+
+	/* Let the interrupts run */
+	local_irq_enable();
+
+	while(lapic_cal_loops <= LAPIC_CAL_LOOPS);
+
+	local_irq_disable();
+
+	/* Restore the real event handler */
+	global_clock_event->event_handler = real_handler;
+
+	/* Build delta t1-t2 as apic timer counts down */
+	delta = lapic_cal_t1 - lapic_cal_t2;
+	apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
+
+	/* Check, if the PM timer is available */
+	deltapm = lapic_cal_pm2 - lapic_cal_pm1;
+	apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm);
+
+	if (deltapm) {
+		unsigned long mult;
+		u64 res;
+
+		mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
+
+		if (deltapm > (pm_100ms - pm_thresh) &&
+		    deltapm < (pm_100ms + pm_thresh)) {
+			apic_printk(APIC_VERBOSE, "... PM timer result ok\n");
+		} else {
+			res = (((u64) deltapm) * mult) >> 22;
+			do_div(res, 1000000);
+			printk(KERN_WARNING "APIC calibration not consistent "
+			       "with PM Timer: %ldms instead of 100ms\n",
+			       (long)res);
+			/* Correct the lapic counter value */
+			res = (((u64) delta ) * pm_100ms);
+			do_div(res, deltapm);
+			printk(KERN_INFO "APIC delta adjusted to PM-Timer: "
+			       "%lu (%ld)\n", (unsigned long) res, delta);
+			delta = (long) res;
+		}
+	}
+
+	/* Calculate the scaled math multiplication factor */
+	lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, 32);
+	lapic_clockevent.max_delta_ns =
+		clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
+	lapic_clockevent.min_delta_ns =
+		clockevent_delta2ns(0xF, &lapic_clockevent);
+
+	calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
+
+	apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
+	apic_printk(APIC_VERBOSE, "..... mult: %ld\n", lapic_clockevent.mult);
+	apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
+		    calibration_result);
+
+	if (cpu_has_tsc) {
+		delta = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);
+		apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
+			    "%ld.%04ld MHz.\n",
+			    (delta / LAPIC_CAL_LOOPS) / (1000000 / HZ),
+			    (delta / LAPIC_CAL_LOOPS) % (1000000 / HZ));
+	}
+
+	apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
+		    "%u.%04u MHz.\n",
+		    calibration_result / (1000000 / HZ),
+		    calibration_result % (1000000 / HZ));
+
+
+	apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
+
+	/*
+	 * Setup the apic timer manually
+	 */
+	local_apic_timer_verify_ok = 1;
+	levt->event_handler = lapic_cal_handler;
+	lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt);
+	lapic_cal_loops = -1;
+
+	/* Let the interrupts run */
+	local_irq_enable();
+
+	while(lapic_cal_loops <= LAPIC_CAL_LOOPS);
+
+	local_irq_disable();
+
+	/* Stop the lapic timer */
+	lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt);
+
+	local_irq_enable();
+
+	/* Jiffies delta */
+	deltaj = lapic_cal_j2 - lapic_cal_j1;
+	apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);
+
+	/* Check, if the PM timer is available */
+	deltapm = lapic_cal_pm2 - lapic_cal_pm1;
+	apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm);
+
+	local_apic_timer_verify_ok = 0;
+
+	if (deltapm) {
+		if (deltapm > (pm_100ms - pm_thresh) &&
+		    deltapm < (pm_100ms + pm_thresh)) {
+			apic_printk(APIC_VERBOSE, "... PM timer result ok\n");
+			/* Check, if the jiffies result is consistent */
+			if (deltaj < LAPIC_CAL_LOOPS-2 ||
+			    deltaj > LAPIC_CAL_LOOPS+2) {
+				/*
+				 * Not sure, what we can do about this one.
+				 * When high resolution timers are active
+				 * and the lapic timer does not stop in C3
+				 * we are fine. Otherwise more trouble might
+				 * be waiting. -- tglx
+				 */
+				printk(KERN_WARNING "Global event device %s "
+				       "has wrong frequency "
+				       "(%lu ticks instead of %d)\n",
+				       global_clock_event->name, deltaj,
+				       LAPIC_CAL_LOOPS);
+			}
+			local_apic_timer_verify_ok = 1;
+		}
+	} else {
+		/* Check, if the jiffies result is consistent */
+		if (deltaj >= LAPIC_CAL_LOOPS-2 &&
+		    deltaj <= LAPIC_CAL_LOOPS+2) {
+			apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
+			local_apic_timer_verify_ok = 1;
+		}
+	}
+
+	if (!local_apic_timer_verify_ok) {
+		printk(KERN_WARNING
+		       "APIC timer disabled due to verification failure.\n");
+		/* No broadcast on UP ! */
+		if (num_possible_cpus() == 1)
+			return;
+	} else
+		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
+
+	/* Setup the lapic or request the broadcast */
+	setup_APIC_timer();
+}
+
+void __devinit setup_secondary_APIC_clock(void)
+{
+	setup_APIC_timer();
 }
 
-int get_maxlvt(void)
+/*
+ * The guts of the apic timer interrupt
+ */
+static void local_apic_timer_interrupt(void)
 {
-	unsigned int v, ver, maxlvt;
+	int cpu = smp_processor_id();
+	struct clock_event_device *evt = &per_cpu(lapic_events, cpu);
 
-	v = apic_read(APIC_LVR);
-	ver = GET_APIC_VERSION(v);
-	/* 82489DXs do not report # of LVT entries. */
-	maxlvt = APIC_INTEGRATED(ver) ? GET_APIC_MAXLVT(v) : 2;
-	return maxlvt;
+	/*
+	 * Normally we should not be here till LAPIC has been initialized but
+	 * in some cases like kdump, it's possible that there is a pending LAPIC
+	 * timer interrupt from the previous kernel's context and is delivered in
+	 * the new kernel the moment interrupts are enabled.
+	 *
+	 * Interrupts are enabled early and LAPIC is setup much later, hence
+	 * it's possible that when we get here evt->event_handler is NULL.
+	 * Check for event_handler being NULL and discard the interrupt as
+	 * spurious.
+	 */
+	if (!evt->event_handler) {
+		printk(KERN_WARNING
+		       "Spurious LAPIC timer interrupt on cpu %d\n", cpu);
+		/* Switch it off */
+		lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt);
+		return;
+	}
+
+	per_cpu(irq_stat, cpu).apic_timer_irqs++;
+
+	evt->event_handler(evt);
+}
+
+/*
+ * Local APIC timer interrupt. This is the most natural way for doing
+ * local interrupts, but local timer interrupts can be emulated by
+ * broadcast interrupts too. [in case the hw doesn't support APIC timers]
+ *
+ * [ if a single-CPU system runs an SMP kernel then we call the local
+ *   interrupt as well. Thus we cannot inline the local irq ... ]
+ */
+
+void fastcall smp_apic_timer_interrupt(struct pt_regs *regs)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+
+	/*
+	 * NOTE! We'd better ACK the irq immediately,
+	 * because timer handling can be slow.
+	 */
+	ack_APIC_irq();
+	/*
+	 * update_process_times() expects us to have done irq_enter().
+	 * Besides, if we don't timer interrupts ignore the global
+	 * interrupt lock, which is the WrongThing (tm) to do.
+	 */
+	exit_idle();
+	irq_enter();
+	local_apic_timer_interrupt();
+	irq_exit();
+
+	set_irq_regs(old_regs);
 }
 
+int setup_profiling_timer(unsigned int multiplier)
+{
+	return -EINVAL;
+}
+
+/*
+ * Local APIC start and shutdown
+ */
+
+/**
+ * clear_local_APIC - shutdown the local APIC
+ *
+ * This is called, when a CPU is disabled and before rebooting, so the state of
+ * the local APIC has no dangling leftovers. Also used to clean out any BIOS
+ * leftovers during boot.
+ */
 void clear_local_APIC(void)
 {
-	int maxlvt;
+	int maxlvt = lapic_get_maxlvt();
 	unsigned long v;
 
-	maxlvt = get_maxlvt();
-
 	/*
 	 * Masking an LVT entry can trigger a local APIC error
 	 * if the vector is zero. Mask LVTERR first to prevent this.
@@ -190,7 +613,7 @@ void clear_local_APIC(void)
 		apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED);
 	}
 
-/* lets not touch this if we didn't frob it */
+	/* lets not touch this if we didn't frob it */
 #ifdef CONFIG_X86_MCE_P4THERMAL
 	if (maxlvt >= 5) {
 		v = apic_read(APIC_LVTTHMR);
@@ -212,85 +635,18 @@ void clear_local_APIC(void)
 	if (maxlvt >= 5)
 		apic_write_around(APIC_LVTTHMR, APIC_LVT_MASKED);
 #endif
-	v = GET_APIC_VERSION(apic_read(APIC_LVR));
-	if (APIC_INTEGRATED(v)) {	/* !82489DX */
-		if (maxlvt > 3)	/* Due to Pentium errata 3AP and 11AP. */
+	/* Integrated APIC (!82489DX) ? */
+	if (lapic_is_integrated()) {
+		if (maxlvt > 3)
+			/* Clear ESR due to Pentium errata 3AP and 11AP */
 			apic_write(APIC_ESR, 0);
 		apic_read(APIC_ESR);
 	}
 }
 
-void __init connect_bsp_APIC(void)
-{
-	if (pic_mode) {
-		/*
-		 * Do not trust the local APIC being empty at bootup.
-		 */
-		clear_local_APIC();
-		/*
-		 * PIC mode, enable APIC mode in the IMCR, i.e.
-		 * connect BSP's local APIC to INT and NMI lines.
-		 */
-		apic_printk(APIC_VERBOSE, "leaving PIC mode, "
-			    "enabling APIC mode.\n");
-		outb(0x70, 0x22);
-		outb(0x01, 0x23);
-	}
-	enable_apic_mode();
-}
-
-void disconnect_bsp_APIC(int virt_wire_setup)
-{
-	if (pic_mode) {
-		/*
-		 * Put the board back into PIC mode (has an effect
-		 * only on certain older boards). Note that APIC
-		 * interrupts, including IPIs, won't work beyond
-		 * this point! The only exception are INIT IPIs.
-		 */
-		apic_printk(APIC_VERBOSE, "disabling APIC mode, "
-			    "entering PIC mode.\n");
-		outb(0x70, 0x22);
-		outb(0x00, 0x23);
-	}
-	else {
-		/* Go back to Virtual Wire compatibility mode */
-		unsigned long value;
-
-		/* For the spurious interrupt use vector F, and enable it */
-		value = apic_read(APIC_SPIV);
-		value &= ~APIC_VECTOR_MASK;
-		value |= APIC_SPIV_APIC_ENABLED;
-		value |= 0xf;
-		apic_write_around(APIC_SPIV, value);
-
-		if (!virt_wire_setup) {
-			/* For LVT0 make it edge triggered, active high, external and enabled */
-			value = apic_read(APIC_LVT0);
-			value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
-				APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
-				APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED );
-			value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
-			value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
-			apic_write_around(APIC_LVT0, value);
-		}
-		else {
-			/* Disable LVT0 */
-			apic_write_around(APIC_LVT0, APIC_LVT_MASKED);
-		}
-
-		/* For LVT1 make it edge triggered, active high, nmi and enabled */
-		value = apic_read(APIC_LVT1);
-		value &= ~(
-			APIC_MODE_MASK | APIC_SEND_PENDING |
-			APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
-			APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
-		value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
-		value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
-		apic_write_around(APIC_LVT1, value);
-	}
-}
-
+/**
+ * disable_local_APIC - clear and disable the local APIC
+ */
 void disable_local_APIC(void)
 {
 	unsigned long value;
@@ -305,8 +661,13 @@ void disable_local_APIC(void)
 	value &= ~APIC_SPIV_APIC_ENABLED;
 	apic_write_around(APIC_SPIV, value);
 
+	/*
+	 * When LAPIC was disabled by the BIOS and enabled by the kernel,
+	 * restore the disabled state.
+	 */
 	if (enabled_via_apicbase) {
 		unsigned int l, h;
+
 		rdmsr(MSR_IA32_APICBASE, l, h);
 		l &= ~MSR_IA32_APICBASE_ENABLE;
 		wrmsr(MSR_IA32_APICBASE, l, h);
@@ -314,6 +675,28 @@ void disable_local_APIC(void)
 }
 
 /*
+ * If Linux enabled the LAPIC against the BIOS default disable it down before
+ * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and
+ * not power-off. Additionally clear all LVT entries before disable_local_APIC
+ * for the case where Linux didn't enable the LAPIC.
+ */
+void lapic_shutdown(void)
+{
+	unsigned long flags;
+
+	if (!cpu_has_apic)
+		return;
+
+	local_irq_save(flags);
+	clear_local_APIC();
+
+	if (enabled_via_apicbase)
+		disable_local_APIC();
+
+	local_irq_restore(flags);
+}
+
+/*
  * This is to verify that we're looking at a real local APIC.
  * Check these against your board if the CPUs aren't getting
  * started for no apparent reason.
@@ -345,7 +728,7 @@ int __init verify_local_APIC(void)
 	reg1 = GET_APIC_VERSION(reg0);
 	if (reg1 == 0x00 || reg1 == 0xff)
 		return 0;
-	reg1 = get_maxlvt();
+	reg1 = lapic_get_maxlvt();
 	if (reg1 < 0x02 || reg1 == 0xff)
 		return 0;
 
@@ -368,10 +751,15 @@ int __init verify_local_APIC(void)
 	return 1;
 }
 
+/**
+ * sync_Arb_IDs - synchronize APIC bus arbitration IDs
+ */
 void __init sync_Arb_IDs(void)
 {
-	/* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1
-	   And not needed on AMD */
+	/*
+	 * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not
+	 * needed on AMD.
+	 */
 	if (modern_apic())
 		return;
 	/*
@@ -384,14 +772,12 @@ void __init sync_Arb_IDs(void)
 			| APIC_DM_INIT);
 }
 
-extern void __error_in_apic_c (void);
-
 /*
  * An initial setup of the virtual wire mode.
  */
 void __init init_bsp_APIC(void)
 {
-	unsigned long value, ver;
+	unsigned long value;
 
 	/*
 	 * Don't do the setup now if we have a SMP BIOS as the
@@ -400,9 +786,6 @@ void __init init_bsp_APIC(void)
 	if (smp_found_config || !cpu_has_apic)
 		return;
 
-	value = apic_read(APIC_LVR);
-	ver = GET_APIC_VERSION(value);
-
 	/*
 	 * Do not trust the local APIC being empty at bootup.
 	 */
@@ -414,9 +797,10 @@ void __init init_bsp_APIC(void)
 	value = apic_read(APIC_SPIV);
 	value &= ~APIC_VECTOR_MASK;
 	value |= APIC_SPIV_APIC_ENABLED;
 
 	/* This bit is reserved on P4/Xeon and should be cleared */
-	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 15))
+	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
+	    (boot_cpu_data.x86 == 15))
 		value &= ~APIC_SPIV_FOCUS_DISABLED;
 	else
 		value |= APIC_SPIV_FOCUS_DISABLED;
@@ -428,14 +812,17 @@ void __init init_bsp_APIC(void)
 	 */
 	apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
 	value = APIC_DM_NMI;
-	if (!APIC_INTEGRATED(ver))	/* 82489DX */
+	if (!lapic_is_integrated())	/* 82489DX */
 		value |= APIC_LVT_LEVEL_TRIGGER;
 	apic_write_around(APIC_LVT1, value);
 }
 
+/**
+ * setup_local_APIC - setup the local APIC
+ */
 void __devinit setup_local_APIC(void)
 {
-	unsigned long oldvalue, value, ver, maxlvt;
+	unsigned long oldvalue, value, maxlvt, integrated;
 	int i, j;
 
 	/* Pound the ESR really hard over the head with a big hammer - mbligh */
@@ -446,11 +833,7 @@ void __devinit setup_local_APIC(void)
 		apic_write(APIC_ESR, 0);
 	}
 
-	value = apic_read(APIC_LVR);
-	ver = GET_APIC_VERSION(value);
-
-	if ((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f)
-		__error_in_apic_c();
+	integrated = lapic_is_integrated();
 
 	/*
 	 * Double-check whether this APIC is really registered.
@@ -521,13 +904,10 @@ void __devinit setup_local_APIC(void)
 	 * like LRU than MRU (the short-term load is more even across CPUs).
 	 * See also the comment in end_level_ioapic_irq(). --macro
 	 */
-#if 1
+
 	/* Enable focus processor (bit==0) */
 	value &= ~APIC_SPIV_FOCUS_DISABLED;
-#else
-	/* Disable focus processor (bit==1) */
-	value |= APIC_SPIV_FOCUS_DISABLED;
-#endif
+
 	/*
 	 * Set spurious IRQ vector
 	 */
@@ -563,17 +943,18 @@ void __devinit setup_local_APIC(void)
 		value = APIC_DM_NMI;
 	else
 		value = APIC_DM_NMI | APIC_LVT_MASKED;
-	if (!APIC_INTEGRATED(ver))	/* 82489DX */
+	if (!integrated)		/* 82489DX */
 		value |= APIC_LVT_LEVEL_TRIGGER;
 	apic_write_around(APIC_LVT1, value);
 
-	if (APIC_INTEGRATED(ver) && !esr_disable) {	/* !82489DX */
-		maxlvt = get_maxlvt();
+	if (integrated && !esr_disable) {		/* !82489DX */
+		maxlvt = lapic_get_maxlvt();
 		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
 			apic_write(APIC_ESR, 0);
 		oldvalue = apic_read(APIC_ESR);
 
-		value = ERROR_APIC_VECTOR;	// enables sending errors
+		/* enables sending errors */
+		value = ERROR_APIC_VECTOR;
 		apic_write_around(APIC_LVTERR, value);
 		/*
 		 * spec says clear errors after enabling vector.
@@ -586,207 +967,30 @@ void __devinit setup_local_APIC(void)
586 "vector: 0x%08lx after: 0x%08lx\n", 967 "vector: 0x%08lx after: 0x%08lx\n",
587 oldvalue, value); 968 oldvalue, value);
588 } else { 969 } else {
589 if (esr_disable) 970 if (esr_disable)
590 /* 971 /*
591 * Something untraceble is creating bad interrupts on 972 * Something untraceble is creating bad interrupts on
592 * secondary quads ... for the moment, just leave the 973 * secondary quads ... for the moment, just leave the
593 * ESR disabled - we can't do anything useful with the 974 * ESR disabled - we can't do anything useful with the
594 * errors anyway - mbligh 975 * errors anyway - mbligh
595 */ 976 */
596 printk("Leaving ESR disabled.\n"); 977 printk(KERN_INFO "Leaving ESR disabled.\n");
597 else 978 else
598 printk("No ESR for 82489DX.\n"); 979 printk(KERN_INFO "No ESR for 82489DX.\n");
599 } 980 }
600 981
982 /* Disable the local apic timer */
983 value = apic_read(APIC_LVTT);
984 value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
985 apic_write_around(APIC_LVTT, value);
986
601 setup_apic_nmi_watchdog(NULL); 987 setup_apic_nmi_watchdog(NULL);
602 apic_pm_activate(); 988 apic_pm_activate();
603} 989}
604 990
605/* 991/*
606 * If Linux enabled the LAPIC against the BIOS default 992 * Detect and initialize APIC
607 * disable it down before re-entering the BIOS on shutdown.
608 * Otherwise the BIOS may get confused and not power-off.
609 * Additionally clear all LVT entries before disable_local_APIC
610 * for the case where Linux didn't enable the LAPIC.
611 */
612void lapic_shutdown(void)
613{
614 unsigned long flags;
615
616 if (!cpu_has_apic)
617 return;
618
619 local_irq_save(flags);
620 clear_local_APIC();
621
622 if (enabled_via_apicbase)
623 disable_local_APIC();
624
625 local_irq_restore(flags);
626}
627
628#ifdef CONFIG_PM
629
630static struct {
631 int active;
632 /* r/w apic fields */
633 unsigned int apic_id;
634 unsigned int apic_taskpri;
635 unsigned int apic_ldr;
636 unsigned int apic_dfr;
637 unsigned int apic_spiv;
638 unsigned int apic_lvtt;
639 unsigned int apic_lvtpc;
640 unsigned int apic_lvt0;
641 unsigned int apic_lvt1;
642 unsigned int apic_lvterr;
643 unsigned int apic_tmict;
644 unsigned int apic_tdcr;
645 unsigned int apic_thmr;
646} apic_pm_state;
647
648static int lapic_suspend(struct sys_device *dev, pm_message_t state)
649{
650 unsigned long flags;
651 int maxlvt;
652
653 if (!apic_pm_state.active)
654 return 0;
655
656 maxlvt = get_maxlvt();
657
658 apic_pm_state.apic_id = apic_read(APIC_ID);
659 apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
660 apic_pm_state.apic_ldr = apic_read(APIC_LDR);
661 apic_pm_state.apic_dfr = apic_read(APIC_DFR);
662 apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
663 apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
664 if (maxlvt >= 4)
665 apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
666 apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
667 apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
668 apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
669 apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
670 apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
671#ifdef CONFIG_X86_MCE_P4THERMAL
672 if (maxlvt >= 5)
673 apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
674#endif
675
676 local_irq_save(flags);
677 disable_local_APIC();
678 local_irq_restore(flags);
679 return 0;
680}
681
682static int lapic_resume(struct sys_device *dev)
683{
684 unsigned int l, h;
685 unsigned long flags;
686 int maxlvt;
687
688 if (!apic_pm_state.active)
689 return 0;
690
691 maxlvt = get_maxlvt();
692
693 local_irq_save(flags);
694
695 /*
696 * Make sure the APICBASE points to the right address
697 *
698 * FIXME! This will be wrong if we ever support suspend on
699 * SMP! We'll need to do this as part of the CPU restore!
700 */
701 rdmsr(MSR_IA32_APICBASE, l, h);
702 l &= ~MSR_IA32_APICBASE_BASE;
703 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
704 wrmsr(MSR_IA32_APICBASE, l, h);
705
706 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
707 apic_write(APIC_ID, apic_pm_state.apic_id);
708 apic_write(APIC_DFR, apic_pm_state.apic_dfr);
709 apic_write(APIC_LDR, apic_pm_state.apic_ldr);
710 apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
711 apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
712 apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
713 apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
714#ifdef CONFIG_X86_MCE_P4THERMAL
715 if (maxlvt >= 5)
716 apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
717#endif
718 if (maxlvt >= 4)
719 apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
720 apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
721 apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
722 apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
723 apic_write(APIC_ESR, 0);
724 apic_read(APIC_ESR);
725 apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
726 apic_write(APIC_ESR, 0);
727 apic_read(APIC_ESR);
728 local_irq_restore(flags);
729 return 0;
730}
731
732/*
733 * This device has no shutdown method - fully functioning local APICs
734 * are needed on every CPU up until machine_halt/restart/poweroff.
735 */ 993 */
736
737static struct sysdev_class lapic_sysclass = {
738 set_kset_name("lapic"),
739 .resume = lapic_resume,
740 .suspend = lapic_suspend,
741};
742
743static struct sys_device device_lapic = {
744 .id = 0,
745 .cls = &lapic_sysclass,
746};
747
748static void __devinit apic_pm_activate(void)
749{
750 apic_pm_state.active = 1;
751}
752
753static int __init init_lapic_sysfs(void)
754{
755 int error;
756
757 if (!cpu_has_apic)
758 return 0;
759 /* XXX: remove suspend/resume procs if !apic_pm_state.active? */
760
761 error = sysdev_class_register(&lapic_sysclass);
762 if (!error)
763 error = sysdev_register(&device_lapic);
764 return error;
765}
766device_initcall(init_lapic_sysfs);
767
768#else /* CONFIG_PM */
769
770static void apic_pm_activate(void) { }
771
772#endif /* CONFIG_PM */
773
774/*
775 * Detect and enable local APICs on non-SMP boards.
776 * Original code written by Keir Fraser.
777 */
778
779static int __init apic_set_verbosity(char *str)
780{
781 if (strcmp("debug", str) == 0)
782 apic_verbosity = APIC_DEBUG;
783 else if (strcmp("verbose", str) == 0)
784 apic_verbosity = APIC_VERBOSE;
785 return 1;
786}
787
788__setup("apic=", apic_set_verbosity);
789
790static int __init detect_init_APIC (void) 994static int __init detect_init_APIC (void)
791{ 995{
792 u32 h, l, features; 996 u32 h, l, features;
@@ -798,7 +1002,7 @@ static int __init detect_init_APIC (void)
 	switch (boot_cpu_data.x86_vendor) {
 	case X86_VENDOR_AMD:
 		if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) ||
-		(boot_cpu_data.x86 == 15))
+		    (boot_cpu_data.x86 == 15))
 			break;
 		goto no_apic;
 	case X86_VENDOR_INTEL:
@@ -812,23 +1016,23 @@ static int __init detect_init_APIC (void)
 
 	if (!cpu_has_apic) {
 		/*
-		 * Over-ride BIOS and try to enable the local
-		 * APIC only if "lapic" specified.
+		 * Over-ride BIOS and try to enable the local APIC only if
+		 * "lapic" specified.
 		 */
 		if (enable_local_apic <= 0) {
-			printk("Local APIC disabled by BIOS -- "
+			printk(KERN_INFO "Local APIC disabled by BIOS -- "
 			       "you can enable it with \"lapic\"\n");
 			return -1;
 		}
 		/*
-		 * Some BIOSes disable the local APIC in the
-		 * APIC_BASE MSR. This can only be done in
-		 * software for Intel P6 or later and AMD K7
-		 * (Model > 1) or later.
+		 * Some BIOSes disable the local APIC in the APIC_BASE
+		 * MSR. This can only be done in software for Intel P6 or later
+		 * and AMD K7 (Model > 1) or later.
 		 */
 		rdmsr(MSR_IA32_APICBASE, l, h);
 		if (!(l & MSR_IA32_APICBASE_ENABLE)) {
-			printk("Local APIC disabled by BIOS -- reenabling.\n");
+			printk(KERN_INFO
+			       "Local APIC disabled by BIOS -- reenabling.\n");
 			l &= ~MSR_IA32_APICBASE_BASE;
 			l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
 			wrmsr(MSR_IA32_APICBASE, l, h);
@@ -841,7 +1045,7 @@ static int __init detect_init_APIC (void)
 	 */
 	features = cpuid_edx(1);
 	if (!(features & (1 << X86_FEATURE_APIC))) {
-		printk("Could not enable APIC!\n");
+		printk(KERN_WARNING "Could not enable APIC!\n");
 		return -1;
 	}
 	set_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
@@ -855,17 +1059,20 @@ static int __init detect_init_APIC (void)
 	if (nmi_watchdog != NMI_NONE)
 		nmi_watchdog = NMI_LOCAL_APIC;
 
-	printk("Found and enabled local APIC!\n");
+	printk(KERN_INFO "Found and enabled local APIC!\n");
 
 	apic_pm_activate();
 
 	return 0;
 
 no_apic:
-	printk("No local APIC present or hardware disabled\n");
+	printk(KERN_INFO "No local APIC present or hardware disabled\n");
 	return -1;
 }
 
+/**
+ * init_apic_mappings - initialize APIC mappings
+ */
 void __init init_apic_mappings(void)
 {
 	unsigned long apic_phys;
@@ -925,385 +1132,92 @@ fake_ioapic_page:
925} 1132}
926 1133
927/* 1134/*
928 * This part sets up the APIC 32 bit clock in LVTT1, with HZ interrupts 1135 * This initializes the IO-APIC and APIC hardware if this is
929 * per second. We assume that the caller has already set up the local 1136 * a UP kernel.
930 * APIC.
931 *
932 * The APIC timer is not exactly sync with the external timer chip, it
933 * closely follows bus clocks.
934 */
935
936/*
937 * The timer chip is already set up at HZ interrupts per second here,
938 * but we do not accept timer interrupts yet. We only allow the BP
939 * to calibrate.
940 */
941static unsigned int __devinit get_8254_timer_count(void)
942{
943 unsigned long flags;
944
945 unsigned int count;
946
947 spin_lock_irqsave(&i8253_lock, flags);
948
949 outb_p(0x00, PIT_MODE);
950 count = inb_p(PIT_CH0);
951 count |= inb_p(PIT_CH0) << 8;
952
953 spin_unlock_irqrestore(&i8253_lock, flags);
954
955 return count;
956}
957
958/* next tick in 8254 can be caught by catching timer wraparound */
959static void __devinit wait_8254_wraparound(void)
960{
961 unsigned int curr_count, prev_count;
962
963 curr_count = get_8254_timer_count();
964 do {
965 prev_count = curr_count;
966 curr_count = get_8254_timer_count();
967
968 /* workaround for broken Mercury/Neptune */
969 if (prev_count >= curr_count + 0x100)
970 curr_count = get_8254_timer_count();
971
972 } while (prev_count >= curr_count);
973}
974
975/*
976 * Default initialization for 8254 timers. If we use other timers like HPET,
977 * we override this later
978 */
979void (*wait_timer_tick)(void) __devinitdata = wait_8254_wraparound;
980
981/*
982 * This function sets up the local APIC timer, with a timeout of
983 * 'clocks' APIC bus clock. During calibration we actually call
984 * this function twice on the boot CPU, once with a bogus timeout
985 * value, second time for real. The other (noncalibrating) CPUs
986 * call this function only once, with the real, calibrated value.
987 *
988 * We do reads before writes even if unnecessary, to get around the
989 * P5 APIC double write bug.
990 */ 1137 */
991 1138int __init APIC_init_uniprocessor (void)
992#define APIC_DIVISOR 16
993
994static void __setup_APIC_LVTT(unsigned int clocks)
995{ 1139{
996 unsigned int lvtt_value, tmp_value, ver; 1140 if (enable_local_apic < 0)
997 int cpu = smp_processor_id(); 1141 clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
998
999 ver = GET_APIC_VERSION(apic_read(APIC_LVR));
1000 lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR;
1001 if (!APIC_INTEGRATED(ver))
1002 lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
1003
1004 if (cpu_isset(cpu, timer_bcast_ipi))
1005 lvtt_value |= APIC_LVT_MASKED;
1006 1142
1007 apic_write_around(APIC_LVTT, lvtt_value); 1143 if (!smp_found_config && !cpu_has_apic)
1144 return -1;
1008 1145
1009 /* 1146 /*
1010 * Divide PICLK by 16 1147 * Complain if the BIOS pretends there is one.
1011 */ 1148 */
1012 tmp_value = apic_read(APIC_TDCR); 1149 if (!cpu_has_apic &&
1013 apic_write_around(APIC_TDCR, (tmp_value 1150 APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
1014 & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) 1151 printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
1015 | APIC_TDR_DIV_16); 1152 boot_cpu_physical_apicid);
1016 1153 clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
1017 apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); 1154 return -1;
1018} 1155 }
1019 1156
1020static void __devinit setup_APIC_timer(unsigned int clocks) 1157 verify_local_APIC();
1021{
1022 unsigned long flags;
1023 1158
1024 local_irq_save(flags); 1159 connect_bsp_APIC();
1025 1160
1026 /* 1161 /*
1027 * Wait for IRQ0's slice: 1162 * Hack: In case of kdump, after a crash, kernel might be booting
1163 * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid
1164 * might be zero if read from MP tables. Get it from LAPIC.
1028 */ 1165 */
1029 wait_timer_tick(); 1166#ifdef CONFIG_CRASH_DUMP
1167 boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
1168#endif
1169 phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
1030 1170
1031 __setup_APIC_LVTT(clocks); 1171 setup_local_APIC();
1032 1172
1033 local_irq_restore(flags); 1173#ifdef CONFIG_X86_IO_APIC
1174 if (smp_found_config)
1175 if (!skip_ioapic_setup && nr_ioapics)
1176 setup_IO_APIC();
1177#endif
1178 setup_boot_clock();
1179
1180 return 0;
1034} 1181}
1035 1182
1036/* 1183/*
1037 * In this function we calibrate APIC bus clocks to the external 1184 * APIC command line parameters
1038 * timer. Unfortunately we cannot use jiffies and the timer irq
1039 * to calibrate, since some later bootup code depends on getting
1040 * the first irq? Ugh.
1041 *
1042 * We want to do the calibration only once since we
1043 * want to have local timer irqs syncron. CPUs connected
1044 * by the same APIC bus have the very same bus frequency.
1045 * And we want to have irqs off anyways, no accidental
1046 * APIC irq that way.
1047 */ 1185 */
1048 1186static int __init parse_lapic(char *arg)
1049static int __init calibrate_APIC_clock(void)
1050{
1051 unsigned long long t1 = 0, t2 = 0;
1052 long tt1, tt2;
1053 long result;
1054 int i;
1055 const int LOOPS = HZ/10;
1056
1057 apic_printk(APIC_VERBOSE, "calibrating APIC timer ...\n");
1058
1059 /*
1060 * Put whatever arbitrary (but long enough) timeout
1061 * value into the APIC clock, we just want to get the
1062 * counter running for calibration.
1063 */
1064 __setup_APIC_LVTT(1000000000);
1065
1066 /*
1067 * The timer chip counts down to zero. Let's wait
1068 * for a wraparound to start exact measurement:
1069 * (the current tick might have been already half done)
1070 */
1071
1072 wait_timer_tick();
1073
1074 /*
1075 * We wrapped around just now. Let's start:
1076 */
1077 if (cpu_has_tsc)
1078 rdtscll(t1);
1079 tt1 = apic_read(APIC_TMCCT);
1080
1081 /*
1082 * Let's wait LOOPS wraprounds:
1083 */
1084 for (i = 0; i < LOOPS; i++)
1085 wait_timer_tick();
1086
1087 tt2 = apic_read(APIC_TMCCT);
1088 if (cpu_has_tsc)
1089 rdtscll(t2);
1090
1091 /*
1092 * The APIC bus clock counter is 32 bits only, it
1093 * might have overflown, but note that we use signed
1094 * longs, thus no extra care needed.
1095 *
1096 * underflown to be exact, as the timer counts down ;)
1097 */
1098
1099 result = (tt1-tt2)*APIC_DIVISOR/LOOPS;
1100
1101 if (cpu_has_tsc)
1102 apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
1103 "%ld.%04ld MHz.\n",
1104 ((long)(t2-t1)/LOOPS)/(1000000/HZ),
1105 ((long)(t2-t1)/LOOPS)%(1000000/HZ));
1106
1107 apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
1108 "%ld.%04ld MHz.\n",
1109 result/(1000000/HZ),
1110 result%(1000000/HZ));
1111
1112 return result;
1113}
1114
1115static unsigned int calibration_result;
1116
1117void __init setup_boot_APIC_clock(void)
1118{
1119 unsigned long flags;
1120 apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n");
1121 using_apic_timer = 1;
1122
1123 local_irq_save(flags);
1124
1125 calibration_result = calibrate_APIC_clock();
1126 /*
1127 * Now set up the timer for real.
1128 */
1129 setup_APIC_timer(calibration_result);
1130
1131 local_irq_restore(flags);
1132}
1133
1134void __devinit setup_secondary_APIC_clock(void)
1135{
1136 setup_APIC_timer(calibration_result);
1137}
1138
1139void disable_APIC_timer(void)
1140{
1141 if (using_apic_timer) {
1142 unsigned long v;
1143
1144 v = apic_read(APIC_LVTT);
1145 /*
1146 * When an illegal vector value (0-15) is written to an LVT
1147 * entry and delivery mode is Fixed, the APIC may signal an
1148 * illegal vector error, with out regard to whether the mask
1149 * bit is set or whether an interrupt is actually seen on input.
1150 *
1151 * Boot sequence might call this function when the LVTT has
1152 * '0' vector value. So make sure vector field is set to
1153 * valid value.
1154 */
1155 v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
1156 apic_write_around(APIC_LVTT, v);
1157 }
1158}
1159
1160void enable_APIC_timer(void)
1161{ 1187{
1162 int cpu = smp_processor_id(); 1188 enable_local_apic = 1;
1163 1189 return 0;
1164 if (using_apic_timer &&
1165 !cpu_isset(cpu, timer_bcast_ipi)) {
1166 unsigned long v;
1167
1168 v = apic_read(APIC_LVTT);
1169 apic_write_around(APIC_LVTT, v & ~APIC_LVT_MASKED);
1170 }
1171} 1190}
1191early_param("lapic", parse_lapic);
1172 1192
1173void switch_APIC_timer_to_ipi(void *cpumask) 1193static int __init parse_nolapic(char *arg)
1174{ 1194{
1175 cpumask_t mask = *(cpumask_t *)cpumask; 1195 enable_local_apic = -1;
1176 int cpu = smp_processor_id(); 1196 clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
1177 1197 return 0;
1178 if (cpu_isset(cpu, mask) &&
1179 !cpu_isset(cpu, timer_bcast_ipi)) {
1180 disable_APIC_timer();
1181 cpu_set(cpu, timer_bcast_ipi);
1182 }
1183} 1198}
1184EXPORT_SYMBOL(switch_APIC_timer_to_ipi); 1199early_param("nolapic", parse_nolapic);
1185 1200
1186void switch_ipi_to_APIC_timer(void *cpumask) 1201static int __init apic_set_verbosity(char *str)
1187{ 1202{
1188 cpumask_t mask = *(cpumask_t *)cpumask; 1203 if (strcmp("debug", str) == 0)
1189 int cpu = smp_processor_id(); 1204 apic_verbosity = APIC_DEBUG;
1190 1205 else if (strcmp("verbose", str) == 0)
1191 if (cpu_isset(cpu, mask) && 1206 apic_verbosity = APIC_VERBOSE;
1192 cpu_isset(cpu, timer_bcast_ipi)) { 1207 return 1;
1193 cpu_clear(cpu, timer_bcast_ipi);
1194 enable_APIC_timer();
1195 }
1196} 1208}
1197EXPORT_SYMBOL(switch_ipi_to_APIC_timer);
1198 1209
1199#undef APIC_DIVISOR 1210__setup("apic=", apic_set_verbosity);
1200
1201/*
1202 * Local timer interrupt handler. It does both profiling and
1203 * process statistics/rescheduling.
1204 *
1205 * We do profiling in every local tick, statistics/rescheduling
1206 * happen only every 'profiling multiplier' ticks. The default
1207 * multiplier is 1 and it can be changed by writing the new multiplier
1208 * value into /proc/profile.
1209 */
1210
1211inline void smp_local_timer_interrupt(void)
1212{
1213 profile_tick(CPU_PROFILING);
1214#ifdef CONFIG_SMP
1215 update_process_times(user_mode_vm(get_irq_regs()));
1216#endif
1217 1211
1218 /*
1219 * We take the 'long' return path, and there every subsystem
1220 * grabs the apropriate locks (kernel lock/ irq lock).
1221 *
1222 * we might want to decouple profiling from the 'long path',
1223 * and do the profiling totally in assembly.
1224 *
1225 * Currently this isn't too much of an issue (performance wise),
1226 * we can take more than 100K local irqs per second on a 100 MHz P5.
1227 */
1228}
1229 1212
1230/* 1213/*
1231 * Local APIC timer interrupt. This is the most natural way for doing 1214 * Local APIC interrupts
1232 * local interrupts, but local timer interrupts can be emulated by
1233 * broadcast interrupts too. [in case the hw doesn't support APIC timers]
1234 *
1235 * [ if a single-CPU system runs an SMP kernel then we call the local
1236 * interrupt as well. Thus we cannot inline the local irq ... ]
1237 */ 1215 */
1238 1216
1239fastcall void smp_apic_timer_interrupt(struct pt_regs *regs)
1240{
1241 struct pt_regs *old_regs = set_irq_regs(regs);
1242 int cpu = smp_processor_id();
1243
1244 /*
1245 * the NMI deadlock-detector uses this.
1246 */
1247 per_cpu(irq_stat, cpu).apic_timer_irqs++;
1248
1249 /*
1250 * NOTE! We'd better ACK the irq immediately,
1251 * because timer handling can be slow.
1252 */
1253 ack_APIC_irq();
1254 /*
1255 * update_process_times() expects us to have done irq_enter().
 1256 * Besides, if we don't, timer interrupts ignore the global
1257 * interrupt lock, which is the WrongThing (tm) to do.
1258 */
1259 exit_idle();
1260 irq_enter();
1261 smp_local_timer_interrupt();
1262 irq_exit();
1263 set_irq_regs(old_regs);
1264}
1265
1266#ifndef CONFIG_SMP
1267static void up_apic_timer_interrupt_call(void)
1268{
1269 int cpu = smp_processor_id();
1270
1271 /*
1272 * the NMI deadlock-detector uses this.
1273 */
1274 per_cpu(irq_stat, cpu).apic_timer_irqs++;
1275
1276 smp_local_timer_interrupt();
1277}
1278#endif
1279
1280void smp_send_timer_broadcast_ipi(void)
1281{
1282 cpumask_t mask;
1283
1284 cpus_and(mask, cpu_online_map, timer_bcast_ipi);
1285 if (!cpus_empty(mask)) {
1286#ifdef CONFIG_SMP
1287 send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
1288#else
1289 /*
1290 * We can directly call the apic timer interrupt handler
 1291 * in the UP case, minus all irq-related functions.
1292 */
1293 up_apic_timer_interrupt_call();
1294#endif
1295 }
1296}
1297
1298int setup_profiling_timer(unsigned int multiplier)
1299{
1300 return -EINVAL;
1301}
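/*
 * Worth noting: setup_profiling_timer() above rejects every multiplier
 * with -EINVAL, so on this architecture the 'profiling multiplier'
 * described earlier effectively stays at its default of 1.
 */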
1302
1303/* 1217/*
1304 * This interrupt should _never_ happen with our APIC/SMP architecture 1218 * This interrupt should _never_ happen with our APIC/SMP architecture
1305 */ 1219 */
1306fastcall void smp_spurious_interrupt(struct pt_regs *regs) 1220void smp_spurious_interrupt(struct pt_regs *regs)
1307{ 1221{
1308 unsigned long v; 1222 unsigned long v;
1309 1223
@@ -1319,16 +1233,15 @@ fastcall void smp_spurious_interrupt(struct pt_regs *regs)
1319 ack_APIC_irq(); 1233 ack_APIC_irq();
1320 1234
1321 /* see sw-dev-man vol 3, chapter 7.4.13.5 */ 1235 /* see sw-dev-man vol 3, chapter 7.4.13.5 */
1322 printk(KERN_INFO "spurious APIC interrupt on CPU#%d, should never happen.\n", 1236 printk(KERN_INFO "spurious APIC interrupt on CPU#%d, "
1323 smp_processor_id()); 1237 "should never happen.\n", smp_processor_id());
1324 irq_exit(); 1238 irq_exit();
1325} 1239}
1326 1240
1327/* 1241/*
1328 * This interrupt should never happen with our APIC/SMP architecture 1242 * This interrupt should never happen with our APIC/SMP architecture
1329 */ 1243 */
1330 1244void smp_error_interrupt(struct pt_regs *regs)
1331fastcall void smp_error_interrupt(struct pt_regs *regs)
1332{ 1245{
1333 unsigned long v, v1; 1246 unsigned long v, v1;
1334 1247
@@ -1352,69 +1265,261 @@ fastcall void smp_error_interrupt(struct pt_regs *regs)
1352 7: Illegal register address 1265 7: Illegal register address
1353 */ 1266 */
1354 printk (KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n", 1267 printk (KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n",
1355 smp_processor_id(), v , v1); 1268 smp_processor_id(), v , v1);
1356 irq_exit(); 1269 irq_exit();
1357} 1270}
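/*
 * Example decode of the message above (the value is illustrative, not
 * from a real log): "APIC error on CPU0: 40(40)" would mean bit 6 of
 * the ESR was set, i.e. a received illegal vector - the very condition
 * the masked-LVT comment near the top of this file guards against.
 */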
1358 1271
1359/* 1272/*
1360 * This initializes the IO-APIC and APIC hardware if this is 1273 * Initialize APIC interrupts
1361 * a UP kernel.
1362 */ 1274 */
1363int __init APIC_init_uniprocessor (void) 1275void __init apic_intr_init(void)
1364{ 1276{
1365 if (enable_local_apic < 0) 1277#ifdef CONFIG_SMP
1366 clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); 1278 smp_intr_init();
1279#endif
1280 /* self generated IPI for local APIC timer */
1281 set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
1367 1282
1368 if (!smp_found_config && !cpu_has_apic) 1283 /* IPI vectors for APIC spurious and error interrupts */
1369 return -1; 1284 set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
1285 set_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
1370 1286
1371 /* 1287 /* thermal monitor LVT interrupt */
1372 * Complain if the BIOS pretends there is one. 1288#ifdef CONFIG_X86_MCE_P4THERMAL
1373 */ 1289 set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
1374 if (!cpu_has_apic && APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { 1290#endif
1375 printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", 1291}
1376 boot_cpu_physical_apicid); 1292
1377 clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); 1293/**
1378 return -1; 1294 * connect_bsp_APIC - attach the APIC to the interrupt system
1295 */
1296void __init connect_bsp_APIC(void)
1297{
1298 if (pic_mode) {
1299 /*
1300 * Do not trust the local APIC being empty at bootup.
1301 */
1302 clear_local_APIC();
1303 /*
1304 * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's
1305 * local APIC to INT and NMI lines.
1306 */
1307 apic_printk(APIC_VERBOSE, "leaving PIC mode, "
1308 "enabling APIC mode.\n");
1309 outb(0x70, 0x22);
1310 outb(0x01, 0x23);
1379 } 1311 }
1312 enable_apic_mode();
1313}
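/*
 * Sketch only, not part of the patch: the outb() pairs used here and
 * in disconnect_bsp_APIC() below are the classic IMCR access protocol.
 * A hypothetical helper would make the protocol explicit:
 */
static inline void imcr_write(unsigned char value)
{
	outb(0x70, 0x22);	/* select the IMCR register */
	outb(value, 0x23);	/* 0x01 = route through APIC, 0x00 = PIC mode */
}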
1380 1314
1381 verify_local_APIC(); 1315/**
1316 * disconnect_bsp_APIC - detach the APIC from the interrupt system
 1316 * disconnect_bsp_APIC - detach the APIC from the interrupt system
 1317 * @virt_wire_setup: indicates whether virtual wire mode is selected
1318 *
1319 * Virtual wire mode is necessary to deliver legacy interrupts even when the
1320 * APIC is disabled.
1321 */
1322void disconnect_bsp_APIC(int virt_wire_setup)
1323{
1324 if (pic_mode) {
1325 /*
1326 * Put the board back into PIC mode (has an effect only on
1327 * certain older boards). Note that APIC interrupts, including
1328 * IPIs, won't work beyond this point! The only exception are
1329 * INIT IPIs.
1330 */
1331 apic_printk(APIC_VERBOSE, "disabling APIC mode, "
1332 "entering PIC mode.\n");
1333 outb(0x70, 0x22);
1334 outb(0x00, 0x23);
1335 } else {
1336 /* Go back to Virtual Wire compatibility mode */
1337 unsigned long value;
1382 1338
1383 connect_bsp_APIC(); 1339 /* For the spurious interrupt use vector F, and enable it */
1340 value = apic_read(APIC_SPIV);
1341 value &= ~APIC_VECTOR_MASK;
1342 value |= APIC_SPIV_APIC_ENABLED;
1343 value |= 0xf;
1344 apic_write_around(APIC_SPIV, value);
1384 1345
1385 /* 1346 if (!virt_wire_setup) {
1386 * Hack: In case of kdump, after a crash, kernel might be booting 1347 /*
1387 * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid 1348 * For LVT0 make it edge triggered, active high,
1388 * might be zero if read from MP tables. Get it from LAPIC. 1349 * external and enabled
1389 */ 1350 */
1390#ifdef CONFIG_CRASH_DUMP 1351 value = apic_read(APIC_LVT0);
1391 boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); 1352 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
1392#endif 1353 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
1393 phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); 1354 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED );
1355 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
1356 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
1357 apic_write_around(APIC_LVT0, value);
1358 } else {
1359 /* Disable LVT0 */
1360 apic_write_around(APIC_LVT0, APIC_LVT_MASKED);
1361 }
1394 1362
1395 setup_local_APIC(); 1363 /*
1364 * For LVT1 make it edge triggered, active high, nmi and
1365 * enabled
1366 */
1367 value = apic_read(APIC_LVT1);
1368 value &= ~(
1369 APIC_MODE_MASK | APIC_SEND_PENDING |
1370 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
1371 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
1372 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
1373 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
1374 apic_write_around(APIC_LVT1, value);
1375 }
1376}
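/*
 * Refactoring sketch only: LVT0 and LVT1 above get the same bit
 * surgery and differ only in the delivery mode, so the common part
 * could be factored out (lvt_virtual_wire is hypothetical):
 */
static unsigned long lvt_virtual_wire(unsigned long value, unsigned int mode)
{
	/* edge triggered, active high, not pending, enabled */
	value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
		   APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
		   APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
	value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
	return SET_APIC_DELIVERY_MODE(value, mode);	/* EXTINT or NMI */
}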
1396 1377
1397#ifdef CONFIG_X86_IO_APIC 1378/*
1398 if (smp_found_config) 1379 * Power management
1399 if (!skip_ioapic_setup && nr_ioapics) 1380 */
1400 setup_IO_APIC(); 1381#ifdef CONFIG_PM
1382
1383static struct {
1384 int active;
1385 /* r/w apic fields */
1386 unsigned int apic_id;
1387 unsigned int apic_taskpri;
1388 unsigned int apic_ldr;
1389 unsigned int apic_dfr;
1390 unsigned int apic_spiv;
1391 unsigned int apic_lvtt;
1392 unsigned int apic_lvtpc;
1393 unsigned int apic_lvt0;
1394 unsigned int apic_lvt1;
1395 unsigned int apic_lvterr;
1396 unsigned int apic_tmict;
1397 unsigned int apic_tdcr;
1398 unsigned int apic_thmr;
1399} apic_pm_state;
1400
1401static int lapic_suspend(struct sys_device *dev, pm_message_t state)
1402{
1403 unsigned long flags;
1404 int maxlvt;
1405
1406 if (!apic_pm_state.active)
1407 return 0;
1408
1409 maxlvt = lapic_get_maxlvt();
1410
1411 apic_pm_state.apic_id = apic_read(APIC_ID);
1412 apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
1413 apic_pm_state.apic_ldr = apic_read(APIC_LDR);
1414 apic_pm_state.apic_dfr = apic_read(APIC_DFR);
1415 apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
1416 apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
1417 if (maxlvt >= 4)
1418 apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
1419 apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
1420 apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
1421 apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
1422 apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
1423 apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
1424#ifdef CONFIG_X86_MCE_P4THERMAL
1425 if (maxlvt >= 5)
1426 apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
1401#endif 1427#endif
1402 setup_boot_clock();
1403 1428
1429 local_irq_save(flags);
1430 disable_local_APIC();
1431 local_irq_restore(flags);
1404 return 0; 1432 return 0;
1405} 1433}
1406 1434
1407static int __init parse_lapic(char *arg) 1435static int lapic_resume(struct sys_device *dev)
1408{ 1436{
1409 lapic_enable(); 1437 unsigned int l, h;
1438 unsigned long flags;
1439 int maxlvt;
1440
1441 if (!apic_pm_state.active)
1442 return 0;
1443
1444 maxlvt = lapic_get_maxlvt();
1445
1446 local_irq_save(flags);
1447
1448 /*
1449 * Make sure the APICBASE points to the right address
1450 *
1451 * FIXME! This will be wrong if we ever support suspend on
1452 * SMP! We'll need to do this as part of the CPU restore!
1453 */
1454 rdmsr(MSR_IA32_APICBASE, l, h);
1455 l &= ~MSR_IA32_APICBASE_BASE;
1456 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
1457 wrmsr(MSR_IA32_APICBASE, l, h);
1458
1459 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
1460 apic_write(APIC_ID, apic_pm_state.apic_id);
1461 apic_write(APIC_DFR, apic_pm_state.apic_dfr);
1462 apic_write(APIC_LDR, apic_pm_state.apic_ldr);
1463 apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
1464 apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
1465 apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
1466 apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
1467#ifdef CONFIG_X86_MCE_P4THERMAL
1468 if (maxlvt >= 5)
1469 apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
1470#endif
1471 if (maxlvt >= 4)
1472 apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
1473 apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
1474 apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
1475 apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
1476 apic_write(APIC_ESR, 0);
1477 apic_read(APIC_ESR);
1478 apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
1479 apic_write(APIC_ESR, 0);
1480 apic_read(APIC_ESR);
1481 local_irq_restore(flags);
1410 return 0; 1482 return 0;
1411} 1483}
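/*
 * Worked example for the APICBASE fixup above: with the usual
 * mp_lapic_addr of 0xfee00000, the low MSR word is rewritten to
 * 0xfee00000 | MSR_IA32_APICBASE_ENABLE (bit 11) == 0xfee00800
 * before being written back.
 */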
1412early_param("lapic", parse_lapic);
1413 1484
1414static int __init parse_nolapic(char *arg) 1485/*
1486 * This device has no shutdown method - fully functioning local APICs
1487 * are needed on every CPU up until machine_halt/restart/poweroff.
1488 */
1489
1490static struct sysdev_class lapic_sysclass = {
1491 set_kset_name("lapic"),
1492 .resume = lapic_resume,
1493 .suspend = lapic_suspend,
1494};
1495
1496static struct sys_device device_lapic = {
1497 .id = 0,
1498 .cls = &lapic_sysclass,
1499};
1500
1501static void __devinit apic_pm_activate(void)
1415{ 1502{
1416 lapic_disable(); 1503 apic_pm_state.active = 1;
1417 return 0;
1418} 1504}
1419early_param("nolapic", parse_nolapic);
1420 1505
1506static int __init init_lapic_sysfs(void)
1507{
1508 int error;
1509
1510 if (!cpu_has_apic)
1511 return 0;
1512 /* XXX: remove suspend/resume procs if !apic_pm_state.active? */
1513
1514 error = sysdev_class_register(&lapic_sysclass);
1515 if (!error)
1516 error = sysdev_register(&device_lapic);
1517 return error;
1518}
1519device_initcall(init_lapic_sysfs);
1520
1521#else /* CONFIG_PM */
1522
1523static void apic_pm_activate(void) { }
1524
1525#endif /* CONFIG_PM */
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
index f9ba0af7ee1f..064bbf2861f4 100644
--- a/arch/i386/kernel/apm.c
+++ b/arch/i386/kernel/apm.c
@@ -236,7 +236,6 @@
236 236
237#include "io_ports.h" 237#include "io_ports.h"
238 238
239extern unsigned long get_cmos_time(void);
240extern void machine_real_restart(unsigned char *, int); 239extern void machine_real_restart(unsigned char *, int);
241 240
242#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) 241#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
@@ -1176,28 +1175,6 @@ out:
1176 spin_unlock(&user_list_lock); 1175 spin_unlock(&user_list_lock);
1177} 1176}
1178 1177
1179static void set_time(void)
1180{
1181 struct timespec ts;
1182 if (got_clock_diff) { /* Must know time zone in order to set clock */
1183 ts.tv_sec = get_cmos_time() + clock_cmos_diff;
1184 ts.tv_nsec = 0;
1185 do_settimeofday(&ts);
1186 }
1187}
1188
1189static void get_time_diff(void)
1190{
1191#ifndef CONFIG_APM_RTC_IS_GMT
1192 /*
1193 * Estimate time zone so that set_time can update the clock
1194 */
1195 clock_cmos_diff = -get_cmos_time();
1196 clock_cmos_diff += get_seconds();
1197 got_clock_diff = 1;
1198#endif
1199}
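/*
 * For context, the two deleted helpers implemented a simple offset
 * trick, shown schematically (not working code):
 *
 *   clock_cmos_diff = get_seconds() - get_cmos_time();   at suspend
 *   new system time = get_cmos_time() + clock_cmos_diff; at resume
 *
 * so a CMOS clock kept in local time could still restore the system
 * time correctly across a suspend.
 */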
1200
1201static void reinit_timer(void) 1178static void reinit_timer(void)
1202{ 1179{
1203#ifdef INIT_TIMER_AFTER_SUSPEND 1180#ifdef INIT_TIMER_AFTER_SUSPEND
@@ -1237,19 +1214,6 @@ static int suspend(int vetoable)
1237 local_irq_disable(); 1214 local_irq_disable();
1238 device_power_down(PMSG_SUSPEND); 1215 device_power_down(PMSG_SUSPEND);
1239 1216
1240 /* serialize with the timer interrupt */
1241 write_seqlock(&xtime_lock);
1242
1243 /* protect against access to timer chip registers */
1244 spin_lock(&i8253_lock);
1245
1246 get_time_diff();
1247 /*
1248 * Irq spinlock must be dropped around set_system_power_state.
1249 * We'll undo any timer changes due to interrupts below.
1250 */
1251 spin_unlock(&i8253_lock);
1252 write_sequnlock(&xtime_lock);
1253 local_irq_enable(); 1217 local_irq_enable();
1254 1218
1255 save_processor_state(); 1219 save_processor_state();
@@ -1258,7 +1222,6 @@ static int suspend(int vetoable)
1258 restore_processor_state(); 1222 restore_processor_state();
1259 1223
1260 local_irq_disable(); 1224 local_irq_disable();
1261 set_time();
1262 reinit_timer(); 1225 reinit_timer();
1263 1226
1264 if (err == APM_NO_ERROR) 1227 if (err == APM_NO_ERROR)
@@ -1288,11 +1251,6 @@ static void standby(void)
1288 1251
1289 local_irq_disable(); 1252 local_irq_disable();
1290 device_power_down(PMSG_SUSPEND); 1253 device_power_down(PMSG_SUSPEND);
1291 /* serialize with the timer interrupt */
1292 write_seqlock(&xtime_lock);
1293 /* If needed, notify drivers here */
1294 get_time_diff();
1295 write_sequnlock(&xtime_lock);
1296 local_irq_enable(); 1254 local_irq_enable();
1297 1255
1298 err = set_system_power_state(APM_STATE_STANDBY); 1256 err = set_system_power_state(APM_STATE_STANDBY);
@@ -1386,7 +1344,6 @@ static void check_events(void)
1386 ignore_bounce = 1; 1344 ignore_bounce = 1;
1387 if ((event != APM_NORMAL_RESUME) 1345 if ((event != APM_NORMAL_RESUME)
1388 || (ignore_normal_resume == 0)) { 1346 || (ignore_normal_resume == 0)) {
1389 set_time();
1390 device_resume(); 1347 device_resume();
1391 pm_send_all(PM_RESUME, (void *)0); 1348 pm_send_all(PM_RESUME, (void *)0);
1392 queue_event(event, NULL); 1349 queue_event(event, NULL);
@@ -1402,7 +1359,6 @@ static void check_events(void)
1402 break; 1359 break;
1403 1360
1404 case APM_UPDATE_TIME: 1361 case APM_UPDATE_TIME:
1405 set_time();
1406 break; 1362 break;
1407 1363
1408 case APM_CRITICAL_SUSPEND: 1364 case APM_CRITICAL_SUSPEND:
diff --git a/arch/i386/kernel/cpu/cpufreq/Kconfig b/arch/i386/kernel/cpu/cpufreq/Kconfig
index 5299c5bf4454..6c52182ca323 100644
--- a/arch/i386/kernel/cpu/cpufreq/Kconfig
+++ b/arch/i386/kernel/cpu/cpufreq/Kconfig
@@ -217,6 +217,15 @@ config X86_LONGHAUL
217 217
218 If in doubt, say N. 218 If in doubt, say N.
219 219
220config X86_E_POWERSAVER
221 tristate "VIA C7 Enhanced PowerSaver (EXPERIMENTAL)"
222 select CPU_FREQ_TABLE
223 depends on EXPERIMENTAL
224 help
225 This adds the CPUFreq driver for VIA C7 processors.
226
227 If in doubt, say N.
228
220comment "shared options" 229comment "shared options"
221 230
222config X86_ACPI_CPUFREQ_PROC_INTF 231config X86_ACPI_CPUFREQ_PROC_INTF
diff --git a/arch/i386/kernel/cpu/cpufreq/Makefile b/arch/i386/kernel/cpu/cpufreq/Makefile
index 8de3abe322a9..560f7760dae5 100644
--- a/arch/i386/kernel/cpu/cpufreq/Makefile
+++ b/arch/i386/kernel/cpu/cpufreq/Makefile
@@ -2,6 +2,7 @@ obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o
2obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o 2obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o
3obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o 3obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o
4obj-$(CONFIG_X86_LONGHAUL) += longhaul.o 4obj-$(CONFIG_X86_LONGHAUL) += longhaul.o
5obj-$(CONFIG_X86_E_POWERSAVER) += e_powersaver.o
5obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o 6obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o
6obj-$(CONFIG_SC520_CPUFREQ) += sc520_freq.o 7obj-$(CONFIG_SC520_CPUFREQ) += sc520_freq.o
7obj-$(CONFIG_X86_LONGRUN) += longrun.o 8obj-$(CONFIG_X86_LONGRUN) += longrun.o
diff --git a/arch/i386/kernel/cpu/cpufreq/e_powersaver.c b/arch/i386/kernel/cpu/cpufreq/e_powersaver.c
new file mode 100644
index 000000000000..f43d98e11cc7
--- /dev/null
+++ b/arch/i386/kernel/cpu/cpufreq/e_powersaver.c
@@ -0,0 +1,334 @@
1/*
2 * Based on documentation provided by Dave Jones. Thanks!
3 *
4 * Licensed under the terms of the GNU GPL License version 2.
5 *
6 * BIG FAT DISCLAIMER: Work in progress code. Possibly *dangerous*
7 */
8
9#include <linux/kernel.h>
10#include <linux/module.h>
11#include <linux/init.h>
12#include <linux/cpufreq.h>
13#include <linux/ioport.h>
14#include <linux/slab.h>
15
16#include <asm/msr.h>
17#include <asm/tsc.h>
18#include <asm/timex.h>
19#include <asm/io.h>
20#include <asm/delay.h>
21
22#define EPS_BRAND_C7M 0
23#define EPS_BRAND_C7 1
24#define EPS_BRAND_EDEN 2
25#define EPS_BRAND_C3 3
26
27struct eps_cpu_data {
28 u32 fsb;
29 struct cpufreq_frequency_table freq_table[];
30};
31
32static struct eps_cpu_data *eps_cpu[NR_CPUS];
33
34
35static unsigned int eps_get(unsigned int cpu)
36{
37 struct eps_cpu_data *centaur;
38 u32 lo, hi;
39
40 if (cpu)
41 return 0;
42 centaur = eps_cpu[cpu];
43 if (centaur == NULL)
44 return 0;
45
46 /* Return current frequency */
47 rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
48 return centaur->fsb * ((lo >> 8) & 0xff);
49}
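/*
 * Minimal decode sketch (eps_decode_status is hypothetical): the
 * status MSR read above packs the current VID in bits 0-7 and the
 * current multiplier in bits 8-15; per the printks in eps_cpu_init()
 * below, millivolts come out as vid * 16 + 700.
 */
static inline void eps_decode_status(u32 lo, u8 *vid, u8 *mult)
{
	*vid = lo & 0xff;		/* voltage ID */
	*mult = (lo >> 8) & 0xff;	/* FSB multiplier */
}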
50
51static int eps_set_state(struct eps_cpu_data *centaur,
52 unsigned int cpu,
53 u32 dest_state)
54{
55 struct cpufreq_freqs freqs;
56 u32 lo, hi;
57 int err = 0;
58 int i;
59
60 freqs.old = eps_get(cpu);
61 freqs.new = centaur->fsb * ((dest_state >> 8) & 0xff);
62 freqs.cpu = cpu;
63 cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
64
65 /* Wait while CPU is busy */
66 rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
67 i = 0;
68 while (lo & ((1 << 16) | (1 << 17))) {
69 udelay(16);
70 rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
71 i++;
72 if (unlikely(i > 64)) {
73 err = -ENODEV;
74 goto postchange;
75 }
76 }
77 /* Set new multiplier and voltage */
78 wrmsr(MSR_IA32_PERF_CTL, dest_state & 0xffff, 0);
79 /* Wait until transition end */
80 i = 0;
81 do {
82 udelay(16);
83 rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
84 i++;
85 if (unlikely(i > 64)) {
86 err = -ENODEV;
87 goto postchange;
88 }
89 } while (lo & ((1 << 16) | (1 << 17)));
90
91 /* Return current frequency */
92postchange:
93 rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
94 freqs.new = centaur->fsb * ((lo >> 8) & 0xff);
95
96 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
97 return err;
98}
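/*
 * Illustrative call only: a dest_state word carries the multiplier in
 * its high byte and the VID in its low byte, so (13 << 8) | 0x08 would
 * request 13x at VID 8, i.e. 8 * 16 + 700 = 828 mV.
 */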
99
100static int eps_target(struct cpufreq_policy *policy,
101 unsigned int target_freq,
102 unsigned int relation)
103{
104 struct eps_cpu_data *centaur;
105 unsigned int newstate = 0;
106 unsigned int cpu = policy->cpu;
107 unsigned int dest_state;
108 int ret;
109
110 if (unlikely(eps_cpu[cpu] == NULL))
111 return -ENODEV;
112 centaur = eps_cpu[cpu];
113
114 if (unlikely(cpufreq_frequency_table_target(policy,
115 &eps_cpu[cpu]->freq_table[0],
116 target_freq,
117 relation,
118 &newstate))) {
119 return -EINVAL;
120 }
121
122 /* Make frequency transition */
123 dest_state = centaur->freq_table[newstate].index & 0xffff;
124 ret = eps_set_state(centaur, cpu, dest_state);
125 if (ret)
126 printk(KERN_ERR "eps: Timeout!\n");
127 return ret;
128}
129
130static int eps_verify(struct cpufreq_policy *policy)
131{
132 return cpufreq_frequency_table_verify(policy,
133 &eps_cpu[policy->cpu]->freq_table[0]);
134}
135
136static int eps_cpu_init(struct cpufreq_policy *policy)
137{
138 unsigned int i;
139 u32 lo, hi;
140 u64 val;
141 u8 current_multiplier, current_voltage;
142 u8 max_multiplier, max_voltage;
143 u8 min_multiplier, min_voltage;
144 u8 brand;
145 u32 fsb;
146 struct eps_cpu_data *centaur;
147 struct cpufreq_frequency_table *f_table;
148 int k, step, voltage;
149 int ret;
150 int states;
151
152 if (policy->cpu != 0)
153 return -ENODEV;
154
155 /* Check brand */
156 printk("eps: Detected VIA ");
157 rdmsr(0x1153, lo, hi);
158 brand = (((lo >> 2) ^ lo) >> 18) & 3;
159 switch(brand) {
160 case EPS_BRAND_C7M:
161 printk("C7-M\n");
162 break;
163 case EPS_BRAND_C7:
164 printk("C7\n");
165 break;
166 case EPS_BRAND_EDEN:
167 printk("Eden\n");
168 break;
169 case EPS_BRAND_C3:
170 printk("C3\n");
171 return -ENODEV;
172 break;
173 }
174 /* Enable Enhanced PowerSaver */
175 rdmsrl(MSR_IA32_MISC_ENABLE, val);
176 if (!(val & 1 << 16)) {
177 val |= 1 << 16;
178 wrmsrl(MSR_IA32_MISC_ENABLE, val);
179 /* Can be locked at 0 */
180 rdmsrl(MSR_IA32_MISC_ENABLE, val);
181 if (!(val & 1 << 16)) {
182 printk("eps: Can't enable Enhanced PowerSaver\n");
183 return -ENODEV;
184 }
185 }
186
187 /* Print voltage and multiplier */
188 rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
189 current_voltage = lo & 0xff;
190 printk("eps: Current voltage = %dmV\n", current_voltage * 16 + 700);
191 current_multiplier = (lo >> 8) & 0xff;
192 printk("eps: Current multiplier = %d\n", current_multiplier);
193
194 /* Print limits */
195 max_voltage = hi & 0xff;
196 printk("eps: Highest voltage = %dmV\n", max_voltage * 16 + 700);
197 max_multiplier = (hi >> 8) & 0xff;
198 printk("eps: Highest multiplier = %d\n", max_multiplier);
199 min_voltage = (hi >> 16) & 0xff;
200 printk("eps: Lowest voltage = %dmV\n", min_voltage * 16 + 700);
201 min_multiplier = (hi >> 24) & 0xff;
202 printk("eps: Lowest multiplier = %d\n", min_multiplier);
203
204 /* Sanity checks */
205 if (current_multiplier == 0 || max_multiplier == 0
206 || min_multiplier == 0)
207 return -EINVAL;
208 if (current_multiplier > max_multiplier
209 || max_multiplier <= min_multiplier)
210 return -EINVAL;
211 if (current_voltage > 0x1c || max_voltage > 0x1c)
212 return -EINVAL;
213 if (max_voltage < min_voltage)
214 return -EINVAL;
215
216 /* Calc FSB speed */
217 fsb = cpu_khz / current_multiplier;
218 /* Calc number of p-states supported */
219 if (brand == EPS_BRAND_C7M)
220 states = max_multiplier - min_multiplier + 1;
221 else
222 states = 2;
223
224 /* Allocate private data and frequency table for current cpu */
225 centaur = kzalloc(sizeof(struct eps_cpu_data)
226 + (states + 1) * sizeof(struct cpufreq_frequency_table),
227 GFP_KERNEL);
228 if (!centaur)
229 return -ENOMEM;
230 eps_cpu[0] = centaur;
231
232 /* Copy basic values */
233 centaur->fsb = fsb;
234
235 /* Fill frequency and MSR value table */
236 f_table = &centaur->freq_table[0];
237 if (brand != EPS_BRAND_C7M) {
238 f_table[0].frequency = fsb * min_multiplier;
239 f_table[0].index = (min_multiplier << 8) | min_voltage;
240 f_table[1].frequency = fsb * max_multiplier;
241 f_table[1].index = (max_multiplier << 8) | max_voltage;
242 f_table[2].frequency = CPUFREQ_TABLE_END;
243 } else {
244 k = 0;
245 step = ((max_voltage - min_voltage) * 256)
246 / (max_multiplier - min_multiplier);
247 for (i = min_multiplier; i <= max_multiplier; i++) {
248 voltage = (k * step) / 256 + min_voltage;
249 f_table[k].frequency = fsb * i;
250 f_table[k].index = (i << 8) | voltage;
251 k++;
252 }
253 f_table[k].frequency = CPUFREQ_TABLE_END;
254 }
255
256 policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
257 policy->cpuinfo.transition_latency = 140000; /* 844mV -> 700mV in ns */
258 policy->cur = fsb * current_multiplier;
259
260 ret = cpufreq_frequency_table_cpuinfo(policy, &centaur->freq_table[0]);
261 if (ret) {
262 kfree(centaur);
263 return ret;
264 }
265
266 cpufreq_frequency_table_get_attr(&centaur->freq_table[0], policy->cpu);
267 return 0;
268}
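/*
 * Worked example for the C7-M table fill above, with assumed limits
 * (not read from real hardware): min_multiplier = 6, max_multiplier
 * = 13, min_voltage = 4, max_voltage = 20. Then:
 *
 *   step = ((20 - 4) * 256) / (13 - 6) = 585
 *   k = 0: voltage = (0 * 585) / 256 + 4 = 4
 *   k = 7: voltage = (7 * 585) / 256 + 4 = 19
 *
 * The top entry lands one VID short of max_voltage due to integer
 * truncation, which is harmless since it errs on the low side.
 */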
269
270static int eps_cpu_exit(struct cpufreq_policy *policy)
271{
272 unsigned int cpu = policy->cpu;
273 struct eps_cpu_data *centaur;
274 u32 lo, hi;
275
276 if (eps_cpu[cpu] == NULL)
277 return -ENODEV;
278 centaur = eps_cpu[cpu];
279
280 /* Get max frequency */
281 rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
282 /* Set max frequency */
283 eps_set_state(centaur, cpu, hi & 0xffff);
284 /* Bye */
285 cpufreq_frequency_table_put_attr(policy->cpu);
286 kfree(eps_cpu[cpu]);
287 eps_cpu[cpu] = NULL;
288 return 0;
289}
290
291static struct freq_attr* eps_attr[] = {
292 &cpufreq_freq_attr_scaling_available_freqs,
293 NULL,
294};
295
296static struct cpufreq_driver eps_driver = {
297 .verify = eps_verify,
298 .target = eps_target,
299 .init = eps_cpu_init,
300 .exit = eps_cpu_exit,
301 .get = eps_get,
302 .name = "e_powersaver",
303 .owner = THIS_MODULE,
304 .attr = eps_attr,
305};
306
307static int __init eps_init(void)
308{
309 struct cpuinfo_x86 *c = cpu_data;
310
311 /* This driver will work only on Centaur C7 processors with
312 * Enhanced SpeedStep/PowerSaver registers */
313 if (c->x86_vendor != X86_VENDOR_CENTAUR
314 || c->x86 != 6 || c->x86_model != 10)
315 return -ENODEV;
316 if (!cpu_has(c, X86_FEATURE_EST))
317 return -ENODEV;
318
319 if (cpufreq_register_driver(&eps_driver))
320 return -EINVAL;
321 return 0;
322}
323
324static void __exit eps_exit(void)
325{
326 cpufreq_unregister_driver(&eps_driver);
327}
328
329MODULE_AUTHOR("Rafa³ Bilski <rafalbilski@interia.pl>");
 330MODULE_DESCRIPTION("Enhanced PowerSaver driver for VIA C7 CPUs.");
331MODULE_LICENSE("GPL");
332
333module_init(eps_init);
334module_exit(eps_exit);
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c
index a3db9332d652..b59878a0d9b3 100644
--- a/arch/i386/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c
@@ -8,12 +8,11 @@
 8 * VIA currently have 3 different versions of Longhaul. 8 * VIA currently have 3 different versions of Longhaul.
9 * Version 1 (Longhaul) uses the BCR2 MSR at 0x1147. 9 * Version 1 (Longhaul) uses the BCR2 MSR at 0x1147.
10 * It is present only in Samuel 1 (C5A), Samuel 2 (C5B) stepping 0. 10 * It is present only in Samuel 1 (C5A), Samuel 2 (C5B) stepping 0.
11 * Version 2 of longhaul is the same as v1, but adds voltage scaling. 11 * Version 2 of longhaul is backward compatible with v1, but adds
 12 * Present in Samuel 2 (steppings 1-7 only) (C5B), and Ezra (C5C) 12 * the LONGHAUL MSR for both frequency and voltage scaling.
13 * voltage scaling support has currently been disabled in this driver 13 * Present in Samuel 2 (steppings 1-7 only) (C5B), and Ezra (C5C).
14 * until we have code that gets it right.
15 * Version 3 of longhaul got renamed to Powersaver and redesigned 14 * Version 3 of longhaul got renamed to Powersaver and redesigned
16 * to use the POWERSAVER MSR at 0x110a. 15 * to use only the POWERSAVER MSR at 0x110a.
17 * It is present in Ezra-T (C5M), Nehemiah (C5X) and above. 16 * It is present in Ezra-T (C5M), Nehemiah (C5X) and above.
 18 * It's pretty much the same, feature-wise, as longhaul v2, though 17 * It's pretty much the same, feature-wise, as longhaul v2, though
19 * there is provision for scaling FSB too, but this doesn't work 18 * there is provision for scaling FSB too, but this doesn't work
@@ -51,10 +50,12 @@
51#define CPU_EZRA 3 50#define CPU_EZRA 3
52#define CPU_EZRA_T 4 51#define CPU_EZRA_T 4
53#define CPU_NEHEMIAH 5 52#define CPU_NEHEMIAH 5
53#define CPU_NEHEMIAH_C 6
54 54
55/* Flags */ 55/* Flags */
56#define USE_ACPI_C3 (1 << 1) 56#define USE_ACPI_C3 (1 << 1)
57#define USE_NORTHBRIDGE (1 << 2) 57#define USE_NORTHBRIDGE (1 << 2)
58#define USE_VT8235 (1 << 3)
58 59
59static int cpu_model; 60static int cpu_model;
60static unsigned int numscales=16; 61static unsigned int numscales=16;
@@ -63,7 +64,8 @@ static unsigned int fsb;
63static struct mV_pos *vrm_mV_table; 64static struct mV_pos *vrm_mV_table;
64static unsigned char *mV_vrm_table; 65static unsigned char *mV_vrm_table;
65struct f_msr { 66struct f_msr {
66 unsigned char vrm; 67 u8 vrm;
68 u8 pos;
67}; 69};
68static struct f_msr f_msr_table[32]; 70static struct f_msr f_msr_table[32];
69 71
@@ -73,10 +75,10 @@ static int can_scale_voltage;
73static struct acpi_processor *pr = NULL; 75static struct acpi_processor *pr = NULL;
74static struct acpi_processor_cx *cx = NULL; 76static struct acpi_processor_cx *cx = NULL;
75static u8 longhaul_flags; 77static u8 longhaul_flags;
78static u8 longhaul_pos;
76 79
77/* Module parameters */ 80/* Module parameters */
78static int scale_voltage; 81static int scale_voltage;
79static int ignore_latency;
80 82
81#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg) 83#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg)
82 84
@@ -164,26 +166,47 @@ static void do_longhaul1(unsigned int clock_ratio_index)
164static void do_powersaver(int cx_address, unsigned int clock_ratio_index) 166static void do_powersaver(int cx_address, unsigned int clock_ratio_index)
165{ 167{
166 union msr_longhaul longhaul; 168 union msr_longhaul longhaul;
169 u8 dest_pos;
167 u32 t; 170 u32 t;
168 171
172 dest_pos = f_msr_table[clock_ratio_index].pos;
173
169 rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); 174 rdmsrl(MSR_VIA_LONGHAUL, longhaul.val);
175 /* Setup new frequency */
170 longhaul.bits.RevisionKey = longhaul.bits.RevisionID; 176 longhaul.bits.RevisionKey = longhaul.bits.RevisionID;
171 longhaul.bits.SoftBusRatio = clock_ratio_index & 0xf; 177 longhaul.bits.SoftBusRatio = clock_ratio_index & 0xf;
172 longhaul.bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4; 178 longhaul.bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4;
173 longhaul.bits.EnableSoftBusRatio = 1; 179 /* Setup new voltage */
174 180 if (can_scale_voltage)
175 if (can_scale_voltage) {
176 longhaul.bits.SoftVID = f_msr_table[clock_ratio_index].vrm; 181 longhaul.bits.SoftVID = f_msr_table[clock_ratio_index].vrm;
182 /* Sync to timer tick */
183 safe_halt();
184 /* Raise voltage if necessary */
185 if (can_scale_voltage && longhaul_pos < dest_pos) {
177 longhaul.bits.EnableSoftVID = 1; 186 longhaul.bits.EnableSoftVID = 1;
187 wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
188 /* Change voltage */
189 if (!cx_address) {
190 ACPI_FLUSH_CPU_CACHE();
191 halt();
192 } else {
193 ACPI_FLUSH_CPU_CACHE();
194 /* Invoke C3 */
195 inb(cx_address);
196 /* Dummy op - must do something useless after P_LVL3
197 * read */
198 t = inl(acpi_gbl_FADT.xpm_timer_block.address);
199 }
200 longhaul.bits.EnableSoftVID = 0;
201 wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
202 longhaul_pos = dest_pos;
178 } 203 }
179 204
180 /* Sync to timer tick */
181 safe_halt();
182 /* Change frequency on next halt or sleep */ 205 /* Change frequency on next halt or sleep */
206 longhaul.bits.EnableSoftBusRatio = 1;
183 wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); 207 wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
184 if (!cx_address) { 208 if (!cx_address) {
185 ACPI_FLUSH_CPU_CACHE(); 209 ACPI_FLUSH_CPU_CACHE();
186 /* Invoke C1 */
187 halt(); 210 halt();
188 } else { 211 } else {
189 ACPI_FLUSH_CPU_CACHE(); 212 ACPI_FLUSH_CPU_CACHE();
@@ -193,12 +216,29 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index)
193 t = inl(acpi_gbl_FADT.xpm_timer_block.address); 216 t = inl(acpi_gbl_FADT.xpm_timer_block.address);
194 } 217 }
195 /* Disable bus ratio bit */ 218 /* Disable bus ratio bit */
196 local_irq_disable();
197 longhaul.bits.RevisionKey = longhaul.bits.RevisionID;
198 longhaul.bits.EnableSoftBusRatio = 0; 219 longhaul.bits.EnableSoftBusRatio = 0;
199 longhaul.bits.EnableSoftBSEL = 0;
200 longhaul.bits.EnableSoftVID = 0;
201 wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); 220 wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
221
222 /* Reduce voltage if necessary */
223 if (can_scale_voltage && longhaul_pos > dest_pos) {
224 longhaul.bits.EnableSoftVID = 1;
225 wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
226 /* Change voltage */
227 if (!cx_address) {
228 ACPI_FLUSH_CPU_CACHE();
229 halt();
230 } else {
231 ACPI_FLUSH_CPU_CACHE();
232 /* Invoke C3 */
233 inb(cx_address);
234 /* Dummy op - must do something useless after P_LVL3
235 * read */
236 t = inl(acpi_gbl_FADT.xpm_timer_block.address);
237 }
238 longhaul.bits.EnableSoftVID = 0;
239 wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
240 longhaul_pos = dest_pos;
241 }
202} 242}
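/*
 * The control flow above reduces to a simple ordering rule (sketch
 * only; raise_voltage/lower_voltage/change_ratio are hypothetical):
 *
 *   if (longhaul_pos < dest_pos)
 *           raise_voltage(dest_pos);    going up: volts first
 *   change_ratio(clock_ratio_index);
 *   if (longhaul_pos > dest_pos)
 *           lower_voltage(dest_pos);    going down: volts last
 *
 * the same volts-before-MHz / MHz-before-volts discipline the old
 * comment credited to powernow-k7.c.
 */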
203 243
204/** 244/**
@@ -257,26 +297,19 @@ static void longhaul_setstate(unsigned int clock_ratio_index)
257 /* 297 /*
258 * Longhaul v1. (Samuel[C5A] and Samuel2 stepping 0[C5B]) 298 * Longhaul v1. (Samuel[C5A] and Samuel2 stepping 0[C5B])
259 * Software controlled multipliers only. 299 * Software controlled multipliers only.
260 *
261 * *NB* Until we get voltage scaling working v1 & v2 are the same code.
262 * Longhaul v2 appears in Samuel2 Steppings 1->7 [C5b] and Ezra [C5C]
263 */ 300 */
264 case TYPE_LONGHAUL_V1: 301 case TYPE_LONGHAUL_V1:
265 case TYPE_LONGHAUL_V2:
266 do_longhaul1(clock_ratio_index); 302 do_longhaul1(clock_ratio_index);
267 break; 303 break;
268 304
269 /* 305 /*
306 * Longhaul v2 appears in Samuel2 Steppings 1->7 [C5B] and Ezra [C5C]
307 *
270 * Longhaul v3 (aka Powersaver). (Ezra-T [C5M] & Nehemiah [C5N]) 308 * Longhaul v3 (aka Powersaver). (Ezra-T [C5M] & Nehemiah [C5N])
271 * We can scale voltage with this too, but that's currently
272 * disabled until we come up with a decent 'match freq to voltage'
273 * algorithm.
274 * When we add voltage scaling, we will also need to do the
275 * voltage/freq setting in order depending on the direction
276 * of scaling (like we do in powernow-k7.c)
277 * Nehemiah can do FSB scaling too, but this has never been proven 309 * Nehemiah can do FSB scaling too, but this has never been proven
278 * to work in practice. 310 * to work in practice.
279 */ 311 */
312 case TYPE_LONGHAUL_V2:
280 case TYPE_POWERSAVER: 313 case TYPE_POWERSAVER:
281 if (longhaul_flags & USE_ACPI_C3) { 314 if (longhaul_flags & USE_ACPI_C3) {
282 /* Don't allow wakeup */ 315 /* Don't allow wakeup */
@@ -301,6 +334,7 @@ static void longhaul_setstate(unsigned int clock_ratio_index)
301 local_irq_restore(flags); 334 local_irq_restore(flags);
302 preempt_enable(); 335 preempt_enable();
303 336
337 freqs.new = calc_speed(longhaul_get_cpu_mult());
304 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); 338 cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
305} 339}
306 340
@@ -315,31 +349,19 @@ static void longhaul_setstate(unsigned int clock_ratio_index)
315 349
316#define ROUNDING 0xf 350#define ROUNDING 0xf
317 351
318static int _guess(int guess, int mult)
319{
320 int target;
321
322 target = ((mult/10)*guess);
323 if (mult%10 != 0)
324 target += (guess/2);
325 target += ROUNDING/2;
326 target &= ~ROUNDING;
327 return target;
328}
329
330
331static int guess_fsb(int mult) 352static int guess_fsb(int mult)
332{ 353{
333 int speed = (cpu_khz/1000); 354 int speed = cpu_khz / 1000;
334 int i; 355 int i;
335 int speeds[] = { 66, 100, 133, 200 }; 356 int speeds[] = { 666, 1000, 1333, 2000 };
336 357 int f_max, f_min;
337 speed += ROUNDING/2; 358
338 speed &= ~ROUNDING; 359 for (i = 0; i < 4; i++) {
339 360 f_max = ((speeds[i] * mult) + 50) / 100;
340 for (i=0; i<4; i++) { 361 f_max += (ROUNDING / 2);
341 if (_guess(speeds[i], mult) == speed) 362 f_min = f_max - ROUNDING;
342 return speeds[i]; 363 if ((speed <= f_max) && (speed >= f_min))
364 return speeds[i] / 10;
343 } 365 }
344 return 0; 366 return 0;
345} 367}
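/*
 * Worked example for the new guess_fsb(): a 13.0x part (mult == 130)
 * with cpu_khz around 1300000 gives speed == 1300. For the 100 MHz
 * entry (speeds[1] == 1000), f_max = (1000 * 130 + 50) / 100 + 7 ==
 * 1307 and f_min == 1292, so 1300 falls inside the window and the
 * function returns 1000 / 10 == 100.
 */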
@@ -347,67 +369,40 @@ static int guess_fsb(int mult)
347 369
348static int __init longhaul_get_ranges(void) 370static int __init longhaul_get_ranges(void)
349{ 371{
350 unsigned long invalue;
351 unsigned int ezra_t_multipliers[32]= {
352 90, 30, 40, 100, 55, 35, 45, 95,
353 50, 70, 80, 60, 120, 75, 85, 65,
354 -1, 110, 120, -1, 135, 115, 125, 105,
355 130, 150, 160, 140, -1, 155, -1, 145 };
356 unsigned int j, k = 0; 372 unsigned int j, k = 0;
357 union msr_longhaul longhaul; 373 int mult;
358 int mult = 0;
359 374
360 switch (longhaul_version) { 375 /* Get current frequency */
361 case TYPE_LONGHAUL_V1: 376 mult = longhaul_get_cpu_mult();
362 case TYPE_LONGHAUL_V2: 377 if (mult == -1) {
363 /* Ugh, Longhaul v1 didn't have the min/max MSRs. 378 printk(KERN_INFO PFX "Invalid (reserved) multiplier!\n");
364 Assume min=3.0x & max = whatever we booted at. */ 379 return -EINVAL;
380 }
381 fsb = guess_fsb(mult);
382 if (fsb == 0) {
383 printk(KERN_INFO PFX "Invalid (reserved) FSB!\n");
384 return -EINVAL;
385 }
386 /* Get max multiplier - as we always did.
 387 * The Longhaul MSR is useful only when voltage scaling is enabled;
 388 * the C3 boots at max anyway. */
389 maxmult = mult;
390 /* Get min multiplier */
391 switch (cpu_model) {
392 case CPU_NEHEMIAH:
393 minmult = 50;
394 break;
395 case CPU_NEHEMIAH_C:
396 minmult = 40;
397 break;
398 default:
365 minmult = 30; 399 minmult = 30;
366 maxmult = mult = longhaul_get_cpu_mult();
367 break; 400 break;
368
369 case TYPE_POWERSAVER:
370 /* Ezra-T */
371 if (cpu_model==CPU_EZRA_T) {
372 minmult = 30;
373 rdmsrl (MSR_VIA_LONGHAUL, longhaul.val);
374 invalue = longhaul.bits.MaxMHzBR;
375 if (longhaul.bits.MaxMHzBR4)
376 invalue += 16;
377 maxmult = mult = ezra_t_multipliers[invalue];
378 break;
379 }
380
381 /* Nehemiah */
382 if (cpu_model==CPU_NEHEMIAH) {
383 rdmsrl (MSR_VIA_LONGHAUL, longhaul.val);
384
385 /*
386 * TODO: This code works, but raises a lot of questions.
387 * - Some Nehemiah's seem to have broken Min/MaxMHzBR's.
388 * We get around this by using a hardcoded multiplier of 4.0x
 389 * for the minimum speed, and the speed we booted up at for the max.
 390 * This is done in longhaul_get_cpu_mult() by reading the EBLCR register.
 391 * - According to some VIA documentation, EBLCR is only present
 392 * in pre-Nehemiah C3s. How this still works is a mystery.
 393 * We're possibly using something undocumented and unsupported,
 394 * but it works, so we don't grumble.
395 */
396 minmult=40;
397 maxmult = mult = longhaul_get_cpu_mult();
398 break;
399 }
400 } 401 }
401 fsb = guess_fsb(mult);
402 402
403 dprintk ("MinMult:%d.%dx MaxMult:%d.%dx\n", 403 dprintk ("MinMult:%d.%dx MaxMult:%d.%dx\n",
404 minmult/10, minmult%10, maxmult/10, maxmult%10); 404 minmult/10, minmult%10, maxmult/10, maxmult%10);
405 405
406 if (fsb == 0) {
407 printk (KERN_INFO PFX "Invalid (reserved) FSB!\n");
408 return -EINVAL;
409 }
410
411 highest_speed = calc_speed(maxmult); 406 highest_speed = calc_speed(maxmult);
412 lowest_speed = calc_speed(minmult); 407 lowest_speed = calc_speed(minmult);
413 dprintk ("FSB:%dMHz Lowest speed: %s Highest speed:%s\n", fsb, 408 dprintk ("FSB:%dMHz Lowest speed: %s Highest speed:%s\n", fsb,
@@ -455,6 +450,7 @@ static void __init longhaul_setup_voltagescaling(void)
455 union msr_longhaul longhaul; 450 union msr_longhaul longhaul;
456 struct mV_pos minvid, maxvid; 451 struct mV_pos minvid, maxvid;
457 unsigned int j, speed, pos, kHz_step, numvscales; 452 unsigned int j, speed, pos, kHz_step, numvscales;
453 int min_vid_speed;
458 454
459 rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); 455 rdmsrl(MSR_VIA_LONGHAUL, longhaul.val);
460 if (!(longhaul.bits.RevisionID & 1)) { 456 if (!(longhaul.bits.RevisionID & 1)) {
@@ -468,14 +464,14 @@ static void __init longhaul_setup_voltagescaling(void)
468 mV_vrm_table = &mV_vrm85[0]; 464 mV_vrm_table = &mV_vrm85[0];
469 } else { 465 } else {
470 printk (KERN_INFO PFX "Mobile VRM\n"); 466 printk (KERN_INFO PFX "Mobile VRM\n");
467 if (cpu_model < CPU_NEHEMIAH)
468 return;
471 vrm_mV_table = &mobilevrm_mV[0]; 469 vrm_mV_table = &mobilevrm_mV[0];
472 mV_vrm_table = &mV_mobilevrm[0]; 470 mV_vrm_table = &mV_mobilevrm[0];
473 } 471 }
474 472
475 minvid = vrm_mV_table[longhaul.bits.MinimumVID]; 473 minvid = vrm_mV_table[longhaul.bits.MinimumVID];
476 maxvid = vrm_mV_table[longhaul.bits.MaximumVID]; 474 maxvid = vrm_mV_table[longhaul.bits.MaximumVID];
477 numvscales = maxvid.pos - minvid.pos + 1;
478 kHz_step = (highest_speed - lowest_speed) / numvscales;
479 475
480 if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) { 476 if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) {
481 printk (KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. " 477 printk (KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. "
@@ -491,20 +487,59 @@ static void __init longhaul_setup_voltagescaling(void)
491 return; 487 return;
492 } 488 }
493 489
494 printk(KERN_INFO PFX "Max VID=%d.%03d Min VID=%d.%03d, %d possible voltage scales\n", 490 /* How many voltage steps */
491 numvscales = maxvid.pos - minvid.pos + 1;
492 printk(KERN_INFO PFX
493 "Max VID=%d.%03d "
494 "Min VID=%d.%03d, "
495 "%d possible voltage scales\n",
495 maxvid.mV/1000, maxvid.mV%1000, 496 maxvid.mV/1000, maxvid.mV%1000,
496 minvid.mV/1000, minvid.mV%1000, 497 minvid.mV/1000, minvid.mV%1000,
497 numvscales); 498 numvscales);
498 499
500 /* Calculate max frequency at min voltage */
501 j = longhaul.bits.MinMHzBR;
502 if (longhaul.bits.MinMHzBR4)
503 j += 16;
504 min_vid_speed = eblcr_table[j];
505 if (min_vid_speed == -1)
506 return;
507 switch (longhaul.bits.MinMHzFSB) {
508 case 0:
509 min_vid_speed *= 13333;
510 break;
511 case 1:
512 min_vid_speed *= 10000;
513 break;
514 case 3:
515 min_vid_speed *= 6666;
516 break;
517 default:
518 return;
519 break;
520 }
521 if (min_vid_speed >= highest_speed)
522 return;
523 /* Calculate kHz for one voltage step */
524 kHz_step = (highest_speed - min_vid_speed) / numvscales;
525
526
499 j = 0; 527 j = 0;
500 while (longhaul_table[j].frequency != CPUFREQ_TABLE_END) { 528 while (longhaul_table[j].frequency != CPUFREQ_TABLE_END) {
501 speed = longhaul_table[j].frequency; 529 speed = longhaul_table[j].frequency;
502 pos = (speed - lowest_speed) / kHz_step + minvid.pos; 530 if (speed > min_vid_speed)
531 pos = (speed - min_vid_speed) / kHz_step + minvid.pos;
532 else
533 pos = minvid.pos;
503 f_msr_table[longhaul_table[j].index].vrm = mV_vrm_table[pos]; 534 f_msr_table[longhaul_table[j].index].vrm = mV_vrm_table[pos];
535 f_msr_table[longhaul_table[j].index].pos = pos;
504 j++; 536 j++;
505 } 537 }
506 538
539 longhaul_pos = maxvid.pos;
507 can_scale_voltage = 1; 540 can_scale_voltage = 1;
541 printk(KERN_INFO PFX "Voltage scaling enabled. "
542 "Use of \"conservative\" governor is highly recommended.\n");
508} 543}
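/*
 * Worked example for the new kHz_step computation, with assumed MSR
 * contents: MinMHzBR decoding via eblcr_table[] to 60 (6.0x) and
 * MinMHzFSB == 1 (100 MHz) give min_vid_speed = 60 * 10000 = 600000
 * kHz. With highest_speed == 1400000 kHz and numvscales == 8, one
 * voltage position then spans (1400000 - 600000) / 8 = 100000 kHz.
 */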
509 544
510 545
@@ -573,20 +608,51 @@ static int enable_arbiter_disable(void)
573 if (dev != NULL) { 608 if (dev != NULL) {
574 /* Enable access to port 0x22 */ 609 /* Enable access to port 0x22 */
575 pci_read_config_byte(dev, reg, &pci_cmd); 610 pci_read_config_byte(dev, reg, &pci_cmd);
576 if ( !(pci_cmd & 1<<7) ) { 611 if (!(pci_cmd & 1<<7)) {
577 pci_cmd |= 1<<7; 612 pci_cmd |= 1<<7;
578 pci_write_config_byte(dev, reg, pci_cmd); 613 pci_write_config_byte(dev, reg, pci_cmd);
614 pci_read_config_byte(dev, reg, &pci_cmd);
615 if (!(pci_cmd & 1<<7)) {
616 printk(KERN_ERR PFX
617 "Can't enable access to port 0x22.\n");
618 return 0;
619 }
579 } 620 }
580 return 1; 621 return 1;
581 } 622 }
582 return 0; 623 return 0;
583} 624}
584 625
626static int longhaul_setup_vt8235(void)
627{
628 struct pci_dev *dev;
629 u8 pci_cmd;
630
631 /* Find VT8235 southbridge */
632 dev = pci_find_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235, NULL);
633 if (dev != NULL) {
634 /* Set transition time to max */
635 pci_read_config_byte(dev, 0xec, &pci_cmd);
636 pci_cmd &= ~(1 << 2);
637 pci_write_config_byte(dev, 0xec, pci_cmd);
638 pci_read_config_byte(dev, 0xe4, &pci_cmd);
639 pci_cmd &= ~(1 << 7);
640 pci_write_config_byte(dev, 0xe4, pci_cmd);
641 pci_read_config_byte(dev, 0xe5, &pci_cmd);
642 pci_cmd |= 1 << 7;
643 pci_write_config_byte(dev, 0xe5, pci_cmd);
644 return 1;
645 }
646 return 0;
647}
648
585static int __init longhaul_cpu_init(struct cpufreq_policy *policy) 649static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
586{ 650{
587 struct cpuinfo_x86 *c = cpu_data; 651 struct cpuinfo_x86 *c = cpu_data;
588 char *cpuname=NULL; 652 char *cpuname=NULL;
589 int ret; 653 int ret;
654 u32 lo, hi;
655 int vt8235_present;
590 656
591 /* Check what we have on this motherboard */ 657 /* Check what we have on this motherboard */
592 switch (c->x86_model) { 658 switch (c->x86_model) {
@@ -599,16 +665,20 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
599 break; 665 break;
600 666
601 case 7: 667 case 7:
602 longhaul_version = TYPE_LONGHAUL_V1;
603 switch (c->x86_mask) { 668 switch (c->x86_mask) {
604 case 0: 669 case 0:
670 longhaul_version = TYPE_LONGHAUL_V1;
605 cpu_model = CPU_SAMUEL2; 671 cpu_model = CPU_SAMUEL2;
606 cpuname = "C3 'Samuel 2' [C5B]"; 672 cpuname = "C3 'Samuel 2' [C5B]";
607 /* Note, this is not a typo, early Samuel2's had Samuel1 ratios. */ 673 /* Note, this is not a typo, early Samuel2's had
608 memcpy (clock_ratio, samuel1_clock_ratio, sizeof(samuel1_clock_ratio)); 674 * Samuel1 ratios. */
609 memcpy (eblcr_table, samuel2_eblcr, sizeof(samuel2_eblcr)); 675 memcpy(clock_ratio, samuel1_clock_ratio,
676 sizeof(samuel1_clock_ratio));
677 memcpy(eblcr_table, samuel2_eblcr,
678 sizeof(samuel2_eblcr));
610 break; 679 break;
611 case 1 ... 15: 680 case 1 ... 15:
681 longhaul_version = TYPE_LONGHAUL_V2;
612 if (c->x86_mask < 8) { 682 if (c->x86_mask < 8) {
613 cpu_model = CPU_SAMUEL2; 683 cpu_model = CPU_SAMUEL2;
614 cpuname = "C3 'Samuel 2' [C5B]"; 684 cpuname = "C3 'Samuel 2' [C5B]";
@@ -616,8 +686,10 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
616 cpu_model = CPU_EZRA; 686 cpu_model = CPU_EZRA;
617 cpuname = "C3 'Ezra' [C5C]"; 687 cpuname = "C3 'Ezra' [C5C]";
618 } 688 }
619 memcpy (clock_ratio, ezra_clock_ratio, sizeof(ezra_clock_ratio)); 689 memcpy(clock_ratio, ezra_clock_ratio,
620 memcpy (eblcr_table, ezra_eblcr, sizeof(ezra_eblcr)); 690 sizeof(ezra_clock_ratio));
691 memcpy(eblcr_table, ezra_eblcr,
692 sizeof(ezra_eblcr));
621 break; 693 break;
622 } 694 }
623 break; 695 break;
@@ -632,24 +704,24 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
632 break; 704 break;
633 705
634 case 9: 706 case 9:
635 cpu_model = CPU_NEHEMIAH;
636 longhaul_version = TYPE_POWERSAVER; 707 longhaul_version = TYPE_POWERSAVER;
637 numscales=32; 708 numscales = 32;
709 memcpy(clock_ratio,
710 nehemiah_clock_ratio,
711 sizeof(nehemiah_clock_ratio));
712 memcpy(eblcr_table, nehemiah_eblcr, sizeof(nehemiah_eblcr));
638 switch (c->x86_mask) { 713 switch (c->x86_mask) {
639 case 0 ... 1: 714 case 0 ... 1:
640 cpuname = "C3 'Nehemiah A' [C5N]"; 715 cpu_model = CPU_NEHEMIAH;
641 memcpy (clock_ratio, nehemiah_a_clock_ratio, sizeof(nehemiah_a_clock_ratio)); 716 cpuname = "C3 'Nehemiah A' [C5XLOE]";
642 memcpy (eblcr_table, nehemiah_a_eblcr, sizeof(nehemiah_a_eblcr));
643 break; 717 break;
644 case 2 ... 4: 718 case 2 ... 4:
645 cpuname = "C3 'Nehemiah B' [C5N]"; 719 cpu_model = CPU_NEHEMIAH;
646 memcpy (clock_ratio, nehemiah_b_clock_ratio, sizeof(nehemiah_b_clock_ratio)); 720 cpuname = "C3 'Nehemiah B' [C5XLOH]";
647 memcpy (eblcr_table, nehemiah_b_eblcr, sizeof(nehemiah_b_eblcr));
648 break; 721 break;
649 case 5 ... 15: 722 case 5 ... 15:
650 cpuname = "C3 'Nehemiah C' [C5N]"; 723 cpu_model = CPU_NEHEMIAH_C;
651 memcpy (clock_ratio, nehemiah_c_clock_ratio, sizeof(nehemiah_c_clock_ratio)); 724 cpuname = "C3 'Nehemiah C' [C5P]";
652 memcpy (eblcr_table, nehemiah_c_eblcr, sizeof(nehemiah_c_eblcr));
653 break; 725 break;
654 } 726 }
655 break; 727 break;
@@ -658,6 +730,13 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
658 cpuname = "Unknown"; 730 cpuname = "Unknown";
659 break; 731 break;
660 } 732 }
733 /* Check Longhaul ver. 2 */
734 if (longhaul_version == TYPE_LONGHAUL_V2) {
735 rdmsr(MSR_VIA_LONGHAUL, lo, hi);
736 if (lo == 0 && hi == 0)
737 /* Looks like MSR isn't present */
738 longhaul_version = TYPE_LONGHAUL_V1;
739 }
661 740
662 printk (KERN_INFO PFX "VIA %s CPU detected. ", cpuname); 741 printk (KERN_INFO PFX "VIA %s CPU detected. ", cpuname);
663 switch (longhaul_version) { 742 switch (longhaul_version) {
@@ -670,15 +749,18 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
670 break; 749 break;
671 }; 750 };
672 751
752 /* Doesn't hurt */
753 vt8235_present = longhaul_setup_vt8235();
754
673 /* Find ACPI data for processor */ 755 /* Find ACPI data for processor */
674 acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, 756 acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
675 &longhaul_walk_callback, NULL, (void *)&pr); 757 ACPI_UINT32_MAX, &longhaul_walk_callback,
758 NULL, (void *)&pr);
676 759
677 /* Check ACPI support for C3 state */ 760 /* Check ACPI support for C3 state */
678 if ((pr != NULL) && (longhaul_version == TYPE_POWERSAVER)) { 761 if (pr != NULL && longhaul_version != TYPE_LONGHAUL_V1) {
679 cx = &pr->power.states[ACPI_STATE_C3]; 762 cx = &pr->power.states[ACPI_STATE_C3];
680 if (cx->address > 0 && 763 if (cx->address > 0 && cx->latency <= 1000) {
681 (cx->latency <= 1000 || ignore_latency != 0) ) {
682 longhaul_flags |= USE_ACPI_C3; 764 longhaul_flags |= USE_ACPI_C3;
683 goto print_support_type; 765 goto print_support_type;
684 } 766 }
@@ -688,8 +770,11 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
688 longhaul_flags |= USE_NORTHBRIDGE; 770 longhaul_flags |= USE_NORTHBRIDGE;
689 goto print_support_type; 771 goto print_support_type;
690 } 772 }
691 773 /* Use VT8235 southbridge if present */
692 /* No ACPI C3 or we can't use it */ 774 if (longhaul_version == TYPE_POWERSAVER && vt8235_present) {
775 longhaul_flags |= USE_VT8235;
776 goto print_support_type;
777 }
693 /* Check ACPI support for bus master arbiter disable */ 778 /* Check ACPI support for bus master arbiter disable */
694 if ((pr == NULL) || !(pr->flags.bm_control)) { 779 if ((pr == NULL) || !(pr->flags.bm_control)) {
695 printk(KERN_ERR PFX 780 printk(KERN_ERR PFX
@@ -698,18 +783,18 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
698 } 783 }
699 784
700print_support_type: 785print_support_type:
701 if (!(longhaul_flags & USE_NORTHBRIDGE)) { 786 if (longhaul_flags & USE_NORTHBRIDGE)
702 printk (KERN_INFO PFX "Using ACPI support.\n");
703 } else {
704 printk (KERN_INFO PFX "Using northbridge support.\n"); 787 printk (KERN_INFO PFX "Using northbridge support.\n");
705 } 788 else if (longhaul_flags & USE_VT8235)
789 printk (KERN_INFO PFX "Using VT8235 support.\n");
790 else
791 printk (KERN_INFO PFX "Using ACPI support.\n");
706 792
707 ret = longhaul_get_ranges(); 793 ret = longhaul_get_ranges();
708 if (ret != 0) 794 if (ret != 0)
709 return ret; 795 return ret;
710 796
711 if ((longhaul_version==TYPE_LONGHAUL_V2 || longhaul_version==TYPE_POWERSAVER) && 797 if ((longhaul_version != TYPE_LONGHAUL_V1) && (scale_voltage != 0))
712 (scale_voltage != 0))
713 longhaul_setup_voltagescaling(); 798 longhaul_setup_voltagescaling();
714 799
715 policy->governor = CPUFREQ_DEFAULT_GOVERNOR; 800 policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
@@ -797,8 +882,6 @@ static void __exit longhaul_exit(void)
797 882
798module_param (scale_voltage, int, 0644); 883module_param (scale_voltage, int, 0644);
799MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor"); 884MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor");
800module_param(ignore_latency, int, 0644);
801MODULE_PARM_DESC(ignore_latency, "Skip ACPI C3 latency test");
802 885
803MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>"); 886MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>");
804MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors."); 887MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors.");
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.h b/arch/i386/kernel/cpu/cpufreq/longhaul.h
index bc4682aad69b..bb0a04b1d1ab 100644
--- a/arch/i386/kernel/cpu/cpufreq/longhaul.h
+++ b/arch/i386/kernel/cpu/cpufreq/longhaul.h
@@ -235,84 +235,14 @@ static int __initdata ezrat_eblcr[32] = {
235/* 235/*
236 * VIA C3 Nehemiah */ 236 * VIA C3 Nehemiah */
237 237
238static int __initdata nehemiah_a_clock_ratio[32] = { 238static int __initdata nehemiah_clock_ratio[32] = {
239 100, /* 0000 -> 10.0x */ 239 100, /* 0000 -> 10.0x */
240 160, /* 0001 -> 16.0x */ 240 160, /* 0001 -> 16.0x */
241 -1, /* 0010 -> RESERVED */ 241 40, /* 0010 -> 4.0x */
242 90, /* 0011 -> 9.0x */
243 95, /* 0100 -> 9.5x */
244 -1, /* 0101 -> RESERVED */
245 -1, /* 0110 -> RESERVED */
246 55, /* 0111 -> 5.5x */
247 60, /* 1000 -> 6.0x */
248 70, /* 1001 -> 7.0x */
249 80, /* 1010 -> 8.0x */
250 50, /* 1011 -> 5.0x */
251 65, /* 1100 -> 6.5x */
252 75, /* 1101 -> 7.5x */
253 85, /* 1110 -> 8.5x */
254 120, /* 1111 -> 12.0x */
255 100, /* 0000 -> 10.0x */
256 -1, /* 0001 -> RESERVED */
257 120, /* 0010 -> 12.0x */
258 90, /* 0011 -> 9.0x */
259 105, /* 0100 -> 10.5x */
260 115, /* 0101 -> 11.5x */
261 125, /* 0110 -> 12.5x */
262 135, /* 0111 -> 13.5x */
263 140, /* 1000 -> 14.0x */
264 150, /* 1001 -> 15.0x */
265 160, /* 1010 -> 16.0x */
266 130, /* 1011 -> 13.0x */
267 145, /* 1100 -> 14.5x */
268 155, /* 1101 -> 15.5x */
269 -1, /* 1110 -> RESERVED (13.0x) */
270 120, /* 1111 -> 12.0x */
271};
272
273static int __initdata nehemiah_b_clock_ratio[32] = {
274 100, /* 0000 -> 10.0x */
275 160, /* 0001 -> 16.0x */
276 -1, /* 0010 -> RESERVED */
277 90, /* 0011 -> 9.0x */
278 95, /* 0100 -> 9.5x */
279 -1, /* 0101 -> RESERVED */
280 -1, /* 0110 -> RESERVED */
281 55, /* 0111 -> 5.5x */
282 60, /* 1000 -> 6.0x */
283 70, /* 1001 -> 7.0x */
284 80, /* 1010 -> 8.0x */
285 50, /* 1011 -> 5.0x */
286 65, /* 1100 -> 6.5x */
287 75, /* 1101 -> 7.5x */
288 85, /* 1110 -> 8.5x */
289 120, /* 1111 -> 12.0x */
290 100, /* 0000 -> 10.0x */
291 110, /* 0001 -> 11.0x */
292 120, /* 0010 -> 12.0x */
293 90, /* 0011 -> 9.0x */
294 105, /* 0100 -> 10.5x */
295 115, /* 0101 -> 11.5x */
296 125, /* 0110 -> 12.5x */
297 135, /* 0111 -> 13.5x */
298 140, /* 1000 -> 14.0x */
299 150, /* 1001 -> 15.0x */
300 160, /* 1010 -> 16.0x */
301 130, /* 1011 -> 13.0x */
302 145, /* 1100 -> 14.5x */
303 155, /* 1101 -> 15.5x */
304 -1, /* 1110 -> RESERVED (13.0x) */
305 120, /* 1111 -> 12.0x */
306};
307
308static int __initdata nehemiah_c_clock_ratio[32] = {
309 100, /* 0000 -> 10.0x */
310 160, /* 0001 -> 16.0x */
311 40, /* 0010 -> RESERVED */
312 90, /* 0011 -> 9.0x */ 242 90, /* 0011 -> 9.0x */
313 95, /* 0100 -> 9.5x */ 243 95, /* 0100 -> 9.5x */
314 -1, /* 0101 -> RESERVED */ 244 -1, /* 0101 -> RESERVED */
315 45, /* 0110 -> RESERVED */ 245 45, /* 0110 -> 4.5x */
316 55, /* 0111 -> 5.5x */ 246 55, /* 0111 -> 5.5x */
317 60, /* 1000 -> 6.0x */ 247 60, /* 1000 -> 6.0x */
318 70, /* 1001 -> 7.0x */ 248 70, /* 1001 -> 7.0x */
@@ -340,84 +270,14 @@ static int __initdata nehemiah_c_clock_ratio[32] = {
340 120, /* 1111 -> 12.0x */ 270 120, /* 1111 -> 12.0x */
341}; 271};
342 272
343static int __initdata nehemiah_a_eblcr[32] = { 273static int __initdata nehemiah_eblcr[32] = {
344 50, /* 0000 -> 5.0x */
345 160, /* 0001 -> 16.0x */
346 -1, /* 0010 -> RESERVED */
347 100, /* 0011 -> 10.0x */
348 55, /* 0100 -> 5.5x */
349 -1, /* 0101 -> RESERVED */
350 -1, /* 0110 -> RESERVED */
351 95, /* 0111 -> 9.5x */
352 90, /* 1000 -> 9.0x */
353 70, /* 1001 -> 7.0x */
354 80, /* 1010 -> 8.0x */
355 60, /* 1011 -> 6.0x */
356 120, /* 1100 -> 12.0x */
357 75, /* 1101 -> 7.5x */
358 85, /* 1110 -> 8.5x */
359 65, /* 1111 -> 6.5x */
360 90, /* 0000 -> 9.0x */
361 -1, /* 0001 -> RESERVED */
362 120, /* 0010 -> 12.0x */
363 100, /* 0011 -> 10.0x */
364 135, /* 0100 -> 13.5x */
365 115, /* 0101 -> 11.5x */
366 125, /* 0110 -> 12.5x */
367 105, /* 0111 -> 10.5x */
368 130, /* 1000 -> 13.0x */
369 150, /* 1001 -> 15.0x */
370 160, /* 1010 -> 16.0x */
371 140, /* 1011 -> 14.0x */
372 120, /* 1100 -> 12.0x */
373 155, /* 1101 -> 15.5x */
374 -1, /* 1110 -> RESERVED (13.0x) */
375 145 /* 1111 -> 14.5x */
376 /* end of table */
377};
378static int __initdata nehemiah_b_eblcr[32] = {
379 50, /* 0000 -> 5.0x */
380 160, /* 0001 -> 16.0x */
381 -1, /* 0010 -> RESERVED */
382 100, /* 0011 -> 10.0x */
383 55, /* 0100 -> 5.5x */
384 -1, /* 0101 -> RESERVED */
385 -1, /* 0110 -> RESERVED */
386 95, /* 0111 -> 9.5x */
387 90, /* 1000 -> 9.0x */
388 70, /* 1001 -> 7.0x */
389 80, /* 1010 -> 8.0x */
390 60, /* 1011 -> 6.0x */
391 120, /* 1100 -> 12.0x */
392 75, /* 1101 -> 7.5x */
393 85, /* 1110 -> 8.5x */
394 65, /* 1111 -> 6.5x */
395 90, /* 0000 -> 9.0x */
396 110, /* 0001 -> 11.0x */
397 120, /* 0010 -> 12.0x */
398 100, /* 0011 -> 10.0x */
399 135, /* 0100 -> 13.5x */
400 115, /* 0101 -> 11.5x */
401 125, /* 0110 -> 12.5x */
402 105, /* 0111 -> 10.5x */
403 130, /* 1000 -> 13.0x */
404 150, /* 1001 -> 15.0x */
405 160, /* 1010 -> 16.0x */
406 140, /* 1011 -> 14.0x */
407 120, /* 1100 -> 12.0x */
408 155, /* 1101 -> 15.5x */
409 -1, /* 1110 -> RESERVED (13.0x) */
410 145 /* 1111 -> 14.5x */
411 /* end of table */
412};
413static int __initdata nehemiah_c_eblcr[32] = {
414 50, /* 0000 -> 5.0x */ 274 50, /* 0000 -> 5.0x */
415 160, /* 0001 -> 16.0x */ 275 160, /* 0001 -> 16.0x */
416 40, /* 0010 -> RESERVED */ 276 40, /* 0010 -> 4.0x */
417 100, /* 0011 -> 10.0x */ 277 100, /* 0011 -> 10.0x */
418 55, /* 0100 -> 5.5x */ 278 55, /* 0100 -> 5.5x */
419 -1, /* 0101 -> RESERVED */ 279 -1, /* 0101 -> RESERVED */
420 45, /* 0110 -> RESERVED */ 280 45, /* 0110 -> 4.5x */
421 95, /* 0111 -> 9.5x */ 281 95, /* 0111 -> 9.5x */
422 90, /* 1000 -> 9.0x */ 282 90, /* 1000 -> 9.0x */
423 70, /* 1001 -> 7.0x */ 283 70, /* 1001 -> 7.0x */
@@ -443,7 +303,6 @@ static int __initdata nehemiah_c_eblcr[32] = {
443 155, /* 1101 -> 15.5x */ 303 155, /* 1101 -> 15.5x */
444 -1, /* 1110 -> RESERVED (13.0x) */ 304 -1, /* 1110 -> RESERVED (13.0x) */
445 145 /* 1111 -> 14.5x */ 305 145 /* 1111 -> 14.5x */
446 /* end of table */
447}; 306};
448 307
449/* 308/*
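
The ratio tables above map the CPU's 5-bit multiplier field to the front-side-bus multiplier scaled by ten, with -1 marking reserved encodings. A minimal user-space sketch of how such a table turns a ratio index and an FSB speed into a core frequency (table truncated to four entries, helper name invented for illustration):

#include <stdio.h>

/* Demo table: multiplier * 10 per ratio index, -1 = reserved.
 * The driver's tables have 32 entries; four suffice here. */
static const int demo_clock_ratio[] = { 100, 160, -1, 90 };

/* Hypothetical helper: core kHz = FSB kHz * multiplier / 10 */
static int demo_ratio_to_khz(int fsb_khz, unsigned int idx)
{
    int mult = demo_clock_ratio[idx];

    if (mult < 0)
        return -1;              /* reserved encoding */
    return fsb_khz * mult / 10;
}

int main(void)
{
    /* 133 MHz FSB, index 1 -> 16.0x -> 2128000 kHz */
    printf("%d kHz\n", demo_ratio_to_khz(133000, 1));
    return 0;
}
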
diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
index 2d6491672559..fe3b67005ebb 100644
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
@@ -1289,7 +1289,11 @@ static unsigned int powernowk8_get (unsigned int cpu)
1289 if (query_current_values_with_pending_wait(data)) 1289 if (query_current_values_with_pending_wait(data))
1290 goto out; 1290 goto out;
1291 1291
1292 khz = find_khz_freq_from_fid(data->currfid); 1292 if (cpu_family == CPU_HW_PSTATE)
1293 khz = find_khz_freq_from_fiddid(data->currfid, data->currdid);
1294 else
1295 khz = find_khz_freq_from_fid(data->currfid);
1296
1293 1297
1294out: 1298out:
1295 set_cpus_allowed(current, oldmask); 1299 set_cpus_allowed(current, oldmask);
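
The fix above makes powernowk8_get() honour hardware P-state CPUs, where the reported frequency depends on both the FID and the DID rather than the FID alone. A hedged sketch of that dispatch; the conversion formulas below are placeholders for illustration, not the driver's actual math:

#include <stdio.h>

enum demo_family { CPU_OPTERON, CPU_HW_PSTATE };

/* Placeholder conversions -- illustrative only, not the driver's math */
static unsigned int demo_khz_from_fid(unsigned int fid)
{
    return (800 + fid * 100) * 1000;        /* assumed FID step */
}

static unsigned int demo_khz_from_fiddid(unsigned int fid, unsigned int did)
{
    return demo_khz_from_fid(fid) >> did;   /* DID as divisor exponent */
}

/* Mirror of the fixed dispatch in powernowk8_get() */
static unsigned int demo_get_khz(enum demo_family family,
                                 unsigned int fid, unsigned int did)
{
    if (family == CPU_HW_PSTATE)
        return demo_khz_from_fiddid(fid, did);
    return demo_khz_from_fid(fid);
}

int main(void)
{
    printf("%u kHz\n", demo_get_khz(CPU_HW_PSTATE, 4, 1));
    return 0;
}
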
diff --git a/arch/i386/kernel/hpet.c b/arch/i386/kernel/hpet.c
index 0b29d41322a2..e1006b7acc9e 100644
--- a/arch/i386/kernel/hpet.c
+++ b/arch/i386/kernel/hpet.c
@@ -1,4 +1,5 @@
1#include <linux/clocksource.h> 1#include <linux/clocksource.h>
2#include <linux/clockchips.h>
2#include <linux/errno.h> 3#include <linux/errno.h>
3#include <linux/hpet.h> 4#include <linux/hpet.h>
4#include <linux/init.h> 5#include <linux/init.h>
@@ -6,17 +7,278 @@
6#include <asm/hpet.h> 7#include <asm/hpet.h>
7#include <asm/io.h> 8#include <asm/io.h>
8 9
10extern struct clock_event_device *global_clock_event;
11
9#define HPET_MASK CLOCKSOURCE_MASK(32) 12#define HPET_MASK CLOCKSOURCE_MASK(32)
10#define HPET_SHIFT 22 13#define HPET_SHIFT 22
11 14
12/* FSEC = 10^-15 NSEC = 10^-9 */ 15/* FSEC = 10^-15 NSEC = 10^-9 */
13#define FSEC_PER_NSEC 1000000 16#define FSEC_PER_NSEC 1000000
14 17
15static void __iomem *hpet_ptr; 18/*
19 * HPET address is set in acpi/boot.c, when an ACPI entry exists
20 */
21unsigned long hpet_address;
22static void __iomem * hpet_virt_address;
23
24static inline unsigned long hpet_readl(unsigned long a)
25{
26 return readl(hpet_virt_address + a);
27}
28
29static inline void hpet_writel(unsigned long d, unsigned long a)
30{
31 writel(d, hpet_virt_address + a);
32}
33
34/*
35 * HPET command line enable / disable
36 */
37static int boot_hpet_disable;
38
39static int __init hpet_setup(char* str)
40{
41 if (str) {
42 if (!strncmp("disable", str, 7))
43 boot_hpet_disable = 1;
44 }
45 return 1;
46}
47__setup("hpet=", hpet_setup);
48
49static inline int is_hpet_capable(void)
50{
51 return (!boot_hpet_disable && hpet_address);
52}
53
54/*
55 * HPET timer interrupt enable / disable
56 */
57static int hpet_legacy_int_enabled;
58
59/**
60 * is_hpet_enabled - check whether the hpet timer interrupt is enabled
61 */
62int is_hpet_enabled(void)
63{
64 return is_hpet_capable() && hpet_legacy_int_enabled;
65}
66
67/*
68 * When the hpet driver (/dev/hpet) is enabled, we need to reserve
69 * timer 0 and timer 1 in case of RTC emulation.
70 */
71#ifdef CONFIG_HPET
72static void hpet_reserve_platform_timers(unsigned long id)
73{
74 struct hpet __iomem *hpet = hpet_virt_address;
75 struct hpet_timer __iomem *timer = &hpet->hpet_timers[2];
76 unsigned int nrtimers, i;
77 struct hpet_data hd;
78
79 nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
80
81 memset(&hd, 0, sizeof (hd));
82 hd.hd_phys_address = hpet_address;
83 hd.hd_address = hpet_virt_address;
84 hd.hd_nirqs = nrtimers;
85 hd.hd_flags = HPET_DATA_PLATFORM;
86 hpet_reserve_timer(&hd, 0);
87
88#ifdef CONFIG_HPET_EMULATE_RTC
89 hpet_reserve_timer(&hd, 1);
90#endif
91
92 hd.hd_irq[0] = HPET_LEGACY_8254;
93 hd.hd_irq[1] = HPET_LEGACY_RTC;
94
95 for (i = 2; i < nrtimers; timer++, i++)
96 hd.hd_irq[i] = (timer->hpet_config & Tn_INT_ROUTE_CNF_MASK) >>
97 Tn_INT_ROUTE_CNF_SHIFT;
98
99 hpet_alloc(&hd);
100
101}
102#else
103static void hpet_reserve_platform_timers(unsigned long id) { }
104#endif
105
106/*
107 * Common hpet info
108 */
109static unsigned long hpet_period;
110
111static void hpet_set_mode(enum clock_event_mode mode,
112 struct clock_event_device *evt);
113static int hpet_next_event(unsigned long delta,
114 struct clock_event_device *evt);
115
116/*
117 * The hpet clock event device
118 */
119static struct clock_event_device hpet_clockevent = {
120 .name = "hpet",
121 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
122 .set_mode = hpet_set_mode,
123 .set_next_event = hpet_next_event,
124 .shift = 32,
125 .irq = 0,
126};
127
128static void hpet_start_counter(void)
129{
130 unsigned long cfg = hpet_readl(HPET_CFG);
131
132 cfg &= ~HPET_CFG_ENABLE;
133 hpet_writel(cfg, HPET_CFG);
134 hpet_writel(0, HPET_COUNTER);
135 hpet_writel(0, HPET_COUNTER + 4);
136 cfg |= HPET_CFG_ENABLE;
137 hpet_writel(cfg, HPET_CFG);
138}
139
140static void hpet_enable_int(void)
141{
142 unsigned long cfg = hpet_readl(HPET_CFG);
143
144 cfg |= HPET_CFG_LEGACY;
145 hpet_writel(cfg, HPET_CFG);
146 hpet_legacy_int_enabled = 1;
147}
148
149static void hpet_set_mode(enum clock_event_mode mode,
150 struct clock_event_device *evt)
151{
152 unsigned long cfg, cmp, now;
153 uint64_t delta;
154
155 switch(mode) {
156 case CLOCK_EVT_MODE_PERIODIC:
157 delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * hpet_clockevent.mult;
158 delta >>= hpet_clockevent.shift;
159 now = hpet_readl(HPET_COUNTER);
160 cmp = now + (unsigned long) delta;
161 cfg = hpet_readl(HPET_T0_CFG);
162 cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
163 HPET_TN_SETVAL | HPET_TN_32BIT;
164 hpet_writel(cfg, HPET_T0_CFG);
165 /*
166 * The first write after writing TN_SETVAL to the
167 * config register sets the counter value, the second
168 * write sets the period.
169 */
170 hpet_writel(cmp, HPET_T0_CMP);
171 udelay(1);
172 hpet_writel((unsigned long) delta, HPET_T0_CMP);
173 break;
174
175 case CLOCK_EVT_MODE_ONESHOT:
176 cfg = hpet_readl(HPET_T0_CFG);
177 cfg &= ~HPET_TN_PERIODIC;
178 cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
179 hpet_writel(cfg, HPET_T0_CFG);
180 break;
181
182 case CLOCK_EVT_MODE_UNUSED:
183 case CLOCK_EVT_MODE_SHUTDOWN:
184 cfg = hpet_readl(HPET_T0_CFG);
185 cfg &= ~HPET_TN_ENABLE;
186 hpet_writel(cfg, HPET_T0_CFG);
187 break;
188 }
189}
190
191static int hpet_next_event(unsigned long delta,
192 struct clock_event_device *evt)
193{
194 unsigned long cnt;
195
196 cnt = hpet_readl(HPET_COUNTER);
197 cnt += delta;
198 hpet_writel(cnt, HPET_T0_CMP);
199
200 return ((long)(hpet_readl(HPET_COUNTER) - cnt) > 0);
201}
202
203/*
204 * Try to setup the HPET timer
205 */
206int __init hpet_enable(void)
207{
208 unsigned long id;
209 uint64_t hpet_freq;
210
211 if (!is_hpet_capable())
212 return 0;
213
214 hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
215
216 /*
217 * Read the period and check for a sane value:
218 */
219 hpet_period = hpet_readl(HPET_PERIOD);
220 if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD)
221 goto out_nohpet;
222
223 /*
224 * The period is a femto seconds value. We need to calculate the
225 * scaled math multiplication factor for nanosecond to hpet tick
226 * conversion.
227 */
228 hpet_freq = 1000000000000000ULL;
229 do_div(hpet_freq, hpet_period);
230 hpet_clockevent.mult = div_sc((unsigned long) hpet_freq,
231 NSEC_PER_SEC, 32);
232 /* Calculate the min / max delta */
233 hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF,
234 &hpet_clockevent);
235 hpet_clockevent.min_delta_ns = clockevent_delta2ns(0x30,
236 &hpet_clockevent);
237
238 /*
239 * Read the HPET ID register to retrieve the IRQ routing
240 * information and the number of channels
241 */
242 id = hpet_readl(HPET_ID);
243
244#ifdef CONFIG_HPET_EMULATE_RTC
245 /*
246 * The legacy routing mode needs at least two channels, tick timer
247 * and the rtc emulation channel.
248 */
249 if (!(id & HPET_ID_NUMBER))
250 goto out_nohpet;
251#endif
252
253 /* Start the counter */
254 hpet_start_counter();
255
256 if (id & HPET_ID_LEGSUP) {
257 hpet_enable_int();
258 hpet_reserve_platform_timers(id);
259 /*
260 * Start hpet with the boot cpu mask and make it
261 * global after the IO_APIC has been initialized.
262 */
263 hpet_clockevent.cpumask = cpumask_of_cpu(0);
264 clockevents_register_device(&hpet_clockevent);
265 global_clock_event = &hpet_clockevent;
266 return 1;
267 }
268 return 0;
16 269
270out_nohpet:
271 iounmap(hpet_virt_address);
272 hpet_virt_address = NULL;
273 return 0;
274}
275
276/*
277 * Clock source related code
278 */
17static cycle_t read_hpet(void) 279static cycle_t read_hpet(void)
18{ 280{
19 return (cycle_t)readl(hpet_ptr); 281 return (cycle_t)hpet_readl(HPET_COUNTER);
20} 282}
21 283
22static struct clocksource clocksource_hpet = { 284static struct clocksource clocksource_hpet = {
@@ -24,28 +286,17 @@ static struct clocksource clocksource_hpet = {
24 .rating = 250, 286 .rating = 250,
25 .read = read_hpet, 287 .read = read_hpet,
26 .mask = HPET_MASK, 288 .mask = HPET_MASK,
27 .mult = 0, /* set below */
28 .shift = HPET_SHIFT, 289 .shift = HPET_SHIFT,
29 .is_continuous = 1, 290 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
30}; 291};
31 292
32static int __init init_hpet_clocksource(void) 293static int __init init_hpet_clocksource(void)
33{ 294{
34 unsigned long hpet_period;
35 void __iomem* hpet_base;
36 u64 tmp; 295 u64 tmp;
37 int err;
38 296
39 if (!is_hpet_enabled()) 297 if (!hpet_virt_address)
40 return -ENODEV; 298 return -ENODEV;
41 299
42 /* calculate the hpet address: */
43 hpet_base = ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
44 hpet_ptr = hpet_base + HPET_COUNTER;
45
46 /* calculate the frequency: */
47 hpet_period = readl(hpet_base + HPET_PERIOD);
48
49 /* 300 /*
50 * hpet period is in femto seconds per cycle 301 * hpet period is in femto seconds per cycle
51 * so we need to convert this to ns/cyc units 302 * so we need to convert this to ns/cyc units
@@ -61,11 +312,218 @@ static int __init init_hpet_clocksource(void)
61 do_div(tmp, FSEC_PER_NSEC); 312 do_div(tmp, FSEC_PER_NSEC);
62 clocksource_hpet.mult = (u32)tmp; 313 clocksource_hpet.mult = (u32)tmp;
63 314
64 err = clocksource_register(&clocksource_hpet); 315 return clocksource_register(&clocksource_hpet);
65 if (err)
66 iounmap(hpet_base);
67
68 return err;
69} 316}
70 317
71module_init(init_hpet_clocksource); 318module_init(init_hpet_clocksource);
319
320#ifdef CONFIG_HPET_EMULATE_RTC
321
322/* HPET in LegacyReplacement Mode eats up RTC interrupt line. When HPET
323 * is enabled, we support RTC interrupt functionality in software.
324 * RTC has 3 kinds of interrupts:
325 * 1) Update Interrupt - generate an interrupt, every sec, when RTC clock
326 * is updated
327 * 2) Alarm Interrupt - generate an interrupt at a specific time of day
328 * 3) Periodic Interrupt - generate periodic interrupt, with frequencies
329 * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all freqs in powers of 2)
330 * (1) and (2) above are implemented using polling at a frequency of
331 * 64 Hz. The exact frequency is a tradeoff between accuracy and interrupt
332 * overhead. (DEFAULT_RTC_INT_FREQ)
333 * For (3), we use interrupts at 64Hz or user specified periodic
334 * frequency, whichever is higher.
335 */
336#include <linux/mc146818rtc.h>
337#include <linux/rtc.h>
338
339#define DEFAULT_RTC_INT_FREQ 64
340#define DEFAULT_RTC_SHIFT 6
341#define RTC_NUM_INTS 1
342
343static unsigned long hpet_rtc_flags;
344static unsigned long hpet_prev_update_sec;
345static struct rtc_time hpet_alarm_time;
346static unsigned long hpet_pie_count;
347static unsigned long hpet_t1_cmp;
348static unsigned long hpet_default_delta;
349static unsigned long hpet_pie_delta;
350static unsigned long hpet_pie_limit;
351
352/*
353 * Timer 1 for RTC emulation. We use one shot mode, as periodic mode
354 * is not supported by all HPET implementations for timer 1.
355 *
356 * hpet_rtc_timer_init() is called when the rtc is initialized.
357 */
358int hpet_rtc_timer_init(void)
359{
360 unsigned long cfg, cnt, delta, flags;
361
362 if (!is_hpet_enabled())
363 return 0;
364
365 if (!hpet_default_delta) {
366 uint64_t clc;
367
368 clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC;
369 clc >>= hpet_clockevent.shift + DEFAULT_RTC_SHIFT;
370 hpet_default_delta = (unsigned long) clc;
371 }
372
373 if (!(hpet_rtc_flags & RTC_PIE) || hpet_pie_limit)
374 delta = hpet_default_delta;
375 else
376 delta = hpet_pie_delta;
377
378 local_irq_save(flags);
379
380 cnt = delta + hpet_readl(HPET_COUNTER);
381 hpet_writel(cnt, HPET_T1_CMP);
382 hpet_t1_cmp = cnt;
383
384 cfg = hpet_readl(HPET_T1_CFG);
385 cfg &= ~HPET_TN_PERIODIC;
386 cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
387 hpet_writel(cfg, HPET_T1_CFG);
388
389 local_irq_restore(flags);
390
391 return 1;
392}
393
394/*
395 * The functions below are called from rtc driver.
396 * Return 0 if HPET is not being used.
397 * Otherwise do the necessary changes and return 1.
398 */
399int hpet_mask_rtc_irq_bit(unsigned long bit_mask)
400{
401 if (!is_hpet_enabled())
402 return 0;
403
404 hpet_rtc_flags &= ~bit_mask;
405 return 1;
406}
407
408int hpet_set_rtc_irq_bit(unsigned long bit_mask)
409{
410 unsigned long oldbits = hpet_rtc_flags;
411
412 if (!is_hpet_enabled())
413 return 0;
414
415 hpet_rtc_flags |= bit_mask;
416
417 if (!oldbits)
418 hpet_rtc_timer_init();
419
420 return 1;
421}
422
423int hpet_set_alarm_time(unsigned char hrs, unsigned char min,
424 unsigned char sec)
425{
426 if (!is_hpet_enabled())
427 return 0;
428
429 hpet_alarm_time.tm_hour = hrs;
430 hpet_alarm_time.tm_min = min;
431 hpet_alarm_time.tm_sec = sec;
432
433 return 1;
434}
435
436int hpet_set_periodic_freq(unsigned long freq)
437{
438 uint64_t clc;
439
440 if (!is_hpet_enabled())
441 return 0;
442
443 if (freq <= DEFAULT_RTC_INT_FREQ)
444 hpet_pie_limit = DEFAULT_RTC_INT_FREQ / freq;
445 else {
446 clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC;
447 do_div(clc, freq);
448 clc >>= hpet_clockevent.shift;
449 hpet_pie_delta = (unsigned long) clc;
450 }
451 return 1;
452}
453
454int hpet_rtc_dropped_irq(void)
455{
456 return is_hpet_enabled();
457}
458
459static void hpet_rtc_timer_reinit(void)
460{
461 unsigned long cfg, delta;
462 int lost_ints = -1;
463
464 if (unlikely(!hpet_rtc_flags)) {
465 cfg = hpet_readl(HPET_T1_CFG);
466 cfg &= ~HPET_TN_ENABLE;
467 hpet_writel(cfg, HPET_T1_CFG);
468 return;
469 }
470
471 if (!(hpet_rtc_flags & RTC_PIE) || hpet_pie_limit)
472 delta = hpet_default_delta;
473 else
474 delta = hpet_pie_delta;
475
476 /*
477 * Increment the comparator value until we are ahead of the
478 * current count.
479 */
480 do {
481 hpet_t1_cmp += delta;
482 hpet_writel(hpet_t1_cmp, HPET_T1_CMP);
483 lost_ints++;
484 } while ((long)(hpet_readl(HPET_COUNTER) - hpet_t1_cmp) > 0);
485
486 if (lost_ints) {
487 if (hpet_rtc_flags & RTC_PIE)
488 hpet_pie_count += lost_ints;
489 if (printk_ratelimit())
490 printk(KERN_WARNING "rtc: lost %d interrupts\n",
491 lost_ints);
492 }
493}
494
495irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
496{
497 struct rtc_time curr_time;
498 unsigned long rtc_int_flag = 0;
499
500 hpet_rtc_timer_reinit();
501
502 if (hpet_rtc_flags & (RTC_UIE | RTC_AIE))
503 rtc_get_rtc_time(&curr_time);
504
505 if (hpet_rtc_flags & RTC_UIE &&
506 curr_time.tm_sec != hpet_prev_update_sec) {
507 rtc_int_flag = RTC_UF;
508 hpet_prev_update_sec = curr_time.tm_sec;
509 }
510
511 if (hpet_rtc_flags & RTC_PIE &&
512 ++hpet_pie_count >= hpet_pie_limit) {
513 rtc_int_flag |= RTC_PF;
514 hpet_pie_count = 0;
515 }
516
517 if (hpet_rtc_flags & RTC_PIE &&
518 (curr_time.tm_sec == hpet_alarm_time.tm_sec) &&
519 (curr_time.tm_min == hpet_alarm_time.tm_min) &&
520 (curr_time.tm_hour == hpet_alarm_time.tm_hour))
521 rtc_int_flag |= RTC_AF;
522
523 if (rtc_int_flag) {
524 rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8));
525 rtc_interrupt(rtc_int_flag, dev_id);
526 }
527 return IRQ_HANDLED;
528}
529#endif
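
Both the periodic-tick programming in hpet_set_mode() and the RTC-emulation delta in hpet_set_periodic_freq() rely on the clockevent's scaled math: cycles = ns * mult >> shift, with mult derived from the HPET frequency. A standalone illustration of that arithmetic, with an assumed 14.318 MHz HPET and HZ=250:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    const uint64_t NSEC_PER_SEC = 1000000000ULL;
    const unsigned int shift = 32;
    uint64_t hpet_freq = 14318180;          /* assumed 14.318 MHz HPET */
    uint64_t mult = (hpet_freq << shift) / NSEC_PER_SEC;
    uint64_t tick_ns = NSEC_PER_SEC / 250;  /* assumed HZ=250 */
    uint64_t delta = (tick_ns * mult) >> shift;

    /* expect ~57272 HPET cycles per 4 ms tick */
    printf("mult=%llu, cycles per tick=%llu\n",
           (unsigned long long)mult, (unsigned long long)delta);
    return 0;
}
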
diff --git a/arch/i386/kernel/i8253.c b/arch/i386/kernel/i8253.c
index 9a0060b92e32..a6bc7bb38834 100644
--- a/arch/i386/kernel/i8253.c
+++ b/arch/i386/kernel/i8253.c
@@ -2,7 +2,7 @@
2 * i8253.c 8253/PIT functions 2 * i8253.c 8253/PIT functions
3 * 3 *
4 */ 4 */
5#include <linux/clocksource.h> 5#include <linux/clockchips.h>
6#include <linux/spinlock.h> 6#include <linux/spinlock.h>
7#include <linux/jiffies.h> 7#include <linux/jiffies.h>
8#include <linux/sysdev.h> 8#include <linux/sysdev.h>
@@ -19,17 +19,97 @@
19DEFINE_SPINLOCK(i8253_lock); 19DEFINE_SPINLOCK(i8253_lock);
20EXPORT_SYMBOL(i8253_lock); 20EXPORT_SYMBOL(i8253_lock);
21 21
22void setup_pit_timer(void) 22/*
23 * HPET replaces the PIT when enabled, so we need to know which of
24 * the two timers is used
25 */
26struct clock_event_device *global_clock_event;
27
28/*
29 * Initialize the PIT timer.
30 *
31 * This is also called after resume to bring the PIT into operation again.
32 */
33static void init_pit_timer(enum clock_event_mode mode,
34 struct clock_event_device *evt)
35{
36 unsigned long flags;
37
38 spin_lock_irqsave(&i8253_lock, flags);
39
40 switch(mode) {
41 case CLOCK_EVT_MODE_PERIODIC:
42 /* binary, mode 2, LSB/MSB, ch 0 */
43 outb_p(0x34, PIT_MODE);
44 udelay(10);
45 outb_p(LATCH & 0xff, PIT_CH0); /* LSB */
46 udelay(10);
47 outb(LATCH >> 8, PIT_CH0); /* MSB */
48 break;
49
50 case CLOCK_EVT_MODE_ONESHOT:
51 case CLOCK_EVT_MODE_SHUTDOWN:
52 case CLOCK_EVT_MODE_UNUSED:
53 /* One shot setup */
54 outb_p(0x38, PIT_MODE);
55 udelay(10);
56 break;
57 }
58 spin_unlock_irqrestore(&i8253_lock, flags);
59}
60
61/*
62 * Program the next event in oneshot mode
63 *
64 * Delta is given in PIT ticks
65 */
66static int pit_next_event(unsigned long delta, struct clock_event_device *evt)
23{ 67{
24 unsigned long flags; 68 unsigned long flags;
25 69
26 spin_lock_irqsave(&i8253_lock, flags); 70 spin_lock_irqsave(&i8253_lock, flags);
27 outb_p(0x34,PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */ 71 outb_p(delta & 0xff, PIT_CH0); /* LSB */
28 udelay(10); 72 outb(delta >> 8, PIT_CH0); /* MSB */
29 outb_p(LATCH & 0xff , PIT_CH0); /* LSB */
30 udelay(10);
31 outb(LATCH >> 8 , PIT_CH0); /* MSB */
32 spin_unlock_irqrestore(&i8253_lock, flags); 73 spin_unlock_irqrestore(&i8253_lock, flags);
74
75 return 0;
76}
77
78/*
79 * On UP the PIT can serve all of the possible timer functions. On SMP systems
80 * it can be solely used for the global tick.
81 *
82 * The profiling and update capabilities are switched off once the local apic is
83 * registered. This mechanism replaces the previous #ifdef LOCAL_APIC -
84 * !using_apic_timer decisions in do_timer_interrupt_hook()
85 */
86struct clock_event_device pit_clockevent = {
87 .name = "pit",
88 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
89 .set_mode = init_pit_timer,
90 .set_next_event = pit_next_event,
91 .shift = 32,
92 .irq = 0,
93};
94
95/*
96 * Initialize the conversion factor and the min/max deltas of the clock event
97 * structure and register the clock event source with the framework.
98 */
99void __init setup_pit_timer(void)
100{
101 /*
102 * Start pit with the boot cpu mask and make it global after the
103 * IO_APIC has been initialized.
104 */
105 pit_clockevent.cpumask = cpumask_of_cpu(0);
106 pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, 32);
107 pit_clockevent.max_delta_ns =
108 clockevent_delta2ns(0x7FFF, &pit_clockevent);
109 pit_clockevent.min_delta_ns =
110 clockevent_delta2ns(0xF, &pit_clockevent);
111 clockevents_register_device(&pit_clockevent);
112 global_clock_event = &pit_clockevent;
33} 113}
34 114
35/* 115/*
@@ -46,7 +126,7 @@ static cycle_t pit_read(void)
46 static u32 old_jifs; 126 static u32 old_jifs;
47 127
48 spin_lock_irqsave(&i8253_lock, flags); 128 spin_lock_irqsave(&i8253_lock, flags);
49 /* 129 /*
50 * Although our caller may have the read side of xtime_lock, 130 * Although our caller may have the read side of xtime_lock,
51 * this is now a seqlock, and we are cheating in this routine 131 * this is now a seqlock, and we are cheating in this routine
52 * by having side effects on state that we cannot undo if 132 * by having side effects on state that we cannot undo if
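
setup_pit_timer() above derives the PIT clockevent's conversion factor with div_sc() and bounds the oneshot range via clockevent_delta2ns(). The same arithmetic in a standalone sketch, using the PIT's 1,193,182 Hz input clock:

#include <stdint.h>
#include <stdio.h>

/* div_sc()-style factor and clockevent_delta2ns()-style inverse */
int main(void)
{
    const uint64_t NSEC_PER_SEC = 1000000000ULL;
    const uint64_t PIT_TICK_RATE = 1193182;     /* Hz */
    const unsigned int shift = 32;
    uint64_t mult = (PIT_TICK_RATE << shift) / NSEC_PER_SEC;

    /* largest programmable oneshot delta: 0x7FFF PIT ticks */
    uint64_t max_ns = ((uint64_t)0x7FFF << shift) / mult;

    printf("mult=%llu, max_delta=%llu ns (~27 ms)\n",
           (unsigned long long)mult, (unsigned long long)max_ns);
    return 0;
}
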
diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c
index c8d45821c788..03abfdb1a6e4 100644
--- a/arch/i386/kernel/i8259.c
+++ b/arch/i386/kernel/i8259.c
@@ -41,6 +41,7 @@ static void mask_and_ack_8259A(unsigned int);
41static struct irq_chip i8259A_chip = { 41static struct irq_chip i8259A_chip = {
42 .name = "XT-PIC", 42 .name = "XT-PIC",
43 .mask = disable_8259A_irq, 43 .mask = disable_8259A_irq,
44 .disable = disable_8259A_irq,
44 .unmask = enable_8259A_irq, 45 .unmask = enable_8259A_irq,
45 .mask_ack = mask_and_ack_8259A, 46 .mask_ack = mask_and_ack_8259A,
46}; 47};
@@ -410,12 +411,6 @@ void __init native_init_IRQ(void)
410 intr_init_hook(); 411 intr_init_hook();
411 412
412 /* 413 /*
413 * Set the clock to HZ Hz, we already have a valid
414 * vector now:
415 */
416 setup_pit_timer();
417
418 /*
419 * External FPU? Set up irq13 if so, for 414 * External FPU? Set up irq13 if so, for
420 * original braindamaged IBM FERR coupling. 415 * original braindamaged IBM FERR coupling.
421 */ 416 */
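
The one-line addition above gives the PIC's irq_chip an explicit .disable handler, aliased to the mask routine, so the genirq core can mask a line immediately instead of deferring the disable. A simplified sketch of the pattern (struct reduced for illustration):

struct demo_irq_chip {
    const char *name;
    void (*mask)(unsigned int irq);
    void (*unmask)(unsigned int irq);
    void (*disable)(unsigned int irq);
};

static void demo_mask_8259A(unsigned int irq)
{
    /* would set the line's bit in the PIC's OCW1 mask register */
    (void)irq;
}

static struct demo_irq_chip demo_i8259A_chip = {
    .name    = "XT-PIC",
    .mask    = demo_mask_8259A,
    .disable = demo_mask_8259A,   /* disable == mask for the PIC */
};
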
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index e30ccedad0b9..4ccebd454e25 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -482,8 +482,8 @@ static void do_irq_balance(void)
482 package_index = CPU_TO_PACKAGEINDEX(i); 482 package_index = CPU_TO_PACKAGEINDEX(i);
483 for (j = 0; j < NR_IRQS; j++) { 483 for (j = 0; j < NR_IRQS; j++) {
484 unsigned long value_now, delta; 484 unsigned long value_now, delta;
485 /* Is this an active IRQ? */ 485 /* Is this an active IRQ or balancing disabled? */
486 if (!irq_desc[j].action) 486 if (!irq_desc[j].action || irq_balancing_disabled(j))
487 continue; 487 continue;
488 if ( package_index == i ) 488 if ( package_index == i )
489 IRQ_DELTA(package_index,j) = 0; 489 IRQ_DELTA(package_index,j) = 0;
@@ -1281,11 +1281,9 @@ static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
1281 trigger == IOAPIC_LEVEL) 1281 trigger == IOAPIC_LEVEL)
1282 set_irq_chip_and_handler_name(irq, &ioapic_chip, 1282 set_irq_chip_and_handler_name(irq, &ioapic_chip,
1283 handle_fasteoi_irq, "fasteoi"); 1283 handle_fasteoi_irq, "fasteoi");
1284 else { 1284 else
1285 irq_desc[irq].status |= IRQ_DELAYED_DISABLE;
1286 set_irq_chip_and_handler_name(irq, &ioapic_chip, 1285 set_irq_chip_and_handler_name(irq, &ioapic_chip,
1287 handle_edge_irq, "edge"); 1286 handle_edge_irq, "edge");
1288 }
1289 set_intr_gate(vector, interrupt[irq]); 1287 set_intr_gate(vector, interrupt[irq]);
1290} 1288}
1291 1289
@@ -1588,7 +1586,7 @@ void /*__init*/ print_local_APIC(void * dummy)
1588 v = apic_read(APIC_LVR); 1586 v = apic_read(APIC_LVR);
1589 printk(KERN_INFO "... APIC VERSION: %08x\n", v); 1587 printk(KERN_INFO "... APIC VERSION: %08x\n", v);
1590 ver = GET_APIC_VERSION(v); 1588 ver = GET_APIC_VERSION(v);
1591 maxlvt = get_maxlvt(); 1589 maxlvt = lapic_get_maxlvt();
1592 1590
1593 v = apic_read(APIC_TASKPRI); 1591 v = apic_read(APIC_TASKPRI);
1594 printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); 1592 printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
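
The balancer change above additionally skips IRQs that opted out of balancing. A sketch of such a predicate over a per-IRQ status word; the flag value below is chosen for illustration, not taken from the kernel headers:

#define DEMO_IRQ_NO_BALANCING 0x4000    /* illustrative flag value */

struct demo_irq_desc {
    void *action;                       /* non-NULL when IRQ is in use */
    unsigned int status;
};

static struct demo_irq_desc demo_irq_desc[16];

static int demo_irq_balancing_disabled(unsigned int irq)
{
    return demo_irq_desc[irq].status & DEMO_IRQ_NO_BALANCING;
}

static int demo_should_balance(unsigned int irq)
{
    /* mirror the check in do_irq_balance(): active and balanceable */
    return demo_irq_desc[irq].action && !demo_irq_balancing_disabled(irq);
}
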
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
index 5785d84103a6..0f2ca590bf23 100644
--- a/arch/i386/kernel/irq.c
+++ b/arch/i386/kernel/irq.c
@@ -10,7 +10,6 @@
10 * io_apic.c.) 10 * io_apic.c.)
11 */ 11 */
12 12
13#include <asm/uaccess.h>
14#include <linux/module.h> 13#include <linux/module.h>
15#include <linux/seq_file.h> 14#include <linux/seq_file.h>
16#include <linux/interrupt.h> 15#include <linux/interrupt.h>
@@ -21,19 +20,34 @@
21 20
22#include <asm/idle.h> 21#include <asm/idle.h>
23 22
23#include <asm/apic.h>
24#include <asm/uaccess.h>
25
24DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; 26DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp;
25EXPORT_PER_CPU_SYMBOL(irq_stat); 27EXPORT_PER_CPU_SYMBOL(irq_stat);
26 28
27#ifndef CONFIG_X86_LOCAL_APIC
28/* 29/*
29 * 'what should we do if we get a hw irq event on an illegal vector'. 30 * 'what should we do if we get a hw irq event on an illegal vector'.
30 * each architecture has to answer this themselves. 31 * each architecture has to answer this themselves.
31 */ 32 */
32void ack_bad_irq(unsigned int irq) 33void ack_bad_irq(unsigned int irq)
33{ 34{
34 printk("unexpected IRQ trap at vector %02x\n", irq); 35 printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq);
35} 36
37#ifdef CONFIG_X86_LOCAL_APIC
38 /*
39 * Currently unexpected vectors happen only on SMP and APIC.
40 * We _must_ ack these because every local APIC has only N
41 * irq slots per priority level, and a 'hanging, unacked' IRQ
42 * holds up an irq slot - in excessive cases (when multiple
43 * unexpected vectors occur) that might lock up the APIC
44 * completely.
45 * But only ack when the APIC is enabled -AK
46 */
47 if (cpu_has_apic)
48 ack_APIC_irq();
36#endif 49#endif
50}
37 51
38#ifdef CONFIG_4KSTACKS 52#ifdef CONFIG_4KSTACKS
39/* 53/*
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index 5d8a07c20281..821df34d2b3a 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -23,6 +23,7 @@
23#include <linux/dmi.h> 23#include <linux/dmi.h>
24#include <linux/kprobes.h> 24#include <linux/kprobes.h>
25#include <linux/cpumask.h> 25#include <linux/cpumask.h>
26#include <linux/kernel_stat.h>
26 27
27#include <asm/smp.h> 28#include <asm/smp.h>
28#include <asm/nmi.h> 29#include <asm/nmi.h>
@@ -973,9 +974,13 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
973 cpu_clear(cpu, backtrace_mask); 974 cpu_clear(cpu, backtrace_mask);
974 } 975 }
975 976
976 sum = per_cpu(irq_stat, cpu).apic_timer_irqs; 977 /*
978 * Take the local apic timer and PIT/HPET into account. We don't
979 * know which one is active when we have highres/dyntick on
980 */
981 sum = per_cpu(irq_stat, cpu).apic_timer_irqs + kstat_irqs(0);
977 982
978 /* if the apic timer isn't firing, this cpu isn't doing much */ 983 /* if none of the timers is firing, this cpu isn't doing much */
979 if (!touched && last_irq_sums[cpu] == sum) { 984 if (!touched && last_irq_sums[cpu] == sum) {
980 /* 985 /*
981 * Ayiee, looks like this CPU is stuck ... 986 * Ayiee, looks like this CPU is stuck ...
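
The watchdog comment above explains why the per-CPU sum now includes IRQ0: with highres/dyntick, either the local APIC timer or the PIT/HPET may be the active tick source. A compact sketch of the stuck-CPU heuristic built on that sum (names illustrative):

#define DEMO_NR_CPUS 8

static unsigned int demo_last_sum[DEMO_NR_CPUS];
static int demo_alert_counter[DEMO_NR_CPUS];

/* Called from each NMI tick; returns 1 when the CPU looks stuck. */
static int demo_watchdog_tick(int cpu, unsigned int apic_timer_irqs,
                              unsigned int irq0_count)
{
    unsigned int sum = apic_timer_irqs + irq0_count;

    if (demo_last_sum[cpu] == sum) {
        /* no timer interrupts since the last check */
        if (++demo_alert_counter[cpu] >= 5)
            return 1;       /* several checks without progress */
    } else {
        demo_last_sum[cpu] = sum;
        demo_alert_counter[cpu] = 0;
    }
    return 0;
}
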
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 7845d480c293..bea304d48cdb 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -38,6 +38,7 @@
38#include <linux/ptrace.h> 38#include <linux/ptrace.h>
39#include <linux/random.h> 39#include <linux/random.h>
40#include <linux/personality.h> 40#include <linux/personality.h>
41#include <linux/tick.h>
41 42
42#include <asm/uaccess.h> 43#include <asm/uaccess.h>
43#include <asm/pgtable.h> 44#include <asm/pgtable.h>
@@ -211,6 +212,7 @@ void cpu_idle(void)
211 212
212 /* endless idle loop with no priority at all */ 213 /* endless idle loop with no priority at all */
213 while (1) { 214 while (1) {
215 tick_nohz_stop_sched_tick();
214 while (!need_resched()) { 216 while (!need_resched()) {
215 void (*idle)(void); 217 void (*idle)(void);
216 218
@@ -238,6 +240,7 @@ void cpu_idle(void)
238 idle(); 240 idle();
239 __exit_idle(); 241 __exit_idle();
240 } 242 }
243 tick_nohz_restart_sched_tick();
241 preempt_enable_no_resched(); 244 preempt_enable_no_resched();
242 schedule(); 245 schedule();
243 preempt_disable(); 246 preempt_disable();
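
The two calls added above bracket the idle loop: the periodic tick is stopped before the CPU waits and restarted before it schedules again. The shape of the loop, reduced to stubs:

static void demo_stop_tick(void)    { /* tick_nohz_stop_sched_tick() */ }
static void demo_restart_tick(void) { /* tick_nohz_restart_sched_tick() */ }
static int  demo_need_resched(void) { return 1; }
static void demo_halt(void)         { /* wait for an interrupt */ }

static void demo_cpu_idle(void)
{
    for (;;) {
        demo_stop_tick();
        while (!demo_need_resched())
            demo_halt();
        demo_restart_tick();
        /* schedule(); */
        break;  /* demo: single iteration */
    }
}
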
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index f46a4d095e6c..48bfcaa13ecc 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -94,12 +94,6 @@ cpumask_t cpu_possible_map;
94EXPORT_SYMBOL(cpu_possible_map); 94EXPORT_SYMBOL(cpu_possible_map);
95static cpumask_t smp_commenced_mask; 95static cpumask_t smp_commenced_mask;
96 96
97/* TSC's upper 32 bits can't be written on earlier CPUs (before prescott), there
98 * is no way to resync one AP against BP. TBD: for prescott and above, we
99 * should use IA64's algorithm
100 */
101static int __devinitdata tsc_sync_disabled;
102
103/* Per CPU bogomips and other parameters */ 97/* Per CPU bogomips and other parameters */
104struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; 98struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
105EXPORT_SYMBOL(cpu_data); 99EXPORT_SYMBOL(cpu_data);
@@ -216,151 +210,6 @@ valid_k7:
216 ; 210 ;
217} 211}
218 212
219/*
220 * TSC synchronization.
221 *
222 * We first check whether all CPUs have their TSC's synchronized,
223 * then we print a warning if not, and always resync.
224 */
225
226static struct {
227 atomic_t start_flag;
228 atomic_t count_start;
229 atomic_t count_stop;
230 unsigned long long values[NR_CPUS];
231} tsc __cpuinitdata = {
232 .start_flag = ATOMIC_INIT(0),
233 .count_start = ATOMIC_INIT(0),
234 .count_stop = ATOMIC_INIT(0),
235};
236
237#define NR_LOOPS 5
238
239static void __init synchronize_tsc_bp(void)
240{
241 int i;
242 unsigned long long t0;
243 unsigned long long sum, avg;
244 long long delta;
245 unsigned int one_usec;
246 int buggy = 0;
247
248 printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());
249
250 /* convert from kcyc/sec to cyc/usec */
251 one_usec = cpu_khz / 1000;
252
253 atomic_set(&tsc.start_flag, 1);
254 wmb();
255
256 /*
257 * We loop a few times to get a primed instruction cache,
258 * then the last pass is more or less synchronized and
259 * the BP and APs set their cycle counters to zero all at
260 * once. This reduces the chance of having random offsets
261 * between the processors, and guarantees that the maximum
262 * delay between the cycle counters is never bigger than
263 * the latency of information-passing (cachelines) between
264 * two CPUs.
265 */
266 for (i = 0; i < NR_LOOPS; i++) {
267 /*
268 * all APs synchronize but they loop on '== num_cpus'
269 */
270 while (atomic_read(&tsc.count_start) != num_booting_cpus()-1)
271 cpu_relax();
272 atomic_set(&tsc.count_stop, 0);
273 wmb();
274 /*
275 * this lets the APs save their current TSC:
276 */
277 atomic_inc(&tsc.count_start);
278
279 rdtscll(tsc.values[smp_processor_id()]);
280 /*
281 * We clear the TSC in the last loop:
282 */
283 if (i == NR_LOOPS-1)
284 write_tsc(0, 0);
285
286 /*
287 * Wait for all APs to leave the synchronization point:
288 */
289 while (atomic_read(&tsc.count_stop) != num_booting_cpus()-1)
290 cpu_relax();
291 atomic_set(&tsc.count_start, 0);
292 wmb();
293 atomic_inc(&tsc.count_stop);
294 }
295
296 sum = 0;
297 for (i = 0; i < NR_CPUS; i++) {
298 if (cpu_isset(i, cpu_callout_map)) {
299 t0 = tsc.values[i];
300 sum += t0;
301 }
302 }
303 avg = sum;
304 do_div(avg, num_booting_cpus());
305
306 for (i = 0; i < NR_CPUS; i++) {
307 if (!cpu_isset(i, cpu_callout_map))
308 continue;
309 delta = tsc.values[i] - avg;
310 if (delta < 0)
311 delta = -delta;
312 /*
313 * We report bigger than 2 microseconds clock differences.
314 */
315 if (delta > 2*one_usec) {
316 long long realdelta;
317
318 if (!buggy) {
319 buggy = 1;
320 printk("\n");
321 }
322 realdelta = delta;
323 do_div(realdelta, one_usec);
324 if (tsc.values[i] < avg)
325 realdelta = -realdelta;
326
327 if (realdelta)
328 printk(KERN_INFO "CPU#%d had %Ld usecs TSC "
329 "skew, fixed it up.\n", i, realdelta);
330 }
331 }
332 if (!buggy)
333 printk("passed.\n");
334}
335
336static void __cpuinit synchronize_tsc_ap(void)
337{
338 int i;
339
340 /*
341 * Not every cpu is online at the time
342 * this gets called, so we first wait for the BP to
343 * finish SMP initialization:
344 */
345 while (!atomic_read(&tsc.start_flag))
346 cpu_relax();
347
348 for (i = 0; i < NR_LOOPS; i++) {
349 atomic_inc(&tsc.count_start);
350 while (atomic_read(&tsc.count_start) != num_booting_cpus())
351 cpu_relax();
352
353 rdtscll(tsc.values[smp_processor_id()]);
354 if (i == NR_LOOPS-1)
355 write_tsc(0, 0);
356
357 atomic_inc(&tsc.count_stop);
358 while (atomic_read(&tsc.count_stop) != num_booting_cpus())
359 cpu_relax();
360 }
361}
362#undef NR_LOOPS
363
364extern void calibrate_delay(void); 213extern void calibrate_delay(void);
365 214
366static atomic_t init_deasserted; 215static atomic_t init_deasserted;
@@ -438,20 +287,12 @@ static void __cpuinit smp_callin(void)
438 /* 287 /*
439 * Save our processor parameters 288 * Save our processor parameters
440 */ 289 */
441 smp_store_cpu_info(cpuid); 290 smp_store_cpu_info(cpuid);
442
443 disable_APIC_timer();
444 291
445 /* 292 /*
446 * Allow the master to continue. 293 * Allow the master to continue.
447 */ 294 */
448 cpu_set(cpuid, cpu_callin_map); 295 cpu_set(cpuid, cpu_callin_map);
449
450 /*
451 * Synchronize the TSC with the BP
452 */
453 if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled)
454 synchronize_tsc_ap();
455} 296}
456 297
457static int cpucount; 298static int cpucount;
@@ -554,13 +395,17 @@ static void __cpuinit start_secondary(void *unused)
554 smp_callin(); 395 smp_callin();
555 while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) 396 while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
556 rep_nop(); 397 rep_nop();
398 /*
399 * Check TSC synchronization with the BP:
400 */
401 check_tsc_sync_target();
402
557 setup_secondary_clock(); 403 setup_secondary_clock();
558 if (nmi_watchdog == NMI_IO_APIC) { 404 if (nmi_watchdog == NMI_IO_APIC) {
559 disable_8259A_irq(0); 405 disable_8259A_irq(0);
560 enable_NMI_through_LVT0(NULL); 406 enable_NMI_through_LVT0(NULL);
561 enable_8259A_irq(0); 407 enable_8259A_irq(0);
562 } 408 }
563 enable_APIC_timer();
564 /* 409 /*
565 * low-memory mappings have been cleared, flush them from 410 * low-memory mappings have been cleared, flush them from
566 * the local TLBs too. 411 * the local TLBs too.
@@ -752,7 +597,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
752 /* 597 /*
753 * Due to the Pentium erratum 3AP. 598 * Due to the Pentium erratum 3AP.
754 */ 599 */
755 maxlvt = get_maxlvt(); 600 maxlvt = lapic_get_maxlvt();
756 if (maxlvt > 3) { 601 if (maxlvt > 3) {
757 apic_read_around(APIC_SPIV); 602 apic_read_around(APIC_SPIV);
758 apic_write(APIC_ESR, 0); 603 apic_write(APIC_ESR, 0);
@@ -849,7 +694,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
849 */ 694 */
850 Dprintk("#startup loops: %d.\n", num_starts); 695 Dprintk("#startup loops: %d.\n", num_starts);
851 696
852 maxlvt = get_maxlvt(); 697 maxlvt = lapic_get_maxlvt();
853 698
854 for (j = 1; j <= num_starts; j++) { 699 for (j = 1; j <= num_starts; j++) {
855 Dprintk("Sending STARTUP #%d.\n",j); 700 Dprintk("Sending STARTUP #%d.\n",j);
@@ -1125,8 +970,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
1125 info.cpu = cpu; 970 info.cpu = cpu;
1126 INIT_WORK(&info.task, do_warm_boot_cpu); 971 INIT_WORK(&info.task, do_warm_boot_cpu);
1127 972
1128 tsc_sync_disabled = 1;
1129
1130 /* init low mem mapping */ 973 /* init low mem mapping */
1131 clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, 974 clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
1132 min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS)); 975 min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS));
@@ -1134,7 +977,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
1134 schedule_work(&info.task); 977 schedule_work(&info.task);
1135 wait_for_completion(&done); 978 wait_for_completion(&done);
1136 979
1137 tsc_sync_disabled = 0;
1138 zap_low_mappings(); 980 zap_low_mappings();
1139 ret = 0; 981 ret = 0;
1140exit: 982exit:
@@ -1331,12 +1173,6 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
1331 smpboot_setup_io_apic(); 1173 smpboot_setup_io_apic();
1332 1174
1333 setup_boot_clock(); 1175 setup_boot_clock();
1334
1335 /*
1336 * Synchronize the TSC with the AP
1337 */
1338 if (cpu_has_tsc && cpucount && cpu_khz)
1339 synchronize_tsc_bp();
1340} 1176}
1341 1177
1342/* These are wrappers to interface to the new boot process. Someone 1178/* These are wrappers to interface to the new boot process. Someone
@@ -1471,9 +1307,16 @@ int __cpuinit __cpu_up(unsigned int cpu)
1471 } 1307 }
1472 1308
1473 local_irq_enable(); 1309 local_irq_enable();
1310
1474 per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; 1311 per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
1475 /* Unleash the CPU! */ 1312 /* Unleash the CPU! */
1476 cpu_set(cpu, smp_commenced_mask); 1313 cpu_set(cpu, smp_commenced_mask);
1314
1315 /*
1316 * Check TSC synchronization with the AP:
1317 */
1318 check_tsc_sync_source(cpu);
1319
1477 while (!cpu_isset(cpu, cpu_online_map)) 1320 while (!cpu_isset(cpu, cpu_online_map))
1478 cpu_relax(); 1321 cpu_relax();
1479 1322
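
The removed synchronize_tsc_bp() averaged per-CPU TSC snapshots and flagged any CPU more than two microseconds' worth of cycles from the mean; its replacement, the check_tsc_sync_source()/check_tsc_sync_target() handshake, is called from the hooks added above. The old skew arithmetic as a standalone example (sample values invented):

#include <stdio.h>

int main(void)
{
    unsigned long long values[4] = { 1000000, 1000050, 1009000, 999950 };
    unsigned int ncpus = 4, one_usec = 2000;    /* 2 GHz: cycles per usec */
    unsigned long long sum = 0, avg;
    unsigned int i;

    for (i = 0; i < ncpus; i++)
        sum += values[i];
    avg = sum / ncpus;

    for (i = 0; i < ncpus; i++) {
        long long delta = (long long)(values[i] - avg);

        if (delta < 0)
            delta = -delta;
        if (delta > 2 * one_usec)   /* report > 2 us of skew */
            printf("CPU#%u skew: %lld cycles (~%lld us)\n",
                   i, delta, delta / one_usec);
    }
    return 0;
}
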
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index a4f67a6e6821..a5350059557a 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -159,15 +159,6 @@ EXPORT_SYMBOL(profile_pc);
159 */ 159 */
160irqreturn_t timer_interrupt(int irq, void *dev_id) 160irqreturn_t timer_interrupt(int irq, void *dev_id)
161{ 161{
162 /*
163 * Here we are in the timer irq handler. We just have irqs locally
164 * disabled but we don't know if the timer_bh is running on the other
165 * CPU. We need to avoid to SMP race with it. NOTE: we don' t need
166 * the irq version of write_lock because as just said we have irq
167 * locally disabled. -arca
168 */
169 write_seqlock(&xtime_lock);
170
171#ifdef CONFIG_X86_IO_APIC 162#ifdef CONFIG_X86_IO_APIC
172 if (timer_ack) { 163 if (timer_ack) {
173 /* 164 /*
@@ -186,7 +177,6 @@ irqreturn_t timer_interrupt(int irq, void *dev_id)
186 177
187 do_timer_interrupt_hook(); 178 do_timer_interrupt_hook();
188 179
189
190 if (MCA_bus) { 180 if (MCA_bus) {
191 /* The PS/2 uses level-triggered interrupts. You can't 181 /* The PS/2 uses level-triggered interrupts. You can't
192 turn them off, nor would you want to (any attempt to 182 turn them off, nor would you want to (any attempt to
@@ -201,18 +191,11 @@ irqreturn_t timer_interrupt(int irq, void *dev_id)
201 outb_p( irq_v|0x80, 0x61 ); /* reset the IRQ */ 191 outb_p( irq_v|0x80, 0x61 ); /* reset the IRQ */
202 } 192 }
203 193
204 write_sequnlock(&xtime_lock);
205
206#ifdef CONFIG_X86_LOCAL_APIC
207 if (using_apic_timer)
208 smp_send_timer_broadcast_ipi();
209#endif
210
211 return IRQ_HANDLED; 194 return IRQ_HANDLED;
212} 195}
213 196
214/* not static: needed by APM */ 197/* not static: needed by APM */
215unsigned long get_cmos_time(void) 198unsigned long read_persistent_clock(void)
216{ 199{
217 unsigned long retval; 200 unsigned long retval;
218 unsigned long flags; 201 unsigned long flags;
@@ -225,7 +208,6 @@ unsigned long get_cmos_time(void)
225 208
226 return retval; 209 return retval;
227} 210}
228EXPORT_SYMBOL(get_cmos_time);
229 211
230static void sync_cmos_clock(unsigned long dummy); 212static void sync_cmos_clock(unsigned long dummy);
231 213
@@ -278,114 +260,16 @@ void notify_arch_cmos_timer(void)
278 mod_timer(&sync_cmos_timer, jiffies + 1); 260 mod_timer(&sync_cmos_timer, jiffies + 1);
279} 261}
280 262
281static long clock_cmos_diff;
282static unsigned long sleep_start;
283
284static int timer_suspend(struct sys_device *dev, pm_message_t state)
285{
286 /*
287 * Estimate time zone so that set_time can update the clock
288 */
289 unsigned long ctime = get_cmos_time();
290
291 clock_cmos_diff = -ctime;
292 clock_cmos_diff += get_seconds();
293 sleep_start = ctime;
294 return 0;
295}
296
297static int timer_resume(struct sys_device *dev)
298{
299 unsigned long flags;
300 unsigned long sec;
301 unsigned long ctime = get_cmos_time();
302 long sleep_length = (ctime - sleep_start) * HZ;
303 struct timespec ts;
304
305 if (sleep_length < 0) {
306 printk(KERN_WARNING "CMOS clock skew detected in timer resume!\n");
307 /* The time after the resume must not be earlier than the time
308 * before the suspend or some nasty things will happen
309 */
310 sleep_length = 0;
311 ctime = sleep_start;
312 }
313#ifdef CONFIG_HPET_TIMER
314 if (is_hpet_enabled())
315 hpet_reenable();
316#endif
317 setup_pit_timer();
318
319 sec = ctime + clock_cmos_diff;
320 ts.tv_sec = sec;
321 ts.tv_nsec = 0;
322 do_settimeofday(&ts);
323 write_seqlock_irqsave(&xtime_lock, flags);
324 jiffies_64 += sleep_length;
325 write_sequnlock_irqrestore(&xtime_lock, flags);
326 touch_softlockup_watchdog();
327 return 0;
328}
329
330static struct sysdev_class timer_sysclass = {
331 .resume = timer_resume,
332 .suspend = timer_suspend,
333 set_kset_name("timer"),
334};
335
336
337/* XXX this driverfs stuff should probably go elsewhere later -john */
338static struct sys_device device_timer = {
339 .id = 0,
340 .cls = &timer_sysclass,
341};
342
343static int time_init_device(void)
344{
345 int error = sysdev_class_register(&timer_sysclass);
346 if (!error)
347 error = sysdev_register(&device_timer);
348 return error;
349}
350
351device_initcall(time_init_device);
352
353#ifdef CONFIG_HPET_TIMER
354extern void (*late_time_init)(void); 263extern void (*late_time_init)(void);
355/* Duplicate of time_init() below, with hpet_enable part added */ 264/* Duplicate of time_init() below, with hpet_enable part added */
356static void __init hpet_time_init(void) 265static void __init hpet_time_init(void)
357{ 266{
358 struct timespec ts; 267 if (!hpet_enable())
359 ts.tv_sec = get_cmos_time(); 268 setup_pit_timer();
360 ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
361
362 do_settimeofday(&ts);
363
364 if ((hpet_enable() >= 0) && hpet_use_timer) {
365 printk("Using HPET for base-timer\n");
366 }
367
368 do_time_init(); 269 do_time_init();
369} 270}
370#endif
371 271
372void __init time_init(void) 272void __init time_init(void)
373{ 273{
374 struct timespec ts; 274 late_time_init = hpet_time_init;
375#ifdef CONFIG_HPET_TIMER
376 if (is_hpet_capable()) {
377 /*
378 * HPET initialization needs to do memory-mapped io. So, let
379 * us do a late initialization after mem_init().
380 */
381 late_time_init = hpet_time_init;
382 return;
383 }
384#endif
385 ts.tv_sec = get_cmos_time();
386 ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
387
388 do_settimeofday(&ts);
389
390 do_time_init();
391} 275}
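
time_init() now defers all timer setup to hpet_time_init(), which registers the HPET clockevent when hpet_enable() succeeds and falls back to the PIT otherwise. The control flow, reduced to stubs:

static int demo_hpet_enable(void)
{
    return 0;   /* pretend no usable HPET was found */
}

static void demo_setup_pit_timer(void)
{
    /* would register pit_clockevent as the global clock event */
}

static void demo_hpet_time_init(void)
{
    if (!demo_hpet_enable())
        demo_setup_pit_timer();
    /* do_time_init() follows either way */
}
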
diff --git a/arch/i386/kernel/time_hpet.c b/arch/i386/kernel/time_hpet.c
deleted file mode 100644
index 1e4702dfcd01..000000000000
--- a/arch/i386/kernel/time_hpet.c
+++ /dev/null
@@ -1,497 +0,0 @@
1/*
2 * linux/arch/i386/kernel/time_hpet.c
3 * This code largely copied from arch/x86_64/kernel/time.c
4 * See that file for credits.
5 *
6 * 2003-06-30 Venkatesh Pallipadi - Additional changes for HPET support
7 */
8
9#include <linux/errno.h>
10#include <linux/kernel.h>
11#include <linux/param.h>
12#include <linux/string.h>
13#include <linux/init.h>
14#include <linux/smp.h>
15
16#include <asm/timer.h>
17#include <asm/fixmap.h>
18#include <asm/apic.h>
19
20#include <linux/timex.h>
21
22#include <asm/hpet.h>
23#include <linux/hpet.h>
24
25static unsigned long hpet_period; /* fsecs / HPET clock */
26unsigned long hpet_tick; /* hpet clks count per tick */
27unsigned long hpet_address; /* hpet memory map physical address */
28int hpet_use_timer;
29
30static int use_hpet; /* can be used for runtime check of hpet */
31static int boot_hpet_disable; /* boottime override for HPET timer */
32static void __iomem * hpet_virt_address; /* hpet kernel virtual address */
33
34#define FSEC_TO_USEC (1000000000UL)
35
36int hpet_readl(unsigned long a)
37{
38 return readl(hpet_virt_address + a);
39}
40
41static void hpet_writel(unsigned long d, unsigned long a)
42{
43 writel(d, hpet_virt_address + a);
44}
45
46#ifdef CONFIG_X86_LOCAL_APIC
47/*
48 * HPET counters dont wrap around on every tick. They just change the
49 * comparator value and continue. Next tick can be caught by checking
50 * for a change in the comparator value. Used in apic.c.
51 */
52static void __devinit wait_hpet_tick(void)
53{
54 unsigned int start_cmp_val, end_cmp_val;
55
56 start_cmp_val = hpet_readl(HPET_T0_CMP);
57 do {
58 end_cmp_val = hpet_readl(HPET_T0_CMP);
59 } while (start_cmp_val == end_cmp_val);
60}
61#endif
62
63static int hpet_timer_stop_set_go(unsigned long tick)
64{
65 unsigned int cfg;
66
67 /*
68 * Stop the timers and reset the main counter.
69 */
70 cfg = hpet_readl(HPET_CFG);
71 cfg &= ~HPET_CFG_ENABLE;
72 hpet_writel(cfg, HPET_CFG);
73 hpet_writel(0, HPET_COUNTER);
74 hpet_writel(0, HPET_COUNTER + 4);
75
76 if (hpet_use_timer) {
77 /*
78 * Set up timer 0, as periodic with first interrupt to happen at
79 * hpet_tick, and period also hpet_tick.
80 */
81 cfg = hpet_readl(HPET_T0_CFG);
82 cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
83 HPET_TN_SETVAL | HPET_TN_32BIT;
84 hpet_writel(cfg, HPET_T0_CFG);
85
86 /*
87 * The first write after writing TN_SETVAL to the config register sets
88 * the counter value, the second write sets the threshold.
89 */
90 hpet_writel(tick, HPET_T0_CMP);
91 hpet_writel(tick, HPET_T0_CMP);
92 }
93 /*
94 * Go!
95 */
96 cfg = hpet_readl(HPET_CFG);
97 if (hpet_use_timer)
98 cfg |= HPET_CFG_LEGACY;
99 cfg |= HPET_CFG_ENABLE;
100 hpet_writel(cfg, HPET_CFG);
101
102 return 0;
103}
104
105/*
106 * Check whether HPET was found by ACPI boot parse. If yes setup HPET
107 * counter 0 for kernel base timer.
108 */
109int __init hpet_enable(void)
110{
111 unsigned int id;
112 unsigned long tick_fsec_low, tick_fsec_high; /* tick in femto sec */
113 unsigned long hpet_tick_rem;
114
115 if (boot_hpet_disable)
116 return -1;
117
118 if (!hpet_address) {
119 return -1;
120 }
121 hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
122 /*
123 * Read the period, compute tick and quotient.
124 */
125 id = hpet_readl(HPET_ID);
126
127 /*
128 * We are checking for value '1' or more in number field if
129 * CONFIG_HPET_EMULATE_RTC is set because we will need an
130 * additional timer for RTC emulation.
131 * However, we can do with one timer otherwise using the
132 * single HPET timer for system time.
133 */
134#ifdef CONFIG_HPET_EMULATE_RTC
135 if (!(id & HPET_ID_NUMBER)) {
136 iounmap(hpet_virt_address);
137 hpet_virt_address = NULL;
138 return -1;
139 }
140#endif
141
142
143 hpet_period = hpet_readl(HPET_PERIOD);
144 if ((hpet_period < HPET_MIN_PERIOD) || (hpet_period > HPET_MAX_PERIOD)) {
145 iounmap(hpet_virt_address);
146 hpet_virt_address = NULL;
147 return -1;
148 }
149
150 /*
151 * 64 bit math
152 * First changing tick into fsec
153 * Then 64 bit div to find number of hpet clk per tick
154 */
155 ASM_MUL64_REG(tick_fsec_low, tick_fsec_high,
156 KERNEL_TICK_USEC, FSEC_TO_USEC);
157 ASM_DIV64_REG(hpet_tick, hpet_tick_rem,
158 hpet_period, tick_fsec_low, tick_fsec_high);
159
160 if (hpet_tick_rem > (hpet_period >> 1))
161 hpet_tick++; /* rounding the result */
162
163 hpet_use_timer = id & HPET_ID_LEGSUP;
164
165 if (hpet_timer_stop_set_go(hpet_tick)) {
166 iounmap(hpet_virt_address);
167 hpet_virt_address = NULL;
168 return -1;
169 }
170
171 use_hpet = 1;
172
173#ifdef CONFIG_HPET
174 {
175 struct hpet_data hd;
176 unsigned int ntimer;
177
178 memset(&hd, 0, sizeof (hd));
179
180 ntimer = hpet_readl(HPET_ID);
181 ntimer = (ntimer & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT;
182 ntimer++;
183
184 /*
185 * Register with driver.
186 * Timer0 and Timer1 is used by platform.
187 */
188 hd.hd_phys_address = hpet_address;
189 hd.hd_address = hpet_virt_address;
190 hd.hd_nirqs = ntimer;
191 hd.hd_flags = HPET_DATA_PLATFORM;
192 hpet_reserve_timer(&hd, 0);
193#ifdef CONFIG_HPET_EMULATE_RTC
194 hpet_reserve_timer(&hd, 1);
195#endif
196 hd.hd_irq[0] = HPET_LEGACY_8254;
197 hd.hd_irq[1] = HPET_LEGACY_RTC;
198 if (ntimer > 2) {
199 struct hpet __iomem *hpet;
200 struct hpet_timer __iomem *timer;
201 int i;
202
203 hpet = hpet_virt_address;
204
205 for (i = 2, timer = &hpet->hpet_timers[2]; i < ntimer;
206 timer++, i++)
207 hd.hd_irq[i] = (timer->hpet_config &
208 Tn_INT_ROUTE_CNF_MASK) >>
209 Tn_INT_ROUTE_CNF_SHIFT;
210
211 }
212
213 hpet_alloc(&hd);
214 }
215#endif
216
217#ifdef CONFIG_X86_LOCAL_APIC
218 if (hpet_use_timer)
219 wait_timer_tick = wait_hpet_tick;
220#endif
221 return 0;
222}
223
224int hpet_reenable(void)
225{
226 return hpet_timer_stop_set_go(hpet_tick);
227}
228
229int is_hpet_enabled(void)
230{
231 return use_hpet;
232}
233
234int is_hpet_capable(void)
235{
236 if (!boot_hpet_disable && hpet_address)
237 return 1;
238 return 0;
239}
240
241static int __init hpet_setup(char* str)
242{
243 if (str) {
244 if (!strncmp("disable", str, 7))
245 boot_hpet_disable = 1;
246 }
247 return 1;
248}
249
250__setup("hpet=", hpet_setup);
251
252#ifdef CONFIG_HPET_EMULATE_RTC
253/* HPET in LegacyReplacement Mode eats up RTC interrupt line. When HPET
254 * is enabled, we support RTC interrupt functionality in software.
255 * RTC has 3 kinds of interrupts:
256 * 1) Update Interrupt - generate an interrupt, every sec, when RTC clock
257 * is updated
258 * 2) Alarm Interrupt - generate an interrupt at a specific time of day
259 * 3) Periodic Interrupt - generate periodic interrupt, with frequencies
260 * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all freqs in powers of 2)
261 * (1) and (2) above are implemented using polling at a frequency of
262 * 64 Hz. The exact frequency is a tradeoff between accuracy and interrupt
263 * overhead. (DEFAULT_RTC_INT_FREQ)
264 * For (3), we use interrupts at 64Hz or user specified periodic
265 * frequency, whichever is higher.
266 */
267#include <linux/mc146818rtc.h>
268#include <linux/rtc.h>
269
270#define DEFAULT_RTC_INT_FREQ 64
271#define RTC_NUM_INTS 1
272
273static unsigned long UIE_on;
274static unsigned long prev_update_sec;
275
276static unsigned long AIE_on;
277static struct rtc_time alarm_time;
278
279static unsigned long PIE_on;
280static unsigned long PIE_freq = DEFAULT_RTC_INT_FREQ;
281static unsigned long PIE_count;
282
283static unsigned long hpet_rtc_int_freq; /* RTC interrupt frequency */
284static unsigned int hpet_t1_cmp; /* cached comparator register */
285
286/*
287 * Timer 1 for RTC, we do not use periodic interrupt feature,
288 * even if HPET supports periodic interrupts on Timer 1.
289 * The reason being, to set up a periodic interrupt in HPET, we need to
290 * stop the main counter. And if we do that every time someone disables/enables
291 * RTC, we will have adverse effect on main kernel timer running on Timer 0.
292 * So, for the time being, simulate the periodic interrupt in software.
293 *
294 * hpet_rtc_timer_init() is called for the first time; during subsequent
295 * interrupts, reinit happens through hpet_rtc_timer_reinit().
296 */
297int hpet_rtc_timer_init(void)
298{
299 unsigned int cfg, cnt;
300 unsigned long flags;
301
302 if (!is_hpet_enabled())
303 return 0;
304 /*
305 * Set the counter 1 and enable the interrupts.
306 */
307 if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ))
308 hpet_rtc_int_freq = PIE_freq;
309 else
310 hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
311
312 local_irq_save(flags);
313
314 cnt = hpet_readl(HPET_COUNTER);
315 cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq);
316 hpet_writel(cnt, HPET_T1_CMP);
317 hpet_t1_cmp = cnt;
318
319 cfg = hpet_readl(HPET_T1_CFG);
320 cfg &= ~HPET_TN_PERIODIC;
321 cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
322 hpet_writel(cfg, HPET_T1_CFG);
323
324 local_irq_restore(flags);
325
326 return 1;
327}
328
329static void hpet_rtc_timer_reinit(void)
330{
331 unsigned int cfg, cnt, ticks_per_int, lost_ints;
332
333 if (unlikely(!(PIE_on | AIE_on | UIE_on))) {
334 cfg = hpet_readl(HPET_T1_CFG);
335 cfg &= ~HPET_TN_ENABLE;
336 hpet_writel(cfg, HPET_T1_CFG);
337 return;
338 }
339
340 if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ))
341 hpet_rtc_int_freq = PIE_freq;
342 else
343 hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ;
344
345 /* It is more accurate to use the comparator value than current count.*/
346 ticks_per_int = hpet_tick * HZ / hpet_rtc_int_freq;
347 hpet_t1_cmp += ticks_per_int;
348 hpet_writel(hpet_t1_cmp, HPET_T1_CMP);
349
350 /*
351 * If the interrupt handler was delayed too long, the write above tries
352 * to schedule the next interrupt in the past and the hardware would
353 * not interrupt until the counter had wrapped around.
354 * So we have to check that the comparator wasn't set to a past time.
355 */
356 cnt = hpet_readl(HPET_COUNTER);
357 if (unlikely((int)(cnt - hpet_t1_cmp) > 0)) {
358 lost_ints = (cnt - hpet_t1_cmp) / ticks_per_int + 1;
359 /* Make sure that, even with the time needed to execute
360 * this code, the next scheduled interrupt has been moved
361 * back to the future: */
362 lost_ints++;
363
364 hpet_t1_cmp += lost_ints * ticks_per_int;
365 hpet_writel(hpet_t1_cmp, HPET_T1_CMP);
366
367 if (PIE_on)
368 PIE_count += lost_ints;
369
370 printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n",
371 hpet_rtc_int_freq);
372 }
373}
374
375/*
376 * The functions below are called from rtc driver.
377 * Return 0 if HPET is not being used.
378 * Otherwise do the necessary changes and return 1.
379 */
380int hpet_mask_rtc_irq_bit(unsigned long bit_mask)
381{
382 if (!is_hpet_enabled())
383 return 0;
384
385 if (bit_mask & RTC_UIE)
386 UIE_on = 0;
387 if (bit_mask & RTC_PIE)
388 PIE_on = 0;
389 if (bit_mask & RTC_AIE)
390 AIE_on = 0;
391
392 return 1;
393}
394
395int hpet_set_rtc_irq_bit(unsigned long bit_mask)
396{
397 int timer_init_reqd = 0;
398
399 if (!is_hpet_enabled())
400 return 0;
401
402 if (!(PIE_on | AIE_on | UIE_on))
403 timer_init_reqd = 1;
404
405 if (bit_mask & RTC_UIE) {
406 UIE_on = 1;
407 }
408 if (bit_mask & RTC_PIE) {
409 PIE_on = 1;
410 PIE_count = 0;
411 }
412 if (bit_mask & RTC_AIE) {
413 AIE_on = 1;
414 }
415
416 if (timer_init_reqd)
417 hpet_rtc_timer_init();
418
419 return 1;
420}
421
422int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec)
423{
424 if (!is_hpet_enabled())
425 return 0;
426
427 alarm_time.tm_hour = hrs;
428 alarm_time.tm_min = min;
429 alarm_time.tm_sec = sec;
430
431 return 1;
432}
433
434int hpet_set_periodic_freq(unsigned long freq)
435{
436 if (!is_hpet_enabled())
437 return 0;
438
439 PIE_freq = freq;
440 PIE_count = 0;
441
442 return 1;
443}
444
445int hpet_rtc_dropped_irq(void)
446{
447 if (!is_hpet_enabled())
448 return 0;
449
450 return 1;
451}
452
453irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
454{
455 struct rtc_time curr_time;
456 unsigned long rtc_int_flag = 0;
457 int call_rtc_interrupt = 0;
458
459 hpet_rtc_timer_reinit();
460
461 if (UIE_on | AIE_on) {
462 rtc_get_rtc_time(&curr_time);
463 }
464 if (UIE_on) {
465 if (curr_time.tm_sec != prev_update_sec) {
466 /* Set update int info, call real rtc int routine */
467 call_rtc_interrupt = 1;
468 rtc_int_flag = RTC_UF;
469 prev_update_sec = curr_time.tm_sec;
470 }
471 }
472 if (PIE_on) {
473 PIE_count++;
474 if (PIE_count >= hpet_rtc_int_freq/PIE_freq) {
475 /* Set periodic int info, call real rtc int routine */
476 call_rtc_interrupt = 1;
477 rtc_int_flag |= RTC_PF;
478 PIE_count = 0;
479 }
480 }
481 if (AIE_on) {
482 if ((curr_time.tm_sec == alarm_time.tm_sec) &&
483 (curr_time.tm_min == alarm_time.tm_min) &&
484 (curr_time.tm_hour == alarm_time.tm_hour)) {
485 /* Set alarm int info, call real rtc int routine */
486 call_rtc_interrupt = 1;
487 rtc_int_flag |= RTC_AF;
488 }
489 }
490 if (call_rtc_interrupt) {
491 rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8));
492 rtc_interrupt(rtc_int_flag, dev_id);
493 }
494 return IRQ_HANDLED;
495}
496#endif
497
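
The deleted emulation computed the timer-1 increment as hpet_tick * HZ / freq, i.e. HPET clocks per RTC interrupt. A worked example with assumed values (14.318 MHz HPET, HZ=100, 64 Hz RTC rate):

#include <stdio.h>

int main(void)
{
    unsigned long hpet_tick = 143182;   /* 14318180 Hz / HZ=100 */
    unsigned long hz = 100, rtc_freq = 64;
    unsigned long ticks_per_int = hpet_tick * hz / rtc_freq;

    /* 14318200 / 64 = 223721 clocks ~= 15.625 ms, i.e. 1/64 s */
    printf("%lu HPET clocks per RTC interrupt\n", ticks_per_int);
    return 0;
}
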
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
index 46f752a8bbf3..3082a418635c 100644
--- a/arch/i386/kernel/tsc.c
+++ b/arch/i386/kernel/tsc.c
@@ -60,12 +60,6 @@ static inline int check_tsc_unstable(void)
60 return tsc_unstable; 60 return tsc_unstable;
61} 61}
62 62
63void mark_tsc_unstable(void)
64{
65 tsc_unstable = 1;
66}
67EXPORT_SYMBOL_GPL(mark_tsc_unstable);
68
69 /* Accelerators for sched_clock() 63 /* Accelerators for sched_clock()
70 * convert from cycles (64 bits) => nanoseconds (64 bits) 64 * convert from cycles (64 bits) => nanoseconds (64 bits)
71 * basic equation: 65 * basic equation:
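The "accelerator" referred to here is the usual fixed-point trick: rather than dividing by the CPU frequency on every sched_clock() call, a scale factor is precomputed once so that the conversion becomes a single multiply and shift. A sketch under the assumption of a 10-bit shift (the constant name follows the kernel's CYC2NS_SCALE_FACTOR; the rest is illustrative):

/* cycles -> nanoseconds without a division in the hot path:
 *   ns = cycles * (10^6 / cpu_khz) ~= (cycles * scale) >> 10,
 * where scale = (10^6 << 10) / cpu_khz is computed once. */
#define CYC2NS_SCALE_FACTOR 10

static unsigned long cyc2ns_scale;

static void set_cyc2ns_scale(unsigned long cpu_khz)
{
	cyc2ns_scale = (1000000UL << CYC2NS_SCALE_FACTOR) / cpu_khz;
}

static unsigned long long cycles_2_ns(unsigned long long cyc)
{
	return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
}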
@@ -222,34 +216,6 @@ out_no_tsc:
222 216
223#ifdef CONFIG_CPU_FREQ 217#ifdef CONFIG_CPU_FREQ
224 218
225static unsigned int cpufreq_delayed_issched = 0;
226static unsigned int cpufreq_init = 0;
227static struct work_struct cpufreq_delayed_get_work;
228
229static void handle_cpufreq_delayed_get(struct work_struct *work)
230{
231 unsigned int cpu;
232
233 for_each_online_cpu(cpu)
234 cpufreq_get(cpu);
235
236 cpufreq_delayed_issched = 0;
237}
238
239/*
240 * if we notice cpufreq oddness, schedule a call to cpufreq_get(), which
241 * verifies that the CPU frequency the timing core thinks the CPU is
242 * running at is still correct.
243 */
244static inline void cpufreq_delayed_get(void)
245{
246 if (cpufreq_init && !cpufreq_delayed_issched) {
247 cpufreq_delayed_issched = 1;
248 printk(KERN_DEBUG "Checking if CPU frequency changed.\n");
249 schedule_work(&cpufreq_delayed_get_work);
250 }
251}
252
253/* 219/*
254 * if the CPU frequency is scaled, TSC-based delays will need a different 220 * if the CPU frequency is scaled, TSC-based delays will need a different
255 * loops_per_jiffy value to function properly. 221 * loops_per_jiffy value to function properly.
@@ -313,17 +279,9 @@ static struct notifier_block time_cpufreq_notifier_block = {
313 279
314static int __init cpufreq_tsc(void) 280static int __init cpufreq_tsc(void)
315{ 281{
316 int ret; 282 return cpufreq_register_notifier(&time_cpufreq_notifier_block,
317 283 CPUFREQ_TRANSITION_NOTIFIER);
318 INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get);
319 ret = cpufreq_register_notifier(&time_cpufreq_notifier_block,
320 CPUFREQ_TRANSITION_NOTIFIER);
321 if (!ret)
322 cpufreq_init = 1;
323
324 return ret;
325} 284}
326
327core_initcall(cpufreq_tsc); 285core_initcall(cpufreq_tsc);
328 286
329#endif 287#endif
@@ -331,7 +289,6 @@ core_initcall(cpufreq_tsc);
331/* clock source code */ 289/* clock source code */
332 290
333static unsigned long current_tsc_khz = 0; 291static unsigned long current_tsc_khz = 0;
334static int tsc_update_callback(void);
335 292
336static cycle_t read_tsc(void) 293static cycle_t read_tsc(void)
337{ 294{
@@ -349,37 +306,28 @@ static struct clocksource clocksource_tsc = {
349 .mask = CLOCKSOURCE_MASK(64), 306 .mask = CLOCKSOURCE_MASK(64),
350 .mult = 0, /* to be set */ 307 .mult = 0, /* to be set */
351 .shift = 22, 308 .shift = 22,
352 .update_callback = tsc_update_callback, 309 .flags = CLOCK_SOURCE_IS_CONTINUOUS |
353 .is_continuous = 1, 310 CLOCK_SOURCE_MUST_VERIFY,
354}; 311};
355 312
356static int tsc_update_callback(void) 313void mark_tsc_unstable(void)
357{ 314{
358 int change = 0; 315 if (!tsc_unstable) {
359 316 tsc_unstable = 1;
360 /* check to see if we should switch to the safe clocksource: */ 317 /* Can be called before registration */
361 if (clocksource_tsc.rating != 0 && check_tsc_unstable()) { 318 if (clocksource_tsc.mult)
362 clocksource_tsc.rating = 0; 319 clocksource_change_rating(&clocksource_tsc, 0);
363 clocksource_reselect(); 320 else
364 change = 1; 321 clocksource_tsc.rating = 0;
365 }
366
367 /* only update if tsc_khz has changed: */
368 if (current_tsc_khz != tsc_khz) {
369 current_tsc_khz = tsc_khz;
370 clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz,
371 clocksource_tsc.shift);
372 change = 1;
373 } 322 }
374
375 return change;
376} 323}
324EXPORT_SYMBOL_GPL(mark_tsc_unstable);
377 325
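The new mark_tsc_unstable() has to handle an ordering subtlety: before registration (mult still 0) the clocksource struct is private and its rating may be edited directly, but once it is on the core's list only clocksource_change_rating() may touch it, so the core can reselect the best source. A userspace stand-in for that contract (the stub below is not the kernel API):

/* Stand-in for the register-then-rerate contract; change_rating()
 * models clocksource_change_rating(), which also reselects. */
#include <stdio.h>

struct cs { int rating; int registered; };

static void change_rating(struct cs *c, int rating)
{
	c->rating = rating;
	printf("core notified, best clocksource reselected\n");
}

static void degrade(struct cs *c)
{
	if (c->registered)
		change_rating(c, 0);    /* already on the core's list */
	else
		c->rating = 0;          /* still private: edit in place */
}

int main(void)
{
	struct cs tsc = { .rating = 300, .registered = 1 };
	degrade(&tsc);
	return 0;
}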
378static int __init dmi_mark_tsc_unstable(struct dmi_system_id *d) 326static int __init dmi_mark_tsc_unstable(struct dmi_system_id *d)
379{ 327{
380 printk(KERN_NOTICE "%s detected: marking TSC unstable.\n", 328 printk(KERN_NOTICE "%s detected: marking TSC unstable.\n",
381 d->ident); 329 d->ident);
382 mark_tsc_unstable(); 330 tsc_unstable = 1;
383 return 0; 331 return 0;
384} 332}
385 333
@@ -396,65 +344,44 @@ static struct dmi_system_id __initdata bad_tsc_dmi_table[] = {
396 {} 344 {}
397}; 345};
398 346
399#define TSC_FREQ_CHECK_INTERVAL (10*MSEC_PER_SEC) /* 10sec in MS */
400static struct timer_list verify_tsc_freq_timer;
401
402/* XXX - Probably should add locking */
403static void verify_tsc_freq(unsigned long unused)
404{
405 static u64 last_tsc;
406 static unsigned long last_jiffies;
407
408 u64 now_tsc, interval_tsc;
409 unsigned long now_jiffies, interval_jiffies;
410
411
412 if (check_tsc_unstable())
413 return;
414
415 rdtscll(now_tsc);
416 now_jiffies = jiffies;
417
418 if (!last_jiffies) {
419 goto out;
420 }
421
422 interval_jiffies = now_jiffies - last_jiffies;
423 interval_tsc = now_tsc - last_tsc;
424 interval_tsc *= HZ;
425 do_div(interval_tsc, cpu_khz*1000);
426
427 if (interval_tsc < (interval_jiffies * 3 / 4)) {
428 printk("TSC appears to be running slowly. "
429 "Marking it as unstable\n");
430 mark_tsc_unstable();
431 return;
432 }
433
434out:
435 last_tsc = now_tsc;
436 last_jiffies = now_jiffies;
437 /* set us up to go off on the next interval: */
438 mod_timer(&verify_tsc_freq_timer,
439 jiffies + msecs_to_jiffies(TSC_FREQ_CHECK_INTERVAL));
440}
441
442/* 347/*
443 * Make an educated guess whether the TSC is trustworthy and synchronized 348 * Make an educated guess whether the TSC is trustworthy and synchronized
444 * over all CPUs. 349 * over all CPUs.
445 */ 350 */
446static __init int unsynchronized_tsc(void) 351__cpuinit int unsynchronized_tsc(void)
447{ 352{
353 if (!cpu_has_tsc || tsc_unstable)
354 return 1;
448 /* 355 /*
449 * Intel systems are normally all synchronized. 356 * Intel systems are normally all synchronized.
450 * Exceptions must mark TSC as unstable: 357 * Exceptions must mark TSC as unstable:
451 */ 358 */
452 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) 359 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
453 return 0; 360 /* assume multi socket systems are not synchronized: */
361 if (num_possible_cpus() > 1)
362 tsc_unstable = 1;
363 }
364 return tsc_unstable;
365}
366
367/*
368 * Geode_LX - the OLPC CPU possibly has a very reliable TSC
369 */
370#ifdef CONFIG_MGEODE_LX
371/* RTSC counts during suspend */
372#define RTSC_SUSP 0x100
373
374static void __init check_geode_tsc_reliable(void)
375{
376 unsigned long val;
454 377
455 /* assume multi socket systems are not synchronized: */ 378 rdmsrl(MSR_GEODE_BUSCONT_CONF0, val);
456 return num_possible_cpus() > 1; 379 if ((val & RTSC_SUSP))
380 clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
457} 381}
382#else
383static inline void check_geode_tsc_reliable(void) { }
384#endif
458 385
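CLOCK_SOURCE_MUST_VERIFY asks the clocksource watchdog to keep cross-checking the TSC against another timebase; on the Geode LX the RTSC_SUSP bit advertises a counter that keeps running across suspend, so the flag can be dropped. The bit test in miniature, with the MSR read replaced by a pretend value (the flag value here is illustrative, not the kernel's):

/* Capability-bit test from check_geode_tsc_reliable(), runnable in
 * userspace; msr stands in for the rdmsrl() result. */
#include <stdio.h>

#define RTSC_SUSP                 0x100 /* TSC counts during suspend */
#define CLOCK_SOURCE_MUST_VERIFY  0x01  /* illustrative flag value */

int main(void)
{
	unsigned long msr = 0x100;          /* hypothetical MSR contents */
	unsigned int flags = CLOCK_SOURCE_MUST_VERIFY;

	if (msr & RTSC_SUSP)
		flags &= ~CLOCK_SOURCE_MUST_VERIFY;

	printf("flags = %#x\n", flags);
	return 0;
}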
459static int __init init_tsc_clocksource(void) 386static int __init init_tsc_clocksource(void)
460{ 387{
@@ -463,20 +390,16 @@ static int __init init_tsc_clocksource(void)
463 /* check blacklist */ 390 /* check blacklist */
464 dmi_check_system(bad_tsc_dmi_table); 391 dmi_check_system(bad_tsc_dmi_table);
465 392
466 if (unsynchronized_tsc()) /* mark unstable if unsynced */ 393 unsynchronized_tsc();
467 mark_tsc_unstable(); 394 check_geode_tsc_reliable();
468 current_tsc_khz = tsc_khz; 395 current_tsc_khz = tsc_khz;
469 clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz, 396 clocksource_tsc.mult = clocksource_khz2mult(current_tsc_khz,
470 clocksource_tsc.shift); 397 clocksource_tsc.shift);
471 /* lower the rating if we already know it's unstable: */ 398 /* lower the rating if we already know it's unstable: */
472 if (check_tsc_unstable()) 399 if (check_tsc_unstable()) {
473 clocksource_tsc.rating = 0; 400 clocksource_tsc.rating = 0;
474 401 clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
475 init_timer(&verify_tsc_freq_timer); 402 }
476 verify_tsc_freq_timer.function = verify_tsc_freq;
477 verify_tsc_freq_timer.expires =
478 jiffies + msecs_to_jiffies(TSC_FREQ_CHECK_INTERVAL);
479 add_timer(&verify_tsc_freq_timer);
480 403
481 return clocksource_register(&clocksource_tsc); 404 return clocksource_register(&clocksource_tsc);
482 } 405 }
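The mult/shift pair set up here defines the clocksource's cycle-to-nanosecond conversion, ns = (cycles * mult) >> shift, with mult derived from the calibrated tsc_khz. A worked example of that arithmetic (khz2mult below is a simplified stand-in for clocksource_khz2mult):

/* At 1 GHz (tsc_khz = 1000000) and shift = 22 this yields
 * mult = 2^22, i.e. exactly one nanosecond per cycle. */
#include <stdio.h>

static unsigned int khz2mult(unsigned long khz, unsigned int shift)
{
	unsigned long long tmp = 1000000ULL << shift;  /* ns per ms, scaled */

	tmp += khz / 2;                                /* round to nearest */
	return (unsigned int)(tmp / khz);
}

int main(void)
{
	unsigned int mult = khz2mult(1000000, 22);
	unsigned long long cycles = 2500;

	printf("mult = %u, %llu cycles = %llu ns\n",
	       mult, cycles, (cycles * mult) >> 22);
	return 0;
}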
diff --git a/arch/i386/kernel/tsc_sync.c b/arch/i386/kernel/tsc_sync.c
new file mode 100644
index 000000000000..12424629af87
--- /dev/null
+++ b/arch/i386/kernel/tsc_sync.c
@@ -0,0 +1 @@
#include "../../x86_64/kernel/tsc_sync.c"
diff --git a/arch/i386/kernel/vmitime.c b/arch/i386/kernel/vmitime.c
index 2e2d8dbcbd68..76d2adcae5a3 100644
--- a/arch/i386/kernel/vmitime.c
+++ b/arch/i386/kernel/vmitime.c
@@ -115,7 +115,7 @@ static struct clocksource clocksource_vmi = {
115 .mask = CLOCKSOURCE_MASK(64), 115 .mask = CLOCKSOURCE_MASK(64),
116 .mult = 0, /* to be set */ 116 .mult = 0, /* to be set */
117 .shift = 22, 117 .shift = 22,
118 .is_continuous = 1, 118 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
119}; 119};
120 120
121 121