Diffstat (limited to 'arch/x86')
-rw-r--r-- arch/x86/Kconfig                                           |    8
-rw-r--r-- arch/x86/configs/i386_defconfig                            |    1
-rw-r--r-- arch/x86/kernel/Makefile                                   |    8
-rw-r--r-- arch/x86/kernel/acpi/boot.c                                |    4
-rw-r--r-- arch/x86/kernel/apic.c (renamed from arch/x86/kernel/apic_32.c) | 627
-rw-r--r-- arch/x86/kernel/apic_64.c                                  | 1848
-rw-r--r-- arch/x86/kernel/bios_uv.c                                  |  137
-rw-r--r-- arch/x86/kernel/cpu/amd.c                                  |    2
-rw-r--r-- arch/x86/kernel/cpu/intel.c                                |    2
-rw-r--r-- arch/x86/kernel/efi.c                                      |    4
-rw-r--r-- arch/x86/kernel/entry_32.S                                 |    2
-rw-r--r-- arch/x86/kernel/genapic_flat_64.c                          |    4
-rw-r--r-- arch/x86/kernel/genx2apic_uv_x.c                           |   49
-rw-r--r-- arch/x86/kernel/hpet.c                                     |  453
-rw-r--r-- arch/x86/kernel/io_apic.c (renamed from arch/x86/kernel/io_apic_64.c) | 1538
-rw-r--r-- arch/x86/kernel/io_apic_32.c                               | 2908
-rw-r--r-- arch/x86/kernel/irq.c                                      |  189
-rw-r--r-- arch/x86/kernel/irq_32.c                                   |  194
-rw-r--r-- arch/x86/kernel/irq_64.c                                   |  169
-rw-r--r-- arch/x86/kernel/irqinit_32.c                               |   47
-rw-r--r-- arch/x86/kernel/irqinit_64.c                               |   28
-rw-r--r-- arch/x86/kernel/quirks.c                                   |    3
-rw-r--r-- arch/x86/kernel/setup.c                                    |    4
-rw-r--r-- arch/x86/kernel/setup_percpu.c                             |   17
-rw-r--r-- arch/x86/kernel/smpboot.c                                  |    6
-rw-r--r-- arch/x86/kernel/uv_irq.c                                   |   79
-rw-r--r-- arch/x86/kernel/uv_sysfs.c                                 |   72
-rw-r--r-- arch/x86/kernel/visws_quirks.c                             |   32
-rw-r--r-- arch/x86/kernel/vmiclock_32.c                              |    3
-rw-r--r-- arch/x86/lguest/boot.c                                     |    2
-rw-r--r-- arch/x86/mach-generic/bigsmp.c                             |    4
-rw-r--r-- arch/x86/mach-generic/es7000.c                             |   14
-rw-r--r-- arch/x86/mach-generic/numaq.c                              |   14
-rw-r--r-- arch/x86/mach-generic/summit.c                             |   14
-rw-r--r-- arch/x86/mach-voyager/voyager_smp.c                        |    4
-rw-r--r-- arch/x86/xen/irq.c                                         |    2
-rw-r--r-- arch/x86/xen/spinlock.c                                    |    2
37 files changed, 2842 insertions(+), 5652 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 49349ba77d80..739668968390 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1242,14 +1242,6 @@ config EFI
 	  resultant kernel should continue to boot on existing non-EFI
 	  platforms.
 
-config IRQBALANCE
-	def_bool y
-	prompt "Enable kernel irq balancing"
-	depends on X86_32 && SMP && X86_IO_APIC
-	help
-	  The default yes will allow the kernel to do irq load balancing.
-	  Saying no will keep the kernel from doing irq load balancing.
-
 config SECCOMP
 	def_bool y
 	prompt "Enable seccomp to safely compute untrusted bytecode"
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 52d0359719d7..13b8c86ae985 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -287,7 +287,6 @@ CONFIG_MTRR=y
 # CONFIG_MTRR_SANITIZER is not set
 CONFIG_X86_PAT=y
 CONFIG_EFI=y
-# CONFIG_IRQBALANCE is not set
 CONFIG_SECCOMP=y
 # CONFIG_HZ_100 is not set
 # CONFIG_HZ_250 is not set
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 0d41f0343dc0..d7e5a58ee22f 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -23,7 +23,7 @@ CFLAGS_hpet.o := $(nostackp)
 CFLAGS_tsc.o := $(nostackp)
 
 obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
-obj-y += traps.o irq_$(BITS).o dumpstack_$(BITS).o
+obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
 obj-y += time_$(BITS).o ioport.o ldt.o
 obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
 obj-$(CONFIG_X86_VISWS) += visws_quirks.o
@@ -60,8 +60,8 @@ obj-$(CONFIG_X86_32_SMP) += smpcommon.o
 obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o
 obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o
 obj-$(CONFIG_X86_MPPARSE) += mpparse.o
-obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi.o
-obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o
+obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
+obj-$(CONFIG_X86_IO_APIC) += io_apic.o
 obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
 obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
 obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
@@ -108,7 +108,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
         obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
-        obj-y += bios_uv.o
+        obj-y += bios_uv.o uv_irq.o uv_sysfs.o
         obj-y += genx2apic_cluster.o
         obj-y += genx2apic_phys.o
         obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index eb875cdc7367..0d1c26a583c5 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1256,7 +1256,7 @@ static int __init acpi_parse_madt_ioapic_entries(void)
 
 	count =
 	    acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr,
-				  NR_IRQ_VECTORS);
+				  nr_irqs);
 	if (count < 0) {
 		printk(KERN_ERR PREFIX
 		       "Error parsing interrupt source overrides entry\n");
@@ -1276,7 +1276,7 @@ static int __init acpi_parse_madt_ioapic_entries(void)
 
 	count =
 	    acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_parse_nmi_src,
-				  NR_IRQ_VECTORS);
+				  nr_irqs);
 	if (count < 0) {
 		printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
 		/* TBD: Cleanup to allow fallback to MPS */
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic.c
index 21c831d96af3..04a7f960bbc0 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic.c
@@ -23,11 +23,13 @@
 #include <linux/mc146818rtc.h>
 #include <linux/kernel_stat.h>
 #include <linux/sysdev.h>
+#include <linux/ioport.h>
 #include <linux/cpu.h>
 #include <linux/clockchips.h>
 #include <linux/acpi_pmtmr.h>
 #include <linux/module.h>
 #include <linux/dmi.h>
+#include <linux/dmar.h>
 
 #include <asm/atomic.h>
 #include <asm/smp.h>
@@ -36,8 +38,14 @@
 #include <asm/desc.h>
 #include <asm/arch_hooks.h>
 #include <asm/hpet.h>
+#include <asm/pgalloc.h>
 #include <asm/i8253.h>
 #include <asm/nmi.h>
+#include <asm/idle.h>
+#include <asm/proto.h>
+#include <asm/timex.h>
+#include <asm/apic.h>
+#include <asm/i8259.h>
 
 #include <mach_apic.h>
 #include <mach_apicdef.h>
@@ -50,16 +58,58 @@
 # error SPURIOUS_APIC_VECTOR definition error
 #endif
 
-unsigned long mp_lapic_addr;
-
+#ifdef CONFIG_X86_32
 /*
  * Knob to control our willingness to enable the local APIC.
  *
  * +1=force-enable
  */
 static int force_enable_local_apic;
-int disable_apic;
+/*
+ * APIC command line parameters
+ */
+static int __init parse_lapic(char *arg)
+{
+	force_enable_local_apic = 1;
+	return 0;
+}
+early_param("lapic", parse_lapic);
+/* Local APIC was disabled by the BIOS and enabled by the kernel */
+static int enabled_via_apicbase;
+
+#endif
+
+#ifdef CONFIG_X86_64
+static int apic_calibrate_pmtmr __initdata;
+static __init int setup_apicpmtimer(char *s)
+{
+	apic_calibrate_pmtmr = 1;
+	notsc_setup(NULL);
+	return 0;
+}
+__setup("apicpmtimer", setup_apicpmtimer);
+#endif
+
+#ifdef CONFIG_X86_64
+#define HAVE_X2APIC
+#endif
+
+#ifdef HAVE_X2APIC
+int x2apic;
+/* x2apic enabled before OS handover */
+int x2apic_preenabled;
+int disable_x2apic;
+static __init int setup_nox2apic(char *str)
+{
+	disable_x2apic = 1;
+	setup_clear_cpu_cap(X86_FEATURE_X2APIC);
+	return 0;
+}
+early_param("nox2apic", setup_nox2apic);
+#endif
 
+unsigned long mp_lapic_addr;
+int disable_apic;
 /* Disable local APIC timer from the kernel commandline or via dmi quirk */
 static int disable_apic_timer __cpuinitdata;
 /* Local APIC timer works in C2 */
@@ -110,9 +160,6 @@ static struct clock_event_device lapic_clockevent = {
 };
 static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
 
-/* Local APIC was disabled by the BIOS and enabled by the kernel */
-static int enabled_via_apicbase;
-
 static unsigned long apic_phys;
 
 /*
@@ -202,6 +249,42 @@ static struct apic_ops xapic_ops = {
 struct apic_ops __read_mostly *apic_ops = &xapic_ops;
 EXPORT_SYMBOL_GPL(apic_ops);
 
+#ifdef HAVE_X2APIC
+static void x2apic_wait_icr_idle(void)
+{
+	/* no need to wait for icr idle in x2apic */
+	return;
+}
+
+static u32 safe_x2apic_wait_icr_idle(void)
+{
+	/* no need to wait for icr idle in x2apic */
+	return 0;
+}
+
+void x2apic_icr_write(u32 low, u32 id)
+{
+	wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
+}
+
+u64 x2apic_icr_read(void)
+{
+	unsigned long val;
+
+	rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
+	return val;
+}
+
+static struct apic_ops x2apic_ops = {
+	.read = native_apic_msr_read,
+	.write = native_apic_msr_write,
+	.icr_read = x2apic_icr_read,
+	.icr_write = x2apic_icr_write,
+	.wait_icr_idle = x2apic_wait_icr_idle,
+	.safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
+};
+#endif
+
 /**
  * enable_NMI_through_LVT0 - enable NMI through local vector table 0
  */
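Both ops tables fill the same six hooks, so everything above the accessor layer stays mode-agnostic; a minimal sketch of the dispatch pattern (the inline wrappers here are an assumption, modeled on how the apic.h accessors of this era consumed apic_ops):

/* Sketch, not the verbatim header: the generic accessors bounce
 * through apic_ops, which check_x2apic()/enable_IR_x2apic() repoint
 * from &xapic_ops (MMIO at FIX_APIC_BASE) to &x2apic_ops (MSRs). */
static inline u32 apic_read(u32 reg)
{
	return apic_ops->read(reg);
}

static inline void apic_write(u32 reg, u32 val)
{
	apic_ops->write(reg, val);
}

static inline void apic_wait_icr_idle(void)
{
	apic_ops->wait_icr_idle();
}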
@@ -219,6 +302,7 @@ void __cpuinit enable_NMI_through_LVT0(void)
 	apic_write(APIC_LVT0, v);
 }
 
+#ifdef CONFIG_X86_32
 /**
  * get_physical_broadcast - Get number of physical broadcast IDs
  */
@@ -226,6 +310,7 @@ int get_physical_broadcast(void)
 {
 	return modern_apic() ? 0xff : 0xf;
 }
+#endif
 
 /**
  * lapic_get_maxlvt - get the maximum number of local vector table entries
@@ -247,11 +332,7 @@ int lapic_get_maxlvt(void)
  */
 
 /* Clock divisor */
-#ifdef CONFG_X86_64
-#define APIC_DIVISOR 1
-#else
 #define APIC_DIVISOR 16
-#endif
 
 /*
  * This function sets up the local APIC timer, with a timeout of
@@ -383,7 +464,7 @@ static void lapic_timer_broadcast(cpumask_t mask)
  * Setup the local APIC timer for this CPU. Copy the initilized values
  * of the boot CPU and register the clock event in the framework.
  */
-static void __devinit setup_APIC_timer(void)
+static void __cpuinit setup_APIC_timer(void)
 {
 	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
 
@@ -453,14 +534,51 @@ static void __init lapic_cal_handler(struct clock_event_device *dev)
 	}
 }
 
+static int __init calibrate_by_pmtimer(long deltapm, long *delta)
+{
+	const long pm_100ms = PMTMR_TICKS_PER_SEC / 10;
+	const long pm_thresh = pm_100ms / 100;
+	unsigned long mult;
+	u64 res;
+
+#ifndef CONFIG_X86_PM_TIMER
+	return -1;
+#endif
+
+	apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm);
+
+	/* Check, if the PM timer is available */
+	if (!deltapm)
+		return -1;
+
+	mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
+
+	if (deltapm > (pm_100ms - pm_thresh) &&
+	    deltapm < (pm_100ms + pm_thresh)) {
+		apic_printk(APIC_VERBOSE, "... PM timer result ok\n");
+	} else {
+		res = (((u64)deltapm) * mult) >> 22;
+		do_div(res, 1000000);
+		printk(KERN_WARNING "APIC calibration not consistent "
+			"with PM Timer: %ldms instead of 100ms\n",
+			(long)res);
+		/* Correct the lapic counter value */
+		res = (((u64)(*delta)) * pm_100ms);
+		do_div(res, deltapm);
+		printk(KERN_INFO "APIC delta adjusted to PM-Timer: "
+			"%lu (%ld)\n", (unsigned long)res, *delta);
+		*delta = (long)res;
+	}
+
+	return 0;
+}
+
 static int __init calibrate_APIC_clock(void)
 {
 	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
-	const long pm_100ms = PMTMR_TICKS_PER_SEC/10;
-	const long pm_thresh = pm_100ms/100;
 	void (*real_handler)(struct clock_event_device *dev);
 	unsigned long deltaj;
-	long delta, deltapm;
+	long delta;
 	int pm_referenced = 0;
 
 	local_irq_disable();
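The correction branch in calibrate_by_pmtimer() rescales the measured LAPIC count to what a true 100ms window would have produced, taking the PM timer as the reference; a standalone illustration of that arithmetic (all numbers are made up for the example):

/* If the PM timer says the "100ms" calibration window really lasted
 * deltapm ticks, scale the LAPIC count by pm_100ms/deltapm. */
#include <stdint.h>
#include <stdio.h>

#define PMTMR_TICKS_PER_SEC 3579545	/* 3.579545 MHz ACPI PM timer */

int main(void)
{
	const long pm_100ms = PMTMR_TICKS_PER_SEC / 10;
	long deltapm = 322159;		/* window was really ~90ms */
	long delta = 90000000;		/* LAPIC ticks counted in it */

	uint64_t res = (uint64_t)delta * pm_100ms / deltapm;
	/* prints ~100000000: the count a true 100ms window would give */
	printf("adjusted LAPIC delta: %llu\n", (unsigned long long)res);
	return 0;
}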
@@ -470,10 +588,10 @@ static int __init calibrate_APIC_clock(void)
 	global_clock_event->event_handler = lapic_cal_handler;
 
 	/*
-	 * Setup the APIC counter to 1e9. There is no way the lapic
+	 * Setup the APIC counter to maximum. There is no way the lapic
 	 * can underflow in the 100ms detection time frame
 	 */
-	__setup_APIC_LVTT(1000000000, 0, 0);
+	__setup_APIC_LVTT(0xffffffff, 0, 0);
 
 	/* Let the interrupts run */
 	local_irq_enable();
@@ -490,34 +608,9 @@ static int __init calibrate_APIC_clock(void)
 	delta = lapic_cal_t1 - lapic_cal_t2;
 	apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
 
-	/* Check, if the PM timer is available */
-	deltapm = lapic_cal_pm2 - lapic_cal_pm1;
-	apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm);
-
-	if (deltapm) {
-		unsigned long mult;
-		u64 res;
-
-		mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
-
-		if (deltapm > (pm_100ms - pm_thresh) &&
-		    deltapm < (pm_100ms + pm_thresh)) {
-			apic_printk(APIC_VERBOSE, "... PM timer result ok\n");
-		} else {
-			res = (((u64) deltapm) * mult) >> 22;
-			do_div(res, 1000000);
-			printk(KERN_WARNING "APIC calibration not consistent "
-				"with PM Timer: %ldms instead of 100ms\n",
-				(long)res);
-			/* Correct the lapic counter value */
-			res = (((u64) delta) * pm_100ms);
-			do_div(res, deltapm);
-			printk(KERN_INFO "APIC delta adjusted to PM-Timer: "
-				"%lu (%ld)\n", (unsigned long) res, delta);
-			delta = (long) res;
-		}
-		pm_referenced = 1;
-	}
+	/* we trust the PM based calibration if possible */
+	pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
+					&delta);
 
 	/* Calculate the scaled math multiplication factor */
 	lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
@@ -559,7 +652,10 @@ static int __init calibrate_APIC_clock(void)
 
 	levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
 
-	/* We trust the pm timer based calibration */
+	/*
+	 * PM timer calibration failed or not turned on
+	 * so lets try APIC timer based calibration
+	 */
 	if (!pm_referenced) {
 		apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
 
@@ -652,7 +748,7 @@ void __init setup_boot_APIC_clock(void)
 	setup_APIC_timer();
 }
 
-void __devinit setup_secondary_APIC_clock(void)
+void __cpuinit setup_secondary_APIC_clock(void)
 {
 	setup_APIC_timer();
 }
@@ -718,6 +814,9 @@ void smp_apic_timer_interrupt(struct pt_regs *regs)
 	 * Besides, if we don't timer interrupts ignore the global
 	 * interrupt lock, which is the WrongThing (tm) to do.
 	 */
+#ifdef CONFIG_X86_64
+	exit_idle();
+#endif
 	irq_enter();
 	local_apic_timer_interrupt();
 	irq_exit();
@@ -991,40 +1090,43 @@ void __init init_bsp_APIC(void)
 
 static void __cpuinit lapic_setup_esr(void)
 {
-	unsigned long oldvalue, value, maxlvt;
-	if (lapic_is_integrated() && !esr_disable) {
-		if (esr_disable) {
-			/*
-			 * Something untraceable is creating bad interrupts on
-			 * secondary quads ... for the moment, just leave the
-			 * ESR disabled - we can't do anything useful with the
-			 * errors anyway - mbligh
-			 */
-			printk(KERN_INFO "Leaving ESR disabled.\n");
-			return;
-		}
-		/* !82489DX */
-		maxlvt = lapic_get_maxlvt();
-		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
-			apic_write(APIC_ESR, 0);
-		oldvalue = apic_read(APIC_ESR);
-
-		/* enables sending errors */
-		value = ERROR_APIC_VECTOR;
-		apic_write(APIC_LVTERR, value);
-		/*
-		 * spec says clear errors after enabling vector.
-		 */
-		if (maxlvt > 3)
-			apic_write(APIC_ESR, 0);
-		value = apic_read(APIC_ESR);
-		if (value != oldvalue)
-			apic_printk(APIC_VERBOSE, "ESR value before enabling "
-				"vector: 0x%08lx after: 0x%08lx\n",
-				oldvalue, value);
-	} else {
-		printk(KERN_INFO "No ESR for 82489DX.\n");
-	}
+	unsigned int oldvalue, value, maxlvt;
+
+	if (!lapic_is_integrated()) {
+		printk(KERN_INFO "No ESR for 82489DX.\n");
+		return;
+	}
+
+	if (esr_disable) {
+		/*
+		 * Something untraceable is creating bad interrupts on
+		 * secondary quads ... for the moment, just leave the
+		 * ESR disabled - we can't do anything useful with the
+		 * errors anyway - mbligh
+		 */
+		printk(KERN_INFO "Leaving ESR disabled.\n");
+		return;
+	}
+
+	maxlvt = lapic_get_maxlvt();
+	if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
+		apic_write(APIC_ESR, 0);
+	oldvalue = apic_read(APIC_ESR);
+
+	/* enables sending errors */
+	value = ERROR_APIC_VECTOR;
+	apic_write(APIC_LVTERR, value);
+
+	/*
+	 * spec says clear errors after enabling vector.
+	 */
+	if (maxlvt > 3)
+		apic_write(APIC_ESR, 0);
+	value = apic_read(APIC_ESR);
+	if (value != oldvalue)
+		apic_printk(APIC_VERBOSE, "ESR value before enabling "
+			"vector: 0x%08x after: 0x%08x\n",
+			oldvalue, value);
 }
 
 
@@ -1033,24 +1135,27 @@ static void __cpuinit lapic_setup_esr(void)
  */
 void __cpuinit setup_local_APIC(void)
 {
-	unsigned long value, integrated;
+	unsigned int value;
 	int i, j;
 
+#ifdef CONFIG_X86_32
 	/* Pound the ESR really hard over the head with a big hammer - mbligh */
-	if (esr_disable) {
+	if (lapic_is_integrated() && esr_disable) {
 		apic_write(APIC_ESR, 0);
 		apic_write(APIC_ESR, 0);
 		apic_write(APIC_ESR, 0);
 		apic_write(APIC_ESR, 0);
 	}
+#endif
 
-	integrated = lapic_is_integrated();
+	preempt_disable();
 
 	/*
 	 * Double-check whether this APIC is really registered.
+	 * This is meaningless in clustered apic mode, so we skip it.
 	 */
 	if (!apic_id_registered())
-		WARN_ON_ONCE(1);
+		BUG();
 
 	/*
 	 * Intel recommends to set DFR, LDR and TPR before enabling
@@ -1096,6 +1201,7 @@ void __cpuinit setup_local_APIC(void)
 	 */
 	value |= APIC_SPIV_APIC_ENABLED;
 
+#ifdef CONFIG_X86_32
 	/*
 	 * Some unknown Intel IO/APIC (or APIC) errata is biting us with
 	 * certain networking cards. If high frequency interrupts are
@@ -1116,8 +1222,13 @@ void __cpuinit setup_local_APIC(void)
 	 * See also the comment in end_level_ioapic_irq().  --macro
 	 */
 
-	/* Enable focus processor (bit==0) */
+	/*
+	 * - enable focus processor (bit==0)
+	 * - 64bit mode always use processor focus
+	 *   so no need to set it
+	 */
 	value &= ~APIC_SPIV_FOCUS_DISABLED;
+#endif
 
 	/*
 	 * Set spurious IRQ vector
@@ -1154,9 +1265,11 @@ void __cpuinit setup_local_APIC(void)
 		value = APIC_DM_NMI;
 	else
 		value = APIC_DM_NMI | APIC_LVT_MASKED;
-	if (!integrated)		/* 82489DX */
+	if (!lapic_is_integrated())	/* 82489DX */
 		value |= APIC_LVT_LEVEL_TRIGGER;
 	apic_write(APIC_LVT1, value);
+
+	preempt_enable();
 }
 
 void __cpuinit end_local_APIC_setup(void)
@@ -1177,6 +1290,153 @@ void __cpuinit end_local_APIC_setup(void)
 	apic_pm_activate();
 }
 
+#ifdef HAVE_X2APIC
+void check_x2apic(void)
+{
+	int msr, msr2;
+
+	rdmsr(MSR_IA32_APICBASE, msr, msr2);
+
+	if (msr & X2APIC_ENABLE) {
+		printk("x2apic enabled by BIOS, switching to x2apic ops\n");
+		x2apic_preenabled = x2apic = 1;
+		apic_ops = &x2apic_ops;
+	}
+}
+
+void enable_x2apic(void)
+{
+	int msr, msr2;
+
+	rdmsr(MSR_IA32_APICBASE, msr, msr2);
+	if (!(msr & X2APIC_ENABLE)) {
+		printk("Enabling x2apic\n");
+		wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
+	}
+}
+
+void enable_IR_x2apic(void)
+{
+#ifdef CONFIG_INTR_REMAP
+	int ret;
+	unsigned long flags;
+
+	if (!cpu_has_x2apic)
+		return;
+
+	if (!x2apic_preenabled && disable_x2apic) {
+		printk(KERN_INFO
+		       "Skipped enabling x2apic and Interrupt-remapping "
+		       "because of nox2apic\n");
+		return;
+	}
+
+	if (x2apic_preenabled && disable_x2apic)
+		panic("Bios already enabled x2apic, can't enforce nox2apic");
+
+	if (!x2apic_preenabled && skip_ioapic_setup) {
+		printk(KERN_INFO
+		       "Skipped enabling x2apic and Interrupt-remapping "
+		       "because of skipping io-apic setup\n");
+		return;
+	}
+
+	ret = dmar_table_init();
+	if (ret) {
+		printk(KERN_INFO
+		       "dmar_table_init() failed with %d:\n", ret);
+
+		if (x2apic_preenabled)
+			panic("x2apic enabled by bios. But IR enabling failed");
+		else
+			printk(KERN_INFO
+			       "Not enabling x2apic,Intr-remapping\n");
+		return;
+	}
+
+	local_irq_save(flags);
+	mask_8259A();
+
+	ret = save_mask_IO_APIC_setup();
+	if (ret) {
+		printk(KERN_INFO "Saving IO-APIC state failed: %d\n", ret);
+		goto end;
+	}
+
+	ret = enable_intr_remapping(1);
+
+	if (ret && x2apic_preenabled) {
+		local_irq_restore(flags);
+		panic("x2apic enabled by bios. But IR enabling failed");
+	}
+
+	if (ret)
+		goto end_restore;
+
+	if (!x2apic) {
+		x2apic = 1;
+		apic_ops = &x2apic_ops;
+		enable_x2apic();
+	}
+
+end_restore:
+	if (ret)
+		/*
+		 * IR enabling failed
+		 */
+		restore_IO_APIC_setup();
+	else
+		reinit_intr_remapped_IO_APIC(x2apic_preenabled);
+
+end:
+	unmask_8259A();
+	local_irq_restore(flags);
+
+	if (!ret) {
+		if (!x2apic_preenabled)
+			printk(KERN_INFO
+			       "Enabled x2apic and interrupt-remapping\n");
+		else
+			printk(KERN_INFO
+			       "Enabled Interrupt-remapping\n");
+	} else
+		printk(KERN_ERR
+		       "Failed to enable Interrupt-remapping and x2apic\n");
+#else
+	if (!cpu_has_x2apic)
+		return;
+
+	if (x2apic_preenabled)
+		panic("x2apic enabled prior OS handover,"
+		      " enable CONFIG_INTR_REMAP");
+
+	printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
+	       " and x2apic\n");
+#endif
+
+	return;
+}
+#endif /* HAVE_X2APIC */
+
+#ifdef CONFIG_X86_64
+/*
+ * Detect and enable local APICs on non-SMP boards.
+ * Original code written by Keir Fraser.
+ * On AMD64 we trust the BIOS - if it says no APIC it is likely
+ * not correctly set up (usually the APIC timer won't work etc.)
+ */
+static int __init detect_init_APIC(void)
+{
+	if (!cpu_has_apic) {
+		printk(KERN_INFO "No local APIC present\n");
+		return -1;
+	}
+
+	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+	boot_cpu_physical_apicid = 0;
+	return 0;
+}
+#else
 /*
  * Detect and initialize APIC
  */
@@ -1255,12 +1515,46 @@ no_apic:
 	printk(KERN_INFO "No local APIC present or hardware disabled\n");
 	return -1;
 }
+#endif
+
+#ifdef CONFIG_X86_64
+void __init early_init_lapic_mapping(void)
+{
+	unsigned long phys_addr;
+
+	/*
+	 * If no local APIC can be found then go out
+	 * : it means there is no mpatable and MADT
+	 */
+	if (!smp_found_config)
+		return;
+
+	phys_addr = mp_lapic_addr;
+
+	set_fixmap_nocache(FIX_APIC_BASE, phys_addr);
+	apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
+		    APIC_BASE, phys_addr);
+
+	/*
+	 * Fetch the APIC ID of the BSP in case we have a
+	 * default configuration (or the MP table is broken).
+	 */
+	boot_cpu_physical_apicid = read_apic_id();
+}
+#endif
 
 /**
  * init_apic_mappings - initialize APIC mappings
  */
 void __init init_apic_mappings(void)
 {
+#ifdef HAVE_X2APIC
+	if (x2apic) {
+		boot_cpu_physical_apicid = read_apic_id();
+		return;
+	}
+#endif
+
 	/*
 	 * If no local APIC can be found then set up a fake all
 	 * zeroes page to simulate the local APIC and another
@@ -1273,8 +1567,8 @@ void __init init_apic_mappings(void)
 	apic_phys = mp_lapic_addr;
 
 	set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
-	printk(KERN_DEBUG "mapped APIC to %08lx (%08lx)\n", APIC_BASE,
-	       apic_phys);
+	apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n",
+		    APIC_BASE, apic_phys);
 
 	/*
 	 * Fetch the APIC ID of the BSP in case we have a
@@ -1282,18 +1576,27 @@ void __init init_apic_mappings(void)
 	 */
 	if (boot_cpu_physical_apicid == -1U)
 		boot_cpu_physical_apicid = read_apic_id();
-
 }
 
 /*
  * This initializes the IO-APIC and APIC hardware if this is
  * a UP kernel.
  */
-
 int apic_version[MAX_APICS];
 
 int __init APIC_init_uniprocessor(void)
 {
+#ifdef CONFIG_X86_64
+	if (disable_apic) {
+		printk(KERN_INFO "Apic disabled\n");
+		return -1;
+	}
+	if (!cpu_has_apic) {
+		disable_apic = 1;
+		printk(KERN_INFO "Apic disabled by BIOS\n");
+		return -1;
+	}
+#else
 	if (!smp_found_config && !cpu_has_apic)
 		return -1;
 
@@ -1302,39 +1605,68 @@ int __init APIC_init_uniprocessor(void)
 	 */
 	if (!cpu_has_apic &&
 	    APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
-		printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
+		printk(KERN_ERR "BIOS bug, local APIC 0x%x not detected!...\n",
 		       boot_cpu_physical_apicid);
 		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
 		return -1;
 	}
+#endif
 
-	verify_local_APIC();
+#ifdef HAVE_X2APIC
+	enable_IR_x2apic();
+#endif
+#ifdef CONFIG_X86_64
+	setup_apic_routing();
+#endif
 
+	verify_local_APIC();
 	connect_bsp_APIC();
 
+#ifdef CONFIG_X86_64
+	apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
+#else
 	/*
 	 * Hack: In case of kdump, after a crash, kernel might be booting
 	 * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid
 	 * might be zero if read from MP tables. Get it from LAPIC.
 	 */
-#ifdef CONFIG_CRASH_DUMP
+# ifdef CONFIG_CRASH_DUMP
 	boot_cpu_physical_apicid = read_apic_id();
+# endif
 #endif
 	physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
-
 	setup_local_APIC();
 
+#ifdef CONFIG_X86_64
+	/*
+	 * Now enable IO-APICs, actually call clear_IO_APIC
+	 * We need clear_IO_APIC before enabling vector on BP
+	 */
+	if (!skip_ioapic_setup && nr_ioapics)
+		enable_IO_APIC();
+#endif
+
 #ifdef CONFIG_X86_IO_APIC
 	if (!smp_found_config || skip_ioapic_setup || !nr_ioapics)
 #endif
 		localise_nmi_watchdog();
 	end_local_APIC_setup();
+
 #ifdef CONFIG_X86_IO_APIC
-	if (smp_found_config)
-		if (!skip_ioapic_setup && nr_ioapics)
-			setup_IO_APIC();
+	if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
+		setup_IO_APIC();
+# ifdef CONFIG_X86_64
+	else
+		nr_ioapics = 0;
+# endif
 #endif
+
+#ifdef CONFIG_X86_64
+	setup_boot_APIC_clock();
+	check_nmi_watchdog();
+#else
 	setup_boot_clock();
+#endif
 
 	return 0;
 }
@@ -1348,8 +1680,11 @@ int __init APIC_init_uniprocessor(void)
  */
 void smp_spurious_interrupt(struct pt_regs *regs)
 {
-	unsigned long v;
+	u32 v;
 
+#ifdef CONFIG_X86_64
+	exit_idle();
+#endif
 	irq_enter();
 	/*
 	 * Check if this really is a spurious interrupt and ACK it
@@ -1360,10 +1695,14 @@ void smp_spurious_interrupt(struct pt_regs *regs)
 	if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
 		ack_APIC_irq();
 
+#ifdef CONFIG_X86_64
+	add_pda(irq_spurious_count, 1);
+#else
 	/* see sw-dev-man vol 3, chapter 7.4.13.5 */
 	printk(KERN_INFO "spurious APIC interrupt on CPU#%d, "
 	       "should never happen.\n", smp_processor_id());
 	__get_cpu_var(irq_stat).irq_spurious_count++;
+#endif
 	irq_exit();
 }
 
@@ -1372,8 +1711,11 @@ void smp_spurious_interrupt(struct pt_regs *regs)
  */
 void smp_error_interrupt(struct pt_regs *regs)
 {
-	unsigned long v, v1;
+	u32 v, v1;
 
+#ifdef CONFIG_X86_64
+	exit_idle();
+#endif
 	irq_enter();
 	/* First tickle the hardware, only then report what went on. -- REW */
 	v = apic_read(APIC_ESR);
@@ -1392,7 +1734,7 @@ void smp_error_interrupt(struct pt_regs *regs)
 	   6: Received illegal vector
 	   7: Illegal register address
 	*/
-	printk(KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n",
+	printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
 		smp_processor_id(), v , v1);
 	irq_exit();
 }
@@ -1565,6 +1907,13 @@ void __cpuinit generic_processor_info(int apicid, int version)
 	cpu_set(cpu, cpu_present_map);
 }
 
+#ifdef CONFIG_X86_64
+int hard_smp_processor_id(void)
+{
+	return read_apic_id();
+}
+#endif
+
 /*
  * Power management
  */
@@ -1640,7 +1989,7 @@ static int lapic_resume(struct sys_device *dev)
 
 	local_irq_save(flags);
 
-#ifdef CONFIG_X86_64
+#ifdef HAVE_X2APIC
 	if (x2apic)
 		enable_x2apic();
 	else
@@ -1702,7 +2051,7 @@ static struct sys_device device_lapic = {
 	.cls = &lapic_sysclass,
 };
 
-static void __devinit apic_pm_activate(void)
+static void __cpuinit apic_pm_activate(void)
 {
 	apic_pm_state.active = 1;
 }
@@ -1728,16 +2077,87 @@ static void apic_pm_activate(void) { }
 
 #endif /* CONFIG_PM */
 
+#ifdef CONFIG_X86_64
 /*
- * APIC command line parameters
+ * apic_is_clustered_box() -- Check if we can expect good TSC
+ *
+ * Thus far, the major user of this is IBM's Summit2 series:
+ *
+ * Clustered boxes may have unsynced TSC problems if they are
+ * multi-chassis. Use available data to take a good guess.
+ * If in doubt, go HPET.
  */
-static int __init parse_lapic(char *arg)
+__cpuinit int apic_is_clustered_box(void)
 {
-	force_enable_local_apic = 1;
-	return 0;
+	int i, clusters, zeros;
+	unsigned id;
+	u16 *bios_cpu_apicid;
+	DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
+
+	/*
+	 * there is not this kind of box with AMD CPU yet.
+	 * Some AMD box with quadcore cpu and 8 sockets apicid
+	 * will be [4, 0x23] or [8, 0x27] could be thought to
+	 * vsmp box still need checking...
+	 */
+	if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
+		return 0;
+
+	bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
+	bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
+
+	for (i = 0; i < NR_CPUS; i++) {
+		/* are we being called early in kernel startup? */
+		if (bios_cpu_apicid) {
+			id = bios_cpu_apicid[i];
+		}
+		else if (i < nr_cpu_ids) {
+			if (cpu_present(i))
+				id = per_cpu(x86_bios_cpu_apicid, i);
+			else
+				continue;
+		}
+		else
+			break;
+
+		if (id != BAD_APICID)
+			__set_bit(APIC_CLUSTERID(id), clustermap);
+	}
+
+	/* Problem:  Partially populated chassis may not have CPUs in some of
+	 * the APIC clusters they have been allocated.  Only present CPUs have
+	 * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap.
+	 * Since clusters are allocated sequentially, count zeros only if
+	 * they are bounded by ones.
+	 */
+	clusters = 0;
+	zeros = 0;
+	for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
+		if (test_bit(i, clustermap)) {
+			clusters += 1 + zeros;
+			zeros = 0;
+		} else
+			++zeros;
+	}
+
+	/* ScaleMP vSMPowered boxes have one cluster per board and TSCs are
+	 * not guaranteed to be synced between boards
+	 */
+	if (is_vsmp_box() && clusters > 1)
+		return 1;
+
+	/*
+	 * If clusters > 2, then should be multi-chassis.
+	 * May have to revisit this when multi-core + hyperthreaded CPUs come
+	 * out, but AFAIK this will work even for them.
+	 */
+	return (clusters > 2);
 }
-early_param("lapic", parse_lapic);
+#endif
 
+/*
+ * APIC command line parameters
+ */
 static int __init setup_disableapic(char *arg)
 {
 	disable_apic = 1;
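The bounded-zeros rule in the cluster count above is easiest to see on a concrete bitmap; a standalone sketch with illustrative values:

/* Gaps inside the allocated cluster range count as clusters
 * (unpopulated chassis), trailing gaps do not. */
#include <stdio.h>

int main(void)
{
	/* e.g. clusters 0, 1 and 3 populated, 2 empty */
	int clustermap[8] = { 1, 1, 0, 1, 0, 0, 0, 0 };
	int i, clusters = 0, zeros = 0;

	for (i = 0; i < 8; i++) {
		if (clustermap[i]) {
			clusters += 1 + zeros;	/* bounded zeros count too */
			zeros = 0;
		} else
			++zeros;
	}
	printf("%d\n", clusters);	/* prints 4: 0..3, not the tail */
	return 0;
}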
@@ -1779,7 +2199,6 @@ static int __init apic_set_verbosity(char *arg)
 	if (!arg)  {
 #ifdef CONFIG_X86_64
 		skip_ioapic_setup = 0;
-		ioapic_force = 1;
 		return 0;
 #endif
 		return -EINVAL;
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
deleted file mode 100644
index 94ddb69ae15e..000000000000
--- a/arch/x86/kernel/apic_64.c
+++ /dev/null
@@ -1,1848 +0,0 @@
1/*
2 * Local APIC handling, local APIC timers
3 *
4 * (c) 1999, 2000 Ingo Molnar <mingo@redhat.com>
5 *
6 * Fixes
7 * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
8 * thanks to Eric Gilmore
9 * and Rolf G. Tews
10 * for testing these extensively.
11 * Maciej W. Rozycki : Various updates and fixes.
12 * Mikael Pettersson : Power Management for UP-APIC.
13 * Pavel Machek and
14 * Mikael Pettersson : PM converted to driver model.
15 */
16
17#include <linux/init.h>
18
19#include <linux/mm.h>
20#include <linux/delay.h>
21#include <linux/bootmem.h>
22#include <linux/interrupt.h>
23#include <linux/mc146818rtc.h>
24#include <linux/kernel_stat.h>
25#include <linux/sysdev.h>
26#include <linux/ioport.h>
27#include <linux/clockchips.h>
28#include <linux/acpi_pmtmr.h>
29#include <linux/module.h>
30#include <linux/dmar.h>
31
32#include <asm/atomic.h>
33#include <asm/smp.h>
34#include <asm/mtrr.h>
35#include <asm/mpspec.h>
36#include <asm/hpet.h>
37#include <asm/pgalloc.h>
38#include <asm/nmi.h>
39#include <asm/idle.h>
40#include <asm/proto.h>
41#include <asm/timex.h>
42#include <asm/apic.h>
43#include <asm/i8259.h>
44
45#include <mach_ipi.h>
46#include <mach_apic.h>
47
48/* Disable local APIC timer from the kernel commandline or via dmi quirk */
49static int disable_apic_timer __cpuinitdata;
50static int apic_calibrate_pmtmr __initdata;
51int disable_apic;
52int disable_x2apic;
53int x2apic;
54
55/* x2apic enabled before OS handover */
56int x2apic_preenabled;
57
58/* Local APIC timer works in C2 */
59int local_apic_timer_c2_ok;
60EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
61
62/*
63 * Debug level, exported for io_apic.c
64 */
65unsigned int apic_verbosity;
66
67/* Have we found an MP table */
68int smp_found_config;
69
70static struct resource lapic_resource = {
71 .name = "Local APIC",
72 .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
73};
74
75static unsigned int calibration_result;
76
77static int lapic_next_event(unsigned long delta,
78 struct clock_event_device *evt);
79static void lapic_timer_setup(enum clock_event_mode mode,
80 struct clock_event_device *evt);
81static void lapic_timer_broadcast(cpumask_t mask);
82static void apic_pm_activate(void);
83
84/*
85 * The local apic timer can be used for any function which is CPU local.
86 */
87static struct clock_event_device lapic_clockevent = {
88 .name = "lapic",
89 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
90 | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY,
91 .shift = 32,
92 .set_mode = lapic_timer_setup,
93 .set_next_event = lapic_next_event,
94 .broadcast = lapic_timer_broadcast,
95 .rating = 100,
96 .irq = -1,
97};
98static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
99
100static unsigned long apic_phys;
101
102unsigned long mp_lapic_addr;
103
104/*
105 * Get the LAPIC version
106 */
107static inline int lapic_get_version(void)
108{
109 return GET_APIC_VERSION(apic_read(APIC_LVR));
110}
111
112/*
113 * Check, if the APIC is integrated or a separate chip
114 */
115static inline int lapic_is_integrated(void)
116{
117#ifdef CONFIG_X86_64
118 return 1;
119#else
120 return APIC_INTEGRATED(lapic_get_version());
121#endif
122}
123
124/*
125 * Check, whether this is a modern or a first generation APIC
126 */
127static int modern_apic(void)
128{
129 /* AMD systems use old APIC versions, so check the CPU */
130 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
131 boot_cpu_data.x86 >= 0xf)
132 return 1;
133 return lapic_get_version() >= 0x14;
134}
135
136/*
137 * Paravirt kernels also might be using these below ops. So we still
138 * use generic apic_read()/apic_write(), which might be pointing to different
139 * ops in PARAVIRT case.
140 */
141void xapic_wait_icr_idle(void)
142{
143 while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
144 cpu_relax();
145}
146
147u32 safe_xapic_wait_icr_idle(void)
148{
149 u32 send_status;
150 int timeout;
151
152 timeout = 0;
153 do {
154 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
155 if (!send_status)
156 break;
157 udelay(100);
158 } while (timeout++ < 1000);
159
160 return send_status;
161}
162
163void xapic_icr_write(u32 low, u32 id)
164{
165 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
166 apic_write(APIC_ICR, low);
167}
168
169u64 xapic_icr_read(void)
170{
171 u32 icr1, icr2;
172
173 icr2 = apic_read(APIC_ICR2);
174 icr1 = apic_read(APIC_ICR);
175
176 return icr1 | ((u64)icr2 << 32);
177}
178
179static struct apic_ops xapic_ops = {
180 .read = native_apic_mem_read,
181 .write = native_apic_mem_write,
182 .icr_read = xapic_icr_read,
183 .icr_write = xapic_icr_write,
184 .wait_icr_idle = xapic_wait_icr_idle,
185 .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
186};
187
188struct apic_ops __read_mostly *apic_ops = &xapic_ops;
189EXPORT_SYMBOL_GPL(apic_ops);
190
191static void x2apic_wait_icr_idle(void)
192{
193 /* no need to wait for icr idle in x2apic */
194 return;
195}
196
197static u32 safe_x2apic_wait_icr_idle(void)
198{
199 /* no need to wait for icr idle in x2apic */
200 return 0;
201}
202
203void x2apic_icr_write(u32 low, u32 id)
204{
205 wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
206}
207
208u64 x2apic_icr_read(void)
209{
210 unsigned long val;
211
212 rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
213 return val;
214}
215
216static struct apic_ops x2apic_ops = {
217 .read = native_apic_msr_read,
218 .write = native_apic_msr_write,
219 .icr_read = x2apic_icr_read,
220 .icr_write = x2apic_icr_write,
221 .wait_icr_idle = x2apic_wait_icr_idle,
222 .safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
223};
224
225/**
226 * enable_NMI_through_LVT0 - enable NMI through local vector table 0
227 */
228void __cpuinit enable_NMI_through_LVT0(void)
229{
230 unsigned int v;
231
232 /* unmask and set to NMI */
233 v = APIC_DM_NMI;
234
235 /* Level triggered for 82489DX (32bit mode) */
236 if (!lapic_is_integrated())
237 v |= APIC_LVT_LEVEL_TRIGGER;
238
239 apic_write(APIC_LVT0, v);
240}
241
242/**
243 * lapic_get_maxlvt - get the maximum number of local vector table entries
244 */
245int lapic_get_maxlvt(void)
246{
247 unsigned int v;
248
249 v = apic_read(APIC_LVR);
250 /*
251 * - we always have APIC integrated on 64bit mode
252 * - 82489DXs do not report # of LVT entries
253 */
254 return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
255}
256
257/*
258 * Local APIC timer
259 */
260
261/* Clock divisor */
262#ifdef CONFG_X86_64
263#define APIC_DIVISOR 1
264#else
265#define APIC_DIVISOR 16
266#endif
267
268/*
269 * This function sets up the local APIC timer, with a timeout of
270 * 'clocks' APIC bus clock. During calibration we actually call
271 * this function twice on the boot CPU, once with a bogus timeout
272 * value, second time for real. The other (noncalibrating) CPUs
273 * call this function only once, with the real, calibrated value.
274 *
275 * We do reads before writes even if unnecessary, to get around the
276 * P5 APIC double write bug.
277 */
278static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
279{
280 unsigned int lvtt_value, tmp_value;
281
282 lvtt_value = LOCAL_TIMER_VECTOR;
283 if (!oneshot)
284 lvtt_value |= APIC_LVT_TIMER_PERIODIC;
285 if (!lapic_is_integrated())
286 lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
287
288 if (!irqen)
289 lvtt_value |= APIC_LVT_MASKED;
290
291 apic_write(APIC_LVTT, lvtt_value);
292
293 /*
294 * Divide PICLK by 16
295 */
296 tmp_value = apic_read(APIC_TDCR);
297 apic_write(APIC_TDCR,
298 (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
299 APIC_TDR_DIV_16);
300
301 if (!oneshot)
302 apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
303}
304
305/*
306 * Setup extended LVT, AMD specific (K8, family 10h)
307 *
308 * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
309 * MCE interrupts are supported. Thus MCE offset must be set to 0.
310 *
311 * If mask=1, the LVT entry does not generate interrupts while mask=0
312 * enables the vector. See also the BKDGs.
313 */
314
315#define APIC_EILVT_LVTOFF_MCE 0
316#define APIC_EILVT_LVTOFF_IBS 1
317
318static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
319{
320 unsigned long reg = (lvt_off << 4) + APIC_EILVT0;
321 unsigned int v = (mask << 16) | (msg_type << 8) | vector;
322
323 apic_write(reg, v);
324}
325
326u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask)
327{
328 setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask);
329 return APIC_EILVT_LVTOFF_MCE;
330}
331
332u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
333{
334 setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
335 return APIC_EILVT_LVTOFF_IBS;
336}
337EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs);
338
339/*
340 * Program the next event, relative to now
341 */
342static int lapic_next_event(unsigned long delta,
343 struct clock_event_device *evt)
344{
345 apic_write(APIC_TMICT, delta);
346 return 0;
347}
348
349/*
350 * Setup the lapic timer in periodic or oneshot mode
351 */
352static void lapic_timer_setup(enum clock_event_mode mode,
353 struct clock_event_device *evt)
354{
355 unsigned long flags;
356 unsigned int v;
357
358 /* Lapic used as dummy for broadcast ? */
359 if (evt->features & CLOCK_EVT_FEAT_DUMMY)
360 return;
361
362 local_irq_save(flags);
363
364 switch (mode) {
365 case CLOCK_EVT_MODE_PERIODIC:
366 case CLOCK_EVT_MODE_ONESHOT:
367 __setup_APIC_LVTT(calibration_result,
368 mode != CLOCK_EVT_MODE_PERIODIC, 1);
369 break;
370 case CLOCK_EVT_MODE_UNUSED:
371 case CLOCK_EVT_MODE_SHUTDOWN:
372 v = apic_read(APIC_LVTT);
373 v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
374 apic_write(APIC_LVTT, v);
375 break;
376 case CLOCK_EVT_MODE_RESUME:
377 /* Nothing to do here */
378 break;
379 }
380
381 local_irq_restore(flags);
382}
383
384/*
385 * Local APIC timer broadcast function
386 */
387static void lapic_timer_broadcast(cpumask_t mask)
388{
389#ifdef CONFIG_SMP
390 send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
391#endif
392}
393
394/*
395 * Setup the local APIC timer for this CPU. Copy the initilized values
396 * of the boot CPU and register the clock event in the framework.
397 */
398static void setup_APIC_timer(void)
399{
400 struct clock_event_device *levt = &__get_cpu_var(lapic_events);
401
402 memcpy(levt, &lapic_clockevent, sizeof(*levt));
403 levt->cpumask = cpumask_of_cpu(smp_processor_id());
404
405 clockevents_register_device(levt);
406}
407
408/*
409 * In this function we calibrate APIC bus clocks to the external
410 * timer. Unfortunately we cannot use jiffies and the timer irq
411 * to calibrate, since some later bootup code depends on getting
412 * the first irq? Ugh.
413 *
414 * We want to do the calibration only once since we
415 * want to have local timer irqs syncron. CPUs connected
416 * by the same APIC bus have the very same bus frequency.
417 * And we want to have irqs off anyways, no accidental
418 * APIC irq that way.
419 */
420
421#define TICK_COUNT 100000000
422
423static int __init calibrate_APIC_clock(void)
424{
425 unsigned apic, apic_start;
426 unsigned long tsc, tsc_start;
427 int result;
428
429 local_irq_disable();
430
431 /*
432 * Put whatever arbitrary (but long enough) timeout
433 * value into the APIC clock, we just want to get the
434 * counter running for calibration.
435 *
436 * No interrupt enable !
437 */
438 __setup_APIC_LVTT(250000000, 0, 0);
439
440 apic_start = apic_read(APIC_TMCCT);
441#ifdef CONFIG_X86_PM_TIMER
442 if (apic_calibrate_pmtmr && pmtmr_ioport) {
443 pmtimer_wait(5000); /* 5ms wait */
444 apic = apic_read(APIC_TMCCT);
445 result = (apic_start - apic) * 1000L / 5;
446 } else
447#endif
448 {
449 rdtscll(tsc_start);
450
451 do {
452 apic = apic_read(APIC_TMCCT);
453 rdtscll(tsc);
454 } while ((tsc - tsc_start) < TICK_COUNT &&
455 (apic_start - apic) < TICK_COUNT);
456
457 result = (apic_start - apic) * 1000L * tsc_khz /
458 (tsc - tsc_start);
459 }
460
461 local_irq_enable();
462
463 printk(KERN_DEBUG "APIC timer calibration result %d\n", result);
464
465 printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n",
466 result / 1000 / 1000, result / 1000 % 1000);
467
468 /* Calculate the scaled math multiplication factor */
469 lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC,
470 lapic_clockevent.shift);
471 lapic_clockevent.max_delta_ns =
472 clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
473 lapic_clockevent.min_delta_ns =
474 clockevent_delta2ns(0xF, &lapic_clockevent);
475
476 calibration_result = (result * APIC_DIVISOR) / HZ;
477
478 /*
479 * Do a sanity check on the APIC calibration result
480 */
481 if (calibration_result < (1000000 / HZ)) {
482 printk(KERN_WARNING
483 "APIC frequency too slow, disabling apic timer\n");
484 return -1;
485 }
486
487 return 0;
488}
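The TSC branch of this (now deleted) 64-bit calibration derives the timer rate purely from how far the LAPIC counter and the TSC each move over the same window; a worked example of that arithmetic as standalone C (all the numbers here are illustrative assumptions, not kernel values):

#include <stdio.h>

int main(void)
{
	unsigned long long tsc_khz = 2000000;		/* 2 GHz TSC */
	unsigned long long tsc_delta = 100000000;	/* TSC ticks in window */
	unsigned long long apic_delta = 3125000;	/* LAPIC counter ticks */

	/* window = tsc_delta / (tsc_khz * 1000) s = 50ms here, so the
	 * LAPIC timer counts apic_delta * 1000 * tsc_khz / tsc_delta
	 * ticks per second: */
	unsigned long long result = apic_delta * 1000 * tsc_khz / tsc_delta;

	printf("%llu ticks/s\n", result);	/* 62500000, i.e. 62.5 MHz */
	return 0;
}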
489
490/*
491 * Setup the boot APIC
492 *
493 * Calibrate and verify the result.
494 */
495void __init setup_boot_APIC_clock(void)
496{
497 /*
498 * The local apic timer can be disabled via the kernel
499 * commandline or from the CPU detection code. Register the lapic
500 * timer as a dummy clock event source on SMP systems, so the
501 * broadcast mechanism is used. On UP systems simply ignore it.
502 */
503 if (disable_apic_timer) {
504 printk(KERN_INFO "Disabling APIC timer\n");
505 /* No broadcast on UP ! */
506 if (num_possible_cpus() > 1) {
507 lapic_clockevent.mult = 1;
508 setup_APIC_timer();
509 }
510 return;
511 }
512
513 apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
514 "calibrating APIC timer ...\n");
515
516 if (calibrate_APIC_clock()) {
517 /* No broadcast on UP ! */
518 if (num_possible_cpus() > 1)
519 setup_APIC_timer();
520 return;
521 }
522
523 /*
524 * If nmi_watchdog is set to IO_APIC, we need the
525 * PIT/HPET going. Otherwise register lapic as a dummy
526 * device.
527 */
528 if (nmi_watchdog != NMI_IO_APIC)
529 lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
530 else
531 printk(KERN_WARNING "APIC timer registered as dummy,"
532 " due to nmi_watchdog=%d!\n", nmi_watchdog);
533
534 /* Setup the lapic or request the broadcast */
535 setup_APIC_timer();
536}
537
538void __cpuinit setup_secondary_APIC_clock(void)
539{
540 setup_APIC_timer();
541}
542
543/*
544 * The guts of the apic timer interrupt
545 */
546static void local_apic_timer_interrupt(void)
547{
548 int cpu = smp_processor_id();
549 struct clock_event_device *evt = &per_cpu(lapic_events, cpu);
550
551 /*
552 * Normally we should not be here till LAPIC has been initialized but
553 * in some cases like kdump, its possible that there is a pending LAPIC
554 * timer interrupt from previous kernel's context and is delivered in
555 * new kernel the moment interrupts are enabled.
556 *
557 * Interrupts are enabled early and LAPIC is setup much later, hence
558 * its possible that when we get here evt->event_handler is NULL.
559 * Check for event_handler being NULL and discard the interrupt as
560 * spurious.
561 */
562 if (!evt->event_handler) {
563 printk(KERN_WARNING
564 "Spurious LAPIC timer interrupt on cpu %d\n", cpu);
565 /* Switch it off */
566 lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt);
567 return;
568 }
569
570 /*
571 * the NMI deadlock-detector uses this.
572 */
573#ifdef CONFIG_X86_64
574 add_pda(apic_timer_irqs, 1);
575#else
576 per_cpu(irq_stat, cpu).apic_timer_irqs++;
577#endif
578
579 evt->event_handler(evt);
580}
581
582/*
583 * Local APIC timer interrupt. This is the most natural way for doing
584 * local interrupts, but local timer interrupts can be emulated by
585 * broadcast interrupts too. [in case the hw doesn't support APIC timers]
586 *
587 * [ if a single-CPU system runs an SMP kernel then we call the local
588 * interrupt as well. Thus we cannot inline the local irq ... ]
589 */
590void smp_apic_timer_interrupt(struct pt_regs *regs)
591{
592 struct pt_regs *old_regs = set_irq_regs(regs);
593
594 /*
595 * NOTE! We'd better ACK the irq immediately,
596 * because timer handling can be slow.
597 */
598 ack_APIC_irq();
599 /*
600 * update_process_times() expects us to have done irq_enter().
601 * Besides, if we don't timer interrupts ignore the global
602 * interrupt lock, which is the WrongThing (tm) to do.
603 */
604 exit_idle();
605 irq_enter();
606 local_apic_timer_interrupt();
607 irq_exit();
608
609 set_irq_regs(old_regs);
610}
611
612int setup_profiling_timer(unsigned int multiplier)
613{
614 return -EINVAL;
615}
616
617
618/*
619 * Local APIC start and shutdown
620 */
621
622/**
623 * clear_local_APIC - shutdown the local APIC
624 *
625 * This is called, when a CPU is disabled and before rebooting, so the state of
626 * the local APIC has no dangling leftovers. Also used to cleanout any BIOS
627 * leftovers during boot.
628 */
629void clear_local_APIC(void)
630{
631 int maxlvt;
632 u32 v;
633
634 /* APIC hasn't been mapped yet */
635 if (!apic_phys)
636 return;
637
638 maxlvt = lapic_get_maxlvt();
639 /*
640 * Masking an LVT entry can trigger a local APIC error
641 * if the vector is zero. Mask LVTERR first to prevent this.
642 */
643 if (maxlvt >= 3) {
644 v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
645 apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
646 }
647 /*
 648	 * Careful: we must only set the mask bits first, leaving the rest
 649	 * of each entry intact, to deassert any level-triggered sources.
650 */
651 v = apic_read(APIC_LVTT);
652 apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
653 v = apic_read(APIC_LVT0);
654 apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
655 v = apic_read(APIC_LVT1);
656 apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
657 if (maxlvt >= 4) {
658 v = apic_read(APIC_LVTPC);
659 apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
660 }
661
 662	/* let's not touch this if we didn't frob it */
 663#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
664 if (maxlvt >= 5) {
665 v = apic_read(APIC_LVTTHMR);
666 apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
667 }
668#endif
669 /*
670 * Clean APIC state for other OSs:
671 */
672 apic_write(APIC_LVTT, APIC_LVT_MASKED);
673 apic_write(APIC_LVT0, APIC_LVT_MASKED);
674 apic_write(APIC_LVT1, APIC_LVT_MASKED);
675 if (maxlvt >= 3)
676 apic_write(APIC_LVTERR, APIC_LVT_MASKED);
677 if (maxlvt >= 4)
678 apic_write(APIC_LVTPC, APIC_LVT_MASKED);
679
680 /* Integrated APIC (!82489DX) ? */
681 if (lapic_is_integrated()) {
682 if (maxlvt > 3)
683 /* Clear ESR due to Pentium errata 3AP and 11AP */
684 apic_write(APIC_ESR, 0);
685 apic_read(APIC_ESR);
686 }
687}
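
The mask-first pass above repeats the same read-modify-write on every LVT register. As a minimal sketch, that step could be factored into a helper (lapic_mask_lvt is a hypothetical name, not part of this file):

	/* Hypothetical helper: mask one LVT entry without clobbering it. */
	static inline void lapic_mask_lvt(unsigned int reg)
	{
		u32 v = apic_read(reg);

		apic_write(reg, v | APIC_LVT_MASKED);
	}

With it, the first pass would read lapic_mask_lvt(APIC_LVTT); lapic_mask_lvt(APIC_LVT0); and so on, with LVTERR still masked first for the erratum described above.
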
688
689/**
690 * disable_local_APIC - clear and disable the local APIC
691 */
692void disable_local_APIC(void)
693{
694 unsigned int value;
695
696 clear_local_APIC();
697
698 /*
699 * Disable APIC (implies clearing of registers
700 * for 82489DX!).
701 */
702 value = apic_read(APIC_SPIV);
703 value &= ~APIC_SPIV_APIC_ENABLED;
704 apic_write(APIC_SPIV, value);
705
706#ifdef CONFIG_X86_32
707 /*
708 * When LAPIC was disabled by the BIOS and enabled by the kernel,
709 * restore the disabled state.
710 */
711 if (enabled_via_apicbase) {
712 unsigned int l, h;
713
714 rdmsr(MSR_IA32_APICBASE, l, h);
715 l &= ~MSR_IA32_APICBASE_ENABLE;
716 wrmsr(MSR_IA32_APICBASE, l, h);
717 }
718#endif
719}
720
721/*
 722 * If Linux enabled the LAPIC against the BIOS default, disable it before
 723 * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused
 724 * and not power off. Additionally, clear all LVT entries before
 725 * disable_local_APIC() for the case where Linux didn't enable the LAPIC.
726 */
727void lapic_shutdown(void)
728{
729 unsigned long flags;
730
731 if (!cpu_has_apic)
732 return;
733
734 local_irq_save(flags);
735
736#ifdef CONFIG_X86_32
737 if (!enabled_via_apicbase)
738 clear_local_APIC();
739 else
740#endif
741 disable_local_APIC();
742
743
744 local_irq_restore(flags);
745}
746
747/*
748 * This is to verify that we're looking at a real local APIC.
 749 * Check these against your board if the CPUs fail to start
 750 * for no apparent reason.
751 */
752int __init verify_local_APIC(void)
753{
754 unsigned int reg0, reg1;
755
756 /*
757 * The version register is read-only in a real APIC.
758 */
759 reg0 = apic_read(APIC_LVR);
760 apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0);
761 apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
762 reg1 = apic_read(APIC_LVR);
763 apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1);
764
765 /*
766 * The two version reads above should print the same
767 * numbers. If the second one is different, then we
768 * poke at a non-APIC.
769 */
770 if (reg1 != reg0)
771 return 0;
772
773 /*
 774	 * Check if the version looks reasonable.
775 */
776 reg1 = GET_APIC_VERSION(reg0);
777 if (reg1 == 0x00 || reg1 == 0xff)
778 return 0;
779 reg1 = lapic_get_maxlvt();
780 if (reg1 < 0x02 || reg1 == 0xff)
781 return 0;
782
783 /*
784 * The ID register is read/write in a real APIC.
785 */
786 reg0 = apic_read(APIC_ID);
787 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
788 apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
789 reg1 = apic_read(APIC_ID);
790 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
791 apic_write(APIC_ID, reg0);
792 if (reg1 != (reg0 ^ APIC_ID_MASK))
793 return 0;
794
795 /*
796 * The next two are just to see if we have sane values.
797 * They're only really relevant if we're in Virtual Wire
 798	 * compatibility mode, but most boxes are these days.
799 */
800 reg0 = apic_read(APIC_LVT0);
801 apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0);
802 reg1 = apic_read(APIC_LVT1);
803 apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1);
804
805 return 1;
806}
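
Both probes above use the same write-a-flipped-value-and-read-back pattern. A hedged sketch of that probe, factored out under a hypothetical name (apic_reg_is_rw is not part of this file):

	/* Hypothetical probe: does @reg behave as a read/write register? */
	static int apic_reg_is_rw(unsigned int reg, u32 mask)
	{
		u32 old = apic_read(reg);
		u32 new;

		apic_write(reg, old ^ mask);
		new = apic_read(reg);
		apic_write(reg, old);	/* restore the original value */

		return new == (old ^ mask);
	}

A real local APIC must fail this probe for the version register (read-only) and pass it for the ID register, which is exactly what the two checks above test with APIC_LVR_MASK and APIC_ID_MASK.
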
807
808/**
809 * sync_Arb_IDs - synchronize APIC bus arbitration IDs
810 */
811void __init sync_Arb_IDs(void)
812{
813 /*
 814	 * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1.
 815	 * Not needed on AMD.
816 */
817 if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
818 return;
819
820 /*
821 * Wait for idle.
822 */
823 apic_wait_icr_idle();
824
825 apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
826 apic_write(APIC_ICR, APIC_DEST_ALLINC |
827 APIC_INT_LEVELTRIG | APIC_DM_INIT);
828}
829
830/*
831 * An initial setup of the virtual wire mode.
832 */
833void __init init_bsp_APIC(void)
834{
835 unsigned int value;
836
837 /*
 838	 * Don't do the setup now if we have an SMP BIOS as the
839 * through-I/O-APIC virtual wire mode might be active.
840 */
841 if (smp_found_config || !cpu_has_apic)
842 return;
843
844 /*
845 * Do not trust the local APIC being empty at bootup.
846 */
847 clear_local_APIC();
848
849 /*
850 * Enable APIC.
851 */
852 value = apic_read(APIC_SPIV);
853 value &= ~APIC_VECTOR_MASK;
854 value |= APIC_SPIV_APIC_ENABLED;
855
856#ifdef CONFIG_X86_32
857 /* This bit is reserved on P4/Xeon and should be cleared */
858 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
859 (boot_cpu_data.x86 == 15))
860 value &= ~APIC_SPIV_FOCUS_DISABLED;
861 else
862#endif
863 value |= APIC_SPIV_FOCUS_DISABLED;
864 value |= SPURIOUS_APIC_VECTOR;
865 apic_write(APIC_SPIV, value);
866
867 /*
868 * Set up the virtual wire mode.
869 */
870 apic_write(APIC_LVT0, APIC_DM_EXTINT);
871 value = APIC_DM_NMI;
872 if (!lapic_is_integrated()) /* 82489DX */
873 value |= APIC_LVT_LEVEL_TRIGGER;
874 apic_write(APIC_LVT1, value);
875}
876
 877static void __cpuinit lapic_setup_esr(void)
 878{
 879	unsigned long oldvalue, value, maxlvt;
 880
 881	if (!lapic_is_integrated()) {
 882		printk(KERN_INFO "No ESR for 82489DX.\n");
 883		return;
 884	}
 885	if (esr_disable) {
 886		/*
 887		 * Something untraceable is creating bad interrupts on
 888		 * secondary quads ... for the moment, just leave the
 889		 * ESR disabled - we can't do anything useful with the
 890		 * errors anyway - mbligh
 891		 */
 892		printk(KERN_INFO "Leaving ESR disabled.\n");
 893		return;
 894	}
 895	/* !82489DX */
 896	maxlvt = lapic_get_maxlvt();
 897	if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
 898		apic_write(APIC_ESR, 0);
 899	oldvalue = apic_read(APIC_ESR);
 900
 901	/* enables sending errors */
 902	value = ERROR_APIC_VECTOR;
 903	apic_write(APIC_LVTERR, value);
 904	/*
 905	 * spec says clear errors after enabling vector.
 906	 */
 907	if (maxlvt > 3)
 908		apic_write(APIC_ESR, 0);
 909	value = apic_read(APIC_ESR);
 910	if (value != oldvalue)
 911		apic_printk(APIC_VERBOSE, "ESR value before enabling "
 912			"vector: 0x%08lx  after: 0x%08lx\n",
 913			oldvalue, value);
 914}
915
916/**
917 * setup_local_APIC - setup the local APIC
918 */
919void __cpuinit setup_local_APIC(void)
920{
921 unsigned int value;
922 int i, j;
923
924 preempt_disable();
925 value = apic_read(APIC_LVR);
926
927 BUILD_BUG_ON((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f);
928
929 /*
930 * Double-check whether this APIC is really registered.
931 * This is meaningless in clustered apic mode, so we skip it.
932 */
933 if (!apic_id_registered())
934 BUG();
935
936 /*
 937	 * Intel recommends setting DFR, LDR and TPR before enabling
938 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
939 * document number 292116). So here it goes...
940 */
941 init_apic_ldr();
942
943 /*
944 * Set Task Priority to 'accept all'. We never change this
945 * later on.
946 */
947 value = apic_read(APIC_TASKPRI);
948 value &= ~APIC_TPRI_MASK;
949 apic_write(APIC_TASKPRI, value);
950
951 /*
 952	 * After a crash we no longer service interrupts, so a pending
 953	 * interrupt from the previous kernel might still have its ISR bit set.
 954	 *
 955	 * Most probably by now the CPU has serviced that pending interrupt,
 956	 * but it might not have done ack_APIC_irq() because it thought the
 957	 * interrupt came from the i8259 as an ExtInt. The LAPIC did not get
 958	 * an EOI, so it does not clear the ISR bit and the CPU thinks it has
 959	 * already serviced the interrupt. Hence a vector might get locked. It
 960	 * was noticed for the timer irq (vector 0x31); issue an extra EOI to clear the ISR.
961 */
962 for (i = APIC_ISR_NR - 1; i >= 0; i--) {
963 value = apic_read(APIC_ISR + i*0x10);
964 for (j = 31; j >= 0; j--) {
965 if (value & (1<<j))
966 ack_APIC_irq();
967 }
968 }
969
970 /*
971 * Now that we are all set up, enable the APIC
972 */
973 value = apic_read(APIC_SPIV);
974 value &= ~APIC_VECTOR_MASK;
975 /*
976 * Enable APIC
977 */
978 value |= APIC_SPIV_APIC_ENABLED;
979
980 /* We always use processor focus */
981
982 /*
983 * Set spurious IRQ vector
984 */
985 value |= SPURIOUS_APIC_VECTOR;
986 apic_write(APIC_SPIV, value);
987
988 /*
989 * Set up LVT0, LVT1:
990 *
991 * set up through-local-APIC on the BP's LINT0. This is not
992 * strictly necessary in pure symmetric-IO mode, but sometimes
993 * we delegate interrupts to the 8259A.
994 */
995 /*
996 * TODO: set up through-local-APIC from through-I/O-APIC? --macro
997 */
998 value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
999 if (!smp_processor_id() && !value) {
1000 value = APIC_DM_EXTINT;
1001 apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n",
1002 smp_processor_id());
1003 } else {
1004 value = APIC_DM_EXTINT | APIC_LVT_MASKED;
1005 apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n",
1006 smp_processor_id());
1007 }
1008 apic_write(APIC_LVT0, value);
1009
1010 /*
1011 * only the BP should see the LINT1 NMI signal, obviously.
1012 */
1013 if (!smp_processor_id())
1014 value = APIC_DM_NMI;
1015 else
1016 value = APIC_DM_NMI | APIC_LVT_MASKED;
1017 apic_write(APIC_LVT1, value);
1018 preempt_enable();
1019}
1020
1021void __cpuinit end_local_APIC_setup(void)
1022{
1023 lapic_setup_esr();
1024
1025#ifdef CONFIG_X86_32
1026 {
1027 unsigned int value;
1028 /* Disable the local apic timer */
1029 value = apic_read(APIC_LVTT);
1030 value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
1031 apic_write(APIC_LVTT, value);
1032 }
1033#endif
1034
1035 setup_apic_nmi_watchdog(NULL);
1036 apic_pm_activate();
1037}
1038
1039void check_x2apic(void)
1040{
1041 int msr, msr2;
1042
1043 rdmsr(MSR_IA32_APICBASE, msr, msr2);
1044
1045 if (msr & X2APIC_ENABLE) {
1046		printk(KERN_INFO "x2apic enabled by BIOS, switching to x2apic ops\n");
1047 x2apic_preenabled = x2apic = 1;
1048 apic_ops = &x2apic_ops;
1049 }
1050}
1051
1052void enable_x2apic(void)
1053{
1054 int msr, msr2;
1055
1056 rdmsr(MSR_IA32_APICBASE, msr, msr2);
1057 if (!(msr & X2APIC_ENABLE)) {
1058		printk(KERN_INFO "Enabling x2apic\n");
1059 wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
1060 }
1061}
1062
1063void enable_IR_x2apic(void)
1064{
1065#ifdef CONFIG_INTR_REMAP
1066 int ret;
1067 unsigned long flags;
1068
1069 if (!cpu_has_x2apic)
1070 return;
1071
1072 if (!x2apic_preenabled && disable_x2apic) {
1073 printk(KERN_INFO
1074 "Skipped enabling x2apic and Interrupt-remapping "
1075 "because of nox2apic\n");
1076 return;
1077 }
1078
1079 if (x2apic_preenabled && disable_x2apic)
1080		panic("BIOS already enabled x2apic, can't enforce nox2apic");
1081
1082 if (!x2apic_preenabled && skip_ioapic_setup) {
1083 printk(KERN_INFO
1084 "Skipped enabling x2apic and Interrupt-remapping "
1085 "because of skipping io-apic setup\n");
1086 return;
1087 }
1088
1089 ret = dmar_table_init();
1090 if (ret) {
1091 printk(KERN_INFO
1092 "dmar_table_init() failed with %d:\n", ret);
1093
1094 if (x2apic_preenabled)
1095			panic("x2apic enabled by BIOS, but IR enabling failed");
1096 else
1097 printk(KERN_INFO
1098 "Not enabling x2apic,Intr-remapping\n");
1099 return;
1100 }
1101
1102 local_irq_save(flags);
1103 mask_8259A();
1104 save_mask_IO_APIC_setup();
1105
1106 ret = enable_intr_remapping(1);
1107
1108 if (ret && x2apic_preenabled) {
1109 local_irq_restore(flags);
1110		panic("x2apic enabled by BIOS, but IR enabling failed");
1111 }
1112
1113 if (ret)
1114 goto end;
1115
1116 if (!x2apic) {
1117 x2apic = 1;
1118 apic_ops = &x2apic_ops;
1119 enable_x2apic();
1120 }
1121end:
1122 if (ret)
1123 /*
1124 * IR enabling failed
1125 */
1126 restore_IO_APIC_setup();
1127 else
1128 reinit_intr_remapped_IO_APIC(x2apic_preenabled);
1129
1130 unmask_8259A();
1131 local_irq_restore(flags);
1132
1133 if (!ret) {
1134 if (!x2apic_preenabled)
1135 printk(KERN_INFO
1136 "Enabled x2apic and interrupt-remapping\n");
1137 else
1138 printk(KERN_INFO
1139 "Enabled Interrupt-remapping\n");
1140 } else
1141 printk(KERN_ERR
1142 "Failed to enable Interrupt-remapping and x2apic\n");
1143#else
1144 if (!cpu_has_x2apic)
1145 return;
1146
1147 if (x2apic_preenabled)
1148		panic("x2apic enabled prior to OS handover,"
1149 " enable CONFIG_INTR_REMAP");
1150
1151 printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
1152 " and x2apic\n");
1153#endif
1154
1155 return;
1156}
1157
1158/*
1159 * Detect and enable local APICs on non-SMP boards.
1160 * Original code written by Keir Fraser.
1161 * On AMD64 we trust the BIOS - if it says no APIC it is likely
1162 * not correctly set up (usually the APIC timer won't work etc.)
1163 */
1164static int __init detect_init_APIC(void)
1165{
1166 if (!cpu_has_apic) {
1167 printk(KERN_INFO "No local APIC present\n");
1168 return -1;
1169 }
1170
1171 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
1172 boot_cpu_physical_apicid = 0;
1173 return 0;
1174}
1175
1176void __init early_init_lapic_mapping(void)
1177{
1178 unsigned long phys_addr;
1179
1180 /*
1181	 * If no local APIC can be found then bail out:
1182	 * it means there is no MP table and no MADT.
1183 */
1184 if (!smp_found_config)
1185 return;
1186
1187 phys_addr = mp_lapic_addr;
1188
1189 set_fixmap_nocache(FIX_APIC_BASE, phys_addr);
1190 apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
1191 APIC_BASE, phys_addr);
1192
1193 /*
1194 * Fetch the APIC ID of the BSP in case we have a
1195 * default configuration (or the MP table is broken).
1196 */
1197 boot_cpu_physical_apicid = read_apic_id();
1198}
1199
1200/**
1201 * init_apic_mappings - initialize APIC mappings
1202 */
1203void __init init_apic_mappings(void)
1204{
1205 if (x2apic) {
1206 boot_cpu_physical_apicid = read_apic_id();
1207 return;
1208 }
1209
1210 /*
1211 * If no local APIC can be found then set up a fake all
1212 * zeroes page to simulate the local APIC and another
1213 * one for the IO-APIC.
1214 */
1215 if (!smp_found_config && detect_init_APIC()) {
1216 apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
1217 apic_phys = __pa(apic_phys);
1218 } else
1219 apic_phys = mp_lapic_addr;
1220
1221 set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
1222 apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
1223 APIC_BASE, apic_phys);
1224
1225 /*
1226 * Fetch the APIC ID of the BSP in case we have a
1227 * default configuration (or the MP table is broken).
1228 */
1229 boot_cpu_physical_apicid = read_apic_id();
1230}
1231
1232/*
1233 * This initializes the IO-APIC and APIC hardware if this is
1234 * a UP kernel.
1235 */
1236int apic_version[MAX_APICS];
1237
1238int __init APIC_init_uniprocessor(void)
1239{
1240 if (disable_apic) {
1241 printk(KERN_INFO "Apic disabled\n");
1242 return -1;
1243 }
1244 if (!cpu_has_apic) {
1245 disable_apic = 1;
1246 printk(KERN_INFO "Apic disabled by BIOS\n");
1247 return -1;
1248 }
1249
1250 enable_IR_x2apic();
1251 setup_apic_routing();
1252
1253 verify_local_APIC();
1254
1255 connect_bsp_APIC();
1256
1257 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
1258 apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
1259
1260 setup_local_APIC();
1261
1262 /*
1263	 * Now enable the IO-APIC: enable_IO_APIC() actually calls
1264	 * clear_IO_APIC(), which we need before enabling any vector on the BP.
1265 */
1266 if (!skip_ioapic_setup && nr_ioapics)
1267 enable_IO_APIC();
1268
1269 if (!smp_found_config || skip_ioapic_setup || !nr_ioapics)
1270 localise_nmi_watchdog();
1271 end_local_APIC_setup();
1272
1273 if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
1274 setup_IO_APIC();
1275 else
1276 nr_ioapics = 0;
1277 setup_boot_APIC_clock();
1278 check_nmi_watchdog();
1279 return 0;
1280}
1281
1282/*
1283 * Local APIC interrupts
1284 */
1285
1286/*
1287 * This interrupt should _never_ happen with our APIC/SMP architecture
1288 */
1289asmlinkage void smp_spurious_interrupt(void)
1290{
1291 unsigned int v;
1292 exit_idle();
1293 irq_enter();
1294 /*
1295 * Check if this really is a spurious interrupt and ACK it
1296 * if it is a vectored one. Just in case...
1297 * Spurious interrupts should not be ACKed.
1298 */
1299 v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
1300 if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
1301 ack_APIC_irq();
1302
1303 add_pda(irq_spurious_count, 1);
1304 irq_exit();
1305}
1306
1307/*
1308 * This interrupt should never happen with our APIC/SMP architecture
1309 */
1310asmlinkage void smp_error_interrupt(void)
1311{
1312 unsigned int v, v1;
1313
1314 exit_idle();
1315 irq_enter();
1316 /* First tickle the hardware, only then report what went on. -- REW */
1317 v = apic_read(APIC_ESR);
1318 apic_write(APIC_ESR, 0);
1319 v1 = apic_read(APIC_ESR);
1320 ack_APIC_irq();
1321 atomic_inc(&irq_err_count);
1322
1323 /* Here is what the APIC error bits mean:
1324 0: Send CS error
1325 1: Receive CS error
1326 2: Send accept error
1327 3: Receive accept error
1328 4: Reserved
1329 5: Send illegal vector
1330 6: Received illegal vector
1331 7: Illegal register address
1332 */
1333 printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
1334		smp_processor_id(), v, v1);
1335 irq_exit();
1336}
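
The handler above prints only the raw ESR value; as an illustrative sketch, the bits listed in the comment could be decoded into readable strings (apic_esr_bits and apic_print_esr are hypothetical names, not part of this file):

	static const char * const apic_esr_bits[] = {
		"Send CS error",
		"Receive CS error",
		"Send accept error",
		"Receive accept error",
		"Reserved",
		"Send illegal vector",
		"Received illegal vector",
		"Illegal register address",
	};

	static void apic_print_esr(unsigned int v)
	{
		int i;

		for (i = 0; i < 8; i++)
			if (v & (1 << i))
				printk(KERN_DEBUG "  %s\n", apic_esr_bits[i]);
	}

Calling apic_print_esr(v) after the KERN_DEBUG line above would expand, say, v == 0x40 into "Received illegal vector".
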
1337
1338/**
1339 * connect_bsp_APIC - attach the APIC to the interrupt system
1340 */
1341void __init connect_bsp_APIC(void)
1342{
1343#ifdef CONFIG_X86_32
1344 if (pic_mode) {
1345 /*
1346 * Do not trust the local APIC being empty at bootup.
1347 */
1348 clear_local_APIC();
1349 /*
1350 * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's
1351 * local APIC to INT and NMI lines.
1352 */
1353 apic_printk(APIC_VERBOSE, "leaving PIC mode, "
1354 "enabling APIC mode.\n");
1355 outb(0x70, 0x22);
1356 outb(0x01, 0x23);
1357 }
1358#endif
1359 enable_apic_mode();
1360}
1361
1362/**
1363 * disconnect_bsp_APIC - detach the APIC from the interrupt system
1364 * @virt_wire_setup:	indicates whether virtual wire mode is selected
1365 *
1366 * Virtual wire mode is necessary to deliver legacy interrupts even when the
1367 * APIC is disabled.
1368 */
1369void disconnect_bsp_APIC(int virt_wire_setup)
1370{
1371 unsigned int value;
1372
1373#ifdef CONFIG_X86_32
1374 if (pic_mode) {
1375 /*
1376 * Put the board back into PIC mode (has an effect only on
1377 * certain older boards). Note that APIC interrupts, including
1378 * IPIs, won't work beyond this point! The only exception are
1379 * INIT IPIs.
1380 */
1381 apic_printk(APIC_VERBOSE, "disabling APIC mode, "
1382 "entering PIC mode.\n");
1383 outb(0x70, 0x22);
1384 outb(0x00, 0x23);
1385 return;
1386 }
1387#endif
1388
1389 /* Go back to Virtual Wire compatibility mode */
1390
1391 /* For the spurious interrupt use vector F, and enable it */
1392 value = apic_read(APIC_SPIV);
1393 value &= ~APIC_VECTOR_MASK;
1394 value |= APIC_SPIV_APIC_ENABLED;
1395 value |= 0xf;
1396 apic_write(APIC_SPIV, value);
1397
1398 if (!virt_wire_setup) {
1399 /*
1400 * For LVT0 make it edge triggered, active high,
1401 * external and enabled
1402 */
1403 value = apic_read(APIC_LVT0);
1404 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
1405 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
1406 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
1407 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
1408 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
1409 apic_write(APIC_LVT0, value);
1410 } else {
1411 /* Disable LVT0 */
1412 apic_write(APIC_LVT0, APIC_LVT_MASKED);
1413 }
1414
1415 /*
1416 * For LVT1 make it edge triggered, active high,
1417 * nmi and enabled
1418 */
1419 value = apic_read(APIC_LVT1);
1420 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
1421 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
1422 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
1423 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
1424 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
1425 apic_write(APIC_LVT1, value);
1426}
1427
1428void __cpuinit generic_processor_info(int apicid, int version)
1429{
1430 int cpu;
1431 cpumask_t tmp_map;
1432
1433 /*
1434 * Validate version
1435 */
1436 if (version == 0x0) {
1437		printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU with "
1438		       "APIC ID 0x%x! fixing up to 0x10. (tell your hw vendor)\n",
1439		       apicid);
1440 version = 0x10;
1441 }
1442 apic_version[apicid] = version;
1443
1444 if (num_processors >= NR_CPUS) {
1445 printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
1446 " Processor ignored.\n", NR_CPUS);
1447 return;
1448 }
1449
1450 num_processors++;
1451 cpus_complement(tmp_map, cpu_present_map);
1452 cpu = first_cpu(tmp_map);
1453
1454 physid_set(apicid, phys_cpu_present_map);
1455 if (apicid == boot_cpu_physical_apicid) {
1456 /*
1457 * x86_bios_cpu_apicid is required to have processors listed
1458 * in same order as logical cpu numbers. Hence the first
1459 * entry is BSP, and so on.
1460 */
1461 cpu = 0;
1462 }
1463 if (apicid > max_physical_apicid)
1464 max_physical_apicid = apicid;
1465
1466#ifdef CONFIG_X86_32
1467 /*
1468 * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
1469	 * but we need to work out other dependencies like SMP_SUSPEND etc.
1470 * before this can be done without some confusion.
1471 * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
1472 * - Ashok Raj <ashok.raj@intel.com>
1473 */
1474 if (max_physical_apicid >= 8) {
1475 switch (boot_cpu_data.x86_vendor) {
1476 case X86_VENDOR_INTEL:
1477 if (!APIC_XAPIC(version)) {
1478 def_to_bigsmp = 0;
1479 break;
1480 }
1481 /* If P4 and above fall through */
1482 case X86_VENDOR_AMD:
1483 def_to_bigsmp = 1;
1484 }
1485 }
1486#endif
1487
1488#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64)
1489 /* are we being called early in kernel startup? */
1490 if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
1491 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
1492 u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
1493
1494 cpu_to_apicid[cpu] = apicid;
1495 bios_cpu_apicid[cpu] = apicid;
1496 } else {
1497 per_cpu(x86_cpu_to_apicid, cpu) = apicid;
1498 per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
1499 }
1500#endif
1501
1502 cpu_set(cpu, cpu_possible_map);
1503 cpu_set(cpu, cpu_present_map);
1504}
1505
1506int hard_smp_processor_id(void)
1507{
1508 return read_apic_id();
1509}
1510
1511/*
1512 * Power management
1513 */
1514#ifdef CONFIG_PM
1515
1516static struct {
1517 /*
1518 * 'active' is true if the local APIC was enabled by us and
1519 * not the BIOS; this signifies that we are also responsible
1520 * for disabling it before entering apm/acpi suspend
1521 */
1522 int active;
1523 /* r/w apic fields */
1524 unsigned int apic_id;
1525 unsigned int apic_taskpri;
1526 unsigned int apic_ldr;
1527 unsigned int apic_dfr;
1528 unsigned int apic_spiv;
1529 unsigned int apic_lvtt;
1530 unsigned int apic_lvtpc;
1531 unsigned int apic_lvt0;
1532 unsigned int apic_lvt1;
1533 unsigned int apic_lvterr;
1534 unsigned int apic_tmict;
1535 unsigned int apic_tdcr;
1536 unsigned int apic_thmr;
1537} apic_pm_state;
1538
1539static int lapic_suspend(struct sys_device *dev, pm_message_t state)
1540{
1541 unsigned long flags;
1542 int maxlvt;
1543
1544 if (!apic_pm_state.active)
1545 return 0;
1546
1547 maxlvt = lapic_get_maxlvt();
1548
1549 apic_pm_state.apic_id = apic_read(APIC_ID);
1550 apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
1551 apic_pm_state.apic_ldr = apic_read(APIC_LDR);
1552 apic_pm_state.apic_dfr = apic_read(APIC_DFR);
1553 apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
1554 apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
1555 if (maxlvt >= 4)
1556 apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
1557 apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
1558 apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
1559 apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
1560 apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
1561 apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
1562#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
1563 if (maxlvt >= 5)
1564 apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
1565#endif
1566
1567 local_irq_save(flags);
1568 disable_local_APIC();
1569 local_irq_restore(flags);
1570 return 0;
1571}
1572
1573static int lapic_resume(struct sys_device *dev)
1574{
1575 unsigned int l, h;
1576 unsigned long flags;
1577 int maxlvt;
1578
1579 if (!apic_pm_state.active)
1580 return 0;
1581
1582 maxlvt = lapic_get_maxlvt();
1583
1584 local_irq_save(flags);
1585
1586#ifdef CONFIG_X86_64
1587 if (x2apic)
1588 enable_x2apic();
1589 else
1590#endif
1591 {
1592 /*
1593 * Make sure the APICBASE points to the right address
1594 *
1595 * FIXME! This will be wrong if we ever support suspend on
1596 * SMP! We'll need to do this as part of the CPU restore!
1597 */
1598 rdmsr(MSR_IA32_APICBASE, l, h);
1599 l &= ~MSR_IA32_APICBASE_BASE;
1600 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
1601 wrmsr(MSR_IA32_APICBASE, l, h);
1602 }
1603
1604 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
1605 apic_write(APIC_ID, apic_pm_state.apic_id);
1606 apic_write(APIC_DFR, apic_pm_state.apic_dfr);
1607 apic_write(APIC_LDR, apic_pm_state.apic_ldr);
1608 apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
1609 apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
1610 apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
1611 apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
1612#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
1613 if (maxlvt >= 5)
1614 apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
1615#endif
1616 if (maxlvt >= 4)
1617 apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
1618 apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
1619 apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
1620 apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
1621 apic_write(APIC_ESR, 0);
1622 apic_read(APIC_ESR);
1623 apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
1624 apic_write(APIC_ESR, 0);
1625 apic_read(APIC_ESR);
1626
1627 local_irq_restore(flags);
1628
1629 return 0;
1630}
1631
1632/*
1633 * This device has no shutdown method - fully functioning local APICs
1634 * are needed on every CPU up until machine_halt/restart/poweroff.
1635 */
1636
1637static struct sysdev_class lapic_sysclass = {
1638 .name = "lapic",
1639 .resume = lapic_resume,
1640 .suspend = lapic_suspend,
1641};
1642
1643static struct sys_device device_lapic = {
1644 .id = 0,
1645 .cls = &lapic_sysclass,
1646};
1647
1648static void __cpuinit apic_pm_activate(void)
1649{
1650 apic_pm_state.active = 1;
1651}
1652
1653static int __init init_lapic_sysfs(void)
1654{
1655 int error;
1656
1657 if (!cpu_has_apic)
1658 return 0;
1659 /* XXX: remove suspend/resume procs if !apic_pm_state.active? */
1660
1661 error = sysdev_class_register(&lapic_sysclass);
1662 if (!error)
1663 error = sysdev_register(&device_lapic);
1664 return error;
1665}
1666device_initcall(init_lapic_sysfs);
1667
1668#else /* CONFIG_PM */
1669
1670static void apic_pm_activate(void) { }
1671
1672#endif /* CONFIG_PM */
1673
1674/*
1675 * apic_is_clustered_box() -- Check if we can expect good TSC
1676 *
1677 * Thus far, the major user of this is IBM's Summit2 series:
1678 *
1679 * Clustered boxes may have unsynced TSC problems if they are
1680 * multi-chassis. Use available data to take a good guess.
1681 * If in doubt, go HPET.
1682 */
1683__cpuinit int apic_is_clustered_box(void)
1684{
1685 int i, clusters, zeros;
1686 unsigned id;
1687 u16 *bios_cpu_apicid;
1688 DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
1689
1690 /*
1691	 * There is no such box with AMD CPUs yet. On some AMD boxes with
1692	 * quad-core CPUs and 8 sockets the APIC IDs will be in [4, 0x23]
1693	 * or [8, 0x27]; those could be mistaken for a vSMP box, so that
1694	 * case still needs checking...
1695 */
1696 if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
1697 return 0;
1698
1699 bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
1700 bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
1701
1702 for (i = 0; i < NR_CPUS; i++) {
1703 /* are we being called early in kernel startup? */
1704 if (bios_cpu_apicid) {
1705 id = bios_cpu_apicid[i];
1706 }
1707 else if (i < nr_cpu_ids) {
1708 if (cpu_present(i))
1709 id = per_cpu(x86_bios_cpu_apicid, i);
1710 else
1711 continue;
1712 }
1713 else
1714 break;
1715
1716 if (id != BAD_APICID)
1717 __set_bit(APIC_CLUSTERID(id), clustermap);
1718 }
1719
1720 /* Problem: Partially populated chassis may not have CPUs in some of
1721 * the APIC clusters they have been allocated. Only present CPUs have
1722 * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap.
1723 * Since clusters are allocated sequentially, count zeros only if
1724 * they are bounded by ones.
1725 */
1726 clusters = 0;
1727 zeros = 0;
1728 for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
1729 if (test_bit(i, clustermap)) {
1730 clusters += 1 + zeros;
1731 zeros = 0;
1732 } else
1733 ++zeros;
1734 }
1735
1736 /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are
1737 * not guaranteed to be synced between boards
1738 */
1739 if (is_vsmp_box() && clusters > 1)
1740 return 1;
1741
1742 /*
1743 * If clusters > 2, then should be multi-chassis.
1744 * May have to revisit this when multi-core + hyperthreaded CPUs come
1745 * out, but AFAIK this will work even for them.
1746 */
1747 return (clusters > 2);
1748}
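
The zeros-bounded-by-ones rule is the subtle part of the counting loop above. A self-contained sketch (not kernel code) with an assumed bitmap shows how it counts:

	#include <stdio.h>

	/* Standalone illustration of the cluster-counting rule above. */
	static int count_clusters(const int *map, int n)
	{
		int i, clusters = 0, zeros = 0;

		for (i = 0; i < n; i++) {
			if (map[i]) {
				clusters += 1 + zeros;	/* claim any bounded gap */
				zeros = 0;
			} else {
				zeros++;	/* counted only if a later 1 appears */
			}
		}
		return clusters;
	}

	int main(void)
	{
		int map[] = { 1, 1, 0, 0, 1, 0, 0 };

		/* Prints 5: three populated clusters plus the two zeros
		 * bounded by ones; the trailing zeros are never claimed. */
		printf("%d\n", count_clusters(map, 7));
		return 0;
	}
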
1749
1750static __init int setup_nox2apic(char *str)
1751{
1752 disable_x2apic = 1;
1753 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC);
1754 return 0;
1755}
1756early_param("nox2apic", setup_nox2apic);
1757
1758
1759/*
1760 * APIC command line parameters
1761 */
1762static int __init setup_disableapic(char *arg)
1763{
1764 disable_apic = 1;
1765 setup_clear_cpu_cap(X86_FEATURE_APIC);
1766 return 0;
1767}
1768early_param("disableapic", setup_disableapic);
1769
1770/* same as disableapic, for compatibility */
1771static int __init setup_nolapic(char *arg)
1772{
1773 return setup_disableapic(arg);
1774}
1775early_param("nolapic", setup_nolapic);
1776
1777static int __init parse_lapic_timer_c2_ok(char *arg)
1778{
1779 local_apic_timer_c2_ok = 1;
1780 return 0;
1781}
1782early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
1783
1784static int __init parse_disable_apic_timer(char *arg)
1785{
1786 disable_apic_timer = 1;
1787 return 0;
1788}
1789early_param("noapictimer", parse_disable_apic_timer);
1790
1791static int __init parse_nolapic_timer(char *arg)
1792{
1793 disable_apic_timer = 1;
1794 return 0;
1795}
1796early_param("nolapic_timer", parse_nolapic_timer);
1797
1798static __init int setup_apicpmtimer(char *s)
1799{
1800 apic_calibrate_pmtmr = 1;
1801 notsc_setup(NULL);
1802 return 0;
1803}
1804__setup("apicpmtimer", setup_apicpmtimer);
1805
1806static int __init apic_set_verbosity(char *arg)
1807{
1808 if (!arg) {
1809#ifdef CONFIG_X86_64
1810 skip_ioapic_setup = 0;
1811 ioapic_force = 1;
1812 return 0;
1813#endif
1814 return -EINVAL;
1815 }
1816
1817 if (strcmp("debug", arg) == 0)
1818 apic_verbosity = APIC_DEBUG;
1819 else if (strcmp("verbose", arg) == 0)
1820 apic_verbosity = APIC_VERBOSE;
1821 else {
1822		printk(KERN_WARNING "APIC Verbosity level %s not recognised,"
1823 " use apic=verbose or apic=debug\n", arg);
1824 return -EINVAL;
1825 }
1826
1827 return 0;
1828}
1829early_param("apic", apic_set_verbosity);
1830
1831static int __init lapic_insert_resource(void)
1832{
1833 if (!apic_phys)
1834 return -1;
1835
1836 /* Put local APIC into the resource map. */
1837 lapic_resource.start = apic_phys;
1838 lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
1839 insert_resource(&iomem_resource, &lapic_resource);
1840
1841 return 0;
1842}
1843
1844/*
1845 * The insert has to be called after e820_reserve_resources(),
1846 * which uses request_resource().
1847 */
1848late_initcall(lapic_insert_resource);
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index fdd585f9c53d..f0dfe6f17e7e 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * BIOS run time interface routines. 2 * BIOS run time interface routines.
3 * 3 *
4 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
@@ -16,33 +14,128 @@
16 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 *
18 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
19 * Copyright (c) Russ Anderson
19 */ 20 */
20 21
22#include <linux/efi.h>
23#include <asm/efi.h>
24#include <linux/io.h>
21#include <asm/uv/bios.h> 25#include <asm/uv/bios.h>
26#include <asm/uv/uv_hub.h>
27
28struct uv_systab uv_systab;
22 29
23const char * 30s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
24x86_bios_strerror(long status)
25{ 31{
26 const char *str; 32 struct uv_systab *tab = &uv_systab;
27 switch (status) { 33
28 case 0: str = "Call completed without error"; break; 34 if (!tab->function)
29 case -1: str = "Not implemented"; break; 35 /*
30 case -2: str = "Invalid argument"; break; 36 * BIOS does not support UV systab
31 case -3: str = "Call completed with error"; break; 37 */
32 default: str = "Unknown BIOS status code"; break; 38 return BIOS_STATUS_UNIMPLEMENTED;
33 } 39
34 return str; 40 return efi_call6((void *)__va(tab->function),
41 (u64)which, a1, a2, a3, a4, a5);
35} 42}
36 43
37long 44s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
38x86_bios_freq_base(unsigned long which, unsigned long *ticks_per_second, 45 u64 a4, u64 a5)
39 unsigned long *drift_info)
40{ 46{
41 struct uv_bios_retval isrv; 47 unsigned long bios_flags;
48 s64 ret;
42 49
43 BIOS_CALL(isrv, BIOS_FREQ_BASE, which, 0, 0, 0, 0, 0, 0); 50 local_irq_save(bios_flags);
44 *ticks_per_second = isrv.v0; 51 ret = uv_bios_call(which, a1, a2, a3, a4, a5);
45 *drift_info = isrv.v1; 52 local_irq_restore(bios_flags);
46 return isrv.status; 53
54 return ret;
47} 55}
48EXPORT_SYMBOL_GPL(x86_bios_freq_base); 56
57s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
58 u64 a4, u64 a5)
59{
60 s64 ret;
61
62 preempt_disable();
63 ret = uv_bios_call(which, a1, a2, a3, a4, a5);
64 preempt_enable();
65
66 return ret;
67}
68
69
70long sn_partition_id;
71EXPORT_SYMBOL_GPL(sn_partition_id);
72long uv_coherency_id;
73EXPORT_SYMBOL_GPL(uv_coherency_id);
74long uv_region_size;
75EXPORT_SYMBOL_GPL(uv_region_size);
76int uv_type;
77
78
79s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher,
80 long *region)
81{
82 s64 ret;
83 u64 v0, v1;
84 union partition_info_u part;
85
86 ret = uv_bios_call_irqsave(UV_BIOS_GET_SN_INFO, fc,
87 (u64)(&v0), (u64)(&v1), 0, 0);
88 if (ret != BIOS_STATUS_SUCCESS)
89 return ret;
90
91 part.val = v0;
92 if (uvtype)
93 *uvtype = part.hub_version;
94 if (partid)
95 *partid = part.partition_id;
96 if (coher)
97 *coher = part.coherence_id;
98 if (region)
99 *region = part.region_size;
100 return ret;
101}
102
103
104s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second)
105{
106 return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type,
107 (u64)ticks_per_second, 0, 0, 0);
108}
109EXPORT_SYMBOL_GPL(uv_bios_freq_base);
110
111
112#ifdef CONFIG_EFI
113void uv_bios_init(void)
114{
115 struct uv_systab *tab;
116
117 if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) ||
118 (efi.uv_systab == (unsigned long)NULL)) {
119 printk(KERN_CRIT "No EFI UV System Table.\n");
120 uv_systab.function = (unsigned long)NULL;
121 return;
122 }
123
124 tab = (struct uv_systab *)ioremap(efi.uv_systab,
125 sizeof(struct uv_systab));
126 if (strncmp(tab->signature, "UVST", 4) != 0)
 127		printk(KERN_ERR "bad signature in UV system table!\n");
128
129 /*
130 * Copy table to permanent spot for later use.
131 */
132 memcpy(&uv_systab, tab, sizeof(struct uv_systab));
133 iounmap(tab);
134
135 printk(KERN_INFO "EFI UV System Table Revision %d\n", tab->revision);
136}
137#else /* !CONFIG_EFI */
138
139void uv_bios_init(void) { }
140#endif
141
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 32e73520adf7..8f1e31db2ad5 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -249,7 +249,7 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
249 } 249 }
250 numa_set_node(cpu, node); 250 numa_set_node(cpu, node);
251 251
252 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); 252 printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node);
253#endif 253#endif
254} 254}
255 255
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 99468dbd08da..cce0b6118d55 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -174,7 +174,7 @@ static void __cpuinit srat_detect_node(void)
174 node = first_node(node_online_map); 174 node = first_node(node_online_map);
175 numa_set_node(cpu, node); 175 numa_set_node(cpu, node);
176 176
177 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); 177 printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node);
178#endif 178#endif
179} 179}
180 180
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 945a31cdd81f..1119d247fe11 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -367,6 +367,10 @@ void __init efi_init(void)
367 efi.smbios = config_tables[i].table; 367 efi.smbios = config_tables[i].table;
368 printk(" SMBIOS=0x%lx ", config_tables[i].table); 368 printk(" SMBIOS=0x%lx ", config_tables[i].table);
369 } else if (!efi_guidcmp(config_tables[i].guid, 369 } else if (!efi_guidcmp(config_tables[i].guid,
370 UV_SYSTEM_TABLE_GUID)) {
371 efi.uv_systab = config_tables[i].table;
372 printk(" UVsystab=0x%lx ", config_tables[i].table);
373 } else if (!efi_guidcmp(config_tables[i].guid,
370 HCDP_TABLE_GUID)) { 374 HCDP_TABLE_GUID)) {
371 efi.hcdp = config_tables[i].table; 375 efi.hcdp = config_tables[i].table;
372 printk(" HCDP=0x%lx ", config_tables[i].table); 376 printk(" HCDP=0x%lx ", config_tables[i].table);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index b21fbfaffe39..4d82171d0f9c 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -629,7 +629,7 @@ ENTRY(interrupt)
629ENTRY(irq_entries_start) 629ENTRY(irq_entries_start)
630 RING0_INT_FRAME 630 RING0_INT_FRAME
631vector=0 631vector=0
632.rept NR_IRQS 632.rept NR_VECTORS
633 ALIGN 633 ALIGN
634 .if vector 634 .if vector
635 CFI_ADJUST_CFA_OFFSET -4 635 CFI_ADJUST_CFA_OFFSET -4
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index 9eca5ba7a6b1..2ec2de8d8c46 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -179,8 +179,10 @@ static int __init physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
179 * is an example). 179 * is an example).
180 */ 180 */
181 if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID && 181 if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
182 (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) 182 (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
 183		printk(KERN_DEBUG "system APIC can only use physical flat\n");
183 return 1; 184 return 1;
185 }
184#endif 186#endif
185 187
186 return 0; 188 return 0;
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 33581d94a90e..bfd532843df6 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -341,12 +341,12 @@ static __init void map_mmioh_high(int max_pnode)
341 341
342static __init void uv_rtc_init(void) 342static __init void uv_rtc_init(void)
343{ 343{
344 long status, ticks_per_sec, drift; 344 long status;
345 u64 ticks_per_sec;
345 346
346 status = 347 status = uv_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK,
347 x86_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec, 348 &ticks_per_sec);
348 &drift); 349 if (status != BIOS_STATUS_SUCCESS || ticks_per_sec < 100000) {
349 if (status != 0 || ticks_per_sec < 100000) {
350 printk(KERN_WARNING 350 printk(KERN_WARNING
351 "unable to determine platform RTC clock frequency, " 351 "unable to determine platform RTC clock frequency, "
352 "guessing.\n"); 352 "guessing.\n");
@@ -356,7 +356,22 @@ static __init void uv_rtc_init(void)
356 sn_rtc_cycles_per_second = ticks_per_sec; 356 sn_rtc_cycles_per_second = ticks_per_sec;
357} 357}
358 358
359static bool uv_system_inited; 359/*
360 * Called on each cpu to initialize the per_cpu UV data area.
361 * ZZZ hotplug not supported yet
362 */
363void __cpuinit uv_cpu_init(void)
364{
 365	/* CPU 0 initialization will be done via uv_system_init. */
366 if (!uv_blade_info)
367 return;
368
369 uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
370
371 if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
372 set_x2apic_extra_bits(uv_hub_info->pnode);
373}
374
360 375
361void __init uv_system_init(void) 376void __init uv_system_init(void)
362{ 377{
@@ -412,6 +427,9 @@ void __init uv_system_init(void)
412 gnode_upper = (((unsigned long)node_id.s.node_id) & 427 gnode_upper = (((unsigned long)node_id.s.node_id) &
413 ~((1 << n_val) - 1)) << m_val; 428 ~((1 << n_val) - 1)) << m_val;
414 429
430 uv_bios_init();
431 uv_bios_get_sn_info(0, &uv_type, &sn_partition_id,
432 &uv_coherency_id, &uv_region_size);
415 uv_rtc_init(); 433 uv_rtc_init();
416 434
417 for_each_present_cpu(cpu) { 435 for_each_present_cpu(cpu) {
@@ -433,7 +451,7 @@ void __init uv_system_init(void)
433 uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; 451 uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1;
434 uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; 452 uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
435 uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; 453 uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
436 uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */ 454 uv_cpu_hub_info(cpu)->coherency_domain_number = uv_coherency_id;
437 uv_node_to_blade[nid] = blade; 455 uv_node_to_blade[nid] = blade;
438 uv_cpu_to_blade[cpu] = blade; 456 uv_cpu_to_blade[cpu] = blade;
439 max_pnode = max(pnode, max_pnode); 457 max_pnode = max(pnode, max_pnode);
@@ -448,21 +466,6 @@ void __init uv_system_init(void)
448 map_mmr_high(max_pnode); 466 map_mmr_high(max_pnode);
449 map_config_high(max_pnode); 467 map_config_high(max_pnode);
450 map_mmioh_high(max_pnode); 468 map_mmioh_high(max_pnode);
451 uv_system_inited = true;
452}
453 469
454/* 470 uv_cpu_init();
455 * Called on each cpu to initialize the per_cpu UV data area.
456 * ZZZ hotplug not supported yet
457 */
458void __cpuinit uv_cpu_init(void)
459{
460 BUG_ON(!uv_system_inited);
461
462 uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
463
464 if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
465 set_x2apic_extra_bits(uv_hub_info->pnode);
466} 471}
467
468
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index acf62fc233da..77017e834cf7 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -1,29 +1,49 @@
1#include <linux/clocksource.h> 1#include <linux/clocksource.h>
2#include <linux/clockchips.h> 2#include <linux/clockchips.h>
3#include <linux/interrupt.h>
4#include <linux/sysdev.h>
3#include <linux/delay.h> 5#include <linux/delay.h>
4#include <linux/errno.h> 6#include <linux/errno.h>
5#include <linux/hpet.h> 7#include <linux/hpet.h>
6#include <linux/init.h> 8#include <linux/init.h>
7#include <linux/sysdev.h> 9#include <linux/cpu.h>
8#include <linux/pm.h> 10#include <linux/pm.h>
11#include <linux/io.h>
9 12
10#include <asm/fixmap.h> 13#include <asm/fixmap.h>
11#include <asm/hpet.h>
12#include <asm/i8253.h> 14#include <asm/i8253.h>
13#include <asm/io.h> 15#include <asm/hpet.h>
14 16
15#define HPET_MASK CLOCKSOURCE_MASK(32) 17#define HPET_MASK CLOCKSOURCE_MASK(32)
16#define HPET_SHIFT 22 18#define HPET_SHIFT 22
17 19
18/* FSEC = 10^-15 20/* FSEC = 10^-15
19 NSEC = 10^-9 */ 21 NSEC = 10^-9 */
20#define FSEC_PER_NSEC 1000000L 22#define FSEC_PER_NSEC 1000000L
23
24#define HPET_DEV_USED_BIT 2
25#define HPET_DEV_USED (1 << HPET_DEV_USED_BIT)
26#define HPET_DEV_VALID 0x8
27#define HPET_DEV_FSB_CAP 0x1000
28#define HPET_DEV_PERI_CAP 0x2000
29
30#define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt)
21 31
22/* 32/*
23 * HPET address is set in acpi/boot.c, when an ACPI entry exists 33 * HPET address is set in acpi/boot.c, when an ACPI entry exists
24 */ 34 */
25unsigned long hpet_address; 35unsigned long hpet_address;
26static void __iomem *hpet_virt_address; 36unsigned long hpet_num_timers;
37static void __iomem *hpet_virt_address;
38
39struct hpet_dev {
40 struct clock_event_device evt;
41 unsigned int num;
42 int cpu;
43 unsigned int irq;
44 unsigned int flags;
45 char name[10];
46};
27 47
28unsigned long hpet_readl(unsigned long a) 48unsigned long hpet_readl(unsigned long a)
29{ 49{
@@ -59,7 +79,7 @@ static inline void hpet_clear_mapping(void)
59static int boot_hpet_disable; 79static int boot_hpet_disable;
60int hpet_force_user; 80int hpet_force_user;
61 81
62static int __init hpet_setup(char* str) 82static int __init hpet_setup(char *str)
63{ 83{
64 if (str) { 84 if (str) {
65 if (!strncmp("disable", str, 7)) 85 if (!strncmp("disable", str, 7))
@@ -80,7 +100,7 @@ __setup("nohpet", disable_hpet);
80 100
81static inline int is_hpet_capable(void) 101static inline int is_hpet_capable(void)
82{ 102{
83 return (!boot_hpet_disable && hpet_address); 103 return !boot_hpet_disable && hpet_address;
84} 104}
85 105
86/* 106/*
@@ -102,6 +122,9 @@ EXPORT_SYMBOL_GPL(is_hpet_enabled);
102 * timer 0 and timer 1 in case of RTC emulation. 122 * timer 0 and timer 1 in case of RTC emulation.
103 */ 123 */
104#ifdef CONFIG_HPET 124#ifdef CONFIG_HPET
125
126static void hpet_reserve_msi_timers(struct hpet_data *hd);
127
105static void hpet_reserve_platform_timers(unsigned long id) 128static void hpet_reserve_platform_timers(unsigned long id)
106{ 129{
107 struct hpet __iomem *hpet = hpet_virt_address; 130 struct hpet __iomem *hpet = hpet_virt_address;
@@ -111,10 +134,10 @@ static void hpet_reserve_platform_timers(unsigned long id)
111 134
112 nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1; 135 nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
113 136
114 memset(&hd, 0, sizeof (hd)); 137 memset(&hd, 0, sizeof(hd));
115 hd.hd_phys_address = hpet_address; 138 hd.hd_phys_address = hpet_address;
116 hd.hd_address = hpet; 139 hd.hd_address = hpet;
117 hd.hd_nirqs = nrtimers; 140 hd.hd_nirqs = nrtimers;
118 hpet_reserve_timer(&hd, 0); 141 hpet_reserve_timer(&hd, 0);
119 142
120#ifdef CONFIG_HPET_EMULATE_RTC 143#ifdef CONFIG_HPET_EMULATE_RTC
@@ -130,10 +153,12 @@ static void hpet_reserve_platform_timers(unsigned long id)
130 hd.hd_irq[1] = HPET_LEGACY_RTC; 153 hd.hd_irq[1] = HPET_LEGACY_RTC;
131 154
132 for (i = 2; i < nrtimers; timer++, i++) { 155 for (i = 2; i < nrtimers; timer++, i++) {
133 hd.hd_irq[i] = (readl(&timer->hpet_config) & Tn_INT_ROUTE_CNF_MASK) >> 156 hd.hd_irq[i] = (readl(&timer->hpet_config) &
134 Tn_INT_ROUTE_CNF_SHIFT; 157 Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT;
135 } 158 }
136 159
160 hpet_reserve_msi_timers(&hd);
161
137 hpet_alloc(&hd); 162 hpet_alloc(&hd);
138 163
139} 164}
@@ -227,60 +252,70 @@ static void hpet_legacy_clockevent_register(void)
227 printk(KERN_DEBUG "hpet clockevent registered\n"); 252 printk(KERN_DEBUG "hpet clockevent registered\n");
228} 253}
229 254
230static void hpet_legacy_set_mode(enum clock_event_mode mode, 255static int hpet_setup_msi_irq(unsigned int irq);
231 struct clock_event_device *evt) 256
257static void hpet_set_mode(enum clock_event_mode mode,
258 struct clock_event_device *evt, int timer)
232{ 259{
233 unsigned long cfg, cmp, now; 260 unsigned long cfg, cmp, now;
234 uint64_t delta; 261 uint64_t delta;
235 262
236 switch(mode) { 263 switch (mode) {
237 case CLOCK_EVT_MODE_PERIODIC: 264 case CLOCK_EVT_MODE_PERIODIC:
238 delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * hpet_clockevent.mult; 265 delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult;
239 delta >>= hpet_clockevent.shift; 266 delta >>= evt->shift;
240 now = hpet_readl(HPET_COUNTER); 267 now = hpet_readl(HPET_COUNTER);
241 cmp = now + (unsigned long) delta; 268 cmp = now + (unsigned long) delta;
242 cfg = hpet_readl(HPET_T0_CFG); 269 cfg = hpet_readl(HPET_Tn_CFG(timer));
243 cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | 270 cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
244 HPET_TN_SETVAL | HPET_TN_32BIT; 271 HPET_TN_SETVAL | HPET_TN_32BIT;
245 hpet_writel(cfg, HPET_T0_CFG); 272 hpet_writel(cfg, HPET_Tn_CFG(timer));
246 /* 273 /*
247 * The first write after writing TN_SETVAL to the 274 * The first write after writing TN_SETVAL to the
248 * config register sets the counter value, the second 275 * config register sets the counter value, the second
249 * write sets the period. 276 * write sets the period.
250 */ 277 */
251 hpet_writel(cmp, HPET_T0_CMP); 278 hpet_writel(cmp, HPET_Tn_CMP(timer));
252 udelay(1); 279 udelay(1);
253 hpet_writel((unsigned long) delta, HPET_T0_CMP); 280 hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer));
254 break; 281 break;
255 282
256 case CLOCK_EVT_MODE_ONESHOT: 283 case CLOCK_EVT_MODE_ONESHOT:
257 cfg = hpet_readl(HPET_T0_CFG); 284 cfg = hpet_readl(HPET_Tn_CFG(timer));
258 cfg &= ~HPET_TN_PERIODIC; 285 cfg &= ~HPET_TN_PERIODIC;
259 cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; 286 cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
260 hpet_writel(cfg, HPET_T0_CFG); 287 hpet_writel(cfg, HPET_Tn_CFG(timer));
261 break; 288 break;
262 289
263 case CLOCK_EVT_MODE_UNUSED: 290 case CLOCK_EVT_MODE_UNUSED:
264 case CLOCK_EVT_MODE_SHUTDOWN: 291 case CLOCK_EVT_MODE_SHUTDOWN:
265 cfg = hpet_readl(HPET_T0_CFG); 292 cfg = hpet_readl(HPET_Tn_CFG(timer));
266 cfg &= ~HPET_TN_ENABLE; 293 cfg &= ~HPET_TN_ENABLE;
267 hpet_writel(cfg, HPET_T0_CFG); 294 hpet_writel(cfg, HPET_Tn_CFG(timer));
268 break; 295 break;
269 296
270 case CLOCK_EVT_MODE_RESUME: 297 case CLOCK_EVT_MODE_RESUME:
271 hpet_enable_legacy_int(); 298 if (timer == 0) {
299 hpet_enable_legacy_int();
300 } else {
301 struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
302 hpet_setup_msi_irq(hdev->irq);
303 disable_irq(hdev->irq);
304 irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu));
305 enable_irq(hdev->irq);
306 }
272 break; 307 break;
273 } 308 }
274} 309}
275 310
276static int hpet_legacy_next_event(unsigned long delta, 311static int hpet_next_event(unsigned long delta,
277 struct clock_event_device *evt) 312 struct clock_event_device *evt, int timer)
278{ 313{
279 u32 cnt; 314 u32 cnt;
280 315
281 cnt = hpet_readl(HPET_COUNTER); 316 cnt = hpet_readl(HPET_COUNTER);
282 cnt += (u32) delta; 317 cnt += (u32) delta;
283 hpet_writel(cnt, HPET_T0_CMP); 318 hpet_writel(cnt, HPET_Tn_CMP(timer));
284 319
285 /* 320 /*
286 * We need to read back the CMP register to make sure that 321 * We need to read back the CMP register to make sure that
@@ -292,6 +327,347 @@ static int hpet_legacy_next_event(unsigned long delta,
292 return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; 327 return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;
293} 328}
294 329
330static void hpet_legacy_set_mode(enum clock_event_mode mode,
331 struct clock_event_device *evt)
332{
333 hpet_set_mode(mode, evt, 0);
334}
335
336static int hpet_legacy_next_event(unsigned long delta,
337 struct clock_event_device *evt)
338{
339 return hpet_next_event(delta, evt, 0);
340}
341
342/*
343 * HPET MSI Support
344 */
345#ifdef CONFIG_PCI_MSI
346
347static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev);
348static struct hpet_dev *hpet_devs;
349
350void hpet_msi_unmask(unsigned int irq)
351{
352 struct hpet_dev *hdev = get_irq_data(irq);
353 unsigned long cfg;
354
355 /* unmask it */
356 cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
357 cfg |= HPET_TN_FSB;
358 hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
359}
360
361void hpet_msi_mask(unsigned int irq)
362{
363 unsigned long cfg;
364 struct hpet_dev *hdev = get_irq_data(irq);
365
366 /* mask it */
367 cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
368 cfg &= ~HPET_TN_FSB;
369 hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
370}
371
372void hpet_msi_write(unsigned int irq, struct msi_msg *msg)
373{
374 struct hpet_dev *hdev = get_irq_data(irq);
375
376 hpet_writel(msg->data, HPET_Tn_ROUTE(hdev->num));
377 hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hdev->num) + 4);
378}
379
380void hpet_msi_read(unsigned int irq, struct msi_msg *msg)
381{
382 struct hpet_dev *hdev = get_irq_data(irq);
383
384 msg->data = hpet_readl(HPET_Tn_ROUTE(hdev->num));
385 msg->address_lo = hpet_readl(HPET_Tn_ROUTE(hdev->num) + 4);
386 msg->address_hi = 0;
387}
388
389static void hpet_msi_set_mode(enum clock_event_mode mode,
390 struct clock_event_device *evt)
391{
392 struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
393 hpet_set_mode(mode, evt, hdev->num);
394}
395
396static int hpet_msi_next_event(unsigned long delta,
397 struct clock_event_device *evt)
398{
399 struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
400 return hpet_next_event(delta, evt, hdev->num);
401}
402
403static int hpet_setup_msi_irq(unsigned int irq)
404{
405 if (arch_setup_hpet_msi(irq)) {
406 destroy_irq(irq);
407 return -EINVAL;
408 }
409 return 0;
410}
411
412static int hpet_assign_irq(struct hpet_dev *dev)
413{
414 unsigned int irq;
415
416 irq = create_irq();
417 if (!irq)
418 return -EINVAL;
419
420 set_irq_data(irq, dev);
421
422 if (hpet_setup_msi_irq(irq))
423 return -EINVAL;
424
425 dev->irq = irq;
426 return 0;
427}
428
429static irqreturn_t hpet_interrupt_handler(int irq, void *data)
430{
431 struct hpet_dev *dev = (struct hpet_dev *)data;
432 struct clock_event_device *hevt = &dev->evt;
433
434 if (!hevt->event_handler) {
435 printk(KERN_INFO "Spurious HPET timer interrupt on HPET timer %d\n",
436 dev->num);
437 return IRQ_HANDLED;
438 }
439
440 hevt->event_handler(hevt);
441 return IRQ_HANDLED;
442}
443
444static int hpet_setup_irq(struct hpet_dev *dev)
445{
446
447 if (request_irq(dev->irq, hpet_interrupt_handler,
448 IRQF_SHARED|IRQF_NOBALANCING, dev->name, dev))
449 return -1;
450
451 disable_irq(dev->irq);
452 irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu));
453 enable_irq(dev->irq);
454
455 printk(KERN_DEBUG "hpet: %s irq %d for MSI\n",
456 dev->name, dev->irq);
457
458 return 0;
459}
460
 461/* This should be called on the specified @cpu */
462static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
463{
464 struct clock_event_device *evt = &hdev->evt;
465 uint64_t hpet_freq;
466
467 WARN_ON(cpu != smp_processor_id());
468 if (!(hdev->flags & HPET_DEV_VALID))
469 return;
470
471 if (hpet_setup_msi_irq(hdev->irq))
472 return;
473
474 hdev->cpu = cpu;
475 per_cpu(cpu_hpet_dev, cpu) = hdev;
476 evt->name = hdev->name;
477 hpet_setup_irq(hdev);
478 evt->irq = hdev->irq;
479
480 evt->rating = 110;
481 evt->features = CLOCK_EVT_FEAT_ONESHOT;
482 if (hdev->flags & HPET_DEV_PERI_CAP)
483 evt->features |= CLOCK_EVT_FEAT_PERIODIC;
484
485 evt->set_mode = hpet_msi_set_mode;
486 evt->set_next_event = hpet_msi_next_event;
487 evt->shift = 32;
488
489 /*
490 * The period is a femto seconds value. We need to calculate the
491 * scaled math multiplication factor for nanosecond to hpet tick
492 * conversion.
493 */
494 hpet_freq = 1000000000000000ULL;
495 do_div(hpet_freq, hpet_period);
496 evt->mult = div_sc((unsigned long) hpet_freq,
497 NSEC_PER_SEC, evt->shift);
498 /* Calculate the max delta */
499 evt->max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, evt);
500 /* 5 usec minimum reprogramming delta. */
501 evt->min_delta_ns = 5000;
502
503 evt->cpumask = cpumask_of_cpu(hdev->cpu);
504 clockevents_register_device(evt);
505}
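
A note on the mult/shift setup above: the HPET advertises its period in femtoseconds, so the code first derives the frequency (10^15 / period via do_div), then builds a fixed-point ns-to-tick factor with div_sc(). A minimal userspace sketch of the same arithmetic, assuming a hypothetical 14.318 MHz HPET (period 69841279 fs); div_sc() is (freq << shift) / NSEC_PER_SEC and clockevent_delta2ns() is roughly (latch << shift) / mult:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t hpet_period = 69841279ULL;   /* fs per tick; hypothetical 14.318 MHz HPET */
	uint64_t nsec_per_sec = 1000000000ULL;
	unsigned int shift = 32;

	/* hpet_freq = 10^15 / period, as done with do_div() above */
	uint64_t hpet_freq = 1000000000000000ULL / hpet_period;

	/* div_sc(): mult = (freq << shift) / NSEC_PER_SEC */
	uint64_t mult = (hpet_freq << shift) / nsec_per_sec;

	/* program a 1 ms event: ticks = ns * mult >> shift (~14318 ticks) */
	uint64_t delta_ns = 1000000;
	uint64_t ticks = (delta_ns * mult) >> shift;

	/* max_delta_ns = clockevent_delta2ns(0x7FFFFFFF): ns = (latch << shift) / mult */
	uint64_t max_ns = (0x7FFFFFFFULL << shift) / mult;

	printf("freq=%llu Hz mult=%llu 1ms=%llu ticks max=%llu ns\n",
	       (unsigned long long)hpet_freq, (unsigned long long)mult,
	       (unsigned long long)ticks, (unsigned long long)max_ns);
	return 0;
}
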
506
507#ifdef CONFIG_HPET
508/* Reserve at least one timer for userspace (/dev/hpet) */
509#define RESERVE_TIMERS 1
510#else
511#define RESERVE_TIMERS 0
512#endif
513
514static void hpet_msi_capability_lookup(unsigned int start_timer)
515{
516 unsigned int id;
517 unsigned int num_timers;
518 unsigned int num_timers_used = 0;
519 int i;
520
521 id = hpet_readl(HPET_ID);
522
523 num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT);
524 num_timers++; /* Value read out starts from 0 */
525
526 hpet_devs = kzalloc(sizeof(struct hpet_dev) * num_timers, GFP_KERNEL);
527 if (!hpet_devs)
528 return;
529
530 hpet_num_timers = num_timers;
531
532 for (i = start_timer; i < num_timers - RESERVE_TIMERS; i++) {
533 struct hpet_dev *hdev = &hpet_devs[num_timers_used];
534 unsigned long cfg = hpet_readl(HPET_Tn_CFG(i));
535
536 /* Only consider HPET timer with MSI support */
537 if (!(cfg & HPET_TN_FSB_CAP))
538 continue;
539
540 hdev->flags = 0;
541 if (cfg & HPET_TN_PERIODIC_CAP)
542 hdev->flags |= HPET_DEV_PERI_CAP;
543 hdev->num = i;
544
545 sprintf(hdev->name, "hpet%d", i);
546 if (hpet_assign_irq(hdev))
547 continue;
548
549 hdev->flags |= HPET_DEV_FSB_CAP;
550 hdev->flags |= HPET_DEV_VALID;
551 num_timers_used++;
552 if (num_timers_used == num_possible_cpus())
553 break;
554 }
555
 556 printk(KERN_INFO "HPET: %d timers in total, %d timers will be used as per-cpu timers\n",
557 num_timers, num_timers_used);
558}
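
The capability scan above decodes HPET_ID for the timer count and checks each timer's config register for FSB (MSI) delivery. A hedged sketch of the register decoding with the bit layout these helpers rely on (NUM_TIM_CAP in ID bits 12:8 storing timers - 1, FSB capability in Tn_CFG bit 15, periodic capability in bit 4, per the HPET spec); the register values here are hypothetical:

#include <stdio.h>

#define HPET_ID_NUMBER        0x00001f00u  /* bits 12:8: timers - 1 */
#define HPET_ID_NUMBER_SHIFT  8
#define HPET_TN_PERIODIC_CAP  0x00000010u  /* Tn_CFG bit 4 */
#define HPET_TN_FSB_CAP       0x00008000u  /* Tn_CFG bit 15: MSI capable */

int main(void)
{
	unsigned int id = 0x8086a301;         /* hypothetical HPET_ID value */
	unsigned int tn_cfg = 0x00008030;     /* hypothetical HPET_Tn_CFG value */
	unsigned int num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;

	printf("%u timers\n", num_timers);
	if (tn_cfg & HPET_TN_FSB_CAP)
		printf("timer is MSI (FSB) capable%s\n",
		       (tn_cfg & HPET_TN_PERIODIC_CAP) ? ", periodic capable" : "");
	return 0;
}
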
559
560#ifdef CONFIG_HPET
561static void hpet_reserve_msi_timers(struct hpet_data *hd)
562{
563 int i;
564
565 if (!hpet_devs)
566 return;
567
568 for (i = 0; i < hpet_num_timers; i++) {
569 struct hpet_dev *hdev = &hpet_devs[i];
570
571 if (!(hdev->flags & HPET_DEV_VALID))
572 continue;
573
574 hd->hd_irq[hdev->num] = hdev->irq;
575 hpet_reserve_timer(hd, hdev->num);
576 }
577}
578#endif
579
580static struct hpet_dev *hpet_get_unused_timer(void)
581{
582 int i;
583
584 if (!hpet_devs)
585 return NULL;
586
587 for (i = 0; i < hpet_num_timers; i++) {
588 struct hpet_dev *hdev = &hpet_devs[i];
589
590 if (!(hdev->flags & HPET_DEV_VALID))
591 continue;
592 if (test_and_set_bit(HPET_DEV_USED_BIT,
593 (unsigned long *)&hdev->flags))
594 continue;
595 return hdev;
596 }
597 return NULL;
598}
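
hpet_get_unused_timer() claims a device with test_and_set_bit() so that two CPUs racing through the work handler cannot grab the same timer. A userspace analogue using C11 atomics, assuming the same bit layout for the flag word (HPET_DEV_USED_BIT taken to be 2, matching the flags used in this file); atomic_fetch_or() returns the old word, so testing the bit in the return value mirrors test_and_set_bit():

#include <stdatomic.h>
#include <stdio.h>

#define HPET_DEV_USED_BIT 2
#define HPET_DEV_USED     (1u << HPET_DEV_USED_BIT)

static int test_and_set_used(atomic_uint *flags)
{
	/* atomically set the bit; nonzero return means someone beat us to it */
	return atomic_fetch_or(flags, HPET_DEV_USED) & HPET_DEV_USED;
}

int main(void)
{
	atomic_uint flags = 0;

	printf("first claim: %s\n", test_and_set_used(&flags) ? "lost" : "won");
	printf("second claim: %s\n", test_and_set_used(&flags) ? "lost" : "won");
	return 0;
}
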
599
600struct hpet_work_struct {
601 struct delayed_work work;
602 struct completion complete;
603};
604
605static void hpet_work(struct work_struct *w)
606{
607 struct hpet_dev *hdev;
608 int cpu = smp_processor_id();
609 struct hpet_work_struct *hpet_work;
610
611 hpet_work = container_of(w, struct hpet_work_struct, work.work);
612
613 hdev = hpet_get_unused_timer();
614 if (hdev)
615 init_one_hpet_msi_clockevent(hdev, cpu);
616
617 complete(&hpet_work->complete);
618}
619
620static int hpet_cpuhp_notify(struct notifier_block *n,
621 unsigned long action, void *hcpu)
622{
623 unsigned long cpu = (unsigned long)hcpu;
624 struct hpet_work_struct work;
625 struct hpet_dev *hdev = per_cpu(cpu_hpet_dev, cpu);
626
627 switch (action & 0xf) {
628 case CPU_ONLINE:
629 INIT_DELAYED_WORK(&work.work, hpet_work);
630 init_completion(&work.complete);
631 /* FIXME: add schedule_work_on() */
632 schedule_delayed_work_on(cpu, &work.work, 0);
633 wait_for_completion(&work.complete);
634 break;
635 case CPU_DEAD:
636 if (hdev) {
637 free_irq(hdev->irq, hdev);
638 hdev->flags &= ~HPET_DEV_USED;
639 per_cpu(cpu_hpet_dev, cpu) = NULL;
640 }
641 break;
642 }
643 return NOTIFY_OK;
644}
645#else
646
647static int hpet_setup_msi_irq(unsigned int irq)
648{
649 return 0;
650}
651static void hpet_msi_capability_lookup(unsigned int start_timer)
652{
653 return;
654}
655
656#ifdef CONFIG_HPET
657static void hpet_reserve_msi_timers(struct hpet_data *hd)
658{
659 return;
660}
661#endif
662
663static int hpet_cpuhp_notify(struct notifier_block *n,
664 unsigned long action, void *hcpu)
665{
666 return NOTIFY_OK;
667}
668
669#endif
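
The hotplug path above funnels clockevent registration through schedule_delayed_work_on() plus a completion, because init_one_hpet_msi_clockevent() must run on the CPU that will own the timer. A rough pthread analogue of "run on CPU n, then wait for completion" (not kernel code; pinning via pthread_setaffinity_np and a condition-variable completion stand in for the workqueue machinery):

#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdio.h>

struct completion {
	pthread_mutex_t lock;
	pthread_cond_t wait;
	int done;
};

static struct completion work_done = {
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0
};

static void *work_fn(void *arg)
{
	long cpu = (long)arg;
	cpu_set_t set;

	/* pin ourselves to the requested CPU, like schedule_delayed_work_on() */
	CPU_ZERO(&set);
	CPU_SET(cpu, &set);
	pthread_setaffinity_np(pthread_self(), sizeof(set), &set);

	printf("work running on CPU %d (wanted %ld)\n", sched_getcpu(), cpu);

	/* complete() analogue */
	pthread_mutex_lock(&work_done.lock);
	work_done.done = 1;
	pthread_cond_signal(&work_done.wait);
	pthread_mutex_unlock(&work_done.lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, work_fn, (void *)0L);

	/* wait_for_completion() analogue */
	pthread_mutex_lock(&work_done.lock);
	while (!work_done.done)
		pthread_cond_wait(&work_done.wait, &work_done.lock);
	pthread_mutex_unlock(&work_done.lock);

	pthread_join(t, NULL);
	return 0;
}
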
670
295/* 671/*
296 * Clock source related code 672 * Clock source related code
297 */ 673 */
@@ -427,8 +803,10 @@ int __init hpet_enable(void)
427 803
428 if (id & HPET_ID_LEGSUP) { 804 if (id & HPET_ID_LEGSUP) {
429 hpet_legacy_clockevent_register(); 805 hpet_legacy_clockevent_register();
806 hpet_msi_capability_lookup(2);
430 return 1; 807 return 1;
431 } 808 }
809 hpet_msi_capability_lookup(0);
432 return 0; 810 return 0;
433 811
434out_nohpet: 812out_nohpet:
@@ -445,6 +823,8 @@ out_nohpet:
445 */ 823 */
446static __init int hpet_late_init(void) 824static __init int hpet_late_init(void)
447{ 825{
826 int cpu;
827
448 if (boot_hpet_disable) 828 if (boot_hpet_disable)
449 return -ENODEV; 829 return -ENODEV;
450 830
@@ -460,6 +840,13 @@ static __init int hpet_late_init(void)
460 840
461 hpet_reserve_platform_timers(hpet_readl(HPET_ID)); 841 hpet_reserve_platform_timers(hpet_readl(HPET_ID));
462 842
843 for_each_online_cpu(cpu) {
844 hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu);
845 }
846
 847 /* This notifier should be registered after workqueues are ready */
848 hotcpu_notifier(hpet_cpuhp_notify, -20);
849
463 return 0; 850 return 0;
464} 851}
465fs_initcall(hpet_late_init); 852fs_initcall(hpet_late_init);
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic.c
index 02063ae042f7..b764d7429c61 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic.c
@@ -27,17 +27,21 @@
27#include <linux/sched.h> 27#include <linux/sched.h>
28#include <linux/pci.h> 28#include <linux/pci.h>
29#include <linux/mc146818rtc.h> 29#include <linux/mc146818rtc.h>
30#include <linux/compiler.h>
30#include <linux/acpi.h> 31#include <linux/acpi.h>
32#include <linux/module.h>
31#include <linux/sysdev.h> 33#include <linux/sysdev.h>
32#include <linux/msi.h> 34#include <linux/msi.h>
33#include <linux/htirq.h> 35#include <linux/htirq.h>
34#include <linux/dmar.h> 36#include <linux/freezer.h>
35#include <linux/jiffies.h> 37#include <linux/kthread.h>
38#include <linux/jiffies.h> /* time_after() */
36#ifdef CONFIG_ACPI 39#ifdef CONFIG_ACPI
37#include <acpi/acpi_bus.h> 40#include <acpi/acpi_bus.h>
38#endif 41#endif
39#include <linux/bootmem.h> 42#include <linux/bootmem.h>
40#include <linux/dmar.h> 43#include <linux/dmar.h>
44#include <linux/hpet.h>
41 45
42#include <asm/idle.h> 46#include <asm/idle.h>
43#include <asm/io.h> 47#include <asm/io.h>
@@ -46,61 +50,28 @@
46#include <asm/proto.h> 50#include <asm/proto.h>
47#include <asm/acpi.h> 51#include <asm/acpi.h>
48#include <asm/dma.h> 52#include <asm/dma.h>
53#include <asm/timer.h>
49#include <asm/i8259.h> 54#include <asm/i8259.h>
50#include <asm/nmi.h> 55#include <asm/nmi.h>
51#include <asm/msidef.h> 56#include <asm/msidef.h>
52#include <asm/hypertransport.h> 57#include <asm/hypertransport.h>
58#include <asm/setup.h>
53#include <asm/irq_remapping.h> 59#include <asm/irq_remapping.h>
60#include <asm/hpet.h>
61#include <asm/uv/uv_hub.h>
62#include <asm/uv/uv_irq.h>
54 63
55#include <mach_ipi.h> 64#include <mach_ipi.h>
56#include <mach_apic.h> 65#include <mach_apic.h>
66#include <mach_apicdef.h>
57 67
58#define __apicdebuginit(type) static type __init 68#define __apicdebuginit(type) static type __init
59 69
60struct irq_cfg { 70/*
61 cpumask_t domain; 71 * Is the SiS APIC rmw bug present ?
62 cpumask_t old_domain; 72 * -1 = don't know, 0 = no, 1 = yes
63 unsigned move_cleanup_count; 73 */
64 u8 vector; 74int sis_apic_bug = -1;
65 u8 move_in_progress : 1;
66};
67
68/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
69static struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = {
70 [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, },
71 [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, },
72 [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, },
73 [3] = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, },
74 [4] = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, },
75 [5] = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, },
76 [6] = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, },
77 [7] = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, },
78 [8] = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, },
79 [9] = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, },
80 [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
81 [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
82 [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
83 [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
84 [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
85 [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
86};
87
88static int assign_irq_vector(int irq, cpumask_t mask);
89
90int first_system_vector = 0xfe;
91
92char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
93
94int sis_apic_bug; /* not actually supported, dummy for compile */
95
96static int no_timer_check;
97
98static int disable_timer_pin_1 __initdata;
99
100int timer_through_8259 __initdata;
101
102/* Where if anywhere is the i8259 connect in external int mode */
103static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
104 75
105static DEFINE_SPINLOCK(ioapic_lock); 76static DEFINE_SPINLOCK(ioapic_lock);
106static DEFINE_SPINLOCK(vector_lock); 77static DEFINE_SPINLOCK(vector_lock);
@@ -110,9 +81,6 @@ static DEFINE_SPINLOCK(vector_lock);
110 */ 81 */
111int nr_ioapic_registers[MAX_IO_APICS]; 82int nr_ioapic_registers[MAX_IO_APICS];
112 83
113/* I/O APIC RTE contents at the OS boot up */
114struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
115
116/* I/O APIC entries */ 84/* I/O APIC entries */
117struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; 85struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
118int nr_ioapics; 86int nr_ioapics;
@@ -123,11 +91,69 @@ struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
123/* # of MP IRQ source entries */ 91/* # of MP IRQ source entries */
124int mp_irq_entries; 92int mp_irq_entries;
125 93
94#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
95int mp_bus_id_to_type[MAX_MP_BUSSES];
96#endif
97
126DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); 98DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
127 99
100int skip_ioapic_setup;
101
102static int __init parse_noapic(char *str)
103{
104 /* disable IO-APIC */
105 disable_ioapic_setup();
106 return 0;
107}
108early_param("noapic", parse_noapic);
109
110struct irq_pin_list;
111struct irq_cfg {
112 unsigned int irq;
113 struct irq_pin_list *irq_2_pin;
114 cpumask_t domain;
115 cpumask_t old_domain;
116 unsigned move_cleanup_count;
117 u8 vector;
118 u8 move_in_progress : 1;
119};
120
121/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
122static struct irq_cfg irq_cfgx[NR_IRQS] = {
123 [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, },
124 [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, },
125 [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, },
126 [3] = { .irq = 3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, },
127 [4] = { .irq = 4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, },
128 [5] = { .irq = 5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, },
129 [6] = { .irq = 6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, },
130 [7] = { .irq = 7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, },
131 [8] = { .irq = 8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, },
132 [9] = { .irq = 9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, },
133 [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
134 [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
135 [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
136 [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
137 [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
138 [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
139};
140
141#define for_each_irq_cfg(irq, cfg) \
142 for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++)
143
144static struct irq_cfg *irq_cfg(unsigned int irq)
145{
146 return irq < nr_irqs ? irq_cfgx + irq : NULL;
147}
148
149static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
150{
151 return irq_cfg(irq);
152}
153
128/* 154/*
129 * Rough estimation of how many shared IRQs there are, can 155 * Rough estimation of how many shared IRQs there are, can be changed
130 * be changed anytime. 156 * anytime.
131 */ 157 */
132#define MAX_PLUS_SHARED_IRQS NR_IRQS 158#define MAX_PLUS_SHARED_IRQS NR_IRQS
133#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) 159#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
@@ -139,9 +165,36 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
139 * between pins and IRQs. 165 * between pins and IRQs.
140 */ 166 */
141 167
142static struct irq_pin_list { 168struct irq_pin_list {
143 short apic, pin, next; 169 int apic, pin;
144} irq_2_pin[PIN_MAP_SIZE]; 170 struct irq_pin_list *next;
171};
172
173static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE];
174static struct irq_pin_list *irq_2_pin_ptr;
175
176static void __init irq_2_pin_init(void)
177{
178 struct irq_pin_list *pin = irq_2_pin_head;
179 int i;
180
181 for (i = 1; i < PIN_MAP_SIZE; i++)
182 pin[i-1].next = &pin[i];
183
184 irq_2_pin_ptr = &pin[0];
185}
186
187static struct irq_pin_list *get_one_free_irq_2_pin(void)
188{
189 struct irq_pin_list *pin = irq_2_pin_ptr;
190
191 if (!pin)
 192 panic("cannot get more irq_2_pin entries\n");
193
194 irq_2_pin_ptr = pin->next;
195 pin->next = NULL;
196 return pin;
197}
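
The new irq_2_pin bootstrap replaces the old index-into-array chaining with a singly linked free list carved out of a static pool: irq_2_pin_init() threads every slot together once, and get_one_free_irq_2_pin() pops the head. A standalone sketch of the same pattern:

#include <stdio.h>
#include <stdlib.h>

#define PIN_MAP_SIZE 8

struct irq_pin_list {
	int apic, pin;
	struct irq_pin_list *next;
};

static struct irq_pin_list pool[PIN_MAP_SIZE];
static struct irq_pin_list *free_head;

static void pool_init(void)
{
	int i;

	/* thread every slot onto the free list, head at slot 0 */
	for (i = 1; i < PIN_MAP_SIZE; i++)
		pool[i - 1].next = &pool[i];
	free_head = &pool[0];
}

static struct irq_pin_list *pool_get(void)
{
	struct irq_pin_list *e = free_head;

	if (!e) {
		fprintf(stderr, "out of irq_2_pin entries\n");
		exit(1);
	}
	free_head = e->next;
	e->next = NULL;
	return e;
}

int main(void)
{
	pool_init();
	printf("got slot %ld\n", (long)(pool_get() - pool));
	printf("got slot %ld\n", (long)(pool_get() - pool));
	return 0;
}
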
145 198
146struct io_apic { 199struct io_apic {
147 unsigned int index; 200 unsigned int index;
@@ -172,10 +225,15 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i
172/* 225/*
173 * Re-write a value: to be used for read-modify-write 226 * Re-write a value: to be used for read-modify-write
174 * cycles where the read already set up the index register. 227 * cycles where the read already set up the index register.
228 *
229 * Older SiS APIC requires we rewrite the index register
175 */ 230 */
176static inline void io_apic_modify(unsigned int apic, unsigned int value) 231static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
177{ 232{
178 struct io_apic __iomem *io_apic = io_apic_base(apic); 233 struct io_apic __iomem *io_apic = io_apic_base(apic);
234
235 if (sis_apic_bug)
236 writel(reg, &io_apic->index);
179 writel(value, &io_apic->data); 237 writel(value, &io_apic->data);
180} 238}
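
The merged io_apic_modify() now takes the register number so it can rewrite the index register before the data write when the SiS erratum is present: on affected chipsets the latched index cannot be trusted across a read-modify-write cycle, so without re-latching the write could land in the wrong register. A toy model of an indexed index/data window illustrating the idea (a deliberately simplified simulation, not the real hardware behavior):

#include <stdio.h>

/* toy model of an indexed MMIO window: index selects which reg data hits */
static unsigned int regs[4];
static unsigned int index_reg;
static int sis_apic_bug = 1;     /* pretend the erratum is present */

static unsigned int io_read(unsigned int reg)
{
	index_reg = reg;
	if (sis_apic_bug)
		index_reg = 0;   /* erratum: the latched index is lost */
	return regs[reg];
}

static void io_modify(unsigned int reg, unsigned int val)
{
	if (sis_apic_bug)
		index_reg = reg; /* re-latch the index before writing */
	regs[index_reg] = val;
}

int main(void)
{
	unsigned int v = io_read(2);

	io_modify(2, v | 0x10000);  /* without the re-latch, regs[0] would be hit */
	printf("reg2=%#x reg0=%#x\n", regs[2], regs[0]);
	return 0;
}
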
181 239
@@ -183,16 +241,17 @@ static bool io_apic_level_ack_pending(unsigned int irq)
183{ 241{
184 struct irq_pin_list *entry; 242 struct irq_pin_list *entry;
185 unsigned long flags; 243 unsigned long flags;
244 struct irq_cfg *cfg = irq_cfg(irq);
186 245
187 spin_lock_irqsave(&ioapic_lock, flags); 246 spin_lock_irqsave(&ioapic_lock, flags);
188 entry = irq_2_pin + irq; 247 entry = cfg->irq_2_pin;
189 for (;;) { 248 for (;;) {
190 unsigned int reg; 249 unsigned int reg;
191 int pin; 250 int pin;
192 251
193 pin = entry->pin; 252 if (!entry)
194 if (pin == -1)
195 break; 253 break;
254 pin = entry->pin;
196 reg = io_apic_read(entry->apic, 0x10 + pin*2); 255 reg = io_apic_read(entry->apic, 0x10 + pin*2);
197 /* Is the remote IRR bit set? */ 256 /* Is the remote IRR bit set? */
198 if (reg & IO_APIC_REDIR_REMOTE_IRR) { 257 if (reg & IO_APIC_REDIR_REMOTE_IRR) {
@@ -201,45 +260,13 @@ static bool io_apic_level_ack_pending(unsigned int irq)
201 } 260 }
202 if (!entry->next) 261 if (!entry->next)
203 break; 262 break;
204 entry = irq_2_pin + entry->next; 263 entry = entry->next;
205 } 264 }
206 spin_unlock_irqrestore(&ioapic_lock, flags); 265 spin_unlock_irqrestore(&ioapic_lock, flags);
207 266
208 return false; 267 return false;
209} 268}
210 269
211/*
212 * Synchronize the IO-APIC and the CPU by doing
213 * a dummy read from the IO-APIC
214 */
215static inline void io_apic_sync(unsigned int apic)
216{
217 struct io_apic __iomem *io_apic = io_apic_base(apic);
218 readl(&io_apic->data);
219}
220
221#define __DO_ACTION(R, ACTION, FINAL) \
222 \
223{ \
224 int pin; \
225 struct irq_pin_list *entry = irq_2_pin + irq; \
226 \
227 BUG_ON(irq >= NR_IRQS); \
228 for (;;) { \
229 unsigned int reg; \
230 pin = entry->pin; \
231 if (pin == -1) \
232 break; \
233 reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \
234 reg ACTION; \
235 io_apic_modify(entry->apic, reg); \
236 FINAL; \
237 if (!entry->next) \
238 break; \
239 entry = irq_2_pin + entry->next; \
240 } \
241}
242
243union entry_union { 270union entry_union {
244 struct { u32 w1, w2; }; 271 struct { u32 w1, w2; };
245 struct IO_APIC_route_entry entry; 272 struct IO_APIC_route_entry entry;
@@ -299,59 +326,71 @@ static void ioapic_mask_entry(int apic, int pin)
299static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) 326static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
300{ 327{
301 int apic, pin; 328 int apic, pin;
302 struct irq_pin_list *entry = irq_2_pin + irq; 329 struct irq_cfg *cfg;
330 struct irq_pin_list *entry;
303 331
304 BUG_ON(irq >= NR_IRQS); 332 cfg = irq_cfg(irq);
333 entry = cfg->irq_2_pin;
305 for (;;) { 334 for (;;) {
306 unsigned int reg; 335 unsigned int reg;
336
337 if (!entry)
338 break;
339
307 apic = entry->apic; 340 apic = entry->apic;
308 pin = entry->pin; 341 pin = entry->pin;
309 if (pin == -1) 342#ifdef CONFIG_INTR_REMAP
310 break;
311 /* 343 /*
312 * With interrupt-remapping, destination information comes 344 * With interrupt-remapping, destination information comes
313 * from interrupt-remapping table entry. 345 * from interrupt-remapping table entry.
314 */ 346 */
315 if (!irq_remapped(irq)) 347 if (!irq_remapped(irq))
316 io_apic_write(apic, 0x11 + pin*2, dest); 348 io_apic_write(apic, 0x11 + pin*2, dest);
349#else
350 io_apic_write(apic, 0x11 + pin*2, dest);
351#endif
317 reg = io_apic_read(apic, 0x10 + pin*2); 352 reg = io_apic_read(apic, 0x10 + pin*2);
318 reg &= ~IO_APIC_REDIR_VECTOR_MASK; 353 reg &= ~IO_APIC_REDIR_VECTOR_MASK;
319 reg |= vector; 354 reg |= vector;
320 io_apic_modify(apic, reg); 355 io_apic_modify(apic, 0x10 + pin*2, reg);
321 if (!entry->next) 356 if (!entry->next)
322 break; 357 break;
323 entry = irq_2_pin + entry->next; 358 entry = entry->next;
324 } 359 }
325} 360}
326 361
362static int assign_irq_vector(int irq, cpumask_t mask);
363
327static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) 364static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
328{ 365{
329 struct irq_cfg *cfg = irq_cfg + irq; 366 struct irq_cfg *cfg;
330 unsigned long flags; 367 unsigned long flags;
331 unsigned int dest; 368 unsigned int dest;
332 cpumask_t tmp; 369 cpumask_t tmp;
370 struct irq_desc *desc;
333 371
334 cpus_and(tmp, mask, cpu_online_map); 372 cpus_and(tmp, mask, cpu_online_map);
335 if (cpus_empty(tmp)) 373 if (cpus_empty(tmp))
336 return; 374 return;
337 375
376 cfg = irq_cfg(irq);
338 if (assign_irq_vector(irq, mask)) 377 if (assign_irq_vector(irq, mask))
339 return; 378 return;
340 379
341 cpus_and(tmp, cfg->domain, mask); 380 cpus_and(tmp, cfg->domain, mask);
342 dest = cpu_mask_to_apicid(tmp); 381 dest = cpu_mask_to_apicid(tmp);
343
344 /* 382 /*
345 * Only the high 8 bits are valid. 383 * Only the high 8 bits are valid.
346 */ 384 */
347 dest = SET_APIC_LOGICAL_ID(dest); 385 dest = SET_APIC_LOGICAL_ID(dest);
348 386
387 desc = irq_to_desc(irq);
349 spin_lock_irqsave(&ioapic_lock, flags); 388 spin_lock_irqsave(&ioapic_lock, flags);
350 __target_IO_APIC_irq(irq, dest, cfg->vector); 389 __target_IO_APIC_irq(irq, dest, cfg->vector);
351 irq_desc[irq].affinity = mask; 390 desc->affinity = mask;
352 spin_unlock_irqrestore(&ioapic_lock, flags); 391 spin_unlock_irqrestore(&ioapic_lock, flags);
353} 392}
354#endif 393#endif /* CONFIG_SMP */
355 394
356/* 395/*
357 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are 396 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
@@ -360,19 +399,30 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
360 */ 399 */
361static void add_pin_to_irq(unsigned int irq, int apic, int pin) 400static void add_pin_to_irq(unsigned int irq, int apic, int pin)
362{ 401{
363 static int first_free_entry = NR_IRQS; 402 struct irq_cfg *cfg;
364 struct irq_pin_list *entry = irq_2_pin + irq; 403 struct irq_pin_list *entry;
404
 405 /* first reference to this irq_cfg, so allocate it */
406 cfg = irq_cfg_alloc(irq);
407 entry = cfg->irq_2_pin;
408 if (!entry) {
409 entry = get_one_free_irq_2_pin();
410 cfg->irq_2_pin = entry;
411 entry->apic = apic;
412 entry->pin = pin;
413 return;
414 }
365 415
366 BUG_ON(irq >= NR_IRQS); 416 while (entry->next) {
367 while (entry->next) 417 /* not again, please */
368 entry = irq_2_pin + entry->next; 418 if (entry->apic == apic && entry->pin == pin)
419 return;
369 420
370 if (entry->pin != -1) { 421 entry = entry->next;
371 entry->next = first_free_entry;
372 entry = irq_2_pin + entry->next;
373 if (++first_free_entry >= PIN_MAP_SIZE)
374 panic("io_apic.c: ran out of irq_2_pin entries!");
375 } 422 }
423
424 entry->next = get_one_free_irq_2_pin();
425 entry = entry->next;
376 entry->apic = apic; 426 entry->apic = apic;
377 entry->pin = pin; 427 entry->pin = pin;
378} 428}
@@ -384,30 +434,86 @@ static void __init replace_pin_at_irq(unsigned int irq,
384 int oldapic, int oldpin, 434 int oldapic, int oldpin,
385 int newapic, int newpin) 435 int newapic, int newpin)
386{ 436{
387 struct irq_pin_list *entry = irq_2_pin + irq; 437 struct irq_cfg *cfg = irq_cfg(irq);
438 struct irq_pin_list *entry = cfg->irq_2_pin;
439 int replaced = 0;
388 440
389 while (1) { 441 while (entry) {
390 if (entry->apic == oldapic && entry->pin == oldpin) { 442 if (entry->apic == oldapic && entry->pin == oldpin) {
391 entry->apic = newapic; 443 entry->apic = newapic;
392 entry->pin = newpin; 444 entry->pin = newpin;
393 } 445 replaced = 1;
394 if (!entry->next) 446 /* every one is different, right? */
395 break; 447 break;
396 entry = irq_2_pin + entry->next; 448 }
449 entry = entry->next;
450 }
451
 452 /* nothing was replaced: the pin was never tracked, so add it */
453 if (!replaced)
454 add_pin_to_irq(irq, newapic, newpin);
455}
456
457static inline void io_apic_modify_irq(unsigned int irq,
458 int mask_and, int mask_or,
459 void (*final)(struct irq_pin_list *entry))
460{
461 int pin;
462 struct irq_cfg *cfg;
463 struct irq_pin_list *entry;
464
465 cfg = irq_cfg(irq);
466 for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
467 unsigned int reg;
468 pin = entry->pin;
469 reg = io_apic_read(entry->apic, 0x10 + pin * 2);
470 reg &= mask_and;
471 reg |= mask_or;
472 io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
473 if (final)
474 final(entry);
397 } 475 }
398} 476}
399 477
478static void __unmask_IO_APIC_irq(unsigned int irq)
479{
480 io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL);
481}
400 482
401#define DO_ACTION(name,R,ACTION, FINAL) \ 483#ifdef CONFIG_X86_64
402 \ 484void io_apic_sync(struct irq_pin_list *entry)
403 static void name##_IO_APIC_irq (unsigned int irq) \ 485{
404 __DO_ACTION(R, ACTION, FINAL) 486 /*
487 * Synchronize the IO-APIC and the CPU by doing
488 * a dummy read from the IO-APIC
489 */
490 struct io_apic __iomem *io_apic;
491 io_apic = io_apic_base(entry->apic);
492 readl(&io_apic->data);
493}
405 494
406/* mask = 1 */ 495static void __mask_IO_APIC_irq(unsigned int irq)
407DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic)) 496{
497 io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
498}
499#else /* CONFIG_X86_32 */
500static void __mask_IO_APIC_irq(unsigned int irq)
501{
502 io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL);
503}
408 504
409/* mask = 0 */ 505static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
410DO_ACTION(__unmask, 0, &= ~IO_APIC_REDIR_MASKED, ) 506{
507 io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER,
508 IO_APIC_REDIR_MASKED, NULL);
509}
510
511static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
512{
513 io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED,
514 IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
515}
516#endif /* CONFIG_X86_32 */
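
io_apic_modify_irq() folds the old __DO_ACTION macro into a single helper: every caller expresses its edit as reg = (reg & mask_and) | mask_or, so mask, unmask, and the 32-bit level/edge transitions differ only in the two masks. A plain-C check of the combinations, using the redirection-entry bits involved (mask in bit 16, trigger mode in bit 15, per the I/O APIC RTE layout):

#include <stdio.h>

#define IO_APIC_REDIR_LEVEL_TRIGGER (1u << 15)
#define IO_APIC_REDIR_MASKED        (1u << 16)

static unsigned int modify(unsigned int reg, unsigned int mask_and,
			   unsigned int mask_or)
{
	return (reg & mask_and) | mask_or;
}

int main(void)
{
	unsigned int reg = IO_APIC_REDIR_LEVEL_TRIGGER | 0x31; /* level, vector 0x31 */

	/* __mask_IO_APIC_irq: keep everything, set MASKED */
	unsigned int masked = modify(reg, ~0u, IO_APIC_REDIR_MASKED);
	/* __unmask_IO_APIC_irq: clear MASKED, set nothing */
	unsigned int unmasked = modify(masked, ~IO_APIC_REDIR_MASKED, 0);
	/* __mask_and_edge_IO_APIC_irq: clear LEVEL, set MASKED */
	unsigned int edge = modify(reg, ~IO_APIC_REDIR_LEVEL_TRIGGER,
				   IO_APIC_REDIR_MASKED);

	printf("masked=%#x unmasked=%#x mask+edge=%#x\n", masked, unmasked, edge);
	return 0;
}
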
411 517
412static void mask_IO_APIC_irq (unsigned int irq) 518static void mask_IO_APIC_irq (unsigned int irq)
413{ 519{
@@ -450,6 +556,68 @@ static void clear_IO_APIC (void)
450 clear_IO_APIC_pin(apic, pin); 556 clear_IO_APIC_pin(apic, pin);
451} 557}
452 558
559#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32)
560void send_IPI_self(int vector)
561{
562 unsigned int cfg;
563
564 /*
565 * Wait for idle.
566 */
567 apic_wait_icr_idle();
568 cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
569 /*
570 * Send the IPI. The write to APIC_ICR fires this off.
571 */
572 apic_write(APIC_ICR, cfg);
573}
574#endif /* !CONFIG_SMP && CONFIG_X86_32*/
575
576#ifdef CONFIG_X86_32
577/*
578 * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
579 * specific CPU-side IRQs.
580 */
581
582#define MAX_PIRQS 8
583static int pirq_entries [MAX_PIRQS];
584static int pirqs_enabled;
585
586static int __init ioapic_pirq_setup(char *str)
587{
588 int i, max;
589 int ints[MAX_PIRQS+1];
590
591 get_options(str, ARRAY_SIZE(ints), ints);
592
593 for (i = 0; i < MAX_PIRQS; i++)
594 pirq_entries[i] = -1;
595
596 pirqs_enabled = 1;
597 apic_printk(APIC_VERBOSE, KERN_INFO
598 "PIRQ redirection, working around broken MP-BIOS.\n");
599 max = MAX_PIRQS;
600 if (ints[0] < MAX_PIRQS)
601 max = ints[0];
602
603 for (i = 0; i < max; i++) {
604 apic_printk(APIC_VERBOSE, KERN_DEBUG
605 "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
606 /*
607 * PIRQs are mapped upside down, usually.
608 */
609 pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
610 }
611 return 1;
612}
613
614__setup("pirq=", ioapic_pirq_setup);
615#endif /* CONFIG_X86_32 */
616
617#ifdef CONFIG_INTR_REMAP
618/* I/O APIC RTE contents at the OS boot up */
619static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
620
453/* 621/*
454 * Saves and masks all the unmasked IO-APIC RTE's 622 * Saves and masks all the unmasked IO-APIC RTE's
455 */ 623 */
@@ -474,7 +642,7 @@ int save_mask_IO_APIC_setup(void)
474 kzalloc(sizeof(struct IO_APIC_route_entry) * 642 kzalloc(sizeof(struct IO_APIC_route_entry) *
475 nr_ioapic_registers[apic], GFP_KERNEL); 643 nr_ioapic_registers[apic], GFP_KERNEL);
476 if (!early_ioapic_entries[apic]) 644 if (!early_ioapic_entries[apic])
477 return -ENOMEM; 645 goto nomem;
478 } 646 }
479 647
480 for (apic = 0; apic < nr_ioapics; apic++) 648 for (apic = 0; apic < nr_ioapics; apic++)
@@ -488,17 +656,31 @@ int save_mask_IO_APIC_setup(void)
488 ioapic_write_entry(apic, pin, entry); 656 ioapic_write_entry(apic, pin, entry);
489 } 657 }
490 } 658 }
659
491 return 0; 660 return 0;
661
662nomem:
663 while (apic >= 0)
664 kfree(early_ioapic_entries[apic--]);
665 memset(early_ioapic_entries, 0,
666 ARRAY_SIZE(early_ioapic_entries));
667
668 return -ENOMEM;
492} 669}
493 670
494void restore_IO_APIC_setup(void) 671void restore_IO_APIC_setup(void)
495{ 672{
496 int apic, pin; 673 int apic, pin;
497 674
498 for (apic = 0; apic < nr_ioapics; apic++) 675 for (apic = 0; apic < nr_ioapics; apic++) {
676 if (!early_ioapic_entries[apic])
677 break;
499 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) 678 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
500 ioapic_write_entry(apic, pin, 679 ioapic_write_entry(apic, pin,
501 early_ioapic_entries[apic][pin]); 680 early_ioapic_entries[apic][pin]);
681 kfree(early_ioapic_entries[apic]);
682 early_ioapic_entries[apic] = NULL;
683 }
502} 684}
503 685
504void reinit_intr_remapped_IO_APIC(int intr_remapping) 686void reinit_intr_remapped_IO_APIC(int intr_remapping)
@@ -512,25 +694,7 @@ void reinit_intr_remapped_IO_APIC(int intr_remapping)
512 */ 694 */
513 restore_IO_APIC_setup(); 695 restore_IO_APIC_setup();
514} 696}
515 697#endif
516int skip_ioapic_setup;
517int ioapic_force;
518
519static int __init parse_noapic(char *str)
520{
521 disable_ioapic_setup();
522 return 0;
523}
524early_param("noapic", parse_noapic);
525
526/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
527static int __init disable_timer_pin_setup(char *arg)
528{
529 disable_timer_pin_1 = 1;
530 return 1;
531}
532__setup("disable_timer_pin_1", disable_timer_pin_setup);
533
534 698
535/* 699/*
536 * Find the IRQ entry number of a certain pin. 700 * Find the IRQ entry number of a certain pin.
@@ -634,22 +798,54 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
634 best_guess = irq; 798 best_guess = irq;
635 } 799 }
636 } 800 }
637 BUG_ON(best_guess >= NR_IRQS);
638 return best_guess; 801 return best_guess;
639} 802}
640 803
804EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
805
806#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
807/*
808 * EISA Edge/Level control register, ELCR
809 */
810static int EISA_ELCR(unsigned int irq)
811{
812 if (irq < 16) {
813 unsigned int port = 0x4d0 + (irq >> 3);
814 return (inb(port) >> (irq & 7)) & 1;
815 }
816 apic_printk(APIC_VERBOSE, KERN_INFO
817 "Broken MPtable reports ISA irq %d\n", irq);
818 return 0;
819}
820
821#endif
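
The ELCR occupies I/O ports 0x4d0/0x4d1 with one bit per ISA IRQ: the port index is irq >> 3, the bit within it is irq & 7, and a set bit means level triggered. A worked check of the addressing for all 16 IRQs (inb() swapped for a hypothetical in-memory table so this runs in userspace):

#include <stdio.h>

int main(void)
{
	/* hypothetical ELCR contents: ports 0x4d0 then 0x4d1 */
	unsigned char elcr[2] = { 0x20, 0x0a };  /* IRQ5, IRQ9, IRQ11 level */
	int irq;

	for (irq = 0; irq < 16; irq++) {
		unsigned int port = 0x4d0 + (irq >> 3);
		int level = (elcr[irq >> 3] >> (irq & 7)) & 1;

		if (level)
			printf("IRQ%2d: port %#x bit %d -> level\n",
			       irq, port, irq & 7);
	}
	return 0;
}
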
822
641/* ISA interrupts are always polarity zero edge triggered, 823/* ISA interrupts are always polarity zero edge triggered,
642 * when listed as conforming in the MP table. */ 824 * when listed as conforming in the MP table. */
643 825
644#define default_ISA_trigger(idx) (0) 826#define default_ISA_trigger(idx) (0)
645#define default_ISA_polarity(idx) (0) 827#define default_ISA_polarity(idx) (0)
646 828
829/* EISA interrupts are always polarity zero and can be edge or level
830 * trigger depending on the ELCR value. If an interrupt is listed as
831 * EISA conforming in the MP table, that means its trigger type must
832 * be read in from the ELCR */
833
834#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
835#define default_EISA_polarity(idx) default_ISA_polarity(idx)
836
647/* PCI interrupts are always polarity one level triggered, 837/* PCI interrupts are always polarity one level triggered,
648 * when listed as conforming in the MP table. */ 838 * when listed as conforming in the MP table. */
649 839
650#define default_PCI_trigger(idx) (1) 840#define default_PCI_trigger(idx) (1)
651#define default_PCI_polarity(idx) (1) 841#define default_PCI_polarity(idx) (1)
652 842
843/* MCA interrupts are always polarity zero level triggered,
844 * when listed as conforming in the MP table. */
845
846#define default_MCA_trigger(idx) (1)
847#define default_MCA_polarity(idx) default_ISA_polarity(idx)
848
653static int MPBIOS_polarity(int idx) 849static int MPBIOS_polarity(int idx)
654{ 850{
655 int bus = mp_irqs[idx].mp_srcbus; 851 int bus = mp_irqs[idx].mp_srcbus;
@@ -707,6 +903,36 @@ static int MPBIOS_trigger(int idx)
707 trigger = default_ISA_trigger(idx); 903 trigger = default_ISA_trigger(idx);
708 else 904 else
709 trigger = default_PCI_trigger(idx); 905 trigger = default_PCI_trigger(idx);
906#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
907 switch (mp_bus_id_to_type[bus]) {
908 case MP_BUS_ISA: /* ISA pin */
909 {
910 /* set before the switch */
911 break;
912 }
913 case MP_BUS_EISA: /* EISA pin */
914 {
915 trigger = default_EISA_trigger(idx);
916 break;
917 }
918 case MP_BUS_PCI: /* PCI pin */
919 {
920 /* set before the switch */
921 break;
922 }
923 case MP_BUS_MCA: /* MCA pin */
924 {
925 trigger = default_MCA_trigger(idx);
926 break;
927 }
928 default:
929 {
930 printk(KERN_WARNING "broken BIOS!!\n");
931 trigger = 1;
932 break;
933 }
934 }
935#endif
710 break; 936 break;
711 case 1: /* edge */ 937 case 1: /* edge */
712 { 938 {
@@ -744,6 +970,7 @@ static inline int irq_trigger(int idx)
744 return MPBIOS_trigger(idx); 970 return MPBIOS_trigger(idx);
745} 971}
746 972
973int (*ioapic_renumber_irq)(int ioapic, int irq);
747static int pin_2_irq(int idx, int apic, int pin) 974static int pin_2_irq(int idx, int apic, int pin)
748{ 975{
749 int irq, i; 976 int irq, i;
@@ -765,8 +992,32 @@ static int pin_2_irq(int idx, int apic, int pin)
765 while (i < apic) 992 while (i < apic)
766 irq += nr_ioapic_registers[i++]; 993 irq += nr_ioapic_registers[i++];
767 irq += pin; 994 irq += pin;
995 /*
996 * For MPS mode, so far only needed by ES7000 platform
997 */
998 if (ioapic_renumber_irq)
999 irq = ioapic_renumber_irq(apic, irq);
768 } 1000 }
769 BUG_ON(irq >= NR_IRQS); 1001
1002#ifdef CONFIG_X86_32
1003 /*
1004 * PCI IRQ command line redirection. Yes, limits are hardcoded.
1005 */
1006 if ((pin >= 16) && (pin <= 23)) {
1007 if (pirq_entries[pin-16] != -1) {
1008 if (!pirq_entries[pin-16]) {
1009 apic_printk(APIC_VERBOSE, KERN_DEBUG
1010 "disabling PIRQ%d\n", pin-16);
1011 } else {
1012 irq = pirq_entries[pin-16];
1013 apic_printk(APIC_VERBOSE, KERN_DEBUG
1014 "using PIRQ%d -> IRQ %d\n",
1015 pin-16, irq);
1016 }
1017 }
1018 }
1019#endif
1020
770 return irq; 1021 return irq;
771} 1022}
772 1023
@@ -801,8 +1052,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
801 int cpu; 1052 int cpu;
802 struct irq_cfg *cfg; 1053 struct irq_cfg *cfg;
803 1054
804 BUG_ON((unsigned)irq >= NR_IRQS); 1055 cfg = irq_cfg(irq);
805 cfg = &irq_cfg[irq];
806 1056
807 /* Only try and allocate irqs on cpus that are present */ 1057 /* Only try and allocate irqs on cpus that are present */
808 cpus_and(mask, mask, cpu_online_map); 1058 cpus_and(mask, mask, cpu_online_map);
@@ -837,8 +1087,13 @@ next:
837 } 1087 }
838 if (unlikely(current_vector == vector)) 1088 if (unlikely(current_vector == vector))
839 continue; 1089 continue;
1090#ifdef CONFIG_X86_64
840 if (vector == IA32_SYSCALL_VECTOR) 1091 if (vector == IA32_SYSCALL_VECTOR)
841 goto next; 1092 goto next;
1093#else
1094 if (vector == SYSCALL_VECTOR)
1095 goto next;
1096#endif
842 for_each_cpu_mask_nr(new_cpu, new_mask) 1097 for_each_cpu_mask_nr(new_cpu, new_mask)
843 if (per_cpu(vector_irq, new_cpu)[vector] != -1) 1098 if (per_cpu(vector_irq, new_cpu)[vector] != -1)
844 goto next; 1099 goto next;
@@ -875,8 +1130,7 @@ static void __clear_irq_vector(int irq)
875 cpumask_t mask; 1130 cpumask_t mask;
876 int cpu, vector; 1131 int cpu, vector;
877 1132
878 BUG_ON((unsigned)irq >= NR_IRQS); 1133 cfg = irq_cfg(irq);
879 cfg = &irq_cfg[irq];
880 BUG_ON(!cfg->vector); 1134 BUG_ON(!cfg->vector);
881 1135
882 vector = cfg->vector; 1136 vector = cfg->vector;
@@ -893,12 +1147,13 @@ void __setup_vector_irq(int cpu)
893 /* Initialize vector_irq on a new cpu */ 1147 /* Initialize vector_irq on a new cpu */
894 /* This function must be called with vector_lock held */ 1148 /* This function must be called with vector_lock held */
895 int irq, vector; 1149 int irq, vector;
1150 struct irq_cfg *cfg;
896 1151
897 /* Mark the inuse vectors */ 1152 /* Mark the inuse vectors */
898 for (irq = 0; irq < NR_IRQS; ++irq) { 1153 for_each_irq_cfg(irq, cfg) {
899 if (!cpu_isset(cpu, irq_cfg[irq].domain)) 1154 if (!cpu_isset(cpu, cfg->domain))
900 continue; 1155 continue;
901 vector = irq_cfg[irq].vector; 1156 vector = cfg->vector;
902 per_cpu(vector_irq, cpu)[vector] = irq; 1157 per_cpu(vector_irq, cpu)[vector] = irq;
903 } 1158 }
904 /* Mark the free vectors */ 1159 /* Mark the free vectors */
@@ -906,7 +1161,9 @@ void __setup_vector_irq(int cpu)
906 irq = per_cpu(vector_irq, cpu)[vector]; 1161 irq = per_cpu(vector_irq, cpu)[vector];
907 if (irq < 0) 1162 if (irq < 0)
908 continue; 1163 continue;
909 if (!cpu_isset(cpu, irq_cfg[irq].domain)) 1164
1165 cfg = irq_cfg(irq);
1166 if (!cpu_isset(cpu, cfg->domain))
910 per_cpu(vector_irq, cpu)[vector] = -1; 1167 per_cpu(vector_irq, cpu)[vector] = -1;
911 } 1168 }
912} 1169}
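
__setup_vector_irq() now walks the irq_cfg entries through for_each_irq_cfg(): vectors whose irq domain includes the incoming CPU are marked in-use, and every other slot in the per-CPU vector table is reset to -1. A toy rebuild with plain arrays (NR_VECTORS shrunk and the cpumask domain test reduced to a bitmask, purely for illustration):

#include <stdio.h>

#define NR_VECTORS 16
#define NR_IRQS    4

struct irq_cfg {
	unsigned int vector;
	unsigned int domain;     /* toy cpumask: bit n = cpu n */
};

int main(void)
{
	struct irq_cfg cfgx[NR_IRQS] = {
		{ 3, 0x3 }, { 4, 0x1 }, { 5, 0x2 }, { 6, 0x3 },
	};
	int vector_irq[NR_VECTORS];
	int cpu = 1, irq, vector;

	/* start with every vector free, like the "mark the free vectors" pass */
	for (vector = 0; vector < NR_VECTORS; vector++)
		vector_irq[vector] = -1;

	/* mark the in-use vectors: only irqs whose domain includes this cpu */
	for (irq = 0; irq < NR_IRQS; irq++)
		if (cfgx[irq].domain & (1u << cpu))
			vector_irq[cfgx[irq].vector] = irq;

	for (vector = 0; vector < NR_VECTORS; vector++)
		if (vector_irq[vector] != -1)
			printf("vector %d -> irq %d\n", vector, vector_irq[vector]);
	return 0;
}
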
@@ -916,16 +1173,49 @@ static struct irq_chip ioapic_chip;
916static struct irq_chip ir_ioapic_chip; 1173static struct irq_chip ir_ioapic_chip;
917#endif 1174#endif
918 1175
1176#define IOAPIC_AUTO -1
1177#define IOAPIC_EDGE 0
1178#define IOAPIC_LEVEL 1
1179
1180#ifdef CONFIG_X86_32
1181static inline int IO_APIC_irq_trigger(int irq)
1182{
1183 int apic, idx, pin;
1184
1185 for (apic = 0; apic < nr_ioapics; apic++) {
1186 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
1187 idx = find_irq_entry(apic, pin, mp_INT);
1188 if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
1189 return irq_trigger(idx);
1190 }
1191 }
1192 /*
1193 * nonexistent IRQs are edge default
1194 */
1195 return 0;
1196}
1197#else
1198static inline int IO_APIC_irq_trigger(int irq)
1199{
1200 return 1;
1201}
1202#endif
1203
919static void ioapic_register_intr(int irq, unsigned long trigger) 1204static void ioapic_register_intr(int irq, unsigned long trigger)
920{ 1205{
921 if (trigger) 1206 struct irq_desc *desc;
922 irq_desc[irq].status |= IRQ_LEVEL; 1207
1208 desc = irq_to_desc(irq);
1209
1210 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1211 trigger == IOAPIC_LEVEL)
1212 desc->status |= IRQ_LEVEL;
923 else 1213 else
924 irq_desc[irq].status &= ~IRQ_LEVEL; 1214 desc->status &= ~IRQ_LEVEL;
925 1215
926#ifdef CONFIG_INTR_REMAP 1216#ifdef CONFIG_INTR_REMAP
927 if (irq_remapped(irq)) { 1217 if (irq_remapped(irq)) {
928 irq_desc[irq].status |= IRQ_MOVE_PCNTXT; 1218 desc->status |= IRQ_MOVE_PCNTXT;
929 if (trigger) 1219 if (trigger)
930 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, 1220 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
931 handle_fasteoi_irq, 1221 handle_fasteoi_irq,
@@ -936,7 +1226,8 @@ static void ioapic_register_intr(int irq, unsigned long trigger)
936 return; 1226 return;
937 } 1227 }
938#endif 1228#endif
939 if (trigger) 1229 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1230 trigger == IOAPIC_LEVEL)
940 set_irq_chip_and_handler_name(irq, &ioapic_chip, 1231 set_irq_chip_and_handler_name(irq, &ioapic_chip,
941 handle_fasteoi_irq, 1232 handle_fasteoi_irq,
942 "fasteoi"); 1233 "fasteoi");
@@ -1009,13 +1300,15 @@ static int setup_ioapic_entry(int apic, int irq,
1009static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, 1300static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
1010 int trigger, int polarity) 1301 int trigger, int polarity)
1011{ 1302{
1012 struct irq_cfg *cfg = irq_cfg + irq; 1303 struct irq_cfg *cfg;
1013 struct IO_APIC_route_entry entry; 1304 struct IO_APIC_route_entry entry;
1014 cpumask_t mask; 1305 cpumask_t mask;
1015 1306
1016 if (!IO_APIC_IRQ(irq)) 1307 if (!IO_APIC_IRQ(irq))
1017 return; 1308 return;
1018 1309
1310 cfg = irq_cfg(irq);
1311
1019 mask = TARGET_CPUS; 1312 mask = TARGET_CPUS;
1020 if (assign_irq_vector(irq, mask)) 1313 if (assign_irq_vector(irq, mask))
1021 return; 1314 return;
@@ -1047,37 +1340,49 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
1047 1340
1048static void __init setup_IO_APIC_irqs(void) 1341static void __init setup_IO_APIC_irqs(void)
1049{ 1342{
1050 int apic, pin, idx, irq, first_notcon = 1; 1343 int apic, pin, idx, irq;
1344 int notcon = 0;
1051 1345
1052 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); 1346 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
1053 1347
1054 for (apic = 0; apic < nr_ioapics; apic++) { 1348 for (apic = 0; apic < nr_ioapics; apic++) {
1055 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { 1349 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
1056
1057 idx = find_irq_entry(apic,pin,mp_INT);
1058 if (idx == -1) {
1059 if (first_notcon) {
1060 apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin);
1061 first_notcon = 0;
1062 } else
1063 apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin);
1064 continue;
1065 }
1066 if (!first_notcon) {
1067 apic_printk(APIC_VERBOSE, " not connected.\n");
1068 first_notcon = 1;
1069 }
1070 1350
1071 irq = pin_2_irq(idx, apic, pin); 1351 idx = find_irq_entry(apic, pin, mp_INT);
1072 add_pin_to_irq(irq, apic, pin); 1352 if (idx == -1) {
1353 if (!notcon) {
1354 notcon = 1;
1355 apic_printk(APIC_VERBOSE,
1356 KERN_DEBUG " %d-%d",
1357 mp_ioapics[apic].mp_apicid,
1358 pin);
1359 } else
1360 apic_printk(APIC_VERBOSE, " %d-%d",
1361 mp_ioapics[apic].mp_apicid,
1362 pin);
1363 continue;
1364 }
1365 if (notcon) {
1366 apic_printk(APIC_VERBOSE,
1367 " (apicid-pin) not connected\n");
1368 notcon = 0;
1369 }
1073 1370
1074 setup_IO_APIC_irq(apic, pin, irq, 1371 irq = pin_2_irq(idx, apic, pin);
1075 irq_trigger(idx), irq_polarity(idx)); 1372#ifdef CONFIG_X86_32
1076 } 1373 if (multi_timer_check(apic, irq))
1374 continue;
1375#endif
1376 add_pin_to_irq(irq, apic, pin);
1377
1378 setup_IO_APIC_irq(apic, pin, irq,
1379 irq_trigger(idx), irq_polarity(idx));
1380 }
1077 } 1381 }
1078 1382
1079 if (!first_notcon) 1383 if (notcon)
1080 apic_printk(APIC_VERBOSE, " not connected.\n"); 1384 apic_printk(APIC_VERBOSE,
1385 " (apicid-pin) not connected\n");
1081} 1386}
1082 1387
1083/* 1388/*
@@ -1088,8 +1393,10 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
1088{ 1393{
1089 struct IO_APIC_route_entry entry; 1394 struct IO_APIC_route_entry entry;
1090 1395
1396#ifdef CONFIG_INTR_REMAP
1091 if (intr_remapping_enabled) 1397 if (intr_remapping_enabled)
1092 return; 1398 return;
1399#endif
1093 1400
1094 memset(&entry, 0, sizeof(entry)); 1401 memset(&entry, 0, sizeof(entry));
1095 1402
@@ -1124,7 +1431,10 @@ __apicdebuginit(void) print_IO_APIC(void)
1124 union IO_APIC_reg_00 reg_00; 1431 union IO_APIC_reg_00 reg_00;
1125 union IO_APIC_reg_01 reg_01; 1432 union IO_APIC_reg_01 reg_01;
1126 union IO_APIC_reg_02 reg_02; 1433 union IO_APIC_reg_02 reg_02;
1434 union IO_APIC_reg_03 reg_03;
1127 unsigned long flags; 1435 unsigned long flags;
1436 struct irq_cfg *cfg;
1437 unsigned int irq;
1128 1438
1129 if (apic_verbosity == APIC_QUIET) 1439 if (apic_verbosity == APIC_QUIET)
1130 return; 1440 return;
@@ -1147,12 +1457,16 @@ __apicdebuginit(void) print_IO_APIC(void)
1147 reg_01.raw = io_apic_read(apic, 1); 1457 reg_01.raw = io_apic_read(apic, 1);
1148 if (reg_01.bits.version >= 0x10) 1458 if (reg_01.bits.version >= 0x10)
1149 reg_02.raw = io_apic_read(apic, 2); 1459 reg_02.raw = io_apic_read(apic, 2);
1460 if (reg_01.bits.version >= 0x20)
1461 reg_03.raw = io_apic_read(apic, 3);
1150 spin_unlock_irqrestore(&ioapic_lock, flags); 1462 spin_unlock_irqrestore(&ioapic_lock, flags);
1151 1463
1152 printk("\n"); 1464 printk("\n");
1153 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); 1465 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
1154 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); 1466 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
1155 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); 1467 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
1468 printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
1469 printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS);
1156 1470
1157 printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01); 1471 printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
1158 printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); 1472 printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
@@ -1160,11 +1474,27 @@ __apicdebuginit(void) print_IO_APIC(void)
1160 printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); 1474 printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
1161 printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); 1475 printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
1162 1476
1163 if (reg_01.bits.version >= 0x10) { 1477 /*
1478 * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
1479 * but the value of reg_02 is read as the previous read register
1480 * value, so ignore it if reg_02 == reg_01.
1481 */
1482 if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
1164 printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); 1483 printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
1165 printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); 1484 printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
1166 } 1485 }
1167 1486
1487 /*
1488 * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
1489 * or reg_03, but the value of reg_0[23] is read as the previous read
1490 * register value, so ignore it if reg_03 == reg_0[12].
1491 */
1492 if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
1493 reg_03.raw != reg_01.raw) {
1494 printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
1495 printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT);
1496 }
1497
1168 printk(KERN_DEBUG ".... IRQ redirection table:\n"); 1498 printk(KERN_DEBUG ".... IRQ redirection table:\n");
1169 1499
1170 printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" 1500 printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
@@ -1193,16 +1523,16 @@ __apicdebuginit(void) print_IO_APIC(void)
1193 } 1523 }
1194 } 1524 }
1195 printk(KERN_DEBUG "IRQ to pin mappings:\n"); 1525 printk(KERN_DEBUG "IRQ to pin mappings:\n");
1196 for (i = 0; i < NR_IRQS; i++) { 1526 for_each_irq_cfg(irq, cfg) {
1197 struct irq_pin_list *entry = irq_2_pin + i; 1527 struct irq_pin_list *entry = cfg->irq_2_pin;
1198 if (entry->pin < 0) 1528 if (!entry)
1199 continue; 1529 continue;
1200 printk(KERN_DEBUG "IRQ%d ", i); 1530 printk(KERN_DEBUG "IRQ%d ", irq);
1201 for (;;) { 1531 for (;;) {
1202 printk("-> %d:%d", entry->apic, entry->pin); 1532 printk("-> %d:%d", entry->apic, entry->pin);
1203 if (!entry->next) 1533 if (!entry->next)
1204 break; 1534 break;
1205 entry = irq_2_pin + entry->next; 1535 entry = entry->next;
1206 } 1536 }
1207 printk("\n"); 1537 printk("\n");
1208 } 1538 }
@@ -1236,7 +1566,7 @@ __apicdebuginit(void) print_APIC_bitfield(int base)
1236__apicdebuginit(void) print_local_APIC(void *dummy) 1566__apicdebuginit(void) print_local_APIC(void *dummy)
1237{ 1567{
1238 unsigned int v, ver, maxlvt; 1568 unsigned int v, ver, maxlvt;
1239 unsigned long icr; 1569 u64 icr;
1240 1570
1241 if (apic_verbosity == APIC_QUIET) 1571 if (apic_verbosity == APIC_QUIET)
1242 return; 1572 return;
@@ -1253,20 +1583,31 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
1253 v = apic_read(APIC_TASKPRI); 1583 v = apic_read(APIC_TASKPRI);
1254 printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); 1584 printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
1255 1585
1256 v = apic_read(APIC_ARBPRI); 1586 if (APIC_INTEGRATED(ver)) { /* !82489DX */
1257 printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v, 1587 if (!APIC_XAPIC(ver)) {
1258 v & APIC_ARBPRI_MASK); 1588 v = apic_read(APIC_ARBPRI);
1259 v = apic_read(APIC_PROCPRI); 1589 printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
1260 printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v); 1590 v & APIC_ARBPRI_MASK);
1591 }
1592 v = apic_read(APIC_PROCPRI);
1593 printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
1594 }
1595
1596 /*
1597 * Remote read supported only in the 82489DX and local APIC for
1598 * Pentium processors.
1599 */
1600 if (!APIC_INTEGRATED(ver) || maxlvt == 3) {
1601 v = apic_read(APIC_RRR);
1602 printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
1603 }
1261 1604
1262 v = apic_read(APIC_EOI);
1263 printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
1264 v = apic_read(APIC_RRR);
1265 printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
1266 v = apic_read(APIC_LDR); 1605 v = apic_read(APIC_LDR);
1267 printk(KERN_DEBUG "... APIC LDR: %08x\n", v); 1606 printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
1268 v = apic_read(APIC_DFR); 1607 if (!x2apic_enabled()) {
1269 printk(KERN_DEBUG "... APIC DFR: %08x\n", v); 1608 v = apic_read(APIC_DFR);
1609 printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
1610 }
1270 v = apic_read(APIC_SPIV); 1611 v = apic_read(APIC_SPIV);
1271 printk(KERN_DEBUG "... APIC SPIV: %08x\n", v); 1612 printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
1272 1613
@@ -1277,8 +1618,13 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
1277 printk(KERN_DEBUG "... APIC IRR field:\n"); 1618 printk(KERN_DEBUG "... APIC IRR field:\n");
1278 print_APIC_bitfield(APIC_IRR); 1619 print_APIC_bitfield(APIC_IRR);
1279 1620
1280 v = apic_read(APIC_ESR); 1621 if (APIC_INTEGRATED(ver)) { /* !82489DX */
1281 printk(KERN_DEBUG "... APIC ESR: %08x\n", v); 1622 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
1623 apic_write(APIC_ESR, 0);
1624
1625 v = apic_read(APIC_ESR);
1626 printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
1627 }
1282 1628
1283 icr = apic_icr_read(); 1629 icr = apic_icr_read();
1284 printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr); 1630 printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
@@ -1312,7 +1658,12 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
1312 1658
1313__apicdebuginit(void) print_all_local_APICs(void) 1659__apicdebuginit(void) print_all_local_APICs(void)
1314{ 1660{
1315 on_each_cpu(print_local_APIC, NULL, 1); 1661 int cpu;
1662
1663 preempt_disable();
1664 for_each_online_cpu(cpu)
1665 smp_call_function_single(cpu, print_local_APIC, NULL, 1);
1666 preempt_enable();
1316} 1667}
1317 1668
1318__apicdebuginit(void) print_PIC(void) 1669__apicdebuginit(void) print_PIC(void)
@@ -1359,17 +1710,22 @@ __apicdebuginit(int) print_all_ICs(void)
1359fs_initcall(print_all_ICs); 1710fs_initcall(print_all_ICs);
1360 1711
1361 1712
1713/* Where if anywhere is the i8259 connect in external int mode */
1714static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
1715
1362void __init enable_IO_APIC(void) 1716void __init enable_IO_APIC(void)
1363{ 1717{
1364 union IO_APIC_reg_01 reg_01; 1718 union IO_APIC_reg_01 reg_01;
1365 int i8259_apic, i8259_pin; 1719 int i8259_apic, i8259_pin;
1366 int i, apic; 1720 int apic;
1367 unsigned long flags; 1721 unsigned long flags;
1368 1722
1369 for (i = 0; i < PIN_MAP_SIZE; i++) { 1723#ifdef CONFIG_X86_32
1370 irq_2_pin[i].pin = -1; 1724 int i;
1371 irq_2_pin[i].next = 0; 1725 if (!pirqs_enabled)
1372 } 1726 for (i = 0; i < MAX_PIRQS; i++)
1727 pirq_entries[i] = -1;
1728#endif
1373 1729
1374 /* 1730 /*
1375 * The number of IO-APIC IRQ registers (== #pins): 1731 * The number of IO-APIC IRQ registers (== #pins):
@@ -1399,6 +1755,10 @@ void __init enable_IO_APIC(void)
1399 } 1755 }
1400 found_i8259: 1756 found_i8259:
 1401 /* Look to see if the MP table has reported the ExtINT */ 1757 /* Look to see if the MP table has reported the ExtINT */
1758 /* If we could not find the appropriate pin by looking at the ioapic
 1759 * the i8259 is probably not connected to the ioapic, but give the
1760 * mptable a chance anyway.
1761 */
1402 i8259_pin = find_isa_irq_pin(0, mp_ExtINT); 1762 i8259_pin = find_isa_irq_pin(0, mp_ExtINT);
1403 i8259_apic = find_isa_irq_apic(0, mp_ExtINT); 1763 i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
1404 /* Trust the MP table if nothing is setup in the hardware */ 1764 /* Trust the MP table if nothing is setup in the hardware */
@@ -1458,6 +1818,133 @@ void disable_IO_APIC(void)
1458 disconnect_bsp_APIC(ioapic_i8259.pin != -1); 1818 disconnect_bsp_APIC(ioapic_i8259.pin != -1);
1459} 1819}
1460 1820
1821#ifdef CONFIG_X86_32
1822/*
1823 * function to set the IO-APIC physical IDs based on the
1824 * values stored in the MPC table.
1825 *
1826 * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
1827 */
1828
1829static void __init setup_ioapic_ids_from_mpc(void)
1830{
1831 union IO_APIC_reg_00 reg_00;
1832 physid_mask_t phys_id_present_map;
1833 int apic;
1834 int i;
1835 unsigned char old_id;
1836 unsigned long flags;
1837
1838 if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
1839 return;
1840
1841 /*
1842 * Don't check I/O APIC IDs for xAPIC systems. They have
1843 * no meaning without the serial APIC bus.
1844 */
1845 if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
1846 || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
1847 return;
1848 /*
1849 * This is broken; anything with a real cpu count has to
1850 * circumvent this idiocy regardless.
1851 */
1852 phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
1853
1854 /*
1855 * Set the IOAPIC ID to the value stored in the MPC table.
1856 */
1857 for (apic = 0; apic < nr_ioapics; apic++) {
1858
1859 /* Read the register 0 value */
1860 spin_lock_irqsave(&ioapic_lock, flags);
1861 reg_00.raw = io_apic_read(apic, 0);
1862 spin_unlock_irqrestore(&ioapic_lock, flags);
1863
1864 old_id = mp_ioapics[apic].mp_apicid;
1865
1866 if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
1867 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
1868 apic, mp_ioapics[apic].mp_apicid);
1869 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
1870 reg_00.bits.ID);
1871 mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
1872 }
1873
1874 /*
1875 * Sanity check, is the ID really free? Every APIC in a
1876 * system must have a unique ID or we get lots of nice
1877 * 'stuck on smp_invalidate_needed IPI wait' messages.
1878 */
1879 if (check_apicid_used(phys_id_present_map,
1880 mp_ioapics[apic].mp_apicid)) {
1881 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
1882 apic, mp_ioapics[apic].mp_apicid);
1883 for (i = 0; i < get_physical_broadcast(); i++)
1884 if (!physid_isset(i, phys_id_present_map))
1885 break;
1886 if (i >= get_physical_broadcast())
1887 panic("Max APIC ID exceeded!\n");
1888 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
1889 i);
1890 physid_set(i, phys_id_present_map);
1891 mp_ioapics[apic].mp_apicid = i;
1892 } else {
1893 physid_mask_t tmp;
1894 tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
1895 apic_printk(APIC_VERBOSE, "Setting %d in the "
1896 "phys_id_present_map\n",
1897 mp_ioapics[apic].mp_apicid);
1898 physids_or(phys_id_present_map, phys_id_present_map, tmp);
1899 }
1900
1901
1902 /*
1903 * We need to adjust the IRQ routing table
1904 * if the ID changed.
1905 */
1906 if (old_id != mp_ioapics[apic].mp_apicid)
1907 for (i = 0; i < mp_irq_entries; i++)
1908 if (mp_irqs[i].mp_dstapic == old_id)
1909 mp_irqs[i].mp_dstapic
1910 = mp_ioapics[apic].mp_apicid;
1911
1912 /*
1913 * Read the right value from the MPC table and
1914 * write it into the ID register.
1915 */
1916 apic_printk(APIC_VERBOSE, KERN_INFO
1917 "...changing IO-APIC physical APIC ID to %d ...",
1918 mp_ioapics[apic].mp_apicid);
1919
1920 reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
1921 spin_lock_irqsave(&ioapic_lock, flags);
1922 io_apic_write(apic, 0, reg_00.raw);
1923 spin_unlock_irqrestore(&ioapic_lock, flags);
1924
1925 /*
1926 * Sanity check
1927 */
1928 spin_lock_irqsave(&ioapic_lock, flags);
1929 reg_00.raw = io_apic_read(apic, 0);
1930 spin_unlock_irqrestore(&ioapic_lock, flags);
1931 if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
1932 printk("could not set ID!\n");
1933 else
1934 apic_printk(APIC_VERBOSE, " ok.\n");
1935 }
1936}
1937#endif
1938
1939int no_timer_check __initdata;
1940
1941static int __init notimercheck(char *s)
1942{
1943 no_timer_check = 1;
1944 return 1;
1945}
1946__setup("no_timer_check", notimercheck);
1947
1461/* 1948/*
1462 * There is a nasty bug in some older SMP boards, their mptable lies 1949 * There is a nasty bug in some older SMP boards, their mptable lies
1463 * about the timer IRQ. We do the following to work around the situation: 1950 * about the timer IRQ. We do the following to work around the situation:
@@ -1471,6 +1958,9 @@ static int __init timer_irq_works(void)
1471 unsigned long t1 = jiffies; 1958 unsigned long t1 = jiffies;
1472 unsigned long flags; 1959 unsigned long flags;
1473 1960
1961 if (no_timer_check)
1962 return 1;
1963
1474 local_save_flags(flags); 1964 local_save_flags(flags);
1475 local_irq_enable(); 1965 local_irq_enable();
1476 /* Let ten ticks pass... */ 1966 /* Let ten ticks pass... */
@@ -1531,9 +2021,11 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
1531 return was_pending; 2021 return was_pending;
1532} 2022}
1533 2023
2024#ifdef CONFIG_X86_64
1534static int ioapic_retrigger_irq(unsigned int irq) 2025static int ioapic_retrigger_irq(unsigned int irq)
1535{ 2026{
1536 struct irq_cfg *cfg = &irq_cfg[irq]; 2027
2028 struct irq_cfg *cfg = irq_cfg(irq);
1537 unsigned long flags; 2029 unsigned long flags;
1538 2030
1539 spin_lock_irqsave(&vector_lock, flags); 2031 spin_lock_irqsave(&vector_lock, flags);
@@ -1542,6 +2034,14 @@ static int ioapic_retrigger_irq(unsigned int irq)
1542 2034
1543 return 1; 2035 return 1;
1544} 2036}
2037#else
2038static int ioapic_retrigger_irq(unsigned int irq)
2039{
2040 send_IPI_self(irq_cfg(irq)->vector);
2041
2042 return 1;
2043}
2044#endif
1545 2045
1546/* 2046/*
1547 * Level and edge triggered IO-APIC interrupts need different handling, 2047 * Level and edge triggered IO-APIC interrupts need different handling,
@@ -1580,11 +2080,11 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
1580 */ 2080 */
1581static void migrate_ioapic_irq(int irq, cpumask_t mask) 2081static void migrate_ioapic_irq(int irq, cpumask_t mask)
1582{ 2082{
1583 struct irq_cfg *cfg = irq_cfg + irq; 2083 struct irq_cfg *cfg;
1584 struct irq_desc *desc = irq_desc + irq; 2084 struct irq_desc *desc;
1585 cpumask_t tmp, cleanup_mask; 2085 cpumask_t tmp, cleanup_mask;
1586 struct irte irte; 2086 struct irte irte;
1587 int modify_ioapic_rte = desc->status & IRQ_LEVEL; 2087 int modify_ioapic_rte;
1588 unsigned int dest; 2088 unsigned int dest;
1589 unsigned long flags; 2089 unsigned long flags;
1590 2090
@@ -1598,9 +2098,12 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask)
1598 if (assign_irq_vector(irq, mask)) 2098 if (assign_irq_vector(irq, mask))
1599 return; 2099 return;
1600 2100
2101 cfg = irq_cfg(irq);
1601 cpus_and(tmp, cfg->domain, mask); 2102 cpus_and(tmp, cfg->domain, mask);
1602 dest = cpu_mask_to_apicid(tmp); 2103 dest = cpu_mask_to_apicid(tmp);
1603 2104
2105 desc = irq_to_desc(irq);
2106 modify_ioapic_rte = desc->status & IRQ_LEVEL;
1604 if (modify_ioapic_rte) { 2107 if (modify_ioapic_rte) {
1605 spin_lock_irqsave(&ioapic_lock, flags); 2108 spin_lock_irqsave(&ioapic_lock, flags);
1606 __target_IO_APIC_irq(irq, dest, cfg->vector); 2109 __target_IO_APIC_irq(irq, dest, cfg->vector);
@@ -1622,18 +2125,19 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask)
1622 cfg->move_in_progress = 0; 2125 cfg->move_in_progress = 0;
1623 } 2126 }
1624 2127
1625 irq_desc[irq].affinity = mask; 2128 desc->affinity = mask;
1626} 2129}
1627 2130
1628static int migrate_irq_remapped_level(int irq) 2131static int migrate_irq_remapped_level(int irq)
1629{ 2132{
1630 int ret = -1; 2133 int ret = -1;
2134 struct irq_desc *desc = irq_to_desc(irq);
1631 2135
1632 mask_IO_APIC_irq(irq); 2136 mask_IO_APIC_irq(irq);
1633 2137
1634 if (io_apic_level_ack_pending(irq)) { 2138 if (io_apic_level_ack_pending(irq)) {
1635 /* 2139 /*
1636 * Interrupt in progress. Migrating irq now will change the 2140 * Interrupt in progress. Migrating irq now will change the
1637 * vector information in the IO-APIC RTE and that will confuse 2141 * vector information in the IO-APIC RTE and that will confuse
1638 * the EOI broadcast performed by cpu. 2142 * the EOI broadcast performed by cpu.
1639 * So, delay the irq migration to the next instance. 2143 * So, delay the irq migration to the next instance.
@@ -1643,11 +2147,11 @@ static int migrate_irq_remapped_level(int irq)
1643 } 2147 }
1644 2148
1645 /* everything is clear. we have the right of way */ 2149 /* everything is clear. we have the right of way */
1646 migrate_ioapic_irq(irq, irq_desc[irq].pending_mask); 2150 migrate_ioapic_irq(irq, desc->pending_mask);
1647 2151
1648 ret = 0; 2152 ret = 0;
1649 irq_desc[irq].status &= ~IRQ_MOVE_PENDING; 2153 desc->status &= ~IRQ_MOVE_PENDING;
1650 cpus_clear(irq_desc[irq].pending_mask); 2154 cpus_clear(desc->pending_mask);
1651 2155
1652unmask: 2156unmask:
1653 unmask_IO_APIC_irq(irq); 2157 unmask_IO_APIC_irq(irq);
@@ -1656,10 +2160,10 @@ unmask:
1656 2160
1657static void ir_irq_migration(struct work_struct *work) 2161static void ir_irq_migration(struct work_struct *work)
1658{ 2162{
1659 int irq; 2163 unsigned int irq;
2164 struct irq_desc *desc;
1660 2165
1661 for (irq = 0; irq < NR_IRQS; irq++) { 2166 for_each_irq_desc(irq, desc) {
1662 struct irq_desc *desc = irq_desc + irq;
1663 if (desc->status & IRQ_MOVE_PENDING) { 2167 if (desc->status & IRQ_MOVE_PENDING) {
1664 unsigned long flags; 2168 unsigned long flags;
1665 2169
@@ -1671,8 +2175,7 @@ static void ir_irq_migration(struct work_struct *work)
1671 continue; 2175 continue;
1672 } 2176 }
1673 2177
1674 desc->chip->set_affinity(irq, 2178 desc->chip->set_affinity(irq, desc->pending_mask);
1675 irq_desc[irq].pending_mask);
1676 spin_unlock_irqrestore(&desc->lock, flags); 2179 spin_unlock_irqrestore(&desc->lock, flags);
1677 } 2180 }
1678 } 2181 }
@@ -1683,9 +2186,11 @@ static void ir_irq_migration(struct work_struct *work)
1683 */ 2186 */
1684static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) 2187static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
1685{ 2188{
1686 if (irq_desc[irq].status & IRQ_LEVEL) { 2189 struct irq_desc *desc = irq_to_desc(irq);
1687 irq_desc[irq].status |= IRQ_MOVE_PENDING; 2190
1688 irq_desc[irq].pending_mask = mask; 2191 if (desc->status & IRQ_LEVEL) {
2192 desc->status |= IRQ_MOVE_PENDING;
2193 desc->pending_mask = mask;
1689 migrate_irq_remapped_level(irq); 2194 migrate_irq_remapped_level(irq);
1690 return; 2195 return;
1691 } 2196 }
@@ -1698,7 +2203,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
1698{ 2203{
1699 unsigned vector, me; 2204 unsigned vector, me;
1700 ack_APIC_irq(); 2205 ack_APIC_irq();
2206#ifdef CONFIG_X86_64
1701 exit_idle(); 2207 exit_idle();
2208#endif
1702 irq_enter(); 2209 irq_enter();
1703 2210
1704 me = smp_processor_id(); 2211 me = smp_processor_id();
@@ -1707,11 +2214,12 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
1707 struct irq_desc *desc; 2214 struct irq_desc *desc;
1708 struct irq_cfg *cfg; 2215 struct irq_cfg *cfg;
1709 irq = __get_cpu_var(vector_irq)[vector]; 2216 irq = __get_cpu_var(vector_irq)[vector];
1710 if (irq >= NR_IRQS) 2217
2218 desc = irq_to_desc(irq);
2219 if (!desc)
1711 continue; 2220 continue;
1712 2221
1713 desc = irq_desc + irq; 2222 cfg = irq_cfg(irq);
1714 cfg = irq_cfg + irq;
1715 spin_lock(&desc->lock); 2223 spin_lock(&desc->lock);
1716 if (!cfg->move_cleanup_count) 2224 if (!cfg->move_cleanup_count)
1717 goto unlock; 2225 goto unlock;
@@ -1730,7 +2238,7 @@ unlock:
1730 2238
1731static void irq_complete_move(unsigned int irq) 2239static void irq_complete_move(unsigned int irq)
1732{ 2240{
1733 struct irq_cfg *cfg = irq_cfg + irq; 2241 struct irq_cfg *cfg = irq_cfg(irq);
1734 unsigned vector, me; 2242 unsigned vector, me;
1735 2243
1736 if (likely(!cfg->move_in_progress)) 2244 if (likely(!cfg->move_in_progress))
@@ -1769,19 +2277,50 @@ static void ack_apic_edge(unsigned int irq)
1769 ack_APIC_irq(); 2277 ack_APIC_irq();
1770} 2278}
1771 2279
2280atomic_t irq_mis_count;
2281
1772static void ack_apic_level(unsigned int irq) 2282static void ack_apic_level(unsigned int irq)
1773{ 2283{
2284#ifdef CONFIG_X86_32
2285 unsigned long v;
2286 int i;
2287#endif
1774 int do_unmask_irq = 0; 2288 int do_unmask_irq = 0;
1775 2289
1776 irq_complete_move(irq); 2290 irq_complete_move(irq);
1777#ifdef CONFIG_GENERIC_PENDING_IRQ 2291#ifdef CONFIG_GENERIC_PENDING_IRQ
1778 /* If we are moving the irq we need to mask it */ 2292 /* If we are moving the irq we need to mask it */
1779 if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) { 2293 if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
1780 do_unmask_irq = 1; 2294 do_unmask_irq = 1;
1781 mask_IO_APIC_irq(irq); 2295 mask_IO_APIC_irq(irq);
1782 } 2296 }
1783#endif 2297#endif
1784 2298
2299#ifdef CONFIG_X86_32
2300 /*
2301 * It appears there is an erratum which affects at least version 0x11
2302 * of I/O APIC (that's the 82093AA and cores integrated into various
2303 * chipsets). Under certain conditions a level-triggered interrupt is
2304 * erroneously delivered as an edge-triggered one but the respective IRR
2305 * bit gets set nevertheless. As a result the I/O unit expects an EOI
2306 * message but it will never arrive and further interrupts are blocked
2307 * from the source. The exact reason is so far unknown, but the
2308 * phenomenon was observed when two consecutive interrupt requests
2309 * from a given source get delivered to the same CPU and the source is
2310 * temporarily disabled in between.
2311 *
2312 * A workaround is to simulate an EOI message manually. We achieve it
2313 * by setting the trigger mode to edge and then to level when the edge
2314 * trigger mode gets detected in the TMR of a local APIC for a
2315 * level-triggered interrupt. We mask the source for the time of the
2316 * operation to prevent an edge-triggered interrupt escaping meanwhile.
2317 * The idea is from Manfred Spraul. --macro
2318 */
2319 i = irq_cfg(irq)->vector;
2320
2321 v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
2322#endif
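	/*
	 * Worked example of the indexing above (illustrative): the TMR is
	 * a 256-bit register bank exposed as eight 32-bit words spaced
	 * 0x10 apart, so vector i lives in word i / 32 at bit i % 32.
	 * The byte offset (i / 32) * 0x10 equals (i & ~0x1f) >> 1; for
	 * vector 0x31 that is APIC_TMR + 0x10, bit 0x11 -- the same bit
	 * tested against v after the EOI below.
	 */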
2323
1785 /* 2324 /*
1786 * We must acknowledge the irq before we move it or the acknowledge will 2325 * We must acknowledge the irq before we move it or the acknowledge will
1787 * not propagate properly. 2326 * not propagate properly.
@@ -1820,31 +2359,41 @@ static void ack_apic_level(unsigned int irq)
1820 move_masked_irq(irq); 2359 move_masked_irq(irq);
1821 unmask_IO_APIC_irq(irq); 2360 unmask_IO_APIC_irq(irq);
1822 } 2361 }
2362
2363#ifdef CONFIG_X86_32
2364 if (!(v & (1 << (i & 0x1f)))) {
2365 atomic_inc(&irq_mis_count);
2366 spin_lock(&ioapic_lock);
2367 __mask_and_edge_IO_APIC_irq(irq);
2368 __unmask_and_level_IO_APIC_irq(irq);
2369 spin_unlock(&ioapic_lock);
2370 }
2371#endif
1823} 2372}
1824 2373
1825static struct irq_chip ioapic_chip __read_mostly = { 2374static struct irq_chip ioapic_chip __read_mostly = {
1826 .name = "IO-APIC", 2375 .name = "IO-APIC",
1827 .startup = startup_ioapic_irq, 2376 .startup = startup_ioapic_irq,
1828 .mask = mask_IO_APIC_irq, 2377 .mask = mask_IO_APIC_irq,
1829 .unmask = unmask_IO_APIC_irq, 2378 .unmask = unmask_IO_APIC_irq,
1830 .ack = ack_apic_edge, 2379 .ack = ack_apic_edge,
1831 .eoi = ack_apic_level, 2380 .eoi = ack_apic_level,
1832#ifdef CONFIG_SMP 2381#ifdef CONFIG_SMP
1833 .set_affinity = set_ioapic_affinity_irq, 2382 .set_affinity = set_ioapic_affinity_irq,
1834#endif 2383#endif
1835 .retrigger = ioapic_retrigger_irq, 2384 .retrigger = ioapic_retrigger_irq,
1836}; 2385};
1837 2386
1838#ifdef CONFIG_INTR_REMAP 2387#ifdef CONFIG_INTR_REMAP
1839static struct irq_chip ir_ioapic_chip __read_mostly = { 2388static struct irq_chip ir_ioapic_chip __read_mostly = {
1840 .name = "IR-IO-APIC", 2389 .name = "IR-IO-APIC",
1841 .startup = startup_ioapic_irq, 2390 .startup = startup_ioapic_irq,
1842 .mask = mask_IO_APIC_irq, 2391 .mask = mask_IO_APIC_irq,
1843 .unmask = unmask_IO_APIC_irq, 2392 .unmask = unmask_IO_APIC_irq,
1844 .ack = ack_x2apic_edge, 2393 .ack = ack_x2apic_edge,
1845 .eoi = ack_x2apic_level, 2394 .eoi = ack_x2apic_level,
1846#ifdef CONFIG_SMP 2395#ifdef CONFIG_SMP
1847 .set_affinity = set_ir_ioapic_affinity_irq, 2396 .set_affinity = set_ir_ioapic_affinity_irq,
1848#endif 2397#endif
1849 .retrigger = ioapic_retrigger_irq, 2398 .retrigger = ioapic_retrigger_irq,
1850}; 2399};
@@ -1853,6 +2402,8 @@ static struct irq_chip ir_ioapic_chip __read_mostly = {
1853static inline void init_IO_APIC_traps(void) 2402static inline void init_IO_APIC_traps(void)
1854{ 2403{
1855 int irq; 2404 int irq;
2405 struct irq_desc *desc;
2406 struct irq_cfg *cfg;
1856 2407
1857 /* 2408 /*
1858 * NOTE! The local APIC isn't very good at handling 2409 * NOTE! The local APIC isn't very good at handling
@@ -1865,8 +2416,8 @@ static inline void init_IO_APIC_traps(void)
1865 * Also, we've got to be careful not to trash gate 2416 * Also, we've got to be careful not to trash gate
1866 * 0x80, because int 0x80 is hm, kind of importantish. ;) 2417 * 0x80, because int 0x80 is hm, kind of importantish. ;)
1867 */ 2418 */
1868 for (irq = 0; irq < NR_IRQS ; irq++) { 2419 for_each_irq_cfg(irq, cfg) {
1869 if (IO_APIC_IRQ(irq) && !irq_cfg[irq].vector) { 2420 if (IO_APIC_IRQ(irq) && !cfg->vector) {
1870 /* 2421 /*
1871 * Hmm.. We don't have an entry for this, 2422 * Hmm.. We don't have an entry for this,
1872 * so default to an old-fashioned 8259 2423 * so default to an old-fashioned 8259
@@ -1874,27 +2425,33 @@ static inline void init_IO_APIC_traps(void)
1874 */ 2425 */
1875 if (irq < 16) 2426 if (irq < 16)
1876 make_8259A_irq(irq); 2427 make_8259A_irq(irq);
1877 else 2428 else {
2429 desc = irq_to_desc(irq);
1878 /* Strange. Oh, well.. */ 2430 /* Strange. Oh, well.. */
1879 irq_desc[irq].chip = &no_irq_chip; 2431 desc->chip = &no_irq_chip;
2432 }
1880 } 2433 }
1881 } 2434 }
1882} 2435}
1883 2436
1884static void unmask_lapic_irq(unsigned int irq) 2437/*
2438 * The local APIC irq-chip implementation:
2439 */
2440
2441static void mask_lapic_irq(unsigned int irq)
1885{ 2442{
1886 unsigned long v; 2443 unsigned long v;
1887 2444
1888 v = apic_read(APIC_LVT0); 2445 v = apic_read(APIC_LVT0);
1889 apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); 2446 apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
1890} 2447}
1891 2448
1892static void mask_lapic_irq(unsigned int irq) 2449static void unmask_lapic_irq(unsigned int irq)
1893{ 2450{
1894 unsigned long v; 2451 unsigned long v;
1895 2452
1896 v = apic_read(APIC_LVT0); 2453 v = apic_read(APIC_LVT0);
1897 apic_write(APIC_LVT0, v | APIC_LVT_MASKED); 2454 apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
1898} 2455}
1899 2456
1900static void ack_lapic_irq (unsigned int irq) 2457static void ack_lapic_irq (unsigned int irq)
@@ -1911,7 +2468,10 @@ static struct irq_chip lapic_chip __read_mostly = {
1911 2468
1912static void lapic_register_intr(int irq) 2469static void lapic_register_intr(int irq)
1913{ 2470{
1914 irq_desc[irq].status &= ~IRQ_LEVEL; 2471 struct irq_desc *desc;
2472
2473 desc = irq_to_desc(irq);
2474 desc->status &= ~IRQ_LEVEL;
1915 set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, 2475 set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
1916 "edge"); 2476 "edge");
1917} 2477}
@@ -1919,19 +2479,19 @@ static void lapic_register_intr(int irq)
1919static void __init setup_nmi(void) 2479static void __init setup_nmi(void)
1920{ 2480{
1921 /* 2481 /*
1922 * Dirty trick to enable the NMI watchdog ... 2482 * Dirty trick to enable the NMI watchdog ...
1923 * We put the 8259A master into AEOI mode and 2483 * We put the 8259A master into AEOI mode and
1924 * unmask on all local APICs LVT0 as NMI. 2484 * unmask on all local APICs LVT0 as NMI.
1925 * 2485 *
1926 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') 2486 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
1927 * is from Maciej W. Rozycki - so we do not have to EOI from 2487 * is from Maciej W. Rozycki - so we do not have to EOI from
1928 * the NMI handler or the timer interrupt. 2488 * the NMI handler or the timer interrupt.
1929 */ 2489 */
1930 printk(KERN_INFO "activating NMI Watchdog ..."); 2490 apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
1931 2491
1932 enable_NMI_through_LVT0(); 2492 enable_NMI_through_LVT0();
1933 2493
1934 printk(" done.\n"); 2494 apic_printk(APIC_VERBOSE, " done.\n");
1935} 2495}
1936 2496
1937/* 2497/*
@@ -1948,12 +2508,17 @@ static inline void __init unlock_ExtINT_logic(void)
1948 unsigned char save_control, save_freq_select; 2508 unsigned char save_control, save_freq_select;
1949 2509
1950 pin = find_isa_irq_pin(8, mp_INT); 2510 pin = find_isa_irq_pin(8, mp_INT);
2511 if (pin == -1) {
2512 WARN_ON_ONCE(1);
2513 return;
2514 }
1951 apic = find_isa_irq_apic(8, mp_INT); 2515 apic = find_isa_irq_apic(8, mp_INT);
1952 if (pin == -1) 2516 if (apic == -1) {
2517 WARN_ON_ONCE(1);
1953 return; 2518 return;
2519 }
1954 2520
1955 entry0 = ioapic_read_entry(apic, pin); 2521 entry0 = ioapic_read_entry(apic, pin);
1956
1957 clear_IO_APIC_pin(apic, pin); 2522 clear_IO_APIC_pin(apic, pin);
1958 2523
1959 memset(&entry1, 0, sizeof(entry1)); 2524 memset(&entry1, 0, sizeof(entry1));
@@ -1988,23 +2553,38 @@ static inline void __init unlock_ExtINT_logic(void)
1988 ioapic_write_entry(apic, pin, entry0); 2553 ioapic_write_entry(apic, pin, entry0);
1989} 2554}
1990 2555
2556static int disable_timer_pin_1 __initdata;
2557/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
2558static int __init disable_timer_pin_setup(char *arg)
2559{
2560 disable_timer_pin_1 = 1;
2561 return 0;
2562}
2563early_param("disable_timer_pin_1", disable_timer_pin_setup);
2564
2565int timer_through_8259 __initdata;
2566
1991/* 2567/*
1992 * This code may look a bit paranoid, but it's supposed to cooperate with 2568 * This code may look a bit paranoid, but it's supposed to cooperate with
1993 * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ 2569 * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
1994 * is so screwy. Thanks to Brian Perkins for testing/hacking this beast 2570 * is so screwy. Thanks to Brian Perkins for testing/hacking this beast
1995 * fanatically on his truly buggy board. 2571 * fanatically on his truly buggy board.
1996 * 2572 *
1997 * FIXME: really need to revamp this for modern platforms only. 2573 * FIXME: really need to revamp this for all platforms.
1998 */ 2574 */
1999static inline void __init check_timer(void) 2575static inline void __init check_timer(void)
2000{ 2576{
2001 struct irq_cfg *cfg = irq_cfg + 0; 2577 struct irq_cfg *cfg = irq_cfg(0);
2002 int apic1, pin1, apic2, pin2; 2578 int apic1, pin1, apic2, pin2;
2003 unsigned long flags; 2579 unsigned long flags;
2580 unsigned int ver;
2004 int no_pin1 = 0; 2581 int no_pin1 = 0;
2005 2582
2006 local_irq_save(flags); 2583 local_irq_save(flags);
2007 2584
2585 ver = apic_read(APIC_LVR);
2586 ver = GET_APIC_VERSION(ver);
2587
2008 /* 2588 /*
2009 * get/set the timer IRQ vector: 2589 * get/set the timer IRQ vector:
2010 */ 2590 */
@@ -2013,10 +2593,18 @@ static inline void __init check_timer(void)
2013 2593
2014 /* 2594 /*
2015 * As IRQ0 is to be enabled in the 8259A, the virtual 2595 * As IRQ0 is to be enabled in the 8259A, the virtual
2016 * wire has to be disabled in the local APIC. 2596 * wire has to be disabled in the local APIC. Also
2597 * timer interrupts need to be acknowledged manually in
2598 * the 8259A for the i82489DX when using the NMI
2599 * watchdog as that APIC treats NMIs as level-triggered.
2600 * The AEOI mode will finish them in the 8259A
2601 * automatically.
2017 */ 2602 */
2018 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); 2603 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
2019 init_8259A(1); 2604 init_8259A(1);
2605#ifdef CONFIG_X86_32
2606 timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
2607#endif
2020 2608
2021 pin1 = find_isa_irq_pin(0, mp_INT); 2609 pin1 = find_isa_irq_pin(0, mp_INT);
2022 apic1 = find_isa_irq_apic(0, mp_INT); 2610 apic1 = find_isa_irq_apic(0, mp_INT);
@@ -2035,8 +2623,10 @@ static inline void __init check_timer(void)
2035 * 8259A. 2623 * 8259A.
2036 */ 2624 */
2037 if (pin1 == -1) { 2625 if (pin1 == -1) {
2626#ifdef CONFIG_INTR_REMAP
2038 if (intr_remapping_enabled) 2627 if (intr_remapping_enabled)
2039 panic("BIOS bug: timer not connected to IO-APIC"); 2628 panic("BIOS bug: timer not connected to IO-APIC");
2629#endif
2040 pin1 = pin2; 2630 pin1 = pin2;
2041 apic1 = apic2; 2631 apic1 = apic2;
2042 no_pin1 = 1; 2632 no_pin1 = 1;
@@ -2054,7 +2644,7 @@ static inline void __init check_timer(void)
2054 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); 2644 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
2055 } 2645 }
2056 unmask_IO_APIC_irq(0); 2646 unmask_IO_APIC_irq(0);
2057 if (!no_timer_check && timer_irq_works()) { 2647 if (timer_irq_works()) {
2058 if (nmi_watchdog == NMI_IO_APIC) { 2648 if (nmi_watchdog == NMI_IO_APIC) {
2059 setup_nmi(); 2649 setup_nmi();
2060 enable_8259A_irq(0); 2650 enable_8259A_irq(0);
@@ -2063,8 +2653,10 @@ static inline void __init check_timer(void)
2063 clear_IO_APIC_pin(0, pin1); 2653 clear_IO_APIC_pin(0, pin1);
2064 goto out; 2654 goto out;
2065 } 2655 }
2656#ifdef CONFIG_INTR_REMAP
2066 if (intr_remapping_enabled) 2657 if (intr_remapping_enabled)
2067 panic("timer doesn't work through Interrupt-remapped IO-APIC"); 2658 panic("timer doesn't work through Interrupt-remapped IO-APIC");
2659#endif
2068 clear_IO_APIC_pin(apic1, pin1); 2660 clear_IO_APIC_pin(apic1, pin1);
2069 if (!no_pin1) 2661 if (!no_pin1)
2070 apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " 2662 apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -2104,6 +2696,9 @@ static inline void __init check_timer(void)
2104 "through the IO-APIC - disabling NMI Watchdog!\n"); 2696 "through the IO-APIC - disabling NMI Watchdog!\n");
2105 nmi_watchdog = NMI_NONE; 2697 nmi_watchdog = NMI_NONE;
2106 } 2698 }
2699#ifdef CONFIG_X86_32
2700 timer_ack = 0;
2701#endif
2107 2702
2108 apic_printk(APIC_QUIET, KERN_INFO 2703 apic_printk(APIC_QUIET, KERN_INFO
2109 "...trying to set up timer as Virtual Wire IRQ...\n"); 2704 "...trying to set up timer as Virtual Wire IRQ...\n");
@@ -2140,13 +2735,6 @@ out:
2140 local_irq_restore(flags); 2735 local_irq_restore(flags);
2141} 2736}
2142 2737
2143static int __init notimercheck(char *s)
2144{
2145 no_timer_check = 1;
2146 return 1;
2147}
2148__setup("no_timer_check", notimercheck);
2149
2150/* 2738/*
2151 * Traditionally ISA IRQ2 is the cascade IRQ, and is not available 2739 * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
2152 * to devices. However there may be an I/O APIC pin available for 2740 * to devices. However there may be an I/O APIC pin available for
@@ -2164,25 +2752,49 @@ __setup("no_timer_check", notimercheck);
2164 * the I/O APIC in all cases now. No actual device should request 2752 * the I/O APIC in all cases now. No actual device should request
2165 * it anyway. --macro 2753 * it anyway. --macro
2166 */ 2754 */
2167#define PIC_IRQS (1<<2) 2755#define PIC_IRQS (1 << PIC_CASCADE_IR)
2168 2756
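/*
 * Expanded (illustrative): PIC_CASCADE_IR is 2, so PIC_IRQS == 0x4 and
 * the "io_apic_irqs = ~PIC_IRQS" assignment below routes every IRQ
 * except the IRQ2 cascade through the IO-APIC -- equivalent to the old
 * literal (1<<2), just spelled symbolically.
 */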
2169void __init setup_IO_APIC(void) 2757void __init setup_IO_APIC(void)
2170{ 2758{
2171 2759
2760#ifdef CONFIG_X86_32
2761 enable_IO_APIC();
2762#else
2172 /* 2763 /*
2173 * calling enable_IO_APIC() is moved to setup_local_APIC for BP 2764 * calling enable_IO_APIC() is moved to setup_local_APIC for BP
2174 */ 2765 */
2766#endif
2175 2767
2176 io_apic_irqs = ~PIC_IRQS; 2768 io_apic_irqs = ~PIC_IRQS;
2177 2769
2178 apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); 2770 apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
2179 2771 /*
2772 * Set up IO-APIC IRQ routing.
2773 */
2774#ifdef CONFIG_X86_32
2775 if (!acpi_ioapic)
2776 setup_ioapic_ids_from_mpc();
2777#endif
2180 sync_Arb_IDs(); 2778 sync_Arb_IDs();
2181 setup_IO_APIC_irqs(); 2779 setup_IO_APIC_irqs();
2182 init_IO_APIC_traps(); 2780 init_IO_APIC_traps();
2183 check_timer(); 2781 check_timer();
2184} 2782}
2185 2783
2784/*
2785 * Called after all the initialization is done. If we didn't find any
2786 * APIC bugs then we can allow the modify fast path.
2787 */
2788
2789static int __init io_apic_bug_finalize(void)
2790{
2791 if (sis_apic_bug == -1)
2792 sis_apic_bug = 0;
2793 return 0;
2794}
2795
2796late_initcall(io_apic_bug_finalize);
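/*
 * Sketch of the fast path this finalizer unblocks (it mirrors the
 * 32-bit io_apic_modify() reproduced in the deleted file further down,
 * where sis_apic_bug is documented as -1 = don't know, 0 = no, 1 = yes):
 */
static inline void io_apic_modify_sketch(unsigned int apic,
					 unsigned int reg, unsigned int value)
{
	struct io_apic __iomem *io_apic = io_apic_base(apic);

	/* non-zero covers both "yes" (1) and the unresolved "-1" state */
	if (sis_apic_bug)
		writel(reg, &io_apic->index);
	writel(value, &io_apic->data);
}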
2797
2186struct sysfs_ioapic_data { 2798struct sysfs_ioapic_data {
2187 struct sys_device dev; 2799 struct sys_device dev;
2188 struct IO_APIC_route_entry entry[0]; 2800 struct IO_APIC_route_entry entry[0];
@@ -2270,32 +2882,51 @@ device_initcall(ioapic_init_sysfs);
2270/* 2882/*
2271 * Dynamic irq allocate and deallocation 2883 * Dynamic irq allocate and deallocation
2272 */ 2884 */
2273int create_irq(void) 2885unsigned int create_irq_nr(unsigned int irq_want)
2274{ 2886{
2275 /* Allocate an unused irq */ 2887 /* Allocate an unused irq */
2276 int irq; 2888 unsigned int irq;
2277 int new; 2889 unsigned int new;
2278 unsigned long flags; 2890 unsigned long flags;
2891 struct irq_cfg *cfg_new;
2892
2893 irq_want = nr_irqs - 1;
2279 2894
2280 irq = -ENOSPC; 2895 irq = 0;
2281 spin_lock_irqsave(&vector_lock, flags); 2896 spin_lock_irqsave(&vector_lock, flags);
2282 for (new = (NR_IRQS - 1); new >= 0; new--) { 2897 for (new = irq_want; new > 0; new--) {
2283 if (platform_legacy_irq(new)) 2898 if (platform_legacy_irq(new))
2284 continue; 2899 continue;
2285 if (irq_cfg[new].vector != 0) 2900 cfg_new = irq_cfg(new);
2901 if (cfg_new && cfg_new->vector != 0)
2286 continue; 2902 continue;
2903 /* check if we need to create one */
2904 if (!cfg_new)
2905 cfg_new = irq_cfg_alloc(new);
2287 if (__assign_irq_vector(new, TARGET_CPUS) == 0) 2906 if (__assign_irq_vector(new, TARGET_CPUS) == 0)
2288 irq = new; 2907 irq = new;
2289 break; 2908 break;
2290 } 2909 }
2291 spin_unlock_irqrestore(&vector_lock, flags); 2910 spin_unlock_irqrestore(&vector_lock, flags);
2292 2911
2293 if (irq >= 0) { 2912 if (irq > 0) {
2294 dynamic_irq_init(irq); 2913 dynamic_irq_init(irq);
2295 } 2914 }
2296 return irq; 2915 return irq;
2297} 2916}
2298 2917
2918int create_irq(void)
2919{
2920 int irq;
2921
2922 irq = create_irq_nr(nr_irqs - 1);
2923
2924 if (irq == 0)
2925 irq = -1;
2926
2927 return irq;
2928}
2929
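/*
 * Caller sketch (illustrative, not from the patch): create_irq_nr()
 * signals failure with 0 rather than the negative errno the old
 * create_irq() used, so a compatibility shim maps it back:
 */
static int alloc_dynamic_irq_sketch(unsigned int hint)
{
	unsigned int irq = create_irq_nr(hint);

	return irq ? (int)irq : -ENOSPC;	/* 0 means "no free vector" */
}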
2299void destroy_irq(unsigned int irq) 2930void destroy_irq(unsigned int irq)
2300{ 2931{
2301 unsigned long flags; 2932 unsigned long flags;
@@ -2316,7 +2947,7 @@ void destroy_irq(unsigned int irq)
2316#ifdef CONFIG_PCI_MSI 2947#ifdef CONFIG_PCI_MSI
2317static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) 2948static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
2318{ 2949{
2319 struct irq_cfg *cfg = irq_cfg + irq; 2950 struct irq_cfg *cfg;
2320 int err; 2951 int err;
2321 unsigned dest; 2952 unsigned dest;
2322 cpumask_t tmp; 2953 cpumask_t tmp;
@@ -2326,6 +2957,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
2326 if (err) 2957 if (err)
2327 return err; 2958 return err;
2328 2959
2960 cfg = irq_cfg(irq);
2329 cpus_and(tmp, cfg->domain, tmp); 2961 cpus_and(tmp, cfg->domain, tmp);
2330 dest = cpu_mask_to_apicid(tmp); 2962 dest = cpu_mask_to_apicid(tmp);
2331 2963
@@ -2383,10 +3015,11 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
2383#ifdef CONFIG_SMP 3015#ifdef CONFIG_SMP
2384static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) 3016static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2385{ 3017{
2386 struct irq_cfg *cfg = irq_cfg + irq; 3018 struct irq_cfg *cfg;
2387 struct msi_msg msg; 3019 struct msi_msg msg;
2388 unsigned int dest; 3020 unsigned int dest;
2389 cpumask_t tmp; 3021 cpumask_t tmp;
3022 struct irq_desc *desc;
2390 3023
2391 cpus_and(tmp, mask, cpu_online_map); 3024 cpus_and(tmp, mask, cpu_online_map);
2392 if (cpus_empty(tmp)) 3025 if (cpus_empty(tmp))
@@ -2395,6 +3028,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2395 if (assign_irq_vector(irq, mask)) 3028 if (assign_irq_vector(irq, mask))
2396 return; 3029 return;
2397 3030
3031 cfg = irq_cfg(irq);
2398 cpus_and(tmp, cfg->domain, mask); 3032 cpus_and(tmp, cfg->domain, mask);
2399 dest = cpu_mask_to_apicid(tmp); 3033 dest = cpu_mask_to_apicid(tmp);
2400 3034
@@ -2406,7 +3040,8 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2406 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3040 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
2407 3041
2408 write_msi_msg(irq, &msg); 3042 write_msi_msg(irq, &msg);
2409 irq_desc[irq].affinity = mask; 3043 desc = irq_to_desc(irq);
3044 desc->affinity = mask;
2410} 3045}
2411 3046
2412#ifdef CONFIG_INTR_REMAP 3047#ifdef CONFIG_INTR_REMAP
@@ -2416,10 +3051,11 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2416 */ 3051 */
2417static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) 3052static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2418{ 3053{
2419 struct irq_cfg *cfg = irq_cfg + irq; 3054 struct irq_cfg *cfg;
2420 unsigned int dest; 3055 unsigned int dest;
2421 cpumask_t tmp, cleanup_mask; 3056 cpumask_t tmp, cleanup_mask;
2422 struct irte irte; 3057 struct irte irte;
3058 struct irq_desc *desc;
2423 3059
2424 cpus_and(tmp, mask, cpu_online_map); 3060 cpus_and(tmp, mask, cpu_online_map);
2425 if (cpus_empty(tmp)) 3061 if (cpus_empty(tmp))
@@ -2431,6 +3067,7 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2431 if (assign_irq_vector(irq, mask)) 3067 if (assign_irq_vector(irq, mask))
2432 return; 3068 return;
2433 3069
3070 cfg = irq_cfg(irq);
2434 cpus_and(tmp, cfg->domain, mask); 3071 cpus_and(tmp, cfg->domain, mask);
2435 dest = cpu_mask_to_apicid(tmp); 3072 dest = cpu_mask_to_apicid(tmp);
2436 3073
@@ -2454,7 +3091,8 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2454 cfg->move_in_progress = 0; 3091 cfg->move_in_progress = 0;
2455 } 3092 }
2456 3093
2457 irq_desc[irq].affinity = mask; 3094 desc = irq_to_desc(irq);
3095 desc->affinity = mask;
2458} 3096}
2459#endif 3097#endif
2460#endif /* CONFIG_SMP */ 3098#endif /* CONFIG_SMP */
@@ -2507,7 +3145,7 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
2507 if (index < 0) { 3145 if (index < 0) {
2508 printk(KERN_ERR 3146 printk(KERN_ERR
2509 "Unable to allocate %d IRTE for PCI %s\n", nvec, 3147 "Unable to allocate %d IRTE for PCI %s\n", nvec,
2510 pci_name(dev)); 3148 pci_name(dev));
2511 return -ENOSPC; 3149 return -ENOSPC;
2512 } 3150 }
2513 return index; 3151 return index;
@@ -2528,7 +3166,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
2528 3166
2529#ifdef CONFIG_INTR_REMAP 3167#ifdef CONFIG_INTR_REMAP
2530 if (irq_remapped(irq)) { 3168 if (irq_remapped(irq)) {
2531 struct irq_desc *desc = irq_desc + irq; 3169 struct irq_desc *desc = irq_to_desc(irq);
2532 /* 3170 /*
2533 * irq migration in process context 3171 * irq migration in process context
2534 */ 3172 */
@@ -2538,16 +3176,34 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
2538#endif 3176#endif
2539 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); 3177 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
2540 3178
3179 dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);
3180
2541 return 0; 3181 return 0;
2542} 3182}
2543 3183
3184static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
3185{
3186 unsigned int irq;
3187
3188 irq = dev->bus->number;
3189 irq <<= 8;
3190 irq |= dev->devfn;
3191 irq <<= 12;
3192
3193 return irq;
3194}
3195
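/*
 * Worked example of the packing above (values illustrative): bus 0x02
 * with devfn 0x18 (device 3, function 0) yields
 * ((0x02 << 8) | 0x18) << 12 == 0x218000; the MSI setup paths below
 * hand that value plus 0x100 to create_irq_nr() as the irq_want hint.
 */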
2544int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) 3196int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
2545{ 3197{
2546 int irq, ret; 3198 unsigned int irq;
3199 int ret;
3200 unsigned int irq_want;
2547 3201
2548 irq = create_irq(); 3202 irq_want = build_irq_for_pci_dev(dev) + 0x100;
2549 if (irq < 0) 3203
2550 return irq; 3204 irq = create_irq_nr(irq_want);
3205 if (irq == 0)
3206 return -1;
2551 3207
2552#ifdef CONFIG_INTR_REMAP 3208#ifdef CONFIG_INTR_REMAP
2553 if (!intr_remapping_enabled) 3209 if (!intr_remapping_enabled)
@@ -2574,18 +3230,22 @@ error:
2574 3230
2575int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) 3231int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
2576{ 3232{
2577 int irq, ret, sub_handle; 3233 unsigned int irq;
3234 int ret, sub_handle;
2578 struct msi_desc *desc; 3235 struct msi_desc *desc;
3236 unsigned int irq_want;
3237
2579#ifdef CONFIG_INTR_REMAP 3238#ifdef CONFIG_INTR_REMAP
2580 struct intel_iommu *iommu = 0; 3239 struct intel_iommu *iommu = 0;
2581 int index = 0; 3240 int index = 0;
2582#endif 3241#endif
2583 3242
3243 irq_want = build_irq_for_pci_dev(dev) + 0x100;
2584 sub_handle = 0; 3244 sub_handle = 0;
2585 list_for_each_entry(desc, &dev->msi_list, list) { 3245 list_for_each_entry(desc, &dev->msi_list, list) {
2586 irq = create_irq(); 3246 irq = create_irq_nr(irq_want--);
2587 if (irq < 0) 3247 if (irq == 0)
2588 return irq; 3248 return -1;
2589#ifdef CONFIG_INTR_REMAP 3249#ifdef CONFIG_INTR_REMAP
2590 if (!intr_remapping_enabled) 3250 if (!intr_remapping_enabled)
2591 goto no_ir; 3251 goto no_ir;
@@ -2636,10 +3296,11 @@ void arch_teardown_msi_irq(unsigned int irq)
2636#ifdef CONFIG_SMP 3296#ifdef CONFIG_SMP
2637static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) 3297static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
2638{ 3298{
2639 struct irq_cfg *cfg = irq_cfg + irq; 3299 struct irq_cfg *cfg;
2640 struct msi_msg msg; 3300 struct msi_msg msg;
2641 unsigned int dest; 3301 unsigned int dest;
2642 cpumask_t tmp; 3302 cpumask_t tmp;
3303 struct irq_desc *desc;
2643 3304
2644 cpus_and(tmp, mask, cpu_online_map); 3305 cpus_and(tmp, mask, cpu_online_map);
2645 if (cpus_empty(tmp)) 3306 if (cpus_empty(tmp))
@@ -2648,6 +3309,7 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
2648 if (assign_irq_vector(irq, mask)) 3309 if (assign_irq_vector(irq, mask))
2649 return; 3310 return;
2650 3311
3312 cfg = irq_cfg(irq);
2651 cpus_and(tmp, cfg->domain, mask); 3313 cpus_and(tmp, cfg->domain, mask);
2652 dest = cpu_mask_to_apicid(tmp); 3314 dest = cpu_mask_to_apicid(tmp);
2653 3315
@@ -2659,7 +3321,8 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
2659 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3321 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
2660 3322
2661 dmar_msi_write(irq, &msg); 3323 dmar_msi_write(irq, &msg);
2662 irq_desc[irq].affinity = mask; 3324 desc = irq_to_desc(irq);
3325 desc->affinity = mask;
2663} 3326}
2664#endif /* CONFIG_SMP */ 3327#endif /* CONFIG_SMP */
2665 3328
@@ -2689,6 +3352,69 @@ int arch_setup_dmar_msi(unsigned int irq)
2689} 3352}
2690#endif 3353#endif
2691 3354
3355#ifdef CONFIG_HPET_TIMER
3356
3357#ifdef CONFIG_SMP
3358static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
3359{
3360 struct irq_cfg *cfg;
3361 struct irq_desc *desc;
3362 struct msi_msg msg;
3363 unsigned int dest;
3364 cpumask_t tmp;
3365
3366 cpus_and(tmp, mask, cpu_online_map);
3367 if (cpus_empty(tmp))
3368 return;
3369
3370 if (assign_irq_vector(irq, mask))
3371 return;
3372
3373 cfg = irq_cfg(irq);
3374 cpus_and(tmp, cfg->domain, mask);
3375 dest = cpu_mask_to_apicid(tmp);
3376
3377 hpet_msi_read(irq, &msg);
3378
3379 msg.data &= ~MSI_DATA_VECTOR_MASK;
3380 msg.data |= MSI_DATA_VECTOR(cfg->vector);
3381 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
3382 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3383
3384 hpet_msi_write(irq, &msg);
3385 desc = irq_to_desc(irq);
3386 desc->affinity = mask;
3387}
3388#endif /* CONFIG_SMP */
3389
3390struct irq_chip hpet_msi_type = {
3391 .name = "HPET_MSI",
3392 .unmask = hpet_msi_unmask,
3393 .mask = hpet_msi_mask,
3394 .ack = ack_apic_edge,
3395#ifdef CONFIG_SMP
3396 .set_affinity = hpet_msi_set_affinity,
3397#endif
3398 .retrigger = ioapic_retrigger_irq,
3399};
3400
3401int arch_setup_hpet_msi(unsigned int irq)
3402{
3403 int ret;
3404 struct msi_msg msg;
3405
3406 ret = msi_compose_msg(NULL, irq, &msg);
3407 if (ret < 0)
3408 return ret;
3409
3410 hpet_msi_write(irq, &msg);
3411 set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq,
3412 "edge");
3413
3414 return 0;
3415}
3416#endif
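/*
 * Hypothetical caller sketch -- the real wiring lives in hpet.c (see
 * the diffstat) and may differ.  Once arch_setup_hpet_msi() has
 * composed and written the MSI message, the irq behaves like any other
 * edge-triggered MSI source:
 */
static int __init hpet_irq_sketch(unsigned int irq, irq_handler_t handler)
{
	int ret = arch_setup_hpet_msi(irq);

	if (ret < 0)
		return ret;
	/* handler name and flags are placeholders, not from this patch */
	return request_irq(irq, handler, 0, "hpet_sketch", NULL);
}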
3417
2692#endif /* CONFIG_PCI_MSI */ 3418#endif /* CONFIG_PCI_MSI */
2693/* 3419/*
2694 * Hypertransport interrupt support 3420 * Hypertransport interrupt support
@@ -2713,9 +3439,10 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
2713 3439
2714static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) 3440static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
2715{ 3441{
2716 struct irq_cfg *cfg = irq_cfg + irq; 3442 struct irq_cfg *cfg;
2717 unsigned int dest; 3443 unsigned int dest;
2718 cpumask_t tmp; 3444 cpumask_t tmp;
3445 struct irq_desc *desc;
2719 3446
2720 cpus_and(tmp, mask, cpu_online_map); 3447 cpus_and(tmp, mask, cpu_online_map);
2721 if (cpus_empty(tmp)) 3448 if (cpus_empty(tmp))
@@ -2724,11 +3451,13 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
2724 if (assign_irq_vector(irq, mask)) 3451 if (assign_irq_vector(irq, mask))
2725 return; 3452 return;
2726 3453
3454 cfg = irq_cfg(irq);
2727 cpus_and(tmp, cfg->domain, mask); 3455 cpus_and(tmp, cfg->domain, mask);
2728 dest = cpu_mask_to_apicid(tmp); 3456 dest = cpu_mask_to_apicid(tmp);
2729 3457
2730 target_ht_irq(irq, dest, cfg->vector); 3458 target_ht_irq(irq, dest, cfg->vector);
2731 irq_desc[irq].affinity = mask; 3459 desc = irq_to_desc(irq);
3460 desc->affinity = mask;
2732} 3461}
2733#endif 3462#endif
2734 3463
@@ -2745,7 +3474,7 @@ static struct irq_chip ht_irq_chip = {
2745 3474
2746int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) 3475int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
2747{ 3476{
2748 struct irq_cfg *cfg = irq_cfg + irq; 3477 struct irq_cfg *cfg;
2749 int err; 3478 int err;
2750 cpumask_t tmp; 3479 cpumask_t tmp;
2751 3480
@@ -2755,6 +3484,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
2755 struct ht_irq_msg msg; 3484 struct ht_irq_msg msg;
2756 unsigned dest; 3485 unsigned dest;
2757 3486
3487 cfg = irq_cfg(irq);
2758 cpus_and(tmp, cfg->domain, tmp); 3488 cpus_and(tmp, cfg->domain, tmp);
2759 dest = cpu_mask_to_apicid(tmp); 3489 dest = cpu_mask_to_apicid(tmp);
2760 3490
@@ -2777,20 +3507,196 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
2777 3507
2778 set_irq_chip_and_handler_name(irq, &ht_irq_chip, 3508 set_irq_chip_and_handler_name(irq, &ht_irq_chip,
2779 handle_edge_irq, "edge"); 3509 handle_edge_irq, "edge");
3510
3511 dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
2780 } 3512 }
2781 return err; 3513 return err;
2782} 3514}
2783#endif /* CONFIG_HT_IRQ */ 3515#endif /* CONFIG_HT_IRQ */
2784 3516
3517#ifdef CONFIG_X86_64
3518/*
3519 * Re-target the irq to the specified CPU and enable the specified MMR located
3520 * on the specified blade to allow the sending of MSIs to the specified CPU.
3521 */
3522int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3523 unsigned long mmr_offset)
3524{
3525 const cpumask_t *eligible_cpu = get_cpu_mask(cpu);
3526 struct irq_cfg *cfg;
3527 int mmr_pnode;
3528 unsigned long mmr_value;
3529 struct uv_IO_APIC_route_entry *entry;
3530 unsigned long flags;
3531 int err;
3532
3533 err = assign_irq_vector(irq, *eligible_cpu);
3534 if (err != 0)
3535 return err;
3536
3537 spin_lock_irqsave(&vector_lock, flags);
3538 set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
3539 irq_name);
3540 spin_unlock_irqrestore(&vector_lock, flags);
3541
3542 cfg = irq_cfg(irq);
3543
3544 mmr_value = 0;
3545 entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
3546 BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
3547
3548 entry->vector = cfg->vector;
3549 entry->delivery_mode = INT_DELIVERY_MODE;
3550 entry->dest_mode = INT_DEST_MODE;
3551 entry->polarity = 0;
3552 entry->trigger = 0;
3553 entry->mask = 0;
3554 entry->dest = cpu_mask_to_apicid(*eligible_cpu);
3555
3556 mmr_pnode = uv_blade_to_pnode(mmr_blade);
3557 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
3558
3559 return irq;
3560}
3561
3562/*
3563 * Disable the specified MMR located on the specified blade so that MSIs are
3564 * no longer allowed to be sent.
3565 */
3566void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
3567{
3568 unsigned long mmr_value;
3569 struct uv_IO_APIC_route_entry *entry;
3570 int mmr_pnode;
3571
3572 mmr_value = 0;
3573 entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
3574 BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
3575
3576 entry->mask = 1;
3577
3578 mmr_pnode = uv_blade_to_pnode(mmr_blade);
3579 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
3580}
3581#endif /* CONFIG_X86_64 */
3582
3583int __init io_apic_get_redir_entries (int ioapic)
3584{
3585 union IO_APIC_reg_01 reg_01;
3586 unsigned long flags;
3587
3588 spin_lock_irqsave(&ioapic_lock, flags);
3589 reg_01.raw = io_apic_read(ioapic, 1);
3590 spin_unlock_irqrestore(&ioapic_lock, flags);
3591
3592 return reg_01.bits.entries;
3593}
3594
3595int __init probe_nr_irqs(void)
3596{
3597 int idx;
3598 int nr = 0;
3599#ifndef CONFIG_XEN
3600 int nr_min = 32;
3601#else
3602 int nr_min = NR_IRQS;
3603#endif
3604
3605 for (idx = 0; idx < nr_ioapics; idx++)
3606 nr += io_apic_get_redir_entries(idx) + 1;
3607
3608 /* double it to leave room for irq hotplug, MSI and NMI */
3609 nr <<= 1;
3610
3611 /* something wrong ? */
3612 if (nr < nr_min)
3613 nr = nr_min;
3614
3615 return nr;
3616}
3617
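/*
 * Worked example (illustrative): io_apic_get_redir_entries() returns
 * the highest redirection index, hence the "+ 1" per IO-APIC above.
 * Two standard 24-pin IO-APICs report 23 each, so nr = 24 + 24 = 48,
 * doubled to 96 for hotplug/MSI/NMI headroom -- comfortably above the
 * 32-entry floor.
 */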
2785/* -------------------------------------------------------------------------- 3618/* --------------------------------------------------------------------------
2786 ACPI-based IOAPIC Configuration 3619 ACPI-based IOAPIC Configuration
2787 -------------------------------------------------------------------------- */ 3620 -------------------------------------------------------------------------- */
2788 3621
2789#ifdef CONFIG_ACPI 3622#ifdef CONFIG_ACPI
2790 3623
2791#define IO_APIC_MAX_ID 0xFE 3624#ifdef CONFIG_X86_32
3625int __init io_apic_get_unique_id(int ioapic, int apic_id)
3626{
3627 union IO_APIC_reg_00 reg_00;
3628 static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
3629 physid_mask_t tmp;
3630 unsigned long flags;
3631 int i = 0;
2792 3632
2793int __init io_apic_get_redir_entries (int ioapic) 3633 /*
3634 * The P4 platform supports up to 256 APIC IDs on two separate APIC
3635 * buses (one for LAPICs, one for IOAPICs), where its predecessors only
3636 * support up to 16 on one shared APIC bus.
3637 *
3638 * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
3639 * advantage of new APIC bus architecture.
3640 */
3641
3642 if (physids_empty(apic_id_map))
3643 apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
3644
3645 spin_lock_irqsave(&ioapic_lock, flags);
3646 reg_00.raw = io_apic_read(ioapic, 0);
3647 spin_unlock_irqrestore(&ioapic_lock, flags);
3648
3649 if (apic_id >= get_physical_broadcast()) {
3650 printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
3651 "%d\n", ioapic, apic_id, reg_00.bits.ID);
3652 apic_id = reg_00.bits.ID;
3653 }
3654
3655 /*
3656 * Every APIC in a system must have a unique ID or we get lots of nice
3657 * 'stuck on smp_invalidate_needed IPI wait' messages.
3658 */
3659 if (check_apicid_used(apic_id_map, apic_id)) {
3660
3661 for (i = 0; i < get_physical_broadcast(); i++) {
3662 if (!check_apicid_used(apic_id_map, i))
3663 break;
3664 }
3665
3666 if (i == get_physical_broadcast())
3667 panic("Max apic_id exceeded!\n");
3668
3669 printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
3670 "trying %d\n", ioapic, apic_id, i);
3671
3672 apic_id = i;
3673 }
3674
3675 tmp = apicid_to_cpu_present(apic_id);
3676 physids_or(apic_id_map, apic_id_map, tmp);
3677
3678 if (reg_00.bits.ID != apic_id) {
3679 reg_00.bits.ID = apic_id;
3680
3681 spin_lock_irqsave(&ioapic_lock, flags);
3682 io_apic_write(ioapic, 0, reg_00.raw);
3683 reg_00.raw = io_apic_read(ioapic, 0);
3684 spin_unlock_irqrestore(&ioapic_lock, flags);
3685
3686 /* Sanity check */
3687 if (reg_00.bits.ID != apic_id) {
3688 printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
3689 return -1;
3690 }
3691 }
3692
3693 apic_printk(APIC_VERBOSE, KERN_INFO
3694 "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
3695
3696 return apic_id;
3697}
3698
3699int __init io_apic_get_version(int ioapic)
2794{ 3700{
2795 union IO_APIC_reg_01 reg_01; 3701 union IO_APIC_reg_01 reg_01;
2796 unsigned long flags; 3702 unsigned long flags;
@@ -2799,9 +3705,9 @@ int __init io_apic_get_redir_entries (int ioapic)
2799 reg_01.raw = io_apic_read(ioapic, 1); 3705 reg_01.raw = io_apic_read(ioapic, 1);
2800 spin_unlock_irqrestore(&ioapic_lock, flags); 3706 spin_unlock_irqrestore(&ioapic_lock, flags);
2801 3707
2802 return reg_01.bits.entries; 3708 return reg_01.bits.version;
2803} 3709}
2804 3710#endif
2805 3711
2806int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) 3712int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
2807{ 3713{
@@ -2853,6 +3759,7 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
2853void __init setup_ioapic_dest(void) 3759void __init setup_ioapic_dest(void)
2854{ 3760{
2855 int pin, ioapic, irq, irq_entry; 3761 int pin, ioapic, irq, irq_entry;
3762 struct irq_cfg *cfg;
2856 3763
2857 if (skip_ioapic_setup == 1) 3764 if (skip_ioapic_setup == 1)
2858 return; 3765 return;
@@ -2868,7 +3775,8 @@ void __init setup_ioapic_dest(void)
2868 * when you have too many devices, because at that time only boot 3775 * when you have too many devices, because at that time only boot
2869 * cpu is online. 3776 * cpu is online.
2870 */ 3777 */
2871 if (!irq_cfg[irq].vector) 3778 cfg = irq_cfg(irq);
3779 if (!cfg->vector)
2872 setup_IO_APIC_irq(ioapic, pin, irq, 3780 setup_IO_APIC_irq(ioapic, pin, irq,
2873 irq_trigger(irq_entry), 3781 irq_trigger(irq_entry),
2874 irq_polarity(irq_entry)); 3782 irq_polarity(irq_entry));
@@ -2926,18 +3834,33 @@ void __init ioapic_init_mappings(void)
2926 struct resource *ioapic_res; 3834 struct resource *ioapic_res;
2927 int i; 3835 int i;
2928 3836
3837 irq_2_pin_init();
2929 ioapic_res = ioapic_setup_resources(); 3838 ioapic_res = ioapic_setup_resources();
2930 for (i = 0; i < nr_ioapics; i++) { 3839 for (i = 0; i < nr_ioapics; i++) {
2931 if (smp_found_config) { 3840 if (smp_found_config) {
2932 ioapic_phys = mp_ioapics[i].mp_apicaddr; 3841 ioapic_phys = mp_ioapics[i].mp_apicaddr;
3842#ifdef CONFIG_X86_32
3843 if (!ioapic_phys) {
3844 printk(KERN_ERR
3845 "WARNING: bogus zero IO-APIC "
3846 "address found in MPTABLE, "
3847 "disabling IO/APIC support!\n");
3848 smp_found_config = 0;
3849 skip_ioapic_setup = 1;
3850 goto fake_ioapic_page;
3851 }
3852#endif
2933 } else { 3853 } else {
3854#ifdef CONFIG_X86_32
3855fake_ioapic_page:
3856#endif
2934 ioapic_phys = (unsigned long) 3857 ioapic_phys = (unsigned long)
2935 alloc_bootmem_pages(PAGE_SIZE); 3858 alloc_bootmem_pages(PAGE_SIZE);
2936 ioapic_phys = __pa(ioapic_phys); 3859 ioapic_phys = __pa(ioapic_phys);
2937 } 3860 }
2938 set_fixmap_nocache(idx, ioapic_phys); 3861 set_fixmap_nocache(idx, ioapic_phys);
2939 apic_printk(APIC_VERBOSE, 3862 apic_printk(APIC_VERBOSE,
2940 "mapped IOAPIC to %016lx (%016lx)\n", 3863 "mapped IOAPIC to %08lx (%08lx)\n",
2941 __fix_to_virt(idx), ioapic_phys); 3864 __fix_to_virt(idx), ioapic_phys);
2942 idx++; 3865 idx++;
2943 3866
@@ -2971,4 +3894,3 @@ static int __init ioapic_insert_resources(void)
2971/* Insert the IO APIC resources after PCI initialization has occurred to handle 3894/* Insert the IO APIC resources after PCI initialization has occurred to handle
2972 * IO APICs that are mapped in on a BAR in PCI space. */ 3895 * IO APICs that are mapped in on a BAR in PCI space. */
2973late_initcall(ioapic_insert_resources); 3896late_initcall(ioapic_insert_resources);
2974
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
deleted file mode 100644
index e710289f673e..000000000000
--- a/arch/x86/kernel/io_apic_32.c
+++ /dev/null
@@ -1,2908 +0,0 @@
1/*
2 * Intel IO-APIC support for multi-Pentium hosts.
3 *
4 * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
5 *
6 * Many thanks to Stig Venaas for trying out countless experimental
7 * patches and reporting/debugging problems patiently!
8 *
9 * (c) 1999, Multiple IO-APIC support, developed by
10 * Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
11 * Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
12 * further tested and cleaned up by Zach Brown <zab@redhat.com>
13 * and Ingo Molnar <mingo@redhat.com>
14 *
15 * Fixes
16 * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
17 * thanks to Eric Gilmore
18 * and Rolf G. Tews
19 * for testing these extensively
20 * Paul Diefenbaugh : Added full ACPI support
21 */
22
23#include <linux/mm.h>
24#include <linux/interrupt.h>
25#include <linux/init.h>
26#include <linux/delay.h>
27#include <linux/sched.h>
28#include <linux/bootmem.h>
29#include <linux/mc146818rtc.h>
30#include <linux/compiler.h>
31#include <linux/acpi.h>
32#include <linux/module.h>
33#include <linux/sysdev.h>
34#include <linux/pci.h>
35#include <linux/msi.h>
36#include <linux/htirq.h>
37#include <linux/freezer.h>
38#include <linux/kthread.h>
39#include <linux/jiffies.h> /* time_after() */
40
41#include <asm/io.h>
42#include <asm/smp.h>
43#include <asm/desc.h>
44#include <asm/timer.h>
45#include <asm/i8259.h>
46#include <asm/nmi.h>
47#include <asm/msidef.h>
48#include <asm/hypertransport.h>
49#include <asm/setup.h>
50
51#include <mach_apic.h>
52#include <mach_apicdef.h>
53
54#define __apicdebuginit(type) static type __init
55
56int (*ioapic_renumber_irq)(int ioapic, int irq);
57atomic_t irq_mis_count;
58
59/* Where if anywhere is the i8259 connect in external int mode */
60static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
61
62static DEFINE_SPINLOCK(ioapic_lock);
63DEFINE_SPINLOCK(vector_lock);
64
65int timer_through_8259 __initdata;
66
67/*
68 * Is the SiS APIC rmw bug present ?
69 * -1 = don't know, 0 = no, 1 = yes
70 */
71int sis_apic_bug = -1;
72
73/*
74 * # of IRQ routing registers
75 */
76int nr_ioapic_registers[MAX_IO_APICS];
77
78/* I/O APIC entries */
79struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
80int nr_ioapics;
81
82/* MP IRQ source entries */
83struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
84
85/* # of MP IRQ source entries */
86int mp_irq_entries;
87
88#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
89int mp_bus_id_to_type[MAX_MP_BUSSES];
90#endif
91
92DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
93
94static int disable_timer_pin_1 __initdata;
95
96/*
97 * Rough estimation of how many shared IRQs there are, can
98 * be changed anytime.
99 */
100#define MAX_PLUS_SHARED_IRQS NR_IRQS
101#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
102
103/*
104 * This is performance-critical, we want to do it O(1)
105 *
106 * the indexing order of this array favors 1:1 mappings
107 * between pins and IRQs.
108 */
109
110static struct irq_pin_list {
111 int apic, pin, next;
112} irq_2_pin[PIN_MAP_SIZE];
113
114struct io_apic {
115 unsigned int index;
116 unsigned int unused[3];
117 unsigned int data;
118};
119
120static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
121{
122 return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
123 + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
124}
125
126static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
127{
128 struct io_apic __iomem *io_apic = io_apic_base(apic);
129 writel(reg, &io_apic->index);
130 return readl(&io_apic->data);
131}
132
133static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
134{
135 struct io_apic __iomem *io_apic = io_apic_base(apic);
136 writel(reg, &io_apic->index);
137 writel(value, &io_apic->data);
138}
139
140/*
141 * Re-write a value: to be used for read-modify-write
142 * cycles where the read already set up the index register.
143 *
144 * Older SiS APIC requires we rewrite the index register
145 */
146static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
147{
148 volatile struct io_apic __iomem *io_apic = io_apic_base(apic);
149 if (sis_apic_bug)
150 writel(reg, &io_apic->index);
151 writel(value, &io_apic->data);
152}
153
154union entry_union {
155 struct { u32 w1, w2; };
156 struct IO_APIC_route_entry entry;
157};
158
159static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
160{
161 union entry_union eu;
162 unsigned long flags;
163 spin_lock_irqsave(&ioapic_lock, flags);
164 eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
165 eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
166 spin_unlock_irqrestore(&ioapic_lock, flags);
167 return eu.entry;
168}
169
170/*
171 * When we write a new IO APIC routing entry, we need to write the high
172 * word first! If the mask bit in the low word is clear, we will enable
173 * the interrupt, and we need to make sure the entry is fully populated
174 * before that happens.
175 */
176static void
177__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
178{
179 union entry_union eu;
180 eu.entry = e;
181 io_apic_write(apic, 0x11 + 2*pin, eu.w2);
182 io_apic_write(apic, 0x10 + 2*pin, eu.w1);
183}
184
185static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
186{
187 unsigned long flags;
188 spin_lock_irqsave(&ioapic_lock, flags);
189 __ioapic_write_entry(apic, pin, e);
190 spin_unlock_irqrestore(&ioapic_lock, flags);
191}
192
193/*
194 * When we mask an IO APIC routing entry, we need to write the low
195 * word first, in order to set the mask bit before we change the
196 * high bits!
197 */
198static void ioapic_mask_entry(int apic, int pin)
199{
200 unsigned long flags;
201 union entry_union eu = { .entry.mask = 1 };
202
203 spin_lock_irqsave(&ioapic_lock, flags);
204 io_apic_write(apic, 0x10 + 2*pin, eu.w1);
205 io_apic_write(apic, 0x11 + 2*pin, eu.w2);
206 spin_unlock_irqrestore(&ioapic_lock, flags);
207}
208
209/*
210 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
211 * shared ISA-space IRQs, so we have to support them. We are super
212 * fast in the common case, and fast for shared ISA-space IRQs.
213 */
214static void add_pin_to_irq(unsigned int irq, int apic, int pin)
215{
216 static int first_free_entry = NR_IRQS;
217 struct irq_pin_list *entry = irq_2_pin + irq;
218
219 while (entry->next)
220 entry = irq_2_pin + entry->next;
221
222 if (entry->pin != -1) {
223 entry->next = first_free_entry;
224 entry = irq_2_pin + entry->next;
225 if (++first_free_entry >= PIN_MAP_SIZE)
226 panic("io_apic.c: whoops");
227 }
228 entry->apic = apic;
229 entry->pin = pin;
230}
231
232/*
233 * Reroute an IRQ to a different pin.
234 */
235static void __init replace_pin_at_irq(unsigned int irq,
236 int oldapic, int oldpin,
237 int newapic, int newpin)
238{
239 struct irq_pin_list *entry = irq_2_pin + irq;
240
241 while (1) {
242 if (entry->apic == oldapic && entry->pin == oldpin) {
243 entry->apic = newapic;
244 entry->pin = newpin;
245 }
246 if (!entry->next)
247 break;
248 entry = irq_2_pin + entry->next;
249 }
250}
251
252static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable)
253{
254 struct irq_pin_list *entry = irq_2_pin + irq;
255 unsigned int pin, reg;
256
257 for (;;) {
258 pin = entry->pin;
259 if (pin == -1)
260 break;
261 reg = io_apic_read(entry->apic, 0x10 + pin*2);
262 reg &= ~disable;
263 reg |= enable;
264 io_apic_modify(entry->apic, 0x10 + pin*2, reg);
265 if (!entry->next)
266 break;
267 entry = irq_2_pin + entry->next;
268 }
269}
270
271/* mask = 1 */
272static void __mask_IO_APIC_irq(unsigned int irq)
273{
274 __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0);
275}
276
277/* mask = 0 */
278static void __unmask_IO_APIC_irq(unsigned int irq)
279{
280 __modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED);
281}
282
283/* mask = 1, trigger = 0 */
284static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
285{
286 __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED,
287 IO_APIC_REDIR_LEVEL_TRIGGER);
288}
289
290/* mask = 0, trigger = 1 */
291static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
292{
293 __modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER,
294 IO_APIC_REDIR_MASKED);
295}
296
297static void mask_IO_APIC_irq(unsigned int irq)
298{
299 unsigned long flags;
300
301 spin_lock_irqsave(&ioapic_lock, flags);
302 __mask_IO_APIC_irq(irq);
303 spin_unlock_irqrestore(&ioapic_lock, flags);
304}
305
306static void unmask_IO_APIC_irq(unsigned int irq)
307{
308 unsigned long flags;
309
310 spin_lock_irqsave(&ioapic_lock, flags);
311 __unmask_IO_APIC_irq(irq);
312 spin_unlock_irqrestore(&ioapic_lock, flags);
313}
314
315static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
316{
317 struct IO_APIC_route_entry entry;
318
319 /* Check delivery_mode to be sure we're not clearing an SMI pin */
320 entry = ioapic_read_entry(apic, pin);
321 if (entry.delivery_mode == dest_SMI)
322 return;
323
324 /*
325 * Disable it in the IO-APIC irq-routing table:
326 */
327 ioapic_mask_entry(apic, pin);
328}
329
330static void clear_IO_APIC(void)
331{
332 int apic, pin;
333
334 for (apic = 0; apic < nr_ioapics; apic++)
335 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
336 clear_IO_APIC_pin(apic, pin);
337}
338
339#ifdef CONFIG_SMP
340static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
341{
342 unsigned long flags;
343 int pin;
344 struct irq_pin_list *entry = irq_2_pin + irq;
345 unsigned int apicid_value;
346 cpumask_t tmp;
347
348 cpus_and(tmp, cpumask, cpu_online_map);
349 if (cpus_empty(tmp))
350 tmp = TARGET_CPUS;
351
352 cpus_and(cpumask, tmp, CPU_MASK_ALL);
353
354 apicid_value = cpu_mask_to_apicid(cpumask);
355 /* Prepare to do the io_apic_write */
356 apicid_value = apicid_value << 24;
357 spin_lock_irqsave(&ioapic_lock, flags);
358 for (;;) {
359 pin = entry->pin;
360 if (pin == -1)
361 break;
362 io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value);
363 if (!entry->next)
364 break;
365 entry = irq_2_pin + entry->next;
366 }
367 irq_desc[irq].affinity = cpumask;
368 spin_unlock_irqrestore(&ioapic_lock, flags);
369}
370
371#if defined(CONFIG_IRQBALANCE)
372# include <asm/processor.h> /* kernel_thread() */
373# include <linux/kernel_stat.h> /* kstat */
374# include <linux/slab.h> /* kmalloc() */
375# include <linux/timer.h>
376
377#define IRQBALANCE_CHECK_ARCH -999
378#define MAX_BALANCED_IRQ_INTERVAL (5*HZ)
379#define MIN_BALANCED_IRQ_INTERVAL (HZ/2)
380#define BALANCED_IRQ_MORE_DELTA (HZ/10)
381#define BALANCED_IRQ_LESS_DELTA (HZ)
382
383static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH;
384static int physical_balance __read_mostly;
385static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL;
386
387static struct irq_cpu_info {
388 unsigned long *last_irq;
389 unsigned long *irq_delta;
390 unsigned long irq;
391} irq_cpu_data[NR_CPUS];
392
393#define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq)
394#define LAST_CPU_IRQ(cpu, irq) (irq_cpu_data[cpu].last_irq[irq])
395#define IRQ_DELTA(cpu, irq) (irq_cpu_data[cpu].irq_delta[irq])
396
397#define IDLE_ENOUGH(cpu,now) \
398 (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1))
399
400#define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask)
401
402#define CPU_TO_PACKAGEINDEX(i) (first_cpu(per_cpu(cpu_sibling_map, i)))
403
404static cpumask_t balance_irq_affinity[NR_IRQS] = {
405 [0 ... NR_IRQS-1] = CPU_MASK_ALL
406};
407
408void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
409{
410 balance_irq_affinity[irq] = mask;
411}
412
413static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
414 unsigned long now, int direction)
415{
416 int search_idle = 1;
417 int cpu = curr_cpu;
418
419 goto inside;
420
421 do {
422 if (unlikely(cpu == curr_cpu))
423 search_idle = 0;
424inside:
425 if (direction == 1) {
426 cpu++;
427 if (cpu >= NR_CPUS)
428 cpu = 0;
429 } else {
430 cpu--;
431 if (cpu == -1)
432 cpu = NR_CPUS-1;
433 }
434 } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu, allowed_mask) ||
435 (search_idle && !IDLE_ENOUGH(cpu, now)));
436
437 return cpu;
438}
439
440static inline void balance_irq(int cpu, int irq)
441{
442 unsigned long now = jiffies;
443 cpumask_t allowed_mask;
444 unsigned int new_cpu;
445
446 if (irqbalance_disabled)
447 return;
448
449 cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]);
450 new_cpu = move(cpu, allowed_mask, now, 1);
451 if (cpu != new_cpu)
452 set_pending_irq(irq, cpumask_of_cpu(new_cpu));
453}
454
455static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
456{
457 int i, j;
458
459 for_each_online_cpu(i) {
460 for (j = 0; j < NR_IRQS; j++) {
461 if (!irq_desc[j].action)
462 continue;
463 /* Is it a significant load ? */
464 if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i), j) <
465 useful_load_threshold)
466 continue;
467 balance_irq(i, j);
468 }
469 }
470 balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
471 balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
472 return;
473}
474
475static void do_irq_balance(void)
476{
477 int i, j;
478 unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
479 unsigned long move_this_load = 0;
480 int max_loaded = 0, min_loaded = 0;
481 int load;
482 unsigned long useful_load_threshold = balanced_irq_interval + 10;
483 int selected_irq;
484 int tmp_loaded, first_attempt = 1;
485 unsigned long tmp_cpu_irq;
486 unsigned long imbalance = 0;
487 cpumask_t allowed_mask, target_cpu_mask, tmp;
488
489 for_each_possible_cpu(i) {
490 int package_index;
491 CPU_IRQ(i) = 0;
492 if (!cpu_online(i))
493 continue;
494 package_index = CPU_TO_PACKAGEINDEX(i);
495 for (j = 0; j < NR_IRQS; j++) {
496 unsigned long value_now, delta;
497 /* Is this an active IRQ or balancing disabled ? */
498 if (!irq_desc[j].action || irq_balancing_disabled(j))
499 continue;
500 if (package_index == i)
501 IRQ_DELTA(package_index, j) = 0;
502 /* Determine the total count per processor per IRQ */
503 value_now = (unsigned long) kstat_cpu(i).irqs[j];
504
505 /* Determine the activity per processor per IRQ */
506 delta = value_now - LAST_CPU_IRQ(i, j);
507
508 /* Update last_cpu_irq[][] for the next time */
509 LAST_CPU_IRQ(i, j) = value_now;
510
511 /* Ignore IRQs whose rate is less than the clock */
512 if (delta < useful_load_threshold)
513 continue;
514 /* update the load for the processor or package total */
515 IRQ_DELTA(package_index, j) += delta;
516
517 /* Keep track of the higher numbered sibling as well */
518 if (i != package_index)
519 CPU_IRQ(i) += delta;
520 /*
521 * We have sibling A and sibling B in the package
522 *
523 * cpu_irq[A] = load for cpu A + load for cpu B
524 * cpu_irq[B] = load for cpu B
525 */
526 CPU_IRQ(package_index) += delta;
527 }
528 }
529 /* Find the least loaded processor package */
530 for_each_online_cpu(i) {
531 if (i != CPU_TO_PACKAGEINDEX(i))
532 continue;
533 if (min_cpu_irq > CPU_IRQ(i)) {
534 min_cpu_irq = CPU_IRQ(i);
535 min_loaded = i;
536 }
537 }
538 max_cpu_irq = ULONG_MAX;
539
540tryanothercpu:
541 /*
542 * Look for heaviest loaded processor.
543 * We may come back to get the next heaviest loaded processor.
544 * Skip processors with trivial loads.
545 */
546 tmp_cpu_irq = 0;
547 tmp_loaded = -1;
548 for_each_online_cpu(i) {
549 if (i != CPU_TO_PACKAGEINDEX(i))
550 continue;
551 if (max_cpu_irq <= CPU_IRQ(i))
552 continue;
553 if (tmp_cpu_irq < CPU_IRQ(i)) {
554 tmp_cpu_irq = CPU_IRQ(i);
555 tmp_loaded = i;
556 }
557 }
558
559 if (tmp_loaded == -1) {
560 /*
561 * In the case of a small number of heavy interrupt sources,
562 * some of the cpus can get loaded too much. We use Ingo's
563 * original approach to rotate them around.
564 */
565 if (!first_attempt && imbalance >= useful_load_threshold) {
566 rotate_irqs_among_cpus(useful_load_threshold);
567 return;
568 }
569 goto not_worth_the_effort;
570 }
571
572 first_attempt = 0; /* heaviest search */
573 max_cpu_irq = tmp_cpu_irq; /* load */
574 max_loaded = tmp_loaded; /* processor */
575 imbalance = (max_cpu_irq - min_cpu_irq) / 2;
576
577 /*
578 * if imbalance is less than approx 10% of max load, then
579 * observe diminishing returns action. - quit
580 */
581 if (imbalance < (max_cpu_irq >> 3))
582 goto not_worth_the_effort;
583
584tryanotherirq:
585 /* if we select an IRQ to move that can't go where we want, then
586 * see if there is another one to try.
587 */
588 move_this_load = 0;
589 selected_irq = -1;
590 for (j = 0; j < NR_IRQS; j++) {
591 /* Is this an active IRQ? */
592 if (!irq_desc[j].action)
593 continue;
594 if (imbalance <= IRQ_DELTA(max_loaded, j))
595 continue;
596 /* Try to find the IRQ that is closest to the imbalance
597 * without going over.
598 */
599 if (move_this_load < IRQ_DELTA(max_loaded, j)) {
600 move_this_load = IRQ_DELTA(max_loaded, j);
601 selected_irq = j;
602 }
603 }
604 if (selected_irq == -1)
605 goto tryanothercpu;
606
607 imbalance = move_this_load;
608
609 /* For physical_balance case, we accumulated both load
610 * values in the one of the siblings cpu_irq[],
611 * to use the same code for physical and logical processors
612 * as much as possible.
613 *
614 * NOTE: the cpu_irq[] array holds the sum of the load for
615 * sibling A and sibling B in the slot for the lowest numbered
616 * sibling (A), _AND_ the load for sibling B in the slot for
617 * the higher numbered sibling.
618 *
619 * We seek the least loaded sibling by making the comparison
620 * (A+B)/2 vs B
621 */
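	/*
	 * Worked example with hypothetical loads: if sibling A handled
	 * 100 units and sibling B handled 30, then CPU_IRQ(A) = 130 and
	 * CPU_IRQ(B) = 30.  load = 130 >> 1 = 65 > 30, so the loop below
	 * settles on B as the less loaded sibling.
	 */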
622 load = CPU_IRQ(min_loaded) >> 1;
623 for_each_cpu_mask(j, per_cpu(cpu_sibling_map, min_loaded)) {
624 if (load > CPU_IRQ(j)) {
625 /* This won't change cpu_sibling_map[min_loaded] */
626 load = CPU_IRQ(j);
627 min_loaded = j;
628 }
629 }
630
631 cpus_and(allowed_mask,
632 cpu_online_map,
633 balance_irq_affinity[selected_irq]);
634 target_cpu_mask = cpumask_of_cpu(min_loaded);
635 cpus_and(tmp, target_cpu_mask, allowed_mask);
636
637 if (!cpus_empty(tmp)) {
638 /* mark for change destination */
639 set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
640
641 /* Since we made a change, come back sooner to
642 * check for more variation.
643 */
644 balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
645 balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
646 return;
647 }
648 goto tryanotherirq;
649
650not_worth_the_effort:
651 /*
652 * if we did not find an IRQ to move, then adjust the time interval
653 * upward
654 */
655 balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
656 balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);
657 return;
658}
659
660static int balanced_irq(void *unused)
661{
662 int i;
663 unsigned long prev_balance_time = jiffies;
664 long time_remaining = balanced_irq_interval;
665
666 /* push everything to CPU 0 to give us a starting point. */
667 for (i = 0 ; i < NR_IRQS ; i++) {
668 irq_desc[i].pending_mask = cpumask_of_cpu(0);
669 set_pending_irq(i, cpumask_of_cpu(0));
670 }
671
672 set_freezable();
673 for ( ; ; ) {
674 time_remaining = schedule_timeout_interruptible(time_remaining);
675 try_to_freeze();
676 if (time_after(jiffies,
677 prev_balance_time+balanced_irq_interval)) {
678 preempt_disable();
679 do_irq_balance();
680 prev_balance_time = jiffies;
681 time_remaining = balanced_irq_interval;
682 preempt_enable();
683 }
684 }
685 return 0;
686}
687
688static int __init balanced_irq_init(void)
689{
690 int i;
691 struct cpuinfo_x86 *c;
692 cpumask_t tmp;
693
694 cpus_shift_right(tmp, cpu_online_map, 2);
695 c = &boot_cpu_data;
696 /* When not overridden on the command line, ask the subarchitecture. */
697 if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
698 irqbalance_disabled = NO_BALANCE_IRQ;
699 if (irqbalance_disabled)
700 return 0;
701
702 /* disable irqbalance completely if there is only one processor online */
703 if (num_online_cpus() < 2) {
704 irqbalance_disabled = 1;
705 return 0;
706 }
707 /*
708 * Enable physical balance only if more than 1 physical processor
709 * is present
710 */
711 if (smp_num_siblings > 1 && !cpus_empty(tmp))
712 physical_balance = 1;
713
714 for_each_online_cpu(i) {
715 irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
716 irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
717 if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
718 printk(KERN_ERR "balanced_irq_init: out of memory\n");
719 goto failed;
720 }
721 }
722
723 printk(KERN_INFO "Starting balanced_irq\n");
724 if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd")))
725 return 0;
726 printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq\n");
727failed:
728 for_each_possible_cpu(i) {
729 kfree(irq_cpu_data[i].irq_delta);
730 irq_cpu_data[i].irq_delta = NULL;
731 kfree(irq_cpu_data[i].last_irq);
732 irq_cpu_data[i].last_irq = NULL;
733 }
734 return 0;
735}
736
737int __devinit irqbalance_disable(char *str)
738{
739 irqbalance_disabled = 1;
740 return 1;
741}
742
743__setup("noirqbalance", irqbalance_disable);
744
745late_initcall(balanced_irq_init);
746#endif /* CONFIG_IRQBALANCE */
747#endif /* CONFIG_SMP */
748
749#ifndef CONFIG_SMP
750void send_IPI_self(int vector)
751{
752 unsigned int cfg;
753
754 /*
755 * Wait for idle.
756 */
757 apic_wait_icr_idle();
758 cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
759 /*
760 * Send the IPI. The write to APIC_ICR fires this off.
761 */
762 apic_write(APIC_ICR, cfg);
763}
764#endif /* !CONFIG_SMP */
765
766
767/*
768 * support for broken MP BIOSes, enables hand-redirection of PIRQ0-7 to
769 * specific CPU-side IRQs.
770 */
771
772#define MAX_PIRQS 8
773static int pirq_entries [MAX_PIRQS];
774static int pirqs_enabled;
775int skip_ioapic_setup;
776
777static int __init ioapic_pirq_setup(char *str)
778{
779 int i, max;
780 int ints[MAX_PIRQS+1];
781
782 get_options(str, ARRAY_SIZE(ints), ints);
783
784 for (i = 0; i < MAX_PIRQS; i++)
785 pirq_entries[i] = -1;
786
787 pirqs_enabled = 1;
788 apic_printk(APIC_VERBOSE, KERN_INFO
789 "PIRQ redirection, working around broken MP-BIOS.\n");
790 max = MAX_PIRQS;
791 if (ints[0] < MAX_PIRQS)
792 max = ints[0];
793
794 for (i = 0; i < max; i++) {
795 apic_printk(APIC_VERBOSE, KERN_DEBUG
796 "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
797 /*
798 * PIRQs are mapped upside down, usually.
799 */
800 pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
801 }
802 return 1;
803}
804
805__setup("pirq=", ioapic_pirq_setup);
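/*
 * Hypothetical usage example: booting with "pirq=9,11" parses into
 * ints[] = { 2, 9, 11 } and fills pirq_entries[] in reverse, so I/O APIC
 * pin 23 (PIRQ7) is redirected to IRQ 9 and pin 22 (PIRQ6) to IRQ 11;
 * pin_2_irq() below honours these entries for pins 16-23.
 */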
806
807/*
808 * Find the IRQ entry number of a certain pin.
809 */
810static int find_irq_entry(int apic, int pin, int type)
811{
812 int i;
813
814 for (i = 0; i < mp_irq_entries; i++)
815 if (mp_irqs[i].mp_irqtype == type &&
816 (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
817 mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
818 mp_irqs[i].mp_dstirq == pin)
819 return i;
820
821 return -1;
822}
823
824/*
825 * Find the pin to which IRQ[irq] (ISA) is connected
826 */
827static int __init find_isa_irq_pin(int irq, int type)
828{
829 int i;
830
831 for (i = 0; i < mp_irq_entries; i++) {
832 int lbus = mp_irqs[i].mp_srcbus;
833
834 if (test_bit(lbus, mp_bus_not_pci) &&
835 (mp_irqs[i].mp_irqtype == type) &&
836 (mp_irqs[i].mp_srcbusirq == irq))
837
838 return mp_irqs[i].mp_dstirq;
839 }
840 return -1;
841}
842
843static int __init find_isa_irq_apic(int irq, int type)
844{
845 int i;
846
847 for (i = 0; i < mp_irq_entries; i++) {
848 int lbus = mp_irqs[i].mp_srcbus;
849
850 if (test_bit(lbus, mp_bus_not_pci) &&
851 (mp_irqs[i].mp_irqtype == type) &&
852 (mp_irqs[i].mp_srcbusirq == irq))
853 break;
854 }
855 if (i < mp_irq_entries) {
856 int apic;
857 for (apic = 0; apic < nr_ioapics; apic++) {
858 if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
859 return apic;
860 }
861 }
862
863 return -1;
864}
865
866/*
867 * Find a specific PCI IRQ entry.
868 * Not an __init, possibly needed by modules
869 */
870static int pin_2_irq(int idx, int apic, int pin);
871
872int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
873{
874 int apic, i, best_guess = -1;
875
876 apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, "
877 "slot:%d, pin:%d.\n", bus, slot, pin);
878 if (test_bit(bus, mp_bus_not_pci)) {
879 printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
880 return -1;
881 }
882 for (i = 0; i < mp_irq_entries; i++) {
883 int lbus = mp_irqs[i].mp_srcbus;
884
885 for (apic = 0; apic < nr_ioapics; apic++)
886 if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
887 mp_irqs[i].mp_dstapic == MP_APIC_ALL)
888 break;
889
890 if (!test_bit(lbus, mp_bus_not_pci) &&
891 !mp_irqs[i].mp_irqtype &&
892 (bus == lbus) &&
893 (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
894 int irq = pin_2_irq(i, apic, mp_irqs[i].mp_dstirq);
895
896 if (!(apic || IO_APIC_IRQ(irq)))
897 continue;
898
899 if (pin == (mp_irqs[i].mp_srcbusirq & 3))
900 return irq;
901 /*
902 * Use the first all-but-pin matching entry as a
903 * best-guess fuzzy result for broken mptables.
904 */
905 if (best_guess < 0)
906 best_guess = irq;
907 }
908 }
909 return best_guess;
910}
911EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
912
913/*
914 * This function is currently only a helper for the i386 SMP boot process,
915 * where we need to reprogram the ioredtbls to cater for the cpus which have
916 * come online, so the mask should in all cases simply be TARGET_CPUS.
917 */
918#ifdef CONFIG_SMP
919void __init setup_ioapic_dest(void)
920{
921 int pin, ioapic, irq, irq_entry;
922
923 if (skip_ioapic_setup == 1)
924 return;
925
926 for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
927 for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
928 irq_entry = find_irq_entry(ioapic, pin, mp_INT);
929 if (irq_entry == -1)
930 continue;
931 irq = pin_2_irq(irq_entry, ioapic, pin);
932 set_ioapic_affinity_irq(irq, TARGET_CPUS);
933 }
934
935 }
936}
937#endif
938
939#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
940/*
941 * EISA Edge/Level control register, ELCR
942 */
943static int EISA_ELCR(unsigned int irq)
944{
945 if (irq < 16) {
946 unsigned int port = 0x4d0 + (irq >> 3);
947 return (inb(port) >> (irq & 7)) & 1;
948 }
949 apic_printk(APIC_VERBOSE, KERN_INFO
950 "Broken MPtable reports ISA irq %d\n", irq);
951 return 0;
952}
953#endif
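/*
 * Example: for ISA irq 10 the ELCR byte lives at port 0x4d0 + (10 >> 3)
 * = 0x4d1 and bit (10 & 7) = 2 is tested; 1 means level triggered,
 * 0 means edge triggered.
 */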
954
955/* ISA interrupts are always polarity zero edge triggered,
956 * when listed as conforming in the MP table. */
957
958#define default_ISA_trigger(idx) (0)
959#define default_ISA_polarity(idx) (0)
960
961/* EISA interrupts are always polarity zero and can be edge or level
962 * trigger depending on the ELCR value. If an interrupt is listed as
963 * EISA conforming in the MP table, that means its trigger type must
964 * be read in from the ELCR */
965
966#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
967#define default_EISA_polarity(idx) default_ISA_polarity(idx)
968
969/* PCI interrupts are always polarity one level triggered,
970 * when listed as conforming in the MP table. */
971
972#define default_PCI_trigger(idx) (1)
973#define default_PCI_polarity(idx) (1)
974
975/* MCA interrupts are always polarity zero level triggered,
976 * when listed as conforming in the MP table. */
977
978#define default_MCA_trigger(idx) (1)
979#define default_MCA_polarity(idx) default_ISA_polarity(idx)
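/* Summary of the "conforming" defaults above:
 *
 *	bus	polarity		trigger
 *	ISA	0 (active high)		0 (edge)
 *	EISA	0 (active high)		read from the ELCR
 *	PCI	1 (active low)		1 (level)
 *	MCA	0 (active high)		1 (level)
 */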
980
981static int MPBIOS_polarity(int idx)
982{
983 int bus = mp_irqs[idx].mp_srcbus;
984 int polarity;
985
986 /*
987 * Determine IRQ line polarity (high active or low active):
988 */
989 switch (mp_irqs[idx].mp_irqflag & 3) {
990 case 0: /* conforms, ie. bus-type dependent polarity */
991 {
992 polarity = test_bit(bus, mp_bus_not_pci)?
993 default_ISA_polarity(idx):
994 default_PCI_polarity(idx);
995 break;
996 }
997 case 1: /* high active */
998 {
999 polarity = 0;
1000 break;
1001 }
1002 case 2: /* reserved */
1003 {
1004 printk(KERN_WARNING "broken BIOS!!\n");
1005 polarity = 1;
1006 break;
1007 }
1008 case 3: /* low active */
1009 {
1010 polarity = 1;
1011 break;
1012 }
1013 default: /* invalid */
1014 {
1015 printk(KERN_WARNING "broken BIOS!!\n");
1016 polarity = 1;
1017 break;
1018 }
1019 }
1020 return polarity;
1021}
1022
1023static int MPBIOS_trigger(int idx)
1024{
1025 int bus = mp_irqs[idx].mp_srcbus;
1026 int trigger;
1027
1028 /*
1029 * Determine IRQ trigger mode (edge or level sensitive):
1030 */
1031 switch ((mp_irqs[idx].mp_irqflag>>2) & 3) {
1032 case 0: /* conforms, ie. bus-type dependent */
1033 {
1034 trigger = test_bit(bus, mp_bus_not_pci)?
1035 default_ISA_trigger(idx):
1036 default_PCI_trigger(idx);
1037#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
1038 switch (mp_bus_id_to_type[bus]) {
1039 case MP_BUS_ISA: /* ISA pin */
1040 {
1041 /* set before the switch */
1042 break;
1043 }
1044 case MP_BUS_EISA: /* EISA pin */
1045 {
1046 trigger = default_EISA_trigger(idx);
1047 break;
1048 }
1049 case MP_BUS_PCI: /* PCI pin */
1050 {
1051 /* set before the switch */
1052 break;
1053 }
1054 case MP_BUS_MCA: /* MCA pin */
1055 {
1056 trigger = default_MCA_trigger(idx);
1057 break;
1058 }
1059 default:
1060 {
1061 printk(KERN_WARNING "broken BIOS!!\n");
1062 trigger = 1;
1063 break;
1064 }
1065 }
1066#endif
1067 break;
1068 }
1069 case 1: /* edge */
1070 {
1071 trigger = 0;
1072 break;
1073 }
1074 case 2: /* reserved */
1075 {
1076 printk(KERN_WARNING "broken BIOS!!\n");
1077 trigger = 1;
1078 break;
1079 }
1080 case 3: /* level */
1081 {
1082 trigger = 1;
1083 break;
1084 }
1085 default: /* invalid */
1086 {
1087 printk(KERN_WARNING "broken BIOS!!\n");
1088 trigger = 0;
1089 break;
1090 }
1091 }
1092 return trigger;
1093}
1094
1095static inline int irq_polarity(int idx)
1096{
1097 return MPBIOS_polarity(idx);
1098}
1099
1100static inline int irq_trigger(int idx)
1101{
1102 return MPBIOS_trigger(idx);
1103}
1104
1105static int pin_2_irq(int idx, int apic, int pin)
1106{
1107 int irq, i;
1108 int bus = mp_irqs[idx].mp_srcbus;
1109
1110 /*
1111 * Debugging check, we are in big trouble if this message pops up!
1112 */
1113 if (mp_irqs[idx].mp_dstirq != pin)
1114 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
1115
1116 if (test_bit(bus, mp_bus_not_pci))
1117 irq = mp_irqs[idx].mp_srcbusirq;
1118 else {
1119 /*
1120 * PCI IRQs are mapped in order
1121 */
1122 i = irq = 0;
1123 while (i < apic)
1124 irq += nr_ioapic_registers[i++];
1125 irq += pin;
1126
1127 /*
1128 * For MPS mode, so far only needed by ES7000 platform
1129 */
1130 if (ioapic_renumber_irq)
1131 irq = ioapic_renumber_irq(apic, irq);
1132 }
1133
1134 /*
1135 * PCI IRQ command line redirection. Yes, limits are hardcoded.
1136 */
1137 if ((pin >= 16) && (pin <= 23)) {
1138 if (pirq_entries[pin-16] != -1) {
1139 if (!pirq_entries[pin-16]) {
1140 apic_printk(APIC_VERBOSE, KERN_DEBUG
1141 "disabling PIRQ%d\n", pin-16);
1142 } else {
1143 irq = pirq_entries[pin-16];
1144 apic_printk(APIC_VERBOSE, KERN_DEBUG
1145 "using PIRQ%d -> IRQ %d\n",
1146 pin-16, irq);
1147 }
1148 }
1149 }
1150 return irq;
1151}
1152
1153static inline int IO_APIC_irq_trigger(int irq)
1154{
1155 int apic, idx, pin;
1156
1157 for (apic = 0; apic < nr_ioapics; apic++) {
1158 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
1159 idx = find_irq_entry(apic, pin, mp_INT);
1160 if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
1161 return irq_trigger(idx);
1162 }
1163 }
1164 /*
1165 * nonexistent IRQs are edge default
1166 */
1167 return 0;
1168}
1169
1170/* irq_vector[] is indexed by the sum of all RTEs in all I/O APICs. */
1171static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
1172
1173static int __assign_irq_vector(int irq)
1174{
1175 static int current_vector = FIRST_DEVICE_VECTOR, current_offset;
1176 int vector, offset;
1177
1178 BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
1179
1180 if (irq_vector[irq] > 0)
1181 return irq_vector[irq];
1182
1183 vector = current_vector;
1184 offset = current_offset;
1185next:
1186 vector += 8;
1187 if (vector >= first_system_vector) {
1188 offset = (offset + 1) % 8;
1189 vector = FIRST_DEVICE_VECTOR + offset;
1190 }
1191 if (vector == current_vector)
1192 return -ENOSPC;
1193 if (test_and_set_bit(vector, used_vectors))
1194 goto next;
1195
1196 current_vector = vector;
1197 current_offset = offset;
1198 irq_vector[irq] = vector;
1199
1200 return vector;
1201}
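/*
 * Allocation pattern, assuming for illustration that FIRST_DEVICE_VECTOR
 * is 0x41: successive calls hand out 0x49, 0x51, 0x59, ... in steps of 8,
 * and on reaching first_system_vector the offset rotates so the next pass
 * starts at 0x42, 0x4a, ...  Stepping by 8 spreads a burst of allocations
 * across priority levels (vector >> 4) instead of filling one level.
 */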
1202
1203static int assign_irq_vector(int irq)
1204{
1205 unsigned long flags;
1206 int vector;
1207
1208 spin_lock_irqsave(&vector_lock, flags);
1209 vector = __assign_irq_vector(irq);
1210 spin_unlock_irqrestore(&vector_lock, flags);
1211
1212 return vector;
1213}
1214
1215static struct irq_chip ioapic_chip;
1216
1217#define IOAPIC_AUTO -1
1218#define IOAPIC_EDGE 0
1219#define IOAPIC_LEVEL 1
1220
1221static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
1222{
1223 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1224 trigger == IOAPIC_LEVEL) {
1225 irq_desc[irq].status |= IRQ_LEVEL;
1226 set_irq_chip_and_handler_name(irq, &ioapic_chip,
1227 handle_fasteoi_irq, "fasteoi");
1228 } else {
1229 irq_desc[irq].status &= ~IRQ_LEVEL;
1230 set_irq_chip_and_handler_name(irq, &ioapic_chip,
1231 handle_edge_irq, "edge");
1232 }
1233 set_intr_gate(vector, interrupt[irq]);
1234}
1235
1236static void __init setup_IO_APIC_irqs(void)
1237{
1238 struct IO_APIC_route_entry entry;
1239 int apic, pin, idx, irq, first_notcon = 1, vector;
1240
1241 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
1242
1243 for (apic = 0; apic < nr_ioapics; apic++) {
1244 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
1245
1246 /*
1247 * add it to the IO-APIC irq-routing table:
1248 */
1249 memset(&entry, 0, sizeof(entry));
1250
1251 entry.delivery_mode = INT_DELIVERY_MODE;
1252 entry.dest_mode = INT_DEST_MODE;
1253 entry.mask = 0; /* enable IRQ */
1254 entry.dest.logical.logical_dest =
1255 cpu_mask_to_apicid(TARGET_CPUS);
1256
1257 idx = find_irq_entry(apic, pin, mp_INT);
1258 if (idx == -1) {
1259 if (first_notcon) {
1260 apic_printk(APIC_VERBOSE, KERN_DEBUG
1261 " IO-APIC (apicid-pin) %d-%d",
1262 mp_ioapics[apic].mp_apicid,
1263 pin);
1264 first_notcon = 0;
1265 } else
1266 apic_printk(APIC_VERBOSE, ", %d-%d",
1267 mp_ioapics[apic].mp_apicid, pin);
1268 continue;
1269 }
1270
1271 if (!first_notcon) {
1272 apic_printk(APIC_VERBOSE, " not connected.\n");
1273 first_notcon = 1;
1274 }
1275
1276 entry.trigger = irq_trigger(idx);
1277 entry.polarity = irq_polarity(idx);
1278
1279 if (irq_trigger(idx)) {
1280 entry.trigger = 1;
1281 entry.mask = 1;
1282 }
1283
1284 irq = pin_2_irq(idx, apic, pin);
1285 /*
1286 * skip adding the timer int on secondary nodes, which causes
1287 * a small but painful rift in the time-space continuum
1288 */
1289 if (multi_timer_check(apic, irq))
1290 continue;
1291 else
1292 add_pin_to_irq(irq, apic, pin);
1293
1294 if (!apic && !IO_APIC_IRQ(irq))
1295 continue;
1296
1297 if (IO_APIC_IRQ(irq)) {
1298 vector = assign_irq_vector(irq);
1299 entry.vector = vector;
1300 ioapic_register_intr(irq, vector, IOAPIC_AUTO);
1301
1302 if (!apic && (irq < 16))
1303 disable_8259A_irq(irq);
1304 }
1305 ioapic_write_entry(apic, pin, entry);
1306 }
1307 }
1308
1309 if (!first_notcon)
1310 apic_printk(APIC_VERBOSE, " not connected.\n");
1311}
1312
1313/*
1314 * Set up the timer pin, possibly with the 8259A-master behind.
1315 */
1316static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
1317 int vector)
1318{
1319 struct IO_APIC_route_entry entry;
1320
1321 memset(&entry, 0, sizeof(entry));
1322
1323 /*
1324 * We use logical delivery to get the timer IRQ
1325 * to the first CPU.
1326 */
1327 entry.dest_mode = INT_DEST_MODE;
1328 entry.mask = 1; /* mask IRQ now */
1329 entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
1330 entry.delivery_mode = INT_DELIVERY_MODE;
1331 entry.polarity = 0;
1332 entry.trigger = 0;
1333 entry.vector = vector;
1334
1335 /*
1336 * The timer IRQ doesn't have to know that behind the
1337 * scene we may have a 8259A-master in AEOI mode ...
1338 */
1339 ioapic_register_intr(0, vector, IOAPIC_EDGE);
1340
1341 /*
1342 * Add it to the IO-APIC irq-routing table:
1343 */
1344 ioapic_write_entry(apic, pin, entry);
1345}
1346
1347
1348__apicdebuginit(void) print_IO_APIC(void)
1349{
1350 int apic, i;
1351 union IO_APIC_reg_00 reg_00;
1352 union IO_APIC_reg_01 reg_01;
1353 union IO_APIC_reg_02 reg_02;
1354 union IO_APIC_reg_03 reg_03;
1355 unsigned long flags;
1356
1357 if (apic_verbosity == APIC_QUIET)
1358 return;
1359
1360 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
1361 for (i = 0; i < nr_ioapics; i++)
1362 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
1363 mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
1364
1365 /*
1366 * We are a bit conservative about what we expect. We have to
1367 * know about every hardware change ASAP.
1368 */
1369 printk(KERN_INFO "testing the IO APIC.......................\n");
1370
1371 for (apic = 0; apic < nr_ioapics; apic++) {
1372
1373 spin_lock_irqsave(&ioapic_lock, flags);
1374 reg_00.raw = io_apic_read(apic, 0);
1375 reg_01.raw = io_apic_read(apic, 1);
1376 if (reg_01.bits.version >= 0x10)
1377 reg_02.raw = io_apic_read(apic, 2);
1378 if (reg_01.bits.version >= 0x20)
1379 reg_03.raw = io_apic_read(apic, 3);
1380 spin_unlock_irqrestore(&ioapic_lock, flags);
1381
1382 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
1383 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
1384 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
1385 printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
1386 printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS);
1387
1388 printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
1389 printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
1390
1391 printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
1392 printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
1393
1394 /*
1395 * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
1396 * but the value of reg_02 is read as the previous read register
1397 * value, so ignore it if reg_02 == reg_01.
1398 */
1399 if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
1400 printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
1401 printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
1402 }
1403
1404 /*
1405 * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
1406 * or reg_03, but the value of reg_0[23] is read as the previous read
1407 * register value, so ignore it if reg_03 == reg_0[12].
1408 */
1409 if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
1410 reg_03.raw != reg_01.raw) {
1411 printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
1412 printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT);
1413 }
1414
1415 printk(KERN_DEBUG ".... IRQ redirection table:\n");
1416
1417 printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
1418 " Stat Dest Deli Vect: \n");
1419
1420 for (i = 0; i <= reg_01.bits.entries; i++) {
1421 struct IO_APIC_route_entry entry;
1422
1423 entry = ioapic_read_entry(apic, i);
1424
1425 printk(KERN_DEBUG " %02x %03X %02X ",
1426 i,
1427 entry.dest.logical.logical_dest,
1428 entry.dest.physical.physical_dest
1429 );
1430
1431 printk("%1d %1d %1d %1d %1d %1d %1d %02X\n",
1432 entry.mask,
1433 entry.trigger,
1434 entry.irr,
1435 entry.polarity,
1436 entry.delivery_status,
1437 entry.dest_mode,
1438 entry.delivery_mode,
1439 entry.vector
1440 );
1441 }
1442 }
1443 printk(KERN_DEBUG "IRQ to pin mappings:\n");
1444 for (i = 0; i < NR_IRQS; i++) {
1445 struct irq_pin_list *entry = irq_2_pin + i;
1446 if (entry->pin < 0)
1447 continue;
1448 printk(KERN_DEBUG "IRQ%d ", i);
1449 for (;;) {
1450 printk("-> %d:%d", entry->apic, entry->pin);
1451 if (!entry->next)
1452 break;
1453 entry = irq_2_pin + entry->next;
1454 }
1455 printk("\n");
1456 }
1457
1458 printk(KERN_INFO ".................................... done.\n");
1459
1460 return;
1461}
1462
1463__apicdebuginit(void) print_APIC_bitfield(int base)
1464{
1465 unsigned int v;
1466 int i, j;
1467
1468 if (apic_verbosity == APIC_QUIET)
1469 return;
1470
1471 printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
1472 for (i = 0; i < 8; i++) {
1473 v = apic_read(base + i*0x10);
1474 for (j = 0; j < 32; j++) {
1475 if (v & (1<<j))
1476 printk("1");
1477 else
1478 printk("0");
1479 }
1480 printk("\n");
1481 }
1482}
1483
1484__apicdebuginit(void) print_local_APIC(void *dummy)
1485{
1486 unsigned int v, ver, maxlvt;
1487 u64 icr;
1488
1489 if (apic_verbosity == APIC_QUIET)
1490 return;
1491
1492 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
1493 smp_processor_id(), hard_smp_processor_id());
1494 v = apic_read(APIC_ID);
1495 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v,
1496 GET_APIC_ID(v));
1497 v = apic_read(APIC_LVR);
1498 printk(KERN_INFO "... APIC VERSION: %08x\n", v);
1499 ver = GET_APIC_VERSION(v);
1500 maxlvt = lapic_get_maxlvt();
1501
1502 v = apic_read(APIC_TASKPRI);
1503 printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
1504
1505 if (APIC_INTEGRATED(ver)) { /* !82489DX */
1506 v = apic_read(APIC_ARBPRI);
1507 printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
1508 v & APIC_ARBPRI_MASK);
1509 v = apic_read(APIC_PROCPRI);
1510 printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
1511 }
1512
1513 v = apic_read(APIC_EOI);
1514 printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
1515 v = apic_read(APIC_RRR);
1516 printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
1517 v = apic_read(APIC_LDR);
1518 printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
1519 v = apic_read(APIC_DFR);
1520 printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
1521 v = apic_read(APIC_SPIV);
1522 printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
1523
1524 printk(KERN_DEBUG "... APIC ISR field:\n");
1525 print_APIC_bitfield(APIC_ISR);
1526 printk(KERN_DEBUG "... APIC TMR field:\n");
1527 print_APIC_bitfield(APIC_TMR);
1528 printk(KERN_DEBUG "... APIC IRR field:\n");
1529 print_APIC_bitfield(APIC_IRR);
1530
1531 if (APIC_INTEGRATED(ver)) { /* !82489DX */
1532 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
1533 apic_write(APIC_ESR, 0);
1534 v = apic_read(APIC_ESR);
1535 printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
1536 }
1537
1538 icr = apic_icr_read();
1539 printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
1540 printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32));
1541
1542 v = apic_read(APIC_LVTT);
1543 printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
1544
1545 if (maxlvt > 3) { /* PC is LVT#4. */
1546 v = apic_read(APIC_LVTPC);
1547 printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
1548 }
1549 v = apic_read(APIC_LVT0);
1550 printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
1551 v = apic_read(APIC_LVT1);
1552 printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
1553
1554 if (maxlvt > 2) { /* ERR is LVT#3. */
1555 v = apic_read(APIC_LVTERR);
1556 printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
1557 }
1558
1559 v = apic_read(APIC_TMICT);
1560 printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
1561 v = apic_read(APIC_TMCCT);
1562 printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
1563 v = apic_read(APIC_TDCR);
1564 printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
1565 printk("\n");
1566}
1567
1568__apicdebuginit(void) print_all_local_APICs(void)
1569{
1570 on_each_cpu(print_local_APIC, NULL, 1);
1571}
1572
1573__apicdebuginit(void) print_PIC(void)
1574{
1575 unsigned int v;
1576 unsigned long flags;
1577
1578 if (apic_verbosity == APIC_QUIET)
1579 return;
1580
1581 printk(KERN_DEBUG "\nprinting PIC contents\n");
1582
1583 spin_lock_irqsave(&i8259A_lock, flags);
1584
1585 v = inb(0xa1) << 8 | inb(0x21);
1586 printk(KERN_DEBUG "... PIC IMR: %04x\n", v);
1587
1588 v = inb(0xa0) << 8 | inb(0x20);
1589 printk(KERN_DEBUG "... PIC IRR: %04x\n", v);
1590
1591 outb(0x0b, 0xa0);
1592 outb(0x0b, 0x20);
1593 v = inb(0xa0) << 8 | inb(0x20);
1594 outb(0x0a, 0xa0);
1595 outb(0x0a, 0x20);
1596
1597 spin_unlock_irqrestore(&i8259A_lock, flags);
1598
1599 printk(KERN_DEBUG "... PIC ISR: %04x\n", v);
1600
1601 v = inb(0x4d1) << 8 | inb(0x4d0);
1602 printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
1603}
1604
1605__apicdebuginit(int) print_all_ICs(void)
1606{
1607 print_PIC();
1608 print_all_local_APICs();
1609 print_IO_APIC();
1610
1611 return 0;
1612}
1613
1614fs_initcall(print_all_ICs);
1615
1616
1617static void __init enable_IO_APIC(void)
1618{
1619 union IO_APIC_reg_01 reg_01;
1620 int i8259_apic, i8259_pin;
1621 int i, apic;
1622 unsigned long flags;
1623
1624 for (i = 0; i < PIN_MAP_SIZE; i++) {
1625 irq_2_pin[i].pin = -1;
1626 irq_2_pin[i].next = 0;
1627 }
1628 if (!pirqs_enabled)
1629 for (i = 0; i < MAX_PIRQS; i++)
1630 pirq_entries[i] = -1;
1631
1632 /*
1633 * The number of IO-APIC IRQ registers (== #pins):
1634 */
1635 for (apic = 0; apic < nr_ioapics; apic++) {
1636 spin_lock_irqsave(&ioapic_lock, flags);
1637 reg_01.raw = io_apic_read(apic, 1);
1638 spin_unlock_irqrestore(&ioapic_lock, flags);
1639 nr_ioapic_registers[apic] = reg_01.bits.entries+1;
1640 }
1641 for (apic = 0; apic < nr_ioapics; apic++) {
1642 int pin;
1643 /* See if any of the pins is in ExtINT mode */
1644 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
1645 struct IO_APIC_route_entry entry;
1646 entry = ioapic_read_entry(apic, pin);
1647
1648
1649 /* If the interrupt line is enabled and in ExtInt mode
1650 * I have found the pin where the i8259 is connected.
1651 */
1652 if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
1653 ioapic_i8259.apic = apic;
1654 ioapic_i8259.pin = pin;
1655 goto found_i8259;
1656 }
1657 }
1658 }
1659 found_i8259:
1660 /* Look to see if the MP table has reported the ExtINT */
1661 /* If we could not find the appropriate pin by looking at the ioapic,
1662 * the i8259 is probably not connected to the ioapic, but give the
1663 * mptable a chance anyway.
1664 */
1665 i8259_pin = find_isa_irq_pin(0, mp_ExtINT);
1666 i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
1667 /* Trust the MP table if nothing is setup in the hardware */
1668 if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
1669 printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
1670 ioapic_i8259.pin = i8259_pin;
1671 ioapic_i8259.apic = i8259_apic;
1672 }
1673 /* Complain if the MP table and the hardware disagree */
1674 if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
1675 (i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
1676 {
1677 printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
1678 }
1679
1680 /*
1681 * Do not trust the IO-APIC being empty at bootup
1682 */
1683 clear_IO_APIC();
1684}
1685
1686/*
1687 * Not an __init, needed by the reboot code
1688 */
1689void disable_IO_APIC(void)
1690{
1691 /*
1692 * Clear the IO-APIC before rebooting:
1693 */
1694 clear_IO_APIC();
1695
1696 /*
1697 * If the i8259 is routed through an IOAPIC
1698 * Put that IOAPIC in virtual wire mode
1699 * so legacy interrupts can be delivered.
1700 */
1701 if (ioapic_i8259.pin != -1) {
1702 struct IO_APIC_route_entry entry;
1703
1704 memset(&entry, 0, sizeof(entry));
1705 entry.mask = 0; /* Enabled */
1706 entry.trigger = 0; /* Edge */
1707 entry.irr = 0;
1708 entry.polarity = 0; /* High */
1709 entry.delivery_status = 0;
1710 entry.dest_mode = 0; /* Physical */
1711 entry.delivery_mode = dest_ExtINT; /* ExtInt */
1712 entry.vector = 0;
1713 entry.dest.physical.physical_dest = read_apic_id();
1714
1715 /*
1716 * Add it to the IO-APIC irq-routing table:
1717 */
1718 ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
1719 }
1720 disconnect_bsp_APIC(ioapic_i8259.pin != -1);
1721}
1722
1723/*
1724 * function to set the IO-APIC physical IDs based on the
1725 * values stored in the MPC table.
1726 *
1727 * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
1728 */
1729
1730static void __init setup_ioapic_ids_from_mpc(void)
1731{
1732 union IO_APIC_reg_00 reg_00;
1733 physid_mask_t phys_id_present_map;
1734 int apic;
1735 int i;
1736 unsigned char old_id;
1737 unsigned long flags;
1738
1739 if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
1740 return;
1741
1742 /*
1743 * Don't check I/O APIC IDs for xAPIC systems. They have
1744 * no meaning without the serial APIC bus.
1745 */
1746 if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
1747 || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
1748 return;
1749 /*
1750 * This is broken; anything with a real cpu count has to
1751 * circumvent this idiocy regardless.
1752 */
1753 phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
1754
1755 /*
1756 * Set the IOAPIC ID to the value stored in the MPC table.
1757 */
1758 for (apic = 0; apic < nr_ioapics; apic++) {
1759
1760 /* Read the register 0 value */
1761 spin_lock_irqsave(&ioapic_lock, flags);
1762 reg_00.raw = io_apic_read(apic, 0);
1763 spin_unlock_irqrestore(&ioapic_lock, flags);
1764
1765 old_id = mp_ioapics[apic].mp_apicid;
1766
1767 if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
1768 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
1769 apic, mp_ioapics[apic].mp_apicid);
1770 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
1771 reg_00.bits.ID);
1772 mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
1773 }
1774
1775 /*
1776 * Sanity check, is the ID really free? Every APIC in a
1777 * system must have a unique ID or we get lots of nice
1778 * 'stuck on smp_invalidate_needed IPI wait' messages.
1779 */
1780 if (check_apicid_used(phys_id_present_map,
1781 mp_ioapics[apic].mp_apicid)) {
1782 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
1783 apic, mp_ioapics[apic].mp_apicid);
1784 for (i = 0; i < get_physical_broadcast(); i++)
1785 if (!physid_isset(i, phys_id_present_map))
1786 break;
1787 if (i >= get_physical_broadcast())
1788 panic("Max APIC ID exceeded!\n");
1789 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
1790 i);
1791 physid_set(i, phys_id_present_map);
1792 mp_ioapics[apic].mp_apicid = i;
1793 } else {
1794 physid_mask_t tmp;
1795 tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
1796 apic_printk(APIC_VERBOSE, "Setting %d in the "
1797 "phys_id_present_map\n",
1798 mp_ioapics[apic].mp_apicid);
1799 physids_or(phys_id_present_map, phys_id_present_map, tmp);
1800 }
1801
1802
1803 /*
1804 * We need to adjust the IRQ routing table
1805 * if the ID changed.
1806 */
1807 if (old_id != mp_ioapics[apic].mp_apicid)
1808 for (i = 0; i < mp_irq_entries; i++)
1809 if (mp_irqs[i].mp_dstapic == old_id)
1810 mp_irqs[i].mp_dstapic
1811 = mp_ioapics[apic].mp_apicid;
1812
1813 /*
1814 * Read the right value from the MPC table and
1815 * write it into the ID register.
1816 */
1817 apic_printk(APIC_VERBOSE, KERN_INFO
1818 "...changing IO-APIC physical APIC ID to %d ...",
1819 mp_ioapics[apic].mp_apicid);
1820
1821 reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
1822 spin_lock_irqsave(&ioapic_lock, flags);
1823 io_apic_write(apic, 0, reg_00.raw);
1824 spin_unlock_irqrestore(&ioapic_lock, flags);
1825
1826 /*
1827 * Sanity check
1828 */
1829 spin_lock_irqsave(&ioapic_lock, flags);
1830 reg_00.raw = io_apic_read(apic, 0);
1831 spin_unlock_irqrestore(&ioapic_lock, flags);
1832 if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
1833 printk("could not set ID!\n");
1834 else
1835 apic_printk(APIC_VERBOSE, " ok.\n");
1836 }
1837}
1838
1839int no_timer_check __initdata;
1840
1841static int __init notimercheck(char *s)
1842{
1843 no_timer_check = 1;
1844 return 1;
1845}
1846__setup("no_timer_check", notimercheck);
1847
1848/*
1849 * There is a nasty bug in some older SMP boards, their mptable lies
1850 * about the timer IRQ. We do the following to work around the situation:
1851 *
1852 * - timer IRQ defaults to IO-APIC IRQ
1853 * - if this function detects that timer IRQs are defunct, then we fall
1854 * back to ISA timer IRQs
1855 */
1856static int __init timer_irq_works(void)
1857{
1858 unsigned long t1 = jiffies;
1859 unsigned long flags;
1860
1861 if (no_timer_check)
1862 return 1;
1863
1864 local_save_flags(flags);
1865 local_irq_enable();
1866 /* Let ten ticks pass... */
1867 mdelay((10 * 1000) / HZ);
1868 local_irq_restore(flags);
1869
1870 /*
1871 * Expect a few ticks at least, to be sure some possible
1872 * glue logic does not lock up after one or two first
1873 * ticks in a non-ExtINT mode. Also the local APIC
1874 * might have cached one ExtINT interrupt. Finally, at
1875 * least one tick may be lost due to delays.
1876 */
1877 if (time_after(jiffies, t1 + 4))
1878 return 1;
1879
1880 return 0;
1881}
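/*
 * Example with HZ=250: mdelay(40) above lets roughly ten timer ticks
 * arrive; seeing jiffies advance past t1 + 4 is taken as proof that the
 * timer interrupt is being delivered.
 */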
1882
1883/*
1884 * In the SMP+IOAPIC case it might happen that an unspecified number
1885 * of pending IRQ events is left unhandled. These cases are very rare,
1886 * so we 'resend' these IRQs via IPIs to the same CPU. It's much
1887 * better to do it this way, as we then do not have to be aware of
1888 * 'pending' interrupts in the IRQ path, except at this point.
1889 */
1890/*
1891 * Edge-triggered IRQs need to resend any interrupt
1892 * that was delayed, but this is now handled in the
1893 * device-independent code.
1894 */
1895
1896/*
1897 * Startup quirk:
1898 *
1900 * Starting up an edge-triggered IO-APIC interrupt is
1901 * nasty - we need to make sure that we get the edge.
1902 * If it is already asserted for some reason, we need
1903 * to return 1 to indicate that it was pending.
1903 *
1904 * This is not complete - we should be able to fake
1905 * an edge even if it isn't on the 8259A...
1906 *
1907 * (We do this for level-triggered IRQs too - it cannot hurt.)
1908 */
1909static unsigned int startup_ioapic_irq(unsigned int irq)
1910{
1911 int was_pending = 0;
1912 unsigned long flags;
1913
1914 spin_lock_irqsave(&ioapic_lock, flags);
1915 if (irq < 16) {
1916 disable_8259A_irq(irq);
1917 if (i8259A_irq_pending(irq))
1918 was_pending = 1;
1919 }
1920 __unmask_IO_APIC_irq(irq);
1921 spin_unlock_irqrestore(&ioapic_lock, flags);
1922
1923 return was_pending;
1924}
1925
1926static void ack_ioapic_irq(unsigned int irq)
1927{
1928 move_native_irq(irq);
1929 ack_APIC_irq();
1930}
1931
1932static void ack_ioapic_quirk_irq(unsigned int irq)
1933{
1934 unsigned long v;
1935 int i;
1936
1937 move_native_irq(irq);
1938/*
1939 * It appears there is an erratum which affects at least version 0x11
1940 * of I/O APIC (that's the 82093AA and cores integrated into various
1941 * chipsets). Under certain conditions a level-triggered interrupt is
1942 * erroneously delivered as edge-triggered one but the respective IRR
1943 * bit gets set nevertheless. As a result the I/O unit expects an EOI
1944 * message but it will never arrive and further interrupts are blocked
1945 * from the source. The exact reason is so far unknown, but the
1946 * phenomenon was observed when two consecutive interrupt requests
1947 * from a given source get delivered to the same CPU and the source is
1948 * temporarily disabled in between.
1949 *
1950 * A workaround is to simulate an EOI message manually. We achieve it
1951 * by setting the trigger mode to edge and then to level when the edge
1952 * trigger mode gets detected in the TMR of a local APIC for a
1953 * level-triggered interrupt. We mask the source for the time of the
1954 * operation to prevent an edge-triggered interrupt escaping meanwhile.
1955 * The idea is from Manfred Spraul. --macro
1956 */
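/*
 * TMR lookup example: for vector 0x41 the word read below is at
 * APIC_TMR + ((0x41 & ~0x1f) >> 1) = APIC_TMR + 0x20 and the bit tested
 * is 0x41 & 0x1f = 1.  A clear bit means the local APIC latched the
 * interrupt as edge-triggered, so the workaround is applied.
 */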
1957 i = irq_vector[irq];
1958
1959 v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
1960
1961 ack_APIC_irq();
1962
1963 if (!(v & (1 << (i & 0x1f)))) {
1964 atomic_inc(&irq_mis_count);
1965 spin_lock(&ioapic_lock);
1966 __mask_and_edge_IO_APIC_irq(irq);
1967 __unmask_and_level_IO_APIC_irq(irq);
1968 spin_unlock(&ioapic_lock);
1969 }
1970}
1971
1972static int ioapic_retrigger_irq(unsigned int irq)
1973{
1974 send_IPI_self(irq_vector[irq]);
1975
1976 return 1;
1977}
1978
1979static struct irq_chip ioapic_chip __read_mostly = {
1980 .name = "IO-APIC",
1981 .startup = startup_ioapic_irq,
1982 .mask = mask_IO_APIC_irq,
1983 .unmask = unmask_IO_APIC_irq,
1984 .ack = ack_ioapic_irq,
1985 .eoi = ack_ioapic_quirk_irq,
1986#ifdef CONFIG_SMP
1987 .set_affinity = set_ioapic_affinity_irq,
1988#endif
1989 .retrigger = ioapic_retrigger_irq,
1990};
1991
1992
1993static inline void init_IO_APIC_traps(void)
1994{
1995 int irq;
1996
1997 /*
1998 * NOTE! The local APIC isn't very good at handling
1999 * multiple interrupts at the same interrupt level.
2000 * As the interrupt level is determined by taking the
2001 * vector number and shifting that right by 4, we
2002 * want to spread these out a bit so that they don't
2003 * all fall in the same interrupt level.
2004 *
2005 * Also, we've got to be careful not to trash gate
2006 * 0x80, because int 0x80 is hm, kind of importantish. ;)
2007 */
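	/*
	 * Example: vector 0x41 is handled at level 0x41 >> 4 = 4; since
	 * __assign_irq_vector() hands out vectors 8 apart, at most two
	 * device vectors land in any one level per allocation pass.
	 */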
2008 for (irq = 0; irq < NR_IRQS ; irq++) {
2009 if (IO_APIC_IRQ(irq) && !irq_vector[irq]) {
2010 /*
2011 * Hmm.. We don't have an entry for this,
2012 * so default to an old-fashioned 8259
2013 * interrupt if we can..
2014 */
2015 if (irq < 16)
2016 make_8259A_irq(irq);
2017 else
2018 /* Strange. Oh, well.. */
2019 irq_desc[irq].chip = &no_irq_chip;
2020 }
2021 }
2022}
2023
2024/*
2025 * The local APIC irq-chip implementation:
2026 */
2027
2028static void ack_lapic_irq(unsigned int irq)
2029{
2030 ack_APIC_irq();
2031}
2032
2033static void mask_lapic_irq(unsigned int irq)
2034{
2035 unsigned long v;
2036
2037 v = apic_read(APIC_LVT0);
2038 apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
2039}
2040
2041static void unmask_lapic_irq(unsigned int irq)
2042{
2043 unsigned long v;
2044
2045 v = apic_read(APIC_LVT0);
2046 apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
2047}
2048
2049static struct irq_chip lapic_chip __read_mostly = {
2050 .name = "local-APIC",
2051 .mask = mask_lapic_irq,
2052 .unmask = unmask_lapic_irq,
2053 .ack = ack_lapic_irq,
2054};
2055
2056static void lapic_register_intr(int irq, int vector)
2057{
2058 irq_desc[irq].status &= ~IRQ_LEVEL;
2059 set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
2060 "edge");
2061 set_intr_gate(vector, interrupt[irq]);
2062}
2063
2064static void __init setup_nmi(void)
2065{
2066 /*
2067 * Dirty trick to enable the NMI watchdog ...
2068 * We put the 8259A master into AEOI mode and
2069 * unmask LVT0 of all local APICs as NMI.
2070 *
2071 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
2072 * is from Maciej W. Rozycki - so we do not have to EOI from
2073 * the NMI handler or the timer interrupt.
2074 */
2075 apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
2076
2077 enable_NMI_through_LVT0();
2078
2079 apic_printk(APIC_VERBOSE, " done.\n");
2080}
2081
2082/*
2083 * This looks a bit hackish, but it's about the only way of sending
2084 * a few INTA cycles to 8259As and any associated glue logic. ICR does
2085 * not support the ExtINT mode, unfortunately. We need to send these
2086 * cycles as some i82489DX-based boards have glue logic that keeps the
2087 * 8259A interrupt line asserted until INTA. --macro
2088 */
2089static inline void __init unlock_ExtINT_logic(void)
2090{
2091 int apic, pin, i;
2092 struct IO_APIC_route_entry entry0, entry1;
2093 unsigned char save_control, save_freq_select;
2094
2095 pin = find_isa_irq_pin(8, mp_INT);
2096 if (pin == -1) {
2097 WARN_ON_ONCE(1);
2098 return;
2099 }
2100 apic = find_isa_irq_apic(8, mp_INT);
2101 if (apic == -1) {
2102 WARN_ON_ONCE(1);
2103 return;
2104 }
2105
2106 entry0 = ioapic_read_entry(apic, pin);
2107 clear_IO_APIC_pin(apic, pin);
2108
2109 memset(&entry1, 0, sizeof(entry1));
2110
2111 entry1.dest_mode = 0; /* physical delivery */
2112 entry1.mask = 0; /* unmask IRQ now */
2113 entry1.dest.physical.physical_dest = hard_smp_processor_id();
2114 entry1.delivery_mode = dest_ExtINT;
2115 entry1.polarity = entry0.polarity;
2116 entry1.trigger = 0;
2117 entry1.vector = 0;
2118
2119 ioapic_write_entry(apic, pin, entry1);
2120
2121 save_control = CMOS_READ(RTC_CONTROL);
2122 save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
2123 CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
2124 RTC_FREQ_SELECT);
2125 CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
2126
2127 i = 100;
2128 while (i-- > 0) {
2129 mdelay(10);
2130 if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
2131 i -= 10;
2132 }
2133
2134 CMOS_WRITE(save_control, RTC_CONTROL);
2135 CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
2136 clear_IO_APIC_pin(apic, pin);
2137
2138 ioapic_write_entry(apic, pin, entry0);
2139}
2140
2141/*
2142 * This code may look a bit paranoid, but it's supposed to cooperate with
2143 * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
2144 * is so screwy. Thanks to Brian Perkins for testing/hacking this beast
2145 * fanatically on his truly buggy board.
2146 */
2147static inline void __init check_timer(void)
2148{
2149 int apic1, pin1, apic2, pin2;
2150 int no_pin1 = 0;
2151 int vector;
2152 unsigned int ver;
2153 unsigned long flags;
2154
2155 local_irq_save(flags);
2156
2157 ver = apic_read(APIC_LVR);
2158 ver = GET_APIC_VERSION(ver);
2159
2160 /*
2161 * get/set the timer IRQ vector:
2162 */
2163 disable_8259A_irq(0);
2164 vector = assign_irq_vector(0);
2165 set_intr_gate(vector, interrupt[0]);
2166
2167 /*
2168 * As IRQ0 is to be enabled in the 8259A, the virtual
2169 * wire has to be disabled in the local APIC. Also
2170 * timer interrupts need to be acknowledged manually in
2171 * the 8259A for the i82489DX when using the NMI
2172 * watchdog as that APIC treats NMIs as level-triggered.
2173 * The AEOI mode will finish them in the 8259A
2174 * automatically.
2175 */
2176 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
2177 init_8259A(1);
2178 timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
2179
2180 pin1 = find_isa_irq_pin(0, mp_INT);
2181 apic1 = find_isa_irq_apic(0, mp_INT);
2182 pin2 = ioapic_i8259.pin;
2183 apic2 = ioapic_i8259.apic;
2184
2185 apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
2186 "apic1=%d pin1=%d apic2=%d pin2=%d\n",
2187 vector, apic1, pin1, apic2, pin2);
2188
2189 /*
2190 * Some BIOS writers are clueless and report the ExtINTA
2191 * I/O APIC input from the cascaded 8259A as the timer
2192 * interrupt input. So just in case, if only one pin
2193 * was found above, try it both directly and through the
2194 * 8259A.
2195 */
2196 if (pin1 == -1) {
2197 pin1 = pin2;
2198 apic1 = apic2;
2199 no_pin1 = 1;
2200 } else if (pin2 == -1) {
2201 pin2 = pin1;
2202 apic2 = apic1;
2203 }
2204
2205 if (pin1 != -1) {
2206 /*
2207 * Ok, does IRQ0 through the IOAPIC work?
2208 */
2209 if (no_pin1) {
2210 add_pin_to_irq(0, apic1, pin1);
2211 setup_timer_IRQ0_pin(apic1, pin1, vector);
2212 }
2213 unmask_IO_APIC_irq(0);
2214 if (timer_irq_works()) {
2215 if (nmi_watchdog == NMI_IO_APIC) {
2216 setup_nmi();
2217 enable_8259A_irq(0);
2218 }
2219 if (disable_timer_pin_1 > 0)
2220 clear_IO_APIC_pin(0, pin1);
2221 goto out;
2222 }
2223 clear_IO_APIC_pin(apic1, pin1);
2224 if (!no_pin1)
2225 apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
2226 "8254 timer not connected to IO-APIC\n");
2227
2228 apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
2229 "(IRQ0) through the 8259A ...\n");
2230 apic_printk(APIC_QUIET, KERN_INFO
2231 "..... (found apic %d pin %d) ...\n", apic2, pin2);
2232 /*
2233 * legacy devices should be connected to IO APIC #0
2234 */
2235 replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
2236 setup_timer_IRQ0_pin(apic2, pin2, vector);
2237 unmask_IO_APIC_irq(0);
2238 enable_8259A_irq(0);
2239 if (timer_irq_works()) {
2240 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
2241 timer_through_8259 = 1;
2242 if (nmi_watchdog == NMI_IO_APIC) {
2243 disable_8259A_irq(0);
2244 setup_nmi();
2245 enable_8259A_irq(0);
2246 }
2247 goto out;
2248 }
2249 /*
2250 * Cleanup, just in case ...
2251 */
2252 disable_8259A_irq(0);
2253 clear_IO_APIC_pin(apic2, pin2);
2254 apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
2255 }
2256
2257 if (nmi_watchdog == NMI_IO_APIC) {
2258 apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
2259 "through the IO-APIC - disabling NMI Watchdog!\n");
2260 nmi_watchdog = NMI_NONE;
2261 }
2262 timer_ack = 0;
2263
2264 apic_printk(APIC_QUIET, KERN_INFO
2265 "...trying to set up timer as Virtual Wire IRQ...\n");
2266
2267 lapic_register_intr(0, vector);
2268 apic_write(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
2269 enable_8259A_irq(0);
2270
2271 if (timer_irq_works()) {
2272 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
2273 goto out;
2274 }
2275 disable_8259A_irq(0);
2276 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
2277 apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
2278
2279 apic_printk(APIC_QUIET, KERN_INFO
2280 "...trying to set up timer as ExtINT IRQ...\n");
2281
2282 init_8259A(0);
2283 make_8259A_irq(0);
2284 apic_write(APIC_LVT0, APIC_DM_EXTINT);
2285
2286 unlock_ExtINT_logic();
2287
2288 if (timer_irq_works()) {
2289 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
2290 goto out;
2291 }
2292 apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
2293 panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
2294 "report. Then try booting with the 'noapic' option.\n");
2295out:
2296 local_irq_restore(flags);
2297}
2298
2299/*
2300 * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
2301 * to devices. However there may be an I/O APIC pin available for
2302 * this interrupt regardless. The pin may be left unconnected, but
2303 * typically it will be reused as an ExtINT cascade interrupt for
2304 * the master 8259A. In the MPS case such a pin will normally be
2305 * reported as an ExtINT interrupt in the MP table. With ACPI
2306 * there is no provision for ExtINT interrupts, and in the absence
2307 * of an override it would be treated as an ordinary ISA I/O APIC
2308 * interrupt, that is edge-triggered and unmasked by default. We
2309 * used to do this, but it caused problems on some systems because
2310 * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
2311 * the same ExtINT cascade interrupt to drive the local APIC of the
2312 * bootstrap processor. Therefore we refrain from routing IRQ2 to
2313 * the I/O APIC in all cases now. No actual device should request
2314 * it anyway. --macro
2315 */
2316#define PIC_IRQS (1 << PIC_CASCADE_IR)
2317
2318void __init setup_IO_APIC(void)
2319{
2320 int i;
2321
2322 /* Reserve all the system vectors. */
2323 for (i = first_system_vector; i < NR_VECTORS; i++)
2324 set_bit(i, used_vectors);
2325
2326 enable_IO_APIC();
2327
2328 io_apic_irqs = ~PIC_IRQS;
2329
2330 printk("ENABLING IO-APIC IRQs\n");
2331
2332 /*
2333 * Set up IO-APIC IRQ routing.
2334 */
2335 if (!acpi_ioapic)
2336 setup_ioapic_ids_from_mpc();
2337 sync_Arb_IDs();
2338 setup_IO_APIC_irqs();
2339 init_IO_APIC_traps();
2340 check_timer();
2341}
2342
2343/*
2344 * Called after all the initialization is done. If we didn't find any
2345 * APIC bugs, then we can allow the modify fast path.
2346 */
2347
2348static int __init io_apic_bug_finalize(void)
2349{
2350 if (sis_apic_bug == -1)
2351 sis_apic_bug = 0;
2352 return 0;
2353}
2354
2355late_initcall(io_apic_bug_finalize);
2356
2357struct sysfs_ioapic_data {
2358 struct sys_device dev;
2359 struct IO_APIC_route_entry entry[0];
2360};
2361static struct sysfs_ioapic_data *mp_ioapic_data[MAX_IO_APICS];
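/*
 * entry[0] is a zero-length array: ioapic_init_sysfs() kzalloc()s
 * sizeof(struct sys_device) plus one IO_APIC_route_entry per pin, so
 * suspend/resume can snapshot every redirection entry of the IO-APIC
 * right behind its sysdev.
 */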
2362
2363static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
2364{
2365 struct IO_APIC_route_entry *entry;
2366 struct sysfs_ioapic_data *data;
2367 int i;
2368
2369 data = container_of(dev, struct sysfs_ioapic_data, dev);
2370 entry = data->entry;
2371 for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
2372 entry[i] = ioapic_read_entry(dev->id, i);
2373
2374 return 0;
2375}
2376
2377static int ioapic_resume(struct sys_device *dev)
2378{
2379 struct IO_APIC_route_entry *entry;
2380 struct sysfs_ioapic_data *data;
2381 unsigned long flags;
2382 union IO_APIC_reg_00 reg_00;
2383 int i;
2384
2385 data = container_of(dev, struct sysfs_ioapic_data, dev);
2386 entry = data->entry;
2387
2388 spin_lock_irqsave(&ioapic_lock, flags);
2389 reg_00.raw = io_apic_read(dev->id, 0);
2390 if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
2391 reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
2392 io_apic_write(dev->id, 0, reg_00.raw);
2393 }
2394 spin_unlock_irqrestore(&ioapic_lock, flags);
2395 for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
2396 ioapic_write_entry(dev->id, i, entry[i]);
2397
2398 return 0;
2399}
2400
2401static struct sysdev_class ioapic_sysdev_class = {
2402 .name = "ioapic",
2403 .suspend = ioapic_suspend,
2404 .resume = ioapic_resume,
2405};
2406
2407static int __init ioapic_init_sysfs(void)
2408{
2409 struct sys_device *dev;
2410 int i, size, error = 0;
2411
2412 error = sysdev_class_register(&ioapic_sysdev_class);
2413 if (error)
2414 return error;
2415
2416 for (i = 0; i < nr_ioapics; i++) {
2417 size = sizeof(struct sys_device) + nr_ioapic_registers[i]
2418 * sizeof(struct IO_APIC_route_entry);
2419 mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
2420 if (!mp_ioapic_data[i]) {
2421 printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
2422 continue;
2423 }
2424 dev = &mp_ioapic_data[i]->dev;
2425 dev->id = i;
2426 dev->cls = &ioapic_sysdev_class;
2427 error = sysdev_register(dev);
2428 if (error) {
2429 kfree(mp_ioapic_data[i]);
2430 mp_ioapic_data[i] = NULL;
2431 printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
2432 continue;
2433 }
2434 }
2435
2436 return 0;
2437}
2438
2439device_initcall(ioapic_init_sysfs);
2440
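
For orientation, the sysdev class registered above is named "ioapic", so on a two-IO-APIC box the suspend/resume hooks attach to nodes of roughly this shape (illustrative paths):

	/sys/devices/system/ioapic/ioapic0
	/sys/devices/system/ioapic/ioapic1
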
2441/*
2442 * Dynamic irq allocation and deallocation
2443 */
2444int create_irq(void)
2445{
2446 /* Allocate an unused irq */
2447 int irq, new, vector = 0;
2448 unsigned long flags;
2449
2450 irq = -ENOSPC;
2451 spin_lock_irqsave(&vector_lock, flags);
2452 for (new = (NR_IRQS - 1); new >= 0; new--) {
2453 if (platform_legacy_irq(new))
2454 continue;
2455 if (irq_vector[new] != 0)
2456 continue;
2457 vector = __assign_irq_vector(new);
2458 if (likely(vector > 0))
2459 irq = new;
2460 break;
2461 }
2462 spin_unlock_irqrestore(&vector_lock, flags);
2463
2464 if (irq >= 0) {
2465 set_intr_gate(vector, interrupt[irq]);
2466 dynamic_irq_init(irq);
2467 }
2468 return irq;
2469}
2470
2471void destroy_irq(unsigned int irq)
2472{
2473 unsigned long flags;
2474
2475 dynamic_irq_cleanup(irq);
2476
2477 spin_lock_irqsave(&vector_lock, flags);
2478 clear_bit(irq_vector[irq], used_vectors);
2479 irq_vector[irq] = 0;
2480 spin_unlock_irqrestore(&vector_lock, flags);
2481}
2482
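
A minimal usage sketch for the pair above (hypothetical caller, not part of this file):

	int irq = create_irq();		/* -ENOSPC when no free irq/vector */
	if (irq < 0)
		return irq;
	/* ... program the device to raise the vector bound to 'irq' ... */
	destroy_irq(irq);		/* returns the vector to used_vectors */
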
2483/*
2484 * MSI message composition
2485 */
2486#ifdef CONFIG_PCI_MSI
2487static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
2488{
2489 int vector;
2490 unsigned dest;
2491
2492 vector = assign_irq_vector(irq);
2493 if (vector >= 0) {
2494 dest = cpu_mask_to_apicid(TARGET_CPUS);
2495
2496 msg->address_hi = MSI_ADDR_BASE_HI;
2497 msg->address_lo =
2498 MSI_ADDR_BASE_LO |
2499 ((INT_DEST_MODE == 0) ?
2500			MSI_ADDR_DEST_MODE_PHYSICAL:
2501 MSI_ADDR_DEST_MODE_LOGICAL) |
2502 ((INT_DELIVERY_MODE != dest_LowestPrio) ?
2503 MSI_ADDR_REDIRECTION_CPU:
2504 MSI_ADDR_REDIRECTION_LOWPRI) |
2505 MSI_ADDR_DEST_ID(dest);
2506
2507 msg->data =
2508 MSI_DATA_TRIGGER_EDGE |
2509 MSI_DATA_LEVEL_ASSERT |
2510 ((INT_DELIVERY_MODE != dest_LowestPrio) ?
2511			MSI_DATA_DELIVERY_FIXED:
2512 MSI_DATA_DELIVERY_LOWPRI) |
2513 MSI_DATA_VECTOR(vector);
2514 }
2515 return vector;
2516}
2517
2518#ifdef CONFIG_SMP
2519static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2520{
2521 struct msi_msg msg;
2522 unsigned int dest;
2523 cpumask_t tmp;
2524 int vector;
2525
2526 cpus_and(tmp, mask, cpu_online_map);
2527 if (cpus_empty(tmp))
2528 tmp = TARGET_CPUS;
2529
2530 vector = assign_irq_vector(irq);
2531 if (vector < 0)
2532 return;
2533
2534 dest = cpu_mask_to_apicid(mask);
2535
2536 read_msi_msg(irq, &msg);
2537
2538 msg.data &= ~MSI_DATA_VECTOR_MASK;
2539 msg.data |= MSI_DATA_VECTOR(vector);
2540 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
2541 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
2542
2543 write_msi_msg(irq, &msg);
2544 irq_desc[irq].affinity = mask;
2545}
2546#endif /* CONFIG_SMP */
2547
2548/*
2549 * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
2550 * which implement the MSI or MSI-X Capability Structure.
2551 */
2552static struct irq_chip msi_chip = {
2553 .name = "PCI-MSI",
2554 .unmask = unmask_msi_irq,
2555 .mask = mask_msi_irq,
2556 .ack = ack_ioapic_irq,
2557#ifdef CONFIG_SMP
2558 .set_affinity = set_msi_irq_affinity,
2559#endif
2560 .retrigger = ioapic_retrigger_irq,
2561};
2562
2563int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
2564{
2565 struct msi_msg msg;
2566 int irq, ret;
2567 irq = create_irq();
2568 if (irq < 0)
2569 return irq;
2570
2571 ret = msi_compose_msg(dev, irq, &msg);
2572 if (ret < 0) {
2573 destroy_irq(irq);
2574 return ret;
2575 }
2576
2577 set_irq_msi(irq, desc);
2578 write_msi_msg(irq, &msg);
2579
2580 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq,
2581 "edge");
2582
2583 return 0;
2584}
2585
2586void arch_teardown_msi_irq(unsigned int irq)
2587{
2588 destroy_irq(irq);
2589}
2590
2591#endif /* CONFIG_PCI_MSI */
2592
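
For reference, set_msi_irq_affinity() above relies on the vector living in the low bits of msg.data and the destination ID sitting in its masked field of msg.address_lo; a decode sketch using the same masks (an assumption: field layout as in msidef.h) looks like:

	unsigned int vec  = msg.data & MSI_DATA_VECTOR_MASK;	    /* vector */
	unsigned int dest = msg.address_lo & MSI_ADDR_DEST_ID_MASK; /* dest id
								       field, still
								       in place */
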
2593/*
2594 * Hypertransport interrupt support
2595 */
2596#ifdef CONFIG_HT_IRQ
2597
2598#ifdef CONFIG_SMP
2599
2600static void target_ht_irq(unsigned int irq, unsigned int dest)
2601{
2602 struct ht_irq_msg msg;
2603 fetch_ht_irq_msg(irq, &msg);
2604
2605 msg.address_lo &= ~(HT_IRQ_LOW_DEST_ID_MASK);
2606 msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
2607
2608 msg.address_lo |= HT_IRQ_LOW_DEST_ID(dest);
2609 msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
2610
2611 write_ht_irq_msg(irq, &msg);
2612}
2613
2614static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
2615{
2616 unsigned int dest;
2617 cpumask_t tmp;
2618
2619 cpus_and(tmp, mask, cpu_online_map);
2620 if (cpus_empty(tmp))
2621 tmp = TARGET_CPUS;
2622
2623 cpus_and(mask, tmp, CPU_MASK_ALL);
2624
2625 dest = cpu_mask_to_apicid(mask);
2626
2627 target_ht_irq(irq, dest);
2628 irq_desc[irq].affinity = mask;
2629}
2630#endif
2631
2632static struct irq_chip ht_irq_chip = {
2633 .name = "PCI-HT",
2634 .mask = mask_ht_irq,
2635 .unmask = unmask_ht_irq,
2636 .ack = ack_ioapic_irq,
2637#ifdef CONFIG_SMP
2638 .set_affinity = set_ht_irq_affinity,
2639#endif
2640 .retrigger = ioapic_retrigger_irq,
2641};
2642
2643int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
2644{
2645 int vector;
2646
2647 vector = assign_irq_vector(irq);
2648 if (vector >= 0) {
2649 struct ht_irq_msg msg;
2650 unsigned dest;
2651 cpumask_t tmp;
2652
2653 cpus_clear(tmp);
2654 cpu_set(vector >> 8, tmp);
2655 dest = cpu_mask_to_apicid(tmp);
2656
2657 msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
2658
2659 msg.address_lo =
2660 HT_IRQ_LOW_BASE |
2661 HT_IRQ_LOW_DEST_ID(dest) |
2662 HT_IRQ_LOW_VECTOR(vector) |
2663 ((INT_DEST_MODE == 0) ?
2664 HT_IRQ_LOW_DM_PHYSICAL :
2665 HT_IRQ_LOW_DM_LOGICAL) |
2666 HT_IRQ_LOW_RQEOI_EDGE |
2667 ((INT_DELIVERY_MODE != dest_LowestPrio) ?
2668 HT_IRQ_LOW_MT_FIXED :
2669 HT_IRQ_LOW_MT_ARBITRATED) |
2670 HT_IRQ_LOW_IRQ_MASKED;
2671
2672 write_ht_irq_msg(irq, &msg);
2673
2674 set_irq_chip_and_handler_name(irq, &ht_irq_chip,
2675 handle_edge_irq, "edge");
2676 }
2677 return vector;
2678}
2679#endif /* CONFIG_HT_IRQ */
2680
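
Note that arch_setup_ht_irq() composes the message with HT_IRQ_LOW_IRQ_MASKED set, so the interrupt stays off until ht_irq_chip unmasks it; in the usual flow that happens when a driver requests the irq (hypothetical driver-side sketch, names invented):

	err = request_irq(irq, ht_dev_handler, 0, "ht-dev", ht_dev);
	/* request_irq() startup path ends in unmask_ht_irq() clearing
	 * the mask bit written above */
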
2681/* --------------------------------------------------------------------------
2682 ACPI-based IOAPIC Configuration
2683 -------------------------------------------------------------------------- */
2684
2685#ifdef CONFIG_ACPI
2686
2687int __init io_apic_get_unique_id(int ioapic, int apic_id)
2688{
2689 union IO_APIC_reg_00 reg_00;
2690 static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
2691 physid_mask_t tmp;
2692 unsigned long flags;
2693 int i = 0;
2694
2695 /*
2696 * The P4 platform supports up to 256 APIC IDs on two separate APIC
2697 * buses (one for LAPICs, one for IOAPICs), whereas its predecessors
2698 * supported only up to 16 on one shared APIC bus.
2699 *
2700 * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
2701 * advantage of new APIC bus architecture.
2702 */
2703
2704 if (physids_empty(apic_id_map))
2705 apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
2706
2707 spin_lock_irqsave(&ioapic_lock, flags);
2708 reg_00.raw = io_apic_read(ioapic, 0);
2709 spin_unlock_irqrestore(&ioapic_lock, flags);
2710
2711 if (apic_id >= get_physical_broadcast()) {
2712 printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
2713 "%d\n", ioapic, apic_id, reg_00.bits.ID);
2714 apic_id = reg_00.bits.ID;
2715 }
2716
2717 /*
2718 * Every APIC in a system must have a unique ID or we get lots of nice
2719 * 'stuck on smp_invalidate_needed IPI wait' messages.
2720 */
2721 if (check_apicid_used(apic_id_map, apic_id)) {
2722
2723 for (i = 0; i < get_physical_broadcast(); i++) {
2724 if (!check_apicid_used(apic_id_map, i))
2725 break;
2726 }
2727
2728 if (i == get_physical_broadcast())
2729 panic("Max apic_id exceeded!\n");
2730
2731 printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
2732 "trying %d\n", ioapic, apic_id, i);
2733
2734 apic_id = i;
2735 }
2736
2737 tmp = apicid_to_cpu_present(apic_id);
2738 physids_or(apic_id_map, apic_id_map, tmp);
2739
2740 if (reg_00.bits.ID != apic_id) {
2741 reg_00.bits.ID = apic_id;
2742
2743 spin_lock_irqsave(&ioapic_lock, flags);
2744 io_apic_write(ioapic, 0, reg_00.raw);
2745 reg_00.raw = io_apic_read(ioapic, 0);
2746 spin_unlock_irqrestore(&ioapic_lock, flags);
2747
2748 /* Sanity check */
2749 if (reg_00.bits.ID != apic_id) {
2750			printk(KERN_ERR "IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
2751 return -1;
2752 }
2753 }
2754
2755 apic_printk(APIC_VERBOSE, KERN_INFO
2756 "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
2757
2758 return apic_id;
2759}
2760
2761
2762int __init io_apic_get_version(int ioapic)
2763{
2764 union IO_APIC_reg_01 reg_01;
2765 unsigned long flags;
2766
2767 spin_lock_irqsave(&ioapic_lock, flags);
2768 reg_01.raw = io_apic_read(ioapic, 1);
2769 spin_unlock_irqrestore(&ioapic_lock, flags);
2770
2771 return reg_01.bits.version;
2772}
2773
2774
2775int __init io_apic_get_redir_entries(int ioapic)
2776{
2777 union IO_APIC_reg_01 reg_01;
2778 unsigned long flags;
2779
2780 spin_lock_irqsave(&ioapic_lock, flags);
2781 reg_01.raw = io_apic_read(ioapic, 1);
2782 spin_unlock_irqrestore(&ioapic_lock, flags);
2783
2784 return reg_01.bits.entries;
2785}
2786
2787
2788int io_apic_set_pci_routing(int ioapic, int pin, int irq, int edge_level, int active_high_low)
2789{
2790 struct IO_APIC_route_entry entry;
2791
2792 if (!IO_APIC_IRQ(irq)) {
2793		printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ %d\n",
2794		       ioapic, irq);
2795 return -EINVAL;
2796 }
2797
2798 /*
2799 * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
2800 * Note that we mask (disable) IRQs now -- these get enabled when the
2801 * corresponding device driver registers for this IRQ.
2802 */
2803
2804 memset(&entry, 0, sizeof(entry));
2805
2806 entry.delivery_mode = INT_DELIVERY_MODE;
2807 entry.dest_mode = INT_DEST_MODE;
2808 entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
2809 entry.trigger = edge_level;
2810 entry.polarity = active_high_low;
2811 entry.mask = 1;
2812
2813 /*
2814 * IRQs < 16 are already in the irq_2_pin[] map
2815 */
2816 if (irq >= 16)
2817 add_pin_to_irq(irq, ioapic, pin);
2818
2819 entry.vector = assign_irq_vector(irq);
2820
2821 apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry "
2822 "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic,
2823 mp_ioapics[ioapic].mp_apicid, pin, entry.vector, irq,
2824 edge_level, active_high_low);
2825
2826 ioapic_register_intr(irq, entry.vector, edge_level);
2827
2828 if (!ioapic && (irq < 16))
2829 disable_8259A_irq(irq);
2830
2831 ioapic_write_entry(ioapic, pin, entry);
2832
2833 return 0;
2834}
2835
2836int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
2837{
2838 int i;
2839
2840 if (skip_ioapic_setup)
2841 return -1;
2842
2843 for (i = 0; i < mp_irq_entries; i++)
2844 if (mp_irqs[i].mp_irqtype == mp_INT &&
2845 mp_irqs[i].mp_srcbusirq == bus_irq)
2846 break;
2847 if (i >= mp_irq_entries)
2848 return -1;
2849
2850 *trigger = irq_trigger(i);
2851 *polarity = irq_polarity(i);
2852 return 0;
2853}
2854
2855#endif /* CONFIG_ACPI */
2856
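
A short caller-side sketch for acpi_get_override_irq() (hypothetical user of the helper):

	int trigger, polarity;

	if (acpi_get_override_irq(bus_irq, &trigger, &polarity) == 0) {
		/* MP table had an mp_INT entry: honor its trigger/polarity */
	} else {
		/* -1: no entry, or IO-APIC setup was skipped; fall back
		 * to the ISA defaults (edge-triggered, active high) */
	}
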
2857static int __init parse_disable_timer_pin_1(char *arg)
2858{
2859 disable_timer_pin_1 = 1;
2860 return 0;
2861}
2862early_param("disable_timer_pin_1", parse_disable_timer_pin_1);
2863
2864static int __init parse_enable_timer_pin_1(char *arg)
2865{
2866 disable_timer_pin_1 = -1;
2867 return 0;
2868}
2869early_param("enable_timer_pin_1", parse_enable_timer_pin_1);
2870
2871static int __init parse_noapic(char *arg)
2872{
2873 /* disable IO-APIC */
2874 disable_ioapic_setup();
2875 return 0;
2876}
2877early_param("noapic", parse_noapic);
2878
2879void __init ioapic_init_mappings(void)
2880{
2881 unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
2882 int i;
2883
2884 for (i = 0; i < nr_ioapics; i++) {
2885 if (smp_found_config) {
2886 ioapic_phys = mp_ioapics[i].mp_apicaddr;
2887 if (!ioapic_phys) {
2888 printk(KERN_ERR
2889 "WARNING: bogus zero IO-APIC "
2890 "address found in MPTABLE, "
2891 "disabling IO/APIC support!\n");
2892 smp_found_config = 0;
2893 skip_ioapic_setup = 1;
2894 goto fake_ioapic_page;
2895 }
2896 } else {
2897fake_ioapic_page:
2898 ioapic_phys = (unsigned long)
2899 alloc_bootmem_pages(PAGE_SIZE);
2900 ioapic_phys = __pa(ioapic_phys);
2901 }
2902 set_fixmap_nocache(idx, ioapic_phys);
2903 printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n",
2904 __fix_to_virt(idx), ioapic_phys);
2905 idx++;
2906 }
2907}
2908
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
new file mode 100644
index 000000000000..ccf6c503fc3b
--- /dev/null
+++ b/arch/x86/kernel/irq.c
@@ -0,0 +1,189 @@
1/*
2 * Common interrupt code for 32 and 64 bit
3 */
4#include <linux/cpu.h>
5#include <linux/interrupt.h>
6#include <linux/kernel_stat.h>
7#include <linux/seq_file.h>
8
9#include <asm/apic.h>
10#include <asm/io_apic.h>
11#include <asm/smp.h>
12
13atomic_t irq_err_count;
14
15/*
16 * 'What should we do if we get a hw irq event on an illegal vector?'
17 * Each architecture has to answer this itself.
18 */
19void ack_bad_irq(unsigned int irq)
20{
21 printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq);
22
23#ifdef CONFIG_X86_LOCAL_APIC
24 /*
25 * Currently unexpected vectors happen only on SMP and APIC.
26 * We _must_ ack these because every local APIC has only N
27 * irq slots per priority level, and a 'hanging, unacked' IRQ
28 * holds up an irq slot - in excessive cases (when multiple
29 * unexpected vectors occur) that might lock up the APIC
30 * completely.
31 * But only ack when the APIC is enabled -AK
32 */
33 if (cpu_has_apic)
34 ack_APIC_irq();
35#endif
36}
37
38#ifdef CONFIG_X86_32
39# define irq_stats(x) (&per_cpu(irq_stat, x))
40#else
41# define irq_stats(x) cpu_pda(x)
42#endif
43/*
44 * /proc/interrupts printing:
45 */
46static int show_other_interrupts(struct seq_file *p)
47{
48 int j;
49
50 seq_printf(p, "NMI: ");
51 for_each_online_cpu(j)
52 seq_printf(p, "%10u ", irq_stats(j)->__nmi_count);
53 seq_printf(p, " Non-maskable interrupts\n");
54#ifdef CONFIG_X86_LOCAL_APIC
55 seq_printf(p, "LOC: ");
56 for_each_online_cpu(j)
57 seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs);
58 seq_printf(p, " Local timer interrupts\n");
59#endif
60#ifdef CONFIG_SMP
61 seq_printf(p, "RES: ");
62 for_each_online_cpu(j)
63 seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count);
64 seq_printf(p, " Rescheduling interrupts\n");
65 seq_printf(p, "CAL: ");
66 for_each_online_cpu(j)
67 seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
68 seq_printf(p, " Function call interrupts\n");
69 seq_printf(p, "TLB: ");
70 for_each_online_cpu(j)
71 seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count);
72 seq_printf(p, " TLB shootdowns\n");
73#endif
74#ifdef CONFIG_X86_MCE
75 seq_printf(p, "TRM: ");
76 for_each_online_cpu(j)
77 seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
78 seq_printf(p, " Thermal event interrupts\n");
79# ifdef CONFIG_X86_64
80 seq_printf(p, "THR: ");
81 for_each_online_cpu(j)
82 seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
83 seq_printf(p, " Threshold APIC interrupts\n");
84# endif
85#endif
86#ifdef CONFIG_X86_LOCAL_APIC
87 seq_printf(p, "SPU: ");
88 for_each_online_cpu(j)
89 seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
90 seq_printf(p, " Spurious interrupts\n");
91#endif
92 seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
93#if defined(CONFIG_X86_IO_APIC)
94 seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
95#endif
96 return 0;
97}
98
99int show_interrupts(struct seq_file *p, void *v)
100{
101 unsigned long flags, any_count = 0;
102 int i = *(loff_t *) v, j;
103 struct irqaction *action;
104 struct irq_desc *desc;
105
106 if (i > nr_irqs)
107 return 0;
108
109 if (i == nr_irqs)
110 return show_other_interrupts(p);
111
112 /* print header */
113 if (i == 0) {
114 seq_printf(p, " ");
115 for_each_online_cpu(j)
116 seq_printf(p, "CPU%-8d",j);
117 seq_putc(p, '\n');
118 }
119
120 desc = irq_to_desc(i);
121 spin_lock_irqsave(&desc->lock, flags);
122#ifndef CONFIG_SMP
123 any_count = kstat_irqs(i);
124#else
125 for_each_online_cpu(j)
126 any_count |= kstat_irqs_cpu(i, j);
127#endif
128 action = desc->action;
129 if (!action && !any_count)
130 goto out;
131
132 seq_printf(p, "%3d: ", i);
133#ifndef CONFIG_SMP
134 seq_printf(p, "%10u ", kstat_irqs(i));
135#else
136 for_each_online_cpu(j)
137 seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
138#endif
139 seq_printf(p, " %8s", desc->chip->name);
140 seq_printf(p, "-%-8s", desc->name);
141
142 if (action) {
143 seq_printf(p, " %s", action->name);
144 while ((action = action->next) != NULL)
145 seq_printf(p, ", %s", action->name);
146 }
147
148 seq_putc(p, '\n');
149out:
150 spin_unlock_irqrestore(&desc->lock, flags);
151 return 0;
152}
153
154/*
155 * /proc/stat helpers
156 */
157u64 arch_irq_stat_cpu(unsigned int cpu)
158{
159 u64 sum = irq_stats(cpu)->__nmi_count;
160
161#ifdef CONFIG_X86_LOCAL_APIC
162 sum += irq_stats(cpu)->apic_timer_irqs;
163#endif
164#ifdef CONFIG_SMP
165 sum += irq_stats(cpu)->irq_resched_count;
166 sum += irq_stats(cpu)->irq_call_count;
167 sum += irq_stats(cpu)->irq_tlb_count;
168#endif
169#ifdef CONFIG_X86_MCE
170 sum += irq_stats(cpu)->irq_thermal_count;
171# ifdef CONFIG_X86_64
172 sum += irq_stats(cpu)->irq_threshold_count;
173#endif
174#endif
175#ifdef CONFIG_X86_LOCAL_APIC
176 sum += irq_stats(cpu)->irq_spurious_count;
177#endif
178 return sum;
179}
180
181u64 arch_irq_stat(void)
182{
183 u64 sum = atomic_read(&irq_err_count);
184
185#ifdef CONFIG_X86_IO_APIC
186 sum += atomic_read(&irq_mis_count);
187#endif
188 return sum;
189}
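
The irq_stats() macro is what makes this file shareable: it resolves to the 32-bit per-cpu irq_stat structure or to the 64-bit PDA, so the counters read here can be bumped uniformly. An illustrative increment (a sketch only; in-tree callers may use their native per-cpu form instead):

	irq_stats(smp_processor_id())->irq_resched_count++;
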
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index b71e02d42f4f..a51382672de0 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -25,29 +25,6 @@ EXPORT_PER_CPU_SYMBOL(irq_stat);
25DEFINE_PER_CPU(struct pt_regs *, irq_regs); 25DEFINE_PER_CPU(struct pt_regs *, irq_regs);
26EXPORT_PER_CPU_SYMBOL(irq_regs); 26EXPORT_PER_CPU_SYMBOL(irq_regs);
27 27
28/*
29 * 'what should we do if we get a hw irq event on an illegal vector'.
30 * each architecture has to answer this themselves.
31 */
32void ack_bad_irq(unsigned int irq)
33{
34 printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq);
35
36#ifdef CONFIG_X86_LOCAL_APIC
37 /*
38 * Currently unexpected vectors happen only on SMP and APIC.
39 * We _must_ ack these because every local APIC has only N
40 * irq slots per priority level, and a 'hanging, unacked' IRQ
41 * holds up an irq slot - in excessive cases (when multiple
42 * unexpected vectors occur) that might lock up the APIC
43 * completely.
44 * But only ack when the APIC is enabled -AK
45 */
46 if (cpu_has_apic)
47 ack_APIC_irq();
48#endif
49}
50
51#ifdef CONFIG_DEBUG_STACKOVERFLOW 28#ifdef CONFIG_DEBUG_STACKOVERFLOW
52/* Debugging check for stack overflow: is there less than 1KB free? */ 29/* Debugging check for stack overflow: is there less than 1KB free? */
53static int check_stack_overflow(void) 30static int check_stack_overflow(void)
@@ -223,20 +200,25 @@ unsigned int do_IRQ(struct pt_regs *regs)
223{ 200{
224 struct pt_regs *old_regs; 201 struct pt_regs *old_regs;
225 /* high bit used in ret_from_ code */ 202 /* high bit used in ret_from_ code */
226 int overflow, irq = ~regs->orig_ax; 203 int overflow;
227 struct irq_desc *desc = irq_desc + irq; 204 unsigned vector = ~regs->orig_ax;
205 struct irq_desc *desc;
206 unsigned irq;
228 207
229 if (unlikely((unsigned)irq >= NR_IRQS)) {
230 printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
231 __func__, irq);
232 BUG();
233 }
234 208
235 old_regs = set_irq_regs(regs); 209 old_regs = set_irq_regs(regs);
236 irq_enter(); 210 irq_enter();
211 irq = __get_cpu_var(vector_irq)[vector];
237 212
238 overflow = check_stack_overflow(); 213 overflow = check_stack_overflow();
239 214
215 desc = irq_to_desc(irq);
216 if (unlikely(!desc)) {
217 printk(KERN_EMERG "%s: cannot handle IRQ %d vector %#x cpu %d\n",
218 __func__, irq, vector, smp_processor_id());
219 BUG();
220 }
221
240 if (!execute_on_irq_stack(overflow, desc, irq)) { 222 if (!execute_on_irq_stack(overflow, desc, irq)) {
241 if (unlikely(overflow)) 223 if (unlikely(overflow))
242 print_stack_overflow(); 224 print_stack_overflow();
@@ -248,146 +230,6 @@ unsigned int do_IRQ(struct pt_regs *regs)
248 return 1; 230 return 1;
249} 231}
250 232
251/*
252 * Interrupt statistics:
253 */
254
255atomic_t irq_err_count;
256
257/*
258 * /proc/interrupts printing:
259 */
260
261int show_interrupts(struct seq_file *p, void *v)
262{
263 int i = *(loff_t *) v, j;
264 struct irqaction * action;
265 unsigned long flags;
266
267 if (i == 0) {
268 seq_printf(p, " ");
269 for_each_online_cpu(j)
270 seq_printf(p, "CPU%-8d",j);
271 seq_putc(p, '\n');
272 }
273
274 if (i < NR_IRQS) {
275 unsigned any_count = 0;
276
277 spin_lock_irqsave(&irq_desc[i].lock, flags);
278#ifndef CONFIG_SMP
279 any_count = kstat_irqs(i);
280#else
281 for_each_online_cpu(j)
282 any_count |= kstat_cpu(j).irqs[i];
283#endif
284 action = irq_desc[i].action;
285 if (!action && !any_count)
286 goto skip;
287 seq_printf(p, "%3d: ",i);
288#ifndef CONFIG_SMP
289 seq_printf(p, "%10u ", kstat_irqs(i));
290#else
291 for_each_online_cpu(j)
292 seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
293#endif
294 seq_printf(p, " %8s", irq_desc[i].chip->name);
295 seq_printf(p, "-%-8s", irq_desc[i].name);
296
297 if (action) {
298 seq_printf(p, " %s", action->name);
299 while ((action = action->next) != NULL)
300 seq_printf(p, ", %s", action->name);
301 }
302
303 seq_putc(p, '\n');
304skip:
305 spin_unlock_irqrestore(&irq_desc[i].lock, flags);
306 } else if (i == NR_IRQS) {
307 seq_printf(p, "NMI: ");
308 for_each_online_cpu(j)
309 seq_printf(p, "%10u ", nmi_count(j));
310 seq_printf(p, " Non-maskable interrupts\n");
311#ifdef CONFIG_X86_LOCAL_APIC
312 seq_printf(p, "LOC: ");
313 for_each_online_cpu(j)
314 seq_printf(p, "%10u ",
315 per_cpu(irq_stat,j).apic_timer_irqs);
316 seq_printf(p, " Local timer interrupts\n");
317#endif
318#ifdef CONFIG_SMP
319 seq_printf(p, "RES: ");
320 for_each_online_cpu(j)
321 seq_printf(p, "%10u ",
322 per_cpu(irq_stat,j).irq_resched_count);
323 seq_printf(p, " Rescheduling interrupts\n");
324 seq_printf(p, "CAL: ");
325 for_each_online_cpu(j)
326 seq_printf(p, "%10u ",
327 per_cpu(irq_stat,j).irq_call_count);
328 seq_printf(p, " Function call interrupts\n");
329 seq_printf(p, "TLB: ");
330 for_each_online_cpu(j)
331 seq_printf(p, "%10u ",
332 per_cpu(irq_stat,j).irq_tlb_count);
333 seq_printf(p, " TLB shootdowns\n");
334#endif
335#ifdef CONFIG_X86_MCE
336 seq_printf(p, "TRM: ");
337 for_each_online_cpu(j)
338 seq_printf(p, "%10u ",
339 per_cpu(irq_stat,j).irq_thermal_count);
340 seq_printf(p, " Thermal event interrupts\n");
341#endif
342#ifdef CONFIG_X86_LOCAL_APIC
343 seq_printf(p, "SPU: ");
344 for_each_online_cpu(j)
345 seq_printf(p, "%10u ",
346 per_cpu(irq_stat,j).irq_spurious_count);
347 seq_printf(p, " Spurious interrupts\n");
348#endif
349 seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
350#if defined(CONFIG_X86_IO_APIC)
351 seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
352#endif
353 }
354 return 0;
355}
356
357/*
358 * /proc/stat helpers
359 */
360u64 arch_irq_stat_cpu(unsigned int cpu)
361{
362 u64 sum = nmi_count(cpu);
363
364#ifdef CONFIG_X86_LOCAL_APIC
365 sum += per_cpu(irq_stat, cpu).apic_timer_irqs;
366#endif
367#ifdef CONFIG_SMP
368 sum += per_cpu(irq_stat, cpu).irq_resched_count;
369 sum += per_cpu(irq_stat, cpu).irq_call_count;
370 sum += per_cpu(irq_stat, cpu).irq_tlb_count;
371#endif
372#ifdef CONFIG_X86_MCE
373 sum += per_cpu(irq_stat, cpu).irq_thermal_count;
374#endif
375#ifdef CONFIG_X86_LOCAL_APIC
376 sum += per_cpu(irq_stat, cpu).irq_spurious_count;
377#endif
378 return sum;
379}
380
381u64 arch_irq_stat(void)
382{
383 u64 sum = atomic_read(&irq_err_count);
384
385#ifdef CONFIG_X86_IO_APIC
386 sum += atomic_read(&irq_mis_count);
387#endif
388 return sum;
389}
390
391#ifdef CONFIG_HOTPLUG_CPU 233#ifdef CONFIG_HOTPLUG_CPU
392#include <mach_apic.h> 234#include <mach_apic.h>
393 235
@@ -395,20 +237,22 @@ void fixup_irqs(cpumask_t map)
395{ 237{
396 unsigned int irq; 238 unsigned int irq;
397 static int warned; 239 static int warned;
240 struct irq_desc *desc;
398 241
399 for (irq = 0; irq < NR_IRQS; irq++) { 242 for_each_irq_desc(irq, desc) {
400 cpumask_t mask; 243 cpumask_t mask;
244
401 if (irq == 2) 245 if (irq == 2)
402 continue; 246 continue;
403 247
404 cpus_and(mask, irq_desc[irq].affinity, map); 248 cpus_and(mask, desc->affinity, map);
405 if (any_online_cpu(mask) == NR_CPUS) { 249 if (any_online_cpu(mask) == NR_CPUS) {
406 printk("Breaking affinity for irq %i\n", irq); 250 printk("Breaking affinity for irq %i\n", irq);
407 mask = map; 251 mask = map;
408 } 252 }
409 if (irq_desc[irq].chip->set_affinity) 253 if (desc->chip->set_affinity)
410 irq_desc[irq].chip->set_affinity(irq, mask); 254 desc->chip->set_affinity(irq, mask);
411 else if (irq_desc[irq].action && !(warned++)) 255 else if (desc->action && !(warned++))
412 printk("Cannot set affinity for irq %i\n", irq); 256 printk("Cannot set affinity for irq %i\n", irq);
413 } 257 }
414 258
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index f065fe9071b9..60eb84eb77a0 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -18,28 +18,6 @@
18#include <asm/idle.h> 18#include <asm/idle.h>
19#include <asm/smp.h> 19#include <asm/smp.h>
20 20
21atomic_t irq_err_count;
22
23/*
24 * 'what should we do if we get a hw irq event on an illegal vector'.
25 * each architecture has to answer this themselves.
26 */
27void ack_bad_irq(unsigned int irq)
28{
29 printk(KERN_WARNING "unexpected IRQ trap at vector %02x\n", irq);
30 /*
31 * Currently unexpected vectors happen only on SMP and APIC.
32 * We _must_ ack these because every local APIC has only N
33 * irq slots per priority level, and a 'hanging, unacked' IRQ
34 * holds up an irq slot - in excessive cases (when multiple
35 * unexpected vectors occur) that might lock up the APIC
36 * completely.
37 * But don't ack when the APIC is disabled. -AK
38 */
39 if (!disable_apic)
40 ack_APIC_irq();
41}
42
43#ifdef CONFIG_DEBUG_STACKOVERFLOW 21#ifdef CONFIG_DEBUG_STACKOVERFLOW
44/* 22/*
45 * Probabilistic stack overflow check: 23 * Probabilistic stack overflow check:
@@ -65,122 +43,6 @@ static inline void stack_overflow_check(struct pt_regs *regs)
65#endif 43#endif
66 44
67/* 45/*
68 * Generic, controller-independent functions:
69 */
70
71int show_interrupts(struct seq_file *p, void *v)
72{
73 int i = *(loff_t *) v, j;
74 struct irqaction * action;
75 unsigned long flags;
76
77 if (i == 0) {
78 seq_printf(p, " ");
79 for_each_online_cpu(j)
80 seq_printf(p, "CPU%-8d",j);
81 seq_putc(p, '\n');
82 }
83
84 if (i < NR_IRQS) {
85 unsigned any_count = 0;
86
87 spin_lock_irqsave(&irq_desc[i].lock, flags);
88#ifndef CONFIG_SMP
89 any_count = kstat_irqs(i);
90#else
91 for_each_online_cpu(j)
92 any_count |= kstat_cpu(j).irqs[i];
93#endif
94 action = irq_desc[i].action;
95 if (!action && !any_count)
96 goto skip;
97 seq_printf(p, "%3d: ",i);
98#ifndef CONFIG_SMP
99 seq_printf(p, "%10u ", kstat_irqs(i));
100#else
101 for_each_online_cpu(j)
102 seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
103#endif
104 seq_printf(p, " %8s", irq_desc[i].chip->name);
105 seq_printf(p, "-%-8s", irq_desc[i].name);
106
107 if (action) {
108 seq_printf(p, " %s", action->name);
109 while ((action = action->next) != NULL)
110 seq_printf(p, ", %s", action->name);
111 }
112 seq_putc(p, '\n');
113skip:
114 spin_unlock_irqrestore(&irq_desc[i].lock, flags);
115 } else if (i == NR_IRQS) {
116 seq_printf(p, "NMI: ");
117 for_each_online_cpu(j)
118 seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count);
119 seq_printf(p, " Non-maskable interrupts\n");
120 seq_printf(p, "LOC: ");
121 for_each_online_cpu(j)
122 seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs);
123 seq_printf(p, " Local timer interrupts\n");
124#ifdef CONFIG_SMP
125 seq_printf(p, "RES: ");
126 for_each_online_cpu(j)
127 seq_printf(p, "%10u ", cpu_pda(j)->irq_resched_count);
128 seq_printf(p, " Rescheduling interrupts\n");
129 seq_printf(p, "CAL: ");
130 for_each_online_cpu(j)
131 seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count);
132 seq_printf(p, " Function call interrupts\n");
133 seq_printf(p, "TLB: ");
134 for_each_online_cpu(j)
135 seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count);
136 seq_printf(p, " TLB shootdowns\n");
137#endif
138#ifdef CONFIG_X86_MCE
139 seq_printf(p, "TRM: ");
140 for_each_online_cpu(j)
141 seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count);
142 seq_printf(p, " Thermal event interrupts\n");
143 seq_printf(p, "THR: ");
144 for_each_online_cpu(j)
145 seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count);
146 seq_printf(p, " Threshold APIC interrupts\n");
147#endif
148 seq_printf(p, "SPU: ");
149 for_each_online_cpu(j)
150 seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count);
151 seq_printf(p, " Spurious interrupts\n");
152 seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
153 }
154 return 0;
155}
156
157/*
158 * /proc/stat helpers
159 */
160u64 arch_irq_stat_cpu(unsigned int cpu)
161{
162 u64 sum = cpu_pda(cpu)->__nmi_count;
163
164 sum += cpu_pda(cpu)->apic_timer_irqs;
165#ifdef CONFIG_SMP
166 sum += cpu_pda(cpu)->irq_resched_count;
167 sum += cpu_pda(cpu)->irq_call_count;
168 sum += cpu_pda(cpu)->irq_tlb_count;
169#endif
170#ifdef CONFIG_X86_MCE
171 sum += cpu_pda(cpu)->irq_thermal_count;
172 sum += cpu_pda(cpu)->irq_threshold_count;
173#endif
174 sum += cpu_pda(cpu)->irq_spurious_count;
175 return sum;
176}
177
178u64 arch_irq_stat(void)
179{
180 return atomic_read(&irq_err_count);
181}
182
183/*
184 * do_IRQ handles all normal device IRQ's (the special 46 * do_IRQ handles all normal device IRQ's (the special
185 * SMP cross-CPU interrupts have their own specific 47 * SMP cross-CPU interrupts have their own specific
186 * handlers). 48 * handlers).
@@ -188,6 +50,7 @@ u64 arch_irq_stat(void)
188asmlinkage unsigned int do_IRQ(struct pt_regs *regs) 50asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
189{ 51{
190 struct pt_regs *old_regs = set_irq_regs(regs); 52 struct pt_regs *old_regs = set_irq_regs(regs);
53 struct irq_desc *desc;
191 54
192 /* high bit used in ret_from_ code */ 55 /* high bit used in ret_from_ code */
193 unsigned vector = ~regs->orig_ax; 56 unsigned vector = ~regs->orig_ax;
@@ -201,8 +64,9 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
201 stack_overflow_check(regs); 64 stack_overflow_check(regs);
202#endif 65#endif
203 66
204 if (likely(irq < NR_IRQS)) 67 desc = irq_to_desc(irq);
205 generic_handle_irq(irq); 68 if (likely(desc))
69 generic_handle_irq_desc(irq, desc);
206 else { 70 else {
207 if (!disable_apic) 71 if (!disable_apic)
208 ack_APIC_irq(); 72 ack_APIC_irq();
@@ -223,8 +87,9 @@ void fixup_irqs(cpumask_t map)
223{ 87{
224 unsigned int irq; 88 unsigned int irq;
225 static int warned; 89 static int warned;
90 struct irq_desc *desc;
226 91
227 for (irq = 0; irq < NR_IRQS; irq++) { 92 for_each_irq_desc(irq, desc) {
228 cpumask_t mask; 93 cpumask_t mask;
229 int break_affinity = 0; 94 int break_affinity = 0;
230 int set_affinity = 1; 95 int set_affinity = 1;
@@ -233,32 +98,32 @@ void fixup_irqs(cpumask_t map)
233 continue; 98 continue;
234 99
235		/* interrupts are disabled at this point */ 100		/* interrupts are disabled at this point */
236 spin_lock(&irq_desc[irq].lock); 101 spin_lock(&desc->lock);
237 102
238 if (!irq_has_action(irq) || 103 if (!irq_has_action(irq) ||
239 cpus_equal(irq_desc[irq].affinity, map)) { 104 cpus_equal(desc->affinity, map)) {
240 spin_unlock(&irq_desc[irq].lock); 105 spin_unlock(&desc->lock);
241 continue; 106 continue;
242 } 107 }
243 108
244 cpus_and(mask, irq_desc[irq].affinity, map); 109 cpus_and(mask, desc->affinity, map);
245 if (cpus_empty(mask)) { 110 if (cpus_empty(mask)) {
246 break_affinity = 1; 111 break_affinity = 1;
247 mask = map; 112 mask = map;
248 } 113 }
249 114
250 if (irq_desc[irq].chip->mask) 115 if (desc->chip->mask)
251 irq_desc[irq].chip->mask(irq); 116 desc->chip->mask(irq);
252 117
253 if (irq_desc[irq].chip->set_affinity) 118 if (desc->chip->set_affinity)
254 irq_desc[irq].chip->set_affinity(irq, mask); 119 desc->chip->set_affinity(irq, mask);
255 else if (!(warned++)) 120 else if (!(warned++))
256 set_affinity = 0; 121 set_affinity = 0;
257 122
258 if (irq_desc[irq].chip->unmask) 123 if (desc->chip->unmask)
259 irq_desc[irq].chip->unmask(irq); 124 desc->chip->unmask(irq);
260 125
261 spin_unlock(&irq_desc[irq].lock); 126 spin_unlock(&desc->lock);
262 127
263 if (break_affinity && set_affinity) 128 if (break_affinity && set_affinity)
264 printk("Broke affinity for irq %i\n", irq); 129 printk("Broke affinity for irq %i\n", irq);
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 9200a1e2752d..845aa9803e80 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -69,6 +69,13 @@ void __init init_ISA_irqs (void)
69 * 16 old-style INTA-cycle interrupts: 69 * 16 old-style INTA-cycle interrupts:
70 */ 70 */
71 for (i = 0; i < 16; i++) { 71 for (i = 0; i < 16; i++) {
72		/* first use of this irq_desc: initialize it explicitly */
73 struct irq_desc *desc = irq_to_desc(i);
74
75 desc->status = IRQ_DISABLED;
76 desc->action = NULL;
77 desc->depth = 1;
78
72 set_irq_chip_and_handler_name(i, &i8259A_chip, 79 set_irq_chip_and_handler_name(i, &i8259A_chip,
73 handle_level_irq, "XT"); 80 handle_level_irq, "XT");
74 } 81 }
@@ -83,6 +90,27 @@ static struct irqaction irq2 = {
83 .name = "cascade", 90 .name = "cascade",
84}; 91};
85 92
93DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
94 [0 ... IRQ0_VECTOR - 1] = -1,
95 [IRQ0_VECTOR] = 0,
96 [IRQ1_VECTOR] = 1,
97 [IRQ2_VECTOR] = 2,
98 [IRQ3_VECTOR] = 3,
99 [IRQ4_VECTOR] = 4,
100 [IRQ5_VECTOR] = 5,
101 [IRQ6_VECTOR] = 6,
102 [IRQ7_VECTOR] = 7,
103 [IRQ8_VECTOR] = 8,
104 [IRQ9_VECTOR] = 9,
105 [IRQ10_VECTOR] = 10,
106 [IRQ11_VECTOR] = 11,
107 [IRQ12_VECTOR] = 12,
108 [IRQ13_VECTOR] = 13,
109 [IRQ14_VECTOR] = 14,
110 [IRQ15_VECTOR] = 15,
111 [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
112};
113
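
This table seeds the per-cpu vector-to-irq translation with the identity mapping for the 16 ISA interrupts; every other vector starts out at -1, i.e. unassigned. do_IRQ() above consumes it directly:

	irq = __get_cpu_var(vector_irq)[vector];   /* -1: nothing bound here */
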
86/* Overridden in paravirt.c */ 114/* Overridden in paravirt.c */
87void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); 115void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
88 116
@@ -98,22 +126,14 @@ void __init native_init_IRQ(void)
98 * us. (some of these will be overridden and become 126 * us. (some of these will be overridden and become
99 * 'special' SMP interrupts) 127 * 'special' SMP interrupts)
100 */ 128 */
101 for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { 129 for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
102 int vector = FIRST_EXTERNAL_VECTOR + i;
103 if (i >= NR_IRQS)
104 break;
105 /* SYSCALL_VECTOR was reserved in trap_init. */ 130 /* SYSCALL_VECTOR was reserved in trap_init. */
106 if (!test_bit(vector, used_vectors)) 131 if (i != SYSCALL_VECTOR)
107 set_intr_gate(vector, interrupt[i]); 132 set_intr_gate(i, interrupt[i]);
108 } 133 }
109 134
110#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP)
111 /*
112 * IRQ0 must be given a fixed assignment and initialized,
113 * because it's used before the IO-APIC is set up.
114 */
115 set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
116 135
136#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP)
117 /* 137 /*
118 * The reschedule interrupt is a CPU-to-CPU reschedule-helper 138 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
119 * IPI, driven by wakeup. 139 * IPI, driven by wakeup.
@@ -128,6 +148,9 @@ void __init native_init_IRQ(void)
128 148
129 /* IPI for single call function */ 149 /* IPI for single call function */
130 set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt); 150 set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt);
151
152 /* Low priority IPI to cleanup after moving an irq */
153 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
131#endif 154#endif
132 155
133#ifdef CONFIG_X86_LOCAL_APIC 156#ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 5b5be9d43c2a..ff0235391285 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -142,23 +142,19 @@ void __init init_ISA_irqs(void)
142 init_bsp_APIC(); 142 init_bsp_APIC();
143 init_8259A(0); 143 init_8259A(0);
144 144
145 for (i = 0; i < NR_IRQS; i++) { 145 for (i = 0; i < 16; i++) {
146		irq_desc[i].status = IRQ_DISABLED; 146		/* first use of this irq_desc: initialize it explicitly */
147 irq_desc[i].action = NULL; 147 struct irq_desc *desc = irq_to_desc(i);
148 irq_desc[i].depth = 1; 148
149 149 desc->status = IRQ_DISABLED;
150 if (i < 16) { 150 desc->action = NULL;
151 /* 151 desc->depth = 1;
152 * 16 old-style INTA-cycle interrupts: 152
153 */ 153 /*
154 set_irq_chip_and_handler_name(i, &i8259A_chip, 154 * 16 old-style INTA-cycle interrupts:
155 */
156 set_irq_chip_and_handler_name(i, &i8259A_chip,
155 handle_level_irq, "XT"); 157 handle_level_irq, "XT");
156 } else {
157 /*
158 * 'high' PCI IRQs filled in on demand
159 */
160 irq_desc[i].chip = &no_irq_chip;
161 }
162 } 158 }
163} 159}
164 160
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index f6a11b9b1f98..67465ed89310 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -35,9 +35,6 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
35 if (!(word & (1 << 13))) { 35 if (!(word & (1 << 13))) {
36 dev_info(&dev->dev, "Intel E7520/7320/7525 detected; " 36 dev_info(&dev->dev, "Intel E7520/7320/7525 detected; "
37 "disabling irq balancing and affinity\n"); 37 "disabling irq balancing and affinity\n");
38#ifdef CONFIG_IRQBALANCE
39 irqbalance_disable("");
40#endif
41 noirqdebug_setup(""); 38 noirqdebug_setup("");
42#ifdef CONFIG_PROC_FS 39#ifdef CONFIG_PROC_FS
43 no_irq_affinity = 1; 40 no_irq_affinity = 1;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b2c97874ec0f..0fa6790c1dd3 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1073,6 +1073,7 @@ void __init setup_arch(char **cmdline_p)
1073#endif 1073#endif
1074 1074
1075 prefill_possible_map(); 1075 prefill_possible_map();
1076
1076#ifdef CONFIG_X86_64 1077#ifdef CONFIG_X86_64
1077 init_cpu_to_node(); 1078 init_cpu_to_node();
1078#endif 1079#endif
@@ -1080,6 +1081,9 @@ void __init setup_arch(char **cmdline_p)
1080 init_apic_mappings(); 1081 init_apic_mappings();
1081 ioapic_init_mappings(); 1082 ioapic_init_mappings();
1082 1083
1084	/* must wait until the io_apic is mapped */
1085 nr_irqs = probe_nr_irqs();
1086
1083 kvm_guest_init(); 1087 kvm_guest_init();
1084 1088
1085 e820_reserve_resources(); 1089 e820_reserve_resources();
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 0e67f72d9316..410c88f0bfeb 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -140,25 +140,30 @@ static void __init setup_cpu_pda_map(void)
140 */ 140 */
141void __init setup_per_cpu_areas(void) 141void __init setup_per_cpu_areas(void)
142{ 142{
143 ssize_t size = PERCPU_ENOUGH_ROOM; 143 ssize_t size, old_size;
144 char *ptr; 144 char *ptr;
145 int cpu; 145 int cpu;
146 unsigned long align = 1;
146 147
147 /* Setup cpu_pda map */ 148 /* Setup cpu_pda map */
148 setup_cpu_pda_map(); 149 setup_cpu_pda_map();
149 150
150 /* Copy section for each CPU (we discard the original) */ 151 /* Copy section for each CPU (we discard the original) */
151 size = PERCPU_ENOUGH_ROOM; 152 old_size = PERCPU_ENOUGH_ROOM;
153 align = max_t(unsigned long, PAGE_SIZE, align);
154 size = roundup(old_size, align);
152 printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", 155 printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
153 size); 156 size);
154 157
155 for_each_possible_cpu(cpu) { 158 for_each_possible_cpu(cpu) {
156#ifndef CONFIG_NEED_MULTIPLE_NODES 159#ifndef CONFIG_NEED_MULTIPLE_NODES
157 ptr = alloc_bootmem_pages(size); 160 ptr = __alloc_bootmem(size, align,
161 __pa(MAX_DMA_ADDRESS));
158#else 162#else
159 int node = early_cpu_to_node(cpu); 163 int node = early_cpu_to_node(cpu);
160 if (!node_online(node) || !NODE_DATA(node)) { 164 if (!node_online(node) || !NODE_DATA(node)) {
161 ptr = alloc_bootmem_pages(size); 165 ptr = __alloc_bootmem(size, align,
166 __pa(MAX_DMA_ADDRESS));
162 printk(KERN_INFO 167 printk(KERN_INFO
163 "cpu %d has no node %d or node-local memory\n", 168 "cpu %d has no node %d or node-local memory\n",
164 cpu, node); 169 cpu, node);
@@ -167,7 +172,8 @@ void __init setup_per_cpu_areas(void)
167 cpu, __pa(ptr)); 172 cpu, __pa(ptr));
168 } 173 }
169 else { 174 else {
170 ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); 175 ptr = __alloc_bootmem_node(NODE_DATA(node), size, align,
176 __pa(MAX_DMA_ADDRESS));
171 if (ptr) 177 if (ptr)
172 printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n", 178 printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n",
173 cpu, node, __pa(ptr)); 179 cpu, node, __pa(ptr));
@@ -175,7 +181,6 @@ void __init setup_per_cpu_areas(void)
175#endif 181#endif
176 per_cpu_offset(cpu) = ptr - __per_cpu_start; 182 per_cpu_offset(cpu) = ptr - __per_cpu_start;
177 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); 183 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
178
179 } 184 }
180 185
181 printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n", 186 printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n",
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7ed9e070a6e9..7ece815ea637 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -543,10 +543,10 @@ static inline void __inquire_remote_apic(int apicid)
543 int timeout; 543 int timeout;
544 u32 status; 544 u32 status;
545 545
546 printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid); 546 printk(KERN_INFO "Inquiring remote APIC 0x%x...\n", apicid);
547 547
548 for (i = 0; i < ARRAY_SIZE(regs); i++) { 548 for (i = 0; i < ARRAY_SIZE(regs); i++) {
549 printk(KERN_INFO "... APIC #%d %s: ", apicid, names[i]); 549 printk(KERN_INFO "... APIC 0x%x %s: ", apicid, names[i]);
550 550
551 /* 551 /*
552 * Wait for idle. 552 * Wait for idle.
@@ -874,7 +874,7 @@ do_rest:
874 start_ip = setup_trampoline(); 874 start_ip = setup_trampoline();
875 875
876 /* So we see what's up */ 876 /* So we see what's up */
877 printk(KERN_INFO "Booting processor %d/%d ip %lx\n", 877 printk(KERN_INFO "Booting processor %d APIC 0x%x ip 0x%lx\n",
878 cpu, apicid, start_ip); 878 cpu, apicid, start_ip);
879 879
880 /* 880 /*
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c
new file mode 100644
index 000000000000..aeef529917e4
--- /dev/null
+++ b/arch/x86/kernel/uv_irq.c
@@ -0,0 +1,79 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * SGI UV IRQ functions
7 *
8 * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
9 */
10
11#include <linux/module.h>
12#include <linux/irq.h>
13
14#include <asm/apic.h>
15#include <asm/uv/uv_irq.h>
16
17static void uv_noop(unsigned int irq)
18{
19}
20
21static unsigned int uv_noop_ret(unsigned int irq)
22{
23 return 0;
24}
25
26static void uv_ack_apic(unsigned int irq)
27{
28 ack_APIC_irq();
29}
30
31struct irq_chip uv_irq_chip = {
32 .name = "UV-CORE",
33 .startup = uv_noop_ret,
34 .shutdown = uv_noop,
35 .enable = uv_noop,
36 .disable = uv_noop,
37 .ack = uv_noop,
38 .mask = uv_noop,
39 .unmask = uv_noop,
40 .eoi = uv_ack_apic,
41 .end = uv_noop,
42};
43
44/*
45 * Set up a mapping of an available irq and vector, and enable the specified
46 * MMR that defines the MSI that is to be sent to the specified CPU when an
47 * interrupt is raised.
48 */
49int uv_setup_irq(char *irq_name, int cpu, int mmr_blade,
50 unsigned long mmr_offset)
51{
52 int irq;
53 int ret;
54
55 irq = create_irq();
56 if (irq <= 0)
57 return -EBUSY;
58
59 ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset);
60 if (ret != irq)
61 destroy_irq(irq);
62
63 return ret;
64}
65EXPORT_SYMBOL_GPL(uv_setup_irq);
66
67/*
68 * Tear down a mapping of an irq and vector, and disable the specified MMR that
69 * defined the MSI that was to be sent to the specified CPU when an interrupt
70 * was raised.
71 *
72 * Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq().
73 */
74void uv_teardown_irq(unsigned int irq, int mmr_blade, unsigned long mmr_offset)
75{
76 arch_disable_uv_irq(mmr_blade, mmr_offset);
77 destroy_irq(irq);
78}
79EXPORT_SYMBOL_GPL(uv_teardown_irq);
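
A paired-usage sketch for the two exports (hypothetical caller; the name string is invented):

	int irq = uv_setup_irq("uv-sample", cpu, mmr_blade, mmr_offset);
	if (irq < 0)
		return irq;
	/* ... the MMR now delivers the MSI for 'irq' to 'cpu' ... */
	uv_teardown_irq(irq, mmr_blade, mmr_offset);
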
diff --git a/arch/x86/kernel/uv_sysfs.c b/arch/x86/kernel/uv_sysfs.c
new file mode 100644
index 000000000000..67f9b9dbf800
--- /dev/null
+++ b/arch/x86/kernel/uv_sysfs.c
@@ -0,0 +1,72 @@
1/*
2 * This file supports the /sys/firmware/sgi_uv interfaces for SGI UV.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 *
18 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
19 * Copyright (c) Russ Anderson
20 */
21
22#include <linux/sysdev.h>
23#include <asm/uv/bios.h>
24
25struct kobject *sgi_uv_kobj;
26
27static ssize_t partition_id_show(struct kobject *kobj,
28 struct kobj_attribute *attr, char *buf)
29{
30 return snprintf(buf, PAGE_SIZE, "%ld\n", sn_partition_id);
31}
32
33static ssize_t coherence_id_show(struct kobject *kobj,
34 struct kobj_attribute *attr, char *buf)
35{
36 return snprintf(buf, PAGE_SIZE, "%ld\n", partition_coherence_id());
37}
38
39static struct kobj_attribute partition_id_attr =
40 __ATTR(partition_id, S_IRUGO, partition_id_show, NULL);
41
42static struct kobj_attribute coherence_id_attr =
43 __ATTR(coherence_id, S_IRUGO, coherence_id_show, NULL);
44
45
46static int __init sgi_uv_sysfs_init(void)
47{
48	int ret;
49
50 if (!sgi_uv_kobj)
51 sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj);
52 if (!sgi_uv_kobj) {
53 printk(KERN_WARNING "kobject_create_and_add sgi_uv failed \n");
54 return -EINVAL;
55 }
56
57 ret = sysfs_create_file(sgi_uv_kobj, &partition_id_attr.attr);
58 if (ret) {
59 printk(KERN_WARNING "sysfs_create_file partition_id failed \n");
60 return ret;
61 }
62
63 ret = sysfs_create_file(sgi_uv_kobj, &coherence_id_attr.attr);
64 if (ret) {
65 printk(KERN_WARNING "sysfs_create_file coherence_id failed \n");
66 return ret;
67 }
68
69 return 0;
70}
71
72device_initcall(sgi_uv_sysfs_init);
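
The initcall above yields two read-only files (illustrative listing):

	/sys/firmware/sgi_uv/partition_id
	/sys/firmware/sgi_uv/coherence_id
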
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index 61a97e616f70..0c9667f0752a 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -484,10 +484,11 @@ static void disable_cobalt_irq(unsigned int irq)
484static unsigned int startup_cobalt_irq(unsigned int irq) 484static unsigned int startup_cobalt_irq(unsigned int irq)
485{ 485{
486 unsigned long flags; 486 unsigned long flags;
487 struct irq_desc *desc = irq_to_desc(irq);
487 488
488 spin_lock_irqsave(&cobalt_lock, flags); 489 spin_lock_irqsave(&cobalt_lock, flags);
489 if ((irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING))) 490 if ((desc->status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING)))
490 irq_desc[irq].status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING); 491 desc->status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING);
491 enable_cobalt_irq(irq); 492 enable_cobalt_irq(irq);
492 spin_unlock_irqrestore(&cobalt_lock, flags); 493 spin_unlock_irqrestore(&cobalt_lock, flags);
493 return 0; 494 return 0;
@@ -506,9 +507,10 @@ static void ack_cobalt_irq(unsigned int irq)
506static void end_cobalt_irq(unsigned int irq) 507static void end_cobalt_irq(unsigned int irq)
507{ 508{
508 unsigned long flags; 509 unsigned long flags;
510 struct irq_desc *desc = irq_to_desc(irq);
509 511
510 spin_lock_irqsave(&cobalt_lock, flags); 512 spin_lock_irqsave(&cobalt_lock, flags);
511 if (!(irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS))) 513 if (!(desc->status & (IRQ_DISABLED | IRQ_INPROGRESS)))
512 enable_cobalt_irq(irq); 514 enable_cobalt_irq(irq);
513 spin_unlock_irqrestore(&cobalt_lock, flags); 515 spin_unlock_irqrestore(&cobalt_lock, flags);
514} 516}
@@ -626,12 +628,12 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id)
626 628
627 spin_unlock_irqrestore(&i8259A_lock, flags); 629 spin_unlock_irqrestore(&i8259A_lock, flags);
628 630
629 desc = irq_desc + realirq; 631 desc = irq_to_desc(realirq);
630 632
631 /* 633 /*
632 * handle this 'virtual interrupt' as a Cobalt one now. 634 * handle this 'virtual interrupt' as a Cobalt one now.
633 */ 635 */
634 kstat_cpu(smp_processor_id()).irqs[realirq]++; 636 kstat_incr_irqs_this_cpu(realirq, desc);
635 637
636 if (likely(desc->action != NULL)) 638 if (likely(desc->action != NULL))
637 handle_IRQ_event(realirq, desc->action); 639 handle_IRQ_event(realirq, desc->action);
@@ -662,27 +664,29 @@ void init_VISWS_APIC_irqs(void)
662 int i; 664 int i;
663 665
664 for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) { 666 for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) {
665 irq_desc[i].status = IRQ_DISABLED; 667 struct irq_desc *desc = irq_to_desc(i);
666 irq_desc[i].action = 0; 668
667 irq_desc[i].depth = 1; 669 desc->status = IRQ_DISABLED;
670 desc->action = 0;
671 desc->depth = 1;
668 672
669 if (i == 0) { 673 if (i == 0) {
670 irq_desc[i].chip = &cobalt_irq_type; 674 desc->chip = &cobalt_irq_type;
671 } 675 }
672 else if (i == CO_IRQ_IDE0) { 676 else if (i == CO_IRQ_IDE0) {
673 irq_desc[i].chip = &cobalt_irq_type; 677 desc->chip = &cobalt_irq_type;
674 } 678 }
675 else if (i == CO_IRQ_IDE1) { 679 else if (i == CO_IRQ_IDE1) {
676 irq_desc[i].chip = &cobalt_irq_type; 680 desc->chip = &cobalt_irq_type;
677 } 681 }
678 else if (i == CO_IRQ_8259) { 682 else if (i == CO_IRQ_8259) {
679 irq_desc[i].chip = &piix4_master_irq_type; 683 desc->chip = &piix4_master_irq_type;
680 } 684 }
681 else if (i < CO_IRQ_APIC0) { 685 else if (i < CO_IRQ_APIC0) {
682 irq_desc[i].chip = &piix4_virtual_irq_type; 686 desc->chip = &piix4_virtual_irq_type;
683 } 687 }
684 else if (IS_CO_APIC(i)) { 688 else if (IS_CO_APIC(i)) {
685 irq_desc[i].chip = &cobalt_irq_type; 689 desc->chip = &cobalt_irq_type;
686 } 690 }
687 } 691 }
688 692
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index 6953859fe289..254ee07f8635 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -235,11 +235,14 @@ static void __devinit vmi_time_init_clockevent(void)
235 235
236void __init vmi_time_init(void) 236void __init vmi_time_init(void)
237{ 237{
238 unsigned int cpu;
238	/* Disable PIT: BIOSes start PIT CH0 in 18.2Hz periodic mode. */ 239	/* Disable PIT: BIOSes start PIT CH0 in 18.2Hz periodic mode. */
239 outb_pit(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */ 240 outb_pit(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */
240 241
241 vmi_time_init_clockevent(); 242 vmi_time_init_clockevent();
242 setup_irq(0, &vmi_clock_action); 243 setup_irq(0, &vmi_clock_action);
244 for_each_possible_cpu(cpu)
245 per_cpu(vector_irq, cpu)[vmi_get_timer_vector()] = 0;
243} 246}
244 247
245#ifdef CONFIG_X86_LOCAL_APIC 248#ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 65f0b8a47bed..48ee4f9435f4 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -582,7 +582,7 @@ static void __init lguest_init_IRQ(void)
582 for (i = 0; i < LGUEST_IRQS; i++) { 582 for (i = 0; i < LGUEST_IRQS; i++) {
583 int vector = FIRST_EXTERNAL_VECTOR + i; 583 int vector = FIRST_EXTERNAL_VECTOR + i;
584 if (vector != SYSCALL_VECTOR) { 584 if (vector != SYSCALL_VECTOR) {
585 set_intr_gate(vector, interrupt[i]); 585 set_intr_gate(vector, interrupt[vector]);
586 set_irq_chip_and_handler_name(i, &lguest_irq_controller, 586 set_irq_chip_and_handler_name(i, &lguest_irq_controller,
587 handle_level_irq, 587 handle_level_irq,
588 "level"); 588 "level");
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c
index df37fc9d6a26..3c3b471ea496 100644
--- a/arch/x86/mach-generic/bigsmp.c
+++ b/arch/x86/mach-generic/bigsmp.c
@@ -41,6 +41,10 @@ static const struct dmi_system_id bigsmp_dmi_table[] = {
41 { } 41 { }
42}; 42};
43 43
44static cpumask_t vector_allocation_domain(int cpu)
45{
46 return cpumask_of_cpu(cpu);
47}
44 48
45static int probe_bigsmp(void) 49static int probe_bigsmp(void)
46{ 50{
diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c
index 6513d41ea21e..28459cab3ddb 100644
--- a/arch/x86/mach-generic/es7000.c
+++ b/arch/x86/mach-generic/es7000.c
@@ -75,4 +75,18 @@ static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
75} 75}
76#endif 76#endif
77 77
78static cpumask_t vector_allocation_domain(int cpu)
79{
80 /* Careful. Some cpus do not strictly honor the set of cpus
81 * specified in the interrupt destination when using lowest
82 * priority interrupt delivery mode.
83 *
84 * In particular there was a hyperthreading cpu observed to
85 * deliver interrupts to the wrong hyperthread when only one
86	 * hyperthread was specified in the interrupt destination.
87 */
88 cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
89 return domain;
90}
91
78struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000); 92struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000);
diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c
index 8cf58394975e..71a309b122e6 100644
--- a/arch/x86/mach-generic/numaq.c
+++ b/arch/x86/mach-generic/numaq.c
@@ -38,4 +38,18 @@ static int acpi_madt_oem_check(char *oem_id, char *oem_table_id)
38 return 0; 38 return 0;
39} 39}
40 40
41static cpumask_t vector_allocation_domain(int cpu)
42{
43 /* Careful. Some cpus do not strictly honor the set of cpus
44 * specified in the interrupt destination when using lowest
45 * priority interrupt delivery mode.
46 *
47 * In particular there was a hyperthreading cpu observed to
48 * deliver interrupts to the wrong hyperthread when only one
49	 * hyperthread was specified in the interrupt destination.
50 */
51 cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
52 return domain;
53}
54
41struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq); 55struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq);
diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c
index 6ad6b67a723d..6272b5e69da6 100644
--- a/arch/x86/mach-generic/summit.c
+++ b/arch/x86/mach-generic/summit.c
@@ -23,4 +23,18 @@ static int probe_summit(void)
23 return 0; 23 return 0;
24} 24}
25 25
26static cpumask_t vector_allocation_domain(int cpu)
27{
28 /* Careful. Some cpus do not strictly honor the set of cpus
29 * specified in the interrupt destination when using lowest
30 * priority interrupt delivery mode.
31 *
32 * In particular there was a hyperthreading cpu observed to
33 * deliver interrupts to the wrong hyperthread when only one
34	 * hyperthread was specified in the interrupt destination.
35 */
36 cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
37 return domain;
38}
39
26struct genapic apic_summit = APIC_INIT("summit", probe_summit); 40struct genapic apic_summit = APIC_INIT("summit", probe_summit);
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 199a5f4a873c..0f6e8a6523ae 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -1483,7 +1483,7 @@ static void disable_local_vic_irq(unsigned int irq)
1483 * the interrupt off to another CPU */ 1483 * the interrupt off to another CPU */
1484static void before_handle_vic_irq(unsigned int irq) 1484static void before_handle_vic_irq(unsigned int irq)
1485{ 1485{
1486 irq_desc_t *desc = irq_desc + irq; 1486 irq_desc_t *desc = irq_to_desc(irq);
1487 __u8 cpu = smp_processor_id(); 1487 __u8 cpu = smp_processor_id();
1488 1488
1489 _raw_spin_lock(&vic_irq_lock); 1489 _raw_spin_lock(&vic_irq_lock);
@@ -1518,7 +1518,7 @@ static void before_handle_vic_irq(unsigned int irq)
1518/* Finish the VIC interrupt: basically mask */ 1518/* Finish the VIC interrupt: basically mask */
1519static void after_handle_vic_irq(unsigned int irq) 1519static void after_handle_vic_irq(unsigned int irq)
1520{ 1520{
1521 irq_desc_t *desc = irq_desc + irq; 1521 irq_desc_t *desc = irq_to_desc(irq);
1522 1522
1523 _raw_spin_lock(&vic_irq_lock); 1523 _raw_spin_lock(&vic_irq_lock);
1524 { 1524 {
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 28b85ab8422e..bb042608c602 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -21,7 +21,6 @@ void xen_force_evtchn_callback(void)
21 21
22static void __init __xen_init_IRQ(void) 22static void __init __xen_init_IRQ(void)
23{ 23{
24#ifdef CONFIG_X86_64
25 int i; 24 int i;
26 25
27 /* Create identity vector->irq map */ 26 /* Create identity vector->irq map */
@@ -31,7 +30,6 @@ static void __init __xen_init_IRQ(void)
31 for_each_possible_cpu(cpu) 30 for_each_possible_cpu(cpu)
32 per_cpu(vector_irq, cpu)[i] = i; 31 per_cpu(vector_irq, cpu)[i] = i;
33 } 32 }
34#endif /* CONFIG_X86_64 */
35 33
36 xen_init_IRQ(); 34 xen_init_IRQ();
37} 35}
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index dd71e3a021cd..5601506f2dd9 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -241,7 +241,7 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enabl
241 ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq)); 241 ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq));
242 } while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */ 242 } while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */
243 243
244 kstat_this_cpu.irqs[irq]++; 244 kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
245 245
246out: 246out:
247 raw_local_irq_restore(flags); 247 raw_local_irq_restore(flags);