aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile8
-rw-r--r--arch/x86/kernel/acpi/boot.c4
-rw-r--r--arch/x86/kernel/acpi/sleep.c3
-rw-r--r--arch/x86/kernel/apic.c (renamed from arch/x86/kernel/apic_32.c)627
-rw-r--r--arch/x86/kernel/apic_64.c1848
-rw-r--r--arch/x86/kernel/bios_uv.c137
-rw-r--r--arch/x86/kernel/cpu/.gitignore1
-rw-r--r--arch/x86/kernel/cpu/amd.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/longhaul.c4
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k6.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k7.c4
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-ich.c2
-rw-r--r--arch/x86/kernel/cpu/intel.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/k7.c4
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_32.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/non-fatal.c2
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c11
-rw-r--r--arch/x86/kernel/crash_dump_32.c3
-rw-r--r--arch/x86/kernel/crash_dump_64.c3
-rw-r--r--arch/x86/kernel/efi.c4
-rw-r--r--arch/x86/kernel/entry_32.S16
-rw-r--r--arch/x86/kernel/entry_64.S26
-rw-r--r--arch/x86/kernel/ftrace.c124
-rw-r--r--arch/x86/kernel/genapic_flat_64.c4
-rw-r--r--arch/x86/kernel/genx2apic_uv_x.c49
-rw-r--r--arch/x86/kernel/hpet.c453
-rw-r--r--arch/x86/kernel/io_apic.c (renamed from arch/x86/kernel/io_apic_64.c)1538
-rw-r--r--arch/x86/kernel/io_apic_32.c2908
-rw-r--r--arch/x86/kernel/irq.c189
-rw-r--r--arch/x86/kernel/irq_32.c194
-rw-r--r--arch/x86/kernel/irq_64.c169
-rw-r--r--arch/x86/kernel/irqinit_32.c47
-rw-r--r--arch/x86/kernel/irqinit_64.c28
-rw-r--r--arch/x86/kernel/quirks.c3
-rw-r--r--arch/x86/kernel/rtc.c20
-rw-r--r--arch/x86/kernel/setup.c12
-rw-r--r--arch/x86/kernel/setup_percpu.c17
-rw-r--r--arch/x86/kernel/smpboot.c6
-rw-r--r--arch/x86/kernel/uv_irq.c79
-rw-r--r--arch/x86/kernel/uv_sysfs.c72
-rw-r--r--arch/x86/kernel/visws_quirks.c32
-rw-r--r--arch/x86/kernel/vmiclock_32.c3
43 files changed, 2916 insertions, 5748 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 0d41f0343dc0..d7e5a58ee22f 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -23,7 +23,7 @@ CFLAGS_hpet.o := $(nostackp)
23CFLAGS_tsc.o := $(nostackp) 23CFLAGS_tsc.o := $(nostackp)
24 24
25obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o 25obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
26obj-y += traps.o irq_$(BITS).o dumpstack_$(BITS).o 26obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
27obj-y += time_$(BITS).o ioport.o ldt.o 27obj-y += time_$(BITS).o ioport.o ldt.o
28obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o 28obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
29obj-$(CONFIG_X86_VISWS) += visws_quirks.o 29obj-$(CONFIG_X86_VISWS) += visws_quirks.o
@@ -60,8 +60,8 @@ obj-$(CONFIG_X86_32_SMP) += smpcommon.o
60obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o 60obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o
61obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o 61obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o
62obj-$(CONFIG_X86_MPPARSE) += mpparse.o 62obj-$(CONFIG_X86_MPPARSE) += mpparse.o
63obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi.o 63obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
64obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o 64obj-$(CONFIG_X86_IO_APIC) += io_apic.o
65obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o 65obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
66obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o 66obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
67obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o 67obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
@@ -108,7 +108,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o
108# 64 bit specific files 108# 64 bit specific files
109ifeq ($(CONFIG_X86_64),y) 109ifeq ($(CONFIG_X86_64),y)
110 obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o 110 obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
111 obj-y += bios_uv.o 111 obj-y += bios_uv.o uv_irq.o uv_sysfs.o
112 obj-y += genx2apic_cluster.o 112 obj-y += genx2apic_cluster.o
113 obj-y += genx2apic_phys.o 113 obj-y += genx2apic_phys.o
114 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o 114 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index eb875cdc7367..0d1c26a583c5 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1256,7 +1256,7 @@ static int __init acpi_parse_madt_ioapic_entries(void)
1256 1256
1257 count = 1257 count =
1258 acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr, 1258 acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr,
1259 NR_IRQ_VECTORS); 1259 nr_irqs);
1260 if (count < 0) { 1260 if (count < 0) {
1261 printk(KERN_ERR PREFIX 1261 printk(KERN_ERR PREFIX
1262 "Error parsing interrupt source overrides entry\n"); 1262 "Error parsing interrupt source overrides entry\n");
@@ -1276,7 +1276,7 @@ static int __init acpi_parse_madt_ioapic_entries(void)
1276 1276
1277 count = 1277 count =
1278 acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_parse_nmi_src, 1278 acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_parse_nmi_src,
1279 NR_IRQ_VECTORS); 1279 nr_irqs);
1280 if (count < 0) { 1280 if (count < 0) {
1281 printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n"); 1281 printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
1282 /* TBD: Cleanup to allow fallback to MPS */ 1282 /* TBD: Cleanup to allow fallback to MPS */
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 426e5d91b63a..c44cd6dbfa14 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -10,6 +10,7 @@
10#include <linux/dmi.h> 10#include <linux/dmi.h>
11#include <linux/cpumask.h> 11#include <linux/cpumask.h>
12#include <asm/segment.h> 12#include <asm/segment.h>
13#include <asm/desc.h>
13 14
14#include "realmode/wakeup.h" 15#include "realmode/wakeup.h"
15#include "sleep.h" 16#include "sleep.h"
@@ -98,6 +99,8 @@ int acpi_save_state_mem(void)
98 header->trampoline_segment = setup_trampoline() >> 4; 99 header->trampoline_segment = setup_trampoline() >> 4;
99#ifdef CONFIG_SMP 100#ifdef CONFIG_SMP
100 stack_start.sp = temp_stack + 4096; 101 stack_start.sp = temp_stack + 4096;
102 early_gdt_descr.address =
103 (unsigned long)get_cpu_gdt_table(smp_processor_id());
101#endif 104#endif
102 initial_code = (unsigned long)wakeup_long64; 105 initial_code = (unsigned long)wakeup_long64;
103 saved_magic = 0x123456789abcdef0; 106 saved_magic = 0x123456789abcdef0;
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic.c
index 21c831d96af3..04a7f960bbc0 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic.c
@@ -23,11 +23,13 @@
23#include <linux/mc146818rtc.h> 23#include <linux/mc146818rtc.h>
24#include <linux/kernel_stat.h> 24#include <linux/kernel_stat.h>
25#include <linux/sysdev.h> 25#include <linux/sysdev.h>
26#include <linux/ioport.h>
26#include <linux/cpu.h> 27#include <linux/cpu.h>
27#include <linux/clockchips.h> 28#include <linux/clockchips.h>
28#include <linux/acpi_pmtmr.h> 29#include <linux/acpi_pmtmr.h>
29#include <linux/module.h> 30#include <linux/module.h>
30#include <linux/dmi.h> 31#include <linux/dmi.h>
32#include <linux/dmar.h>
31 33
32#include <asm/atomic.h> 34#include <asm/atomic.h>
33#include <asm/smp.h> 35#include <asm/smp.h>
@@ -36,8 +38,14 @@
36#include <asm/desc.h> 38#include <asm/desc.h>
37#include <asm/arch_hooks.h> 39#include <asm/arch_hooks.h>
38#include <asm/hpet.h> 40#include <asm/hpet.h>
41#include <asm/pgalloc.h>
39#include <asm/i8253.h> 42#include <asm/i8253.h>
40#include <asm/nmi.h> 43#include <asm/nmi.h>
44#include <asm/idle.h>
45#include <asm/proto.h>
46#include <asm/timex.h>
47#include <asm/apic.h>
48#include <asm/i8259.h>
41 49
42#include <mach_apic.h> 50#include <mach_apic.h>
43#include <mach_apicdef.h> 51#include <mach_apicdef.h>
@@ -50,16 +58,58 @@
50# error SPURIOUS_APIC_VECTOR definition error 58# error SPURIOUS_APIC_VECTOR definition error
51#endif 59#endif
52 60
53unsigned long mp_lapic_addr; 61#ifdef CONFIG_X86_32
54
55/* 62/*
56 * Knob to control our willingness to enable the local APIC. 63 * Knob to control our willingness to enable the local APIC.
57 * 64 *
58 * +1=force-enable 65 * +1=force-enable
59 */ 66 */
60static int force_enable_local_apic; 67static int force_enable_local_apic;
61int disable_apic; 68/*
69 * APIC command line parameters
70 */
71static int __init parse_lapic(char *arg)
72{
73 force_enable_local_apic = 1;
74 return 0;
75}
76early_param("lapic", parse_lapic);
77/* Local APIC was disabled by the BIOS and enabled by the kernel */
78static int enabled_via_apicbase;
79
80#endif
81
82#ifdef CONFIG_X86_64
83static int apic_calibrate_pmtmr __initdata;
84static __init int setup_apicpmtimer(char *s)
85{
86 apic_calibrate_pmtmr = 1;
87 notsc_setup(NULL);
88 return 0;
89}
90__setup("apicpmtimer", setup_apicpmtimer);
91#endif
92
93#ifdef CONFIG_X86_64
94#define HAVE_X2APIC
95#endif
96
97#ifdef HAVE_X2APIC
98int x2apic;
99/* x2apic enabled before OS handover */
100int x2apic_preenabled;
101int disable_x2apic;
102static __init int setup_nox2apic(char *str)
103{
104 disable_x2apic = 1;
105 setup_clear_cpu_cap(X86_FEATURE_X2APIC);
106 return 0;
107}
108early_param("nox2apic", setup_nox2apic);
109#endif
62 110
111unsigned long mp_lapic_addr;
112int disable_apic;
63/* Disable local APIC timer from the kernel commandline or via dmi quirk */ 113/* Disable local APIC timer from the kernel commandline or via dmi quirk */
64static int disable_apic_timer __cpuinitdata; 114static int disable_apic_timer __cpuinitdata;
65/* Local APIC timer works in C2 */ 115/* Local APIC timer works in C2 */
@@ -110,9 +160,6 @@ static struct clock_event_device lapic_clockevent = {
110}; 160};
111static DEFINE_PER_CPU(struct clock_event_device, lapic_events); 161static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
112 162
113/* Local APIC was disabled by the BIOS and enabled by the kernel */
114static int enabled_via_apicbase;
115
116static unsigned long apic_phys; 163static unsigned long apic_phys;
117 164
118/* 165/*
@@ -202,6 +249,42 @@ static struct apic_ops xapic_ops = {
202struct apic_ops __read_mostly *apic_ops = &xapic_ops; 249struct apic_ops __read_mostly *apic_ops = &xapic_ops;
203EXPORT_SYMBOL_GPL(apic_ops); 250EXPORT_SYMBOL_GPL(apic_ops);
204 251
252#ifdef HAVE_X2APIC
253static void x2apic_wait_icr_idle(void)
254{
255 /* no need to wait for icr idle in x2apic */
256 return;
257}
258
259static u32 safe_x2apic_wait_icr_idle(void)
260{
261 /* no need to wait for icr idle in x2apic */
262 return 0;
263}
264
265void x2apic_icr_write(u32 low, u32 id)
266{
267 wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
268}
269
270u64 x2apic_icr_read(void)
271{
272 unsigned long val;
273
274 rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
275 return val;
276}
277
278static struct apic_ops x2apic_ops = {
279 .read = native_apic_msr_read,
280 .write = native_apic_msr_write,
281 .icr_read = x2apic_icr_read,
282 .icr_write = x2apic_icr_write,
283 .wait_icr_idle = x2apic_wait_icr_idle,
284 .safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
285};
286#endif
287
205/** 288/**
206 * enable_NMI_through_LVT0 - enable NMI through local vector table 0 289 * enable_NMI_through_LVT0 - enable NMI through local vector table 0
207 */ 290 */
@@ -219,6 +302,7 @@ void __cpuinit enable_NMI_through_LVT0(void)
219 apic_write(APIC_LVT0, v); 302 apic_write(APIC_LVT0, v);
220} 303}
221 304
305#ifdef CONFIG_X86_32
222/** 306/**
223 * get_physical_broadcast - Get number of physical broadcast IDs 307 * get_physical_broadcast - Get number of physical broadcast IDs
224 */ 308 */
@@ -226,6 +310,7 @@ int get_physical_broadcast(void)
226{ 310{
227 return modern_apic() ? 0xff : 0xf; 311 return modern_apic() ? 0xff : 0xf;
228} 312}
313#endif
229 314
230/** 315/**
231 * lapic_get_maxlvt - get the maximum number of local vector table entries 316 * lapic_get_maxlvt - get the maximum number of local vector table entries
@@ -247,11 +332,7 @@ int lapic_get_maxlvt(void)
247 */ 332 */
248 333
249/* Clock divisor */ 334/* Clock divisor */
250#ifdef CONFG_X86_64
251#define APIC_DIVISOR 1
252#else
253#define APIC_DIVISOR 16 335#define APIC_DIVISOR 16
254#endif
255 336
256/* 337/*
257 * This function sets up the local APIC timer, with a timeout of 338 * This function sets up the local APIC timer, with a timeout of
@@ -383,7 +464,7 @@ static void lapic_timer_broadcast(cpumask_t mask)
383 * Setup the local APIC timer for this CPU. Copy the initilized values 464 * Setup the local APIC timer for this CPU. Copy the initilized values
384 * of the boot CPU and register the clock event in the framework. 465 * of the boot CPU and register the clock event in the framework.
385 */ 466 */
386static void __devinit setup_APIC_timer(void) 467static void __cpuinit setup_APIC_timer(void)
387{ 468{
388 struct clock_event_device *levt = &__get_cpu_var(lapic_events); 469 struct clock_event_device *levt = &__get_cpu_var(lapic_events);
389 470
@@ -453,14 +534,51 @@ static void __init lapic_cal_handler(struct clock_event_device *dev)
453 } 534 }
454} 535}
455 536
537static int __init calibrate_by_pmtimer(long deltapm, long *delta)
538{
539 const long pm_100ms = PMTMR_TICKS_PER_SEC / 10;
540 const long pm_thresh = pm_100ms / 100;
541 unsigned long mult;
542 u64 res;
543
544#ifndef CONFIG_X86_PM_TIMER
545 return -1;
546#endif
547
548 apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm);
549
550 /* Check, if the PM timer is available */
551 if (!deltapm)
552 return -1;
553
554 mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
555
556 if (deltapm > (pm_100ms - pm_thresh) &&
557 deltapm < (pm_100ms + pm_thresh)) {
558 apic_printk(APIC_VERBOSE, "... PM timer result ok\n");
559 } else {
560 res = (((u64)deltapm) * mult) >> 22;
561 do_div(res, 1000000);
562 printk(KERN_WARNING "APIC calibration not consistent "
563 "with PM Timer: %ldms instead of 100ms\n",
564 (long)res);
565 /* Correct the lapic counter value */
566 res = (((u64)(*delta)) * pm_100ms);
567 do_div(res, deltapm);
568 printk(KERN_INFO "APIC delta adjusted to PM-Timer: "
569 "%lu (%ld)\n", (unsigned long)res, *delta);
570 *delta = (long)res;
571 }
572
573 return 0;
574}
575
456static int __init calibrate_APIC_clock(void) 576static int __init calibrate_APIC_clock(void)
457{ 577{
458 struct clock_event_device *levt = &__get_cpu_var(lapic_events); 578 struct clock_event_device *levt = &__get_cpu_var(lapic_events);
459 const long pm_100ms = PMTMR_TICKS_PER_SEC/10;
460 const long pm_thresh = pm_100ms/100;
461 void (*real_handler)(struct clock_event_device *dev); 579 void (*real_handler)(struct clock_event_device *dev);
462 unsigned long deltaj; 580 unsigned long deltaj;
463 long delta, deltapm; 581 long delta;
464 int pm_referenced = 0; 582 int pm_referenced = 0;
465 583
466 local_irq_disable(); 584 local_irq_disable();
@@ -470,10 +588,10 @@ static int __init calibrate_APIC_clock(void)
470 global_clock_event->event_handler = lapic_cal_handler; 588 global_clock_event->event_handler = lapic_cal_handler;
471 589
472 /* 590 /*
473 * Setup the APIC counter to 1e9. There is no way the lapic 591 * Setup the APIC counter to maximum. There is no way the lapic
474 * can underflow in the 100ms detection time frame 592 * can underflow in the 100ms detection time frame
475 */ 593 */
476 __setup_APIC_LVTT(1000000000, 0, 0); 594 __setup_APIC_LVTT(0xffffffff, 0, 0);
477 595
478 /* Let the interrupts run */ 596 /* Let the interrupts run */
479 local_irq_enable(); 597 local_irq_enable();
@@ -490,34 +608,9 @@ static int __init calibrate_APIC_clock(void)
490 delta = lapic_cal_t1 - lapic_cal_t2; 608 delta = lapic_cal_t1 - lapic_cal_t2;
491 apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); 609 apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
492 610
493 /* Check, if the PM timer is available */ 611 /* we trust the PM based calibration if possible */
494 deltapm = lapic_cal_pm2 - lapic_cal_pm1; 612 pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
495 apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); 613 &delta);
496
497 if (deltapm) {
498 unsigned long mult;
499 u64 res;
500
501 mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
502
503 if (deltapm > (pm_100ms - pm_thresh) &&
504 deltapm < (pm_100ms + pm_thresh)) {
505 apic_printk(APIC_VERBOSE, "... PM timer result ok\n");
506 } else {
507 res = (((u64) deltapm) * mult) >> 22;
508 do_div(res, 1000000);
509 printk(KERN_WARNING "APIC calibration not consistent "
510 "with PM Timer: %ldms instead of 100ms\n",
511 (long)res);
512 /* Correct the lapic counter value */
513 res = (((u64) delta) * pm_100ms);
514 do_div(res, deltapm);
515 printk(KERN_INFO "APIC delta adjusted to PM-Timer: "
516 "%lu (%ld)\n", (unsigned long) res, delta);
517 delta = (long) res;
518 }
519 pm_referenced = 1;
520 }
521 614
522 /* Calculate the scaled math multiplication factor */ 615 /* Calculate the scaled math multiplication factor */
523 lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, 616 lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
@@ -559,7 +652,10 @@ static int __init calibrate_APIC_clock(void)
559 652
560 levt->features &= ~CLOCK_EVT_FEAT_DUMMY; 653 levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
561 654
562 /* We trust the pm timer based calibration */ 655 /*
656 * PM timer calibration failed or not turned on
657 * so lets try APIC timer based calibration
658 */
563 if (!pm_referenced) { 659 if (!pm_referenced) {
564 apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); 660 apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
565 661
@@ -652,7 +748,7 @@ void __init setup_boot_APIC_clock(void)
652 setup_APIC_timer(); 748 setup_APIC_timer();
653} 749}
654 750
655void __devinit setup_secondary_APIC_clock(void) 751void __cpuinit setup_secondary_APIC_clock(void)
656{ 752{
657 setup_APIC_timer(); 753 setup_APIC_timer();
658} 754}
@@ -718,6 +814,9 @@ void smp_apic_timer_interrupt(struct pt_regs *regs)
718 * Besides, if we don't timer interrupts ignore the global 814 * Besides, if we don't timer interrupts ignore the global
719 * interrupt lock, which is the WrongThing (tm) to do. 815 * interrupt lock, which is the WrongThing (tm) to do.
720 */ 816 */
817#ifdef CONFIG_X86_64
818 exit_idle();
819#endif
721 irq_enter(); 820 irq_enter();
722 local_apic_timer_interrupt(); 821 local_apic_timer_interrupt();
723 irq_exit(); 822 irq_exit();
@@ -991,40 +1090,43 @@ void __init init_bsp_APIC(void)
991 1090
992static void __cpuinit lapic_setup_esr(void) 1091static void __cpuinit lapic_setup_esr(void)
993{ 1092{
994 unsigned long oldvalue, value, maxlvt; 1093 unsigned int oldvalue, value, maxlvt;
995 if (lapic_is_integrated() && !esr_disable) { 1094
996 if (esr_disable) { 1095 if (!lapic_is_integrated()) {
997 /* 1096 printk(KERN_INFO "No ESR for 82489DX.\n");
998 * Something untraceable is creating bad interrupts on 1097 return;
999 * secondary quads ... for the moment, just leave the 1098 }
1000 * ESR disabled - we can't do anything useful with the
1001 * errors anyway - mbligh
1002 */
1003 printk(KERN_INFO "Leaving ESR disabled.\n");
1004 return;
1005 }
1006 /* !82489DX */
1007 maxlvt = lapic_get_maxlvt();
1008 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
1009 apic_write(APIC_ESR, 0);
1010 oldvalue = apic_read(APIC_ESR);
1011 1099
1012 /* enables sending errors */ 1100 if (esr_disable) {
1013 value = ERROR_APIC_VECTOR;
1014 apic_write(APIC_LVTERR, value);
1015 /* 1101 /*
1016 * spec says clear errors after enabling vector. 1102 * Something untraceable is creating bad interrupts on
1103 * secondary quads ... for the moment, just leave the
1104 * ESR disabled - we can't do anything useful with the
1105 * errors anyway - mbligh
1017 */ 1106 */
1018 if (maxlvt > 3) 1107 printk(KERN_INFO "Leaving ESR disabled.\n");
1019 apic_write(APIC_ESR, 0); 1108 return;
1020 value = apic_read(APIC_ESR);
1021 if (value != oldvalue)
1022 apic_printk(APIC_VERBOSE, "ESR value before enabling "
1023 "vector: 0x%08lx after: 0x%08lx\n",
1024 oldvalue, value);
1025 } else {
1026 printk(KERN_INFO "No ESR for 82489DX.\n");
1027 } 1109 }
1110
1111 maxlvt = lapic_get_maxlvt();
1112 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
1113 apic_write(APIC_ESR, 0);
1114 oldvalue = apic_read(APIC_ESR);
1115
1116 /* enables sending errors */
1117 value = ERROR_APIC_VECTOR;
1118 apic_write(APIC_LVTERR, value);
1119
1120 /*
1121 * spec says clear errors after enabling vector.
1122 */
1123 if (maxlvt > 3)
1124 apic_write(APIC_ESR, 0);
1125 value = apic_read(APIC_ESR);
1126 if (value != oldvalue)
1127 apic_printk(APIC_VERBOSE, "ESR value before enabling "
1128 "vector: 0x%08x after: 0x%08x\n",
1129 oldvalue, value);
1028} 1130}
1029 1131
1030 1132
@@ -1033,24 +1135,27 @@ static void __cpuinit lapic_setup_esr(void)
1033 */ 1135 */
1034void __cpuinit setup_local_APIC(void) 1136void __cpuinit setup_local_APIC(void)
1035{ 1137{
1036 unsigned long value, integrated; 1138 unsigned int value;
1037 int i, j; 1139 int i, j;
1038 1140
1141#ifdef CONFIG_X86_32
1039 /* Pound the ESR really hard over the head with a big hammer - mbligh */ 1142 /* Pound the ESR really hard over the head with a big hammer - mbligh */
1040 if (esr_disable) { 1143 if (lapic_is_integrated() && esr_disable) {
1041 apic_write(APIC_ESR, 0); 1144 apic_write(APIC_ESR, 0);
1042 apic_write(APIC_ESR, 0); 1145 apic_write(APIC_ESR, 0);
1043 apic_write(APIC_ESR, 0); 1146 apic_write(APIC_ESR, 0);
1044 apic_write(APIC_ESR, 0); 1147 apic_write(APIC_ESR, 0);
1045 } 1148 }
1149#endif
1046 1150
1047 integrated = lapic_is_integrated(); 1151 preempt_disable();
1048 1152
1049 /* 1153 /*
1050 * Double-check whether this APIC is really registered. 1154 * Double-check whether this APIC is really registered.
1155 * This is meaningless in clustered apic mode, so we skip it.
1051 */ 1156 */
1052 if (!apic_id_registered()) 1157 if (!apic_id_registered())
1053 WARN_ON_ONCE(1); 1158 BUG();
1054 1159
1055 /* 1160 /*
1056 * Intel recommends to set DFR, LDR and TPR before enabling 1161 * Intel recommends to set DFR, LDR and TPR before enabling
@@ -1096,6 +1201,7 @@ void __cpuinit setup_local_APIC(void)
1096 */ 1201 */
1097 value |= APIC_SPIV_APIC_ENABLED; 1202 value |= APIC_SPIV_APIC_ENABLED;
1098 1203
1204#ifdef CONFIG_X86_32
1099 /* 1205 /*
1100 * Some unknown Intel IO/APIC (or APIC) errata is biting us with 1206 * Some unknown Intel IO/APIC (or APIC) errata is biting us with
1101 * certain networking cards. If high frequency interrupts are 1207 * certain networking cards. If high frequency interrupts are
@@ -1116,8 +1222,13 @@ void __cpuinit setup_local_APIC(void)
1116 * See also the comment in end_level_ioapic_irq(). --macro 1222 * See also the comment in end_level_ioapic_irq(). --macro
1117 */ 1223 */
1118 1224
1119 /* Enable focus processor (bit==0) */ 1225 /*
1226 * - enable focus processor (bit==0)
1227 * - 64bit mode always use processor focus
1228 * so no need to set it
1229 */
1120 value &= ~APIC_SPIV_FOCUS_DISABLED; 1230 value &= ~APIC_SPIV_FOCUS_DISABLED;
1231#endif
1121 1232
1122 /* 1233 /*
1123 * Set spurious IRQ vector 1234 * Set spurious IRQ vector
@@ -1154,9 +1265,11 @@ void __cpuinit setup_local_APIC(void)
1154 value = APIC_DM_NMI; 1265 value = APIC_DM_NMI;
1155 else 1266 else
1156 value = APIC_DM_NMI | APIC_LVT_MASKED; 1267 value = APIC_DM_NMI | APIC_LVT_MASKED;
1157 if (!integrated) /* 82489DX */ 1268 if (!lapic_is_integrated()) /* 82489DX */
1158 value |= APIC_LVT_LEVEL_TRIGGER; 1269 value |= APIC_LVT_LEVEL_TRIGGER;
1159 apic_write(APIC_LVT1, value); 1270 apic_write(APIC_LVT1, value);
1271
1272 preempt_enable();
1160} 1273}
1161 1274
1162void __cpuinit end_local_APIC_setup(void) 1275void __cpuinit end_local_APIC_setup(void)
@@ -1177,6 +1290,153 @@ void __cpuinit end_local_APIC_setup(void)
1177 apic_pm_activate(); 1290 apic_pm_activate();
1178} 1291}
1179 1292
1293#ifdef HAVE_X2APIC
1294void check_x2apic(void)
1295{
1296 int msr, msr2;
1297
1298 rdmsr(MSR_IA32_APICBASE, msr, msr2);
1299
1300 if (msr & X2APIC_ENABLE) {
1301 printk("x2apic enabled by BIOS, switching to x2apic ops\n");
1302 x2apic_preenabled = x2apic = 1;
1303 apic_ops = &x2apic_ops;
1304 }
1305}
1306
1307void enable_x2apic(void)
1308{
1309 int msr, msr2;
1310
1311 rdmsr(MSR_IA32_APICBASE, msr, msr2);
1312 if (!(msr & X2APIC_ENABLE)) {
1313 printk("Enabling x2apic\n");
1314 wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
1315 }
1316}
1317
1318void enable_IR_x2apic(void)
1319{
1320#ifdef CONFIG_INTR_REMAP
1321 int ret;
1322 unsigned long flags;
1323
1324 if (!cpu_has_x2apic)
1325 return;
1326
1327 if (!x2apic_preenabled && disable_x2apic) {
1328 printk(KERN_INFO
1329 "Skipped enabling x2apic and Interrupt-remapping "
1330 "because of nox2apic\n");
1331 return;
1332 }
1333
1334 if (x2apic_preenabled && disable_x2apic)
1335 panic("Bios already enabled x2apic, can't enforce nox2apic");
1336
1337 if (!x2apic_preenabled && skip_ioapic_setup) {
1338 printk(KERN_INFO
1339 "Skipped enabling x2apic and Interrupt-remapping "
1340 "because of skipping io-apic setup\n");
1341 return;
1342 }
1343
1344 ret = dmar_table_init();
1345 if (ret) {
1346 printk(KERN_INFO
1347 "dmar_table_init() failed with %d:\n", ret);
1348
1349 if (x2apic_preenabled)
1350 panic("x2apic enabled by bios. But IR enabling failed");
1351 else
1352 printk(KERN_INFO
1353 "Not enabling x2apic,Intr-remapping\n");
1354 return;
1355 }
1356
1357 local_irq_save(flags);
1358 mask_8259A();
1359
1360 ret = save_mask_IO_APIC_setup();
1361 if (ret) {
1362 printk(KERN_INFO "Saving IO-APIC state failed: %d\n", ret);
1363 goto end;
1364 }
1365
1366 ret = enable_intr_remapping(1);
1367
1368 if (ret && x2apic_preenabled) {
1369 local_irq_restore(flags);
1370 panic("x2apic enabled by bios. But IR enabling failed");
1371 }
1372
1373 if (ret)
1374 goto end_restore;
1375
1376 if (!x2apic) {
1377 x2apic = 1;
1378 apic_ops = &x2apic_ops;
1379 enable_x2apic();
1380 }
1381
1382end_restore:
1383 if (ret)
1384 /*
1385 * IR enabling failed
1386 */
1387 restore_IO_APIC_setup();
1388 else
1389 reinit_intr_remapped_IO_APIC(x2apic_preenabled);
1390
1391end:
1392 unmask_8259A();
1393 local_irq_restore(flags);
1394
1395 if (!ret) {
1396 if (!x2apic_preenabled)
1397 printk(KERN_INFO
1398 "Enabled x2apic and interrupt-remapping\n");
1399 else
1400 printk(KERN_INFO
1401 "Enabled Interrupt-remapping\n");
1402 } else
1403 printk(KERN_ERR
1404 "Failed to enable Interrupt-remapping and x2apic\n");
1405#else
1406 if (!cpu_has_x2apic)
1407 return;
1408
1409 if (x2apic_preenabled)
1410 panic("x2apic enabled prior OS handover,"
1411 " enable CONFIG_INTR_REMAP");
1412
1413 printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
1414 " and x2apic\n");
1415#endif
1416
1417 return;
1418}
1419#endif /* HAVE_X2APIC */
1420
1421#ifdef CONFIG_X86_64
1422/*
1423 * Detect and enable local APICs on non-SMP boards.
1424 * Original code written by Keir Fraser.
1425 * On AMD64 we trust the BIOS - if it says no APIC it is likely
1426 * not correctly set up (usually the APIC timer won't work etc.)
1427 */
1428static int __init detect_init_APIC(void)
1429{
1430 if (!cpu_has_apic) {
1431 printk(KERN_INFO "No local APIC present\n");
1432 return -1;
1433 }
1434
1435 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
1436 boot_cpu_physical_apicid = 0;
1437 return 0;
1438}
1439#else
1180/* 1440/*
1181 * Detect and initialize APIC 1441 * Detect and initialize APIC
1182 */ 1442 */
@@ -1255,12 +1515,46 @@ no_apic:
1255 printk(KERN_INFO "No local APIC present or hardware disabled\n"); 1515 printk(KERN_INFO "No local APIC present or hardware disabled\n");
1256 return -1; 1516 return -1;
1257} 1517}
1518#endif
1519
1520#ifdef CONFIG_X86_64
1521void __init early_init_lapic_mapping(void)
1522{
1523 unsigned long phys_addr;
1524
1525 /*
1526 * If no local APIC can be found then go out
1527 * : it means there is no mpatable and MADT
1528 */
1529 if (!smp_found_config)
1530 return;
1531
1532 phys_addr = mp_lapic_addr;
1533
1534 set_fixmap_nocache(FIX_APIC_BASE, phys_addr);
1535 apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
1536 APIC_BASE, phys_addr);
1537
1538 /*
1539 * Fetch the APIC ID of the BSP in case we have a
1540 * default configuration (or the MP table is broken).
1541 */
1542 boot_cpu_physical_apicid = read_apic_id();
1543}
1544#endif
1258 1545
1259/** 1546/**
1260 * init_apic_mappings - initialize APIC mappings 1547 * init_apic_mappings - initialize APIC mappings
1261 */ 1548 */
1262void __init init_apic_mappings(void) 1549void __init init_apic_mappings(void)
1263{ 1550{
1551#ifdef HAVE_X2APIC
1552 if (x2apic) {
1553 boot_cpu_physical_apicid = read_apic_id();
1554 return;
1555 }
1556#endif
1557
1264 /* 1558 /*
1265 * If no local APIC can be found then set up a fake all 1559 * If no local APIC can be found then set up a fake all
1266 * zeroes page to simulate the local APIC and another 1560 * zeroes page to simulate the local APIC and another
@@ -1273,8 +1567,8 @@ void __init init_apic_mappings(void)
1273 apic_phys = mp_lapic_addr; 1567 apic_phys = mp_lapic_addr;
1274 1568
1275 set_fixmap_nocache(FIX_APIC_BASE, apic_phys); 1569 set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
1276 printk(KERN_DEBUG "mapped APIC to %08lx (%08lx)\n", APIC_BASE, 1570 apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n",
1277 apic_phys); 1571 APIC_BASE, apic_phys);
1278 1572
1279 /* 1573 /*
1280 * Fetch the APIC ID of the BSP in case we have a 1574 * Fetch the APIC ID of the BSP in case we have a
@@ -1282,18 +1576,27 @@ void __init init_apic_mappings(void)
1282 */ 1576 */
1283 if (boot_cpu_physical_apicid == -1U) 1577 if (boot_cpu_physical_apicid == -1U)
1284 boot_cpu_physical_apicid = read_apic_id(); 1578 boot_cpu_physical_apicid = read_apic_id();
1285
1286} 1579}
1287 1580
1288/* 1581/*
1289 * This initializes the IO-APIC and APIC hardware if this is 1582 * This initializes the IO-APIC and APIC hardware if this is
1290 * a UP kernel. 1583 * a UP kernel.
1291 */ 1584 */
1292
1293int apic_version[MAX_APICS]; 1585int apic_version[MAX_APICS];
1294 1586
1295int __init APIC_init_uniprocessor(void) 1587int __init APIC_init_uniprocessor(void)
1296{ 1588{
1589#ifdef CONFIG_X86_64
1590 if (disable_apic) {
1591 printk(KERN_INFO "Apic disabled\n");
1592 return -1;
1593 }
1594 if (!cpu_has_apic) {
1595 disable_apic = 1;
1596 printk(KERN_INFO "Apic disabled by BIOS\n");
1597 return -1;
1598 }
1599#else
1297 if (!smp_found_config && !cpu_has_apic) 1600 if (!smp_found_config && !cpu_has_apic)
1298 return -1; 1601 return -1;
1299 1602
@@ -1302,39 +1605,68 @@ int __init APIC_init_uniprocessor(void)
1302 */ 1605 */
1303 if (!cpu_has_apic && 1606 if (!cpu_has_apic &&
1304 APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { 1607 APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
1305 printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", 1608 printk(KERN_ERR "BIOS bug, local APIC 0x%x not detected!...\n",
1306 boot_cpu_physical_apicid); 1609 boot_cpu_physical_apicid);
1307 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); 1610 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
1308 return -1; 1611 return -1;
1309 } 1612 }
1613#endif
1310 1614
1311 verify_local_APIC(); 1615#ifdef HAVE_X2APIC
1616 enable_IR_x2apic();
1617#endif
1618#ifdef CONFIG_X86_64
1619 setup_apic_routing();
1620#endif
1312 1621
1622 verify_local_APIC();
1313 connect_bsp_APIC(); 1623 connect_bsp_APIC();
1314 1624
1625#ifdef CONFIG_X86_64
1626 apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
1627#else
1315 /* 1628 /*
1316 * Hack: In case of kdump, after a crash, kernel might be booting 1629 * Hack: In case of kdump, after a crash, kernel might be booting
1317 * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid 1630 * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid
1318 * might be zero if read from MP tables. Get it from LAPIC. 1631 * might be zero if read from MP tables. Get it from LAPIC.
1319 */ 1632 */
1320#ifdef CONFIG_CRASH_DUMP 1633# ifdef CONFIG_CRASH_DUMP
1321 boot_cpu_physical_apicid = read_apic_id(); 1634 boot_cpu_physical_apicid = read_apic_id();
1635# endif
1322#endif 1636#endif
1323 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); 1637 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
1324
1325 setup_local_APIC(); 1638 setup_local_APIC();
1326 1639
1640#ifdef CONFIG_X86_64
1641 /*
1642 * Now enable IO-APICs, actually call clear_IO_APIC
1643 * We need clear_IO_APIC before enabling vector on BP
1644 */
1645 if (!skip_ioapic_setup && nr_ioapics)
1646 enable_IO_APIC();
1647#endif
1648
1327#ifdef CONFIG_X86_IO_APIC 1649#ifdef CONFIG_X86_IO_APIC
1328 if (!smp_found_config || skip_ioapic_setup || !nr_ioapics) 1650 if (!smp_found_config || skip_ioapic_setup || !nr_ioapics)
1329#endif 1651#endif
1330 localise_nmi_watchdog(); 1652 localise_nmi_watchdog();
1331 end_local_APIC_setup(); 1653 end_local_APIC_setup();
1654
1332#ifdef CONFIG_X86_IO_APIC 1655#ifdef CONFIG_X86_IO_APIC
1333 if (smp_found_config) 1656 if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
1334 if (!skip_ioapic_setup && nr_ioapics) 1657 setup_IO_APIC();
1335 setup_IO_APIC(); 1658# ifdef CONFIG_X86_64
1659 else
1660 nr_ioapics = 0;
1661# endif
1336#endif 1662#endif
1663
1664#ifdef CONFIG_X86_64
1665 setup_boot_APIC_clock();
1666 check_nmi_watchdog();
1667#else
1337 setup_boot_clock(); 1668 setup_boot_clock();
1669#endif
1338 1670
1339 return 0; 1671 return 0;
1340} 1672}
@@ -1348,8 +1680,11 @@ int __init APIC_init_uniprocessor(void)
1348 */ 1680 */
1349void smp_spurious_interrupt(struct pt_regs *regs) 1681void smp_spurious_interrupt(struct pt_regs *regs)
1350{ 1682{
1351 unsigned long v; 1683 u32 v;
1352 1684
1685#ifdef CONFIG_X86_64
1686 exit_idle();
1687#endif
1353 irq_enter(); 1688 irq_enter();
1354 /* 1689 /*
1355 * Check if this really is a spurious interrupt and ACK it 1690 * Check if this really is a spurious interrupt and ACK it
@@ -1360,10 +1695,14 @@ void smp_spurious_interrupt(struct pt_regs *regs)
1360 if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) 1695 if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
1361 ack_APIC_irq(); 1696 ack_APIC_irq();
1362 1697
1698#ifdef CONFIG_X86_64
1699 add_pda(irq_spurious_count, 1);
1700#else
1363 /* see sw-dev-man vol 3, chapter 7.4.13.5 */ 1701 /* see sw-dev-man vol 3, chapter 7.4.13.5 */
1364 printk(KERN_INFO "spurious APIC interrupt on CPU#%d, " 1702 printk(KERN_INFO "spurious APIC interrupt on CPU#%d, "
1365 "should never happen.\n", smp_processor_id()); 1703 "should never happen.\n", smp_processor_id());
1366 __get_cpu_var(irq_stat).irq_spurious_count++; 1704 __get_cpu_var(irq_stat).irq_spurious_count++;
1705#endif
1367 irq_exit(); 1706 irq_exit();
1368} 1707}
1369 1708
@@ -1372,8 +1711,11 @@ void smp_spurious_interrupt(struct pt_regs *regs)
1372 */ 1711 */
1373void smp_error_interrupt(struct pt_regs *regs) 1712void smp_error_interrupt(struct pt_regs *regs)
1374{ 1713{
1375 unsigned long v, v1; 1714 u32 v, v1;
1376 1715
1716#ifdef CONFIG_X86_64
1717 exit_idle();
1718#endif
1377 irq_enter(); 1719 irq_enter();
1378 /* First tickle the hardware, only then report what went on. -- REW */ 1720 /* First tickle the hardware, only then report what went on. -- REW */
1379 v = apic_read(APIC_ESR); 1721 v = apic_read(APIC_ESR);
@@ -1392,7 +1734,7 @@ void smp_error_interrupt(struct pt_regs *regs)
1392 6: Received illegal vector 1734 6: Received illegal vector
1393 7: Illegal register address 1735 7: Illegal register address
1394 */ 1736 */
1395 printk(KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n", 1737 printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
1396 smp_processor_id(), v , v1); 1738 smp_processor_id(), v , v1);
1397 irq_exit(); 1739 irq_exit();
1398} 1740}
@@ -1565,6 +1907,13 @@ void __cpuinit generic_processor_info(int apicid, int version)
1565 cpu_set(cpu, cpu_present_map); 1907 cpu_set(cpu, cpu_present_map);
1566} 1908}
1567 1909
1910#ifdef CONFIG_X86_64
1911int hard_smp_processor_id(void)
1912{
1913 return read_apic_id();
1914}
1915#endif
1916
1568/* 1917/*
1569 * Power management 1918 * Power management
1570 */ 1919 */
@@ -1640,7 +1989,7 @@ static int lapic_resume(struct sys_device *dev)
1640 1989
1641 local_irq_save(flags); 1990 local_irq_save(flags);
1642 1991
1643#ifdef CONFIG_X86_64 1992#ifdef HAVE_X2APIC
1644 if (x2apic) 1993 if (x2apic)
1645 enable_x2apic(); 1994 enable_x2apic();
1646 else 1995 else
@@ -1702,7 +2051,7 @@ static struct sys_device device_lapic = {
1702 .cls = &lapic_sysclass, 2051 .cls = &lapic_sysclass,
1703}; 2052};
1704 2053
1705static void __devinit apic_pm_activate(void) 2054static void __cpuinit apic_pm_activate(void)
1706{ 2055{
1707 apic_pm_state.active = 1; 2056 apic_pm_state.active = 1;
1708} 2057}
@@ -1728,16 +2077,87 @@ static void apic_pm_activate(void) { }
1728 2077
1729#endif /* CONFIG_PM */ 2078#endif /* CONFIG_PM */
1730 2079
2080#ifdef CONFIG_X86_64
1731/* 2081/*
1732 * APIC command line parameters 2082 * apic_is_clustered_box() -- Check if we can expect good TSC
2083 *
2084 * Thus far, the major user of this is IBM's Summit2 series:
2085 *
2086 * Clustered boxes may have unsynced TSC problems if they are
2087 * multi-chassis. Use available data to take a good guess.
2088 * If in doubt, go HPET.
1733 */ 2089 */
1734static int __init parse_lapic(char *arg) 2090__cpuinit int apic_is_clustered_box(void)
1735{ 2091{
1736 force_enable_local_apic = 1; 2092 int i, clusters, zeros;
1737 return 0; 2093 unsigned id;
2094 u16 *bios_cpu_apicid;
2095 DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
2096
2097 /*
2098 * there is not this kind of box with AMD CPU yet.
2099 * Some AMD box with quadcore cpu and 8 sockets apicid
2100 * will be [4, 0x23] or [8, 0x27] could be thought to
2101 * vsmp box still need checking...
2102 */
2103 if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
2104 return 0;
2105
2106 bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
2107 bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
2108
2109 for (i = 0; i < NR_CPUS; i++) {
2110 /* are we being called early in kernel startup? */
2111 if (bios_cpu_apicid) {
2112 id = bios_cpu_apicid[i];
2113 }
2114 else if (i < nr_cpu_ids) {
2115 if (cpu_present(i))
2116 id = per_cpu(x86_bios_cpu_apicid, i);
2117 else
2118 continue;
2119 }
2120 else
2121 break;
2122
2123 if (id != BAD_APICID)
2124 __set_bit(APIC_CLUSTERID(id), clustermap);
2125 }
2126
2127 /* Problem: Partially populated chassis may not have CPUs in some of
2128 * the APIC clusters they have been allocated. Only present CPUs have
2129 * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap.
2130 * Since clusters are allocated sequentially, count zeros only if
2131 * they are bounded by ones.
2132 */
2133 clusters = 0;
2134 zeros = 0;
2135 for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
2136 if (test_bit(i, clustermap)) {
2137 clusters += 1 + zeros;
2138 zeros = 0;
2139 } else
2140 ++zeros;
2141 }
2142
2143 /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are
2144 * not guaranteed to be synced between boards
2145 */
2146 if (is_vsmp_box() && clusters > 1)
2147 return 1;
2148
2149 /*
2150 * If clusters > 2, then should be multi-chassis.
2151 * May have to revisit this when multi-core + hyperthreaded CPUs come
2152 * out, but AFAIK this will work even for them.
2153 */
2154 return (clusters > 2);
1738} 2155}
1739early_param("lapic", parse_lapic); 2156#endif
1740 2157
2158/*
2159 * APIC command line parameters
2160 */
1741static int __init setup_disableapic(char *arg) 2161static int __init setup_disableapic(char *arg)
1742{ 2162{
1743 disable_apic = 1; 2163 disable_apic = 1;
@@ -1779,7 +2199,6 @@ static int __init apic_set_verbosity(char *arg)
1779 if (!arg) { 2199 if (!arg) {
1780#ifdef CONFIG_X86_64 2200#ifdef CONFIG_X86_64
1781 skip_ioapic_setup = 0; 2201 skip_ioapic_setup = 0;
1782 ioapic_force = 1;
1783 return 0; 2202 return 0;
1784#endif 2203#endif
1785 return -EINVAL; 2204 return -EINVAL;
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
deleted file mode 100644
index 94ddb69ae15e..000000000000
--- a/arch/x86/kernel/apic_64.c
+++ /dev/null
@@ -1,1848 +0,0 @@
1/*
2 * Local APIC handling, local APIC timers
3 *
4 * (c) 1999, 2000 Ingo Molnar <mingo@redhat.com>
5 *
6 * Fixes
7 * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
8 * thanks to Eric Gilmore
9 * and Rolf G. Tews
10 * for testing these extensively.
11 * Maciej W. Rozycki : Various updates and fixes.
12 * Mikael Pettersson : Power Management for UP-APIC.
13 * Pavel Machek and
14 * Mikael Pettersson : PM converted to driver model.
15 */
16
17#include <linux/init.h>
18
19#include <linux/mm.h>
20#include <linux/delay.h>
21#include <linux/bootmem.h>
22#include <linux/interrupt.h>
23#include <linux/mc146818rtc.h>
24#include <linux/kernel_stat.h>
25#include <linux/sysdev.h>
26#include <linux/ioport.h>
27#include <linux/clockchips.h>
28#include <linux/acpi_pmtmr.h>
29#include <linux/module.h>
30#include <linux/dmar.h>
31
32#include <asm/atomic.h>
33#include <asm/smp.h>
34#include <asm/mtrr.h>
35#include <asm/mpspec.h>
36#include <asm/hpet.h>
37#include <asm/pgalloc.h>
38#include <asm/nmi.h>
39#include <asm/idle.h>
40#include <asm/proto.h>
41#include <asm/timex.h>
42#include <asm/apic.h>
43#include <asm/i8259.h>
44
45#include <mach_ipi.h>
46#include <mach_apic.h>
47
48/* Disable local APIC timer from the kernel commandline or via dmi quirk */
49static int disable_apic_timer __cpuinitdata;
50static int apic_calibrate_pmtmr __initdata;
51int disable_apic;
52int disable_x2apic;
53int x2apic;
54
55/* x2apic enabled before OS handover */
56int x2apic_preenabled;
57
58/* Local APIC timer works in C2 */
59int local_apic_timer_c2_ok;
60EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
61
62/*
63 * Debug level, exported for io_apic.c
64 */
65unsigned int apic_verbosity;
66
67/* Have we found an MP table */
68int smp_found_config;
69
70static struct resource lapic_resource = {
71 .name = "Local APIC",
72 .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
73};
74
75static unsigned int calibration_result;
76
77static int lapic_next_event(unsigned long delta,
78 struct clock_event_device *evt);
79static void lapic_timer_setup(enum clock_event_mode mode,
80 struct clock_event_device *evt);
81static void lapic_timer_broadcast(cpumask_t mask);
82static void apic_pm_activate(void);
83
84/*
85 * The local apic timer can be used for any function which is CPU local.
86 */
87static struct clock_event_device lapic_clockevent = {
88 .name = "lapic",
89 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
90 | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY,
91 .shift = 32,
92 .set_mode = lapic_timer_setup,
93 .set_next_event = lapic_next_event,
94 .broadcast = lapic_timer_broadcast,
95 .rating = 100,
96 .irq = -1,
97};
98static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
99
100static unsigned long apic_phys;
101
102unsigned long mp_lapic_addr;
103
104/*
105 * Get the LAPIC version
106 */
107static inline int lapic_get_version(void)
108{
109 return GET_APIC_VERSION(apic_read(APIC_LVR));
110}
111
/*
 * Tell whether the local APIC is integrated into the CPU or a separate
 * chip. On CONFIG_X86_64 builds the answer is hard-wired to 1; otherwise
 * it is derived from the APIC version via APIC_INTEGRATED().
 */
static inline int lapic_is_integrated(void)
{
#ifndef CONFIG_X86_64
	int version = lapic_get_version();

	return APIC_INTEGRATED(version);
#else
	return 1;
#endif
}
123
124/*
125 * Check, whether this is a modern or a first generation APIC
126 */
127static int modern_apic(void)
128{
129 /* AMD systems use old APIC versions, so check the CPU */
130 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
131 boot_cpu_data.x86 >= 0xf)
132 return 1;
133 return lapic_get_version() >= 0x14;
134}
135
136/*
137 * Paravirt kernels also might be using these below ops. So we still
138 * use generic apic_read()/apic_write(), which might be pointing to different
139 * ops in PARAVIRT case.
140 */
141void xapic_wait_icr_idle(void)
142{
143 while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
144 cpu_relax();
145}
146
147u32 safe_xapic_wait_icr_idle(void)
148{
149 u32 send_status;
150 int timeout;
151
152 timeout = 0;
153 do {
154 send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
155 if (!send_status)
156 break;
157 udelay(100);
158 } while (timeout++ < 1000);
159
160 return send_status;
161}
162
163void xapic_icr_write(u32 low, u32 id)
164{
165 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
166 apic_write(APIC_ICR, low);
167}
168
169u64 xapic_icr_read(void)
170{
171 u32 icr1, icr2;
172
173 icr2 = apic_read(APIC_ICR2);
174 icr1 = apic_read(APIC_ICR);
175
176 return icr1 | ((u64)icr2 << 32);
177}
178
179static struct apic_ops xapic_ops = {
180 .read = native_apic_mem_read,
181 .write = native_apic_mem_write,
182 .icr_read = xapic_icr_read,
183 .icr_write = xapic_icr_write,
184 .wait_icr_idle = xapic_wait_icr_idle,
185 .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
186};
187
188struct apic_ops __read_mostly *apic_ops = &xapic_ops;
189EXPORT_SYMBOL_GPL(apic_ops);
190
/*
 * x2apic counterpart of xapic_wait_icr_idle(): deliberately empty,
 * there is no need to wait for the ICR to become idle in x2apic.
 */
static void x2apic_wait_icr_idle(void)
{
}
196
197static u32 safe_x2apic_wait_icr_idle(void)
198{
199 /* no need to wait for icr idle in x2apic */
200 return 0;
201}
202
203void x2apic_icr_write(u32 low, u32 id)
204{
205 wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
206}
207
208u64 x2apic_icr_read(void)
209{
210 unsigned long val;
211
212 rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
213 return val;
214}
215
216static struct apic_ops x2apic_ops = {
217 .read = native_apic_msr_read,
218 .write = native_apic_msr_write,
219 .icr_read = x2apic_icr_read,
220 .icr_write = x2apic_icr_write,
221 .wait_icr_idle = x2apic_wait_icr_idle,
222 .safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
223};
224
225/**
226 * enable_NMI_through_LVT0 - enable NMI through local vector table 0
227 */
228void __cpuinit enable_NMI_through_LVT0(void)
229{
230 unsigned int v;
231
232 /* unmask and set to NMI */
233 v = APIC_DM_NMI;
234
235 /* Level triggered for 82489DX (32bit mode) */
236 if (!lapic_is_integrated())
237 v |= APIC_LVT_LEVEL_TRIGGER;
238
239 apic_write(APIC_LVT0, v);
240}
241
242/**
243 * lapic_get_maxlvt - get the maximum number of local vector table entries
244 */
245int lapic_get_maxlvt(void)
246{
247 unsigned int v;
248
249 v = apic_read(APIC_LVR);
250 /*
251 * - we always have APIC integrated on 64bit mode
252 * - 82489DXs do not report # of LVT entries
253 */
254 return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
255}
256
257/*
258 * Local APIC timer
259 */
260
/*
 * Clock divisor.
 *
 * This used to be wrapped in "#ifdef CONFG_X86_64" with an alternative
 * value of 1. CONFG_X86_64 is a misspelling that is never defined, so
 * 16 has always been the value in effect on every configuration. The
 * dead branch is dropped to make that explicit.
 */
#define APIC_DIVISOR 16
267
268/*
269 * This function sets up the local APIC timer, with a timeout of
270 * 'clocks' APIC bus clock. During calibration we actually call
271 * this function twice on the boot CPU, once with a bogus timeout
272 * value, second time for real. The other (noncalibrating) CPUs
273 * call this function only once, with the real, calibrated value.
274 *
275 * We do reads before writes even if unnecessary, to get around the
276 * P5 APIC double write bug.
277 */
278static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
279{
280 unsigned int lvtt_value, tmp_value;
281
282 lvtt_value = LOCAL_TIMER_VECTOR;
283 if (!oneshot)
284 lvtt_value |= APIC_LVT_TIMER_PERIODIC;
285 if (!lapic_is_integrated())
286 lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
287
288 if (!irqen)
289 lvtt_value |= APIC_LVT_MASKED;
290
291 apic_write(APIC_LVTT, lvtt_value);
292
293 /*
294 * Divide PICLK by 16
295 */
296 tmp_value = apic_read(APIC_TDCR);
297 apic_write(APIC_TDCR,
298 (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
299 APIC_TDR_DIV_16);
300
301 if (!oneshot)
302 apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
303}
304
305/*
306 * Setup extended LVT, AMD specific (K8, family 10h)
307 *
308 * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
309 * MCE interrupts are supported. Thus MCE offset must be set to 0.
310 *
311 * If mask=1, the LVT entry does not generate interrupts while mask=0
312 * enables the vector. See also the BKDGs.
313 */
314
315#define APIC_EILVT_LVTOFF_MCE 0
316#define APIC_EILVT_LVTOFF_IBS 1
317
318static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
319{
320 unsigned long reg = (lvt_off << 4) + APIC_EILVT0;
321 unsigned int v = (mask << 16) | (msg_type << 8) | vector;
322
323 apic_write(reg, v);
324}
325
326u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask)
327{
328 setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask);
329 return APIC_EILVT_LVTOFF_MCE;
330}
331
332u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
333{
334 setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
335 return APIC_EILVT_LVTOFF_IBS;
336}
337EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs);
338
339/*
340 * Program the next event, relative to now
341 */
342static int lapic_next_event(unsigned long delta,
343 struct clock_event_device *evt)
344{
345 apic_write(APIC_TMICT, delta);
346 return 0;
347}
348
349/*
350 * Setup the lapic timer in periodic or oneshot mode
351 */
352static void lapic_timer_setup(enum clock_event_mode mode,
353 struct clock_event_device *evt)
354{
355 unsigned long flags;
356 unsigned int v;
357
358 /* Lapic used as dummy for broadcast ? */
359 if (evt->features & CLOCK_EVT_FEAT_DUMMY)
360 return;
361
362 local_irq_save(flags);
363
364 switch (mode) {
365 case CLOCK_EVT_MODE_PERIODIC:
366 case CLOCK_EVT_MODE_ONESHOT:
367 __setup_APIC_LVTT(calibration_result,
368 mode != CLOCK_EVT_MODE_PERIODIC, 1);
369 break;
370 case CLOCK_EVT_MODE_UNUSED:
371 case CLOCK_EVT_MODE_SHUTDOWN:
372 v = apic_read(APIC_LVTT);
373 v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
374 apic_write(APIC_LVTT, v);
375 break;
376 case CLOCK_EVT_MODE_RESUME:
377 /* Nothing to do here */
378 break;
379 }
380
381 local_irq_restore(flags);
382}
383
384/*
385 * Local APIC timer broadcast function
386 */
387static void lapic_timer_broadcast(cpumask_t mask)
388{
389#ifdef CONFIG_SMP
390 send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
391#endif
392}
393
394/*
395 * Setup the local APIC timer for this CPU. Copy the initilized values
396 * of the boot CPU and register the clock event in the framework.
397 */
398static void setup_APIC_timer(void)
399{
400 struct clock_event_device *levt = &__get_cpu_var(lapic_events);
401
402 memcpy(levt, &lapic_clockevent, sizeof(*levt));
403 levt->cpumask = cpumask_of_cpu(smp_processor_id());
404
405 clockevents_register_device(levt);
406}
407
408/*
409 * In this function we calibrate APIC bus clocks to the external
410 * timer. Unfortunately we cannot use jiffies and the timer irq
411 * to calibrate, since some later bootup code depends on getting
412 * the first irq? Ugh.
413 *
414 * We want to do the calibration only once since we
415 * want to have local timer irqs syncron. CPUs connected
416 * by the same APIC bus have the very same bus frequency.
417 * And we want to have irqs off anyways, no accidental
418 * APIC irq that way.
419 */
420
421#define TICK_COUNT 100000000
422
423static int __init calibrate_APIC_clock(void)
424{
425 unsigned apic, apic_start;
426 unsigned long tsc, tsc_start;
427 int result;
428
429 local_irq_disable();
430
431 /*
432 * Put whatever arbitrary (but long enough) timeout
433 * value into the APIC clock, we just want to get the
434 * counter running for calibration.
435 *
436 * No interrupt enable !
437 */
438 __setup_APIC_LVTT(250000000, 0, 0);
439
440 apic_start = apic_read(APIC_TMCCT);
441#ifdef CONFIG_X86_PM_TIMER
442 if (apic_calibrate_pmtmr && pmtmr_ioport) {
443 pmtimer_wait(5000); /* 5ms wait */
444 apic = apic_read(APIC_TMCCT);
445 result = (apic_start - apic) * 1000L / 5;
446 } else
447#endif
448 {
449 rdtscll(tsc_start);
450
451 do {
452 apic = apic_read(APIC_TMCCT);
453 rdtscll(tsc);
454 } while ((tsc - tsc_start) < TICK_COUNT &&
455 (apic_start - apic) < TICK_COUNT);
456
457 result = (apic_start - apic) * 1000L * tsc_khz /
458 (tsc - tsc_start);
459 }
460
461 local_irq_enable();
462
463 printk(KERN_DEBUG "APIC timer calibration result %d\n", result);
464
465 printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n",
466 result / 1000 / 1000, result / 1000 % 1000);
467
468 /* Calculate the scaled math multiplication factor */
469 lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC,
470 lapic_clockevent.shift);
471 lapic_clockevent.max_delta_ns =
472 clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
473 lapic_clockevent.min_delta_ns =
474 clockevent_delta2ns(0xF, &lapic_clockevent);
475
476 calibration_result = (result * APIC_DIVISOR) / HZ;
477
478 /*
479 * Do a sanity check on the APIC calibration result
480 */
481 if (calibration_result < (1000000 / HZ)) {
482 printk(KERN_WARNING
483 "APIC frequency too slow, disabling apic timer\n");
484 return -1;
485 }
486
487 return 0;
488}
489
490/*
491 * Setup the boot APIC
492 *
493 * Calibrate and verify the result.
494 */
495void __init setup_boot_APIC_clock(void)
496{
497 /*
498 * The local apic timer can be disabled via the kernel
499 * commandline or from the CPU detection code. Register the lapic
500 * timer as a dummy clock event source on SMP systems, so the
501 * broadcast mechanism is used. On UP systems simply ignore it.
502 */
503 if (disable_apic_timer) {
504 printk(KERN_INFO "Disabling APIC timer\n");
505 /* No broadcast on UP ! */
506 if (num_possible_cpus() > 1) {
507 lapic_clockevent.mult = 1;
508 setup_APIC_timer();
509 }
510 return;
511 }
512
513 apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
514 "calibrating APIC timer ...\n");
515
516 if (calibrate_APIC_clock()) {
517 /* No broadcast on UP ! */
518 if (num_possible_cpus() > 1)
519 setup_APIC_timer();
520 return;
521 }
522
523 /*
524 * If nmi_watchdog is set to IO_APIC, we need the
525 * PIT/HPET going. Otherwise register lapic as a dummy
526 * device.
527 */
528 if (nmi_watchdog != NMI_IO_APIC)
529 lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
530 else
531 printk(KERN_WARNING "APIC timer registered as dummy,"
532 " due to nmi_watchdog=%d!\n", nmi_watchdog);
533
534 /* Setup the lapic or request the broadcast */
535 setup_APIC_timer();
536}
537
538void __cpuinit setup_secondary_APIC_clock(void)
539{
540 setup_APIC_timer();
541}
542
543/*
544 * The guts of the apic timer interrupt
545 */
546static void local_apic_timer_interrupt(void)
547{
548 int cpu = smp_processor_id();
549 struct clock_event_device *evt = &per_cpu(lapic_events, cpu);
550
551 /*
552 * Normally we should not be here till LAPIC has been initialized but
553 * in some cases like kdump, its possible that there is a pending LAPIC
554 * timer interrupt from previous kernel's context and is delivered in
555 * new kernel the moment interrupts are enabled.
556 *
557 * Interrupts are enabled early and LAPIC is setup much later, hence
558 * its possible that when we get here evt->event_handler is NULL.
559 * Check for event_handler being NULL and discard the interrupt as
560 * spurious.
561 */
562 if (!evt->event_handler) {
563 printk(KERN_WARNING
564 "Spurious LAPIC timer interrupt on cpu %d\n", cpu);
565 /* Switch it off */
566 lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt);
567 return;
568 }
569
570 /*
571 * the NMI deadlock-detector uses this.
572 */
573#ifdef CONFIG_X86_64
574 add_pda(apic_timer_irqs, 1);
575#else
576 per_cpu(irq_stat, cpu).apic_timer_irqs++;
577#endif
578
579 evt->event_handler(evt);
580}
581
/*
 * Local APIC timer interrupt entry point.
 *
 * This is the most natural way of doing local interrupts, but local
 * timer interrupts can be emulated by broadcast interrupts too (in case
 * the hw doesn't support APIC timers).
 *
 * [ If a single-CPU system runs an SMP kernel the local interrupt is
 *   called as well, hence the handler cannot be inlined. ]
 */
void smp_apic_timer_interrupt(struct pt_regs *regs)
{
	struct pt_regs *saved_regs = set_irq_regs(regs);

	/* ACK right away, the timer handling itself can be slow */
	ack_APIC_irq();

	/*
	 * update_process_times() expects irq_enter() to have been done,
	 * so the handler runs between irq_enter() and irq_exit().
	 */
	exit_idle();
	irq_enter();
	local_apic_timer_interrupt();
	irq_exit();

	set_irq_regs(saved_regs);
}
611
/*
 * Changing the profiling timer multiplier is not supported by this
 * implementation: @multiplier is ignored and -EINVAL is returned.
 */
int setup_profiling_timer(unsigned int multiplier)
{
	const int err = -EINVAL;

	return err;
}
616
617
618/*
619 * Local APIC start and shutdown
620 */
621
622/**
623 * clear_local_APIC - shutdown the local APIC
624 *
625 * This is called, when a CPU is disabled and before rebooting, so the state of
626 * the local APIC has no dangling leftovers. Also used to cleanout any BIOS
627 * leftovers during boot.
628 */
629void clear_local_APIC(void)
630{
631 int maxlvt;
632 u32 v;
633
634 /* APIC hasn't been mapped yet */
635 if (!apic_phys)
636 return;
637
638 maxlvt = lapic_get_maxlvt();
639 /*
640 * Masking an LVT entry can trigger a local APIC error
641 * if the vector is zero. Mask LVTERR first to prevent this.
642 */
643 if (maxlvt >= 3) {
644 v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
645 apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
646 }
647 /*
648 * Careful: we have to set masks only first to deassert
649 * any level-triggered sources.
650 */
651 v = apic_read(APIC_LVTT);
652 apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
653 v = apic_read(APIC_LVT0);
654 apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
655 v = apic_read(APIC_LVT1);
656 apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
657 if (maxlvt >= 4) {
658 v = apic_read(APIC_LVTPC);
659 apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
660 }
661
662 /* lets not touch this if we didn't frob it */
663#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL)
664 if (maxlvt >= 5) {
665 v = apic_read(APIC_LVTTHMR);
666 apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
667 }
668#endif
669 /*
670 * Clean APIC state for other OSs:
671 */
672 apic_write(APIC_LVTT, APIC_LVT_MASKED);
673 apic_write(APIC_LVT0, APIC_LVT_MASKED);
674 apic_write(APIC_LVT1, APIC_LVT_MASKED);
675 if (maxlvt >= 3)
676 apic_write(APIC_LVTERR, APIC_LVT_MASKED);
677 if (maxlvt >= 4)
678 apic_write(APIC_LVTPC, APIC_LVT_MASKED);
679
680 /* Integrated APIC (!82489DX) ? */
681 if (lapic_is_integrated()) {
682 if (maxlvt > 3)
683 /* Clear ESR due to Pentium errata 3AP and 11AP */
684 apic_write(APIC_ESR, 0);
685 apic_read(APIC_ESR);
686 }
687}
688
689/**
690 * disable_local_APIC - clear and disable the local APIC
691 */
692void disable_local_APIC(void)
693{
694 unsigned int value;
695
696 clear_local_APIC();
697
698 /*
699 * Disable APIC (implies clearing of registers
700 * for 82489DX!).
701 */
702 value = apic_read(APIC_SPIV);
703 value &= ~APIC_SPIV_APIC_ENABLED;
704 apic_write(APIC_SPIV, value);
705
706#ifdef CONFIG_X86_32
707 /*
708 * When LAPIC was disabled by the BIOS and enabled by the kernel,
709 * restore the disabled state.
710 */
711 if (enabled_via_apicbase) {
712 unsigned int l, h;
713
714 rdmsr(MSR_IA32_APICBASE, l, h);
715 l &= ~MSR_IA32_APICBASE_ENABLE;
716 wrmsr(MSR_IA32_APICBASE, l, h);
717 }
718#endif
719}
720
721/*
722 * If Linux enabled the LAPIC against the BIOS default disable it down before
723 * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and
724 * not power-off. Additionally clear all LVT entries before disable_local_APIC
725 * for the case where Linux didn't enable the LAPIC.
726 */
727void lapic_shutdown(void)
728{
729 unsigned long flags;
730
731 if (!cpu_has_apic)
732 return;
733
734 local_irq_save(flags);
735
736#ifdef CONFIG_X86_32
737 if (!enabled_via_apicbase)
738 clear_local_APIC();
739 else
740#endif
741 disable_local_APIC();
742
743
744 local_irq_restore(flags);
745}
746
747/*
748 * This is to verify that we're looking at a real local APIC.
749 * Check these against your board if the CPUs aren't getting
750 * started for no apparent reason.
751 */
752int __init verify_local_APIC(void)
753{
754 unsigned int reg0, reg1;
755
756 /*
757 * The version register is read-only in a real APIC.
758 */
759 reg0 = apic_read(APIC_LVR);
760 apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0);
761 apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
762 reg1 = apic_read(APIC_LVR);
763 apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1);
764
765 /*
766 * The two version reads above should print the same
767 * numbers. If the second one is different, then we
768 * poke at a non-APIC.
769 */
770 if (reg1 != reg0)
771 return 0;
772
773 /*
774 * Check if the version looks reasonably.
775 */
776 reg1 = GET_APIC_VERSION(reg0);
777 if (reg1 == 0x00 || reg1 == 0xff)
778 return 0;
779 reg1 = lapic_get_maxlvt();
780 if (reg1 < 0x02 || reg1 == 0xff)
781 return 0;
782
783 /*
784 * The ID register is read/write in a real APIC.
785 */
786 reg0 = apic_read(APIC_ID);
787 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
788 apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
789 reg1 = apic_read(APIC_ID);
790 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
791 apic_write(APIC_ID, reg0);
792 if (reg1 != (reg0 ^ APIC_ID_MASK))
793 return 0;
794
795 /*
796 * The next two are just to see if we have sane values.
797 * They're only really relevant if we're in Virtual Wire
798 * compatibility mode, but most boxes are anymore.
799 */
800 reg0 = apic_read(APIC_LVT0);
801 apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0);
802 reg1 = apic_read(APIC_LVT1);
803 apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1);
804
805 return 1;
806}
807
808/**
809 * sync_Arb_IDs - synchronize APIC bus arbitration IDs
810 */
811void __init sync_Arb_IDs(void)
812{
813 /*
814 * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not
815 * needed on AMD.
816 */
817 if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
818 return;
819
820 /*
821 * Wait for idle.
822 */
823 apic_wait_icr_idle();
824
825 apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
826 apic_write(APIC_ICR, APIC_DEST_ALLINC |
827 APIC_INT_LEVELTRIG | APIC_DM_INIT);
828}
829
830/*
831 * An initial setup of the virtual wire mode.
832 */
833void __init init_bsp_APIC(void)
834{
835 unsigned int value;
836
837 /*
838 * Don't do the setup now if we have a SMP BIOS as the
839 * through-I/O-APIC virtual wire mode might be active.
840 */
841 if (smp_found_config || !cpu_has_apic)
842 return;
843
844 /*
845 * Do not trust the local APIC being empty at bootup.
846 */
847 clear_local_APIC();
848
849 /*
850 * Enable APIC.
851 */
852 value = apic_read(APIC_SPIV);
853 value &= ~APIC_VECTOR_MASK;
854 value |= APIC_SPIV_APIC_ENABLED;
855
856#ifdef CONFIG_X86_32
857 /* This bit is reserved on P4/Xeon and should be cleared */
858 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
859 (boot_cpu_data.x86 == 15))
860 value &= ~APIC_SPIV_FOCUS_DISABLED;
861 else
862#endif
863 value |= APIC_SPIV_FOCUS_DISABLED;
864 value |= SPURIOUS_APIC_VECTOR;
865 apic_write(APIC_SPIV, value);
866
867 /*
868 * Set up the virtual wire mode.
869 */
870 apic_write(APIC_LVT0, APIC_DM_EXTINT);
871 value = APIC_DM_NMI;
872 if (!lapic_is_integrated()) /* 82489DX */
873 value |= APIC_LVT_LEVEL_TRIGGER;
874 apic_write(APIC_LVT1, value);
875}
876
877static void __cpuinit lapic_setup_esr(void)
878{
879 unsigned long oldvalue, value, maxlvt;
880 if (lapic_is_integrated() && !esr_disable) {
881 if (esr_disable) {
882 /*
883 * Something untraceable is creating bad interrupts on
884 * secondary quads ... for the moment, just leave the
885 * ESR disabled - we can't do anything useful with the
886 * errors anyway - mbligh
887 */
888 printk(KERN_INFO "Leaving ESR disabled.\n");
889 return;
890 }
891 /* !82489DX */
892 maxlvt = lapic_get_maxlvt();
893 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
894 apic_write(APIC_ESR, 0);
895 oldvalue = apic_read(APIC_ESR);
896
897 /* enables sending errors */
898 value = ERROR_APIC_VECTOR;
899 apic_write(APIC_LVTERR, value);
900 /*
901 * spec says clear errors after enabling vector.
902 */
903 if (maxlvt > 3)
904 apic_write(APIC_ESR, 0);
905 value = apic_read(APIC_ESR);
906 if (value != oldvalue)
907 apic_printk(APIC_VERBOSE, "ESR value before enabling "
908 "vector: 0x%08lx after: 0x%08lx\n",
909 oldvalue, value);
910 } else {
911 printk(KERN_INFO "No ESR for 82489DX.\n");
912 }
913}
914
915
916/**
917 * setup_local_APIC - setup the local APIC
918 */
919void __cpuinit setup_local_APIC(void)
920{
921 unsigned int value;
922 int i, j;
923
924 preempt_disable();
925 value = apic_read(APIC_LVR);
926
927 BUILD_BUG_ON((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f);
928
929 /*
930 * Double-check whether this APIC is really registered.
931 * This is meaningless in clustered apic mode, so we skip it.
932 */
933 if (!apic_id_registered())
934 BUG();
935
936 /*
937 * Intel recommends to set DFR, LDR and TPR before enabling
938 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
939 * document number 292116). So here it goes...
940 */
941 init_apic_ldr();
942
943 /*
944 * Set Task Priority to 'accept all'. We never change this
945 * later on.
946 */
947 value = apic_read(APIC_TASKPRI);
948 value &= ~APIC_TPRI_MASK;
949 apic_write(APIC_TASKPRI, value);
950
951 /*
952 * After a crash, we no longer service the interrupts and a pending
953 * interrupt from previous kernel might still have ISR bit set.
954 *
955 * Most probably by now CPU has serviced that pending interrupt and
956 * it might not have done the ack_APIC_irq() because it thought,
957 * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it
958 * does not clear the ISR bit and cpu thinks it has already serivced
959 * the interrupt. Hence a vector might get locked. It was noticed
960 * for timer irq (vector 0x31). Issue an extra EOI to clear ISR.
961 */
962 for (i = APIC_ISR_NR - 1; i >= 0; i--) {
963 value = apic_read(APIC_ISR + i*0x10);
964 for (j = 31; j >= 0; j--) {
965 if (value & (1<<j))
966 ack_APIC_irq();
967 }
968 }
969
970 /*
971 * Now that we are all set up, enable the APIC
972 */
973 value = apic_read(APIC_SPIV);
974 value &= ~APIC_VECTOR_MASK;
975 /*
976 * Enable APIC
977 */
978 value |= APIC_SPIV_APIC_ENABLED;
979
980 /* We always use processor focus */
981
982 /*
983 * Set spurious IRQ vector
984 */
985 value |= SPURIOUS_APIC_VECTOR;
986 apic_write(APIC_SPIV, value);
987
988 /*
989 * Set up LVT0, LVT1:
990 *
991 * set up through-local-APIC on the BP's LINT0. This is not
992 * strictly necessary in pure symmetric-IO mode, but sometimes
993 * we delegate interrupts to the 8259A.
994 */
995 /*
996 * TODO: set up through-local-APIC from through-I/O-APIC? --macro
997 */
998 value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
999 if (!smp_processor_id() && !value) {
1000 value = APIC_DM_EXTINT;
1001 apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n",
1002 smp_processor_id());
1003 } else {
1004 value = APIC_DM_EXTINT | APIC_LVT_MASKED;
1005 apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n",
1006 smp_processor_id());
1007 }
1008 apic_write(APIC_LVT0, value);
1009
1010 /*
1011 * only the BP should see the LINT1 NMI signal, obviously.
1012 */
1013 if (!smp_processor_id())
1014 value = APIC_DM_NMI;
1015 else
1016 value = APIC_DM_NMI | APIC_LVT_MASKED;
1017 apic_write(APIC_LVT1, value);
1018 preempt_enable();
1019}
1020
1021void __cpuinit end_local_APIC_setup(void)
1022{
1023 lapic_setup_esr();
1024
1025#ifdef CONFIG_X86_32
1026 {
1027 unsigned int value;
1028 /* Disable the local apic timer */
1029 value = apic_read(APIC_LVTT);
1030 value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
1031 apic_write(APIC_LVTT, value);
1032 }
1033#endif
1034
1035 setup_apic_nmi_watchdog(NULL);
1036 apic_pm_activate();
1037}
1038
1039void check_x2apic(void)
1040{
1041 int msr, msr2;
1042
1043 rdmsr(MSR_IA32_APICBASE, msr, msr2);
1044
1045 if (msr & X2APIC_ENABLE) {
1046 printk("x2apic enabled by BIOS, switching to x2apic ops\n");
1047 x2apic_preenabled = x2apic = 1;
1048 apic_ops = &x2apic_ops;
1049 }
1050}
1051
1052void enable_x2apic(void)
1053{
1054 int msr, msr2;
1055
1056 rdmsr(MSR_IA32_APICBASE, msr, msr2);
1057 if (!(msr & X2APIC_ENABLE)) {
1058 printk("Enabling x2apic\n");
1059 wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
1060 }
1061}
1062
1063void enable_IR_x2apic(void)
1064{
1065#ifdef CONFIG_INTR_REMAP
1066 int ret;
1067 unsigned long flags;
1068
1069 if (!cpu_has_x2apic)
1070 return;
1071
1072 if (!x2apic_preenabled && disable_x2apic) {
1073 printk(KERN_INFO
1074 "Skipped enabling x2apic and Interrupt-remapping "
1075 "because of nox2apic\n");
1076 return;
1077 }
1078
1079 if (x2apic_preenabled && disable_x2apic)
1080 panic("Bios already enabled x2apic, can't enforce nox2apic");
1081
1082 if (!x2apic_preenabled && skip_ioapic_setup) {
1083 printk(KERN_INFO
1084 "Skipped enabling x2apic and Interrupt-remapping "
1085 "because of skipping io-apic setup\n");
1086 return;
1087 }
1088
1089 ret = dmar_table_init();
1090 if (ret) {
1091 printk(KERN_INFO
1092 "dmar_table_init() failed with %d:\n", ret);
1093
1094 if (x2apic_preenabled)
1095 panic("x2apic enabled by bios. But IR enabling failed");
1096 else
1097 printk(KERN_INFO
1098 "Not enabling x2apic,Intr-remapping\n");
1099 return;
1100 }
1101
1102 local_irq_save(flags);
1103 mask_8259A();
1104 save_mask_IO_APIC_setup();
1105
1106 ret = enable_intr_remapping(1);
1107
1108 if (ret && x2apic_preenabled) {
1109 local_irq_restore(flags);
1110 panic("x2apic enabled by bios. But IR enabling failed");
1111 }
1112
1113 if (ret)
1114 goto end;
1115
1116 if (!x2apic) {
1117 x2apic = 1;
1118 apic_ops = &x2apic_ops;
1119 enable_x2apic();
1120 }
1121end:
1122 if (ret)
1123 /*
1124 * IR enabling failed
1125 */
1126 restore_IO_APIC_setup();
1127 else
1128 reinit_intr_remapped_IO_APIC(x2apic_preenabled);
1129
1130 unmask_8259A();
1131 local_irq_restore(flags);
1132
1133 if (!ret) {
1134 if (!x2apic_preenabled)
1135 printk(KERN_INFO
1136 "Enabled x2apic and interrupt-remapping\n");
1137 else
1138 printk(KERN_INFO
1139 "Enabled Interrupt-remapping\n");
1140 } else
1141 printk(KERN_ERR
1142 "Failed to enable Interrupt-remapping and x2apic\n");
1143#else
1144 if (!cpu_has_x2apic)
1145 return;
1146
1147 if (x2apic_preenabled)
1148 panic("x2apic enabled prior OS handover,"
1149 " enable CONFIG_INTR_REMAP");
1150
1151 printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
1152 " and x2apic\n");
1153#endif
1154
1155 return;
1156}
1157
1158/*
1159 * Detect and enable local APICs on non-SMP boards.
1160 * Original code written by Keir Fraser.
1161 * On AMD64 we trust the BIOS - if it says no APIC it is likely
1162 * not correctly set up (usually the APIC timer won't work etc.)
1163 */
1164static int __init detect_init_APIC(void)
1165{
1166 if (!cpu_has_apic) {
1167 printk(KERN_INFO "No local APIC present\n");
1168 return -1;
1169 }
1170
1171 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
1172 boot_cpu_physical_apicid = 0;
1173 return 0;
1174}
1175
1176void __init early_init_lapic_mapping(void)
1177{
1178 unsigned long phys_addr;
1179
1180 /*
1181 * If no local APIC can be found then go out
1182 * : it means there is no mpatable and MADT
1183 */
1184 if (!smp_found_config)
1185 return;
1186
1187 phys_addr = mp_lapic_addr;
1188
1189 set_fixmap_nocache(FIX_APIC_BASE, phys_addr);
1190 apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
1191 APIC_BASE, phys_addr);
1192
1193 /*
1194 * Fetch the APIC ID of the BSP in case we have a
1195 * default configuration (or the MP table is broken).
1196 */
1197 boot_cpu_physical_apicid = read_apic_id();
1198}
1199
1200/**
1201 * init_apic_mappings - initialize APIC mappings
1202 */
1203void __init init_apic_mappings(void)
1204{
1205 if (x2apic) {
1206 boot_cpu_physical_apicid = read_apic_id();
1207 return;
1208 }
1209
1210 /*
1211 * If no local APIC can be found then set up a fake all
1212 * zeroes page to simulate the local APIC and another
1213 * one for the IO-APIC.
1214 */
1215 if (!smp_found_config && detect_init_APIC()) {
1216 apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
1217 apic_phys = __pa(apic_phys);
1218 } else
1219 apic_phys = mp_lapic_addr;
1220
1221 set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
1222 apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
1223 APIC_BASE, apic_phys);
1224
1225 /*
1226 * Fetch the APIC ID of the BSP in case we have a
1227 * default configuration (or the MP table is broken).
1228 */
1229 boot_cpu_physical_apicid = read_apic_id();
1230}
1231
1232/*
1233 * This initializes the IO-APIC and APIC hardware if this is
1234 * a UP kernel.
1235 */
1236int apic_version[MAX_APICS];
1237
/*
 * APIC_init_uniprocessor - bring up the local APIC and IO-APIC on a UP kernel
 *
 * Returns -1 without touching the hardware when the APIC is disabled or
 * the CPU reports none (in the latter case disable_apic is also set).
 * Otherwise runs the bring-up sequence below and returns 0.
 */
int __init APIC_init_uniprocessor(void)
{
	if (disable_apic) {
		printk(KERN_INFO "Apic disabled\n");
		return -1;
	}
	if (!cpu_has_apic) {
		disable_apic = 1;
		printk(KERN_INFO "Apic disabled by BIOS\n");
		return -1;
	}

	enable_IR_x2apic();
	setup_apic_routing();

	verify_local_APIC();

	connect_bsp_APIC();

	/* Mark the boot CPU present and program its APIC ID register. */
	physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
	apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));

	setup_local_APIC();

	/*
	 * Now enable IO-APICs, actually call clear_IO_APIC
	 * We need clear_IO_APIC before enabling vector on BP
	 */
	if (!skip_ioapic_setup && nr_ioapics)
		enable_IO_APIC();

	/* Without a usable IO-APIC the NMI watchdog is localised. */
	if (!smp_found_config || skip_ioapic_setup || !nr_ioapics)
		localise_nmi_watchdog();
	end_local_APIC_setup();

	/* Either set the IO-APICs up fully or forget about them. */
	if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
		setup_IO_APIC();
	else
		nr_ioapics = 0;
	setup_boot_APIC_clock();
	check_nmi_watchdog();
	return 0;
}
1281
1282/*
1283 * Local APIC interrupts
1284 */
1285
1286/*
1287 * This interrupt should _never_ happen with our APIC/SMP architecture
1288 */
1289asmlinkage void smp_spurious_interrupt(void)
1290{
1291 unsigned int v;
1292 exit_idle();
1293 irq_enter();
1294 /*
1295 * Check if this really is a spurious interrupt and ACK it
1296 * if it is a vectored one. Just in case...
1297 * Spurious interrupts should not be ACKed.
1298 */
1299 v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
1300 if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
1301 ack_APIC_irq();
1302
1303 add_pda(irq_spurious_count, 1);
1304 irq_exit();
1305}
1306
1307/*
1308 * This interrupt should never happen with our APIC/SMP architecture
1309 */
1310asmlinkage void smp_error_interrupt(void)
1311{
1312 unsigned int v, v1;
1313
1314 exit_idle();
1315 irq_enter();
1316 /* First tickle the hardware, only then report what went on. -- REW */
1317 v = apic_read(APIC_ESR);
1318 apic_write(APIC_ESR, 0);
1319 v1 = apic_read(APIC_ESR);
1320 ack_APIC_irq();
1321 atomic_inc(&irq_err_count);
1322
1323 /* Here is what the APIC error bits mean:
1324 0: Send CS error
1325 1: Receive CS error
1326 2: Send accept error
1327 3: Receive accept error
1328 4: Reserved
1329 5: Send illegal vector
1330 6: Received illegal vector
1331 7: Illegal register address
1332 */
1333 printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
1334 smp_processor_id(), v , v1);
1335 irq_exit();
1336}
1337
1338/**
1339 * connect_bsp_APIC - attach the APIC to the interrupt system
1340 */
1341void __init connect_bsp_APIC(void)
1342{
1343#ifdef CONFIG_X86_32
1344 if (pic_mode) {
1345 /*
1346 * Do not trust the local APIC being empty at bootup.
1347 */
1348 clear_local_APIC();
1349 /*
1350 * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's
1351 * local APIC to INT and NMI lines.
1352 */
1353 apic_printk(APIC_VERBOSE, "leaving PIC mode, "
1354 "enabling APIC mode.\n");
1355 outb(0x70, 0x22);
1356 outb(0x01, 0x23);
1357 }
1358#endif
1359 enable_apic_mode();
1360}
1361
1362/**
1363 * disconnect_bsp_APIC - detach the APIC from the interrupt system
1364 * @virt_wire_setup: indicates, whether virtual wire mode is selected
1365 *
1366 * Virtual wire mode is necessary to deliver legacy interrupts even when the
1367 * APIC is disabled.
1368 */
1369void disconnect_bsp_APIC(int virt_wire_setup)
1370{
1371 unsigned int value;
1372
1373#ifdef CONFIG_X86_32
1374 if (pic_mode) {
1375 /*
1376 * Put the board back into PIC mode (has an effect only on
1377 * certain older boards). Note that APIC interrupts, including
1378 * IPIs, won't work beyond this point! The only exception are
1379 * INIT IPIs.
1380 */
1381 apic_printk(APIC_VERBOSE, "disabling APIC mode, "
1382 "entering PIC mode.\n");
1383 outb(0x70, 0x22);
1384 outb(0x00, 0x23);
1385 return;
1386 }
1387#endif
1388
1389 /* Go back to Virtual Wire compatibility mode */
1390
1391 /* For the spurious interrupt use vector F, and enable it */
1392 value = apic_read(APIC_SPIV);
1393 value &= ~APIC_VECTOR_MASK;
1394 value |= APIC_SPIV_APIC_ENABLED;
1395 value |= 0xf;
1396 apic_write(APIC_SPIV, value);
1397
1398 if (!virt_wire_setup) {
1399 /*
1400 * For LVT0 make it edge triggered, active high,
1401 * external and enabled
1402 */
1403 value = apic_read(APIC_LVT0);
1404 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
1405 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
1406 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
1407 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
1408 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
1409 apic_write(APIC_LVT0, value);
1410 } else {
1411 /* Disable LVT0 */
1412 apic_write(APIC_LVT0, APIC_LVT_MASKED);
1413 }
1414
1415 /*
1416 * For LVT1 make it edge triggered, active high,
1417 * nmi and enabled
1418 */
1419 value = apic_read(APIC_LVT1);
1420 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
1421 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
1422 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
1423 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
1424 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
1425 apic_write(APIC_LVT1, value);
1426}
1427
1428void __cpuinit generic_processor_info(int apicid, int version)
1429{
1430 int cpu;
1431 cpumask_t tmp_map;
1432
1433 /*
1434 * Validate version
1435 */
1436 if (version == 0x0) {
1437 printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
1438 "fixing up to 0x10. (tell your hw vendor)\n",
1439 version);
1440 version = 0x10;
1441 }
1442 apic_version[apicid] = version;
1443
1444 if (num_processors >= NR_CPUS) {
1445 printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
1446 " Processor ignored.\n", NR_CPUS);
1447 return;
1448 }
1449
1450 num_processors++;
1451 cpus_complement(tmp_map, cpu_present_map);
1452 cpu = first_cpu(tmp_map);
1453
1454 physid_set(apicid, phys_cpu_present_map);
1455 if (apicid == boot_cpu_physical_apicid) {
1456 /*
1457 * x86_bios_cpu_apicid is required to have processors listed
1458 * in same order as logical cpu numbers. Hence the first
1459 * entry is BSP, and so on.
1460 */
1461 cpu = 0;
1462 }
1463 if (apicid > max_physical_apicid)
1464 max_physical_apicid = apicid;
1465
1466#ifdef CONFIG_X86_32
1467 /*
1468 * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
1469 * but we need to work other dependencies like SMP_SUSPEND etc
1470 * before this can be done without some confusion.
1471 * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
1472 * - Ashok Raj <ashok.raj@intel.com>
1473 */
1474 if (max_physical_apicid >= 8) {
1475 switch (boot_cpu_data.x86_vendor) {
1476 case X86_VENDOR_INTEL:
1477 if (!APIC_XAPIC(version)) {
1478 def_to_bigsmp = 0;
1479 break;
1480 }
1481 /* If P4 and above fall through */
1482 case X86_VENDOR_AMD:
1483 def_to_bigsmp = 1;
1484 }
1485 }
1486#endif
1487
1488#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64)
1489 /* are we being called early in kernel startup? */
1490 if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
1491 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
1492 u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
1493
1494 cpu_to_apicid[cpu] = apicid;
1495 bios_cpu_apicid[cpu] = apicid;
1496 } else {
1497 per_cpu(x86_cpu_to_apicid, cpu) = apicid;
1498 per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
1499 }
1500#endif
1501
1502 cpu_set(cpu, cpu_possible_map);
1503 cpu_set(cpu, cpu_present_map);
1504}
1505
/*
 * hard_smp_processor_id - return the value of read_apic_id() for the
 * CPU this runs on.
 */
int hard_smp_processor_id(void)
{
	return read_apic_id();
}
1510
1511/*
1512 * Power management
1513 */
1514#ifdef CONFIG_PM
1515
/*
 * Local APIC state kept across suspend/resume: lapic_suspend() fills the
 * register fields in and lapic_resume() writes them back.
 */
static struct {
	/*
	 * 'active' is true if the local APIC was enabled by us and
	 * not the BIOS; this signifies that we are also responsible
	 * for disabling it before entering apm/acpi suspend
	 */
	int active;
	/* r/w apic fields */
	unsigned int apic_id;		/* APIC_ID */
	unsigned int apic_taskpri;	/* APIC_TASKPRI */
	unsigned int apic_ldr;		/* APIC_LDR */
	unsigned int apic_dfr;		/* APIC_DFR */
	unsigned int apic_spiv;		/* APIC_SPIV */
	unsigned int apic_lvtt;		/* APIC_LVTT */
	unsigned int apic_lvtpc;	/* APIC_LVTPC, saved only if maxlvt >= 4 */
	unsigned int apic_lvt0;		/* APIC_LVT0 */
	unsigned int apic_lvt1;		/* APIC_LVT1 */
	unsigned int apic_lvterr;	/* APIC_LVTERR */
	unsigned int apic_tmict;	/* APIC_TMICT */
	unsigned int apic_tdcr;		/* APIC_TDCR */
	unsigned int apic_thmr;		/* APIC_LVTTHMR, saved only if maxlvt >= 5 */
} apic_pm_state;
1538
/*
 * lapic_suspend - sysdev suspend hook for the local APIC
 * @dev:   unused
 * @state: unused
 *
 * Does nothing unless apic_pm_state.active is set. Otherwise saves the
 * writable APIC registers into apic_pm_state and then disables the local
 * APIC with interrupts off. Always returns 0.
 */
static int lapic_suspend(struct sys_device *dev, pm_message_t state)
{
	unsigned long flags;
	int maxlvt;

	if (!apic_pm_state.active)
		return 0;

	/* Optional LVT entries below are saved only if maxlvt covers them. */
	maxlvt = lapic_get_maxlvt();

	apic_pm_state.apic_id = apic_read(APIC_ID);
	apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
	apic_pm_state.apic_ldr = apic_read(APIC_LDR);
	apic_pm_state.apic_dfr = apic_read(APIC_DFR);
	apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
	apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
	if (maxlvt >= 4)
		apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
	apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
	apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
	apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
	apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
	apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
	if (maxlvt >= 5)
		apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
#endif

	local_irq_save(flags);
	disable_local_APIC();
	local_irq_restore(flags);
	return 0;
}
1572
/*
 * lapic_resume - sysdev resume hook for the local APIC
 * @dev: unused
 *
 * Does nothing unless apic_pm_state.active is set. Otherwise, with
 * interrupts off, re-enables the APIC (x2apic mode or via
 * MSR_IA32_APICBASE) and writes back the registers saved by
 * lapic_suspend(). Always returns 0.
 */
static int lapic_resume(struct sys_device *dev)
{
	unsigned int l, h;
	unsigned long flags;
	int maxlvt;

	if (!apic_pm_state.active)
		return 0;

	maxlvt = lapic_get_maxlvt();

	local_irq_save(flags);

#ifdef CONFIG_X86_64
	if (x2apic)
		enable_x2apic();
	else
#endif
	{
		/*
		 * Make sure the APICBASE points to the right address
		 *
		 * FIXME! This will be wrong if we ever support suspend on
		 * SMP! We'll need to do this as part of the CPU restore!
		 */
		rdmsr(MSR_IA32_APICBASE, l, h);
		l &= ~MSR_IA32_APICBASE_BASE;
		l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
		wrmsr(MSR_IA32_APICBASE, l, h);
	}

	/* Keep the error LVT masked until the saved value is written below. */
	apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
	apic_write(APIC_ID, apic_pm_state.apic_id);
	apic_write(APIC_DFR, apic_pm_state.apic_dfr);
	apic_write(APIC_LDR, apic_pm_state.apic_ldr);
	apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
	apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
	apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
	apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
	if (maxlvt >= 5)
		apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
#endif
	if (maxlvt >= 4)
		apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
	apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
	apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
	apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
	/* Clear the ESR before and after restoring the saved error LVT. */
	apic_write(APIC_ESR, 0);
	apic_read(APIC_ESR);
	apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
	apic_write(APIC_ESR, 0);
	apic_read(APIC_ESR);

	local_irq_restore(flags);

	return 0;
}
1631
/*
 * This device has no shutdown method - fully functioning local APICs
 * are needed on every CPU up until machine_halt/restart/poweroff.
 */

/* sysdev class "lapic", wiring up the suspend/resume hooks above. */
static struct sysdev_class lapic_sysclass = {
	.name		= "lapic",
	.resume		= lapic_resume,
	.suspend	= lapic_suspend,
};

/* The single lapic sysdev instance (id 0) registered in that class. */
static struct sys_device device_lapic = {
	.id	= 0,
	.cls	= &lapic_sysclass,
};
1647
/*
 * apic_pm_activate - set apic_pm_state.active, which lapic_suspend() and
 * lapic_resume() check before doing any work.
 */
static void __cpuinit apic_pm_activate(void)
{
	apic_pm_state.active = 1;
}
1652
1653static int __init init_lapic_sysfs(void)
1654{
1655 int error;
1656
1657 if (!cpu_has_apic)
1658 return 0;
1659 /* XXX: remove suspend/resume procs if !apic_pm_state.active? */
1660
1661 error = sysdev_class_register(&lapic_sysclass);
1662 if (!error)
1663 error = sysdev_register(&device_lapic);
1664 return error;
1665}
1666device_initcall(init_lapic_sysfs);
1667
1668#else /* CONFIG_PM */
1669
/* No-op stand-in for apic_pm_activate() when CONFIG_PM is not set. */
static void apic_pm_activate(void) { }
1671
1672#endif /* CONFIG_PM */
1673
1674/*
1675 * apic_is_clustered_box() -- Check if we can expect good TSC
1676 *
1677 * Thus far, the major user of this is IBM's Summit2 series:
1678 *
1679 * Clustered boxes may have unsynced TSC problems if they are
1680 * multi-chassis. Use available data to take a good guess.
1681 * If in doubt, go HPET.
1682 */
1683__cpuinit int apic_is_clustered_box(void)
1684{
1685 int i, clusters, zeros;
1686 unsigned id;
1687 u16 *bios_cpu_apicid;
1688 DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
1689
1690 /*
1691 * there is not this kind of box with AMD CPU yet.
1692 * Some AMD box with quadcore cpu and 8 sockets apicid
1693 * will be [4, 0x23] or [8, 0x27] could be thought to
1694 * vsmp box still need checking...
1695 */
1696 if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
1697 return 0;
1698
1699 bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
1700 bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
1701
1702 for (i = 0; i < NR_CPUS; i++) {
1703 /* are we being called early in kernel startup? */
1704 if (bios_cpu_apicid) {
1705 id = bios_cpu_apicid[i];
1706 }
1707 else if (i < nr_cpu_ids) {
1708 if (cpu_present(i))
1709 id = per_cpu(x86_bios_cpu_apicid, i);
1710 else
1711 continue;
1712 }
1713 else
1714 break;
1715
1716 if (id != BAD_APICID)
1717 __set_bit(APIC_CLUSTERID(id), clustermap);
1718 }
1719
1720 /* Problem: Partially populated chassis may not have CPUs in some of
1721 * the APIC clusters they have been allocated. Only present CPUs have
1722 * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap.
1723 * Since clusters are allocated sequentially, count zeros only if
1724 * they are bounded by ones.
1725 */
1726 clusters = 0;
1727 zeros = 0;
1728 for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
1729 if (test_bit(i, clustermap)) {
1730 clusters += 1 + zeros;
1731 zeros = 0;
1732 } else
1733 ++zeros;
1734 }
1735
1736 /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are
1737 * not guaranteed to be synced between boards
1738 */
1739 if (is_vsmp_box() && clusters > 1)
1740 return 1;
1741
1742 /*
1743 * If clusters > 2, then should be multi-chassis.
1744 * May have to revisit this when multi-core + hyperthreaded CPUs come
1745 * out, but AFAIK this will work even for them.
1746 */
1747 return (clusters > 2);
1748}
1749
/*
 * "nox2apic" early parameter: sets disable_x2apic and clears
 * X86_FEATURE_X2APIC in boot_cpu_data. @str is ignored.
 */
static __init int setup_nox2apic(char *str)
{
	disable_x2apic = 1;
	clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC);
	return 0;
}
early_param("nox2apic", setup_nox2apic);
1757
1758
1759/*
1760 * APIC command line parameters
1761 */
/*
 * "disableapic" early parameter: sets disable_apic and clears the
 * X86_FEATURE_APIC capability. @arg is ignored.
 */
static int __init setup_disableapic(char *arg)
{
	disable_apic = 1;
	setup_clear_cpu_cap(X86_FEATURE_APIC);
	return 0;
}
early_param("disableapic", setup_disableapic);
1769
/*
 * "nolapic" early parameter: same as disableapic, for compatibility.
 * Simply forwards to setup_disableapic().
 */
static int __init setup_nolapic(char *arg)
{
	return setup_disableapic(arg);
}
early_param("nolapic", setup_nolapic);
1776
/*
 * "lapic_timer_c2_ok" early parameter: sets local_apic_timer_c2_ok.
 * @arg is ignored.
 */
static int __init parse_lapic_timer_c2_ok(char *arg)
{
	local_apic_timer_c2_ok = 1;
	return 0;
}
early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
1783
/*
 * "noapictimer" early parameter: sets disable_apic_timer.
 * @arg is ignored.
 */
static int __init parse_disable_apic_timer(char *arg)
{
	disable_apic_timer = 1;
	return 0;
}
early_param("noapictimer", parse_disable_apic_timer);
1790
1791static int __init parse_nolapic_timer(char *arg)
1792{
1793 disable_apic_timer = 1;
1794 return 0;
1795}
1796early_param("nolapic_timer", parse_nolapic_timer);
1797
/*
 * "apicpmtimer" boot parameter: sets apic_calibrate_pmtmr and calls
 * notsc_setup(NULL). @s is ignored.
 */
static __init int setup_apicpmtimer(char *s)
{
	apic_calibrate_pmtmr = 1;
	notsc_setup(NULL);
	return 0;
}
__setup("apicpmtimer", setup_apicpmtimer);
1805
1806static int __init apic_set_verbosity(char *arg)
1807{
1808 if (!arg) {
1809#ifdef CONFIG_X86_64
1810 skip_ioapic_setup = 0;
1811 ioapic_force = 1;
1812 return 0;
1813#endif
1814 return -EINVAL;
1815 }
1816
1817 if (strcmp("debug", arg) == 0)
1818 apic_verbosity = APIC_DEBUG;
1819 else if (strcmp("verbose", arg) == 0)
1820 apic_verbosity = APIC_VERBOSE;
1821 else {
1822 printk(KERN_WARNING "APIC Verbosity level %s not recognised"
1823 " use apic=verbose or apic=debug\n", arg);
1824 return -EINVAL;
1825 }
1826
1827 return 0;
1828}
1829early_param("apic", apic_set_verbosity);
1830
1831static int __init lapic_insert_resource(void)
1832{
1833 if (!apic_phys)
1834 return -1;
1835
1836 /* Put local APIC into the resource map. */
1837 lapic_resource.start = apic_phys;
1838 lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
1839 insert_resource(&iomem_resource, &lapic_resource);
1840
1841 return 0;
1842}
1843
1844/*
1845 * need call insert after e820_reserve_resources()
1846 * that is using request_resource
1847 */
1848late_initcall(lapic_insert_resource);
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index fdd585f9c53d..f0dfe6f17e7e 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -1,8 +1,6 @@
1/* 1/*
2 * BIOS run time interface routines. 2 * BIOS run time interface routines.
3 * 3 *
4 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify 4 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by 5 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or 6 * the Free Software Foundation; either version 2 of the License, or
@@ -16,33 +14,128 @@
16 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software 15 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 *
18 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
19 * Copyright (c) Russ Anderson
19 */ 20 */
20 21
22#include <linux/efi.h>
23#include <asm/efi.h>
24#include <linux/io.h>
21#include <asm/uv/bios.h> 25#include <asm/uv/bios.h>
26#include <asm/uv/uv_hub.h>
27
28struct uv_systab uv_systab;
22 29
23const char * 30s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
24x86_bios_strerror(long status)
25{ 31{
26 const char *str; 32 struct uv_systab *tab = &uv_systab;
27 switch (status) { 33
28 case 0: str = "Call completed without error"; break; 34 if (!tab->function)
29 case -1: str = "Not implemented"; break; 35 /*
30 case -2: str = "Invalid argument"; break; 36 * BIOS does not support UV systab
31 case -3: str = "Call completed with error"; break; 37 */
32 default: str = "Unknown BIOS status code"; break; 38 return BIOS_STATUS_UNIMPLEMENTED;
33 } 39
34 return str; 40 return efi_call6((void *)__va(tab->function),
41 (u64)which, a1, a2, a3, a4, a5);
35} 42}
36 43
37long 44s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
38x86_bios_freq_base(unsigned long which, unsigned long *ticks_per_second, 45 u64 a4, u64 a5)
39 unsigned long *drift_info)
40{ 46{
41 struct uv_bios_retval isrv; 47 unsigned long bios_flags;
48 s64 ret;
42 49
43 BIOS_CALL(isrv, BIOS_FREQ_BASE, which, 0, 0, 0, 0, 0, 0); 50 local_irq_save(bios_flags);
44 *ticks_per_second = isrv.v0; 51 ret = uv_bios_call(which, a1, a2, a3, a4, a5);
45 *drift_info = isrv.v1; 52 local_irq_restore(bios_flags);
46 return isrv.status; 53
54 return ret;
47} 55}
48EXPORT_SYMBOL_GPL(x86_bios_freq_base); 56
/*
 * uv_bios_call_reentrant - uv_bios_call() with preemption disabled
 *
 * Same arguments and return value as uv_bios_call(); preemption is
 * disabled only for the duration of the call.
 */
s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
					u64 a4, u64 a5)
{
	s64 ret;

	preempt_disable();
	ret = uv_bios_call(which, a1, a2, a3, a4, a5);
	preempt_enable();

	return ret;
}
68
69
70long sn_partition_id;
71EXPORT_SYMBOL_GPL(sn_partition_id);
72long uv_coherency_id;
73EXPORT_SYMBOL_GPL(uv_coherency_id);
74long uv_region_size;
75EXPORT_SYMBOL_GPL(uv_region_size);
76int uv_type;
77
78
79s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher,
80 long *region)
81{
82 s64 ret;
83 u64 v0, v1;
84 union partition_info_u part;
85
86 ret = uv_bios_call_irqsave(UV_BIOS_GET_SN_INFO, fc,
87 (u64)(&v0), (u64)(&v1), 0, 0);
88 if (ret != BIOS_STATUS_SUCCESS)
89 return ret;
90
91 part.val = v0;
92 if (uvtype)
93 *uvtype = part.hub_version;
94 if (partid)
95 *partid = part.partition_id;
96 if (coher)
97 *coher = part.coherence_id;
98 if (region)
99 *region = part.region_size;
100 return ret;
101}
102
103
/*
 * uv_bios_freq_base - issue the UV_BIOS_FREQ_BASE BIOS call
 * @clock_type:       passed to the BIOS as the first argument
 * @ticks_per_second: address passed to the BIOS as the second argument
 *
 * Returns the status from uv_bios_call().
 */
s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second)
{
	return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type,
				(u64)ticks_per_second, 0, 0, 0);
}
EXPORT_SYMBOL_GPL(uv_bios_freq_base);
110
111
112#ifdef CONFIG_EFI
113void uv_bios_init(void)
114{
115 struct uv_systab *tab;
116
117 if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) ||
118 (efi.uv_systab == (unsigned long)NULL)) {
119 printk(KERN_CRIT "No EFI UV System Table.\n");
120 uv_systab.function = (unsigned long)NULL;
121 return;
122 }
123
124 tab = (struct uv_systab *)ioremap(efi.uv_systab,
125 sizeof(struct uv_systab));
126 if (strncmp(tab->signature, "UVST", 4) != 0)
127 printk(KERN_ERR "bad signature in UV system table!");
128
129 /*
130 * Copy table to permanent spot for later use.
131 */
132 memcpy(&uv_systab, tab, sizeof(struct uv_systab));
133 iounmap(tab);
134
135 printk(KERN_INFO "EFI UV System Table Revision %d\n", tab->revision);
136}
137#else /* !CONFIG_EFI */
138
139void uv_bios_init(void) { }
140#endif
141
diff --git a/arch/x86/kernel/cpu/.gitignore b/arch/x86/kernel/cpu/.gitignore
new file mode 100644
index 000000000000..667df55a4399
--- /dev/null
+++ b/arch/x86/kernel/cpu/.gitignore
@@ -0,0 +1 @@
capflags.c
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 32e73520adf7..8f1e31db2ad5 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -249,7 +249,7 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
249 } 249 }
250 numa_set_node(cpu, node); 250 numa_set_node(cpu, node);
251 251
252 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); 252 printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node);
253#endif 253#endif
254} 254}
255 255
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
index 06fcce516d51..b0461856acfb 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * (C) 2001-2004 Dave Jones. <davej@codemonkey.org.uk> 2 * (C) 2001-2004 Dave Jones. <davej@redhat.com>
3 * (C) 2002 Padraig Brady. <padraig@antefacto.com> 3 * (C) 2002 Padraig Brady. <padraig@antefacto.com>
4 * 4 *
5 * Licensed under the terms of the GNU GPL License version 2. 5 * Licensed under the terms of the GNU GPL License version 2.
@@ -1019,7 +1019,7 @@ MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor");
1019module_param(revid_errata, int, 0644); 1019module_param(revid_errata, int, 0644);
1020MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID"); 1020MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID");
1021 1021
1022MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>"); 1022MODULE_AUTHOR ("Dave Jones <davej@redhat.com>");
1023MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors."); 1023MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors.");
1024MODULE_LICENSE ("GPL"); 1024MODULE_LICENSE ("GPL");
1025 1025
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
index b5ced806a316..c1ac5790c63e 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
@@ -246,7 +246,7 @@ static void __exit powernow_k6_exit(void)
246} 246}
247 247
248 248
249MODULE_AUTHOR("Arjan van de Ven <arjanv@redhat.com>, Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>"); 249MODULE_AUTHOR("Arjan van de Ven, Dave Jones <davej@redhat.com>, Dominik Brodowski <linux@brodo.de>");
250MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors."); 250MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors.");
251MODULE_LICENSE("GPL"); 251MODULE_LICENSE("GPL");
252 252
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
index 0a61159d7b71..7c7d56b43136 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * AMD K7 Powernow driver. 2 * AMD K7 Powernow driver.
3 * (C) 2003 Dave Jones <davej@codemonkey.org.uk> on behalf of SuSE Labs. 3 * (C) 2003 Dave Jones on behalf of SuSE Labs.
4 * (C) 2003-2004 Dave Jones <davej@redhat.com> 4 * (C) 2003-2004 Dave Jones <davej@redhat.com>
5 * 5 *
6 * Licensed under the terms of the GNU GPL License version 2. 6 * Licensed under the terms of the GNU GPL License version 2.
@@ -692,7 +692,7 @@ static void __exit powernow_exit (void)
692module_param(acpi_force, int, 0444); 692module_param(acpi_force, int, 0444);
693MODULE_PARM_DESC(acpi_force, "Force ACPI to be used."); 693MODULE_PARM_DESC(acpi_force, "Force ACPI to be used.");
694 694
695MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>"); 695MODULE_AUTHOR ("Dave Jones <davej@redhat.com>");
696MODULE_DESCRIPTION ("Powernow driver for AMD K7 processors."); 696MODULE_DESCRIPTION ("Powernow driver for AMD K7 processors.");
697MODULE_LICENSE ("GPL"); 697MODULE_LICENSE ("GPL");
698 698
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 84bb395038d8..008d23ba491b 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -7,7 +7,7 @@
7 * Support : mark.langsdorf@amd.com 7 * Support : mark.langsdorf@amd.com
8 * 8 *
9 * Based on the powernow-k7.c module written by Dave Jones. 9 * Based on the powernow-k7.c module written by Dave Jones.
10 * (C) 2003 Dave Jones <davej@codemonkey.org.uk> on behalf of SuSE Labs 10 * (C) 2003 Dave Jones on behalf of SuSE Labs
11 * (C) 2004 Dominik Brodowski <linux@brodo.de> 11 * (C) 2004 Dominik Brodowski <linux@brodo.de>
12 * (C) 2004 Pavel Machek <pavel@suse.cz> 12 * (C) 2004 Pavel Machek <pavel@suse.cz>
13 * Licensed under the terms of the GNU GPL License version 2. 13 * Licensed under the terms of the GNU GPL License version 2.
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 191f7263c61d..04d0376b64b0 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -431,7 +431,7 @@ static void __exit speedstep_exit(void)
431} 431}
432 432
433 433
434MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>"); 434MODULE_AUTHOR ("Dave Jones <davej@redhat.com>, Dominik Brodowski <linux@brodo.de>");
435MODULE_DESCRIPTION ("Speedstep driver for Intel mobile processors on chipsets with ICH-M southbridges."); 435MODULE_DESCRIPTION ("Speedstep driver for Intel mobile processors on chipsets with ICH-M southbridges.");
436MODULE_LICENSE ("GPL"); 436MODULE_LICENSE ("GPL");
437 437
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 99468dbd08da..cce0b6118d55 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -174,7 +174,7 @@ static void __cpuinit srat_detect_node(void)
174 node = first_node(node_online_map); 174 node = first_node(node_online_map);
175 numa_set_node(cpu, node); 175 numa_set_node(cpu, node);
176 176
177 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); 177 printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node);
178#endif 178#endif
179} 179}
180 180
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c
index f390c9f66351..dd3af6e7b39a 100644
--- a/arch/x86/kernel/cpu/mcheck/k7.c
+++ b/arch/x86/kernel/cpu/mcheck/k7.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Athlon/Hammer specific Machine Check Exception Reporting 2 * Athlon specific Machine Check Exception Reporting
3 * (C) Copyright 2002 Dave Jones <davej@codemonkey.org.uk> 3 * (C) Copyright 2002 Dave Jones <davej@redhat.com>
4 */ 4 */
5 5
6#include <linux/init.h> 6#include <linux/init.h>
diff --git a/arch/x86/kernel/cpu/mcheck/mce_32.c b/arch/x86/kernel/cpu/mcheck/mce_32.c
index 774d87cfd8cd..0ebf3fc6a610 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_32.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_32.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * mce.c - x86 Machine Check Exception Reporting 2 * mce.c - x86 Machine Check Exception Reporting
3 * (c) 2002 Alan Cox <alan@redhat.com>, Dave Jones <davej@codemonkey.org.uk> 3 * (c) 2002 Alan Cox <alan@redhat.com>, Dave Jones <davej@redhat.com>
4 */ 4 */
5 5
6#include <linux/init.h> 6#include <linux/init.h>
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c
index cc1fccdd31e0..a74af128efc9 100644
--- a/arch/x86/kernel/cpu/mcheck/non-fatal.c
+++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Non Fatal Machine Check Exception Reporting 2 * Non Fatal Machine Check Exception Reporting
3 * 3 *
4 * (C) Copyright 2002 Dave Jones. <davej@codemonkey.org.uk> 4 * (C) Copyright 2002 Dave Jones. <davej@redhat.com>
5 * 5 *
6 * This file contains routines to check for non-fatal MCEs every 15s 6 * This file contains routines to check for non-fatal MCEs every 15s
7 * 7 *
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 6bff382094f5..9abd48b22674 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -17,6 +17,8 @@
17#include <linux/bitops.h> 17#include <linux/bitops.h>
18#include <linux/smp.h> 18#include <linux/smp.h>
19#include <linux/nmi.h> 19#include <linux/nmi.h>
20#include <linux/kprobes.h>
21
20#include <asm/apic.h> 22#include <asm/apic.h>
21#include <asm/intel_arch_perfmon.h> 23#include <asm/intel_arch_perfmon.h>
22 24
@@ -336,7 +338,8 @@ static void single_msr_unreserve(void)
336 release_perfctr_nmi(wd_ops->perfctr); 338 release_perfctr_nmi(wd_ops->perfctr);
337} 339}
338 340
339static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) 341static void __kprobes
342single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
340{ 343{
341 /* start the cycle over again */ 344 /* start the cycle over again */
342 write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); 345 write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
@@ -401,7 +404,7 @@ static int setup_p6_watchdog(unsigned nmi_hz)
401 return 1; 404 return 1;
402} 405}
403 406
404static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) 407static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
405{ 408{
406 /* 409 /*
407 * P6 based Pentium M need to re-unmask 410 * P6 based Pentium M need to re-unmask
@@ -605,7 +608,7 @@ static void p4_unreserve(void)
605 release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); 608 release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
606} 609}
607 610
608static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) 611static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
609{ 612{
610 unsigned dummy; 613 unsigned dummy;
611 /* 614 /*
@@ -784,7 +787,7 @@ unsigned lapic_adjust_nmi_hz(unsigned hz)
784 return hz; 787 return hz;
785} 788}
786 789
787int lapic_wd_event(unsigned nmi_hz) 790int __kprobes lapic_wd_event(unsigned nmi_hz)
788{ 791{
789 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); 792 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
790 u64 ctr; 793 u64 ctr;
diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c
index 72d0c56c1b48..f7cdb3b457aa 100644
--- a/arch/x86/kernel/crash_dump_32.c
+++ b/arch/x86/kernel/crash_dump_32.c
@@ -13,6 +13,9 @@
13 13
14static void *kdump_buf_page; 14static void *kdump_buf_page;
15 15
16/* Stores the physical address of elf header of crash image. */
17unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
18
16/** 19/**
17 * copy_oldmem_page - copy one page from "oldmem" 20 * copy_oldmem_page - copy one page from "oldmem"
18 * @pfn: page frame number to be copied 21 * @pfn: page frame number to be copied
diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c
index e90a60ef10c2..045b36cada65 100644
--- a/arch/x86/kernel/crash_dump_64.c
+++ b/arch/x86/kernel/crash_dump_64.c
@@ -10,6 +10,9 @@
10#include <linux/uaccess.h> 10#include <linux/uaccess.h>
11#include <linux/io.h> 11#include <linux/io.h>
12 12
13/* Stores the physical address of elf header of crash image. */
14unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
15
13/** 16/**
14 * copy_oldmem_page - copy one page from "oldmem" 17 * copy_oldmem_page - copy one page from "oldmem"
15 * @pfn: page frame number to be copied 18 * @pfn: page frame number to be copied
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 945a31cdd81f..1119d247fe11 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -367,6 +367,10 @@ void __init efi_init(void)
367 efi.smbios = config_tables[i].table; 367 efi.smbios = config_tables[i].table;
368 printk(" SMBIOS=0x%lx ", config_tables[i].table); 368 printk(" SMBIOS=0x%lx ", config_tables[i].table);
369 } else if (!efi_guidcmp(config_tables[i].guid, 369 } else if (!efi_guidcmp(config_tables[i].guid,
370 UV_SYSTEM_TABLE_GUID)) {
371 efi.uv_systab = config_tables[i].table;
372 printk(" UVsystab=0x%lx ", config_tables[i].table);
373 } else if (!efi_guidcmp(config_tables[i].guid,
370 HCDP_TABLE_GUID)) { 374 HCDP_TABLE_GUID)) {
371 efi.hcdp = config_tables[i].table; 375 efi.hcdp = config_tables[i].table;
372 printk(" HCDP=0x%lx ", config_tables[i].table); 376 printk(" HCDP=0x%lx ", config_tables[i].table);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index b21fbfaffe39..c356423a6026 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -629,7 +629,7 @@ ENTRY(interrupt)
629ENTRY(irq_entries_start) 629ENTRY(irq_entries_start)
630 RING0_INT_FRAME 630 RING0_INT_FRAME
631vector=0 631vector=0
632.rept NR_IRQS 632.rept NR_VECTORS
633 ALIGN 633 ALIGN
634 .if vector 634 .if vector
635 CFI_ADJUST_CFA_OFFSET -4 635 CFI_ADJUST_CFA_OFFSET -4
@@ -1153,20 +1153,6 @@ ENDPROC(xen_failsafe_callback)
1153#ifdef CONFIG_DYNAMIC_FTRACE 1153#ifdef CONFIG_DYNAMIC_FTRACE
1154 1154
1155ENTRY(mcount) 1155ENTRY(mcount)
1156 pushl %eax
1157 pushl %ecx
1158 pushl %edx
1159 movl 0xc(%esp), %eax
1160 subl $MCOUNT_INSN_SIZE, %eax
1161
1162.globl mcount_call
1163mcount_call:
1164 call ftrace_stub
1165
1166 popl %edx
1167 popl %ecx
1168 popl %eax
1169
1170 ret 1156 ret
1171END(mcount) 1157END(mcount)
1172 1158
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 1db6ce4314e1..09e7145484c5 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -64,32 +64,6 @@
64#ifdef CONFIG_FTRACE 64#ifdef CONFIG_FTRACE
65#ifdef CONFIG_DYNAMIC_FTRACE 65#ifdef CONFIG_DYNAMIC_FTRACE
66ENTRY(mcount) 66ENTRY(mcount)
67
68 subq $0x38, %rsp
69 movq %rax, (%rsp)
70 movq %rcx, 8(%rsp)
71 movq %rdx, 16(%rsp)
72 movq %rsi, 24(%rsp)
73 movq %rdi, 32(%rsp)
74 movq %r8, 40(%rsp)
75 movq %r9, 48(%rsp)
76
77 movq 0x38(%rsp), %rdi
78 subq $MCOUNT_INSN_SIZE, %rdi
79
80.globl mcount_call
81mcount_call:
82 call ftrace_stub
83
84 movq 48(%rsp), %r9
85 movq 40(%rsp), %r8
86 movq 32(%rsp), %rdi
87 movq 24(%rsp), %rsi
88 movq 16(%rsp), %rdx
89 movq 8(%rsp), %rcx
90 movq (%rsp), %rax
91 addq $0x38, %rsp
92
93 retq 67 retq
94END(mcount) 68END(mcount)
95 69
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index ab115cd15fdf..d073d981a730 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -11,17 +11,18 @@
11 11
12#include <linux/spinlock.h> 12#include <linux/spinlock.h>
13#include <linux/hardirq.h> 13#include <linux/hardirq.h>
14#include <linux/uaccess.h>
14#include <linux/ftrace.h> 15#include <linux/ftrace.h>
15#include <linux/percpu.h> 16#include <linux/percpu.h>
16#include <linux/init.h> 17#include <linux/init.h>
17#include <linux/list.h> 18#include <linux/list.h>
18 19
19#include <asm/alternative.h>
20#include <asm/ftrace.h> 20#include <asm/ftrace.h>
21#include <asm/nops.h>
21 22
22 23
23/* Long is fine, even if it is only 4 bytes ;-) */ 24/* Long is fine, even if it is only 4 bytes ;-) */
24static long *ftrace_nop; 25static unsigned long *ftrace_nop;
25 26
26union ftrace_code_union { 27union ftrace_code_union {
27 char code[MCOUNT_INSN_SIZE]; 28 char code[MCOUNT_INSN_SIZE];
@@ -60,11 +61,7 @@ notrace int
60ftrace_modify_code(unsigned long ip, unsigned char *old_code, 61ftrace_modify_code(unsigned long ip, unsigned char *old_code,
61 unsigned char *new_code) 62 unsigned char *new_code)
62{ 63{
63 unsigned replaced; 64 unsigned char replaced[MCOUNT_INSN_SIZE];
64 unsigned old = *(unsigned *)old_code; /* 4 bytes */
65 unsigned new = *(unsigned *)new_code; /* 4 bytes */
66 unsigned char newch = new_code[4];
67 int faulted = 0;
68 65
69 /* 66 /*
70 * Note: Due to modules and __init, code can 67 * Note: Due to modules and __init, code can
@@ -72,29 +69,20 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
72 * as well as code changing. 69 * as well as code changing.
73 * 70 *
74 * No real locking needed, this code is run through 71 * No real locking needed, this code is run through
75 * kstop_machine. 72 * kstop_machine, or before SMP starts.
76 */ 73 */
77 asm volatile ( 74 if (__copy_from_user_inatomic(replaced, (char __user *)ip, MCOUNT_INSN_SIZE))
78 "1: lock\n" 75 return 1;
79 " cmpxchg %3, (%2)\n" 76
80 " jnz 2f\n" 77 if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
81 " movb %b4, 4(%2)\n" 78 return 2;
82 "2:\n"
83 ".section .fixup, \"ax\"\n"
84 "3: movl $1, %0\n"
85 " jmp 2b\n"
86 ".previous\n"
87 _ASM_EXTABLE(1b, 3b)
88 : "=r"(faulted), "=a"(replaced)
89 : "r"(ip), "r"(new), "c"(newch),
90 "0"(faulted), "a"(old)
91 : "memory");
92 sync_core();
93 79
94 if (replaced != old && replaced != new) 80 WARN_ON_ONCE(__copy_to_user_inatomic((char __user *)ip, new_code,
95 faulted = 2; 81 MCOUNT_INSN_SIZE));
96 82
97 return faulted; 83 sync_core();
84
85 return 0;
98} 86}
99 87
100notrace int ftrace_update_ftrace_func(ftrace_func_t func) 88notrace int ftrace_update_ftrace_func(ftrace_func_t func)
@@ -112,30 +100,76 @@ notrace int ftrace_update_ftrace_func(ftrace_func_t func)
112 100
113notrace int ftrace_mcount_set(unsigned long *data) 101notrace int ftrace_mcount_set(unsigned long *data)
114{ 102{
115 unsigned long ip = (long)(&mcount_call); 103 /* mcount is initialized as a nop */
116 unsigned long *addr = data; 104 *data = 0;
117 unsigned char old[MCOUNT_INSN_SIZE], *new;
118
119 /*
120 * Replace the mcount stub with a pointer to the
121 * ip recorder function.
122 */
123 memcpy(old, &mcount_call, MCOUNT_INSN_SIZE);
124 new = ftrace_call_replace(ip, *addr);
125 *addr = ftrace_modify_code(ip, old, new);
126
127 return 0; 105 return 0;
128} 106}
129 107
130int __init ftrace_dyn_arch_init(void *data) 108int __init ftrace_dyn_arch_init(void *data)
131{ 109{
132 const unsigned char *const *noptable = find_nop_table(); 110 extern const unsigned char ftrace_test_p6nop[];
133 111 extern const unsigned char ftrace_test_nop5[];
134 /* This is running in kstop_machine */ 112 extern const unsigned char ftrace_test_jmp[];
135 113 int faulted = 0;
136 ftrace_mcount_set(data);
137 114
138 ftrace_nop = (unsigned long *)noptable[MCOUNT_INSN_SIZE]; 115 /*
116 * There is no good nop for all x86 archs.
117 * We will default to using the P6_NOP5, but first we
118 * will test to make sure that the nop will actually
119 * work on this CPU. If it faults, we will then
120 * go to a lesser efficient 5 byte nop. If that fails
121 * we then just use a jmp as our nop. This isn't the most
122 * efficient nop, but we can not use a multi part nop
123 * since we would then risk being preempted in the middle
124 * of that nop, and if we enabled tracing then, it might
125 * cause a system crash.
126 *
127 * TODO: check the cpuid to determine the best nop.
128 */
129 asm volatile (
130 "jmp ftrace_test_jmp\n"
131 /* This code needs to stay around */
132 ".section .text, \"ax\"\n"
133 "ftrace_test_jmp:"
134 "jmp ftrace_test_p6nop\n"
135 "nop\n"
136 "nop\n"
137 "nop\n" /* 2 byte jmp + 3 bytes */
138 "ftrace_test_p6nop:"
139 P6_NOP5
140 "jmp 1f\n"
141 "ftrace_test_nop5:"
142 ".byte 0x66,0x66,0x66,0x66,0x90\n"
143 "jmp 1f\n"
144 ".previous\n"
145 "1:"
146 ".section .fixup, \"ax\"\n"
147 "2: movl $1, %0\n"
148 " jmp ftrace_test_nop5\n"
149 "3: movl $2, %0\n"
150 " jmp 1b\n"
151 ".previous\n"
152 _ASM_EXTABLE(ftrace_test_p6nop, 2b)
153 _ASM_EXTABLE(ftrace_test_nop5, 3b)
154 : "=r"(faulted) : "0" (faulted));
155
156 switch (faulted) {
157 case 0:
158 pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n");
159 ftrace_nop = (unsigned long *)ftrace_test_p6nop;
160 break;
161 case 1:
162 pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n");
163 ftrace_nop = (unsigned long *)ftrace_test_nop5;
164 break;
165 case 2:
166 pr_info("ftrace: converting mcount calls to jmp . + 5\n");
167 ftrace_nop = (unsigned long *)ftrace_test_jmp;
168 break;
169 }
170
171 /* The return code is retured via data */
172 *(unsigned long *)data = 0;
139 173
140 return 0; 174 return 0;
141} 175}
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index 9eca5ba7a6b1..2ec2de8d8c46 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -179,8 +179,10 @@ static int __init physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
179 * is an example). 179 * is an example).
180 */ 180 */
181 if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID && 181 if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
182 (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) 182 (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
183 printk(KERN_DEBUG "system APIC only can use physical flat");
183 return 1; 184 return 1;
185 }
184#endif 186#endif
185 187
186 return 0; 188 return 0;
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 33581d94a90e..bfd532843df6 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -341,12 +341,12 @@ static __init void map_mmioh_high(int max_pnode)
341 341
342static __init void uv_rtc_init(void) 342static __init void uv_rtc_init(void)
343{ 343{
344 long status, ticks_per_sec, drift; 344 long status;
345 u64 ticks_per_sec;
345 346
346 status = 347 status = uv_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK,
347 x86_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec, 348 &ticks_per_sec);
348 &drift); 349 if (status != BIOS_STATUS_SUCCESS || ticks_per_sec < 100000) {
349 if (status != 0 || ticks_per_sec < 100000) {
350 printk(KERN_WARNING 350 printk(KERN_WARNING
351 "unable to determine platform RTC clock frequency, " 351 "unable to determine platform RTC clock frequency, "
352 "guessing.\n"); 352 "guessing.\n");
@@ -356,7 +356,22 @@ static __init void uv_rtc_init(void)
356 sn_rtc_cycles_per_second = ticks_per_sec; 356 sn_rtc_cycles_per_second = ticks_per_sec;
357} 357}
358 358
359static bool uv_system_inited; 359/*
360 * Called on each cpu to initialize the per_cpu UV data area.
361 * ZZZ hotplug not supported yet
362 */
363void __cpuinit uv_cpu_init(void)
364{
365 /* CPU 0 initilization will be done via uv_system_init. */
366 if (!uv_blade_info)
367 return;
368
369 uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
370
371 if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
372 set_x2apic_extra_bits(uv_hub_info->pnode);
373}
374
360 375
361void __init uv_system_init(void) 376void __init uv_system_init(void)
362{ 377{
@@ -412,6 +427,9 @@ void __init uv_system_init(void)
412 gnode_upper = (((unsigned long)node_id.s.node_id) & 427 gnode_upper = (((unsigned long)node_id.s.node_id) &
413 ~((1 << n_val) - 1)) << m_val; 428 ~((1 << n_val) - 1)) << m_val;
414 429
430 uv_bios_init();
431 uv_bios_get_sn_info(0, &uv_type, &sn_partition_id,
432 &uv_coherency_id, &uv_region_size);
415 uv_rtc_init(); 433 uv_rtc_init();
416 434
417 for_each_present_cpu(cpu) { 435 for_each_present_cpu(cpu) {
@@ -433,7 +451,7 @@ void __init uv_system_init(void)
433 uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; 451 uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1;
434 uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; 452 uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
435 uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; 453 uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
436 uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */ 454 uv_cpu_hub_info(cpu)->coherency_domain_number = uv_coherency_id;
437 uv_node_to_blade[nid] = blade; 455 uv_node_to_blade[nid] = blade;
438 uv_cpu_to_blade[cpu] = blade; 456 uv_cpu_to_blade[cpu] = blade;
439 max_pnode = max(pnode, max_pnode); 457 max_pnode = max(pnode, max_pnode);
@@ -448,21 +466,6 @@ void __init uv_system_init(void)
448 map_mmr_high(max_pnode); 466 map_mmr_high(max_pnode);
449 map_config_high(max_pnode); 467 map_config_high(max_pnode);
450 map_mmioh_high(max_pnode); 468 map_mmioh_high(max_pnode);
451 uv_system_inited = true;
452}
453 469
454/* 470 uv_cpu_init();
455 * Called on each cpu to initialize the per_cpu UV data area.
456 * ZZZ hotplug not supported yet
457 */
458void __cpuinit uv_cpu_init(void)
459{
460 BUG_ON(!uv_system_inited);
461
462 uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
463
464 if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
465 set_x2apic_extra_bits(uv_hub_info->pnode);
466} 471}
467
468
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index acf62fc233da..77017e834cf7 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -1,29 +1,49 @@
1#include <linux/clocksource.h> 1#include <linux/clocksource.h>
2#include <linux/clockchips.h> 2#include <linux/clockchips.h>
3#include <linux/interrupt.h>
4#include <linux/sysdev.h>
3#include <linux/delay.h> 5#include <linux/delay.h>
4#include <linux/errno.h> 6#include <linux/errno.h>
5#include <linux/hpet.h> 7#include <linux/hpet.h>
6#include <linux/init.h> 8#include <linux/init.h>
7#include <linux/sysdev.h> 9#include <linux/cpu.h>
8#include <linux/pm.h> 10#include <linux/pm.h>
11#include <linux/io.h>
9 12
10#include <asm/fixmap.h> 13#include <asm/fixmap.h>
11#include <asm/hpet.h>
12#include <asm/i8253.h> 14#include <asm/i8253.h>
13#include <asm/io.h> 15#include <asm/hpet.h>
14 16
15#define HPET_MASK CLOCKSOURCE_MASK(32) 17#define HPET_MASK CLOCKSOURCE_MASK(32)
16#define HPET_SHIFT 22 18#define HPET_SHIFT 22
17 19
18/* FSEC = 10^-15 20/* FSEC = 10^-15
19 NSEC = 10^-9 */ 21 NSEC = 10^-9 */
20#define FSEC_PER_NSEC 1000000L 22#define FSEC_PER_NSEC 1000000L
23
24#define HPET_DEV_USED_BIT 2
25#define HPET_DEV_USED (1 << HPET_DEV_USED_BIT)
26#define HPET_DEV_VALID 0x8
27#define HPET_DEV_FSB_CAP 0x1000
28#define HPET_DEV_PERI_CAP 0x2000
29
30#define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt)
21 31
22/* 32/*
23 * HPET address is set in acpi/boot.c, when an ACPI entry exists 33 * HPET address is set in acpi/boot.c, when an ACPI entry exists
24 */ 34 */
25unsigned long hpet_address; 35unsigned long hpet_address;
26static void __iomem *hpet_virt_address; 36unsigned long hpet_num_timers;
37static void __iomem *hpet_virt_address;
38
39struct hpet_dev {
40 struct clock_event_device evt;
41 unsigned int num;
42 int cpu;
43 unsigned int irq;
44 unsigned int flags;
45 char name[10];
46};
27 47
28unsigned long hpet_readl(unsigned long a) 48unsigned long hpet_readl(unsigned long a)
29{ 49{
@@ -59,7 +79,7 @@ static inline void hpet_clear_mapping(void)
59static int boot_hpet_disable; 79static int boot_hpet_disable;
60int hpet_force_user; 80int hpet_force_user;
61 81
62static int __init hpet_setup(char* str) 82static int __init hpet_setup(char *str)
63{ 83{
64 if (str) { 84 if (str) {
65 if (!strncmp("disable", str, 7)) 85 if (!strncmp("disable", str, 7))
@@ -80,7 +100,7 @@ __setup("nohpet", disable_hpet);
80 100
81static inline int is_hpet_capable(void) 101static inline int is_hpet_capable(void)
82{ 102{
83 return (!boot_hpet_disable && hpet_address); 103 return !boot_hpet_disable && hpet_address;
84} 104}
85 105
86/* 106/*
@@ -102,6 +122,9 @@ EXPORT_SYMBOL_GPL(is_hpet_enabled);
102 * timer 0 and timer 1 in case of RTC emulation. 122 * timer 0 and timer 1 in case of RTC emulation.
103 */ 123 */
104#ifdef CONFIG_HPET 124#ifdef CONFIG_HPET
125
126static void hpet_reserve_msi_timers(struct hpet_data *hd);
127
105static void hpet_reserve_platform_timers(unsigned long id) 128static void hpet_reserve_platform_timers(unsigned long id)
106{ 129{
107 struct hpet __iomem *hpet = hpet_virt_address; 130 struct hpet __iomem *hpet = hpet_virt_address;
@@ -111,10 +134,10 @@ static void hpet_reserve_platform_timers(unsigned long id)
111 134
112 nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1; 135 nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
113 136
114 memset(&hd, 0, sizeof (hd)); 137 memset(&hd, 0, sizeof(hd));
115 hd.hd_phys_address = hpet_address; 138 hd.hd_phys_address = hpet_address;
116 hd.hd_address = hpet; 139 hd.hd_address = hpet;
117 hd.hd_nirqs = nrtimers; 140 hd.hd_nirqs = nrtimers;
118 hpet_reserve_timer(&hd, 0); 141 hpet_reserve_timer(&hd, 0);
119 142
120#ifdef CONFIG_HPET_EMULATE_RTC 143#ifdef CONFIG_HPET_EMULATE_RTC
@@ -130,10 +153,12 @@ static void hpet_reserve_platform_timers(unsigned long id)
130 hd.hd_irq[1] = HPET_LEGACY_RTC; 153 hd.hd_irq[1] = HPET_LEGACY_RTC;
131 154
132 for (i = 2; i < nrtimers; timer++, i++) { 155 for (i = 2; i < nrtimers; timer++, i++) {
133 hd.hd_irq[i] = (readl(&timer->hpet_config) & Tn_INT_ROUTE_CNF_MASK) >> 156 hd.hd_irq[i] = (readl(&timer->hpet_config) &
134 Tn_INT_ROUTE_CNF_SHIFT; 157 Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT;
135 } 158 }
136 159
160 hpet_reserve_msi_timers(&hd);
161
137 hpet_alloc(&hd); 162 hpet_alloc(&hd);
138 163
139} 164}
@@ -227,60 +252,70 @@ static void hpet_legacy_clockevent_register(void)
227 printk(KERN_DEBUG "hpet clockevent registered\n"); 252 printk(KERN_DEBUG "hpet clockevent registered\n");
228} 253}
229 254
230static void hpet_legacy_set_mode(enum clock_event_mode mode, 255static int hpet_setup_msi_irq(unsigned int irq);
231 struct clock_event_device *evt) 256
257static void hpet_set_mode(enum clock_event_mode mode,
258 struct clock_event_device *evt, int timer)
232{ 259{
233 unsigned long cfg, cmp, now; 260 unsigned long cfg, cmp, now;
234 uint64_t delta; 261 uint64_t delta;
235 262
236 switch(mode) { 263 switch (mode) {
237 case CLOCK_EVT_MODE_PERIODIC: 264 case CLOCK_EVT_MODE_PERIODIC:
238 delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * hpet_clockevent.mult; 265 delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult;
239 delta >>= hpet_clockevent.shift; 266 delta >>= evt->shift;
240 now = hpet_readl(HPET_COUNTER); 267 now = hpet_readl(HPET_COUNTER);
241 cmp = now + (unsigned long) delta; 268 cmp = now + (unsigned long) delta;
242 cfg = hpet_readl(HPET_T0_CFG); 269 cfg = hpet_readl(HPET_Tn_CFG(timer));
243 cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | 270 cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
244 HPET_TN_SETVAL | HPET_TN_32BIT; 271 HPET_TN_SETVAL | HPET_TN_32BIT;
245 hpet_writel(cfg, HPET_T0_CFG); 272 hpet_writel(cfg, HPET_Tn_CFG(timer));
246 /* 273 /*
247 * The first write after writing TN_SETVAL to the 274 * The first write after writing TN_SETVAL to the
248 * config register sets the counter value, the second 275 * config register sets the counter value, the second
249 * write sets the period. 276 * write sets the period.
250 */ 277 */
251 hpet_writel(cmp, HPET_T0_CMP); 278 hpet_writel(cmp, HPET_Tn_CMP(timer));
252 udelay(1); 279 udelay(1);
253 hpet_writel((unsigned long) delta, HPET_T0_CMP); 280 hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer));
254 break; 281 break;
255 282
256 case CLOCK_EVT_MODE_ONESHOT: 283 case CLOCK_EVT_MODE_ONESHOT:
257 cfg = hpet_readl(HPET_T0_CFG); 284 cfg = hpet_readl(HPET_Tn_CFG(timer));
258 cfg &= ~HPET_TN_PERIODIC; 285 cfg &= ~HPET_TN_PERIODIC;
259 cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; 286 cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
260 hpet_writel(cfg, HPET_T0_CFG); 287 hpet_writel(cfg, HPET_Tn_CFG(timer));
261 break; 288 break;
262 289
263 case CLOCK_EVT_MODE_UNUSED: 290 case CLOCK_EVT_MODE_UNUSED:
264 case CLOCK_EVT_MODE_SHUTDOWN: 291 case CLOCK_EVT_MODE_SHUTDOWN:
265 cfg = hpet_readl(HPET_T0_CFG); 292 cfg = hpet_readl(HPET_Tn_CFG(timer));
266 cfg &= ~HPET_TN_ENABLE; 293 cfg &= ~HPET_TN_ENABLE;
267 hpet_writel(cfg, HPET_T0_CFG); 294 hpet_writel(cfg, HPET_Tn_CFG(timer));
268 break; 295 break;
269 296
270 case CLOCK_EVT_MODE_RESUME: 297 case CLOCK_EVT_MODE_RESUME:
271 hpet_enable_legacy_int(); 298 if (timer == 0) {
299 hpet_enable_legacy_int();
300 } else {
301 struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
302 hpet_setup_msi_irq(hdev->irq);
303 disable_irq(hdev->irq);
304 irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu));
305 enable_irq(hdev->irq);
306 }
272 break; 307 break;
273 } 308 }
274} 309}
275 310
276static int hpet_legacy_next_event(unsigned long delta, 311static int hpet_next_event(unsigned long delta,
277 struct clock_event_device *evt) 312 struct clock_event_device *evt, int timer)
278{ 313{
279 u32 cnt; 314 u32 cnt;
280 315
281 cnt = hpet_readl(HPET_COUNTER); 316 cnt = hpet_readl(HPET_COUNTER);
282 cnt += (u32) delta; 317 cnt += (u32) delta;
283 hpet_writel(cnt, HPET_T0_CMP); 318 hpet_writel(cnt, HPET_Tn_CMP(timer));
284 319
285 /* 320 /*
286 * We need to read back the CMP register to make sure that 321 * We need to read back the CMP register to make sure that
@@ -292,6 +327,347 @@ static int hpet_legacy_next_event(unsigned long delta,
292 return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; 327 return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;
293} 328}
294 329
330static void hpet_legacy_set_mode(enum clock_event_mode mode,
331 struct clock_event_device *evt)
332{
333 hpet_set_mode(mode, evt, 0);
334}
335
/*
 * Legacy flavour of next-event programming: forwards to hpet_next_event()
 * with the timer index fixed to 0 and returns its result unchanged.
 */
static int hpet_legacy_next_event(unsigned long delta,
				  struct clock_event_device *evt)
{
	const int legacy_timer = 0;

	return hpet_next_event(delta, evt, legacy_timer);
}
341
342/*
343 * HPET MSI Support
344 */
345#ifdef CONFIG_PCI_MSI
346
347static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev);
348static struct hpet_dev *hpet_devs;
349
350void hpet_msi_unmask(unsigned int irq)
351{
352 struct hpet_dev *hdev = get_irq_data(irq);
353 unsigned long cfg;
354
355 /* unmask it */
356 cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
357 cfg |= HPET_TN_FSB;
358 hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
359}
360
361void hpet_msi_mask(unsigned int irq)
362{
363 unsigned long cfg;
364 struct hpet_dev *hdev = get_irq_data(irq);
365
366 /* mask it */
367 cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
368 cfg &= ~HPET_TN_FSB;
369 hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
370}
371
372void hpet_msi_write(unsigned int irq, struct msi_msg *msg)
373{
374 struct hpet_dev *hdev = get_irq_data(irq);
375
376 hpet_writel(msg->data, HPET_Tn_ROUTE(hdev->num));
377 hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hdev->num) + 4);
378}
379
380void hpet_msi_read(unsigned int irq, struct msi_msg *msg)
381{
382 struct hpet_dev *hdev = get_irq_data(irq);
383
384 msg->data = hpet_readl(HPET_Tn_ROUTE(hdev->num));
385 msg->address_lo = hpet_readl(HPET_Tn_ROUTE(hdev->num) + 4);
386 msg->address_hi = 0;
387}
388
389static void hpet_msi_set_mode(enum clock_event_mode mode,
390 struct clock_event_device *evt)
391{
392 struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
393 hpet_set_mode(mode, evt, hdev->num);
394}
395
396static int hpet_msi_next_event(unsigned long delta,
397 struct clock_event_device *evt)
398{
399 struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
400 return hpet_next_event(delta, evt, hdev->num);
401}
402
403static int hpet_setup_msi_irq(unsigned int irq)
404{
405 if (arch_setup_hpet_msi(irq)) {
406 destroy_irq(irq);
407 return -EINVAL;
408 }
409 return 0;
410}
411
412static int hpet_assign_irq(struct hpet_dev *dev)
413{
414 unsigned int irq;
415
416 irq = create_irq();
417 if (!irq)
418 return -EINVAL;
419
420 set_irq_data(irq, dev);
421
422 if (hpet_setup_msi_irq(irq))
423 return -EINVAL;
424
425 dev->irq = irq;
426 return 0;
427}
428
429static irqreturn_t hpet_interrupt_handler(int irq, void *data)
430{
431 struct hpet_dev *dev = (struct hpet_dev *)data;
432 struct clock_event_device *hevt = &dev->evt;
433
434 if (!hevt->event_handler) {
435 printk(KERN_INFO "Spurious HPET timer interrupt on HPET timer %d\n",
436 dev->num);
437 return IRQ_HANDLED;
438 }
439
440 hevt->event_handler(hevt);
441 return IRQ_HANDLED;
442}
443
444static int hpet_setup_irq(struct hpet_dev *dev)
445{
446
447 if (request_irq(dev->irq, hpet_interrupt_handler,
448 IRQF_SHARED|IRQF_NOBALANCING, dev->name, dev))
449 return -1;
450
451 disable_irq(dev->irq);
452 irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu));
453 enable_irq(dev->irq);
454
455 printk(KERN_DEBUG "hpet: %s irq %d for MSI\n",
456 dev->name, dev->irq);
457
458 return 0;
459}
460
461/* This should be called in specific @cpu */
462static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
463{
464 struct clock_event_device *evt = &hdev->evt;
465 uint64_t hpet_freq;
466
467 WARN_ON(cpu != smp_processor_id());
468 if (!(hdev->flags & HPET_DEV_VALID))
469 return;
470
471 if (hpet_setup_msi_irq(hdev->irq))
472 return;
473
474 hdev->cpu = cpu;
475 per_cpu(cpu_hpet_dev, cpu) = hdev;
476 evt->name = hdev->name;
477 hpet_setup_irq(hdev);
478 evt->irq = hdev->irq;
479
480 evt->rating = 110;
481 evt->features = CLOCK_EVT_FEAT_ONESHOT;
482 if (hdev->flags & HPET_DEV_PERI_CAP)
483 evt->features |= CLOCK_EVT_FEAT_PERIODIC;
484
485 evt->set_mode = hpet_msi_set_mode;
486 evt->set_next_event = hpet_msi_next_event;
487 evt->shift = 32;
488
489 /*
490 * The period is a femto seconds value. We need to calculate the
491 * scaled math multiplication factor for nanosecond to hpet tick
492 * conversion.
493 */
494 hpet_freq = 1000000000000000ULL;
495 do_div(hpet_freq, hpet_period);
496 evt->mult = div_sc((unsigned long) hpet_freq,
497 NSEC_PER_SEC, evt->shift);
498 /* Calculate the max delta */
499 evt->max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, evt);
500 /* 5 usec minimum reprogramming delta. */
501 evt->min_delta_ns = 5000;
502
503 evt->cpumask = cpumask_of_cpu(hdev->cpu);
504 clockevents_register_device(evt);
505}
506
507#ifdef CONFIG_HPET
508/* Reserve at least one timer for userspace (/dev/hpet) */
509#define RESERVE_TIMERS 1
510#else
511#define RESERVE_TIMERS 0
512#endif
513
514static void hpet_msi_capability_lookup(unsigned int start_timer)
515{
516 unsigned int id;
517 unsigned int num_timers;
518 unsigned int num_timers_used = 0;
519 int i;
520
521 id = hpet_readl(HPET_ID);
522
523 num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT);
524 num_timers++; /* Value read out starts from 0 */
525
526 hpet_devs = kzalloc(sizeof(struct hpet_dev) * num_timers, GFP_KERNEL);
527 if (!hpet_devs)
528 return;
529
530 hpet_num_timers = num_timers;
531
532 for (i = start_timer; i < num_timers - RESERVE_TIMERS; i++) {
533 struct hpet_dev *hdev = &hpet_devs[num_timers_used];
534 unsigned long cfg = hpet_readl(HPET_Tn_CFG(i));
535
536 /* Only consider HPET timer with MSI support */
537 if (!(cfg & HPET_TN_FSB_CAP))
538 continue;
539
540 hdev->flags = 0;
541 if (cfg & HPET_TN_PERIODIC_CAP)
542 hdev->flags |= HPET_DEV_PERI_CAP;
543 hdev->num = i;
544
545 sprintf(hdev->name, "hpet%d", i);
546 if (hpet_assign_irq(hdev))
547 continue;
548
549 hdev->flags |= HPET_DEV_FSB_CAP;
550 hdev->flags |= HPET_DEV_VALID;
551 num_timers_used++;
552 if (num_timers_used == num_possible_cpus())
553 break;
554 }
555
556 printk(KERN_INFO "HPET: %d timers in total, %d timers will be used for per-cpu timer\n",
557 num_timers, num_timers_used);
558}
559
560#ifdef CONFIG_HPET
561static void hpet_reserve_msi_timers(struct hpet_data *hd)
562{
563 int i;
564
565 if (!hpet_devs)
566 return;
567
568 for (i = 0; i < hpet_num_timers; i++) {
569 struct hpet_dev *hdev = &hpet_devs[i];
570
571 if (!(hdev->flags & HPET_DEV_VALID))
572 continue;
573
574 hd->hd_irq[hdev->num] = hdev->irq;
575 hpet_reserve_timer(hd, hdev->num);
576 }
577}
578#endif
579
580static struct hpet_dev *hpet_get_unused_timer(void)
581{
582 int i;
583
584 if (!hpet_devs)
585 return NULL;
586
587 for (i = 0; i < hpet_num_timers; i++) {
588 struct hpet_dev *hdev = &hpet_devs[i];
589
590 if (!(hdev->flags & HPET_DEV_VALID))
591 continue;
592 if (test_and_set_bit(HPET_DEV_USED_BIT,
593 (unsigned long *)&hdev->flags))
594 continue;
595 return hdev;
596 }
597 return NULL;
598}
599
600struct hpet_work_struct {
601 struct delayed_work work;
602 struct completion complete;
603};
604
605static void hpet_work(struct work_struct *w)
606{
607 struct hpet_dev *hdev;
608 int cpu = smp_processor_id();
609 struct hpet_work_struct *hpet_work;
610
611 hpet_work = container_of(w, struct hpet_work_struct, work.work);
612
613 hdev = hpet_get_unused_timer();
614 if (hdev)
615 init_one_hpet_msi_clockevent(hdev, cpu);
616
617 complete(&hpet_work->complete);
618}
619
620static int hpet_cpuhp_notify(struct notifier_block *n,
621 unsigned long action, void *hcpu)
622{
623 unsigned long cpu = (unsigned long)hcpu;
624 struct hpet_work_struct work;
625 struct hpet_dev *hdev = per_cpu(cpu_hpet_dev, cpu);
626
627 switch (action & 0xf) {
628 case CPU_ONLINE:
629 INIT_DELAYED_WORK(&work.work, hpet_work);
630 init_completion(&work.complete);
631 /* FIXME: add schedule_work_on() */
632 schedule_delayed_work_on(cpu, &work.work, 0);
633 wait_for_completion(&work.complete);
634 break;
635 case CPU_DEAD:
636 if (hdev) {
637 free_irq(hdev->irq, hdev);
638 hdev->flags &= ~HPET_DEV_USED;
639 per_cpu(cpu_hpet_dev, cpu) = NULL;
640 }
641 break;
642 }
643 return NOTIFY_OK;
644}
645#else
646
/* !CONFIG_PCI_MSI stub: nothing to set up, always reports success. */
static int hpet_setup_msi_irq(unsigned int irq)
{
	return 0;
}
/* !CONFIG_PCI_MSI stub: no MSI-capable timers are looked up. */
static void hpet_msi_capability_lookup(unsigned int start_timer)
{
}
655
656#ifdef CONFIG_HPET
/* !CONFIG_PCI_MSI stub: there are no MSI timers to reserve. */
static void hpet_reserve_msi_timers(struct hpet_data *hd)
{
}
661#endif
662
663static int hpet_cpuhp_notify(struct notifier_block *n,
664 unsigned long action, void *hcpu)
665{
666 return NOTIFY_OK;
667}
668
669#endif
670
295/* 671/*
296 * Clock source related code 672 * Clock source related code
297 */ 673 */
@@ -427,8 +803,10 @@ int __init hpet_enable(void)
427 803
428 if (id & HPET_ID_LEGSUP) { 804 if (id & HPET_ID_LEGSUP) {
429 hpet_legacy_clockevent_register(); 805 hpet_legacy_clockevent_register();
806 hpet_msi_capability_lookup(2);
430 return 1; 807 return 1;
431 } 808 }
809 hpet_msi_capability_lookup(0);
432 return 0; 810 return 0;
433 811
434out_nohpet: 812out_nohpet:
@@ -445,6 +823,8 @@ out_nohpet:
445 */ 823 */
446static __init int hpet_late_init(void) 824static __init int hpet_late_init(void)
447{ 825{
826 int cpu;
827
448 if (boot_hpet_disable) 828 if (boot_hpet_disable)
449 return -ENODEV; 829 return -ENODEV;
450 830
@@ -460,6 +840,13 @@ static __init int hpet_late_init(void)
460 840
461 hpet_reserve_platform_timers(hpet_readl(HPET_ID)); 841 hpet_reserve_platform_timers(hpet_readl(HPET_ID));
462 842
843 for_each_online_cpu(cpu) {
844 hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu);
845 }
846
847 /* This notifier should be called after workqueue is ready */
848 hotcpu_notifier(hpet_cpuhp_notify, -20);
849
463 return 0; 850 return 0;
464} 851}
465fs_initcall(hpet_late_init); 852fs_initcall(hpet_late_init);
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic.c
index 02063ae042f7..b764d7429c61 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic.c
@@ -27,17 +27,21 @@
27#include <linux/sched.h> 27#include <linux/sched.h>
28#include <linux/pci.h> 28#include <linux/pci.h>
29#include <linux/mc146818rtc.h> 29#include <linux/mc146818rtc.h>
30#include <linux/compiler.h>
30#include <linux/acpi.h> 31#include <linux/acpi.h>
32#include <linux/module.h>
31#include <linux/sysdev.h> 33#include <linux/sysdev.h>
32#include <linux/msi.h> 34#include <linux/msi.h>
33#include <linux/htirq.h> 35#include <linux/htirq.h>
34#include <linux/dmar.h> 36#include <linux/freezer.h>
35#include <linux/jiffies.h> 37#include <linux/kthread.h>
38#include <linux/jiffies.h> /* time_after() */
36#ifdef CONFIG_ACPI 39#ifdef CONFIG_ACPI
37#include <acpi/acpi_bus.h> 40#include <acpi/acpi_bus.h>
38#endif 41#endif
39#include <linux/bootmem.h> 42#include <linux/bootmem.h>
40#include <linux/dmar.h> 43#include <linux/dmar.h>
44#include <linux/hpet.h>
41 45
42#include <asm/idle.h> 46#include <asm/idle.h>
43#include <asm/io.h> 47#include <asm/io.h>
@@ -46,61 +50,28 @@
46#include <asm/proto.h> 50#include <asm/proto.h>
47#include <asm/acpi.h> 51#include <asm/acpi.h>
48#include <asm/dma.h> 52#include <asm/dma.h>
53#include <asm/timer.h>
49#include <asm/i8259.h> 54#include <asm/i8259.h>
50#include <asm/nmi.h> 55#include <asm/nmi.h>
51#include <asm/msidef.h> 56#include <asm/msidef.h>
52#include <asm/hypertransport.h> 57#include <asm/hypertransport.h>
58#include <asm/setup.h>
53#include <asm/irq_remapping.h> 59#include <asm/irq_remapping.h>
60#include <asm/hpet.h>
61#include <asm/uv/uv_hub.h>
62#include <asm/uv/uv_irq.h>
54 63
55#include <mach_ipi.h> 64#include <mach_ipi.h>
56#include <mach_apic.h> 65#include <mach_apic.h>
66#include <mach_apicdef.h>
57 67
58#define __apicdebuginit(type) static type __init 68#define __apicdebuginit(type) static type __init
59 69
60struct irq_cfg { 70/*
61 cpumask_t domain; 71 * Is the SiS APIC rmw bug present ?
62 cpumask_t old_domain; 72 * -1 = don't know, 0 = no, 1 = yes
63 unsigned move_cleanup_count; 73 */
64 u8 vector; 74int sis_apic_bug = -1;
65 u8 move_in_progress : 1;
66};
67
68/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
69static struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = {
70 [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, },
71 [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, },
72 [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, },
73 [3] = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, },
74 [4] = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, },
75 [5] = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, },
76 [6] = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, },
77 [7] = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, },
78 [8] = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, },
79 [9] = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, },
80 [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
81 [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
82 [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
83 [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
84 [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
85 [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
86};
87
88static int assign_irq_vector(int irq, cpumask_t mask);
89
90int first_system_vector = 0xfe;
91
92char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
93
94int sis_apic_bug; /* not actually supported, dummy for compile */
95
96static int no_timer_check;
97
98static int disable_timer_pin_1 __initdata;
99
100int timer_through_8259 __initdata;
101
102/* Where if anywhere is the i8259 connect in external int mode */
103static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
104 75
105static DEFINE_SPINLOCK(ioapic_lock); 76static DEFINE_SPINLOCK(ioapic_lock);
106static DEFINE_SPINLOCK(vector_lock); 77static DEFINE_SPINLOCK(vector_lock);
@@ -110,9 +81,6 @@ static DEFINE_SPINLOCK(vector_lock);
110 */ 81 */
111int nr_ioapic_registers[MAX_IO_APICS]; 82int nr_ioapic_registers[MAX_IO_APICS];
112 83
113/* I/O APIC RTE contents at the OS boot up */
114struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
115
116/* I/O APIC entries */ 84/* I/O APIC entries */
117struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; 85struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
118int nr_ioapics; 86int nr_ioapics;
@@ -123,11 +91,69 @@ struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
123/* # of MP IRQ source entries */ 91/* # of MP IRQ source entries */
124int mp_irq_entries; 92int mp_irq_entries;
125 93
94#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
95int mp_bus_id_to_type[MAX_MP_BUSSES];
96#endif
97
126DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); 98DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
127 99
100int skip_ioapic_setup;
101
102static int __init parse_noapic(char *str)
103{
104 /* disable IO-APIC */
105 disable_ioapic_setup();
106 return 0;
107}
108early_param("noapic", parse_noapic);
109
110struct irq_pin_list;
111struct irq_cfg {
112 unsigned int irq;
113 struct irq_pin_list *irq_2_pin;
114 cpumask_t domain;
115 cpumask_t old_domain;
116 unsigned move_cleanup_count;
117 u8 vector;
118 u8 move_in_progress : 1;
119};
120
121/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
122static struct irq_cfg irq_cfgx[NR_IRQS] = {
123 [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, },
124 [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, },
125 [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, },
126 [3] = { .irq = 3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, },
127 [4] = { .irq = 4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, },
128 [5] = { .irq = 5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, },
129 [6] = { .irq = 6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, },
130 [7] = { .irq = 7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, },
131 [8] = { .irq = 8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, },
132 [9] = { .irq = 9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, },
133 [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
134 [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
135 [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
136 [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
137 [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
138 [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
139};
140
/*
 * Walk irq_cfgx[]: @irq runs from 0 to nr_irqs - 1 and @cfg points at the
 * matching entry.  Both arguments must be assignable lvalues; they are
 * parenthesised so that non-trivial lvalue expressions expand safely.
 */
#define for_each_irq_cfg(irq, cfg)		\
	for ((irq) = 0, (cfg) = irq_cfgx; (irq) < nr_irqs; (irq)++, (cfg)++)
143
144static struct irq_cfg *irq_cfg(unsigned int irq)
145{
146 return irq < nr_irqs ? irq_cfgx + irq : NULL;
147}
148
/*
 * "Allocate" the irq_cfg for @irq.  The entries live in the static
 * irq_cfgx[] table, so this is just a lookup and may return NULL.
 */
static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
{
	struct irq_cfg *cfg = irq_cfg(irq);

	return cfg;
}
153
128/* 154/*
129 * Rough estimation of how many shared IRQs there are, can 155 * Rough estimation of how many shared IRQs there are, can be changed
130 * be changed anytime. 156 * anytime.
131 */ 157 */
132#define MAX_PLUS_SHARED_IRQS NR_IRQS 158#define MAX_PLUS_SHARED_IRQS NR_IRQS
133#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) 159#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
@@ -139,9 +165,36 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
139 * between pins and IRQs. 165 * between pins and IRQs.
140 */ 166 */
141 167
142static struct irq_pin_list { 168struct irq_pin_list {
143 short apic, pin, next; 169 int apic, pin;
144} irq_2_pin[PIN_MAP_SIZE]; 170 struct irq_pin_list *next;
171};
172
173static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE];
174static struct irq_pin_list *irq_2_pin_ptr;
175
176static void __init irq_2_pin_init(void)
177{
178 struct irq_pin_list *pin = irq_2_pin_head;
179 int i;
180
181 for (i = 1; i < PIN_MAP_SIZE; i++)
182 pin[i-1].next = &pin[i];
183
184 irq_2_pin_ptr = &pin[0];
185}
186
187static struct irq_pin_list *get_one_free_irq_2_pin(void)
188{
189 struct irq_pin_list *pin = irq_2_pin_ptr;
190
191 if (!pin)
192 panic("can not get more irq_2_pin\n");
193
194 irq_2_pin_ptr = pin->next;
195 pin->next = NULL;
196 return pin;
197}
145 198
146struct io_apic { 199struct io_apic {
147 unsigned int index; 200 unsigned int index;
@@ -172,10 +225,15 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i
172/* 225/*
173 * Re-write a value: to be used for read-modify-write 226 * Re-write a value: to be used for read-modify-write
174 * cycles where the read already set up the index register. 227 * cycles where the read already set up the index register.
228 *
229 * Older SiS APIC requires we rewrite the index register
175 */ 230 */
176static inline void io_apic_modify(unsigned int apic, unsigned int value) 231static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
177{ 232{
178 struct io_apic __iomem *io_apic = io_apic_base(apic); 233 struct io_apic __iomem *io_apic = io_apic_base(apic);
234
235 if (sis_apic_bug)
236 writel(reg, &io_apic->index);
179 writel(value, &io_apic->data); 237 writel(value, &io_apic->data);
180} 238}
181 239
@@ -183,16 +241,17 @@ static bool io_apic_level_ack_pending(unsigned int irq)
183{ 241{
184 struct irq_pin_list *entry; 242 struct irq_pin_list *entry;
185 unsigned long flags; 243 unsigned long flags;
244 struct irq_cfg *cfg = irq_cfg(irq);
186 245
187 spin_lock_irqsave(&ioapic_lock, flags); 246 spin_lock_irqsave(&ioapic_lock, flags);
188 entry = irq_2_pin + irq; 247 entry = cfg->irq_2_pin;
189 for (;;) { 248 for (;;) {
190 unsigned int reg; 249 unsigned int reg;
191 int pin; 250 int pin;
192 251
193 pin = entry->pin; 252 if (!entry)
194 if (pin == -1)
195 break; 253 break;
254 pin = entry->pin;
196 reg = io_apic_read(entry->apic, 0x10 + pin*2); 255 reg = io_apic_read(entry->apic, 0x10 + pin*2);
197 /* Is the remote IRR bit set? */ 256 /* Is the remote IRR bit set? */
198 if (reg & IO_APIC_REDIR_REMOTE_IRR) { 257 if (reg & IO_APIC_REDIR_REMOTE_IRR) {
@@ -201,45 +260,13 @@ static bool io_apic_level_ack_pending(unsigned int irq)
201 } 260 }
202 if (!entry->next) 261 if (!entry->next)
203 break; 262 break;
204 entry = irq_2_pin + entry->next; 263 entry = entry->next;
205 } 264 }
206 spin_unlock_irqrestore(&ioapic_lock, flags); 265 spin_unlock_irqrestore(&ioapic_lock, flags);
207 266
208 return false; 267 return false;
209} 268}
210 269
211/*
212 * Synchronize the IO-APIC and the CPU by doing
213 * a dummy read from the IO-APIC
214 */
215static inline void io_apic_sync(unsigned int apic)
216{
217 struct io_apic __iomem *io_apic = io_apic_base(apic);
218 readl(&io_apic->data);
219}
220
221#define __DO_ACTION(R, ACTION, FINAL) \
222 \
223{ \
224 int pin; \
225 struct irq_pin_list *entry = irq_2_pin + irq; \
226 \
227 BUG_ON(irq >= NR_IRQS); \
228 for (;;) { \
229 unsigned int reg; \
230 pin = entry->pin; \
231 if (pin == -1) \
232 break; \
233 reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \
234 reg ACTION; \
235 io_apic_modify(entry->apic, reg); \
236 FINAL; \
237 if (!entry->next) \
238 break; \
239 entry = irq_2_pin + entry->next; \
240 } \
241}
242
243union entry_union { 270union entry_union {
244 struct { u32 w1, w2; }; 271 struct { u32 w1, w2; };
245 struct IO_APIC_route_entry entry; 272 struct IO_APIC_route_entry entry;
@@ -299,59 +326,71 @@ static void ioapic_mask_entry(int apic, int pin)
299static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) 326static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
300{ 327{
301 int apic, pin; 328 int apic, pin;
302 struct irq_pin_list *entry = irq_2_pin + irq; 329 struct irq_cfg *cfg;
330 struct irq_pin_list *entry;
303 331
304 BUG_ON(irq >= NR_IRQS); 332 cfg = irq_cfg(irq);
333 entry = cfg->irq_2_pin;
305 for (;;) { 334 for (;;) {
306 unsigned int reg; 335 unsigned int reg;
336
337 if (!entry)
338 break;
339
307 apic = entry->apic; 340 apic = entry->apic;
308 pin = entry->pin; 341 pin = entry->pin;
309 if (pin == -1) 342#ifdef CONFIG_INTR_REMAP
310 break;
311 /* 343 /*
312 * With interrupt-remapping, destination information comes 344 * With interrupt-remapping, destination information comes
313 * from interrupt-remapping table entry. 345 * from interrupt-remapping table entry.
314 */ 346 */
315 if (!irq_remapped(irq)) 347 if (!irq_remapped(irq))
316 io_apic_write(apic, 0x11 + pin*2, dest); 348 io_apic_write(apic, 0x11 + pin*2, dest);
349#else
350 io_apic_write(apic, 0x11 + pin*2, dest);
351#endif
317 reg = io_apic_read(apic, 0x10 + pin*2); 352 reg = io_apic_read(apic, 0x10 + pin*2);
318 reg &= ~IO_APIC_REDIR_VECTOR_MASK; 353 reg &= ~IO_APIC_REDIR_VECTOR_MASK;
319 reg |= vector; 354 reg |= vector;
320 io_apic_modify(apic, reg); 355 io_apic_modify(apic, 0x10 + pin*2, reg);
321 if (!entry->next) 356 if (!entry->next)
322 break; 357 break;
323 entry = irq_2_pin + entry->next; 358 entry = entry->next;
324 } 359 }
325} 360}
326 361
362static int assign_irq_vector(int irq, cpumask_t mask);
363
327static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) 364static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
328{ 365{
329 struct irq_cfg *cfg = irq_cfg + irq; 366 struct irq_cfg *cfg;
330 unsigned long flags; 367 unsigned long flags;
331 unsigned int dest; 368 unsigned int dest;
332 cpumask_t tmp; 369 cpumask_t tmp;
370 struct irq_desc *desc;
333 371
334 cpus_and(tmp, mask, cpu_online_map); 372 cpus_and(tmp, mask, cpu_online_map);
335 if (cpus_empty(tmp)) 373 if (cpus_empty(tmp))
336 return; 374 return;
337 375
376 cfg = irq_cfg(irq);
338 if (assign_irq_vector(irq, mask)) 377 if (assign_irq_vector(irq, mask))
339 return; 378 return;
340 379
341 cpus_and(tmp, cfg->domain, mask); 380 cpus_and(tmp, cfg->domain, mask);
342 dest = cpu_mask_to_apicid(tmp); 381 dest = cpu_mask_to_apicid(tmp);
343
344 /* 382 /*
345 * Only the high 8 bits are valid. 383 * Only the high 8 bits are valid.
346 */ 384 */
347 dest = SET_APIC_LOGICAL_ID(dest); 385 dest = SET_APIC_LOGICAL_ID(dest);
348 386
387 desc = irq_to_desc(irq);
349 spin_lock_irqsave(&ioapic_lock, flags); 388 spin_lock_irqsave(&ioapic_lock, flags);
350 __target_IO_APIC_irq(irq, dest, cfg->vector); 389 __target_IO_APIC_irq(irq, dest, cfg->vector);
351 irq_desc[irq].affinity = mask; 390 desc->affinity = mask;
352 spin_unlock_irqrestore(&ioapic_lock, flags); 391 spin_unlock_irqrestore(&ioapic_lock, flags);
353} 392}
354#endif 393#endif /* CONFIG_SMP */
355 394
356/* 395/*
357 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are 396 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
@@ -360,19 +399,30 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
360 */ 399 */
361static void add_pin_to_irq(unsigned int irq, int apic, int pin) 400static void add_pin_to_irq(unsigned int irq, int apic, int pin)
362{ 401{
363 static int first_free_entry = NR_IRQS; 402 struct irq_cfg *cfg;
364 struct irq_pin_list *entry = irq_2_pin + irq; 403 struct irq_pin_list *entry;
404
405 /* first time to refer irq_cfg, so with new */
406 cfg = irq_cfg_alloc(irq);
407 entry = cfg->irq_2_pin;
408 if (!entry) {
409 entry = get_one_free_irq_2_pin();
410 cfg->irq_2_pin = entry;
411 entry->apic = apic;
412 entry->pin = pin;
413 return;
414 }
365 415
366 BUG_ON(irq >= NR_IRQS); 416 while (entry->next) {
367 while (entry->next) 417 /* not again, please */
368 entry = irq_2_pin + entry->next; 418 if (entry->apic == apic && entry->pin == pin)
419 return;
369 420
370 if (entry->pin != -1) { 421 entry = entry->next;
371 entry->next = first_free_entry;
372 entry = irq_2_pin + entry->next;
373 if (++first_free_entry >= PIN_MAP_SIZE)
374 panic("io_apic.c: ran out of irq_2_pin entries!");
375 } 422 }
423
424 entry->next = get_one_free_irq_2_pin();
425 entry = entry->next;
376 entry->apic = apic; 426 entry->apic = apic;
377 entry->pin = pin; 427 entry->pin = pin;
378} 428}
@@ -384,30 +434,86 @@ static void __init replace_pin_at_irq(unsigned int irq,
384 int oldapic, int oldpin, 434 int oldapic, int oldpin,
385 int newapic, int newpin) 435 int newapic, int newpin)
386{ 436{
387 struct irq_pin_list *entry = irq_2_pin + irq; 437 struct irq_cfg *cfg = irq_cfg(irq);
438 struct irq_pin_list *entry = cfg->irq_2_pin;
439 int replaced = 0;
388 440
389 while (1) { 441 while (entry) {
390 if (entry->apic == oldapic && entry->pin == oldpin) { 442 if (entry->apic == oldapic && entry->pin == oldpin) {
391 entry->apic = newapic; 443 entry->apic = newapic;
392 entry->pin = newpin; 444 entry->pin = newpin;
393 } 445 replaced = 1;
394 if (!entry->next) 446 /* every one is different, right? */
395 break; 447 break;
396 entry = irq_2_pin + entry->next; 448 }
449 entry = entry->next;
450 }
451
452 /* why? call replace before add? */
453 if (!replaced)
454 add_pin_to_irq(irq, newapic, newpin);
455}
456
457static inline void io_apic_modify_irq(unsigned int irq,
458 int mask_and, int mask_or,
459 void (*final)(struct irq_pin_list *entry))
460{
461 int pin;
462 struct irq_cfg *cfg;
463 struct irq_pin_list *entry;
464
465 cfg = irq_cfg(irq);
466 for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
467 unsigned int reg;
468 pin = entry->pin;
469 reg = io_apic_read(entry->apic, 0x10 + pin * 2);
470 reg &= mask_and;
471 reg |= mask_or;
472 io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
473 if (final)
474 final(entry);
397 } 475 }
398} 476}
399 477
478static void __unmask_IO_APIC_irq(unsigned int irq)
479{
480 io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL);
481}
400 482
401#define DO_ACTION(name,R,ACTION, FINAL) \ 483#ifdef CONFIG_X86_64
402 \ 484void io_apic_sync(struct irq_pin_list *entry)
403 static void name##_IO_APIC_irq (unsigned int irq) \ 485{
404 __DO_ACTION(R, ACTION, FINAL) 486 /*
487 * Synchronize the IO-APIC and the CPU by doing
488 * a dummy read from the IO-APIC
489 */
490 struct io_apic __iomem *io_apic;
491 io_apic = io_apic_base(entry->apic);
492 readl(&io_apic->data);
493}
405 494
406/* mask = 1 */ 495static void __mask_IO_APIC_irq(unsigned int irq)
407DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic)) 496{
497 io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
498}
499#else /* CONFIG_X86_32 */
500static void __mask_IO_APIC_irq(unsigned int irq)
501{
502 io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL);
503}
408 504
409/* mask = 0 */ 505static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
410DO_ACTION(__unmask, 0, &= ~IO_APIC_REDIR_MASKED, ) 506{
507 io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER,
508 IO_APIC_REDIR_MASKED, NULL);
509}
510
511static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
512{
513 io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED,
514 IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
515}
516#endif /* CONFIG_X86_32 */
411 517
412static void mask_IO_APIC_irq (unsigned int irq) 518static void mask_IO_APIC_irq (unsigned int irq)
413{ 519{
@@ -450,6 +556,68 @@ static void clear_IO_APIC (void)
450 clear_IO_APIC_pin(apic, pin); 556 clear_IO_APIC_pin(apic, pin);
451} 557}
452 558
559#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32)
560void send_IPI_self(int vector)
561{
562 unsigned int cfg;
563
564 /*
565 * Wait for idle.
566 */
567 apic_wait_icr_idle();
568 cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
569 /*
570 * Send the IPI. The write to APIC_ICR fires this off.
571 */
572 apic_write(APIC_ICR, cfg);
573}
574#endif /* !CONFIG_SMP && CONFIG_X86_32*/
575
576#ifdef CONFIG_X86_32
577/*
578 * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
579 * specific CPU-side IRQs.
580 */
581
582#define MAX_PIRQS 8
583static int pirq_entries [MAX_PIRQS];
584static int pirqs_enabled;
585
586static int __init ioapic_pirq_setup(char *str)
587{
588 int i, max;
589 int ints[MAX_PIRQS+1];
590
591 get_options(str, ARRAY_SIZE(ints), ints);
592
593 for (i = 0; i < MAX_PIRQS; i++)
594 pirq_entries[i] = -1;
595
596 pirqs_enabled = 1;
597 apic_printk(APIC_VERBOSE, KERN_INFO
598 "PIRQ redirection, working around broken MP-BIOS.\n");
599 max = MAX_PIRQS;
600 if (ints[0] < MAX_PIRQS)
601 max = ints[0];
602
603 for (i = 0; i < max; i++) {
604 apic_printk(APIC_VERBOSE, KERN_DEBUG
605 "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
606 /*
607 * PIRQs are mapped upside down, usually.
608 */
609 pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
610 }
611 return 1;
612}
613
614__setup("pirq=", ioapic_pirq_setup);
615#endif /* CONFIG_X86_32 */
616
617#ifdef CONFIG_INTR_REMAP
618/* I/O APIC RTE contents at the OS boot up */
619static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
620
453/* 621/*
454 * Saves and masks all the unmasked IO-APIC RTE's 622 * Saves and masks all the unmasked IO-APIC RTE's
455 */ 623 */
@@ -474,7 +642,7 @@ int save_mask_IO_APIC_setup(void)
474 kzalloc(sizeof(struct IO_APIC_route_entry) * 642 kzalloc(sizeof(struct IO_APIC_route_entry) *
475 nr_ioapic_registers[apic], GFP_KERNEL); 643 nr_ioapic_registers[apic], GFP_KERNEL);
476 if (!early_ioapic_entries[apic]) 644 if (!early_ioapic_entries[apic])
477 return -ENOMEM; 645 goto nomem;
478 } 646 }
479 647
480 for (apic = 0; apic < nr_ioapics; apic++) 648 for (apic = 0; apic < nr_ioapics; apic++)
@@ -488,17 +656,31 @@ int save_mask_IO_APIC_setup(void)
488 ioapic_write_entry(apic, pin, entry); 656 ioapic_write_entry(apic, pin, entry);
489 } 657 }
490 } 658 }
659
491 return 0; 660 return 0;
661
662nomem:
663 while (apic >= 0)
664 kfree(early_ioapic_entries[apic--]);
665 memset(early_ioapic_entries, 0,
666 ARRAY_SIZE(early_ioapic_entries));
667
668 return -ENOMEM;
492} 669}
493 670
494void restore_IO_APIC_setup(void) 671void restore_IO_APIC_setup(void)
495{ 672{
496 int apic, pin; 673 int apic, pin;
497 674
498 for (apic = 0; apic < nr_ioapics; apic++) 675 for (apic = 0; apic < nr_ioapics; apic++) {
676 if (!early_ioapic_entries[apic])
677 break;
499 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) 678 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
500 ioapic_write_entry(apic, pin, 679 ioapic_write_entry(apic, pin,
501 early_ioapic_entries[apic][pin]); 680 early_ioapic_entries[apic][pin]);
681 kfree(early_ioapic_entries[apic]);
682 early_ioapic_entries[apic] = NULL;
683 }
502} 684}
503 685
504void reinit_intr_remapped_IO_APIC(int intr_remapping) 686void reinit_intr_remapped_IO_APIC(int intr_remapping)
@@ -512,25 +694,7 @@ void reinit_intr_remapped_IO_APIC(int intr_remapping)
512 */ 694 */
513 restore_IO_APIC_setup(); 695 restore_IO_APIC_setup();
514} 696}
515 697#endif
516int skip_ioapic_setup;
517int ioapic_force;
518
519static int __init parse_noapic(char *str)
520{
521 disable_ioapic_setup();
522 return 0;
523}
524early_param("noapic", parse_noapic);
525
526/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
527static int __init disable_timer_pin_setup(char *arg)
528{
529 disable_timer_pin_1 = 1;
530 return 1;
531}
532__setup("disable_timer_pin_1", disable_timer_pin_setup);
533
534 698
535/* 699/*
536 * Find the IRQ entry number of a certain pin. 700 * Find the IRQ entry number of a certain pin.
@@ -634,22 +798,54 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
634 best_guess = irq; 798 best_guess = irq;
635 } 799 }
636 } 800 }
637 BUG_ON(best_guess >= NR_IRQS);
638 return best_guess; 801 return best_guess;
639} 802}
640 803
804EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
805
806#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
807/*
808 * EISA Edge/Level control register, ELCR
809 */
810static int EISA_ELCR(unsigned int irq)
811{
812 if (irq < 16) {
813 unsigned int port = 0x4d0 + (irq >> 3);
814 return (inb(port) >> (irq & 7)) & 1;
815 }
816 apic_printk(APIC_VERBOSE, KERN_INFO
817 "Broken MPtable reports ISA irq %d\n", irq);
818 return 0;
819}
820
821#endif
822
641/* ISA interrupts are always polarity zero edge triggered, 823/* ISA interrupts are always polarity zero edge triggered,
642 * when listed as conforming in the MP table. */ 824 * when listed as conforming in the MP table. */
643 825
644#define default_ISA_trigger(idx) (0) 826#define default_ISA_trigger(idx) (0)
645#define default_ISA_polarity(idx) (0) 827#define default_ISA_polarity(idx) (0)
646 828
829/* EISA interrupts are always polarity zero and can be edge or level
830 * trigger depending on the ELCR value. If an interrupt is listed as
831 * EISA conforming in the MP table, that means its trigger type must
832 * be read in from the ELCR */
833
834#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
835#define default_EISA_polarity(idx) default_ISA_polarity(idx)
836
647/* PCI interrupts are always polarity one level triggered, 837/* PCI interrupts are always polarity one level triggered,
648 * when listed as conforming in the MP table. */ 838 * when listed as conforming in the MP table. */
649 839
650#define default_PCI_trigger(idx) (1) 840#define default_PCI_trigger(idx) (1)
651#define default_PCI_polarity(idx) (1) 841#define default_PCI_polarity(idx) (1)
652 842
843/* MCA interrupts are always polarity zero level triggered,
844 * when listed as conforming in the MP table. */
845
846#define default_MCA_trigger(idx) (1)
847#define default_MCA_polarity(idx) default_ISA_polarity(idx)
848
653static int MPBIOS_polarity(int idx) 849static int MPBIOS_polarity(int idx)
654{ 850{
655 int bus = mp_irqs[idx].mp_srcbus; 851 int bus = mp_irqs[idx].mp_srcbus;
@@ -707,6 +903,36 @@ static int MPBIOS_trigger(int idx)
707 trigger = default_ISA_trigger(idx); 903 trigger = default_ISA_trigger(idx);
708 else 904 else
709 trigger = default_PCI_trigger(idx); 905 trigger = default_PCI_trigger(idx);
906#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
907 switch (mp_bus_id_to_type[bus]) {
908 case MP_BUS_ISA: /* ISA pin */
909 {
910 /* set before the switch */
911 break;
912 }
913 case MP_BUS_EISA: /* EISA pin */
914 {
915 trigger = default_EISA_trigger(idx);
916 break;
917 }
918 case MP_BUS_PCI: /* PCI pin */
919 {
920 /* set before the switch */
921 break;
922 }
923 case MP_BUS_MCA: /* MCA pin */
924 {
925 trigger = default_MCA_trigger(idx);
926 break;
927 }
928 default:
929 {
930 printk(KERN_WARNING "broken BIOS!!\n");
931 trigger = 1;
932 break;
933 }
934 }
935#endif
710 break; 936 break;
711 case 1: /* edge */ 937 case 1: /* edge */
712 { 938 {
@@ -744,6 +970,7 @@ static inline int irq_trigger(int idx)
744 return MPBIOS_trigger(idx); 970 return MPBIOS_trigger(idx);
745} 971}
746 972
973int (*ioapic_renumber_irq)(int ioapic, int irq);
747static int pin_2_irq(int idx, int apic, int pin) 974static int pin_2_irq(int idx, int apic, int pin)
748{ 975{
749 int irq, i; 976 int irq, i;
@@ -765,8 +992,32 @@ static int pin_2_irq(int idx, int apic, int pin)
765 while (i < apic) 992 while (i < apic)
766 irq += nr_ioapic_registers[i++]; 993 irq += nr_ioapic_registers[i++];
767 irq += pin; 994 irq += pin;
995 /*
996 * For MPS mode, so far only needed by ES7000 platform
997 */
998 if (ioapic_renumber_irq)
999 irq = ioapic_renumber_irq(apic, irq);
768 } 1000 }
769 BUG_ON(irq >= NR_IRQS); 1001
1002#ifdef CONFIG_X86_32
1003 /*
1004 * PCI IRQ command line redirection. Yes, limits are hardcoded.
1005 */
1006 if ((pin >= 16) && (pin <= 23)) {
1007 if (pirq_entries[pin-16] != -1) {
1008 if (!pirq_entries[pin-16]) {
1009 apic_printk(APIC_VERBOSE, KERN_DEBUG
1010 "disabling PIRQ%d\n", pin-16);
1011 } else {
1012 irq = pirq_entries[pin-16];
1013 apic_printk(APIC_VERBOSE, KERN_DEBUG
1014 "using PIRQ%d -> IRQ %d\n",
1015 pin-16, irq);
1016 }
1017 }
1018 }
1019#endif
1020
770 return irq; 1021 return irq;
771} 1022}
772 1023
@@ -801,8 +1052,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
801 int cpu; 1052 int cpu;
802 struct irq_cfg *cfg; 1053 struct irq_cfg *cfg;
803 1054
804 BUG_ON((unsigned)irq >= NR_IRQS); 1055 cfg = irq_cfg(irq);
805 cfg = &irq_cfg[irq];
806 1056
807 /* Only try and allocate irqs on cpus that are present */ 1057 /* Only try and allocate irqs on cpus that are present */
808 cpus_and(mask, mask, cpu_online_map); 1058 cpus_and(mask, mask, cpu_online_map);
@@ -837,8 +1087,13 @@ next:
837 } 1087 }
838 if (unlikely(current_vector == vector)) 1088 if (unlikely(current_vector == vector))
839 continue; 1089 continue;
1090#ifdef CONFIG_X86_64
840 if (vector == IA32_SYSCALL_VECTOR) 1091 if (vector == IA32_SYSCALL_VECTOR)
841 goto next; 1092 goto next;
1093#else
1094 if (vector == SYSCALL_VECTOR)
1095 goto next;
1096#endif
842 for_each_cpu_mask_nr(new_cpu, new_mask) 1097 for_each_cpu_mask_nr(new_cpu, new_mask)
843 if (per_cpu(vector_irq, new_cpu)[vector] != -1) 1098 if (per_cpu(vector_irq, new_cpu)[vector] != -1)
844 goto next; 1099 goto next;
@@ -875,8 +1130,7 @@ static void __clear_irq_vector(int irq)
875 cpumask_t mask; 1130 cpumask_t mask;
876 int cpu, vector; 1131 int cpu, vector;
877 1132
878 BUG_ON((unsigned)irq >= NR_IRQS); 1133 cfg = irq_cfg(irq);
879 cfg = &irq_cfg[irq];
880 BUG_ON(!cfg->vector); 1134 BUG_ON(!cfg->vector);
881 1135
882 vector = cfg->vector; 1136 vector = cfg->vector;
@@ -893,12 +1147,13 @@ void __setup_vector_irq(int cpu)
893 /* Initialize vector_irq on a new cpu */ 1147 /* Initialize vector_irq on a new cpu */
894 /* This function must be called with vector_lock held */ 1148 /* This function must be called with vector_lock held */
895 int irq, vector; 1149 int irq, vector;
1150 struct irq_cfg *cfg;
896 1151
897 /* Mark the inuse vectors */ 1152 /* Mark the inuse vectors */
898 for (irq = 0; irq < NR_IRQS; ++irq) { 1153 for_each_irq_cfg(irq, cfg) {
899 if (!cpu_isset(cpu, irq_cfg[irq].domain)) 1154 if (!cpu_isset(cpu, cfg->domain))
900 continue; 1155 continue;
901 vector = irq_cfg[irq].vector; 1156 vector = cfg->vector;
902 per_cpu(vector_irq, cpu)[vector] = irq; 1157 per_cpu(vector_irq, cpu)[vector] = irq;
903 } 1158 }
904 /* Mark the free vectors */ 1159 /* Mark the free vectors */
@@ -906,7 +1161,9 @@ void __setup_vector_irq(int cpu)
906 irq = per_cpu(vector_irq, cpu)[vector]; 1161 irq = per_cpu(vector_irq, cpu)[vector];
907 if (irq < 0) 1162 if (irq < 0)
908 continue; 1163 continue;
909 if (!cpu_isset(cpu, irq_cfg[irq].domain)) 1164
1165 cfg = irq_cfg(irq);
1166 if (!cpu_isset(cpu, cfg->domain))
910 per_cpu(vector_irq, cpu)[vector] = -1; 1167 per_cpu(vector_irq, cpu)[vector] = -1;
911 } 1168 }
912} 1169}
@@ -916,16 +1173,49 @@ static struct irq_chip ioapic_chip;
916static struct irq_chip ir_ioapic_chip; 1173static struct irq_chip ir_ioapic_chip;
917#endif 1174#endif
918 1175
1176#define IOAPIC_AUTO -1
1177#define IOAPIC_EDGE 0
1178#define IOAPIC_LEVEL 1
1179
#ifdef CONFIG_X86_32
/*
 * Look up the trigger value of @irq by walking every pin of every IO-APIC
 * and checking whether its mp_INT entry maps to @irq through pin_2_irq().
 *
 * Returns irq_trigger() of the first matching entry, or 0 when no pin
 * maps to @irq.
 */
static inline int IO_APIC_irq_trigger(int irq)
{
	int ioapic, line;

	for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
		for (line = 0; line < nr_ioapic_registers[ioapic]; line++) {
			int entry = find_irq_entry(ioapic, line, mp_INT);

			if (entry == -1)
				continue;
			if (irq != pin_2_irq(entry, ioapic, line))
				continue;

			return irq_trigger(entry);
		}
	}

	/* nonexistent IRQs are edge default */
	return 0;
}
#else
/* Without CONFIG_X86_32 every @irq is reported with trigger value 1. */
static inline int IO_APIC_irq_trigger(int irq)
{
	return 1;
}
#endif
1203
919static void ioapic_register_intr(int irq, unsigned long trigger) 1204static void ioapic_register_intr(int irq, unsigned long trigger)
920{ 1205{
921 if (trigger) 1206 struct irq_desc *desc;
922 irq_desc[irq].status |= IRQ_LEVEL; 1207
1208 desc = irq_to_desc(irq);
1209
1210 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1211 trigger == IOAPIC_LEVEL)
1212 desc->status |= IRQ_LEVEL;
923 else 1213 else
924 irq_desc[irq].status &= ~IRQ_LEVEL; 1214 desc->status &= ~IRQ_LEVEL;
925 1215
926#ifdef CONFIG_INTR_REMAP 1216#ifdef CONFIG_INTR_REMAP
927 if (irq_remapped(irq)) { 1217 if (irq_remapped(irq)) {
928 irq_desc[irq].status |= IRQ_MOVE_PCNTXT; 1218 desc->status |= IRQ_MOVE_PCNTXT;
929 if (trigger) 1219 if (trigger)
930 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, 1220 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
931 handle_fasteoi_irq, 1221 handle_fasteoi_irq,
@@ -936,7 +1226,8 @@ static void ioapic_register_intr(int irq, unsigned long trigger)
936 return; 1226 return;
937 } 1227 }
938#endif 1228#endif
939 if (trigger) 1229 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1230 trigger == IOAPIC_LEVEL)
940 set_irq_chip_and_handler_name(irq, &ioapic_chip, 1231 set_irq_chip_and_handler_name(irq, &ioapic_chip,
941 handle_fasteoi_irq, 1232 handle_fasteoi_irq,
942 "fasteoi"); 1233 "fasteoi");
@@ -1009,13 +1300,15 @@ static int setup_ioapic_entry(int apic, int irq,
1009static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, 1300static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
1010 int trigger, int polarity) 1301 int trigger, int polarity)
1011{ 1302{
1012 struct irq_cfg *cfg = irq_cfg + irq; 1303 struct irq_cfg *cfg;
1013 struct IO_APIC_route_entry entry; 1304 struct IO_APIC_route_entry entry;
1014 cpumask_t mask; 1305 cpumask_t mask;
1015 1306
1016 if (!IO_APIC_IRQ(irq)) 1307 if (!IO_APIC_IRQ(irq))
1017 return; 1308 return;
1018 1309
1310 cfg = irq_cfg(irq);
1311
1019 mask = TARGET_CPUS; 1312 mask = TARGET_CPUS;
1020 if (assign_irq_vector(irq, mask)) 1313 if (assign_irq_vector(irq, mask))
1021 return; 1314 return;
@@ -1047,37 +1340,49 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
1047 1340
/*
 * setup_IO_APIC_irqs - program a routing entry for every connected IO-APIC pin.
 *
 * NOTE(review): the lines below are a side-by-side diff rendering; the first
 * number/text pair on a line is the old file, the second is the new file.
 *
 * Both versions walk every pin of every IO-APIC and look up its mp_INT
 * entry with find_irq_entry().  Pins with no entry (idx == -1) are only
 * listed in the APIC_VERBOSE output and skipped; a run of such pins is
 * closed with a "not connected" message.  For the remaining pins the irq
 * comes from pin_2_irq(), the pin is recorded with add_pin_to_irq() and
 * setup_IO_APIC_irq() is called with irq_trigger(idx)/irq_polarity(idx).
 *
 * The new version replaces first_notcon with notcon (inverted sense),
 * rewords the "not connected" output, and under CONFIG_X86_32 skips pins
 * for which multi_timer_check() returns non-zero.
 */
1048static void __init setup_IO_APIC_irqs(void) 1341static void __init setup_IO_APIC_irqs(void)
1049{ 1342{
1050 int apic, pin, idx, irq, first_notcon = 1; 1343 int apic, pin, idx, irq;
1344 int notcon = 0;
1051 1345
1052 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); 1346 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
1053 1347
1054 for (apic = 0; apic < nr_ioapics; apic++) { 1348 for (apic = 0; apic < nr_ioapics; apic++) {
1055 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { 1349 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
1056
1057 idx = find_irq_entry(apic,pin,mp_INT);
1058 if (idx == -1) {
1059 if (first_notcon) {
1060 apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin);
1061 first_notcon = 0;
1062 } else
1063 apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin);
1064 continue;
1065 }
1066 if (!first_notcon) {
1067 apic_printk(APIC_VERBOSE, " not connected.\n");
1068 first_notcon = 1;
1069 }
1070 1350
1071 irq = pin_2_irq(idx, apic, pin); 1351 idx = find_irq_entry(apic, pin, mp_INT);
1072 add_pin_to_irq(irq, apic, pin); 1352 if (idx == -1) {
1353 if (!notcon) {
1354 notcon = 1;
1355 apic_printk(APIC_VERBOSE,
1356 KERN_DEBUG " %d-%d",
1357 mp_ioapics[apic].mp_apicid,
1358 pin);
1359 } else
1360 apic_printk(APIC_VERBOSE, " %d-%d",
1361 mp_ioapics[apic].mp_apicid,
1362 pin);
1363 continue;
1364 }
1365 if (notcon) {
1366 apic_printk(APIC_VERBOSE,
1367 " (apicid-pin) not connected\n");
1368 notcon = 0;
1369 }
1073 1370
1074 setup_IO_APIC_irq(apic, pin, irq, 1371 irq = pin_2_irq(idx, apic, pin);
1075 irq_trigger(idx), irq_polarity(idx)); 1372#ifdef CONFIG_X86_32
1076 } 1373 if (multi_timer_check(apic, irq))
1374 continue;
1375#endif
1376 add_pin_to_irq(irq, apic, pin);
1377
1378 setup_IO_APIC_irq(apic, pin, irq,
1379 irq_trigger(idx), irq_polarity(idx));
1380 }
1077 } 1381 }
1078 1382
1079 if (!first_notcon) 1383 if (notcon)
1080 apic_printk(APIC_VERBOSE, " not connected.\n"); 1384 apic_printk(APIC_VERBOSE,
1385 " (apicid-pin) not connected\n");
1081} 1386}
1082 1387
1083/* 1388/*
@@ -1088,8 +1393,10 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
1088{ 1393{
1089 struct IO_APIC_route_entry entry; 1394 struct IO_APIC_route_entry entry;
1090 1395
1396#ifdef CONFIG_INTR_REMAP
1091 if (intr_remapping_enabled) 1397 if (intr_remapping_enabled)
1092 return; 1398 return;
1399#endif
1093 1400
1094 memset(&entry, 0, sizeof(entry)); 1401 memset(&entry, 0, sizeof(entry));
1095 1402
@@ -1124,7 +1431,10 @@ __apicdebuginit(void) print_IO_APIC(void)
1124 union IO_APIC_reg_00 reg_00; 1431 union IO_APIC_reg_00 reg_00;
1125 union IO_APIC_reg_01 reg_01; 1432 union IO_APIC_reg_01 reg_01;
1126 union IO_APIC_reg_02 reg_02; 1433 union IO_APIC_reg_02 reg_02;
1434 union IO_APIC_reg_03 reg_03;
1127 unsigned long flags; 1435 unsigned long flags;
1436 struct irq_cfg *cfg;
1437 unsigned int irq;
1128 1438
1129 if (apic_verbosity == APIC_QUIET) 1439 if (apic_verbosity == APIC_QUIET)
1130 return; 1440 return;
@@ -1147,12 +1457,16 @@ __apicdebuginit(void) print_IO_APIC(void)
1147 reg_01.raw = io_apic_read(apic, 1); 1457 reg_01.raw = io_apic_read(apic, 1);
1148 if (reg_01.bits.version >= 0x10) 1458 if (reg_01.bits.version >= 0x10)
1149 reg_02.raw = io_apic_read(apic, 2); 1459 reg_02.raw = io_apic_read(apic, 2);
1460 if (reg_01.bits.version >= 0x20)
1461 reg_03.raw = io_apic_read(apic, 3);
1150 spin_unlock_irqrestore(&ioapic_lock, flags); 1462 spin_unlock_irqrestore(&ioapic_lock, flags);
1151 1463
1152 printk("\n"); 1464 printk("\n");
1153 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); 1465 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
1154 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); 1466 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
1155 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); 1467 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
1468 printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
1469 printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS);
1156 1470
1157 printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01); 1471 printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
1158 printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); 1472 printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
@@ -1160,11 +1474,27 @@ __apicdebuginit(void) print_IO_APIC(void)
1160 printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); 1474 printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
1161 printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); 1475 printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
1162 1476
1163 if (reg_01.bits.version >= 0x10) { 1477 /*
1478 * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
1479 * but the value of reg_02 is read as the previous read register
1480 * value, so ignore it if reg_02 == reg_01.
1481 */
1482 if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
1164 printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); 1483 printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
1165 printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); 1484 printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
1166 } 1485 }
1167 1486
1487 /*
1488 * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
1489 * or reg_03, but the value of reg_0[23] is read as the previous read
1490 * register value, so ignore it if reg_03 == reg_0[12].
1491 */
1492 if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
1493 reg_03.raw != reg_01.raw) {
1494 printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
1495 printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT);
1496 }
1497
1168 printk(KERN_DEBUG ".... IRQ redirection table:\n"); 1498 printk(KERN_DEBUG ".... IRQ redirection table:\n");
1169 1499
1170 printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" 1500 printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
@@ -1193,16 +1523,16 @@ __apicdebuginit(void) print_IO_APIC(void)
1193 } 1523 }
1194 } 1524 }
1195 printk(KERN_DEBUG "IRQ to pin mappings:\n"); 1525 printk(KERN_DEBUG "IRQ to pin mappings:\n");
1196 for (i = 0; i < NR_IRQS; i++) { 1526 for_each_irq_cfg(irq, cfg) {
1197 struct irq_pin_list *entry = irq_2_pin + i; 1527 struct irq_pin_list *entry = cfg->irq_2_pin;
1198 if (entry->pin < 0) 1528 if (!entry)
1199 continue; 1529 continue;
1200 printk(KERN_DEBUG "IRQ%d ", i); 1530 printk(KERN_DEBUG "IRQ%d ", irq);
1201 for (;;) { 1531 for (;;) {
1202 printk("-> %d:%d", entry->apic, entry->pin); 1532 printk("-> %d:%d", entry->apic, entry->pin);
1203 if (!entry->next) 1533 if (!entry->next)
1204 break; 1534 break;
1205 entry = irq_2_pin + entry->next; 1535 entry = entry->next;
1206 } 1536 }
1207 printk("\n"); 1537 printk("\n");
1208 } 1538 }
@@ -1236,7 +1566,7 @@ __apicdebuginit(void) print_APIC_bitfield(int base)
1236__apicdebuginit(void) print_local_APIC(void *dummy) 1566__apicdebuginit(void) print_local_APIC(void *dummy)
1237{ 1567{
1238 unsigned int v, ver, maxlvt; 1568 unsigned int v, ver, maxlvt;
1239 unsigned long icr; 1569 u64 icr;
1240 1570
1241 if (apic_verbosity == APIC_QUIET) 1571 if (apic_verbosity == APIC_QUIET)
1242 return; 1572 return;
@@ -1253,20 +1583,31 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
1253 v = apic_read(APIC_TASKPRI); 1583 v = apic_read(APIC_TASKPRI);
1254 printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); 1584 printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
1255 1585
1256 v = apic_read(APIC_ARBPRI); 1586 if (APIC_INTEGRATED(ver)) { /* !82489DX */
1257 printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v, 1587 if (!APIC_XAPIC(ver)) {
1258 v & APIC_ARBPRI_MASK); 1588 v = apic_read(APIC_ARBPRI);
1259 v = apic_read(APIC_PROCPRI); 1589 printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
1260 printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v); 1590 v & APIC_ARBPRI_MASK);
1591 }
1592 v = apic_read(APIC_PROCPRI);
1593 printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
1594 }
1595
1596 /*
1597 * Remote read supported only in the 82489DX and local APIC for
1598 * Pentium processors.
1599 */
1600 if (!APIC_INTEGRATED(ver) || maxlvt == 3) {
1601 v = apic_read(APIC_RRR);
1602 printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
1603 }
1261 1604
1262 v = apic_read(APIC_EOI);
1263 printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
1264 v = apic_read(APIC_RRR);
1265 printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
1266 v = apic_read(APIC_LDR); 1605 v = apic_read(APIC_LDR);
1267 printk(KERN_DEBUG "... APIC LDR: %08x\n", v); 1606 printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
1268 v = apic_read(APIC_DFR); 1607 if (!x2apic_enabled()) {
1269 printk(KERN_DEBUG "... APIC DFR: %08x\n", v); 1608 v = apic_read(APIC_DFR);
1609 printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
1610 }
1270 v = apic_read(APIC_SPIV); 1611 v = apic_read(APIC_SPIV);
1271 printk(KERN_DEBUG "... APIC SPIV: %08x\n", v); 1612 printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
1272 1613
@@ -1277,8 +1618,13 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
1277 printk(KERN_DEBUG "... APIC IRR field:\n"); 1618 printk(KERN_DEBUG "... APIC IRR field:\n");
1278 print_APIC_bitfield(APIC_IRR); 1619 print_APIC_bitfield(APIC_IRR);
1279 1620
1280 v = apic_read(APIC_ESR); 1621 if (APIC_INTEGRATED(ver)) { /* !82489DX */
1281 printk(KERN_DEBUG "... APIC ESR: %08x\n", v); 1622 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
1623 apic_write(APIC_ESR, 0);
1624
1625 v = apic_read(APIC_ESR);
1626 printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
1627 }
1282 1628
1283 icr = apic_icr_read(); 1629 icr = apic_icr_read();
1284 printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr); 1630 printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
@@ -1312,7 +1658,12 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
1312 1658
/*
 * print_all_local_APICs - run print_local_APIC() for every online CPU.
 *
 * NOTE(review): the lines below are a side-by-side diff rendering; the first
 * number/text pair on a line is the old file, the second is the new file.
 *
 * The old version made a single on_each_cpu() call.  The new version
 * disables preemption and issues one smp_call_function_single() per online
 * CPU (last argument 1 -- presumably "wait for completion", confirm against
 * the smp API), then re-enables preemption.
 */
1313__apicdebuginit(void) print_all_local_APICs(void) 1659__apicdebuginit(void) print_all_local_APICs(void)
1314{ 1660{
1315 on_each_cpu(print_local_APIC, NULL, 1); 1661 int cpu;
1662
1663 preempt_disable();
1664 for_each_online_cpu(cpu)
1665 smp_call_function_single(cpu, print_local_APIC, NULL, 1);
1666 preempt_enable();
1316} 1667}
1317 1668
1318__apicdebuginit(void) print_PIC(void) 1669__apicdebuginit(void) print_PIC(void)
@@ -1359,17 +1710,22 @@ __apicdebuginit(int) print_all_ICs(void)
1359fs_initcall(print_all_ICs); 1710fs_initcall(print_all_ICs);
1360 1711
1361 1712
1713/* Where, if anywhere, the i8259 is connected in external int mode */
1714static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
1715
1362void __init enable_IO_APIC(void) 1716void __init enable_IO_APIC(void)
1363{ 1717{
1364 union IO_APIC_reg_01 reg_01; 1718 union IO_APIC_reg_01 reg_01;
1365 int i8259_apic, i8259_pin; 1719 int i8259_apic, i8259_pin;
1366 int i, apic; 1720 int apic;
1367 unsigned long flags; 1721 unsigned long flags;
1368 1722
1369 for (i = 0; i < PIN_MAP_SIZE; i++) { 1723#ifdef CONFIG_X86_32
1370 irq_2_pin[i].pin = -1; 1724 int i;
1371 irq_2_pin[i].next = 0; 1725 if (!pirqs_enabled)
1372 } 1726 for (i = 0; i < MAX_PIRQS; i++)
1727 pirq_entries[i] = -1;
1728#endif
1373 1729
1374 /* 1730 /*
1375 * The number of IO-APIC IRQ registers (== #pins): 1731 * The number of IO-APIC IRQ registers (== #pins):
@@ -1399,6 +1755,10 @@ void __init enable_IO_APIC(void)
1399 } 1755 }
1400 found_i8259: 1756 found_i8259:
1401 /* Look to see what if the MP table has reported the ExtINT */ 1757 /* Look to see what if the MP table has reported the ExtINT */
1758 /* If we could not find the appropriate pin by looking at the ioapic
1759 * the i8259 probably is not connected the ioapic but give the
1760 * mptable a chance anyway.
1761 */
1402 i8259_pin = find_isa_irq_pin(0, mp_ExtINT); 1762 i8259_pin = find_isa_irq_pin(0, mp_ExtINT);
1403 i8259_apic = find_isa_irq_apic(0, mp_ExtINT); 1763 i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
1404 /* Trust the MP table if nothing is setup in the hardware */ 1764 /* Trust the MP table if nothing is setup in the hardware */
@@ -1458,6 +1818,133 @@ void disable_IO_APIC(void)
1458 disconnect_bsp_APIC(ioapic_i8259.pin != -1); 1818 disconnect_bsp_APIC(ioapic_i8259.pin != -1);
1459} 1819}
1460 1820
1821#ifdef CONFIG_X86_32
1822/*
1823 * function to set the IO-APIC physical IDs based on the
1824 * values stored in the MPC table.
1825 *
1826 * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
1827 */
1828
1829static void __init setup_ioapic_ids_from_mpc(void)
1830{
1831 union IO_APIC_reg_00 reg_00;
1832 physid_mask_t phys_id_present_map;
1833 int apic;
1834 int i;
1835 unsigned char old_id;
1836 unsigned long flags;
1837
1838 if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
1839 return;
1840
1841 /*
1842 * Don't check I/O APIC IDs for xAPIC systems. They have
1843 * no meaning without the serial APIC bus.
1844 */
1845 if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
1846 || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
1847 return;
1848 /*
1849 * This is broken; anything with a real cpu count has to
1850 * circumvent this idiocy regardless.
1851 */
1852 phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
1853
1854 /*
1855 * Set the IOAPIC ID to the value stored in the MPC table.
1856 */
1857 for (apic = 0; apic < nr_ioapics; apic++) {
1858
1859 /* Read the register 0 value */
1860 spin_lock_irqsave(&ioapic_lock, flags);
1861 reg_00.raw = io_apic_read(apic, 0);
1862 spin_unlock_irqrestore(&ioapic_lock, flags);
1863
1864 old_id = mp_ioapics[apic].mp_apicid;
1865
1866 if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
1867 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
1868 apic, mp_ioapics[apic].mp_apicid);
1869 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
1870 reg_00.bits.ID);
1871 mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
1872 }
1873
1874 /*
1875 * Sanity check, is the ID really free? Every APIC in a
1876 * system must have a unique ID or we get lots of nice
1877 * 'stuck on smp_invalidate_needed IPI wait' messages.
1878 */
1879 if (check_apicid_used(phys_id_present_map,
1880 mp_ioapics[apic].mp_apicid)) {
1881 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
1882 apic, mp_ioapics[apic].mp_apicid);
1883 for (i = 0; i < get_physical_broadcast(); i++)
1884 if (!physid_isset(i, phys_id_present_map))
1885 break;
1886 if (i >= get_physical_broadcast())
1887 panic("Max APIC ID exceeded!\n");
1888 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
1889 i);
1890 physid_set(i, phys_id_present_map);
1891 mp_ioapics[apic].mp_apicid = i;
1892 } else {
1893 physid_mask_t tmp;
1894 tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
1895 apic_printk(APIC_VERBOSE, "Setting %d in the "
1896 "phys_id_present_map\n",
1897 mp_ioapics[apic].mp_apicid);
1898 physids_or(phys_id_present_map, phys_id_present_map, tmp);
1899 }
1900
1901
1902 /*
1903 * We need to adjust the IRQ routing table
1904 * if the ID changed.
1905 */
1906 if (old_id != mp_ioapics[apic].mp_apicid)
1907 for (i = 0; i < mp_irq_entries; i++)
1908 if (mp_irqs[i].mp_dstapic == old_id)
1909 mp_irqs[i].mp_dstapic
1910 = mp_ioapics[apic].mp_apicid;
1911
1912 /*
1913 * Read the right value from the MPC table and
1914 * write it into the ID register.
1915 */
1916 apic_printk(APIC_VERBOSE, KERN_INFO
1917 "...changing IO-APIC physical APIC ID to %d ...",
1918 mp_ioapics[apic].mp_apicid);
1919
1920 reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
1921 spin_lock_irqsave(&ioapic_lock, flags);
1922 io_apic_write(apic, 0, reg_00.raw);
1923 spin_unlock_irqrestore(&ioapic_lock, flags);
1924
1925 /*
1926 * Sanity check
1927 */
1928 spin_lock_irqsave(&ioapic_lock, flags);
1929 reg_00.raw = io_apic_read(apic, 0);
1930 spin_unlock_irqrestore(&ioapic_lock, flags);
1931 if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
1932 printk("could not set ID!\n");
1933 else
1934 apic_printk(APIC_VERBOSE, " ok.\n");
1935 }
1936}
1937#endif
1938
1939int no_timer_check __initdata;
1940
1941static int __init notimercheck(char *s)
1942{
1943 no_timer_check = 1;
1944 return 1;
1945}
1946__setup("no_timer_check", notimercheck);
1947
1461/* 1948/*
1462 * There is a nasty bug in some older SMP boards, their mptable lies 1949 * There is a nasty bug in some older SMP boards, their mptable lies
1463 * about the timer IRQ. We do the following to work around the situation: 1950 * about the timer IRQ. We do the following to work around the situation:
@@ -1471,6 +1958,9 @@ static int __init timer_irq_works(void)
1471 unsigned long t1 = jiffies; 1958 unsigned long t1 = jiffies;
1472 unsigned long flags; 1959 unsigned long flags;
1473 1960
1961 if (no_timer_check)
1962 return 1;
1963
1474 local_save_flags(flags); 1964 local_save_flags(flags);
1475 local_irq_enable(); 1965 local_irq_enable();
1476 /* Let ten ticks pass... */ 1966 /* Let ten ticks pass... */
@@ -1531,9 +2021,11 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
1531 return was_pending; 2021 return was_pending;
1532} 2022}
1533 2023
2024#ifdef CONFIG_X86_64
1534static int ioapic_retrigger_irq(unsigned int irq) 2025static int ioapic_retrigger_irq(unsigned int irq)
1535{ 2026{
1536 struct irq_cfg *cfg = &irq_cfg[irq]; 2027
2028 struct irq_cfg *cfg = irq_cfg(irq);
1537 unsigned long flags; 2029 unsigned long flags;
1538 2030
1539 spin_lock_irqsave(&vector_lock, flags); 2031 spin_lock_irqsave(&vector_lock, flags);
@@ -1542,6 +2034,14 @@ static int ioapic_retrigger_irq(unsigned int irq)
1542 2034
1543 return 1; 2035 return 1;
1544} 2036}
2037#else
2038static int ioapic_retrigger_irq(unsigned int irq)
2039{
2040 send_IPI_self(irq_cfg(irq)->vector);
2041
2042 return 1;
2043}
2044#endif
1545 2045
1546/* 2046/*
1547 * Level and edge triggered IO-APIC interrupts need different handling, 2047 * Level and edge triggered IO-APIC interrupts need different handling,
@@ -1580,11 +2080,11 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
1580 */ 2080 */
1581static void migrate_ioapic_irq(int irq, cpumask_t mask) 2081static void migrate_ioapic_irq(int irq, cpumask_t mask)
1582{ 2082{
1583 struct irq_cfg *cfg = irq_cfg + irq; 2083 struct irq_cfg *cfg;
1584 struct irq_desc *desc = irq_desc + irq; 2084 struct irq_desc *desc;
1585 cpumask_t tmp, cleanup_mask; 2085 cpumask_t tmp, cleanup_mask;
1586 struct irte irte; 2086 struct irte irte;
1587 int modify_ioapic_rte = desc->status & IRQ_LEVEL; 2087 int modify_ioapic_rte;
1588 unsigned int dest; 2088 unsigned int dest;
1589 unsigned long flags; 2089 unsigned long flags;
1590 2090
@@ -1598,9 +2098,12 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask)
1598 if (assign_irq_vector(irq, mask)) 2098 if (assign_irq_vector(irq, mask))
1599 return; 2099 return;
1600 2100
2101 cfg = irq_cfg(irq);
1601 cpus_and(tmp, cfg->domain, mask); 2102 cpus_and(tmp, cfg->domain, mask);
1602 dest = cpu_mask_to_apicid(tmp); 2103 dest = cpu_mask_to_apicid(tmp);
1603 2104
2105 desc = irq_to_desc(irq);
2106 modify_ioapic_rte = desc->status & IRQ_LEVEL;
1604 if (modify_ioapic_rte) { 2107 if (modify_ioapic_rte) {
1605 spin_lock_irqsave(&ioapic_lock, flags); 2108 spin_lock_irqsave(&ioapic_lock, flags);
1606 __target_IO_APIC_irq(irq, dest, cfg->vector); 2109 __target_IO_APIC_irq(irq, dest, cfg->vector);
@@ -1622,18 +2125,19 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask)
1622 cfg->move_in_progress = 0; 2125 cfg->move_in_progress = 0;
1623 } 2126 }
1624 2127
1625 irq_desc[irq].affinity = mask; 2128 desc->affinity = mask;
1626} 2129}
1627 2130
1628static int migrate_irq_remapped_level(int irq) 2131static int migrate_irq_remapped_level(int irq)
1629{ 2132{
1630 int ret = -1; 2133 int ret = -1;
2134 struct irq_desc *desc = irq_to_desc(irq);
1631 2135
1632 mask_IO_APIC_irq(irq); 2136 mask_IO_APIC_irq(irq);
1633 2137
1634 if (io_apic_level_ack_pending(irq)) { 2138 if (io_apic_level_ack_pending(irq)) {
1635 /* 2139 /*
1636 * Interrupt in progress. Migrating irq now will change the 2140 * Interrupt in progress. Migrating irq now will change the
1637 * vector information in the IO-APIC RTE and that will confuse 2141 * vector information in the IO-APIC RTE and that will confuse
1638 * the EOI broadcast performed by cpu. 2142 * the EOI broadcast performed by cpu.
1639 * So, delay the irq migration to the next instance. 2143 * So, delay the irq migration to the next instance.
@@ -1643,11 +2147,11 @@ static int migrate_irq_remapped_level(int irq)
1643 } 2147 }
1644 2148
1645 /* everthing is clear. we have right of way */ 2149 /* everthing is clear. we have right of way */
1646 migrate_ioapic_irq(irq, irq_desc[irq].pending_mask); 2150 migrate_ioapic_irq(irq, desc->pending_mask);
1647 2151
1648 ret = 0; 2152 ret = 0;
1649 irq_desc[irq].status &= ~IRQ_MOVE_PENDING; 2153 desc->status &= ~IRQ_MOVE_PENDING;
1650 cpus_clear(irq_desc[irq].pending_mask); 2154 cpus_clear(desc->pending_mask);
1651 2155
1652unmask: 2156unmask:
1653 unmask_IO_APIC_irq(irq); 2157 unmask_IO_APIC_irq(irq);
@@ -1656,10 +2160,10 @@ unmask:
1656 2160
1657static void ir_irq_migration(struct work_struct *work) 2161static void ir_irq_migration(struct work_struct *work)
1658{ 2162{
1659 int irq; 2163 unsigned int irq;
2164 struct irq_desc *desc;
1660 2165
1661 for (irq = 0; irq < NR_IRQS; irq++) { 2166 for_each_irq_desc(irq, desc) {
1662 struct irq_desc *desc = irq_desc + irq;
1663 if (desc->status & IRQ_MOVE_PENDING) { 2167 if (desc->status & IRQ_MOVE_PENDING) {
1664 unsigned long flags; 2168 unsigned long flags;
1665 2169
@@ -1671,8 +2175,7 @@ static void ir_irq_migration(struct work_struct *work)
1671 continue; 2175 continue;
1672 } 2176 }
1673 2177
1674 desc->chip->set_affinity(irq, 2178 desc->chip->set_affinity(irq, desc->pending_mask);
1675 irq_desc[irq].pending_mask);
1676 spin_unlock_irqrestore(&desc->lock, flags); 2179 spin_unlock_irqrestore(&desc->lock, flags);
1677 } 2180 }
1678 } 2181 }
@@ -1683,9 +2186,11 @@ static void ir_irq_migration(struct work_struct *work)
1683 */ 2186 */
1684static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) 2187static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
1685{ 2188{
1686 if (irq_desc[irq].status & IRQ_LEVEL) { 2189 struct irq_desc *desc = irq_to_desc(irq);
1687 irq_desc[irq].status |= IRQ_MOVE_PENDING; 2190
1688 irq_desc[irq].pending_mask = mask; 2191 if (desc->status & IRQ_LEVEL) {
2192 desc->status |= IRQ_MOVE_PENDING;
2193 desc->pending_mask = mask;
1689 migrate_irq_remapped_level(irq); 2194 migrate_irq_remapped_level(irq);
1690 return; 2195 return;
1691 } 2196 }
@@ -1698,7 +2203,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
1698{ 2203{
1699 unsigned vector, me; 2204 unsigned vector, me;
1700 ack_APIC_irq(); 2205 ack_APIC_irq();
2206#ifdef CONFIG_X86_64
1701 exit_idle(); 2207 exit_idle();
2208#endif
1702 irq_enter(); 2209 irq_enter();
1703 2210
1704 me = smp_processor_id(); 2211 me = smp_processor_id();
@@ -1707,11 +2214,12 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
1707 struct irq_desc *desc; 2214 struct irq_desc *desc;
1708 struct irq_cfg *cfg; 2215 struct irq_cfg *cfg;
1709 irq = __get_cpu_var(vector_irq)[vector]; 2216 irq = __get_cpu_var(vector_irq)[vector];
1710 if (irq >= NR_IRQS) 2217
2218 desc = irq_to_desc(irq);
2219 if (!desc)
1711 continue; 2220 continue;
1712 2221
1713 desc = irq_desc + irq; 2222 cfg = irq_cfg(irq);
1714 cfg = irq_cfg + irq;
1715 spin_lock(&desc->lock); 2223 spin_lock(&desc->lock);
1716 if (!cfg->move_cleanup_count) 2224 if (!cfg->move_cleanup_count)
1717 goto unlock; 2225 goto unlock;
@@ -1730,7 +2238,7 @@ unlock:
1730 2238
1731static void irq_complete_move(unsigned int irq) 2239static void irq_complete_move(unsigned int irq)
1732{ 2240{
1733 struct irq_cfg *cfg = irq_cfg + irq; 2241 struct irq_cfg *cfg = irq_cfg(irq);
1734 unsigned vector, me; 2242 unsigned vector, me;
1735 2243
1736 if (likely(!cfg->move_in_progress)) 2244 if (likely(!cfg->move_in_progress))
@@ -1769,19 +2277,50 @@ static void ack_apic_edge(unsigned int irq)
1769 ack_APIC_irq(); 2277 ack_APIC_irq();
1770} 2278}
1771 2279
2280atomic_t irq_mis_count;
2281
1772static void ack_apic_level(unsigned int irq) 2282static void ack_apic_level(unsigned int irq)
1773{ 2283{
2284#ifdef CONFIG_X86_32
2285 unsigned long v;
2286 int i;
2287#endif
1774 int do_unmask_irq = 0; 2288 int do_unmask_irq = 0;
1775 2289
1776 irq_complete_move(irq); 2290 irq_complete_move(irq);
1777#ifdef CONFIG_GENERIC_PENDING_IRQ 2291#ifdef CONFIG_GENERIC_PENDING_IRQ
1778 /* If we are moving the irq we need to mask it */ 2292 /* If we are moving the irq we need to mask it */
1779 if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) { 2293 if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
1780 do_unmask_irq = 1; 2294 do_unmask_irq = 1;
1781 mask_IO_APIC_irq(irq); 2295 mask_IO_APIC_irq(irq);
1782 } 2296 }
1783#endif 2297#endif
1784 2298
2299#ifdef CONFIG_X86_32
2300 /*
2301 * It appears there is an erratum which affects at least version 0x11
2302 * of I/O APIC (that's the 82093AA and cores integrated into various
2303 * chipsets). Under certain conditions a level-triggered interrupt is
2304 * erroneously delivered as edge-triggered one but the respective IRR
2305 * bit gets set nevertheless. As a result the I/O unit expects an EOI
2306 * message but it will never arrive and further interrupts are blocked
2307 * from the source. The exact reason is so far unknown, but the
2308 * phenomenon was observed when two consecutive interrupt requests
2309 * from a given source get delivered to the same CPU and the source is
2310 * temporarily disabled in between.
2311 *
2312 * A workaround is to simulate an EOI message manually. We achieve it
2313 * by setting the trigger mode to edge and then to level when the edge
2314 * trigger mode gets detected in the TMR of a local APIC for a
2315 * level-triggered interrupt. We mask the source for the time of the
2316 * operation to prevent an edge-triggered interrupt escaping meanwhile.
2317 * The idea is from Manfred Spraul. --macro
2318 */
2319 i = irq_cfg(irq)->vector;
2320
2321 v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
2322#endif
2323
1785 /* 2324 /*
1786 * We must acknowledge the irq before we move it or the acknowledge will 2325 * We must acknowledge the irq before we move it or the acknowledge will
1787 * not propagate properly. 2326 * not propagate properly.
@@ -1820,31 +2359,41 @@ static void ack_apic_level(unsigned int irq)
1820 move_masked_irq(irq); 2359 move_masked_irq(irq);
1821 unmask_IO_APIC_irq(irq); 2360 unmask_IO_APIC_irq(irq);
1822 } 2361 }
2362
2363#ifdef CONFIG_X86_32
2364 if (!(v & (1 << (i & 0x1f)))) {
2365 atomic_inc(&irq_mis_count);
2366 spin_lock(&ioapic_lock);
2367 __mask_and_edge_IO_APIC_irq(irq);
2368 __unmask_and_level_IO_APIC_irq(irq);
2369 spin_unlock(&ioapic_lock);
2370 }
2371#endif
1823} 2372}
1824 2373
1825static struct irq_chip ioapic_chip __read_mostly = { 2374static struct irq_chip ioapic_chip __read_mostly = {
1826 .name = "IO-APIC", 2375 .name = "IO-APIC",
1827 .startup = startup_ioapic_irq, 2376 .startup = startup_ioapic_irq,
1828 .mask = mask_IO_APIC_irq, 2377 .mask = mask_IO_APIC_irq,
1829 .unmask = unmask_IO_APIC_irq, 2378 .unmask = unmask_IO_APIC_irq,
1830 .ack = ack_apic_edge, 2379 .ack = ack_apic_edge,
1831 .eoi = ack_apic_level, 2380 .eoi = ack_apic_level,
1832#ifdef CONFIG_SMP 2381#ifdef CONFIG_SMP
1833 .set_affinity = set_ioapic_affinity_irq, 2382 .set_affinity = set_ioapic_affinity_irq,
1834#endif 2383#endif
1835 .retrigger = ioapic_retrigger_irq, 2384 .retrigger = ioapic_retrigger_irq,
1836}; 2385};
1837 2386
1838#ifdef CONFIG_INTR_REMAP 2387#ifdef CONFIG_INTR_REMAP
1839static struct irq_chip ir_ioapic_chip __read_mostly = { 2388static struct irq_chip ir_ioapic_chip __read_mostly = {
1840 .name = "IR-IO-APIC", 2389 .name = "IR-IO-APIC",
1841 .startup = startup_ioapic_irq, 2390 .startup = startup_ioapic_irq,
1842 .mask = mask_IO_APIC_irq, 2391 .mask = mask_IO_APIC_irq,
1843 .unmask = unmask_IO_APIC_irq, 2392 .unmask = unmask_IO_APIC_irq,
1844 .ack = ack_x2apic_edge, 2393 .ack = ack_x2apic_edge,
1845 .eoi = ack_x2apic_level, 2394 .eoi = ack_x2apic_level,
1846#ifdef CONFIG_SMP 2395#ifdef CONFIG_SMP
1847 .set_affinity = set_ir_ioapic_affinity_irq, 2396 .set_affinity = set_ir_ioapic_affinity_irq,
1848#endif 2397#endif
1849 .retrigger = ioapic_retrigger_irq, 2398 .retrigger = ioapic_retrigger_irq,
1850}; 2399};
@@ -1853,6 +2402,8 @@ static struct irq_chip ir_ioapic_chip __read_mostly = {
1853static inline void init_IO_APIC_traps(void) 2402static inline void init_IO_APIC_traps(void)
1854{ 2403{
1855 int irq; 2404 int irq;
2405 struct irq_desc *desc;
2406 struct irq_cfg *cfg;
1856 2407
1857 /* 2408 /*
1858 * NOTE! The local APIC isn't very good at handling 2409 * NOTE! The local APIC isn't very good at handling
@@ -1865,8 +2416,8 @@ static inline void init_IO_APIC_traps(void)
1865 * Also, we've got to be careful not to trash gate 2416 * Also, we've got to be careful not to trash gate
1866 * 0x80, because int 0x80 is hm, kind of importantish. ;) 2417 * 0x80, because int 0x80 is hm, kind of importantish. ;)
1867 */ 2418 */
1868 for (irq = 0; irq < NR_IRQS ; irq++) { 2419 for_each_irq_cfg(irq, cfg) {
1869 if (IO_APIC_IRQ(irq) && !irq_cfg[irq].vector) { 2420 if (IO_APIC_IRQ(irq) && !cfg->vector) {
1870 /* 2421 /*
1871 * Hmm.. We don't have an entry for this, 2422 * Hmm.. We don't have an entry for this,
1872 * so default to an old-fashioned 8259 2423 * so default to an old-fashioned 8259
@@ -1874,27 +2425,33 @@ static inline void init_IO_APIC_traps(void)
1874 */ 2425 */
1875 if (irq < 16) 2426 if (irq < 16)
1876 make_8259A_irq(irq); 2427 make_8259A_irq(irq);
1877 else 2428 else {
2429 desc = irq_to_desc(irq);
1878 /* Strange. Oh, well.. */ 2430 /* Strange. Oh, well.. */
1879 irq_desc[irq].chip = &no_irq_chip; 2431 desc->chip = &no_irq_chip;
2432 }
1880 } 2433 }
1881 } 2434 }
1882} 2435}
1883 2436
1884static void unmask_lapic_irq(unsigned int irq) 2437/*
2438 * The local APIC irq-chip implementation:
2439 */
2440
2441static void mask_lapic_irq(unsigned int irq)
1885{ 2442{
1886 unsigned long v; 2443 unsigned long v;
1887 2444
1888 v = apic_read(APIC_LVT0); 2445 v = apic_read(APIC_LVT0);
1889 apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); 2446 apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
1890} 2447}
1891 2448
1892static void mask_lapic_irq(unsigned int irq) 2449static void unmask_lapic_irq(unsigned int irq)
1893{ 2450{
1894 unsigned long v; 2451 unsigned long v;
1895 2452
1896 v = apic_read(APIC_LVT0); 2453 v = apic_read(APIC_LVT0);
1897 apic_write(APIC_LVT0, v | APIC_LVT_MASKED); 2454 apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
1898} 2455}
1899 2456
1900static void ack_lapic_irq (unsigned int irq) 2457static void ack_lapic_irq (unsigned int irq)
@@ -1911,7 +2468,10 @@ static struct irq_chip lapic_chip __read_mostly = {
1911 2468
1912static void lapic_register_intr(int irq) 2469static void lapic_register_intr(int irq)
1913{ 2470{
1914 irq_desc[irq].status &= ~IRQ_LEVEL; 2471 struct irq_desc *desc;
2472
2473 desc = irq_to_desc(irq);
2474 desc->status &= ~IRQ_LEVEL;
1915 set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, 2475 set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
1916 "edge"); 2476 "edge");
1917} 2477}
@@ -1919,19 +2479,19 @@ static void lapic_register_intr(int irq)
1919static void __init setup_nmi(void) 2479static void __init setup_nmi(void)
1920{ 2480{
1921 /* 2481 /*
1922 * Dirty trick to enable the NMI watchdog ... 2482 * Dirty trick to enable the NMI watchdog ...
1923 * We put the 8259A master into AEOI mode and 2483 * We put the 8259A master into AEOI mode and
1924 * unmask on all local APICs LVT0 as NMI. 2484 * unmask on all local APICs LVT0 as NMI.
1925 * 2485 *
1926 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') 2486 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
1927 * is from Maciej W. Rozycki - so we do not have to EOI from 2487 * is from Maciej W. Rozycki - so we do not have to EOI from
1928 * the NMI handler or the timer interrupt. 2488 * the NMI handler or the timer interrupt.
1929 */ 2489 */
1930 printk(KERN_INFO "activating NMI Watchdog ..."); 2490 apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
1931 2491
1932 enable_NMI_through_LVT0(); 2492 enable_NMI_through_LVT0();
1933 2493
1934 printk(" done.\n"); 2494 apic_printk(APIC_VERBOSE, " done.\n");
1935} 2495}
1936 2496
1937/* 2497/*
@@ -1948,12 +2508,17 @@ static inline void __init unlock_ExtINT_logic(void)
1948 unsigned char save_control, save_freq_select; 2508 unsigned char save_control, save_freq_select;
1949 2509
1950 pin = find_isa_irq_pin(8, mp_INT); 2510 pin = find_isa_irq_pin(8, mp_INT);
2511 if (pin == -1) {
2512 WARN_ON_ONCE(1);
2513 return;
2514 }
1951 apic = find_isa_irq_apic(8, mp_INT); 2515 apic = find_isa_irq_apic(8, mp_INT);
1952 if (pin == -1) 2516 if (apic == -1) {
2517 WARN_ON_ONCE(1);
1953 return; 2518 return;
2519 }
1954 2520
1955 entry0 = ioapic_read_entry(apic, pin); 2521 entry0 = ioapic_read_entry(apic, pin);
1956
1957 clear_IO_APIC_pin(apic, pin); 2522 clear_IO_APIC_pin(apic, pin);
1958 2523
1959 memset(&entry1, 0, sizeof(entry1)); 2524 memset(&entry1, 0, sizeof(entry1));
@@ -1988,23 +2553,38 @@ static inline void __init unlock_ExtINT_logic(void)
1988 ioapic_write_entry(apic, pin, entry0); 2553 ioapic_write_entry(apic, pin, entry0);
1989} 2554}
1990 2555
2556static int disable_timer_pin_1 __initdata;
2557/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
2558static int __init disable_timer_pin_setup(char *arg)
2559{
2560 disable_timer_pin_1 = 1;
2561 return 0;
2562}
2563early_param("disable_timer_pin_1", disable_timer_pin_setup);
2564
2565int timer_through_8259 __initdata;
2566
1991/* 2567/*
1992 * This code may look a bit paranoid, but it's supposed to cooperate with 2568 * This code may look a bit paranoid, but it's supposed to cooperate with
1993 * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ 2569 * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
1994 * is so screwy. Thanks to Brian Perkins for testing/hacking this beast 2570 * is so screwy. Thanks to Brian Perkins for testing/hacking this beast
1995 * fanatically on his truly buggy board. 2571 * fanatically on his truly buggy board.
1996 * 2572 *
1997 * FIXME: really need to revamp this for modern platforms only. 2573 * FIXME: really need to revamp this for all platforms.
1998 */ 2574 */
1999static inline void __init check_timer(void) 2575static inline void __init check_timer(void)
2000{ 2576{
2001 struct irq_cfg *cfg = irq_cfg + 0; 2577 struct irq_cfg *cfg = irq_cfg(0);
2002 int apic1, pin1, apic2, pin2; 2578 int apic1, pin1, apic2, pin2;
2003 unsigned long flags; 2579 unsigned long flags;
2580 unsigned int ver;
2004 int no_pin1 = 0; 2581 int no_pin1 = 0;
2005 2582
2006 local_irq_save(flags); 2583 local_irq_save(flags);
2007 2584
2585 ver = apic_read(APIC_LVR);
2586 ver = GET_APIC_VERSION(ver);
2587
2008 /* 2588 /*
2009 * get/set the timer IRQ vector: 2589 * get/set the timer IRQ vector:
2010 */ 2590 */
@@ -2013,10 +2593,18 @@ static inline void __init check_timer(void)
2013 2593
2014 /* 2594 /*
2015 * As IRQ0 is to be enabled in the 8259A, the virtual 2595 * As IRQ0 is to be enabled in the 8259A, the virtual
2016 * wire has to be disabled in the local APIC. 2596 * wire has to be disabled in the local APIC. Also
2597 * timer interrupts need to be acknowledged manually in
2598 * the 8259A for the i82489DX when using the NMI
2599 * watchdog as that APIC treats NMIs as level-triggered.
2600 * The AEOI mode will finish them in the 8259A
2601 * automatically.
2017 */ 2602 */
2018 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); 2603 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
2019 init_8259A(1); 2604 init_8259A(1);
2605#ifdef CONFIG_X86_32
2606 timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
2607#endif
2020 2608
2021 pin1 = find_isa_irq_pin(0, mp_INT); 2609 pin1 = find_isa_irq_pin(0, mp_INT);
2022 apic1 = find_isa_irq_apic(0, mp_INT); 2610 apic1 = find_isa_irq_apic(0, mp_INT);
@@ -2035,8 +2623,10 @@ static inline void __init check_timer(void)
2035 * 8259A. 2623 * 8259A.
2036 */ 2624 */
2037 if (pin1 == -1) { 2625 if (pin1 == -1) {
2626#ifdef CONFIG_INTR_REMAP
2038 if (intr_remapping_enabled) 2627 if (intr_remapping_enabled)
2039 panic("BIOS bug: timer not connected to IO-APIC"); 2628 panic("BIOS bug: timer not connected to IO-APIC");
2629#endif
2040 pin1 = pin2; 2630 pin1 = pin2;
2041 apic1 = apic2; 2631 apic1 = apic2;
2042 no_pin1 = 1; 2632 no_pin1 = 1;
@@ -2054,7 +2644,7 @@ static inline void __init check_timer(void)
2054 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); 2644 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
2055 } 2645 }
2056 unmask_IO_APIC_irq(0); 2646 unmask_IO_APIC_irq(0);
2057 if (!no_timer_check && timer_irq_works()) { 2647 if (timer_irq_works()) {
2058 if (nmi_watchdog == NMI_IO_APIC) { 2648 if (nmi_watchdog == NMI_IO_APIC) {
2059 setup_nmi(); 2649 setup_nmi();
2060 enable_8259A_irq(0); 2650 enable_8259A_irq(0);
@@ -2063,8 +2653,10 @@ static inline void __init check_timer(void)
2063 clear_IO_APIC_pin(0, pin1); 2653 clear_IO_APIC_pin(0, pin1);
2064 goto out; 2654 goto out;
2065 } 2655 }
2656#ifdef CONFIG_INTR_REMAP
2066 if (intr_remapping_enabled) 2657 if (intr_remapping_enabled)
2067 panic("timer doesn't work through Interrupt-remapped IO-APIC"); 2658 panic("timer doesn't work through Interrupt-remapped IO-APIC");
2659#endif
2068 clear_IO_APIC_pin(apic1, pin1); 2660 clear_IO_APIC_pin(apic1, pin1);
2069 if (!no_pin1) 2661 if (!no_pin1)
2070 apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " 2662 apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -2104,6 +2696,9 @@ static inline void __init check_timer(void)
2104 "through the IO-APIC - disabling NMI Watchdog!\n"); 2696 "through the IO-APIC - disabling NMI Watchdog!\n");
2105 nmi_watchdog = NMI_NONE; 2697 nmi_watchdog = NMI_NONE;
2106 } 2698 }
2699#ifdef CONFIG_X86_32
2700 timer_ack = 0;
2701#endif
2107 2702
2108 apic_printk(APIC_QUIET, KERN_INFO 2703 apic_printk(APIC_QUIET, KERN_INFO
2109 "...trying to set up timer as Virtual Wire IRQ...\n"); 2704 "...trying to set up timer as Virtual Wire IRQ...\n");
@@ -2140,13 +2735,6 @@ out:
2140 local_irq_restore(flags); 2735 local_irq_restore(flags);
2141} 2736}
2142 2737
2143static int __init notimercheck(char *s)
2144{
2145 no_timer_check = 1;
2146 return 1;
2147}
2148__setup("no_timer_check", notimercheck);
2149
2150/* 2738/*
2151 * Traditionally ISA IRQ2 is the cascade IRQ, and is not available 2739 * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
2152 * to devices. However there may be an I/O APIC pin available for 2740 * to devices. However there may be an I/O APIC pin available for
@@ -2164,25 +2752,49 @@ __setup("no_timer_check", notimercheck);
2164 * the I/O APIC in all cases now. No actual device should request 2752 * the I/O APIC in all cases now. No actual device should request
2165 * it anyway. --macro 2753 * it anyway. --macro
2166 */ 2754 */
2167#define PIC_IRQS (1<<2) 2755#define PIC_IRQS (1 << PIC_CASCADE_IR)
2168 2756
2169void __init setup_IO_APIC(void) 2757void __init setup_IO_APIC(void)
2170{ 2758{
2171 2759
2760#ifdef CONFIG_X86_32
2761 enable_IO_APIC();
2762#else
2172 /* 2763 /*
2173 * calling enable_IO_APIC() is moved to setup_local_APIC for BP 2764 * calling enable_IO_APIC() is moved to setup_local_APIC for BP
2174 */ 2765 */
2766#endif
2175 2767
2176 io_apic_irqs = ~PIC_IRQS; 2768 io_apic_irqs = ~PIC_IRQS;
2177 2769
2178 apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); 2770 apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
2179 2771 /*
2772 * Set up IO-APIC IRQ routing.
2773 */
2774#ifdef CONFIG_X86_32
2775 if (!acpi_ioapic)
2776 setup_ioapic_ids_from_mpc();
2777#endif
2180 sync_Arb_IDs(); 2778 sync_Arb_IDs();
2181 setup_IO_APIC_irqs(); 2779 setup_IO_APIC_irqs();
2182 init_IO_APIC_traps(); 2780 init_IO_APIC_traps();
2183 check_timer(); 2781 check_timer();
2184} 2782}
2185 2783
2784/*
2785 * Called after all the initialization is done. If we didnt find any
2786 * APIC bugs then we can allow the modify fast path
2787 */
2788
2789static int __init io_apic_bug_finalize(void)
2790{
2791 if (sis_apic_bug == -1)
2792 sis_apic_bug = 0;
2793 return 0;
2794}
2795
2796late_initcall(io_apic_bug_finalize);
2797
2186struct sysfs_ioapic_data { 2798struct sysfs_ioapic_data {
2187 struct sys_device dev; 2799 struct sys_device dev;
2188 struct IO_APIC_route_entry entry[0]; 2800 struct IO_APIC_route_entry entry[0];
@@ -2270,32 +2882,51 @@ device_initcall(ioapic_init_sysfs);
2270/* 2882/*
2271 * Dynamic irq allocate and deallocation 2883 * Dynamic irq allocate and deallocation
2272 */ 2884 */
2273int create_irq(void) 2885unsigned int create_irq_nr(unsigned int irq_want)
2274{ 2886{
2275 /* Allocate an unused irq */ 2887 /* Allocate an unused irq */
2276 int irq; 2888 unsigned int irq;
2277 int new; 2889 unsigned int new;
2278 unsigned long flags; 2890 unsigned long flags;
2891 struct irq_cfg *cfg_new;
2892
2893 irq_want = nr_irqs - 1;
2279 2894
2280 irq = -ENOSPC; 2895 irq = 0;
2281 spin_lock_irqsave(&vector_lock, flags); 2896 spin_lock_irqsave(&vector_lock, flags);
2282 for (new = (NR_IRQS - 1); new >= 0; new--) { 2897 for (new = irq_want; new > 0; new--) {
2283 if (platform_legacy_irq(new)) 2898 if (platform_legacy_irq(new))
2284 continue; 2899 continue;
2285 if (irq_cfg[new].vector != 0) 2900 cfg_new = irq_cfg(new);
2901 if (cfg_new && cfg_new->vector != 0)
2286 continue; 2902 continue;
2903 /* check if need to create one */
2904 if (!cfg_new)
2905 cfg_new = irq_cfg_alloc(new);
2287 if (__assign_irq_vector(new, TARGET_CPUS) == 0) 2906 if (__assign_irq_vector(new, TARGET_CPUS) == 0)
2288 irq = new; 2907 irq = new;
2289 break; 2908 break;
2290 } 2909 }
2291 spin_unlock_irqrestore(&vector_lock, flags); 2910 spin_unlock_irqrestore(&vector_lock, flags);
2292 2911
2293 if (irq >= 0) { 2912 if (irq > 0) {
2294 dynamic_irq_init(irq); 2913 dynamic_irq_init(irq);
2295 } 2914 }
2296 return irq; 2915 return irq;
2297} 2916}
2298 2917
2918int create_irq(void)
2919{
2920 int irq;
2921
2922 irq = create_irq_nr(nr_irqs - 1);
2923
2924 if (irq == 0)
2925 irq = -1;
2926
2927 return irq;
2928}
2929
2299void destroy_irq(unsigned int irq) 2930void destroy_irq(unsigned int irq)
2300{ 2931{
2301 unsigned long flags; 2932 unsigned long flags;
@@ -2316,7 +2947,7 @@ void destroy_irq(unsigned int irq)
2316#ifdef CONFIG_PCI_MSI 2947#ifdef CONFIG_PCI_MSI
2317static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) 2948static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
2318{ 2949{
2319 struct irq_cfg *cfg = irq_cfg + irq; 2950 struct irq_cfg *cfg;
2320 int err; 2951 int err;
2321 unsigned dest; 2952 unsigned dest;
2322 cpumask_t tmp; 2953 cpumask_t tmp;
@@ -2326,6 +2957,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
2326 if (err) 2957 if (err)
2327 return err; 2958 return err;
2328 2959
2960 cfg = irq_cfg(irq);
2329 cpus_and(tmp, cfg->domain, tmp); 2961 cpus_and(tmp, cfg->domain, tmp);
2330 dest = cpu_mask_to_apicid(tmp); 2962 dest = cpu_mask_to_apicid(tmp);
2331 2963
@@ -2383,10 +3015,11 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
2383#ifdef CONFIG_SMP 3015#ifdef CONFIG_SMP
2384static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) 3016static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2385{ 3017{
2386 struct irq_cfg *cfg = irq_cfg + irq; 3018 struct irq_cfg *cfg;
2387 struct msi_msg msg; 3019 struct msi_msg msg;
2388 unsigned int dest; 3020 unsigned int dest;
2389 cpumask_t tmp; 3021 cpumask_t tmp;
3022 struct irq_desc *desc;
2390 3023
2391 cpus_and(tmp, mask, cpu_online_map); 3024 cpus_and(tmp, mask, cpu_online_map);
2392 if (cpus_empty(tmp)) 3025 if (cpus_empty(tmp))
@@ -2395,6 +3028,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2395 if (assign_irq_vector(irq, mask)) 3028 if (assign_irq_vector(irq, mask))
2396 return; 3029 return;
2397 3030
3031 cfg = irq_cfg(irq);
2398 cpus_and(tmp, cfg->domain, mask); 3032 cpus_and(tmp, cfg->domain, mask);
2399 dest = cpu_mask_to_apicid(tmp); 3033 dest = cpu_mask_to_apicid(tmp);
2400 3034
@@ -2406,7 +3040,8 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2406 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3040 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
2407 3041
2408 write_msi_msg(irq, &msg); 3042 write_msi_msg(irq, &msg);
2409 irq_desc[irq].affinity = mask; 3043 desc = irq_to_desc(irq);
3044 desc->affinity = mask;
2410} 3045}
2411 3046
2412#ifdef CONFIG_INTR_REMAP 3047#ifdef CONFIG_INTR_REMAP
@@ -2416,10 +3051,11 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2416 */ 3051 */
2417static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) 3052static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2418{ 3053{
2419 struct irq_cfg *cfg = irq_cfg + irq; 3054 struct irq_cfg *cfg;
2420 unsigned int dest; 3055 unsigned int dest;
2421 cpumask_t tmp, cleanup_mask; 3056 cpumask_t tmp, cleanup_mask;
2422 struct irte irte; 3057 struct irte irte;
3058 struct irq_desc *desc;
2423 3059
2424 cpus_and(tmp, mask, cpu_online_map); 3060 cpus_and(tmp, mask, cpu_online_map);
2425 if (cpus_empty(tmp)) 3061 if (cpus_empty(tmp))
@@ -2431,6 +3067,7 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2431 if (assign_irq_vector(irq, mask)) 3067 if (assign_irq_vector(irq, mask))
2432 return; 3068 return;
2433 3069
3070 cfg = irq_cfg(irq);
2434 cpus_and(tmp, cfg->domain, mask); 3071 cpus_and(tmp, cfg->domain, mask);
2435 dest = cpu_mask_to_apicid(tmp); 3072 dest = cpu_mask_to_apicid(tmp);
2436 3073
@@ -2454,7 +3091,8 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2454 cfg->move_in_progress = 0; 3091 cfg->move_in_progress = 0;
2455 } 3092 }
2456 3093
2457 irq_desc[irq].affinity = mask; 3094 desc = irq_to_desc(irq);
3095 desc->affinity = mask;
2458} 3096}
2459#endif 3097#endif
2460#endif /* CONFIG_SMP */ 3098#endif /* CONFIG_SMP */
@@ -2507,7 +3145,7 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
2507 if (index < 0) { 3145 if (index < 0) {
2508 printk(KERN_ERR 3146 printk(KERN_ERR
2509 "Unable to allocate %d IRTE for PCI %s\n", nvec, 3147 "Unable to allocate %d IRTE for PCI %s\n", nvec,
2510 pci_name(dev)); 3148 pci_name(dev));
2511 return -ENOSPC; 3149 return -ENOSPC;
2512 } 3150 }
2513 return index; 3151 return index;
@@ -2528,7 +3166,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
2528 3166
2529#ifdef CONFIG_INTR_REMAP 3167#ifdef CONFIG_INTR_REMAP
2530 if (irq_remapped(irq)) { 3168 if (irq_remapped(irq)) {
2531 struct irq_desc *desc = irq_desc + irq; 3169 struct irq_desc *desc = irq_to_desc(irq);
2532 /* 3170 /*
2533 * irq migration in process context 3171 * irq migration in process context
2534 */ 3172 */
@@ -2538,16 +3176,34 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
2538#endif 3176#endif
2539 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); 3177 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
2540 3178
3179 dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);
3180
2541 return 0; 3181 return 0;
2542} 3182}
2543 3183
3184static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
3185{
3186 unsigned int irq;
3187
3188 irq = dev->bus->number;
3189 irq <<= 8;
3190 irq |= dev->devfn;
3191 irq <<= 12;
3192
3193 return irq;
3194}
3195
2544int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) 3196int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
2545{ 3197{
2546 int irq, ret; 3198 unsigned int irq;
3199 int ret;
3200 unsigned int irq_want;
2547 3201
2548 irq = create_irq(); 3202 irq_want = build_irq_for_pci_dev(dev) + 0x100;
2549 if (irq < 0) 3203
2550 return irq; 3204 irq = create_irq_nr(irq_want);
3205 if (irq == 0)
3206 return -1;
2551 3207
2552#ifdef CONFIG_INTR_REMAP 3208#ifdef CONFIG_INTR_REMAP
2553 if (!intr_remapping_enabled) 3209 if (!intr_remapping_enabled)
@@ -2574,18 +3230,22 @@ error:
2574 3230
2575int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) 3231int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
2576{ 3232{
2577 int irq, ret, sub_handle; 3233 unsigned int irq;
3234 int ret, sub_handle;
2578 struct msi_desc *desc; 3235 struct msi_desc *desc;
3236 unsigned int irq_want;
3237
2579#ifdef CONFIG_INTR_REMAP 3238#ifdef CONFIG_INTR_REMAP
2580 struct intel_iommu *iommu = 0; 3239 struct intel_iommu *iommu = 0;
2581 int index = 0; 3240 int index = 0;
2582#endif 3241#endif
2583 3242
3243 irq_want = build_irq_for_pci_dev(dev) + 0x100;
2584 sub_handle = 0; 3244 sub_handle = 0;
2585 list_for_each_entry(desc, &dev->msi_list, list) { 3245 list_for_each_entry(desc, &dev->msi_list, list) {
2586 irq = create_irq(); 3246 irq = create_irq_nr(irq_want--);
2587 if (irq < 0) 3247 if (irq == 0)
2588 return irq; 3248 return -1;
2589#ifdef CONFIG_INTR_REMAP 3249#ifdef CONFIG_INTR_REMAP
2590 if (!intr_remapping_enabled) 3250 if (!intr_remapping_enabled)
2591 goto no_ir; 3251 goto no_ir;
@@ -2636,10 +3296,11 @@ void arch_teardown_msi_irq(unsigned int irq)
2636#ifdef CONFIG_SMP 3296#ifdef CONFIG_SMP
2637static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) 3297static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
2638{ 3298{
2639 struct irq_cfg *cfg = irq_cfg + irq; 3299 struct irq_cfg *cfg;
2640 struct msi_msg msg; 3300 struct msi_msg msg;
2641 unsigned int dest; 3301 unsigned int dest;
2642 cpumask_t tmp; 3302 cpumask_t tmp;
3303 struct irq_desc *desc;
2643 3304
2644 cpus_and(tmp, mask, cpu_online_map); 3305 cpus_and(tmp, mask, cpu_online_map);
2645 if (cpus_empty(tmp)) 3306 if (cpus_empty(tmp))
@@ -2648,6 +3309,7 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
2648 if (assign_irq_vector(irq, mask)) 3309 if (assign_irq_vector(irq, mask))
2649 return; 3310 return;
2650 3311
3312 cfg = irq_cfg(irq);
2651 cpus_and(tmp, cfg->domain, mask); 3313 cpus_and(tmp, cfg->domain, mask);
2652 dest = cpu_mask_to_apicid(tmp); 3314 dest = cpu_mask_to_apicid(tmp);
2653 3315
@@ -2659,7 +3321,8 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
2659 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3321 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
2660 3322
2661 dmar_msi_write(irq, &msg); 3323 dmar_msi_write(irq, &msg);
2662 irq_desc[irq].affinity = mask; 3324 desc = irq_to_desc(irq);
3325 desc->affinity = mask;
2663} 3326}
2664#endif /* CONFIG_SMP */ 3327#endif /* CONFIG_SMP */
2665 3328
@@ -2689,6 +3352,69 @@ int arch_setup_dmar_msi(unsigned int irq)
2689} 3352}
2690#endif 3353#endif
2691 3354
3355#ifdef CONFIG_HPET_TIMER
3356
3357#ifdef CONFIG_SMP
3358static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
3359{
3360 struct irq_cfg *cfg;
3361 struct irq_desc *desc;
3362 struct msi_msg msg;
3363 unsigned int dest;
3364 cpumask_t tmp;
3365
3366 cpus_and(tmp, mask, cpu_online_map);
3367 if (cpus_empty(tmp))
3368 return;
3369
3370 if (assign_irq_vector(irq, mask))
3371 return;
3372
3373 cfg = irq_cfg(irq);
3374 cpus_and(tmp, cfg->domain, mask);
3375 dest = cpu_mask_to_apicid(tmp);
3376
3377 hpet_msi_read(irq, &msg);
3378
3379 msg.data &= ~MSI_DATA_VECTOR_MASK;
3380 msg.data |= MSI_DATA_VECTOR(cfg->vector);
3381 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
3382 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3383
3384 hpet_msi_write(irq, &msg);
3385 desc = irq_to_desc(irq);
3386 desc->affinity = mask;
3387}
3388#endif /* CONFIG_SMP */
3389
3390struct irq_chip hpet_msi_type = {
3391 .name = "HPET_MSI",
3392 .unmask = hpet_msi_unmask,
3393 .mask = hpet_msi_mask,
3394 .ack = ack_apic_edge,
3395#ifdef CONFIG_SMP
3396 .set_affinity = hpet_msi_set_affinity,
3397#endif
3398 .retrigger = ioapic_retrigger_irq,
3399};
3400
3401int arch_setup_hpet_msi(unsigned int irq)
3402{
3403 int ret;
3404 struct msi_msg msg;
3405
3406 ret = msi_compose_msg(NULL, irq, &msg);
3407 if (ret < 0)
3408 return ret;
3409
3410 hpet_msi_write(irq, &msg);
3411 set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq,
3412 "edge");
3413
3414 return 0;
3415}
3416#endif
3417
2692#endif /* CONFIG_PCI_MSI */ 3418#endif /* CONFIG_PCI_MSI */
2693/* 3419/*
2694 * Hypertransport interrupt support 3420 * Hypertransport interrupt support
@@ -2713,9 +3439,10 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
2713 3439
2714static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) 3440static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
2715{ 3441{
2716 struct irq_cfg *cfg = irq_cfg + irq; 3442 struct irq_cfg *cfg;
2717 unsigned int dest; 3443 unsigned int dest;
2718 cpumask_t tmp; 3444 cpumask_t tmp;
3445 struct irq_desc *desc;
2719 3446
2720 cpus_and(tmp, mask, cpu_online_map); 3447 cpus_and(tmp, mask, cpu_online_map);
2721 if (cpus_empty(tmp)) 3448 if (cpus_empty(tmp))
@@ -2724,11 +3451,13 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
2724 if (assign_irq_vector(irq, mask)) 3451 if (assign_irq_vector(irq, mask))
2725 return; 3452 return;
2726 3453
3454 cfg = irq_cfg(irq);
2727 cpus_and(tmp, cfg->domain, mask); 3455 cpus_and(tmp, cfg->domain, mask);
2728 dest = cpu_mask_to_apicid(tmp); 3456 dest = cpu_mask_to_apicid(tmp);
2729 3457
2730 target_ht_irq(irq, dest, cfg->vector); 3458 target_ht_irq(irq, dest, cfg->vector);
2731 irq_desc[irq].affinity = mask; 3459 desc = irq_to_desc(irq);
3460 desc->affinity = mask;
2732} 3461}
2733#endif 3462#endif
2734 3463
@@ -2745,7 +3474,7 @@ static struct irq_chip ht_irq_chip = {
2745 3474
2746int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) 3475int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
2747{ 3476{
2748 struct irq_cfg *cfg = irq_cfg + irq; 3477 struct irq_cfg *cfg;
2749 int err; 3478 int err;
2750 cpumask_t tmp; 3479 cpumask_t tmp;
2751 3480
@@ -2755,6 +3484,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
2755 struct ht_irq_msg msg; 3484 struct ht_irq_msg msg;
2756 unsigned dest; 3485 unsigned dest;
2757 3486
3487 cfg = irq_cfg(irq);
2758 cpus_and(tmp, cfg->domain, tmp); 3488 cpus_and(tmp, cfg->domain, tmp);
2759 dest = cpu_mask_to_apicid(tmp); 3489 dest = cpu_mask_to_apicid(tmp);
2760 3490
@@ -2777,20 +3507,196 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
2777 3507
2778 set_irq_chip_and_handler_name(irq, &ht_irq_chip, 3508 set_irq_chip_and_handler_name(irq, &ht_irq_chip,
2779 handle_edge_irq, "edge"); 3509 handle_edge_irq, "edge");
3510
3511 dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
2780 } 3512 }
2781 return err; 3513 return err;
2782} 3514}
2783#endif /* CONFIG_HT_IRQ */ 3515#endif /* CONFIG_HT_IRQ */
2784 3516
3517#ifdef CONFIG_X86_64
3518/*
3519 * Re-target the irq to the specified CPU and enable the specified MMR located
3520 * on the specified blade to allow the sending of MSIs to the specified CPU.
3521 */
3522int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3523 unsigned long mmr_offset)
3524{
3525 const cpumask_t *eligible_cpu = get_cpu_mask(cpu);
3526 struct irq_cfg *cfg;
3527 int mmr_pnode;
3528 unsigned long mmr_value;
3529 struct uv_IO_APIC_route_entry *entry;
3530 unsigned long flags;
3531 int err;
3532
3533 err = assign_irq_vector(irq, *eligible_cpu);
3534 if (err != 0)
3535 return err;
3536
3537 spin_lock_irqsave(&vector_lock, flags);
3538 set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
3539 irq_name);
3540 spin_unlock_irqrestore(&vector_lock, flags);
3541
3542 cfg = irq_cfg(irq);
3543
3544 mmr_value = 0;
3545 entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
3546 BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
3547
3548 entry->vector = cfg->vector;
3549 entry->delivery_mode = INT_DELIVERY_MODE;
3550 entry->dest_mode = INT_DEST_MODE;
3551 entry->polarity = 0;
3552 entry->trigger = 0;
3553 entry->mask = 0;
3554 entry->dest = cpu_mask_to_apicid(*eligible_cpu);
3555
3556 mmr_pnode = uv_blade_to_pnode(mmr_blade);
3557 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
3558
3559 return irq;
3560}
3561
3562/*
3563 * Disable the specified MMR located on the specified blade so that MSIs are
3564 * no longer allowed to be sent.
3565 */
3566void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
3567{
3568 unsigned long mmr_value;
3569 struct uv_IO_APIC_route_entry *entry;
3570 int mmr_pnode;
3571
3572 mmr_value = 0;
3573 entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
3574 BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
3575
3576 entry->mask = 1;
3577
3578 mmr_pnode = uv_blade_to_pnode(mmr_blade);
3579 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
3580}
3581#endif /* CONFIG_X86_64 */
3582
3583int __init io_apic_get_redir_entries (int ioapic)
3584{
3585 union IO_APIC_reg_01 reg_01;
3586 unsigned long flags;
3587
3588 spin_lock_irqsave(&ioapic_lock, flags);
3589 reg_01.raw = io_apic_read(ioapic, 1);
3590 spin_unlock_irqrestore(&ioapic_lock, flags);
3591
3592 return reg_01.bits.entries;
3593}
3594
3595int __init probe_nr_irqs(void)
3596{
3597 int idx;
3598 int nr = 0;
3599#ifndef CONFIG_XEN
3600 int nr_min = 32;
3601#else
3602 int nr_min = NR_IRQS;
3603#endif
3604
3605 for (idx = 0; idx < nr_ioapics; idx++)
3606 nr += io_apic_get_redir_entries(idx) + 1;
3607
3608 /* double it for hotplug and msi and nmi */
3609 nr <<= 1;
3610
3611 /* something wrong ? */
3612 if (nr < nr_min)
3613 nr = nr_min;
3614
3615 return nr;
3616}
3617
2785/* -------------------------------------------------------------------------- 3618/* --------------------------------------------------------------------------
2786 ACPI-based IOAPIC Configuration 3619 ACPI-based IOAPIC Configuration
2787 -------------------------------------------------------------------------- */ 3620 -------------------------------------------------------------------------- */
2788 3621
2789#ifdef CONFIG_ACPI 3622#ifdef CONFIG_ACPI
2790 3623
2791#define IO_APIC_MAX_ID 0xFE 3624#ifdef CONFIG_X86_32
3625int __init io_apic_get_unique_id(int ioapic, int apic_id)
3626{
3627 union IO_APIC_reg_00 reg_00;
3628 static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
3629 physid_mask_t tmp;
3630 unsigned long flags;
3631 int i = 0;
2792 3632
2793int __init io_apic_get_redir_entries (int ioapic) 3633 /*
3634 * The P4 platform supports up to 256 APIC IDs on two separate APIC
3635 * buses (one for LAPICs, one for IOAPICs), where predecessors only
3636 * support up to 16 on one shared APIC bus.
3637 *
3638 * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
3639 * advantage of new APIC bus architecture.
3640 */
3641
3642 if (physids_empty(apic_id_map))
3643 apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
3644
3645 spin_lock_irqsave(&ioapic_lock, flags);
3646 reg_00.raw = io_apic_read(ioapic, 0);
3647 spin_unlock_irqrestore(&ioapic_lock, flags);
3648
3649 if (apic_id >= get_physical_broadcast()) {
3650 printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
3651 "%d\n", ioapic, apic_id, reg_00.bits.ID);
3652 apic_id = reg_00.bits.ID;
3653 }
3654
3655 /*
3656 * Every APIC in a system must have a unique ID or we get lots of nice
3657 * 'stuck on smp_invalidate_needed IPI wait' messages.
3658 */
3659 if (check_apicid_used(apic_id_map, apic_id)) {
3660
3661 for (i = 0; i < get_physical_broadcast(); i++) {
3662 if (!check_apicid_used(apic_id_map, i))
3663 break;
3664 }
3665
3666 if (i == get_physical_broadcast())
3667 panic("Max apic_id exceeded!\n");
3668
3669 printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
3670 "trying %d\n", ioapic, apic_id, i);
3671
3672 apic_id = i;
3673 }
3674
3675 tmp = apicid_to_cpu_present(apic_id);
3676 physids_or(apic_id_map, apic_id_map, tmp);
3677
3678 if (reg_00.bits.ID != apic_id) {
3679 reg_00.bits.ID = apic_id;
3680
3681 spin_lock_irqsave(&ioapic_lock, flags);
3682 io_apic_write(ioapic, 0, reg_00.raw);
3683 reg_00.raw = io_apic_read(ioapic, 0);
3684 spin_unlock_irqrestore(&ioapic_lock, flags);
3685
3686 /* Sanity check */
3687 if (reg_00.bits.ID != apic_id) {
3688 printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
3689 return -1;
3690 }
3691 }
3692
3693 apic_printk(APIC_VERBOSE, KERN_INFO
3694 "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
3695
3696 return apic_id;
3697}
3698
3699int __init io_apic_get_version(int ioapic)
2794{ 3700{
2795 union IO_APIC_reg_01 reg_01; 3701 union IO_APIC_reg_01 reg_01;
2796 unsigned long flags; 3702 unsigned long flags;
@@ -2799,9 +3705,9 @@ int __init io_apic_get_redir_entries (int ioapic)
2799 reg_01.raw = io_apic_read(ioapic, 1); 3705 reg_01.raw = io_apic_read(ioapic, 1);
2800 spin_unlock_irqrestore(&ioapic_lock, flags); 3706 spin_unlock_irqrestore(&ioapic_lock, flags);
2801 3707
2802 return reg_01.bits.entries; 3708 return reg_01.bits.version;
2803} 3709}
2804 3710#endif
2805 3711
2806int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) 3712int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
2807{ 3713{
@@ -2853,6 +3759,7 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
2853void __init setup_ioapic_dest(void) 3759void __init setup_ioapic_dest(void)
2854{ 3760{
2855 int pin, ioapic, irq, irq_entry; 3761 int pin, ioapic, irq, irq_entry;
3762 struct irq_cfg *cfg;
2856 3763
2857 if (skip_ioapic_setup == 1) 3764 if (skip_ioapic_setup == 1)
2858 return; 3765 return;
@@ -2868,7 +3775,8 @@ void __init setup_ioapic_dest(void)
2868 * when you have too many devices, because at that time only boot 3775 * when you have too many devices, because at that time only boot
2869 * cpu is online. 3776 * cpu is online.
2870 */ 3777 */
2871 if (!irq_cfg[irq].vector) 3778 cfg = irq_cfg(irq);
3779 if (!cfg->vector)
2872 setup_IO_APIC_irq(ioapic, pin, irq, 3780 setup_IO_APIC_irq(ioapic, pin, irq,
2873 irq_trigger(irq_entry), 3781 irq_trigger(irq_entry),
2874 irq_polarity(irq_entry)); 3782 irq_polarity(irq_entry));
@@ -2926,18 +3834,33 @@ void __init ioapic_init_mappings(void)
2926 struct resource *ioapic_res; 3834 struct resource *ioapic_res;
2927 int i; 3835 int i;
2928 3836
3837 irq_2_pin_init();
2929 ioapic_res = ioapic_setup_resources(); 3838 ioapic_res = ioapic_setup_resources();
2930 for (i = 0; i < nr_ioapics; i++) { 3839 for (i = 0; i < nr_ioapics; i++) {
2931 if (smp_found_config) { 3840 if (smp_found_config) {
2932 ioapic_phys = mp_ioapics[i].mp_apicaddr; 3841 ioapic_phys = mp_ioapics[i].mp_apicaddr;
3842#ifdef CONFIG_X86_32
3843 if (!ioapic_phys) {
3844 printk(KERN_ERR
3845 "WARNING: bogus zero IO-APIC "
3846 "address found in MPTABLE, "
3847 "disabling IO/APIC support!\n");
3848 smp_found_config = 0;
3849 skip_ioapic_setup = 1;
3850 goto fake_ioapic_page;
3851 }
3852#endif
2933 } else { 3853 } else {
3854#ifdef CONFIG_X86_32
3855fake_ioapic_page:
3856#endif
2934 ioapic_phys = (unsigned long) 3857 ioapic_phys = (unsigned long)
2935 alloc_bootmem_pages(PAGE_SIZE); 3858 alloc_bootmem_pages(PAGE_SIZE);
2936 ioapic_phys = __pa(ioapic_phys); 3859 ioapic_phys = __pa(ioapic_phys);
2937 } 3860 }
2938 set_fixmap_nocache(idx, ioapic_phys); 3861 set_fixmap_nocache(idx, ioapic_phys);
2939 apic_printk(APIC_VERBOSE, 3862 apic_printk(APIC_VERBOSE,
2940 "mapped IOAPIC to %016lx (%016lx)\n", 3863 "mapped IOAPIC to %08lx (%08lx)\n",
2941 __fix_to_virt(idx), ioapic_phys); 3864 __fix_to_virt(idx), ioapic_phys);
2942 idx++; 3865 idx++;
2943 3866
@@ -2971,4 +3894,3 @@ static int __init ioapic_insert_resources(void)
2971/* Insert the IO APIC resources after PCI initialization has occurred to handle 3894/* Insert the IO APIC resources after PCI initialization has occurred to handle
2972 * IO APICS that are mapped in on a BAR in PCI space. */ 3895 * IO APICS that are mapped in on a BAR in PCI space. */
2973late_initcall(ioapic_insert_resources); 3896late_initcall(ioapic_insert_resources);
2974
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
deleted file mode 100644
index e710289f673e..000000000000
--- a/arch/x86/kernel/io_apic_32.c
+++ /dev/null
@@ -1,2908 +0,0 @@
1/*
2 * Intel IO-APIC support for multi-Pentium hosts.
3 *
4 * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
5 *
6 * Many thanks to Stig Venaas for trying out countless experimental
7 * patches and reporting/debugging problems patiently!
8 *
9 * (c) 1999, Multiple IO-APIC support, developed by
10 * Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
11 * Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
12 * further tested and cleaned up by Zach Brown <zab@redhat.com>
13 * and Ingo Molnar <mingo@redhat.com>
14 *
15 * Fixes
16 * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
17 * thanks to Eric Gilmore
18 * and Rolf G. Tews
19 * for testing these extensively
20 * Paul Diefenbaugh : Added full ACPI support
21 */
22
23#include <linux/mm.h>
24#include <linux/interrupt.h>
25#include <linux/init.h>
26#include <linux/delay.h>
27#include <linux/sched.h>
28#include <linux/bootmem.h>
29#include <linux/mc146818rtc.h>
30#include <linux/compiler.h>
31#include <linux/acpi.h>
32#include <linux/module.h>
33#include <linux/sysdev.h>
34#include <linux/pci.h>
35#include <linux/msi.h>
36#include <linux/htirq.h>
37#include <linux/freezer.h>
38#include <linux/kthread.h>
39#include <linux/jiffies.h> /* time_after() */
40
41#include <asm/io.h>
42#include <asm/smp.h>
43#include <asm/desc.h>
44#include <asm/timer.h>
45#include <asm/i8259.h>
46#include <asm/nmi.h>
47#include <asm/msidef.h>
48#include <asm/hypertransport.h>
49#include <asm/setup.h>
50
51#include <mach_apic.h>
52#include <mach_apicdef.h>
53
54#define __apicdebuginit(type) static type __init
55
56int (*ioapic_renumber_irq)(int ioapic, int irq);
57atomic_t irq_mis_count;
58
59/* Where, if anywhere, is the i8259 connected in external int mode */
60static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
61
62static DEFINE_SPINLOCK(ioapic_lock);
63DEFINE_SPINLOCK(vector_lock);
64
65int timer_through_8259 __initdata;
66
67/*
68 * Is the SiS APIC rmw bug present ?
69 * -1 = don't know, 0 = no, 1 = yes
70 */
71int sis_apic_bug = -1;
72
73/*
74 * # of IRQ routing registers
75 */
76int nr_ioapic_registers[MAX_IO_APICS];
77
78/* I/O APIC entries */
79struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
80int nr_ioapics;
81
82/* MP IRQ source entries */
83struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
84
85/* # of MP IRQ source entries */
86int mp_irq_entries;
87
88#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
89int mp_bus_id_to_type[MAX_MP_BUSSES];
90#endif
91
92DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
93
94static int disable_timer_pin_1 __initdata;
95
96/*
97 * Rough estimation of how many shared IRQs there are, can
98 * be changed anytime.
99 */
100#define MAX_PLUS_SHARED_IRQS NR_IRQS
101#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
102
103/*
104 * This is performance-critical, we want to do it O(1)
105 *
106 * the indexing order of this array favors 1:1 mappings
107 * between pins and IRQs.
108 */
109
110static struct irq_pin_list {
111 int apic, pin, next;
112} irq_2_pin[PIN_MAP_SIZE];
113
114struct io_apic {
115 unsigned int index;
116 unsigned int unused[3];
117 unsigned int data;
118};
119
120static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
121{
122 return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
123 + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
124}
125
126static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
127{
128 struct io_apic __iomem *io_apic = io_apic_base(apic);
129 writel(reg, &io_apic->index);
130 return readl(&io_apic->data);
131}
132
133static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
134{
135 struct io_apic __iomem *io_apic = io_apic_base(apic);
136 writel(reg, &io_apic->index);
137 writel(value, &io_apic->data);
138}
139
140/*
141 * Re-write a value: to be used for read-modify-write
142 * cycles where the read already set up the index register.
143 *
144 * Older SiS APIC requires we rewrite the index register
145 */
146static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
147{
148 volatile struct io_apic __iomem *io_apic = io_apic_base(apic);
149 if (sis_apic_bug)
150 writel(reg, &io_apic->index);
151 writel(value, &io_apic->data);
152}
153
154union entry_union {
155 struct { u32 w1, w2; };
156 struct IO_APIC_route_entry entry;
157};
158
159static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
160{
161 union entry_union eu;
162 unsigned long flags;
163 spin_lock_irqsave(&ioapic_lock, flags);
164 eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
165 eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
166 spin_unlock_irqrestore(&ioapic_lock, flags);
167 return eu.entry;
168}
169
170/*
171 * When we write a new IO APIC routing entry, we need to write the high
172 * word first! If the mask bit in the low word is clear, we will enable
173 * the interrupt, and we need to make sure the entry is fully populated
174 * before that happens.
175 */
176static void
177__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
178{
179 union entry_union eu;
180 eu.entry = e;
181 io_apic_write(apic, 0x11 + 2*pin, eu.w2);
182 io_apic_write(apic, 0x10 + 2*pin, eu.w1);
183}
184
185static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
186{
187 unsigned long flags;
188 spin_lock_irqsave(&ioapic_lock, flags);
189 __ioapic_write_entry(apic, pin, e);
190 spin_unlock_irqrestore(&ioapic_lock, flags);
191}
192
193/*
194 * When we mask an IO APIC routing entry, we need to write the low
195 * word first, in order to set the mask bit before we change the
196 * high bits!
197 */
198static void ioapic_mask_entry(int apic, int pin)
199{
200 unsigned long flags;
201 union entry_union eu = { .entry.mask = 1 };
202
203 spin_lock_irqsave(&ioapic_lock, flags);
204 io_apic_write(apic, 0x10 + 2*pin, eu.w1);
205 io_apic_write(apic, 0x11 + 2*pin, eu.w2);
206 spin_unlock_irqrestore(&ioapic_lock, flags);
207}
208
209/*
210 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
211 * shared ISA-space IRQs, so we have to support them. We are super
212 * fast in the common case, and fast for shared ISA-space IRQs.
213 */
214static void add_pin_to_irq(unsigned int irq, int apic, int pin)
215{
216 static int first_free_entry = NR_IRQS;
217 struct irq_pin_list *entry = irq_2_pin + irq;
218
219 while (entry->next)
220 entry = irq_2_pin + entry->next;
221
222 if (entry->pin != -1) {
223 entry->next = first_free_entry;
224 entry = irq_2_pin + entry->next;
225 if (++first_free_entry >= PIN_MAP_SIZE)
226 panic("io_apic.c: whoops");
227 }
228 entry->apic = apic;
229 entry->pin = pin;
230}
231
232/*
233 * Reroute an IRQ to a different pin.
234 */
235static void __init replace_pin_at_irq(unsigned int irq,
236 int oldapic, int oldpin,
237 int newapic, int newpin)
238{
239 struct irq_pin_list *entry = irq_2_pin + irq;
240
241 while (1) {
242 if (entry->apic == oldapic && entry->pin == oldpin) {
243 entry->apic = newapic;
244 entry->pin = newpin;
245 }
246 if (!entry->next)
247 break;
248 entry = irq_2_pin + entry->next;
249 }
250}
251
252static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable)
253{
254 struct irq_pin_list *entry = irq_2_pin + irq;
255 unsigned int pin, reg;
256
257 for (;;) {
258 pin = entry->pin;
259 if (pin == -1)
260 break;
261 reg = io_apic_read(entry->apic, 0x10 + pin*2);
262 reg &= ~disable;
263 reg |= enable;
264 io_apic_modify(entry->apic, 0x10 + pin*2, reg);
265 if (!entry->next)
266 break;
267 entry = irq_2_pin + entry->next;
268 }
269}
270
271/* mask = 1 */
272static void __mask_IO_APIC_irq(unsigned int irq)
273{
274 __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0);
275}
276
277/* mask = 0 */
278static void __unmask_IO_APIC_irq(unsigned int irq)
279{
280 __modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED);
281}
282
283/* mask = 1, trigger = 0 */
284static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
285{
286 __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED,
287 IO_APIC_REDIR_LEVEL_TRIGGER);
288}
289
290/* mask = 0, trigger = 1 */
291static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
292{
293 __modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER,
294 IO_APIC_REDIR_MASKED);
295}
296
297static void mask_IO_APIC_irq(unsigned int irq)
298{
299 unsigned long flags;
300
301 spin_lock_irqsave(&ioapic_lock, flags);
302 __mask_IO_APIC_irq(irq);
303 spin_unlock_irqrestore(&ioapic_lock, flags);
304}
305
306static void unmask_IO_APIC_irq(unsigned int irq)
307{
308 unsigned long flags;
309
310 spin_lock_irqsave(&ioapic_lock, flags);
311 __unmask_IO_APIC_irq(irq);
312 spin_unlock_irqrestore(&ioapic_lock, flags);
313}
314
315static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
316{
317 struct IO_APIC_route_entry entry;
318
319 /* Check delivery_mode to be sure we're not clearing an SMI pin */
320 entry = ioapic_read_entry(apic, pin);
321 if (entry.delivery_mode == dest_SMI)
322 return;
323
324 /*
325 * Disable it in the IO-APIC irq-routing table:
326 */
327 ioapic_mask_entry(apic, pin);
328}
329
330static void clear_IO_APIC(void)
331{
332 int apic, pin;
333
334 for (apic = 0; apic < nr_ioapics; apic++)
335 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
336 clear_IO_APIC_pin(apic, pin);
337}
338
339#ifdef CONFIG_SMP
340static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
341{
342 unsigned long flags;
343 int pin;
344 struct irq_pin_list *entry = irq_2_pin + irq;
345 unsigned int apicid_value;
346 cpumask_t tmp;
347
348 cpus_and(tmp, cpumask, cpu_online_map);
349 if (cpus_empty(tmp))
350 tmp = TARGET_CPUS;
351
352 cpus_and(cpumask, tmp, CPU_MASK_ALL);
353
354 apicid_value = cpu_mask_to_apicid(cpumask);
355 /* Prepare to do the io_apic_write */
356 apicid_value = apicid_value << 24;
357 spin_lock_irqsave(&ioapic_lock, flags);
358 for (;;) {
359 pin = entry->pin;
360 if (pin == -1)
361 break;
362 io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value);
363 if (!entry->next)
364 break;
365 entry = irq_2_pin + entry->next;
366 }
367 irq_desc[irq].affinity = cpumask;
368 spin_unlock_irqrestore(&ioapic_lock, flags);
369}
370
371#if defined(CONFIG_IRQBALANCE)
372# include <asm/processor.h> /* kernel_thread() */
373# include <linux/kernel_stat.h> /* kstat */
374# include <linux/slab.h> /* kmalloc() */
375# include <linux/timer.h>
376
377#define IRQBALANCE_CHECK_ARCH -999
378#define MAX_BALANCED_IRQ_INTERVAL (5*HZ)
379#define MIN_BALANCED_IRQ_INTERVAL (HZ/2)
380#define BALANCED_IRQ_MORE_DELTA (HZ/10)
381#define BALANCED_IRQ_LESS_DELTA (HZ)
382
383static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH;
384static int physical_balance __read_mostly;
385static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL;
386
387static struct irq_cpu_info {
388 unsigned long *last_irq;
389 unsigned long *irq_delta;
390 unsigned long irq;
391} irq_cpu_data[NR_CPUS];
392
393#define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq)
394#define LAST_CPU_IRQ(cpu, irq) (irq_cpu_data[cpu].last_irq[irq])
395#define IRQ_DELTA(cpu, irq) (irq_cpu_data[cpu].irq_delta[irq])
396
397#define IDLE_ENOUGH(cpu,now) \
398 (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1))
399
400#define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask)
401
402#define CPU_TO_PACKAGEINDEX(i) (first_cpu(per_cpu(cpu_sibling_map, i)))
403
404static cpumask_t balance_irq_affinity[NR_IRQS] = {
405 [0 ... NR_IRQS-1] = CPU_MASK_ALL
406};
407
408void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
409{
410 balance_irq_affinity[irq] = mask;
411}
412
413static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
414 unsigned long now, int direction)
415{
416 int search_idle = 1;
417 int cpu = curr_cpu;
418
419 goto inside;
420
421 do {
422 if (unlikely(cpu == curr_cpu))
423 search_idle = 0;
424inside:
425 if (direction == 1) {
426 cpu++;
427 if (cpu >= NR_CPUS)
428 cpu = 0;
429 } else {
430 cpu--;
431 if (cpu == -1)
432 cpu = NR_CPUS-1;
433 }
434 } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu, allowed_mask) ||
435 (search_idle && !IDLE_ENOUGH(cpu, now)));
436
437 return cpu;
438}
439
440static inline void balance_irq(int cpu, int irq)
441{
442 unsigned long now = jiffies;
443 cpumask_t allowed_mask;
444 unsigned int new_cpu;
445
446 if (irqbalance_disabled)
447 return;
448
449 cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]);
450 new_cpu = move(cpu, allowed_mask, now, 1);
451 if (cpu != new_cpu)
452 set_pending_irq(irq, cpumask_of_cpu(new_cpu));
453}
454
455static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
456{
457 int i, j;
458
459 for_each_online_cpu(i) {
460 for (j = 0; j < NR_IRQS; j++) {
461 if (!irq_desc[j].action)
462 continue;
463 /* Is it a significant load ? */
464 if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i), j) <
465 useful_load_threshold)
466 continue;
467 balance_irq(i, j);
468 }
469 }
470 balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
471 balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
472 return;
473}
474
475static void do_irq_balance(void)
476{
477 int i, j;
478 unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
479 unsigned long move_this_load = 0;
480 int max_loaded = 0, min_loaded = 0;
481 int load;
482 unsigned long useful_load_threshold = balanced_irq_interval + 10;
483 int selected_irq;
484 int tmp_loaded, first_attempt = 1;
485 unsigned long tmp_cpu_irq;
486 unsigned long imbalance = 0;
487 cpumask_t allowed_mask, target_cpu_mask, tmp;
488
489 for_each_possible_cpu(i) {
490 int package_index;
491 CPU_IRQ(i) = 0;
492 if (!cpu_online(i))
493 continue;
494 package_index = CPU_TO_PACKAGEINDEX(i);
495 for (j = 0; j < NR_IRQS; j++) {
496 unsigned long value_now, delta;
497 /* Is this an active IRQ or balancing disabled ? */
498 if (!irq_desc[j].action || irq_balancing_disabled(j))
499 continue;
500 if (package_index == i)
501 IRQ_DELTA(package_index, j) = 0;
502 /* Determine the total count per processor per IRQ */
503 value_now = (unsigned long) kstat_cpu(i).irqs[j];
504
505 /* Determine the activity per processor per IRQ */
506 delta = value_now - LAST_CPU_IRQ(i, j);
507
508 /* Update last_cpu_irq[][] for the next time */
509 LAST_CPU_IRQ(i, j) = value_now;
510
511 /* Ignore IRQs whose rate is less than the clock */
512 if (delta < useful_load_threshold)
513 continue;
514 /* update the load for the processor or package total */
515 IRQ_DELTA(package_index, j) += delta;
516
517 /* Keep track of the higher numbered sibling as well */
518 if (i != package_index)
519 CPU_IRQ(i) += delta;
520 /*
521 * We have sibling A and sibling B in the package
522 *
523 * cpu_irq[A] = load for cpu A + load for cpu B
524 * cpu_irq[B] = load for cpu B
525 */
526 CPU_IRQ(package_index) += delta;
527 }
528 }
529 /* Find the least loaded processor package */
530 for_each_online_cpu(i) {
531 if (i != CPU_TO_PACKAGEINDEX(i))
532 continue;
533 if (min_cpu_irq > CPU_IRQ(i)) {
534 min_cpu_irq = CPU_IRQ(i);
535 min_loaded = i;
536 }
537 }
538 max_cpu_irq = ULONG_MAX;
539
540tryanothercpu:
541 /*
542 * Look for heaviest loaded processor.
543 * We may come back to get the next heaviest loaded processor.
544 * Skip processors with trivial loads.
545 */
546 tmp_cpu_irq = 0;
547 tmp_loaded = -1;
548 for_each_online_cpu(i) {
549 if (i != CPU_TO_PACKAGEINDEX(i))
550 continue;
551 if (max_cpu_irq <= CPU_IRQ(i))
552 continue;
553 if (tmp_cpu_irq < CPU_IRQ(i)) {
554 tmp_cpu_irq = CPU_IRQ(i);
555 tmp_loaded = i;
556 }
557 }
558
559 if (tmp_loaded == -1) {
560 /*
561 * In the case of a small number of heavy interrupt sources
562 * loading some of the cpus too much, we use Ingo's original
563 * approach to rotate them around.
564 */
565 if (!first_attempt && imbalance >= useful_load_threshold) {
566 rotate_irqs_among_cpus(useful_load_threshold);
567 return;
568 }
569 goto not_worth_the_effort;
570 }
571
572 first_attempt = 0; /* heaviest search */
573 max_cpu_irq = tmp_cpu_irq; /* load */
574 max_loaded = tmp_loaded; /* processor */
575 imbalance = (max_cpu_irq - min_cpu_irq) / 2;
576
577 /*
578 * if imbalance is less than approx 10% of max load, then
579 * observe diminishing returns action. - quit
580 */
581 if (imbalance < (max_cpu_irq >> 3))
582 goto not_worth_the_effort;
583
584tryanotherirq:
585 /* if we select an IRQ to move that can't go where we want, then
586 * see if there is another one to try.
587 */
588 move_this_load = 0;
589 selected_irq = -1;
590 for (j = 0; j < NR_IRQS; j++) {
591 /* Is this an active IRQ? */
592 if (!irq_desc[j].action)
593 continue;
594 if (imbalance <= IRQ_DELTA(max_loaded, j))
595 continue;
596 /* Try to find the IRQ that is closest to the imbalance
597 * without going over.
598 */
599 if (move_this_load < IRQ_DELTA(max_loaded, j)) {
600 move_this_load = IRQ_DELTA(max_loaded, j);
601 selected_irq = j;
602 }
603 }
604 if (selected_irq == -1)
605 goto tryanothercpu;
606
607 imbalance = move_this_load;
608
609 /* For physical_balance case, we accumulated both load
610 * values in one of the siblings' cpu_irq[],
611 * to use the same code for physical and logical processors
612 * as much as possible.
613 *
614 * NOTE: the cpu_irq[] array holds the sum of the load for
615 * sibling A and sibling B in the slot for the lowest numbered
616 * sibling (A), _AND_ the load for sibling B in the slot for
617 * the higher numbered sibling.
618 *
619 * We seek the least loaded sibling by making the comparison
620 * (A+B)/2 vs B
621 */
622 load = CPU_IRQ(min_loaded) >> 1;
623 for_each_cpu_mask(j, per_cpu(cpu_sibling_map, min_loaded)) {
624 if (load > CPU_IRQ(j)) {
625 /* This won't change cpu_sibling_map[min_loaded] */
626 load = CPU_IRQ(j);
627 min_loaded = j;
628 }
629 }
630
631 cpus_and(allowed_mask,
632 cpu_online_map,
633 balance_irq_affinity[selected_irq]);
634 target_cpu_mask = cpumask_of_cpu(min_loaded);
635 cpus_and(tmp, target_cpu_mask, allowed_mask);
636
637 if (!cpus_empty(tmp)) {
638 /* mark for change destination */
639 set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
640
641 /* Since we made a change, come back sooner to
642 * check for more variation.
643 */
644 balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
645 balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
646 return;
647 }
648 goto tryanotherirq;
649
650not_worth_the_effort:
651 /*
652 * if we did not find an IRQ to move, then adjust the time interval
653 * upward
654 */
655 balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
656 balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);
657 return;
658}
659
660static int balanced_irq(void *unused)
661{
662 int i;
663 unsigned long prev_balance_time = jiffies;
664 long time_remaining = balanced_irq_interval;
665
666 /* push everything to CPU 0 to give us a starting point. */
667 for (i = 0 ; i < NR_IRQS ; i++) {
668 irq_desc[i].pending_mask = cpumask_of_cpu(0);
669 set_pending_irq(i, cpumask_of_cpu(0));
670 }
671
672 set_freezable();
673 for ( ; ; ) {
674 time_remaining = schedule_timeout_interruptible(time_remaining);
675 try_to_freeze();
676 if (time_after(jiffies,
677 prev_balance_time+balanced_irq_interval)) {
678 preempt_disable();
679 do_irq_balance();
680 prev_balance_time = jiffies;
681 time_remaining = balanced_irq_interval;
682 preempt_enable();
683 }
684 }
685 return 0;
686}
687
688static int __init balanced_irq_init(void)
689{
690 int i;
691 struct cpuinfo_x86 *c;
692 cpumask_t tmp;
693
694 cpus_shift_right(tmp, cpu_online_map, 2);
695 c = &boot_cpu_data;
696 /* When not overwritten by the command line ask subarchitecture. */
697 if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
698 irqbalance_disabled = NO_BALANCE_IRQ;
699 if (irqbalance_disabled)
700 return 0;
701
702 /* disable irqbalance completely if there is only one processor online */
703 if (num_online_cpus() < 2) {
704 irqbalance_disabled = 1;
705 return 0;
706 }
707 /*
708 * Enable physical balance only if more than 1 physical processor
709 * is present
710 */
711 if (smp_num_siblings > 1 && !cpus_empty(tmp))
712 physical_balance = 1;
713
714 for_each_online_cpu(i) {
715 irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
716 irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
717 if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
718 printk(KERN_ERR "balanced_irq_init: out of memory");
719 goto failed;
720 }
721 }
722
723 printk(KERN_INFO "Starting balanced_irq\n");
724 if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd")))
725 return 0;
726 printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
727failed:
728 for_each_possible_cpu(i) {
729 kfree(irq_cpu_data[i].irq_delta);
730 irq_cpu_data[i].irq_delta = NULL;
731 kfree(irq_cpu_data[i].last_irq);
732 irq_cpu_data[i].last_irq = NULL;
733 }
734 return 0;
735}
736
737int __devinit irqbalance_disable(char *str)
738{
739 irqbalance_disabled = 1;
740 return 1;
741}
742
743__setup("noirqbalance", irqbalance_disable);
744
745late_initcall(balanced_irq_init);
746#endif /* CONFIG_IRQBALANCE */
747#endif /* CONFIG_SMP */
748
749#ifndef CONFIG_SMP
750void send_IPI_self(int vector)
751{
752 unsigned int cfg;
753
754 /*
755 * Wait for idle.
756 */
757 apic_wait_icr_idle();
758 cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
759 /*
760 * Send the IPI. The write to APIC_ICR fires this off.
761 */
762 apic_write(APIC_ICR, cfg);
763}
764#endif /* !CONFIG_SMP */
765
766
767/*
768 * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
769 * specific CPU-side IRQs.
770 */
771
772#define MAX_PIRQS 8
773static int pirq_entries [MAX_PIRQS];
774static int pirqs_enabled;
775int skip_ioapic_setup;
776
777static int __init ioapic_pirq_setup(char *str)
778{
779 int i, max;
780 int ints[MAX_PIRQS+1];
781
782 get_options(str, ARRAY_SIZE(ints), ints);
783
784 for (i = 0; i < MAX_PIRQS; i++)
785 pirq_entries[i] = -1;
786
787 pirqs_enabled = 1;
788 apic_printk(APIC_VERBOSE, KERN_INFO
789 "PIRQ redirection, working around broken MP-BIOS.\n");
790 max = MAX_PIRQS;
791 if (ints[0] < MAX_PIRQS)
792 max = ints[0];
793
794 for (i = 0; i < max; i++) {
795 apic_printk(APIC_VERBOSE, KERN_DEBUG
796 "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
797 /*
798 * PIRQs are mapped upside down, usually.
799 */
800 pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
801 }
802 return 1;
803}
804
805__setup("pirq=", ioapic_pirq_setup);
806
807/*
808 * Find the IRQ entry number of a certain pin.
809 */
810static int find_irq_entry(int apic, int pin, int type)
811{
812 int i;
813
814 for (i = 0; i < mp_irq_entries; i++)
815 if (mp_irqs[i].mp_irqtype == type &&
816 (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
817 mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
818 mp_irqs[i].mp_dstirq == pin)
819 return i;
820
821 return -1;
822}
823
824/*
825 * Find the pin to which IRQ[irq] (ISA) is connected
826 */
827static int __init find_isa_irq_pin(int irq, int type)
828{
829 int i;
830
831 for (i = 0; i < mp_irq_entries; i++) {
832 int lbus = mp_irqs[i].mp_srcbus;
833
834 if (test_bit(lbus, mp_bus_not_pci) &&
835 (mp_irqs[i].mp_irqtype == type) &&
836 (mp_irqs[i].mp_srcbusirq == irq))
837
838 return mp_irqs[i].mp_dstirq;
839 }
840 return -1;
841}
842
843static int __init find_isa_irq_apic(int irq, int type)
844{
845 int i;
846
847 for (i = 0; i < mp_irq_entries; i++) {
848 int lbus = mp_irqs[i].mp_srcbus;
849
850 if (test_bit(lbus, mp_bus_not_pci) &&
851 (mp_irqs[i].mp_irqtype == type) &&
852 (mp_irqs[i].mp_srcbusirq == irq))
853 break;
854 }
855 if (i < mp_irq_entries) {
856 int apic;
857 for (apic = 0; apic < nr_ioapics; apic++) {
858 if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
859 return apic;
860 }
861 }
862
863 return -1;
864}
865
866/*
867 * Find a specific PCI IRQ entry.
868 * Not an __init, possibly needed by modules
869 */
870static int pin_2_irq(int idx, int apic, int pin);
871
872int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
873{
874 int apic, i, best_guess = -1;
875
876 apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, "
877 "slot:%d, pin:%d.\n", bus, slot, pin);
878 if (test_bit(bus, mp_bus_not_pci)) {
879 printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
880 return -1;
881 }
882 for (i = 0; i < mp_irq_entries; i++) {
883 int lbus = mp_irqs[i].mp_srcbus;
884
885 for (apic = 0; apic < nr_ioapics; apic++)
886 if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
887 mp_irqs[i].mp_dstapic == MP_APIC_ALL)
888 break;
889
890 if (!test_bit(lbus, mp_bus_not_pci) &&
891 !mp_irqs[i].mp_irqtype &&
892 (bus == lbus) &&
893 (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
894 int irq = pin_2_irq(i, apic, mp_irqs[i].mp_dstirq);
895
896 if (!(apic || IO_APIC_IRQ(irq)))
897 continue;
898
899 if (pin == (mp_irqs[i].mp_srcbusirq & 3))
900 return irq;
901 /*
902 * Use the first all-but-pin matching entry as a
903 * best-guess fuzzy result for broken mptables.
904 */
905 if (best_guess < 0)
906 best_guess = irq;
907 }
908 }
909 return best_guess;
910}
911EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
912
913/*
914 * This function currently is only a helper for the i386 smp boot process where
915 * we need to reprogram the ioredtbls to cater for the cpus which have come online
916 * so mask in all cases should simply be TARGET_CPUS
917 */
918#ifdef CONFIG_SMP
919void __init setup_ioapic_dest(void)
920{
921 int pin, ioapic, irq, irq_entry;
922
923 if (skip_ioapic_setup == 1)
924 return;
925
926 for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
927 for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
928 irq_entry = find_irq_entry(ioapic, pin, mp_INT);
929 if (irq_entry == -1)
930 continue;
931 irq = pin_2_irq(irq_entry, ioapic, pin);
932 set_ioapic_affinity_irq(irq, TARGET_CPUS);
933 }
934
935 }
936}
937#endif
938
939#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
940/*
941 * EISA Edge/Level control register, ELCR
942 */
943static int EISA_ELCR(unsigned int irq)
944{
945 if (irq < 16) {
946 unsigned int port = 0x4d0 + (irq >> 3);
947 return (inb(port) >> (irq & 7)) & 1;
948 }
949 apic_printk(APIC_VERBOSE, KERN_INFO
950 "Broken MPtable reports ISA irq %d\n", irq);
951 return 0;
952}
953#endif
954
955/* ISA interrupts are always polarity zero edge triggered,
956 * when listed as conforming in the MP table. */
957
958#define default_ISA_trigger(idx) (0)
959#define default_ISA_polarity(idx) (0)
960
961/* EISA interrupts are always polarity zero and can be edge or level
962 * trigger depending on the ELCR value. If an interrupt is listed as
963 * EISA conforming in the MP table, that means its trigger type must
964 * be read in from the ELCR */
965
966#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
967#define default_EISA_polarity(idx) default_ISA_polarity(idx)
968
969/* PCI interrupts are always polarity one level triggered,
970 * when listed as conforming in the MP table. */
971
972#define default_PCI_trigger(idx) (1)
973#define default_PCI_polarity(idx) (1)
974
975/* MCA interrupts are always polarity zero level triggered,
976 * when listed as conforming in the MP table. */
977
978#define default_MCA_trigger(idx) (1)
979#define default_MCA_polarity(idx) default_ISA_polarity(idx)
980
981static int MPBIOS_polarity(int idx)
982{
983 int bus = mp_irqs[idx].mp_srcbus;
984 int polarity;
985
986 /*
987 * Determine IRQ line polarity (high active or low active):
988 */
989 switch (mp_irqs[idx].mp_irqflag & 3) {
990 case 0: /* conforms, ie. bus-type dependent polarity */
991 {
992 polarity = test_bit(bus, mp_bus_not_pci)?
993 default_ISA_polarity(idx):
994 default_PCI_polarity(idx);
995 break;
996 }
997 case 1: /* high active */
998 {
999 polarity = 0;
1000 break;
1001 }
1002 case 2: /* reserved */
1003 {
1004 printk(KERN_WARNING "broken BIOS!!\n");
1005 polarity = 1;
1006 break;
1007 }
1008 case 3: /* low active */
1009 {
1010 polarity = 1;
1011 break;
1012 }
1013 default: /* invalid */
1014 {
1015 printk(KERN_WARNING "broken BIOS!!\n");
1016 polarity = 1;
1017 break;
1018 }
1019 }
1020 return polarity;
1021}
1022
1023static int MPBIOS_trigger(int idx)
1024{
1025 int bus = mp_irqs[idx].mp_srcbus;
1026 int trigger;
1027
1028 /*
1029 * Determine IRQ trigger mode (edge or level sensitive):
1030 */
1031 switch ((mp_irqs[idx].mp_irqflag>>2) & 3) {
1032 case 0: /* conforms, ie. bus-type dependent */
1033 {
1034 trigger = test_bit(bus, mp_bus_not_pci)?
1035 default_ISA_trigger(idx):
1036 default_PCI_trigger(idx);
1037#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
1038 switch (mp_bus_id_to_type[bus]) {
1039 case MP_BUS_ISA: /* ISA pin */
1040 {
1041 /* set before the switch */
1042 break;
1043 }
1044 case MP_BUS_EISA: /* EISA pin */
1045 {
1046 trigger = default_EISA_trigger(idx);
1047 break;
1048 }
1049 case MP_BUS_PCI: /* PCI pin */
1050 {
1051 /* set before the switch */
1052 break;
1053 }
1054 case MP_BUS_MCA: /* MCA pin */
1055 {
1056 trigger = default_MCA_trigger(idx);
1057 break;
1058 }
1059 default:
1060 {
1061 printk(KERN_WARNING "broken BIOS!!\n");
1062 trigger = 1;
1063 break;
1064 }
1065 }
1066#endif
1067 break;
1068 }
1069 case 1: /* edge */
1070 {
1071 trigger = 0;
1072 break;
1073 }
1074 case 2: /* reserved */
1075 {
1076 printk(KERN_WARNING "broken BIOS!!\n");
1077 trigger = 1;
1078 break;
1079 }
1080 case 3: /* level */
1081 {
1082 trigger = 1;
1083 break;
1084 }
1085 default: /* invalid */
1086 {
1087 printk(KERN_WARNING "broken BIOS!!\n");
1088 trigger = 0;
1089 break;
1090 }
1091 }
1092 return trigger;
1093}
1094
/* Polarity of MP table entry @idx, as decided by MPBIOS_polarity(). */
static inline int irq_polarity(int idx)
{
	int polarity = MPBIOS_polarity(idx);

	return polarity;
}
1099
/* Trigger mode of MP table entry @idx, as decided by MPBIOS_trigger(). */
static inline int irq_trigger(int idx)
{
	int trigger = MPBIOS_trigger(idx);

	return trigger;
}
1104
1105static int pin_2_irq(int idx, int apic, int pin)
1106{
1107 int irq, i;
1108 int bus = mp_irqs[idx].mp_srcbus;
1109
1110 /*
1111 * Debugging check, we are in big trouble if this message pops up!
1112 */
1113 if (mp_irqs[idx].mp_dstirq != pin)
1114 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
1115
1116 if (test_bit(bus, mp_bus_not_pci))
1117 irq = mp_irqs[idx].mp_srcbusirq;
1118 else {
1119 /*
1120 * PCI IRQs are mapped in order
1121 */
1122 i = irq = 0;
1123 while (i < apic)
1124 irq += nr_ioapic_registers[i++];
1125 irq += pin;
1126
1127 /*
1128 * For MPS mode, so far only needed by ES7000 platform
1129 */
1130 if (ioapic_renumber_irq)
1131 irq = ioapic_renumber_irq(apic, irq);
1132 }
1133
1134 /*
1135 * PCI IRQ command line redirection. Yes, limits are hardcoded.
1136 */
1137 if ((pin >= 16) && (pin <= 23)) {
1138 if (pirq_entries[pin-16] != -1) {
1139 if (!pirq_entries[pin-16]) {
1140 apic_printk(APIC_VERBOSE, KERN_DEBUG
1141 "disabling PIRQ%d\n", pin-16);
1142 } else {
1143 irq = pirq_entries[pin-16];
1144 apic_printk(APIC_VERBOSE, KERN_DEBUG
1145 "using PIRQ%d -> IRQ %d\n",
1146 pin-16, irq);
1147 }
1148 }
1149 }
1150 return irq;
1151}
1152
1153static inline int IO_APIC_irq_trigger(int irq)
1154{
1155 int apic, idx, pin;
1156
1157 for (apic = 0; apic < nr_ioapics; apic++) {
1158 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
1159 idx = find_irq_entry(apic, pin, mp_INT);
1160 if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
1161 return irq_trigger(idx);
1162 }
1163 }
1164 /*
1165 * nonexistent IRQs are edge default
1166 */
1167 return 0;
1168}
1169
1170/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
1171static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
1172
1173static int __assign_irq_vector(int irq)
1174{
1175 static int current_vector = FIRST_DEVICE_VECTOR, current_offset;
1176 int vector, offset;
1177
1178 BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
1179
1180 if (irq_vector[irq] > 0)
1181 return irq_vector[irq];
1182
1183 vector = current_vector;
1184 offset = current_offset;
1185next:
1186 vector += 8;
1187 if (vector >= first_system_vector) {
1188 offset = (offset + 1) % 8;
1189 vector = FIRST_DEVICE_VECTOR + offset;
1190 }
1191 if (vector == current_vector)
1192 return -ENOSPC;
1193 if (test_and_set_bit(vector, used_vectors))
1194 goto next;
1195
1196 current_vector = vector;
1197 current_offset = offset;
1198 irq_vector[irq] = vector;
1199
1200 return vector;
1201}
1202
1203static int assign_irq_vector(int irq)
1204{
1205 unsigned long flags;
1206 int vector;
1207
1208 spin_lock_irqsave(&vector_lock, flags);
1209 vector = __assign_irq_vector(irq);
1210 spin_unlock_irqrestore(&vector_lock, flags);
1211
1212 return vector;
1213}
1214
1215static struct irq_chip ioapic_chip;
1216
1217#define IOAPIC_AUTO -1
1218#define IOAPIC_EDGE 0
1219#define IOAPIC_LEVEL 1
1220
1221static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
1222{
1223 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1224 trigger == IOAPIC_LEVEL) {
1225 irq_desc[irq].status |= IRQ_LEVEL;
1226 set_irq_chip_and_handler_name(irq, &ioapic_chip,
1227 handle_fasteoi_irq, "fasteoi");
1228 } else {
1229 irq_desc[irq].status &= ~IRQ_LEVEL;
1230 set_irq_chip_and_handler_name(irq, &ioapic_chip,
1231 handle_edge_irq, "edge");
1232 }
1233 set_intr_gate(vector, interrupt[irq]);
1234}
1235
1236static void __init setup_IO_APIC_irqs(void)
1237{
1238 struct IO_APIC_route_entry entry;
1239 int apic, pin, idx, irq, first_notcon = 1, vector;
1240
1241 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
1242
1243 for (apic = 0; apic < nr_ioapics; apic++) {
1244 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
1245
1246 /*
1247 * add it to the IO-APIC irq-routing table:
1248 */
1249 memset(&entry, 0, sizeof(entry));
1250
1251 entry.delivery_mode = INT_DELIVERY_MODE;
1252 entry.dest_mode = INT_DEST_MODE;
1253 entry.mask = 0; /* enable IRQ */
1254 entry.dest.logical.logical_dest =
1255 cpu_mask_to_apicid(TARGET_CPUS);
1256
1257 idx = find_irq_entry(apic, pin, mp_INT);
1258 if (idx == -1) {
1259 if (first_notcon) {
1260 apic_printk(APIC_VERBOSE, KERN_DEBUG
1261 " IO-APIC (apicid-pin) %d-%d",
1262 mp_ioapics[apic].mp_apicid,
1263 pin);
1264 first_notcon = 0;
1265 } else
1266 apic_printk(APIC_VERBOSE, ", %d-%d",
1267 mp_ioapics[apic].mp_apicid, pin);
1268 continue;
1269 }
1270
1271 if (!first_notcon) {
1272 apic_printk(APIC_VERBOSE, " not connected.\n");
1273 first_notcon = 1;
1274 }
1275
1276 entry.trigger = irq_trigger(idx);
1277 entry.polarity = irq_polarity(idx);
1278
1279 if (irq_trigger(idx)) {
1280 entry.trigger = 1;
1281 entry.mask = 1;
1282 }
1283
1284 irq = pin_2_irq(idx, apic, pin);
1285 /*
1286 * skip adding the timer int on secondary nodes, which causes
1287 * a small but painful rift in the time-space continuum
1288 */
1289 if (multi_timer_check(apic, irq))
1290 continue;
1291 else
1292 add_pin_to_irq(irq, apic, pin);
1293
1294 if (!apic && !IO_APIC_IRQ(irq))
1295 continue;
1296
1297 if (IO_APIC_IRQ(irq)) {
1298 vector = assign_irq_vector(irq);
1299 entry.vector = vector;
1300 ioapic_register_intr(irq, vector, IOAPIC_AUTO);
1301
1302 if (!apic && (irq < 16))
1303 disable_8259A_irq(irq);
1304 }
1305 ioapic_write_entry(apic, pin, entry);
1306 }
1307 }
1308
1309 if (!first_notcon)
1310 apic_printk(APIC_VERBOSE, " not connected.\n");
1311}
1312
1313/*
1314 * Set up the timer pin, possibly with the 8259A-master behind.
1315 */
1316static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
1317 int vector)
1318{
1319 struct IO_APIC_route_entry entry;
1320
1321 memset(&entry, 0, sizeof(entry));
1322
1323 /*
1324 * We use logical delivery to get the timer IRQ
1325 * to the first CPU.
1326 */
1327 entry.dest_mode = INT_DEST_MODE;
1328 entry.mask = 1; /* mask IRQ now */
1329 entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
1330 entry.delivery_mode = INT_DELIVERY_MODE;
1331 entry.polarity = 0;
1332 entry.trigger = 0;
1333 entry.vector = vector;
1334
1335 /*
1336 * The timer IRQ doesn't have to know that behind the
1337 * scene we may have a 8259A-master in AEOI mode ...
1338 */
1339 ioapic_register_intr(0, vector, IOAPIC_EDGE);
1340
1341 /*
1342 * Add it to the IO-APIC irq-routing table:
1343 */
1344 ioapic_write_entry(apic, pin, entry);
1345}
1346
1347
1348__apicdebuginit(void) print_IO_APIC(void)
1349{
1350 int apic, i;
1351 union IO_APIC_reg_00 reg_00;
1352 union IO_APIC_reg_01 reg_01;
1353 union IO_APIC_reg_02 reg_02;
1354 union IO_APIC_reg_03 reg_03;
1355 unsigned long flags;
1356
1357 if (apic_verbosity == APIC_QUIET)
1358 return;
1359
1360 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
1361 for (i = 0; i < nr_ioapics; i++)
1362 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
1363 mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
1364
1365 /*
1366 * We are a bit conservative about what we expect. We have to
1367 * know about every hardware change ASAP.
1368 */
1369 printk(KERN_INFO "testing the IO APIC.......................\n");
1370
1371 for (apic = 0; apic < nr_ioapics; apic++) {
1372
1373 spin_lock_irqsave(&ioapic_lock, flags);
1374 reg_00.raw = io_apic_read(apic, 0);
1375 reg_01.raw = io_apic_read(apic, 1);
1376 if (reg_01.bits.version >= 0x10)
1377 reg_02.raw = io_apic_read(apic, 2);
1378 if (reg_01.bits.version >= 0x20)
1379 reg_03.raw = io_apic_read(apic, 3);
1380 spin_unlock_irqrestore(&ioapic_lock, flags);
1381
1382 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
1383 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
1384 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
1385 printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
1386 printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS);
1387
1388 printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
1389 printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
1390
1391 printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
1392 printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
1393
1394 /*
1395 * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
1396 * but the value of reg_02 is read as the previous read register
1397 * value, so ignore it if reg_02 == reg_01.
1398 */
1399 if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
1400 printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
1401 printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
1402 }
1403
1404 /*
1405 * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
1406 * or reg_03, but the value of reg_0[23] is read as the previous read
1407 * register value, so ignore it if reg_03 == reg_0[12].
1408 */
1409 if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
1410 reg_03.raw != reg_01.raw) {
1411 printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
1412 printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT);
1413 }
1414
1415 printk(KERN_DEBUG ".... IRQ redirection table:\n");
1416
1417 printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
1418 " Stat Dest Deli Vect: \n");
1419
1420 for (i = 0; i <= reg_01.bits.entries; i++) {
1421 struct IO_APIC_route_entry entry;
1422
1423 entry = ioapic_read_entry(apic, i);
1424
1425 printk(KERN_DEBUG " %02x %03X %02X ",
1426 i,
1427 entry.dest.logical.logical_dest,
1428 entry.dest.physical.physical_dest
1429 );
1430
1431 printk("%1d %1d %1d %1d %1d %1d %1d %02X\n",
1432 entry.mask,
1433 entry.trigger,
1434 entry.irr,
1435 entry.polarity,
1436 entry.delivery_status,
1437 entry.dest_mode,
1438 entry.delivery_mode,
1439 entry.vector
1440 );
1441 }
1442 }
1443 printk(KERN_DEBUG "IRQ to pin mappings:\n");
1444 for (i = 0; i < NR_IRQS; i++) {
1445 struct irq_pin_list *entry = irq_2_pin + i;
1446 if (entry->pin < 0)
1447 continue;
1448 printk(KERN_DEBUG "IRQ%d ", i);
1449 for (;;) {
1450 printk("-> %d:%d", entry->apic, entry->pin);
1451 if (!entry->next)
1452 break;
1453 entry = irq_2_pin + entry->next;
1454 }
1455 printk("\n");
1456 }
1457
1458 printk(KERN_INFO ".................................... done.\n");
1459
1460 return;
1461}
1462
1463__apicdebuginit(void) print_APIC_bitfield(int base)
1464{
1465 unsigned int v;
1466 int i, j;
1467
1468 if (apic_verbosity == APIC_QUIET)
1469 return;
1470
1471 printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
1472 for (i = 0; i < 8; i++) {
1473 v = apic_read(base + i*0x10);
1474 for (j = 0; j < 32; j++) {
1475 if (v & (1<<j))
1476 printk("1");
1477 else
1478 printk("0");
1479 }
1480 printk("\n");
1481 }
1482}
1483
1484__apicdebuginit(void) print_local_APIC(void *dummy)
1485{
1486 unsigned int v, ver, maxlvt;
1487 u64 icr;
1488
1489 if (apic_verbosity == APIC_QUIET)
1490 return;
1491
1492 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
1493 smp_processor_id(), hard_smp_processor_id());
1494 v = apic_read(APIC_ID);
1495 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v,
1496 GET_APIC_ID(v));
1497 v = apic_read(APIC_LVR);
1498 printk(KERN_INFO "... APIC VERSION: %08x\n", v);
1499 ver = GET_APIC_VERSION(v);
1500 maxlvt = lapic_get_maxlvt();
1501
1502 v = apic_read(APIC_TASKPRI);
1503 printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
1504
1505 if (APIC_INTEGRATED(ver)) { /* !82489DX */
1506 v = apic_read(APIC_ARBPRI);
1507 printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
1508 v & APIC_ARBPRI_MASK);
1509 v = apic_read(APIC_PROCPRI);
1510 printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
1511 }
1512
1513 v = apic_read(APIC_EOI);
1514 printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
1515 v = apic_read(APIC_RRR);
1516 printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
1517 v = apic_read(APIC_LDR);
1518 printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
1519 v = apic_read(APIC_DFR);
1520 printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
1521 v = apic_read(APIC_SPIV);
1522 printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
1523
1524 printk(KERN_DEBUG "... APIC ISR field:\n");
1525 print_APIC_bitfield(APIC_ISR);
1526 printk(KERN_DEBUG "... APIC TMR field:\n");
1527 print_APIC_bitfield(APIC_TMR);
1528 printk(KERN_DEBUG "... APIC IRR field:\n");
1529 print_APIC_bitfield(APIC_IRR);
1530
1531 if (APIC_INTEGRATED(ver)) { /* !82489DX */
1532 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
1533 apic_write(APIC_ESR, 0);
1534 v = apic_read(APIC_ESR);
1535 printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
1536 }
1537
1538 icr = apic_icr_read();
1539 printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
1540 printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
1541
1542 v = apic_read(APIC_LVTT);
1543 printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
1544
1545 if (maxlvt > 3) { /* PC is LVT#4. */
1546 v = apic_read(APIC_LVTPC);
1547 printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
1548 }
1549 v = apic_read(APIC_LVT0);
1550 printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
1551 v = apic_read(APIC_LVT1);
1552 printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
1553
1554 if (maxlvt > 2) { /* ERR is LVT#3. */
1555 v = apic_read(APIC_LVTERR);
1556 printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
1557 }
1558
1559 v = apic_read(APIC_TMICT);
1560 printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
1561 v = apic_read(APIC_TMCCT);
1562 printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
1563 v = apic_read(APIC_TDCR);
1564 printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
1565 printk("\n");
1566}
1567
1568__apicdebuginit(void) print_all_local_APICs(void)
1569{
1570 on_each_cpu(print_local_APIC, NULL, 1);
1571}
1572
1573__apicdebuginit(void) print_PIC(void)
1574{
1575 unsigned int v;
1576 unsigned long flags;
1577
1578 if (apic_verbosity == APIC_QUIET)
1579 return;
1580
1581 printk(KERN_DEBUG "\nprinting PIC contents\n");
1582
1583 spin_lock_irqsave(&i8259A_lock, flags);
1584
1585 v = inb(0xa1) << 8 | inb(0x21);
1586 printk(KERN_DEBUG "... PIC IMR: %04x\n", v);
1587
1588 v = inb(0xa0) << 8 | inb(0x20);
1589 printk(KERN_DEBUG "... PIC IRR: %04x\n", v);
1590
1591 outb(0x0b, 0xa0);
1592 outb(0x0b, 0x20);
1593 v = inb(0xa0) << 8 | inb(0x20);
1594 outb(0x0a, 0xa0);
1595 outb(0x0a, 0x20);
1596
1597 spin_unlock_irqrestore(&i8259A_lock, flags);
1598
1599 printk(KERN_DEBUG "... PIC ISR: %04x\n", v);
1600
1601 v = inb(0x4d1) << 8 | inb(0x4d0);
1602 printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
1603}
1604
1605__apicdebuginit(int) print_all_ICs(void)
1606{
1607 print_PIC();
1608 print_all_local_APICs();
1609 print_IO_APIC();
1610
1611 return 0;
1612}
1613
1614fs_initcall(print_all_ICs);
1615
1616
1617static void __init enable_IO_APIC(void)
1618{
1619 union IO_APIC_reg_01 reg_01;
1620 int i8259_apic, i8259_pin;
1621 int i, apic;
1622 unsigned long flags;
1623
1624 for (i = 0; i < PIN_MAP_SIZE; i++) {
1625 irq_2_pin[i].pin = -1;
1626 irq_2_pin[i].next = 0;
1627 }
1628 if (!pirqs_enabled)
1629 for (i = 0; i < MAX_PIRQS; i++)
1630 pirq_entries[i] = -1;
1631
1632 /*
1633 * The number of IO-APIC IRQ registers (== #pins):
1634 */
1635 for (apic = 0; apic < nr_ioapics; apic++) {
1636 spin_lock_irqsave(&ioapic_lock, flags);
1637 reg_01.raw = io_apic_read(apic, 1);
1638 spin_unlock_irqrestore(&ioapic_lock, flags);
1639 nr_ioapic_registers[apic] = reg_01.bits.entries+1;
1640 }
1641 for (apic = 0; apic < nr_ioapics; apic++) {
1642 int pin;
1643 /* See if any of the pins is in ExtINT mode */
1644 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
1645 struct IO_APIC_route_entry entry;
1646 entry = ioapic_read_entry(apic, pin);
1647
1648
1649 /* If the interrupt line is enabled and in ExtInt mode
1650 * I have found the pin where the i8259 is connected.
1651 */
1652 if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
1653 ioapic_i8259.apic = apic;
1654 ioapic_i8259.pin = pin;
1655 goto found_i8259;
1656 }
1657 }
1658 }
1659 found_i8259:
1660 /* Look to see what if the MP table has reported the ExtINT */
1661 /* If we could not find the appropriate pin by looking at the ioapic
1662 * the i8259 probably is not connected the ioapic but give the
1663 * mptable a chance anyway.
1664 */
1665 i8259_pin = find_isa_irq_pin(0, mp_ExtINT);
1666 i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
1667 /* Trust the MP table if nothing is setup in the hardware */
1668 if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
1669 printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
1670 ioapic_i8259.pin = i8259_pin;
1671 ioapic_i8259.apic = i8259_apic;
1672 }
1673 /* Complain if the MP table and the hardware disagree */
1674 if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
1675 (i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
1676 {
1677 printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
1678 }
1679
1680 /*
1681 * Do not trust the IO-APIC being empty at bootup
1682 */
1683 clear_IO_APIC();
1684}
1685
1686/*
1687 * Not an __init, needed by the reboot code
1688 */
1689void disable_IO_APIC(void)
1690{
1691 /*
1692 * Clear the IO-APIC before rebooting:
1693 */
1694 clear_IO_APIC();
1695
1696 /*
1697 * If the i8259 is routed through an IOAPIC
1698 * Put that IOAPIC in virtual wire mode
1699 * so legacy interrupts can be delivered.
1700 */
1701 if (ioapic_i8259.pin != -1) {
1702 struct IO_APIC_route_entry entry;
1703
1704 memset(&entry, 0, sizeof(entry));
1705 entry.mask = 0; /* Enabled */
1706 entry.trigger = 0; /* Edge */
1707 entry.irr = 0;
1708 entry.polarity = 0; /* High */
1709 entry.delivery_status = 0;
1710 entry.dest_mode = 0; /* Physical */
1711 entry.delivery_mode = dest_ExtINT; /* ExtInt */
1712 entry.vector = 0;
1713 entry.dest.physical.physical_dest = read_apic_id();
1714
1715 /*
1716 * Add it to the IO-APIC irq-routing table:
1717 */
1718 ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
1719 }
1720 disconnect_bsp_APIC(ioapic_i8259.pin != -1);
1721}
1722
1723/*
1724 * function to set the IO-APIC physical IDs based on the
1725 * values stored in the MPC table.
1726 *
1727 * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
1728 */
1729
1730static void __init setup_ioapic_ids_from_mpc(void)
1731{
1732 union IO_APIC_reg_00 reg_00;
1733 physid_mask_t phys_id_present_map;
1734 int apic;
1735 int i;
1736 unsigned char old_id;
1737 unsigned long flags;
1738
1739 if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
1740 return;
1741
1742 /*
1743 * Don't check I/O APIC IDs for xAPIC systems. They have
1744 * no meaning without the serial APIC bus.
1745 */
1746 if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
1747 || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
1748 return;
1749 /*
1750 * This is broken; anything with a real cpu count has to
1751 * circumvent this idiocy regardless.
1752 */
1753 phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
1754
1755 /*
1756 * Set the IOAPIC ID to the value stored in the MPC table.
1757 */
1758 for (apic = 0; apic < nr_ioapics; apic++) {
1759
1760 /* Read the register 0 value */
1761 spin_lock_irqsave(&ioapic_lock, flags);
1762 reg_00.raw = io_apic_read(apic, 0);
1763 spin_unlock_irqrestore(&ioapic_lock, flags);
1764
1765 old_id = mp_ioapics[apic].mp_apicid;
1766
1767 if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
1768 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
1769 apic, mp_ioapics[apic].mp_apicid);
1770 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
1771 reg_00.bits.ID);
1772 mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
1773 }
1774
1775 /*
1776 * Sanity check, is the ID really free? Every APIC in a
1777 * system must have a unique ID or we get lots of nice
1778 * 'stuck on smp_invalidate_needed IPI wait' messages.
1779 */
1780 if (check_apicid_used(phys_id_present_map,
1781 mp_ioapics[apic].mp_apicid)) {
1782 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
1783 apic, mp_ioapics[apic].mp_apicid);
1784 for (i = 0; i < get_physical_broadcast(); i++)
1785 if (!physid_isset(i, phys_id_present_map))
1786 break;
1787 if (i >= get_physical_broadcast())
1788 panic("Max APIC ID exceeded!\n");
1789 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
1790 i);
1791 physid_set(i, phys_id_present_map);
1792 mp_ioapics[apic].mp_apicid = i;
1793 } else {
1794 physid_mask_t tmp;
1795 tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
1796 apic_printk(APIC_VERBOSE, "Setting %d in the "
1797 "phys_id_present_map\n",
1798 mp_ioapics[apic].mp_apicid);
1799 physids_or(phys_id_present_map, phys_id_present_map, tmp);
1800 }
1801
1802
1803 /*
1804 * We need to adjust the IRQ routing table
1805 * if the ID changed.
1806 */
1807 if (old_id != mp_ioapics[apic].mp_apicid)
1808 for (i = 0; i < mp_irq_entries; i++)
1809 if (mp_irqs[i].mp_dstapic == old_id)
1810 mp_irqs[i].mp_dstapic
1811 = mp_ioapics[apic].mp_apicid;
1812
1813 /*
1814 * Read the right value from the MPC table and
1815 * write it into the ID register.
1816 */
1817 apic_printk(APIC_VERBOSE, KERN_INFO
1818 "...changing IO-APIC physical APIC ID to %d ...",
1819 mp_ioapics[apic].mp_apicid);
1820
1821 reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
1822 spin_lock_irqsave(&ioapic_lock, flags);
1823 io_apic_write(apic, 0, reg_00.raw);
1824 spin_unlock_irqrestore(&ioapic_lock, flags);
1825
1826 /*
1827 * Sanity check
1828 */
1829 spin_lock_irqsave(&ioapic_lock, flags);
1830 reg_00.raw = io_apic_read(apic, 0);
1831 spin_unlock_irqrestore(&ioapic_lock, flags);
1832 if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
1833 printk("could not set ID!\n");
1834 else
1835 apic_printk(APIC_VERBOSE, " ok.\n");
1836 }
1837}
1838
1839int no_timer_check __initdata;
1840
1841static int __init notimercheck(char *s)
1842{
1843 no_timer_check = 1;
1844 return 1;
1845}
1846__setup("no_timer_check", notimercheck);
1847
1848/*
1849 * There is a nasty bug in some older SMP boards, their mptable lies
1850 * about the timer IRQ. We do the following to work around the situation:
1851 *
1852 * - timer IRQ defaults to IO-APIC IRQ
1853 * - if this function detects that timer IRQs are defunct, then we fall
1854 * back to ISA timer IRQs
1855 */
1856static int __init timer_irq_works(void)
1857{
1858 unsigned long t1 = jiffies;
1859 unsigned long flags;
1860
1861 if (no_timer_check)
1862 return 1;
1863
1864 local_save_flags(flags);
1865 local_irq_enable();
1866 /* Let ten ticks pass... */
1867 mdelay((10 * 1000) / HZ);
1868 local_irq_restore(flags);
1869
1870 /*
1871 * Expect a few ticks at least, to be sure some possible
1872 * glue logic does not lock up after one or two first
1873 * ticks in a non-ExtINT mode. Also the local APIC
1874 * might have cached one ExtINT interrupt. Finally, at
1875 * least one tick may be lost due to delays.
1876 */
1877 if (time_after(jiffies, t1 + 4))
1878 return 1;
1879
1880 return 0;
1881}
1882
1883/*
1884 * In the SMP+IOAPIC case it might happen that there are an unspecified
1885 * number of pending IRQ events unhandled. These cases are very rare,
1886 * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
1887 * better to do it this way as thus we do not have to be aware of
1888 * 'pending' interrupts in the IRQ path, except at this point.
1889 */
1890/*
1891 * Edge triggered needs to resend any interrupt
1892 * that was delayed but this is now handled in the device
1893 * independent code.
1894 */
1895
1896/*
1897 * Startup quirk:
1898 *
1899 * Starting up an edge-triggered IO-APIC interrupt is
1900 * nasty - we need to make sure that we get the edge.
1901 * If it is already asserted for some reason, we need
1902 * to return 1 to indicate that it was pending.
1903 *
1904 * This is not complete - we should be able to fake
1905 * an edge even if it isn't on the 8259A...
1906 *
1907 * (We do this for level-triggered IRQs too - it cannot hurt.)
1908 */
1909static unsigned int startup_ioapic_irq(unsigned int irq)
1910{
1911 int was_pending = 0;
1912 unsigned long flags;
1913
1914 spin_lock_irqsave(&ioapic_lock, flags);
1915 if (irq < 16) {
1916 disable_8259A_irq(irq);
1917 if (i8259A_irq_pending(irq))
1918 was_pending = 1;
1919 }
1920 __unmask_IO_APIC_irq(irq);
1921 spin_unlock_irqrestore(&ioapic_lock, flags);
1922
1923 return was_pending;
1924}
1925
/*
 * Acknowledge an interrupt: calls move_native_irq() for the irq and then
 * acks the local APIC.
 */
1926static void ack_ioapic_irq(unsigned int irq)
1927{
1928 move_native_irq(irq);
1929 ack_APIC_irq();
1930}
1931
1932static void ack_ioapic_quirk_irq(unsigned int irq)
1933{
1934 unsigned long v;
1935 int i;
1936
1937 move_native_irq(irq);
1938/*
1939 * It appears there is an erratum which affects at least version 0x11
1940 * of I/O APIC (that's the 82093AA and cores integrated into various
1941 * chipsets). Under certain conditions a level-triggered interrupt is
1942 * erroneously delivered as edge-triggered one but the respective IRR
1943 * bit gets set nevertheless. As a result the I/O unit expects an EOI
1944 * message but it will never arrive and further interrupts are blocked
1945 * from the source. The exact reason is so far unknown, but the
1946 * phenomenon was observed when two consecutive interrupt requests
1947 * from a given source get delivered to the same CPU and the source is
1948 * temporarily disabled in between.
1949 *
1950 * A workaround is to simulate an EOI message manually. We achieve it
1951 * by setting the trigger mode to edge and then to level when the edge
1952 * trigger mode gets detected in the TMR of a local APIC for a
1953 * level-triggered interrupt. We mask the source for the time of the
1954 * operation to prevent an edge-triggered interrupt escaping meanwhile.
1955 * The idea is from Manfred Spraul. --macro
1956 */
1957 i = irq_vector[irq];
1958
1959 v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
1960
1961 ack_APIC_irq();
1962
1963 if (!(v & (1 << (i & 0x1f)))) {
1964 atomic_inc(&irq_mis_count);
1965 spin_lock(&ioapic_lock);
1966 __mask_and_edge_IO_APIC_irq(irq);
1967 __unmask_and_level_IO_APIC_irq(irq);
1968 spin_unlock(&ioapic_lock);
1969 }
1970}
1971
/*
 * Retrigger callback: sends a self-IPI using the vector recorded in
 * irq_vector[] for this irq.  Always returns 1.
 */
1972static int ioapic_retrigger_irq(unsigned int irq)
1973{
1974 send_IPI_self(irq_vector[irq]);
1975
1976 return 1;
1977}
1978
/*
 * irq_chip callbacks for interrupts routed through the IO-APIC.
 * .ack uses the plain acknowledge, .eoi the erratum-aware quirk variant;
 * affinity setting is only wired up on CONFIG_SMP builds.
 */
1979static struct irq_chip ioapic_chip __read_mostly = {
1980 .name = "IO-APIC",
1981 .startup = startup_ioapic_irq,
1982 .mask = mask_IO_APIC_irq,
1983 .unmask = unmask_IO_APIC_irq,
1984 .ack = ack_ioapic_irq,
1985 .eoi = ack_ioapic_quirk_irq,
1986#ifdef CONFIG_SMP
1987 .set_affinity = set_ioapic_affinity_irq,
1988#endif
1989 .retrigger = ioapic_retrigger_irq,
1990};
1991
1992
1993static inline void init_IO_APIC_traps(void)
1994{
1995 int irq;
1996
1997 /*
1998 * NOTE! The local APIC isn't very good at handling
1999 * multiple interrupts at the same interrupt level.
2000 * As the interrupt level is determined by taking the
2001 * vector number and shifting that right by 4, we
2002 * want to spread these out a bit so that they don't
2003 * all fall in the same interrupt level.
2004 *
2005 * Also, we've got to be careful not to trash gate
2006 * 0x80, because int 0x80 is hm, kind of importantish. ;)
2007 */
2008 for (irq = 0; irq < NR_IRQS ; irq++) {
2009 if (IO_APIC_IRQ(irq) && !irq_vector[irq]) {
2010 /*
2011 * Hmm.. We don't have an entry for this,
2012 * so default to an old-fashioned 8259
2013 * interrupt if we can..
2014 */
2015 if (irq < 16)
2016 make_8259A_irq(irq);
2017 else
2018 /* Strange. Oh, well.. */
2019 irq_desc[irq].chip = &no_irq_chip;
2020 }
2021 }
2022}
2023
2024/*
2025 * The local APIC irq-chip implementation:
2026 */
2027
/* Ack callback of lapic_chip: acknowledges the local APIC; the irq argument is unused. */
2028static void ack_lapic_irq(unsigned int irq)
2029{
2030 ack_APIC_irq();
2031}
2032
2033static void mask_lapic_irq(unsigned int irq)
2034{
2035 unsigned long v;
2036
2037 v = apic_read(APIC_LVT0);
2038 apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
2039}
2040
2041static void unmask_lapic_irq(unsigned int irq)
2042{
2043 unsigned long v;
2044
2045 v = apic_read(APIC_LVT0);
2046 apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
2047}
2048
/* irq_chip whose mask/unmask callbacks operate on the local APIC's LVT0 register. */
2049static struct irq_chip lapic_chip __read_mostly = {
2050 .name = "local-APIC",
2051 .mask = mask_lapic_irq,
2052 .unmask = unmask_lapic_irq,
2053 .ack = ack_lapic_irq,
2054};
2055
/*
 * Register irq as an edge interrupt handled by lapic_chip: clears
 * IRQ_LEVEL in the descriptor status, installs the chip with
 * handle_edge_irq, and sets the gate for vector to interrupt[irq].
 */
2056static void lapic_register_intr(int irq, int vector)
2057{
2058 irq_desc[irq].status &= ~IRQ_LEVEL;
2059 set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
2060 "edge");
2061 set_intr_gate(vector, interrupt[irq]);
2062}
2063
/*
 * Enable the NMI watchdog path by calling enable_NMI_through_LVT0(),
 * with verbose-level progress messages before and after.
 */
2064static void __init setup_nmi(void)
2065{
2066 /*
2067 * Dirty trick to enable the NMI watchdog ...
2068 * We put the 8259A master into AEOI mode and
2069 * unmask on all local APICs LVT0 as NMI.
2070 *
2071 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
2072 * is from Maciej W. Rozycki - so we do not have to EOI from
2073 * the NMI handler or the timer interrupt.
2074 */
 /* NOTE(review): the AEOI switch mentioned above is not done in this function - presumably by the caller; confirm. */
2075 apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
2076
2077 enable_NMI_through_LVT0();
2078
2079 apic_printk(APIC_VERBOSE, " done.\n");
2080}
2081
2082/*
2083 * This looks a bit hackish but it's about the only one way of sending
2084 * a few INTA cycles to 8259As and any associated glue logic. ICR does
2085 * not support the ExtINT mode, unfortunately. We need to send these
2086 * cycles as some i82489DX-based boards have glue logic that keeps the
2087 * 8259A interrupt line asserted until INTA. --macro
2088 */
/*
 * Temporarily reprogram the IO-APIC pin of ISA IRQ 8 as an unmasked
 * ExtINT entry aimed at the current CPU, run the RTC periodic interrupt
 * for a while, then restore the RTC registers and the original pin entry.
 * Warns once and returns early if the pin or its IO-APIC cannot be found.
 */
2089static inline void __init unlock_ExtINT_logic(void)
2090{
2091 int apic, pin, i;
2092 struct IO_APIC_route_entry entry0, entry1;
2093 unsigned char save_control, save_freq_select;
2094
2095 pin = find_isa_irq_pin(8, mp_INT);
2096 if (pin == -1) {
2097 WARN_ON_ONCE(1);
2098 return;
2099 }
2100 apic = find_isa_irq_apic(8, mp_INT);
2101 if (apic == -1) {
2102 WARN_ON_ONCE(1);
2103 return;
2104 }
2105
 /* Save the current routing entry so it can be restored at the end. */
2106 entry0 = ioapic_read_entry(apic, pin);
2107 clear_IO_APIC_pin(apic, pin);
2108
2109 memset(&entry1, 0, sizeof(entry1));
2110
2111 entry1.dest_mode = 0; /* physical delivery */
2112 entry1.mask = 0; /* unmask IRQ now */
2113 entry1.dest.physical.physical_dest = hard_smp_processor_id();
2114 entry1.delivery_mode = dest_ExtINT;
2115 entry1.polarity = entry0.polarity;
2116 entry1.trigger = 0;
2117 entry1.vector = 0;
2118
2119 ioapic_write_entry(apic, pin, entry1);
2120
 /* Save the RTC settings, then select rate 0x6 and enable the periodic interrupt. */
2121 save_control = CMOS_READ(RTC_CONTROL);
2122 save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
2123 CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
2124 RTC_FREQ_SELECT);
2125 CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
2126
 /*
  * At most 100 iterations of 10 ms each; every iteration that sees the
  * periodic flag set takes an extra 10 off the counter, so the loop
  * ends much sooner when the RTC is ticking.
  */
2127 i = 100;
2128 while (i-- > 0) {
2129 mdelay(10);
2130 if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
2131 i -= 10;
2132 }
2133
 /* Put the RTC and the IO-APIC pin back the way they were. */
2134 CMOS_WRITE(save_control, RTC_CONTROL);
2135 CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
2136 clear_IO_APIC_pin(apic, pin);
2137
2138 ioapic_write_entry(apic, pin, entry0);
2139}
2140
2141/*
2142 * This code may look a bit paranoid, but it's supposed to cooperate with
2143 * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
2144 * is so screwy. Thanks to Brian Perkins for testing/hacking this beast
2145 * fanatically on his truly buggy board.
2146 */
/*
 * Find a working way to deliver the timer interrupt (IRQ0), trying in
 * order, and stopping at the first one for which timer_irq_works()
 * succeeds:
 *   1. the IO-APIC pin found for ISA IRQ0 (or the 8259A's pin if none),
 *   2. the IO-APIC pin the 8259A is attached to,
 *   3. the local APIC LVT0 in fixed mode ("Virtual Wire"),
 *   4. the local APIC LVT0 in ExtINT mode.
 * Panics if none of them works.  Runs with local interrupts saved and
 * disabled on entry; the saved state is restored on exit.
 */
2147static inline void __init check_timer(void)
2148{
2149 int apic1, pin1, apic2, pin2;
2150 int no_pin1 = 0;
2151 int vector;
2152 unsigned int ver;
2153 unsigned long flags;
2154
2155 local_irq_save(flags);
2156
2157 ver = apic_read(APIC_LVR);
2158 ver = GET_APIC_VERSION(ver);
2159
2160 /*
2161 * get/set the timer IRQ vector:
2162 */
2163 disable_8259A_irq(0);
2164 vector = assign_irq_vector(0);
2165 set_intr_gate(vector, interrupt[0]);
2166
2167 /*
2168 * As IRQ0 is to be enabled in the 8259A, the virtual
2169 * wire has to be disabled in the local APIC. Also
2170 * timer interrupts need to be acknowledged manually in
2171 * the 8259A for the i82489DX when using the NMI
2172 * watchdog as that APIC treats NMIs as level-triggered.
2173 * The AEOI mode will finish them in the 8259A
2174 * automatically.
2175 */
2176 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
2177 init_8259A(1);
2178 timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
2179
 /* pin1/apic1: where the MP table routes ISA IRQ0; pin2/apic2: where the 8259A is attached. */
2180 pin1 = find_isa_irq_pin(0, mp_INT);
2181 apic1 = find_isa_irq_apic(0, mp_INT);
2182 pin2 = ioapic_i8259.pin;
2183 apic2 = ioapic_i8259.apic;
2184
2185 apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
2186 "apic1=%d pin1=%d apic2=%d pin2=%d\n",
2187 vector, apic1, pin1, apic2, pin2);
2188
2189 /*
2190 * Some BIOS writers are clueless and report the ExtINTA
2191 * I/O APIC input from the cascaded 8259A as the timer
2192 * interrupt input. So just in case, if only one pin
2193 * was found above, try it both directly and through the
2194 * 8259A.
2195 */
2196 if (pin1 == -1) {
2197 pin1 = pin2;
2198 apic1 = apic2;
2199 no_pin1 = 1;
2200 } else if (pin2 == -1) {
2201 pin2 = pin1;
2202 apic2 = apic1;
2203 }
2204
2205 if (pin1 != -1) {
2206 /*
2207 * Ok, does IRQ0 through the IOAPIC work?
2208 */
 /* The borrowed pin has no routing for IRQ0 yet, so program it here. */
2209 if (no_pin1) {
2210 add_pin_to_irq(0, apic1, pin1);
2211 setup_timer_IRQ0_pin(apic1, pin1, vector);
2212 }
2213 unmask_IO_APIC_irq(0);
2214 if (timer_irq_works()) {
2215 if (nmi_watchdog == NMI_IO_APIC) {
2216 setup_nmi();
2217 enable_8259A_irq(0);
2218 }
 /* NOTE(review): clears the pin on IO-APIC index 0, not apic1 - confirm this is intended. */
2219 if (disable_timer_pin_1 > 0)
2220 clear_IO_APIC_pin(0, pin1);
2221 goto out;
2222 }
2223 clear_IO_APIC_pin(apic1, pin1);
2224 if (!no_pin1)
2225 apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
2226 "8254 timer not connected to IO-APIC\n");
2227
2228 apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
2229 "(IRQ0) through the 8259A ...\n");
2230 apic_printk(APIC_QUIET, KERN_INFO
2231 "..... (found apic %d pin %d) ...\n", apic2, pin2);
2232 /*
2233 * legacy devices should be connected to IO APIC #0
2234 */
2235 replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
2236 setup_timer_IRQ0_pin(apic2, pin2, vector);
2237 unmask_IO_APIC_irq(0);
2238 enable_8259A_irq(0);
2239 if (timer_irq_works()) {
2240 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
2241 timer_through_8259 = 1;
2242 if (nmi_watchdog == NMI_IO_APIC) {
2243 disable_8259A_irq(0);
2244 setup_nmi();
2245 enable_8259A_irq(0);
2246 }
2247 goto out;
2248 }
2249 /*
2250 * Cleanup, just in case ...
2251 */
2252 disable_8259A_irq(0);
2253 clear_IO_APIC_pin(apic2, pin2);
2254 apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
2255 }
2256
 /* Both IO-APIC routes failed (or no pin exists): the IO-APIC NMI watchdog cannot be used. */
2257 if (nmi_watchdog == NMI_IO_APIC) {
2258 apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
2259 "through the IO-APIC - disabling NMI Watchdog!\n");
2260 nmi_watchdog = NMI_NONE;
2261 }
2262 timer_ack = 0;
2263
2264 apic_printk(APIC_QUIET, KERN_INFO
2265 "...trying to set up timer as Virtual Wire IRQ...\n");
2266
2267 lapic_register_intr(0, vector);
2268 apic_write(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
2269 enable_8259A_irq(0);
2270
2271 if (timer_irq_works()) {
2272 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
2273 goto out;
2274 }
2275 disable_8259A_irq(0);
2276 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
2277 apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
2278
2279 apic_printk(APIC_QUIET, KERN_INFO
2280 "...trying to set up timer as ExtINT IRQ...\n");
2281
2282 init_8259A(0);
2283 make_8259A_irq(0);
2284 apic_write(APIC_LVT0, APIC_DM_EXTINT);
2285
2286 unlock_ExtINT_logic();
2287
2288 if (timer_irq_works()) {
2289 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
2290 goto out;
2291 }
2292 apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
2293 panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
2294 "report. Then try booting with the 'noapic' option.\n");
2295out:
2296 local_irq_restore(flags);
2297}
2298
2299/*
2300 * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
2301 * to devices. However there may be an I/O APIC pin available for
2302 * this interrupt regardless. The pin may be left unconnected, but
2303 * typically it will be reused as an ExtINT cascade interrupt for
2304 * the master 8259A. In the MPS case such a pin will normally be
2305 * reported as an ExtINT interrupt in the MP table. With ACPI
2306 * there is no provision for ExtINT interrupts, and in the absence
2307 * of an override it would be treated as an ordinary ISA I/O APIC
2308 * interrupt, that is edge-triggered and unmasked by default. We
2309 * used to do this, but it caused problems on some systems because
2310 * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
2311 * the same ExtINT cascade interrupt to drive the local APIC of the
2312 * bootstrap processor. Therefore we refrain from routing IRQ2 to
2313 * the I/O APIC in all cases now. No actual device should request
2314 * it anyway. --macro
2315 */
2316#define PIC_IRQS (1 << PIC_CASCADE_IR)
2317
/*
 * Top-level IO-APIC bring-up: reserves the system vectors, enables the
 * IO-APIC, marks every irq except the PIC cascade as an IO-APIC irq,
 * then sets up IDs (non-ACPI only), routing, fallback handlers and
 * finally the timer interrupt.
 */
2318void __init setup_IO_APIC(void)
2319{
2320 int i;
2321
2322 /* Reserve all the system vectors. */
2323 for (i = first_system_vector; i < NR_VECTORS; i++)
2324 set_bit(i, used_vectors);
2325
2326 enable_IO_APIC();
2327
2328 io_apic_irqs = ~PIC_IRQS;
2329
2330 printk("ENABLING IO-APIC IRQs\n");
2331
2332 /*
2333 * Set up IO-APIC IRQ routing.
2334 */
2335 if (!acpi_ioapic)
2336 setup_ioapic_ids_from_mpc();
2337 sync_Arb_IDs();
2338 setup_IO_APIC_irqs();
2339 init_IO_APIC_traps();
2340 check_timer();
2341}
2342
2343/*
2344 * Called after all the initialization is done. If we didn't find any
2345 * APIC bugs then we can allow the modify fast path.
2346 */
2347
/*
 * Late initcall: if sis_apic_bug is still -1 at this point, settle it
 * to 0.  Always returns 0.
 */
2348static int __init io_apic_bug_finalize(void)
2349{
2350 if (sis_apic_bug == -1)
2351 sis_apic_bug = 0;
2352 return 0;
2353}
2354
2355late_initcall(io_apic_bug_finalize);
2356
/*
 * Per-IO-APIC suspend/resume state: the sysdev plus a trailing array
 * that ioapic_suspend() fills with one saved routing entry per pin.
 */
2357struct sysfs_ioapic_data {
2358 struct sys_device dev;
2359 struct IO_APIC_route_entry entry[0];
2360};
/* One slot per possible IO-APIC; filled in by ioapic_init_sysfs(). */
2361static struct sysfs_ioapic_data *mp_ioapic_data[MAX_IO_APICS];
2362
2363static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
2364{
2365 struct IO_APIC_route_entry *entry;
2366 struct sysfs_ioapic_data *data;
2367 int i;
2368
2369 data = container_of(dev, struct sysfs_ioapic_data, dev);
2370 entry = data->entry;
2371 for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
2372 entry[i] = ioapic_read_entry(dev->id, i);
2373
2374 return 0;
2375}
2376
2377static int ioapic_resume(struct sys_device *dev)
2378{
2379 struct IO_APIC_route_entry *entry;
2380 struct sysfs_ioapic_data *data;
2381 unsigned long flags;
2382 union IO_APIC_reg_00 reg_00;
2383 int i;
2384
2385 data = container_of(dev, struct sysfs_ioapic_data, dev);
2386 entry = data->entry;
2387
2388 spin_lock_irqsave(&ioapic_lock, flags);
2389 reg_00.raw = io_apic_read(dev->id, 0);
2390 if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
2391 reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
2392 io_apic_write(dev->id, 0, reg_00.raw);
2393 }
2394 spin_unlock_irqrestore(&ioapic_lock, flags);
2395 for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
2396 ioapic_write_entry(dev->id, i, entry[i]);
2397
2398 return 0;
2399}
2400
/* sysdev class that hooks the IO-APIC suspend and resume callbacks. */
2401static struct sysdev_class ioapic_sysdev_class = {
2402 .name = "ioapic",
2403 .suspend = ioapic_suspend,
2404 .resume = ioapic_resume,
2405};
2406
2407static int __init ioapic_init_sysfs(void)
2408{
2409 struct sys_device *dev;
2410 int i, size, error = 0;
2411
2412 error = sysdev_class_register(&ioapic_sysdev_class);
2413 if (error)
2414 return error;
2415
2416 for (i = 0; i < nr_ioapics; i++) {
2417 size = sizeof(struct sys_device) + nr_ioapic_registers[i]
2418 * sizeof(struct IO_APIC_route_entry);
2419 mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
2420 if (!mp_ioapic_data[i]) {
2421 printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
2422 continue;
2423 }
2424 dev = &mp_ioapic_data[i]->dev;
2425 dev->id = i;
2426 dev->cls = &ioapic_sysdev_class;
2427 error = sysdev_register(dev);
2428 if (error) {
2429 kfree(mp_ioapic_data[i]);
2430 mp_ioapic_data[i] = NULL;
2431 printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
2432 continue;
2433 }
2434 }
2435
2436 return 0;
2437}
2438
2439device_initcall(ioapic_init_sysfs);
2440
2441/*
2442 * Dynamic irq allocation and deallocation
2443 */
2444int create_irq(void)
2445{
2446 /* Allocate an unused irq */
2447 int irq, new, vector = 0;
2448 unsigned long flags;
2449
2450 irq = -ENOSPC;
2451 spin_lock_irqsave(&vector_lock, flags);
2452 for (new = (NR_IRQS - 1); new >= 0; new--) {
2453 if (platform_legacy_irq(new))
2454 continue;
2455 if (irq_vector[new] != 0)
2456 continue;
2457 vector = __assign_irq_vector(new);
2458 if (likely(vector > 0))
2459 irq = new;
2460 break;
2461 }
2462 spin_unlock_irqrestore(&vector_lock, flags);
2463
2464 if (irq >= 0) {
2465 set_intr_gate(vector, interrupt[irq]);
2466 dynamic_irq_init(irq);
2467 }
2468 return irq;
2469}
2470
/*
 * Release an irq obtained from create_irq(): cleans up the dynamic irq
 * descriptor, then, under vector_lock, frees the vector in used_vectors
 * and clears the irq's slot in irq_vector[].
 */
2471void destroy_irq(unsigned int irq)
2472{
2473 unsigned long flags;
2474
2475 dynamic_irq_cleanup(irq);
2476
2477 spin_lock_irqsave(&vector_lock, flags);
2478 clear_bit(irq_vector[irq], used_vectors);
2479 irq_vector[irq] = 0;
2480 spin_unlock_irqrestore(&vector_lock, flags);
2481}
2482
2483/*
2484 * MSI message composition
2485 */
2486#ifdef CONFIG_PCI_MSI
/*
 * Build the MSI address/data message for irq.
 *
 * A vector is obtained with assign_irq_vector(); when that succeeds
 * (>= 0) msg is filled in using the destination APIC ID of TARGET_CPUS
 * and the build-time INT_DEST_MODE / INT_DELIVERY_MODE settings.
 * The pdev argument is not used here.
 *
 * Returns the vector, which is negative on failure (msg is then left
 * untouched).
 */
2487static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
2488{
2489 int vector;
2490 unsigned dest;
2491
2492 vector = assign_irq_vector(irq);
2493 if (vector >= 0) {
2494 dest = cpu_mask_to_apicid(TARGET_CPUS);
2495
2496 msg->address_hi = MSI_ADDR_BASE_HI;
2497 msg->address_lo =
2498 MSI_ADDR_BASE_LO |
2499 ((INT_DEST_MODE == 0) ?
2500MSI_ADDR_DEST_MODE_PHYSICAL:
2501 MSI_ADDR_DEST_MODE_LOGICAL) |
2502 ((INT_DELIVERY_MODE != dest_LowestPrio) ?
2503 MSI_ADDR_REDIRECTION_CPU:
2504 MSI_ADDR_REDIRECTION_LOWPRI) |
2505 MSI_ADDR_DEST_ID(dest);
2506
2507 msg->data =
2508 MSI_DATA_TRIGGER_EDGE |
2509 MSI_DATA_LEVEL_ASSERT |
2510 ((INT_DELIVERY_MODE != dest_LowestPrio) ?
2511MSI_DATA_DELIVERY_FIXED:
2512 MSI_DATA_DELIVERY_LOWPRI) |
2513 MSI_DATA_VECTOR(vector);
2514 }
2515 return vector;
2516}
2517
2518#ifdef CONFIG_SMP
2519static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2520{
2521 struct msi_msg msg;
2522 unsigned int dest;
2523 cpumask_t tmp;
2524 int vector;
2525
2526 cpus_and(tmp, mask, cpu_online_map);
2527 if (cpus_empty(tmp))
2528 tmp = TARGET_CPUS;
2529
2530 vector = assign_irq_vector(irq);
2531 if (vector < 0)
2532 return;
2533
2534 dest = cpu_mask_to_apicid(mask);
2535
2536 read_msi_msg(irq, &msg);
2537
2538 msg.data &= ~MSI_DATA_VECTOR_MASK;
2539 msg.data |= MSI_DATA_VECTOR(vector);
2540 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
2541 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
2542
2543 write_msi_msg(irq, &msg);
2544 irq_desc[irq].affinity = mask;
2545}
2546#endif /* CONFIG_SMP */
2547
2548/*
2549 * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
2550 * which implement the MSI or MSI-X Capability Structure.
2551 */
/* irq_chip for MSI interrupts; ack and retrigger reuse the IO-APIC helpers. */
2552static struct irq_chip msi_chip = {
2553 .name = "PCI-MSI",
2554 .unmask = unmask_msi_irq,
2555 .mask = mask_msi_irq,
2556 .ack = ack_ioapic_irq,
2557#ifdef CONFIG_SMP
2558 .set_affinity = set_msi_irq_affinity,
2559#endif
2560 .retrigger = ioapic_retrigger_irq,
2561};
2562
2563int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
2564{
2565 struct msi_msg msg;
2566 int irq, ret;
2567 irq = create_irq();
2568 if (irq < 0)
2569 return irq;
2570
2571 ret = msi_compose_msg(dev, irq, &msg);
2572 if (ret < 0) {
2573 destroy_irq(irq);
2574 return ret;
2575 }
2576
2577 set_irq_msi(irq, desc);
2578 write_msi_msg(irq, &msg);
2579
2580 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq,
2581 "edge");
2582
2583 return 0;
2584}
2585
/* Tear down an MSI irq by passing it to destroy_irq(). */
2586void arch_teardown_msi_irq(unsigned int irq)
2587{
2588 destroy_irq(irq);
2589}
2590
2591#endif /* CONFIG_PCI_MSI */
2592
2593/*
2594 * Hypertransport interrupt support
2595 */
2596#ifdef CONFIG_HT_IRQ
2597
2598#ifdef CONFIG_SMP
2599
/*
 * Point a HyperTransport irq at destination dest: fetches the current
 * message, replaces the low and high destination-ID fields, and writes
 * the message back.
 */
2600static void target_ht_irq(unsigned int irq, unsigned int dest)
2601{
2602 struct ht_irq_msg msg;
2603 fetch_ht_irq_msg(irq, &msg);
2604
2605 msg.address_lo &= ~(HT_IRQ_LOW_DEST_ID_MASK);
2606 msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
2607
2608 msg.address_lo |= HT_IRQ_LOW_DEST_ID(dest);
2609 msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
2610
2611 write_ht_irq_msg(irq, &msg);
2612}
2613
/*
 * Set the CPU affinity of a HyperTransport irq.  The requested mask is
 * restricted to online CPUs (falling back to TARGET_CPUS when empty);
 * the resulting set is what gets programmed and recorded.
 */
2614static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
2615{
2616 unsigned int dest;
2617 cpumask_t tmp;
2618
2619 cpus_and(tmp, mask, cpu_online_map);
2620 if (cpus_empty(tmp))
2621 tmp = TARGET_CPUS;
2622
 /* Copies the filtered set back into mask. */
2623 cpus_and(mask, tmp, CPU_MASK_ALL);
2624
2625 dest = cpu_mask_to_apicid(mask);
2626
2627 target_ht_irq(irq, dest);
2628 irq_desc[irq].affinity = mask;
2629}
2630#endif
2631
/* irq_chip for HyperTransport interrupts; ack and retrigger reuse the IO-APIC helpers. */
2632static struct irq_chip ht_irq_chip = {
2633 .name = "PCI-HT",
2634 .mask = mask_ht_irq,
2635 .unmask = unmask_ht_irq,
2636 .ack = ack_ioapic_irq,
2637#ifdef CONFIG_SMP
2638 .set_affinity = set_ht_irq_affinity,
2639#endif
2640 .retrigger = ioapic_retrigger_irq,
2641};
2642
/*
 * Set up a HyperTransport irq: assigns a vector and, on success, writes
 * an initially masked, edge-type HT message for it and installs
 * ht_irq_chip with the edge handler.  The dev argument is not used here.
 *
 * Returns the vector from assign_irq_vector() (negative on failure).
 */
2643int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
2644{
2645 int vector;
2646
2647 vector = assign_irq_vector(irq);
2648 if (vector >= 0) {
2649 struct ht_irq_msg msg;
2650 unsigned dest;
2651 cpumask_t tmp;
2652
2653 cpus_clear(tmp);
 /* NOTE(review): the CPU number is taken from bits 8 and up of the vector value - presumably 0 for plain 8-bit vectors; confirm intent. */
2654 cpu_set(vector >> 8, tmp);
2655 dest = cpu_mask_to_apicid(tmp);
2656
2657 msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
2658
2659 msg.address_lo =
2660 HT_IRQ_LOW_BASE |
2661 HT_IRQ_LOW_DEST_ID(dest) |
2662 HT_IRQ_LOW_VECTOR(vector) |
2663 ((INT_DEST_MODE == 0) ?
2664 HT_IRQ_LOW_DM_PHYSICAL :
2665 HT_IRQ_LOW_DM_LOGICAL) |
2666 HT_IRQ_LOW_RQEOI_EDGE |
2667 ((INT_DELIVERY_MODE != dest_LowestPrio) ?
2668 HT_IRQ_LOW_MT_FIXED :
2669 HT_IRQ_LOW_MT_ARBITRATED) |
2670 HT_IRQ_LOW_IRQ_MASKED;
2671
2672 write_ht_irq_msg(irq, &msg);
2673
2674 set_irq_chip_and_handler_name(irq, &ht_irq_chip,
2675 handle_edge_irq, "edge");
2676 }
2677 return vector;
2678}
2679#endif /* CONFIG_HT_IRQ */
2680
2681/* --------------------------------------------------------------------------
2682 ACPI-based IOAPIC Configuration
2683 -------------------------------------------------------------------------- */
2684
2685#ifdef CONFIG_ACPI
2686
/*
 * Choose a unique APIC ID for an IO-APIC and program it.
 *
 * @ioapic:  index of the IO-APIC
 * @apic_id: requested ID
 *
 * An out-of-range request falls back to the ID currently in the
 * IO-APIC's register 0; an ID already recorded in the local map is
 * replaced by the lowest unused one (panics if there is none).  The
 * chosen ID is added to the map and written to the hardware if it
 * differs from what is there.
 *
 * Returns the ID in use, or -1 if the hardware did not accept the write.
 */
2687int __init io_apic_get_unique_id(int ioapic, int apic_id)
2688{
2689 union IO_APIC_reg_00 reg_00;
 /* IDs already taken; persists across calls and is seeded lazily below. */
2690 static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
2691 physid_mask_t tmp;
2692 unsigned long flags;
2693 int i = 0;
2694
2695 /*
2696 * The P4 platform supports up to 256 APIC IDs on two separate APIC
2697 * buses (one for LAPICs, one for IOAPICs), where predecessors only
2698 * supports up to 16 on one shared APIC bus.
2699 *
2700 * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
2701 * advantage of new APIC bus architecture.
2702 */
2703
2704 if (physids_empty(apic_id_map))
2705 apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
2706
2707 spin_lock_irqsave(&ioapic_lock, flags);
2708 reg_00.raw = io_apic_read(ioapic, 0);
2709 spin_unlock_irqrestore(&ioapic_lock, flags);
2710
2711 if (apic_id >= get_physical_broadcast()) {
2712 printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
2713 "%d\n", ioapic, apic_id, reg_00.bits.ID);
2714 apic_id = reg_00.bits.ID;
2715 }
2716
2717 /*
2718 * Every APIC in a system must have a unique ID or we get lots of nice
2719 * 'stuck on smp_invalidate_needed IPI wait' messages.
2720 */
2721 if (check_apicid_used(apic_id_map, apic_id)) {
2722
 /* Look for the lowest ID not yet in the map. */
2723 for (i = 0; i < get_physical_broadcast(); i++) {
2724 if (!check_apicid_used(apic_id_map, i))
2725 break;
2726 }
2727
2728 if (i == get_physical_broadcast())
2729 panic("Max apic_id exceeded!\n");
2730
2731 printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
2732 "trying %d\n", ioapic, apic_id, i);
2733
2734 apic_id = i;
2735 }
2736
 /* Record the chosen ID as taken. */
2737 tmp = apicid_to_cpu_present(apic_id);
2738 physids_or(apic_id_map, apic_id_map, tmp);
2739
2740 if (reg_00.bits.ID != apic_id) {
2741 reg_00.bits.ID = apic_id;
2742
 /* Write the new ID and read it back to verify it stuck. */
2743 spin_lock_irqsave(&ioapic_lock, flags);
2744 io_apic_write(ioapic, 0, reg_00.raw);
2745 reg_00.raw = io_apic_read(ioapic, 0);
2746 spin_unlock_irqrestore(&ioapic_lock, flags);
2747
2748 /* Sanity check */
2749 if (reg_00.bits.ID != apic_id) {
2750 printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
2751 return -1;
2752 }
2753 }
2754
2755 apic_printk(APIC_VERBOSE, KERN_INFO
2756 "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
2757
2758 return apic_id;
2759}
2760
2761
2762int __init io_apic_get_version(int ioapic)
2763{
2764 union IO_APIC_reg_01 reg_01;
2765 unsigned long flags;
2766
2767 spin_lock_irqsave(&ioapic_lock, flags);
2768 reg_01.raw = io_apic_read(ioapic, 1);
2769 spin_unlock_irqrestore(&ioapic_lock, flags);
2770
2771 return reg_01.bits.version;
2772}
2773
2774
2775int __init io_apic_get_redir_entries(int ioapic)
2776{
2777 union IO_APIC_reg_01 reg_01;
2778 unsigned long flags;
2779
2780 spin_lock_irqsave(&ioapic_lock, flags);
2781 reg_01.raw = io_apic_read(ioapic, 1);
2782 spin_unlock_irqrestore(&ioapic_lock, flags);
2783
2784 return reg_01.bits.entries;
2785}
2786
2787
/*
 * Program one IO-APIC pin as the (initially masked) route for irq.
 *
 * @ioapic:          index of the IO-APIC
 * @pin:             pin on that IO-APIC
 * @irq:             irq number to route
 * @edge_level:      value stored in the entry's trigger field
 * @active_high_low: value stored in the entry's polarity field
 *
 * Returns -EINVAL if irq is not an IO-APIC irq, otherwise 0.
 */
2788int io_apic_set_pci_routing(int ioapic, int pin, int irq, int edge_level, int active_high_low)
2789{
2790 struct IO_APIC_route_entry entry;
2791
2792 if (!IO_APIC_IRQ(irq)) {
2793 printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
2794 ioapic);
2795 return -EINVAL;
2796 }
2797
2798 /*
2799 * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
2800 * Note that we mask (disable) IRQs now -- these get enabled when the
2801 * corresponding device driver registers for this IRQ.
2802 */
2803
2804 memset(&entry, 0, sizeof(entry));
2805
2806 entry.delivery_mode = INT_DELIVERY_MODE;
2807 entry.dest_mode = INT_DEST_MODE;
2808 entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
2809 entry.trigger = edge_level;
2810 entry.polarity = active_high_low;
2811 entry.mask = 1;
2812
2813 /*
2814 * IRQs < 16 are already in the irq_2_pin[] map
2815 */
2816 if (irq >= 16)
2817 add_pin_to_irq(irq, ioapic, pin);
2818
 /* NOTE(review): the result is stored unchecked; other callers in this file test assign_irq_vector() for a negative return. */
2819 entry.vector = assign_irq_vector(irq);
2820
2821 apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry "
2822 "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic,
2823 mp_ioapics[ioapic].mp_apicid, pin, entry.vector, irq,
2824 edge_level, active_high_low);
2825
2826 ioapic_register_intr(irq, entry.vector, edge_level);
2827
 /* For a legacy irq on the first IO-APIC, turn the line off at the 8259A. */
2828 if (!ioapic && (irq < 16))
2829 disable_8259A_irq(irq);
2830
2831 ioapic_write_entry(ioapic, pin, entry);
2832
2833 return 0;
2834}
2835
2836int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
2837{
2838 int i;
2839
2840 if (skip_ioapic_setup)
2841 return -1;
2842
2843 for (i = 0; i < mp_irq_entries; i++)
2844 if (mp_irqs[i].mp_irqtype == mp_INT &&
2845 mp_irqs[i].mp_srcbusirq == bus_irq)
2846 break;
2847 if (i >= mp_irq_entries)
2848 return -1;
2849
2850 *trigger = irq_trigger(i);
2851 *polarity = irq_polarity(i);
2852 return 0;
2853}
2854
2855#endif /* CONFIG_ACPI */
2856
/*
 * Early-param handler for "disable_timer_pin_1": sets
 * disable_timer_pin_1 to 1.  The argument is ignored; returns 0.
 */
2857static int __init parse_disable_timer_pin_1(char *arg)
2858{
2859 disable_timer_pin_1 = 1;
2860 return 0;
2861}
2862early_param("disable_timer_pin_1", parse_disable_timer_pin_1);
2863
/*
 * Early-param handler for "enable_timer_pin_1": sets
 * disable_timer_pin_1 to -1.  The argument is ignored; returns 0.
 */
2864static int __init parse_enable_timer_pin_1(char *arg)
2865{
2866 disable_timer_pin_1 = -1;
2867 return 0;
2868}
2869early_param("enable_timer_pin_1", parse_enable_timer_pin_1);
2870
/*
 * Early-param handler for "noapic": calls disable_ioapic_setup().
 * The argument is ignored; returns 0.
 */
2871static int __init parse_noapic(char *arg)
2872{
2873 /* disable IO-APIC */
2874 disable_ioapic_setup();
2875 return 0;
2876}
2877early_param("noapic", parse_noapic);
2878
/*
 * Map the register page of every IO-APIC into consecutive fixmap slots
 * starting at FIX_IO_APIC_BASE_0.
 *
 * With an MP configuration present the address comes from mp_ioapics[];
 * a zero address there is treated as a BIOS bug and disables IO-APIC
 * support.  In that case, and when no MP configuration was found, a
 * freshly allocated bootmem page is mapped instead.
 */
2879void __init ioapic_init_mappings(void)
2880{
2881 unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
2882 int i;
2883
2884 for (i = 0; i < nr_ioapics; i++) {
2885 if (smp_found_config) {
2886 ioapic_phys = mp_ioapics[i].mp_apicaddr;
2887 if (!ioapic_phys) {
2888 printk(KERN_ERR
2889 "WARNING: bogus zero IO-APIC "
2890 "address found in MPTABLE, "
2891 "disabling IO/APIC support!\n");
2892 smp_found_config = 0;
2893 skip_ioapic_setup = 1;
 /* Jumps into the else branch below to map a dummy page. */
2894 goto fake_ioapic_page;
2895 }
2896 } else {
2897fake_ioapic_page:
2898 ioapic_phys = (unsigned long)
2899 alloc_bootmem_pages(PAGE_SIZE);
2900 ioapic_phys = __pa(ioapic_phys);
2901 }
2902 set_fixmap_nocache(idx, ioapic_phys);
2903 printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n",
2904 __fix_to_virt(idx), ioapic_phys);
2905 idx++;
2906 }
2907}
2908
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
new file mode 100644
index 000000000000..ccf6c503fc3b
--- /dev/null
+++ b/arch/x86/kernel/irq.c
@@ -0,0 +1,189 @@
1/*
2 * Common interrupt code for 32 and 64 bit
3 */
4#include <linux/cpu.h>
5#include <linux/interrupt.h>
6#include <linux/kernel_stat.h>
7#include <linux/seq_file.h>
8
9#include <asm/apic.h>
10#include <asm/io_apic.h>
11#include <asm/smp.h>
12
13atomic_t irq_err_count;
14
15/*
16 * 'what should we do if we get a hw irq event on an illegal vector'.
17 * each architecture has to answer this themselves.
18 */
/* Logs the unexpected vector number passed in as irq and, on local-APIC builds with an APIC present, acks it. */
19void ack_bad_irq(unsigned int irq)
20{
21 printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq);
22
23#ifdef CONFIG_X86_LOCAL_APIC
24 /*
25 * Currently unexpected vectors happen only on SMP and APIC.
26 * We _must_ ack these because every local APIC has only N
27 * irq slots per priority level, and a 'hanging, unacked' IRQ
28 * holds up an irq slot - in excessive cases (when multiple
29 * unexpected vectors occur) that might lock up the APIC
30 * completely.
31 * But only ack when the APIC is enabled -AK
32 */
33 if (cpu_has_apic)
34 ack_APIC_irq();
35#endif
36}
37
38#ifdef CONFIG_X86_32
39# define irq_stats(x) (&per_cpu(irq_stat,x))
40#else
41# define irq_stats(x) cpu_pda(x)
42#endif
43/*
44 * /proc/interrupts printing:
45 */
/*
 * Print the architecture-specific counter rows: one row per counter
 * with a column per online CPU, followed by the global ERR (and, with
 * an IO-APIC, MIS) totals.  Which rows appear depends on the kernel
 * configuration.  Always returns 0.
 */
46static int show_other_interrupts(struct seq_file *p)
47{
48 int j;
49
50 seq_printf(p, "NMI: ");
51 for_each_online_cpu(j)
52 seq_printf(p, "%10u ", irq_stats(j)->__nmi_count);
53 seq_printf(p, " Non-maskable interrupts\n");
54#ifdef CONFIG_X86_LOCAL_APIC
55 seq_printf(p, "LOC: ");
56 for_each_online_cpu(j)
57 seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs);
58 seq_printf(p, " Local timer interrupts\n");
59#endif
60#ifdef CONFIG_SMP
61 seq_printf(p, "RES: ");
62 for_each_online_cpu(j)
63 seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count);
64 seq_printf(p, " Rescheduling interrupts\n");
65 seq_printf(p, "CAL: ");
66 for_each_online_cpu(j)
67 seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
68 seq_printf(p, " Function call interrupts\n");
69 seq_printf(p, "TLB: ");
70 for_each_online_cpu(j)
71 seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count);
72 seq_printf(p, " TLB shootdowns\n");
73#endif
74#ifdef CONFIG_X86_MCE
75 seq_printf(p, "TRM: ");
76 for_each_online_cpu(j)
77 seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
78 seq_printf(p, " Thermal event interrupts\n");
79# ifdef CONFIG_X86_64
80 seq_printf(p, "THR: ");
81 for_each_online_cpu(j)
82 seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
83 seq_printf(p, " Threshold APIC interrupts\n");
84# endif
85#endif
86#ifdef CONFIG_X86_LOCAL_APIC
87 seq_printf(p, "SPU: ");
88 for_each_online_cpu(j)
89 seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
90 seq_printf(p, " Spurious interrupts\n");
91#endif
 /* These two are single global counters, not per-CPU. */
92 seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
93#if defined(CONFIG_X86_IO_APIC)
94 seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
95#endif
96 return 0;
97}
98
99int show_interrupts(struct seq_file *p, void *v)
100{
101 unsigned long flags, any_count = 0;
102 int i = *(loff_t *) v, j;
103 struct irqaction *action;
104 struct irq_desc *desc;
105
106 if (i > nr_irqs)
107 return 0;
108
109 if (i == nr_irqs)
110 return show_other_interrupts(p);
111
112 /* print header */
113 if (i == 0) {
114 seq_printf(p, " ");
115 for_each_online_cpu(j)
116 seq_printf(p, "CPU%-8d",j);
117 seq_putc(p, '\n');
118 }
119
120 desc = irq_to_desc(i);
121 spin_lock_irqsave(&desc->lock, flags);
122#ifndef CONFIG_SMP
123 any_count = kstat_irqs(i);
124#else
125 for_each_online_cpu(j)
126 any_count |= kstat_irqs_cpu(i, j);
127#endif
128 action = desc->action;
129 if (!action && !any_count)
130 goto out;
131
132 seq_printf(p, "%3d: ", i);
133#ifndef CONFIG_SMP
134 seq_printf(p, "%10u ", kstat_irqs(i));
135#else
136 for_each_online_cpu(j)
137 seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
138#endif
139 seq_printf(p, " %8s", desc->chip->name);
140 seq_printf(p, "-%-8s", desc->name);
141
142 if (action) {
143 seq_printf(p, " %s", action->name);
144 while ((action = action->next) != NULL)
145 seq_printf(p, ", %s", action->name);
146 }
147
148 seq_putc(p, '\n');
149out:
150 spin_unlock_irqrestore(&desc->lock, flags);
151 return 0;
152}
153
154/*
155 * /proc/stat helpers
156 */
/*
 * Sum of the per-CPU architecture-specific interrupt counters for cpu.
 * Which counters are included depends on the kernel configuration.
 */
157u64 arch_irq_stat_cpu(unsigned int cpu)
158{
159 u64 sum = irq_stats(cpu)->__nmi_count;
160
161#ifdef CONFIG_X86_LOCAL_APIC
162 sum += irq_stats(cpu)->apic_timer_irqs;
163#endif
164#ifdef CONFIG_SMP
165 sum += irq_stats(cpu)->irq_resched_count;
166 sum += irq_stats(cpu)->irq_call_count;
167 sum += irq_stats(cpu)->irq_tlb_count;
168#endif
169#ifdef CONFIG_X86_MCE
170 sum += irq_stats(cpu)->irq_thermal_count;
171# ifdef CONFIG_X86_64
172 sum += irq_stats(cpu)->irq_threshold_count;
173#endif
174#endif
175#ifdef CONFIG_X86_LOCAL_APIC
176 sum += irq_stats(cpu)->irq_spurious_count;
177#endif
178 return sum;
179}
180
181u64 arch_irq_stat(void)
182{
183 u64 sum = atomic_read(&irq_err_count);
184
185#ifdef CONFIG_X86_IO_APIC
186 sum += atomic_read(&irq_mis_count);
187#endif
188 return sum;
189}
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index b71e02d42f4f..a51382672de0 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -25,29 +25,6 @@ EXPORT_PER_CPU_SYMBOL(irq_stat);
25DEFINE_PER_CPU(struct pt_regs *, irq_regs); 25DEFINE_PER_CPU(struct pt_regs *, irq_regs);
26EXPORT_PER_CPU_SYMBOL(irq_regs); 26EXPORT_PER_CPU_SYMBOL(irq_regs);
27 27
28/*
29 * 'what should we do if we get a hw irq event on an illegal vector'.
30 * each architecture has to answer this themselves.
31 */
32void ack_bad_irq(unsigned int irq)
33{
34 printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq);
35
36#ifdef CONFIG_X86_LOCAL_APIC
37 /*
38 * Currently unexpected vectors happen only on SMP and APIC.
39 * We _must_ ack these because every local APIC has only N
40 * irq slots per priority level, and a 'hanging, unacked' IRQ
41 * holds up an irq slot - in excessive cases (when multiple
42 * unexpected vectors occur) that might lock up the APIC
43 * completely.
44 * But only ack when the APIC is enabled -AK
45 */
46 if (cpu_has_apic)
47 ack_APIC_irq();
48#endif
49}
50
51#ifdef CONFIG_DEBUG_STACKOVERFLOW 28#ifdef CONFIG_DEBUG_STACKOVERFLOW
52/* Debugging check for stack overflow: is there less than 1KB free? */ 29/* Debugging check for stack overflow: is there less than 1KB free? */
53static int check_stack_overflow(void) 30static int check_stack_overflow(void)
@@ -223,20 +200,25 @@ unsigned int do_IRQ(struct pt_regs *regs)
223{ 200{
224 struct pt_regs *old_regs; 201 struct pt_regs *old_regs;
225 /* high bit used in ret_from_ code */ 202 /* high bit used in ret_from_ code */
226 int overflow, irq = ~regs->orig_ax; 203 int overflow;
227 struct irq_desc *desc = irq_desc + irq; 204 unsigned vector = ~regs->orig_ax;
205 struct irq_desc *desc;
206 unsigned irq;
228 207
229 if (unlikely((unsigned)irq >= NR_IRQS)) {
230 printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
231 __func__, irq);
232 BUG();
233 }
234 208
235 old_regs = set_irq_regs(regs); 209 old_regs = set_irq_regs(regs);
236 irq_enter(); 210 irq_enter();
211 irq = __get_cpu_var(vector_irq)[vector];
237 212
238 overflow = check_stack_overflow(); 213 overflow = check_stack_overflow();
239 214
215 desc = irq_to_desc(irq);
216 if (unlikely(!desc)) {
217 printk(KERN_EMERG "%s: cannot handle IRQ %d vector %#x cpu %d\n",
218 __func__, irq, vector, smp_processor_id());
219 BUG();
220 }
221
240 if (!execute_on_irq_stack(overflow, desc, irq)) { 222 if (!execute_on_irq_stack(overflow, desc, irq)) {
241 if (unlikely(overflow)) 223 if (unlikely(overflow))
242 print_stack_overflow(); 224 print_stack_overflow();
@@ -248,146 +230,6 @@ unsigned int do_IRQ(struct pt_regs *regs)
248 return 1; 230 return 1;
249} 231}
250 232
251/*
252 * Interrupt statistics:
253 */
254
255atomic_t irq_err_count;
256
257/*
258 * /proc/interrupts printing:
259 */
260
261int show_interrupts(struct seq_file *p, void *v)
262{
263 int i = *(loff_t *) v, j;
264 struct irqaction * action;
265 unsigned long flags;
266
267 if (i == 0) {
268 seq_printf(p, " ");
269 for_each_online_cpu(j)
270 seq_printf(p, "CPU%-8d",j);
271 seq_putc(p, '\n');
272 }
273
274 if (i < NR_IRQS) {
275 unsigned any_count = 0;
276
277 spin_lock_irqsave(&irq_desc[i].lock, flags);
278#ifndef CONFIG_SMP
279 any_count = kstat_irqs(i);
280#else
281 for_each_online_cpu(j)
282 any_count |= kstat_cpu(j).irqs[i];
283#endif
284 action = irq_desc[i].action;
285 if (!action && !any_count)
286 goto skip;
287 seq_printf(p, "%3d: ",i);
288#ifndef CONFIG_SMP
289 seq_printf(p, "%10u ", kstat_irqs(i));
290#else
291 for_each_online_cpu(j)
292 seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
293#endif
294 seq_printf(p, " %8s", irq_desc[i].chip->name);
295 seq_printf(p, "-%-8s", irq_desc[i].name);
296
297 if (action) {
298 seq_printf(p, " %s", action->name);
299 while ((action = action->next) != NULL)
300 seq_printf(p, ", %s", action->name);
301 }
302
303 seq_putc(p, '\n');
304skip:
305 spin_unlock_irqrestore(&irq_desc[i].lock, flags);
306 } else if (i == NR_IRQS) {
307 seq_printf(p, "NMI: ");
308 for_each_online_cpu(j)
309 seq_printf(p, "%10u ", nmi_count(j));
310 seq_printf(p, " Non-maskable interrupts\n");
311#ifdef CONFIG_X86_LOCAL_APIC
312 seq_printf(p, "LOC: ");
313 for_each_online_cpu(j)
314 seq_printf(p, "%10u ",
315 per_cpu(irq_stat,j).apic_timer_irqs);
316 seq_printf(p, " Local timer interrupts\n");
317#endif
318#ifdef CONFIG_SMP
319 seq_printf(p, "RES: ");
320 for_each_online_cpu(j)
321 seq_printf(p, "%10u ",
322 per_cpu(irq_stat,j).irq_resched_count);
323 seq_printf(p, " Rescheduling interrupts\n");
324 seq_printf(p, "CAL: ");
325 for_each_online_cpu(j)
326 seq_printf(p, "%10u ",
327 per_cpu(irq_stat,j).irq_call_count);
328 seq_printf(p, " Function call interrupts\n");
329 seq_printf(p, "TLB: ");
330 for_each_online_cpu(j)
331 seq_printf(p, "%10u ",
332 per_cpu(irq_stat,j).irq_tlb_count);
333 seq_printf(p, " TLB shootdowns\n");
334#endif
335#ifdef CONFIG_X86_MCE
336 seq_printf(p, "TRM: ");
337 for_each_online_cpu(j)
338 seq_printf(p, "%10u ",
339 per_cpu(irq_stat,j).irq_thermal_count);
340 seq_printf(p, " Thermal event interrupts\n");
341#endif
342#ifdef CONFIG_X86_LOCAL_APIC
343 seq_printf(p, "SPU: ");
344 for_each_online_cpu(j)
345 seq_printf(p, "%10u ",
346 per_cpu(irq_stat,j).irq_spurious_count);
347 seq_printf(p, " Spurious interrupts\n");
348#endif
349 seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
350#if defined(CONFIG_X86_IO_APIC)
351 seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
352#endif
353 }
354 return 0;
355}
356
357/*
358 * /proc/stat helpers
359 */
360u64 arch_irq_stat_cpu(unsigned int cpu)
361{
362 u64 sum = nmi_count(cpu);
363
364#ifdef CONFIG_X86_LOCAL_APIC
365 sum += per_cpu(irq_stat, cpu).apic_timer_irqs;
366#endif
367#ifdef CONFIG_SMP
368 sum += per_cpu(irq_stat, cpu).irq_resched_count;
369 sum += per_cpu(irq_stat, cpu).irq_call_count;
370 sum += per_cpu(irq_stat, cpu).irq_tlb_count;
371#endif
372#ifdef CONFIG_X86_MCE
373 sum += per_cpu(irq_stat, cpu).irq_thermal_count;
374#endif
375#ifdef CONFIG_X86_LOCAL_APIC
376 sum += per_cpu(irq_stat, cpu).irq_spurious_count;
377#endif
378 return sum;
379}
380
381u64 arch_irq_stat(void)
382{
383 u64 sum = atomic_read(&irq_err_count);
384
385#ifdef CONFIG_X86_IO_APIC
386 sum += atomic_read(&irq_mis_count);
387#endif
388 return sum;
389}
390
391#ifdef CONFIG_HOTPLUG_CPU 233#ifdef CONFIG_HOTPLUG_CPU
392#include <mach_apic.h> 234#include <mach_apic.h>
393 235
@@ -395,20 +237,22 @@ void fixup_irqs(cpumask_t map)
395{ 237{
396 unsigned int irq; 238 unsigned int irq;
397 static int warned; 239 static int warned;
240 struct irq_desc *desc;
398 241
399 for (irq = 0; irq < NR_IRQS; irq++) { 242 for_each_irq_desc(irq, desc) {
400 cpumask_t mask; 243 cpumask_t mask;
244
401 if (irq == 2) 245 if (irq == 2)
402 continue; 246 continue;
403 247
404 cpus_and(mask, irq_desc[irq].affinity, map); 248 cpus_and(mask, desc->affinity, map);
405 if (any_online_cpu(mask) == NR_CPUS) { 249 if (any_online_cpu(mask) == NR_CPUS) {
406 printk("Breaking affinity for irq %i\n", irq); 250 printk("Breaking affinity for irq %i\n", irq);
407 mask = map; 251 mask = map;
408 } 252 }
409 if (irq_desc[irq].chip->set_affinity) 253 if (desc->chip->set_affinity)
410 irq_desc[irq].chip->set_affinity(irq, mask); 254 desc->chip->set_affinity(irq, mask);
411 else if (irq_desc[irq].action && !(warned++)) 255 else if (desc->action && !(warned++))
412 printk("Cannot set affinity for irq %i\n", irq); 256 printk("Cannot set affinity for irq %i\n", irq);
413 } 257 }
414 258
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index f065fe9071b9..60eb84eb77a0 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -18,28 +18,6 @@
18#include <asm/idle.h> 18#include <asm/idle.h>
19#include <asm/smp.h> 19#include <asm/smp.h>
20 20
21atomic_t irq_err_count;
22
23/*
24 * 'what should we do if we get a hw irq event on an illegal vector'.
25 * each architecture has to answer this themselves.
26 */
27void ack_bad_irq(unsigned int irq)
28{
29 printk(KERN_WARNING "unexpected IRQ trap at vector %02x\n", irq);
30 /*
31 * Currently unexpected vectors happen only on SMP and APIC.
32 * We _must_ ack these because every local APIC has only N
33 * irq slots per priority level, and a 'hanging, unacked' IRQ
34 * holds up an irq slot - in excessive cases (when multiple
35 * unexpected vectors occur) that might lock up the APIC
36 * completely.
37 * But don't ack when the APIC is disabled. -AK
38 */
39 if (!disable_apic)
40 ack_APIC_irq();
41}
42
43#ifdef CONFIG_DEBUG_STACKOVERFLOW 21#ifdef CONFIG_DEBUG_STACKOVERFLOW
44/* 22/*
45 * Probabilistic stack overflow check: 23 * Probabilistic stack overflow check:
@@ -65,122 +43,6 @@ static inline void stack_overflow_check(struct pt_regs *regs)
65#endif 43#endif
66 44
67/* 45/*
68 * Generic, controller-independent functions:
69 */
70
71int show_interrupts(struct seq_file *p, void *v)
72{
73 int i = *(loff_t *) v, j;
74 struct irqaction * action;
75 unsigned long flags;
76
77 if (i == 0) {
78 seq_printf(p, " ");
79 for_each_online_cpu(j)
80 seq_printf(p, "CPU%-8d",j);
81 seq_putc(p, '\n');
82 }
83
84 if (i < NR_IRQS) {
85 unsigned any_count = 0;
86
87 spin_lock_irqsave(&irq_desc[i].lock, flags);
88#ifndef CONFIG_SMP
89 any_count = kstat_irqs(i);
90#else
91 for_each_online_cpu(j)
92 any_count |= kstat_cpu(j).irqs[i];
93#endif
94 action = irq_desc[i].action;
95 if (!action && !any_count)
96 goto skip;
97 seq_printf(p, "%3d: ",i);
98#ifndef CONFIG_SMP
99 seq_printf(p, "%10u ", kstat_irqs(i));
100#else
101 for_each_online_cpu(j)
102 seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
103#endif
104 seq_printf(p, " %8s", irq_desc[i].chip->name);
105 seq_printf(p, "-%-8s", irq_desc[i].name);
106
107 if (action) {
108 seq_printf(p, " %s", action->name);
109 while ((action = action->next) != NULL)
110 seq_printf(p, ", %s", action->name);
111 }
112 seq_putc(p, '\n');
113skip:
114 spin_unlock_irqrestore(&irq_desc[i].lock, flags);
115 } else if (i == NR_IRQS) {
116 seq_printf(p, "NMI: ");
117 for_each_online_cpu(j)
118 seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count);
119 seq_printf(p, " Non-maskable interrupts\n");
120 seq_printf(p, "LOC: ");
121 for_each_online_cpu(j)
122 seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs);
123 seq_printf(p, " Local timer interrupts\n");
124#ifdef CONFIG_SMP
125 seq_printf(p, "RES: ");
126 for_each_online_cpu(j)
127 seq_printf(p, "%10u ", cpu_pda(j)->irq_resched_count);
128 seq_printf(p, " Rescheduling interrupts\n");
129 seq_printf(p, "CAL: ");
130 for_each_online_cpu(j)
131 seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count);
132 seq_printf(p, " Function call interrupts\n");
133 seq_printf(p, "TLB: ");
134 for_each_online_cpu(j)
135 seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count);
136 seq_printf(p, " TLB shootdowns\n");
137#endif
138#ifdef CONFIG_X86_MCE
139 seq_printf(p, "TRM: ");
140 for_each_online_cpu(j)
141 seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count);
142 seq_printf(p, " Thermal event interrupts\n");
143 seq_printf(p, "THR: ");
144 for_each_online_cpu(j)
145 seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count);
146 seq_printf(p, " Threshold APIC interrupts\n");
147#endif
148 seq_printf(p, "SPU: ");
149 for_each_online_cpu(j)
150 seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count);
151 seq_printf(p, " Spurious interrupts\n");
152 seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
153 }
154 return 0;
155}
156
157/*
158 * /proc/stat helpers
159 */
160u64 arch_irq_stat_cpu(unsigned int cpu)
161{
162 u64 sum = cpu_pda(cpu)->__nmi_count;
163
164 sum += cpu_pda(cpu)->apic_timer_irqs;
165#ifdef CONFIG_SMP
166 sum += cpu_pda(cpu)->irq_resched_count;
167 sum += cpu_pda(cpu)->irq_call_count;
168 sum += cpu_pda(cpu)->irq_tlb_count;
169#endif
170#ifdef CONFIG_X86_MCE
171 sum += cpu_pda(cpu)->irq_thermal_count;
172 sum += cpu_pda(cpu)->irq_threshold_count;
173#endif
174 sum += cpu_pda(cpu)->irq_spurious_count;
175 return sum;
176}
177
178u64 arch_irq_stat(void)
179{
180 return atomic_read(&irq_err_count);
181}
182
183/*
184 * do_IRQ handles all normal device IRQ's (the special 46 * do_IRQ handles all normal device IRQ's (the special
185 * SMP cross-CPU interrupts have their own specific 47 * SMP cross-CPU interrupts have their own specific
186 * handlers). 48 * handlers).
@@ -188,6 +50,7 @@ u64 arch_irq_stat(void)
188asmlinkage unsigned int do_IRQ(struct pt_regs *regs) 50asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
189{ 51{
190 struct pt_regs *old_regs = set_irq_regs(regs); 52 struct pt_regs *old_regs = set_irq_regs(regs);
53 struct irq_desc *desc;
191 54
192 /* high bit used in ret_from_ code */ 55 /* high bit used in ret_from_ code */
193 unsigned vector = ~regs->orig_ax; 56 unsigned vector = ~regs->orig_ax;
@@ -201,8 +64,9 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
201 stack_overflow_check(regs); 64 stack_overflow_check(regs);
202#endif 65#endif
203 66
204 if (likely(irq < NR_IRQS)) 67 desc = irq_to_desc(irq);
205 generic_handle_irq(irq); 68 if (likely(desc))
69 generic_handle_irq_desc(irq, desc);
206 else { 70 else {
207 if (!disable_apic) 71 if (!disable_apic)
208 ack_APIC_irq(); 72 ack_APIC_irq();
@@ -223,8 +87,9 @@ void fixup_irqs(cpumask_t map)
223{ 87{
224 unsigned int irq; 88 unsigned int irq;
225 static int warned; 89 static int warned;
90 struct irq_desc *desc;
226 91
227 for (irq = 0; irq < NR_IRQS; irq++) { 92 for_each_irq_desc(irq, desc) {
228 cpumask_t mask; 93 cpumask_t mask;
229 int break_affinity = 0; 94 int break_affinity = 0;
230 int set_affinity = 1; 95 int set_affinity = 1;
@@ -233,32 +98,32 @@ void fixup_irqs(cpumask_t map)
233 continue; 98 continue;
234 99
235 /* interrupt's are disabled at this point */ 100 /* interrupt's are disabled at this point */
236 spin_lock(&irq_desc[irq].lock); 101 spin_lock(&desc->lock);
237 102
238 if (!irq_has_action(irq) || 103 if (!irq_has_action(irq) ||
239 cpus_equal(irq_desc[irq].affinity, map)) { 104 cpus_equal(desc->affinity, map)) {
240 spin_unlock(&irq_desc[irq].lock); 105 spin_unlock(&desc->lock);
241 continue; 106 continue;
242 } 107 }
243 108
244 cpus_and(mask, irq_desc[irq].affinity, map); 109 cpus_and(mask, desc->affinity, map);
245 if (cpus_empty(mask)) { 110 if (cpus_empty(mask)) {
246 break_affinity = 1; 111 break_affinity = 1;
247 mask = map; 112 mask = map;
248 } 113 }
249 114
250 if (irq_desc[irq].chip->mask) 115 if (desc->chip->mask)
251 irq_desc[irq].chip->mask(irq); 116 desc->chip->mask(irq);
252 117
253 if (irq_desc[irq].chip->set_affinity) 118 if (desc->chip->set_affinity)
254 irq_desc[irq].chip->set_affinity(irq, mask); 119 desc->chip->set_affinity(irq, mask);
255 else if (!(warned++)) 120 else if (!(warned++))
256 set_affinity = 0; 121 set_affinity = 0;
257 122
258 if (irq_desc[irq].chip->unmask) 123 if (desc->chip->unmask)
259 irq_desc[irq].chip->unmask(irq); 124 desc->chip->unmask(irq);
260 125
261 spin_unlock(&irq_desc[irq].lock); 126 spin_unlock(&desc->lock);
262 127
263 if (break_affinity && set_affinity) 128 if (break_affinity && set_affinity)
264 printk("Broke affinity for irq %i\n", irq); 129 printk("Broke affinity for irq %i\n", irq);
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 9200a1e2752d..845aa9803e80 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -69,6 +69,13 @@ void __init init_ISA_irqs (void)
69 * 16 old-style INTA-cycle interrupts: 69 * 16 old-style INTA-cycle interrupts:
70 */ 70 */
71 for (i = 0; i < 16; i++) { 71 for (i = 0; i < 16; i++) {
72 /* first time call this irq_desc */
73 struct irq_desc *desc = irq_to_desc(i);
74
75 desc->status = IRQ_DISABLED;
76 desc->action = NULL;
77 desc->depth = 1;
78
72 set_irq_chip_and_handler_name(i, &i8259A_chip, 79 set_irq_chip_and_handler_name(i, &i8259A_chip,
73 handle_level_irq, "XT"); 80 handle_level_irq, "XT");
74 } 81 }
@@ -83,6 +90,27 @@ static struct irqaction irq2 = {
83 .name = "cascade", 90 .name = "cascade",
84}; 91};
85 92
93DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
94 [0 ... IRQ0_VECTOR - 1] = -1,
95 [IRQ0_VECTOR] = 0,
96 [IRQ1_VECTOR] = 1,
97 [IRQ2_VECTOR] = 2,
98 [IRQ3_VECTOR] = 3,
99 [IRQ4_VECTOR] = 4,
100 [IRQ5_VECTOR] = 5,
101 [IRQ6_VECTOR] = 6,
102 [IRQ7_VECTOR] = 7,
103 [IRQ8_VECTOR] = 8,
104 [IRQ9_VECTOR] = 9,
105 [IRQ10_VECTOR] = 10,
106 [IRQ11_VECTOR] = 11,
107 [IRQ12_VECTOR] = 12,
108 [IRQ13_VECTOR] = 13,
109 [IRQ14_VECTOR] = 14,
110 [IRQ15_VECTOR] = 15,
111 [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
112};
113
86/* Overridden in paravirt.c */ 114/* Overridden in paravirt.c */
87void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); 115void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
88 116
@@ -98,22 +126,14 @@ void __init native_init_IRQ(void)
98 * us. (some of these will be overridden and become 126 * us. (some of these will be overridden and become
99 * 'special' SMP interrupts) 127 * 'special' SMP interrupts)
100 */ 128 */
101 for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { 129 for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
102 int vector = FIRST_EXTERNAL_VECTOR + i;
103 if (i >= NR_IRQS)
104 break;
105 /* SYSCALL_VECTOR was reserved in trap_init. */ 130 /* SYSCALL_VECTOR was reserved in trap_init. */
106 if (!test_bit(vector, used_vectors)) 131 if (i != SYSCALL_VECTOR)
107 set_intr_gate(vector, interrupt[i]); 132 set_intr_gate(i, interrupt[i]);
108 } 133 }
109 134
110#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP)
111 /*
112 * IRQ0 must be given a fixed assignment and initialized,
113 * because it's used before the IO-APIC is set up.
114 */
115 set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
116 135
136#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP)
117 /* 137 /*
118 * The reschedule interrupt is a CPU-to-CPU reschedule-helper 138 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
119 * IPI, driven by wakeup. 139 * IPI, driven by wakeup.
@@ -128,6 +148,9 @@ void __init native_init_IRQ(void)
128 148
129 /* IPI for single call function */ 149 /* IPI for single call function */
130 set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt); 150 set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt);
151
152 /* Low priority IPI to cleanup after moving an irq */
153 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
131#endif 154#endif
132 155
133#ifdef CONFIG_X86_LOCAL_APIC 156#ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 5b5be9d43c2a..ff0235391285 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -142,23 +142,19 @@ void __init init_ISA_irqs(void)
142 init_bsp_APIC(); 142 init_bsp_APIC();
143 init_8259A(0); 143 init_8259A(0);
144 144
145 for (i = 0; i < NR_IRQS; i++) { 145 for (i = 0; i < 16; i++) {
146 irq_desc[i].status = IRQ_DISABLED; 146 /* first time call this irq_desc */
147 irq_desc[i].action = NULL; 147 struct irq_desc *desc = irq_to_desc(i);
148 irq_desc[i].depth = 1; 148
149 149 desc->status = IRQ_DISABLED;
150 if (i < 16) { 150 desc->action = NULL;
151 /* 151 desc->depth = 1;
152 * 16 old-style INTA-cycle interrupts: 152
153 */ 153 /*
154 set_irq_chip_and_handler_name(i, &i8259A_chip, 154 * 16 old-style INTA-cycle interrupts:
155 */
156 set_irq_chip_and_handler_name(i, &i8259A_chip,
155 handle_level_irq, "XT"); 157 handle_level_irq, "XT");
156 } else {
157 /*
158 * 'high' PCI IRQs filled in on demand
159 */
160 irq_desc[i].chip = &no_irq_chip;
161 }
162 } 158 }
163} 159}
164 160
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index f6a11b9b1f98..67465ed89310 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -35,9 +35,6 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
35 if (!(word & (1 << 13))) { 35 if (!(word & (1 << 13))) {
36 dev_info(&dev->dev, "Intel E7520/7320/7525 detected; " 36 dev_info(&dev->dev, "Intel E7520/7320/7525 detected; "
37 "disabling irq balancing and affinity\n"); 37 "disabling irq balancing and affinity\n");
38#ifdef CONFIG_IRQBALANCE
39 irqbalance_disable("");
40#endif
41 noirqdebug_setup(""); 38 noirqdebug_setup("");
42#ifdef CONFIG_PROC_FS 39#ifdef CONFIG_PROC_FS
43 no_irq_affinity = 1; 40 no_irq_affinity = 1;
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 0a23b5795b25..dd6f2b71561b 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -52,7 +52,7 @@ int mach_set_rtc_mmss(unsigned long nowtime)
52 52
53 cmos_minutes = CMOS_READ(RTC_MINUTES); 53 cmos_minutes = CMOS_READ(RTC_MINUTES);
54 if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) 54 if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
55 BCD_TO_BIN(cmos_minutes); 55 cmos_minutes = bcd2bin(cmos_minutes);
56 56
57 /* 57 /*
58 * since we're only adjusting minutes and seconds, 58 * since we're only adjusting minutes and seconds,
@@ -69,8 +69,8 @@ int mach_set_rtc_mmss(unsigned long nowtime)
69 69
70 if (abs(real_minutes - cmos_minutes) < 30) { 70 if (abs(real_minutes - cmos_minutes) < 30) {
71 if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { 71 if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
72 BIN_TO_BCD(real_seconds); 72 real_seconds = bin2bcd(real_seconds);
73 BIN_TO_BCD(real_minutes); 73 real_minutes = bin2bcd(real_minutes);
74 } 74 }
75 CMOS_WRITE(real_seconds,RTC_SECONDS); 75 CMOS_WRITE(real_seconds,RTC_SECONDS);
76 CMOS_WRITE(real_minutes,RTC_MINUTES); 76 CMOS_WRITE(real_minutes,RTC_MINUTES);
@@ -124,16 +124,16 @@ unsigned long mach_get_cmos_time(void)
124 WARN_ON_ONCE(RTC_ALWAYS_BCD && (status & RTC_DM_BINARY)); 124 WARN_ON_ONCE(RTC_ALWAYS_BCD && (status & RTC_DM_BINARY));
125 125
126 if (RTC_ALWAYS_BCD || !(status & RTC_DM_BINARY)) { 126 if (RTC_ALWAYS_BCD || !(status & RTC_DM_BINARY)) {
127 BCD_TO_BIN(sec); 127 sec = bcd2bin(sec);
128 BCD_TO_BIN(min); 128 min = bcd2bin(min);
129 BCD_TO_BIN(hour); 129 hour = bcd2bin(hour);
130 BCD_TO_BIN(day); 130 day = bcd2bin(day);
131 BCD_TO_BIN(mon); 131 mon = bcd2bin(mon);
132 BCD_TO_BIN(year); 132 year = bcd2bin(year);
133 } 133 }
134 134
135 if (century) { 135 if (century) {
136 BCD_TO_BIN(century); 136 century = bcd2bin(century);
137 year += century * 100; 137 year += century * 100;
138 printk(KERN_INFO "Extended CMOS year: %d\n", century * 100); 138 printk(KERN_INFO "Extended CMOS year: %d\n", century * 100);
139 } else 139 } else
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 2255782e8d4b..0fa6790c1dd3 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -561,7 +561,13 @@ static void __init reserve_standard_io_resources(void)
561 561
562} 562}
563 563
564#ifdef CONFIG_PROC_VMCORE 564/*
565 * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
566 * is_kdump_kernel() to determine if we are booting after a panic. Hence
567 * ifdef it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE.
568 */
569
570#ifdef CONFIG_CRASH_DUMP
565/* elfcorehdr= specifies the location of elf core header 571/* elfcorehdr= specifies the location of elf core header
566 * stored by the crashed kernel. This option will be passed 572 * stored by the crashed kernel. This option will be passed
567 * by kexec loader to the capture kernel. 573 * by kexec loader to the capture kernel.
@@ -1067,6 +1073,7 @@ void __init setup_arch(char **cmdline_p)
1067#endif 1073#endif
1068 1074
1069 prefill_possible_map(); 1075 prefill_possible_map();
1076
1070#ifdef CONFIG_X86_64 1077#ifdef CONFIG_X86_64
1071 init_cpu_to_node(); 1078 init_cpu_to_node();
1072#endif 1079#endif
@@ -1074,6 +1081,9 @@ void __init setup_arch(char **cmdline_p)
1074 init_apic_mappings(); 1081 init_apic_mappings();
1075 ioapic_init_mappings(); 1082 ioapic_init_mappings();
1076 1083
1084 /* need to wait for io_apic is mapped */
1085 nr_irqs = probe_nr_irqs();
1086
1077 kvm_guest_init(); 1087 kvm_guest_init();
1078 1088
1079 e820_reserve_resources(); 1089 e820_reserve_resources();
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 0e67f72d9316..410c88f0bfeb 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -140,25 +140,30 @@ static void __init setup_cpu_pda_map(void)
140 */ 140 */
141void __init setup_per_cpu_areas(void) 141void __init setup_per_cpu_areas(void)
142{ 142{
143 ssize_t size = PERCPU_ENOUGH_ROOM; 143 ssize_t size, old_size;
144 char *ptr; 144 char *ptr;
145 int cpu; 145 int cpu;
146 unsigned long align = 1;
146 147
147 /* Setup cpu_pda map */ 148 /* Setup cpu_pda map */
148 setup_cpu_pda_map(); 149 setup_cpu_pda_map();
149 150
150 /* Copy section for each CPU (we discard the original) */ 151 /* Copy section for each CPU (we discard the original) */
151 size = PERCPU_ENOUGH_ROOM; 152 old_size = PERCPU_ENOUGH_ROOM;
153 align = max_t(unsigned long, PAGE_SIZE, align);
154 size = roundup(old_size, align);
152 printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", 155 printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
153 size); 156 size);
154 157
155 for_each_possible_cpu(cpu) { 158 for_each_possible_cpu(cpu) {
156#ifndef CONFIG_NEED_MULTIPLE_NODES 159#ifndef CONFIG_NEED_MULTIPLE_NODES
157 ptr = alloc_bootmem_pages(size); 160 ptr = __alloc_bootmem(size, align,
161 __pa(MAX_DMA_ADDRESS));
158#else 162#else
159 int node = early_cpu_to_node(cpu); 163 int node = early_cpu_to_node(cpu);
160 if (!node_online(node) || !NODE_DATA(node)) { 164 if (!node_online(node) || !NODE_DATA(node)) {
161 ptr = alloc_bootmem_pages(size); 165 ptr = __alloc_bootmem(size, align,
166 __pa(MAX_DMA_ADDRESS));
162 printk(KERN_INFO 167 printk(KERN_INFO
163 "cpu %d has no node %d or node-local memory\n", 168 "cpu %d has no node %d or node-local memory\n",
164 cpu, node); 169 cpu, node);
@@ -167,7 +172,8 @@ void __init setup_per_cpu_areas(void)
167 cpu, __pa(ptr)); 172 cpu, __pa(ptr));
168 } 173 }
169 else { 174 else {
170 ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); 175 ptr = __alloc_bootmem_node(NODE_DATA(node), size, align,
176 __pa(MAX_DMA_ADDRESS));
171 if (ptr) 177 if (ptr)
172 printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n", 178 printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n",
173 cpu, node, __pa(ptr)); 179 cpu, node, __pa(ptr));
@@ -175,7 +181,6 @@ void __init setup_per_cpu_areas(void)
175#endif 181#endif
176 per_cpu_offset(cpu) = ptr - __per_cpu_start; 182 per_cpu_offset(cpu) = ptr - __per_cpu_start;
177 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); 183 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
178
179 } 184 }
180 185
181 printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n", 186 printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n",
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7ed9e070a6e9..7ece815ea637 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -543,10 +543,10 @@ static inline void __inquire_remote_apic(int apicid)
543 int timeout; 543 int timeout;
544 u32 status; 544 u32 status;
545 545
546 printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid); 546 printk(KERN_INFO "Inquiring remote APIC 0x%x...\n", apicid);
547 547
548 for (i = 0; i < ARRAY_SIZE(regs); i++) { 548 for (i = 0; i < ARRAY_SIZE(regs); i++) {
549 printk(KERN_INFO "... APIC #%d %s: ", apicid, names[i]); 549 printk(KERN_INFO "... APIC 0x%x %s: ", apicid, names[i]);
550 550
551 /* 551 /*
552 * Wait for idle. 552 * Wait for idle.
@@ -874,7 +874,7 @@ do_rest:
874 start_ip = setup_trampoline(); 874 start_ip = setup_trampoline();
875 875
876 /* So we see what's up */ 876 /* So we see what's up */
877 printk(KERN_INFO "Booting processor %d/%d ip %lx\n", 877 printk(KERN_INFO "Booting processor %d APIC 0x%x ip 0x%lx\n",
878 cpu, apicid, start_ip); 878 cpu, apicid, start_ip);
879 879
880 /* 880 /*
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c
new file mode 100644
index 000000000000..aeef529917e4
--- /dev/null
+++ b/arch/x86/kernel/uv_irq.c
@@ -0,0 +1,79 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * SGI UV IRQ functions
7 *
8 * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
9 */
10
11#include <linux/module.h>
12#include <linux/irq.h>
13
14#include <asm/apic.h>
15#include <asm/uv/uv_irq.h>
16
/*
 * Do-nothing irq_chip callback: takes the irq number and ignores it.
 * uv_irq_chip uses it for every hook that requires no action.
 */
static void uv_noop(unsigned int irq)
{
	/* intentionally empty */
}
20
/*
 * Do-nothing irq_chip callback for hooks that must return a value:
 * ignores the irq number and always reports 0.
 */
static unsigned int uv_noop_ret(unsigned int irq)
{
	unsigned int result = 0;

	return result;
}
25
/*
 * irq_chip callback that acknowledges the interrupt at the local APIC.
 * The irq number itself is not needed for the acknowledgement.
 */
static void uv_ack_apic(unsigned int irq)
{
	ack_APIC_irq();
}
30
31struct irq_chip uv_irq_chip = {
32 .name = "UV-CORE",
33 .startup = uv_noop_ret,
34 .shutdown = uv_noop,
35 .enable = uv_noop,
36 .disable = uv_noop,
37 .ack = uv_noop,
38 .mask = uv_noop,
39 .unmask = uv_noop,
40 .eoi = uv_ack_apic,
41 .end = uv_noop,
42};
43
44/*
45 * Set up a mapping of an available irq and vector, and enable the specified
46 * MMR that defines the MSI that is to be sent to the specified CPU when an
47 * interrupt is raised.
48 */
49int uv_setup_irq(char *irq_name, int cpu, int mmr_blade,
50 unsigned long mmr_offset)
51{
52 int irq;
53 int ret;
54
55 irq = create_irq();
56 if (irq <= 0)
57 return -EBUSY;
58
59 ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset);
60 if (ret != irq)
61 destroy_irq(irq);
62
63 return ret;
64}
65EXPORT_SYMBOL_GPL(uv_setup_irq);
66
/*
 * Undo uv_setup_irq(): first disable the MMR that defined the MSI, then
 * release the irq/vector mapping.
 *
 * mmr_blade and mmr_offset must be the values that were passed to
 * uv_setup_irq() for this irq.
 */
void uv_teardown_irq(unsigned int irq, int mmr_blade, unsigned long mmr_offset)
{
	/* silence the interrupt source before giving the irq back */
	arch_disable_uv_irq(mmr_blade, mmr_offset);

	destroy_irq(irq);
}
EXPORT_SYMBOL_GPL(uv_teardown_irq);
diff --git a/arch/x86/kernel/uv_sysfs.c b/arch/x86/kernel/uv_sysfs.c
new file mode 100644
index 000000000000..67f9b9dbf800
--- /dev/null
+++ b/arch/x86/kernel/uv_sysfs.c
@@ -0,0 +1,72 @@
1/*
2 * This file supports the /sys/firmware/sgi_uv interfaces for SGI UV.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 *
18 * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
19 * Copyright (c) Russ Anderson
20 */
21
22#include <linux/sysdev.h>
23#include <asm/uv/bios.h>
24
25struct kobject *sgi_uv_kobj;
26
27static ssize_t partition_id_show(struct kobject *kobj,
28 struct kobj_attribute *attr, char *buf)
29{
30 return snprintf(buf, PAGE_SIZE, "%ld\n", sn_partition_id);
31}
32
33static ssize_t coherence_id_show(struct kobject *kobj,
34 struct kobj_attribute *attr, char *buf)
35{
36 return snprintf(buf, PAGE_SIZE, "%ld\n", partition_coherence_id());
37}
38
39static struct kobj_attribute partition_id_attr =
40 __ATTR(partition_id, S_IRUGO, partition_id_show, NULL);
41
42static struct kobj_attribute coherence_id_attr =
43 __ATTR(coherence_id, S_IRUGO, coherence_id_show, NULL);
44
45
46static int __init sgi_uv_sysfs_init(void)
47{
48 unsigned long ret;
49
50 if (!sgi_uv_kobj)
51 sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj);
52 if (!sgi_uv_kobj) {
53 printk(KERN_WARNING "kobject_create_and_add sgi_uv failed \n");
54 return -EINVAL;
55 }
56
57 ret = sysfs_create_file(sgi_uv_kobj, &partition_id_attr.attr);
58 if (ret) {
59 printk(KERN_WARNING "sysfs_create_file partition_id failed \n");
60 return ret;
61 }
62
63 ret = sysfs_create_file(sgi_uv_kobj, &coherence_id_attr.attr);
64 if (ret) {
65 printk(KERN_WARNING "sysfs_create_file coherence_id failed \n");
66 return ret;
67 }
68
69 return 0;
70}
71
72device_initcall(sgi_uv_sysfs_init);
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index 61a97e616f70..0c9667f0752a 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -484,10 +484,11 @@ static void disable_cobalt_irq(unsigned int irq)
484static unsigned int startup_cobalt_irq(unsigned int irq) 484static unsigned int startup_cobalt_irq(unsigned int irq)
485{ 485{
486 unsigned long flags; 486 unsigned long flags;
487 struct irq_desc *desc = irq_to_desc(irq);
487 488
488 spin_lock_irqsave(&cobalt_lock, flags); 489 spin_lock_irqsave(&cobalt_lock, flags);
489 if ((irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING))) 490 if ((desc->status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING)))
490 irq_desc[irq].status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING); 491 desc->status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING);
491 enable_cobalt_irq(irq); 492 enable_cobalt_irq(irq);
492 spin_unlock_irqrestore(&cobalt_lock, flags); 493 spin_unlock_irqrestore(&cobalt_lock, flags);
493 return 0; 494 return 0;
@@ -506,9 +507,10 @@ static void ack_cobalt_irq(unsigned int irq)
506static void end_cobalt_irq(unsigned int irq) 507static void end_cobalt_irq(unsigned int irq)
507{ 508{
508 unsigned long flags; 509 unsigned long flags;
510 struct irq_desc *desc = irq_to_desc(irq);
509 511
510 spin_lock_irqsave(&cobalt_lock, flags); 512 spin_lock_irqsave(&cobalt_lock, flags);
511 if (!(irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS))) 513 if (!(desc->status & (IRQ_DISABLED | IRQ_INPROGRESS)))
512 enable_cobalt_irq(irq); 514 enable_cobalt_irq(irq);
513 spin_unlock_irqrestore(&cobalt_lock, flags); 515 spin_unlock_irqrestore(&cobalt_lock, flags);
514} 516}
@@ -626,12 +628,12 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id)
626 628
627 spin_unlock_irqrestore(&i8259A_lock, flags); 629 spin_unlock_irqrestore(&i8259A_lock, flags);
628 630
629 desc = irq_desc + realirq; 631 desc = irq_to_desc(realirq);
630 632
631 /* 633 /*
632 * handle this 'virtual interrupt' as a Cobalt one now. 634 * handle this 'virtual interrupt' as a Cobalt one now.
633 */ 635 */
634 kstat_cpu(smp_processor_id()).irqs[realirq]++; 636 kstat_incr_irqs_this_cpu(realirq, desc);
635 637
636 if (likely(desc->action != NULL)) 638 if (likely(desc->action != NULL))
637 handle_IRQ_event(realirq, desc->action); 639 handle_IRQ_event(realirq, desc->action);
@@ -662,27 +664,29 @@ void init_VISWS_APIC_irqs(void)
662 int i; 664 int i;
663 665
664 for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) { 666 for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) {
665 irq_desc[i].status = IRQ_DISABLED; 667 struct irq_desc *desc = irq_to_desc(i);
666 irq_desc[i].action = 0; 668
667 irq_desc[i].depth = 1; 669 desc->status = IRQ_DISABLED;
670 desc->action = 0;
671 desc->depth = 1;
668 672
669 if (i == 0) { 673 if (i == 0) {
670 irq_desc[i].chip = &cobalt_irq_type; 674 desc->chip = &cobalt_irq_type;
671 } 675 }
672 else if (i == CO_IRQ_IDE0) { 676 else if (i == CO_IRQ_IDE0) {
673 irq_desc[i].chip = &cobalt_irq_type; 677 desc->chip = &cobalt_irq_type;
674 } 678 }
675 else if (i == CO_IRQ_IDE1) { 679 else if (i == CO_IRQ_IDE1) {
676 irq_desc[i].chip = &cobalt_irq_type; 680 desc->chip = &cobalt_irq_type;
677 } 681 }
678 else if (i == CO_IRQ_8259) { 682 else if (i == CO_IRQ_8259) {
679 irq_desc[i].chip = &piix4_master_irq_type; 683 desc->chip = &piix4_master_irq_type;
680 } 684 }
681 else if (i < CO_IRQ_APIC0) { 685 else if (i < CO_IRQ_APIC0) {
682 irq_desc[i].chip = &piix4_virtual_irq_type; 686 desc->chip = &piix4_virtual_irq_type;
683 } 687 }
684 else if (IS_CO_APIC(i)) { 688 else if (IS_CO_APIC(i)) {
685 irq_desc[i].chip = &cobalt_irq_type; 689 desc->chip = &cobalt_irq_type;
686 } 690 }
687 } 691 }
688 692
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index 6953859fe289..254ee07f8635 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -235,11 +235,14 @@ static void __devinit vmi_time_init_clockevent(void)
235 235
236void __init vmi_time_init(void) 236void __init vmi_time_init(void)
237{ 237{
238 unsigned int cpu;
238 /* Disable PIT: BIOSes start PIT CH0 with 18.2hz peridic. */ 239 /* Disable PIT: BIOSes start PIT CH0 with 18.2hz peridic. */
239 outb_pit(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */ 240 outb_pit(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */
240 241
241 vmi_time_init_clockevent(); 242 vmi_time_init_clockevent();
242 setup_irq(0, &vmi_clock_action); 243 setup_irq(0, &vmi_clock_action);
244 for_each_possible_cpu(cpu)
245 per_cpu(vector_irq, cpu)[vmi_get_timer_vector()] = 0;
243} 246}
244 247
245#ifdef CONFIG_X86_LOCAL_APIC 248#ifdef CONFIG_X86_LOCAL_APIC