aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-10-10 13:50:00 -0400
committerIngo Molnar <mingo@elte.hu>2008-10-11 14:17:36 -0400
commitd84705969f898f294bc3fc32eca33580f14105bd (patch)
treec59c7e835820c6bfb5a957f70e58fee1699a6b38
parent725c25819e4a0dafdcf42a5f31bc569341919c7c (diff)
parent11494547b1754c4f3bd7f707ab869e2adf54d52f (diff)
Merge branch 'x86/apic' into x86-v28-for-linus-phase4-B
Conflicts: arch/x86/kernel/apic_32.c arch/x86/kernel/apic_64.c arch/x86/kernel/setup.c drivers/pci/intel-iommu.c include/asm-x86/cpufeature.h include/asm-x86/dma-mapping.h
-rw-r--r--Documentation/kernel-parameters.txt6
-rw-r--r--arch/um/sys-x86_64/syscall_table.c4
-rw-r--r--arch/x86/Kconfig8
-rw-r--r--arch/x86/es7000/Makefile (renamed from arch/x86/mach-es7000/Makefile)0
-rw-r--r--arch/x86/es7000/es7000.h (renamed from arch/x86/mach-es7000/es7000.h)18
-rw-r--r--arch/x86/es7000/es7000plat.c (renamed from arch/x86/mach-es7000/es7000plat.c)2
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/acpi/boot.c8
-rw-r--r--arch/x86/kernel/apic_32.c437
-rw-r--r--arch/x86/kernel/apic_64.c626
-rw-r--r--arch/x86/kernel/cpu/common_64.c2
-rw-r--r--arch/x86/kernel/cpu/feature_names.c2
-rw-r--r--arch/x86/kernel/genapic_64.c88
-rw-r--r--arch/x86/kernel/genapic_flat_64.c62
-rw-r--r--arch/x86/kernel/genx2apic_cluster.c164
-rw-r--r--arch/x86/kernel/genx2apic_phys.c159
-rw-r--r--arch/x86/kernel/genx2apic_uv_x.c70
-rw-r--r--arch/x86/kernel/i8259.c24
-rw-r--r--arch/x86/kernel/io_apic_32.c47
-rw-r--r--arch/x86/kernel/io_apic_64.c639
-rw-r--r--arch/x86/kernel/irqinit_32.c49
-rw-r--r--arch/x86/kernel/mpparse.c2
-rw-r--r--arch/x86/kernel/numaq_32.c7
-rw-r--r--arch/x86/kernel/paravirt.c2
-rw-r--r--arch/x86/kernel/setup.c2
-rw-r--r--arch/x86/kernel/smpboot.c38
-rw-r--r--arch/x86/kernel/summit_32.c2
-rw-r--r--arch/x86/kernel/vmi_32.c4
-rw-r--r--arch/x86/lguest/boot.c38
-rw-r--r--arch/x86/mach-default/setup.c15
-rw-r--r--arch/x86/mach-generic/Makefile2
-rw-r--r--arch/x86/mach-generic/bigsmp.c9
-rw-r--r--arch/x86/mach-generic/es7000.c13
-rw-r--r--arch/x86/mach-generic/numaq.c12
-rw-r--r--arch/x86/mach-generic/summit.c11
-rw-r--r--arch/x86/pci/acpi.c5
-rw-r--r--arch/x86/xen/enlighten.c45
-rw-r--r--drivers/pci/Makefile2
-rw-r--r--drivers/pci/dma_remapping.h157
-rw-r--r--drivers/pci/dmar.c397
-rw-r--r--drivers/pci/intel-iommu.c185
-rw-r--r--drivers/pci/intel-iommu.h233
-rw-r--r--drivers/pci/intr_remapping.c471
-rw-r--r--drivers/pci/intr_remapping.h8
-rw-r--r--include/asm-x86/apic.h65
-rw-r--r--include/asm-x86/apicdef.h3
-rw-r--r--include/asm-x86/arch_hooks.h2
-rw-r--r--include/asm-x86/bigsmp/apic.h (renamed from include/asm-x86/mach-bigsmp/mach_apic.h)10
-rw-r--r--include/asm-x86/bigsmp/apicdef.h13
-rw-r--r--include/asm-x86/bigsmp/ipi.h (renamed from include/asm-x86/mach-bigsmp/mach_ipi.h)6
-rw-r--r--include/asm-x86/cpufeature.h2
-rw-r--r--include/asm-x86/es7000/apic.h (renamed from include/asm-x86/mach-es7000/mach_apic.h)32
-rw-r--r--include/asm-x86/es7000/apicdef.h13
-rw-r--r--include/asm-x86/es7000/ipi.h (renamed from include/asm-x86/mach-es7000/mach_ipi.h)6
-rw-r--r--include/asm-x86/es7000/mpparse.h (renamed from include/asm-x86/mach-es7000/mach_mpparse.h)6
-rw-r--r--include/asm-x86/es7000/wakecpu.h (renamed from include/asm-x86/mach-es7000/mach_wakecpu.h)8
-rw-r--r--include/asm-x86/genapic_64.h8
-rw-r--r--include/asm-x86/hw_irq.h3
-rw-r--r--include/asm-x86/i8259.h3
-rw-r--r--include/asm-x86/io_apic.h20
-rw-r--r--include/asm-x86/ipi.h16
-rw-r--r--include/asm-x86/irq_remapping.h8
-rw-r--r--include/asm-x86/mach-bigsmp/mach_apicdef.h13
-rw-r--r--include/asm-x86/mach-default/mach_apic.h4
-rw-r--r--include/asm-x86/mach-default/mach_apicdef.h6
-rw-r--r--include/asm-x86/mach-es7000/mach_apicdef.h13
-rw-r--r--include/asm-x86/mach-numaq/mach_mpparse.h7
-rw-r--r--include/asm-x86/mach-summit/mach_apicdef.h13
-rw-r--r--include/asm-x86/mpspec.h3
-rw-r--r--include/asm-x86/msidef.h4
-rw-r--r--include/asm-x86/numaq/apic.h (renamed from include/asm-x86/mach-numaq/mach_apic.h)6
-rw-r--r--include/asm-x86/numaq/apicdef.h (renamed from include/asm-x86/mach-numaq/mach_apicdef.h)6
-rw-r--r--include/asm-x86/numaq/ipi.h (renamed from include/asm-x86/mach-numaq/mach_ipi.h)6
-rw-r--r--include/asm-x86/numaq/mpparse.h7
-rw-r--r--include/asm-x86/numaq/wakecpu.h (renamed from include/asm-x86/mach-numaq/mach_wakecpu.h)6
-rw-r--r--include/asm-x86/paravirt.h19
-rw-r--r--include/asm-x86/setup.h1
-rw-r--r--include/asm-x86/smp.h17
-rw-r--r--include/asm-x86/summit/apic.h (renamed from include/asm-x86/mach-summit/mach_apic.h)24
-rw-r--r--include/asm-x86/summit/apicdef.h13
-rw-r--r--include/asm-x86/summit/ipi.h (renamed from include/asm-x86/mach-summit/mach_ipi.h)6
-rw-r--r--include/asm-x86/summit/irq_vectors_limits.h (renamed from include/asm-x86/mach-summit/irq_vectors_limits.h)6
-rw-r--r--include/asm-x86/summit/mpparse.h (renamed from include/asm-x86/mach-summit/mach_mpparse.h)13
-rw-r--r--include/linux/dmar.h127
-rw-r--r--include/linux/irq.h1
-rw-r--r--kernel/irq/manage.c9
86 files changed, 3635 insertions, 996 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index a2701cb6b37..c5d891fd75e 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1424,6 +1424,12 @@ and is between 256 and 4096 characters. It is defined in the file
1424 1424
1425 nolapic_timer [X86-32,APIC] Do not use the local APIC timer. 1425 nolapic_timer [X86-32,APIC] Do not use the local APIC timer.
1426 1426
1427 nox2apic [X86-64,APIC] Do not enable x2APIC mode.
1428
1429 x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of
1430 default x2apic cluster mode on platforms
1431 supporting x2apic.
1432
1427 noltlbs [PPC] Do not use large page/tlb entries for kernel 1433 noltlbs [PPC] Do not use large page/tlb entries for kernel
1428 lowmem mapping on PPC40x. 1434 lowmem mapping on PPC40x.
1429 1435
diff --git a/arch/um/sys-x86_64/syscall_table.c b/arch/um/sys-x86_64/syscall_table.c
index c128eb89700..32f5fbe2d0d 100644
--- a/arch/um/sys-x86_64/syscall_table.c
+++ b/arch/um/sys-x86_64/syscall_table.c
@@ -41,12 +41,12 @@
41#define stub_rt_sigreturn sys_rt_sigreturn 41#define stub_rt_sigreturn sys_rt_sigreturn
42 42
43#define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ; 43#define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ;
44#undef _ASM_X86_64_UNISTD_H_ 44#undef ASM_X86__UNISTD_64_H
45#include <asm-x86/unistd_64.h> 45#include <asm-x86/unistd_64.h>
46 46
47#undef __SYSCALL 47#undef __SYSCALL
48#define __SYSCALL(nr, sym) [ nr ] = sym, 48#define __SYSCALL(nr, sym) [ nr ] = sym,
49#undef _ASM_X86_64_UNISTD_H_ 49#undef ASM_X86__UNISTD_64_H
50 50
51typedef void (*sys_call_ptr_t)(void); 51typedef void (*sys_call_ptr_t)(void);
52 52
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0d7cdbbfc1e..44d4f2130d0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1689,6 +1689,14 @@ config DMAR_FLOPPY_WA
1689 workaround will setup a 1:1 mapping for the first 1689 workaround will setup a 1:1 mapping for the first
1690 16M to make floppy (an ISA device) work. 1690 16M to make floppy (an ISA device) work.
1691 1691
1692config INTR_REMAP
1693 bool "Support for Interrupt Remapping (EXPERIMENTAL)"
1694 depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL
1695 help
1696 Supports Interrupt remapping for IO-APIC and MSI devices.
1697 To use x2apic mode in the CPU's which support x2APIC enhancements or
1698 to support platforms with CPU's having > 8 bit APIC ID, say Y.
1699
1692source "drivers/pci/pcie/Kconfig" 1700source "drivers/pci/pcie/Kconfig"
1693 1701
1694source "drivers/pci/Kconfig" 1702source "drivers/pci/Kconfig"
diff --git a/arch/x86/mach-es7000/Makefile b/arch/x86/es7000/Makefile
index 3ef8b43b62f..3ef8b43b62f 100644
--- a/arch/x86/mach-es7000/Makefile
+++ b/arch/x86/es7000/Makefile
diff --git a/arch/x86/mach-es7000/es7000.h b/arch/x86/es7000/es7000.h
index c8d5aa132fa..4e62f6fa95b 100644
--- a/arch/x86/mach-es7000/es7000.h
+++ b/arch/x86/es7000/es7000.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * Written by: Garry Forsgren, Unisys Corporation 2 * Written by: Garry Forsgren, Unisys Corporation
3 * Natalie Protasevich, Unisys Corporation 3 * Natalie Protasevich, Unisys Corporation
4 * This file contains the code to configure and interface 4 * This file contains the code to configure and interface
5 * with Unisys ES7000 series hardware system manager. 5 * with Unisys ES7000 series hardware system manager.
6 * 6 *
7 * Copyright (c) 2003 Unisys Corporation. All Rights Reserved. 7 * Copyright (c) 2003 Unisys Corporation. All Rights Reserved.
@@ -18,7 +18,7 @@
18 * with this program; if not, write the Free Software Foundation, Inc., 59 18 * with this program; if not, write the Free Software Foundation, Inc., 59
19 * Temple Place - Suite 330, Boston MA 02111-1307, USA. 19 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
20 * 20 *
21 * Contact information: Unisys Corporation, Township Line & Union Meeting 21 * Contact information: Unisys Corporation, Township Line & Union Meeting
22 * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or: 22 * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or:
23 * 23 *
24 * http://www.unisys.com 24 * http://www.unisys.com
@@ -41,7 +41,7 @@
41#define MIP_VALID 0x0100000000000000ULL 41#define MIP_VALID 0x0100000000000000ULL
42#define MIP_PORT(VALUE) ((VALUE >> 32) & 0xffff) 42#define MIP_PORT(VALUE) ((VALUE >> 32) & 0xffff)
43 43
44#define MIP_RD_LO(VALUE) (VALUE & 0xffffffff) 44#define MIP_RD_LO(VALUE) (VALUE & 0xffffffff)
45 45
46struct mip_reg_info { 46struct mip_reg_info {
47 unsigned long long mip_info; 47 unsigned long long mip_info;
@@ -51,11 +51,11 @@ struct mip_reg_info {
51}; 51};
52 52
53struct part_info { 53struct part_info {
54 unsigned char type; 54 unsigned char type;
55 unsigned char length; 55 unsigned char length;
56 unsigned char part_id; 56 unsigned char part_id;
57 unsigned char apic_mode; 57 unsigned char apic_mode;
58 unsigned long snum; 58 unsigned long snum;
59 char ptype[16]; 59 char ptype[16];
60 char sname[64]; 60 char sname[64];
61 char pname[64]; 61 char pname[64];
@@ -68,11 +68,11 @@ struct psai {
68}; 68};
69 69
70struct es7000_mem_info { 70struct es7000_mem_info {
71 unsigned char type; 71 unsigned char type;
72 unsigned char length; 72 unsigned char length;
73 unsigned char resv[6]; 73 unsigned char resv[6];
74 unsigned long long start; 74 unsigned long long start;
75 unsigned long long size; 75 unsigned long long size;
76}; 76};
77 77
78struct es7000_oem_table { 78struct es7000_oem_table {
@@ -106,7 +106,7 @@ struct mip_reg {
106}; 106};
107 107
108#define MIP_SW_APIC 0x1020b 108#define MIP_SW_APIC 0x1020b
109#define MIP_FUNC(VALUE) (VALUE & 0xff) 109#define MIP_FUNC(VALUE) (VALUE & 0xff)
110 110
111extern int parse_unisys_oem (char *oemptr); 111extern int parse_unisys_oem (char *oemptr);
112extern void setup_unisys(void); 112extern void setup_unisys(void);
diff --git a/arch/x86/mach-es7000/es7000plat.c b/arch/x86/es7000/es7000plat.c
index 50189af14b8..7789fde13c3 100644
--- a/arch/x86/mach-es7000/es7000plat.c
+++ b/arch/x86/es7000/es7000plat.c
@@ -72,7 +72,7 @@ es7000_rename_gsi(int ioapic, int gsi)
72 base += nr_ioapic_registers[i]; 72 base += nr_ioapic_registers[i];
73 } 73 }
74 74
75 if (!ioapic && (gsi < 16)) 75 if (!ioapic && (gsi < 16))
76 gsi += base; 76 gsi += base;
77 return gsi; 77 return gsi;
78} 78}
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 3db651fc8ec..a07ec14f331 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -104,6 +104,8 @@ obj-$(CONFIG_OLPC) += olpc.o
104ifeq ($(CONFIG_X86_64),y) 104ifeq ($(CONFIG_X86_64),y)
105 obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o 105 obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
106 obj-y += bios_uv.o 106 obj-y += bios_uv.o
107 obj-y += genx2apic_cluster.o
108 obj-y += genx2apic_phys.o
107 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o 109 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
108 obj-$(CONFIG_AUDIT) += audit_64.o 110 obj-$(CONFIG_AUDIT) += audit_64.o
109 111
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 7d40ef7b36e..c2ac1b4515a 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -252,10 +252,8 @@ static void __cpuinit acpi_register_lapic(int id, u8 enabled)
252 return; 252 return;
253 } 253 }
254 254
255#ifdef CONFIG_X86_32
256 if (boot_cpu_physical_apicid != -1U) 255 if (boot_cpu_physical_apicid != -1U)
257 ver = apic_version[boot_cpu_physical_apicid]; 256 ver = apic_version[boot_cpu_physical_apicid];
258#endif
259 257
260 generic_processor_info(id, ver); 258 generic_processor_info(id, ver);
261} 259}
@@ -774,11 +772,9 @@ static void __init acpi_register_lapic_address(unsigned long address)
774 772
775 set_fixmap_nocache(FIX_APIC_BASE, address); 773 set_fixmap_nocache(FIX_APIC_BASE, address);
776 if (boot_cpu_physical_apicid == -1U) { 774 if (boot_cpu_physical_apicid == -1U) {
777 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 775 boot_cpu_physical_apicid = read_apic_id();
778#ifdef CONFIG_X86_32
779 apic_version[boot_cpu_physical_apicid] = 776 apic_version[boot_cpu_physical_apicid] =
780 GET_APIC_VERSION(apic_read(APIC_LVR)); 777 GET_APIC_VERSION(apic_read(APIC_LVR));
781#endif
782 } 778 }
783} 779}
784 780
@@ -1350,7 +1346,9 @@ static void __init acpi_process_madt(void)
1350 acpi_ioapic = 1; 1346 acpi_ioapic = 1;
1351 1347
1352 smp_found_config = 1; 1348 smp_found_config = 1;
1349#ifdef CONFIG_X86_32
1353 setup_apic_routing(); 1350 setup_apic_routing();
1351#endif
1354 } 1352 }
1355 } 1353 }
1356 if (error == -EINVAL) { 1354 if (error == -EINVAL) {
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index f88bd0d982b..a91c57cb666 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -60,10 +60,8 @@ unsigned long mp_lapic_addr;
60static int force_enable_local_apic; 60static int force_enable_local_apic;
61int disable_apic; 61int disable_apic;
62 62
63/* Local APIC timer verification ok */
64static int local_apic_timer_verify_ok;
65/* Disable local APIC timer from the kernel commandline or via dmi quirk */ 63/* Disable local APIC timer from the kernel commandline or via dmi quirk */
66static int local_apic_timer_disabled; 64static int disable_apic_timer __cpuinitdata;
67/* Local APIC timer works in C2 */ 65/* Local APIC timer works in C2 */
68int local_apic_timer_c2_ok; 66int local_apic_timer_c2_ok;
69EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); 67EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
@@ -130,7 +128,11 @@ static inline int lapic_get_version(void)
130 */ 128 */
131static inline int lapic_is_integrated(void) 129static inline int lapic_is_integrated(void)
132{ 130{
131#ifdef CONFIG_X86_64
132 return 1;
133#else
133 return APIC_INTEGRATED(lapic_get_version()); 134 return APIC_INTEGRATED(lapic_get_version());
135#endif
134} 136}
135 137
136/* 138/*
@@ -145,13 +147,18 @@ static int modern_apic(void)
145 return lapic_get_version() >= 0x14; 147 return lapic_get_version() >= 0x14;
146} 148}
147 149
148void apic_wait_icr_idle(void) 150/*
151 * Paravirt kernels also might be using these below ops. So we still
152 * use generic apic_read()/apic_write(), which might be pointing to different
153 * ops in PARAVIRT case.
154 */
155void xapic_wait_icr_idle(void)
149{ 156{
150 while (apic_read(APIC_ICR) & APIC_ICR_BUSY) 157 while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
151 cpu_relax(); 158 cpu_relax();
152} 159}
153 160
154u32 safe_apic_wait_icr_idle(void) 161u32 safe_xapic_wait_icr_idle(void)
155{ 162{
156 u32 send_status; 163 u32 send_status;
157 int timeout; 164 int timeout;
@@ -167,16 +174,48 @@ u32 safe_apic_wait_icr_idle(void)
167 return send_status; 174 return send_status;
168} 175}
169 176
177void xapic_icr_write(u32 low, u32 id)
178{
179 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
180 apic_write(APIC_ICR, low);
181}
182
183u64 xapic_icr_read(void)
184{
185 u32 icr1, icr2;
186
187 icr2 = apic_read(APIC_ICR2);
188 icr1 = apic_read(APIC_ICR);
189
190 return icr1 | ((u64)icr2 << 32);
191}
192
193static struct apic_ops xapic_ops = {
194 .read = native_apic_mem_read,
195 .write = native_apic_mem_write,
196 .icr_read = xapic_icr_read,
197 .icr_write = xapic_icr_write,
198 .wait_icr_idle = xapic_wait_icr_idle,
199 .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
200};
201
202struct apic_ops __read_mostly *apic_ops = &xapic_ops;
203EXPORT_SYMBOL_GPL(apic_ops);
204
170/** 205/**
171 * enable_NMI_through_LVT0 - enable NMI through local vector table 0 206 * enable_NMI_through_LVT0 - enable NMI through local vector table 0
172 */ 207 */
173void __cpuinit enable_NMI_through_LVT0(void) 208void __cpuinit enable_NMI_through_LVT0(void)
174{ 209{
175 unsigned int v = APIC_DM_NMI; 210 unsigned int v;
176 211
177 /* Level triggered for 82489DX */ 212 /* unmask and set to NMI */
213 v = APIC_DM_NMI;
214
215 /* Level triggered for 82489DX (32bit mode) */
178 if (!lapic_is_integrated()) 216 if (!lapic_is_integrated())
179 v |= APIC_LVT_LEVEL_TRIGGER; 217 v |= APIC_LVT_LEVEL_TRIGGER;
218
180 apic_write(APIC_LVT0, v); 219 apic_write(APIC_LVT0, v);
181} 220}
182 221
@@ -193,9 +232,13 @@ int get_physical_broadcast(void)
193 */ 232 */
194int lapic_get_maxlvt(void) 233int lapic_get_maxlvt(void)
195{ 234{
196 unsigned int v = apic_read(APIC_LVR); 235 unsigned int v;
197 236
198 /* 82489DXs do not report # of LVT entries. */ 237 v = apic_read(APIC_LVR);
238 /*
239 * - we always have APIC integrated on 64bit mode
240 * - 82489DXs do not report # of LVT entries
241 */
199 return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; 242 return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
200} 243}
201 244
@@ -203,8 +246,12 @@ int lapic_get_maxlvt(void)
203 * Local APIC timer 246 * Local APIC timer
204 */ 247 */
205 248
206/* Clock divisor is set to 16 */ 249/* Clock divisor */
250#ifdef CONFG_X86_64
251#define APIC_DIVISOR 1
252#else
207#define APIC_DIVISOR 16 253#define APIC_DIVISOR 16
254#endif
208 255
209/* 256/*
210 * This function sets up the local APIC timer, with a timeout of 257 * This function sets up the local APIC timer, with a timeout of
@@ -212,6 +259,9 @@ int lapic_get_maxlvt(void)
212 * this function twice on the boot CPU, once with a bogus timeout 259 * this function twice on the boot CPU, once with a bogus timeout
213 * value, second time for real. The other (noncalibrating) CPUs 260 * value, second time for real. The other (noncalibrating) CPUs
214 * call this function only once, with the real, calibrated value. 261 * call this function only once, with the real, calibrated value.
262 *
263 * We do reads before writes even if unnecessary, to get around the
264 * P5 APIC double write bug.
215 */ 265 */
216static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) 266static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
217{ 267{
@@ -233,14 +283,44 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
233 */ 283 */
234 tmp_value = apic_read(APIC_TDCR); 284 tmp_value = apic_read(APIC_TDCR);
235 apic_write(APIC_TDCR, 285 apic_write(APIC_TDCR,
236 (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | 286 (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
237 APIC_TDR_DIV_16); 287 APIC_TDR_DIV_16);
238 288
239 if (!oneshot) 289 if (!oneshot)
240 apic_write(APIC_TMICT, clocks / APIC_DIVISOR); 290 apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
241} 291}
242 292
243/* 293/*
294 * Setup extended LVT, AMD specific (K8, family 10h)
295 *
296 * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
297 * MCE interrupts are supported. Thus MCE offset must be set to 0.
298 */
299
300#define APIC_EILVT_LVTOFF_MCE 0
301#define APIC_EILVT_LVTOFF_IBS 1
302
303static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
304{
305 unsigned long reg = (lvt_off << 4) + APIC_EILVT0;
306 unsigned int v = (mask << 16) | (msg_type << 8) | vector;
307
308 apic_write(reg, v);
309}
310
311u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask)
312{
313 setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask);
314 return APIC_EILVT_LVTOFF_MCE;
315}
316
317u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
318{
319 setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
320 return APIC_EILVT_LVTOFF_IBS;
321}
322
323/*
244 * Program the next event, relative to now 324 * Program the next event, relative to now
245 */ 325 */
246static int lapic_next_event(unsigned long delta, 326static int lapic_next_event(unsigned long delta,
@@ -259,8 +339,8 @@ static void lapic_timer_setup(enum clock_event_mode mode,
259 unsigned long flags; 339 unsigned long flags;
260 unsigned int v; 340 unsigned int v;
261 341
262 /* Lapic used for broadcast ? */ 342 /* Lapic used as dummy for broadcast ? */
263 if (!local_apic_timer_verify_ok) 343 if (evt->features & CLOCK_EVT_FEAT_DUMMY)
264 return; 344 return;
265 345
266 local_irq_save(flags); 346 local_irq_save(flags);
@@ -473,7 +553,7 @@ static int __init calibrate_APIC_clock(void)
473 return -1; 553 return -1;
474 } 554 }
475 555
476 local_apic_timer_verify_ok = 1; 556 levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
477 557
478 /* We trust the pm timer based calibration */ 558 /* We trust the pm timer based calibration */
479 if (!pm_referenced) { 559 if (!pm_referenced) {
@@ -507,11 +587,11 @@ static int __init calibrate_APIC_clock(void)
507 if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2) 587 if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2)
508 apic_printk(APIC_VERBOSE, "... jiffies result ok\n"); 588 apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
509 else 589 else
510 local_apic_timer_verify_ok = 0; 590 levt->features |= CLOCK_EVT_FEAT_DUMMY;
511 } else 591 } else
512 local_irq_enable(); 592 local_irq_enable();
513 593
514 if (!local_apic_timer_verify_ok) { 594 if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
515 printk(KERN_WARNING 595 printk(KERN_WARNING
516 "APIC timer disabled due to verification failure.\n"); 596 "APIC timer disabled due to verification failure.\n");
517 return -1; 597 return -1;
@@ -533,7 +613,8 @@ void __init setup_boot_APIC_clock(void)
533 * timer as a dummy clock event source on SMP systems, so the 613 * timer as a dummy clock event source on SMP systems, so the
534 * broadcast mechanism is used. On UP systems simply ignore it. 614 * broadcast mechanism is used. On UP systems simply ignore it.
535 */ 615 */
536 if (local_apic_timer_disabled) { 616 if (disable_apic_timer) {
617 printk(KERN_INFO "Disabling APIC timer\n");
537 /* No broadcast on UP ! */ 618 /* No broadcast on UP ! */
538 if (num_possible_cpus() > 1) { 619 if (num_possible_cpus() > 1) {
539 lapic_clockevent.mult = 1; 620 lapic_clockevent.mult = 1;
@@ -602,7 +683,11 @@ static void local_apic_timer_interrupt(void)
602 /* 683 /*
603 * the NMI deadlock-detector uses this. 684 * the NMI deadlock-detector uses this.
604 */ 685 */
686#ifdef CONFIG_X86_64
687 add_pda(apic_timer_irqs, 1);
688#else
605 per_cpu(irq_stat, cpu).apic_timer_irqs++; 689 per_cpu(irq_stat, cpu).apic_timer_irqs++;
690#endif
606 691
607 evt->event_handler(evt); 692 evt->event_handler(evt);
608} 693}
@@ -642,35 +727,6 @@ int setup_profiling_timer(unsigned int multiplier)
642} 727}
643 728
644/* 729/*
645 * Setup extended LVT, AMD specific (K8, family 10h)
646 *
647 * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
648 * MCE interrupts are supported. Thus MCE offset must be set to 0.
649 */
650
651#define APIC_EILVT_LVTOFF_MCE 0
652#define APIC_EILVT_LVTOFF_IBS 1
653
654static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
655{
656 unsigned long reg = (lvt_off << 4) + APIC_EILVT0;
657 unsigned int v = (mask << 16) | (msg_type << 8) | vector;
658 apic_write(reg, v);
659}
660
661u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask)
662{
663 setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask);
664 return APIC_EILVT_LVTOFF_MCE;
665}
666
667u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
668{
669 setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
670 return APIC_EILVT_LVTOFF_IBS;
671}
672
673/*
674 * Local APIC start and shutdown 730 * Local APIC start and shutdown
675 */ 731 */
676 732
@@ -715,7 +771,7 @@ void clear_local_APIC(void)
715 } 771 }
716 772
717 /* lets not touch this if we didn't frob it */ 773 /* lets not touch this if we didn't frob it */
718#ifdef CONFIG_X86_MCE_P4THERMAL 774#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL)
719 if (maxlvt >= 5) { 775 if (maxlvt >= 5) {
720 v = apic_read(APIC_LVTTHMR); 776 v = apic_read(APIC_LVTTHMR);
721 apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); 777 apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
@@ -732,10 +788,6 @@ void clear_local_APIC(void)
732 if (maxlvt >= 4) 788 if (maxlvt >= 4)
733 apic_write(APIC_LVTPC, APIC_LVT_MASKED); 789 apic_write(APIC_LVTPC, APIC_LVT_MASKED);
734 790
735#ifdef CONFIG_X86_MCE_P4THERMAL
736 if (maxlvt >= 5)
737 apic_write(APIC_LVTTHMR, APIC_LVT_MASKED);
738#endif
739 /* Integrated APIC (!82489DX) ? */ 791 /* Integrated APIC (!82489DX) ? */
740 if (lapic_is_integrated()) { 792 if (lapic_is_integrated()) {
741 if (maxlvt > 3) 793 if (maxlvt > 3)
@@ -750,7 +802,7 @@ void clear_local_APIC(void)
750 */ 802 */
751void disable_local_APIC(void) 803void disable_local_APIC(void)
752{ 804{
753 unsigned long value; 805 unsigned int value;
754 806
755 clear_local_APIC(); 807 clear_local_APIC();
756 808
@@ -762,6 +814,7 @@ void disable_local_APIC(void)
762 value &= ~APIC_SPIV_APIC_ENABLED; 814 value &= ~APIC_SPIV_APIC_ENABLED;
763 apic_write(APIC_SPIV, value); 815 apic_write(APIC_SPIV, value);
764 816
817#ifdef CONFIG_X86_32
765 /* 818 /*
766 * When LAPIC was disabled by the BIOS and enabled by the kernel, 819 * When LAPIC was disabled by the BIOS and enabled by the kernel,
767 * restore the disabled state. 820 * restore the disabled state.
@@ -773,6 +826,7 @@ void disable_local_APIC(void)
773 l &= ~MSR_IA32_APICBASE_ENABLE; 826 l &= ~MSR_IA32_APICBASE_ENABLE;
774 wrmsr(MSR_IA32_APICBASE, l, h); 827 wrmsr(MSR_IA32_APICBASE, l, h);
775 } 828 }
829#endif
776} 830}
777 831
778/* 832/*
@@ -789,11 +843,15 @@ void lapic_shutdown(void)
789 return; 843 return;
790 844
791 local_irq_save(flags); 845 local_irq_save(flags);
792 clear_local_APIC();
793 846
794 if (enabled_via_apicbase) 847#ifdef CONFIG_X86_32
848 if (!enabled_via_apicbase)
849 clear_local_APIC();
850 else
851#endif
795 disable_local_APIC(); 852 disable_local_APIC();
796 853
854
797 local_irq_restore(flags); 855 local_irq_restore(flags);
798} 856}
799 857
@@ -838,6 +896,12 @@ int __init verify_local_APIC(void)
838 */ 896 */
839 reg0 = apic_read(APIC_ID); 897 reg0 = apic_read(APIC_ID);
840 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); 898 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
899 apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
900 reg1 = apic_read(APIC_ID);
901 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
902 apic_write(APIC_ID, reg0);
903 if (reg1 != (reg0 ^ APIC_ID_MASK))
904 return 0;
841 905
842 /* 906 /*
843 * The next two are just to see if we have sane values. 907 * The next two are just to see if we have sane values.
@@ -863,14 +927,15 @@ void __init sync_Arb_IDs(void)
863 */ 927 */
864 if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) 928 if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
865 return; 929 return;
930
866 /* 931 /*
867 * Wait for idle. 932 * Wait for idle.
868 */ 933 */
869 apic_wait_icr_idle(); 934 apic_wait_icr_idle();
870 935
871 apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); 936 apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
872 apic_write(APIC_ICR, 937 apic_write(APIC_ICR, APIC_DEST_ALLINC |
873 APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); 938 APIC_INT_LEVELTRIG | APIC_DM_INIT);
874} 939}
875 940
876/* 941/*
@@ -878,7 +943,7 @@ void __init sync_Arb_IDs(void)
878 */ 943 */
879void __init init_bsp_APIC(void) 944void __init init_bsp_APIC(void)
880{ 945{
881 unsigned long value; 946 unsigned int value;
882 947
883 /* 948 /*
884 * Don't do the setup now if we have a SMP BIOS as the 949 * Don't do the setup now if we have a SMP BIOS as the
@@ -899,11 +964,13 @@ void __init init_bsp_APIC(void)
899 value &= ~APIC_VECTOR_MASK; 964 value &= ~APIC_VECTOR_MASK;
900 value |= APIC_SPIV_APIC_ENABLED; 965 value |= APIC_SPIV_APIC_ENABLED;
901 966
967#ifdef CONFIG_X86_32
902 /* This bit is reserved on P4/Xeon and should be cleared */ 968 /* This bit is reserved on P4/Xeon and should be cleared */
903 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && 969 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
904 (boot_cpu_data.x86 == 15)) 970 (boot_cpu_data.x86 == 15))
905 value &= ~APIC_SPIV_FOCUS_DISABLED; 971 value &= ~APIC_SPIV_FOCUS_DISABLED;
906 else 972 else
973#endif
907 value |= APIC_SPIV_FOCUS_DISABLED; 974 value |= APIC_SPIV_FOCUS_DISABLED;
908 value |= SPURIOUS_APIC_VECTOR; 975 value |= SPURIOUS_APIC_VECTOR;
909 apic_write(APIC_SPIV, value); 976 apic_write(APIC_SPIV, value);
@@ -922,6 +989,16 @@ static void __cpuinit lapic_setup_esr(void)
922{ 989{
923 unsigned long oldvalue, value, maxlvt; 990 unsigned long oldvalue, value, maxlvt;
924 if (lapic_is_integrated() && !esr_disable) { 991 if (lapic_is_integrated() && !esr_disable) {
992 if (esr_disable) {
993 /*
994 * Something untraceable is creating bad interrupts on
995 * secondary quads ... for the moment, just leave the
996 * ESR disabled - we can't do anything useful with the
997 * errors anyway - mbligh
998 */
999 printk(KERN_INFO "Leaving ESR disabled.\n");
1000 return;
1001 }
925 /* !82489DX */ 1002 /* !82489DX */
926 maxlvt = lapic_get_maxlvt(); 1003 maxlvt = lapic_get_maxlvt();
927 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ 1004 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
@@ -942,16 +1019,7 @@ static void __cpuinit lapic_setup_esr(void)
942 "vector: 0x%08lx after: 0x%08lx\n", 1019 "vector: 0x%08lx after: 0x%08lx\n",
943 oldvalue, value); 1020 oldvalue, value);
944 } else { 1021 } else {
945 if (esr_disable) 1022 printk(KERN_INFO "No ESR for 82489DX.\n");
946 /*
947 * Something untraceable is creating bad interrupts on
948 * secondary quads ... for the moment, just leave the
949 * ESR disabled - we can't do anything useful with the
950 * errors anyway - mbligh
951 */
952 printk(KERN_INFO "Leaving ESR disabled.\n");
953 else
954 printk(KERN_INFO "No ESR for 82489DX.\n");
955 } 1023 }
956} 1024}
957 1025
@@ -1089,13 +1157,17 @@ void __cpuinit setup_local_APIC(void)
1089 1157
1090void __cpuinit end_local_APIC_setup(void) 1158void __cpuinit end_local_APIC_setup(void)
1091{ 1159{
1092 unsigned long value;
1093
1094 lapic_setup_esr(); 1160 lapic_setup_esr();
1095 /* Disable the local apic timer */ 1161
1096 value = apic_read(APIC_LVTT); 1162#ifdef CONFIG_X86_32
1097 value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); 1163 {
1098 apic_write(APIC_LVTT, value); 1164 unsigned int value;
1165 /* Disable the local apic timer */
1166 value = apic_read(APIC_LVTT);
1167 value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
1168 apic_write(APIC_LVTT, value);
1169 }
1170#endif
1099 1171
1100 setup_apic_nmi_watchdog(NULL); 1172 setup_apic_nmi_watchdog(NULL);
1101 apic_pm_activate(); 1173 apic_pm_activate();
@@ -1205,7 +1277,7 @@ void __init init_apic_mappings(void)
1205 * default configuration (or the MP table is broken). 1277 * default configuration (or the MP table is broken).
1206 */ 1278 */
1207 if (boot_cpu_physical_apicid == -1U) 1279 if (boot_cpu_physical_apicid == -1U)
1208 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 1280 boot_cpu_physical_apicid = read_apic_id();
1209 1281
1210} 1282}
1211 1283
@@ -1242,7 +1314,7 @@ int __init APIC_init_uniprocessor(void)
1242 * might be zero if read from MP tables. Get it from LAPIC. 1314 * might be zero if read from MP tables. Get it from LAPIC.
1243 */ 1315 */
1244#ifdef CONFIG_CRASH_DUMP 1316#ifdef CONFIG_CRASH_DUMP
1245 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 1317 boot_cpu_physical_apicid = read_apic_id();
1246#endif 1318#endif
1247 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); 1319 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
1248 1320
@@ -1321,59 +1393,12 @@ void smp_error_interrupt(struct pt_regs *regs)
1321 irq_exit(); 1393 irq_exit();
1322} 1394}
1323 1395
1324#ifdef CONFIG_SMP
1325void __init smp_intr_init(void)
1326{
1327 /*
1328 * IRQ0 must be given a fixed assignment and initialized,
1329 * because it's used before the IO-APIC is set up.
1330 */
1331 set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
1332
1333 /*
1334 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
1335 * IPI, driven by wakeup.
1336 */
1337 alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
1338
1339 /* IPI for invalidation */
1340 alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
1341
1342 /* IPI for generic function call */
1343 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
1344
1345 /* IPI for single call function */
1346 set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
1347 call_function_single_interrupt);
1348}
1349#endif
1350
1351/*
1352 * Initialize APIC interrupts
1353 */
1354void __init apic_intr_init(void)
1355{
1356#ifdef CONFIG_SMP
1357 smp_intr_init();
1358#endif
1359 /* self generated IPI for local APIC timer */
1360 alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
1361
1362 /* IPI vectors for APIC spurious and error interrupts */
1363 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
1364 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
1365
1366 /* thermal monitor LVT interrupt */
1367#ifdef CONFIG_X86_MCE_P4THERMAL
1368 alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
1369#endif
1370}
1371
1372/** 1396/**
1373 * connect_bsp_APIC - attach the APIC to the interrupt system 1397 * connect_bsp_APIC - attach the APIC to the interrupt system
1374 */ 1398 */
1375void __init connect_bsp_APIC(void) 1399void __init connect_bsp_APIC(void)
1376{ 1400{
1401#ifdef CONFIG_X86_32
1377 if (pic_mode) { 1402 if (pic_mode) {
1378 /* 1403 /*
1379 * Do not trust the local APIC being empty at bootup. 1404 * Do not trust the local APIC being empty at bootup.
@@ -1388,6 +1413,7 @@ void __init connect_bsp_APIC(void)
1388 outb(0x70, 0x22); 1413 outb(0x70, 0x22);
1389 outb(0x01, 0x23); 1414 outb(0x01, 0x23);
1390 } 1415 }
1416#endif
1391 enable_apic_mode(); 1417 enable_apic_mode();
1392} 1418}
1393 1419
@@ -1400,6 +1426,9 @@ void __init connect_bsp_APIC(void)
1400 */ 1426 */
1401void disconnect_bsp_APIC(int virt_wire_setup) 1427void disconnect_bsp_APIC(int virt_wire_setup)
1402{ 1428{
1429 unsigned int value;
1430
1431#ifdef CONFIG_X86_32
1403 if (pic_mode) { 1432 if (pic_mode) {
1404 /* 1433 /*
1405 * Put the board back into PIC mode (has an effect only on 1434 * Put the board back into PIC mode (has an effect only on
@@ -1411,54 +1440,53 @@ void disconnect_bsp_APIC(int virt_wire_setup)
1411 "entering PIC mode.\n"); 1440 "entering PIC mode.\n");
1412 outb(0x70, 0x22); 1441 outb(0x70, 0x22);
1413 outb(0x00, 0x23); 1442 outb(0x00, 0x23);
1414 } else { 1443 return;
1415 /* Go back to Virtual Wire compatibility mode */ 1444 }
1416 unsigned long value; 1445#endif
1417 1446
1418 /* For the spurious interrupt use vector F, and enable it */ 1447 /* Go back to Virtual Wire compatibility mode */
1419 value = apic_read(APIC_SPIV);
1420 value &= ~APIC_VECTOR_MASK;
1421 value |= APIC_SPIV_APIC_ENABLED;
1422 value |= 0xf;
1423 apic_write(APIC_SPIV, value);
1424 1448
1425 if (!virt_wire_setup) { 1449 /* For the spurious interrupt use vector F, and enable it */
1426 /* 1450 value = apic_read(APIC_SPIV);
1427 * For LVT0 make it edge triggered, active high, 1451 value &= ~APIC_VECTOR_MASK;
1428 * external and enabled 1452 value |= APIC_SPIV_APIC_ENABLED;
1429 */ 1453 value |= 0xf;
1430 value = apic_read(APIC_LVT0); 1454 apic_write(APIC_SPIV, value);
1431 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
1432 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
1433 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
1434 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
1435 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
1436 apic_write(APIC_LVT0, value);
1437 } else {
1438 /* Disable LVT0 */
1439 apic_write(APIC_LVT0, APIC_LVT_MASKED);
1440 }
1441 1455
1456 if (!virt_wire_setup) {
1442 /* 1457 /*
1443 * For LVT1 make it edge triggered, active high, nmi and 1458 * For LVT0 make it edge triggered, active high,
1444 * enabled 1459 * external and enabled
1445 */ 1460 */
1446 value = apic_read(APIC_LVT1); 1461 value = apic_read(APIC_LVT0);
1447 value &= ~( 1462 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
1448 APIC_MODE_MASK | APIC_SEND_PENDING |
1449 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | 1463 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
1450 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); 1464 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
1451 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; 1465 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
1452 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); 1466 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
1453 apic_write(APIC_LVT1, value); 1467 apic_write(APIC_LVT0, value);
1468 } else {
1469 /* Disable LVT0 */
1470 apic_write(APIC_LVT0, APIC_LVT_MASKED);
1454 } 1471 }
1472
1473 /*
1474 * For LVT1 make it edge triggered, active high,
1475 * nmi and enabled
1476 */
1477 value = apic_read(APIC_LVT1);
1478 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
1479 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
1480 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
1481 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
1482 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
1483 apic_write(APIC_LVT1, value);
1455} 1484}
1456 1485
1457void __cpuinit generic_processor_info(int apicid, int version) 1486void __cpuinit generic_processor_info(int apicid, int version)
1458{ 1487{
1459 int cpu; 1488 int cpu;
1460 cpumask_t tmp_map; 1489 cpumask_t tmp_map;
1461 physid_mask_t phys_cpu;
1462 1490
1463 /* 1491 /*
1464 * Validate version 1492 * Validate version
@@ -1471,9 +1499,6 @@ void __cpuinit generic_processor_info(int apicid, int version)
1471 } 1499 }
1472 apic_version[apicid] = version; 1500 apic_version[apicid] = version;
1473 1501
1474 phys_cpu = apicid_to_cpu_present(apicid);
1475 physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu);
1476
1477 if (num_processors >= NR_CPUS) { 1502 if (num_processors >= NR_CPUS) {
1478 printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." 1503 printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
1479 " Processor ignored.\n", NR_CPUS); 1504 " Processor ignored.\n", NR_CPUS);
@@ -1484,17 +1509,19 @@ void __cpuinit generic_processor_info(int apicid, int version)
1484 cpus_complement(tmp_map, cpu_present_map); 1509 cpus_complement(tmp_map, cpu_present_map);
1485 cpu = first_cpu(tmp_map); 1510 cpu = first_cpu(tmp_map);
1486 1511
1487 if (apicid == boot_cpu_physical_apicid) 1512 physid_set(apicid, phys_cpu_present_map);
1513 if (apicid == boot_cpu_physical_apicid) {
1488 /* 1514 /*
1489 * x86_bios_cpu_apicid is required to have processors listed 1515 * x86_bios_cpu_apicid is required to have processors listed
1490 * in same order as logical cpu numbers. Hence the first 1516 * in same order as logical cpu numbers. Hence the first
1491 * entry is BSP, and so on. 1517 * entry is BSP, and so on.
1492 */ 1518 */
1493 cpu = 0; 1519 cpu = 0;
1494 1520 }
1495 if (apicid > max_physical_apicid) 1521 if (apicid > max_physical_apicid)
1496 max_physical_apicid = apicid; 1522 max_physical_apicid = apicid;
1497 1523
1524#ifdef CONFIG_X86_32
1498 /* 1525 /*
1499 * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y 1526 * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
1500 * but we need to work other dependencies like SMP_SUSPEND etc 1527 * but we need to work other dependencies like SMP_SUSPEND etc
@@ -1514,7 +1541,9 @@ void __cpuinit generic_processor_info(int apicid, int version)
1514 def_to_bigsmp = 1; 1541 def_to_bigsmp = 1;
1515 } 1542 }
1516 } 1543 }
1517#ifdef CONFIG_SMP 1544#endif
1545
1546#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64)
1518 /* are we being called early in kernel startup? */ 1547 /* are we being called early in kernel startup? */
1519 if (early_per_cpu_ptr(x86_cpu_to_apicid)) { 1548 if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
1520 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); 1549 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
@@ -1527,6 +1556,7 @@ void __cpuinit generic_processor_info(int apicid, int version)
1527 per_cpu(x86_bios_cpu_apicid, cpu) = apicid; 1556 per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
1528 } 1557 }
1529#endif 1558#endif
1559
1530 cpu_set(cpu, cpu_possible_map); 1560 cpu_set(cpu, cpu_possible_map);
1531 cpu_set(cpu, cpu_present_map); 1561 cpu_set(cpu, cpu_present_map);
1532} 1562}
@@ -1537,6 +1567,11 @@ void __cpuinit generic_processor_info(int apicid, int version)
1537#ifdef CONFIG_PM 1567#ifdef CONFIG_PM
1538 1568
1539static struct { 1569static struct {
1570 /*
1571 * 'active' is true if the local APIC was enabled by us and
1572 * not the BIOS; this signifies that we are also responsible
1573 * for disabling it before entering apm/acpi suspend
1574 */
1540 int active; 1575 int active;
1541 /* r/w apic fields */ 1576 /* r/w apic fields */
1542 unsigned int apic_id; 1577 unsigned int apic_id;
@@ -1577,7 +1612,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
1577 apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); 1612 apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
1578 apic_pm_state.apic_tmict = apic_read(APIC_TMICT); 1613 apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
1579 apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); 1614 apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
1580#ifdef CONFIG_X86_MCE_P4THERMAL 1615#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
1581 if (maxlvt >= 5) 1616 if (maxlvt >= 5)
1582 apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); 1617 apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
1583#endif 1618#endif
@@ -1601,16 +1636,23 @@ static int lapic_resume(struct sys_device *dev)
1601 1636
1602 local_irq_save(flags); 1637 local_irq_save(flags);
1603 1638
1604 /* 1639#ifdef CONFIG_X86_64
1605 * Make sure the APICBASE points to the right address 1640 if (x2apic)
1606 * 1641 enable_x2apic();
1607 * FIXME! This will be wrong if we ever support suspend on 1642 else
1608 * SMP! We'll need to do this as part of the CPU restore! 1643#endif
1609 */ 1644 {
1610 rdmsr(MSR_IA32_APICBASE, l, h); 1645 /*
1611 l &= ~MSR_IA32_APICBASE_BASE; 1646 * Make sure the APICBASE points to the right address
1612 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; 1647 *
1613 wrmsr(MSR_IA32_APICBASE, l, h); 1648 * FIXME! This will be wrong if we ever support suspend on
1649 * SMP! We'll need to do this as part of the CPU restore!
1650 */
1651 rdmsr(MSR_IA32_APICBASE, l, h);
1652 l &= ~MSR_IA32_APICBASE_BASE;
1653 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
1654 wrmsr(MSR_IA32_APICBASE, l, h);
1655 }
1614 1656
1615 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); 1657 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
1616 apic_write(APIC_ID, apic_pm_state.apic_id); 1658 apic_write(APIC_ID, apic_pm_state.apic_id);
@@ -1620,7 +1662,7 @@ static int lapic_resume(struct sys_device *dev)
1620 apic_write(APIC_SPIV, apic_pm_state.apic_spiv); 1662 apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
1621 apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); 1663 apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
1622 apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); 1664 apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
1623#ifdef CONFIG_X86_MCE_P4THERMAL 1665#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
1624 if (maxlvt >= 5) 1666 if (maxlvt >= 5)
1625 apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); 1667 apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
1626#endif 1668#endif
@@ -1634,7 +1676,9 @@ static int lapic_resume(struct sys_device *dev)
1634 apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); 1676 apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
1635 apic_write(APIC_ESR, 0); 1677 apic_write(APIC_ESR, 0);
1636 apic_read(APIC_ESR); 1678 apic_read(APIC_ESR);
1679
1637 local_irq_restore(flags); 1680 local_irq_restore(flags);
1681
1638 return 0; 1682 return 0;
1639} 1683}
1640 1684
@@ -1690,20 +1734,20 @@ static int __init parse_lapic(char *arg)
1690} 1734}
1691early_param("lapic", parse_lapic); 1735early_param("lapic", parse_lapic);
1692 1736
1693static int __init parse_nolapic(char *arg) 1737static int __init setup_disableapic(char *arg)
1694{ 1738{
1695 disable_apic = 1; 1739 disable_apic = 1;
1696 setup_clear_cpu_cap(X86_FEATURE_APIC); 1740 setup_clear_cpu_cap(X86_FEATURE_APIC);
1697 return 0; 1741 return 0;
1698} 1742}
1699early_param("nolapic", parse_nolapic); 1743early_param("disableapic", setup_disableapic);
1700 1744
1701static int __init parse_disable_lapic_timer(char *arg) 1745/* same as disableapic, for compatibility */
1746static int __init setup_nolapic(char *arg)
1702{ 1747{
1703 local_apic_timer_disabled = 1; 1748 return setup_disableapic(arg);
1704 return 0;
1705} 1749}
1706early_param("nolapic_timer", parse_disable_lapic_timer); 1750early_param("nolapic", setup_nolapic);
1707 1751
1708static int __init parse_lapic_timer_c2_ok(char *arg) 1752static int __init parse_lapic_timer_c2_ok(char *arg)
1709{ 1753{
@@ -1712,15 +1756,40 @@ static int __init parse_lapic_timer_c2_ok(char *arg)
1712} 1756}
1713early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); 1757early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
1714 1758
1759static int __init parse_disable_apic_timer(char *arg)
1760{
1761 disable_apic_timer = 1;
1762 return 0;
1763}
1764early_param("noapictimer", parse_disable_apic_timer);
1765
1766static int __init parse_nolapic_timer(char *arg)
1767{
1768 disable_apic_timer = 1;
1769 return 0;
1770}
1771early_param("nolapic_timer", parse_nolapic_timer);
1772
1715static int __init apic_set_verbosity(char *arg) 1773static int __init apic_set_verbosity(char *arg)
1716{ 1774{
1717 if (!arg) 1775 if (!arg) {
1776#ifdef CONFIG_X86_64
1777 skip_ioapic_setup = 0;
1778 ioapic_force = 1;
1779 return 0;
1780#endif
1718 return -EINVAL; 1781 return -EINVAL;
1782 }
1719 1783
1720 if (strcmp(arg, "debug") == 0) 1784 if (strcmp("debug", arg) == 0)
1721 apic_verbosity = APIC_DEBUG; 1785 apic_verbosity = APIC_DEBUG;
1722 else if (strcmp(arg, "verbose") == 0) 1786 else if (strcmp("verbose", arg) == 0)
1723 apic_verbosity = APIC_VERBOSE; 1787 apic_verbosity = APIC_VERBOSE;
1788 else {
1789 printk(KERN_WARNING "APIC Verbosity level %s not recognised"
1790 " use apic=verbose or apic=debug\n", arg);
1791 return -EINVAL;
1792 }
1724 1793
1725 return 0; 1794 return 0;
1726} 1795}
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 446c062e831..53898b65a6a 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -27,6 +27,7 @@
27#include <linux/clockchips.h> 27#include <linux/clockchips.h>
28#include <linux/acpi_pmtmr.h> 28#include <linux/acpi_pmtmr.h>
29#include <linux/module.h> 29#include <linux/module.h>
30#include <linux/dmar.h>
30 31
31#include <asm/atomic.h> 32#include <asm/atomic.h>
32#include <asm/smp.h> 33#include <asm/smp.h>
@@ -39,13 +40,20 @@
39#include <asm/proto.h> 40#include <asm/proto.h>
40#include <asm/timex.h> 41#include <asm/timex.h>
41#include <asm/apic.h> 42#include <asm/apic.h>
43#include <asm/i8259.h>
42 44
43#include <mach_ipi.h> 45#include <mach_ipi.h>
44#include <mach_apic.h> 46#include <mach_apic.h>
45 47
48/* Disable local APIC timer from the kernel commandline or via dmi quirk */
46static int disable_apic_timer __cpuinitdata; 49static int disable_apic_timer __cpuinitdata;
47static int apic_calibrate_pmtmr __initdata; 50static int apic_calibrate_pmtmr __initdata;
48int disable_apic; 51int disable_apic;
52int disable_x2apic;
53int x2apic;
54
55/* x2apic enabled before OS handover */
56int x2apic_preenabled;
49 57
50/* Local APIC timer works in C2 */ 58/* Local APIC timer works in C2 */
51int local_apic_timer_c2_ok; 59int local_apic_timer_c2_ok;
@@ -73,6 +81,9 @@ static void lapic_timer_setup(enum clock_event_mode mode,
73static void lapic_timer_broadcast(cpumask_t mask); 81static void lapic_timer_broadcast(cpumask_t mask);
74static void apic_pm_activate(void); 82static void apic_pm_activate(void);
75 83
84/*
85 * The local apic timer can be used for any function which is CPU local.
86 */
76static struct clock_event_device lapic_clockevent = { 87static struct clock_event_device lapic_clockevent = {
77 .name = "lapic", 88 .name = "lapic",
78 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT 89 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
@@ -99,11 +110,15 @@ static inline int lapic_get_version(void)
99} 110}
100 111
101/* 112/*
102 * Check, if the APIC is integrated or a seperate chip 113 * Check, if the APIC is integrated or a separate chip
103 */ 114 */
104static inline int lapic_is_integrated(void) 115static inline int lapic_is_integrated(void)
105{ 116{
117#ifdef CONFIG_X86_64
106 return 1; 118 return 1;
119#else
120 return APIC_INTEGRATED(lapic_get_version());
121#endif
107} 122}
108 123
109/* 124/*
@@ -118,13 +133,18 @@ static int modern_apic(void)
118 return lapic_get_version() >= 0x14; 133 return lapic_get_version() >= 0x14;
119} 134}
120 135
121void apic_wait_icr_idle(void) 136/*
137 * Paravirt kernels also might be using these below ops. So we still
138 * use generic apic_read()/apic_write(), which might be pointing to different
139 * ops in PARAVIRT case.
140 */
141void xapic_wait_icr_idle(void)
122{ 142{
123 while (apic_read(APIC_ICR) & APIC_ICR_BUSY) 143 while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
124 cpu_relax(); 144 cpu_relax();
125} 145}
126 146
127u32 safe_apic_wait_icr_idle(void) 147u32 safe_xapic_wait_icr_idle(void)
128{ 148{
129 u32 send_status; 149 u32 send_status;
130 int timeout; 150 int timeout;
@@ -140,6 +160,68 @@ u32 safe_apic_wait_icr_idle(void)
140 return send_status; 160 return send_status;
141} 161}
142 162
163void xapic_icr_write(u32 low, u32 id)
164{
165 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
166 apic_write(APIC_ICR, low);
167}
168
169u64 xapic_icr_read(void)
170{
171 u32 icr1, icr2;
172
173 icr2 = apic_read(APIC_ICR2);
174 icr1 = apic_read(APIC_ICR);
175
176 return icr1 | ((u64)icr2 << 32);
177}
178
179static struct apic_ops xapic_ops = {
180 .read = native_apic_mem_read,
181 .write = native_apic_mem_write,
182 .icr_read = xapic_icr_read,
183 .icr_write = xapic_icr_write,
184 .wait_icr_idle = xapic_wait_icr_idle,
185 .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
186};
187
188struct apic_ops __read_mostly *apic_ops = &xapic_ops;
189EXPORT_SYMBOL_GPL(apic_ops);
190
191static void x2apic_wait_icr_idle(void)
192{
193 /* no need to wait for icr idle in x2apic */
194 return;
195}
196
197static u32 safe_x2apic_wait_icr_idle(void)
198{
199 /* no need to wait for icr idle in x2apic */
200 return 0;
201}
202
203void x2apic_icr_write(u32 low, u32 id)
204{
205 wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
206}
207
208u64 x2apic_icr_read(void)
209{
210 unsigned long val;
211
212 rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
213 return val;
214}
215
216static struct apic_ops x2apic_ops = {
217 .read = native_apic_msr_read,
218 .write = native_apic_msr_write,
219 .icr_read = x2apic_icr_read,
220 .icr_write = x2apic_icr_write,
221 .wait_icr_idle = x2apic_wait_icr_idle,
222 .safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
223};
224
143/** 225/**
144 * enable_NMI_through_LVT0 - enable NMI through local vector table 0 226 * enable_NMI_through_LVT0 - enable NMI through local vector table 0
145 */ 227 */
@@ -149,6 +231,11 @@ void __cpuinit enable_NMI_through_LVT0(void)
149 231
150 /* unmask and set to NMI */ 232 /* unmask and set to NMI */
151 v = APIC_DM_NMI; 233 v = APIC_DM_NMI;
234
235 /* Level triggered for 82489DX (32bit mode) */
236 if (!lapic_is_integrated())
237 v |= APIC_LVT_LEVEL_TRIGGER;
238
152 apic_write(APIC_LVT0, v); 239 apic_write(APIC_LVT0, v);
153} 240}
154 241
@@ -157,14 +244,28 @@ void __cpuinit enable_NMI_through_LVT0(void)
157 */ 244 */
158int lapic_get_maxlvt(void) 245int lapic_get_maxlvt(void)
159{ 246{
160 unsigned int v, maxlvt; 247 unsigned int v;
161 248
162 v = apic_read(APIC_LVR); 249 v = apic_read(APIC_LVR);
163 maxlvt = GET_APIC_MAXLVT(v); 250 /*
164 return maxlvt; 251 * - we always have APIC integrated on 64bit mode
252 * - 82489DXs do not report # of LVT entries
253 */
254 return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
165} 255}
166 256
167/* 257/*
258 * Local APIC timer
259 */
260
261/* Clock divisor */
262#ifdef CONFG_X86_64
263#define APIC_DIVISOR 1
264#else
265#define APIC_DIVISOR 16
266#endif
267
268/*
168 * This function sets up the local APIC timer, with a timeout of 269 * This function sets up the local APIC timer, with a timeout of
169 * 'clocks' APIC bus clock. During calibration we actually call 270 * 'clocks' APIC bus clock. During calibration we actually call
170 * this function twice on the boot CPU, once with a bogus timeout 271 * this function twice on the boot CPU, once with a bogus timeout
@@ -174,7 +275,6 @@ int lapic_get_maxlvt(void)
174 * We do reads before writes even if unnecessary, to get around the 275 * We do reads before writes even if unnecessary, to get around the
175 * P5 APIC double write bug. 276 * P5 APIC double write bug.
176 */ 277 */
177
178static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) 278static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
179{ 279{
180 unsigned int lvtt_value, tmp_value; 280 unsigned int lvtt_value, tmp_value;
@@ -182,6 +282,9 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
182 lvtt_value = LOCAL_TIMER_VECTOR; 282 lvtt_value = LOCAL_TIMER_VECTOR;
183 if (!oneshot) 283 if (!oneshot)
184 lvtt_value |= APIC_LVT_TIMER_PERIODIC; 284 lvtt_value |= APIC_LVT_TIMER_PERIODIC;
285 if (!lapic_is_integrated())
286 lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
287
185 if (!irqen) 288 if (!irqen)
186 lvtt_value |= APIC_LVT_MASKED; 289 lvtt_value |= APIC_LVT_MASKED;
187 290
@@ -191,12 +294,12 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
191 * Divide PICLK by 16 294 * Divide PICLK by 16
192 */ 295 */
193 tmp_value = apic_read(APIC_TDCR); 296 tmp_value = apic_read(APIC_TDCR);
194 apic_write(APIC_TDCR, (tmp_value 297 apic_write(APIC_TDCR,
195 & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) 298 (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
196 | APIC_TDR_DIV_16); 299 APIC_TDR_DIV_16);
197 300
198 if (!oneshot) 301 if (!oneshot)
199 apic_write(APIC_TMICT, clocks); 302 apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
200} 303}
201 304
202/* 305/*
@@ -366,7 +469,7 @@ static int __init calibrate_APIC_clock(void)
366 lapic_clockevent.min_delta_ns = 469 lapic_clockevent.min_delta_ns =
367 clockevent_delta2ns(0xF, &lapic_clockevent); 470 clockevent_delta2ns(0xF, &lapic_clockevent);
368 471
369 calibration_result = result / HZ; 472 calibration_result = (result * APIC_DIVISOR) / HZ;
370 473
371 /* 474 /*
372 * Do a sanity check on the APIC calibration result 475 * Do a sanity check on the APIC calibration result
@@ -388,10 +491,10 @@ static int __init calibrate_APIC_clock(void)
388void __init setup_boot_APIC_clock(void) 491void __init setup_boot_APIC_clock(void)
389{ 492{
390 /* 493 /*
391 * The local apic timer can be disabled via the kernel commandline. 494 * The local apic timer can be disabled via the kernel
392 * Register the lapic timer as a dummy clock event source on SMP 495 * commandline or from the CPU detection code. Register the lapic
393 * systems, so the broadcast mechanism is used. On UP systems simply 496 * timer as a dummy clock event source on SMP systems, so the
394 * ignore it. 497 * broadcast mechanism is used. On UP systems simply ignore it.
395 */ 498 */
396 if (disable_apic_timer) { 499 if (disable_apic_timer) {
397 printk(KERN_INFO "Disabling APIC timer\n"); 500 printk(KERN_INFO "Disabling APIC timer\n");
@@ -403,7 +506,9 @@ void __init setup_boot_APIC_clock(void)
403 return; 506 return;
404 } 507 }
405 508
406 printk(KERN_INFO "Using local APIC timer interrupts.\n"); 509 apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
510 "calibrating APIC timer ...\n");
511
407 if (calibrate_APIC_clock()) { 512 if (calibrate_APIC_clock()) {
408 /* No broadcast on UP ! */ 513 /* No broadcast on UP ! */
409 if (num_possible_cpus() > 1) 514 if (num_possible_cpus() > 1)
@@ -422,6 +527,7 @@ void __init setup_boot_APIC_clock(void)
422 printk(KERN_WARNING "APIC timer registered as dummy," 527 printk(KERN_WARNING "APIC timer registered as dummy,"
423 " due to nmi_watchdog=%d!\n", nmi_watchdog); 528 " due to nmi_watchdog=%d!\n", nmi_watchdog);
424 529
530 /* Setup the lapic or request the broadcast */
425 setup_APIC_timer(); 531 setup_APIC_timer();
426} 532}
427 533
@@ -460,7 +566,11 @@ static void local_apic_timer_interrupt(void)
460 /* 566 /*
461 * the NMI deadlock-detector uses this. 567 * the NMI deadlock-detector uses this.
462 */ 568 */
569#ifdef CONFIG_X86_64
463 add_pda(apic_timer_irqs, 1); 570 add_pda(apic_timer_irqs, 1);
571#else
572 per_cpu(irq_stat, cpu).apic_timer_irqs++;
573#endif
464 574
465 evt->event_handler(evt); 575 evt->event_handler(evt);
466} 576}
@@ -491,6 +601,7 @@ void smp_apic_timer_interrupt(struct pt_regs *regs)
491 irq_enter(); 601 irq_enter();
492 local_apic_timer_interrupt(); 602 local_apic_timer_interrupt();
493 irq_exit(); 603 irq_exit();
604
494 set_irq_regs(old_regs); 605 set_irq_regs(old_regs);
495} 606}
496 607
@@ -544,6 +655,13 @@ void clear_local_APIC(void)
544 apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); 655 apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
545 } 656 }
546 657
658 /* lets not touch this if we didn't frob it */
659#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL)
660 if (maxlvt >= 5) {
661 v = apic_read(APIC_LVTTHMR);
662 apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
663 }
664#endif
547 /* 665 /*
548 * Clean APIC state for other OSs: 666 * Clean APIC state for other OSs:
549 */ 667 */
@@ -554,8 +672,14 @@ void clear_local_APIC(void)
554 apic_write(APIC_LVTERR, APIC_LVT_MASKED); 672 apic_write(APIC_LVTERR, APIC_LVT_MASKED);
555 if (maxlvt >= 4) 673 if (maxlvt >= 4)
556 apic_write(APIC_LVTPC, APIC_LVT_MASKED); 674 apic_write(APIC_LVTPC, APIC_LVT_MASKED);
557 apic_write(APIC_ESR, 0); 675
558 apic_read(APIC_ESR); 676 /* Integrated APIC (!82489DX) ? */
677 if (lapic_is_integrated()) {
678 if (maxlvt > 3)
679 /* Clear ESR due to Pentium errata 3AP and 11AP */
680 apic_write(APIC_ESR, 0);
681 apic_read(APIC_ESR);
682 }
559} 683}
560 684
561/** 685/**
@@ -574,8 +698,28 @@ void disable_local_APIC(void)
574 value = apic_read(APIC_SPIV); 698 value = apic_read(APIC_SPIV);
575 value &= ~APIC_SPIV_APIC_ENABLED; 699 value &= ~APIC_SPIV_APIC_ENABLED;
576 apic_write(APIC_SPIV, value); 700 apic_write(APIC_SPIV, value);
701
702#ifdef CONFIG_X86_32
703 /*
704 * When LAPIC was disabled by the BIOS and enabled by the kernel,
705 * restore the disabled state.
706 */
707 if (enabled_via_apicbase) {
708 unsigned int l, h;
709
710 rdmsr(MSR_IA32_APICBASE, l, h);
711 l &= ~MSR_IA32_APICBASE_ENABLE;
712 wrmsr(MSR_IA32_APICBASE, l, h);
713 }
714#endif
577} 715}
578 716
717/*
718 * If Linux enabled the LAPIC against the BIOS default disable it down before
719 * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and
720 * not power-off. Additionally clear all LVT entries before disable_local_APIC
721 * for the case where Linux didn't enable the LAPIC.
722 */
579void lapic_shutdown(void) 723void lapic_shutdown(void)
580{ 724{
581 unsigned long flags; 725 unsigned long flags;
@@ -585,7 +729,13 @@ void lapic_shutdown(void)
585 729
586 local_irq_save(flags); 730 local_irq_save(flags);
587 731
588 disable_local_APIC(); 732#ifdef CONFIG_X86_32
733 if (!enabled_via_apicbase)
734 clear_local_APIC();
735 else
736#endif
737 disable_local_APIC();
738
589 739
590 local_irq_restore(flags); 740 local_irq_restore(flags);
591} 741}
@@ -629,10 +779,10 @@ int __init verify_local_APIC(void)
629 /* 779 /*
630 * The ID register is read/write in a real APIC. 780 * The ID register is read/write in a real APIC.
631 */ 781 */
632 reg0 = read_apic_id(); 782 reg0 = apic_read(APIC_ID);
633 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); 783 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
634 apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); 784 apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
635 reg1 = read_apic_id(); 785 reg1 = apic_read(APIC_ID);
636 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); 786 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
637 apic_write(APIC_ID, reg0); 787 apic_write(APIC_ID, reg0);
638 if (reg1 != (reg0 ^ APIC_ID_MASK)) 788 if (reg1 != (reg0 ^ APIC_ID_MASK))
@@ -656,8 +806,11 @@ int __init verify_local_APIC(void)
656 */ 806 */
657void __init sync_Arb_IDs(void) 807void __init sync_Arb_IDs(void)
658{ 808{
659 /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 */ 809 /*
660 if (modern_apic()) 810 * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not
811 * needed on AMD.
812 */
813 if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
661 return; 814 return;
662 815
663 /* 816 /*
@@ -666,8 +819,8 @@ void __init sync_Arb_IDs(void)
666 apic_wait_icr_idle(); 819 apic_wait_icr_idle();
667 820
668 apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); 821 apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
669 apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG 822 apic_write(APIC_ICR, APIC_DEST_ALLINC |
670 | APIC_DM_INIT); 823 APIC_INT_LEVELTRIG | APIC_DM_INIT);
671} 824}
672 825
673/* 826/*
@@ -684,8 +837,6 @@ void __init init_bsp_APIC(void)
684 if (smp_found_config || !cpu_has_apic) 837 if (smp_found_config || !cpu_has_apic)
685 return; 838 return;
686 839
687 value = apic_read(APIC_LVR);
688
689 /* 840 /*
690 * Do not trust the local APIC being empty at bootup. 841 * Do not trust the local APIC being empty at bootup.
691 */ 842 */
@@ -697,7 +848,15 @@ void __init init_bsp_APIC(void)
697 value = apic_read(APIC_SPIV); 848 value = apic_read(APIC_SPIV);
698 value &= ~APIC_VECTOR_MASK; 849 value &= ~APIC_VECTOR_MASK;
699 value |= APIC_SPIV_APIC_ENABLED; 850 value |= APIC_SPIV_APIC_ENABLED;
700 value |= APIC_SPIV_FOCUS_DISABLED; 851
852#ifdef CONFIG_X86_32
853 /* This bit is reserved on P4/Xeon and should be cleared */
854 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
855 (boot_cpu_data.x86 == 15))
856 value &= ~APIC_SPIV_FOCUS_DISABLED;
857 else
858#endif
859 value |= APIC_SPIV_FOCUS_DISABLED;
701 value |= SPURIOUS_APIC_VECTOR; 860 value |= SPURIOUS_APIC_VECTOR;
702 apic_write(APIC_SPIV, value); 861 apic_write(APIC_SPIV, value);
703 862
@@ -706,9 +865,50 @@ void __init init_bsp_APIC(void)
706 */ 865 */
707 apic_write(APIC_LVT0, APIC_DM_EXTINT); 866 apic_write(APIC_LVT0, APIC_DM_EXTINT);
708 value = APIC_DM_NMI; 867 value = APIC_DM_NMI;
868 if (!lapic_is_integrated()) /* 82489DX */
869 value |= APIC_LVT_LEVEL_TRIGGER;
709 apic_write(APIC_LVT1, value); 870 apic_write(APIC_LVT1, value);
710} 871}
711 872
873static void __cpuinit lapic_setup_esr(void)
874{
875 unsigned long oldvalue, value, maxlvt;
876 if (lapic_is_integrated() && !esr_disable) {
877 if (esr_disable) {
878 /*
879 * Something untraceable is creating bad interrupts on
880 * secondary quads ... for the moment, just leave the
881 * ESR disabled - we can't do anything useful with the
882 * errors anyway - mbligh
883 */
884 printk(KERN_INFO "Leaving ESR disabled.\n");
885 return;
886 }
887 /* !82489DX */
888 maxlvt = lapic_get_maxlvt();
889 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
890 apic_write(APIC_ESR, 0);
891 oldvalue = apic_read(APIC_ESR);
892
893 /* enables sending errors */
894 value = ERROR_APIC_VECTOR;
895 apic_write(APIC_LVTERR, value);
896 /*
897 * spec says clear errors after enabling vector.
898 */
899 if (maxlvt > 3)
900 apic_write(APIC_ESR, 0);
901 value = apic_read(APIC_ESR);
902 if (value != oldvalue)
903 apic_printk(APIC_VERBOSE, "ESR value before enabling "
904 "vector: 0x%08lx after: 0x%08lx\n",
905 oldvalue, value);
906 } else {
907 printk(KERN_INFO "No ESR for 82489DX.\n");
908 }
909}
910
911
712/** 912/**
713 * setup_local_APIC - setup the local APIC 913 * setup_local_APIC - setup the local APIC
714 */ 914 */
@@ -814,25 +1014,143 @@ void __cpuinit setup_local_APIC(void)
814 preempt_enable(); 1014 preempt_enable();
815} 1015}
816 1016
817static void __cpuinit lapic_setup_esr(void)
818{
819 unsigned maxlvt = lapic_get_maxlvt();
820
821 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR);
822 /*
823 * spec says clear errors after enabling vector.
824 */
825 if (maxlvt > 3)
826 apic_write(APIC_ESR, 0);
827}
828
829void __cpuinit end_local_APIC_setup(void) 1017void __cpuinit end_local_APIC_setup(void)
830{ 1018{
831 lapic_setup_esr(); 1019 lapic_setup_esr();
1020
1021#ifdef CONFIG_X86_32
1022 {
1023 unsigned int value;
1024 /* Disable the local apic timer */
1025 value = apic_read(APIC_LVTT);
1026 value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
1027 apic_write(APIC_LVTT, value);
1028 }
1029#endif
1030
832 setup_apic_nmi_watchdog(NULL); 1031 setup_apic_nmi_watchdog(NULL);
833 apic_pm_activate(); 1032 apic_pm_activate();
834} 1033}
835 1034
1035void check_x2apic(void)
1036{
1037 int msr, msr2;
1038
1039 rdmsr(MSR_IA32_APICBASE, msr, msr2);
1040
1041 if (msr & X2APIC_ENABLE) {
1042 printk("x2apic enabled by BIOS, switching to x2apic ops\n");
1043 x2apic_preenabled = x2apic = 1;
1044 apic_ops = &x2apic_ops;
1045 }
1046}
1047
1048void enable_x2apic(void)
1049{
1050 int msr, msr2;
1051
1052 rdmsr(MSR_IA32_APICBASE, msr, msr2);
1053 if (!(msr & X2APIC_ENABLE)) {
1054 printk("Enabling x2apic\n");
1055 wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
1056 }
1057}
1058
1059void enable_IR_x2apic(void)
1060{
1061#ifdef CONFIG_INTR_REMAP
1062 int ret;
1063 unsigned long flags;
1064
1065 if (!cpu_has_x2apic)
1066 return;
1067
1068 if (!x2apic_preenabled && disable_x2apic) {
1069 printk(KERN_INFO
1070 "Skipped enabling x2apic and Interrupt-remapping "
1071 "because of nox2apic\n");
1072 return;
1073 }
1074
1075 if (x2apic_preenabled && disable_x2apic)
1076 panic("Bios already enabled x2apic, can't enforce nox2apic");
1077
1078 if (!x2apic_preenabled && skip_ioapic_setup) {
1079 printk(KERN_INFO
1080 "Skipped enabling x2apic and Interrupt-remapping "
1081 "because of skipping io-apic setup\n");
1082 return;
1083 }
1084
1085 ret = dmar_table_init();
1086 if (ret) {
1087 printk(KERN_INFO
1088 "dmar_table_init() failed with %d:\n", ret);
1089
1090 if (x2apic_preenabled)
1091 panic("x2apic enabled by bios. But IR enabling failed");
1092 else
1093 printk(KERN_INFO
1094 "Not enabling x2apic,Intr-remapping\n");
1095 return;
1096 }
1097
1098 local_irq_save(flags);
1099 mask_8259A();
1100 save_mask_IO_APIC_setup();
1101
1102 ret = enable_intr_remapping(1);
1103
1104 if (ret && x2apic_preenabled) {
1105 local_irq_restore(flags);
1106 panic("x2apic enabled by bios. But IR enabling failed");
1107 }
1108
1109 if (ret)
1110 goto end;
1111
1112 if (!x2apic) {
1113 x2apic = 1;
1114 apic_ops = &x2apic_ops;
1115 enable_x2apic();
1116 }
1117end:
1118 if (ret)
1119 /*
1120 * IR enabling failed
1121 */
1122 restore_IO_APIC_setup();
1123 else
1124 reinit_intr_remapped_IO_APIC(x2apic_preenabled);
1125
1126 unmask_8259A();
1127 local_irq_restore(flags);
1128
1129 if (!ret) {
1130 if (!x2apic_preenabled)
1131 printk(KERN_INFO
1132 "Enabled x2apic and interrupt-remapping\n");
1133 else
1134 printk(KERN_INFO
1135 "Enabled Interrupt-remapping\n");
1136 } else
1137 printk(KERN_ERR
1138 "Failed to enable Interrupt-remapping and x2apic\n");
1139#else
1140 if (!cpu_has_x2apic)
1141 return;
1142
1143 if (x2apic_preenabled)
1144 panic("x2apic enabled prior OS handover,"
1145 " enable CONFIG_INTR_REMAP");
1146
1147 printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
1148 " and x2apic\n");
1149#endif
1150
1151 return;
1152}
1153
836/* 1154/*
837 * Detect and enable local APICs on non-SMP boards. 1155 * Detect and enable local APICs on non-SMP boards.
838 * Original code written by Keir Fraser. 1156 * Original code written by Keir Fraser.
@@ -872,7 +1190,7 @@ void __init early_init_lapic_mapping(void)
872 * Fetch the APIC ID of the BSP in case we have a 1190 * Fetch the APIC ID of the BSP in case we have a
873 * default configuration (or the MP table is broken). 1191 * default configuration (or the MP table is broken).
874 */ 1192 */
875 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 1193 boot_cpu_physical_apicid = read_apic_id();
876} 1194}
877 1195
878/** 1196/**
@@ -880,6 +1198,11 @@ void __init early_init_lapic_mapping(void)
880 */ 1198 */
881void __init init_apic_mappings(void) 1199void __init init_apic_mappings(void)
882{ 1200{
1201 if (x2apic) {
1202 boot_cpu_physical_apicid = read_apic_id();
1203 return;
1204 }
1205
883 /* 1206 /*
884 * If no local APIC can be found then set up a fake all 1207 * If no local APIC can be found then set up a fake all
885 * zeroes page to simulate the local APIC and another 1208 * zeroes page to simulate the local APIC and another
@@ -899,13 +1222,15 @@ void __init init_apic_mappings(void)
899 * Fetch the APIC ID of the BSP in case we have a 1222 * Fetch the APIC ID of the BSP in case we have a
900 * default configuration (or the MP table is broken). 1223 * default configuration (or the MP table is broken).
901 */ 1224 */
902 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 1225 boot_cpu_physical_apicid = read_apic_id();
903} 1226}
904 1227
905/* 1228/*
906 * This initializes the IO-APIC and APIC hardware if this is 1229 * This initializes the IO-APIC and APIC hardware if this is
907 * a UP kernel. 1230 * a UP kernel.
908 */ 1231 */
1232int apic_version[MAX_APICS];
1233
909int __init APIC_init_uniprocessor(void) 1234int __init APIC_init_uniprocessor(void)
910{ 1235{
911 if (disable_apic) { 1236 if (disable_apic) {
@@ -918,6 +1243,9 @@ int __init APIC_init_uniprocessor(void)
918 return -1; 1243 return -1;
919 } 1244 }
920 1245
1246 enable_IR_x2apic();
1247 setup_apic_routing();
1248
921 verify_local_APIC(); 1249 verify_local_APIC();
922 1250
923 connect_bsp_APIC(); 1251 connect_bsp_APIC();
@@ -1004,17 +1332,57 @@ asmlinkage void smp_error_interrupt(void)
1004} 1332}
1005 1333
1006/** 1334/**
1007 * * connect_bsp_APIC - attach the APIC to the interrupt system 1335 * connect_bsp_APIC - attach the APIC to the interrupt system
1008 * */ 1336 */
1009void __init connect_bsp_APIC(void) 1337void __init connect_bsp_APIC(void)
1010{ 1338{
1339#ifdef CONFIG_X86_32
1340 if (pic_mode) {
1341 /*
1342 * Do not trust the local APIC being empty at bootup.
1343 */
1344 clear_local_APIC();
1345 /*
1346 * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's
1347 * local APIC to INT and NMI lines.
1348 */
1349 apic_printk(APIC_VERBOSE, "leaving PIC mode, "
1350 "enabling APIC mode.\n");
1351 outb(0x70, 0x22);
1352 outb(0x01, 0x23);
1353 }
1354#endif
1011 enable_apic_mode(); 1355 enable_apic_mode();
1012} 1356}
1013 1357
1358/**
1359 * disconnect_bsp_APIC - detach the APIC from the interrupt system
1360 * @virt_wire_setup: indicates, whether virtual wire mode is selected
1361 *
1362 * Virtual wire mode is necessary to deliver legacy interrupts even when the
1363 * APIC is disabled.
1364 */
1014void disconnect_bsp_APIC(int virt_wire_setup) 1365void disconnect_bsp_APIC(int virt_wire_setup)
1015{ 1366{
1367 unsigned int value;
1368
1369#ifdef CONFIG_X86_32
1370 if (pic_mode) {
1371 /*
1372 * Put the board back into PIC mode (has an effect only on
1373 * certain older boards). Note that APIC interrupts, including
1374 * IPIs, won't work beyond this point! The only exception are
1375 * INIT IPIs.
1376 */
1377 apic_printk(APIC_VERBOSE, "disabling APIC mode, "
1378 "entering PIC mode.\n");
1379 outb(0x70, 0x22);
1380 outb(0x00, 0x23);
1381 return;
1382 }
1383#endif
1384
1016 /* Go back to Virtual Wire compatibility mode */ 1385 /* Go back to Virtual Wire compatibility mode */
1017 unsigned long value;
1018 1386
1019 /* For the spurious interrupt use vector F, and enable it */ 1387 /* For the spurious interrupt use vector F, and enable it */
1020 value = apic_read(APIC_SPIV); 1388 value = apic_read(APIC_SPIV);
@@ -1040,7 +1408,10 @@ void disconnect_bsp_APIC(int virt_wire_setup)
1040 apic_write(APIC_LVT0, APIC_LVT_MASKED); 1408 apic_write(APIC_LVT0, APIC_LVT_MASKED);
1041 } 1409 }
1042 1410
1043 /* For LVT1 make it edge triggered, active high, nmi and enabled */ 1411 /*
1412 * For LVT1 make it edge triggered, active high,
1413 * nmi and enabled
1414 */
1044 value = apic_read(APIC_LVT1); 1415 value = apic_read(APIC_LVT1);
1045 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | 1416 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
1046 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | 1417 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
@@ -1055,9 +1426,20 @@ void __cpuinit generic_processor_info(int apicid, int version)
1055 int cpu; 1426 int cpu;
1056 cpumask_t tmp_map; 1427 cpumask_t tmp_map;
1057 1428
1429 /*
1430 * Validate version
1431 */
1432 if (version == 0x0) {
1433 printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
1434 "fixing up to 0x10. (tell your hw vendor)\n",
1435 version);
1436 version = 0x10;
1437 }
1438 apic_version[apicid] = version;
1439
1058 if (num_processors >= NR_CPUS) { 1440 if (num_processors >= NR_CPUS) {
1059 printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." 1441 printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
1060 " Processor ignored.\n", NR_CPUS); 1442 " Processor ignored.\n", NR_CPUS);
1061 return; 1443 return;
1062 } 1444 }
1063 1445
@@ -1077,6 +1459,29 @@ void __cpuinit generic_processor_info(int apicid, int version)
1077 if (apicid > max_physical_apicid) 1459 if (apicid > max_physical_apicid)
1078 max_physical_apicid = apicid; 1460 max_physical_apicid = apicid;
1079 1461
1462#ifdef CONFIG_X86_32
1463 /*
1464 * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
1465 * but we need to work other dependencies like SMP_SUSPEND etc
1466 * before this can be done without some confusion.
1467 * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
1468 * - Ashok Raj <ashok.raj@intel.com>
1469 */
1470 if (max_physical_apicid >= 8) {
1471 switch (boot_cpu_data.x86_vendor) {
1472 case X86_VENDOR_INTEL:
1473 if (!APIC_XAPIC(version)) {
1474 def_to_bigsmp = 0;
1475 break;
1476 }
1477 /* If P4 and above fall through */
1478 case X86_VENDOR_AMD:
1479 def_to_bigsmp = 1;
1480 }
1481 }
1482#endif
1483
1484#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64)
1080 /* are we being called early in kernel startup? */ 1485 /* are we being called early in kernel startup? */
1081 if (early_per_cpu_ptr(x86_cpu_to_apicid)) { 1486 if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
1082 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); 1487 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
@@ -1088,20 +1493,28 @@ void __cpuinit generic_processor_info(int apicid, int version)
1088 per_cpu(x86_cpu_to_apicid, cpu) = apicid; 1493 per_cpu(x86_cpu_to_apicid, cpu) = apicid;
1089 per_cpu(x86_bios_cpu_apicid, cpu) = apicid; 1494 per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
1090 } 1495 }
1496#endif
1091 1497
1092 cpu_set(cpu, cpu_possible_map); 1498 cpu_set(cpu, cpu_possible_map);
1093 cpu_set(cpu, cpu_present_map); 1499 cpu_set(cpu, cpu_present_map);
1094} 1500}
1095 1501
1502int hard_smp_processor_id(void)
1503{
1504 return read_apic_id();
1505}
1506
1096/* 1507/*
1097 * Power management 1508 * Power management
1098 */ 1509 */
1099#ifdef CONFIG_PM 1510#ifdef CONFIG_PM
1100 1511
1101static struct { 1512static struct {
1102 /* 'active' is true if the local APIC was enabled by us and 1513 /*
1103 not the BIOS; this signifies that we are also responsible 1514 * 'active' is true if the local APIC was enabled by us and
1104 for disabling it before entering apm/acpi suspend */ 1515 * not the BIOS; this signifies that we are also responsible
1516 * for disabling it before entering apm/acpi suspend
1517 */
1105 int active; 1518 int active;
1106 /* r/w apic fields */ 1519 /* r/w apic fields */
1107 unsigned int apic_id; 1520 unsigned int apic_id;
@@ -1129,7 +1542,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
1129 1542
1130 maxlvt = lapic_get_maxlvt(); 1543 maxlvt = lapic_get_maxlvt();
1131 1544
1132 apic_pm_state.apic_id = read_apic_id(); 1545 apic_pm_state.apic_id = apic_read(APIC_ID);
1133 apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); 1546 apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
1134 apic_pm_state.apic_ldr = apic_read(APIC_LDR); 1547 apic_pm_state.apic_ldr = apic_read(APIC_LDR);
1135 apic_pm_state.apic_dfr = apic_read(APIC_DFR); 1548 apic_pm_state.apic_dfr = apic_read(APIC_DFR);
@@ -1142,10 +1555,11 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
1142 apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); 1555 apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
1143 apic_pm_state.apic_tmict = apic_read(APIC_TMICT); 1556 apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
1144 apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); 1557 apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
1145#ifdef CONFIG_X86_MCE_INTEL 1558#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
1146 if (maxlvt >= 5) 1559 if (maxlvt >= 5)
1147 apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); 1560 apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
1148#endif 1561#endif
1562
1149 local_irq_save(flags); 1563 local_irq_save(flags);
1150 disable_local_APIC(); 1564 disable_local_APIC();
1151 local_irq_restore(flags); 1565 local_irq_restore(flags);
@@ -1164,10 +1578,25 @@ static int lapic_resume(struct sys_device *dev)
1164 maxlvt = lapic_get_maxlvt(); 1578 maxlvt = lapic_get_maxlvt();
1165 1579
1166 local_irq_save(flags); 1580 local_irq_save(flags);
1167 rdmsr(MSR_IA32_APICBASE, l, h); 1581
1168 l &= ~MSR_IA32_APICBASE_BASE; 1582#ifdef CONFIG_X86_64
1169 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; 1583 if (x2apic)
1170 wrmsr(MSR_IA32_APICBASE, l, h); 1584 enable_x2apic();
1585 else
1586#endif
1587 {
1588 /*
1589 * Make sure the APICBASE points to the right address
1590 *
1591 * FIXME! This will be wrong if we ever support suspend on
1592 * SMP! We'll need to do this as part of the CPU restore!
1593 */
1594 rdmsr(MSR_IA32_APICBASE, l, h);
1595 l &= ~MSR_IA32_APICBASE_BASE;
1596 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
1597 wrmsr(MSR_IA32_APICBASE, l, h);
1598 }
1599
1171 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); 1600 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
1172 apic_write(APIC_ID, apic_pm_state.apic_id); 1601 apic_write(APIC_ID, apic_pm_state.apic_id);
1173 apic_write(APIC_DFR, apic_pm_state.apic_dfr); 1602 apic_write(APIC_DFR, apic_pm_state.apic_dfr);
@@ -1176,7 +1605,7 @@ static int lapic_resume(struct sys_device *dev)
1176 apic_write(APIC_SPIV, apic_pm_state.apic_spiv); 1605 apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
1177 apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); 1606 apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
1178 apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); 1607 apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
1179#ifdef CONFIG_X86_MCE_INTEL 1608#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
1180 if (maxlvt >= 5) 1609 if (maxlvt >= 5)
1181 apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); 1610 apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
1182#endif 1611#endif
@@ -1190,10 +1619,17 @@ static int lapic_resume(struct sys_device *dev)
1190 apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); 1619 apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
1191 apic_write(APIC_ESR, 0); 1620 apic_write(APIC_ESR, 0);
1192 apic_read(APIC_ESR); 1621 apic_read(APIC_ESR);
1622
1193 local_irq_restore(flags); 1623 local_irq_restore(flags);
1624
1194 return 0; 1625 return 0;
1195} 1626}
1196 1627
1628/*
1629 * This device has no shutdown method - fully functioning local APICs
1630 * are needed on every CPU up until machine_halt/restart/poweroff.
1631 */
1632
1197static struct sysdev_class lapic_sysclass = { 1633static struct sysdev_class lapic_sysclass = {
1198 .name = "lapic", 1634 .name = "lapic",
1199 .resume = lapic_resume, 1635 .resume = lapic_resume,
@@ -1307,31 +1743,19 @@ __cpuinit int apic_is_clustered_box(void)
1307 return (clusters > 2); 1743 return (clusters > 2);
1308} 1744}
1309 1745
1310/* 1746static __init int setup_nox2apic(char *str)
1311 * APIC command line parameters
1312 */
1313static int __init apic_set_verbosity(char *str)
1314{ 1747{
1315 if (str == NULL) { 1748 disable_x2apic = 1;
1316 skip_ioapic_setup = 0; 1749 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC);
1317 ioapic_force = 1;
1318 return 0;
1319 }
1320 if (strcmp("debug", str) == 0)
1321 apic_verbosity = APIC_DEBUG;
1322 else if (strcmp("verbose", str) == 0)
1323 apic_verbosity = APIC_VERBOSE;
1324 else {
1325 printk(KERN_WARNING "APIC Verbosity level %s not recognised"
1326 " use apic=verbose or apic=debug\n", str);
1327 return -EINVAL;
1328 }
1329
1330 return 0; 1750 return 0;
1331} 1751}
1332early_param("apic", apic_set_verbosity); 1752early_param("nox2apic", setup_nox2apic);
1753
1333 1754
1334static __init int setup_disableapic(char *str) 1755/*
1756 * APIC command line parameters
1757 */
1758static int __init setup_disableapic(char *arg)
1335{ 1759{
1336 disable_apic = 1; 1760 disable_apic = 1;
1337 setup_clear_cpu_cap(X86_FEATURE_APIC); 1761 setup_clear_cpu_cap(X86_FEATURE_APIC);
@@ -1340,9 +1764,9 @@ static __init int setup_disableapic(char *str)
1340early_param("disableapic", setup_disableapic); 1764early_param("disableapic", setup_disableapic);
1341 1765
1342/* same as disableapic, for compatibility */ 1766/* same as disableapic, for compatibility */
1343static __init int setup_nolapic(char *str) 1767static int __init setup_nolapic(char *arg)
1344{ 1768{
1345 return setup_disableapic(str); 1769 return setup_disableapic(arg);
1346} 1770}
1347early_param("nolapic", setup_nolapic); 1771early_param("nolapic", setup_nolapic);
1348 1772
@@ -1353,14 +1777,19 @@ static int __init parse_lapic_timer_c2_ok(char *arg)
1353} 1777}
1354early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); 1778early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
1355 1779
1356static __init int setup_noapictimer(char *str) 1780static int __init parse_disable_apic_timer(char *arg)
1357{ 1781{
1358 if (str[0] != ' ' && str[0] != 0)
1359 return 0;
1360 disable_apic_timer = 1; 1782 disable_apic_timer = 1;
1361 return 1; 1783 return 0;
1362} 1784}
1363__setup("noapictimer", setup_noapictimer); 1785early_param("noapictimer", parse_disable_apic_timer);
1786
1787static int __init parse_nolapic_timer(char *arg)
1788{
1789 disable_apic_timer = 1;
1790 return 0;
1791}
1792early_param("nolapic_timer", parse_nolapic_timer);
1364 1793
1365static __init int setup_apicpmtimer(char *s) 1794static __init int setup_apicpmtimer(char *s)
1366{ 1795{
@@ -1370,6 +1799,31 @@ static __init int setup_apicpmtimer(char *s)
1370} 1799}
1371__setup("apicpmtimer", setup_apicpmtimer); 1800__setup("apicpmtimer", setup_apicpmtimer);
1372 1801
1802static int __init apic_set_verbosity(char *arg)
1803{
1804 if (!arg) {
1805#ifdef CONFIG_X86_64
1806 skip_ioapic_setup = 0;
1807 ioapic_force = 1;
1808 return 0;
1809#endif
1810 return -EINVAL;
1811 }
1812
1813 if (strcmp("debug", arg) == 0)
1814 apic_verbosity = APIC_DEBUG;
1815 else if (strcmp("verbose", arg) == 0)
1816 apic_verbosity = APIC_VERBOSE;
1817 else {
1818 printk(KERN_WARNING "APIC Verbosity level %s not recognised"
1819 " use apic=verbose or apic=debug\n", arg);
1820 return -EINVAL;
1821 }
1822
1823 return 0;
1824}
1825early_param("apic", apic_set_verbosity);
1826
1373static int __init lapic_insert_resource(void) 1827static int __init lapic_insert_resource(void)
1374{ 1828{
1375 if (!apic_phys) 1829 if (!apic_phys)
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
index 305b465889b..43f1aa51da5 100644
--- a/arch/x86/kernel/cpu/common_64.c
+++ b/arch/x86/kernel/cpu/common_64.c
@@ -687,6 +687,8 @@ void __cpuinit cpu_init(void)
687 barrier(); 687 barrier();
688 688
689 check_efer(); 689 check_efer();
690 if (cpu != 0 && x2apic)
691 enable_x2apic();
690 692
691 /* 693 /*
692 * set up and load the per-CPU TSS 694 * set up and load the per-CPU TSS
diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c
index c9017799497..b96b69545fb 100644
--- a/arch/x86/kernel/cpu/feature_names.c
+++ b/arch/x86/kernel/cpu/feature_names.c
@@ -46,7 +46,7 @@ const char * const x86_cap_flags[NCAPINTS*32] = {
46 /* Intel-defined (#2) */ 46 /* Intel-defined (#2) */
47 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", 47 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
48 "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, 48 "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
49 NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt", 49 NULL, NULL, "dca", "sse4_1", "sse4_2", "x2apic", NULL, "popcnt",
50 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 50 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
51 51
52 /* VIA/Cyrix/Centaur-defined */ 52 /* VIA/Cyrix/Centaur-defined */
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index eaff0bbb144..6c9bfc9e1e9 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -16,87 +16,63 @@
16#include <linux/ctype.h> 16#include <linux/ctype.h>
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/hardirq.h> 18#include <linux/hardirq.h>
19#include <linux/dmar.h>
19 20
20#include <asm/smp.h> 21#include <asm/smp.h>
21#include <asm/ipi.h> 22#include <asm/ipi.h>
22#include <asm/genapic.h> 23#include <asm/genapic.h>
23 24
24#ifdef CONFIG_ACPI 25extern struct genapic apic_flat;
25#include <acpi/acpi_bus.h> 26extern struct genapic apic_physflat;
26#endif 27extern struct genapic apic_x2xpic_uv_x;
27 28extern struct genapic apic_x2apic_phys;
28DEFINE_PER_CPU(int, x2apic_extra_bits); 29extern struct genapic apic_x2apic_cluster;
29 30
30struct genapic __read_mostly *genapic = &apic_flat; 31struct genapic __read_mostly *genapic = &apic_flat;
31 32
32static enum uv_system_type uv_system_type; 33static struct genapic *apic_probe[] __initdata = {
34 &apic_x2apic_uv_x,
35 &apic_x2apic_phys,
36 &apic_x2apic_cluster,
37 &apic_physflat,
38 NULL,
39};
33 40
34/* 41/*
35 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. 42 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
36 */ 43 */
37void __init setup_apic_routing(void) 44void __init setup_apic_routing(void)
38{ 45{
39 if (uv_system_type == UV_NON_UNIQUE_APIC) 46 if (genapic == &apic_x2apic_phys || genapic == &apic_x2apic_cluster) {
40 genapic = &apic_x2apic_uv_x; 47 if (!intr_remapping_enabled)
41 else 48 genapic = &apic_flat;
42#ifdef CONFIG_ACPI 49 }
43 /*
44 * Quirk: some x86_64 machines can only use physical APIC mode
45 * regardless of how many processors are present (x86_64 ES7000
46 * is an example).
47 */
48 if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
49 (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
50 genapic = &apic_physflat;
51 else
52#endif
53
54 if (max_physical_apicid < 8)
55 genapic = &apic_flat;
56 else
57 genapic = &apic_physflat;
58 50
59 printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); 51 if (genapic == &apic_flat) {
52 if (max_physical_apicid >= 8)
53 genapic = &apic_physflat;
54 printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
55 }
60} 56}
61 57
62/* Same for both flat and physical. */ 58/* Same for both flat and physical. */
63 59
64void send_IPI_self(int vector) 60void apic_send_IPI_self(int vector)
65{ 61{
66 __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); 62 __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
67} 63}
68 64
69int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) 65int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
70{ 66{
71 if (!strcmp(oem_id, "SGI")) { 67 int i;
72 if (!strcmp(oem_table_id, "UVL")) 68
73 uv_system_type = UV_LEGACY_APIC; 69 for (i = 0; apic_probe[i]; ++i) {
74 else if (!strcmp(oem_table_id, "UVX")) 70 if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
75 uv_system_type = UV_X2APIC; 71 genapic = apic_probe[i];
76 else if (!strcmp(oem_table_id, "UVH")) 72 printk(KERN_INFO "Setting APIC routing to %s.\n",
77 uv_system_type = UV_NON_UNIQUE_APIC; 73 genapic->name);
74 return 1;
75 }
78 } 76 }
79 return 0; 77 return 0;
80} 78}
81
82unsigned int read_apic_id(void)
83{
84 unsigned int id;
85
86 WARN_ON(preemptible() && num_online_cpus() > 1);
87 id = apic_read(APIC_ID);
88 if (uv_system_type >= UV_X2APIC)
89 id |= __get_cpu_var(x2apic_extra_bits);
90 return id;
91}
92
93enum uv_system_type get_uv_system_type(void)
94{
95 return uv_system_type;
96}
97
98int is_uv_system(void)
99{
100 return uv_system_type != UV_NONE;
101}
102EXPORT_SYMBOL_GPL(is_uv_system);
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index 786548a62d3..9eca5ba7a6b 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -15,9 +15,20 @@
15#include <linux/kernel.h> 15#include <linux/kernel.h>
16#include <linux/ctype.h> 16#include <linux/ctype.h>
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/hardirq.h>
18#include <asm/smp.h> 19#include <asm/smp.h>
19#include <asm/ipi.h> 20#include <asm/ipi.h>
20#include <asm/genapic.h> 21#include <asm/genapic.h>
22#include <mach_apicdef.h>
23
24#ifdef CONFIG_ACPI
25#include <acpi/acpi_bus.h>
26#endif
27
28static int __init flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
29{
30 return 1;
31}
21 32
22static cpumask_t flat_target_cpus(void) 33static cpumask_t flat_target_cpus(void)
23{ 34{
@@ -95,9 +106,33 @@ static void flat_send_IPI_all(int vector)
95 __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); 106 __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
96} 107}
97 108
109static unsigned int get_apic_id(unsigned long x)
110{
111 unsigned int id;
112
113 id = (((x)>>24) & 0xFFu);
114 return id;
115}
116
117static unsigned long set_apic_id(unsigned int id)
118{
119 unsigned long x;
120
121 x = ((id & 0xFFu)<<24);
122 return x;
123}
124
125static unsigned int read_xapic_id(void)
126{
127 unsigned int id;
128
129 id = get_apic_id(apic_read(APIC_ID));
130 return id;
131}
132
98static int flat_apic_id_registered(void) 133static int flat_apic_id_registered(void)
99{ 134{
100 return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map); 135 return physid_isset(read_xapic_id(), phys_cpu_present_map);
101} 136}
102 137
103static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) 138static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask)
@@ -112,6 +147,7 @@ static unsigned int phys_pkg_id(int index_msb)
112 147
113struct genapic apic_flat = { 148struct genapic apic_flat = {
114 .name = "flat", 149 .name = "flat",
150 .acpi_madt_oem_check = flat_acpi_madt_oem_check,
115 .int_delivery_mode = dest_LowestPrio, 151 .int_delivery_mode = dest_LowestPrio,
116 .int_dest_mode = (APIC_DEST_LOGICAL != 0), 152 .int_dest_mode = (APIC_DEST_LOGICAL != 0),
117 .target_cpus = flat_target_cpus, 153 .target_cpus = flat_target_cpus,
@@ -121,8 +157,12 @@ struct genapic apic_flat = {
121 .send_IPI_all = flat_send_IPI_all, 157 .send_IPI_all = flat_send_IPI_all,
122 .send_IPI_allbutself = flat_send_IPI_allbutself, 158 .send_IPI_allbutself = flat_send_IPI_allbutself,
123 .send_IPI_mask = flat_send_IPI_mask, 159 .send_IPI_mask = flat_send_IPI_mask,
160 .send_IPI_self = apic_send_IPI_self,
124 .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, 161 .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
125 .phys_pkg_id = phys_pkg_id, 162 .phys_pkg_id = phys_pkg_id,
163 .get_apic_id = get_apic_id,
164 .set_apic_id = set_apic_id,
165 .apic_id_mask = (0xFFu<<24),
126}; 166};
127 167
128/* 168/*
@@ -130,6 +170,21 @@ struct genapic apic_flat = {
130 * We cannot use logical delivery in this case because the mask 170 * We cannot use logical delivery in this case because the mask
131 * overflows, so use physical mode. 171 * overflows, so use physical mode.
132 */ 172 */
173static int __init physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
174{
175#ifdef CONFIG_ACPI
176 /*
177 * Quirk: some x86_64 machines can only use physical APIC mode
178 * regardless of how many processors are present (x86_64 ES7000
179 * is an example).
180 */
181 if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
182 (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
183 return 1;
184#endif
185
186 return 0;
187}
133 188
134static cpumask_t physflat_target_cpus(void) 189static cpumask_t physflat_target_cpus(void)
135{ 190{
@@ -176,6 +231,7 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
176 231
177struct genapic apic_physflat = { 232struct genapic apic_physflat = {
178 .name = "physical flat", 233 .name = "physical flat",
234 .acpi_madt_oem_check = physflat_acpi_madt_oem_check,
179 .int_delivery_mode = dest_Fixed, 235 .int_delivery_mode = dest_Fixed,
180 .int_dest_mode = (APIC_DEST_PHYSICAL != 0), 236 .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
181 .target_cpus = physflat_target_cpus, 237 .target_cpus = physflat_target_cpus,
@@ -185,6 +241,10 @@ struct genapic apic_physflat = {
185 .send_IPI_all = physflat_send_IPI_all, 241 .send_IPI_all = physflat_send_IPI_all,
186 .send_IPI_allbutself = physflat_send_IPI_allbutself, 242 .send_IPI_allbutself = physflat_send_IPI_allbutself,
187 .send_IPI_mask = physflat_send_IPI_mask, 243 .send_IPI_mask = physflat_send_IPI_mask,
244 .send_IPI_self = apic_send_IPI_self,
188 .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, 245 .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
189 .phys_pkg_id = phys_pkg_id, 246 .phys_pkg_id = phys_pkg_id,
247 .get_apic_id = get_apic_id,
248 .set_apic_id = set_apic_id,
249 .apic_id_mask = (0xFFu<<24),
190}; 250};
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
new file mode 100644
index 00000000000..fed9f68efd6
--- /dev/null
+++ b/arch/x86/kernel/genx2apic_cluster.c
@@ -0,0 +1,164 @@
1#include <linux/threads.h>
2#include <linux/cpumask.h>
3#include <linux/string.h>
4#include <linux/kernel.h>
5#include <linux/ctype.h>
6#include <linux/init.h>
7#include <linux/dmar.h>
8
9#include <asm/smp.h>
10#include <asm/ipi.h>
11#include <asm/genapic.h>
12
13DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
14
15static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
16{
17 if (cpu_has_x2apic)
18 return 1;
19
20 return 0;
21}
22
23/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
24
25static cpumask_t x2apic_target_cpus(void)
26{
27 return cpumask_of_cpu(0);
28}
29
30/*
31 * for now each logical cpu is in its own vector allocation domain.
32 */
33static cpumask_t x2apic_vector_allocation_domain(int cpu)
34{
35 cpumask_t domain = CPU_MASK_NONE;
36 cpu_set(cpu, domain);
37 return domain;
38}
39
40static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
41 unsigned int dest)
42{
43 unsigned long cfg;
44
45 cfg = __prepare_ICR(0, vector, dest);
46
47 /*
48 * send the IPI.
49 */
50 x2apic_icr_write(cfg, apicid);
51}
52
53/*
54 * for now, we send the IPI's one by one in the cpumask.
55 * TBD: Based on the cpu mask, we can send the IPI's to the cluster group
56 * at once. We have 16 cpu's in a cluster. This will minimize IPI register
57 * writes.
58 */
59static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
60{
61 unsigned long flags;
62 unsigned long query_cpu;
63
64 local_irq_save(flags);
65 for_each_cpu_mask(query_cpu, mask) {
66 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu),
67 vector, APIC_DEST_LOGICAL);
68 }
69 local_irq_restore(flags);
70}
71
72static void x2apic_send_IPI_allbutself(int vector)
73{
74 cpumask_t mask = cpu_online_map;
75
76 cpu_clear(smp_processor_id(), mask);
77
78 if (!cpus_empty(mask))
79 x2apic_send_IPI_mask(mask, vector);
80}
81
82static void x2apic_send_IPI_all(int vector)
83{
84 x2apic_send_IPI_mask(cpu_online_map, vector);
85}
86
87static int x2apic_apic_id_registered(void)
88{
89 return 1;
90}
91
92static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
93{
94 int cpu;
95
96 /*
97 * We're using fixed IRQ delivery, can only return one phys APIC ID.
98 * May as well be the first.
99 */
100 cpu = first_cpu(cpumask);
101 if ((unsigned)cpu < NR_CPUS)
102 return per_cpu(x86_cpu_to_logical_apicid, cpu);
103 else
104 return BAD_APICID;
105}
106
107static unsigned int get_apic_id(unsigned long x)
108{
109 unsigned int id;
110
111 id = x;
112 return id;
113}
114
115static unsigned long set_apic_id(unsigned int id)
116{
117 unsigned long x;
118
119 x = id;
120 return x;
121}
122
123static unsigned int x2apic_read_id(void)
124{
125 return apic_read(APIC_ID);
126}
127
128static unsigned int phys_pkg_id(int index_msb)
129{
130 return x2apic_read_id() >> index_msb;
131}
132
133static void x2apic_send_IPI_self(int vector)
134{
135 apic_write(APIC_SELF_IPI, vector);
136}
137
138static void init_x2apic_ldr(void)
139{
140 int cpu = smp_processor_id();
141
142 per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR);
143 return;
144}
145
146struct genapic apic_x2apic_cluster = {
147 .name = "cluster x2apic",
148 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
149 .int_delivery_mode = dest_LowestPrio,
150 .int_dest_mode = (APIC_DEST_LOGICAL != 0),
151 .target_cpus = x2apic_target_cpus,
152 .vector_allocation_domain = x2apic_vector_allocation_domain,
153 .apic_id_registered = x2apic_apic_id_registered,
154 .init_apic_ldr = init_x2apic_ldr,
155 .send_IPI_all = x2apic_send_IPI_all,
156 .send_IPI_allbutself = x2apic_send_IPI_allbutself,
157 .send_IPI_mask = x2apic_send_IPI_mask,
158 .send_IPI_self = x2apic_send_IPI_self,
159 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
160 .phys_pkg_id = phys_pkg_id,
161 .get_apic_id = get_apic_id,
162 .set_apic_id = set_apic_id,
163 .apic_id_mask = (0xFFFFFFFFu),
164};
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
new file mode 100644
index 00000000000..958d537b4cc
--- /dev/null
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -0,0 +1,159 @@
1#include <linux/threads.h>
2#include <linux/cpumask.h>
3#include <linux/string.h>
4#include <linux/kernel.h>
5#include <linux/ctype.h>
6#include <linux/init.h>
7#include <linux/dmar.h>
8
9#include <asm/smp.h>
10#include <asm/ipi.h>
11#include <asm/genapic.h>
12
13static int x2apic_phys;
14
15static int set_x2apic_phys_mode(char *arg)
16{
17 x2apic_phys = 1;
18 return 0;
19}
20early_param("x2apic_phys", set_x2apic_phys_mode);
21
22static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
23{
24 if (cpu_has_x2apic && x2apic_phys)
25 return 1;
26
27 return 0;
28}
29
30/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
31
32static cpumask_t x2apic_target_cpus(void)
33{
34 return cpumask_of_cpu(0);
35}
36
37static cpumask_t x2apic_vector_allocation_domain(int cpu)
38{
39 cpumask_t domain = CPU_MASK_NONE;
40 cpu_set(cpu, domain);
41 return domain;
42}
43
44static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
45 unsigned int dest)
46{
47 unsigned long cfg;
48
49 cfg = __prepare_ICR(0, vector, dest);
50
51 /*
52 * send the IPI.
53 */
54 x2apic_icr_write(cfg, apicid);
55}
56
57static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
58{
59 unsigned long flags;
60 unsigned long query_cpu;
61
62 local_irq_save(flags);
63 for_each_cpu_mask(query_cpu, mask) {
64 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
65 vector, APIC_DEST_PHYSICAL);
66 }
67 local_irq_restore(flags);
68}
69
70static void x2apic_send_IPI_allbutself(int vector)
71{
72 cpumask_t mask = cpu_online_map;
73
74 cpu_clear(smp_processor_id(), mask);
75
76 if (!cpus_empty(mask))
77 x2apic_send_IPI_mask(mask, vector);
78}
79
80static void x2apic_send_IPI_all(int vector)
81{
82 x2apic_send_IPI_mask(cpu_online_map, vector);
83}
84
85static int x2apic_apic_id_registered(void)
86{
87 return 1;
88}
89
90static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
91{
92 int cpu;
93
94 /*
95 * We're using fixed IRQ delivery, can only return one phys APIC ID.
96 * May as well be the first.
97 */
98 cpu = first_cpu(cpumask);
99 if ((unsigned)cpu < NR_CPUS)
100 return per_cpu(x86_cpu_to_apicid, cpu);
101 else
102 return BAD_APICID;
103}
104
105static unsigned int get_apic_id(unsigned long x)
106{
107 unsigned int id;
108
109 id = x;
110 return id;
111}
112
113static unsigned long set_apic_id(unsigned int id)
114{
115 unsigned long x;
116
117 x = id;
118 return x;
119}
120
121static unsigned int x2apic_read_id(void)
122{
123 return apic_read(APIC_ID);
124}
125
126static unsigned int phys_pkg_id(int index_msb)
127{
128 return x2apic_read_id() >> index_msb;
129}
130
131void x2apic_send_IPI_self(int vector)
132{
133 apic_write(APIC_SELF_IPI, vector);
134}
135
136void init_x2apic_ldr(void)
137{
138 return;
139}
140
141struct genapic apic_x2apic_phys = {
142 .name = "physical x2apic",
143 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
144 .int_delivery_mode = dest_Fixed,
145 .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
146 .target_cpus = x2apic_target_cpus,
147 .vector_allocation_domain = x2apic_vector_allocation_domain,
148 .apic_id_registered = x2apic_apic_id_registered,
149 .init_apic_ldr = init_x2apic_ldr,
150 .send_IPI_all = x2apic_send_IPI_all,
151 .send_IPI_allbutself = x2apic_send_IPI_allbutself,
152 .send_IPI_mask = x2apic_send_IPI_mask,
153 .send_IPI_self = x2apic_send_IPI_self,
154 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
155 .phys_pkg_id = phys_pkg_id,
156 .get_apic_id = get_apic_id,
157 .set_apic_id = set_apic_id,
158 .apic_id_mask = (0xFFFFFFFFu),
159};
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index bfa837cb16b..ae2ffc8a400 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -12,12 +12,12 @@
12#include <linux/threads.h> 12#include <linux/threads.h>
13#include <linux/cpumask.h> 13#include <linux/cpumask.h>
14#include <linux/string.h> 14#include <linux/string.h>
15#include <linux/kernel.h>
16#include <linux/ctype.h> 15#include <linux/ctype.h>
17#include <linux/init.h> 16#include <linux/init.h>
18#include <linux/sched.h> 17#include <linux/sched.h>
19#include <linux/bootmem.h> 18#include <linux/bootmem.h>
20#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/hardirq.h>
21#include <asm/smp.h> 21#include <asm/smp.h>
22#include <asm/ipi.h> 22#include <asm/ipi.h>
23#include <asm/genapic.h> 23#include <asm/genapic.h>
@@ -26,6 +26,36 @@
26#include <asm/uv/uv_hub.h> 26#include <asm/uv/uv_hub.h>
27#include <asm/uv/bios.h> 27#include <asm/uv/bios.h>
28 28
29DEFINE_PER_CPU(int, x2apic_extra_bits);
30
31static enum uv_system_type uv_system_type;
32
33static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
34{
35 if (!strcmp(oem_id, "SGI")) {
36 if (!strcmp(oem_table_id, "UVL"))
37 uv_system_type = UV_LEGACY_APIC;
38 else if (!strcmp(oem_table_id, "UVX"))
39 uv_system_type = UV_X2APIC;
40 else if (!strcmp(oem_table_id, "UVH")) {
41 uv_system_type = UV_NON_UNIQUE_APIC;
42 return 1;
43 }
44 }
45 return 0;
46}
47
48enum uv_system_type get_uv_system_type(void)
49{
50 return uv_system_type;
51}
52
53int is_uv_system(void)
54{
55 return uv_system_type != UV_NONE;
56}
57EXPORT_SYMBOL_GPL(is_uv_system);
58
29DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); 59DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
30EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info); 60EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info);
31 61
@@ -123,6 +153,10 @@ static int uv_apic_id_registered(void)
123 return 1; 153 return 1;
124} 154}
125 155
156static void uv_init_apic_ldr(void)
157{
158}
159
126static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) 160static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
127{ 161{
128 int cpu; 162 int cpu;
@@ -138,9 +172,34 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
138 return BAD_APICID; 172 return BAD_APICID;
139} 173}
140 174
175static unsigned int get_apic_id(unsigned long x)
176{
177 unsigned int id;
178
179 WARN_ON(preemptible() && num_online_cpus() > 1);
180 id = x | __get_cpu_var(x2apic_extra_bits);
181
182 return id;
183}
184
185static unsigned long set_apic_id(unsigned int id)
186{
187 unsigned long x;
188
189 /* maskout x2apic_extra_bits ? */
190 x = id;
191 return x;
192}
193
194static unsigned int uv_read_apic_id(void)
195{
196
197 return get_apic_id(apic_read(APIC_ID));
198}
199
141static unsigned int phys_pkg_id(int index_msb) 200static unsigned int phys_pkg_id(int index_msb)
142{ 201{
143 return GET_APIC_ID(read_apic_id()) >> index_msb; 202 return uv_read_apic_id() >> index_msb;
144} 203}
145 204
146#ifdef ZZZ /* Needs x2apic patch */ 205#ifdef ZZZ /* Needs x2apic patch */
@@ -152,17 +211,22 @@ static void uv_send_IPI_self(int vector)
152 211
153struct genapic apic_x2apic_uv_x = { 212struct genapic apic_x2apic_uv_x = {
154 .name = "UV large system", 213 .name = "UV large system",
214 .acpi_madt_oem_check = uv_acpi_madt_oem_check,
155 .int_delivery_mode = dest_Fixed, 215 .int_delivery_mode = dest_Fixed,
156 .int_dest_mode = (APIC_DEST_PHYSICAL != 0), 216 .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
157 .target_cpus = uv_target_cpus, 217 .target_cpus = uv_target_cpus,
158 .vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */ 218 .vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */
159 .apic_id_registered = uv_apic_id_registered, 219 .apic_id_registered = uv_apic_id_registered,
220 .init_apic_ldr = uv_init_apic_ldr,
160 .send_IPI_all = uv_send_IPI_all, 221 .send_IPI_all = uv_send_IPI_all,
161 .send_IPI_allbutself = uv_send_IPI_allbutself, 222 .send_IPI_allbutself = uv_send_IPI_allbutself,
162 .send_IPI_mask = uv_send_IPI_mask, 223 .send_IPI_mask = uv_send_IPI_mask,
163 /* ZZZ.send_IPI_self = uv_send_IPI_self, */ 224 /* ZZZ.send_IPI_self = uv_send_IPI_self, */
164 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, 225 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
165 .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */ 226 .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */
227 .get_apic_id = get_apic_id,
228 .set_apic_id = set_apic_id,
229 .apic_id_mask = (0xFFFFFFFFu),
166}; 230};
167 231
168static __cpuinit void set_x2apic_extra_bits(int pnode) 232static __cpuinit void set_x2apic_extra_bits(int pnode)
@@ -401,3 +465,5 @@ void __cpuinit uv_cpu_init(void)
401 if (get_uv_system_type() == UV_NON_UNIQUE_APIC) 465 if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
402 set_x2apic_extra_bits(uv_hub_info->pnode); 466 set_x2apic_extra_bits(uv_hub_info->pnode);
403} 467}
468
469
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index dc92b49d920..4b8a53d841f 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -282,6 +282,30 @@ static int __init i8259A_init_sysfs(void)
282 282
283device_initcall(i8259A_init_sysfs); 283device_initcall(i8259A_init_sysfs);
284 284
285void mask_8259A(void)
286{
287 unsigned long flags;
288
289 spin_lock_irqsave(&i8259A_lock, flags);
290
291 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
292 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
293
294 spin_unlock_irqrestore(&i8259A_lock, flags);
295}
296
297void unmask_8259A(void)
298{
299 unsigned long flags;
300
301 spin_lock_irqsave(&i8259A_lock, flags);
302
303 outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
304 outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */
305
306 spin_unlock_irqrestore(&i8259A_lock, flags);
307}
308
285void init_8259A(int auto_eoi) 309void init_8259A(int auto_eoi)
286{ 310{
287 unsigned long flags; 311 unsigned long flags;
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 09cddb57bec..e710289f673 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -46,10 +46,13 @@
46#include <asm/nmi.h> 46#include <asm/nmi.h>
47#include <asm/msidef.h> 47#include <asm/msidef.h>
48#include <asm/hypertransport.h> 48#include <asm/hypertransport.h>
49#include <asm/setup.h>
49 50
50#include <mach_apic.h> 51#include <mach_apic.h>
51#include <mach_apicdef.h> 52#include <mach_apicdef.h>
52 53
54#define __apicdebuginit(type) static type __init
55
53int (*ioapic_renumber_irq)(int ioapic, int irq); 56int (*ioapic_renumber_irq)(int ioapic, int irq);
54atomic_t irq_mis_count; 57atomic_t irq_mis_count;
55 58
@@ -1341,7 +1344,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
1341 ioapic_write_entry(apic, pin, entry); 1344 ioapic_write_entry(apic, pin, entry);
1342} 1345}
1343 1346
1344void __init print_IO_APIC(void) 1347
1348__apicdebuginit(void) print_IO_APIC(void)
1345{ 1349{
1346 int apic, i; 1350 int apic, i;
1347 union IO_APIC_reg_00 reg_00; 1351 union IO_APIC_reg_00 reg_00;
@@ -1456,9 +1460,7 @@ void __init print_IO_APIC(void)
1456 return; 1460 return;
1457} 1461}
1458 1462
1459#if 0 1463__apicdebuginit(void) print_APIC_bitfield(int base)
1460
1461static void print_APIC_bitfield(int base)
1462{ 1464{
1463 unsigned int v; 1465 unsigned int v;
1464 int i, j; 1466 int i, j;
@@ -1479,9 +1481,10 @@ static void print_APIC_bitfield(int base)
1479 } 1481 }
1480} 1482}
1481 1483
1482void /*__init*/ print_local_APIC(void *dummy) 1484__apicdebuginit(void) print_local_APIC(void *dummy)
1483{ 1485{
1484 unsigned int v, ver, maxlvt; 1486 unsigned int v, ver, maxlvt;
1487 u64 icr;
1485 1488
1486 if (apic_verbosity == APIC_QUIET) 1489 if (apic_verbosity == APIC_QUIET)
1487 return; 1490 return;
@@ -1490,7 +1493,7 @@ void /*__init*/ print_local_APIC(void *dummy)
1490 smp_processor_id(), hard_smp_processor_id()); 1493 smp_processor_id(), hard_smp_processor_id());
1491 v = apic_read(APIC_ID); 1494 v = apic_read(APIC_ID);
1492 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, 1495 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v,
1493 GET_APIC_ID(read_apic_id())); 1496 GET_APIC_ID(v));
1494 v = apic_read(APIC_LVR); 1497 v = apic_read(APIC_LVR);
1495 printk(KERN_INFO "... APIC VERSION: %08x\n", v); 1498 printk(KERN_INFO "... APIC VERSION: %08x\n", v);
1496 ver = GET_APIC_VERSION(v); 1499 ver = GET_APIC_VERSION(v);
@@ -1532,10 +1535,9 @@ void /*__init*/ print_local_APIC(void *dummy)
1532 printk(KERN_DEBUG "... APIC ESR: %08x\n", v); 1535 printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
1533 } 1536 }
1534 1537
1535 v = apic_read(APIC_ICR); 1538 icr = apic_icr_read();
1536 printk(KERN_DEBUG "... APIC ICR: %08x\n", v); 1539 printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
1537 v = apic_read(APIC_ICR2); 1540 printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
1538 printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
1539 1541
1540 v = apic_read(APIC_LVTT); 1542 v = apic_read(APIC_LVTT);
1541 printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); 1543 printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
@@ -1563,12 +1565,12 @@ void /*__init*/ print_local_APIC(void *dummy)
1563 printk("\n"); 1565 printk("\n");
1564} 1566}
1565 1567
1566void print_all_local_APICs(void) 1568__apicdebuginit(void) print_all_local_APICs(void)
1567{ 1569{
1568 on_each_cpu(print_local_APIC, NULL, 1); 1570 on_each_cpu(print_local_APIC, NULL, 1);
1569} 1571}
1570 1572
1571void /*__init*/ print_PIC(void) 1573__apicdebuginit(void) print_PIC(void)
1572{ 1574{
1573 unsigned int v; 1575 unsigned int v;
1574 unsigned long flags; 1576 unsigned long flags;
@@ -1600,7 +1602,17 @@ void /*__init*/ print_PIC(void)
1600 printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); 1602 printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
1601} 1603}
1602 1604
1603#endif /* 0 */ 1605__apicdebuginit(int) print_all_ICs(void)
1606{
1607 print_PIC();
1608 print_all_local_APICs();
1609 print_IO_APIC();
1610
1611 return 0;
1612}
1613
1614fs_initcall(print_all_ICs);
1615
1604 1616
1605static void __init enable_IO_APIC(void) 1617static void __init enable_IO_APIC(void)
1606{ 1618{
@@ -1698,8 +1710,7 @@ void disable_IO_APIC(void)
1698 entry.dest_mode = 0; /* Physical */ 1710 entry.dest_mode = 0; /* Physical */
1699 entry.delivery_mode = dest_ExtINT; /* ExtInt */ 1711 entry.delivery_mode = dest_ExtINT; /* ExtInt */
1700 entry.vector = 0; 1712 entry.vector = 0;
1701 entry.dest.physical.physical_dest = 1713 entry.dest.physical.physical_dest = read_apic_id();
1702 GET_APIC_ID(read_apic_id());
1703 1714
1704 /* 1715 /*
1705 * Add it to the IO-APIC irq-routing table: 1716 * Add it to the IO-APIC irq-routing table:
@@ -1725,10 +1736,8 @@ static void __init setup_ioapic_ids_from_mpc(void)
1725 unsigned char old_id; 1736 unsigned char old_id;
1726 unsigned long flags; 1737 unsigned long flags;
1727 1738
1728#ifdef CONFIG_X86_NUMAQ 1739 if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
1729 if (found_numaq)
1730 return; 1740 return;
1731#endif
1732 1741
1733 /* 1742 /*
1734 * Don't check I/O APIC IDs for xAPIC systems. They have 1743 * Don't check I/O APIC IDs for xAPIC systems. They have
@@ -2329,8 +2338,6 @@ void __init setup_IO_APIC(void)
2329 setup_IO_APIC_irqs(); 2338 setup_IO_APIC_irqs();
2330 init_IO_APIC_traps(); 2339 init_IO_APIC_traps();
2331 check_timer(); 2340 check_timer();
2332 if (!acpi_ioapic)
2333 print_IO_APIC();
2334} 2341}
2335 2342
2336/* 2343/*
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 61a83b70c18..a1bec2969c6 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -37,6 +37,7 @@
37#include <acpi/acpi_bus.h> 37#include <acpi/acpi_bus.h>
38#endif 38#endif
39#include <linux/bootmem.h> 39#include <linux/bootmem.h>
40#include <linux/dmar.h>
40 41
41#include <asm/idle.h> 42#include <asm/idle.h>
42#include <asm/io.h> 43#include <asm/io.h>
@@ -49,10 +50,13 @@
49#include <asm/nmi.h> 50#include <asm/nmi.h>
50#include <asm/msidef.h> 51#include <asm/msidef.h>
51#include <asm/hypertransport.h> 52#include <asm/hypertransport.h>
53#include <asm/irq_remapping.h>
52 54
53#include <mach_ipi.h> 55#include <mach_ipi.h>
54#include <mach_apic.h> 56#include <mach_apic.h>
55 57
58#define __apicdebuginit(type) static type __init
59
56struct irq_cfg { 60struct irq_cfg {
57 cpumask_t domain; 61 cpumask_t domain;
58 cpumask_t old_domain; 62 cpumask_t old_domain;
@@ -87,8 +91,6 @@ int first_system_vector = 0xfe;
87 91
88char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; 92char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
89 93
90#define __apicdebuginit __init
91
92int sis_apic_bug; /* not actually supported, dummy for compile */ 94int sis_apic_bug; /* not actually supported, dummy for compile */
93 95
94static int no_timer_check; 96static int no_timer_check;
@@ -108,6 +110,9 @@ static DEFINE_SPINLOCK(vector_lock);
108 */ 110 */
109int nr_ioapic_registers[MAX_IO_APICS]; 111int nr_ioapic_registers[MAX_IO_APICS];
110 112
113/* I/O APIC RTE contents at the OS boot up */
114struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
115
111/* I/O APIC entries */ 116/* I/O APIC entries */
112struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; 117struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
113int nr_ioapics; 118int nr_ioapics;
@@ -303,7 +308,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
303 pin = entry->pin; 308 pin = entry->pin;
304 if (pin == -1) 309 if (pin == -1)
305 break; 310 break;
306 io_apic_write(apic, 0x11 + pin*2, dest); 311 /*
312 * With interrupt-remapping, destination information comes
313 * from interrupt-remapping table entry.
314 */
315 if (!irq_remapped(irq))
316 io_apic_write(apic, 0x11 + pin*2, dest);
307 reg = io_apic_read(apic, 0x10 + pin*2); 317 reg = io_apic_read(apic, 0x10 + pin*2);
308 reg &= ~IO_APIC_REDIR_VECTOR_MASK; 318 reg &= ~IO_APIC_REDIR_VECTOR_MASK;
309 reg |= vector; 319 reg |= vector;
@@ -440,6 +450,69 @@ static void clear_IO_APIC (void)
440 clear_IO_APIC_pin(apic, pin); 450 clear_IO_APIC_pin(apic, pin);
441} 451}
442 452
453/*
454 * Saves and masks all the unmasked IO-APIC RTE's
455 */
456int save_mask_IO_APIC_setup(void)
457{
458 union IO_APIC_reg_01 reg_01;
459 unsigned long flags;
460 int apic, pin;
461
462 /*
463 * The number of IO-APIC IRQ registers (== #pins):
464 */
465 for (apic = 0; apic < nr_ioapics; apic++) {
466 spin_lock_irqsave(&ioapic_lock, flags);
467 reg_01.raw = io_apic_read(apic, 1);
468 spin_unlock_irqrestore(&ioapic_lock, flags);
469 nr_ioapic_registers[apic] = reg_01.bits.entries+1;
470 }
471
472 for (apic = 0; apic < nr_ioapics; apic++) {
473 early_ioapic_entries[apic] =
474 kzalloc(sizeof(struct IO_APIC_route_entry) *
475 nr_ioapic_registers[apic], GFP_KERNEL);
476 if (!early_ioapic_entries[apic])
477 return -ENOMEM;
478 }
479
480 for (apic = 0; apic < nr_ioapics; apic++)
481 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
482 struct IO_APIC_route_entry entry;
483
484 entry = early_ioapic_entries[apic][pin] =
485 ioapic_read_entry(apic, pin);
486 if (!entry.mask) {
487 entry.mask = 1;
488 ioapic_write_entry(apic, pin, entry);
489 }
490 }
491 return 0;
492}
493
494void restore_IO_APIC_setup(void)
495{
496 int apic, pin;
497
498 for (apic = 0; apic < nr_ioapics; apic++)
499 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
500 ioapic_write_entry(apic, pin,
501 early_ioapic_entries[apic][pin]);
502}
503
504void reinit_intr_remapped_IO_APIC(int intr_remapping)
505{
506 /*
507 * for now plain restore of previous settings.
508 * TBD: In the case of OS enabling interrupt-remapping,
509 * IO-APIC RTE's need to be setup to point to interrupt-remapping
510 * table entries. for now, do a plain restore, and wait for
511 * the setup_IO_APIC_irqs() to do proper initialization.
512 */
513 restore_IO_APIC_setup();
514}
515
443int skip_ioapic_setup; 516int skip_ioapic_setup;
444int ioapic_force; 517int ioapic_force;
445 518
@@ -839,18 +912,98 @@ void __setup_vector_irq(int cpu)
839} 912}
840 913
841static struct irq_chip ioapic_chip; 914static struct irq_chip ioapic_chip;
915#ifdef CONFIG_INTR_REMAP
916static struct irq_chip ir_ioapic_chip;
917#endif
842 918
843static void ioapic_register_intr(int irq, unsigned long trigger) 919static void ioapic_register_intr(int irq, unsigned long trigger)
844{ 920{
845 if (trigger) { 921 if (trigger)
846 irq_desc[irq].status |= IRQ_LEVEL; 922 irq_desc[irq].status |= IRQ_LEVEL;
847 set_irq_chip_and_handler_name(irq, &ioapic_chip, 923 else
848 handle_fasteoi_irq, "fasteoi");
849 } else {
850 irq_desc[irq].status &= ~IRQ_LEVEL; 924 irq_desc[irq].status &= ~IRQ_LEVEL;
925
926#ifdef CONFIG_INTR_REMAP
927 if (irq_remapped(irq)) {
928 irq_desc[irq].status |= IRQ_MOVE_PCNTXT;
929 if (trigger)
930 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
931 handle_fasteoi_irq,
932 "fasteoi");
933 else
934 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
935 handle_edge_irq, "edge");
936 return;
937 }
938#endif
939 if (trigger)
940 set_irq_chip_and_handler_name(irq, &ioapic_chip,
941 handle_fasteoi_irq,
942 "fasteoi");
943 else
851 set_irq_chip_and_handler_name(irq, &ioapic_chip, 944 set_irq_chip_and_handler_name(irq, &ioapic_chip,
852 handle_edge_irq, "edge"); 945 handle_edge_irq, "edge");
946}
947
948static int setup_ioapic_entry(int apic, int irq,
949 struct IO_APIC_route_entry *entry,
950 unsigned int destination, int trigger,
951 int polarity, int vector)
952{
953 /*
954 * add it to the IO-APIC irq-routing table:
955 */
956 memset(entry,0,sizeof(*entry));
957
958#ifdef CONFIG_INTR_REMAP
959 if (intr_remapping_enabled) {
960 struct intel_iommu *iommu = map_ioapic_to_ir(apic);
961 struct irte irte;
962 struct IR_IO_APIC_route_entry *ir_entry =
963 (struct IR_IO_APIC_route_entry *) entry;
964 int index;
965
966 if (!iommu)
967 panic("No mapping iommu for ioapic %d\n", apic);
968
969 index = alloc_irte(iommu, irq, 1);
970 if (index < 0)
971 panic("Failed to allocate IRTE for ioapic %d\n", apic);
972
973 memset(&irte, 0, sizeof(irte));
974
975 irte.present = 1;
976 irte.dst_mode = INT_DEST_MODE;
977 irte.trigger_mode = trigger;
978 irte.dlvry_mode = INT_DELIVERY_MODE;
979 irte.vector = vector;
980 irte.dest_id = IRTE_DEST(destination);
981
982 modify_irte(irq, &irte);
983
984 ir_entry->index2 = (index >> 15) & 0x1;
985 ir_entry->zero = 0;
986 ir_entry->format = 1;
987 ir_entry->index = (index & 0x7fff);
988 } else
989#endif
990 {
991 entry->delivery_mode = INT_DELIVERY_MODE;
992 entry->dest_mode = INT_DEST_MODE;
993 entry->dest = destination;
853 } 994 }
995
996 entry->mask = 0; /* enable IRQ */
997 entry->trigger = trigger;
998 entry->polarity = polarity;
999 entry->vector = vector;
1000
1001 /* Mask level triggered irqs.
1002 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
1003 */
1004 if (trigger)
1005 entry->mask = 1;
1006 return 0;
854} 1007}
855 1008
856static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, 1009static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
@@ -875,24 +1028,15 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
875 apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, 1028 apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
876 irq, trigger, polarity); 1029 irq, trigger, polarity);
877 1030
878 /*
879 * add it to the IO-APIC irq-routing table:
880 */
881 memset(&entry,0,sizeof(entry));
882
883 entry.delivery_mode = INT_DELIVERY_MODE;
884 entry.dest_mode = INT_DEST_MODE;
885 entry.dest = cpu_mask_to_apicid(mask);
886 entry.mask = 0; /* enable IRQ */
887 entry.trigger = trigger;
888 entry.polarity = polarity;
889 entry.vector = cfg->vector;
890 1031
891 /* Mask level triggered irqs. 1032 if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
892 * Use IRQ_DELAYED_DISABLE for edge triggered irqs. 1033 cpu_mask_to_apicid(mask), trigger, polarity,
893 */ 1034 cfg->vector)) {
894 if (trigger) 1035 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
895 entry.mask = 1; 1036 mp_ioapics[apic].mp_apicid, pin);
1037 __clear_irq_vector(irq);
1038 return;
1039 }
896 1040
897 ioapic_register_intr(irq, trigger); 1041 ioapic_register_intr(irq, trigger);
898 if (irq < 16) 1042 if (irq < 16)
@@ -944,6 +1088,9 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
944{ 1088{
945 struct IO_APIC_route_entry entry; 1089 struct IO_APIC_route_entry entry;
946 1090
1091 if (intr_remapping_enabled)
1092 return;
1093
947 memset(&entry, 0, sizeof(entry)); 1094 memset(&entry, 0, sizeof(entry));
948 1095
949 /* 1096 /*
@@ -970,7 +1117,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
970 ioapic_write_entry(apic, pin, entry); 1117 ioapic_write_entry(apic, pin, entry);
971} 1118}
972 1119
973void __apicdebuginit print_IO_APIC(void) 1120
1121__apicdebuginit(void) print_IO_APIC(void)
974{ 1122{
975 int apic, i; 1123 int apic, i;
976 union IO_APIC_reg_00 reg_00; 1124 union IO_APIC_reg_00 reg_00;
@@ -1064,9 +1212,7 @@ void __apicdebuginit print_IO_APIC(void)
1064 return; 1212 return;
1065} 1213}
1066 1214
1067#if 0 1215__apicdebuginit(void) print_APIC_bitfield(int base)
1068
1069static __apicdebuginit void print_APIC_bitfield (int base)
1070{ 1216{
1071 unsigned int v; 1217 unsigned int v;
1072 int i, j; 1218 int i, j;
@@ -1087,9 +1233,10 @@ static __apicdebuginit void print_APIC_bitfield (int base)
1087 } 1233 }
1088} 1234}
1089 1235
1090void __apicdebuginit print_local_APIC(void * dummy) 1236__apicdebuginit(void) print_local_APIC(void *dummy)
1091{ 1237{
1092 unsigned int v, ver, maxlvt; 1238 unsigned int v, ver, maxlvt;
1239 unsigned long icr;
1093 1240
1094 if (apic_verbosity == APIC_QUIET) 1241 if (apic_verbosity == APIC_QUIET)
1095 return; 1242 return;
@@ -1097,7 +1244,7 @@ void __apicdebuginit print_local_APIC(void * dummy)
1097 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", 1244 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
1098 smp_processor_id(), hard_smp_processor_id()); 1245 smp_processor_id(), hard_smp_processor_id());
1099 v = apic_read(APIC_ID); 1246 v = apic_read(APIC_ID);
1100 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id())); 1247 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id());
1101 v = apic_read(APIC_LVR); 1248 v = apic_read(APIC_LVR);
1102 printk(KERN_INFO "... APIC VERSION: %08x\n", v); 1249 printk(KERN_INFO "... APIC VERSION: %08x\n", v);
1103 ver = GET_APIC_VERSION(v); 1250 ver = GET_APIC_VERSION(v);
@@ -1133,10 +1280,9 @@ void __apicdebuginit print_local_APIC(void * dummy)
1133 v = apic_read(APIC_ESR); 1280 v = apic_read(APIC_ESR);
1134 printk(KERN_DEBUG "... APIC ESR: %08x\n", v); 1281 printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
1135 1282
1136 v = apic_read(APIC_ICR); 1283 icr = apic_icr_read();
1137 printk(KERN_DEBUG "... APIC ICR: %08x\n", v); 1284 printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
1138 v = apic_read(APIC_ICR2); 1285 printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
1139 printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
1140 1286
1141 v = apic_read(APIC_LVTT); 1287 v = apic_read(APIC_LVTT);
1142 printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); 1288 printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
@@ -1164,12 +1310,12 @@ void __apicdebuginit print_local_APIC(void * dummy)
1164 printk("\n"); 1310 printk("\n");
1165} 1311}
1166 1312
1167void print_all_local_APICs (void) 1313__apicdebuginit(void) print_all_local_APICs(void)
1168{ 1314{
1169 on_each_cpu(print_local_APIC, NULL, 1); 1315 on_each_cpu(print_local_APIC, NULL, 1);
1170} 1316}
1171 1317
1172void __apicdebuginit print_PIC(void) 1318__apicdebuginit(void) print_PIC(void)
1173{ 1319{
1174 unsigned int v; 1320 unsigned int v;
1175 unsigned long flags; 1321 unsigned long flags;
@@ -1201,7 +1347,17 @@ void __apicdebuginit print_PIC(void)
1201 printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); 1347 printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
1202} 1348}
1203 1349
1204#endif /* 0 */ 1350__apicdebuginit(int) print_all_ICs(void)
1351{
1352 print_PIC();
1353 print_all_local_APICs();
1354 print_IO_APIC();
1355
1356 return 0;
1357}
1358
1359fs_initcall(print_all_ICs);
1360
1205 1361
1206void __init enable_IO_APIC(void) 1362void __init enable_IO_APIC(void)
1207{ 1363{
@@ -1291,7 +1447,7 @@ void disable_IO_APIC(void)
1291 entry.dest_mode = 0; /* Physical */ 1447 entry.dest_mode = 0; /* Physical */
1292 entry.delivery_mode = dest_ExtINT; /* ExtInt */ 1448 entry.delivery_mode = dest_ExtINT; /* ExtInt */
1293 entry.vector = 0; 1449 entry.vector = 0;
1294 entry.dest = GET_APIC_ID(read_apic_id()); 1450 entry.dest = read_apic_id();
1295 1451
1296 /* 1452 /*
1297 * Add it to the IO-APIC irq-routing table: 1453 * Add it to the IO-APIC irq-routing table:
@@ -1397,6 +1553,147 @@ static int ioapic_retrigger_irq(unsigned int irq)
1397 */ 1553 */
1398 1554
1399#ifdef CONFIG_SMP 1555#ifdef CONFIG_SMP
1556
1557#ifdef CONFIG_INTR_REMAP
1558static void ir_irq_migration(struct work_struct *work);
1559
1560static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
1561
1562/*
1563 * Migrate the IO-APIC irq in the presence of intr-remapping.
1564 *
1565 * For edge triggered, irq migration is a simple atomic update(of vector
1566 * and cpu destination) of IRTE and flush the hardware cache.
1567 *
1568 * For level triggered, we need to modify the io-apic RTE aswell with the update
1569 * vector information, along with modifying IRTE with vector and destination.
1570 * So irq migration for level triggered is little bit more complex compared to
1571 * edge triggered migration. But the good news is, we use the same algorithm
1572 * for level triggered migration as we have today, only difference being,
1573 * we now initiate the irq migration from process context instead of the
1574 * interrupt context.
1575 *
1576 * In future, when we do a directed EOI (combined with cpu EOI broadcast
1577 * suppression) to the IO-APIC, level triggered irq migration will also be
1578 * as simple as edge triggered migration and we can do the irq migration
1579 * with a simple atomic update to IO-APIC RTE.
1580 */
1581static void migrate_ioapic_irq(int irq, cpumask_t mask)
1582{
1583 struct irq_cfg *cfg = irq_cfg + irq;
1584 struct irq_desc *desc = irq_desc + irq;
1585 cpumask_t tmp, cleanup_mask;
1586 struct irte irte;
1587 int modify_ioapic_rte = desc->status & IRQ_LEVEL;
1588 unsigned int dest;
1589 unsigned long flags;
1590
1591 cpus_and(tmp, mask, cpu_online_map);
1592 if (cpus_empty(tmp))
1593 return;
1594
1595 if (get_irte(irq, &irte))
1596 return;
1597
1598 if (assign_irq_vector(irq, mask))
1599 return;
1600
1601 cpus_and(tmp, cfg->domain, mask);
1602 dest = cpu_mask_to_apicid(tmp);
1603
1604 if (modify_ioapic_rte) {
1605 spin_lock_irqsave(&ioapic_lock, flags);
1606 __target_IO_APIC_irq(irq, dest, cfg->vector);
1607 spin_unlock_irqrestore(&ioapic_lock, flags);
1608 }
1609
1610 irte.vector = cfg->vector;
1611 irte.dest_id = IRTE_DEST(dest);
1612
1613 /*
1614 * Modified the IRTE and flushes the Interrupt entry cache.
1615 */
1616 modify_irte(irq, &irte);
1617
1618 if (cfg->move_in_progress) {
1619 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
1620 cfg->move_cleanup_count = cpus_weight(cleanup_mask);
1621 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
1622 cfg->move_in_progress = 0;
1623 }
1624
1625 irq_desc[irq].affinity = mask;
1626}
1627
1628static int migrate_irq_remapped_level(int irq)
1629{
1630 int ret = -1;
1631
1632 mask_IO_APIC_irq(irq);
1633
1634 if (io_apic_level_ack_pending(irq)) {
1635 /*
1636 * Interrupt in progress. Migrating irq now will change the
1637 * vector information in the IO-APIC RTE and that will confuse
1638 * the EOI broadcast performed by cpu.
1639 * So, delay the irq migration to the next instance.
1640 */
1641 schedule_delayed_work(&ir_migration_work, 1);
1642 goto unmask;
1643 }
1644
1645 /* everthing is clear. we have right of way */
1646 migrate_ioapic_irq(irq, irq_desc[irq].pending_mask);
1647
1648 ret = 0;
1649 irq_desc[irq].status &= ~IRQ_MOVE_PENDING;
1650 cpus_clear(irq_desc[irq].pending_mask);
1651
1652unmask:
1653 unmask_IO_APIC_irq(irq);
1654 return ret;
1655}
1656
1657static void ir_irq_migration(struct work_struct *work)
1658{
1659 int irq;
1660
1661 for (irq = 0; irq < NR_IRQS; irq++) {
1662 struct irq_desc *desc = irq_desc + irq;
1663 if (desc->status & IRQ_MOVE_PENDING) {
1664 unsigned long flags;
1665
1666 spin_lock_irqsave(&desc->lock, flags);
1667 if (!desc->chip->set_affinity ||
1668 !(desc->status & IRQ_MOVE_PENDING)) {
1669 desc->status &= ~IRQ_MOVE_PENDING;
1670 spin_unlock_irqrestore(&desc->lock, flags);
1671 continue;
1672 }
1673
1674 desc->chip->set_affinity(irq,
1675 irq_desc[irq].pending_mask);
1676 spin_unlock_irqrestore(&desc->lock, flags);
1677 }
1678 }
1679}
1680
1681/*
1682 * Migrates the IRQ destination in the process context.
1683 */
1684static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
1685{
1686 if (irq_desc[irq].status & IRQ_LEVEL) {
1687 irq_desc[irq].status |= IRQ_MOVE_PENDING;
1688 irq_desc[irq].pending_mask = mask;
1689 migrate_irq_remapped_level(irq);
1690 return;
1691 }
1692
1693 migrate_ioapic_irq(irq, mask);
1694}
1695#endif
1696
1400asmlinkage void smp_irq_move_cleanup_interrupt(void) 1697asmlinkage void smp_irq_move_cleanup_interrupt(void)
1401{ 1698{
1402 unsigned vector, me; 1699 unsigned vector, me;
@@ -1453,6 +1750,17 @@ static void irq_complete_move(unsigned int irq)
1453#else 1750#else
1454static inline void irq_complete_move(unsigned int irq) {} 1751static inline void irq_complete_move(unsigned int irq) {}
1455#endif 1752#endif
1753#ifdef CONFIG_INTR_REMAP
1754static void ack_x2apic_level(unsigned int irq)
1755{
1756 ack_x2APIC_irq();
1757}
1758
1759static void ack_x2apic_edge(unsigned int irq)
1760{
1761 ack_x2APIC_irq();
1762}
1763#endif
1456 1764
1457static void ack_apic_edge(unsigned int irq) 1765static void ack_apic_edge(unsigned int irq)
1458{ 1766{
@@ -1527,6 +1835,21 @@ static struct irq_chip ioapic_chip __read_mostly = {
1527 .retrigger = ioapic_retrigger_irq, 1835 .retrigger = ioapic_retrigger_irq,
1528}; 1836};
1529 1837
1838#ifdef CONFIG_INTR_REMAP
1839static struct irq_chip ir_ioapic_chip __read_mostly = {
1840 .name = "IR-IO-APIC",
1841 .startup = startup_ioapic_irq,
1842 .mask = mask_IO_APIC_irq,
1843 .unmask = unmask_IO_APIC_irq,
1844 .ack = ack_x2apic_edge,
1845 .eoi = ack_x2apic_level,
1846#ifdef CONFIG_SMP
1847 .set_affinity = set_ir_ioapic_affinity_irq,
1848#endif
1849 .retrigger = ioapic_retrigger_irq,
1850};
1851#endif
1852
1530static inline void init_IO_APIC_traps(void) 1853static inline void init_IO_APIC_traps(void)
1531{ 1854{
1532 int irq; 1855 int irq;
@@ -1712,6 +2035,8 @@ static inline void __init check_timer(void)
1712 * 8259A. 2035 * 8259A.
1713 */ 2036 */
1714 if (pin1 == -1) { 2037 if (pin1 == -1) {
2038 if (intr_remapping_enabled)
2039 panic("BIOS bug: timer not connected to IO-APIC");
1715 pin1 = pin2; 2040 pin1 = pin2;
1716 apic1 = apic2; 2041 apic1 = apic2;
1717 no_pin1 = 1; 2042 no_pin1 = 1;
@@ -1738,6 +2063,8 @@ static inline void __init check_timer(void)
1738 clear_IO_APIC_pin(0, pin1); 2063 clear_IO_APIC_pin(0, pin1);
1739 goto out; 2064 goto out;
1740 } 2065 }
2066 if (intr_remapping_enabled)
2067 panic("timer doesn't work through Interrupt-remapped IO-APIC");
1741 clear_IO_APIC_pin(apic1, pin1); 2068 clear_IO_APIC_pin(apic1, pin1);
1742 if (!no_pin1) 2069 if (!no_pin1)
1743 apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " 2070 apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -1854,8 +2181,6 @@ void __init setup_IO_APIC(void)
1854 setup_IO_APIC_irqs(); 2181 setup_IO_APIC_irqs();
1855 init_IO_APIC_traps(); 2182 init_IO_APIC_traps();
1856 check_timer(); 2183 check_timer();
1857 if (!acpi_ioapic)
1858 print_IO_APIC();
1859} 2184}
1860 2185
1861struct sysfs_ioapic_data { 2186struct sysfs_ioapic_data {
@@ -1977,6 +2302,9 @@ void destroy_irq(unsigned int irq)
1977 2302
1978 dynamic_irq_cleanup(irq); 2303 dynamic_irq_cleanup(irq);
1979 2304
2305#ifdef CONFIG_INTR_REMAP
2306 free_irte(irq);
2307#endif
1980 spin_lock_irqsave(&vector_lock, flags); 2308 spin_lock_irqsave(&vector_lock, flags);
1981 __clear_irq_vector(irq); 2309 __clear_irq_vector(irq);
1982 spin_unlock_irqrestore(&vector_lock, flags); 2310 spin_unlock_irqrestore(&vector_lock, flags);
@@ -1995,11 +2323,42 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
1995 2323
1996 tmp = TARGET_CPUS; 2324 tmp = TARGET_CPUS;
1997 err = assign_irq_vector(irq, tmp); 2325 err = assign_irq_vector(irq, tmp);
1998 if (!err) { 2326 if (err)
1999 cpus_and(tmp, cfg->domain, tmp); 2327 return err;
2000 dest = cpu_mask_to_apicid(tmp); 2328
2329 cpus_and(tmp, cfg->domain, tmp);
2330 dest = cpu_mask_to_apicid(tmp);
2331
2332#ifdef CONFIG_INTR_REMAP
2333 if (irq_remapped(irq)) {
2334 struct irte irte;
2335 int ir_index;
2336 u16 sub_handle;
2337
2338 ir_index = map_irq_to_irte_handle(irq, &sub_handle);
2339 BUG_ON(ir_index == -1);
2340
2341 memset (&irte, 0, sizeof(irte));
2342
2343 irte.present = 1;
2344 irte.dst_mode = INT_DEST_MODE;
2345 irte.trigger_mode = 0; /* edge */
2346 irte.dlvry_mode = INT_DELIVERY_MODE;
2347 irte.vector = cfg->vector;
2348 irte.dest_id = IRTE_DEST(dest);
2349
2350 modify_irte(irq, &irte);
2001 2351
2002 msg->address_hi = MSI_ADDR_BASE_HI; 2352 msg->address_hi = MSI_ADDR_BASE_HI;
2353 msg->data = sub_handle;
2354 msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
2355 MSI_ADDR_IR_SHV |
2356 MSI_ADDR_IR_INDEX1(ir_index) |
2357 MSI_ADDR_IR_INDEX2(ir_index);
2358 } else
2359#endif
2360 {
2361 msg->address_hi = MSI_ADDR_BASE_HI;
2003 msg->address_lo = 2362 msg->address_lo =
2004 MSI_ADDR_BASE_LO | 2363 MSI_ADDR_BASE_LO |
2005 ((INT_DEST_MODE == 0) ? 2364 ((INT_DEST_MODE == 0) ?
@@ -2049,6 +2408,55 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2049 write_msi_msg(irq, &msg); 2408 write_msi_msg(irq, &msg);
2050 irq_desc[irq].affinity = mask; 2409 irq_desc[irq].affinity = mask;
2051} 2410}
2411
2412#ifdef CONFIG_INTR_REMAP
2413/*
2414 * Migrate the MSI irq to another cpumask. This migration is
2415 * done in the process context using interrupt-remapping hardware.
2416 */
2417static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2418{
2419 struct irq_cfg *cfg = irq_cfg + irq;
2420 unsigned int dest;
2421 cpumask_t tmp, cleanup_mask;
2422 struct irte irte;
2423
2424 cpus_and(tmp, mask, cpu_online_map);
2425 if (cpus_empty(tmp))
2426 return;
2427
2428 if (get_irte(irq, &irte))
2429 return;
2430
2431 if (assign_irq_vector(irq, mask))
2432 return;
2433
2434 cpus_and(tmp, cfg->domain, mask);
2435 dest = cpu_mask_to_apicid(tmp);
2436
2437 irte.vector = cfg->vector;
2438 irte.dest_id = IRTE_DEST(dest);
2439
2440 /*
2441 * atomically update the IRTE with the new destination and vector.
2442 */
2443 modify_irte(irq, &irte);
2444
2445 /*
2446 * After this point, all the interrupts will start arriving
2447 * at the new destination. So, time to cleanup the previous
2448 * vector allocation.
2449 */
2450 if (cfg->move_in_progress) {
2451 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
2452 cfg->move_cleanup_count = cpus_weight(cleanup_mask);
2453 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
2454 cfg->move_in_progress = 0;
2455 }
2456
2457 irq_desc[irq].affinity = mask;
2458}
2459#endif
2052#endif /* CONFIG_SMP */ 2460#endif /* CONFIG_SMP */
2053 2461
2054/* 2462/*
@@ -2066,26 +2474,157 @@ static struct irq_chip msi_chip = {
2066 .retrigger = ioapic_retrigger_irq, 2474 .retrigger = ioapic_retrigger_irq,
2067}; 2475};
2068 2476
2069int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) 2477#ifdef CONFIG_INTR_REMAP
2478static struct irq_chip msi_ir_chip = {
2479 .name = "IR-PCI-MSI",
2480 .unmask = unmask_msi_irq,
2481 .mask = mask_msi_irq,
2482 .ack = ack_x2apic_edge,
2483#ifdef CONFIG_SMP
2484 .set_affinity = ir_set_msi_irq_affinity,
2485#endif
2486 .retrigger = ioapic_retrigger_irq,
2487};
2488
2489/*
2490 * Map the PCI dev to the corresponding remapping hardware unit
2491 * and allocate 'nvec' consecutive interrupt-remapping table entries
2492 * in it.
2493 */
2494static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
2070{ 2495{
2496 struct intel_iommu *iommu;
2497 int index;
2498
2499 iommu = map_dev_to_ir(dev);
2500 if (!iommu) {
2501 printk(KERN_ERR
2502 "Unable to map PCI %s to iommu\n", pci_name(dev));
2503 return -ENOENT;
2504 }
2505
2506 index = alloc_irte(iommu, irq, nvec);
2507 if (index < 0) {
2508 printk(KERN_ERR
2509 "Unable to allocate %d IRTE for PCI %s\n", nvec,
2510 pci_name(dev));
2511 return -ENOSPC;
2512 }
2513 return index;
2514}
2515#endif
2516
2517static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
2518{
2519 int ret;
2071 struct msi_msg msg; 2520 struct msi_msg msg;
2521
2522 ret = msi_compose_msg(dev, irq, &msg);
2523 if (ret < 0)
2524 return ret;
2525
2526 set_irq_msi(irq, desc);
2527 write_msi_msg(irq, &msg);
2528
2529#ifdef CONFIG_INTR_REMAP
2530 if (irq_remapped(irq)) {
2531 struct irq_desc *desc = irq_desc + irq;
2532 /*
2533 * irq migration in process context
2534 */
2535 desc->status |= IRQ_MOVE_PCNTXT;
2536 set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
2537 } else
2538#endif
2539 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
2540
2541 return 0;
2542}
2543
2544int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
2545{
2072 int irq, ret; 2546 int irq, ret;
2547
2073 irq = create_irq(); 2548 irq = create_irq();
2074 if (irq < 0) 2549 if (irq < 0)
2075 return irq; 2550 return irq;
2076 2551
2077 ret = msi_compose_msg(dev, irq, &msg); 2552#ifdef CONFIG_INTR_REMAP
2553 if (!intr_remapping_enabled)
2554 goto no_ir;
2555
2556 ret = msi_alloc_irte(dev, irq, 1);
2557 if (ret < 0)
2558 goto error;
2559no_ir:
2560#endif
2561 ret = setup_msi_irq(dev, desc, irq);
2078 if (ret < 0) { 2562 if (ret < 0) {
2079 destroy_irq(irq); 2563 destroy_irq(irq);
2080 return ret; 2564 return ret;
2081 } 2565 }
2566 return 0;
2082 2567
2083 set_irq_msi(irq, desc); 2568#ifdef CONFIG_INTR_REMAP
2084 write_msi_msg(irq, &msg); 2569error:
2570 destroy_irq(irq);
2571 return ret;
2572#endif
2573}
2085 2574
2086 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); 2575int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
2576{
2577 int irq, ret, sub_handle;
2578 struct msi_desc *desc;
2579#ifdef CONFIG_INTR_REMAP
2580 struct intel_iommu *iommu = 0;
2581 int index = 0;
2582#endif
2583
2584 sub_handle = 0;
2585 list_for_each_entry(desc, &dev->msi_list, list) {
2586 irq = create_irq();
2587 if (irq < 0)
2588 return irq;
2589#ifdef CONFIG_INTR_REMAP
2590 if (!intr_remapping_enabled)
2591 goto no_ir;
2087 2592
2593 if (!sub_handle) {
2594 /*
2595 * allocate the consecutive block of IRTE's
2596 * for 'nvec'
2597 */
2598 index = msi_alloc_irte(dev, irq, nvec);
2599 if (index < 0) {
2600 ret = index;
2601 goto error;
2602 }
2603 } else {
2604 iommu = map_dev_to_ir(dev);
2605 if (!iommu) {
2606 ret = -ENOENT;
2607 goto error;
2608 }
2609 /*
2610 * setup the mapping between the irq and the IRTE
2611 * base index, the sub_handle pointing to the
2612 * appropriate interrupt remap table entry.
2613 */
2614 set_irte_irq(irq, iommu, index, sub_handle);
2615 }
2616no_ir:
2617#endif
2618 ret = setup_msi_irq(dev, desc, irq);
2619 if (ret < 0)
2620 goto error;
2621 sub_handle++;
2622 }
2088 return 0; 2623 return 0;
2624
2625error:
2626 destroy_irq(irq);
2627 return ret;
2089} 2628}
2090 2629
2091void arch_teardown_msi_irq(unsigned int irq) 2630void arch_teardown_msi_irq(unsigned int irq)
@@ -2333,6 +2872,10 @@ void __init setup_ioapic_dest(void)
2333 setup_IO_APIC_irq(ioapic, pin, irq, 2872 setup_IO_APIC_irq(ioapic, pin, irq,
2334 irq_trigger(irq_entry), 2873 irq_trigger(irq_entry),
2335 irq_polarity(irq_entry)); 2874 irq_polarity(irq_entry));
2875#ifdef CONFIG_INTR_REMAP
2876 else if (intr_remapping_enabled)
2877 set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
2878#endif
2336 else 2879 else
2337 set_ioapic_affinity_irq(irq, TARGET_CPUS); 2880 set_ioapic_affinity_irq(irq, TARGET_CPUS);
2338 } 2881 }
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index d66914287ee..9200a1e2752 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -74,6 +74,15 @@ void __init init_ISA_irqs (void)
74 } 74 }
75} 75}
76 76
77/*
78 * IRQ2 is cascade interrupt to second interrupt controller
79 */
80static struct irqaction irq2 = {
81 .handler = no_action,
82 .mask = CPU_MASK_NONE,
83 .name = "cascade",
84};
85
77/* Overridden in paravirt.c */ 86/* Overridden in paravirt.c */
78void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); 87void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
79 88
@@ -98,6 +107,46 @@ void __init native_init_IRQ(void)
98 set_intr_gate(vector, interrupt[i]); 107 set_intr_gate(vector, interrupt[i]);
99 } 108 }
100 109
110#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP)
111 /*
112 * IRQ0 must be given a fixed assignment and initialized,
113 * because it's used before the IO-APIC is set up.
114 */
115 set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
116
117 /*
118 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
119 * IPI, driven by wakeup.
120 */
121 alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
122
123 /* IPI for invalidation */
124 alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
125
126 /* IPI for generic function call */
127 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
128
129 /* IPI for single call function */
130 set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt);
131#endif
132
133#ifdef CONFIG_X86_LOCAL_APIC
134 /* self generated IPI for local APIC timer */
135 alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
136
137 /* IPI vectors for APIC spurious and error interrupts */
138 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
139 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
140#endif
141
142#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL)
143 /* thermal monitor LVT interrupt */
144 alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
145#endif
146
147 if (!acpi_ioapic)
148 setup_irq(2, &irq2);
149
101 /* setup after call gates are initialised (usually add in 150 /* setup after call gates are initialised (usually add in
102 * the architecture specific gates) 151 * the architecture specific gates)
103 */ 152 */
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index b3fb430725c..f98f4e1dba0 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -397,7 +397,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
397 generic_bigsmp_probe(); 397 generic_bigsmp_probe();
398#endif 398#endif
399 399
400#ifdef CONFIG_X86_32
400 setup_apic_routing(); 401 setup_apic_routing();
402#endif
401 if (!num_processors) 403 if (!num_processors)
402 printk(KERN_ERR "MPTABLE: no processors registered!\n"); 404 printk(KERN_ERR "MPTABLE: no processors registered!\n");
403 return num_processors; 405 return num_processors;
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index eecc8c18f01..4caff39078e 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -229,6 +229,12 @@ static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
229 } 229 }
230} 230}
231 231
232static int __init numaq_setup_ioapic_ids(void)
233{
234 /* so can skip it */
235 return 1;
236}
237
232static struct x86_quirks numaq_x86_quirks __initdata = { 238static struct x86_quirks numaq_x86_quirks __initdata = {
233 .arch_pre_time_init = numaq_pre_time_init, 239 .arch_pre_time_init = numaq_pre_time_init,
234 .arch_time_init = NULL, 240 .arch_time_init = NULL,
@@ -243,6 +249,7 @@ static struct x86_quirks numaq_x86_quirks __initdata = {
243 .mpc_oem_bus_info = mpc_oem_bus_info, 249 .mpc_oem_bus_info = mpc_oem_bus_info,
244 .mpc_oem_pci_bus = mpc_oem_pci_bus, 250 .mpc_oem_pci_bus = mpc_oem_pci_bus,
245 .smp_read_mpc_oem = smp_read_mpc_oem, 251 .smp_read_mpc_oem = smp_read_mpc_oem,
252 .setup_ioapic_ids = numaq_setup_ioapic_ids,
246}; 253};
247 254
248void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, 255void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index e2f43768723..6b0bb73998d 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -374,8 +374,6 @@ struct pv_cpu_ops pv_cpu_ops = {
374 374
375struct pv_apic_ops pv_apic_ops = { 375struct pv_apic_ops pv_apic_ops = {
376#ifdef CONFIG_X86_LOCAL_APIC 376#ifdef CONFIG_X86_LOCAL_APIC
377 .apic_write = native_apic_write,
378 .apic_read = native_apic_read,
379 .setup_boot_clock = setup_boot_APIC_clock, 377 .setup_boot_clock = setup_boot_APIC_clock,
380 .setup_secondary_clock = setup_secondary_APIC_clock, 378 .setup_secondary_clock = setup_secondary_APIC_clock,
381 .startup_ipi_hook = paravirt_nop, 379 .startup_ipi_hook = paravirt_nop,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 141efab5240..46c98efbbf8 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -758,6 +758,8 @@ void __init setup_arch(char **cmdline_p)
758#else 758#else
759 num_physpages = max_pfn; 759 num_physpages = max_pfn;
760 760
761 if (cpu_has_x2apic)
762 check_x2apic();
761 763
762 /* How many end-of-memory variables you have, grandma! */ 764 /* How many end-of-memory variables you have, grandma! */
763 /* need this before calling reserve_initrd */ 765 /* need this before calling reserve_initrd */
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 45531e3ba19..2ff0bbcd5bd 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -123,7 +123,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
123 123
124static atomic_t init_deasserted; 124static atomic_t init_deasserted;
125 125
126static int boot_cpu_logical_apicid;
127 126
128/* representing cpus for which sibling maps can be computed */ 127/* representing cpus for which sibling maps can be computed */
129static cpumask_t cpu_sibling_setup_map; 128static cpumask_t cpu_sibling_setup_map;
@@ -165,6 +164,8 @@ static void unmap_cpu_to_node(int cpu)
165#endif 164#endif
166 165
167#ifdef CONFIG_X86_32 166#ifdef CONFIG_X86_32
167static int boot_cpu_logical_apicid;
168
168u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = 169u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
169 { [0 ... NR_CPUS-1] = BAD_APICID }; 170 { [0 ... NR_CPUS-1] = BAD_APICID };
170 171
@@ -210,7 +211,7 @@ static void __cpuinit smp_callin(void)
210 /* 211 /*
211 * (This works even if the APIC is not enabled.) 212 * (This works even if the APIC is not enabled.)
212 */ 213 */
213 phys_id = GET_APIC_ID(read_apic_id()); 214 phys_id = read_apic_id();
214 cpuid = smp_processor_id(); 215 cpuid = smp_processor_id();
215 if (cpu_isset(cpuid, cpu_callin_map)) { 216 if (cpu_isset(cpuid, cpu_callin_map)) {
216 panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, 217 panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
@@ -550,8 +551,7 @@ static inline void __inquire_remote_apic(int apicid)
550 printk(KERN_CONT 551 printk(KERN_CONT
551 "a previous APIC delivery may have failed\n"); 552 "a previous APIC delivery may have failed\n");
552 553
553 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); 554 apic_icr_write(APIC_DM_REMRD | regs[i], apicid);
554 apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]);
555 555
556 timeout = 0; 556 timeout = 0;
557 do { 557 do {
@@ -583,11 +583,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
583 int maxlvt; 583 int maxlvt;
584 584
585 /* Target chip */ 585 /* Target chip */
586 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
587
588 /* Boot on the stack */ 586 /* Boot on the stack */
589 /* Kick the second */ 587 /* Kick the second */
590 apic_write(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); 588 apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid);
591 589
592 pr_debug("Waiting for send to finish...\n"); 590 pr_debug("Waiting for send to finish...\n");
593 send_status = safe_apic_wait_icr_idle(); 591 send_status = safe_apic_wait_icr_idle();
@@ -640,13 +638,11 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
640 /* 638 /*
641 * Turn INIT on target chip 639 * Turn INIT on target chip
642 */ 640 */
643 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
644
645 /* 641 /*
646 * Send IPI 642 * Send IPI
647 */ 643 */
648 apic_write(APIC_ICR, 644 apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
649 APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT); 645 phys_apicid);
650 646
651 pr_debug("Waiting for send to finish...\n"); 647 pr_debug("Waiting for send to finish...\n");
652 send_status = safe_apic_wait_icr_idle(); 648 send_status = safe_apic_wait_icr_idle();
@@ -656,10 +652,8 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
656 pr_debug("Deasserting INIT.\n"); 652 pr_debug("Deasserting INIT.\n");
657 653
658 /* Target chip */ 654 /* Target chip */
659 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
660
661 /* Send IPI */ 655 /* Send IPI */
662 apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); 656 apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);
663 657
664 pr_debug("Waiting for send to finish...\n"); 658 pr_debug("Waiting for send to finish...\n");
665 send_status = safe_apic_wait_icr_idle(); 659 send_status = safe_apic_wait_icr_idle();
@@ -702,11 +696,10 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
702 */ 696 */
703 697
704 /* Target chip */ 698 /* Target chip */
705 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
706
707 /* Boot on the stack */ 699 /* Boot on the stack */
708 /* Kick the second */ 700 /* Kick the second */
709 apic_write(APIC_ICR, APIC_DM_STARTUP | (start_eip >> 12)); 701 apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12),
702 phys_apicid);
710 703
711 /* 704 /*
712 * Give the other CPU some time to accept the IPI. 705 * Give the other CPU some time to accept the IPI.
@@ -1175,10 +1168,17 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1175 * Setup boot CPU information 1168 * Setup boot CPU information
1176 */ 1169 */
1177 smp_store_cpu_info(0); /* Final full version of the data */ 1170 smp_store_cpu_info(0); /* Final full version of the data */
1171#ifdef CONFIG_X86_32
1178 boot_cpu_logical_apicid = logical_smp_processor_id(); 1172 boot_cpu_logical_apicid = logical_smp_processor_id();
1173#endif
1179 current_thread_info()->cpu = 0; /* needed? */ 1174 current_thread_info()->cpu = 0; /* needed? */
1180 set_cpu_sibling_map(0); 1175 set_cpu_sibling_map(0);
1181 1176
1177#ifdef CONFIG_X86_64
1178 enable_IR_x2apic();
1179 setup_apic_routing();
1180#endif
1181
1182 if (smp_sanity_check(max_cpus) < 0) { 1182 if (smp_sanity_check(max_cpus) < 0) {
1183 printk(KERN_INFO "SMP disabled\n"); 1183 printk(KERN_INFO "SMP disabled\n");
1184 disable_smp(); 1184 disable_smp();
@@ -1186,9 +1186,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1186 } 1186 }
1187 1187
1188 preempt_disable(); 1188 preempt_disable();
1189 if (GET_APIC_ID(read_apic_id()) != boot_cpu_physical_apicid) { 1189 if (read_apic_id() != boot_cpu_physical_apicid) {
1190 panic("Boot APIC ID in local APIC unexpected (%d vs %d)", 1190 panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
1191 GET_APIC_ID(read_apic_id()), boot_cpu_physical_apicid); 1191 read_apic_id(), boot_cpu_physical_apicid);
1192 /* Or can we switch back to PIC here? */ 1192 /* Or can we switch back to PIC here? */
1193 } 1193 }
1194 preempt_enable(); 1194 preempt_enable();
diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c
index d67ce5f044b..7b987852e87 100644
--- a/arch/x86/kernel/summit_32.c
+++ b/arch/x86/kernel/summit_32.c
@@ -30,7 +30,7 @@
30#include <linux/init.h> 30#include <linux/init.h>
31#include <asm/io.h> 31#include <asm/io.h>
32#include <asm/bios_ebda.h> 32#include <asm/bios_ebda.h>
33#include <asm/mach-summit/mach_mpparse.h> 33#include <asm/summit/mpparse.h>
34 34
35static struct rio_table_hdr *rio_table_hdr __initdata; 35static struct rio_table_hdr *rio_table_hdr __initdata;
36static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; 36static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata;
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 8c9ad02af5a..8b6c393ab9f 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -905,8 +905,8 @@ static inline int __init activate_vmi(void)
905#endif 905#endif
906 906
907#ifdef CONFIG_X86_LOCAL_APIC 907#ifdef CONFIG_X86_LOCAL_APIC
908 para_fill(pv_apic_ops.apic_read, APICRead); 908 para_fill(apic_ops->read, APICRead);
909 para_fill(pv_apic_ops.apic_write, APICWrite); 909 para_fill(apic_ops->write, APICWrite);
910#endif 910#endif
911 911
912 /* 912 /*
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index d9249a882aa..65f0b8a47be 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -55,6 +55,7 @@
55#include <linux/lguest_launcher.h> 55#include <linux/lguest_launcher.h>
56#include <linux/virtio_console.h> 56#include <linux/virtio_console.h>
57#include <linux/pm.h> 57#include <linux/pm.h>
58#include <asm/apic.h>
58#include <asm/lguest.h> 59#include <asm/lguest.h>
59#include <asm/paravirt.h> 60#include <asm/paravirt.h>
60#include <asm/param.h> 61#include <asm/param.h>
@@ -783,14 +784,44 @@ static void lguest_wbinvd(void)
783 * code qualifies for Advanced. It will also never interrupt anything. It 784 * code qualifies for Advanced. It will also never interrupt anything. It
784 * does, however, allow us to get through the Linux boot code. */ 785 * does, however, allow us to get through the Linux boot code. */
785#ifdef CONFIG_X86_LOCAL_APIC 786#ifdef CONFIG_X86_LOCAL_APIC
786static void lguest_apic_write(unsigned long reg, u32 v) 787static void lguest_apic_write(u32 reg, u32 v)
787{ 788{
788} 789}
789 790
790static u32 lguest_apic_read(unsigned long reg) 791static u32 lguest_apic_read(u32 reg)
791{ 792{
792 return 0; 793 return 0;
793} 794}
795
796static u64 lguest_apic_icr_read(void)
797{
798 return 0;
799}
800
801static void lguest_apic_icr_write(u32 low, u32 id)
802{
803 /* Warn to see if there's any stray references */
804 WARN_ON(1);
805}
806
807static void lguest_apic_wait_icr_idle(void)
808{
809 return;
810}
811
812static u32 lguest_apic_safe_wait_icr_idle(void)
813{
814 return 0;
815}
816
817static struct apic_ops lguest_basic_apic_ops = {
818 .read = lguest_apic_read,
819 .write = lguest_apic_write,
820 .icr_read = lguest_apic_icr_read,
821 .icr_write = lguest_apic_icr_write,
822 .wait_icr_idle = lguest_apic_wait_icr_idle,
823 .safe_wait_icr_idle = lguest_apic_safe_wait_icr_idle,
824};
794#endif 825#endif
795 826
796/* STOP! Until an interrupt comes in. */ 827/* STOP! Until an interrupt comes in. */
@@ -990,8 +1021,7 @@ __init void lguest_init(void)
990 1021
991#ifdef CONFIG_X86_LOCAL_APIC 1022#ifdef CONFIG_X86_LOCAL_APIC
992 /* apic read/write intercepts */ 1023 /* apic read/write intercepts */
993 pv_apic_ops.apic_write = lguest_apic_write; 1024 apic_ops = &lguest_basic_apic_ops;
994 pv_apic_ops.apic_read = lguest_apic_read;
995#endif 1025#endif
996 1026
997 /* time operations */ 1027 /* time operations */
diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c
index 3f2cf11f201..37b9ae4d44c 100644
--- a/arch/x86/mach-default/setup.c
+++ b/arch/x86/mach-default/setup.c
@@ -38,15 +38,6 @@ void __init pre_intr_init_hook(void)
38 init_ISA_irqs(); 38 init_ISA_irqs();
39} 39}
40 40
41/*
42 * IRQ2 is cascade interrupt to second interrupt controller
43 */
44static struct irqaction irq2 = {
45 .handler = no_action,
46 .mask = CPU_MASK_NONE,
47 .name = "cascade",
48};
49
50/** 41/**
51 * intr_init_hook - post gate setup interrupt initialisation 42 * intr_init_hook - post gate setup interrupt initialisation
52 * 43 *
@@ -62,12 +53,6 @@ void __init intr_init_hook(void)
62 if (x86_quirks->arch_intr_init()) 53 if (x86_quirks->arch_intr_init())
63 return; 54 return;
64 } 55 }
65#ifdef CONFIG_X86_LOCAL_APIC
66 apic_intr_init();
67#endif
68
69 if (!acpi_ioapic)
70 setup_irq(2, &irq2);
71} 56}
72 57
73/** 58/**
diff --git a/arch/x86/mach-generic/Makefile b/arch/x86/mach-generic/Makefile
index 0dbd7803a1d..4706de7676b 100644
--- a/arch/x86/mach-generic/Makefile
+++ b/arch/x86/mach-generic/Makefile
@@ -9,4 +9,4 @@ obj-$(CONFIG_X86_NUMAQ) += numaq.o
9obj-$(CONFIG_X86_SUMMIT) += summit.o 9obj-$(CONFIG_X86_SUMMIT) += summit.o
10obj-$(CONFIG_X86_BIGSMP) += bigsmp.o 10obj-$(CONFIG_X86_BIGSMP) += bigsmp.o
11obj-$(CONFIG_X86_ES7000) += es7000.o 11obj-$(CONFIG_X86_ES7000) += es7000.o
12obj-$(CONFIG_X86_ES7000) += ../../x86/mach-es7000/ 12obj-$(CONFIG_X86_ES7000) += ../../x86/es7000/
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c
index 59d77171455..df37fc9d6a2 100644
--- a/arch/x86/mach-generic/bigsmp.c
+++ b/arch/x86/mach-generic/bigsmp.c
@@ -5,18 +5,17 @@
5#define APIC_DEFINITION 1 5#define APIC_DEFINITION 1
6#include <linux/threads.h> 6#include <linux/threads.h>
7#include <linux/cpumask.h> 7#include <linux/cpumask.h>
8#include <asm/smp.h>
9#include <asm/mpspec.h> 8#include <asm/mpspec.h>
10#include <asm/genapic.h> 9#include <asm/genapic.h>
11#include <asm/fixmap.h> 10#include <asm/fixmap.h>
12#include <asm/apicdef.h> 11#include <asm/apicdef.h>
13#include <linux/kernel.h> 12#include <linux/kernel.h>
14#include <linux/smp.h>
15#include <linux/init.h> 13#include <linux/init.h>
16#include <linux/dmi.h> 14#include <linux/dmi.h>
17#include <asm/mach-bigsmp/mach_apic.h> 15#include <asm/bigsmp/apicdef.h>
18#include <asm/mach-bigsmp/mach_apicdef.h> 16#include <linux/smp.h>
19#include <asm/mach-bigsmp/mach_ipi.h> 17#include <asm/bigsmp/apic.h>
18#include <asm/bigsmp/ipi.h>
20#include <asm/mach-default/mach_mpparse.h> 19#include <asm/mach-default/mach_mpparse.h>
21 20
22static int dmi_bigsmp; /* can be set by dmi scanners */ 21static int dmi_bigsmp; /* can be set by dmi scanners */
diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c
index 4742626f08c..520cca0ee04 100644
--- a/arch/x86/mach-generic/es7000.c
+++ b/arch/x86/mach-generic/es7000.c
@@ -4,20 +4,19 @@
4#define APIC_DEFINITION 1 4#define APIC_DEFINITION 1
5#include <linux/threads.h> 5#include <linux/threads.h>
6#include <linux/cpumask.h> 6#include <linux/cpumask.h>
7#include <asm/smp.h>
8#include <asm/mpspec.h> 7#include <asm/mpspec.h>
9#include <asm/genapic.h> 8#include <asm/genapic.h>
10#include <asm/fixmap.h> 9#include <asm/fixmap.h>
11#include <asm/apicdef.h> 10#include <asm/apicdef.h>
12#include <linux/kernel.h> 11#include <linux/kernel.h>
13#include <linux/string.h> 12#include <linux/string.h>
14#include <linux/smp.h>
15#include <linux/init.h> 13#include <linux/init.h>
16#include <asm/mach-es7000/mach_apicdef.h> 14#include <asm/es7000/apicdef.h>
17#include <asm/mach-es7000/mach_apic.h> 15#include <linux/smp.h>
18#include <asm/mach-es7000/mach_ipi.h> 16#include <asm/es7000/apic.h>
19#include <asm/mach-es7000/mach_mpparse.h> 17#include <asm/es7000/ipi.h>
20#include <asm/mach-es7000/mach_wakecpu.h> 18#include <asm/es7000/mpparse.h>
19#include <asm/es7000/wakecpu.h>
21 20
22static int probe_es7000(void) 21static int probe_es7000(void)
23{ 22{
diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c
index 8091e68764c..8cf58394975 100644
--- a/arch/x86/mach-generic/numaq.c
+++ b/arch/x86/mach-generic/numaq.c
@@ -4,7 +4,6 @@
4#define APIC_DEFINITION 1 4#define APIC_DEFINITION 1
5#include <linux/threads.h> 5#include <linux/threads.h>
6#include <linux/cpumask.h> 6#include <linux/cpumask.h>
7#include <linux/smp.h>
8#include <asm/mpspec.h> 7#include <asm/mpspec.h>
9#include <asm/genapic.h> 8#include <asm/genapic.h>
10#include <asm/fixmap.h> 9#include <asm/fixmap.h>
@@ -12,11 +11,12 @@
12#include <linux/kernel.h> 11#include <linux/kernel.h>
13#include <linux/string.h> 12#include <linux/string.h>
14#include <linux/init.h> 13#include <linux/init.h>
15#include <asm/mach-numaq/mach_apic.h> 14#include <asm/numaq/apicdef.h>
16#include <asm/mach-numaq/mach_apicdef.h> 15#include <linux/smp.h>
17#include <asm/mach-numaq/mach_ipi.h> 16#include <asm/numaq/apic.h>
18#include <asm/mach-numaq/mach_mpparse.h> 17#include <asm/numaq/ipi.h>
19#include <asm/mach-numaq/mach_wakecpu.h> 18#include <asm/numaq/mpparse.h>
19#include <asm/numaq/wakecpu.h>
20#include <asm/numaq.h> 20#include <asm/numaq.h>
21 21
22static int mps_oem_check(struct mp_config_table *mpc, char *oem, 22static int mps_oem_check(struct mp_config_table *mpc, char *oem,
diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c
index a97ea0f35b1..6ad6b67a723 100644
--- a/arch/x86/mach-generic/summit.c
+++ b/arch/x86/mach-generic/summit.c
@@ -4,19 +4,18 @@
4#define APIC_DEFINITION 1 4#define APIC_DEFINITION 1
5#include <linux/threads.h> 5#include <linux/threads.h>
6#include <linux/cpumask.h> 6#include <linux/cpumask.h>
7#include <asm/smp.h>
8#include <asm/mpspec.h> 7#include <asm/mpspec.h>
9#include <asm/genapic.h> 8#include <asm/genapic.h>
10#include <asm/fixmap.h> 9#include <asm/fixmap.h>
11#include <asm/apicdef.h> 10#include <asm/apicdef.h>
12#include <linux/kernel.h> 11#include <linux/kernel.h>
13#include <linux/string.h> 12#include <linux/string.h>
14#include <linux/smp.h>
15#include <linux/init.h> 13#include <linux/init.h>
16#include <asm/mach-summit/mach_apic.h> 14#include <asm/summit/apicdef.h>
17#include <asm/mach-summit/mach_apicdef.h> 15#include <linux/smp.h>
18#include <asm/mach-summit/mach_ipi.h> 16#include <asm/summit/apic.h>
19#include <asm/mach-summit/mach_mpparse.h> 17#include <asm/summit/ipi.h>
18#include <asm/summit/mpparse.h>
20 19
21static int probe_summit(void) 20static int probe_summit(void)
22{ 21{
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 19af06927fb..1d88d2b3977 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -250,10 +250,5 @@ int __init pci_acpi_init(void)
250 acpi_pci_irq_enable(dev); 250 acpi_pci_irq_enable(dev);
251 } 251 }
252 252
253#ifdef CONFIG_X86_IO_APIC
254 if (acpi_ioapic)
255 print_IO_APIC();
256#endif
257
258 return 0; 253 return 0;
259} 254}
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 7dcd321a050..a27d562a974 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -36,6 +36,7 @@
36#include <xen/hvc-console.h> 36#include <xen/hvc-console.h>
37 37
38#include <asm/paravirt.h> 38#include <asm/paravirt.h>
39#include <asm/apic.h>
39#include <asm/page.h> 40#include <asm/page.h>
40#include <asm/xen/hypercall.h> 41#include <asm/xen/hypercall.h>
41#include <asm/xen/hypervisor.h> 42#include <asm/xen/hypervisor.h>
@@ -580,16 +581,47 @@ static void xen_io_delay(void)
580} 581}
581 582
582#ifdef CONFIG_X86_LOCAL_APIC 583#ifdef CONFIG_X86_LOCAL_APIC
583static u32 xen_apic_read(unsigned long reg) 584static u32 xen_apic_read(u32 reg)
584{ 585{
585 return 0; 586 return 0;
586} 587}
587 588
588static void xen_apic_write(unsigned long reg, u32 val) 589static void xen_apic_write(u32 reg, u32 val)
589{ 590{
590 /* Warn to see if there's any stray references */ 591 /* Warn to see if there's any stray references */
591 WARN_ON(1); 592 WARN_ON(1);
592} 593}
594
595static u64 xen_apic_icr_read(void)
596{
597 return 0;
598}
599
600static void xen_apic_icr_write(u32 low, u32 id)
601{
602 /* Warn to see if there's any stray references */
603 WARN_ON(1);
604}
605
606static void xen_apic_wait_icr_idle(void)
607{
608 return;
609}
610
611static u32 xen_safe_apic_wait_icr_idle(void)
612{
613 return 0;
614}
615
616static struct apic_ops xen_basic_apic_ops = {
617 .read = xen_apic_read,
618 .write = xen_apic_write,
619 .icr_read = xen_apic_icr_read,
620 .icr_write = xen_apic_icr_write,
621 .wait_icr_idle = xen_apic_wait_icr_idle,
622 .safe_wait_icr_idle = xen_safe_apic_wait_icr_idle,
623};
624
593#endif 625#endif
594 626
595static void xen_flush_tlb(void) 627static void xen_flush_tlb(void)
@@ -1273,8 +1305,6 @@ static const struct pv_irq_ops xen_irq_ops __initdata = {
1273 1305
1274static const struct pv_apic_ops xen_apic_ops __initdata = { 1306static const struct pv_apic_ops xen_apic_ops __initdata = {
1275#ifdef CONFIG_X86_LOCAL_APIC 1307#ifdef CONFIG_X86_LOCAL_APIC
1276 .apic_write = xen_apic_write,
1277 .apic_read = xen_apic_read,
1278 .setup_boot_clock = paravirt_nop, 1308 .setup_boot_clock = paravirt_nop,
1279 .setup_secondary_clock = paravirt_nop, 1309 .setup_secondary_clock = paravirt_nop,
1280 .startup_ipi_hook = paravirt_nop, 1310 .startup_ipi_hook = paravirt_nop,
@@ -1677,6 +1707,13 @@ asmlinkage void __init xen_start_kernel(void)
1677 pv_apic_ops = xen_apic_ops; 1707 pv_apic_ops = xen_apic_ops;
1678 pv_mmu_ops = xen_mmu_ops; 1708 pv_mmu_ops = xen_mmu_ops;
1679 1709
1710#ifdef CONFIG_X86_LOCAL_APIC
1711 /*
1712 * set up the basic apic ops.
1713 */
1714 apic_ops = &xen_basic_apic_ops;
1715#endif
1716
1680 if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { 1717 if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
1681 pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; 1718 pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
1682 pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; 1719 pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 7d63f8ced24..4b47f4ece5b 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -26,6 +26,8 @@ obj-$(CONFIG_HT_IRQ) += htirq.o
26# Build Intel IOMMU support 26# Build Intel IOMMU support
27obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o 27obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o
28 28
29obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o
30
29# 31#
30# Some architectures use the generic PCI setup functions 32# Some architectures use the generic PCI setup functions
31# 33#
diff --git a/drivers/pci/dma_remapping.h b/drivers/pci/dma_remapping.h
new file mode 100644
index 00000000000..bff5c65f81d
--- /dev/null
+++ b/drivers/pci/dma_remapping.h
@@ -0,0 +1,157 @@
1#ifndef _DMA_REMAPPING_H
2#define _DMA_REMAPPING_H
3
4/*
5 * We need a fixed PAGE_SIZE of 4K irrespective of
6 * arch PAGE_SIZE for IOMMU page tables.
7 */
8#define PAGE_SHIFT_4K (12)
9#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K)
10#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K)
11#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
12
13#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K)
14#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
15#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
16
17
18/*
19 * 0: Present
20 * 1-11: Reserved
21 * 12-63: Context Ptr (12 - (haw-1))
22 * 64-127: Reserved
23 */
24struct root_entry {
25 u64 val;
26 u64 rsvd1;
27};
28#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry))
29static inline bool root_present(struct root_entry *root)
30{
31 return (root->val & 1);
32}
33static inline void set_root_present(struct root_entry *root)
34{
35 root->val |= 1;
36}
37static inline void set_root_value(struct root_entry *root, unsigned long value)
38{
39 root->val |= value & PAGE_MASK_4K;
40}
41
42struct context_entry;
43static inline struct context_entry *
44get_context_addr_from_root(struct root_entry *root)
45{
46 return (struct context_entry *)
47 (root_present(root)?phys_to_virt(
48 root->val & PAGE_MASK_4K):
49 NULL);
50}
51
52/*
53 * low 64 bits:
54 * 0: present
55 * 1: fault processing disable
56 * 2-3: translation type
57 * 12-63: address space root
58 * high 64 bits:
59 * 0-2: address width
60 * 3-6: aval
61 * 8-23: domain id
62 */
63struct context_entry {
64 u64 lo;
65 u64 hi;
66};
67#define context_present(c) ((c).lo & 1)
68#define context_fault_disable(c) (((c).lo >> 1) & 1)
69#define context_translation_type(c) (((c).lo >> 2) & 3)
70#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
71#define context_address_width(c) ((c).hi & 7)
72#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
73
74#define context_set_present(c) do {(c).lo |= 1;} while (0)
75#define context_set_fault_enable(c) \
76 do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
77#define context_set_translation_type(c, val) \
78 do { \
79 (c).lo &= (((u64)-1) << 4) | 3; \
80 (c).lo |= ((val) & 3) << 2; \
81 } while (0)
82#define CONTEXT_TT_MULTI_LEVEL 0
83#define context_set_address_root(c, val) \
84 do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
85#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
86#define context_set_domain_id(c, val) \
87 do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
88#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
89
90/*
91 * 0: readable
92 * 1: writable
93 * 2-6: reserved
94 * 7: super page
95 * 8-11: available
96 * 12-63: Host physcial address
97 */
98struct dma_pte {
99 u64 val;
100};
101#define dma_clear_pte(p) do {(p).val = 0;} while (0)
102
103#define DMA_PTE_READ (1)
104#define DMA_PTE_WRITE (2)
105
106#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
107#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
108#define dma_set_pte_prot(p, prot) \
109 do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
110#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K)
111#define dma_set_pte_addr(p, addr) do {\
112 (p).val |= ((addr) & PAGE_MASK_4K); } while (0)
113#define dma_pte_present(p) (((p).val & 3) != 0)
114
115struct intel_iommu;
116
117struct dmar_domain {
118 int id; /* domain id */
119 struct intel_iommu *iommu; /* back pointer to owning iommu */
120
121 struct list_head devices; /* all devices' list */
122 struct iova_domain iovad; /* iova's that belong to this domain */
123
124 struct dma_pte *pgd; /* virtual address */
125 spinlock_t mapping_lock; /* page table lock */
126 int gaw; /* max guest address width */
127
128 /* adjusted guest address width, 0 is level 2 30-bit */
129 int agaw;
130
131#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
132 int flags;
133};
134
135/* PCI domain-device relationship */
136struct device_domain_info {
137 struct list_head link; /* link to domain siblings */
138 struct list_head global; /* link to global list */
139 u8 bus; /* PCI bus numer */
140 u8 devfn; /* PCI devfn number */
141 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
142 struct dmar_domain *domain; /* pointer to domain */
143};
144
145extern int init_dmars(void);
146extern void free_dmar_iommu(struct intel_iommu *iommu);
147
148extern int dmar_disabled;
149
150#ifndef CONFIG_DMAR_GFX_WA
151static inline void iommu_prepare_gfx_mapping(void)
152{
153 return;
154}
155#endif /* !CONFIG_DMAR_GFX_WA */
156
157#endif
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 8bf86ae2333..bd2c01674f5 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -19,13 +19,16 @@
19 * Author: Shaohua Li <shaohua.li@intel.com> 19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> 20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21 * 21 *
22 * This file implements early detection/parsing of DMA Remapping Devices 22 * This file implements early detection/parsing of Remapping Devices
23 * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI 23 * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
24 * tables. 24 * tables.
25 *
26 * These routines are used by both DMA-remapping and Interrupt-remapping
25 */ 27 */
26 28
27#include <linux/pci.h> 29#include <linux/pci.h>
28#include <linux/dmar.h> 30#include <linux/dmar.h>
31#include <linux/timer.h>
29#include "iova.h" 32#include "iova.h"
30#include "intel-iommu.h" 33#include "intel-iommu.h"
31 34
@@ -37,7 +40,6 @@
37 * these units are not supported by the architecture. 40 * these units are not supported by the architecture.
38 */ 41 */
39LIST_HEAD(dmar_drhd_units); 42LIST_HEAD(dmar_drhd_units);
40LIST_HEAD(dmar_rmrr_units);
41 43
42static struct acpi_table_header * __initdata dmar_tbl; 44static struct acpi_table_header * __initdata dmar_tbl;
43 45
@@ -53,11 +55,6 @@ static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
53 list_add(&drhd->list, &dmar_drhd_units); 55 list_add(&drhd->list, &dmar_drhd_units);
54} 56}
55 57
56static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
57{
58 list_add(&rmrr->list, &dmar_rmrr_units);
59}
60
61static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope, 58static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
62 struct pci_dev **dev, u16 segment) 59 struct pci_dev **dev, u16 segment)
63{ 60{
@@ -172,19 +169,37 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header)
172 struct acpi_dmar_hardware_unit *drhd; 169 struct acpi_dmar_hardware_unit *drhd;
173 struct dmar_drhd_unit *dmaru; 170 struct dmar_drhd_unit *dmaru;
174 int ret = 0; 171 int ret = 0;
175 static int include_all;
176 172
177 dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL); 173 dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
178 if (!dmaru) 174 if (!dmaru)
179 return -ENOMEM; 175 return -ENOMEM;
180 176
177 dmaru->hdr = header;
181 drhd = (struct acpi_dmar_hardware_unit *)header; 178 drhd = (struct acpi_dmar_hardware_unit *)header;
182 dmaru->reg_base_addr = drhd->address; 179 dmaru->reg_base_addr = drhd->address;
183 dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */ 180 dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
184 181
182 ret = alloc_iommu(dmaru);
183 if (ret) {
184 kfree(dmaru);
185 return ret;
186 }
187 dmar_register_drhd_unit(dmaru);
188 return 0;
189}
190
191static int __init
192dmar_parse_dev(struct dmar_drhd_unit *dmaru)
193{
194 struct acpi_dmar_hardware_unit *drhd;
195 static int include_all;
196 int ret;
197
198 drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
199
185 if (!dmaru->include_all) 200 if (!dmaru->include_all)
186 ret = dmar_parse_dev_scope((void *)(drhd + 1), 201 ret = dmar_parse_dev_scope((void *)(drhd + 1),
187 ((void *)drhd) + header->length, 202 ((void *)drhd) + drhd->header.length,
188 &dmaru->devices_cnt, &dmaru->devices, 203 &dmaru->devices_cnt, &dmaru->devices,
189 drhd->segment); 204 drhd->segment);
190 else { 205 else {
@@ -197,37 +212,59 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header)
197 include_all = 1; 212 include_all = 1;
198 } 213 }
199 214
200 if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all)) 215 if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all)) {
216 list_del(&dmaru->list);
201 kfree(dmaru); 217 kfree(dmaru);
202 else 218 }
203 dmar_register_drhd_unit(dmaru);
204 return ret; 219 return ret;
205} 220}
206 221
222#ifdef CONFIG_DMAR
223LIST_HEAD(dmar_rmrr_units);
224
225static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
226{
227 list_add(&rmrr->list, &dmar_rmrr_units);
228}
229
230
207static int __init 231static int __init
208dmar_parse_one_rmrr(struct acpi_dmar_header *header) 232dmar_parse_one_rmrr(struct acpi_dmar_header *header)
209{ 233{
210 struct acpi_dmar_reserved_memory *rmrr; 234 struct acpi_dmar_reserved_memory *rmrr;
211 struct dmar_rmrr_unit *rmrru; 235 struct dmar_rmrr_unit *rmrru;
212 int ret = 0;
213 236
214 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL); 237 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
215 if (!rmrru) 238 if (!rmrru)
216 return -ENOMEM; 239 return -ENOMEM;
217 240
241 rmrru->hdr = header;
218 rmrr = (struct acpi_dmar_reserved_memory *)header; 242 rmrr = (struct acpi_dmar_reserved_memory *)header;
219 rmrru->base_address = rmrr->base_address; 243 rmrru->base_address = rmrr->base_address;
220 rmrru->end_address = rmrr->end_address; 244 rmrru->end_address = rmrr->end_address;
245
246 dmar_register_rmrr_unit(rmrru);
247 return 0;
248}
249
250static int __init
251rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
252{
253 struct acpi_dmar_reserved_memory *rmrr;
254 int ret;
255
256 rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
221 ret = dmar_parse_dev_scope((void *)(rmrr + 1), 257 ret = dmar_parse_dev_scope((void *)(rmrr + 1),
222 ((void *)rmrr) + header->length, 258 ((void *)rmrr) + rmrr->header.length,
223 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment); 259 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
224 260
225 if (ret || (rmrru->devices_cnt == 0)) 261 if (ret || (rmrru->devices_cnt == 0)) {
262 list_del(&rmrru->list);
226 kfree(rmrru); 263 kfree(rmrru);
227 else 264 }
228 dmar_register_rmrr_unit(rmrru);
229 return ret; 265 return ret;
230} 266}
267#endif
231 268
232static void __init 269static void __init
233dmar_table_print_dmar_entry(struct acpi_dmar_header *header) 270dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
@@ -252,6 +289,7 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
252 } 289 }
253} 290}
254 291
292
255/** 293/**
256 * parse_dmar_table - parses the DMA reporting table 294 * parse_dmar_table - parses the DMA reporting table
257 */ 295 */
@@ -284,7 +322,9 @@ parse_dmar_table(void)
284 ret = dmar_parse_one_drhd(entry_header); 322 ret = dmar_parse_one_drhd(entry_header);
285 break; 323 break;
286 case ACPI_DMAR_TYPE_RESERVED_MEMORY: 324 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
325#ifdef CONFIG_DMAR
287 ret = dmar_parse_one_rmrr(entry_header); 326 ret = dmar_parse_one_rmrr(entry_header);
327#endif
288 break; 328 break;
289 default: 329 default:
290 printk(KERN_WARNING PREFIX 330 printk(KERN_WARNING PREFIX
@@ -300,15 +340,77 @@ parse_dmar_table(void)
300 return ret; 340 return ret;
301} 341}
302 342
343int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
344 struct pci_dev *dev)
345{
346 int index;
347
348 while (dev) {
349 for (index = 0; index < cnt; index++)
350 if (dev == devices[index])
351 return 1;
303 352
304int __init dmar_table_init(void) 353 /* Check our parent */
354 dev = dev->bus->self;
355 }
356
357 return 0;
358}
359
360struct dmar_drhd_unit *
361dmar_find_matched_drhd_unit(struct pci_dev *dev)
305{ 362{
363 struct dmar_drhd_unit *drhd = NULL;
364
365 list_for_each_entry(drhd, &dmar_drhd_units, list) {
366 if (drhd->include_all || dmar_pci_device_match(drhd->devices,
367 drhd->devices_cnt, dev))
368 return drhd;
369 }
370
371 return NULL;
372}
373
374int __init dmar_dev_scope_init(void)
375{
376 struct dmar_drhd_unit *drhd;
377 int ret = -ENODEV;
378
379 for_each_drhd_unit(drhd) {
380 ret = dmar_parse_dev(drhd);
381 if (ret)
382 return ret;
383 }
384
385#ifdef CONFIG_DMAR
386 {
387 struct dmar_rmrr_unit *rmrr;
388 for_each_rmrr_units(rmrr) {
389 ret = rmrr_parse_dev(rmrr);
390 if (ret)
391 return ret;
392 }
393 }
394#endif
395
396 return ret;
397}
306 398
399
400int __init dmar_table_init(void)
401{
402 static int dmar_table_initialized;
307 int ret; 403 int ret;
308 404
405 if (dmar_table_initialized)
406 return 0;
407
408 dmar_table_initialized = 1;
409
309 ret = parse_dmar_table(); 410 ret = parse_dmar_table();
310 if (ret) { 411 if (ret) {
311 printk(KERN_INFO PREFIX "parse DMAR table failure.\n"); 412 if (ret != -ENODEV)
413 printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
312 return ret; 414 return ret;
313 } 415 }
314 416
@@ -317,9 +419,14 @@ int __init dmar_table_init(void)
317 return -ENODEV; 419 return -ENODEV;
318 } 420 }
319 421
422#ifdef CONFIG_DMAR
320 if (list_empty(&dmar_rmrr_units)) 423 if (list_empty(&dmar_rmrr_units))
321 printk(KERN_INFO PREFIX "No RMRR found\n"); 424 printk(KERN_INFO PREFIX "No RMRR found\n");
425#endif
322 426
427#ifdef CONFIG_INTR_REMAP
428 parse_ioapics_under_ir();
429#endif
323 return 0; 430 return 0;
324} 431}
325 432
@@ -341,3 +448,255 @@ int __init early_dmar_detect(void)
341 448
342 return (ACPI_SUCCESS(status) ? 1 : 0); 449 return (ACPI_SUCCESS(status) ? 1 : 0);
343} 450}
451
452void __init detect_intel_iommu(void)
453{
454 int ret;
455
456 ret = early_dmar_detect();
457
458#ifdef CONFIG_DMAR
459 {
460 struct acpi_table_dmar *dmar;
461 /*
462 * for now we will disable dma-remapping when interrupt
463 * remapping is enabled.
464 * When support for queued invalidation for IOTLB invalidation
465 * is added, we will not need this any more.
466 */
467 dmar = (struct acpi_table_dmar *) dmar_tbl;
468 if (ret && cpu_has_x2apic && dmar->flags & 0x1) {
469 printk(KERN_INFO
470 "Queued invalidation will be enabled to support "
471 "x2apic and Intr-remapping.\n");
472 printk(KERN_INFO
473 "Disabling IOMMU detection, because of missing "
474 "queued invalidation support for IOTLB "
475 "invalidation\n");
476 printk(KERN_INFO
477 "Use \"nox2apic\", if you want to use Intel "
478 " IOMMU for DMA-remapping and don't care about "
479 " x2apic support\n");
480
481 dmar_disabled = 1;
482 return;
483 }
484
485 if (ret && !no_iommu && !iommu_detected && !swiotlb &&
486 !dmar_disabled)
487 iommu_detected = 1;
488 }
489#endif
490}
491
492
493int alloc_iommu(struct dmar_drhd_unit *drhd)
494{
495 struct intel_iommu *iommu;
496 int map_size;
497 u32 ver;
498 static int iommu_allocated = 0;
499
500 iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
501 if (!iommu)
502 return -ENOMEM;
503
504 iommu->seq_id = iommu_allocated++;
505
506 iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
507 if (!iommu->reg) {
508 printk(KERN_ERR "IOMMU: can't map the region\n");
509 goto error;
510 }
511 iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
512 iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
513
514 /* the registers might be more than one page */
515 map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
516 cap_max_fault_reg_offset(iommu->cap));
517 map_size = PAGE_ALIGN_4K(map_size);
518 if (map_size > PAGE_SIZE_4K) {
519 iounmap(iommu->reg);
520 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
521 if (!iommu->reg) {
522 printk(KERN_ERR "IOMMU: can't map the region\n");
523 goto error;
524 }
525 }
526
527 ver = readl(iommu->reg + DMAR_VER_REG);
528 pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
529 drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
530 iommu->cap, iommu->ecap);
531
532 spin_lock_init(&iommu->register_lock);
533
534 drhd->iommu = iommu;
535 return 0;
536error:
537 kfree(iommu);
538 return -1;
539}
540
541void free_iommu(struct intel_iommu *iommu)
542{
543 if (!iommu)
544 return;
545
546#ifdef CONFIG_DMAR
547 free_dmar_iommu(iommu);
548#endif
549
550 if (iommu->reg)
551 iounmap(iommu->reg);
552 kfree(iommu);
553}
554
555/*
556 * Reclaim all the submitted descriptors which have completed its work.
557 */
558static inline void reclaim_free_desc(struct q_inval *qi)
559{
560 while (qi->desc_status[qi->free_tail] == QI_DONE) {
561 qi->desc_status[qi->free_tail] = QI_FREE;
562 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
563 qi->free_cnt++;
564 }
565}
566
567/*
568 * Submit the queued invalidation descriptor to the remapping
569 * hardware unit and wait for its completion.
570 */
571void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
572{
573 struct q_inval *qi = iommu->qi;
574 struct qi_desc *hw, wait_desc;
575 int wait_index, index;
576 unsigned long flags;
577
578 if (!qi)
579 return;
580
581 hw = qi->desc;
582
583 spin_lock(&qi->q_lock);
584 while (qi->free_cnt < 3) {
585 spin_unlock(&qi->q_lock);
586 cpu_relax();
587 spin_lock(&qi->q_lock);
588 }
589
590 index = qi->free_head;
591 wait_index = (index + 1) % QI_LENGTH;
592
593 qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
594
595 hw[index] = *desc;
596
597 wait_desc.low = QI_IWD_STATUS_DATA(2) | QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
598 wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
599
600 hw[wait_index] = wait_desc;
601
602 __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
603 __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
604
605 qi->free_head = (qi->free_head + 2) % QI_LENGTH;
606 qi->free_cnt -= 2;
607
608 spin_lock_irqsave(&iommu->register_lock, flags);
609 /*
610 * update the HW tail register indicating the presence of
611 * new descriptors.
612 */
613 writel(qi->free_head << 4, iommu->reg + DMAR_IQT_REG);
614 spin_unlock_irqrestore(&iommu->register_lock, flags);
615
616 while (qi->desc_status[wait_index] != QI_DONE) {
617 spin_unlock(&qi->q_lock);
618 cpu_relax();
619 spin_lock(&qi->q_lock);
620 }
621
622 qi->desc_status[index] = QI_DONE;
623
624 reclaim_free_desc(qi);
625 spin_unlock(&qi->q_lock);
626}
627
628/*
629 * Flush the global interrupt entry cache.
630 */
631void qi_global_iec(struct intel_iommu *iommu)
632{
633 struct qi_desc desc;
634
635 desc.low = QI_IEC_TYPE;
636 desc.high = 0;
637
638 qi_submit_sync(&desc, iommu);
639}
640
641/*
642 * Enable Queued Invalidation interface. This is a must to support
643 * interrupt-remapping. Also used by DMA-remapping, which replaces
644 * register based IOTLB invalidation.
645 */
646int dmar_enable_qi(struct intel_iommu *iommu)
647{
648 u32 cmd, sts;
649 unsigned long flags;
650 struct q_inval *qi;
651
652 if (!ecap_qis(iommu->ecap))
653 return -ENOENT;
654
655 /*
656 * queued invalidation is already setup and enabled.
657 */
658 if (iommu->qi)
659 return 0;
660
661 iommu->qi = kmalloc(sizeof(*qi), GFP_KERNEL);
662 if (!iommu->qi)
663 return -ENOMEM;
664
665 qi = iommu->qi;
666
667 qi->desc = (void *)(get_zeroed_page(GFP_KERNEL));
668 if (!qi->desc) {
669 kfree(qi);
670 iommu->qi = 0;
671 return -ENOMEM;
672 }
673
674 qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_KERNEL);
675 if (!qi->desc_status) {
676 free_page((unsigned long) qi->desc);
677 kfree(qi);
678 iommu->qi = 0;
679 return -ENOMEM;
680 }
681
682 qi->free_head = qi->free_tail = 0;
683 qi->free_cnt = QI_LENGTH;
684
685 spin_lock_init(&qi->q_lock);
686
687 spin_lock_irqsave(&iommu->register_lock, flags);
688 /* write zero to the tail reg */
689 writel(0, iommu->reg + DMAR_IQT_REG);
690
691 dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
692
693 cmd = iommu->gcmd | DMA_GCMD_QIE;
694 iommu->gcmd |= DMA_GCMD_QIE;
695 writel(cmd, iommu->reg + DMAR_GCMD_REG);
696
697 /* Make sure hardware complete it */
698 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
699 spin_unlock_irqrestore(&iommu->register_lock, flags);
700
701 return 0;
702}
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 6c4c1c3c50e..389fdd6f4a9 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -49,8 +49,6 @@
49 49
50#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 50#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
51 51
52#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */
53
54#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) 52#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
55 53
56 54
@@ -58,8 +56,6 @@ static void flush_unmaps_timeout(unsigned long data);
58 56
59DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0); 57DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
60 58
61static struct intel_iommu *g_iommus;
62
63#define HIGH_WATER_MARK 250 59#define HIGH_WATER_MARK 250
64struct deferred_flush_tables { 60struct deferred_flush_tables {
65 int next; 61 int next;
@@ -185,13 +181,6 @@ void free_iova_mem(struct iova *iova)
185 kmem_cache_free(iommu_iova_cache, iova); 181 kmem_cache_free(iommu_iova_cache, iova);
186} 182}
187 183
188static inline void __iommu_flush_cache(
189 struct intel_iommu *iommu, void *addr, int size)
190{
191 if (!ecap_coherent(iommu->ecap))
192 clflush_cache_range(addr, size);
193}
194
195/* Gets context entry for a given bus and devfn */ 184/* Gets context entry for a given bus and devfn */
196static struct context_entry * device_to_context_entry(struct intel_iommu *iommu, 185static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
197 u8 bus, u8 devfn) 186 u8 bus, u8 devfn)
@@ -488,19 +477,6 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
488 return 0; 477 return 0;
489} 478}
490 479
491#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
492{\
493 cycles_t start_time = get_cycles();\
494 while (1) {\
495 sts = op (iommu->reg + offset);\
496 if (cond)\
497 break;\
498 if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\
499 panic("DMAR hardware is malfunctioning\n");\
500 cpu_relax();\
501 }\
502}
503
504static void iommu_set_root_entry(struct intel_iommu *iommu) 480static void iommu_set_root_entry(struct intel_iommu *iommu)
505{ 481{
506 void *addr; 482 void *addr;
@@ -990,6 +966,8 @@ static int iommu_init_domains(struct intel_iommu *iommu)
990 return -ENOMEM; 966 return -ENOMEM;
991 } 967 }
992 968
969 spin_lock_init(&iommu->lock);
970
993 /* 971 /*
994 * if Caching mode is set, then invalid translations are tagged 972 * if Caching mode is set, then invalid translations are tagged
995 * with domainid 0. Hence we need to pre-allocate it. 973 * with domainid 0. Hence we need to pre-allocate it.
@@ -998,62 +976,15 @@ static int iommu_init_domains(struct intel_iommu *iommu)
998 set_bit(0, iommu->domain_ids); 976 set_bit(0, iommu->domain_ids);
999 return 0; 977 return 0;
1000} 978}
1001static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu,
1002 struct dmar_drhd_unit *drhd)
1003{
1004 int ret;
1005 int map_size;
1006 u32 ver;
1007
1008 iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
1009 if (!iommu->reg) {
1010 printk(KERN_ERR "IOMMU: can't map the region\n");
1011 goto error;
1012 }
1013 iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
1014 iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
1015
1016 /* the registers might be more than one page */
1017 map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
1018 cap_max_fault_reg_offset(iommu->cap));
1019 map_size = PAGE_ALIGN_4K(map_size);
1020 if (map_size > PAGE_SIZE_4K) {
1021 iounmap(iommu->reg);
1022 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
1023 if (!iommu->reg) {
1024 printk(KERN_ERR "IOMMU: can't map the region\n");
1025 goto error;
1026 }
1027 }
1028
1029 ver = readl(iommu->reg + DMAR_VER_REG);
1030 pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
1031 drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
1032 iommu->cap, iommu->ecap);
1033 ret = iommu_init_domains(iommu);
1034 if (ret)
1035 goto error_unmap;
1036 spin_lock_init(&iommu->lock);
1037 spin_lock_init(&iommu->register_lock);
1038 979
1039 drhd->iommu = iommu;
1040 return iommu;
1041error_unmap:
1042 iounmap(iommu->reg);
1043error:
1044 kfree(iommu);
1045 return NULL;
1046}
1047 980
1048static void domain_exit(struct dmar_domain *domain); 981static void domain_exit(struct dmar_domain *domain);
1049static void free_iommu(struct intel_iommu *iommu) 982
983void free_dmar_iommu(struct intel_iommu *iommu)
1050{ 984{
1051 struct dmar_domain *domain; 985 struct dmar_domain *domain;
1052 int i; 986 int i;
1053 987
1054 if (!iommu)
1055 return;
1056
1057 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap)); 988 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1058 for (; i < cap_ndoms(iommu->cap); ) { 989 for (; i < cap_ndoms(iommu->cap); ) {
1059 domain = iommu->domains[i]; 990 domain = iommu->domains[i];
@@ -1078,10 +1009,6 @@ static void free_iommu(struct intel_iommu *iommu)
1078 1009
1079 /* free context mapping */ 1010 /* free context mapping */
1080 free_context_table(iommu); 1011 free_context_table(iommu);
1081
1082 if (iommu->reg)
1083 iounmap(iommu->reg);
1084 kfree(iommu);
1085} 1012}
1086 1013
1087static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu) 1014static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
@@ -1426,37 +1353,6 @@ find_domain(struct pci_dev *pdev)
1426 return NULL; 1353 return NULL;
1427} 1354}
1428 1355
1429static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
1430 struct pci_dev *dev)
1431{
1432 int index;
1433
1434 while (dev) {
1435 for (index = 0; index < cnt; index++)
1436 if (dev == devices[index])
1437 return 1;
1438
1439 /* Check our parent */
1440 dev = dev->bus->self;
1441 }
1442
1443 return 0;
1444}
1445
1446static struct dmar_drhd_unit *
1447dmar_find_matched_drhd_unit(struct pci_dev *dev)
1448{
1449 struct dmar_drhd_unit *drhd = NULL;
1450
1451 list_for_each_entry(drhd, &dmar_drhd_units, list) {
1452 if (drhd->include_all || dmar_pci_device_match(drhd->devices,
1453 drhd->devices_cnt, dev))
1454 return drhd;
1455 }
1456
1457 return NULL;
1458}
1459
1460/* domain is initialized */ 1356/* domain is initialized */
1461static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) 1357static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1462{ 1358{
@@ -1729,8 +1625,6 @@ int __init init_dmars(void)
1729 * endfor 1625 * endfor
1730 */ 1626 */
1731 for_each_drhd_unit(drhd) { 1627 for_each_drhd_unit(drhd) {
1732 if (drhd->ignored)
1733 continue;
1734 g_num_of_iommus++; 1628 g_num_of_iommus++;
1735 /* 1629 /*
1736 * lock not needed as this is only incremented in the single 1630 * lock not needed as this is only incremented in the single
@@ -1739,12 +1633,6 @@ int __init init_dmars(void)
1739 */ 1633 */
1740 } 1634 }
1741 1635
1742 g_iommus = kzalloc(g_num_of_iommus * sizeof(*iommu), GFP_KERNEL);
1743 if (!g_iommus) {
1744 ret = -ENOMEM;
1745 goto error;
1746 }
1747
1748 deferred_flush = kzalloc(g_num_of_iommus * 1636 deferred_flush = kzalloc(g_num_of_iommus *
1749 sizeof(struct deferred_flush_tables), GFP_KERNEL); 1637 sizeof(struct deferred_flush_tables), GFP_KERNEL);
1750 if (!deferred_flush) { 1638 if (!deferred_flush) {
@@ -1752,16 +1640,15 @@ int __init init_dmars(void)
1752 goto error; 1640 goto error;
1753 } 1641 }
1754 1642
1755 i = 0;
1756 for_each_drhd_unit(drhd) { 1643 for_each_drhd_unit(drhd) {
1757 if (drhd->ignored) 1644 if (drhd->ignored)
1758 continue; 1645 continue;
1759 iommu = alloc_iommu(&g_iommus[i], drhd); 1646
1760 i++; 1647 iommu = drhd->iommu;
1761 if (!iommu) { 1648
1762 ret = -ENOMEM; 1649 ret = iommu_init_domains(iommu);
1650 if (ret)
1763 goto error; 1651 goto error;
1764 }
1765 1652
1766 /* 1653 /*
1767 * TBD: 1654 * TBD:
@@ -1845,7 +1732,6 @@ error:
1845 iommu = drhd->iommu; 1732 iommu = drhd->iommu;
1846 free_iommu(iommu); 1733 free_iommu(iommu);
1847 } 1734 }
1848 kfree(g_iommus);
1849 return ret; 1735 return ret;
1850} 1736}
1851 1737
@@ -2002,7 +1888,10 @@ static void flush_unmaps(void)
2002 /* just flush them all */ 1888 /* just flush them all */
2003 for (i = 0; i < g_num_of_iommus; i++) { 1889 for (i = 0; i < g_num_of_iommus; i++) {
2004 if (deferred_flush[i].next) { 1890 if (deferred_flush[i].next) {
2005 iommu_flush_iotlb_global(&g_iommus[i], 0); 1891 struct intel_iommu *iommu =
1892 deferred_flush[i].domain[0]->iommu;
1893
1894 iommu_flush_iotlb_global(iommu, 0);
2006 for (j = 0; j < deferred_flush[i].next; j++) { 1895 for (j = 0; j < deferred_flush[i].next; j++) {
2007 __free_iova(&deferred_flush[i].domain[j]->iovad, 1896 __free_iova(&deferred_flush[i].domain[j]->iovad,
2008 deferred_flush[i].iova[j]); 1897 deferred_flush[i].iova[j]);
@@ -2032,7 +1921,8 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2032 if (list_size == HIGH_WATER_MARK) 1921 if (list_size == HIGH_WATER_MARK)
2033 flush_unmaps(); 1922 flush_unmaps();
2034 1923
2035 iommu_id = dom->iommu - g_iommus; 1924 iommu_id = dom->iommu->seq_id;
1925
2036 next = deferred_flush[iommu_id].next; 1926 next = deferred_flush[iommu_id].next;
2037 deferred_flush[iommu_id].domain[next] = dom; 1927 deferred_flush[iommu_id].domain[next] = dom;
2038 deferred_flush[iommu_id].iova[next] = iova; 1928 deferred_flush[iommu_id].iova[next] = iova;
@@ -2348,38 +2238,6 @@ static void __init iommu_exit_mempool(void)
2348 2238
2349} 2239}
2350 2240
2351static int blacklist_iommu(const struct dmi_system_id *id)
2352{
2353 printk(KERN_INFO "%s detected; disabling IOMMU\n",
2354 id->ident);
2355 dmar_disabled = 1;
2356 return 0;
2357}
2358
2359static struct dmi_system_id __initdata intel_iommu_dmi_table[] = {
2360 { /* Some DG33BU BIOS revisions advertised non-existent VT-d */
2361 .callback = blacklist_iommu,
2362 .ident = "Intel DG33BU",
2363 { DMI_MATCH(DMI_BOARD_VENDOR, "Intel Corporation"),
2364 DMI_MATCH(DMI_BOARD_NAME, "DG33BU"),
2365 }
2366 },
2367 { }
2368};
2369
2370
2371void __init detect_intel_iommu(void)
2372{
2373 if (swiotlb || no_iommu || iommu_detected || dmar_disabled)
2374 return;
2375 if (early_dmar_detect()) {
2376 dmi_check_system(intel_iommu_dmi_table);
2377 if (dmar_disabled)
2378 return;
2379 iommu_detected = 1;
2380 }
2381}
2382
2383static void __init init_no_remapping_devices(void) 2241static void __init init_no_remapping_devices(void)
2384{ 2242{
2385 struct dmar_drhd_unit *drhd; 2243 struct dmar_drhd_unit *drhd;
@@ -2426,12 +2284,19 @@ int __init intel_iommu_init(void)
2426{ 2284{
2427 int ret = 0; 2285 int ret = 0;
2428 2286
2429 if (no_iommu || swiotlb || dmar_disabled)
2430 return -ENODEV;
2431
2432 if (dmar_table_init()) 2287 if (dmar_table_init())
2433 return -ENODEV; 2288 return -ENODEV;
2434 2289
2290 if (dmar_dev_scope_init())
2291 return -ENODEV;
2292
2293 /*
2294 * Check the need for DMA-remapping initialization now.
2295 * Above initialization will also be used by Interrupt-remapping.
2296 */
2297 if (no_iommu || swiotlb || dmar_disabled)
2298 return -ENODEV;
2299
2435 iommu_init_mempool(); 2300 iommu_init_mempool();
2436 dmar_init_reserved_ranges(); 2301 dmar_init_reserved_ranges();
2437 2302
diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h
index afc0ad96122..2142c01e014 100644
--- a/drivers/pci/intel-iommu.h
+++ b/drivers/pci/intel-iommu.h
@@ -27,19 +27,8 @@
27#include <linux/sysdev.h> 27#include <linux/sysdev.h>
28#include "iova.h" 28#include "iova.h"
29#include <linux/io.h> 29#include <linux/io.h>
30 30#include <asm/cacheflush.h>
31/* 31#include "dma_remapping.h"
32 * We need a fixed PAGE_SIZE of 4K irrespective of
33 * arch PAGE_SIZE for IOMMU page tables.
34 */
35#define PAGE_SHIFT_4K (12)
36#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K)
37#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K)
38#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
39
40#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K)
41#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
42#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
43 32
44/* 33/*
45 * Intel IOMMU register specification per version 1.0 public spec. 34 * Intel IOMMU register specification per version 1.0 public spec.
@@ -63,6 +52,11 @@
63#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */ 52#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */
64#define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */ 53#define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */
65#define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */ 54#define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */
55#define DMAR_IQH_REG 0x80 /* Invalidation queue head register */
56#define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */
57#define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */
58#define DMAR_ICS_REG 0x98 /* Invalidation complete status register */
59#define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */
66 60
67#define OFFSET_STRIDE (9) 61#define OFFSET_STRIDE (9)
68/* 62/*
@@ -126,6 +120,10 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
126#define ecap_max_iotlb_offset(e) \ 120#define ecap_max_iotlb_offset(e) \
127 (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16) 121 (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16)
128#define ecap_coherent(e) ((e) & 0x1) 122#define ecap_coherent(e) ((e) & 0x1)
123#define ecap_qis(e) ((e) & 0x2)
124#define ecap_eim_support(e) ((e >> 4) & 0x1)
125#define ecap_ir_support(e) ((e >> 3) & 0x1)
126#define ecap_max_handle_mask(e) ((e >> 20) & 0xf)
129 127
130 128
131/* IOTLB_REG */ 129/* IOTLB_REG */
@@ -141,6 +139,17 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
141#define DMA_TLB_IH_NONLEAF (((u64)1) << 6) 139#define DMA_TLB_IH_NONLEAF (((u64)1) << 6)
142#define DMA_TLB_MAX_SIZE (0x3f) 140#define DMA_TLB_MAX_SIZE (0x3f)
143 141
142/* INVALID_DESC */
143#define DMA_ID_TLB_GLOBAL_FLUSH (((u64)1) << 3)
144#define DMA_ID_TLB_DSI_FLUSH (((u64)2) << 3)
145#define DMA_ID_TLB_PSI_FLUSH (((u64)3) << 3)
146#define DMA_ID_TLB_READ_DRAIN (((u64)1) << 7)
147#define DMA_ID_TLB_WRITE_DRAIN (((u64)1) << 6)
148#define DMA_ID_TLB_DID(id) (((u64)((id & 0xffff) << 16)))
149#define DMA_ID_TLB_IH_NONLEAF (((u64)1) << 6)
150#define DMA_ID_TLB_ADDR(addr) (addr)
151#define DMA_ID_TLB_ADDR_MASK(mask) (mask)
152
144/* PMEN_REG */ 153/* PMEN_REG */
145#define DMA_PMEN_EPM (((u32)1)<<31) 154#define DMA_PMEN_EPM (((u32)1)<<31)
146#define DMA_PMEN_PRS (((u32)1)<<0) 155#define DMA_PMEN_PRS (((u32)1)<<0)
@@ -151,6 +160,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
151#define DMA_GCMD_SFL (((u32)1) << 29) 160#define DMA_GCMD_SFL (((u32)1) << 29)
152#define DMA_GCMD_EAFL (((u32)1) << 28) 161#define DMA_GCMD_EAFL (((u32)1) << 28)
153#define DMA_GCMD_WBF (((u32)1) << 27) 162#define DMA_GCMD_WBF (((u32)1) << 27)
163#define DMA_GCMD_QIE (((u32)1) << 26)
164#define DMA_GCMD_SIRTP (((u32)1) << 24)
165#define DMA_GCMD_IRE (((u32) 1) << 25)
154 166
155/* GSTS_REG */ 167/* GSTS_REG */
156#define DMA_GSTS_TES (((u32)1) << 31) 168#define DMA_GSTS_TES (((u32)1) << 31)
@@ -158,6 +170,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
158#define DMA_GSTS_FLS (((u32)1) << 29) 170#define DMA_GSTS_FLS (((u32)1) << 29)
159#define DMA_GSTS_AFLS (((u32)1) << 28) 171#define DMA_GSTS_AFLS (((u32)1) << 28)
160#define DMA_GSTS_WBFS (((u32)1) << 27) 172#define DMA_GSTS_WBFS (((u32)1) << 27)
173#define DMA_GSTS_QIES (((u32)1) << 26)
174#define DMA_GSTS_IRTPS (((u32)1) << 24)
175#define DMA_GSTS_IRES (((u32)1) << 25)
161 176
162/* CCMD_REG */ 177/* CCMD_REG */
163#define DMA_CCMD_ICC (((u64)1) << 63) 178#define DMA_CCMD_ICC (((u64)1) << 63)
@@ -187,158 +202,106 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
187#define dma_frcd_source_id(c) (c & 0xffff) 202#define dma_frcd_source_id(c) (c & 0xffff)
188#define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */ 203#define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */
189 204
190/* 205#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */
191 * 0: Present 206
192 * 1-11: Reserved 207#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
193 * 12-63: Context Ptr (12 - (haw-1)) 208{\
194 * 64-127: Reserved 209 cycles_t start_time = get_cycles();\
195 */ 210 while (1) {\
196struct root_entry { 211 sts = op (iommu->reg + offset);\
197 u64 val; 212 if (cond)\
198 u64 rsvd1; 213 break;\
199}; 214 if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\
200#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry)) 215 panic("DMAR hardware is malfunctioning\n");\
201static inline bool root_present(struct root_entry *root) 216 cpu_relax();\
202{ 217 }\
203 return (root->val & 1);
204}
205static inline void set_root_present(struct root_entry *root)
206{
207 root->val |= 1;
208}
209static inline void set_root_value(struct root_entry *root, unsigned long value)
210{
211 root->val |= value & PAGE_MASK_4K;
212} 218}
213 219
214struct context_entry; 220#define QI_LENGTH 256 /* queue length */
215static inline struct context_entry *
216get_context_addr_from_root(struct root_entry *root)
217{
218 return (struct context_entry *)
219 (root_present(root)?phys_to_virt(
220 root->val & PAGE_MASK_4K):
221 NULL);
222}
223
224/*
225 * low 64 bits:
226 * 0: present
227 * 1: fault processing disable
228 * 2-3: translation type
229 * 12-63: address space root
230 * high 64 bits:
231 * 0-2: address width
232 * 3-6: aval
233 * 8-23: domain id
234 */
235struct context_entry {
236 u64 lo;
237 u64 hi;
238};
239#define context_present(c) ((c).lo & 1)
240#define context_fault_disable(c) (((c).lo >> 1) & 1)
241#define context_translation_type(c) (((c).lo >> 2) & 3)
242#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
243#define context_address_width(c) ((c).hi & 7)
244#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
245
246#define context_set_present(c) do {(c).lo |= 1;} while (0)
247#define context_set_fault_enable(c) \
248 do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
249#define context_set_translation_type(c, val) \
250 do { \
251 (c).lo &= (((u64)-1) << 4) | 3; \
252 (c).lo |= ((val) & 3) << 2; \
253 } while (0)
254#define CONTEXT_TT_MULTI_LEVEL 0
255#define context_set_address_root(c, val) \
256 do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
257#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
258#define context_set_domain_id(c, val) \
259 do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
260#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
261 221
262/* 222enum {
263 * 0: readable 223 QI_FREE,
264 * 1: writable 224 QI_IN_USE,
265 * 2-6: reserved 225 QI_DONE
266 * 7: super page
267 * 8-11: available
268 * 12-63: Host physcial address
269 */
270struct dma_pte {
271 u64 val;
272}; 226};
273#define dma_clear_pte(p) do {(p).val = 0;} while (0)
274
275#define DMA_PTE_READ (1)
276#define DMA_PTE_WRITE (2)
277 227
278#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0) 228#define QI_CC_TYPE 0x1
279#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0) 229#define QI_IOTLB_TYPE 0x2
280#define dma_set_pte_prot(p, prot) \ 230#define QI_DIOTLB_TYPE 0x3
281 do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0) 231#define QI_IEC_TYPE 0x4
282#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K) 232#define QI_IWD_TYPE 0x5
283#define dma_set_pte_addr(p, addr) do {\
284 (p).val |= ((addr) & PAGE_MASK_4K); } while (0)
285#define dma_pte_present(p) (((p).val & 3) != 0)
286 233
287struct intel_iommu; 234#define QI_IEC_SELECTIVE (((u64)1) << 4)
235#define QI_IEC_IIDEX(idx) (((u64)(idx & 0xffff) << 32))
236#define QI_IEC_IM(m) (((u64)(m & 0x1f) << 27))
288 237
289struct dmar_domain { 238#define QI_IWD_STATUS_DATA(d) (((u64)d) << 32)
290 int id; /* domain id */ 239#define QI_IWD_STATUS_WRITE (((u64)1) << 5)
291 struct intel_iommu *iommu; /* back pointer to owning iommu */
292 240
293 struct list_head devices; /* all devices' list */ 241struct qi_desc {
294 struct iova_domain iovad; /* iova's that belong to this domain */ 242 u64 low, high;
243};
295 244
296 struct dma_pte *pgd; /* virtual address */ 245struct q_inval {
297 spinlock_t mapping_lock; /* page table lock */ 246 spinlock_t q_lock;
298 int gaw; /* max guest address width */ 247 struct qi_desc *desc; /* invalidation queue */
248 int *desc_status; /* desc status */
249 int free_head; /* first free entry */
250 int free_tail; /* last free entry */
251 int free_cnt;
252};
299 253
300 /* adjusted guest address width, 0 is level 2 30-bit */ 254#ifdef CONFIG_INTR_REMAP
301 int agaw; 255/* 1MB - maximum possible interrupt remapping table size */
256#define INTR_REMAP_PAGE_ORDER 8
257#define INTR_REMAP_TABLE_REG_SIZE 0xf
302 258
303#define DOMAIN_FLAG_MULTIPLE_DEVICES 1 259#define INTR_REMAP_TABLE_ENTRIES 65536
304 int flags;
305};
306 260
307/* PCI domain-device relationship */ 261struct ir_table {
308struct device_domain_info { 262 struct irte *base;
309 struct list_head link; /* link to domain siblings */
310 struct list_head global; /* link to global list */
311 u8 bus; /* PCI bus numer */
312 u8 devfn; /* PCI devfn number */
313 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
314 struct dmar_domain *domain; /* pointer to domain */
315}; 263};
316 264#endif
317extern int init_dmars(void);
318 265
319struct intel_iommu { 266struct intel_iommu {
320 void __iomem *reg; /* Pointer to hardware regs, virtual addr */ 267 void __iomem *reg; /* Pointer to hardware regs, virtual addr */
321 u64 cap; 268 u64 cap;
322 u64 ecap; 269 u64 ecap;
323 unsigned long *domain_ids; /* bitmap of domains */
324 struct dmar_domain **domains; /* ptr to domains */
325 int seg; 270 int seg;
326 u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ 271 u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
327 spinlock_t lock; /* protect context, domain ids */
328 spinlock_t register_lock; /* protect register handling */ 272 spinlock_t register_lock; /* protect register handling */
273 int seq_id; /* sequence id of the iommu */
274
275#ifdef CONFIG_DMAR
276 unsigned long *domain_ids; /* bitmap of domains */
277 struct dmar_domain **domains; /* ptr to domains */
278 spinlock_t lock; /* protect context, domain ids */
329 struct root_entry *root_entry; /* virtual address */ 279 struct root_entry *root_entry; /* virtual address */
330 280
331 unsigned int irq; 281 unsigned int irq;
332 unsigned char name[7]; /* Device Name */ 282 unsigned char name[7]; /* Device Name */
333 struct msi_msg saved_msg; 283 struct msi_msg saved_msg;
334 struct sys_device sysdev; 284 struct sys_device sysdev;
285#endif
286 struct q_inval *qi; /* Queued invalidation info */
287#ifdef CONFIG_INTR_REMAP
288 struct ir_table *ir_table; /* Interrupt remapping info */
289#endif
335}; 290};
336 291
337#ifndef CONFIG_DMAR_GFX_WA 292static inline void __iommu_flush_cache(
338static inline void iommu_prepare_gfx_mapping(void) 293 struct intel_iommu *iommu, void *addr, int size)
339{ 294{
340 return; 295 if (!ecap_coherent(iommu->ecap))
296 clflush_cache_range(addr, size);
341} 297}
342#endif /* !CONFIG_DMAR_GFX_WA */
343 298
299extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);
300
301extern int alloc_iommu(struct dmar_drhd_unit *drhd);
302extern void free_iommu(struct intel_iommu *iommu);
303extern int dmar_enable_qi(struct intel_iommu *iommu);
304extern void qi_global_iec(struct intel_iommu *iommu);
305
306extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
344#endif 307#endif
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
new file mode 100644
index 00000000000..bb642cc5e18
--- /dev/null
+++ b/drivers/pci/intr_remapping.c
@@ -0,0 +1,471 @@
1#include <linux/dmar.h>
2#include <linux/spinlock.h>
3#include <linux/jiffies.h>
4#include <linux/pci.h>
5#include <linux/irq.h>
6#include <asm/io_apic.h>
7#include "intel-iommu.h"
8#include "intr_remapping.h"
9
10static struct ioapic_scope ir_ioapic[MAX_IO_APICS];
11static int ir_ioapic_num;
12int intr_remapping_enabled;
13
14static struct {
15 struct intel_iommu *iommu;
16 u16 irte_index;
17 u16 sub_handle;
18 u8 irte_mask;
19} irq_2_iommu[NR_IRQS];
20
21static DEFINE_SPINLOCK(irq_2_ir_lock);
22
23int irq_remapped(int irq)
24{
25 if (irq > NR_IRQS)
26 return 0;
27
28 if (!irq_2_iommu[irq].iommu)
29 return 0;
30
31 return 1;
32}
33
34int get_irte(int irq, struct irte *entry)
35{
36 int index;
37
38 if (!entry || irq > NR_IRQS)
39 return -1;
40
41 spin_lock(&irq_2_ir_lock);
42 if (!irq_2_iommu[irq].iommu) {
43 spin_unlock(&irq_2_ir_lock);
44 return -1;
45 }
46
47 index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
48 *entry = *(irq_2_iommu[irq].iommu->ir_table->base + index);
49
50 spin_unlock(&irq_2_ir_lock);
51 return 0;
52}
53
54int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
55{
56 struct ir_table *table = iommu->ir_table;
57 u16 index, start_index;
58 unsigned int mask = 0;
59 int i;
60
61 if (!count)
62 return -1;
63
64 /*
65 * start the IRTE search from index 0.
66 */
67 index = start_index = 0;
68
69 if (count > 1) {
70 count = __roundup_pow_of_two(count);
71 mask = ilog2(count);
72 }
73
74 if (mask > ecap_max_handle_mask(iommu->ecap)) {
75 printk(KERN_ERR
76 "Requested mask %x exceeds the max invalidation handle"
77 " mask value %Lx\n", mask,
78 ecap_max_handle_mask(iommu->ecap));
79 return -1;
80 }
81
82 spin_lock(&irq_2_ir_lock);
83 do {
84 for (i = index; i < index + count; i++)
85 if (table->base[i].present)
86 break;
87 /* empty index found */
88 if (i == index + count)
89 break;
90
91 index = (index + count) % INTR_REMAP_TABLE_ENTRIES;
92
93 if (index == start_index) {
94 spin_unlock(&irq_2_ir_lock);
95 printk(KERN_ERR "can't allocate an IRTE\n");
96 return -1;
97 }
98 } while (1);
99
100 for (i = index; i < index + count; i++)
101 table->base[i].present = 1;
102
103 irq_2_iommu[irq].iommu = iommu;
104 irq_2_iommu[irq].irte_index = index;
105 irq_2_iommu[irq].sub_handle = 0;
106 irq_2_iommu[irq].irte_mask = mask;
107
108 spin_unlock(&irq_2_ir_lock);
109
110 return index;
111}
112
113static void qi_flush_iec(struct intel_iommu *iommu, int index, int mask)
114{
115 struct qi_desc desc;
116
117 desc.low = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask)
118 | QI_IEC_SELECTIVE;
119 desc.high = 0;
120
121 qi_submit_sync(&desc, iommu);
122}
123
124int map_irq_to_irte_handle(int irq, u16 *sub_handle)
125{
126 int index;
127
128 spin_lock(&irq_2_ir_lock);
129 if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
130 spin_unlock(&irq_2_ir_lock);
131 return -1;
132 }
133
134 *sub_handle = irq_2_iommu[irq].sub_handle;
135 index = irq_2_iommu[irq].irte_index;
136 spin_unlock(&irq_2_ir_lock);
137 return index;
138}
139
140int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
141{
142 spin_lock(&irq_2_ir_lock);
143 if (irq >= NR_IRQS || irq_2_iommu[irq].iommu) {
144 spin_unlock(&irq_2_ir_lock);
145 return -1;
146 }
147
148 irq_2_iommu[irq].iommu = iommu;
149 irq_2_iommu[irq].irte_index = index;
150 irq_2_iommu[irq].sub_handle = subhandle;
151 irq_2_iommu[irq].irte_mask = 0;
152
153 spin_unlock(&irq_2_ir_lock);
154
155 return 0;
156}
157
158int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index)
159{
160 spin_lock(&irq_2_ir_lock);
161 if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
162 spin_unlock(&irq_2_ir_lock);
163 return -1;
164 }
165
166 irq_2_iommu[irq].iommu = NULL;
167 irq_2_iommu[irq].irte_index = 0;
168 irq_2_iommu[irq].sub_handle = 0;
169 irq_2_iommu[irq].irte_mask = 0;
170
171 spin_unlock(&irq_2_ir_lock);
172
173 return 0;
174}
175
176int modify_irte(int irq, struct irte *irte_modified)
177{
178 int index;
179 struct irte *irte;
180 struct intel_iommu *iommu;
181
182 spin_lock(&irq_2_ir_lock);
183 if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
184 spin_unlock(&irq_2_ir_lock);
185 return -1;
186 }
187
188 iommu = irq_2_iommu[irq].iommu;
189
190 index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
191 irte = &iommu->ir_table->base[index];
192
193 set_64bit((unsigned long *)irte, irte_modified->low | (1 << 1));
194 __iommu_flush_cache(iommu, irte, sizeof(*irte));
195
196 qi_flush_iec(iommu, index, 0);
197
198 spin_unlock(&irq_2_ir_lock);
199 return 0;
200}
201
202int flush_irte(int irq)
203{
204 int index;
205 struct intel_iommu *iommu;
206
207 spin_lock(&irq_2_ir_lock);
208 if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
209 spin_unlock(&irq_2_ir_lock);
210 return -1;
211 }
212
213 iommu = irq_2_iommu[irq].iommu;
214
215 index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
216
217 qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask);
218 spin_unlock(&irq_2_ir_lock);
219
220 return 0;
221}
222
223struct intel_iommu *map_ioapic_to_ir(int apic)
224{
225 int i;
226
227 for (i = 0; i < MAX_IO_APICS; i++)
228 if (ir_ioapic[i].id == apic)
229 return ir_ioapic[i].iommu;
230 return NULL;
231}
232
233struct intel_iommu *map_dev_to_ir(struct pci_dev *dev)
234{
235 struct dmar_drhd_unit *drhd;
236
237 drhd = dmar_find_matched_drhd_unit(dev);
238 if (!drhd)
239 return NULL;
240
241 return drhd->iommu;
242}
243
244int free_irte(int irq)
245{
246 int index, i;
247 struct irte *irte;
248 struct intel_iommu *iommu;
249
250 spin_lock(&irq_2_ir_lock);
251 if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
252 spin_unlock(&irq_2_ir_lock);
253 return -1;
254 }
255
256 iommu = irq_2_iommu[irq].iommu;
257
258 index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
259 irte = &iommu->ir_table->base[index];
260
261 if (!irq_2_iommu[irq].sub_handle) {
262 for (i = 0; i < (1 << irq_2_iommu[irq].irte_mask); i++)
263 set_64bit((unsigned long *)irte, 0);
264 qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask);
265 }
266
267 irq_2_iommu[irq].iommu = NULL;
268 irq_2_iommu[irq].irte_index = 0;
269 irq_2_iommu[irq].sub_handle = 0;
270 irq_2_iommu[irq].irte_mask = 0;
271
272 spin_unlock(&irq_2_ir_lock);
273
274 return 0;
275}
276
277static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode)
278{
279 u64 addr;
280 u32 cmd, sts;
281 unsigned long flags;
282
283 addr = virt_to_phys((void *)iommu->ir_table->base);
284
285 spin_lock_irqsave(&iommu->register_lock, flags);
286
287 dmar_writeq(iommu->reg + DMAR_IRTA_REG,
288 (addr) | IR_X2APIC_MODE(mode) | INTR_REMAP_TABLE_REG_SIZE);
289
290 /* Set interrupt-remapping table pointer */
291 cmd = iommu->gcmd | DMA_GCMD_SIRTP;
292 writel(cmd, iommu->reg + DMAR_GCMD_REG);
293
294 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
295 readl, (sts & DMA_GSTS_IRTPS), sts);
296 spin_unlock_irqrestore(&iommu->register_lock, flags);
297
298 /*
299 * global invalidation of interrupt entry cache before enabling
300 * interrupt-remapping.
301 */
302 qi_global_iec(iommu);
303
304 spin_lock_irqsave(&iommu->register_lock, flags);
305
306 /* Enable interrupt-remapping */
307 cmd = iommu->gcmd | DMA_GCMD_IRE;
308 iommu->gcmd |= DMA_GCMD_IRE;
309 writel(cmd, iommu->reg + DMAR_GCMD_REG);
310
311 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
312 readl, (sts & DMA_GSTS_IRES), sts);
313
314 spin_unlock_irqrestore(&iommu->register_lock, flags);
315}
316
317
318static int setup_intr_remapping(struct intel_iommu *iommu, int mode)
319{
320 struct ir_table *ir_table;
321 struct page *pages;
322
323 ir_table = iommu->ir_table = kzalloc(sizeof(struct ir_table),
324 GFP_KERNEL);
325
326 if (!iommu->ir_table)
327 return -ENOMEM;
328
329 pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, INTR_REMAP_PAGE_ORDER);
330
331 if (!pages) {
332 printk(KERN_ERR "failed to allocate pages of order %d\n",
333 INTR_REMAP_PAGE_ORDER);
334 kfree(iommu->ir_table);
335 return -ENOMEM;
336 }
337
338 ir_table->base = page_address(pages);
339
340 iommu_set_intr_remapping(iommu, mode);
341 return 0;
342}
343
344int __init enable_intr_remapping(int eim)
345{
346 struct dmar_drhd_unit *drhd;
347 int setup = 0;
348
349 /*
350 * check for the Interrupt-remapping support
351 */
352 for_each_drhd_unit(drhd) {
353 struct intel_iommu *iommu = drhd->iommu;
354
355 if (!ecap_ir_support(iommu->ecap))
356 continue;
357
358 if (eim && !ecap_eim_support(iommu->ecap)) {
359 printk(KERN_INFO "DRHD %Lx: EIM not supported by DRHD, "
360 " ecap %Lx\n", drhd->reg_base_addr, iommu->ecap);
361 return -1;
362 }
363 }
364
365 /*
366 * Enable queued invalidation for all the DRHD's.
367 */
368 for_each_drhd_unit(drhd) {
369 int ret;
370 struct intel_iommu *iommu = drhd->iommu;
371 ret = dmar_enable_qi(iommu);
372
373 if (ret) {
374 printk(KERN_ERR "DRHD %Lx: failed to enable queued, "
375 " invalidation, ecap %Lx, ret %d\n",
376 drhd->reg_base_addr, iommu->ecap, ret);
377 return -1;
378 }
379 }
380
381 /*
382 * Setup Interrupt-remapping for all the DRHD's now.
383 */
384 for_each_drhd_unit(drhd) {
385 struct intel_iommu *iommu = drhd->iommu;
386
387 if (!ecap_ir_support(iommu->ecap))
388 continue;
389
390 if (setup_intr_remapping(iommu, eim))
391 goto error;
392
393 setup = 1;
394 }
395
396 if (!setup)
397 goto error;
398
399 intr_remapping_enabled = 1;
400
401 return 0;
402
403error:
404 /*
405 * handle error condition gracefully here!
406 */
407 return -1;
408}
409
410static int ir_parse_ioapic_scope(struct acpi_dmar_header *header,
411 struct intel_iommu *iommu)
412{
413 struct acpi_dmar_hardware_unit *drhd;
414 struct acpi_dmar_device_scope *scope;
415 void *start, *end;
416
417 drhd = (struct acpi_dmar_hardware_unit *)header;
418
419 start = (void *)(drhd + 1);
420 end = ((void *)drhd) + header->length;
421
422 while (start < end) {
423 scope = start;
424 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_IOAPIC) {
425 if (ir_ioapic_num == MAX_IO_APICS) {
426 printk(KERN_WARNING "Exceeded Max IO APICS\n");
427 return -1;
428 }
429
430 printk(KERN_INFO "IOAPIC id %d under DRHD base"
431 " 0x%Lx\n", scope->enumeration_id,
432 drhd->address);
433
434 ir_ioapic[ir_ioapic_num].iommu = iommu;
435 ir_ioapic[ir_ioapic_num].id = scope->enumeration_id;
436 ir_ioapic_num++;
437 }
438 start += scope->length;
439 }
440
441 return 0;
442}
443
444/*
445 * Finds the assocaition between IOAPIC's and its Interrupt-remapping
446 * hardware unit.
447 */
448int __init parse_ioapics_under_ir(void)
449{
450 struct dmar_drhd_unit *drhd;
451 int ir_supported = 0;
452
453 for_each_drhd_unit(drhd) {
454 struct intel_iommu *iommu = drhd->iommu;
455
456 if (ecap_ir_support(iommu->ecap)) {
457 if (ir_parse_ioapic_scope(drhd->hdr, iommu))
458 return -1;
459
460 ir_supported = 1;
461 }
462 }
463
464 if (ir_supported && ir_ioapic_num != nr_ioapics) {
465 printk(KERN_WARNING
466 "Not all IO-APIC's listed under remapping hardware\n");
467 return -1;
468 }
469
470 return ir_supported;
471}
diff --git a/drivers/pci/intr_remapping.h b/drivers/pci/intr_remapping.h
new file mode 100644
index 00000000000..05f2635bbe4
--- /dev/null
+++ b/drivers/pci/intr_remapping.h
@@ -0,0 +1,8 @@
1#include "intel-iommu.h"
2
3struct ioapic_scope {
4 struct intel_iommu *iommu;
5 unsigned int id;
6};
7
8#define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0)
diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h
index 65590c9aecd..d76a0839abe 100644
--- a/include/asm-x86/apic.h
+++ b/include/asm-x86/apic.h
@@ -9,6 +9,8 @@
9#include <asm/apicdef.h> 9#include <asm/apicdef.h>
10#include <asm/processor.h> 10#include <asm/processor.h>
11#include <asm/system.h> 11#include <asm/system.h>
12#include <asm/cpufeature.h>
13#include <asm/msr.h>
12 14
13#define ARCH_APICTIMER_STOPS_ON_C3 1 15#define ARCH_APICTIMER_STOPS_ON_C3 1
14 16
@@ -47,8 +49,6 @@ extern int disable_apic;
47#ifdef CONFIG_PARAVIRT 49#ifdef CONFIG_PARAVIRT
48#include <asm/paravirt.h> 50#include <asm/paravirt.h>
49#else 51#else
50#define apic_write native_apic_write
51#define apic_read native_apic_read
52#define setup_boot_clock setup_boot_APIC_clock 52#define setup_boot_clock setup_boot_APIC_clock
53#define setup_secondary_clock setup_secondary_APIC_clock 53#define setup_secondary_clock setup_secondary_APIC_clock
54#endif 54#endif
@@ -60,7 +60,7 @@ extern u64 xapic_icr_read(void);
60extern void xapic_icr_write(u32, u32); 60extern void xapic_icr_write(u32, u32);
61extern int setup_profiling_timer(unsigned int); 61extern int setup_profiling_timer(unsigned int);
62 62
63static inline void native_apic_write(unsigned long reg, u32 v) 63static inline void native_apic_mem_write(u32 reg, u32 v)
64{ 64{
65 volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg); 65 volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg);
66 66
@@ -69,15 +69,68 @@ static inline void native_apic_write(unsigned long reg, u32 v)
69 ASM_OUTPUT2("0" (v), "m" (*addr))); 69 ASM_OUTPUT2("0" (v), "m" (*addr)));
70} 70}
71 71
72static inline u32 native_apic_read(unsigned long reg) 72static inline u32 native_apic_mem_read(u32 reg)
73{ 73{
74 return *((volatile u32 *)(APIC_BASE + reg)); 74 return *((volatile u32 *)(APIC_BASE + reg));
75} 75}
76 76
77extern void apic_wait_icr_idle(void); 77static inline void native_apic_msr_write(u32 reg, u32 v)
78extern u32 safe_apic_wait_icr_idle(void); 78{
79 if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR ||
80 reg == APIC_LVR)
81 return;
82
83 wrmsr(APIC_BASE_MSR + (reg >> 4), v, 0);
84}
85
86static inline u32 native_apic_msr_read(u32 reg)
87{
88 u32 low, high;
89
90 if (reg == APIC_DFR)
91 return -1;
92
93 rdmsr(APIC_BASE_MSR + (reg >> 4), low, high);
94 return low;
95}
96
97#ifndef CONFIG_X86_32
98extern int x2apic, x2apic_preenabled;
99extern void check_x2apic(void);
100extern void enable_x2apic(void);
101extern void enable_IR_x2apic(void);
102extern void x2apic_icr_write(u32 low, u32 id);
103#endif
104
105struct apic_ops {
106 u32 (*read)(u32 reg);
107 void (*write)(u32 reg, u32 v);
108 u64 (*icr_read)(void);
109 void (*icr_write)(u32 low, u32 high);
110 void (*wait_icr_idle)(void);
111 u32 (*safe_wait_icr_idle)(void);
112};
113
114extern struct apic_ops *apic_ops;
115
116#define apic_read (apic_ops->read)
117#define apic_write (apic_ops->write)
118#define apic_icr_read (apic_ops->icr_read)
119#define apic_icr_write (apic_ops->icr_write)
120#define apic_wait_icr_idle (apic_ops->wait_icr_idle)
121#define safe_apic_wait_icr_idle (apic_ops->safe_wait_icr_idle)
122
79extern int get_physical_broadcast(void); 123extern int get_physical_broadcast(void);
80 124
125#ifdef CONFIG_X86_64
126static inline void ack_x2APIC_irq(void)
127{
128 /* Docs say use 0 for future compatibility */
129 native_apic_msr_write(APIC_EOI, 0);
130}
131#endif
132
133
81static inline void ack_APIC_irq(void) 134static inline void ack_APIC_irq(void)
82{ 135{
83 /* 136 /*
diff --git a/include/asm-x86/apicdef.h b/include/asm-x86/apicdef.h
index c40687da20f..b922c85ac91 100644
--- a/include/asm-x86/apicdef.h
+++ b/include/asm-x86/apicdef.h
@@ -105,6 +105,7 @@
105#define APIC_TMICT 0x380 105#define APIC_TMICT 0x380
106#define APIC_TMCCT 0x390 106#define APIC_TMCCT 0x390
107#define APIC_TDCR 0x3E0 107#define APIC_TDCR 0x3E0
108#define APIC_SELF_IPI 0x3F0
108#define APIC_TDR_DIV_TMBASE (1 << 2) 109#define APIC_TDR_DIV_TMBASE (1 << 2)
109#define APIC_TDR_DIV_1 0xB 110#define APIC_TDR_DIV_1 0xB
110#define APIC_TDR_DIV_2 0x0 111#define APIC_TDR_DIV_2 0x0
@@ -128,6 +129,8 @@
128#define APIC_EILVT3 0x530 129#define APIC_EILVT3 0x530
129 130
130#define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) 131#define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
132#define APIC_BASE_MSR 0x800
133#define X2APIC_ENABLE (1UL << 10)
131 134
132#ifdef CONFIG_X86_32 135#ifdef CONFIG_X86_32
133# define MAX_IO_APICS 64 136# define MAX_IO_APICS 64
diff --git a/include/asm-x86/arch_hooks.h b/include/asm-x86/arch_hooks.h
index 72adc3a109c..de4596b24c2 100644
--- a/include/asm-x86/arch_hooks.h
+++ b/include/asm-x86/arch_hooks.h
@@ -12,8 +12,6 @@
12/* these aren't arch hooks, they are generic routines 12/* these aren't arch hooks, they are generic routines
13 * that can be used by the hooks */ 13 * that can be used by the hooks */
14extern void init_ISA_irqs(void); 14extern void init_ISA_irqs(void);
15extern void apic_intr_init(void);
16extern void smp_intr_init(void);
17extern irqreturn_t timer_interrupt(int irq, void *dev_id); 15extern irqreturn_t timer_interrupt(int irq, void *dev_id);
18 16
19/* these are the defined hooks */ 17/* these are the defined hooks */
diff --git a/include/asm-x86/mach-bigsmp/mach_apic.h b/include/asm-x86/bigsmp/apic.h
index 05362d44a3e..0a9cd7c5ca0 100644
--- a/include/asm-x86/mach-bigsmp/mach_apic.h
+++ b/include/asm-x86/bigsmp/apic.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_BIGSMP__MACH_APIC_H 1#ifndef __ASM_MACH_APIC_H
2#define ASM_X86__MACH_BIGSMP__MACH_APIC_H 2#define __ASM_MACH_APIC_H
3 3
4#define xapic_phys_to_log_apicid(cpu) (per_cpu(x86_bios_cpu_apicid, cpu)) 4#define xapic_phys_to_log_apicid(cpu) (per_cpu(x86_bios_cpu_apicid, cpu))
5#define esr_disable (1) 5#define esr_disable (1)
@@ -11,7 +11,7 @@ static inline int apic_id_registered(void)
11 11
12/* Round robin the irqs amoung the online cpus */ 12/* Round robin the irqs amoung the online cpus */
13static inline cpumask_t target_cpus(void) 13static inline cpumask_t target_cpus(void)
14{ 14{
15 static unsigned long cpu = NR_CPUS; 15 static unsigned long cpu = NR_CPUS;
16 do { 16 do {
17 if (cpu >= NR_CPUS) 17 if (cpu >= NR_CPUS)
@@ -23,7 +23,7 @@ static inline cpumask_t target_cpus(void)
23} 23}
24 24
25#undef APIC_DEST_LOGICAL 25#undef APIC_DEST_LOGICAL
26#define APIC_DEST_LOGICAL 0 26#define APIC_DEST_LOGICAL 0
27#define TARGET_CPUS (target_cpus()) 27#define TARGET_CPUS (target_cpus())
28#define APIC_DFR_VALUE (APIC_DFR_FLAT) 28#define APIC_DFR_VALUE (APIC_DFR_FLAT)
29#define INT_DELIVERY_MODE (dest_Fixed) 29#define INT_DELIVERY_MODE (dest_Fixed)
@@ -141,4 +141,4 @@ static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
141 return cpuid_apic >> index_msb; 141 return cpuid_apic >> index_msb;
142} 142}
143 143
144#endif /* ASM_X86__MACH_BIGSMP__MACH_APIC_H */ 144#endif /* __ASM_MACH_APIC_H */
diff --git a/include/asm-x86/bigsmp/apicdef.h b/include/asm-x86/bigsmp/apicdef.h
new file mode 100644
index 00000000000..392c3f5ef2f
--- /dev/null
+++ b/include/asm-x86/bigsmp/apicdef.h
@@ -0,0 +1,13 @@
1#ifndef __ASM_MACH_APICDEF_H
2#define __ASM_MACH_APICDEF_H
3
4#define APIC_ID_MASK (0xFF<<24)
5
6static inline unsigned get_apic_id(unsigned long x)
7{
8 return (((x)>>24)&0xFF);
9}
10
11#define GET_APIC_ID(x) get_apic_id(x)
12
13#endif
diff --git a/include/asm-x86/mach-bigsmp/mach_ipi.h b/include/asm-x86/bigsmp/ipi.h
index b1b0f966a00..9404c535b7e 100644
--- a/include/asm-x86/mach-bigsmp/mach_ipi.h
+++ b/include/asm-x86/bigsmp/ipi.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_BIGSMP__MACH_IPI_H 1#ifndef __ASM_MACH_IPI_H
2#define ASM_X86__MACH_BIGSMP__MACH_IPI_H 2#define __ASM_MACH_IPI_H
3 3
4void send_IPI_mask_sequence(cpumask_t mask, int vector); 4void send_IPI_mask_sequence(cpumask_t mask, int vector);
5 5
@@ -22,4 +22,4 @@ static inline void send_IPI_all(int vector)
22 send_IPI_mask(cpu_online_map, vector); 22 send_IPI_mask(cpu_online_map, vector);
23} 23}
24 24
25#endif /* ASM_X86__MACH_BIGSMP__MACH_IPI_H */ 25#endif /* __ASM_MACH_IPI_H */
diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h
index 250fa0cb144..065c6a86ed8 100644
--- a/include/asm-x86/cpufeature.h
+++ b/include/asm-x86/cpufeature.h
@@ -93,6 +93,7 @@
93#define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */ 93#define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */
94#define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */ 94#define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */
95#define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */ 95#define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */
96#define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */
96#define X86_FEATURE_XMM4_2 (4*32+20) /* Streaming SIMD Extensions-4.2 */ 97#define X86_FEATURE_XMM4_2 (4*32+20) /* Streaming SIMD Extensions-4.2 */
97 98
98/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ 99/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
@@ -192,6 +193,7 @@ extern const char * const x86_power_flags[32];
192#define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES) 193#define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES)
193#define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) 194#define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
194#define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT) 195#define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT)
196#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC)
195#define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2) 197#define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2)
196 198
197#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) 199#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
diff --git a/include/asm-x86/mach-es7000/mach_apic.h b/include/asm-x86/es7000/apic.h
index c1f6f682d61..bd2c44d1f7a 100644
--- a/include/asm-x86/mach-es7000/mach_apic.h
+++ b/include/asm-x86/es7000/apic.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_ES7000__MACH_APIC_H 1#ifndef __ASM_ES7000_APIC_H
2#define ASM_X86__MACH_ES7000__MACH_APIC_H 2#define __ASM_ES7000_APIC_H
3 3
4#define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu) 4#define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu)
5#define esr_disable (1) 5#define esr_disable (1)
@@ -10,7 +10,7 @@ static inline int apic_id_registered(void)
10} 10}
11 11
12static inline cpumask_t target_cpus(void) 12static inline cpumask_t target_cpus(void)
13{ 13{
14#if defined CONFIG_ES7000_CLUSTERED_APIC 14#if defined CONFIG_ES7000_CLUSTERED_APIC
15 return CPU_MASK_ALL; 15 return CPU_MASK_ALL;
16#else 16#else
@@ -23,24 +23,24 @@ static inline cpumask_t target_cpus(void)
23#define APIC_DFR_VALUE (APIC_DFR_CLUSTER) 23#define APIC_DFR_VALUE (APIC_DFR_CLUSTER)
24#define INT_DELIVERY_MODE (dest_LowestPrio) 24#define INT_DELIVERY_MODE (dest_LowestPrio)
25#define INT_DEST_MODE (1) /* logical delivery broadcast to all procs */ 25#define INT_DEST_MODE (1) /* logical delivery broadcast to all procs */
26#define NO_BALANCE_IRQ (1) 26#define NO_BALANCE_IRQ (1)
27#undef WAKE_SECONDARY_VIA_INIT 27#undef WAKE_SECONDARY_VIA_INIT
28#define WAKE_SECONDARY_VIA_MIP 28#define WAKE_SECONDARY_VIA_MIP
29#else 29#else
30#define APIC_DFR_VALUE (APIC_DFR_FLAT) 30#define APIC_DFR_VALUE (APIC_DFR_FLAT)
31#define INT_DELIVERY_MODE (dest_Fixed) 31#define INT_DELIVERY_MODE (dest_Fixed)
32#define INT_DEST_MODE (0) /* phys delivery to target procs */ 32#define INT_DEST_MODE (0) /* phys delivery to target procs */
33#define NO_BALANCE_IRQ (0) 33#define NO_BALANCE_IRQ (0)
34#undef APIC_DEST_LOGICAL 34#undef APIC_DEST_LOGICAL
35#define APIC_DEST_LOGICAL 0x0 35#define APIC_DEST_LOGICAL 0x0
36#define WAKE_SECONDARY_VIA_INIT 36#define WAKE_SECONDARY_VIA_INIT
37#endif 37#endif
38 38
39static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) 39static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
40{ 40{
41 return 0; 41 return 0;
42} 42}
43static inline unsigned long check_apicid_present(int bit) 43static inline unsigned long check_apicid_present(int bit)
44{ 44{
45 return physid_isset(bit, phys_cpu_present_map); 45 return physid_isset(bit, phys_cpu_present_map);
46} 46}
@@ -80,7 +80,7 @@ static inline void setup_apic_routing(void)
80{ 80{
81 int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); 81 int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id());
82 printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", 82 printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n",
83 (apic_version[apic] == 0x14) ? 83 (apic_version[apic] == 0x14) ?
84 "Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(TARGET_CPUS)[0]); 84 "Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(TARGET_CPUS)[0]);
85} 85}
86 86
@@ -141,7 +141,7 @@ static inline void setup_portio_remap(void)
141extern unsigned int boot_cpu_physical_apicid; 141extern unsigned int boot_cpu_physical_apicid;
142static inline int check_phys_apicid_present(int cpu_physical_apicid) 142static inline int check_phys_apicid_present(int cpu_physical_apicid)
143{ 143{
144 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 144 boot_cpu_physical_apicid = read_apic_id();
145 return (1); 145 return (1);
146} 146}
147 147
@@ -150,7 +150,7 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
150 int num_bits_set; 150 int num_bits_set;
151 int cpus_found = 0; 151 int cpus_found = 0;
152 int cpu; 152 int cpu;
153 int apicid; 153 int apicid;
154 154
155 num_bits_set = cpus_weight(cpumask); 155 num_bits_set = cpus_weight(cpumask);
156 /* Return id to all */ 156 /* Return id to all */
@@ -160,16 +160,16 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
160#else 160#else
161 return cpu_to_logical_apicid(0); 161 return cpu_to_logical_apicid(0);
162#endif 162#endif
163 /* 163 /*
164 * The cpus in the mask must all be on the apic cluster. If are not 164 * The cpus in the mask must all be on the apic cluster. If are not
165 * on the same apicid cluster return default value of TARGET_CPUS. 165 * on the same apicid cluster return default value of TARGET_CPUS.
166 */ 166 */
167 cpu = first_cpu(cpumask); 167 cpu = first_cpu(cpumask);
168 apicid = cpu_to_logical_apicid(cpu); 168 apicid = cpu_to_logical_apicid(cpu);
169 while (cpus_found < num_bits_set) { 169 while (cpus_found < num_bits_set) {
170 if (cpu_isset(cpu, cpumask)) { 170 if (cpu_isset(cpu, cpumask)) {
171 int new_apicid = cpu_to_logical_apicid(cpu); 171 int new_apicid = cpu_to_logical_apicid(cpu);
172 if (apicid_cluster(apicid) != 172 if (apicid_cluster(apicid) !=
173 apicid_cluster(new_apicid)){ 173 apicid_cluster(new_apicid)){
174 printk ("%s: Not a valid mask!\n",__FUNCTION__); 174 printk ("%s: Not a valid mask!\n",__FUNCTION__);
175#if defined CONFIG_ES7000_CLUSTERED_APIC 175#if defined CONFIG_ES7000_CLUSTERED_APIC
@@ -191,4 +191,4 @@ static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
191 return cpuid_apic >> index_msb; 191 return cpuid_apic >> index_msb;
192} 192}
193 193
194#endif /* ASM_X86__MACH_ES7000__MACH_APIC_H */ 194#endif /* __ASM_ES7000_APIC_H */
diff --git a/include/asm-x86/es7000/apicdef.h b/include/asm-x86/es7000/apicdef.h
new file mode 100644
index 00000000000..8b234a3cb85
--- /dev/null
+++ b/include/asm-x86/es7000/apicdef.h
@@ -0,0 +1,13 @@
1#ifndef __ASM_ES7000_APICDEF_H
2#define __ASM_ES7000_APICDEF_H
3
4#define APIC_ID_MASK (0xFF<<24)
5
6static inline unsigned get_apic_id(unsigned long x)
7{
8 return (((x)>>24)&0xFF);
9}
10
11#define GET_APIC_ID(x) get_apic_id(x)
12
13#endif
diff --git a/include/asm-x86/mach-es7000/mach_ipi.h b/include/asm-x86/es7000/ipi.h
index 3a21240e03d..632a955fcc0 100644
--- a/include/asm-x86/mach-es7000/mach_ipi.h
+++ b/include/asm-x86/es7000/ipi.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_ES7000__MACH_IPI_H 1#ifndef __ASM_ES7000_IPI_H
2#define ASM_X86__MACH_ES7000__MACH_IPI_H 2#define __ASM_ES7000_IPI_H
3 3
4void send_IPI_mask_sequence(cpumask_t mask, int vector); 4void send_IPI_mask_sequence(cpumask_t mask, int vector);
5 5
@@ -21,4 +21,4 @@ static inline void send_IPI_all(int vector)
21 send_IPI_mask(cpu_online_map, vector); 21 send_IPI_mask(cpu_online_map, vector);
22} 22}
23 23
24#endif /* ASM_X86__MACH_ES7000__MACH_IPI_H */ 24#endif /* __ASM_ES7000_IPI_H */
diff --git a/include/asm-x86/mach-es7000/mach_mpparse.h b/include/asm-x86/es7000/mpparse.h
index befde24705b..7b5c889d8e7 100644
--- a/include/asm-x86/mach-es7000/mach_mpparse.h
+++ b/include/asm-x86/es7000/mpparse.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_ES7000__MACH_MPPARSE_H 1#ifndef __ASM_ES7000_MPPARSE_H
2#define ASM_X86__MACH_ES7000__MACH_MPPARSE_H 2#define __ASM_ES7000_MPPARSE_H
3 3
4#include <linux/acpi.h> 4#include <linux/acpi.h>
5 5
@@ -26,4 +26,4 @@ static inline int es7000_check_dsdt(void)
26} 26}
27#endif 27#endif
28 28
29#endif /* ASM_X86__MACH_ES7000__MACH_MPPARSE_H */ 29#endif /* __ASM_MACH_MPPARSE_H */
diff --git a/include/asm-x86/mach-es7000/mach_wakecpu.h b/include/asm-x86/es7000/wakecpu.h
index 97c776ce13f..3ffc5a7bf66 100644
--- a/include/asm-x86/mach-es7000/mach_wakecpu.h
+++ b/include/asm-x86/es7000/wakecpu.h
@@ -1,7 +1,7 @@
1#ifndef ASM_X86__MACH_ES7000__MACH_WAKECPU_H 1#ifndef __ASM_ES7000_WAKECPU_H
2#define ASM_X86__MACH_ES7000__MACH_WAKECPU_H 2#define __ASM_ES7000_WAKECPU_H
3 3
4/* 4/*
5 * This file copes with machines that wakeup secondary CPUs by the 5 * This file copes with machines that wakeup secondary CPUs by the
6 * INIT, INIT, STARTUP sequence. 6 * INIT, INIT, STARTUP sequence.
7 */ 7 */
@@ -56,4 +56,4 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
56 #define inquire_remote_apic(apicid) {} 56 #define inquire_remote_apic(apicid) {}
57#endif 57#endif
58 58
59#endif /* ASM_X86__MACH_ES7000__MACH_WAKECPU_H */ 59#endif /* __ASM_MACH_WAKECPU_H */
diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h
index 25097a8cc5e..ed6a4886c08 100644
--- a/include/asm-x86/genapic_64.h
+++ b/include/asm-x86/genapic_64.h
@@ -14,6 +14,7 @@
14 14
15struct genapic { 15struct genapic {
16 char *name; 16 char *name;
17 int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id);
17 u32 int_delivery_mode; 18 u32 int_delivery_mode;
18 u32 int_dest_mode; 19 u32 int_dest_mode;
19 int (*apic_id_registered)(void); 20 int (*apic_id_registered)(void);
@@ -24,17 +25,24 @@ struct genapic {
24 void (*send_IPI_mask)(cpumask_t mask, int vector); 25 void (*send_IPI_mask)(cpumask_t mask, int vector);
25 void (*send_IPI_allbutself)(int vector); 26 void (*send_IPI_allbutself)(int vector);
26 void (*send_IPI_all)(int vector); 27 void (*send_IPI_all)(int vector);
28 void (*send_IPI_self)(int vector);
27 /* */ 29 /* */
28 unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); 30 unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
29 unsigned int (*phys_pkg_id)(int index_msb); 31 unsigned int (*phys_pkg_id)(int index_msb);
32 unsigned int (*get_apic_id)(unsigned long x);
33 unsigned long (*set_apic_id)(unsigned int id);
34 unsigned long apic_id_mask;
30}; 35};
31 36
32extern struct genapic *genapic; 37extern struct genapic *genapic;
33 38
34extern struct genapic apic_flat; 39extern struct genapic apic_flat;
35extern struct genapic apic_physflat; 40extern struct genapic apic_physflat;
41extern struct genapic apic_x2apic_cluster;
42extern struct genapic apic_x2apic_phys;
36extern int acpi_madt_oem_check(char *, char *); 43extern int acpi_madt_oem_check(char *, char *);
37 44
45extern void apic_send_IPI_self(int vector);
38enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; 46enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
39extern enum uv_system_type get_uv_system_type(void); 47extern enum uv_system_type get_uv_system_type(void);
40extern int is_uv_system(void); 48extern int is_uv_system(void);
diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h
index 65997b15d56..50f6e0316b5 100644
--- a/include/asm-x86/hw_irq.h
+++ b/include/asm-x86/hw_irq.h
@@ -64,7 +64,6 @@ extern unsigned long io_apic_irqs;
64extern void init_VISWS_APIC_irqs(void); 64extern void init_VISWS_APIC_irqs(void);
65extern void setup_IO_APIC(void); 65extern void setup_IO_APIC(void);
66extern void disable_IO_APIC(void); 66extern void disable_IO_APIC(void);
67extern void print_IO_APIC(void);
68extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn); 67extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
69extern void setup_ioapic_dest(void); 68extern void setup_ioapic_dest(void);
70 69
@@ -73,7 +72,9 @@ extern void enable_IO_APIC(void);
73#endif 72#endif
74 73
75/* IPI functions */ 74/* IPI functions */
75#ifdef CONFIG_X86_32
76extern void send_IPI_self(int vector); 76extern void send_IPI_self(int vector);
77#endif
77extern void send_IPI(int dest, int vector); 78extern void send_IPI(int dest, int vector);
78 79
79/* Statistics */ 80/* Statistics */
diff --git a/include/asm-x86/i8259.h b/include/asm-x86/i8259.h
index c586559a695..23c1b3baaec 100644
--- a/include/asm-x86/i8259.h
+++ b/include/asm-x86/i8259.h
@@ -57,4 +57,7 @@ static inline void outb_pic(unsigned char value, unsigned int port)
57 57
58extern struct irq_chip i8259A_chip; 58extern struct irq_chip i8259A_chip;
59 59
60extern void mask_8259A(void);
61extern void unmask_8259A(void);
62
60#endif /* ASM_X86__I8259_H */ 63#endif /* ASM_X86__I8259_H */
diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h
index be62847ab07..8ec68a50cf1 100644
--- a/include/asm-x86/io_apic.h
+++ b/include/asm-x86/io_apic.h
@@ -107,6 +107,20 @@ struct IO_APIC_route_entry {
107 107
108} __attribute__ ((packed)); 108} __attribute__ ((packed));
109 109
110struct IR_IO_APIC_route_entry {
111 __u64 vector : 8,
112 zero : 3,
113 index2 : 1,
114 delivery_status : 1,
115 polarity : 1,
116 irr : 1,
117 trigger : 1,
118 mask : 1,
119 reserved : 31,
120 format : 1,
121 index : 15;
122} __attribute__ ((packed));
123
110#ifdef CONFIG_X86_IO_APIC 124#ifdef CONFIG_X86_IO_APIC
111 125
112/* 126/*
@@ -183,6 +197,12 @@ extern int io_apic_set_pci_routing(int ioapic, int pin, int irq,
183extern int (*ioapic_renumber_irq)(int ioapic, int irq); 197extern int (*ioapic_renumber_irq)(int ioapic, int irq);
184extern void ioapic_init_mappings(void); 198extern void ioapic_init_mappings(void);
185 199
200#ifdef CONFIG_X86_64
201extern int save_mask_IO_APIC_setup(void);
202extern void restore_IO_APIC_setup(void);
203extern void reinit_intr_remapped_IO_APIC(int);
204#endif
205
186#else /* !CONFIG_X86_IO_APIC */ 206#else /* !CONFIG_X86_IO_APIC */
187#define io_apic_assign_pci_irqs 0 207#define io_apic_assign_pci_irqs 0
188static const int timer_through_8259 = 0; 208static const int timer_through_8259 = 0;
diff --git a/include/asm-x86/ipi.h b/include/asm-x86/ipi.h
index c1b22679751..30a692cfaff 100644
--- a/include/asm-x86/ipi.h
+++ b/include/asm-x86/ipi.h
@@ -49,6 +49,12 @@ static inline int __prepare_ICR2(unsigned int mask)
49 return SET_APIC_DEST_FIELD(mask); 49 return SET_APIC_DEST_FIELD(mask);
50} 50}
51 51
52static inline void __xapic_wait_icr_idle(void)
53{
54 while (native_apic_mem_read(APIC_ICR) & APIC_ICR_BUSY)
55 cpu_relax();
56}
57
52static inline void __send_IPI_shortcut(unsigned int shortcut, int vector, 58static inline void __send_IPI_shortcut(unsigned int shortcut, int vector,
53 unsigned int dest) 59 unsigned int dest)
54{ 60{
@@ -64,7 +70,7 @@ static inline void __send_IPI_shortcut(unsigned int shortcut, int vector,
64 /* 70 /*
65 * Wait for idle. 71 * Wait for idle.
66 */ 72 */
67 apic_wait_icr_idle(); 73 __xapic_wait_icr_idle();
68 74
69 /* 75 /*
70 * No need to touch the target chip field 76 * No need to touch the target chip field
@@ -74,7 +80,7 @@ static inline void __send_IPI_shortcut(unsigned int shortcut, int vector,
74 /* 80 /*
75 * Send the IPI. The write to APIC_ICR fires this off. 81 * Send the IPI. The write to APIC_ICR fires this off.
76 */ 82 */
77 apic_write(APIC_ICR, cfg); 83 native_apic_mem_write(APIC_ICR, cfg);
78} 84}
79 85
80/* 86/*
@@ -92,13 +98,13 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector,
92 if (unlikely(vector == NMI_VECTOR)) 98 if (unlikely(vector == NMI_VECTOR))
93 safe_apic_wait_icr_idle(); 99 safe_apic_wait_icr_idle();
94 else 100 else
95 apic_wait_icr_idle(); 101 __xapic_wait_icr_idle();
96 102
97 /* 103 /*
98 * prepare target chip field 104 * prepare target chip field
99 */ 105 */
100 cfg = __prepare_ICR2(mask); 106 cfg = __prepare_ICR2(mask);
101 apic_write(APIC_ICR2, cfg); 107 native_apic_mem_write(APIC_ICR2, cfg);
102 108
103 /* 109 /*
104 * program the ICR 110 * program the ICR
@@ -108,7 +114,7 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector,
108 /* 114 /*
109 * Send the IPI. The write to APIC_ICR fires this off. 115 * Send the IPI. The write to APIC_ICR fires this off.
110 */ 116 */
111 apic_write(APIC_ICR, cfg); 117 native_apic_mem_write(APIC_ICR, cfg);
112} 118}
113 119
114static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) 120static inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
diff --git a/include/asm-x86/irq_remapping.h b/include/asm-x86/irq_remapping.h
new file mode 100644
index 00000000000..78242c6ffa5
--- /dev/null
+++ b/include/asm-x86/irq_remapping.h
@@ -0,0 +1,8 @@
1#ifndef _ASM_IRQ_REMAPPING_H
2#define _ASM_IRQ_REMAPPING_H
3
4extern int x2apic;
5
6#define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8)
7
8#endif
diff --git a/include/asm-x86/mach-bigsmp/mach_apicdef.h b/include/asm-x86/mach-bigsmp/mach_apicdef.h
deleted file mode 100644
index 811935d9d49..00000000000
--- a/include/asm-x86/mach-bigsmp/mach_apicdef.h
+++ /dev/null
@@ -1,13 +0,0 @@
1#ifndef ASM_X86__MACH_BIGSMP__MACH_APICDEF_H
2#define ASM_X86__MACH_BIGSMP__MACH_APICDEF_H
3
4#define APIC_ID_MASK (0xFF<<24)
5
6static inline unsigned get_apic_id(unsigned long x)
7{
8 return (((x)>>24)&0xFF);
9}
10
11#define GET_APIC_ID(x) get_apic_id(x)
12
13#endif /* ASM_X86__MACH_BIGSMP__MACH_APICDEF_H */
diff --git a/include/asm-x86/mach-default/mach_apic.h b/include/asm-x86/mach-default/mach_apic.h
index b615f40736b..2a330a41b3d 100644
--- a/include/asm-x86/mach-default/mach_apic.h
+++ b/include/asm-x86/mach-default/mach_apic.h
@@ -30,6 +30,8 @@ static inline cpumask_t target_cpus(void)
30#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) 30#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
31#define phys_pkg_id (genapic->phys_pkg_id) 31#define phys_pkg_id (genapic->phys_pkg_id)
32#define vector_allocation_domain (genapic->vector_allocation_domain) 32#define vector_allocation_domain (genapic->vector_allocation_domain)
33#define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID)))
34#define send_IPI_self (genapic->send_IPI_self)
33extern void setup_apic_routing(void); 35extern void setup_apic_routing(void);
34#else 36#else
35#define INT_DELIVERY_MODE dest_LowestPrio 37#define INT_DELIVERY_MODE dest_LowestPrio
@@ -54,7 +56,7 @@ static inline void init_apic_ldr(void)
54 56
55static inline int apic_id_registered(void) 57static inline int apic_id_registered(void)
56{ 58{
57 return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map); 59 return physid_isset(read_apic_id(), phys_cpu_present_map);
58} 60}
59 61
60static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) 62static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
diff --git a/include/asm-x86/mach-default/mach_apicdef.h b/include/asm-x86/mach-default/mach_apicdef.h
index 936704f816d..0c2d41c41b2 100644
--- a/include/asm-x86/mach-default/mach_apicdef.h
+++ b/include/asm-x86/mach-default/mach_apicdef.h
@@ -4,9 +4,9 @@
4#include <asm/apic.h> 4#include <asm/apic.h>
5 5
6#ifdef CONFIG_X86_64 6#ifdef CONFIG_X86_64
7#define APIC_ID_MASK (0xFFu<<24) 7#define APIC_ID_MASK (genapic->apic_id_mask)
8#define GET_APIC_ID(x) (((x)>>24)&0xFFu) 8#define GET_APIC_ID(x) (genapic->get_apic_id(x))
9#define SET_APIC_ID(x) (((x)<<24)) 9#define SET_APIC_ID(x) (genapic->set_apic_id(x))
10#else 10#else
11#define APIC_ID_MASK (0xF<<24) 11#define APIC_ID_MASK (0xF<<24)
12static inline unsigned get_apic_id(unsigned long x) 12static inline unsigned get_apic_id(unsigned long x)
diff --git a/include/asm-x86/mach-es7000/mach_apicdef.h b/include/asm-x86/mach-es7000/mach_apicdef.h
deleted file mode 100644
index a07e5674402..00000000000
--- a/include/asm-x86/mach-es7000/mach_apicdef.h
+++ /dev/null
@@ -1,13 +0,0 @@
1#ifndef ASM_X86__MACH_ES7000__MACH_APICDEF_H
2#define ASM_X86__MACH_ES7000__MACH_APICDEF_H
3
4#define APIC_ID_MASK (0xFF<<24)
5
6static inline unsigned get_apic_id(unsigned long x)
7{
8 return (((x)>>24)&0xFF);
9}
10
11#define GET_APIC_ID(x) get_apic_id(x)
12
13#endif /* ASM_X86__MACH_ES7000__MACH_APICDEF_H */
diff --git a/include/asm-x86/mach-numaq/mach_mpparse.h b/include/asm-x86/mach-numaq/mach_mpparse.h
deleted file mode 100644
index 74ade184920..00000000000
--- a/include/asm-x86/mach-numaq/mach_mpparse.h
+++ /dev/null
@@ -1,7 +0,0 @@
1#ifndef ASM_X86__MACH_NUMAQ__MACH_MPPARSE_H
2#define ASM_X86__MACH_NUMAQ__MACH_MPPARSE_H
3
4extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
5 char *productid);
6
7#endif /* ASM_X86__MACH_NUMAQ__MACH_MPPARSE_H */
diff --git a/include/asm-x86/mach-summit/mach_apicdef.h b/include/asm-x86/mach-summit/mach_apicdef.h
deleted file mode 100644
index d4bc8590c4f..00000000000
--- a/include/asm-x86/mach-summit/mach_apicdef.h
+++ /dev/null
@@ -1,13 +0,0 @@
1#ifndef ASM_X86__MACH_SUMMIT__MACH_APICDEF_H
2#define ASM_X86__MACH_SUMMIT__MACH_APICDEF_H
3
4#define APIC_ID_MASK (0xFF<<24)
5
6static inline unsigned get_apic_id(unsigned long x)
7{
8 return (((x)>>24)&0xFF);
9}
10
11#define GET_APIC_ID(x) get_apic_id(x)
12
13#endif /* ASM_X86__MACH_SUMMIT__MACH_APICDEF_H */
diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h
index 118da365e37..be2241a818f 100644
--- a/include/asm-x86/mpspec.h
+++ b/include/asm-x86/mpspec.h
@@ -5,11 +5,12 @@
5 5
6#include <asm/mpspec_def.h> 6#include <asm/mpspec_def.h>
7 7
8extern int apic_version[MAX_APICS];
9
8#ifdef CONFIG_X86_32 10#ifdef CONFIG_X86_32
9#include <mach_mpspec.h> 11#include <mach_mpspec.h>
10 12
11extern unsigned int def_to_bigsmp; 13extern unsigned int def_to_bigsmp;
12extern int apic_version[MAX_APICS];
13extern u8 apicid_2_node[]; 14extern u8 apicid_2_node[];
14extern int pic_mode; 15extern int pic_mode;
15 16
diff --git a/include/asm-x86/msidef.h b/include/asm-x86/msidef.h
index 3139666a94f..ed919024687 100644
--- a/include/asm-x86/msidef.h
+++ b/include/asm-x86/msidef.h
@@ -48,4 +48,8 @@
48#define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \ 48#define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \
49 MSI_ADDR_DEST_ID_MASK) 49 MSI_ADDR_DEST_ID_MASK)
50 50
51#define MSI_ADDR_IR_EXT_INT (1 << 4)
52#define MSI_ADDR_IR_SHV (1 << 3)
53#define MSI_ADDR_IR_INDEX1(index) ((index & 0x8000) >> 13)
54#define MSI_ADDR_IR_INDEX2(index) ((index & 0x7fff) << 5)
51#endif /* ASM_X86__MSIDEF_H */ 55#endif /* ASM_X86__MSIDEF_H */
diff --git a/include/asm-x86/mach-numaq/mach_apic.h b/include/asm-x86/numaq/apic.h
index 7a0d39edfcf..a8344ba6ea1 100644
--- a/include/asm-x86/mach-numaq/mach_apic.h
+++ b/include/asm-x86/numaq/apic.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_NUMAQ__MACH_APIC_H 1#ifndef __ASM_NUMAQ_APIC_H
2#define ASM_X86__MACH_NUMAQ__MACH_APIC_H 2#define __ASM_NUMAQ_APIC_H
3 3
4#include <asm/io.h> 4#include <asm/io.h>
5#include <linux/mmzone.h> 5#include <linux/mmzone.h>
@@ -135,4 +135,4 @@ static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
135 return cpuid_apic >> index_msb; 135 return cpuid_apic >> index_msb;
136} 136}
137 137
138#endif /* ASM_X86__MACH_NUMAQ__MACH_APIC_H */ 138#endif /* __ASM_NUMAQ_APIC_H */
diff --git a/include/asm-x86/mach-numaq/mach_apicdef.h b/include/asm-x86/numaq/apicdef.h
index f870ec5f778..e012a46cc22 100644
--- a/include/asm-x86/mach-numaq/mach_apicdef.h
+++ b/include/asm-x86/numaq/apicdef.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_NUMAQ__MACH_APICDEF_H 1#ifndef __ASM_NUMAQ_APICDEF_H
2#define ASM_X86__MACH_NUMAQ__MACH_APICDEF_H 2#define __ASM_NUMAQ_APICDEF_H
3 3
4 4
5#define APIC_ID_MASK (0xF<<24) 5#define APIC_ID_MASK (0xF<<24)
@@ -11,4 +11,4 @@ static inline unsigned get_apic_id(unsigned long x)
11 11
12#define GET_APIC_ID(x) get_apic_id(x) 12#define GET_APIC_ID(x) get_apic_id(x)
13 13
14#endif /* ASM_X86__MACH_NUMAQ__MACH_APICDEF_H */ 14#endif
diff --git a/include/asm-x86/mach-numaq/mach_ipi.h b/include/asm-x86/numaq/ipi.h
index 1e835823f4b..935588d286c 100644
--- a/include/asm-x86/mach-numaq/mach_ipi.h
+++ b/include/asm-x86/numaq/ipi.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_NUMAQ__MACH_IPI_H 1#ifndef __ASM_NUMAQ_IPI_H
2#define ASM_X86__MACH_NUMAQ__MACH_IPI_H 2#define __ASM_NUMAQ_IPI_H
3 3
4void send_IPI_mask_sequence(cpumask_t, int vector); 4void send_IPI_mask_sequence(cpumask_t, int vector);
5 5
@@ -22,4 +22,4 @@ static inline void send_IPI_all(int vector)
22 send_IPI_mask(cpu_online_map, vector); 22 send_IPI_mask(cpu_online_map, vector);
23} 23}
24 24
25#endif /* ASM_X86__MACH_NUMAQ__MACH_IPI_H */ 25#endif /* __ASM_NUMAQ_IPI_H */
diff --git a/include/asm-x86/numaq/mpparse.h b/include/asm-x86/numaq/mpparse.h
new file mode 100644
index 00000000000..252292e077b
--- /dev/null
+++ b/include/asm-x86/numaq/mpparse.h
@@ -0,0 +1,7 @@
1#ifndef __ASM_NUMAQ_MPPARSE_H
2#define __ASM_NUMAQ_MPPARSE_H
3
4extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
5 char *productid);
6
7#endif /* __ASM_NUMAQ_MPPARSE_H */
diff --git a/include/asm-x86/mach-numaq/mach_wakecpu.h b/include/asm-x86/numaq/wakecpu.h
index 0db8cea643c..c577bda5b1c 100644
--- a/include/asm-x86/mach-numaq/mach_wakecpu.h
+++ b/include/asm-x86/numaq/wakecpu.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_NUMAQ__MACH_WAKECPU_H 1#ifndef __ASM_NUMAQ_WAKECPU_H
2#define ASM_X86__MACH_NUMAQ__MACH_WAKECPU_H 2#define __ASM_NUMAQ_WAKECPU_H
3 3
4/* This file copes with machines that wakeup secondary CPUs by NMIs */ 4/* This file copes with machines that wakeup secondary CPUs by NMIs */
5 5
@@ -40,4 +40,4 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
40 40
41#define inquire_remote_apic(apicid) {} 41#define inquire_remote_apic(apicid) {}
42 42
43#endif /* ASM_X86__MACH_NUMAQ__MACH_WAKECPU_H */ 43#endif /* __ASM_NUMAQ_WAKECPU_H */
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 891971f57d3..d7d358a4399 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -201,12 +201,6 @@ struct pv_irq_ops {
201 201
202struct pv_apic_ops { 202struct pv_apic_ops {
203#ifdef CONFIG_X86_LOCAL_APIC 203#ifdef CONFIG_X86_LOCAL_APIC
204 /*
205 * Direct APIC operations, principally for VMI. Ideally
206 * these shouldn't be in this interface.
207 */
208 void (*apic_write)(unsigned long reg, u32 v);
209 u32 (*apic_read)(unsigned long reg);
210 void (*setup_boot_clock)(void); 204 void (*setup_boot_clock)(void);
211 void (*setup_secondary_clock)(void); 205 void (*setup_secondary_clock)(void);
212 206
@@ -910,19 +904,6 @@ static inline void slow_down_io(void)
910} 904}
911 905
912#ifdef CONFIG_X86_LOCAL_APIC 906#ifdef CONFIG_X86_LOCAL_APIC
913/*
914 * Basic functions accessing APICs.
915 */
916static inline void apic_write(unsigned long reg, u32 v)
917{
918 PVOP_VCALL2(pv_apic_ops.apic_write, reg, v);
919}
920
921static inline u32 apic_read(unsigned long reg)
922{
923 return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg);
924}
925
926static inline void setup_boot_clock(void) 907static inline void setup_boot_clock(void)
927{ 908{
928 PVOP_VCALL0(pv_apic_ops.setup_boot_clock); 909 PVOP_VCALL0(pv_apic_ops.setup_boot_clock);
diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h
index 9030cb73c4d..11b6cc14b28 100644
--- a/include/asm-x86/setup.h
+++ b/include/asm-x86/setup.h
@@ -38,6 +38,7 @@ struct x86_quirks {
38 void (*mpc_oem_pci_bus)(struct mpc_config_bus *m); 38 void (*mpc_oem_pci_bus)(struct mpc_config_bus *m);
39 void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable, 39 void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable,
40 unsigned short oemsize); 40 unsigned short oemsize);
41 int (*setup_ioapic_ids)(void);
41}; 42};
42 43
43extern struct x86_quirks *x86_quirks; 44extern struct x86_quirks *x86_quirks;
diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h
index 04f84f4e2c8..29324c10334 100644
--- a/include/asm-x86/smp.h
+++ b/include/asm-x86/smp.h
@@ -167,30 +167,33 @@ extern int safe_smp_processor_id(void);
167 167
168#ifdef CONFIG_X86_LOCAL_APIC 168#ifdef CONFIG_X86_LOCAL_APIC
169 169
170#ifndef CONFIG_X86_64
170static inline int logical_smp_processor_id(void) 171static inline int logical_smp_processor_id(void)
171{ 172{
172 /* we don't want to mark this access volatile - bad code generation */ 173 /* we don't want to mark this access volatile - bad code generation */
173 return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR)); 174 return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
174} 175}
175 176
176#ifndef CONFIG_X86_64 177#include <mach_apicdef.h>
177static inline unsigned int read_apic_id(void) 178static inline unsigned int read_apic_id(void)
178{ 179{
179 return *(u32 *)(APIC_BASE + APIC_ID); 180 unsigned int reg;
181
182 reg = *(u32 *)(APIC_BASE + APIC_ID);
183
184 return GET_APIC_ID(reg);
180} 185}
181#else
182extern unsigned int read_apic_id(void);
183#endif 186#endif
184 187
185 188
186# ifdef APIC_DEFINITION 189# if defined(APIC_DEFINITION) || defined(CONFIG_X86_64)
187extern int hard_smp_processor_id(void); 190extern int hard_smp_processor_id(void);
188# else 191# else
189# include <mach_apicdef.h> 192#include <mach_apicdef.h>
190static inline int hard_smp_processor_id(void) 193static inline int hard_smp_processor_id(void)
191{ 194{
192 /* we don't want to mark this access volatile - bad code generation */ 195 /* we don't want to mark this access volatile - bad code generation */
193 return GET_APIC_ID(read_apic_id()); 196 return read_apic_id();
194} 197}
195# endif /* APIC_DEFINITION */ 198# endif /* APIC_DEFINITION */
196 199
diff --git a/include/asm-x86/mach-summit/mach_apic.h b/include/asm-x86/summit/apic.h
index 7a66758d701..c5b2e4b1035 100644
--- a/include/asm-x86/mach-summit/mach_apic.h
+++ b/include/asm-x86/summit/apic.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_SUMMIT__MACH_APIC_H 1#ifndef __ASM_SUMMIT_APIC_H
2#define ASM_X86__MACH_SUMMIT__MACH_APIC_H 2#define __ASM_SUMMIT_APIC_H
3 3
4#include <asm/smp.h> 4#include <asm/smp.h>
5 5
@@ -21,7 +21,7 @@ static inline cpumask_t target_cpus(void)
21 * Just start on cpu 0. IRQ balancing will spread load 21 * Just start on cpu 0. IRQ balancing will spread load
22 */ 22 */
23 return cpumask_of_cpu(0); 23 return cpumask_of_cpu(0);
24} 24}
25#define TARGET_CPUS (target_cpus()) 25#define TARGET_CPUS (target_cpus())
26 26
27#define INT_DELIVERY_MODE (dest_LowestPrio) 27#define INT_DELIVERY_MODE (dest_LowestPrio)
@@ -30,10 +30,10 @@ static inline cpumask_t target_cpus(void)
30static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) 30static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
31{ 31{
32 return 0; 32 return 0;
33} 33}
34 34
35/* we don't use the phys_cpu_present_map to indicate apicid presence */ 35/* we don't use the phys_cpu_present_map to indicate apicid presence */
36static inline unsigned long check_apicid_present(int bit) 36static inline unsigned long check_apicid_present(int bit)
37{ 37{
38 return 1; 38 return 1;
39} 39}
@@ -122,7 +122,7 @@ static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_id_map)
122 122
123static inline physid_mask_t apicid_to_cpu_present(int apicid) 123static inline physid_mask_t apicid_to_cpu_present(int apicid)
124{ 124{
125 return physid_mask_of_physid(apicid); 125 return physid_mask_of_physid(0);
126} 126}
127 127
128static inline void setup_portio_remap(void) 128static inline void setup_portio_remap(void)
@@ -143,22 +143,22 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
143 int num_bits_set; 143 int num_bits_set;
144 int cpus_found = 0; 144 int cpus_found = 0;
145 int cpu; 145 int cpu;
146 int apicid; 146 int apicid;
147 147
148 num_bits_set = cpus_weight(cpumask); 148 num_bits_set = cpus_weight(cpumask);
149 /* Return id to all */ 149 /* Return id to all */
150 if (num_bits_set == NR_CPUS) 150 if (num_bits_set == NR_CPUS)
151 return (int) 0xFF; 151 return (int) 0xFF;
152 /* 152 /*
153 * The cpus in the mask must all be on the apic cluster. If are not 153 * The cpus in the mask must all be on the apic cluster. If are not
154 * on the same apicid cluster return default value of TARGET_CPUS. 154 * on the same apicid cluster return default value of TARGET_CPUS.
155 */ 155 */
156 cpu = first_cpu(cpumask); 156 cpu = first_cpu(cpumask);
157 apicid = cpu_to_logical_apicid(cpu); 157 apicid = cpu_to_logical_apicid(cpu);
158 while (cpus_found < num_bits_set) { 158 while (cpus_found < num_bits_set) {
159 if (cpu_isset(cpu, cpumask)) { 159 if (cpu_isset(cpu, cpumask)) {
160 int new_apicid = cpu_to_logical_apicid(cpu); 160 int new_apicid = cpu_to_logical_apicid(cpu);
161 if (apicid_cluster(apicid) != 161 if (apicid_cluster(apicid) !=
162 apicid_cluster(new_apicid)){ 162 apicid_cluster(new_apicid)){
163 printk ("%s: Not a valid mask!\n",__FUNCTION__); 163 printk ("%s: Not a valid mask!\n",__FUNCTION__);
164 return 0xFF; 164 return 0xFF;
@@ -182,4 +182,4 @@ static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
182 return hard_smp_processor_id() >> index_msb; 182 return hard_smp_processor_id() >> index_msb;
183} 183}
184 184
185#endif /* ASM_X86__MACH_SUMMIT__MACH_APIC_H */ 185#endif /* __ASM_SUMMIT_APIC_H */
diff --git a/include/asm-x86/summit/apicdef.h b/include/asm-x86/summit/apicdef.h
new file mode 100644
index 00000000000..f3fbca1f61c
--- /dev/null
+++ b/include/asm-x86/summit/apicdef.h
@@ -0,0 +1,13 @@
1#ifndef __ASM_SUMMIT_APICDEF_H
2#define __ASM_SUMMIT_APICDEF_H
3
4#define APIC_ID_MASK (0xFF<<24)
5
6static inline unsigned get_apic_id(unsigned long x)
7{
8 return (x>>24)&0xFF;
9}
10
11#define GET_APIC_ID(x) get_apic_id(x)
12
13#endif
diff --git a/include/asm-x86/mach-summit/mach_ipi.h b/include/asm-x86/summit/ipi.h
index a3b31c528d9..53bd1e7bd7b 100644
--- a/include/asm-x86/mach-summit/mach_ipi.h
+++ b/include/asm-x86/summit/ipi.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_SUMMIT__MACH_IPI_H 1#ifndef __ASM_SUMMIT_IPI_H
2#define ASM_X86__MACH_SUMMIT__MACH_IPI_H 2#define __ASM_SUMMIT_IPI_H
3 3
4void send_IPI_mask_sequence(cpumask_t mask, int vector); 4void send_IPI_mask_sequence(cpumask_t mask, int vector);
5 5
@@ -22,4 +22,4 @@ static inline void send_IPI_all(int vector)
22 send_IPI_mask(cpu_online_map, vector); 22 send_IPI_mask(cpu_online_map, vector);
23} 23}
24 24
25#endif /* ASM_X86__MACH_SUMMIT__MACH_IPI_H */ 25#endif /* __ASM_SUMMIT_IPI_H */
diff --git a/include/asm-x86/mach-summit/irq_vectors_limits.h b/include/asm-x86/summit/irq_vectors_limits.h
index 22f376ad68e..890ce3f5e09 100644
--- a/include/asm-x86/mach-summit/irq_vectors_limits.h
+++ b/include/asm-x86/summit/irq_vectors_limits.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_SUMMIT__IRQ_VECTORS_LIMITS_H 1#ifndef _ASM_IRQ_VECTORS_LIMITS_H
2#define ASM_X86__MACH_SUMMIT__IRQ_VECTORS_LIMITS_H 2#define _ASM_IRQ_VECTORS_LIMITS_H
3 3
4/* 4/*
5 * For Summit or generic (i.e. installer) kernels, we have lots of I/O APICs, 5 * For Summit or generic (i.e. installer) kernels, we have lots of I/O APICs,
@@ -11,4 +11,4 @@
11#define NR_IRQS 224 11#define NR_IRQS 224
12#define NR_IRQ_VECTORS 1024 12#define NR_IRQ_VECTORS 1024
13 13
14#endif /* ASM_X86__MACH_SUMMIT__IRQ_VECTORS_LIMITS_H */ 14#endif /* _ASM_IRQ_VECTORS_LIMITS_H */
diff --git a/include/asm-x86/mach-summit/mach_mpparse.h b/include/asm-x86/summit/mpparse.h
index 92396f28772..013ce6fab2d 100644
--- a/include/asm-x86/mach-summit/mach_mpparse.h
+++ b/include/asm-x86/summit/mpparse.h
@@ -1,7 +1,6 @@
1#ifndef ASM_X86__MACH_SUMMIT__MACH_MPPARSE_H 1#ifndef __ASM_SUMMIT_MPPARSE_H
2#define ASM_X86__MACH_SUMMIT__MACH_MPPARSE_H 2#define __ASM_SUMMIT_MPPARSE_H
3 3
4#include <mach_apic.h>
5#include <asm/tsc.h> 4#include <asm/tsc.h>
6 5
7extern int use_cyclone; 6extern int use_cyclone;
@@ -12,11 +11,11 @@ extern void setup_summit(void);
12#define setup_summit() {} 11#define setup_summit() {}
13#endif 12#endif
14 13
15static inline int mps_oem_check(struct mp_config_table *mpc, char *oem, 14static inline int mps_oem_check(struct mp_config_table *mpc, char *oem,
16 char *productid) 15 char *productid)
17{ 16{
18 if (!strncmp(oem, "IBM ENSW", 8) && 17 if (!strncmp(oem, "IBM ENSW", 8) &&
19 (!strncmp(productid, "VIGIL SMP", 9) 18 (!strncmp(productid, "VIGIL SMP", 9)
20 || !strncmp(productid, "EXA", 3) 19 || !strncmp(productid, "EXA", 3)
21 || !strncmp(productid, "RUTHLESS SMP", 12))){ 20 || !strncmp(productid, "RUTHLESS SMP", 12))){
22 mark_tsc_unstable("Summit based system"); 21 mark_tsc_unstable("Summit based system");
@@ -107,4 +106,4 @@ static inline int is_WPEG(struct rio_detail *rio){
107 rio->type == LookOutAWPEG || rio->type == LookOutBWPEG); 106 rio->type == LookOutAWPEG || rio->type == LookOutBWPEG);
108} 107}
109 108
110#endif /* ASM_X86__MACH_SUMMIT__MACH_MPPARSE_H */ 109#endif /* __ASM_SUMMIT_MPPARSE_H */
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index 56c73b84755..c360c558e59 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -25,9 +25,99 @@
25#include <linux/types.h> 25#include <linux/types.h>
26#include <linux/msi.h> 26#include <linux/msi.h>
27 27
28#ifdef CONFIG_DMAR 28#if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP)
29struct intel_iommu; 29struct intel_iommu;
30 30
31struct dmar_drhd_unit {
32 struct list_head list; /* list of drhd units */
33 struct acpi_dmar_header *hdr; /* ACPI header */
34 u64 reg_base_addr; /* register base address*/
35 struct pci_dev **devices; /* target device array */
36 int devices_cnt; /* target device count */
37 u8 ignored:1; /* ignore drhd */
38 u8 include_all:1;
39 struct intel_iommu *iommu;
40};
41
42extern struct list_head dmar_drhd_units;
43
44#define for_each_drhd_unit(drhd) \
45 list_for_each_entry(drhd, &dmar_drhd_units, list)
46
47extern int dmar_table_init(void);
48extern int early_dmar_detect(void);
49extern int dmar_dev_scope_init(void);
50
51/* Intel IOMMU detection */
52extern void detect_intel_iommu(void);
53
54
55extern int parse_ioapics_under_ir(void);
56extern int alloc_iommu(struct dmar_drhd_unit *);
57#else
58static inline void detect_intel_iommu(void)
59{
60 return;
61}
62
63static inline int dmar_table_init(void)
64{
65 return -ENODEV;
66}
67#endif /* !CONFIG_DMAR && !CONFIG_INTR_REMAP */
68
69#ifdef CONFIG_INTR_REMAP
70extern int intr_remapping_enabled;
71extern int enable_intr_remapping(int);
72
73struct irte {
74 union {
75 struct {
76 __u64 present : 1,
77 fpd : 1,
78 dst_mode : 1,
79 redir_hint : 1,
80 trigger_mode : 1,
81 dlvry_mode : 3,
82 avail : 4,
83 __reserved_1 : 4,
84 vector : 8,
85 __reserved_2 : 8,
86 dest_id : 32;
87 };
88 __u64 low;
89 };
90
91 union {
92 struct {
93 __u64 sid : 16,
94 sq : 2,
95 svt : 2,
96 __reserved_3 : 44;
97 };
98 __u64 high;
99 };
100};
101extern int get_irte(int irq, struct irte *entry);
102extern int modify_irte(int irq, struct irte *irte_modified);
103extern int alloc_irte(struct intel_iommu *iommu, int irq, u16 count);
104extern int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index,
105 u16 sub_handle);
106extern int map_irq_to_irte_handle(int irq, u16 *sub_handle);
107extern int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index);
108extern int flush_irte(int irq);
109extern int free_irte(int irq);
110
111extern int irq_remapped(int irq);
112extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev);
113extern struct intel_iommu *map_ioapic_to_ir(int apic);
114#else
115#define irq_remapped(irq) (0)
116#define enable_intr_remapping(mode) (-1)
117#define intr_remapping_enabled (0)
118#endif
119
120#ifdef CONFIG_DMAR
31extern const char *dmar_get_fault_reason(u8 fault_reason); 121extern const char *dmar_get_fault_reason(u8 fault_reason);
32 122
33/* Can't use the common MSI interrupt functions 123/* Can't use the common MSI interrupt functions
@@ -40,47 +130,30 @@ extern void dmar_msi_write(int irq, struct msi_msg *msg);
40extern int dmar_set_interrupt(struct intel_iommu *iommu); 130extern int dmar_set_interrupt(struct intel_iommu *iommu);
41extern int arch_setup_dmar_msi(unsigned int irq); 131extern int arch_setup_dmar_msi(unsigned int irq);
42 132
43/* Intel IOMMU detection and initialization functions */ 133extern int iommu_detected, no_iommu;
44extern void detect_intel_iommu(void);
45extern int intel_iommu_init(void);
46
47extern int dmar_table_init(void);
48extern int early_dmar_detect(void);
49
50extern struct list_head dmar_drhd_units;
51extern struct list_head dmar_rmrr_units; 134extern struct list_head dmar_rmrr_units;
52
53struct dmar_drhd_unit {
54 struct list_head list; /* list of drhd units */
55 u64 reg_base_addr; /* register base address*/
56 struct pci_dev **devices; /* target device array */
57 int devices_cnt; /* target device count */
58 u8 ignored:1; /* ignore drhd */
59 u8 include_all:1;
60 struct intel_iommu *iommu;
61};
62
63struct dmar_rmrr_unit { 135struct dmar_rmrr_unit {
64 struct list_head list; /* list of rmrr units */ 136 struct list_head list; /* list of rmrr units */
137 struct acpi_dmar_header *hdr; /* ACPI header */
65 u64 base_address; /* reserved base address*/ 138 u64 base_address; /* reserved base address*/
66 u64 end_address; /* reserved end address */ 139 u64 end_address; /* reserved end address */
67 struct pci_dev **devices; /* target devices */ 140 struct pci_dev **devices; /* target devices */
68 int devices_cnt; /* target device count */ 141 int devices_cnt; /* target device count */
69}; 142};
70 143
71#define for_each_drhd_unit(drhd) \
72 list_for_each_entry(drhd, &dmar_drhd_units, list)
73#define for_each_rmrr_units(rmrr) \ 144#define for_each_rmrr_units(rmrr) \
74 list_for_each_entry(rmrr, &dmar_rmrr_units, list) 145 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
146/* Intel DMAR initialization functions */
147extern int intel_iommu_init(void);
148extern int dmar_disabled;
75#else 149#else
76static inline void detect_intel_iommu(void)
77{
78 return;
79}
80static inline int intel_iommu_init(void) 150static inline int intel_iommu_init(void)
81{ 151{
152#ifdef CONFIG_INTR_REMAP
153 return dmar_dev_scope_init();
154#else
82 return -ENODEV; 155 return -ENODEV;
156#endif
83} 157}
84
85#endif /* !CONFIG_DMAR */ 158#endif /* !CONFIG_DMAR */
86#endif /* __DMAR_H__ */ 159#endif /* __DMAR_H__ */
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 8ccb462ea42..8d9411bc60f 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -62,6 +62,7 @@ typedef void (*irq_flow_handler_t)(unsigned int irq,
62#define IRQ_MOVE_PENDING 0x00200000 /* need to re-target IRQ destination */ 62#define IRQ_MOVE_PENDING 0x00200000 /* need to re-target IRQ destination */
63#define IRQ_NO_BALANCING 0x00400000 /* IRQ is excluded from balancing */ 63#define IRQ_NO_BALANCING 0x00400000 /* IRQ is excluded from balancing */
64#define IRQ_SPURIOUS_DISABLED 0x00800000 /* IRQ was disabled by the spurious trap */ 64#define IRQ_SPURIOUS_DISABLED 0x00800000 /* IRQ was disabled by the spurious trap */
65#define IRQ_MOVE_PCNTXT 0x01000000 /* IRQ migration from process context */
65 66
66#ifdef CONFIG_IRQ_PER_CPU 67#ifdef CONFIG_IRQ_PER_CPU
67# define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU) 68# define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU)
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 0314074fa23..60c49e32439 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -89,7 +89,14 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
89 set_balance_irq_affinity(irq, cpumask); 89 set_balance_irq_affinity(irq, cpumask);
90 90
91#ifdef CONFIG_GENERIC_PENDING_IRQ 91#ifdef CONFIG_GENERIC_PENDING_IRQ
92 set_pending_irq(irq, cpumask); 92 if (desc->status & IRQ_MOVE_PCNTXT) {
93 unsigned long flags;
94
95 spin_lock_irqsave(&desc->lock, flags);
96 desc->chip->set_affinity(irq, cpumask);
97 spin_unlock_irqrestore(&desc->lock, flags);
98 } else
99 set_pending_irq(irq, cpumask);
93#else 100#else
94 desc->affinity = cpumask; 101 desc->affinity = cpumask;
95 desc->chip->set_affinity(irq, cpumask); 102 desc->chip->set_affinity(irq, cpumask);