aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-07-26 10:29:23 -0400
committerIngo Molnar <mingo@elte.hu>2008-07-26 10:29:23 -0400
commit6dec3a10a7a6093af10cef7ac56021150afd6451 (patch)
treef4d8511f023e3e0c203baf889d4a0f3925882381
parent29308333fbe2cc61258c1c470f9403960428beb2 (diff)
parent10a010f6953b5a14ba2f0be40a4fce1bea220875 (diff)
Merge branch 'x86/x2apic' into x86/core
Conflicts: include/asm-x86/i8259.h include/asm-x86/msidef.h Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--Documentation/kernel-parameters.txt6
-rw-r--r--arch/x86/Kconfig8
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/acpi/boot.c4
-rw-r--r--arch/x86/kernel/apic_32.c41
-rw-r--r--arch/x86/kernel/apic_64.c237
-rw-r--r--arch/x86/kernel/cpu/common_64.c2
-rw-r--r--arch/x86/kernel/cpu/feature_names.c2
-rw-r--r--arch/x86/kernel/genapic_64.c83
-rw-r--r--arch/x86/kernel/genapic_flat_64.c62
-rw-r--r--arch/x86/kernel/genx2apic_cluster.c164
-rw-r--r--arch/x86/kernel/genx2apic_phys.c159
-rw-r--r--arch/x86/kernel/genx2apic_uv_x.c69
-rw-r--r--arch/x86/kernel/i8259.c24
-rw-r--r--arch/x86/kernel/io_apic_32.c5
-rw-r--r--arch/x86/kernel/io_apic_64.c608
-rw-r--r--arch/x86/kernel/mpparse.c2
-rw-r--r--arch/x86/kernel/paravirt.c2
-rw-r--r--arch/x86/kernel/setup.c2
-rw-r--r--arch/x86/kernel/smpboot.c38
-rw-r--r--arch/x86/kernel/vmi_32.c4
-rw-r--r--arch/x86/lguest/boot.c38
-rw-r--r--arch/x86/mach-generic/bigsmp.c5
-rw-r--r--arch/x86/mach-generic/es7000.c3
-rw-r--r--arch/x86/mach-generic/numaq.c4
-rw-r--r--arch/x86/mach-generic/summit.c5
-rw-r--r--arch/x86/xen/enlighten.c45
-rw-r--r--drivers/pci/Makefile2
-rw-r--r--drivers/pci/dma_remapping.h157
-rw-r--r--drivers/pci/dmar.c397
-rw-r--r--drivers/pci/intel-iommu.c164
-rw-r--r--drivers/pci/intel-iommu.h233
-rw-r--r--drivers/pci/intr_remapping.c471
-rw-r--r--drivers/pci/intr_remapping.h8
-rw-r--r--include/asm-x86/apic.h65
-rw-r--r--include/asm-x86/apicdef.h3
-rw-r--r--include/asm-x86/cpufeature.h2
-rw-r--r--include/asm-x86/genapic_64.h8
-rw-r--r--include/asm-x86/hw_irq.h2
-rw-r--r--include/asm-x86/i8259.h3
-rw-r--r--include/asm-x86/io_apic.h20
-rw-r--r--include/asm-x86/ipi.h16
-rw-r--r--include/asm-x86/irq_remapping.h8
-rw-r--r--include/asm-x86/mach-default/mach_apic.h4
-rw-r--r--include/asm-x86/mach-default/mach_apicdef.h6
-rw-r--r--include/asm-x86/mach-es7000/mach_apic.h2
-rw-r--r--include/asm-x86/msidef.h4
-rw-r--r--include/asm-x86/paravirt.h19
-rw-r--r--include/asm-x86/smp.h17
-rw-r--r--include/linux/dmar.h127
-rw-r--r--include/linux/irq.h1
-rw-r--r--kernel/irq/manage.c9
52 files changed, 2854 insertions, 518 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index e7bea3e85304..0f130a4f9ba3 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1420,6 +1420,12 @@ and is between 256 and 4096 characters. It is defined in the file
1420 1420
1421 nolapic_timer [X86-32,APIC] Do not use the local APIC timer. 1421 nolapic_timer [X86-32,APIC] Do not use the local APIC timer.
1422 1422
1423 nox2apic [X86-64,APIC] Do not enable x2APIC mode.
1424
1425 x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of
1426 default x2apic cluster mode on platforms
1427 supporting x2apic.
1428
1423 noltlbs [PPC] Do not use large page/tlb entries for kernel 1429 noltlbs [PPC] Do not use large page/tlb entries for kernel
1424 lowmem mapping on PPC40x. 1430 lowmem mapping on PPC40x.
1425 1431
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e3cba0b45600..4d621eb90bbf 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1652,6 +1652,14 @@ config DMAR_FLOPPY_WA
1652 workaround will setup a 1:1 mapping for the first 1652 workaround will setup a 1:1 mapping for the first
1653 16M to make floppy (an ISA device) work. 1653 16M to make floppy (an ISA device) work.
1654 1654
1655config INTR_REMAP
1656 bool "Support for Interrupt Remapping (EXPERIMENTAL)"
1657 depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL
1658 help
1659 Supports Interrupt remapping for IO-APIC and MSI devices.
1660 To use x2apic mode in the CPU's which support x2APIC enhancements or
1661 to support platforms with CPU's having > 8 bit APIC ID, say Y.
1662
1655source "drivers/pci/pcie/Kconfig" 1663source "drivers/pci/pcie/Kconfig"
1656 1664
1657source "drivers/pci/Kconfig" 1665source "drivers/pci/Kconfig"
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 3db651fc8ec5..a07ec14f3312 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -104,6 +104,8 @@ obj-$(CONFIG_OLPC) += olpc.o
104ifeq ($(CONFIG_X86_64),y) 104ifeq ($(CONFIG_X86_64),y)
105 obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o 105 obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
106 obj-y += bios_uv.o 106 obj-y += bios_uv.o
107 obj-y += genx2apic_cluster.o
108 obj-y += genx2apic_phys.o
107 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o 109 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
108 obj-$(CONFIG_AUDIT) += audit_64.o 110 obj-$(CONFIG_AUDIT) += audit_64.o
109 111
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index fa88a1d71290..12e260e8fb2a 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -761,7 +761,7 @@ static void __init acpi_register_lapic_address(unsigned long address)
761 761
762 set_fixmap_nocache(FIX_APIC_BASE, address); 762 set_fixmap_nocache(FIX_APIC_BASE, address);
763 if (boot_cpu_physical_apicid == -1U) { 763 if (boot_cpu_physical_apicid == -1U) {
764 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 764 boot_cpu_physical_apicid = read_apic_id();
765#ifdef CONFIG_X86_32 765#ifdef CONFIG_X86_32
766 apic_version[boot_cpu_physical_apicid] = 766 apic_version[boot_cpu_physical_apicid] =
767 GET_APIC_VERSION(apic_read(APIC_LVR)); 767 GET_APIC_VERSION(apic_read(APIC_LVR));
@@ -1337,7 +1337,9 @@ static void __init acpi_process_madt(void)
1337 acpi_ioapic = 1; 1337 acpi_ioapic = 1;
1338 1338
1339 smp_found_config = 1; 1339 smp_found_config = 1;
1340#ifdef CONFIG_X86_32
1340 setup_apic_routing(); 1341 setup_apic_routing();
1342#endif
1341 } 1343 }
1342 } 1344 }
1343 if (error == -EINVAL) { 1345 if (error == -EINVAL) {
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index d6c898358371..f93c18f5b79d 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -145,13 +145,18 @@ static int modern_apic(void)
145 return lapic_get_version() >= 0x14; 145 return lapic_get_version() >= 0x14;
146} 146}
147 147
148void apic_wait_icr_idle(void) 148/*
149 * Paravirt kernels also might be using these below ops. So we still
150 * use generic apic_read()/apic_write(), which might be pointing to different
151 * ops in PARAVIRT case.
152 */
153void xapic_wait_icr_idle(void)
149{ 154{
150 while (apic_read(APIC_ICR) & APIC_ICR_BUSY) 155 while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
151 cpu_relax(); 156 cpu_relax();
152} 157}
153 158
154u32 safe_apic_wait_icr_idle(void) 159u32 safe_xapic_wait_icr_idle(void)
155{ 160{
156 u32 send_status; 161 u32 send_status;
157 int timeout; 162 int timeout;
@@ -167,6 +172,34 @@ u32 safe_apic_wait_icr_idle(void)
167 return send_status; 172 return send_status;
168} 173}
169 174
175void xapic_icr_write(u32 low, u32 id)
176{
177 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
178 apic_write(APIC_ICR, low);
179}
180
181u64 xapic_icr_read(void)
182{
183 u32 icr1, icr2;
184
185 icr2 = apic_read(APIC_ICR2);
186 icr1 = apic_read(APIC_ICR);
187
188 return icr1 | ((u64)icr2 << 32);
189}
190
191static struct apic_ops xapic_ops = {
192 .read = native_apic_mem_read,
193 .write = native_apic_mem_write,
194 .icr_read = xapic_icr_read,
195 .icr_write = xapic_icr_write,
196 .wait_icr_idle = xapic_wait_icr_idle,
197 .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
198};
199
200struct apic_ops __read_mostly *apic_ops = &xapic_ops;
201EXPORT_SYMBOL_GPL(apic_ops);
202
170/** 203/**
171 * enable_NMI_through_LVT0 - enable NMI through local vector table 0 204 * enable_NMI_through_LVT0 - enable NMI through local vector table 0
172 */ 205 */
@@ -1205,7 +1238,7 @@ void __init init_apic_mappings(void)
1205 * default configuration (or the MP table is broken). 1238 * default configuration (or the MP table is broken).
1206 */ 1239 */
1207 if (boot_cpu_physical_apicid == -1U) 1240 if (boot_cpu_physical_apicid == -1U)
1208 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 1241 boot_cpu_physical_apicid = read_apic_id();
1209 1242
1210} 1243}
1211 1244
@@ -1242,7 +1275,7 @@ int __init APIC_init_uniprocessor(void)
1242 * might be zero if read from MP tables. Get it from LAPIC. 1275 * might be zero if read from MP tables. Get it from LAPIC.
1243 */ 1276 */
1244#ifdef CONFIG_CRASH_DUMP 1277#ifdef CONFIG_CRASH_DUMP
1245 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 1278 boot_cpu_physical_apicid = read_apic_id();
1246#endif 1279#endif
1247 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); 1280 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
1248 1281
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 7f1f030da7ee..cd63c0bc6180 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -27,6 +27,7 @@
27#include <linux/clockchips.h> 27#include <linux/clockchips.h>
28#include <linux/acpi_pmtmr.h> 28#include <linux/acpi_pmtmr.h>
29#include <linux/module.h> 29#include <linux/module.h>
30#include <linux/dmar.h>
30 31
31#include <asm/atomic.h> 32#include <asm/atomic.h>
32#include <asm/smp.h> 33#include <asm/smp.h>
@@ -39,6 +40,7 @@
39#include <asm/proto.h> 40#include <asm/proto.h>
40#include <asm/timex.h> 41#include <asm/timex.h>
41#include <asm/apic.h> 42#include <asm/apic.h>
43#include <asm/i8259.h>
42 44
43#include <mach_ipi.h> 45#include <mach_ipi.h>
44#include <mach_apic.h> 46#include <mach_apic.h>
@@ -46,6 +48,11 @@
46static int disable_apic_timer __cpuinitdata; 48static int disable_apic_timer __cpuinitdata;
47static int apic_calibrate_pmtmr __initdata; 49static int apic_calibrate_pmtmr __initdata;
48int disable_apic; 50int disable_apic;
51int disable_x2apic;
52int x2apic;
53
54/* x2apic enabled before OS handover */
55int x2apic_preenabled;
49 56
50/* Local APIC timer works in C2 */ 57/* Local APIC timer works in C2 */
51int local_apic_timer_c2_ok; 58int local_apic_timer_c2_ok;
@@ -119,13 +126,13 @@ static int modern_apic(void)
119 return lapic_get_version() >= 0x14; 126 return lapic_get_version() >= 0x14;
120} 127}
121 128
122void apic_wait_icr_idle(void) 129void xapic_wait_icr_idle(void)
123{ 130{
124 while (apic_read(APIC_ICR) & APIC_ICR_BUSY) 131 while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
125 cpu_relax(); 132 cpu_relax();
126} 133}
127 134
128u32 safe_apic_wait_icr_idle(void) 135u32 safe_xapic_wait_icr_idle(void)
129{ 136{
130 u32 send_status; 137 u32 send_status;
131 int timeout; 138 int timeout;
@@ -141,6 +148,69 @@ u32 safe_apic_wait_icr_idle(void)
141 return send_status; 148 return send_status;
142} 149}
143 150
151void xapic_icr_write(u32 low, u32 id)
152{
153 apic_write(APIC_ICR2, id << 24);
154 apic_write(APIC_ICR, low);
155}
156
157u64 xapic_icr_read(void)
158{
159 u32 icr1, icr2;
160
161 icr2 = apic_read(APIC_ICR2);
162 icr1 = apic_read(APIC_ICR);
163
164 return (icr1 | ((u64)icr2 << 32));
165}
166
167static struct apic_ops xapic_ops = {
168 .read = native_apic_mem_read,
169 .write = native_apic_mem_write,
170 .icr_read = xapic_icr_read,
171 .icr_write = xapic_icr_write,
172 .wait_icr_idle = xapic_wait_icr_idle,
173 .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
174};
175
176struct apic_ops __read_mostly *apic_ops = &xapic_ops;
177
178EXPORT_SYMBOL_GPL(apic_ops);
179
180static void x2apic_wait_icr_idle(void)
181{
182 /* no need to wait for icr idle in x2apic */
183 return;
184}
185
186static u32 safe_x2apic_wait_icr_idle(void)
187{
188 /* no need to wait for icr idle in x2apic */
189 return 0;
190}
191
192void x2apic_icr_write(u32 low, u32 id)
193{
194 wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
195}
196
197u64 x2apic_icr_read(void)
198{
199 unsigned long val;
200
201 rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
202 return val;
203}
204
205static struct apic_ops x2apic_ops = {
206 .read = native_apic_msr_read,
207 .write = native_apic_msr_write,
208 .icr_read = x2apic_icr_read,
209 .icr_write = x2apic_icr_write,
210 .wait_icr_idle = x2apic_wait_icr_idle,
211 .safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
212};
213
144/** 214/**
145 * enable_NMI_through_LVT0 - enable NMI through local vector table 0 215 * enable_NMI_through_LVT0 - enable NMI through local vector table 0
146 */ 216 */
@@ -630,10 +700,10 @@ int __init verify_local_APIC(void)
630 /* 700 /*
631 * The ID register is read/write in a real APIC. 701 * The ID register is read/write in a real APIC.
632 */ 702 */
633 reg0 = read_apic_id(); 703 reg0 = apic_read(APIC_ID);
634 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); 704 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
635 apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); 705 apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
636 reg1 = read_apic_id(); 706 reg1 = apic_read(APIC_ID);
637 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); 707 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
638 apic_write(APIC_ID, reg0); 708 apic_write(APIC_ID, reg0);
639 if (reg1 != (reg0 ^ APIC_ID_MASK)) 709 if (reg1 != (reg0 ^ APIC_ID_MASK))
@@ -834,6 +904,125 @@ void __cpuinit end_local_APIC_setup(void)
834 apic_pm_activate(); 904 apic_pm_activate();
835} 905}
836 906
907void check_x2apic(void)
908{
909 int msr, msr2;
910
911 rdmsr(MSR_IA32_APICBASE, msr, msr2);
912
913 if (msr & X2APIC_ENABLE) {
914 printk("x2apic enabled by BIOS, switching to x2apic ops\n");
915 x2apic_preenabled = x2apic = 1;
916 apic_ops = &x2apic_ops;
917 }
918}
919
920void enable_x2apic(void)
921{
922 int msr, msr2;
923
924 rdmsr(MSR_IA32_APICBASE, msr, msr2);
925 if (!(msr & X2APIC_ENABLE)) {
926 printk("Enabling x2apic\n");
927 wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
928 }
929}
930
931void enable_IR_x2apic(void)
932{
933#ifdef CONFIG_INTR_REMAP
934 int ret;
935 unsigned long flags;
936
937 if (!cpu_has_x2apic)
938 return;
939
940 if (!x2apic_preenabled && disable_x2apic) {
941 printk(KERN_INFO
942 "Skipped enabling x2apic and Interrupt-remapping "
943 "because of nox2apic\n");
944 return;
945 }
946
947 if (x2apic_preenabled && disable_x2apic)
948 panic("Bios already enabled x2apic, can't enforce nox2apic");
949
950 if (!x2apic_preenabled && skip_ioapic_setup) {
951 printk(KERN_INFO
952 "Skipped enabling x2apic and Interrupt-remapping "
953 "because of skipping io-apic setup\n");
954 return;
955 }
956
957 ret = dmar_table_init();
958 if (ret) {
959 printk(KERN_INFO
960 "dmar_table_init() failed with %d:\n", ret);
961
962 if (x2apic_preenabled)
963 panic("x2apic enabled by bios. But IR enabling failed");
964 else
965 printk(KERN_INFO
966 "Not enabling x2apic,Intr-remapping\n");
967 return;
968 }
969
970 local_irq_save(flags);
971 mask_8259A();
972 save_mask_IO_APIC_setup();
973
974 ret = enable_intr_remapping(1);
975
976 if (ret && x2apic_preenabled) {
977 local_irq_restore(flags);
978 panic("x2apic enabled by bios. But IR enabling failed");
979 }
980
981 if (ret)
982 goto end;
983
984 if (!x2apic) {
985 x2apic = 1;
986 apic_ops = &x2apic_ops;
987 enable_x2apic();
988 }
989end:
990 if (ret)
991 /*
992 * IR enabling failed
993 */
994 restore_IO_APIC_setup();
995 else
996 reinit_intr_remapped_IO_APIC(x2apic_preenabled);
997
998 unmask_8259A();
999 local_irq_restore(flags);
1000
1001 if (!ret) {
1002 if (!x2apic_preenabled)
1003 printk(KERN_INFO
1004 "Enabled x2apic and interrupt-remapping\n");
1005 else
1006 printk(KERN_INFO
1007 "Enabled Interrupt-remapping\n");
1008 } else
1009 printk(KERN_ERR
1010 "Failed to enable Interrupt-remapping and x2apic\n");
1011#else
1012 if (!cpu_has_x2apic)
1013 return;
1014
1015 if (x2apic_preenabled)
1016 panic("x2apic enabled prior OS handover,"
1017 " enable CONFIG_INTR_REMAP");
1018
1019 printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
1020 " and x2apic\n");
1021#endif
1022
1023 return;
1024}
1025
837/* 1026/*
838 * Detect and enable local APICs on non-SMP boards. 1027 * Detect and enable local APICs on non-SMP boards.
839 * Original code written by Keir Fraser. 1028 * Original code written by Keir Fraser.
@@ -873,7 +1062,7 @@ void __init early_init_lapic_mapping(void)
873 * Fetch the APIC ID of the BSP in case we have a 1062 * Fetch the APIC ID of the BSP in case we have a
874 * default configuration (or the MP table is broken). 1063 * default configuration (or the MP table is broken).
875 */ 1064 */
876 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 1065 boot_cpu_physical_apicid = read_apic_id();
877} 1066}
878 1067
879/** 1068/**
@@ -881,6 +1070,11 @@ void __init early_init_lapic_mapping(void)
881 */ 1070 */
882void __init init_apic_mappings(void) 1071void __init init_apic_mappings(void)
883{ 1072{
1073 if (x2apic) {
1074 boot_cpu_physical_apicid = read_apic_id();
1075 return;
1076 }
1077
884 /* 1078 /*
885 * If no local APIC can be found then set up a fake all 1079 * If no local APIC can be found then set up a fake all
886 * zeroes page to simulate the local APIC and another 1080 * zeroes page to simulate the local APIC and another
@@ -900,7 +1094,7 @@ void __init init_apic_mappings(void)
900 * Fetch the APIC ID of the BSP in case we have a 1094 * Fetch the APIC ID of the BSP in case we have a
901 * default configuration (or the MP table is broken). 1095 * default configuration (or the MP table is broken).
902 */ 1096 */
903 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 1097 boot_cpu_physical_apicid = read_apic_id();
904} 1098}
905 1099
906/* 1100/*
@@ -919,6 +1113,9 @@ int __init APIC_init_uniprocessor(void)
919 return -1; 1113 return -1;
920 } 1114 }
921 1115
1116 enable_IR_x2apic();
1117 setup_apic_routing();
1118
922 verify_local_APIC(); 1119 verify_local_APIC();
923 1120
924 connect_bsp_APIC(); 1121 connect_bsp_APIC();
@@ -1100,6 +1297,11 @@ void __cpuinit generic_processor_info(int apicid, int version)
1100 cpu_set(cpu, cpu_present_map); 1297 cpu_set(cpu, cpu_present_map);
1101} 1298}
1102 1299
1300int hard_smp_processor_id(void)
1301{
1302 return read_apic_id();
1303}
1304
1103/* 1305/*
1104 * Power management 1306 * Power management
1105 */ 1307 */
@@ -1136,7 +1338,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
1136 1338
1137 maxlvt = lapic_get_maxlvt(); 1339 maxlvt = lapic_get_maxlvt();
1138 1340
1139 apic_pm_state.apic_id = read_apic_id(); 1341 apic_pm_state.apic_id = apic_read(APIC_ID);
1140 apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); 1342 apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
1141 apic_pm_state.apic_ldr = apic_read(APIC_LDR); 1343 apic_pm_state.apic_ldr = apic_read(APIC_LDR);
1142 apic_pm_state.apic_dfr = apic_read(APIC_DFR); 1344 apic_pm_state.apic_dfr = apic_read(APIC_DFR);
@@ -1171,10 +1373,14 @@ static int lapic_resume(struct sys_device *dev)
1171 maxlvt = lapic_get_maxlvt(); 1373 maxlvt = lapic_get_maxlvt();
1172 1374
1173 local_irq_save(flags); 1375 local_irq_save(flags);
1174 rdmsr(MSR_IA32_APICBASE, l, h); 1376 if (!x2apic) {
1175 l &= ~MSR_IA32_APICBASE_BASE; 1377 rdmsr(MSR_IA32_APICBASE, l, h);
1176 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; 1378 l &= ~MSR_IA32_APICBASE_BASE;
1177 wrmsr(MSR_IA32_APICBASE, l, h); 1379 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
1380 wrmsr(MSR_IA32_APICBASE, l, h);
1381 } else
1382 enable_x2apic();
1383
1178 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); 1384 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
1179 apic_write(APIC_ID, apic_pm_state.apic_id); 1385 apic_write(APIC_ID, apic_pm_state.apic_id);
1180 apic_write(APIC_DFR, apic_pm_state.apic_dfr); 1386 apic_write(APIC_DFR, apic_pm_state.apic_dfr);
@@ -1314,6 +1520,15 @@ __cpuinit int apic_is_clustered_box(void)
1314 return (clusters > 2); 1520 return (clusters > 2);
1315} 1521}
1316 1522
1523static __init int setup_nox2apic(char *str)
1524{
1525 disable_x2apic = 1;
1526 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC);
1527 return 0;
1528}
1529early_param("nox2apic", setup_nox2apic);
1530
1531
1317/* 1532/*
1318 * APIC command line parameters 1533 * APIC command line parameters
1319 */ 1534 */
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
index dd6e3f15017e..6f9b8924bdc0 100644
--- a/arch/x86/kernel/cpu/common_64.c
+++ b/arch/x86/kernel/cpu/common_64.c
@@ -597,6 +597,8 @@ void __cpuinit cpu_init(void)
597 barrier(); 597 barrier();
598 598
599 check_efer(); 599 check_efer();
600 if (cpu != 0 && x2apic)
601 enable_x2apic();
600 602
601 /* 603 /*
602 * set up and load the per-CPU TSS 604 * set up and load the per-CPU TSS
diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c
index e43ad4ad4cba..0bf4d37a0483 100644
--- a/arch/x86/kernel/cpu/feature_names.c
+++ b/arch/x86/kernel/cpu/feature_names.c
@@ -45,7 +45,7 @@ const char * const x86_cap_flags[NCAPINTS*32] = {
45 /* Intel-defined (#2) */ 45 /* Intel-defined (#2) */
46 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", 46 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
47 "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, 47 "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
48 NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt", 48 NULL, NULL, "dca", "sse4_1", "sse4_2", "x2apic", NULL, "popcnt",
49 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 49 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
50 50
51 /* VIA/Cyrix/Centaur-defined */ 51 /* VIA/Cyrix/Centaur-defined */
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index 1fa8be5bd217..b3ba969c50d2 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -21,81 +21,52 @@
21#include <asm/ipi.h> 21#include <asm/ipi.h>
22#include <asm/genapic.h> 22#include <asm/genapic.h>
23 23
24#ifdef CONFIG_ACPI 24extern struct genapic apic_flat;
25#include <acpi/acpi_bus.h> 25extern struct genapic apic_physflat;
26#endif 26extern struct genapic apic_x2xpic_uv_x;
27 27extern struct genapic apic_x2apic_phys;
28DEFINE_PER_CPU(int, x2apic_extra_bits); 28extern struct genapic apic_x2apic_cluster;
29 29
30struct genapic __read_mostly *genapic = &apic_flat; 30struct genapic __read_mostly *genapic = &apic_flat;
31 31
32static enum uv_system_type uv_system_type; 32static struct genapic *apic_probe[] __initdata = {
33 &apic_x2apic_uv_x,
34 &apic_x2apic_phys,
35 &apic_x2apic_cluster,
36 &apic_physflat,
37 NULL,
38};
33 39
34/* 40/*
35 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. 41 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
36 */ 42 */
37void __init setup_apic_routing(void) 43void __init setup_apic_routing(void)
38{ 44{
39 if (uv_system_type == UV_NON_UNIQUE_APIC) 45 if (genapic == &apic_flat) {
40 genapic = &apic_x2apic_uv_x; 46 if (max_physical_apicid >= 8)
41 else 47 genapic = &apic_physflat;
42#ifdef CONFIG_ACPI 48 printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
43 /* 49 }
44 * Quirk: some x86_64 machines can only use physical APIC mode
45 * regardless of how many processors are present (x86_64 ES7000
46 * is an example).
47 */
48 if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
49 (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
50 genapic = &apic_physflat;
51 else
52#endif
53
54 if (max_physical_apicid < 8)
55 genapic = &apic_flat;
56 else
57 genapic = &apic_physflat;
58
59 printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
60} 50}
61 51
62/* Same for both flat and physical. */ 52/* Same for both flat and physical. */
63 53
64void send_IPI_self(int vector) 54void apic_send_IPI_self(int vector)
65{ 55{
66 __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); 56 __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
67} 57}
68 58
69int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) 59int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
70{ 60{
71 if (!strcmp(oem_id, "SGI")) { 61 int i;
72 if (!strcmp(oem_table_id, "UVL")) 62
73 uv_system_type = UV_LEGACY_APIC; 63 for (i = 0; apic_probe[i]; ++i) {
74 else if (!strcmp(oem_table_id, "UVX")) 64 if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
75 uv_system_type = UV_X2APIC; 65 genapic = apic_probe[i];
76 else if (!strcmp(oem_table_id, "UVH")) 66 printk(KERN_INFO "Setting APIC routing to %s.\n",
77 uv_system_type = UV_NON_UNIQUE_APIC; 67 genapic->name);
68 return 1;
69 }
78 } 70 }
79 return 0; 71 return 0;
80} 72}
81
82unsigned int read_apic_id(void)
83{
84 unsigned int id;
85
86 WARN_ON(preemptible() && num_online_cpus() > 1);
87 id = apic_read(APIC_ID);
88 if (uv_system_type >= UV_X2APIC)
89 id |= __get_cpu_var(x2apic_extra_bits);
90 return id;
91}
92
93enum uv_system_type get_uv_system_type(void)
94{
95 return uv_system_type;
96}
97
98int is_uv_system(void)
99{
100 return uv_system_type != UV_NONE;
101}
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index 786548a62d38..9eca5ba7a6b1 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -15,9 +15,20 @@
15#include <linux/kernel.h> 15#include <linux/kernel.h>
16#include <linux/ctype.h> 16#include <linux/ctype.h>
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/hardirq.h>
18#include <asm/smp.h> 19#include <asm/smp.h>
19#include <asm/ipi.h> 20#include <asm/ipi.h>
20#include <asm/genapic.h> 21#include <asm/genapic.h>
22#include <mach_apicdef.h>
23
24#ifdef CONFIG_ACPI
25#include <acpi/acpi_bus.h>
26#endif
27
28static int __init flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
29{
30 return 1;
31}
21 32
22static cpumask_t flat_target_cpus(void) 33static cpumask_t flat_target_cpus(void)
23{ 34{
@@ -95,9 +106,33 @@ static void flat_send_IPI_all(int vector)
95 __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); 106 __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
96} 107}
97 108
109static unsigned int get_apic_id(unsigned long x)
110{
111 unsigned int id;
112
113 id = (((x)>>24) & 0xFFu);
114 return id;
115}
116
117static unsigned long set_apic_id(unsigned int id)
118{
119 unsigned long x;
120
121 x = ((id & 0xFFu)<<24);
122 return x;
123}
124
125static unsigned int read_xapic_id(void)
126{
127 unsigned int id;
128
129 id = get_apic_id(apic_read(APIC_ID));
130 return id;
131}
132
98static int flat_apic_id_registered(void) 133static int flat_apic_id_registered(void)
99{ 134{
100 return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map); 135 return physid_isset(read_xapic_id(), phys_cpu_present_map);
101} 136}
102 137
103static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) 138static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask)
@@ -112,6 +147,7 @@ static unsigned int phys_pkg_id(int index_msb)
112 147
113struct genapic apic_flat = { 148struct genapic apic_flat = {
114 .name = "flat", 149 .name = "flat",
150 .acpi_madt_oem_check = flat_acpi_madt_oem_check,
115 .int_delivery_mode = dest_LowestPrio, 151 .int_delivery_mode = dest_LowestPrio,
116 .int_dest_mode = (APIC_DEST_LOGICAL != 0), 152 .int_dest_mode = (APIC_DEST_LOGICAL != 0),
117 .target_cpus = flat_target_cpus, 153 .target_cpus = flat_target_cpus,
@@ -121,8 +157,12 @@ struct genapic apic_flat = {
121 .send_IPI_all = flat_send_IPI_all, 157 .send_IPI_all = flat_send_IPI_all,
122 .send_IPI_allbutself = flat_send_IPI_allbutself, 158 .send_IPI_allbutself = flat_send_IPI_allbutself,
123 .send_IPI_mask = flat_send_IPI_mask, 159 .send_IPI_mask = flat_send_IPI_mask,
160 .send_IPI_self = apic_send_IPI_self,
124 .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, 161 .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
125 .phys_pkg_id = phys_pkg_id, 162 .phys_pkg_id = phys_pkg_id,
163 .get_apic_id = get_apic_id,
164 .set_apic_id = set_apic_id,
165 .apic_id_mask = (0xFFu<<24),
126}; 166};
127 167
128/* 168/*
@@ -130,6 +170,21 @@ struct genapic apic_flat = {
130 * We cannot use logical delivery in this case because the mask 170 * We cannot use logical delivery in this case because the mask
131 * overflows, so use physical mode. 171 * overflows, so use physical mode.
132 */ 172 */
173static int __init physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
174{
175#ifdef CONFIG_ACPI
176 /*
177 * Quirk: some x86_64 machines can only use physical APIC mode
178 * regardless of how many processors are present (x86_64 ES7000
179 * is an example).
180 */
181 if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
182 (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
183 return 1;
184#endif
185
186 return 0;
187}
133 188
134static cpumask_t physflat_target_cpus(void) 189static cpumask_t physflat_target_cpus(void)
135{ 190{
@@ -176,6 +231,7 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
176 231
177struct genapic apic_physflat = { 232struct genapic apic_physflat = {
178 .name = "physical flat", 233 .name = "physical flat",
234 .acpi_madt_oem_check = physflat_acpi_madt_oem_check,
179 .int_delivery_mode = dest_Fixed, 235 .int_delivery_mode = dest_Fixed,
180 .int_dest_mode = (APIC_DEST_PHYSICAL != 0), 236 .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
181 .target_cpus = physflat_target_cpus, 237 .target_cpus = physflat_target_cpus,
@@ -185,6 +241,10 @@ struct genapic apic_physflat = {
185 .send_IPI_all = physflat_send_IPI_all, 241 .send_IPI_all = physflat_send_IPI_all,
186 .send_IPI_allbutself = physflat_send_IPI_allbutself, 242 .send_IPI_allbutself = physflat_send_IPI_allbutself,
187 .send_IPI_mask = physflat_send_IPI_mask, 243 .send_IPI_mask = physflat_send_IPI_mask,
244 .send_IPI_self = apic_send_IPI_self,
188 .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, 245 .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
189 .phys_pkg_id = phys_pkg_id, 246 .phys_pkg_id = phys_pkg_id,
247 .get_apic_id = get_apic_id,
248 .set_apic_id = set_apic_id,
249 .apic_id_mask = (0xFFu<<24),
190}; 250};
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
new file mode 100644
index 000000000000..ef3f3182d50a
--- /dev/null
+++ b/arch/x86/kernel/genx2apic_cluster.c
@@ -0,0 +1,164 @@
1#include <linux/threads.h>
2#include <linux/cpumask.h>
3#include <linux/string.h>
4#include <linux/kernel.h>
5#include <linux/ctype.h>
6#include <linux/init.h>
7#include <linux/dmar.h>
8
9#include <asm/smp.h>
10#include <asm/ipi.h>
11#include <asm/genapic.h>
12
13DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
14
15static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
16{
17 if (cpu_has_x2apic && intr_remapping_enabled)
18 return 1;
19
20 return 0;
21}
22
23/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
24
25static cpumask_t x2apic_target_cpus(void)
26{
27 return cpumask_of_cpu(0);
28}
29
30/*
31 * for now each logical cpu is in its own vector allocation domain.
32 */
33static cpumask_t x2apic_vector_allocation_domain(int cpu)
34{
35 cpumask_t domain = CPU_MASK_NONE;
36 cpu_set(cpu, domain);
37 return domain;
38}
39
40static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
41 unsigned int dest)
42{
43 unsigned long cfg;
44
45 cfg = __prepare_ICR(0, vector, dest);
46
47 /*
48 * send the IPI.
49 */
50 x2apic_icr_write(cfg, apicid);
51}
52
53/*
54 * for now, we send the IPI's one by one in the cpumask.
55 * TBD: Based on the cpu mask, we can send the IPI's to the cluster group
56 * at once. We have 16 cpu's in a cluster. This will minimize IPI register
57 * writes.
58 */
59static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
60{
61 unsigned long flags;
62 unsigned long query_cpu;
63
64 local_irq_save(flags);
65 for_each_cpu_mask(query_cpu, mask) {
66 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu),
67 vector, APIC_DEST_LOGICAL);
68 }
69 local_irq_restore(flags);
70}
71
72static void x2apic_send_IPI_allbutself(int vector)
73{
74 cpumask_t mask = cpu_online_map;
75
76 cpu_clear(smp_processor_id(), mask);
77
78 if (!cpus_empty(mask))
79 x2apic_send_IPI_mask(mask, vector);
80}
81
82static void x2apic_send_IPI_all(int vector)
83{
84 x2apic_send_IPI_mask(cpu_online_map, vector);
85}
86
87static int x2apic_apic_id_registered(void)
88{
89 return 1;
90}
91
92static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
93{
94 int cpu;
95
96 /*
97 * We're using fixed IRQ delivery, can only return one phys APIC ID.
98 * May as well be the first.
99 */
100 cpu = first_cpu(cpumask);
101 if ((unsigned)cpu < NR_CPUS)
102 return per_cpu(x86_cpu_to_logical_apicid, cpu);
103 else
104 return BAD_APICID;
105}
106
107static unsigned int get_apic_id(unsigned long x)
108{
109 unsigned int id;
110
111 id = x;
112 return id;
113}
114
115static unsigned long set_apic_id(unsigned int id)
116{
117 unsigned long x;
118
119 x = id;
120 return x;
121}
122
123static unsigned int x2apic_read_id(void)
124{
125 return apic_read(APIC_ID);
126}
127
128static unsigned int phys_pkg_id(int index_msb)
129{
130 return x2apic_read_id() >> index_msb;
131}
132
133static void x2apic_send_IPI_self(int vector)
134{
135 apic_write(APIC_SELF_IPI, vector);
136}
137
138static void init_x2apic_ldr(void)
139{
140 int cpu = smp_processor_id();
141
142 per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR);
143 return;
144}
145
146struct genapic apic_x2apic_cluster = {
147 .name = "cluster x2apic",
148 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
149 .int_delivery_mode = dest_LowestPrio,
150 .int_dest_mode = (APIC_DEST_LOGICAL != 0),
151 .target_cpus = x2apic_target_cpus,
152 .vector_allocation_domain = x2apic_vector_allocation_domain,
153 .apic_id_registered = x2apic_apic_id_registered,
154 .init_apic_ldr = init_x2apic_ldr,
155 .send_IPI_all = x2apic_send_IPI_all,
156 .send_IPI_allbutself = x2apic_send_IPI_allbutself,
157 .send_IPI_mask = x2apic_send_IPI_mask,
158 .send_IPI_self = x2apic_send_IPI_self,
159 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
160 .phys_pkg_id = phys_pkg_id,
161 .get_apic_id = get_apic_id,
162 .set_apic_id = set_apic_id,
163 .apic_id_mask = (0xFFFFFFFFu),
164};
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
new file mode 100644
index 000000000000..3229c68aedd4
--- /dev/null
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -0,0 +1,159 @@
1#include <linux/threads.h>
2#include <linux/cpumask.h>
3#include <linux/string.h>
4#include <linux/kernel.h>
5#include <linux/ctype.h>
6#include <linux/init.h>
7#include <linux/dmar.h>
8
9#include <asm/smp.h>
10#include <asm/ipi.h>
11#include <asm/genapic.h>
12
13static int x2apic_phys;
14
15static int set_x2apic_phys_mode(char *arg)
16{
17 x2apic_phys = 1;
18 return 0;
19}
20early_param("x2apic_phys", set_x2apic_phys_mode);
21
22static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
23{
24 if (cpu_has_x2apic && intr_remapping_enabled && x2apic_phys)
25 return 1;
26
27 return 0;
28}
29
30/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
31
32static cpumask_t x2apic_target_cpus(void)
33{
34 return cpumask_of_cpu(0);
35}
36
37static cpumask_t x2apic_vector_allocation_domain(int cpu)
38{
39 cpumask_t domain = CPU_MASK_NONE;
40 cpu_set(cpu, domain);
41 return domain;
42}
43
44static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
45 unsigned int dest)
46{
47 unsigned long cfg;
48
49 cfg = __prepare_ICR(0, vector, dest);
50
51 /*
52 * send the IPI.
53 */
54 x2apic_icr_write(cfg, apicid);
55}
56
57static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
58{
59 unsigned long flags;
60 unsigned long query_cpu;
61
62 local_irq_save(flags);
63 for_each_cpu_mask(query_cpu, mask) {
64 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
65 vector, APIC_DEST_PHYSICAL);
66 }
67 local_irq_restore(flags);
68}
69
70static void x2apic_send_IPI_allbutself(int vector)
71{
72 cpumask_t mask = cpu_online_map;
73
74 cpu_clear(smp_processor_id(), mask);
75
76 if (!cpus_empty(mask))
77 x2apic_send_IPI_mask(mask, vector);
78}
79
80static void x2apic_send_IPI_all(int vector)
81{
82 x2apic_send_IPI_mask(cpu_online_map, vector);
83}
84
85static int x2apic_apic_id_registered(void)
86{
87 return 1;
88}
89
90static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
91{
92 int cpu;
93
94 /*
95 * We're using fixed IRQ delivery, can only return one phys APIC ID.
96 * May as well be the first.
97 */
98 cpu = first_cpu(cpumask);
99 if ((unsigned)cpu < NR_CPUS)
100 return per_cpu(x86_cpu_to_apicid, cpu);
101 else
102 return BAD_APICID;
103}
104
105static unsigned int get_apic_id(unsigned long x)
106{
107 unsigned int id;
108
109 id = x;
110 return id;
111}
112
113static unsigned long set_apic_id(unsigned int id)
114{
115 unsigned long x;
116
117 x = id;
118 return x;
119}
120
121static unsigned int x2apic_read_id(void)
122{
123 return apic_read(APIC_ID);
124}
125
126static unsigned int phys_pkg_id(int index_msb)
127{
128 return x2apic_read_id() >> index_msb;
129}
130
131void x2apic_send_IPI_self(int vector)
132{
133 apic_write(APIC_SELF_IPI, vector);
134}
135
136void init_x2apic_ldr(void)
137{
138 return;
139}
140
141struct genapic apic_x2apic_phys = {
142 .name = "physical x2apic",
143 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
144 .int_delivery_mode = dest_Fixed,
145 .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
146 .target_cpus = x2apic_target_cpus,
147 .vector_allocation_domain = x2apic_vector_allocation_domain,
148 .apic_id_registered = x2apic_apic_id_registered,
149 .init_apic_ldr = init_x2apic_ldr,
150 .send_IPI_all = x2apic_send_IPI_all,
151 .send_IPI_allbutself = x2apic_send_IPI_allbutself,
152 .send_IPI_mask = x2apic_send_IPI_mask,
153 .send_IPI_self = x2apic_send_IPI_self,
154 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
155 .phys_pkg_id = phys_pkg_id,
156 .get_apic_id = get_apic_id,
157 .set_apic_id = set_apic_id,
158 .apic_id_mask = (0xFFFFFFFFu),
159};
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 2cfcbded888a..3fe472223a99 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -12,12 +12,12 @@
12#include <linux/threads.h> 12#include <linux/threads.h>
13#include <linux/cpumask.h> 13#include <linux/cpumask.h>
14#include <linux/string.h> 14#include <linux/string.h>
15#include <linux/kernel.h>
16#include <linux/ctype.h> 15#include <linux/ctype.h>
17#include <linux/init.h> 16#include <linux/init.h>
18#include <linux/sched.h> 17#include <linux/sched.h>
19#include <linux/bootmem.h> 18#include <linux/bootmem.h>
20#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/hardirq.h>
21#include <asm/smp.h> 21#include <asm/smp.h>
22#include <asm/ipi.h> 22#include <asm/ipi.h>
23#include <asm/genapic.h> 23#include <asm/genapic.h>
@@ -26,6 +26,35 @@
26#include <asm/uv/uv_hub.h> 26#include <asm/uv/uv_hub.h>
27#include <asm/uv/bios.h> 27#include <asm/uv/bios.h>
28 28
29DEFINE_PER_CPU(int, x2apic_extra_bits);
30
31static enum uv_system_type uv_system_type;
32
33static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
34{
35 if (!strcmp(oem_id, "SGI")) {
36 if (!strcmp(oem_table_id, "UVL"))
37 uv_system_type = UV_LEGACY_APIC;
38 else if (!strcmp(oem_table_id, "UVX"))
39 uv_system_type = UV_X2APIC;
40 else if (!strcmp(oem_table_id, "UVH")) {
41 uv_system_type = UV_NON_UNIQUE_APIC;
42 return 1;
43 }
44 }
45 return 0;
46}
47
48enum uv_system_type get_uv_system_type(void)
49{
50 return uv_system_type;
51}
52
53int is_uv_system(void)
54{
55 return uv_system_type != UV_NONE;
56}
57
29DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); 58DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
30EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info); 59EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info);
31 60
@@ -123,6 +152,10 @@ static int uv_apic_id_registered(void)
123 return 1; 152 return 1;
124} 153}
125 154
155static void uv_init_apic_ldr(void)
156{
157}
158
126static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) 159static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
127{ 160{
128 int cpu; 161 int cpu;
@@ -138,9 +171,34 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
138 return BAD_APICID; 171 return BAD_APICID;
139} 172}
140 173
174static unsigned int get_apic_id(unsigned long x)
175{
176 unsigned int id;
177
178 WARN_ON(preemptible() && num_online_cpus() > 1);
179 id = x | __get_cpu_var(x2apic_extra_bits);
180
181 return id;
182}
183
184static unsigned long set_apic_id(unsigned int id)
185{
186 unsigned long x;
187
188 /* maskout x2apic_extra_bits ? */
189 x = id;
190 return x;
191}
192
193static unsigned int uv_read_apic_id(void)
194{
195
196 return get_apic_id(apic_read(APIC_ID));
197}
198
141static unsigned int phys_pkg_id(int index_msb) 199static unsigned int phys_pkg_id(int index_msb)
142{ 200{
143 return GET_APIC_ID(read_apic_id()) >> index_msb; 201 return uv_read_apic_id() >> index_msb;
144} 202}
145 203
146#ifdef ZZZ /* Needs x2apic patch */ 204#ifdef ZZZ /* Needs x2apic patch */
@@ -152,17 +210,22 @@ static void uv_send_IPI_self(int vector)
152 210
153struct genapic apic_x2apic_uv_x = { 211struct genapic apic_x2apic_uv_x = {
154 .name = "UV large system", 212 .name = "UV large system",
213 .acpi_madt_oem_check = uv_acpi_madt_oem_check,
155 .int_delivery_mode = dest_Fixed, 214 .int_delivery_mode = dest_Fixed,
156 .int_dest_mode = (APIC_DEST_PHYSICAL != 0), 215 .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
157 .target_cpus = uv_target_cpus, 216 .target_cpus = uv_target_cpus,
158 .vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */ 217 .vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */
159 .apic_id_registered = uv_apic_id_registered, 218 .apic_id_registered = uv_apic_id_registered,
219 .init_apic_ldr = uv_init_apic_ldr,
160 .send_IPI_all = uv_send_IPI_all, 220 .send_IPI_all = uv_send_IPI_all,
161 .send_IPI_allbutself = uv_send_IPI_allbutself, 221 .send_IPI_allbutself = uv_send_IPI_allbutself,
162 .send_IPI_mask = uv_send_IPI_mask, 222 .send_IPI_mask = uv_send_IPI_mask,
163 /* ZZZ.send_IPI_self = uv_send_IPI_self, */ 223 /* ZZZ.send_IPI_self = uv_send_IPI_self, */
164 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, 224 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
165 .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */ 225 .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */
226 .get_apic_id = get_apic_id,
227 .set_apic_id = set_apic_id,
228 .apic_id_mask = (0xFFFFFFFFu),
166}; 229};
167 230
168static __cpuinit void set_x2apic_extra_bits(int pnode) 231static __cpuinit void set_x2apic_extra_bits(int pnode)
@@ -399,3 +462,5 @@ void __cpuinit uv_cpu_init(void)
399 if (get_uv_system_type() == UV_NON_UNIQUE_APIC) 462 if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
400 set_x2apic_extra_bits(uv_hub_info->pnode); 463 set_x2apic_extra_bits(uv_hub_info->pnode);
401} 464}
465
466
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index dc92b49d9204..4b8a53d841f7 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -282,6 +282,30 @@ static int __init i8259A_init_sysfs(void)
282 282
283device_initcall(i8259A_init_sysfs); 283device_initcall(i8259A_init_sysfs);
284 284
285void mask_8259A(void)
286{
287 unsigned long flags;
288
289 spin_lock_irqsave(&i8259A_lock, flags);
290
291 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
292 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
293
294 spin_unlock_irqrestore(&i8259A_lock, flags);
295}
296
297void unmask_8259A(void)
298{
299 unsigned long flags;
300
301 spin_lock_irqsave(&i8259A_lock, flags);
302
303 outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
304 outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */
305
306 spin_unlock_irqrestore(&i8259A_lock, flags);
307}
308
285void init_8259A(int auto_eoi) 309void init_8259A(int auto_eoi)
286{ 310{
287 unsigned long flags; 311 unsigned long flags;
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index de9aa0e3a9c5..98e4db5373f3 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -1494,7 +1494,7 @@ void /*__init*/ print_local_APIC(void *dummy)
1494 smp_processor_id(), hard_smp_processor_id()); 1494 smp_processor_id(), hard_smp_processor_id());
1495 v = apic_read(APIC_ID); 1495 v = apic_read(APIC_ID);
1496 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, 1496 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v,
1497 GET_APIC_ID(read_apic_id())); 1497 GET_APIC_ID(v));
1498 v = apic_read(APIC_LVR); 1498 v = apic_read(APIC_LVR);
1499 printk(KERN_INFO "... APIC VERSION: %08x\n", v); 1499 printk(KERN_INFO "... APIC VERSION: %08x\n", v);
1500 ver = GET_APIC_VERSION(v); 1500 ver = GET_APIC_VERSION(v);
@@ -1702,8 +1702,7 @@ void disable_IO_APIC(void)
1702 entry.dest_mode = 0; /* Physical */ 1702 entry.dest_mode = 0; /* Physical */
1703 entry.delivery_mode = dest_ExtINT; /* ExtInt */ 1703 entry.delivery_mode = dest_ExtINT; /* ExtInt */
1704 entry.vector = 0; 1704 entry.vector = 0;
1705 entry.dest.physical.physical_dest = 1705 entry.dest.physical.physical_dest = read_apic_id();
1706 GET_APIC_ID(read_apic_id());
1707 1706
1708 /* 1707 /*
1709 * Add it to the IO-APIC irq-routing table: 1708 * Add it to the IO-APIC irq-routing table:
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 8269434d1707..b9950dae59b7 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -37,6 +37,7 @@
37#include <acpi/acpi_bus.h> 37#include <acpi/acpi_bus.h>
38#endif 38#endif
39#include <linux/bootmem.h> 39#include <linux/bootmem.h>
40#include <linux/dmar.h>
40 41
41#include <asm/idle.h> 42#include <asm/idle.h>
42#include <asm/io.h> 43#include <asm/io.h>
@@ -49,6 +50,7 @@
49#include <asm/nmi.h> 50#include <asm/nmi.h>
50#include <asm/msidef.h> 51#include <asm/msidef.h>
51#include <asm/hypertransport.h> 52#include <asm/hypertransport.h>
53#include <asm/irq_remapping.h>
52 54
53#include <mach_ipi.h> 55#include <mach_ipi.h>
54#include <mach_apic.h> 56#include <mach_apic.h>
@@ -108,6 +110,9 @@ DEFINE_SPINLOCK(vector_lock);
108 */ 110 */
109int nr_ioapic_registers[MAX_IO_APICS]; 111int nr_ioapic_registers[MAX_IO_APICS];
110 112
113/* I/O APIC RTE contents at the OS boot up */
114struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
115
111/* I/O APIC entries */ 116/* I/O APIC entries */
112struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; 117struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
113int nr_ioapics; 118int nr_ioapics;
@@ -303,7 +308,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
303 pin = entry->pin; 308 pin = entry->pin;
304 if (pin == -1) 309 if (pin == -1)
305 break; 310 break;
306 io_apic_write(apic, 0x11 + pin*2, dest); 311 /*
312 * With interrupt-remapping, destination information comes
313 * from interrupt-remapping table entry.
314 */
315 if (!irq_remapped(irq))
316 io_apic_write(apic, 0x11 + pin*2, dest);
307 reg = io_apic_read(apic, 0x10 + pin*2); 317 reg = io_apic_read(apic, 0x10 + pin*2);
308 reg &= ~IO_APIC_REDIR_VECTOR_MASK; 318 reg &= ~IO_APIC_REDIR_VECTOR_MASK;
309 reg |= vector; 319 reg |= vector;
@@ -440,6 +450,69 @@ static void clear_IO_APIC (void)
440 clear_IO_APIC_pin(apic, pin); 450 clear_IO_APIC_pin(apic, pin);
441} 451}
442 452
453/*
454 * Saves and masks all the unmasked IO-APIC RTE's
455 */
456int save_mask_IO_APIC_setup(void)
457{
458 union IO_APIC_reg_01 reg_01;
459 unsigned long flags;
460 int apic, pin;
461
462 /*
463 * The number of IO-APIC IRQ registers (== #pins):
464 */
465 for (apic = 0; apic < nr_ioapics; apic++) {
466 spin_lock_irqsave(&ioapic_lock, flags);
467 reg_01.raw = io_apic_read(apic, 1);
468 spin_unlock_irqrestore(&ioapic_lock, flags);
469 nr_ioapic_registers[apic] = reg_01.bits.entries+1;
470 }
471
472 for (apic = 0; apic < nr_ioapics; apic++) {
473 early_ioapic_entries[apic] =
474 kzalloc(sizeof(struct IO_APIC_route_entry) *
475 nr_ioapic_registers[apic], GFP_KERNEL);
476 if (!early_ioapic_entries[apic])
477 return -ENOMEM;
478 }
479
480 for (apic = 0; apic < nr_ioapics; apic++)
481 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
482 struct IO_APIC_route_entry entry;
483
484 entry = early_ioapic_entries[apic][pin] =
485 ioapic_read_entry(apic, pin);
486 if (!entry.mask) {
487 entry.mask = 1;
488 ioapic_write_entry(apic, pin, entry);
489 }
490 }
491 return 0;
492}
493
494void restore_IO_APIC_setup(void)
495{
496 int apic, pin;
497
498 for (apic = 0; apic < nr_ioapics; apic++)
499 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
500 ioapic_write_entry(apic, pin,
501 early_ioapic_entries[apic][pin]);
502}
503
504void reinit_intr_remapped_IO_APIC(int intr_remapping)
505{
506 /*
507 * for now plain restore of previous settings.
508 * TBD: In the case of OS enabling interrupt-remapping,
509 * IO-APIC RTE's need to be setup to point to interrupt-remapping
510 * table entries. for now, do a plain restore, and wait for
511 * the setup_IO_APIC_irqs() to do proper initialization.
512 */
513 restore_IO_APIC_setup();
514}
515
443int skip_ioapic_setup; 516int skip_ioapic_setup;
444int ioapic_force; 517int ioapic_force;
445 518
@@ -834,18 +907,98 @@ void setup_vector_irq(int cpu)
834 907
835 908
836static struct irq_chip ioapic_chip; 909static struct irq_chip ioapic_chip;
910#ifdef CONFIG_INTR_REMAP
911static struct irq_chip ir_ioapic_chip;
912#endif
837 913
838static void ioapic_register_intr(int irq, unsigned long trigger) 914static void ioapic_register_intr(int irq, unsigned long trigger)
839{ 915{
840 if (trigger) { 916 if (trigger)
841 irq_desc[irq].status |= IRQ_LEVEL; 917 irq_desc[irq].status |= IRQ_LEVEL;
842 set_irq_chip_and_handler_name(irq, &ioapic_chip, 918 else
843 handle_fasteoi_irq, "fasteoi");
844 } else {
845 irq_desc[irq].status &= ~IRQ_LEVEL; 919 irq_desc[irq].status &= ~IRQ_LEVEL;
920
921#ifdef CONFIG_INTR_REMAP
922 if (irq_remapped(irq)) {
923 irq_desc[irq].status |= IRQ_MOVE_PCNTXT;
924 if (trigger)
925 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
926 handle_fasteoi_irq,
927 "fasteoi");
928 else
929 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
930 handle_edge_irq, "edge");
931 return;
932 }
933#endif
934 if (trigger)
935 set_irq_chip_and_handler_name(irq, &ioapic_chip,
936 handle_fasteoi_irq,
937 "fasteoi");
938 else
846 set_irq_chip_and_handler_name(irq, &ioapic_chip, 939 set_irq_chip_and_handler_name(irq, &ioapic_chip,
847 handle_edge_irq, "edge"); 940 handle_edge_irq, "edge");
941}
942
943static int setup_ioapic_entry(int apic, int irq,
944 struct IO_APIC_route_entry *entry,
945 unsigned int destination, int trigger,
946 int polarity, int vector)
947{
948 /*
949 * add it to the IO-APIC irq-routing table:
950 */
951 memset(entry,0,sizeof(*entry));
952
953#ifdef CONFIG_INTR_REMAP
954 if (intr_remapping_enabled) {
955 struct intel_iommu *iommu = map_ioapic_to_ir(apic);
956 struct irte irte;
957 struct IR_IO_APIC_route_entry *ir_entry =
958 (struct IR_IO_APIC_route_entry *) entry;
959 int index;
960
961 if (!iommu)
962 panic("No mapping iommu for ioapic %d\n", apic);
963
964 index = alloc_irte(iommu, irq, 1);
965 if (index < 0)
966 panic("Failed to allocate IRTE for ioapic %d\n", apic);
967
968 memset(&irte, 0, sizeof(irte));
969
970 irte.present = 1;
971 irte.dst_mode = INT_DEST_MODE;
972 irte.trigger_mode = trigger;
973 irte.dlvry_mode = INT_DELIVERY_MODE;
974 irte.vector = vector;
975 irte.dest_id = IRTE_DEST(destination);
976
977 modify_irte(irq, &irte);
978
979 ir_entry->index2 = (index >> 15) & 0x1;
980 ir_entry->zero = 0;
981 ir_entry->format = 1;
982 ir_entry->index = (index & 0x7fff);
983 } else
984#endif
985 {
986 entry->delivery_mode = INT_DELIVERY_MODE;
987 entry->dest_mode = INT_DEST_MODE;
988 entry->dest = destination;
848 } 989 }
990
991 entry->mask = 0; /* enable IRQ */
992 entry->trigger = trigger;
993 entry->polarity = polarity;
994 entry->vector = vector;
995
996 /* Mask level triggered irqs.
997 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
998 */
999 if (trigger)
1000 entry->mask = 1;
1001 return 0;
849} 1002}
850 1003
851static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, 1004static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
@@ -870,24 +1023,15 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
870 apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, 1023 apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
871 irq, trigger, polarity); 1024 irq, trigger, polarity);
872 1025
873 /*
874 * add it to the IO-APIC irq-routing table:
875 */
876 memset(&entry,0,sizeof(entry));
877 1026
878 entry.delivery_mode = INT_DELIVERY_MODE; 1027 if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
879 entry.dest_mode = INT_DEST_MODE; 1028 cpu_mask_to_apicid(mask), trigger, polarity,
880 entry.dest = cpu_mask_to_apicid(mask); 1029 cfg->vector)) {
881 entry.mask = 0; /* enable IRQ */ 1030 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
882 entry.trigger = trigger; 1031 mp_ioapics[apic].mp_apicid, pin);
883 entry.polarity = polarity; 1032 __clear_irq_vector(irq);
884 entry.vector = cfg->vector; 1033 return;
885 1034 }
886 /* Mask level triggered irqs.
887 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
888 */
889 if (trigger)
890 entry.mask = 1;
891 1035
892 ioapic_register_intr(irq, trigger); 1036 ioapic_register_intr(irq, trigger);
893 if (irq < 16) 1037 if (irq < 16)
@@ -939,6 +1083,9 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
939{ 1083{
940 struct IO_APIC_route_entry entry; 1084 struct IO_APIC_route_entry entry;
941 1085
1086 if (intr_remapping_enabled)
1087 return;
1088
942 memset(&entry, 0, sizeof(entry)); 1089 memset(&entry, 0, sizeof(entry));
943 1090
944 /* 1091 /*
@@ -1085,6 +1232,7 @@ static __apicdebuginit void print_APIC_bitfield (int base)
1085void __apicdebuginit print_local_APIC(void * dummy) 1232void __apicdebuginit print_local_APIC(void * dummy)
1086{ 1233{
1087 unsigned int v, ver, maxlvt; 1234 unsigned int v, ver, maxlvt;
1235 unsigned long icr;
1088 1236
1089 if (apic_verbosity == APIC_QUIET) 1237 if (apic_verbosity == APIC_QUIET)
1090 return; 1238 return;
@@ -1092,7 +1240,7 @@ void __apicdebuginit print_local_APIC(void * dummy)
1092 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", 1240 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
1093 smp_processor_id(), hard_smp_processor_id()); 1241 smp_processor_id(), hard_smp_processor_id());
1094 v = apic_read(APIC_ID); 1242 v = apic_read(APIC_ID);
1095 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id())); 1243 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id());
1096 v = apic_read(APIC_LVR); 1244 v = apic_read(APIC_LVR);
1097 printk(KERN_INFO "... APIC VERSION: %08x\n", v); 1245 printk(KERN_INFO "... APIC VERSION: %08x\n", v);
1098 ver = GET_APIC_VERSION(v); 1246 ver = GET_APIC_VERSION(v);
@@ -1128,10 +1276,9 @@ void __apicdebuginit print_local_APIC(void * dummy)
1128 v = apic_read(APIC_ESR); 1276 v = apic_read(APIC_ESR);
1129 printk(KERN_DEBUG "... APIC ESR: %08x\n", v); 1277 printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
1130 1278
1131 v = apic_read(APIC_ICR); 1279 icr = apic_icr_read();
1132 printk(KERN_DEBUG "... APIC ICR: %08x\n", v); 1280 printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
1133 v = apic_read(APIC_ICR2); 1281 printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
1134 printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
1135 1282
1136 v = apic_read(APIC_LVTT); 1283 v = apic_read(APIC_LVTT);
1137 printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); 1284 printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
@@ -1286,7 +1433,7 @@ void disable_IO_APIC(void)
1286 entry.dest_mode = 0; /* Physical */ 1433 entry.dest_mode = 0; /* Physical */
1287 entry.delivery_mode = dest_ExtINT; /* ExtInt */ 1434 entry.delivery_mode = dest_ExtINT; /* ExtInt */
1288 entry.vector = 0; 1435 entry.vector = 0;
1289 entry.dest = GET_APIC_ID(read_apic_id()); 1436 entry.dest = read_apic_id();
1290 1437
1291 /* 1438 /*
1292 * Add it to the IO-APIC irq-routing table: 1439 * Add it to the IO-APIC irq-routing table:
@@ -1392,6 +1539,147 @@ static int ioapic_retrigger_irq(unsigned int irq)
1392 */ 1539 */
1393 1540
1394#ifdef CONFIG_SMP 1541#ifdef CONFIG_SMP
1542
1543#ifdef CONFIG_INTR_REMAP
1544static void ir_irq_migration(struct work_struct *work);
1545
1546static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
1547
1548/*
1549 * Migrate the IO-APIC irq in the presence of intr-remapping.
1550 *
1551 * For edge triggered, irq migration is a simple atomic update(of vector
1552 * and cpu destination) of IRTE and flush the hardware cache.
1553 *
1554 * For level triggered, we need to modify the io-apic RTE aswell with the update
1555 * vector information, along with modifying IRTE with vector and destination.
1556 * So irq migration for level triggered is little bit more complex compared to
1557 * edge triggered migration. But the good news is, we use the same algorithm
1558 * for level triggered migration as we have today, only difference being,
1559 * we now initiate the irq migration from process context instead of the
1560 * interrupt context.
1561 *
1562 * In future, when we do a directed EOI (combined with cpu EOI broadcast
1563 * suppression) to the IO-APIC, level triggered irq migration will also be
1564 * as simple as edge triggered migration and we can do the irq migration
1565 * with a simple atomic update to IO-APIC RTE.
1566 */
1567static void migrate_ioapic_irq(int irq, cpumask_t mask)
1568{
1569 struct irq_cfg *cfg = irq_cfg + irq;
1570 struct irq_desc *desc = irq_desc + irq;
1571 cpumask_t tmp, cleanup_mask;
1572 struct irte irte;
1573 int modify_ioapic_rte = desc->status & IRQ_LEVEL;
1574 unsigned int dest;
1575 unsigned long flags;
1576
1577 cpus_and(tmp, mask, cpu_online_map);
1578 if (cpus_empty(tmp))
1579 return;
1580
1581 if (get_irte(irq, &irte))
1582 return;
1583
1584 if (assign_irq_vector(irq, mask))
1585 return;
1586
1587 cpus_and(tmp, cfg->domain, mask);
1588 dest = cpu_mask_to_apicid(tmp);
1589
1590 if (modify_ioapic_rte) {
1591 spin_lock_irqsave(&ioapic_lock, flags);
1592 __target_IO_APIC_irq(irq, dest, cfg->vector);
1593 spin_unlock_irqrestore(&ioapic_lock, flags);
1594 }
1595
1596 irte.vector = cfg->vector;
1597 irte.dest_id = IRTE_DEST(dest);
1598
1599 /*
1600 * Modified the IRTE and flushes the Interrupt entry cache.
1601 */
1602 modify_irte(irq, &irte);
1603
1604 if (cfg->move_in_progress) {
1605 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
1606 cfg->move_cleanup_count = cpus_weight(cleanup_mask);
1607 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
1608 cfg->move_in_progress = 0;
1609 }
1610
1611 irq_desc[irq].affinity = mask;
1612}
1613
1614static int migrate_irq_remapped_level(int irq)
1615{
1616 int ret = -1;
1617
1618 mask_IO_APIC_irq(irq);
1619
1620 if (io_apic_level_ack_pending(irq)) {
1621 /*
1622 * Interrupt in progress. Migrating irq now will change the
1623 * vector information in the IO-APIC RTE and that will confuse
1624 * the EOI broadcast performed by cpu.
1625 * So, delay the irq migration to the next instance.
1626 */
1627 schedule_delayed_work(&ir_migration_work, 1);
1628 goto unmask;
1629 }
1630
1631 /* everthing is clear. we have right of way */
1632 migrate_ioapic_irq(irq, irq_desc[irq].pending_mask);
1633
1634 ret = 0;
1635 irq_desc[irq].status &= ~IRQ_MOVE_PENDING;
1636 cpus_clear(irq_desc[irq].pending_mask);
1637
1638unmask:
1639 unmask_IO_APIC_irq(irq);
1640 return ret;
1641}
1642
1643static void ir_irq_migration(struct work_struct *work)
1644{
1645 int irq;
1646
1647 for (irq = 0; irq < NR_IRQS; irq++) {
1648 struct irq_desc *desc = irq_desc + irq;
1649 if (desc->status & IRQ_MOVE_PENDING) {
1650 unsigned long flags;
1651
1652 spin_lock_irqsave(&desc->lock, flags);
1653 if (!desc->chip->set_affinity ||
1654 !(desc->status & IRQ_MOVE_PENDING)) {
1655 desc->status &= ~IRQ_MOVE_PENDING;
1656 spin_unlock_irqrestore(&desc->lock, flags);
1657 continue;
1658 }
1659
1660 desc->chip->set_affinity(irq,
1661 irq_desc[irq].pending_mask);
1662 spin_unlock_irqrestore(&desc->lock, flags);
1663 }
1664 }
1665}
1666
1667/*
1668 * Migrates the IRQ destination in the process context.
1669 */
1670static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
1671{
1672 if (irq_desc[irq].status & IRQ_LEVEL) {
1673 irq_desc[irq].status |= IRQ_MOVE_PENDING;
1674 irq_desc[irq].pending_mask = mask;
1675 migrate_irq_remapped_level(irq);
1676 return;
1677 }
1678
1679 migrate_ioapic_irq(irq, mask);
1680}
1681#endif
1682
1395asmlinkage void smp_irq_move_cleanup_interrupt(void) 1683asmlinkage void smp_irq_move_cleanup_interrupt(void)
1396{ 1684{
1397 unsigned vector, me; 1685 unsigned vector, me;
@@ -1448,6 +1736,17 @@ static void irq_complete_move(unsigned int irq)
1448#else 1736#else
1449static inline void irq_complete_move(unsigned int irq) {} 1737static inline void irq_complete_move(unsigned int irq) {}
1450#endif 1738#endif
1739#ifdef CONFIG_INTR_REMAP
1740static void ack_x2apic_level(unsigned int irq)
1741{
1742 ack_x2APIC_irq();
1743}
1744
1745static void ack_x2apic_edge(unsigned int irq)
1746{
1747 ack_x2APIC_irq();
1748}
1749#endif
1451 1750
1452static void ack_apic_edge(unsigned int irq) 1751static void ack_apic_edge(unsigned int irq)
1453{ 1752{
@@ -1522,6 +1821,21 @@ static struct irq_chip ioapic_chip __read_mostly = {
1522 .retrigger = ioapic_retrigger_irq, 1821 .retrigger = ioapic_retrigger_irq,
1523}; 1822};
1524 1823
1824#ifdef CONFIG_INTR_REMAP
1825static struct irq_chip ir_ioapic_chip __read_mostly = {
1826 .name = "IR-IO-APIC",
1827 .startup = startup_ioapic_irq,
1828 .mask = mask_IO_APIC_irq,
1829 .unmask = unmask_IO_APIC_irq,
1830 .ack = ack_x2apic_edge,
1831 .eoi = ack_x2apic_level,
1832#ifdef CONFIG_SMP
1833 .set_affinity = set_ir_ioapic_affinity_irq,
1834#endif
1835 .retrigger = ioapic_retrigger_irq,
1836};
1837#endif
1838
1525static inline void init_IO_APIC_traps(void) 1839static inline void init_IO_APIC_traps(void)
1526{ 1840{
1527 int irq; 1841 int irq;
@@ -1707,6 +2021,8 @@ static inline void __init check_timer(void)
1707 * 8259A. 2021 * 8259A.
1708 */ 2022 */
1709 if (pin1 == -1) { 2023 if (pin1 == -1) {
2024 if (intr_remapping_enabled)
2025 panic("BIOS bug: timer not connected to IO-APIC");
1710 pin1 = pin2; 2026 pin1 = pin2;
1711 apic1 = apic2; 2027 apic1 = apic2;
1712 no_pin1 = 1; 2028 no_pin1 = 1;
@@ -1733,6 +2049,8 @@ static inline void __init check_timer(void)
1733 clear_IO_APIC_pin(0, pin1); 2049 clear_IO_APIC_pin(0, pin1);
1734 goto out; 2050 goto out;
1735 } 2051 }
2052 if (intr_remapping_enabled)
2053 panic("timer doesn't work through Interrupt-remapped IO-APIC");
1736 clear_IO_APIC_pin(apic1, pin1); 2054 clear_IO_APIC_pin(apic1, pin1);
1737 if (!no_pin1) 2055 if (!no_pin1)
1738 apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " 2056 apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -1972,6 +2290,9 @@ void destroy_irq(unsigned int irq)
1972 2290
1973 dynamic_irq_cleanup(irq); 2291 dynamic_irq_cleanup(irq);
1974 2292
2293#ifdef CONFIG_INTR_REMAP
2294 free_irte(irq);
2295#endif
1975 spin_lock_irqsave(&vector_lock, flags); 2296 spin_lock_irqsave(&vector_lock, flags);
1976 __clear_irq_vector(irq); 2297 __clear_irq_vector(irq);
1977 spin_unlock_irqrestore(&vector_lock, flags); 2298 spin_unlock_irqrestore(&vector_lock, flags);
@@ -1990,11 +2311,42 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
1990 2311
1991 tmp = TARGET_CPUS; 2312 tmp = TARGET_CPUS;
1992 err = assign_irq_vector(irq, tmp); 2313 err = assign_irq_vector(irq, tmp);
1993 if (!err) { 2314 if (err)
1994 cpus_and(tmp, cfg->domain, tmp); 2315 return err;
1995 dest = cpu_mask_to_apicid(tmp); 2316
2317 cpus_and(tmp, cfg->domain, tmp);
2318 dest = cpu_mask_to_apicid(tmp);
2319
2320#ifdef CONFIG_INTR_REMAP
2321 if (irq_remapped(irq)) {
2322 struct irte irte;
2323 int ir_index;
2324 u16 sub_handle;
2325
2326 ir_index = map_irq_to_irte_handle(irq, &sub_handle);
2327 BUG_ON(ir_index == -1);
2328
2329 memset (&irte, 0, sizeof(irte));
2330
2331 irte.present = 1;
2332 irte.dst_mode = INT_DEST_MODE;
2333 irte.trigger_mode = 0; /* edge */
2334 irte.dlvry_mode = INT_DELIVERY_MODE;
2335 irte.vector = cfg->vector;
2336 irte.dest_id = IRTE_DEST(dest);
2337
2338 modify_irte(irq, &irte);
1996 2339
1997 msg->address_hi = MSI_ADDR_BASE_HI; 2340 msg->address_hi = MSI_ADDR_BASE_HI;
2341 msg->data = sub_handle;
2342 msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
2343 MSI_ADDR_IR_SHV |
2344 MSI_ADDR_IR_INDEX1(ir_index) |
2345 MSI_ADDR_IR_INDEX2(ir_index);
2346 } else
2347#endif
2348 {
2349 msg->address_hi = MSI_ADDR_BASE_HI;
1998 msg->address_lo = 2350 msg->address_lo =
1999 MSI_ADDR_BASE_LO | 2351 MSI_ADDR_BASE_LO |
2000 ((INT_DEST_MODE == 0) ? 2352 ((INT_DEST_MODE == 0) ?
@@ -2044,6 +2396,55 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2044 write_msi_msg(irq, &msg); 2396 write_msi_msg(irq, &msg);
2045 irq_desc[irq].affinity = mask; 2397 irq_desc[irq].affinity = mask;
2046} 2398}
2399
2400#ifdef CONFIG_INTR_REMAP
2401/*
2402 * Migrate the MSI irq to another cpumask. This migration is
2403 * done in the process context using interrupt-remapping hardware.
2404 */
2405static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2406{
2407 struct irq_cfg *cfg = irq_cfg + irq;
2408 unsigned int dest;
2409 cpumask_t tmp, cleanup_mask;
2410 struct irte irte;
2411
2412 cpus_and(tmp, mask, cpu_online_map);
2413 if (cpus_empty(tmp))
2414 return;
2415
2416 if (get_irte(irq, &irte))
2417 return;
2418
2419 if (assign_irq_vector(irq, mask))
2420 return;
2421
2422 cpus_and(tmp, cfg->domain, mask);
2423 dest = cpu_mask_to_apicid(tmp);
2424
2425 irte.vector = cfg->vector;
2426 irte.dest_id = IRTE_DEST(dest);
2427
2428 /*
2429 * atomically update the IRTE with the new destination and vector.
2430 */
2431 modify_irte(irq, &irte);
2432
2433 /*
2434 * After this point, all the interrupts will start arriving
2435 * at the new destination. So, time to cleanup the previous
2436 * vector allocation.
2437 */
2438 if (cfg->move_in_progress) {
2439 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
2440 cfg->move_cleanup_count = cpus_weight(cleanup_mask);
2441 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
2442 cfg->move_in_progress = 0;
2443 }
2444
2445 irq_desc[irq].affinity = mask;
2446}
2447#endif
2047#endif /* CONFIG_SMP */ 2448#endif /* CONFIG_SMP */
2048 2449
2049/* 2450/*
@@ -2061,26 +2462,157 @@ static struct irq_chip msi_chip = {
2061 .retrigger = ioapic_retrigger_irq, 2462 .retrigger = ioapic_retrigger_irq,
2062}; 2463};
2063 2464
2064int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) 2465#ifdef CONFIG_INTR_REMAP
2466static struct irq_chip msi_ir_chip = {
2467 .name = "IR-PCI-MSI",
2468 .unmask = unmask_msi_irq,
2469 .mask = mask_msi_irq,
2470 .ack = ack_x2apic_edge,
2471#ifdef CONFIG_SMP
2472 .set_affinity = ir_set_msi_irq_affinity,
2473#endif
2474 .retrigger = ioapic_retrigger_irq,
2475};
2476
2477/*
2478 * Map the PCI dev to the corresponding remapping hardware unit
2479 * and allocate 'nvec' consecutive interrupt-remapping table entries
2480 * in it.
2481 */
2482static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
2483{
2484 struct intel_iommu *iommu;
2485 int index;
2486
2487 iommu = map_dev_to_ir(dev);
2488 if (!iommu) {
2489 printk(KERN_ERR
2490 "Unable to map PCI %s to iommu\n", pci_name(dev));
2491 return -ENOENT;
2492 }
2493
2494 index = alloc_irte(iommu, irq, nvec);
2495 if (index < 0) {
2496 printk(KERN_ERR
2497 "Unable to allocate %d IRTE for PCI %s\n", nvec,
2498 pci_name(dev));
2499 return -ENOSPC;
2500 }
2501 return index;
2502}
2503#endif
2504
2505static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
2065{ 2506{
2507 int ret;
2066 struct msi_msg msg; 2508 struct msi_msg msg;
2509
2510 ret = msi_compose_msg(dev, irq, &msg);
2511 if (ret < 0)
2512 return ret;
2513
2514 set_irq_msi(irq, desc);
2515 write_msi_msg(irq, &msg);
2516
2517#ifdef CONFIG_INTR_REMAP
2518 if (irq_remapped(irq)) {
2519 struct irq_desc *desc = irq_desc + irq;
2520 /*
2521 * irq migration in process context
2522 */
2523 desc->status |= IRQ_MOVE_PCNTXT;
2524 set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
2525 } else
2526#endif
2527 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
2528
2529 return 0;
2530}
2531
2532int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
2533{
2067 int irq, ret; 2534 int irq, ret;
2535
2068 irq = create_irq(); 2536 irq = create_irq();
2069 if (irq < 0) 2537 if (irq < 0)
2070 return irq; 2538 return irq;
2071 2539
2072 ret = msi_compose_msg(dev, irq, &msg); 2540#ifdef CONFIG_INTR_REMAP
2541 if (!intr_remapping_enabled)
2542 goto no_ir;
2543
2544 ret = msi_alloc_irte(dev, irq, 1);
2545 if (ret < 0)
2546 goto error;
2547no_ir:
2548#endif
2549 ret = setup_msi_irq(dev, desc, irq);
2073 if (ret < 0) { 2550 if (ret < 0) {
2074 destroy_irq(irq); 2551 destroy_irq(irq);
2075 return ret; 2552 return ret;
2076 } 2553 }
2554 return 0;
2077 2555
2078 set_irq_msi(irq, desc); 2556#ifdef CONFIG_INTR_REMAP
2079 write_msi_msg(irq, &msg); 2557error:
2558 destroy_irq(irq);
2559 return ret;
2560#endif
2561}
2080 2562
2081 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); 2563int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
2564{
2565 int irq, ret, sub_handle;
2566 struct msi_desc *desc;
2567#ifdef CONFIG_INTR_REMAP
2568 struct intel_iommu *iommu = 0;
2569 int index = 0;
2570#endif
2082 2571
2572 sub_handle = 0;
2573 list_for_each_entry(desc, &dev->msi_list, list) {
2574 irq = create_irq();
2575 if (irq < 0)
2576 return irq;
2577#ifdef CONFIG_INTR_REMAP
2578 if (!intr_remapping_enabled)
2579 goto no_ir;
2580
2581 if (!sub_handle) {
2582 /*
2583 * allocate the consecutive block of IRTE's
2584 * for 'nvec'
2585 */
2586 index = msi_alloc_irte(dev, irq, nvec);
2587 if (index < 0) {
2588 ret = index;
2589 goto error;
2590 }
2591 } else {
2592 iommu = map_dev_to_ir(dev);
2593 if (!iommu) {
2594 ret = -ENOENT;
2595 goto error;
2596 }
2597 /*
2598 * setup the mapping between the irq and the IRTE
2599 * base index, the sub_handle pointing to the
2600 * appropriate interrupt remap table entry.
2601 */
2602 set_irte_irq(irq, iommu, index, sub_handle);
2603 }
2604no_ir:
2605#endif
2606 ret = setup_msi_irq(dev, desc, irq);
2607 if (ret < 0)
2608 goto error;
2609 sub_handle++;
2610 }
2083 return 0; 2611 return 0;
2612
2613error:
2614 destroy_irq(irq);
2615 return ret;
2084} 2616}
2085 2617
2086void arch_teardown_msi_irq(unsigned int irq) 2618void arch_teardown_msi_irq(unsigned int irq)
@@ -2328,6 +2860,10 @@ void __init setup_ioapic_dest(void)
2328 setup_IO_APIC_irq(ioapic, pin, irq, 2860 setup_IO_APIC_irq(ioapic, pin, irq,
2329 irq_trigger(irq_entry), 2861 irq_trigger(irq_entry),
2330 irq_polarity(irq_entry)); 2862 irq_polarity(irq_entry));
2863#ifdef CONFIG_INTR_REMAP
2864 else if (intr_remapping_enabled)
2865 set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
2866#endif
2331 else 2867 else
2332 set_ioapic_affinity_irq(irq, TARGET_CPUS); 2868 set_ioapic_affinity_irq(irq, TARGET_CPUS);
2333 } 2869 }
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 6ae005ccaed8..e362c6ab4d35 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -397,7 +397,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
397 generic_bigsmp_probe(); 397 generic_bigsmp_probe();
398#endif 398#endif
399 399
400#ifdef CONFIG_X86_32
400 setup_apic_routing(); 401 setup_apic_routing();
402#endif
401 if (!num_processors) 403 if (!num_processors)
402 printk(KERN_ERR "MPTABLE: no processors registered!\n"); 404 printk(KERN_ERR "MPTABLE: no processors registered!\n");
403 return num_processors; 405 return num_processors;
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 94da4d52d798..5744789a78f4 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -373,8 +373,6 @@ struct pv_cpu_ops pv_cpu_ops = {
373 373
374struct pv_apic_ops pv_apic_ops = { 374struct pv_apic_ops pv_apic_ops = {
375#ifdef CONFIG_X86_LOCAL_APIC 375#ifdef CONFIG_X86_LOCAL_APIC
376 .apic_write = native_apic_write,
377 .apic_read = native_apic_read,
378 .setup_boot_clock = setup_boot_APIC_clock, 376 .setup_boot_clock = setup_boot_APIC_clock,
379 .setup_secondary_clock = setup_secondary_APIC_clock, 377 .setup_secondary_clock = setup_secondary_APIC_clock,
380 .startup_ipi_hook = paravirt_nop, 378 .startup_ipi_hook = paravirt_nop,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b520dae02bf4..792b87853a76 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -731,6 +731,8 @@ void __init setup_arch(char **cmdline_p)
731 num_physpages = max_pfn; 731 num_physpages = max_pfn;
732 732
733 check_efer(); 733 check_efer();
734 if (cpu_has_x2apic)
735 check_x2apic();
734 736
735 /* How many end-of-memory variables you have, grandma! */ 737 /* How many end-of-memory variables you have, grandma! */
736 /* need this before calling reserve_initrd */ 738 /* need this before calling reserve_initrd */
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 332512767f4f..626618bf2f81 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -123,7 +123,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
123 123
124static atomic_t init_deasserted; 124static atomic_t init_deasserted;
125 125
126static int boot_cpu_logical_apicid;
127 126
128/* representing cpus for which sibling maps can be computed */ 127/* representing cpus for which sibling maps can be computed */
129static cpumask_t cpu_sibling_setup_map; 128static cpumask_t cpu_sibling_setup_map;
@@ -165,6 +164,8 @@ static void unmap_cpu_to_node(int cpu)
165#endif 164#endif
166 165
167#ifdef CONFIG_X86_32 166#ifdef CONFIG_X86_32
167static int boot_cpu_logical_apicid;
168
168u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = 169u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
169 { [0 ... NR_CPUS-1] = BAD_APICID }; 170 { [0 ... NR_CPUS-1] = BAD_APICID };
170 171
@@ -210,7 +211,7 @@ static void __cpuinit smp_callin(void)
210 /* 211 /*
211 * (This works even if the APIC is not enabled.) 212 * (This works even if the APIC is not enabled.)
212 */ 213 */
213 phys_id = GET_APIC_ID(read_apic_id()); 214 phys_id = read_apic_id();
214 cpuid = smp_processor_id(); 215 cpuid = smp_processor_id();
215 if (cpu_isset(cpuid, cpu_callin_map)) { 216 if (cpu_isset(cpuid, cpu_callin_map)) {
216 panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, 217 panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
@@ -546,8 +547,7 @@ static inline void __inquire_remote_apic(int apicid)
546 printk(KERN_CONT 547 printk(KERN_CONT
547 "a previous APIC delivery may have failed\n"); 548 "a previous APIC delivery may have failed\n");
548 549
549 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); 550 apic_icr_write(APIC_DM_REMRD | regs[i], apicid);
550 apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]);
551 551
552 timeout = 0; 552 timeout = 0;
553 do { 553 do {
@@ -579,11 +579,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
579 int maxlvt; 579 int maxlvt;
580 580
581 /* Target chip */ 581 /* Target chip */
582 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
583
584 /* Boot on the stack */ 582 /* Boot on the stack */
585 /* Kick the second */ 583 /* Kick the second */
586 apic_write(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); 584 apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid);
587 585
588 pr_debug("Waiting for send to finish...\n"); 586 pr_debug("Waiting for send to finish...\n");
589 send_status = safe_apic_wait_icr_idle(); 587 send_status = safe_apic_wait_icr_idle();
@@ -636,13 +634,11 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
636 /* 634 /*
637 * Turn INIT on target chip 635 * Turn INIT on target chip
638 */ 636 */
639 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
640
641 /* 637 /*
642 * Send IPI 638 * Send IPI
643 */ 639 */
644 apic_write(APIC_ICR, 640 apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
645 APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT); 641 phys_apicid);
646 642
647 pr_debug("Waiting for send to finish...\n"); 643 pr_debug("Waiting for send to finish...\n");
648 send_status = safe_apic_wait_icr_idle(); 644 send_status = safe_apic_wait_icr_idle();
@@ -652,10 +648,8 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
652 pr_debug("Deasserting INIT.\n"); 648 pr_debug("Deasserting INIT.\n");
653 649
654 /* Target chip */ 650 /* Target chip */
655 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
656
657 /* Send IPI */ 651 /* Send IPI */
658 apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); 652 apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);
659 653
660 pr_debug("Waiting for send to finish...\n"); 654 pr_debug("Waiting for send to finish...\n");
661 send_status = safe_apic_wait_icr_idle(); 655 send_status = safe_apic_wait_icr_idle();
@@ -698,11 +692,10 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
698 */ 692 */
699 693
700 /* Target chip */ 694 /* Target chip */
701 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
702
703 /* Boot on the stack */ 695 /* Boot on the stack */
704 /* Kick the second */ 696 /* Kick the second */
705 apic_write(APIC_ICR, APIC_DM_STARTUP | (start_eip >> 12)); 697 apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12),
698 phys_apicid);
706 699
707 /* 700 /*
708 * Give the other CPU some time to accept the IPI. 701 * Give the other CPU some time to accept the IPI.
@@ -1136,10 +1129,17 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1136 * Setup boot CPU information 1129 * Setup boot CPU information
1137 */ 1130 */
1138 smp_store_cpu_info(0); /* Final full version of the data */ 1131 smp_store_cpu_info(0); /* Final full version of the data */
1132#ifdef CONFIG_X86_32
1139 boot_cpu_logical_apicid = logical_smp_processor_id(); 1133 boot_cpu_logical_apicid = logical_smp_processor_id();
1134#endif
1140 current_thread_info()->cpu = 0; /* needed? */ 1135 current_thread_info()->cpu = 0; /* needed? */
1141 set_cpu_sibling_map(0); 1136 set_cpu_sibling_map(0);
1142 1137
1138#ifdef CONFIG_X86_64
1139 enable_IR_x2apic();
1140 setup_apic_routing();
1141#endif
1142
1143 if (smp_sanity_check(max_cpus) < 0) { 1143 if (smp_sanity_check(max_cpus) < 0) {
1144 printk(KERN_INFO "SMP disabled\n"); 1144 printk(KERN_INFO "SMP disabled\n");
1145 disable_smp(); 1145 disable_smp();
@@ -1147,9 +1147,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1147 } 1147 }
1148 1148
1149 preempt_disable(); 1149 preempt_disable();
1150 if (GET_APIC_ID(read_apic_id()) != boot_cpu_physical_apicid) { 1150 if (read_apic_id() != boot_cpu_physical_apicid) {
1151 panic("Boot APIC ID in local APIC unexpected (%d vs %d)", 1151 panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
1152 GET_APIC_ID(read_apic_id()), boot_cpu_physical_apicid); 1152 read_apic_id(), boot_cpu_physical_apicid);
1153 /* Or can we switch back to PIC here? */ 1153 /* Or can we switch back to PIC here? */
1154 } 1154 }
1155 preempt_enable(); 1155 preempt_enable();
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 0a1b1a9d922d..45c27c4e2a6e 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -904,8 +904,8 @@ static inline int __init activate_vmi(void)
904#endif 904#endif
905 905
906#ifdef CONFIG_X86_LOCAL_APIC 906#ifdef CONFIG_X86_LOCAL_APIC
907 para_fill(pv_apic_ops.apic_read, APICRead); 907 para_fill(apic_ops->read, APICRead);
908 para_fill(pv_apic_ops.apic_write, APICWrite); 908 para_fill(apic_ops->write, APICWrite);
909#endif 909#endif
910 910
911 /* 911 /*
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 0313a5eec412..756fc489652b 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -55,6 +55,7 @@
55#include <linux/lguest_launcher.h> 55#include <linux/lguest_launcher.h>
56#include <linux/virtio_console.h> 56#include <linux/virtio_console.h>
57#include <linux/pm.h> 57#include <linux/pm.h>
58#include <asm/apic.h>
58#include <asm/lguest.h> 59#include <asm/lguest.h>
59#include <asm/paravirt.h> 60#include <asm/paravirt.h>
60#include <asm/param.h> 61#include <asm/param.h>
@@ -783,14 +784,44 @@ static void lguest_wbinvd(void)
783 * code qualifies for Advanced. It will also never interrupt anything. It 784 * code qualifies for Advanced. It will also never interrupt anything. It
784 * does, however, allow us to get through the Linux boot code. */ 785 * does, however, allow us to get through the Linux boot code. */
785#ifdef CONFIG_X86_LOCAL_APIC 786#ifdef CONFIG_X86_LOCAL_APIC
786static void lguest_apic_write(unsigned long reg, u32 v) 787static void lguest_apic_write(u32 reg, u32 v)
787{ 788{
788} 789}
789 790
790static u32 lguest_apic_read(unsigned long reg) 791static u32 lguest_apic_read(u32 reg)
791{ 792{
792 return 0; 793 return 0;
793} 794}
795
796static u64 lguest_apic_icr_read(void)
797{
798 return 0;
799}
800
801static void lguest_apic_icr_write(u32 low, u32 id)
802{
803 /* Warn to see if there's any stray references */
804 WARN_ON(1);
805}
806
807static void lguest_apic_wait_icr_idle(void)
808{
809 return;
810}
811
812static u32 lguest_apic_safe_wait_icr_idle(void)
813{
814 return 0;
815}
816
817static struct apic_ops lguest_basic_apic_ops = {
818 .read = lguest_apic_read,
819 .write = lguest_apic_write,
820 .icr_read = lguest_apic_icr_read,
821 .icr_write = lguest_apic_icr_write,
822 .wait_icr_idle = lguest_apic_wait_icr_idle,
823 .safe_wait_icr_idle = lguest_apic_safe_wait_icr_idle,
824};
794#endif 825#endif
795 826
796/* STOP! Until an interrupt comes in. */ 827/* STOP! Until an interrupt comes in. */
@@ -990,8 +1021,7 @@ __init void lguest_init(void)
990 1021
991#ifdef CONFIG_X86_LOCAL_APIC 1022#ifdef CONFIG_X86_LOCAL_APIC
992 /* apic read/write intercepts */ 1023 /* apic read/write intercepts */
993 pv_apic_ops.apic_write = lguest_apic_write; 1024 apic_ops = &lguest_basic_apic_ops;
994 pv_apic_ops.apic_read = lguest_apic_read;
995#endif 1025#endif
996 1026
997 /* time operations */ 1027 /* time operations */
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c
index 59d771714559..b31f2800638e 100644
--- a/arch/x86/mach-generic/bigsmp.c
+++ b/arch/x86/mach-generic/bigsmp.c
@@ -5,17 +5,16 @@
5#define APIC_DEFINITION 1 5#define APIC_DEFINITION 1
6#include <linux/threads.h> 6#include <linux/threads.h>
7#include <linux/cpumask.h> 7#include <linux/cpumask.h>
8#include <asm/smp.h>
9#include <asm/mpspec.h> 8#include <asm/mpspec.h>
10#include <asm/genapic.h> 9#include <asm/genapic.h>
11#include <asm/fixmap.h> 10#include <asm/fixmap.h>
12#include <asm/apicdef.h> 11#include <asm/apicdef.h>
13#include <linux/kernel.h> 12#include <linux/kernel.h>
14#include <linux/smp.h>
15#include <linux/init.h> 13#include <linux/init.h>
16#include <linux/dmi.h> 14#include <linux/dmi.h>
17#include <asm/mach-bigsmp/mach_apic.h>
18#include <asm/mach-bigsmp/mach_apicdef.h> 15#include <asm/mach-bigsmp/mach_apicdef.h>
16#include <linux/smp.h>
17#include <asm/mach-bigsmp/mach_apic.h>
19#include <asm/mach-bigsmp/mach_ipi.h> 18#include <asm/mach-bigsmp/mach_ipi.h>
20#include <asm/mach-default/mach_mpparse.h> 19#include <asm/mach-default/mach_mpparse.h>
21 20
diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c
index 4742626f08c4..9b30547d746e 100644
--- a/arch/x86/mach-generic/es7000.c
+++ b/arch/x86/mach-generic/es7000.c
@@ -4,16 +4,15 @@
4#define APIC_DEFINITION 1 4#define APIC_DEFINITION 1
5#include <linux/threads.h> 5#include <linux/threads.h>
6#include <linux/cpumask.h> 6#include <linux/cpumask.h>
7#include <asm/smp.h>
8#include <asm/mpspec.h> 7#include <asm/mpspec.h>
9#include <asm/genapic.h> 8#include <asm/genapic.h>
10#include <asm/fixmap.h> 9#include <asm/fixmap.h>
11#include <asm/apicdef.h> 10#include <asm/apicdef.h>
12#include <linux/kernel.h> 11#include <linux/kernel.h>
13#include <linux/string.h> 12#include <linux/string.h>
14#include <linux/smp.h>
15#include <linux/init.h> 13#include <linux/init.h>
16#include <asm/mach-es7000/mach_apicdef.h> 14#include <asm/mach-es7000/mach_apicdef.h>
15#include <linux/smp.h>
17#include <asm/mach-es7000/mach_apic.h> 16#include <asm/mach-es7000/mach_apic.h>
18#include <asm/mach-es7000/mach_ipi.h> 17#include <asm/mach-es7000/mach_ipi.h>
19#include <asm/mach-es7000/mach_mpparse.h> 18#include <asm/mach-es7000/mach_mpparse.h>
diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c
index 8091e68764c4..95c07efff6b7 100644
--- a/arch/x86/mach-generic/numaq.c
+++ b/arch/x86/mach-generic/numaq.c
@@ -4,7 +4,6 @@
4#define APIC_DEFINITION 1 4#define APIC_DEFINITION 1
5#include <linux/threads.h> 5#include <linux/threads.h>
6#include <linux/cpumask.h> 6#include <linux/cpumask.h>
7#include <linux/smp.h>
8#include <asm/mpspec.h> 7#include <asm/mpspec.h>
9#include <asm/genapic.h> 8#include <asm/genapic.h>
10#include <asm/fixmap.h> 9#include <asm/fixmap.h>
@@ -12,8 +11,9 @@
12#include <linux/kernel.h> 11#include <linux/kernel.h>
13#include <linux/string.h> 12#include <linux/string.h>
14#include <linux/init.h> 13#include <linux/init.h>
15#include <asm/mach-numaq/mach_apic.h>
16#include <asm/mach-numaq/mach_apicdef.h> 14#include <asm/mach-numaq/mach_apicdef.h>
15#include <linux/smp.h>
16#include <asm/mach-numaq/mach_apic.h>
17#include <asm/mach-numaq/mach_ipi.h> 17#include <asm/mach-numaq/mach_ipi.h>
18#include <asm/mach-numaq/mach_mpparse.h> 18#include <asm/mach-numaq/mach_mpparse.h>
19#include <asm/mach-numaq/mach_wakecpu.h> 19#include <asm/mach-numaq/mach_wakecpu.h>
diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c
index a97ea0f35b1e..752edd96b1bf 100644
--- a/arch/x86/mach-generic/summit.c
+++ b/arch/x86/mach-generic/summit.c
@@ -4,17 +4,16 @@
4#define APIC_DEFINITION 1 4#define APIC_DEFINITION 1
5#include <linux/threads.h> 5#include <linux/threads.h>
6#include <linux/cpumask.h> 6#include <linux/cpumask.h>
7#include <asm/smp.h>
8#include <asm/mpspec.h> 7#include <asm/mpspec.h>
9#include <asm/genapic.h> 8#include <asm/genapic.h>
10#include <asm/fixmap.h> 9#include <asm/fixmap.h>
11#include <asm/apicdef.h> 10#include <asm/apicdef.h>
12#include <linux/kernel.h> 11#include <linux/kernel.h>
13#include <linux/string.h> 12#include <linux/string.h>
14#include <linux/smp.h>
15#include <linux/init.h> 13#include <linux/init.h>
16#include <asm/mach-summit/mach_apic.h>
17#include <asm/mach-summit/mach_apicdef.h> 14#include <asm/mach-summit/mach_apicdef.h>
15#include <linux/smp.h>
16#include <asm/mach-summit/mach_apic.h>
18#include <asm/mach-summit/mach_ipi.h> 17#include <asm/mach-summit/mach_ipi.h>
19#include <asm/mach-summit/mach_mpparse.h> 18#include <asm/mach-summit/mach_mpparse.h>
20 19
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 9ff6e3cbf08f..8d28925ebed9 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -36,6 +36,7 @@
36#include <xen/hvc-console.h> 36#include <xen/hvc-console.h>
37 37
38#include <asm/paravirt.h> 38#include <asm/paravirt.h>
39#include <asm/apic.h>
39#include <asm/page.h> 40#include <asm/page.h>
40#include <asm/xen/hypercall.h> 41#include <asm/xen/hypercall.h>
41#include <asm/xen/hypervisor.h> 42#include <asm/xen/hypervisor.h>
@@ -580,16 +581,47 @@ static void xen_io_delay(void)
580} 581}
581 582
582#ifdef CONFIG_X86_LOCAL_APIC 583#ifdef CONFIG_X86_LOCAL_APIC
583static u32 xen_apic_read(unsigned long reg) 584static u32 xen_apic_read(u32 reg)
584{ 585{
585 return 0; 586 return 0;
586} 587}
587 588
588static void xen_apic_write(unsigned long reg, u32 val) 589static void xen_apic_write(u32 reg, u32 val)
589{ 590{
590 /* Warn to see if there's any stray references */ 591 /* Warn to see if there's any stray references */
591 WARN_ON(1); 592 WARN_ON(1);
592} 593}
594
595static u64 xen_apic_icr_read(void)
596{
597 return 0;
598}
599
600static void xen_apic_icr_write(u32 low, u32 id)
601{
602 /* Warn to see if there's any stray references */
603 WARN_ON(1);
604}
605
606static void xen_apic_wait_icr_idle(void)
607{
608 return;
609}
610
611static u32 xen_safe_apic_wait_icr_idle(void)
612{
613 return 0;
614}
615
616static struct apic_ops xen_basic_apic_ops = {
617 .read = xen_apic_read,
618 .write = xen_apic_write,
619 .icr_read = xen_apic_icr_read,
620 .icr_write = xen_apic_icr_write,
621 .wait_icr_idle = xen_apic_wait_icr_idle,
622 .safe_wait_icr_idle = xen_safe_apic_wait_icr_idle,
623};
624
593#endif 625#endif
594 626
595static void xen_flush_tlb(void) 627static void xen_flush_tlb(void)
@@ -1273,8 +1305,6 @@ static const struct pv_irq_ops xen_irq_ops __initdata = {
1273 1305
1274static const struct pv_apic_ops xen_apic_ops __initdata = { 1306static const struct pv_apic_ops xen_apic_ops __initdata = {
1275#ifdef CONFIG_X86_LOCAL_APIC 1307#ifdef CONFIG_X86_LOCAL_APIC
1276 .apic_write = xen_apic_write,
1277 .apic_read = xen_apic_read,
1278 .setup_boot_clock = paravirt_nop, 1308 .setup_boot_clock = paravirt_nop,
1279 .setup_secondary_clock = paravirt_nop, 1309 .setup_secondary_clock = paravirt_nop,
1280 .startup_ipi_hook = paravirt_nop, 1310 .startup_ipi_hook = paravirt_nop,
@@ -1677,6 +1707,13 @@ asmlinkage void __init xen_start_kernel(void)
1677 pv_apic_ops = xen_apic_ops; 1707 pv_apic_ops = xen_apic_ops;
1678 pv_mmu_ops = xen_mmu_ops; 1708 pv_mmu_ops = xen_mmu_ops;
1679 1709
1710#ifdef CONFIG_X86_LOCAL_APIC
1711 /*
1712 * set up the basic apic ops.
1713 */
1714 apic_ops = &xen_basic_apic_ops;
1715#endif
1716
1680 if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { 1717 if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
1681 pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; 1718 pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
1682 pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; 1719 pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 7d63f8ced24b..4b47f4ece5b7 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -26,6 +26,8 @@ obj-$(CONFIG_HT_IRQ) += htirq.o
26# Build Intel IOMMU support 26# Build Intel IOMMU support
27obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o 27obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o
28 28
29obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o
30
29# 31#
30# Some architectures use the generic PCI setup functions 32# Some architectures use the generic PCI setup functions
31# 33#
diff --git a/drivers/pci/dma_remapping.h b/drivers/pci/dma_remapping.h
new file mode 100644
index 000000000000..bff5c65f81dc
--- /dev/null
+++ b/drivers/pci/dma_remapping.h
@@ -0,0 +1,157 @@
1#ifndef _DMA_REMAPPING_H
2#define _DMA_REMAPPING_H
3
4/*
5 * We need a fixed PAGE_SIZE of 4K irrespective of
6 * arch PAGE_SIZE for IOMMU page tables.
7 */
8#define PAGE_SHIFT_4K (12)
9#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K)
10#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K)
11#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
12
13#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K)
14#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
15#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
16
17
18/*
19 * 0: Present
20 * 1-11: Reserved
21 * 12-63: Context Ptr (12 - (haw-1))
22 * 64-127: Reserved
23 */
24struct root_entry {
25 u64 val;
26 u64 rsvd1;
27};
28#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry))
29static inline bool root_present(struct root_entry *root)
30{
31 return (root->val & 1);
32}
33static inline void set_root_present(struct root_entry *root)
34{
35 root->val |= 1;
36}
37static inline void set_root_value(struct root_entry *root, unsigned long value)
38{
39 root->val |= value & PAGE_MASK_4K;
40}
41
42struct context_entry;
43static inline struct context_entry *
44get_context_addr_from_root(struct root_entry *root)
45{
46 return (struct context_entry *)
47 (root_present(root)?phys_to_virt(
48 root->val & PAGE_MASK_4K):
49 NULL);
50}
51
52/*
53 * low 64 bits:
54 * 0: present
55 * 1: fault processing disable
56 * 2-3: translation type
57 * 12-63: address space root
58 * high 64 bits:
59 * 0-2: address width
60 * 3-6: aval
61 * 8-23: domain id
62 */
63struct context_entry {
64 u64 lo;
65 u64 hi;
66};
67#define context_present(c) ((c).lo & 1)
68#define context_fault_disable(c) (((c).lo >> 1) & 1)
69#define context_translation_type(c) (((c).lo >> 2) & 3)
70#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
71#define context_address_width(c) ((c).hi & 7)
72#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
73
74#define context_set_present(c) do {(c).lo |= 1;} while (0)
75#define context_set_fault_enable(c) \
76 do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
77#define context_set_translation_type(c, val) \
78 do { \
79 (c).lo &= (((u64)-1) << 4) | 3; \
80 (c).lo |= ((val) & 3) << 2; \
81 } while (0)
82#define CONTEXT_TT_MULTI_LEVEL 0
83#define context_set_address_root(c, val) \
84 do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
85#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
86#define context_set_domain_id(c, val) \
87 do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
88#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
89
90/*
91 * 0: readable
92 * 1: writable
93 * 2-6: reserved
94 * 7: super page
95 * 8-11: available
96 * 12-63: Host physcial address
97 */
98struct dma_pte {
99 u64 val;
100};
101#define dma_clear_pte(p) do {(p).val = 0;} while (0)
102
103#define DMA_PTE_READ (1)
104#define DMA_PTE_WRITE (2)
105
106#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
107#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
108#define dma_set_pte_prot(p, prot) \
109 do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
110#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K)
111#define dma_set_pte_addr(p, addr) do {\
112 (p).val |= ((addr) & PAGE_MASK_4K); } while (0)
113#define dma_pte_present(p) (((p).val & 3) != 0)
114
115struct intel_iommu;
116
117struct dmar_domain {
118 int id; /* domain id */
119 struct intel_iommu *iommu; /* back pointer to owning iommu */
120
121 struct list_head devices; /* all devices' list */
122 struct iova_domain iovad; /* iova's that belong to this domain */
123
124 struct dma_pte *pgd; /* virtual address */
125 spinlock_t mapping_lock; /* page table lock */
126 int gaw; /* max guest address width */
127
128 /* adjusted guest address width, 0 is level 2 30-bit */
129 int agaw;
130
131#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
132 int flags;
133};
134
135/* PCI domain-device relationship */
136struct device_domain_info {
137 struct list_head link; /* link to domain siblings */
138 struct list_head global; /* link to global list */
139 u8 bus; /* PCI bus numer */
140 u8 devfn; /* PCI devfn number */
141 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
142 struct dmar_domain *domain; /* pointer to domain */
143};
144
145extern int init_dmars(void);
146extern void free_dmar_iommu(struct intel_iommu *iommu);
147
148extern int dmar_disabled;
149
150#ifndef CONFIG_DMAR_GFX_WA
151static inline void iommu_prepare_gfx_mapping(void)
152{
153 return;
154}
155#endif /* !CONFIG_DMAR_GFX_WA */
156
157#endif
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 8bf86ae2333f..bd2c01674f5e 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -19,13 +19,16 @@
19 * Author: Shaohua Li <shaohua.li@intel.com> 19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> 20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21 * 21 *
22 * This file implements early detection/parsing of DMA Remapping Devices 22 * This file implements early detection/parsing of Remapping Devices
23 * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI 23 * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
24 * tables. 24 * tables.
25 *
26 * These routines are used by both DMA-remapping and Interrupt-remapping
25 */ 27 */
26 28
27#include <linux/pci.h> 29#include <linux/pci.h>
28#include <linux/dmar.h> 30#include <linux/dmar.h>
31#include <linux/timer.h>
29#include "iova.h" 32#include "iova.h"
30#include "intel-iommu.h" 33#include "intel-iommu.h"
31 34
@@ -37,7 +40,6 @@
37 * these units are not supported by the architecture. 40 * these units are not supported by the architecture.
38 */ 41 */
39LIST_HEAD(dmar_drhd_units); 42LIST_HEAD(dmar_drhd_units);
40LIST_HEAD(dmar_rmrr_units);
41 43
42static struct acpi_table_header * __initdata dmar_tbl; 44static struct acpi_table_header * __initdata dmar_tbl;
43 45
@@ -53,11 +55,6 @@ static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
53 list_add(&drhd->list, &dmar_drhd_units); 55 list_add(&drhd->list, &dmar_drhd_units);
54} 56}
55 57
56static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
57{
58 list_add(&rmrr->list, &dmar_rmrr_units);
59}
60
61static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope, 58static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
62 struct pci_dev **dev, u16 segment) 59 struct pci_dev **dev, u16 segment)
63{ 60{
@@ -172,19 +169,37 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header)
172 struct acpi_dmar_hardware_unit *drhd; 169 struct acpi_dmar_hardware_unit *drhd;
173 struct dmar_drhd_unit *dmaru; 170 struct dmar_drhd_unit *dmaru;
174 int ret = 0; 171 int ret = 0;
175 static int include_all;
176 172
177 dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL); 173 dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
178 if (!dmaru) 174 if (!dmaru)
179 return -ENOMEM; 175 return -ENOMEM;
180 176
177 dmaru->hdr = header;
181 drhd = (struct acpi_dmar_hardware_unit *)header; 178 drhd = (struct acpi_dmar_hardware_unit *)header;
182 dmaru->reg_base_addr = drhd->address; 179 dmaru->reg_base_addr = drhd->address;
183 dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */ 180 dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
184 181
182 ret = alloc_iommu(dmaru);
183 if (ret) {
184 kfree(dmaru);
185 return ret;
186 }
187 dmar_register_drhd_unit(dmaru);
188 return 0;
189}
190
191static int __init
192dmar_parse_dev(struct dmar_drhd_unit *dmaru)
193{
194 struct acpi_dmar_hardware_unit *drhd;
195 static int include_all;
196 int ret;
197
198 drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
199
185 if (!dmaru->include_all) 200 if (!dmaru->include_all)
186 ret = dmar_parse_dev_scope((void *)(drhd + 1), 201 ret = dmar_parse_dev_scope((void *)(drhd + 1),
187 ((void *)drhd) + header->length, 202 ((void *)drhd) + drhd->header.length,
188 &dmaru->devices_cnt, &dmaru->devices, 203 &dmaru->devices_cnt, &dmaru->devices,
189 drhd->segment); 204 drhd->segment);
190 else { 205 else {
@@ -197,37 +212,59 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header)
197 include_all = 1; 212 include_all = 1;
198 } 213 }
199 214
200 if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all)) 215 if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all)) {
216 list_del(&dmaru->list);
201 kfree(dmaru); 217 kfree(dmaru);
202 else 218 }
203 dmar_register_drhd_unit(dmaru);
204 return ret; 219 return ret;
205} 220}
206 221
222#ifdef CONFIG_DMAR
223LIST_HEAD(dmar_rmrr_units);
224
225static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
226{
227 list_add(&rmrr->list, &dmar_rmrr_units);
228}
229
230
207static int __init 231static int __init
208dmar_parse_one_rmrr(struct acpi_dmar_header *header) 232dmar_parse_one_rmrr(struct acpi_dmar_header *header)
209{ 233{
210 struct acpi_dmar_reserved_memory *rmrr; 234 struct acpi_dmar_reserved_memory *rmrr;
211 struct dmar_rmrr_unit *rmrru; 235 struct dmar_rmrr_unit *rmrru;
212 int ret = 0;
213 236
214 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL); 237 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
215 if (!rmrru) 238 if (!rmrru)
216 return -ENOMEM; 239 return -ENOMEM;
217 240
241 rmrru->hdr = header;
218 rmrr = (struct acpi_dmar_reserved_memory *)header; 242 rmrr = (struct acpi_dmar_reserved_memory *)header;
219 rmrru->base_address = rmrr->base_address; 243 rmrru->base_address = rmrr->base_address;
220 rmrru->end_address = rmrr->end_address; 244 rmrru->end_address = rmrr->end_address;
245
246 dmar_register_rmrr_unit(rmrru);
247 return 0;
248}
249
250static int __init
251rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
252{
253 struct acpi_dmar_reserved_memory *rmrr;
254 int ret;
255
256 rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
221 ret = dmar_parse_dev_scope((void *)(rmrr + 1), 257 ret = dmar_parse_dev_scope((void *)(rmrr + 1),
222 ((void *)rmrr) + header->length, 258 ((void *)rmrr) + rmrr->header.length,
223 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment); 259 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
224 260
225 if (ret || (rmrru->devices_cnt == 0)) 261 if (ret || (rmrru->devices_cnt == 0)) {
262 list_del(&rmrru->list);
226 kfree(rmrru); 263 kfree(rmrru);
227 else 264 }
228 dmar_register_rmrr_unit(rmrru);
229 return ret; 265 return ret;
230} 266}
267#endif
231 268
232static void __init 269static void __init
233dmar_table_print_dmar_entry(struct acpi_dmar_header *header) 270dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
@@ -252,6 +289,7 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
252 } 289 }
253} 290}
254 291
292
255/** 293/**
256 * parse_dmar_table - parses the DMA reporting table 294 * parse_dmar_table - parses the DMA reporting table
257 */ 295 */
@@ -284,7 +322,9 @@ parse_dmar_table(void)
284 ret = dmar_parse_one_drhd(entry_header); 322 ret = dmar_parse_one_drhd(entry_header);
285 break; 323 break;
286 case ACPI_DMAR_TYPE_RESERVED_MEMORY: 324 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
325#ifdef CONFIG_DMAR
287 ret = dmar_parse_one_rmrr(entry_header); 326 ret = dmar_parse_one_rmrr(entry_header);
327#endif
288 break; 328 break;
289 default: 329 default:
290 printk(KERN_WARNING PREFIX 330 printk(KERN_WARNING PREFIX
@@ -300,15 +340,77 @@ parse_dmar_table(void)
300 return ret; 340 return ret;
301} 341}
302 342
343int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
344 struct pci_dev *dev)
345{
346 int index;
347
348 while (dev) {
349 for (index = 0; index < cnt; index++)
350 if (dev == devices[index])
351 return 1;
303 352
304int __init dmar_table_init(void) 353 /* Check our parent */
354 dev = dev->bus->self;
355 }
356
357 return 0;
358}
359
360struct dmar_drhd_unit *
361dmar_find_matched_drhd_unit(struct pci_dev *dev)
305{ 362{
363 struct dmar_drhd_unit *drhd = NULL;
364
365 list_for_each_entry(drhd, &dmar_drhd_units, list) {
366 if (drhd->include_all || dmar_pci_device_match(drhd->devices,
367 drhd->devices_cnt, dev))
368 return drhd;
369 }
370
371 return NULL;
372}
373
374int __init dmar_dev_scope_init(void)
375{
376 struct dmar_drhd_unit *drhd;
377 int ret = -ENODEV;
378
379 for_each_drhd_unit(drhd) {
380 ret = dmar_parse_dev(drhd);
381 if (ret)
382 return ret;
383 }
384
385#ifdef CONFIG_DMAR
386 {
387 struct dmar_rmrr_unit *rmrr;
388 for_each_rmrr_units(rmrr) {
389 ret = rmrr_parse_dev(rmrr);
390 if (ret)
391 return ret;
392 }
393 }
394#endif
395
396 return ret;
397}
306 398
399
400int __init dmar_table_init(void)
401{
402 static int dmar_table_initialized;
307 int ret; 403 int ret;
308 404
405 if (dmar_table_initialized)
406 return 0;
407
408 dmar_table_initialized = 1;
409
309 ret = parse_dmar_table(); 410 ret = parse_dmar_table();
310 if (ret) { 411 if (ret) {
311 printk(KERN_INFO PREFIX "parse DMAR table failure.\n"); 412 if (ret != -ENODEV)
413 printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
312 return ret; 414 return ret;
313 } 415 }
314 416
@@ -317,9 +419,14 @@ int __init dmar_table_init(void)
317 return -ENODEV; 419 return -ENODEV;
318 } 420 }
319 421
422#ifdef CONFIG_DMAR
320 if (list_empty(&dmar_rmrr_units)) 423 if (list_empty(&dmar_rmrr_units))
321 printk(KERN_INFO PREFIX "No RMRR found\n"); 424 printk(KERN_INFO PREFIX "No RMRR found\n");
425#endif
322 426
427#ifdef CONFIG_INTR_REMAP
428 parse_ioapics_under_ir();
429#endif
323 return 0; 430 return 0;
324} 431}
325 432
@@ -341,3 +448,255 @@ int __init early_dmar_detect(void)
341 448
342 return (ACPI_SUCCESS(status) ? 1 : 0); 449 return (ACPI_SUCCESS(status) ? 1 : 0);
343} 450}
451
452void __init detect_intel_iommu(void)
453{
454 int ret;
455
456 ret = early_dmar_detect();
457
458#ifdef CONFIG_DMAR
459 {
460 struct acpi_table_dmar *dmar;
461 /*
462 * for now we will disable dma-remapping when interrupt
463 * remapping is enabled.
464 * When support for queued invalidation for IOTLB invalidation
465 * is added, we will not need this any more.
466 */
467 dmar = (struct acpi_table_dmar *) dmar_tbl;
468 if (ret && cpu_has_x2apic && dmar->flags & 0x1) {
469 printk(KERN_INFO
470 "Queued invalidation will be enabled to support "
471 "x2apic and Intr-remapping.\n");
472 printk(KERN_INFO
473 "Disabling IOMMU detection, because of missing "
474 "queued invalidation support for IOTLB "
475 "invalidation\n");
476 printk(KERN_INFO
477 "Use \"nox2apic\", if you want to use Intel "
478 " IOMMU for DMA-remapping and don't care about "
479 " x2apic support\n");
480
481 dmar_disabled = 1;
482 return;
483 }
484
485 if (ret && !no_iommu && !iommu_detected && !swiotlb &&
486 !dmar_disabled)
487 iommu_detected = 1;
488 }
489#endif
490}
491
492
493int alloc_iommu(struct dmar_drhd_unit *drhd)
494{
495 struct intel_iommu *iommu;
496 int map_size;
497 u32 ver;
498 static int iommu_allocated = 0;
499
500 iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
501 if (!iommu)
502 return -ENOMEM;
503
504 iommu->seq_id = iommu_allocated++;
505
506 iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
507 if (!iommu->reg) {
508 printk(KERN_ERR "IOMMU: can't map the region\n");
509 goto error;
510 }
511 iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
512 iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
513
514 /* the registers might be more than one page */
515 map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
516 cap_max_fault_reg_offset(iommu->cap));
517 map_size = PAGE_ALIGN_4K(map_size);
518 if (map_size > PAGE_SIZE_4K) {
519 iounmap(iommu->reg);
520 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
521 if (!iommu->reg) {
522 printk(KERN_ERR "IOMMU: can't map the region\n");
523 goto error;
524 }
525 }
526
527 ver = readl(iommu->reg + DMAR_VER_REG);
528 pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
529 drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
530 iommu->cap, iommu->ecap);
531
532 spin_lock_init(&iommu->register_lock);
533
534 drhd->iommu = iommu;
535 return 0;
536error:
537 kfree(iommu);
538 return -1;
539}
540
541void free_iommu(struct intel_iommu *iommu)
542{
543 if (!iommu)
544 return;
545
546#ifdef CONFIG_DMAR
547 free_dmar_iommu(iommu);
548#endif
549
550 if (iommu->reg)
551 iounmap(iommu->reg);
552 kfree(iommu);
553}
554
555/*
556 * Reclaim all the submitted descriptors which have completed its work.
557 */
558static inline void reclaim_free_desc(struct q_inval *qi)
559{
560 while (qi->desc_status[qi->free_tail] == QI_DONE) {
561 qi->desc_status[qi->free_tail] = QI_FREE;
562 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
563 qi->free_cnt++;
564 }
565}
566
567/*
568 * Submit the queued invalidation descriptor to the remapping
569 * hardware unit and wait for its completion.
570 */
571void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
572{
573 struct q_inval *qi = iommu->qi;
574 struct qi_desc *hw, wait_desc;
575 int wait_index, index;
576 unsigned long flags;
577
578 if (!qi)
579 return;
580
581 hw = qi->desc;
582
583 spin_lock(&qi->q_lock);
584 while (qi->free_cnt < 3) {
585 spin_unlock(&qi->q_lock);
586 cpu_relax();
587 spin_lock(&qi->q_lock);
588 }
589
590 index = qi->free_head;
591 wait_index = (index + 1) % QI_LENGTH;
592
593 qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
594
595 hw[index] = *desc;
596
597 wait_desc.low = QI_IWD_STATUS_DATA(2) | QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
598 wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
599
600 hw[wait_index] = wait_desc;
601
602 __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
603 __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
604
605 qi->free_head = (qi->free_head + 2) % QI_LENGTH;
606 qi->free_cnt -= 2;
607
608 spin_lock_irqsave(&iommu->register_lock, flags);
609 /*
610 * update the HW tail register indicating the presence of
611 * new descriptors.
612 */
613 writel(qi->free_head << 4, iommu->reg + DMAR_IQT_REG);
614 spin_unlock_irqrestore(&iommu->register_lock, flags);
615
616 while (qi->desc_status[wait_index] != QI_DONE) {
617 spin_unlock(&qi->q_lock);
618 cpu_relax();
619 spin_lock(&qi->q_lock);
620 }
621
622 qi->desc_status[index] = QI_DONE;
623
624 reclaim_free_desc(qi);
625 spin_unlock(&qi->q_lock);
626}
627
628/*
629 * Flush the global interrupt entry cache.
630 */
631void qi_global_iec(struct intel_iommu *iommu)
632{
633 struct qi_desc desc;
634
635 desc.low = QI_IEC_TYPE;
636 desc.high = 0;
637
638 qi_submit_sync(&desc, iommu);
639}
640
641/*
642 * Enable Queued Invalidation interface. This is a must to support
643 * interrupt-remapping. Also used by DMA-remapping, which replaces
644 * register based IOTLB invalidation.
645 */
646int dmar_enable_qi(struct intel_iommu *iommu)
647{
648 u32 cmd, sts;
649 unsigned long flags;
650 struct q_inval *qi;
651
652 if (!ecap_qis(iommu->ecap))
653 return -ENOENT;
654
655 /*
656 * queued invalidation is already setup and enabled.
657 */
658 if (iommu->qi)
659 return 0;
660
661 iommu->qi = kmalloc(sizeof(*qi), GFP_KERNEL);
662 if (!iommu->qi)
663 return -ENOMEM;
664
665 qi = iommu->qi;
666
667 qi->desc = (void *)(get_zeroed_page(GFP_KERNEL));
668 if (!qi->desc) {
669 kfree(qi);
670 iommu->qi = 0;
671 return -ENOMEM;
672 }
673
674 qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_KERNEL);
675 if (!qi->desc_status) {
676 free_page((unsigned long) qi->desc);
677 kfree(qi);
678 iommu->qi = 0;
679 return -ENOMEM;
680 }
681
682 qi->free_head = qi->free_tail = 0;
683 qi->free_cnt = QI_LENGTH;
684
685 spin_lock_init(&qi->q_lock);
686
687 spin_lock_irqsave(&iommu->register_lock, flags);
688 /* write zero to the tail reg */
689 writel(0, iommu->reg + DMAR_IQT_REG);
690
691 dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
692
693 cmd = iommu->gcmd | DMA_GCMD_QIE;
694 iommu->gcmd |= DMA_GCMD_QIE;
695 writel(cmd, iommu->reg + DMAR_GCMD_REG);
696
697 /* Make sure hardware complete it */
698 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
699 spin_unlock_irqrestore(&iommu->register_lock, flags);
700
701 return 0;
702}
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 8d0e60ac849c..389fdd6f4a9f 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -49,8 +49,6 @@
49 49
50#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 50#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
51 51
52#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */
53
54#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) 52#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
55 53
56 54
@@ -58,8 +56,6 @@ static void flush_unmaps_timeout(unsigned long data);
58 56
59DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0); 57DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
60 58
61static struct intel_iommu *g_iommus;
62
63#define HIGH_WATER_MARK 250 59#define HIGH_WATER_MARK 250
64struct deferred_flush_tables { 60struct deferred_flush_tables {
65 int next; 61 int next;
@@ -80,7 +76,7 @@ static long list_size;
80 76
81static void domain_remove_dev_info(struct dmar_domain *domain); 77static void domain_remove_dev_info(struct dmar_domain *domain);
82 78
83static int dmar_disabled; 79int dmar_disabled;
84static int __initdata dmar_map_gfx = 1; 80static int __initdata dmar_map_gfx = 1;
85static int dmar_forcedac; 81static int dmar_forcedac;
86static int intel_iommu_strict; 82static int intel_iommu_strict;
@@ -185,13 +181,6 @@ void free_iova_mem(struct iova *iova)
185 kmem_cache_free(iommu_iova_cache, iova); 181 kmem_cache_free(iommu_iova_cache, iova);
186} 182}
187 183
188static inline void __iommu_flush_cache(
189 struct intel_iommu *iommu, void *addr, int size)
190{
191 if (!ecap_coherent(iommu->ecap))
192 clflush_cache_range(addr, size);
193}
194
195/* Gets context entry for a given bus and devfn */ 184/* Gets context entry for a given bus and devfn */
196static struct context_entry * device_to_context_entry(struct intel_iommu *iommu, 185static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
197 u8 bus, u8 devfn) 186 u8 bus, u8 devfn)
@@ -488,19 +477,6 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
488 return 0; 477 return 0;
489} 478}
490 479
491#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
492{\
493 cycles_t start_time = get_cycles();\
494 while (1) {\
495 sts = op (iommu->reg + offset);\
496 if (cond)\
497 break;\
498 if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\
499 panic("DMAR hardware is malfunctioning\n");\
500 cpu_relax();\
501 }\
502}
503
504static void iommu_set_root_entry(struct intel_iommu *iommu) 480static void iommu_set_root_entry(struct intel_iommu *iommu)
505{ 481{
506 void *addr; 482 void *addr;
@@ -990,6 +966,8 @@ static int iommu_init_domains(struct intel_iommu *iommu)
990 return -ENOMEM; 966 return -ENOMEM;
991 } 967 }
992 968
969 spin_lock_init(&iommu->lock);
970
993 /* 971 /*
994 * if Caching mode is set, then invalid translations are tagged 972 * if Caching mode is set, then invalid translations are tagged
995 * with domainid 0. Hence we need to pre-allocate it. 973 * with domainid 0. Hence we need to pre-allocate it.
@@ -998,62 +976,15 @@ static int iommu_init_domains(struct intel_iommu *iommu)
998 set_bit(0, iommu->domain_ids); 976 set_bit(0, iommu->domain_ids);
999 return 0; 977 return 0;
1000} 978}
1001static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu,
1002 struct dmar_drhd_unit *drhd)
1003{
1004 int ret;
1005 int map_size;
1006 u32 ver;
1007
1008 iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
1009 if (!iommu->reg) {
1010 printk(KERN_ERR "IOMMU: can't map the region\n");
1011 goto error;
1012 }
1013 iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
1014 iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
1015
1016 /* the registers might be more than one page */
1017 map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
1018 cap_max_fault_reg_offset(iommu->cap));
1019 map_size = PAGE_ALIGN_4K(map_size);
1020 if (map_size > PAGE_SIZE_4K) {
1021 iounmap(iommu->reg);
1022 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
1023 if (!iommu->reg) {
1024 printk(KERN_ERR "IOMMU: can't map the region\n");
1025 goto error;
1026 }
1027 }
1028
1029 ver = readl(iommu->reg + DMAR_VER_REG);
1030 pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
1031 drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
1032 iommu->cap, iommu->ecap);
1033 ret = iommu_init_domains(iommu);
1034 if (ret)
1035 goto error_unmap;
1036 spin_lock_init(&iommu->lock);
1037 spin_lock_init(&iommu->register_lock);
1038 979
1039 drhd->iommu = iommu;
1040 return iommu;
1041error_unmap:
1042 iounmap(iommu->reg);
1043error:
1044 kfree(iommu);
1045 return NULL;
1046}
1047 980
1048static void domain_exit(struct dmar_domain *domain); 981static void domain_exit(struct dmar_domain *domain);
1049static void free_iommu(struct intel_iommu *iommu) 982
983void free_dmar_iommu(struct intel_iommu *iommu)
1050{ 984{
1051 struct dmar_domain *domain; 985 struct dmar_domain *domain;
1052 int i; 986 int i;
1053 987
1054 if (!iommu)
1055 return;
1056
1057 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap)); 988 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1058 for (; i < cap_ndoms(iommu->cap); ) { 989 for (; i < cap_ndoms(iommu->cap); ) {
1059 domain = iommu->domains[i]; 990 domain = iommu->domains[i];
@@ -1078,10 +1009,6 @@ static void free_iommu(struct intel_iommu *iommu)
1078 1009
1079 /* free context mapping */ 1010 /* free context mapping */
1080 free_context_table(iommu); 1011 free_context_table(iommu);
1081
1082 if (iommu->reg)
1083 iounmap(iommu->reg);
1084 kfree(iommu);
1085} 1012}
1086 1013
1087static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu) 1014static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
@@ -1426,37 +1353,6 @@ find_domain(struct pci_dev *pdev)
1426 return NULL; 1353 return NULL;
1427} 1354}
1428 1355
1429static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
1430 struct pci_dev *dev)
1431{
1432 int index;
1433
1434 while (dev) {
1435 for (index = 0; index < cnt; index++)
1436 if (dev == devices[index])
1437 return 1;
1438
1439 /* Check our parent */
1440 dev = dev->bus->self;
1441 }
1442
1443 return 0;
1444}
1445
1446static struct dmar_drhd_unit *
1447dmar_find_matched_drhd_unit(struct pci_dev *dev)
1448{
1449 struct dmar_drhd_unit *drhd = NULL;
1450
1451 list_for_each_entry(drhd, &dmar_drhd_units, list) {
1452 if (drhd->include_all || dmar_pci_device_match(drhd->devices,
1453 drhd->devices_cnt, dev))
1454 return drhd;
1455 }
1456
1457 return NULL;
1458}
1459
1460/* domain is initialized */ 1356/* domain is initialized */
1461static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) 1357static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1462{ 1358{
@@ -1729,8 +1625,6 @@ int __init init_dmars(void)
1729 * endfor 1625 * endfor
1730 */ 1626 */
1731 for_each_drhd_unit(drhd) { 1627 for_each_drhd_unit(drhd) {
1732 if (drhd->ignored)
1733 continue;
1734 g_num_of_iommus++; 1628 g_num_of_iommus++;
1735 /* 1629 /*
1736 * lock not needed as this is only incremented in the single 1630 * lock not needed as this is only incremented in the single
@@ -1739,12 +1633,6 @@ int __init init_dmars(void)
1739 */ 1633 */
1740 } 1634 }
1741 1635
1742 g_iommus = kzalloc(g_num_of_iommus * sizeof(*iommu), GFP_KERNEL);
1743 if (!g_iommus) {
1744 ret = -ENOMEM;
1745 goto error;
1746 }
1747
1748 deferred_flush = kzalloc(g_num_of_iommus * 1636 deferred_flush = kzalloc(g_num_of_iommus *
1749 sizeof(struct deferred_flush_tables), GFP_KERNEL); 1637 sizeof(struct deferred_flush_tables), GFP_KERNEL);
1750 if (!deferred_flush) { 1638 if (!deferred_flush) {
@@ -1752,16 +1640,15 @@ int __init init_dmars(void)
1752 goto error; 1640 goto error;
1753 } 1641 }
1754 1642
1755 i = 0;
1756 for_each_drhd_unit(drhd) { 1643 for_each_drhd_unit(drhd) {
1757 if (drhd->ignored) 1644 if (drhd->ignored)
1758 continue; 1645 continue;
1759 iommu = alloc_iommu(&g_iommus[i], drhd); 1646
1760 i++; 1647 iommu = drhd->iommu;
1761 if (!iommu) { 1648
1762 ret = -ENOMEM; 1649 ret = iommu_init_domains(iommu);
1650 if (ret)
1763 goto error; 1651 goto error;
1764 }
1765 1652
1766 /* 1653 /*
1767 * TBD: 1654 * TBD:
@@ -1845,7 +1732,6 @@ error:
1845 iommu = drhd->iommu; 1732 iommu = drhd->iommu;
1846 free_iommu(iommu); 1733 free_iommu(iommu);
1847 } 1734 }
1848 kfree(g_iommus);
1849 return ret; 1735 return ret;
1850} 1736}
1851 1737
@@ -2002,7 +1888,10 @@ static void flush_unmaps(void)
2002 /* just flush them all */ 1888 /* just flush them all */
2003 for (i = 0; i < g_num_of_iommus; i++) { 1889 for (i = 0; i < g_num_of_iommus; i++) {
2004 if (deferred_flush[i].next) { 1890 if (deferred_flush[i].next) {
2005 iommu_flush_iotlb_global(&g_iommus[i], 0); 1891 struct intel_iommu *iommu =
1892 deferred_flush[i].domain[0]->iommu;
1893
1894 iommu_flush_iotlb_global(iommu, 0);
2006 for (j = 0; j < deferred_flush[i].next; j++) { 1895 for (j = 0; j < deferred_flush[i].next; j++) {
2007 __free_iova(&deferred_flush[i].domain[j]->iovad, 1896 __free_iova(&deferred_flush[i].domain[j]->iovad,
2008 deferred_flush[i].iova[j]); 1897 deferred_flush[i].iova[j]);
@@ -2032,7 +1921,8 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2032 if (list_size == HIGH_WATER_MARK) 1921 if (list_size == HIGH_WATER_MARK)
2033 flush_unmaps(); 1922 flush_unmaps();
2034 1923
2035 iommu_id = dom->iommu - g_iommus; 1924 iommu_id = dom->iommu->seq_id;
1925
2036 next = deferred_flush[iommu_id].next; 1926 next = deferred_flush[iommu_id].next;
2037 deferred_flush[iommu_id].domain[next] = dom; 1927 deferred_flush[iommu_id].domain[next] = dom;
2038 deferred_flush[iommu_id].iova[next] = iova; 1928 deferred_flush[iommu_id].iova[next] = iova;
@@ -2348,15 +2238,6 @@ static void __init iommu_exit_mempool(void)
2348 2238
2349} 2239}
2350 2240
2351void __init detect_intel_iommu(void)
2352{
2353 if (swiotlb || no_iommu || iommu_detected || dmar_disabled)
2354 return;
2355 if (early_dmar_detect()) {
2356 iommu_detected = 1;
2357 }
2358}
2359
2360static void __init init_no_remapping_devices(void) 2241static void __init init_no_remapping_devices(void)
2361{ 2242{
2362 struct dmar_drhd_unit *drhd; 2243 struct dmar_drhd_unit *drhd;
@@ -2403,12 +2284,19 @@ int __init intel_iommu_init(void)
2403{ 2284{
2404 int ret = 0; 2285 int ret = 0;
2405 2286
2406 if (no_iommu || swiotlb || dmar_disabled)
2407 return -ENODEV;
2408
2409 if (dmar_table_init()) 2287 if (dmar_table_init())
2410 return -ENODEV; 2288 return -ENODEV;
2411 2289
2290 if (dmar_dev_scope_init())
2291 return -ENODEV;
2292
2293 /*
2294 * Check the need for DMA-remapping initialization now.
2295 * Above initialization will also be used by Interrupt-remapping.
2296 */
2297 if (no_iommu || swiotlb || dmar_disabled)
2298 return -ENODEV;
2299
2412 iommu_init_mempool(); 2300 iommu_init_mempool();
2413 dmar_init_reserved_ranges(); 2301 dmar_init_reserved_ranges();
2414 2302
diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h
index afc0ad96122e..2142c01e0143 100644
--- a/drivers/pci/intel-iommu.h
+++ b/drivers/pci/intel-iommu.h
@@ -27,19 +27,8 @@
27#include <linux/sysdev.h> 27#include <linux/sysdev.h>
28#include "iova.h" 28#include "iova.h"
29#include <linux/io.h> 29#include <linux/io.h>
30 30#include <asm/cacheflush.h>
31/* 31#include "dma_remapping.h"
32 * We need a fixed PAGE_SIZE of 4K irrespective of
33 * arch PAGE_SIZE for IOMMU page tables.
34 */
35#define PAGE_SHIFT_4K (12)
36#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K)
37#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K)
38#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
39
40#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K)
41#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
42#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
43 32
44/* 33/*
45 * Intel IOMMU register specification per version 1.0 public spec. 34 * Intel IOMMU register specification per version 1.0 public spec.
@@ -63,6 +52,11 @@
63#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */ 52#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */
64#define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */ 53#define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */
65#define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */ 54#define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */
55#define DMAR_IQH_REG 0x80 /* Invalidation queue head register */
56#define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */
57#define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */
58#define DMAR_ICS_REG 0x98 /* Invalidation complete status register */
59#define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */
66 60
67#define OFFSET_STRIDE (9) 61#define OFFSET_STRIDE (9)
68/* 62/*
@@ -126,6 +120,10 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
126#define ecap_max_iotlb_offset(e) \ 120#define ecap_max_iotlb_offset(e) \
127 (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16) 121 (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16)
128#define ecap_coherent(e) ((e) & 0x1) 122#define ecap_coherent(e) ((e) & 0x1)
123#define ecap_qis(e) ((e) & 0x2)
124#define ecap_eim_support(e) ((e >> 4) & 0x1)
125#define ecap_ir_support(e) ((e >> 3) & 0x1)
126#define ecap_max_handle_mask(e) ((e >> 20) & 0xf)
129 127
130 128
131/* IOTLB_REG */ 129/* IOTLB_REG */
@@ -141,6 +139,17 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
141#define DMA_TLB_IH_NONLEAF (((u64)1) << 6) 139#define DMA_TLB_IH_NONLEAF (((u64)1) << 6)
142#define DMA_TLB_MAX_SIZE (0x3f) 140#define DMA_TLB_MAX_SIZE (0x3f)
143 141
142/* INVALID_DESC */
143#define DMA_ID_TLB_GLOBAL_FLUSH (((u64)1) << 3)
144#define DMA_ID_TLB_DSI_FLUSH (((u64)2) << 3)
145#define DMA_ID_TLB_PSI_FLUSH (((u64)3) << 3)
146#define DMA_ID_TLB_READ_DRAIN (((u64)1) << 7)
147#define DMA_ID_TLB_WRITE_DRAIN (((u64)1) << 6)
148#define DMA_ID_TLB_DID(id) (((u64)((id & 0xffff) << 16)))
149#define DMA_ID_TLB_IH_NONLEAF (((u64)1) << 6)
150#define DMA_ID_TLB_ADDR(addr) (addr)
151#define DMA_ID_TLB_ADDR_MASK(mask) (mask)
152
144/* PMEN_REG */ 153/* PMEN_REG */
145#define DMA_PMEN_EPM (((u32)1)<<31) 154#define DMA_PMEN_EPM (((u32)1)<<31)
146#define DMA_PMEN_PRS (((u32)1)<<0) 155#define DMA_PMEN_PRS (((u32)1)<<0)
@@ -151,6 +160,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
151#define DMA_GCMD_SFL (((u32)1) << 29) 160#define DMA_GCMD_SFL (((u32)1) << 29)
152#define DMA_GCMD_EAFL (((u32)1) << 28) 161#define DMA_GCMD_EAFL (((u32)1) << 28)
153#define DMA_GCMD_WBF (((u32)1) << 27) 162#define DMA_GCMD_WBF (((u32)1) << 27)
163#define DMA_GCMD_QIE (((u32)1) << 26)
164#define DMA_GCMD_SIRTP (((u32)1) << 24)
165#define DMA_GCMD_IRE (((u32) 1) << 25)
154 166
155/* GSTS_REG */ 167/* GSTS_REG */
156#define DMA_GSTS_TES (((u32)1) << 31) 168#define DMA_GSTS_TES (((u32)1) << 31)
@@ -158,6 +170,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
158#define DMA_GSTS_FLS (((u32)1) << 29) 170#define DMA_GSTS_FLS (((u32)1) << 29)
159#define DMA_GSTS_AFLS (((u32)1) << 28) 171#define DMA_GSTS_AFLS (((u32)1) << 28)
160#define DMA_GSTS_WBFS (((u32)1) << 27) 172#define DMA_GSTS_WBFS (((u32)1) << 27)
173#define DMA_GSTS_QIES (((u32)1) << 26)
174#define DMA_GSTS_IRTPS (((u32)1) << 24)
175#define DMA_GSTS_IRES (((u32)1) << 25)
161 176
162/* CCMD_REG */ 177/* CCMD_REG */
163#define DMA_CCMD_ICC (((u64)1) << 63) 178#define DMA_CCMD_ICC (((u64)1) << 63)
@@ -187,158 +202,106 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
187#define dma_frcd_source_id(c) (c & 0xffff) 202#define dma_frcd_source_id(c) (c & 0xffff)
188#define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */ 203#define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */
189 204
190/* 205#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */
191 * 0: Present 206
192 * 1-11: Reserved 207#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
193 * 12-63: Context Ptr (12 - (haw-1)) 208{\
194 * 64-127: Reserved 209 cycles_t start_time = get_cycles();\
195 */ 210 while (1) {\
196struct root_entry { 211 sts = op (iommu->reg + offset);\
197 u64 val; 212 if (cond)\
198 u64 rsvd1; 213 break;\
199}; 214 if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\
200#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry)) 215 panic("DMAR hardware is malfunctioning\n");\
201static inline bool root_present(struct root_entry *root) 216 cpu_relax();\
202{ 217 }\
203 return (root->val & 1);
204}
205static inline void set_root_present(struct root_entry *root)
206{
207 root->val |= 1;
208}
209static inline void set_root_value(struct root_entry *root, unsigned long value)
210{
211 root->val |= value & PAGE_MASK_4K;
212} 218}
213 219
214struct context_entry; 220#define QI_LENGTH 256 /* queue length */
215static inline struct context_entry *
216get_context_addr_from_root(struct root_entry *root)
217{
218 return (struct context_entry *)
219 (root_present(root)?phys_to_virt(
220 root->val & PAGE_MASK_4K):
221 NULL);
222}
223
224/*
225 * low 64 bits:
226 * 0: present
227 * 1: fault processing disable
228 * 2-3: translation type
229 * 12-63: address space root
230 * high 64 bits:
231 * 0-2: address width
232 * 3-6: aval
233 * 8-23: domain id
234 */
235struct context_entry {
236 u64 lo;
237 u64 hi;
238};
239#define context_present(c) ((c).lo & 1)
240#define context_fault_disable(c) (((c).lo >> 1) & 1)
241#define context_translation_type(c) (((c).lo >> 2) & 3)
242#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
243#define context_address_width(c) ((c).hi & 7)
244#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
245
246#define context_set_present(c) do {(c).lo |= 1;} while (0)
247#define context_set_fault_enable(c) \
248 do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
249#define context_set_translation_type(c, val) \
250 do { \
251 (c).lo &= (((u64)-1) << 4) | 3; \
252 (c).lo |= ((val) & 3) << 2; \
253 } while (0)
254#define CONTEXT_TT_MULTI_LEVEL 0
255#define context_set_address_root(c, val) \
256 do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
257#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
258#define context_set_domain_id(c, val) \
259 do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
260#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
261 221
262/* 222enum {
263 * 0: readable 223 QI_FREE,
264 * 1: writable 224 QI_IN_USE,
265 * 2-6: reserved 225 QI_DONE
266 * 7: super page
267 * 8-11: available
268 * 12-63: Host physcial address
269 */
270struct dma_pte {
271 u64 val;
272}; 226};
273#define dma_clear_pte(p) do {(p).val = 0;} while (0)
274
275#define DMA_PTE_READ (1)
276#define DMA_PTE_WRITE (2)
277 227
278#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0) 228#define QI_CC_TYPE 0x1
279#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0) 229#define QI_IOTLB_TYPE 0x2
280#define dma_set_pte_prot(p, prot) \ 230#define QI_DIOTLB_TYPE 0x3
281 do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0) 231#define QI_IEC_TYPE 0x4
282#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K) 232#define QI_IWD_TYPE 0x5
283#define dma_set_pte_addr(p, addr) do {\
284 (p).val |= ((addr) & PAGE_MASK_4K); } while (0)
285#define dma_pte_present(p) (((p).val & 3) != 0)
286 233
287struct intel_iommu; 234#define QI_IEC_SELECTIVE (((u64)1) << 4)
235#define QI_IEC_IIDEX(idx) (((u64)(idx & 0xffff) << 32))
236#define QI_IEC_IM(m) (((u64)(m & 0x1f) << 27))
288 237
289struct dmar_domain { 238#define QI_IWD_STATUS_DATA(d) (((u64)d) << 32)
290 int id; /* domain id */ 239#define QI_IWD_STATUS_WRITE (((u64)1) << 5)
291 struct intel_iommu *iommu; /* back pointer to owning iommu */
292 240
293 struct list_head devices; /* all devices' list */ 241struct qi_desc {
294 struct iova_domain iovad; /* iova's that belong to this domain */ 242 u64 low, high;
243};
295 244
296 struct dma_pte *pgd; /* virtual address */ 245struct q_inval {
297 spinlock_t mapping_lock; /* page table lock */ 246 spinlock_t q_lock;
298 int gaw; /* max guest address width */ 247 struct qi_desc *desc; /* invalidation queue */
248 int *desc_status; /* desc status */
249 int free_head; /* first free entry */
250 int free_tail; /* last free entry */
251 int free_cnt;
252};
299 253
300 /* adjusted guest address width, 0 is level 2 30-bit */ 254#ifdef CONFIG_INTR_REMAP
301 int agaw; 255/* 1MB - maximum possible interrupt remapping table size */
256#define INTR_REMAP_PAGE_ORDER 8
257#define INTR_REMAP_TABLE_REG_SIZE 0xf
302 258
303#define DOMAIN_FLAG_MULTIPLE_DEVICES 1 259#define INTR_REMAP_TABLE_ENTRIES 65536
304 int flags;
305};
306 260
307/* PCI domain-device relationship */ 261struct ir_table {
308struct device_domain_info { 262 struct irte *base;
309 struct list_head link; /* link to domain siblings */
310 struct list_head global; /* link to global list */
311 u8 bus; /* PCI bus numer */
312 u8 devfn; /* PCI devfn number */
313 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
314 struct dmar_domain *domain; /* pointer to domain */
315}; 263};
316 264#endif
317extern int init_dmars(void);
318 265
319struct intel_iommu { 266struct intel_iommu {
320 void __iomem *reg; /* Pointer to hardware regs, virtual addr */ 267 void __iomem *reg; /* Pointer to hardware regs, virtual addr */
321 u64 cap; 268 u64 cap;
322 u64 ecap; 269 u64 ecap;
323 unsigned long *domain_ids; /* bitmap of domains */
324 struct dmar_domain **domains; /* ptr to domains */
325 int seg; 270 int seg;
326 u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ 271 u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
327 spinlock_t lock; /* protect context, domain ids */
328 spinlock_t register_lock; /* protect register handling */ 272 spinlock_t register_lock; /* protect register handling */
273 int seq_id; /* sequence id of the iommu */
274
275#ifdef CONFIG_DMAR
276 unsigned long *domain_ids; /* bitmap of domains */
277 struct dmar_domain **domains; /* ptr to domains */
278 spinlock_t lock; /* protect context, domain ids */
329 struct root_entry *root_entry; /* virtual address */ 279 struct root_entry *root_entry; /* virtual address */
330 280
331 unsigned int irq; 281 unsigned int irq;
332 unsigned char name[7]; /* Device Name */ 282 unsigned char name[7]; /* Device Name */
333 struct msi_msg saved_msg; 283 struct msi_msg saved_msg;
334 struct sys_device sysdev; 284 struct sys_device sysdev;
285#endif
286 struct q_inval *qi; /* Queued invalidation info */
287#ifdef CONFIG_INTR_REMAP
288 struct ir_table *ir_table; /* Interrupt remapping info */
289#endif
335}; 290};
336 291
337#ifndef CONFIG_DMAR_GFX_WA 292static inline void __iommu_flush_cache(
338static inline void iommu_prepare_gfx_mapping(void) 293 struct intel_iommu *iommu, void *addr, int size)
339{ 294{
340 return; 295 if (!ecap_coherent(iommu->ecap))
296 clflush_cache_range(addr, size);
341} 297}
342#endif /* !CONFIG_DMAR_GFX_WA */
343 298
299extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);
300
301extern int alloc_iommu(struct dmar_drhd_unit *drhd);
302extern void free_iommu(struct intel_iommu *iommu);
303extern int dmar_enable_qi(struct intel_iommu *iommu);
304extern void qi_global_iec(struct intel_iommu *iommu);
305
306extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
344#endif 307#endif
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
new file mode 100644
index 000000000000..bb642cc5e18c
--- /dev/null
+++ b/drivers/pci/intr_remapping.c
@@ -0,0 +1,471 @@
1#include <linux/dmar.h>
2#include <linux/spinlock.h>
3#include <linux/jiffies.h>
4#include <linux/pci.h>
5#include <linux/irq.h>
6#include <asm/io_apic.h>
7#include "intel-iommu.h"
8#include "intr_remapping.h"
9
10static struct ioapic_scope ir_ioapic[MAX_IO_APICS];
11static int ir_ioapic_num;
12int intr_remapping_enabled;
13
14static struct {
15 struct intel_iommu *iommu;
16 u16 irte_index;
17 u16 sub_handle;
18 u8 irte_mask;
19} irq_2_iommu[NR_IRQS];
20
21static DEFINE_SPINLOCK(irq_2_ir_lock);
22
23int irq_remapped(int irq)
24{
25 if (irq > NR_IRQS)
26 return 0;
27
28 if (!irq_2_iommu[irq].iommu)
29 return 0;
30
31 return 1;
32}
33
34int get_irte(int irq, struct irte *entry)
35{
36 int index;
37
38 if (!entry || irq > NR_IRQS)
39 return -1;
40
41 spin_lock(&irq_2_ir_lock);
42 if (!irq_2_iommu[irq].iommu) {
43 spin_unlock(&irq_2_ir_lock);
44 return -1;
45 }
46
47 index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
48 *entry = *(irq_2_iommu[irq].iommu->ir_table->base + index);
49
50 spin_unlock(&irq_2_ir_lock);
51 return 0;
52}
53
54int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
55{
56 struct ir_table *table = iommu->ir_table;
57 u16 index, start_index;
58 unsigned int mask = 0;
59 int i;
60
61 if (!count)
62 return -1;
63
64 /*
65 * start the IRTE search from index 0.
66 */
67 index = start_index = 0;
68
69 if (count > 1) {
70 count = __roundup_pow_of_two(count);
71 mask = ilog2(count);
72 }
73
74 if (mask > ecap_max_handle_mask(iommu->ecap)) {
75 printk(KERN_ERR
76 "Requested mask %x exceeds the max invalidation handle"
77 " mask value %Lx\n", mask,
78 ecap_max_handle_mask(iommu->ecap));
79 return -1;
80 }
81
82 spin_lock(&irq_2_ir_lock);
83 do {
84 for (i = index; i < index + count; i++)
85 if (table->base[i].present)
86 break;
87 /* empty index found */
88 if (i == index + count)
89 break;
90
91 index = (index + count) % INTR_REMAP_TABLE_ENTRIES;
92
93 if (index == start_index) {
94 spin_unlock(&irq_2_ir_lock);
95 printk(KERN_ERR "can't allocate an IRTE\n");
96 return -1;
97 }
98 } while (1);
99
100 for (i = index; i < index + count; i++)
101 table->base[i].present = 1;
102
103 irq_2_iommu[irq].iommu = iommu;
104 irq_2_iommu[irq].irte_index = index;
105 irq_2_iommu[irq].sub_handle = 0;
106 irq_2_iommu[irq].irte_mask = mask;
107
108 spin_unlock(&irq_2_ir_lock);
109
110 return index;
111}
112
113static void qi_flush_iec(struct intel_iommu *iommu, int index, int mask)
114{
115 struct qi_desc desc;
116
117 desc.low = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask)
118 | QI_IEC_SELECTIVE;
119 desc.high = 0;
120
121 qi_submit_sync(&desc, iommu);
122}
123
124int map_irq_to_irte_handle(int irq, u16 *sub_handle)
125{
126 int index;
127
128 spin_lock(&irq_2_ir_lock);
129 if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
130 spin_unlock(&irq_2_ir_lock);
131 return -1;
132 }
133
134 *sub_handle = irq_2_iommu[irq].sub_handle;
135 index = irq_2_iommu[irq].irte_index;
136 spin_unlock(&irq_2_ir_lock);
137 return index;
138}
139
140int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
141{
142 spin_lock(&irq_2_ir_lock);
143 if (irq >= NR_IRQS || irq_2_iommu[irq].iommu) {
144 spin_unlock(&irq_2_ir_lock);
145 return -1;
146 }
147
148 irq_2_iommu[irq].iommu = iommu;
149 irq_2_iommu[irq].irte_index = index;
150 irq_2_iommu[irq].sub_handle = subhandle;
151 irq_2_iommu[irq].irte_mask = 0;
152
153 spin_unlock(&irq_2_ir_lock);
154
155 return 0;
156}
157
158int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index)
159{
160 spin_lock(&irq_2_ir_lock);
161 if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
162 spin_unlock(&irq_2_ir_lock);
163 return -1;
164 }
165
166 irq_2_iommu[irq].iommu = NULL;
167 irq_2_iommu[irq].irte_index = 0;
168 irq_2_iommu[irq].sub_handle = 0;
169 irq_2_iommu[irq].irte_mask = 0;
170
171 spin_unlock(&irq_2_ir_lock);
172
173 return 0;
174}
175
176int modify_irte(int irq, struct irte *irte_modified)
177{
178 int index;
179 struct irte *irte;
180 struct intel_iommu *iommu;
181
182 spin_lock(&irq_2_ir_lock);
183 if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
184 spin_unlock(&irq_2_ir_lock);
185 return -1;
186 }
187
188 iommu = irq_2_iommu[irq].iommu;
189
190 index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
191 irte = &iommu->ir_table->base[index];
192
193 set_64bit((unsigned long *)irte, irte_modified->low | (1 << 1));
194 __iommu_flush_cache(iommu, irte, sizeof(*irte));
195
196 qi_flush_iec(iommu, index, 0);
197
198 spin_unlock(&irq_2_ir_lock);
199 return 0;
200}
201
202int flush_irte(int irq)
203{
204 int index;
205 struct intel_iommu *iommu;
206
207 spin_lock(&irq_2_ir_lock);
208 if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
209 spin_unlock(&irq_2_ir_lock);
210 return -1;
211 }
212
213 iommu = irq_2_iommu[irq].iommu;
214
215 index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
216
217 qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask);
218 spin_unlock(&irq_2_ir_lock);
219
220 return 0;
221}
222
223struct intel_iommu *map_ioapic_to_ir(int apic)
224{
225 int i;
226
227 for (i = 0; i < MAX_IO_APICS; i++)
228 if (ir_ioapic[i].id == apic)
229 return ir_ioapic[i].iommu;
230 return NULL;
231}
232
233struct intel_iommu *map_dev_to_ir(struct pci_dev *dev)
234{
235 struct dmar_drhd_unit *drhd;
236
237 drhd = dmar_find_matched_drhd_unit(dev);
238 if (!drhd)
239 return NULL;
240
241 return drhd->iommu;
242}
243
244int free_irte(int irq)
245{
246 int index, i;
247 struct irte *irte;
248 struct intel_iommu *iommu;
249
250 spin_lock(&irq_2_ir_lock);
251 if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
252 spin_unlock(&irq_2_ir_lock);
253 return -1;
254 }
255
256 iommu = irq_2_iommu[irq].iommu;
257
258 index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
259 irte = &iommu->ir_table->base[index];
260
261 if (!irq_2_iommu[irq].sub_handle) {
262 for (i = 0; i < (1 << irq_2_iommu[irq].irte_mask); i++)
263 set_64bit((unsigned long *)irte, 0);
264 qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask);
265 }
266
267 irq_2_iommu[irq].iommu = NULL;
268 irq_2_iommu[irq].irte_index = 0;
269 irq_2_iommu[irq].sub_handle = 0;
270 irq_2_iommu[irq].irte_mask = 0;
271
272 spin_unlock(&irq_2_ir_lock);
273
274 return 0;
275}
276
277static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode)
278{
279 u64 addr;
280 u32 cmd, sts;
281 unsigned long flags;
282
283 addr = virt_to_phys((void *)iommu->ir_table->base);
284
285 spin_lock_irqsave(&iommu->register_lock, flags);
286
287 dmar_writeq(iommu->reg + DMAR_IRTA_REG,
288 (addr) | IR_X2APIC_MODE(mode) | INTR_REMAP_TABLE_REG_SIZE);
289
290 /* Set interrupt-remapping table pointer */
291 cmd = iommu->gcmd | DMA_GCMD_SIRTP;
292 writel(cmd, iommu->reg + DMAR_GCMD_REG);
293
294 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
295 readl, (sts & DMA_GSTS_IRTPS), sts);
296 spin_unlock_irqrestore(&iommu->register_lock, flags);
297
298 /*
299 * global invalidation of interrupt entry cache before enabling
300 * interrupt-remapping.
301 */
302 qi_global_iec(iommu);
303
304 spin_lock_irqsave(&iommu->register_lock, flags);
305
306 /* Enable interrupt-remapping */
307 cmd = iommu->gcmd | DMA_GCMD_IRE;
308 iommu->gcmd |= DMA_GCMD_IRE;
309 writel(cmd, iommu->reg + DMAR_GCMD_REG);
310
311 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
312 readl, (sts & DMA_GSTS_IRES), sts);
313
314 spin_unlock_irqrestore(&iommu->register_lock, flags);
315}
316
317
318static int setup_intr_remapping(struct intel_iommu *iommu, int mode)
319{
320 struct ir_table *ir_table;
321 struct page *pages;
322
323 ir_table = iommu->ir_table = kzalloc(sizeof(struct ir_table),
324 GFP_KERNEL);
325
326 if (!iommu->ir_table)
327 return -ENOMEM;
328
329 pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, INTR_REMAP_PAGE_ORDER);
330
331 if (!pages) {
332 printk(KERN_ERR "failed to allocate pages of order %d\n",
333 INTR_REMAP_PAGE_ORDER);
334 kfree(iommu->ir_table);
335 return -ENOMEM;
336 }
337
338 ir_table->base = page_address(pages);
339
340 iommu_set_intr_remapping(iommu, mode);
341 return 0;
342}
343
344int __init enable_intr_remapping(int eim)
345{
346 struct dmar_drhd_unit *drhd;
347 int setup = 0;
348
349 /*
350 * check for the Interrupt-remapping support
351 */
352 for_each_drhd_unit(drhd) {
353 struct intel_iommu *iommu = drhd->iommu;
354
355 if (!ecap_ir_support(iommu->ecap))
356 continue;
357
358 if (eim && !ecap_eim_support(iommu->ecap)) {
359 printk(KERN_INFO "DRHD %Lx: EIM not supported by DRHD, "
360 " ecap %Lx\n", drhd->reg_base_addr, iommu->ecap);
361 return -1;
362 }
363 }
364
365 /*
366 * Enable queued invalidation for all the DRHD's.
367 */
368 for_each_drhd_unit(drhd) {
369 int ret;
370 struct intel_iommu *iommu = drhd->iommu;
371 ret = dmar_enable_qi(iommu);
372
373 if (ret) {
374 printk(KERN_ERR "DRHD %Lx: failed to enable queued, "
375 " invalidation, ecap %Lx, ret %d\n",
376 drhd->reg_base_addr, iommu->ecap, ret);
377 return -1;
378 }
379 }
380
381 /*
382 * Setup Interrupt-remapping for all the DRHD's now.
383 */
384 for_each_drhd_unit(drhd) {
385 struct intel_iommu *iommu = drhd->iommu;
386
387 if (!ecap_ir_support(iommu->ecap))
388 continue;
389
390 if (setup_intr_remapping(iommu, eim))
391 goto error;
392
393 setup = 1;
394 }
395
396 if (!setup)
397 goto error;
398
399 intr_remapping_enabled = 1;
400
401 return 0;
402
403error:
404 /*
405 * handle error condition gracefully here!
406 */
407 return -1;
408}
409
410static int ir_parse_ioapic_scope(struct acpi_dmar_header *header,
411 struct intel_iommu *iommu)
412{
413 struct acpi_dmar_hardware_unit *drhd;
414 struct acpi_dmar_device_scope *scope;
415 void *start, *end;
416
417 drhd = (struct acpi_dmar_hardware_unit *)header;
418
419 start = (void *)(drhd + 1);
420 end = ((void *)drhd) + header->length;
421
422 while (start < end) {
423 scope = start;
424 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_IOAPIC) {
425 if (ir_ioapic_num == MAX_IO_APICS) {
426 printk(KERN_WARNING "Exceeded Max IO APICS\n");
427 return -1;
428 }
429
430 printk(KERN_INFO "IOAPIC id %d under DRHD base"
431 " 0x%Lx\n", scope->enumeration_id,
432 drhd->address);
433
434 ir_ioapic[ir_ioapic_num].iommu = iommu;
435 ir_ioapic[ir_ioapic_num].id = scope->enumeration_id;
436 ir_ioapic_num++;
437 }
438 start += scope->length;
439 }
440
441 return 0;
442}
443
444/*
445 * Finds the assocaition between IOAPIC's and its Interrupt-remapping
446 * hardware unit.
447 */
448int __init parse_ioapics_under_ir(void)
449{
450 struct dmar_drhd_unit *drhd;
451 int ir_supported = 0;
452
453 for_each_drhd_unit(drhd) {
454 struct intel_iommu *iommu = drhd->iommu;
455
456 if (ecap_ir_support(iommu->ecap)) {
457 if (ir_parse_ioapic_scope(drhd->hdr, iommu))
458 return -1;
459
460 ir_supported = 1;
461 }
462 }
463
464 if (ir_supported && ir_ioapic_num != nr_ioapics) {
465 printk(KERN_WARNING
466 "Not all IO-APIC's listed under remapping hardware\n");
467 return -1;
468 }
469
470 return ir_supported;
471}
diff --git a/drivers/pci/intr_remapping.h b/drivers/pci/intr_remapping.h
new file mode 100644
index 000000000000..05f2635bbe4e
--- /dev/null
+++ b/drivers/pci/intr_remapping.h
@@ -0,0 +1,8 @@
1#include "intel-iommu.h"
2
3struct ioapic_scope {
4 struct intel_iommu *iommu;
5 unsigned int id;
6};
7
8#define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0)
diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h
index d0ace3d63a2d..bc2e364bdb10 100644
--- a/include/asm-x86/apic.h
+++ b/include/asm-x86/apic.h
@@ -9,6 +9,8 @@
9#include <asm/apicdef.h> 9#include <asm/apicdef.h>
10#include <asm/processor.h> 10#include <asm/processor.h>
11#include <asm/system.h> 11#include <asm/system.h>
12#include <asm/cpufeature.h>
13#include <asm/msr.h>
12 14
13#define ARCH_APICTIMER_STOPS_ON_C3 1 15#define ARCH_APICTIMER_STOPS_ON_C3 1
14 16
@@ -47,15 +49,13 @@ extern int disable_apic;
47#ifdef CONFIG_PARAVIRT 49#ifdef CONFIG_PARAVIRT
48#include <asm/paravirt.h> 50#include <asm/paravirt.h>
49#else 51#else
50#define apic_write native_apic_write
51#define apic_read native_apic_read
52#define setup_boot_clock setup_boot_APIC_clock 52#define setup_boot_clock setup_boot_APIC_clock
53#define setup_secondary_clock setup_secondary_APIC_clock 53#define setup_secondary_clock setup_secondary_APIC_clock
54#endif 54#endif
55 55
56extern int is_vsmp_box(void); 56extern int is_vsmp_box(void);
57 57
58static inline void native_apic_write(unsigned long reg, u32 v) 58static inline void native_apic_mem_write(u32 reg, u32 v)
59{ 59{
60 volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg); 60 volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg);
61 61
@@ -64,15 +64,68 @@ static inline void native_apic_write(unsigned long reg, u32 v)
64 ASM_OUTPUT2("0" (v), "m" (*addr))); 64 ASM_OUTPUT2("0" (v), "m" (*addr)));
65} 65}
66 66
67static inline u32 native_apic_read(unsigned long reg) 67static inline u32 native_apic_mem_read(u32 reg)
68{ 68{
69 return *((volatile u32 *)(APIC_BASE + reg)); 69 return *((volatile u32 *)(APIC_BASE + reg));
70} 70}
71 71
72extern void apic_wait_icr_idle(void); 72static inline void native_apic_msr_write(u32 reg, u32 v)
73extern u32 safe_apic_wait_icr_idle(void); 73{
74 if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR ||
75 reg == APIC_LVR)
76 return;
77
78 wrmsr(APIC_BASE_MSR + (reg >> 4), v, 0);
79}
80
81static inline u32 native_apic_msr_read(u32 reg)
82{
83 u32 low, high;
84
85 if (reg == APIC_DFR)
86 return -1;
87
88 rdmsr(APIC_BASE_MSR + (reg >> 4), low, high);
89 return low;
90}
91
92#ifndef CONFIG_X86_32
93extern int x2apic, x2apic_preenabled;
94extern void check_x2apic(void);
95extern void enable_x2apic(void);
96extern void enable_IR_x2apic(void);
97extern void x2apic_icr_write(u32 low, u32 id);
98#endif
99
100struct apic_ops {
101 u32 (*read)(u32 reg);
102 void (*write)(u32 reg, u32 v);
103 u64 (*icr_read)(void);
104 void (*icr_write)(u32 low, u32 high);
105 void (*wait_icr_idle)(void);
106 u32 (*safe_wait_icr_idle)(void);
107};
108
109extern struct apic_ops *apic_ops;
110
111#define apic_read (apic_ops->read)
112#define apic_write (apic_ops->write)
113#define apic_icr_read (apic_ops->icr_read)
114#define apic_icr_write (apic_ops->icr_write)
115#define apic_wait_icr_idle (apic_ops->wait_icr_idle)
116#define safe_apic_wait_icr_idle (apic_ops->safe_wait_icr_idle)
117
74extern int get_physical_broadcast(void); 118extern int get_physical_broadcast(void);
75 119
120#ifdef CONFIG_X86_64
121static inline void ack_x2APIC_irq(void)
122{
123 /* Docs say use 0 for future compatibility */
124 native_apic_msr_write(APIC_EOI, 0);
125}
126#endif
127
128
76static inline void ack_APIC_irq(void) 129static inline void ack_APIC_irq(void)
77{ 130{
78 /* 131 /*
diff --git a/include/asm-x86/apicdef.h b/include/asm-x86/apicdef.h
index c40687da20fc..b922c85ac91d 100644
--- a/include/asm-x86/apicdef.h
+++ b/include/asm-x86/apicdef.h
@@ -105,6 +105,7 @@
105#define APIC_TMICT 0x380 105#define APIC_TMICT 0x380
106#define APIC_TMCCT 0x390 106#define APIC_TMCCT 0x390
107#define APIC_TDCR 0x3E0 107#define APIC_TDCR 0x3E0
108#define APIC_SELF_IPI 0x3F0
108#define APIC_TDR_DIV_TMBASE (1 << 2) 109#define APIC_TDR_DIV_TMBASE (1 << 2)
109#define APIC_TDR_DIV_1 0xB 110#define APIC_TDR_DIV_1 0xB
110#define APIC_TDR_DIV_2 0x0 111#define APIC_TDR_DIV_2 0x0
@@ -128,6 +129,8 @@
128#define APIC_EILVT3 0x530 129#define APIC_EILVT3 0x530
129 130
130#define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) 131#define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
132#define APIC_BASE_MSR 0x800
133#define X2APIC_ENABLE (1UL << 10)
131 134
132#ifdef CONFIG_X86_32 135#ifdef CONFIG_X86_32
133# define MAX_IO_APICS 64 136# define MAX_IO_APICS 64
diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h
index 2f3143e45149..42afe9ca3a37 100644
--- a/include/asm-x86/cpufeature.h
+++ b/include/asm-x86/cpufeature.h
@@ -91,6 +91,7 @@
91#define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */ 91#define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */
92#define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */ 92#define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */
93#define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */ 93#define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */
94#define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */
94 95
95/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ 96/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
96#define X86_FEATURE_XSTORE (5*32+ 2) /* on-CPU RNG present (xstore insn) */ 97#define X86_FEATURE_XSTORE (5*32+ 2) /* on-CPU RNG present (xstore insn) */
@@ -189,6 +190,7 @@ extern const char * const x86_power_flags[32];
189#define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES) 190#define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES)
190#define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) 191#define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
191#define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT) 192#define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT)
193#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC)
192 194
193#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) 195#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
194# define cpu_has_invlpg 1 196# define cpu_has_invlpg 1
diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h
index 381a09d19d58..128b9ec2d6f2 100644
--- a/include/asm-x86/genapic_64.h
+++ b/include/asm-x86/genapic_64.h
@@ -14,6 +14,7 @@
14 14
15struct genapic { 15struct genapic {
16 char *name; 16 char *name;
17 int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id);
17 u32 int_delivery_mode; 18 u32 int_delivery_mode;
18 u32 int_dest_mode; 19 u32 int_dest_mode;
19 int (*apic_id_registered)(void); 20 int (*apic_id_registered)(void);
@@ -24,17 +25,24 @@ struct genapic {
24 void (*send_IPI_mask)(cpumask_t mask, int vector); 25 void (*send_IPI_mask)(cpumask_t mask, int vector);
25 void (*send_IPI_allbutself)(int vector); 26 void (*send_IPI_allbutself)(int vector);
26 void (*send_IPI_all)(int vector); 27 void (*send_IPI_all)(int vector);
28 void (*send_IPI_self)(int vector);
27 /* */ 29 /* */
28 unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); 30 unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
29 unsigned int (*phys_pkg_id)(int index_msb); 31 unsigned int (*phys_pkg_id)(int index_msb);
32 unsigned int (*get_apic_id)(unsigned long x);
33 unsigned long (*set_apic_id)(unsigned int id);
34 unsigned long apic_id_mask;
30}; 35};
31 36
32extern struct genapic *genapic; 37extern struct genapic *genapic;
33 38
34extern struct genapic apic_flat; 39extern struct genapic apic_flat;
35extern struct genapic apic_physflat; 40extern struct genapic apic_physflat;
41extern struct genapic apic_x2apic_cluster;
42extern struct genapic apic_x2apic_phys;
36extern int acpi_madt_oem_check(char *, char *); 43extern int acpi_madt_oem_check(char *, char *);
37 44
45extern void apic_send_IPI_self(int vector);
38enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; 46enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
39extern enum uv_system_type get_uv_system_type(void); 47extern enum uv_system_type get_uv_system_type(void);
40extern int is_uv_system(void); 48extern int is_uv_system(void);
diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h
index 3f3c465b915f..12f12b8eebfe 100644
--- a/include/asm-x86/hw_irq.h
+++ b/include/asm-x86/hw_irq.h
@@ -73,7 +73,9 @@ extern void enable_IO_APIC(void);
73#endif 73#endif
74 74
75/* IPI functions */ 75/* IPI functions */
76#ifdef CONFIG_X86_32
76extern void send_IPI_self(int vector); 77extern void send_IPI_self(int vector);
78#endif
77extern void send_IPI(int dest, int vector); 79extern void send_IPI(int dest, int vector);
78 80
79/* Statistics */ 81/* Statistics */
diff --git a/include/asm-x86/i8259.h b/include/asm-x86/i8259.h
index c586559a6957..23c1b3baaecd 100644
--- a/include/asm-x86/i8259.h
+++ b/include/asm-x86/i8259.h
@@ -57,4 +57,7 @@ static inline void outb_pic(unsigned char value, unsigned int port)
57 57
58extern struct irq_chip i8259A_chip; 58extern struct irq_chip i8259A_chip;
59 59
60extern void mask_8259A(void);
61extern void unmask_8259A(void);
62
60#endif /* ASM_X86__I8259_H */ 63#endif /* ASM_X86__I8259_H */
diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h
index be62847ab07e..8ec68a50cf10 100644
--- a/include/asm-x86/io_apic.h
+++ b/include/asm-x86/io_apic.h
@@ -107,6 +107,20 @@ struct IO_APIC_route_entry {
107 107
108} __attribute__ ((packed)); 108} __attribute__ ((packed));
109 109
110struct IR_IO_APIC_route_entry {
111 __u64 vector : 8,
112 zero : 3,
113 index2 : 1,
114 delivery_status : 1,
115 polarity : 1,
116 irr : 1,
117 trigger : 1,
118 mask : 1,
119 reserved : 31,
120 format : 1,
121 index : 15;
122} __attribute__ ((packed));
123
110#ifdef CONFIG_X86_IO_APIC 124#ifdef CONFIG_X86_IO_APIC
111 125
112/* 126/*
@@ -183,6 +197,12 @@ extern int io_apic_set_pci_routing(int ioapic, int pin, int irq,
183extern int (*ioapic_renumber_irq)(int ioapic, int irq); 197extern int (*ioapic_renumber_irq)(int ioapic, int irq);
184extern void ioapic_init_mappings(void); 198extern void ioapic_init_mappings(void);
185 199
200#ifdef CONFIG_X86_64
201extern int save_mask_IO_APIC_setup(void);
202extern void restore_IO_APIC_setup(void);
203extern void reinit_intr_remapped_IO_APIC(int);
204#endif
205
186#else /* !CONFIG_X86_IO_APIC */ 206#else /* !CONFIG_X86_IO_APIC */
187#define io_apic_assign_pci_irqs 0 207#define io_apic_assign_pci_irqs 0
188static const int timer_through_8259 = 0; 208static const int timer_through_8259 = 0;
diff --git a/include/asm-x86/ipi.h b/include/asm-x86/ipi.h
index c1b226797518..30a692cfaff8 100644
--- a/include/asm-x86/ipi.h
+++ b/include/asm-x86/ipi.h
@@ -49,6 +49,12 @@ static inline int __prepare_ICR2(unsigned int mask)
49 return SET_APIC_DEST_FIELD(mask); 49 return SET_APIC_DEST_FIELD(mask);
50} 50}
51 51
52static inline void __xapic_wait_icr_idle(void)
53{
54 while (native_apic_mem_read(APIC_ICR) & APIC_ICR_BUSY)
55 cpu_relax();
56}
57
52static inline void __send_IPI_shortcut(unsigned int shortcut, int vector, 58static inline void __send_IPI_shortcut(unsigned int shortcut, int vector,
53 unsigned int dest) 59 unsigned int dest)
54{ 60{
@@ -64,7 +70,7 @@ static inline void __send_IPI_shortcut(unsigned int shortcut, int vector,
64 /* 70 /*
65 * Wait for idle. 71 * Wait for idle.
66 */ 72 */
67 apic_wait_icr_idle(); 73 __xapic_wait_icr_idle();
68 74
69 /* 75 /*
70 * No need to touch the target chip field 76 * No need to touch the target chip field
@@ -74,7 +80,7 @@ static inline void __send_IPI_shortcut(unsigned int shortcut, int vector,
74 /* 80 /*
75 * Send the IPI. The write to APIC_ICR fires this off. 81 * Send the IPI. The write to APIC_ICR fires this off.
76 */ 82 */
77 apic_write(APIC_ICR, cfg); 83 native_apic_mem_write(APIC_ICR, cfg);
78} 84}
79 85
80/* 86/*
@@ -92,13 +98,13 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector,
92 if (unlikely(vector == NMI_VECTOR)) 98 if (unlikely(vector == NMI_VECTOR))
93 safe_apic_wait_icr_idle(); 99 safe_apic_wait_icr_idle();
94 else 100 else
95 apic_wait_icr_idle(); 101 __xapic_wait_icr_idle();
96 102
97 /* 103 /*
98 * prepare target chip field 104 * prepare target chip field
99 */ 105 */
100 cfg = __prepare_ICR2(mask); 106 cfg = __prepare_ICR2(mask);
101 apic_write(APIC_ICR2, cfg); 107 native_apic_mem_write(APIC_ICR2, cfg);
102 108
103 /* 109 /*
104 * program the ICR 110 * program the ICR
@@ -108,7 +114,7 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector,
108 /* 114 /*
109 * Send the IPI. The write to APIC_ICR fires this off. 115 * Send the IPI. The write to APIC_ICR fires this off.
110 */ 116 */
111 apic_write(APIC_ICR, cfg); 117 native_apic_mem_write(APIC_ICR, cfg);
112} 118}
113 119
114static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) 120static inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
diff --git a/include/asm-x86/irq_remapping.h b/include/asm-x86/irq_remapping.h
new file mode 100644
index 000000000000..78242c6ffa58
--- /dev/null
+++ b/include/asm-x86/irq_remapping.h
@@ -0,0 +1,8 @@
1#ifndef _ASM_IRQ_REMAPPING_H
2#define _ASM_IRQ_REMAPPING_H
3
4extern int x2apic;
5
6#define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8)
7
8#endif
diff --git a/include/asm-x86/mach-default/mach_apic.h b/include/asm-x86/mach-default/mach_apic.h
index b615f40736be..2a330a41b3dd 100644
--- a/include/asm-x86/mach-default/mach_apic.h
+++ b/include/asm-x86/mach-default/mach_apic.h
@@ -30,6 +30,8 @@ static inline cpumask_t target_cpus(void)
30#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) 30#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
31#define phys_pkg_id (genapic->phys_pkg_id) 31#define phys_pkg_id (genapic->phys_pkg_id)
32#define vector_allocation_domain (genapic->vector_allocation_domain) 32#define vector_allocation_domain (genapic->vector_allocation_domain)
33#define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID)))
34#define send_IPI_self (genapic->send_IPI_self)
33extern void setup_apic_routing(void); 35extern void setup_apic_routing(void);
34#else 36#else
35#define INT_DELIVERY_MODE dest_LowestPrio 37#define INT_DELIVERY_MODE dest_LowestPrio
@@ -54,7 +56,7 @@ static inline void init_apic_ldr(void)
54 56
55static inline int apic_id_registered(void) 57static inline int apic_id_registered(void)
56{ 58{
57 return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map); 59 return physid_isset(read_apic_id(), phys_cpu_present_map);
58} 60}
59 61
60static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) 62static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
diff --git a/include/asm-x86/mach-default/mach_apicdef.h b/include/asm-x86/mach-default/mach_apicdef.h
index 936704f816d6..0c2d41c41b20 100644
--- a/include/asm-x86/mach-default/mach_apicdef.h
+++ b/include/asm-x86/mach-default/mach_apicdef.h
@@ -4,9 +4,9 @@
4#include <asm/apic.h> 4#include <asm/apic.h>
5 5
6#ifdef CONFIG_X86_64 6#ifdef CONFIG_X86_64
7#define APIC_ID_MASK (0xFFu<<24) 7#define APIC_ID_MASK (genapic->apic_id_mask)
8#define GET_APIC_ID(x) (((x)>>24)&0xFFu) 8#define GET_APIC_ID(x) (genapic->get_apic_id(x))
9#define SET_APIC_ID(x) (((x)<<24)) 9#define SET_APIC_ID(x) (genapic->set_apic_id(x))
10#else 10#else
11#define APIC_ID_MASK (0xF<<24) 11#define APIC_ID_MASK (0xF<<24)
12static inline unsigned get_apic_id(unsigned long x) 12static inline unsigned get_apic_id(unsigned long x)
diff --git a/include/asm-x86/mach-es7000/mach_apic.h b/include/asm-x86/mach-es7000/mach_apic.h
index c1f6f682d619..ae877ec267f2 100644
--- a/include/asm-x86/mach-es7000/mach_apic.h
+++ b/include/asm-x86/mach-es7000/mach_apic.h
@@ -141,7 +141,7 @@ static inline void setup_portio_remap(void)
141extern unsigned int boot_cpu_physical_apicid; 141extern unsigned int boot_cpu_physical_apicid;
142static inline int check_phys_apicid_present(int cpu_physical_apicid) 142static inline int check_phys_apicid_present(int cpu_physical_apicid)
143{ 143{
144 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 144 boot_cpu_physical_apicid = read_apic_id();
145 return (1); 145 return (1);
146} 146}
147 147
diff --git a/include/asm-x86/msidef.h b/include/asm-x86/msidef.h
index 3139666a94fa..ed9190246876 100644
--- a/include/asm-x86/msidef.h
+++ b/include/asm-x86/msidef.h
@@ -48,4 +48,8 @@
48#define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \ 48#define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \
49 MSI_ADDR_DEST_ID_MASK) 49 MSI_ADDR_DEST_ID_MASK)
50 50
51#define MSI_ADDR_IR_EXT_INT (1 << 4)
52#define MSI_ADDR_IR_SHV (1 << 3)
53#define MSI_ADDR_IR_INDEX1(index) ((index & 0x8000) >> 13)
54#define MSI_ADDR_IR_INDEX2(index) ((index & 0x7fff) << 5)
51#endif /* ASM_X86__MSIDEF_H */ 55#endif /* ASM_X86__MSIDEF_H */
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 2e6821a0b6e7..ff9a7e4bc3f6 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -200,12 +200,6 @@ struct pv_irq_ops {
200 200
201struct pv_apic_ops { 201struct pv_apic_ops {
202#ifdef CONFIG_X86_LOCAL_APIC 202#ifdef CONFIG_X86_LOCAL_APIC
203 /*
204 * Direct APIC operations, principally for VMI. Ideally
205 * these shouldn't be in this interface.
206 */
207 void (*apic_write)(unsigned long reg, u32 v);
208 u32 (*apic_read)(unsigned long reg);
209 void (*setup_boot_clock)(void); 203 void (*setup_boot_clock)(void);
210 void (*setup_secondary_clock)(void); 204 void (*setup_secondary_clock)(void);
211 205
@@ -898,19 +892,6 @@ static inline void slow_down_io(void)
898} 892}
899 893
900#ifdef CONFIG_X86_LOCAL_APIC 894#ifdef CONFIG_X86_LOCAL_APIC
901/*
902 * Basic functions accessing APICs.
903 */
904static inline void apic_write(unsigned long reg, u32 v)
905{
906 PVOP_VCALL2(pv_apic_ops.apic_write, reg, v);
907}
908
909static inline u32 apic_read(unsigned long reg)
910{
911 return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg);
912}
913
914static inline void setup_boot_clock(void) 895static inline void setup_boot_clock(void)
915{ 896{
916 PVOP_VCALL0(pv_apic_ops.setup_boot_clock); 897 PVOP_VCALL0(pv_apic_ops.setup_boot_clock);
diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h
index cd3c5f03fcaf..88b075c6fe8f 100644
--- a/include/asm-x86/smp.h
+++ b/include/asm-x86/smp.h
@@ -165,30 +165,33 @@ extern int safe_smp_processor_id(void);
165 165
166#ifdef CONFIG_X86_LOCAL_APIC 166#ifdef CONFIG_X86_LOCAL_APIC
167 167
168#ifndef CONFIG_X86_64
168static inline int logical_smp_processor_id(void) 169static inline int logical_smp_processor_id(void)
169{ 170{
170 /* we don't want to mark this access volatile - bad code generation */ 171 /* we don't want to mark this access volatile - bad code generation */
171 return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR)); 172 return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
172} 173}
173 174
174#ifndef CONFIG_X86_64 175#include <mach_apicdef.h>
175static inline unsigned int read_apic_id(void) 176static inline unsigned int read_apic_id(void)
176{ 177{
177 return *(u32 *)(APIC_BASE + APIC_ID); 178 unsigned int reg;
179
180 reg = *(u32 *)(APIC_BASE + APIC_ID);
181
182 return GET_APIC_ID(reg);
178} 183}
179#else
180extern unsigned int read_apic_id(void);
181#endif 184#endif
182 185
183 186
184# ifdef APIC_DEFINITION 187# if defined(APIC_DEFINITION) || defined(CONFIG_X86_64)
185extern int hard_smp_processor_id(void); 188extern int hard_smp_processor_id(void);
186# else 189# else
187# include <mach_apicdef.h> 190#include <mach_apicdef.h>
188static inline int hard_smp_processor_id(void) 191static inline int hard_smp_processor_id(void)
189{ 192{
190 /* we don't want to mark this access volatile - bad code generation */ 193 /* we don't want to mark this access volatile - bad code generation */
191 return GET_APIC_ID(read_apic_id()); 194 return read_apic_id();
192} 195}
193# endif /* APIC_DEFINITION */ 196# endif /* APIC_DEFINITION */
194 197
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index 56c73b847551..c360c558e59e 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -25,9 +25,99 @@
25#include <linux/types.h> 25#include <linux/types.h>
26#include <linux/msi.h> 26#include <linux/msi.h>
27 27
28#ifdef CONFIG_DMAR 28#if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP)
29struct intel_iommu; 29struct intel_iommu;
30 30
31struct dmar_drhd_unit {
32 struct list_head list; /* list of drhd units */
33 struct acpi_dmar_header *hdr; /* ACPI header */
34 u64 reg_base_addr; /* register base address*/
35 struct pci_dev **devices; /* target device array */
36 int devices_cnt; /* target device count */
37 u8 ignored:1; /* ignore drhd */
38 u8 include_all:1;
39 struct intel_iommu *iommu;
40};
41
42extern struct list_head dmar_drhd_units;
43
44#define for_each_drhd_unit(drhd) \
45 list_for_each_entry(drhd, &dmar_drhd_units, list)
46
47extern int dmar_table_init(void);
48extern int early_dmar_detect(void);
49extern int dmar_dev_scope_init(void);
50
51/* Intel IOMMU detection */
52extern void detect_intel_iommu(void);
53
54
55extern int parse_ioapics_under_ir(void);
56extern int alloc_iommu(struct dmar_drhd_unit *);
57#else
58static inline void detect_intel_iommu(void)
59{
60 return;
61}
62
63static inline int dmar_table_init(void)
64{
65 return -ENODEV;
66}
67#endif /* !CONFIG_DMAR && !CONFIG_INTR_REMAP */
68
69#ifdef CONFIG_INTR_REMAP
70extern int intr_remapping_enabled;
71extern int enable_intr_remapping(int);
72
73struct irte {
74 union {
75 struct {
76 __u64 present : 1,
77 fpd : 1,
78 dst_mode : 1,
79 redir_hint : 1,
80 trigger_mode : 1,
81 dlvry_mode : 3,
82 avail : 4,
83 __reserved_1 : 4,
84 vector : 8,
85 __reserved_2 : 8,
86 dest_id : 32;
87 };
88 __u64 low;
89 };
90
91 union {
92 struct {
93 __u64 sid : 16,
94 sq : 2,
95 svt : 2,
96 __reserved_3 : 44;
97 };
98 __u64 high;
99 };
100};
101extern int get_irte(int irq, struct irte *entry);
102extern int modify_irte(int irq, struct irte *irte_modified);
103extern int alloc_irte(struct intel_iommu *iommu, int irq, u16 count);
104extern int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index,
105 u16 sub_handle);
106extern int map_irq_to_irte_handle(int irq, u16 *sub_handle);
107extern int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index);
108extern int flush_irte(int irq);
109extern int free_irte(int irq);
110
111extern int irq_remapped(int irq);
112extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev);
113extern struct intel_iommu *map_ioapic_to_ir(int apic);
114#else
115#define irq_remapped(irq) (0)
116#define enable_intr_remapping(mode) (-1)
117#define intr_remapping_enabled (0)
118#endif
119
120#ifdef CONFIG_DMAR
31extern const char *dmar_get_fault_reason(u8 fault_reason); 121extern const char *dmar_get_fault_reason(u8 fault_reason);
32 122
33/* Can't use the common MSI interrupt functions 123/* Can't use the common MSI interrupt functions
@@ -40,47 +130,30 @@ extern void dmar_msi_write(int irq, struct msi_msg *msg);
40extern int dmar_set_interrupt(struct intel_iommu *iommu); 130extern int dmar_set_interrupt(struct intel_iommu *iommu);
41extern int arch_setup_dmar_msi(unsigned int irq); 131extern int arch_setup_dmar_msi(unsigned int irq);
42 132
43/* Intel IOMMU detection and initialization functions */ 133extern int iommu_detected, no_iommu;
44extern void detect_intel_iommu(void);
45extern int intel_iommu_init(void);
46
47extern int dmar_table_init(void);
48extern int early_dmar_detect(void);
49
50extern struct list_head dmar_drhd_units;
51extern struct list_head dmar_rmrr_units; 134extern struct list_head dmar_rmrr_units;
52
53struct dmar_drhd_unit {
54 struct list_head list; /* list of drhd units */
55 u64 reg_base_addr; /* register base address*/
56 struct pci_dev **devices; /* target device array */
57 int devices_cnt; /* target device count */
58 u8 ignored:1; /* ignore drhd */
59 u8 include_all:1;
60 struct intel_iommu *iommu;
61};
62
63struct dmar_rmrr_unit { 135struct dmar_rmrr_unit {
64 struct list_head list; /* list of rmrr units */ 136 struct list_head list; /* list of rmrr units */
137 struct acpi_dmar_header *hdr; /* ACPI header */
65 u64 base_address; /* reserved base address*/ 138 u64 base_address; /* reserved base address*/
66 u64 end_address; /* reserved end address */ 139 u64 end_address; /* reserved end address */
67 struct pci_dev **devices; /* target devices */ 140 struct pci_dev **devices; /* target devices */
68 int devices_cnt; /* target device count */ 141 int devices_cnt; /* target device count */
69}; 142};
70 143
71#define for_each_drhd_unit(drhd) \
72 list_for_each_entry(drhd, &dmar_drhd_units, list)
73#define for_each_rmrr_units(rmrr) \ 144#define for_each_rmrr_units(rmrr) \
74 list_for_each_entry(rmrr, &dmar_rmrr_units, list) 145 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
146/* Intel DMAR initialization functions */
147extern int intel_iommu_init(void);
148extern int dmar_disabled;
75#else 149#else
76static inline void detect_intel_iommu(void)
77{
78 return;
79}
80static inline int intel_iommu_init(void) 150static inline int intel_iommu_init(void)
81{ 151{
152#ifdef CONFIG_INTR_REMAP
153 return dmar_dev_scope_init();
154#else
82 return -ENODEV; 155 return -ENODEV;
156#endif
83} 157}
84
85#endif /* !CONFIG_DMAR */ 158#endif /* !CONFIG_DMAR */
86#endif /* __DMAR_H__ */ 159#endif /* __DMAR_H__ */
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 8ccb462ea42c..8d9411bc60f6 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -62,6 +62,7 @@ typedef void (*irq_flow_handler_t)(unsigned int irq,
62#define IRQ_MOVE_PENDING 0x00200000 /* need to re-target IRQ destination */ 62#define IRQ_MOVE_PENDING 0x00200000 /* need to re-target IRQ destination */
63#define IRQ_NO_BALANCING 0x00400000 /* IRQ is excluded from balancing */ 63#define IRQ_NO_BALANCING 0x00400000 /* IRQ is excluded from balancing */
64#define IRQ_SPURIOUS_DISABLED 0x00800000 /* IRQ was disabled by the spurious trap */ 64#define IRQ_SPURIOUS_DISABLED 0x00800000 /* IRQ was disabled by the spurious trap */
65#define IRQ_MOVE_PCNTXT 0x01000000 /* IRQ migration from process context */
65 66
66#ifdef CONFIG_IRQ_PER_CPU 67#ifdef CONFIG_IRQ_PER_CPU
67# define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU) 68# define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU)
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index f8914b92b664..7d73e008fc3b 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -89,7 +89,14 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
89 set_balance_irq_affinity(irq, cpumask); 89 set_balance_irq_affinity(irq, cpumask);
90 90
91#ifdef CONFIG_GENERIC_PENDING_IRQ 91#ifdef CONFIG_GENERIC_PENDING_IRQ
92 set_pending_irq(irq, cpumask); 92 if (desc->status & IRQ_MOVE_PCNTXT) {
93 unsigned long flags;
94
95 spin_lock_irqsave(&desc->lock, flags);
96 desc->chip->set_affinity(irq, cpumask);
97 spin_unlock_irqrestore(&desc->lock, flags);
98 } else
99 set_pending_irq(irq, cpumask);
93#else 100#else
94 desc->affinity = cpumask; 101 desc->affinity = cpumask;
95 desc->chip->set_affinity(irq, cpumask); 102 desc->chip->set_affinity(irq, cpumask);