 Documentation/kernel-parameters.txt         |    6
 arch/x86/Kconfig                            |    8
 arch/x86/kernel/Makefile                    |    2
 arch/x86/kernel/acpi/boot.c                 |    4
 arch/x86/kernel/apic_32.c                   |   42
 arch/x86/kernel/apic_64.c                   |  239
 arch/x86/kernel/cpu/common_64.c             |    2
 arch/x86/kernel/cpu/feature_names.c         |    2
 arch/x86/kernel/genapic_64.c                |   30
 arch/x86/kernel/genapic_flat_64.c           |   36
 arch/x86/kernel/genx2apic_cluster.c         |  153
 arch/x86/kernel/genx2apic_phys.c            |  140
 arch/x86/kernel/genx2apic_uv_x.c            |   36
 arch/x86/kernel/i8259.c                     |   24
 arch/x86/kernel/io_apic_32.c                |    5
 arch/x86/kernel/io_apic_64.c                |  608
 arch/x86/kernel/mpparse.c                   |    2
 arch/x86/kernel/paravirt.c                  |    3
 arch/x86/kernel/setup.c                     |    2
 arch/x86/kernel/smpboot.c                   |   39
 arch/x86/kernel/vmi_32.c                    |    6
 arch/x86/lguest/boot.c                      |   39
 arch/x86/mach-generic/bigsmp.c              |    5
 arch/x86/mach-generic/es7000.c              |    3
 arch/x86/mach-generic/numaq.c               |    4
 arch/x86/mach-generic/summit.c              |    5
 arch/x86/xen/enlighten.c                    |   46
 drivers/pci/Makefile                        |    2
 drivers/pci/dma_remapping.h                 |  157
 drivers/pci/dmar.c                          |  401
 drivers/pci/intel-iommu.c                   |  164
 drivers/pci/intel-iommu.h                   |  233
 drivers/pci/intr_remapping.c                |  471
 drivers/pci/intr_remapping.h                |    8
 include/asm-x86/apic.h                      |   74
 include/asm-x86/apicdef.h                   |    3
 include/asm-x86/cpufeature.h                |    2
 include/asm-x86/genapic_64.h                |    7
 include/asm-x86/hw_irq.h                    |    2
 include/asm-x86/i8259.h                     |    3
 include/asm-x86/io_apic.h                   |   20
 include/asm-x86/ipi.h                       |   16
 include/asm-x86/irq_remapping.h             |    8
 include/asm-x86/mach-default/mach_apic.h    |    4
 include/asm-x86/mach-default/mach_apicdef.h |    6
 include/asm-x86/mach-es7000/mach_apic.h     |    2
 include/asm-x86/msidef.h                    |    4
 include/asm-x86/paravirt.h                  |   25
 include/asm-x86/smp.h                       |   17
 include/linux/dmar.h                        |  127
 include/linux/irq.h                         |    1
 kernel/irq/manage.c                         |    9
 52 files changed, 2767 insertions(+), 490 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 09ad7450647b..556b4187d016 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1388,6 +1388,12 @@ and is between 256 and 4096 characters. It is defined in the file
 
 	nolapic_timer	[X86-32,APIC] Do not use the local APIC timer.
 
+	nox2apic	[X86-64,APIC] Do not enable x2APIC mode.
+
+	x2apic_phys	[X86-64,APIC] Use x2apic physical mode instead of
+			default x2apic cluster mode on platforms
+			supporting x2apic.
+
 	noltlbs		[PPC] Do not use large page/tlb entries for kernel
 			lowmem mapping on PPC40x.
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 96e0c2ebc388..baca55455005 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1650,6 +1650,14 @@ config DMAR_FLOPPY_WA
 	  workaround will setup a 1:1 mapping for the first
 	  16M to make floppy (an ISA device) work.
 
+config INTR_REMAP
+	bool "Support for Interrupt Remapping (EXPERIMENTAL)"
+	depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL
+	help
+	  Supports Interrupt remapping for IO-APIC and MSI devices.
+	  To use x2apic mode in the CPU's which support x2APIC enhancements or
+	  to support platforms with CPU's having > 8 bit APIC ID, say Y.
+
 source "drivers/pci/pcie/Kconfig"
 
 source "drivers/pci/Kconfig"
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index da140611bb57..673f1d12b420 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -102,6 +102,8 @@ obj-$(CONFIG_OLPC) += olpc.o
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
         obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
+        obj-y += genx2apic_cluster.o
+        obj-y += genx2apic_phys.o
         obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
         obj-$(CONFIG_AUDIT) += audit_64.o
 
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index f489d7a9be92..b41b27af33e6 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -761,7 +761,7 @@ static void __init acpi_register_lapic_address(unsigned long address)
 
 	set_fixmap_nocache(FIX_APIC_BASE, address);
 	if (boot_cpu_physical_apicid == -1U) {
-		boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+		boot_cpu_physical_apicid = read_apic_id();
 #ifdef CONFIG_X86_32
 		apic_version[boot_cpu_physical_apicid] =
 			GET_APIC_VERSION(apic_read(APIC_LVR));
@@ -1337,7 +1337,9 @@ static void __init acpi_process_madt(void)
 			acpi_ioapic = 1;
 
 			smp_found_config = 1;
+#ifdef CONFIG_X86_32
 			setup_apic_routing();
+#endif
 		}
 	}
 	if (error == -EINVAL) {
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index a437d027f20b..34101962fb0e 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -145,13 +145,18 @@ static int modern_apic(void)
 	return lapic_get_version() >= 0x14;
 }
 
-void apic_wait_icr_idle(void)
+/*
+ * Paravirt kernels also might be using these below ops. So we still
+ * use generic apic_read()/apic_write(), which might be pointing to different
+ * ops in PARAVIRT case.
+ */
+void xapic_wait_icr_idle(void)
 {
 	while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
 		cpu_relax();
 }
 
-u32 safe_apic_wait_icr_idle(void)
+u32 safe_xapic_wait_icr_idle(void)
 {
 	u32 send_status;
 	int timeout;
@@ -167,6 +172,35 @@ u32 safe_apic_wait_icr_idle(void)
 	return send_status;
 }
 
+void xapic_icr_write(u32 low, u32 id)
+{
+	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(id));
+	apic_write_around(APIC_ICR, low);
+}
+
+u64 xapic_icr_read(void)
+{
+	u32 icr1, icr2;
+
+	icr2 = apic_read(APIC_ICR2);
+	icr1 = apic_read(APIC_ICR);
+
+	return icr1 | ((u64)icr2 << 32);
+}
+
+static struct apic_ops xapic_ops = {
+	.read = native_apic_mem_read,
+	.write = native_apic_mem_write,
+	.write_atomic = native_apic_mem_write_atomic,
+	.icr_read = xapic_icr_read,
+	.icr_write = xapic_icr_write,
+	.wait_icr_idle = xapic_wait_icr_idle,
+	.safe_wait_icr_idle = safe_xapic_wait_icr_idle,
+};
+
+struct apic_ops __read_mostly *apic_ops = &xapic_ops;
+EXPORT_SYMBOL_GPL(apic_ops);
+
 /**
  * enable_NMI_through_LVT0 - enable NMI through local vector table 0
  */
@@ -1201,7 +1235,7 @@ void __init init_apic_mappings(void)
 	 * default configuration (or the MP table is broken).
 	 */
 	if (boot_cpu_physical_apicid == -1U)
-		boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+		boot_cpu_physical_apicid = read_apic_id();
 
 }
 
@@ -1241,7 +1275,7 @@ int __init APIC_init_uniprocessor(void)
 	 * might be zero if read from MP tables. Get it from LAPIC.
 	 */
 #ifdef CONFIG_CRASH_DUMP
-	boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+	boot_cpu_physical_apicid = read_apic_id();
 #endif
 	physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
 
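Note on the apic_ops indirection added above: callers keep using the generic apic_read()/apic_write() wrappers, and swapping the ops pointer (xapic_ops here, x2apic_ops on 64-bit, or a paravirt override) changes only the access mechanism underneath. A minimal sketch of that dispatch; the wrapper bodies below are illustrative, not the exact definitions from include/asm-x86/apic.h:

    /* Illustrative ops-table dispatch (u32/u64 from <linux/types.h>).
     * Field names mirror the patch; this is a reduced subset. */
    struct apic_ops {
            u32 (*read)(u32 reg);
            void (*write)(u32 reg, u32 v);
            u64 (*icr_read)(void);
            void (*icr_write)(u32 low, u32 id);
    };

    extern struct apic_ops *apic_ops;

    static inline u32 apic_read(u32 reg)
    {
            return apic_ops->read(reg);   /* xAPIC: MMIO load; x2APIC: rdmsr */
    }

    static inline void apic_write(u32 reg, u32 v)
    {
            apic_ops->write(reg, v);      /* xAPIC: MMIO store; x2APIC: wrmsr */
    }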
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 1e3d32e27c14..c75f58a66d8e 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -27,6 +27,7 @@
 #include <linux/clockchips.h>
 #include <linux/acpi_pmtmr.h>
 #include <linux/module.h>
+#include <linux/dmar.h>
 
 #include <asm/atomic.h>
 #include <asm/smp.h>
@@ -39,6 +40,7 @@
 #include <asm/proto.h>
 #include <asm/timex.h>
 #include <asm/apic.h>
+#include <asm/i8259.h>
 
 #include <mach_ipi.h>
 #include <mach_apic.h>
@@ -46,6 +48,11 @@
 static int disable_apic_timer __cpuinitdata;
 static int apic_calibrate_pmtmr __initdata;
 int disable_apic;
+int disable_x2apic;
+int x2apic;
+
+/* x2apic enabled before OS handover */
+int x2apic_preenabled;
 
 /* Local APIC timer works in C2 */
 int local_apic_timer_c2_ok;
@@ -119,13 +126,13 @@ static int modern_apic(void)
 	return lapic_get_version() >= 0x14;
 }
 
-void apic_wait_icr_idle(void)
+void xapic_wait_icr_idle(void)
 {
 	while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
 		cpu_relax();
 }
 
-u32 safe_apic_wait_icr_idle(void)
+u32 safe_xapic_wait_icr_idle(void)
 {
 	u32 send_status;
 	int timeout;
@@ -141,6 +148,71 @@ u32 safe_apic_wait_icr_idle(void)
 	return send_status;
 }
 
+void xapic_icr_write(u32 low, u32 id)
+{
+	apic_write(APIC_ICR2, id << 24);
+	apic_write(APIC_ICR, low);
+}
+
+u64 xapic_icr_read(void)
+{
+	u32 icr1, icr2;
+
+	icr2 = apic_read(APIC_ICR2);
+	icr1 = apic_read(APIC_ICR);
+
+	return (icr1 | ((u64)icr2 << 32));
+}
+
+static struct apic_ops xapic_ops = {
+	.read = native_apic_mem_read,
+	.write = native_apic_mem_write,
+	.write_atomic = native_apic_mem_write_atomic,
+	.icr_read = xapic_icr_read,
+	.icr_write = xapic_icr_write,
+	.wait_icr_idle = xapic_wait_icr_idle,
+	.safe_wait_icr_idle = safe_xapic_wait_icr_idle,
+};
+
+struct apic_ops __read_mostly *apic_ops = &xapic_ops;
+
+EXPORT_SYMBOL_GPL(apic_ops);
+
+static void x2apic_wait_icr_idle(void)
+{
+	/* no need to wait for icr idle in x2apic */
+	return;
+}
+
+static u32 safe_x2apic_wait_icr_idle(void)
+{
+	/* no need to wait for icr idle in x2apic */
+	return 0;
+}
+
+void x2apic_icr_write(u32 low, u32 id)
+{
+	wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
+}
+
+u64 x2apic_icr_read(void)
+{
+	unsigned long val;
+
+	rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
+	return val;
+}
+
+static struct apic_ops x2apic_ops = {
+	.read = native_apic_msr_read,
+	.write = native_apic_msr_write,
+	.write_atomic = native_apic_msr_write,
+	.icr_read = x2apic_icr_read,
+	.icr_write = x2apic_icr_write,
+	.wait_icr_idle = x2apic_wait_icr_idle,
+	.safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
+};
+
 /**
  * enable_NMI_through_LVT0 - enable NMI through local vector table 0
  */
@@ -626,10 +698,10 @@ int __init verify_local_APIC(void)
 	/*
 	 * The ID register is read/write in a real APIC.
 	 */
-	reg0 = read_apic_id();
+	reg0 = apic_read(APIC_ID);
 	apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
 	apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
-	reg1 = read_apic_id();
+	reg1 = apic_read(APIC_ID);
 	apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
 	apic_write(APIC_ID, reg0);
 	if (reg1 != (reg0 ^ APIC_ID_MASK))
@@ -830,6 +902,125 @@ void __cpuinit end_local_APIC_setup(void)
 	apic_pm_activate();
 }
 
+void check_x2apic(void)
+{
+	int msr, msr2;
+
+	rdmsr(MSR_IA32_APICBASE, msr, msr2);
+
+	if (msr & X2APIC_ENABLE) {
+		printk("x2apic enabled by BIOS, switching to x2apic ops\n");
+		x2apic_preenabled = x2apic = 1;
+		apic_ops = &x2apic_ops;
+	}
+}
+
+void enable_x2apic(void)
+{
+	int msr, msr2;
+
+	rdmsr(MSR_IA32_APICBASE, msr, msr2);
+	if (!(msr & X2APIC_ENABLE)) {
+		printk("Enabling x2apic\n");
+		wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
+	}
+}
+
+void enable_IR_x2apic(void)
+{
+#ifdef CONFIG_INTR_REMAP
+	int ret;
+	unsigned long flags;
+
+	if (!cpu_has_x2apic)
+		return;
+
+	if (!x2apic_preenabled && disable_x2apic) {
+		printk(KERN_INFO
+		       "Skipped enabling x2apic and Interrupt-remapping "
+		       "because of nox2apic\n");
+		return;
+	}
+
+	if (x2apic_preenabled && disable_x2apic)
+		panic("Bios already enabled x2apic, can't enforce nox2apic");
+
+	if (!x2apic_preenabled && skip_ioapic_setup) {
+		printk(KERN_INFO
+		       "Skipped enabling x2apic and Interrupt-remapping "
+		       "because of skipping io-apic setup\n");
+		return;
+	}
+
+	ret = dmar_table_init();
+	if (ret) {
+		printk(KERN_INFO
+		       "dmar_table_init() failed with %d:\n", ret);
+
+		if (x2apic_preenabled)
+			panic("x2apic enabled by bios. But IR enabling failed");
+		else
+			printk(KERN_INFO
+			       "Not enabling x2apic,Intr-remapping\n");
+		return;
+	}
+
+	local_irq_save(flags);
+	mask_8259A();
+	save_mask_IO_APIC_setup();
+
+	ret = enable_intr_remapping(1);
+
+	if (ret && x2apic_preenabled) {
+		local_irq_restore(flags);
+		panic("x2apic enabled by bios. But IR enabling failed");
+	}
+
+	if (ret)
+		goto end;
+
+	if (!x2apic) {
+		x2apic = 1;
+		apic_ops = &x2apic_ops;
+		enable_x2apic();
+	}
+end:
+	if (ret)
+		/*
+		 * IR enabling failed
+		 */
+		restore_IO_APIC_setup();
+	else
+		reinit_intr_remapped_IO_APIC(x2apic_preenabled);
+
+	unmask_8259A();
+	local_irq_restore(flags);
+
+	if (!ret) {
+		if (!x2apic_preenabled)
+			printk(KERN_INFO
+			       "Enabled x2apic and interrupt-remapping\n");
+		else
+			printk(KERN_INFO
+			       "Enabled Interrupt-remapping\n");
+	} else
+		printk(KERN_ERR
+		       "Failed to enable Interrupt-remapping and x2apic\n");
+#else
+	if (!cpu_has_x2apic)
+		return;
+
+	if (x2apic_preenabled)
+		panic("x2apic enabled prior OS handover,"
+		      " enable CONFIG_INTR_REMAP");
+
+	printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
+	       " and x2apic\n");
+#endif
+
+	return;
+}
+
 /*
  * Detect and enable local APICs on non-SMP boards.
  * Original code written by Keir Fraser.
@@ -869,7 +1060,7 @@ void __init early_init_lapic_mapping(void)
 	 * Fetch the APIC ID of the BSP in case we have a
 	 * default configuration (or the MP table is broken).
 	 */
-	boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+	boot_cpu_physical_apicid = read_apic_id();
 }
 
 /**
@@ -877,6 +1068,11 @@ void __init early_init_lapic_mapping(void)
  */
 void __init init_apic_mappings(void)
 {
+	if (x2apic) {
+		boot_cpu_physical_apicid = read_apic_id();
+		return;
+	}
+
 	/*
 	 * If no local APIC can be found then set up a fake all
 	 * zeroes page to simulate the local APIC and another
@@ -896,7 +1092,7 @@ void __init init_apic_mappings(void)
 	 * Fetch the APIC ID of the BSP in case we have a
 	 * default configuration (or the MP table is broken).
 	 */
-	boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+	boot_cpu_physical_apicid = read_apic_id();
 }
 
 /*
@@ -915,6 +1111,9 @@ int __init APIC_init_uniprocessor(void)
 		return -1;
 	}
 
+	enable_IR_x2apic();
+	setup_apic_routing();
+
 	verify_local_APIC();
 
 	connect_bsp_APIC();
@@ -1096,6 +1295,11 @@ void __cpuinit generic_processor_info(int apicid, int version)
 	cpu_set(cpu, cpu_present_map);
 }
 
+int hard_smp_processor_id(void)
+{
+	return read_apic_id();
+}
+
 /*
  * Power management
  */
@@ -1132,7 +1336,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
 
 	maxlvt = lapic_get_maxlvt();
 
-	apic_pm_state.apic_id = read_apic_id();
+	apic_pm_state.apic_id = apic_read(APIC_ID);
 	apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
 	apic_pm_state.apic_ldr = apic_read(APIC_LDR);
 	apic_pm_state.apic_dfr = apic_read(APIC_DFR);
@@ -1167,10 +1371,14 @@ static int lapic_resume(struct sys_device *dev)
 	maxlvt = lapic_get_maxlvt();
 
 	local_irq_save(flags);
-	rdmsr(MSR_IA32_APICBASE, l, h);
-	l &= ~MSR_IA32_APICBASE_BASE;
-	l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
-	wrmsr(MSR_IA32_APICBASE, l, h);
+	if (!x2apic) {
+		rdmsr(MSR_IA32_APICBASE, l, h);
+		l &= ~MSR_IA32_APICBASE_BASE;
+		l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
+		wrmsr(MSR_IA32_APICBASE, l, h);
+	} else
+		enable_x2apic();
+
 	apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
 	apic_write(APIC_ID, apic_pm_state.apic_id);
 	apic_write(APIC_DFR, apic_pm_state.apic_dfr);
@@ -1310,6 +1518,15 @@ __cpuinit int apic_is_clustered_box(void)
 	return (clusters > 2);
 }
 
+static __init int setup_nox2apic(char *str)
+{
+	disable_x2apic = 1;
+	clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC);
+	return 0;
+}
+early_param("nox2apic", setup_nox2apic);
+
+
 /*
  * APIC command line parameters
  */
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
index 7b8cc72feb40..c6bee77ca9e6 100644
--- a/arch/x86/kernel/cpu/common_64.c
+++ b/arch/x86/kernel/cpu/common_64.c
@@ -608,6 +608,8 @@ void __cpuinit cpu_init(void)
 	barrier();
 
 	check_efer();
+	if (cpu != 0 && x2apic)
+		enable_x2apic();
 
 	/*
 	 * set up and load the per-CPU TSS
diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c
index e43ad4ad4cba..0bf4d37a0483 100644
--- a/arch/x86/kernel/cpu/feature_names.c
+++ b/arch/x86/kernel/cpu/feature_names.c
@@ -45,7 +45,7 @@ const char * const x86_cap_flags[NCAPINTS*32] = {
 	/* Intel-defined (#2) */
 	"pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
 	"tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
-	NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt",
+	NULL, NULL, "dca", "sse4_1", "sse4_2", "x2apic", NULL, "popcnt",
 	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 
 	/* VIA/Cyrix/Centaur-defined */
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index 1fa8be5bd217..3940d8161f8b 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -16,6 +16,7 @@
 #include <linux/ctype.h>
 #include <linux/init.h>
 #include <linux/hardirq.h>
+#include <linux/dmar.h>
 
 #include <asm/smp.h>
 #include <asm/ipi.h>
@@ -29,6 +30,15 @@ DEFINE_PER_CPU(int, x2apic_extra_bits);
 
 struct genapic __read_mostly *genapic = &apic_flat;
 
+static int x2apic_phys = 0;
+
+static int set_x2apic_phys_mode(char *arg)
+{
+	x2apic_phys = 1;
+	return 0;
+}
+early_param("x2apic_phys", set_x2apic_phys_mode);
+
 static enum uv_system_type uv_system_type;
 
 /*
@@ -38,7 +48,12 @@ void __init setup_apic_routing(void)
 {
 	if (uv_system_type == UV_NON_UNIQUE_APIC)
 		genapic = &apic_x2apic_uv_x;
-	else
+	else if (cpu_has_x2apic && intr_remapping_enabled) {
+		if (x2apic_phys)
+			genapic = &apic_x2apic_phys;
+		else
+			genapic = &apic_x2apic_cluster;
+	} else
 #ifdef CONFIG_ACPI
 	/*
 	 * Quirk: some x86_64 machines can only use physical APIC mode
@@ -61,7 +76,7 @@ void __init setup_apic_routing(void)
 
 /* Same for both flat and physical. */
 
-void send_IPI_self(int vector)
+void apic_send_IPI_self(int vector)
 {
 	__send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
 }
@@ -79,17 +94,6 @@ int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	return 0;
 }
 
-unsigned int read_apic_id(void)
-{
-	unsigned int id;
-
-	WARN_ON(preemptible() && num_online_cpus() > 1);
-	id = apic_read(APIC_ID);
-	if (uv_system_type >= UV_X2APIC)
-		id |= __get_cpu_var(x2apic_extra_bits);
-	return id;
-}
-
 enum uv_system_type get_uv_system_type(void)
 {
 	return uv_system_type;
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index 1a9c68845ee8..2c973cbf054f 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -15,9 +15,11 @@
 #include <linux/kernel.h>
 #include <linux/ctype.h>
 #include <linux/init.h>
+#include <linux/hardirq.h>
 #include <asm/smp.h>
 #include <asm/ipi.h>
 #include <asm/genapic.h>
+#include <mach_apicdef.h>
 
 static cpumask_t flat_target_cpus(void)
 {
@@ -95,9 +97,33 @@ static void flat_send_IPI_all(int vector)
 	__send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
 }
 
+static unsigned int get_apic_id(unsigned long x)
+{
+	unsigned int id;
+
+	id = (((x)>>24) & 0xFFu);
+	return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+	unsigned long x;
+
+	x = ((id & 0xFFu)<<24);
+	return x;
+}
+
+static unsigned int read_xapic_id(void)
+{
+	unsigned int id;
+
+	id = get_apic_id(apic_read(APIC_ID));
+	return id;
+}
+
 static int flat_apic_id_registered(void)
 {
-	return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map);
+	return physid_isset(read_xapic_id(), phys_cpu_present_map);
 }
 
 static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask)
@@ -121,8 +147,12 @@ struct genapic apic_flat = {
 	.send_IPI_all = flat_send_IPI_all,
 	.send_IPI_allbutself = flat_send_IPI_allbutself,
 	.send_IPI_mask = flat_send_IPI_mask,
+	.send_IPI_self = apic_send_IPI_self,
 	.cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
 	.phys_pkg_id = phys_pkg_id,
+	.get_apic_id = get_apic_id,
+	.set_apic_id = set_apic_id,
+	.apic_id_mask = (0xFFu<<24),
 };
 
 /*
@@ -185,6 +215,10 @@ struct genapic apic_physflat = {
 	.send_IPI_all = physflat_send_IPI_all,
 	.send_IPI_allbutself = physflat_send_IPI_allbutself,
 	.send_IPI_mask = physflat_send_IPI_mask,
+	.send_IPI_self = apic_send_IPI_self,
 	.cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
 	.phys_pkg_id = phys_pkg_id,
+	.get_apic_id = get_apic_id,
+	.set_apic_id = set_apic_id,
+	.apic_id_mask = (0xFFu<<24),
 };
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
new file mode 100644
index 000000000000..40bc0140d89f
--- /dev/null
+++ b/arch/x86/kernel/genx2apic_cluster.c
@@ -0,0 +1,153 @@
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <asm/smp.h>
+#include <asm/ipi.h>
+#include <asm/genapic.h>
+
+DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
+
+/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
+
+static cpumask_t x2apic_target_cpus(void)
+{
+	return cpumask_of_cpu(0);
+}
+
+/*
+ * for now each logical cpu is in its own vector allocation domain.
+ */
+static cpumask_t x2apic_vector_allocation_domain(int cpu)
+{
+	cpumask_t domain = CPU_MASK_NONE;
+	cpu_set(cpu, domain);
+	return domain;
+}
+
+static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
+				   unsigned int dest)
+{
+	unsigned long cfg;
+
+	cfg = __prepare_ICR(0, vector, dest);
+
+	/*
+	 * send the IPI.
+	 */
+	x2apic_icr_write(cfg, apicid);
+}
+
+/*
+ * for now, we send the IPI's one by one in the cpumask.
+ * TBD: Based on the cpu mask, we can send the IPI's to the cluster group
+ * at once. We have 16 cpu's in a cluster. This will minimize IPI register
+ * writes.
+ */
+static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
+{
+	unsigned long flags;
+	unsigned long query_cpu;
+
+	local_irq_save(flags);
+	for_each_cpu_mask(query_cpu, mask) {
+		__x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+				       vector, APIC_DEST_LOGICAL);
+	}
+	local_irq_restore(flags);
+}
+
+static void x2apic_send_IPI_allbutself(int vector)
+{
+	cpumask_t mask = cpu_online_map;
+
+	cpu_clear(smp_processor_id(), mask);
+
+	if (!cpus_empty(mask))
+		x2apic_send_IPI_mask(mask, vector);
+}
+
+static void x2apic_send_IPI_all(int vector)
+{
+	x2apic_send_IPI_mask(cpu_online_map, vector);
+}
+
+static int x2apic_apic_id_registered(void)
+{
+	return 1;
+}
+
+static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
+{
+	int cpu;
+
+	/*
+	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * May as well be the first.
+	 */
+	cpu = first_cpu(cpumask);
+	if ((unsigned)cpu < NR_CPUS)
+		return per_cpu(x86_cpu_to_logical_apicid, cpu);
+	else
+		return BAD_APICID;
+}
+
+static unsigned int get_apic_id(unsigned long x)
+{
+	unsigned int id;
+
+	id = x;
+	return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+	unsigned long x;
+
+	x = id;
+	return x;
+}
+
+static unsigned int x2apic_read_id(void)
+{
+	return apic_read(APIC_ID);
+}
+
+static unsigned int phys_pkg_id(int index_msb)
+{
+	return x2apic_read_id() >> index_msb;
+}
+
+static void x2apic_send_IPI_self(int vector)
+{
+	apic_write(APIC_SELF_IPI, vector);
+}
+
+static void init_x2apic_ldr(void)
+{
+	int cpu = smp_processor_id();
+
+	per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR);
+	return;
+}
+
+struct genapic apic_x2apic_cluster = {
+	.name = "cluster x2apic",
+	.int_delivery_mode = dest_LowestPrio,
+	.int_dest_mode = (APIC_DEST_LOGICAL != 0),
+	.target_cpus = x2apic_target_cpus,
+	.vector_allocation_domain = x2apic_vector_allocation_domain,
+	.apic_id_registered = x2apic_apic_id_registered,
+	.init_apic_ldr = init_x2apic_ldr,
+	.send_IPI_all = x2apic_send_IPI_all,
+	.send_IPI_allbutself = x2apic_send_IPI_allbutself,
+	.send_IPI_mask = x2apic_send_IPI_mask,
+	.send_IPI_self = x2apic_send_IPI_self,
+	.cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
+	.phys_pkg_id = phys_pkg_id,
+	.get_apic_id = get_apic_id,
+	.set_apic_id = set_apic_id,
+	.apic_id_mask = (0xFFFFFFFFu),
+};
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
new file mode 100644
index 000000000000..2f3c6ca19de9
--- /dev/null
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -0,0 +1,140 @@
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <asm/smp.h>
+#include <asm/ipi.h>
+#include <asm/genapic.h>
+
+
+/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
+
+static cpumask_t x2apic_target_cpus(void)
+{
+	return cpumask_of_cpu(0);
+}
+
+static cpumask_t x2apic_vector_allocation_domain(int cpu)
+{
+	cpumask_t domain = CPU_MASK_NONE;
+	cpu_set(cpu, domain);
+	return domain;
+}
+
+static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
+				   unsigned int dest)
+{
+	unsigned long cfg;
+
+	cfg = __prepare_ICR(0, vector, dest);
+
+	/*
+	 * send the IPI.
+	 */
+	x2apic_icr_write(cfg, apicid);
+}
+
+static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
+{
+	unsigned long flags;
+	unsigned long query_cpu;
+
+	local_irq_save(flags);
+	for_each_cpu_mask(query_cpu, mask) {
+		__x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
+				       vector, APIC_DEST_PHYSICAL);
+	}
+	local_irq_restore(flags);
+}
+
+static void x2apic_send_IPI_allbutself(int vector)
+{
+	cpumask_t mask = cpu_online_map;
+
+	cpu_clear(smp_processor_id(), mask);
+
+	if (!cpus_empty(mask))
+		x2apic_send_IPI_mask(mask, vector);
+}
+
+static void x2apic_send_IPI_all(int vector)
+{
+	x2apic_send_IPI_mask(cpu_online_map, vector);
+}
+
+static int x2apic_apic_id_registered(void)
+{
+	return 1;
+}
+
+static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
+{
+	int cpu;
+
+	/*
+	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * May as well be the first.
+	 */
+	cpu = first_cpu(cpumask);
+	if ((unsigned)cpu < NR_CPUS)
+		return per_cpu(x86_cpu_to_apicid, cpu);
+	else
+		return BAD_APICID;
+}
+
+static unsigned int get_apic_id(unsigned long x)
+{
+	unsigned int id;
+
+	id = x;
+	return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+	unsigned long x;
+
+	x = id;
+	return x;
+}
+
+static unsigned int x2apic_read_id(void)
+{
+	return apic_read(APIC_ID);
+}
+
+static unsigned int phys_pkg_id(int index_msb)
+{
+	return x2apic_read_id() >> index_msb;
+}
+
+void x2apic_send_IPI_self(int vector)
+{
+	apic_write(APIC_SELF_IPI, vector);
+}
+
+void init_x2apic_ldr(void)
+{
+	return;
+}
+
+struct genapic apic_x2apic_phys = {
+	.name = "physical x2apic",
+	.int_delivery_mode = dest_Fixed,
+	.int_dest_mode = (APIC_DEST_PHYSICAL != 0),
+	.target_cpus = x2apic_target_cpus,
+	.vector_allocation_domain = x2apic_vector_allocation_domain,
+	.apic_id_registered = x2apic_apic_id_registered,
+	.init_apic_ldr = init_x2apic_ldr,
+	.send_IPI_all = x2apic_send_IPI_all,
+	.send_IPI_allbutself = x2apic_send_IPI_allbutself,
+	.send_IPI_mask = x2apic_send_IPI_mask,
+	.send_IPI_self = x2apic_send_IPI_self,
+	.cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
+	.phys_pkg_id = phys_pkg_id,
+	.get_apic_id = get_apic_id,
+	.set_apic_id = set_apic_id,
+	.apic_id_mask = (0xFFFFFFFFu),
+};
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 711f11c30b06..3ca29cd8c23c 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -18,6 +18,7 @@
 #include <linux/sched.h>
 #include <linux/bootmem.h>
 #include <linux/module.h>
+#include <linux/hardirq.h>
 #include <asm/smp.h>
 #include <asm/ipi.h>
 #include <asm/genapic.h>
@@ -119,6 +120,10 @@ static int uv_apic_id_registered(void)
 	return 1;
 }
 
+static void uv_init_apic_ldr(void)
+{
+}
+
 static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
 {
 	int cpu;
@@ -134,9 +139,34 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
 	return BAD_APICID;
 }
 
+static unsigned int get_apic_id(unsigned long x)
+{
+	unsigned int id;
+
+	WARN_ON(preemptible() && num_online_cpus() > 1);
+	id = x | __get_cpu_var(x2apic_extra_bits);
+
+	return id;
+}
+
+static long set_apic_id(unsigned int id)
+{
+	unsigned long x;
+
+	/* maskout x2apic_extra_bits ? */
+	x = id;
+	return x;
+}
+
+static unsigned int uv_read_apic_id(void)
+{
+
+	return get_apic_id(apic_read(APIC_ID));
+}
+
 static unsigned int phys_pkg_id(int index_msb)
 {
-	return GET_APIC_ID(read_apic_id()) >> index_msb;
+	return uv_read_apic_id() >> index_msb;
 }
 
 #ifdef ZZZ		/* Needs x2apic patch */
@@ -153,12 +183,16 @@ struct genapic apic_x2apic_uv_x = {
 	.target_cpus = uv_target_cpus,
 	.vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */
 	.apic_id_registered = uv_apic_id_registered,
+	.init_apic_ldr = uv_init_apic_ldr,
 	.send_IPI_all = uv_send_IPI_all,
 	.send_IPI_allbutself = uv_send_IPI_allbutself,
 	.send_IPI_mask = uv_send_IPI_mask,
 	/* ZZZ.send_IPI_self = uv_send_IPI_self, */
 	.cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
 	.phys_pkg_id = phys_pkg_id,	/* Fixme ZZZ */
+	.get_apic_id = get_apic_id,
+	.set_apic_id = set_apic_id,
+	.apic_id_mask = (0xFFFFFFFFu),
 };
 
 static __cpuinit void set_x2apic_extra_bits(int pnode)
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index dc92b49d9204..4b8a53d841f7 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -282,6 +282,30 @@ static int __init i8259A_init_sysfs(void)
 
 device_initcall(i8259A_init_sysfs);
 
+void mask_8259A(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&i8259A_lock, flags);
+
+	outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */
+	outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-2 */
+
+	spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
+void unmask_8259A(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&i8259A_lock, flags);
+
+	outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
+	outb(cached_slave_mask, PIC_SLAVE_IMR);	  /* restore slave IRQ mask */
+
+	spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
 void init_8259A(int auto_eoi)
 {
 	unsigned long flags;
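mask_8259A()/unmask_8259A() exist so enable_IR_x2apic() can quiesce legacy interrupts while IO-APIC state is saved and interrupt remapping is switched on. The bracketing, reduced to a skeleton (error paths elided; the full sequence is in apic_64.c above):

    local_irq_save(flags);
    mask_8259A();                   /* silence both legacy PICs         */
    save_mask_IO_APIC_setup();      /* save and mask every unmasked RTE */

    ret = enable_intr_remapping(1); /* enable VT-d interrupt remapping  */

    if (ret)
            restore_IO_APIC_setup();        /* failed: put old RTEs back */
    else
            reinit_intr_remapped_IO_APIC(x2apic_preenabled);

    unmask_8259A();
    local_irq_restore(flags);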
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 558abf4c796a..a82065b0699e 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -1494,7 +1494,7 @@ void /*__init*/ print_local_APIC(void *dummy)
 		smp_processor_id(), hard_smp_processor_id());
 	v = apic_read(APIC_ID);
 	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v,
-			GET_APIC_ID(read_apic_id()));
+			GET_APIC_ID(v));
 	v = apic_read(APIC_LVR);
 	printk(KERN_INFO "... APIC VERSION: %08x\n", v);
 	ver = GET_APIC_VERSION(v);
@@ -1702,8 +1702,7 @@ void disable_IO_APIC(void)
 		entry.dest_mode       = 0; /* Physical */
 		entry.delivery_mode   = dest_ExtINT; /* ExtInt */
 		entry.vector          = 0;
-		entry.dest.physical.physical_dest =
-					GET_APIC_ID(read_apic_id());
+		entry.dest.physical.physical_dest = read_apic_id();
 
 		/*
 		 * Add it to the IO-APIC irq-routing table:
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 6510cde36b35..39f0be37e9a1 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -37,6 +37,7 @@
 #include <acpi/acpi_bus.h>
 #endif
 #include <linux/bootmem.h>
+#include <linux/dmar.h>
 
 #include <asm/idle.h>
 #include <asm/io.h>
@@ -48,6 +49,7 @@
 #include <asm/nmi.h>
 #include <asm/msidef.h>
 #include <asm/hypertransport.h>
+#include <asm/irq_remapping.h>
 
 #include <mach_ipi.h>
 #include <mach_apic.h>
@@ -107,6 +109,9 @@ DEFINE_SPINLOCK(vector_lock);
  */
 int nr_ioapic_registers[MAX_IO_APICS];
 
+/* I/O APIC RTE contents at the OS boot up */
+struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
+
 /* I/O APIC entries */
 struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
 int nr_ioapics;
@@ -302,7 +307,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 		pin = entry->pin;
 		if (pin == -1)
 			break;
-		io_apic_write(apic, 0x11 + pin*2, dest);
+		/*
+		 * With interrupt-remapping, destination information comes
+		 * from interrupt-remapping table entry.
+		 */
+		if (!irq_remapped(irq))
+			io_apic_write(apic, 0x11 + pin*2, dest);
 		reg = io_apic_read(apic, 0x10 + pin*2);
 		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
 		reg |= vector;
@@ -439,6 +449,69 @@ static void clear_IO_APIC (void)
 			clear_IO_APIC_pin(apic, pin);
 }
 
+/*
+ * Saves and masks all the unmasked IO-APIC RTE's
+ */
+int save_mask_IO_APIC_setup(void)
+{
+	union IO_APIC_reg_01 reg_01;
+	unsigned long flags;
+	int apic, pin;
+
+	/*
+	 * The number of IO-APIC IRQ registers (== #pins):
+	 */
+	for (apic = 0; apic < nr_ioapics; apic++) {
+		spin_lock_irqsave(&ioapic_lock, flags);
+		reg_01.raw = io_apic_read(apic, 1);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+		nr_ioapic_registers[apic] = reg_01.bits.entries+1;
+	}
+
+	for (apic = 0; apic < nr_ioapics; apic++) {
+		early_ioapic_entries[apic] =
+			kzalloc(sizeof(struct IO_APIC_route_entry) *
+				nr_ioapic_registers[apic], GFP_KERNEL);
+		if (!early_ioapic_entries[apic])
+			return -ENOMEM;
+	}
+
+	for (apic = 0; apic < nr_ioapics; apic++)
+		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+			struct IO_APIC_route_entry entry;
+
+			entry = early_ioapic_entries[apic][pin] =
+				ioapic_read_entry(apic, pin);
+			if (!entry.mask) {
+				entry.mask = 1;
+				ioapic_write_entry(apic, pin, entry);
+			}
+		}
+	return 0;
+}
+
+void restore_IO_APIC_setup(void)
+{
+	int apic, pin;
+
+	for (apic = 0; apic < nr_ioapics; apic++)
+		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
+			ioapic_write_entry(apic, pin,
+					   early_ioapic_entries[apic][pin]);
+}
+
+void reinit_intr_remapped_IO_APIC(int intr_remapping)
+{
+	/*
+	 * for now plain restore of previous settings.
+	 * TBD: In the case of OS enabling interrupt-remapping,
+	 * IO-APIC RTE's need to be setup to point to interrupt-remapping
+	 * table entries. for now, do a plain restore, and wait for
+	 * the setup_IO_APIC_irqs() to do proper initialization.
+	 */
+	restore_IO_APIC_setup();
+}
+
 int skip_ioapic_setup;
 int ioapic_force;
 
@@ -833,18 +906,98 @@ void setup_vector_irq(int cpu)
 
 
 static struct irq_chip ioapic_chip;
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip ir_ioapic_chip;
+#endif
 
 static void ioapic_register_intr(int irq, unsigned long trigger)
 {
-	if (trigger) {
+	if (trigger)
 		irq_desc[irq].status |= IRQ_LEVEL;
-		set_irq_chip_and_handler_name(irq, &ioapic_chip,
-					      handle_fasteoi_irq, "fasteoi");
-	} else {
+	else
 		irq_desc[irq].status &= ~IRQ_LEVEL;
+
+#ifdef CONFIG_INTR_REMAP
+	if (irq_remapped(irq)) {
+		irq_desc[irq].status |= IRQ_MOVE_PCNTXT;
+		if (trigger)
+			set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
+						      handle_fasteoi_irq,
+						      "fasteoi");
+		else
+			set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
+						      handle_edge_irq, "edge");
+		return;
+	}
+#endif
+	if (trigger)
+		set_irq_chip_and_handler_name(irq, &ioapic_chip,
+					      handle_fasteoi_irq,
+					      "fasteoi");
+	else
 		set_irq_chip_and_handler_name(irq, &ioapic_chip,
 					      handle_edge_irq, "edge");
+}
+
+static int setup_ioapic_entry(int apic, int irq,
+			      struct IO_APIC_route_entry *entry,
+			      unsigned int destination, int trigger,
+			      int polarity, int vector)
+{
+	/*
+	 * add it to the IO-APIC irq-routing table:
+	 */
+	memset(entry,0,sizeof(*entry));
+
+#ifdef CONFIG_INTR_REMAP
+	if (intr_remapping_enabled) {
+		struct intel_iommu *iommu = map_ioapic_to_ir(apic);
+		struct irte irte;
+		struct IR_IO_APIC_route_entry *ir_entry =
+			(struct IR_IO_APIC_route_entry *) entry;
+		int index;
+
+		if (!iommu)
+			panic("No mapping iommu for ioapic %d\n", apic);
+
+		index = alloc_irte(iommu, irq, 1);
+		if (index < 0)
+			panic("Failed to allocate IRTE for ioapic %d\n", apic);
+
+		memset(&irte, 0, sizeof(irte));
+
+		irte.present = 1;
+		irte.dst_mode = INT_DEST_MODE;
+		irte.trigger_mode = trigger;
+		irte.dlvry_mode = INT_DELIVERY_MODE;
+		irte.vector = vector;
+		irte.dest_id = IRTE_DEST(destination);
+
+		modify_irte(irq, &irte);
+
+		ir_entry->index2 = (index >> 15) & 0x1;
+		ir_entry->zero = 0;
+		ir_entry->format = 1;
+		ir_entry->index = (index & 0x7fff);
+	} else
+#endif
+	{
+		entry->delivery_mode = INT_DELIVERY_MODE;
+		entry->dest_mode = INT_DEST_MODE;
+		entry->dest = destination;
 	}
+
+	entry->mask = 0;	/* enable IRQ */
+	entry->trigger = trigger;
+	entry->polarity = polarity;
+	entry->vector = vector;
+
+	/* Mask level triggered irqs.
+	 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
+	 */
+	if (trigger)
+		entry->mask = 1;
+	return 0;
 }
@@ -869,24 +1022,15 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
 		    apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
 		    irq, trigger, polarity);
 
-	/*
-	 * add it to the IO-APIC irq-routing table:
-	 */
-	memset(&entry,0,sizeof(entry));
 
-	entry.delivery_mode = INT_DELIVERY_MODE;
-	entry.dest_mode = INT_DEST_MODE;
-	entry.dest = cpu_mask_to_apicid(mask);
-	entry.mask = 0;	/* enable IRQ */
-	entry.trigger = trigger;
-	entry.polarity = polarity;
-	entry.vector = cfg->vector;
-
-	/* Mask level triggered irqs.
-	 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
-	 */
-	if (trigger)
-		entry.mask = 1;
+	if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
+			       cpu_mask_to_apicid(mask), trigger, polarity,
+			       cfg->vector)) {
+		printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
+		       mp_ioapics[apic].mp_apicid, pin);
+		__clear_irq_vector(irq);
+		return;
+	}
 
 	ioapic_register_intr(irq, trigger);
 	if (irq < 16)
@@ -938,6 +1082,9 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
 {
 	struct IO_APIC_route_entry entry;
 
+	if (intr_remapping_enabled)
+		return;
+
 	memset(&entry, 0, sizeof(entry));
 
 	/*
@@ -1084,6 +1231,7 @@ static __apicdebuginit void print_APIC_bitfield (int base)
 void __apicdebuginit print_local_APIC(void * dummy)
 {
 	unsigned int v, ver, maxlvt;
+	unsigned long icr;
 
 	if (apic_verbosity == APIC_QUIET)
 		return;
@@ -1091,7 +1239,7 @@ void __apicdebuginit print_local_APIC(void * dummy)
 	printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
 		smp_processor_id(), hard_smp_processor_id());
 	v = apic_read(APIC_ID);
-	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, GET_APIC_ID(read_apic_id()));
+	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, read_apic_id());
 	v = apic_read(APIC_LVR);
 	printk(KERN_INFO "... APIC VERSION: %08x\n", v);
 	ver = GET_APIC_VERSION(v);
@@ -1127,10 +1275,9 @@ void __apicdebuginit print_local_APIC(void * dummy)
 	v = apic_read(APIC_ESR);
 	printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
 
-	v = apic_read(APIC_ICR);
-	printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
-	v = apic_read(APIC_ICR2);
-	printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
+	icr = apic_icr_read();
+	printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
+	printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
 
 	v = apic_read(APIC_LVTT);
 	printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
@@ -1285,7 +1432,7 @@ void disable_IO_APIC(void)
 		entry.dest_mode       = 0; /* Physical */
 		entry.delivery_mode   = dest_ExtINT; /* ExtInt */
 		entry.vector          = 0;
-		entry.dest            = GET_APIC_ID(read_apic_id());
+		entry.dest            = read_apic_id();
 
 		/*
 		 * Add it to the IO-APIC irq-routing table:
@@ -1393,6 +1540,147 @@ static int ioapic_retrigger_irq(unsigned int irq)
  */
 
 #ifdef CONFIG_SMP
+
+#ifdef CONFIG_INTR_REMAP
+static void ir_irq_migration(struct work_struct *work);
+
+static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
+
+/*
+ * Migrate the IO-APIC irq in the presence of intr-remapping.
+ *
+ * For edge triggered, irq migration is a simple atomic update(of vector
+ * and cpu destination) of IRTE and flush the hardware cache.
+ *
+ * For level triggered, we need to modify the io-apic RTE aswell with the update
+ * vector information, along with modifying IRTE with vector and destination.
+ * So irq migration for level triggered is little bit more complex compared to
+ * edge triggered migration. But the good news is, we use the same algorithm
+ * for level triggered migration as we have today, only difference being,
+ * we now initiate the irq migration from process context instead of the
+ * interrupt context.
+ *
+ * In future, when we do a directed EOI (combined with cpu EOI broadcast
+ * suppression) to the IO-APIC, level triggered irq migration will also be
+ * as simple as edge triggered migration and we can do the irq migration
+ * with a simple atomic update to IO-APIC RTE.
+ */
+static void migrate_ioapic_irq(int irq, cpumask_t mask)
+{
+	struct irq_cfg *cfg = irq_cfg + irq;
+	struct irq_desc *desc = irq_desc + irq;
+	cpumask_t tmp, cleanup_mask;
+	struct irte irte;
+	int modify_ioapic_rte = desc->status & IRQ_LEVEL;
+	unsigned int dest;
+	unsigned long flags;
+
+	cpus_and(tmp, mask, cpu_online_map);
+	if (cpus_empty(tmp))
+		return;
+
+	if (get_irte(irq, &irte))
+		return;
+
+	if (assign_irq_vector(irq, mask))
+		return;
+
+	cpus_and(tmp, cfg->domain, mask);
+	dest = cpu_mask_to_apicid(tmp);
+
+	if (modify_ioapic_rte) {
+		spin_lock_irqsave(&ioapic_lock, flags);
+		__target_IO_APIC_irq(irq, dest, cfg->vector);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+	}
+
+	irte.vector = cfg->vector;
+	irte.dest_id = IRTE_DEST(dest);
+
+	/*
+	 * Modified the IRTE and flushes the Interrupt entry cache.
+	 */
+	modify_irte(irq, &irte);
+
+	if (cfg->move_in_progress) {
+		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		cfg->move_in_progress = 0;
+	}
+
+	irq_desc[irq].affinity = mask;
+}
+
+static int migrate_irq_remapped_level(int irq)
+{
+	int ret = -1;
+
+	mask_IO_APIC_irq(irq);
+
+	if (io_apic_level_ack_pending(irq)) {
+		/*
+		 * Interrupt in progress. Migrating irq now will change the
+		 * vector information in the IO-APIC RTE and that will confuse
+		 * the EOI broadcast performed by cpu.
+		 * So, delay the irq migration to the next instance.
+		 */
+		schedule_delayed_work(&ir_migration_work, 1);
+		goto unmask;
+	}
+
+	/* everthing is clear. we have right of way */
+	migrate_ioapic_irq(irq, irq_desc[irq].pending_mask);
+
+	ret = 0;
+	irq_desc[irq].status &= ~IRQ_MOVE_PENDING;
+	cpus_clear(irq_desc[irq].pending_mask);
+
+unmask:
+	unmask_IO_APIC_irq(irq);
+	return ret;
+}
+
+static void ir_irq_migration(struct work_struct *work)
+{
+	int irq;
+
+	for (irq = 0; irq < NR_IRQS; irq++) {
+		struct irq_desc *desc = irq_desc + irq;
+		if (desc->status & IRQ_MOVE_PENDING) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&desc->lock, flags);
+			if (!desc->chip->set_affinity ||
+			    !(desc->status & IRQ_MOVE_PENDING)) {
+				desc->status &= ~IRQ_MOVE_PENDING;
+				spin_unlock_irqrestore(&desc->lock, flags);
+				continue;
+			}
+
+			desc->chip->set_affinity(irq,
+						 irq_desc[irq].pending_mask);
+			spin_unlock_irqrestore(&desc->lock, flags);
+		}
+	}
+}
+
+/*
+ * Migrates the IRQ destination in the process context.
+ */
+static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+{
+	if (irq_desc[irq].status & IRQ_LEVEL) {
+		irq_desc[irq].status |= IRQ_MOVE_PENDING;
+		irq_desc[irq].pending_mask = mask;
+		migrate_irq_remapped_level(irq);
+		return;
+	}
+
+	migrate_ioapic_irq(irq, mask);
+}
+#endif
+
 asmlinkage void smp_irq_move_cleanup_interrupt(void)
 {
 	unsigned vector, me;
@@ -1449,6 +1737,17 @@ static void irq_complete_move(unsigned int irq)
 #else
 static inline void irq_complete_move(unsigned int irq) {}
 #endif
+#ifdef CONFIG_INTR_REMAP
+static void ack_x2apic_level(unsigned int irq)
+{
+	ack_x2APIC_irq();
+}
+
+static void ack_x2apic_edge(unsigned int irq)
+{
+	ack_x2APIC_irq();
+}
+#endif
 
 static void ack_apic_edge(unsigned int irq)
 {
@@ -1523,6 +1822,21 @@ static struct irq_chip ioapic_chip __read_mostly = {
 	.retrigger	= ioapic_retrigger_irq,
 };
 
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip ir_ioapic_chip __read_mostly = {
+	.name		= "IR-IO-APIC",
+	.startup	= startup_ioapic_irq,
+	.mask		= mask_IO_APIC_irq,
+	.unmask		= unmask_IO_APIC_irq,
+	.ack		= ack_x2apic_edge,
+	.eoi		= ack_x2apic_level,
+#ifdef CONFIG_SMP
+	.set_affinity	= set_ir_ioapic_affinity_irq,
+#endif
+	.retrigger	= ioapic_retrigger_irq,
+};
+#endif
+
 static inline void init_IO_APIC_traps(void)
 {
 	int irq;
@@ -1707,6 +2021,8 @@ static inline void __init check_timer(void)
 	 * 8259A.
 	 */
 	if (pin1 == -1) {
+		if (intr_remapping_enabled)
+			panic("BIOS bug: timer not connected to IO-APIC");
 		pin1 = pin2;
 		apic1 = apic2;
 		no_pin1 = 1;
@@ -1733,6 +2049,8 @@ static inline void __init check_timer(void)
 			clear_IO_APIC_pin(0, pin1);
 			goto out;
 		}
+		if (intr_remapping_enabled)
+			panic("timer doesn't work through Interrupt-remapped IO-APIC");
 		clear_IO_APIC_pin(apic1, pin1);
 		if (!no_pin1)
 			apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: "
@@ -1969,6 +2287,9 @@ void destroy_irq(unsigned int irq)
1969 2287
1970 dynamic_irq_cleanup(irq); 2288 dynamic_irq_cleanup(irq);
1971 2289
2290#ifdef CONFIG_INTR_REMAP
2291 free_irte(irq);
2292#endif
1972 spin_lock_irqsave(&vector_lock, flags); 2293 spin_lock_irqsave(&vector_lock, flags);
1973 __clear_irq_vector(irq); 2294 __clear_irq_vector(irq);
1974 spin_unlock_irqrestore(&vector_lock, flags); 2295 spin_unlock_irqrestore(&vector_lock, flags);
@@ -1987,11 +2308,42 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
1987 2308
1988 tmp = TARGET_CPUS; 2309 tmp = TARGET_CPUS;
1989 err = assign_irq_vector(irq, tmp); 2310 err = assign_irq_vector(irq, tmp);
1990 if (!err) { 2311 if (err)
1991 cpus_and(tmp, cfg->domain, tmp); 2312 return err;
1992 dest = cpu_mask_to_apicid(tmp); 2313
2314 cpus_and(tmp, cfg->domain, tmp);
2315 dest = cpu_mask_to_apicid(tmp);
2316
2317#ifdef CONFIG_INTR_REMAP
2318 if (irq_remapped(irq)) {
2319 struct irte irte;
2320 int ir_index;
2321 u16 sub_handle;
2322
2323 ir_index = map_irq_to_irte_handle(irq, &sub_handle);
2324 BUG_ON(ir_index == -1);
2325
 2326		memset(&irte, 0, sizeof(irte));
2327
2328 irte.present = 1;
2329 irte.dst_mode = INT_DEST_MODE;
2330 irte.trigger_mode = 0; /* edge */
2331 irte.dlvry_mode = INT_DELIVERY_MODE;
2332 irte.vector = cfg->vector;
2333 irte.dest_id = IRTE_DEST(dest);
2334
2335 modify_irte(irq, &irte);
1993 2336
1994 msg->address_hi = MSI_ADDR_BASE_HI; 2337 msg->address_hi = MSI_ADDR_BASE_HI;
2338 msg->data = sub_handle;
2339 msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
2340 MSI_ADDR_IR_SHV |
2341 MSI_ADDR_IR_INDEX1(ir_index) |
2342 MSI_ADDR_IR_INDEX2(ir_index);
2343 } else
2344#endif
2345 {
2346 msg->address_hi = MSI_ADDR_BASE_HI;
1995 msg->address_lo = 2347 msg->address_lo =
1996 MSI_ADDR_BASE_LO | 2348 MSI_ADDR_BASE_LO |
1997 ((INT_DEST_MODE == 0) ? 2349 ((INT_DEST_MODE == 0) ?
@@ -2041,6 +2393,55 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2041 write_msi_msg(irq, &msg); 2393 write_msi_msg(irq, &msg);
2042 irq_desc[irq].affinity = mask; 2394 irq_desc[irq].affinity = mask;
2043} 2395}
2396
2397#ifdef CONFIG_INTR_REMAP
2398/*
2399 * Migrate the MSI irq to another cpumask. This migration is
 2400 * done in process context using the interrupt-remapping hardware.
2401 */
2402static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2403{
2404 struct irq_cfg *cfg = irq_cfg + irq;
2405 unsigned int dest;
2406 cpumask_t tmp, cleanup_mask;
2407 struct irte irte;
2408
2409 cpus_and(tmp, mask, cpu_online_map);
2410 if (cpus_empty(tmp))
2411 return;
2412
2413 if (get_irte(irq, &irte))
2414 return;
2415
2416 if (assign_irq_vector(irq, mask))
2417 return;
2418
2419 cpus_and(tmp, cfg->domain, mask);
2420 dest = cpu_mask_to_apicid(tmp);
2421
2422 irte.vector = cfg->vector;
2423 irte.dest_id = IRTE_DEST(dest);
2424
2425 /*
2426 * atomically update the IRTE with the new destination and vector.
2427 */
2428 modify_irte(irq, &irte);
2429
2430 /*
2431 * After this point, all the interrupts will start arriving
2432 * at the new destination. So, time to cleanup the previous
2433 * vector allocation.
2434 */
2435 if (cfg->move_in_progress) {
2436 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
2437 cfg->move_cleanup_count = cpus_weight(cleanup_mask);
2438 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
2439 cfg->move_in_progress = 0;
2440 }
2441
2442 irq_desc[irq].affinity = mask;
2443}
2444#endif
2044#endif /* CONFIG_SMP */ 2445#endif /* CONFIG_SMP */
2045 2446
2046/* 2447/*
@@ -2058,26 +2459,157 @@ static struct irq_chip msi_chip = {
2058 .retrigger = ioapic_retrigger_irq, 2459 .retrigger = ioapic_retrigger_irq,
2059}; 2460};
2060 2461
2061int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) 2462#ifdef CONFIG_INTR_REMAP
2463static struct irq_chip msi_ir_chip = {
2464 .name = "IR-PCI-MSI",
2465 .unmask = unmask_msi_irq,
2466 .mask = mask_msi_irq,
2467 .ack = ack_x2apic_edge,
2468#ifdef CONFIG_SMP
2469 .set_affinity = ir_set_msi_irq_affinity,
2470#endif
2471 .retrigger = ioapic_retrigger_irq,
2472};
2473
2474/*
2475 * Map the PCI dev to the corresponding remapping hardware unit
2476 * and allocate 'nvec' consecutive interrupt-remapping table entries
2477 * in it.
2478 */
2479static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
2480{
2481 struct intel_iommu *iommu;
2482 int index;
2483
2484 iommu = map_dev_to_ir(dev);
2485 if (!iommu) {
2486 printk(KERN_ERR
2487 "Unable to map PCI %s to iommu\n", pci_name(dev));
2488 return -ENOENT;
2489 }
2490
2491 index = alloc_irte(iommu, irq, nvec);
2492 if (index < 0) {
2493 printk(KERN_ERR
2494 "Unable to allocate %d IRTE for PCI %s\n", nvec,
2495 pci_name(dev));
2496 return -ENOSPC;
2497 }
2498 return index;
2499}
2500#endif
2501
2502static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
2062{ 2503{
2504 int ret;
2063 struct msi_msg msg; 2505 struct msi_msg msg;
2506
2507 ret = msi_compose_msg(dev, irq, &msg);
2508 if (ret < 0)
2509 return ret;
2510
2511 set_irq_msi(irq, desc);
2512 write_msi_msg(irq, &msg);
2513
2514#ifdef CONFIG_INTR_REMAP
2515 if (irq_remapped(irq)) {
2516 struct irq_desc *desc = irq_desc + irq;
2517 /*
2518 * irq migration in process context
2519 */
2520 desc->status |= IRQ_MOVE_PCNTXT;
2521 set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
2522 } else
2523#endif
2524 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
2525
2526 return 0;
2527}
2528
2529int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
2530{
2064 int irq, ret; 2531 int irq, ret;
2532
2065 irq = create_irq(); 2533 irq = create_irq();
2066 if (irq < 0) 2534 if (irq < 0)
2067 return irq; 2535 return irq;
2068 2536
2069 ret = msi_compose_msg(dev, irq, &msg); 2537#ifdef CONFIG_INTR_REMAP
2538 if (!intr_remapping_enabled)
2539 goto no_ir;
2540
2541 ret = msi_alloc_irte(dev, irq, 1);
2542 if (ret < 0)
2543 goto error;
2544no_ir:
2545#endif
2546 ret = setup_msi_irq(dev, desc, irq);
2070 if (ret < 0) { 2547 if (ret < 0) {
2071 destroy_irq(irq); 2548 destroy_irq(irq);
2072 return ret; 2549 return ret;
2073 } 2550 }
2551 return 0;
2074 2552
2075 set_irq_msi(irq, desc); 2553#ifdef CONFIG_INTR_REMAP
2076 write_msi_msg(irq, &msg); 2554error:
2555 destroy_irq(irq);
2556 return ret;
2557#endif
2558}
2077 2559
2078 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); 2560int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
2561{
2562 int irq, ret, sub_handle;
2563 struct msi_desc *desc;
2564#ifdef CONFIG_INTR_REMAP
2565 struct intel_iommu *iommu = 0;
2566 int index = 0;
2567#endif
2079 2568
2569 sub_handle = 0;
2570 list_for_each_entry(desc, &dev->msi_list, list) {
2571 irq = create_irq();
2572 if (irq < 0)
2573 return irq;
2574#ifdef CONFIG_INTR_REMAP
2575 if (!intr_remapping_enabled)
2576 goto no_ir;
2577
2578 if (!sub_handle) {
2579 /*
 2580			 * allocate the consecutive block of IRTEs
2581 * for 'nvec'
2582 */
2583 index = msi_alloc_irte(dev, irq, nvec);
2584 if (index < 0) {
2585 ret = index;
2586 goto error;
2587 }
2588 } else {
2589 iommu = map_dev_to_ir(dev);
2590 if (!iommu) {
2591 ret = -ENOENT;
2592 goto error;
2593 }
2594 /*
 2595			 * set up the mapping between the irq and the IRTE
 2596			 * base index, with the sub_handle pointing to the
 2597			 * appropriate interrupt remap table entry.
2598 */
2599 set_irte_irq(irq, iommu, index, sub_handle);
2600 }
2601no_ir:
2602#endif
2603 ret = setup_msi_irq(dev, desc, irq);
2604 if (ret < 0)
2605 goto error;
2606 sub_handle++;
2607 }
2080 return 0; 2608 return 0;
2609
2610error:
2611 destroy_irq(irq);
2612 return ret;
2081} 2613}
2082 2614
2083void arch_teardown_msi_irq(unsigned int irq) 2615void arch_teardown_msi_irq(unsigned int irq)
@@ -2325,6 +2857,10 @@ void __init setup_ioapic_dest(void)
2325 setup_IO_APIC_irq(ioapic, pin, irq, 2857 setup_IO_APIC_irq(ioapic, pin, irq,
2326 irq_trigger(irq_entry), 2858 irq_trigger(irq_entry),
2327 irq_polarity(irq_entry)); 2859 irq_polarity(irq_entry));
2860#ifdef CONFIG_INTR_REMAP
2861 else if (intr_remapping_enabled)
2862 set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
2863#endif
2328 else 2864 else
2329 set_ioapic_affinity_irq(irq, TARGET_CPUS); 2865 set_ioapic_affinity_irq(irq, TARGET_CPUS);
2330 } 2866 }
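
The msi_compose_msg() rework above is the heart of the remapped-MSI path: with interrupt-remapping enabled, the MSI address/data no longer carry the vector and destination, only an IRTE index, and the real routing lives in the remapping table. A minimal sketch of that flow using only the helpers this patch introduces (dst_mode/dlvry_mode setup and error paths trimmed, so read it as an illustration rather than the exact kernel code):

	static int remap_one_msi(struct pci_dev *dev, int irq, u8 vector, u32 dest)
	{
		struct irte irte;
		/* reserve one IRTE in the IOMMU covering this device */
		int index = msi_alloc_irte(dev, irq, 1);

		if (index < 0)
			return index;

		memset(&irte, 0, sizeof(irte));
		irte.present = 1;
		irte.trigger_mode = 0;		/* MSIs are edge triggered */
		irte.vector = vector;
		irte.dest_id = IRTE_DEST(dest);
		modify_irte(irq, &irte);	/* program the hardware IRTE */

		/* only this index ends up in MSI_ADDR_IR_INDEX1/INDEX2 */
		return index;
	}

Because the device-visible message never changes, ir_set_msi_irq_affinity() can later retarget the interrupt by rewriting the IRTE alone.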
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 3b25e49380c6..70e1f3e287fb 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -545,7 +545,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
545 generic_bigsmp_probe(); 545 generic_bigsmp_probe();
546#endif 546#endif
547 547
548#ifdef CONFIG_X86_32
548 setup_apic_routing(); 549 setup_apic_routing();
550#endif
549 if (!num_processors) 551 if (!num_processors)
550 printk(KERN_ERR "MPTABLE: no processors registered!\n"); 552 printk(KERN_ERR "MPTABLE: no processors registered!\n");
551 return num_processors; 553 return num_processors;
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index e0f571d58c19..e0f139106c7e 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -360,9 +360,6 @@ struct pv_cpu_ops pv_cpu_ops = {
360 360
361struct pv_apic_ops pv_apic_ops = { 361struct pv_apic_ops pv_apic_ops = {
362#ifdef CONFIG_X86_LOCAL_APIC 362#ifdef CONFIG_X86_LOCAL_APIC
363 .apic_write = native_apic_write,
364 .apic_write_atomic = native_apic_write_atomic,
365 .apic_read = native_apic_read,
366 .setup_boot_clock = setup_boot_APIC_clock, 363 .setup_boot_clock = setup_boot_APIC_clock,
367 .setup_secondary_clock = setup_secondary_APIC_clock, 364 .setup_secondary_clock = setup_secondary_APIC_clock,
368 .startup_ipi_hook = paravirt_nop, 365 .startup_ipi_hook = paravirt_nop,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 531b55b8e81a..6121ffd46b9e 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -735,6 +735,8 @@ void __init setup_arch(char **cmdline_p)
735 num_physpages = max_pfn; 735 num_physpages = max_pfn;
736 736
737 check_efer(); 737 check_efer();
 738	if (cpu_has_x2apic)
739 check_x2apic();
738 740
739 /* How many end-of-memory variables you have, grandma! */ 741 /* How many end-of-memory variables you have, grandma! */
740 /* need this before calling reserve_initrd */ 742 /* need this before calling reserve_initrd */
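
check_x2apic() has to run this early because firmware may hand over the boot CPU with x2apic mode already enabled, and that must be known before any APIC register is touched. A sketch of the detection, assuming the MSR layout from the Intel SDM rather than code shown in this hunk (IA32_APIC_BASE is MSR 0x1b; bit 10 is the x2apic-enable EXTD bit):

	#define MSR_IA32_APICBASE	0x0000001b
	#define X2APIC_ENABLE		(1UL << 10)

	static int x2apic_preenabled;	/* assumption: name mirrors apic_64.c */

	void check_x2apic(void)
	{
		u64 msr;

		rdmsrl(MSR_IA32_APICBASE, msr);
		if (msr & X2APIC_ENABLE)
			x2apic_preenabled = 1;	/* BIOS left us in x2apic mode */
	}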
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 687376ab07e8..23c3b3d1f4cc 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -123,7 +123,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
123 123
124static atomic_t init_deasserted; 124static atomic_t init_deasserted;
125 125
126static int boot_cpu_logical_apicid;
127 126
128/* representing cpus for which sibling maps can be computed */ 127/* representing cpus for which sibling maps can be computed */
129static cpumask_t cpu_sibling_setup_map; 128static cpumask_t cpu_sibling_setup_map;
@@ -165,6 +164,8 @@ static void unmap_cpu_to_node(int cpu)
165#endif 164#endif
166 165
167#ifdef CONFIG_X86_32 166#ifdef CONFIG_X86_32
167static int boot_cpu_logical_apicid;
168
168u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = 169u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
169 { [0 ... NR_CPUS-1] = BAD_APICID }; 170 { [0 ... NR_CPUS-1] = BAD_APICID };
170 171
@@ -210,7 +211,7 @@ static void __cpuinit smp_callin(void)
210 /* 211 /*
211 * (This works even if the APIC is not enabled.) 212 * (This works even if the APIC is not enabled.)
212 */ 213 */
213 phys_id = GET_APIC_ID(read_apic_id()); 214 phys_id = read_apic_id();
214 cpuid = smp_processor_id(); 215 cpuid = smp_processor_id();
215 if (cpu_isset(cpuid, cpu_callin_map)) { 216 if (cpu_isset(cpuid, cpu_callin_map)) {
216 panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, 217 panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
@@ -546,8 +547,7 @@ static inline void __inquire_remote_apic(int apicid)
546 printk(KERN_CONT 547 printk(KERN_CONT
547 "a previous APIC delivery may have failed\n"); 548 "a previous APIC delivery may have failed\n");
548 549
549 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); 550 apic_icr_write(APIC_DM_REMRD | regs[i], apicid);
550 apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
551 551
552 timeout = 0; 552 timeout = 0;
553 do { 553 do {
@@ -579,11 +579,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
579 int maxlvt; 579 int maxlvt;
580 580
581 /* Target chip */ 581 /* Target chip */
582 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
583
584 /* Boot on the stack */ 582 /* Boot on the stack */
585 /* Kick the second */ 583 /* Kick the second */
586 apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); 584 apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid);
587 585
588 Dprintk("Waiting for send to finish...\n"); 586 Dprintk("Waiting for send to finish...\n");
589 send_status = safe_apic_wait_icr_idle(); 587 send_status = safe_apic_wait_icr_idle();
@@ -639,13 +637,11 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
639 /* 637 /*
640 * Turn INIT on target chip 638 * Turn INIT on target chip
641 */ 639 */
642 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
643
644 /* 640 /*
645 * Send IPI 641 * Send IPI
646 */ 642 */
647 apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT 643 apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
648 | APIC_DM_INIT); 644 phys_apicid);
649 645
650 Dprintk("Waiting for send to finish...\n"); 646 Dprintk("Waiting for send to finish...\n");
651 send_status = safe_apic_wait_icr_idle(); 647 send_status = safe_apic_wait_icr_idle();
@@ -655,10 +651,8 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
655 Dprintk("Deasserting INIT.\n"); 651 Dprintk("Deasserting INIT.\n");
656 652
657 /* Target chip */ 653 /* Target chip */
658 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
659
660 /* Send IPI */ 654 /* Send IPI */
661 apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); 655 apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);
662 656
663 Dprintk("Waiting for send to finish...\n"); 657 Dprintk("Waiting for send to finish...\n");
664 send_status = safe_apic_wait_icr_idle(); 658 send_status = safe_apic_wait_icr_idle();
@@ -703,12 +697,10 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
703 */ 697 */
704 698
705 /* Target chip */ 699 /* Target chip */
706 apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
707
708 /* Boot on the stack */ 700 /* Boot on the stack */
709 /* Kick the second */ 701 /* Kick the second */
710 apic_write_around(APIC_ICR, APIC_DM_STARTUP 702 apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12),
711 | (start_eip >> 12)); 703 phys_apicid);
712 704
713 /* 705 /*
714 * Give the other CPU some time to accept the IPI. 706 * Give the other CPU some time to accept the IPI.
@@ -1147,10 +1139,17 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1147 * Setup boot CPU information 1139 * Setup boot CPU information
1148 */ 1140 */
1149 smp_store_cpu_info(0); /* Final full version of the data */ 1141 smp_store_cpu_info(0); /* Final full version of the data */
1142#ifdef CONFIG_X86_32
1150 boot_cpu_logical_apicid = logical_smp_processor_id(); 1143 boot_cpu_logical_apicid = logical_smp_processor_id();
1144#endif
1151 current_thread_info()->cpu = 0; /* needed? */ 1145 current_thread_info()->cpu = 0; /* needed? */
1152 set_cpu_sibling_map(0); 1146 set_cpu_sibling_map(0);
1153 1147
1148#ifdef CONFIG_X86_64
1149 enable_IR_x2apic();
1150 setup_apic_routing();
1151#endif
1152
1154 if (smp_sanity_check(max_cpus) < 0) { 1153 if (smp_sanity_check(max_cpus) < 0) {
1155 printk(KERN_INFO "SMP disabled\n"); 1154 printk(KERN_INFO "SMP disabled\n");
1156 disable_smp(); 1155 disable_smp();
@@ -1158,9 +1157,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1158 } 1157 }
1159 1158
1160 preempt_disable(); 1159 preempt_disable();
1161 if (GET_APIC_ID(read_apic_id()) != boot_cpu_physical_apicid) { 1160 if (read_apic_id() != boot_cpu_physical_apicid) {
1162 panic("Boot APIC ID in local APIC unexpected (%d vs %d)", 1161 panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
1163 GET_APIC_ID(read_apic_id()), boot_cpu_physical_apicid); 1162 read_apic_id(), boot_cpu_physical_apicid);
1164 /* Or can we switch back to PIC here? */ 1163 /* Or can we switch back to PIC here? */
1165 } 1164 }
1166 preempt_enable(); 1165 preempt_enable();
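
Every removed apic_write_around(APIC_ICR2)/apic_write_around(APIC_ICR) pair collapses into one apic_icr_write() call because the ICR is where the xAPIC and x2APIC access patterns genuinely differ. A sketch of the two implementations the new hook abstracts over (illustrative; the real versions live behind apic_ops, not in this hunk, and APIC_BASE_MSR is assumed to be the 0x800 x2apic MSR window):

	/* xAPIC: two MMIO writes, destination first, because storing the
	 * low word is what actually sends the IPI */
	static void xapic_icr_write(u32 low, u32 id)
	{
		apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
		apic_write(APIC_ICR, low);
	}

	/* x2APIC: a single 64-bit MSR write; the destination sits in the
	 * high half and no ICR-idle polling is needed afterwards */
	static void x2apic_icr_write(u32 low, u32 id)
	{
		wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((u64)id) << 32 | low);
	}

That asymmetry is also why wakeup_secondary_cpu() no longer writes APIC_ICR2 by hand; a single call now works in either mode.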
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index b15346092b7b..237082833c14 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -904,9 +904,9 @@ static inline int __init activate_vmi(void)
904#endif 904#endif
905 905
906#ifdef CONFIG_X86_LOCAL_APIC 906#ifdef CONFIG_X86_LOCAL_APIC
907 para_fill(pv_apic_ops.apic_read, APICRead); 907 para_fill(apic_ops->read, APICRead);
908 para_fill(pv_apic_ops.apic_write, APICWrite); 908 para_fill(apic_ops->write, APICWrite);
909 para_fill(pv_apic_ops.apic_write_atomic, APICWrite); 909 para_fill(apic_ops->write_atomic, APICWrite);
910#endif 910#endif
911 911
912 /* 912 /*
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 50dad44fb542..675ee7a6475e 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -783,14 +783,45 @@ static void lguest_wbinvd(void)
783 * code qualifies for Advanced. It will also never interrupt anything. It 783 * code qualifies for Advanced. It will also never interrupt anything. It
784 * does, however, allow us to get through the Linux boot code. */ 784 * does, however, allow us to get through the Linux boot code. */
785#ifdef CONFIG_X86_LOCAL_APIC 785#ifdef CONFIG_X86_LOCAL_APIC
786static void lguest_apic_write(unsigned long reg, u32 v) 786static void lguest_apic_write(u32 reg, u32 v)
787{ 787{
788} 788}
789 789
790static u32 lguest_apic_read(unsigned long reg) 790static u32 lguest_apic_read(u32 reg)
791{ 791{
792 return 0; 792 return 0;
793} 793}
794
795static u64 lguest_apic_icr_read(void)
796{
797 return 0;
798}
799
800static void lguest_apic_icr_write(u32 low, u32 id)
801{
 802	/* Warn to see if there are any stray references */
803 WARN_ON(1);
804}
805
806static void lguest_apic_wait_icr_idle(void)
807{
808 return;
809}
810
811static u32 lguest_apic_safe_wait_icr_idle(void)
812{
813 return 0;
814}
815
816static struct apic_ops lguest_basic_apic_ops = {
817 .read = lguest_apic_read,
818 .write = lguest_apic_write,
819 .write_atomic = lguest_apic_write,
820 .icr_read = lguest_apic_icr_read,
821 .icr_write = lguest_apic_icr_write,
822 .wait_icr_idle = lguest_apic_wait_icr_idle,
823 .safe_wait_icr_idle = lguest_apic_safe_wait_icr_idle,
824};
794#endif 825#endif
795 826
796/* STOP! Until an interrupt comes in. */ 827/* STOP! Until an interrupt comes in. */
@@ -990,9 +1021,7 @@ __init void lguest_init(void)
990 1021
991#ifdef CONFIG_X86_LOCAL_APIC 1022#ifdef CONFIG_X86_LOCAL_APIC
992 /* apic read/write intercepts */ 1023 /* apic read/write intercepts */
993 pv_apic_ops.apic_write = lguest_apic_write; 1024 apic_ops = &lguest_basic_apic_ops;
994 pv_apic_ops.apic_write_atomic = lguest_apic_write;
995 pv_apic_ops.apic_read = lguest_apic_read;
996#endif 1025#endif
997 1026
998 /* time operations */ 1027 /* time operations */
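
lguest_basic_apic_ops above (and xen_basic_apic_ops further down) populate the apic_ops indirection that replaces the three pv_apic_ops hooks deleted in the paravirt.c hunk. From the two guest tables, the accessor shape is:

	struct apic_ops {
		u32 (*read)(u32 reg);
		void (*write)(u32 reg, u32 v);
		void (*write_atomic)(u32 reg, u32 v);
		u64 (*icr_read)(void);
		void (*icr_write)(u32 low, u32 id);
		void (*wait_icr_idle)(void);
		u32 (*safe_wait_icr_idle)(void);
	};

	extern struct apic_ops *apic_ops;

	/* callers never see the indirection, e.g. (sketch): */
	static inline u32 apic_read(u32 reg)
	{
		return apic_ops->read(reg);
	}

A guest opts out of the hardware APIC simply by pointing apic_ops at its own stub table, exactly as both hunks do; the native tables with the MMIO and MSR implementations are not shown in this section.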
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c
index 59d771714559..b31f2800638e 100644
--- a/arch/x86/mach-generic/bigsmp.c
+++ b/arch/x86/mach-generic/bigsmp.c
@@ -5,17 +5,16 @@
5#define APIC_DEFINITION 1 5#define APIC_DEFINITION 1
6#include <linux/threads.h> 6#include <linux/threads.h>
7#include <linux/cpumask.h> 7#include <linux/cpumask.h>
8#include <asm/smp.h>
9#include <asm/mpspec.h> 8#include <asm/mpspec.h>
10#include <asm/genapic.h> 9#include <asm/genapic.h>
11#include <asm/fixmap.h> 10#include <asm/fixmap.h>
12#include <asm/apicdef.h> 11#include <asm/apicdef.h>
13#include <linux/kernel.h> 12#include <linux/kernel.h>
14#include <linux/smp.h>
15#include <linux/init.h> 13#include <linux/init.h>
16#include <linux/dmi.h> 14#include <linux/dmi.h>
17#include <asm/mach-bigsmp/mach_apic.h>
18#include <asm/mach-bigsmp/mach_apicdef.h> 15#include <asm/mach-bigsmp/mach_apicdef.h>
16#include <linux/smp.h>
17#include <asm/mach-bigsmp/mach_apic.h>
19#include <asm/mach-bigsmp/mach_ipi.h> 18#include <asm/mach-bigsmp/mach_ipi.h>
20#include <asm/mach-default/mach_mpparse.h> 19#include <asm/mach-default/mach_mpparse.h>
21 20
diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c
index 4742626f08c4..9b30547d746e 100644
--- a/arch/x86/mach-generic/es7000.c
+++ b/arch/x86/mach-generic/es7000.c
@@ -4,16 +4,15 @@
4#define APIC_DEFINITION 1 4#define APIC_DEFINITION 1
5#include <linux/threads.h> 5#include <linux/threads.h>
6#include <linux/cpumask.h> 6#include <linux/cpumask.h>
7#include <asm/smp.h>
8#include <asm/mpspec.h> 7#include <asm/mpspec.h>
9#include <asm/genapic.h> 8#include <asm/genapic.h>
10#include <asm/fixmap.h> 9#include <asm/fixmap.h>
11#include <asm/apicdef.h> 10#include <asm/apicdef.h>
12#include <linux/kernel.h> 11#include <linux/kernel.h>
13#include <linux/string.h> 12#include <linux/string.h>
14#include <linux/smp.h>
15#include <linux/init.h> 13#include <linux/init.h>
16#include <asm/mach-es7000/mach_apicdef.h> 14#include <asm/mach-es7000/mach_apicdef.h>
15#include <linux/smp.h>
17#include <asm/mach-es7000/mach_apic.h> 16#include <asm/mach-es7000/mach_apic.h>
18#include <asm/mach-es7000/mach_ipi.h> 17#include <asm/mach-es7000/mach_ipi.h>
19#include <asm/mach-es7000/mach_mpparse.h> 18#include <asm/mach-es7000/mach_mpparse.h>
diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c
index 8091e68764c4..95c07efff6b7 100644
--- a/arch/x86/mach-generic/numaq.c
+++ b/arch/x86/mach-generic/numaq.c
@@ -4,7 +4,6 @@
4#define APIC_DEFINITION 1 4#define APIC_DEFINITION 1
5#include <linux/threads.h> 5#include <linux/threads.h>
6#include <linux/cpumask.h> 6#include <linux/cpumask.h>
7#include <linux/smp.h>
8#include <asm/mpspec.h> 7#include <asm/mpspec.h>
9#include <asm/genapic.h> 8#include <asm/genapic.h>
10#include <asm/fixmap.h> 9#include <asm/fixmap.h>
@@ -12,8 +11,9 @@
12#include <linux/kernel.h> 11#include <linux/kernel.h>
13#include <linux/string.h> 12#include <linux/string.h>
14#include <linux/init.h> 13#include <linux/init.h>
15#include <asm/mach-numaq/mach_apic.h>
16#include <asm/mach-numaq/mach_apicdef.h> 14#include <asm/mach-numaq/mach_apicdef.h>
15#include <linux/smp.h>
16#include <asm/mach-numaq/mach_apic.h>
17#include <asm/mach-numaq/mach_ipi.h> 17#include <asm/mach-numaq/mach_ipi.h>
18#include <asm/mach-numaq/mach_mpparse.h> 18#include <asm/mach-numaq/mach_mpparse.h>
19#include <asm/mach-numaq/mach_wakecpu.h> 19#include <asm/mach-numaq/mach_wakecpu.h>
diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c
index a97ea0f35b1e..752edd96b1bf 100644
--- a/arch/x86/mach-generic/summit.c
+++ b/arch/x86/mach-generic/summit.c
@@ -4,17 +4,16 @@
4#define APIC_DEFINITION 1 4#define APIC_DEFINITION 1
5#include <linux/threads.h> 5#include <linux/threads.h>
6#include <linux/cpumask.h> 6#include <linux/cpumask.h>
7#include <asm/smp.h>
8#include <asm/mpspec.h> 7#include <asm/mpspec.h>
9#include <asm/genapic.h> 8#include <asm/genapic.h>
10#include <asm/fixmap.h> 9#include <asm/fixmap.h>
11#include <asm/apicdef.h> 10#include <asm/apicdef.h>
12#include <linux/kernel.h> 11#include <linux/kernel.h>
13#include <linux/string.h> 12#include <linux/string.h>
14#include <linux/smp.h>
15#include <linux/init.h> 13#include <linux/init.h>
16#include <asm/mach-summit/mach_apic.h>
17#include <asm/mach-summit/mach_apicdef.h> 14#include <asm/mach-summit/mach_apicdef.h>
15#include <linux/smp.h>
16#include <asm/mach-summit/mach_apic.h>
18#include <asm/mach-summit/mach_ipi.h> 17#include <asm/mach-summit/mach_ipi.h>
19#include <asm/mach-summit/mach_mpparse.h> 18#include <asm/mach-summit/mach_mpparse.h>
20 19
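
The four mach-generic hunks above are the same mechanical change: the <asm/smp.h>/<linux/smp.h> includes move and mach_apicdef.h now precedes mach_apic.h. The likely dependency (an inference; the diff itself does not state it): read_apic_id() in each subarch mach_apic.h now expands GET_APIC_ID(), which mach_apicdef.h provides, so the order matters:

	#include <asm/mach-bigsmp/mach_apicdef.h>	/* defines GET_APIC_ID() */
	#include <linux/smp.h>
	#include <asm/mach-bigsmp/mach_apic.h>		/* read_apic_id() uses it */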
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index bb508456ef52..402f3e2c7bee 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -548,16 +548,48 @@ static void xen_io_delay(void)
548} 548}
549 549
550#ifdef CONFIG_X86_LOCAL_APIC 550#ifdef CONFIG_X86_LOCAL_APIC
551static u32 xen_apic_read(unsigned long reg) 551static u32 xen_apic_read(u32 reg)
552{ 552{
553 return 0; 553 return 0;
554} 554}
555 555
556static void xen_apic_write(unsigned long reg, u32 val) 556static void xen_apic_write(u32 reg, u32 val)
557{ 557{
 558	/* Warn to see if there are any stray references */ 558	/* Warn to see if there are any stray references */
559 WARN_ON(1); 559 WARN_ON(1);
560} 560}
561
562static u64 xen_apic_icr_read(void)
563{
564 return 0;
565}
566
567static void xen_apic_icr_write(u32 low, u32 id)
568{
 569	/* Warn to see if there are any stray references */
570 WARN_ON(1);
571}
572
573static void xen_apic_wait_icr_idle(void)
574{
575 return;
576}
577
578static u32 xen_safe_apic_wait_icr_idle(void)
579{
580 return 0;
581}
582
583static struct apic_ops xen_basic_apic_ops = {
584 .read = xen_apic_read,
585 .write = xen_apic_write,
586 .write_atomic = xen_apic_write,
587 .icr_read = xen_apic_icr_read,
588 .icr_write = xen_apic_icr_write,
589 .wait_icr_idle = xen_apic_wait_icr_idle,
590 .safe_wait_icr_idle = xen_safe_apic_wait_icr_idle,
591};
592
561#endif 593#endif
562 594
563static void xen_flush_tlb(void) 595static void xen_flush_tlb(void)
@@ -1130,9 +1162,6 @@ static const struct pv_irq_ops xen_irq_ops __initdata = {
1130 1162
1131static const struct pv_apic_ops xen_apic_ops __initdata = { 1163static const struct pv_apic_ops xen_apic_ops __initdata = {
1132#ifdef CONFIG_X86_LOCAL_APIC 1164#ifdef CONFIG_X86_LOCAL_APIC
1133 .apic_write = xen_apic_write,
1134 .apic_write_atomic = xen_apic_write,
1135 .apic_read = xen_apic_read,
1136 .setup_boot_clock = paravirt_nop, 1165 .setup_boot_clock = paravirt_nop,
1137 .setup_secondary_clock = paravirt_nop, 1166 .setup_secondary_clock = paravirt_nop,
1138 .startup_ipi_hook = paravirt_nop, 1167 .startup_ipi_hook = paravirt_nop,
@@ -1294,6 +1323,13 @@ asmlinkage void __init xen_start_kernel(void)
1294 pv_apic_ops = xen_apic_ops; 1323 pv_apic_ops = xen_apic_ops;
1295 pv_mmu_ops = xen_mmu_ops; 1324 pv_mmu_ops = xen_mmu_ops;
1296 1325
1326#ifdef CONFIG_X86_LOCAL_APIC
1327 /*
1328 * set up the basic apic ops.
1329 */
1330 apic_ops = &xen_basic_apic_ops;
1331#endif
1332
1297 if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { 1333 if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
1298 pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; 1334 pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
1299 pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; 1335 pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 7d63f8ced24b..4b47f4ece5b7 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -26,6 +26,8 @@ obj-$(CONFIG_HT_IRQ) += htirq.o
26# Build Intel IOMMU support 26# Build Intel IOMMU support
27obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o 27obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o
28 28
29obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o
30
29# 31#
30# Some architectures use the generic PCI setup functions 32# Some architectures use the generic PCI setup functions
31# 33#
diff --git a/drivers/pci/dma_remapping.h b/drivers/pci/dma_remapping.h
new file mode 100644
index 000000000000..bff5c65f81dc
--- /dev/null
+++ b/drivers/pci/dma_remapping.h
@@ -0,0 +1,157 @@
1#ifndef _DMA_REMAPPING_H
2#define _DMA_REMAPPING_H
3
4/*
5 * We need a fixed PAGE_SIZE of 4K irrespective of
6 * arch PAGE_SIZE for IOMMU page tables.
7 */
8#define PAGE_SHIFT_4K (12)
9#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K)
10#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K)
11#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
12
13#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K)
14#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
15#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
16
17
18/*
19 * 0: Present
20 * 1-11: Reserved
21 * 12-63: Context Ptr (12 - (haw-1))
22 * 64-127: Reserved
23 */
24struct root_entry {
25 u64 val;
26 u64 rsvd1;
27};
28#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry))
29static inline bool root_present(struct root_entry *root)
30{
31 return (root->val & 1);
32}
33static inline void set_root_present(struct root_entry *root)
34{
35 root->val |= 1;
36}
37static inline void set_root_value(struct root_entry *root, unsigned long value)
38{
39 root->val |= value & PAGE_MASK_4K;
40}
41
42struct context_entry;
43static inline struct context_entry *
44get_context_addr_from_root(struct root_entry *root)
45{
46 return (struct context_entry *)
47 (root_present(root)?phys_to_virt(
48 root->val & PAGE_MASK_4K):
49 NULL);
50}
51
52/*
53 * low 64 bits:
54 * 0: present
55 * 1: fault processing disable
56 * 2-3: translation type
57 * 12-63: address space root
58 * high 64 bits:
59 * 0-2: address width
 60 * 3-6: avail
61 * 8-23: domain id
62 */
63struct context_entry {
64 u64 lo;
65 u64 hi;
66};
67#define context_present(c) ((c).lo & 1)
68#define context_fault_disable(c) (((c).lo >> 1) & 1)
69#define context_translation_type(c) (((c).lo >> 2) & 3)
70#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
71#define context_address_width(c) ((c).hi & 7)
72#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
73
74#define context_set_present(c) do {(c).lo |= 1;} while (0)
75#define context_set_fault_enable(c) \
76 do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
77#define context_set_translation_type(c, val) \
78 do { \
79 (c).lo &= (((u64)-1) << 4) | 3; \
80 (c).lo |= ((val) & 3) << 2; \
81 } while (0)
82#define CONTEXT_TT_MULTI_LEVEL 0
83#define context_set_address_root(c, val) \
84 do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
85#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
86#define context_set_domain_id(c, val) \
87 do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
88#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
89
90/*
91 * 0: readable
92 * 1: writable
93 * 2-6: reserved
94 * 7: super page
95 * 8-11: available
96 * 12-63: Host physcial address
97 */
98struct dma_pte {
99 u64 val;
100};
101#define dma_clear_pte(p) do {(p).val = 0;} while (0)
102
103#define DMA_PTE_READ (1)
104#define DMA_PTE_WRITE (2)
105
106#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
107#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
108#define dma_set_pte_prot(p, prot) \
109 do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
110#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K)
111#define dma_set_pte_addr(p, addr) do {\
112 (p).val |= ((addr) & PAGE_MASK_4K); } while (0)
113#define dma_pte_present(p) (((p).val & 3) != 0)
114
115struct intel_iommu;
116
117struct dmar_domain {
118 int id; /* domain id */
119 struct intel_iommu *iommu; /* back pointer to owning iommu */
120
121 struct list_head devices; /* all devices' list */
122 struct iova_domain iovad; /* iova's that belong to this domain */
123
124 struct dma_pte *pgd; /* virtual address */
125 spinlock_t mapping_lock; /* page table lock */
126 int gaw; /* max guest address width */
127
128 /* adjusted guest address width, 0 is level 2 30-bit */
129 int agaw;
130
131#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
132 int flags;
133};
134
135/* PCI domain-device relationship */
136struct device_domain_info {
137 struct list_head link; /* link to domain siblings */
138 struct list_head global; /* link to global list */
 139	u8 bus; /* PCI bus number */
140 u8 devfn; /* PCI devfn number */
 141	struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */
142 struct dmar_domain *domain; /* pointer to domain */
143};
144
145extern int init_dmars(void);
146extern void free_dmar_iommu(struct intel_iommu *iommu);
147
148extern int dmar_disabled;
149
150#ifndef CONFIG_DMAR_GFX_WA
151static inline void iommu_prepare_gfx_mapping(void)
152{
153 return;
154}
155#endif /* !CONFIG_DMAR_GFX_WA */
156
157#endif
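
The context_set_*() macros above take the entry by value, so a caller builds up an entry field by field. A usage sketch consistent with how intel-iommu.c programs a device context (the three parameter names are the caller's, not this header's):

	static void context_map(struct context_entry *ce, u16 domain_id,
				u64 pgd_phys, int agaw)
	{
		context_clear_entry(*ce);
		context_set_domain_id(*ce, domain_id);
		context_set_address_width(*ce, agaw);
		context_set_address_root(*ce, pgd_phys);	/* 4K aligned */
		context_set_translation_type(*ce, CONTEXT_TT_MULTI_LEVEL);
		context_set_fault_enable(*ce);
		context_set_present(*ce);	/* last: this arms the entry */
	}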
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index f941f609dbf3..bd2c01674f5e 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -19,13 +19,16 @@
19 * Author: Shaohua Li <shaohua.li@intel.com> 19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> 20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21 * 21 *
22 * This file implements early detection/parsing of DMA Remapping Devices 22 * This file implements early detection/parsing of Remapping Devices
23 * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI 23 * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
24 * tables. 24 * tables.
25 *
26 * These routines are used by both DMA-remapping and Interrupt-remapping
25 */ 27 */
26 28
27#include <linux/pci.h> 29#include <linux/pci.h>
28#include <linux/dmar.h> 30#include <linux/dmar.h>
31#include <linux/timer.h>
29#include "iova.h" 32#include "iova.h"
30#include "intel-iommu.h" 33#include "intel-iommu.h"
31 34
@@ -37,7 +40,6 @@
37 * these units are not supported by the architecture. 40 * these units are not supported by the architecture.
38 */ 41 */
39LIST_HEAD(dmar_drhd_units); 42LIST_HEAD(dmar_drhd_units);
40LIST_HEAD(dmar_rmrr_units);
41 43
42static struct acpi_table_header * __initdata dmar_tbl; 44static struct acpi_table_header * __initdata dmar_tbl;
43 45
@@ -53,11 +55,6 @@ static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
53 list_add(&drhd->list, &dmar_drhd_units); 55 list_add(&drhd->list, &dmar_drhd_units);
54} 56}
55 57
56static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
57{
58 list_add(&rmrr->list, &dmar_rmrr_units);
59}
60
61static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope, 58static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
62 struct pci_dev **dev, u16 segment) 59 struct pci_dev **dev, u16 segment)
63{ 60{
@@ -172,19 +169,37 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header)
172 struct acpi_dmar_hardware_unit *drhd; 169 struct acpi_dmar_hardware_unit *drhd;
173 struct dmar_drhd_unit *dmaru; 170 struct dmar_drhd_unit *dmaru;
174 int ret = 0; 171 int ret = 0;
175 static int include_all;
176 172
177 dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL); 173 dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
178 if (!dmaru) 174 if (!dmaru)
179 return -ENOMEM; 175 return -ENOMEM;
180 176
177 dmaru->hdr = header;
181 drhd = (struct acpi_dmar_hardware_unit *)header; 178 drhd = (struct acpi_dmar_hardware_unit *)header;
182 dmaru->reg_base_addr = drhd->address; 179 dmaru->reg_base_addr = drhd->address;
183 dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */ 180 dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
184 181
182 ret = alloc_iommu(dmaru);
183 if (ret) {
184 kfree(dmaru);
185 return ret;
186 }
187 dmar_register_drhd_unit(dmaru);
188 return 0;
189}
190
191static int __init
192dmar_parse_dev(struct dmar_drhd_unit *dmaru)
193{
194 struct acpi_dmar_hardware_unit *drhd;
195 static int include_all;
196 int ret;
197
198 drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
199
185 if (!dmaru->include_all) 200 if (!dmaru->include_all)
186 ret = dmar_parse_dev_scope((void *)(drhd + 1), 201 ret = dmar_parse_dev_scope((void *)(drhd + 1),
187 ((void *)drhd) + header->length, 202 ((void *)drhd) + drhd->header.length,
188 &dmaru->devices_cnt, &dmaru->devices, 203 &dmaru->devices_cnt, &dmaru->devices,
189 drhd->segment); 204 drhd->segment);
190 else { 205 else {
@@ -197,37 +212,59 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header)
197 include_all = 1; 212 include_all = 1;
198 } 213 }
199 214
200 if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all)) 215 if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all)) {
216 list_del(&dmaru->list);
201 kfree(dmaru); 217 kfree(dmaru);
202 else 218 }
203 dmar_register_drhd_unit(dmaru);
204 return ret; 219 return ret;
205} 220}
206 221
222#ifdef CONFIG_DMAR
223LIST_HEAD(dmar_rmrr_units);
224
225static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
226{
227 list_add(&rmrr->list, &dmar_rmrr_units);
228}
229
230
207static int __init 231static int __init
208dmar_parse_one_rmrr(struct acpi_dmar_header *header) 232dmar_parse_one_rmrr(struct acpi_dmar_header *header)
209{ 233{
210 struct acpi_dmar_reserved_memory *rmrr; 234 struct acpi_dmar_reserved_memory *rmrr;
211 struct dmar_rmrr_unit *rmrru; 235 struct dmar_rmrr_unit *rmrru;
212 int ret = 0;
213 236
214 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL); 237 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
215 if (!rmrru) 238 if (!rmrru)
216 return -ENOMEM; 239 return -ENOMEM;
217 240
241 rmrru->hdr = header;
218 rmrr = (struct acpi_dmar_reserved_memory *)header; 242 rmrr = (struct acpi_dmar_reserved_memory *)header;
219 rmrru->base_address = rmrr->base_address; 243 rmrru->base_address = rmrr->base_address;
220 rmrru->end_address = rmrr->end_address; 244 rmrru->end_address = rmrr->end_address;
245
246 dmar_register_rmrr_unit(rmrru);
247 return 0;
248}
249
250static int __init
251rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
252{
253 struct acpi_dmar_reserved_memory *rmrr;
254 int ret;
255
256 rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
221 ret = dmar_parse_dev_scope((void *)(rmrr + 1), 257 ret = dmar_parse_dev_scope((void *)(rmrr + 1),
222 ((void *)rmrr) + header->length, 258 ((void *)rmrr) + rmrr->header.length,
223 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment); 259 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
224 260
225 if (ret || (rmrru->devices_cnt == 0)) 261 if (ret || (rmrru->devices_cnt == 0)) {
262 list_del(&rmrru->list);
226 kfree(rmrru); 263 kfree(rmrru);
227 else 264 }
228 dmar_register_rmrr_unit(rmrru);
229 return ret; 265 return ret;
230} 266}
267#endif
231 268
232static void __init 269static void __init
233dmar_table_print_dmar_entry(struct acpi_dmar_header *header) 270dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
@@ -252,6 +289,7 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
252 } 289 }
253} 290}
254 291
292
255/** 293/**
256 * parse_dmar_table - parses the DMA reporting table 294 * parse_dmar_table - parses the DMA reporting table
257 */ 295 */
@@ -284,7 +322,9 @@ parse_dmar_table(void)
284 ret = dmar_parse_one_drhd(entry_header); 322 ret = dmar_parse_one_drhd(entry_header);
285 break; 323 break;
286 case ACPI_DMAR_TYPE_RESERVED_MEMORY: 324 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
325#ifdef CONFIG_DMAR
287 ret = dmar_parse_one_rmrr(entry_header); 326 ret = dmar_parse_one_rmrr(entry_header);
327#endif
288 break; 328 break;
289 default: 329 default:
290 printk(KERN_WARNING PREFIX 330 printk(KERN_WARNING PREFIX
@@ -300,15 +340,77 @@ parse_dmar_table(void)
300 return ret; 340 return ret;
301} 341}
302 342
343int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
344 struct pci_dev *dev)
345{
346 int index;
347
348 while (dev) {
349 for (index = 0; index < cnt; index++)
350 if (dev == devices[index])
351 return 1;
303 352
304int __init dmar_table_init(void) 353 /* Check our parent */
354 dev = dev->bus->self;
355 }
356
357 return 0;
358}
359
360struct dmar_drhd_unit *
361dmar_find_matched_drhd_unit(struct pci_dev *dev)
305{ 362{
363 struct dmar_drhd_unit *drhd = NULL;
306 364
365 list_for_each_entry(drhd, &dmar_drhd_units, list) {
366 if (drhd->include_all || dmar_pci_device_match(drhd->devices,
367 drhd->devices_cnt, dev))
368 return drhd;
369 }
370
371 return NULL;
372}
373
374int __init dmar_dev_scope_init(void)
375{
376 struct dmar_drhd_unit *drhd;
377 int ret = -ENODEV;
378
379 for_each_drhd_unit(drhd) {
380 ret = dmar_parse_dev(drhd);
381 if (ret)
382 return ret;
383 }
384
385#ifdef CONFIG_DMAR
386 {
387 struct dmar_rmrr_unit *rmrr;
388 for_each_rmrr_units(rmrr) {
389 ret = rmrr_parse_dev(rmrr);
390 if (ret)
391 return ret;
392 }
393 }
394#endif
395
396 return ret;
397}
398
399
400int __init dmar_table_init(void)
401{
402 static int dmar_table_initialized;
307 int ret; 403 int ret;
308 404
405 if (dmar_table_initialized)
406 return 0;
407
408 dmar_table_initialized = 1;
409
309 ret = parse_dmar_table(); 410 ret = parse_dmar_table();
310 if (ret) { 411 if (ret) {
311 printk(KERN_INFO PREFIX "parse DMAR table failure.\n"); 412 if (ret != -ENODEV)
413 printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
312 return ret; 414 return ret;
313 } 415 }
314 416
@@ -317,11 +419,14 @@ int __init dmar_table_init(void)
317 return -ENODEV; 419 return -ENODEV;
318 } 420 }
319 421
320 if (list_empty(&dmar_rmrr_units)) { 422#ifdef CONFIG_DMAR
423 if (list_empty(&dmar_rmrr_units))
321 printk(KERN_INFO PREFIX "No RMRR found\n"); 424 printk(KERN_INFO PREFIX "No RMRR found\n");
322 return -ENODEV; 425#endif
323 }
324 426
427#ifdef CONFIG_INTR_REMAP
428 parse_ioapics_under_ir();
429#endif
325 return 0; 430 return 0;
326} 431}
327 432
@@ -343,3 +448,255 @@ int __init early_dmar_detect(void)
343 448
344 return (ACPI_SUCCESS(status) ? 1 : 0); 449 return (ACPI_SUCCESS(status) ? 1 : 0);
345} 450}
451
452void __init detect_intel_iommu(void)
453{
454 int ret;
455
456 ret = early_dmar_detect();
457
458#ifdef CONFIG_DMAR
459 {
460 struct acpi_table_dmar *dmar;
461 /*
 462	 * For now we disable DMA-remapping when interrupt
 463	 * remapping is enabled.
 464	 * Once queued invalidation support for IOTLB invalidation
 465	 * is added, we will not need this any more.
466 */
467 dmar = (struct acpi_table_dmar *) dmar_tbl;
468 if (ret && cpu_has_x2apic && dmar->flags & 0x1) {
469 printk(KERN_INFO
470 "Queued invalidation will be enabled to support "
471 "x2apic and Intr-remapping.\n");
472 printk(KERN_INFO
473 "Disabling IOMMU detection, because of missing "
474 "queued invalidation support for IOTLB "
475 "invalidation\n");
476 printk(KERN_INFO
477 "Use \"nox2apic\", if you want to use Intel "
478 " IOMMU for DMA-remapping and don't care about "
479 " x2apic support\n");
480
481 dmar_disabled = 1;
482 return;
483 }
484
485 if (ret && !no_iommu && !iommu_detected && !swiotlb &&
486 !dmar_disabled)
487 iommu_detected = 1;
488 }
489#endif
490}
491
492
493int alloc_iommu(struct dmar_drhd_unit *drhd)
494{
495 struct intel_iommu *iommu;
496 int map_size;
497 u32 ver;
 498	static int iommu_allocated;
499
500 iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
501 if (!iommu)
502 return -ENOMEM;
503
504 iommu->seq_id = iommu_allocated++;
505
506 iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
507 if (!iommu->reg) {
508 printk(KERN_ERR "IOMMU: can't map the region\n");
509 goto error;
510 }
511 iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
512 iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
513
514 /* the registers might be more than one page */
515 map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
516 cap_max_fault_reg_offset(iommu->cap));
517 map_size = PAGE_ALIGN_4K(map_size);
518 if (map_size > PAGE_SIZE_4K) {
519 iounmap(iommu->reg);
520 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
521 if (!iommu->reg) {
522 printk(KERN_ERR "IOMMU: can't map the region\n");
523 goto error;
524 }
525 }
526
527 ver = readl(iommu->reg + DMAR_VER_REG);
528 pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
529 drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
530 iommu->cap, iommu->ecap);
531
532 spin_lock_init(&iommu->register_lock);
533
534 drhd->iommu = iommu;
535 return 0;
536error:
537 kfree(iommu);
538 return -1;
539}
540
541void free_iommu(struct intel_iommu *iommu)
542{
543 if (!iommu)
544 return;
545
546#ifdef CONFIG_DMAR
547 free_dmar_iommu(iommu);
548#endif
549
550 if (iommu->reg)
551 iounmap(iommu->reg);
552 kfree(iommu);
553}
554
555/*
 556 * Reclaim all the submitted descriptors which have completed their work.
557 */
558static inline void reclaim_free_desc(struct q_inval *qi)
559{
560 while (qi->desc_status[qi->free_tail] == QI_DONE) {
561 qi->desc_status[qi->free_tail] = QI_FREE;
562 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
563 qi->free_cnt++;
564 }
565}
566
567/*
568 * Submit the queued invalidation descriptor to the remapping
569 * hardware unit and wait for its completion.
570 */
571void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
572{
573 struct q_inval *qi = iommu->qi;
574 struct qi_desc *hw, wait_desc;
575 int wait_index, index;
576 unsigned long flags;
577
578 if (!qi)
579 return;
580
581 hw = qi->desc;
582
583 spin_lock(&qi->q_lock);
584 while (qi->free_cnt < 3) {
585 spin_unlock(&qi->q_lock);
586 cpu_relax();
587 spin_lock(&qi->q_lock);
588 }
589
590 index = qi->free_head;
591 wait_index = (index + 1) % QI_LENGTH;
592
593 qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
594
595 hw[index] = *desc;
596
597 wait_desc.low = QI_IWD_STATUS_DATA(2) | QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
598 wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
599
600 hw[wait_index] = wait_desc;
601
602 __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
603 __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
604
605 qi->free_head = (qi->free_head + 2) % QI_LENGTH;
606 qi->free_cnt -= 2;
607
608 spin_lock_irqsave(&iommu->register_lock, flags);
609 /*
610 * update the HW tail register indicating the presence of
611 * new descriptors.
612 */
613 writel(qi->free_head << 4, iommu->reg + DMAR_IQT_REG);
614 spin_unlock_irqrestore(&iommu->register_lock, flags);
615
616 while (qi->desc_status[wait_index] != QI_DONE) {
617 spin_unlock(&qi->q_lock);
618 cpu_relax();
619 spin_lock(&qi->q_lock);
620 }
621
622 qi->desc_status[index] = QI_DONE;
623
624 reclaim_free_desc(qi);
625 spin_unlock(&qi->q_lock);
626}
627
628/*
629 * Flush the global interrupt entry cache.
630 */
631void qi_global_iec(struct intel_iommu *iommu)
632{
633 struct qi_desc desc;
634
635 desc.low = QI_IEC_TYPE;
636 desc.high = 0;
637
638 qi_submit_sync(&desc, iommu);
639}
640
641/*
 642 * Enable the Queued Invalidation interface. This is required to
 643 * support interrupt-remapping. It is also used by DMA-remapping,
 644 * where it replaces register-based IOTLB invalidation.
645 */
646int dmar_enable_qi(struct intel_iommu *iommu)
647{
648 u32 cmd, sts;
649 unsigned long flags;
650 struct q_inval *qi;
651
652 if (!ecap_qis(iommu->ecap))
653 return -ENOENT;
654
655 /*
 656	 * queued invalidation is already set up and enabled.
657 */
658 if (iommu->qi)
659 return 0;
660
661 iommu->qi = kmalloc(sizeof(*qi), GFP_KERNEL);
662 if (!iommu->qi)
663 return -ENOMEM;
664
665 qi = iommu->qi;
666
667 qi->desc = (void *)(get_zeroed_page(GFP_KERNEL));
668 if (!qi->desc) {
669 kfree(qi);
 670		iommu->qi = NULL;
671 return -ENOMEM;
672 }
673
674 qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_KERNEL);
675 if (!qi->desc_status) {
676 free_page((unsigned long) qi->desc);
677 kfree(qi);
 679		iommu->qi = NULL;
679 return -ENOMEM;
680 }
681
682 qi->free_head = qi->free_tail = 0;
683 qi->free_cnt = QI_LENGTH;
684
685 spin_lock_init(&qi->q_lock);
686
687 spin_lock_irqsave(&iommu->register_lock, flags);
688 /* write zero to the tail reg */
689 writel(0, iommu->reg + DMAR_IQT_REG);
690
691 dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
692
693 cmd = iommu->gcmd | DMA_GCMD_QIE;
694 iommu->gcmd |= DMA_GCMD_QIE;
695 writel(cmd, iommu->reg + DMAR_GCMD_REG);
696
 697	/* Make sure the hardware completes it */
698 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
699 spin_unlock_irqrestore(&iommu->register_lock, flags);
700
701 return 0;
702}
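
qi_submit_sync() always consumes two slots because each request is paired with an invalidation-wait descriptor whose completion the hardware reports by writing the status value into qi->desc_status[wait_index], which the driver then polls. The free_cnt < 3 check additionally keeps one slot spare, presumably so a completely full ring can never make the tail pointer wrap onto the head and look empty to the hardware. From a caller's perspective the whole interface is a single call; a sketch of the expected interrupt-remapping bring-up (the real caller is intr_remapping.c, not shown in this section):

	struct dmar_drhd_unit *drhd;

	for_each_drhd_unit(drhd) {
		struct intel_iommu *iommu = drhd->iommu;

		if (dmar_enable_qi(iommu))
			continue;	/* no ecap QIS: queued invalidation unusable */

		qi_global_iec(iommu);	/* flush the interrupt entry cache */
	}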
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 3f7b81c065d2..ffccf2341b98 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -49,8 +49,6 @@
49 49
50#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 50#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
51 51
52#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */
53
54#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) 52#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
55 53
56 54
@@ -58,8 +56,6 @@ static void flush_unmaps_timeout(unsigned long data);
58 56
59DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0); 57DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
60 58
61static struct intel_iommu *g_iommus;
62
63#define HIGH_WATER_MARK 250 59#define HIGH_WATER_MARK 250
64struct deferred_flush_tables { 60struct deferred_flush_tables {
65 int next; 61 int next;
@@ -80,7 +76,7 @@ static long list_size;
80 76
81static void domain_remove_dev_info(struct dmar_domain *domain); 77static void domain_remove_dev_info(struct dmar_domain *domain);
82 78
83static int dmar_disabled; 79int dmar_disabled;
84static int __initdata dmar_map_gfx = 1; 80static int __initdata dmar_map_gfx = 1;
85static int dmar_forcedac; 81static int dmar_forcedac;
86static int intel_iommu_strict; 82static int intel_iommu_strict;
@@ -185,13 +181,6 @@ void free_iova_mem(struct iova *iova)
185 kmem_cache_free(iommu_iova_cache, iova); 181 kmem_cache_free(iommu_iova_cache, iova);
186} 182}
187 183
188static inline void __iommu_flush_cache(
189 struct intel_iommu *iommu, void *addr, int size)
190{
191 if (!ecap_coherent(iommu->ecap))
192 clflush_cache_range(addr, size);
193}
194
195/* Gets context entry for a given bus and devfn */ 184/* Gets context entry for a given bus and devfn */
196static struct context_entry * device_to_context_entry(struct intel_iommu *iommu, 185static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
197 u8 bus, u8 devfn) 186 u8 bus, u8 devfn)
@@ -488,19 +477,6 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
488 return 0; 477 return 0;
489} 478}
490 479
491#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
492{\
493 cycles_t start_time = get_cycles();\
494 while (1) {\
495 sts = op (iommu->reg + offset);\
496 if (cond)\
497 break;\
498 if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\
499 panic("DMAR hardware is malfunctioning\n");\
500 cpu_relax();\
501 }\
502}
503
504static void iommu_set_root_entry(struct intel_iommu *iommu) 480static void iommu_set_root_entry(struct intel_iommu *iommu)
505{ 481{
506 void *addr; 482 void *addr;
@@ -990,6 +966,8 @@ static int iommu_init_domains(struct intel_iommu *iommu)
990 return -ENOMEM; 966 return -ENOMEM;
991 } 967 }
992 968
969 spin_lock_init(&iommu->lock);
970
993 /* 971 /*
994 * if Caching mode is set, then invalid translations are tagged 972 * if Caching mode is set, then invalid translations are tagged
995 * with domainid 0. Hence we need to pre-allocate it. 973 * with domainid 0. Hence we need to pre-allocate it.
@@ -998,62 +976,15 @@ static int iommu_init_domains(struct intel_iommu *iommu)
998 set_bit(0, iommu->domain_ids); 976 set_bit(0, iommu->domain_ids);
999 return 0; 977 return 0;
1000} 978}
1001static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu,
1002 struct dmar_drhd_unit *drhd)
1003{
1004 int ret;
1005 int map_size;
1006 u32 ver;
1007
1008 iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
1009 if (!iommu->reg) {
1010 printk(KERN_ERR "IOMMU: can't map the region\n");
1011 goto error;
1012 }
1013 iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
1014 iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
1015
1016 /* the registers might be more than one page */
1017 map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
1018 cap_max_fault_reg_offset(iommu->cap));
1019 map_size = PAGE_ALIGN_4K(map_size);
1020 if (map_size > PAGE_SIZE_4K) {
1021 iounmap(iommu->reg);
1022 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
1023 if (!iommu->reg) {
1024 printk(KERN_ERR "IOMMU: can't map the region\n");
1025 goto error;
1026 }
1027 }
1028
1029 ver = readl(iommu->reg + DMAR_VER_REG);
1030 pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
1031 drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
1032 iommu->cap, iommu->ecap);
1033 ret = iommu_init_domains(iommu);
1034 if (ret)
1035 goto error_unmap;
1036 spin_lock_init(&iommu->lock);
1037 spin_lock_init(&iommu->register_lock);
1038 979
1039 drhd->iommu = iommu;
1040 return iommu;
1041error_unmap:
1042 iounmap(iommu->reg);
1043error:
1044 kfree(iommu);
1045 return NULL;
1046}
1047 980
1048static void domain_exit(struct dmar_domain *domain); 981static void domain_exit(struct dmar_domain *domain);
1049static void free_iommu(struct intel_iommu *iommu) 982
983void free_dmar_iommu(struct intel_iommu *iommu)
1050{ 984{
1051 struct dmar_domain *domain; 985 struct dmar_domain *domain;
1052 int i; 986 int i;
1053 987
1054 if (!iommu)
1055 return;
1056
1057 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap)); 988 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1058 for (; i < cap_ndoms(iommu->cap); ) { 989 for (; i < cap_ndoms(iommu->cap); ) {
1059 domain = iommu->domains[i]; 990 domain = iommu->domains[i];
@@ -1078,10 +1009,6 @@ static void free_iommu(struct intel_iommu *iommu)
1078 1009
1079 /* free context mapping */ 1010 /* free context mapping */
1080 free_context_table(iommu); 1011 free_context_table(iommu);
1081
1082 if (iommu->reg)
1083 iounmap(iommu->reg);
1084 kfree(iommu);
1085} 1012}
1086 1013
1087static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu) 1014static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
@@ -1426,37 +1353,6 @@ find_domain(struct pci_dev *pdev)
1426 return NULL; 1353 return NULL;
1427} 1354}
1428 1355
1429static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
1430 struct pci_dev *dev)
1431{
1432 int index;
1433
1434 while (dev) {
1435 for (index = 0; index < cnt; index++)
1436 if (dev == devices[index])
1437 return 1;
1438
1439 /* Check our parent */
1440 dev = dev->bus->self;
1441 }
1442
1443 return 0;
1444}
1445
1446static struct dmar_drhd_unit *
1447dmar_find_matched_drhd_unit(struct pci_dev *dev)
1448{
1449 struct dmar_drhd_unit *drhd = NULL;
1450
1451 list_for_each_entry(drhd, &dmar_drhd_units, list) {
1452 if (drhd->include_all || dmar_pci_device_match(drhd->devices,
1453 drhd->devices_cnt, dev))
1454 return drhd;
1455 }
1456
1457 return NULL;
1458}
1459
1460/* domain is initialized */ 1356/* domain is initialized */
1461static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) 1357static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1462{ 1358{
@@ -1729,8 +1625,6 @@ int __init init_dmars(void)
1729 * endfor 1625 * endfor
1730 */ 1626 */
1731 for_each_drhd_unit(drhd) { 1627 for_each_drhd_unit(drhd) {
1732 if (drhd->ignored)
1733 continue;
1734 g_num_of_iommus++; 1628 g_num_of_iommus++;
1735 /* 1629 /*
1736 * lock not needed as this is only incremented in the single 1630 * lock not needed as this is only incremented in the single
@@ -1739,12 +1633,6 @@ int __init init_dmars(void)
1739 */ 1633 */
1740 } 1634 }
1741 1635
1742 g_iommus = kzalloc(g_num_of_iommus * sizeof(*iommu), GFP_KERNEL);
1743 if (!g_iommus) {
1744 ret = -ENOMEM;
1745 goto error;
1746 }
1747
1748 deferred_flush = kzalloc(g_num_of_iommus * 1636 deferred_flush = kzalloc(g_num_of_iommus *
1749 sizeof(struct deferred_flush_tables), GFP_KERNEL); 1637 sizeof(struct deferred_flush_tables), GFP_KERNEL);
1750 if (!deferred_flush) { 1638 if (!deferred_flush) {
@@ -1752,16 +1640,15 @@ int __init init_dmars(void)
1752 goto error; 1640 goto error;
1753 } 1641 }
1754 1642
1755 i = 0;
1756 for_each_drhd_unit(drhd) { 1643 for_each_drhd_unit(drhd) {
1757 if (drhd->ignored) 1644 if (drhd->ignored)
1758 continue; 1645 continue;
1759 iommu = alloc_iommu(&g_iommus[i], drhd); 1646
1760 i++; 1647 iommu = drhd->iommu;
1761 if (!iommu) { 1648
1762 ret = -ENOMEM; 1649 ret = iommu_init_domains(iommu);
1650 if (ret)
1763 goto error; 1651 goto error;
1764 }
1765 1652
1766 /* 1653 /*
1767 * TBD: 1654 * TBD:
@@ -1845,7 +1732,6 @@ error:
1845 iommu = drhd->iommu; 1732 iommu = drhd->iommu;
1846 free_iommu(iommu); 1733 free_iommu(iommu);
1847 } 1734 }
1848 kfree(g_iommus);
1849 return ret; 1735 return ret;
1850} 1736}
1851 1737
@@ -2002,7 +1888,10 @@ static void flush_unmaps(void)
2002 /* just flush them all */ 1888 /* just flush them all */
2003 for (i = 0; i < g_num_of_iommus; i++) { 1889 for (i = 0; i < g_num_of_iommus; i++) {
2004 if (deferred_flush[i].next) { 1890 if (deferred_flush[i].next) {
2005 iommu_flush_iotlb_global(&g_iommus[i], 0); 1891 struct intel_iommu *iommu =
1892 deferred_flush[i].domain[0]->iommu;
1893
1894 iommu_flush_iotlb_global(iommu, 0);
2006 for (j = 0; j < deferred_flush[i].next; j++) { 1895 for (j = 0; j < deferred_flush[i].next; j++) {
2007 __free_iova(&deferred_flush[i].domain[j]->iovad, 1896 __free_iova(&deferred_flush[i].domain[j]->iovad,
2008 deferred_flush[i].iova[j]); 1897 deferred_flush[i].iova[j]);
@@ -2032,7 +1921,8 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2032 if (list_size == HIGH_WATER_MARK) 1921 if (list_size == HIGH_WATER_MARK)
2033 flush_unmaps(); 1922 flush_unmaps();
2034 1923
2035 iommu_id = dom->iommu - g_iommus; 1924 iommu_id = dom->iommu->seq_id;
1925
2036 next = deferred_flush[iommu_id].next; 1926 next = deferred_flush[iommu_id].next;
2037 deferred_flush[iommu_id].domain[next] = dom; 1927 deferred_flush[iommu_id].domain[next] = dom;
2038 deferred_flush[iommu_id].iova[next] = iova; 1928 deferred_flush[iommu_id].iova[next] = iova;
@@ -2348,15 +2238,6 @@ static void __init iommu_exit_mempool(void)
2348 2238
2349} 2239}
2350 2240
2351void __init detect_intel_iommu(void)
2352{
2353 if (swiotlb || no_iommu || iommu_detected || dmar_disabled)
2354 return;
2355 if (early_dmar_detect()) {
2356 iommu_detected = 1;
2357 }
2358}
2359
2360static void __init init_no_remapping_devices(void) 2241static void __init init_no_remapping_devices(void)
2361{ 2242{
2362 struct dmar_drhd_unit *drhd; 2243 struct dmar_drhd_unit *drhd;
@@ -2403,12 +2284,19 @@ int __init intel_iommu_init(void)
2403{ 2284{
2404 int ret = 0; 2285 int ret = 0;
2405 2286
2406 if (no_iommu || swiotlb || dmar_disabled)
2407 return -ENODEV;
2408
2409 if (dmar_table_init()) 2287 if (dmar_table_init())
2410 return -ENODEV; 2288 return -ENODEV;
2411 2289
2290 if (dmar_dev_scope_init())
2291 return -ENODEV;
2292
2293 /*
2294 * Check the need for DMA-remapping initialization now.
2295 * Above initialization will also be used by Interrupt-remapping.
2296 */
2297 if (no_iommu || swiotlb || dmar_disabled)
2298 return -ENODEV;
2299
2412 iommu_init_mempool(); 2300 iommu_init_mempool();
2413 dmar_init_reserved_ranges(); 2301 dmar_init_reserved_ranges();
2414 2302
diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h
index afc0ad96122e..2142c01e0143 100644
--- a/drivers/pci/intel-iommu.h
+++ b/drivers/pci/intel-iommu.h
@@ -27,19 +27,8 @@
27#include <linux/sysdev.h> 27#include <linux/sysdev.h>
28#include "iova.h" 28#include "iova.h"
29#include <linux/io.h> 29#include <linux/io.h>
30 30#include <asm/cacheflush.h>
31/* 31#include "dma_remapping.h"
32 * We need a fixed PAGE_SIZE of 4K irrespective of
33 * arch PAGE_SIZE for IOMMU page tables.
34 */
35#define PAGE_SHIFT_4K (12)
36#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K)
37#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K)
38#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
39
40#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K)
41#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
42#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
43 32
44/* 33/*
45 * Intel IOMMU register specification per version 1.0 public spec. 34 * Intel IOMMU register specification per version 1.0 public spec.
@@ -63,6 +52,11 @@
63#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */ 52#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */
64#define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */ 53#define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */
65#define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */ 54#define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */
55#define DMAR_IQH_REG 0x80 /* Invalidation queue head register */
56#define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */
57#define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */
58#define DMAR_ICS_REG 0x98 /* Invalidation complete status register */
59#define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */
66 60
67#define OFFSET_STRIDE (9) 61#define OFFSET_STRIDE (9)
68/* 62/*
@@ -126,6 +120,10 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
126#define ecap_max_iotlb_offset(e) \ 120#define ecap_max_iotlb_offset(e) \
127 (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16) 121 (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16)
128#define ecap_coherent(e) ((e) & 0x1) 122#define ecap_coherent(e) ((e) & 0x1)
123#define ecap_qis(e) ((e) & 0x2)
124#define ecap_eim_support(e) ((e >> 4) & 0x1)
125#define ecap_ir_support(e) ((e >> 3) & 0x1)
126#define ecap_max_handle_mask(e) ((e >> 20) & 0xf)
129 127
130 128
131/* IOTLB_REG */ 129/* IOTLB_REG */
@@ -141,6 +139,17 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
141#define DMA_TLB_IH_NONLEAF (((u64)1) << 6) 139#define DMA_TLB_IH_NONLEAF (((u64)1) << 6)
142#define DMA_TLB_MAX_SIZE (0x3f) 140#define DMA_TLB_MAX_SIZE (0x3f)
143 141
142/* INVALID_DESC */
143#define DMA_ID_TLB_GLOBAL_FLUSH (((u64)1) << 3)
144#define DMA_ID_TLB_DSI_FLUSH (((u64)2) << 3)
145#define DMA_ID_TLB_PSI_FLUSH (((u64)3) << 3)
146#define DMA_ID_TLB_READ_DRAIN (((u64)1) << 7)
147#define DMA_ID_TLB_WRITE_DRAIN (((u64)1) << 6)
148#define DMA_ID_TLB_DID(id) (((u64)((id & 0xffff) << 16)))
149#define DMA_ID_TLB_IH_NONLEAF (((u64)1) << 6)
150#define DMA_ID_TLB_ADDR(addr) (addr)
151#define DMA_ID_TLB_ADDR_MASK(mask) (mask)
152
144/* PMEN_REG */ 153/* PMEN_REG */
145#define DMA_PMEN_EPM (((u32)1)<<31) 154#define DMA_PMEN_EPM (((u32)1)<<31)
146#define DMA_PMEN_PRS (((u32)1)<<0) 155#define DMA_PMEN_PRS (((u32)1)<<0)
@@ -151,6 +160,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
151#define DMA_GCMD_SFL (((u32)1) << 29) 160#define DMA_GCMD_SFL (((u32)1) << 29)
152#define DMA_GCMD_EAFL (((u32)1) << 28) 161#define DMA_GCMD_EAFL (((u32)1) << 28)
153#define DMA_GCMD_WBF (((u32)1) << 27) 162#define DMA_GCMD_WBF (((u32)1) << 27)
163#define DMA_GCMD_QIE (((u32)1) << 26)
164#define DMA_GCMD_SIRTP (((u32)1) << 24)
165#define DMA_GCMD_IRE (((u32) 1) << 25)
154 166
155/* GSTS_REG */ 167/* GSTS_REG */
156#define DMA_GSTS_TES (((u32)1) << 31) 168#define DMA_GSTS_TES (((u32)1) << 31)
@@ -158,6 +170,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
158#define DMA_GSTS_FLS (((u32)1) << 29) 170#define DMA_GSTS_FLS (((u32)1) << 29)
159#define DMA_GSTS_AFLS (((u32)1) << 28) 171#define DMA_GSTS_AFLS (((u32)1) << 28)
160#define DMA_GSTS_WBFS (((u32)1) << 27) 172#define DMA_GSTS_WBFS (((u32)1) << 27)
173#define DMA_GSTS_QIES (((u32)1) << 26)
174#define DMA_GSTS_IRTPS (((u32)1) << 24)
175#define DMA_GSTS_IRES (((u32)1) << 25)
161 176
162/* CCMD_REG */ 177/* CCMD_REG */
163#define DMA_CCMD_ICC (((u64)1) << 63) 178#define DMA_CCMD_ICC (((u64)1) << 63)
@@ -187,158 +202,106 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
187#define dma_frcd_source_id(c) (c & 0xffff) 202#define dma_frcd_source_id(c) (c & 0xffff)
188#define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */ 203#define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */
189 204
190/* 205#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */
191 * 0: Present 206
192 * 1-11: Reserved 207#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
193 * 12-63: Context Ptr (12 - (haw-1)) 208{\
194 * 64-127: Reserved 209 cycles_t start_time = get_cycles();\
195 */ 210 while (1) {\
196struct root_entry { 211 sts = op (iommu->reg + offset);\
197 u64 val; 212 if (cond)\
198 u64 rsvd1; 213 break;\
199}; 214 if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\
200#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry)) 215 panic("DMAR hardware is malfunctioning\n");\
201static inline bool root_present(struct root_entry *root) 216 cpu_relax();\
202{ 217 }\
203 return (root->val & 1);
204}
205static inline void set_root_present(struct root_entry *root)
206{
207 root->val |= 1;
208}
209static inline void set_root_value(struct root_entry *root, unsigned long value)
210{
211 root->val |= value & PAGE_MASK_4K;
212} 218}
213 219
214struct context_entry; 220#define QI_LENGTH 256 /* queue length */
215static inline struct context_entry *
216get_context_addr_from_root(struct root_entry *root)
217{
218 return (struct context_entry *)
219 (root_present(root)?phys_to_virt(
220 root->val & PAGE_MASK_4K):
221 NULL);
222}
223
224/*
225 * low 64 bits:
226 * 0: present
227 * 1: fault processing disable
228 * 2-3: translation type
229 * 12-63: address space root
230 * high 64 bits:
231 * 0-2: address width
232 * 3-6: aval
233 * 8-23: domain id
234 */
235struct context_entry {
236 u64 lo;
237 u64 hi;
238};
239#define context_present(c) ((c).lo & 1)
240#define context_fault_disable(c) (((c).lo >> 1) & 1)
241#define context_translation_type(c) (((c).lo >> 2) & 3)
242#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
243#define context_address_width(c) ((c).hi & 7)
244#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
245
246#define context_set_present(c) do {(c).lo |= 1;} while (0)
247#define context_set_fault_enable(c) \
248 do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
249#define context_set_translation_type(c, val) \
250 do { \
251 (c).lo &= (((u64)-1) << 4) | 3; \
252 (c).lo |= ((val) & 3) << 2; \
253 } while (0)
254#define CONTEXT_TT_MULTI_LEVEL 0
255#define context_set_address_root(c, val) \
256 do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
257#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
258#define context_set_domain_id(c, val) \
259 do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
260#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
261 221
262/* 222enum {
263 * 0: readable 223 QI_FREE,
264 * 1: writable 224 QI_IN_USE,
265 * 2-6: reserved 225 QI_DONE
266 * 7: super page
267 * 8-11: available
 268 * 12-63: Host physical address
269 */
270struct dma_pte {
271 u64 val;
272}; 226};
273#define dma_clear_pte(p) do {(p).val = 0;} while (0)
274
275#define DMA_PTE_READ (1)
276#define DMA_PTE_WRITE (2)
277 227
278#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0) 228#define QI_CC_TYPE 0x1
279#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0) 229#define QI_IOTLB_TYPE 0x2
280#define dma_set_pte_prot(p, prot) \ 230#define QI_DIOTLB_TYPE 0x3
281 do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0) 231#define QI_IEC_TYPE 0x4
282#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K) 232#define QI_IWD_TYPE 0x5
283#define dma_set_pte_addr(p, addr) do {\
284 (p).val |= ((addr) & PAGE_MASK_4K); } while (0)
285#define dma_pte_present(p) (((p).val & 3) != 0)
286 233
287struct intel_iommu; 234#define QI_IEC_SELECTIVE (((u64)1) << 4)
235#define QI_IEC_IIDEX(idx) (((u64)(idx & 0xffff) << 32))
236#define QI_IEC_IM(m) (((u64)(m & 0x1f) << 27))
288 237
289struct dmar_domain { 238#define QI_IWD_STATUS_DATA(d) (((u64)d) << 32)
290 int id; /* domain id */ 239#define QI_IWD_STATUS_WRITE (((u64)1) << 5)
291 struct intel_iommu *iommu; /* back pointer to owning iommu */
292 240
293 struct list_head devices; /* all devices' list */ 241struct qi_desc {
294 struct iova_domain iovad; /* iova's that belong to this domain */ 242 u64 low, high;
243};
295 244
296 struct dma_pte *pgd; /* virtual address */ 245struct q_inval {
297 spinlock_t mapping_lock; /* page table lock */ 246 spinlock_t q_lock;
298 int gaw; /* max guest address width */ 247 struct qi_desc *desc; /* invalidation queue */
248 int *desc_status; /* desc status */
249 int free_head; /* first free entry */
250 int free_tail; /* last free entry */
251 int free_cnt;
252};
299 253
300 /* adjusted guest address width, 0 is level 2 30-bit */ 254#ifdef CONFIG_INTR_REMAP
301 int agaw; 255/* 1MB - maximum possible interrupt remapping table size */
256#define INTR_REMAP_PAGE_ORDER 8
257#define INTR_REMAP_TABLE_REG_SIZE 0xf
302 258
303#define DOMAIN_FLAG_MULTIPLE_DEVICES 1 259#define INTR_REMAP_TABLE_ENTRIES 65536
304 int flags;
305};
306 260
307/* PCI domain-device relationship */ 261struct ir_table {
308struct device_domain_info { 262 struct irte *base;
309 struct list_head link; /* link to domain siblings */
310 struct list_head global; /* link to global list */
311 u8 bus; /* PCI bus numer */
312 u8 devfn; /* PCI devfn number */
313 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
314 struct dmar_domain *domain; /* pointer to domain */
315}; 263};
316 264#endif
317extern int init_dmars(void);
318 265
319struct intel_iommu { 266struct intel_iommu {
320 void __iomem *reg; /* Pointer to hardware regs, virtual addr */ 267 void __iomem *reg; /* Pointer to hardware regs, virtual addr */
321 u64 cap; 268 u64 cap;
322 u64 ecap; 269 u64 ecap;
323 unsigned long *domain_ids; /* bitmap of domains */
324 struct dmar_domain **domains; /* ptr to domains */
325 int seg; 270 int seg;
326 u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ 271 u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
327 spinlock_t lock; /* protect context, domain ids */
328 spinlock_t register_lock; /* protect register handling */ 272 spinlock_t register_lock; /* protect register handling */
273 int seq_id; /* sequence id of the iommu */
274
275#ifdef CONFIG_DMAR
276 unsigned long *domain_ids; /* bitmap of domains */
277 struct dmar_domain **domains; /* ptr to domains */
278 spinlock_t lock; /* protect context, domain ids */
329 struct root_entry *root_entry; /* virtual address */ 279 struct root_entry *root_entry; /* virtual address */
330 280
331 unsigned int irq; 281 unsigned int irq;
332 unsigned char name[7]; /* Device Name */ 282 unsigned char name[7]; /* Device Name */
333 struct msi_msg saved_msg; 283 struct msi_msg saved_msg;
334 struct sys_device sysdev; 284 struct sys_device sysdev;
285#endif
286 struct q_inval *qi; /* Queued invalidation info */
287#ifdef CONFIG_INTR_REMAP
288 struct ir_table *ir_table; /* Interrupt remapping info */
289#endif
335}; 290};
336 291
337#ifndef CONFIG_DMAR_GFX_WA 292static inline void __iommu_flush_cache(
338static inline void iommu_prepare_gfx_mapping(void) 293 struct intel_iommu *iommu, void *addr, int size)
339{ 294{
340 return; 295 if (!ecap_coherent(iommu->ecap))
296 clflush_cache_range(addr, size);
341} 297}
342#endif /* !CONFIG_DMAR_GFX_WA */
343 298
299extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);
300
301extern int alloc_iommu(struct dmar_drhd_unit *drhd);
302extern void free_iommu(struct intel_iommu *iommu);
303extern int dmar_enable_qi(struct intel_iommu *iommu);
304extern void qi_global_iec(struct intel_iommu *iommu);
305
306extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
344#endif 307#endif
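The IOMMU_WAIT_OP() macro added above captures a recurring pattern in this driver: poll a status register until a condition bit shows up, and panic after a fixed TSC-based timeout. A minimal userspace model of the same pattern, with the cycle counter and register read mocked (assumptions, not kernel API):

	#include <stdio.h>
	#include <stdint.h>
	#include <time.h>

	#define STATUS_BIT 0x01000000	/* stand-in for e.g. DMA_GSTS_IRTPS */

	static uint64_t cycles(void)
	{
		struct timespec ts;

		clock_gettime(CLOCK_MONOTONIC, &ts);
		return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
	}

	/* mocked register read: the bit appears after a few polls */
	static uint32_t status_read(void)
	{
		static int polls;

		return (++polls > 3) ? STATUS_BIT : 0;
	}

	int main(void)
	{
		uint64_t deadline = cycles() + 1000000000ull;	/* ~1s budget */
		uint32_t sts;

		while (1) {
			sts = status_read();
			if (sts & STATUS_BIT)
				break;
			if (cycles() > deadline) {
				fprintf(stderr, "hardware is malfunctioning\n");
				return 1;
			}
		}
		printf("condition met, sts=%#x\n", sts);
		return 0;
	}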
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
new file mode 100644
index 000000000000..bb642cc5e18c
--- /dev/null
+++ b/drivers/pci/intr_remapping.c
@@ -0,0 +1,471 @@
1#include <linux/dmar.h>
2#include <linux/spinlock.h>
3#include <linux/jiffies.h>
4#include <linux/pci.h>
5#include <linux/irq.h>
6#include <asm/io_apic.h>
7#include "intel-iommu.h"
8#include "intr_remapping.h"
9
10static struct ioapic_scope ir_ioapic[MAX_IO_APICS];
11static int ir_ioapic_num;
12int intr_remapping_enabled;
13
14static struct {
15 struct intel_iommu *iommu;
16 u16 irte_index;
17 u16 sub_handle;
18 u8 irte_mask;
19} irq_2_iommu[NR_IRQS];
20
21static DEFINE_SPINLOCK(irq_2_ir_lock);
22
23int irq_remapped(int irq)
24{
 25	if (irq >= NR_IRQS)
26 return 0;
27
28 if (!irq_2_iommu[irq].iommu)
29 return 0;
30
31 return 1;
32}
33
34int get_irte(int irq, struct irte *entry)
35{
36 int index;
37
 38	if (!entry || irq >= NR_IRQS)
39 return -1;
40
41 spin_lock(&irq_2_ir_lock);
42 if (!irq_2_iommu[irq].iommu) {
43 spin_unlock(&irq_2_ir_lock);
44 return -1;
45 }
46
47 index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
48 *entry = *(irq_2_iommu[irq].iommu->ir_table->base + index);
49
50 spin_unlock(&irq_2_ir_lock);
51 return 0;
52}
53
54int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
55{
56 struct ir_table *table = iommu->ir_table;
57 u16 index, start_index;
58 unsigned int mask = 0;
59 int i;
60
61 if (!count)
62 return -1;
63
64 /*
65 * start the IRTE search from index 0.
66 */
67 index = start_index = 0;
68
69 if (count > 1) {
70 count = __roundup_pow_of_two(count);
71 mask = ilog2(count);
72 }
73
74 if (mask > ecap_max_handle_mask(iommu->ecap)) {
75 printk(KERN_ERR
76 "Requested mask %x exceeds the max invalidation handle"
77 " mask value %Lx\n", mask,
78 ecap_max_handle_mask(iommu->ecap));
79 return -1;
80 }
81
82 spin_lock(&irq_2_ir_lock);
83 do {
84 for (i = index; i < index + count; i++)
85 if (table->base[i].present)
86 break;
87 /* empty index found */
88 if (i == index + count)
89 break;
90
91 index = (index + count) % INTR_REMAP_TABLE_ENTRIES;
92
93 if (index == start_index) {
94 spin_unlock(&irq_2_ir_lock);
95 printk(KERN_ERR "can't allocate an IRTE\n");
96 return -1;
97 }
98 } while (1);
99
100 for (i = index; i < index + count; i++)
101 table->base[i].present = 1;
102
103 irq_2_iommu[irq].iommu = iommu;
104 irq_2_iommu[irq].irte_index = index;
105 irq_2_iommu[irq].sub_handle = 0;
106 irq_2_iommu[irq].irte_mask = mask;
107
108 spin_unlock(&irq_2_ir_lock);
109
110 return index;
111}
112
113static void qi_flush_iec(struct intel_iommu *iommu, int index, int mask)
114{
115 struct qi_desc desc;
116
117 desc.low = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask)
118 | QI_IEC_SELECTIVE;
119 desc.high = 0;
120
121 qi_submit_sync(&desc, iommu);
122}
123
124int map_irq_to_irte_handle(int irq, u16 *sub_handle)
125{
126 int index;
127
128 spin_lock(&irq_2_ir_lock);
129 if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
130 spin_unlock(&irq_2_ir_lock);
131 return -1;
132 }
133
134 *sub_handle = irq_2_iommu[irq].sub_handle;
135 index = irq_2_iommu[irq].irte_index;
136 spin_unlock(&irq_2_ir_lock);
137 return index;
138}
139
140int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
141{
142 spin_lock(&irq_2_ir_lock);
143 if (irq >= NR_IRQS || irq_2_iommu[irq].iommu) {
144 spin_unlock(&irq_2_ir_lock);
145 return -1;
146 }
147
148 irq_2_iommu[irq].iommu = iommu;
149 irq_2_iommu[irq].irte_index = index;
150 irq_2_iommu[irq].sub_handle = subhandle;
151 irq_2_iommu[irq].irte_mask = 0;
152
153 spin_unlock(&irq_2_ir_lock);
154
155 return 0;
156}
157
158int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index)
159{
160 spin_lock(&irq_2_ir_lock);
161 if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
162 spin_unlock(&irq_2_ir_lock);
163 return -1;
164 }
165
166 irq_2_iommu[irq].iommu = NULL;
167 irq_2_iommu[irq].irte_index = 0;
168 irq_2_iommu[irq].sub_handle = 0;
169 irq_2_iommu[irq].irte_mask = 0;
170
171 spin_unlock(&irq_2_ir_lock);
172
173 return 0;
174}
175
176int modify_irte(int irq, struct irte *irte_modified)
177{
178 int index;
179 struct irte *irte;
180 struct intel_iommu *iommu;
181
182 spin_lock(&irq_2_ir_lock);
183 if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
184 spin_unlock(&irq_2_ir_lock);
185 return -1;
186 }
187
188 iommu = irq_2_iommu[irq].iommu;
189
190 index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
191 irte = &iommu->ir_table->base[index];
192
193 set_64bit((unsigned long *)irte, irte_modified->low | (1 << 1));
194 __iommu_flush_cache(iommu, irte, sizeof(*irte));
195
196 qi_flush_iec(iommu, index, 0);
197
198 spin_unlock(&irq_2_ir_lock);
199 return 0;
200}
201
202int flush_irte(int irq)
203{
204 int index;
205 struct intel_iommu *iommu;
206
207 spin_lock(&irq_2_ir_lock);
208 if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
209 spin_unlock(&irq_2_ir_lock);
210 return -1;
211 }
212
213 iommu = irq_2_iommu[irq].iommu;
214
215 index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
216
217 qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask);
218 spin_unlock(&irq_2_ir_lock);
219
220 return 0;
221}
222
223struct intel_iommu *map_ioapic_to_ir(int apic)
224{
225 int i;
226
227 for (i = 0; i < MAX_IO_APICS; i++)
228 if (ir_ioapic[i].id == apic)
229 return ir_ioapic[i].iommu;
230 return NULL;
231}
232
233struct intel_iommu *map_dev_to_ir(struct pci_dev *dev)
234{
235 struct dmar_drhd_unit *drhd;
236
237 drhd = dmar_find_matched_drhd_unit(dev);
238 if (!drhd)
239 return NULL;
240
241 return drhd->iommu;
242}
243
244int free_irte(int irq)
245{
246 int index, i;
247 struct irte *irte;
248 struct intel_iommu *iommu;
249
250 spin_lock(&irq_2_ir_lock);
251 if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
252 spin_unlock(&irq_2_ir_lock);
253 return -1;
254 }
255
256 iommu = irq_2_iommu[irq].iommu;
257
258 index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
259 irte = &iommu->ir_table->base[index];
260
261 if (!irq_2_iommu[irq].sub_handle) {
262 for (i = 0; i < (1 << irq_2_iommu[irq].irte_mask); i++)
263 set_64bit((unsigned long *)irte, 0);
264 qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask);
265 }
266
267 irq_2_iommu[irq].iommu = NULL;
268 irq_2_iommu[irq].irte_index = 0;
269 irq_2_iommu[irq].sub_handle = 0;
270 irq_2_iommu[irq].irte_mask = 0;
271
272 spin_unlock(&irq_2_ir_lock);
273
274 return 0;
275}
276
277static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode)
278{
279 u64 addr;
280 u32 cmd, sts;
281 unsigned long flags;
282
283 addr = virt_to_phys((void *)iommu->ir_table->base);
284
285 spin_lock_irqsave(&iommu->register_lock, flags);
286
287 dmar_writeq(iommu->reg + DMAR_IRTA_REG,
288 (addr) | IR_X2APIC_MODE(mode) | INTR_REMAP_TABLE_REG_SIZE);
289
290 /* Set interrupt-remapping table pointer */
291 cmd = iommu->gcmd | DMA_GCMD_SIRTP;
292 writel(cmd, iommu->reg + DMAR_GCMD_REG);
293
294 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
295 readl, (sts & DMA_GSTS_IRTPS), sts);
296 spin_unlock_irqrestore(&iommu->register_lock, flags);
297
298 /*
299 * global invalidation of interrupt entry cache before enabling
300 * interrupt-remapping.
301 */
302 qi_global_iec(iommu);
303
304 spin_lock_irqsave(&iommu->register_lock, flags);
305
306 /* Enable interrupt-remapping */
307 cmd = iommu->gcmd | DMA_GCMD_IRE;
308 iommu->gcmd |= DMA_GCMD_IRE;
309 writel(cmd, iommu->reg + DMAR_GCMD_REG);
310
311 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
312 readl, (sts & DMA_GSTS_IRES), sts);
313
314 spin_unlock_irqrestore(&iommu->register_lock, flags);
315}
316
317
318static int setup_intr_remapping(struct intel_iommu *iommu, int mode)
319{
320 struct ir_table *ir_table;
321 struct page *pages;
322
323 ir_table = iommu->ir_table = kzalloc(sizeof(struct ir_table),
324 GFP_KERNEL);
325
326 if (!iommu->ir_table)
327 return -ENOMEM;
328
329 pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, INTR_REMAP_PAGE_ORDER);
330
331 if (!pages) {
332 printk(KERN_ERR "failed to allocate pages of order %d\n",
333 INTR_REMAP_PAGE_ORDER);
334 kfree(iommu->ir_table);
335 return -ENOMEM;
336 }
337
338 ir_table->base = page_address(pages);
339
340 iommu_set_intr_remapping(iommu, mode);
341 return 0;
342}
343
344int __init enable_intr_remapping(int eim)
345{
346 struct dmar_drhd_unit *drhd;
347 int setup = 0;
348
349 /*
350 * check for the Interrupt-remapping support
351 */
352 for_each_drhd_unit(drhd) {
353 struct intel_iommu *iommu = drhd->iommu;
354
355 if (!ecap_ir_support(iommu->ecap))
356 continue;
357
358 if (eim && !ecap_eim_support(iommu->ecap)) {
359 printk(KERN_INFO "DRHD %Lx: EIM not supported by DRHD, "
360 " ecap %Lx\n", drhd->reg_base_addr, iommu->ecap);
361 return -1;
362 }
363 }
364
365 /*
 366	 * Enable queued invalidation for all the DRHDs.
367 */
368 for_each_drhd_unit(drhd) {
369 int ret;
370 struct intel_iommu *iommu = drhd->iommu;
371 ret = dmar_enable_qi(iommu);
372
373 if (ret) {
 374 			printk(KERN_ERR "DRHD %Lx: failed to enable queued "
 375 			       "invalidation, ecap %Lx, ret %d\n",
376 drhd->reg_base_addr, iommu->ecap, ret);
377 return -1;
378 }
379 }
380
381 /*
 382	 * Set up Interrupt-remapping for all the DRHDs now.
383 */
384 for_each_drhd_unit(drhd) {
385 struct intel_iommu *iommu = drhd->iommu;
386
387 if (!ecap_ir_support(iommu->ecap))
388 continue;
389
390 if (setup_intr_remapping(iommu, eim))
391 goto error;
392
393 setup = 1;
394 }
395
396 if (!setup)
397 goto error;
398
399 intr_remapping_enabled = 1;
400
401 return 0;
402
403error:
404 /*
405 * handle error condition gracefully here!
406 */
407 return -1;
408}
409
410static int ir_parse_ioapic_scope(struct acpi_dmar_header *header,
411 struct intel_iommu *iommu)
412{
413 struct acpi_dmar_hardware_unit *drhd;
414 struct acpi_dmar_device_scope *scope;
415 void *start, *end;
416
417 drhd = (struct acpi_dmar_hardware_unit *)header;
418
419 start = (void *)(drhd + 1);
420 end = ((void *)drhd) + header->length;
421
422 while (start < end) {
423 scope = start;
424 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_IOAPIC) {
425 if (ir_ioapic_num == MAX_IO_APICS) {
426 printk(KERN_WARNING "Exceeded Max IO APICS\n");
427 return -1;
428 }
429
430 printk(KERN_INFO "IOAPIC id %d under DRHD base"
431 " 0x%Lx\n", scope->enumeration_id,
432 drhd->address);
433
434 ir_ioapic[ir_ioapic_num].iommu = iommu;
435 ir_ioapic[ir_ioapic_num].id = scope->enumeration_id;
436 ir_ioapic_num++;
437 }
438 start += scope->length;
439 }
440
441 return 0;
442}
443
444/*
 445 * Finds the association between IO-APICs and their Interrupt-remapping
446 * hardware unit.
447 */
448int __init parse_ioapics_under_ir(void)
449{
450 struct dmar_drhd_unit *drhd;
451 int ir_supported = 0;
452
453 for_each_drhd_unit(drhd) {
454 struct intel_iommu *iommu = drhd->iommu;
455
456 if (ecap_ir_support(iommu->ecap)) {
457 if (ir_parse_ioapic_scope(drhd->hdr, iommu))
458 return -1;
459
460 ir_supported = 1;
461 }
462 }
463
464 if (ir_supported && ir_ioapic_num != nr_ioapics) {
465 printk(KERN_WARNING
466 "Not all IO-APIC's listed under remapping hardware\n");
467 return -1;
468 }
469
470 return ir_supported;
471}
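One detail of alloc_irte() above worth calling out: a multi-MSI request for N vectors consumes a power-of-two block of IRTEs, and what gets stored is the log2 of that block as irte_mask (later handed to qi_flush_iec()). A standalone model of just that sizing logic (hypothetical values, not kernel code):

	#include <stdio.h>

	static unsigned int roundup_pow_of_two(unsigned int x)
	{
		unsigned int r = 1;

		while (r < x)
			r <<= 1;
		return r;
	}

	static unsigned int ilog2(unsigned int x)
	{
		unsigned int l = 0;

		while (x >>= 1)
			l++;
		return l;
	}

	int main(void)
	{
		unsigned int count;

		for (count = 1; count <= 8; count++) {
			unsigned int entries = count > 1 ?
					       roundup_pow_of_two(count) : 1;
			unsigned int mask = count > 1 ? ilog2(entries) : 0;

			printf("request=%u -> irte block=%u, irte_mask=%u\n",
			       count, entries, mask);
		}
		return 0;
	}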
diff --git a/drivers/pci/intr_remapping.h b/drivers/pci/intr_remapping.h
new file mode 100644
index 000000000000..05f2635bbe4e
--- /dev/null
+++ b/drivers/pci/intr_remapping.h
@@ -0,0 +1,8 @@
1#include "intel-iommu.h"
2
3struct ioapic_scope {
4 struct intel_iommu *iommu;
5 unsigned int id;
6};
7
8#define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0)
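IR_X2APIC_MODE() and INTR_REMAP_TABLE_REG_SIZE come together in iommu_set_intr_remapping(), which programs DMAR_IRTA_REG as table base | extended-mode bit | size field; a size field of 0xf encodes 2^(15+1) = 65536 entries, matching INTR_REMAP_TABLE_ENTRIES above. A small sketch of the composition (the table address is hypothetical):

	#include <stdio.h>
	#include <stdint.h>

	#define IR_X2APIC_MODE(mode)		((mode) ? (1 << 11) : 0)
	#define INTR_REMAP_TABLE_REG_SIZE	0xf

	int main(void)
	{
		uint64_t table_phys = 0x12340000;	/* hypothetical IRT base */
		int eim = 1;				/* extended (x2APIC) mode */
		uint64_t irta = table_phys | IR_X2APIC_MODE(eim) |
				INTR_REMAP_TABLE_REG_SIZE;

		printf("IRTA=%#llx, entries=%u\n", (unsigned long long)irta,
		       1u << (INTR_REMAP_TABLE_REG_SIZE + 1));
		return 0;
	}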
diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h
index 4e2c1e517f06..fcd2f01277b6 100644
--- a/include/asm-x86/apic.h
+++ b/include/asm-x86/apic.h
@@ -7,6 +7,8 @@
7#include <asm/apicdef.h> 7#include <asm/apicdef.h>
8#include <asm/processor.h> 8#include <asm/processor.h>
9#include <asm/system.h> 9#include <asm/system.h>
10#include <asm/cpufeature.h>
11#include <asm/msr.h>
10 12
11#define ARCH_APICTIMER_STOPS_ON_C3 1 13#define ARCH_APICTIMER_STOPS_ON_C3 1
12 14
@@ -47,32 +49,75 @@ extern int disable_apic;
47#ifdef CONFIG_PARAVIRT 49#ifdef CONFIG_PARAVIRT
48#include <asm/paravirt.h> 50#include <asm/paravirt.h>
49#else 51#else
50#define apic_write native_apic_write
51#define apic_write_atomic native_apic_write_atomic
52#define apic_read native_apic_read
53#define setup_boot_clock setup_boot_APIC_clock 52#define setup_boot_clock setup_boot_APIC_clock
54#define setup_secondary_clock setup_secondary_APIC_clock 53#define setup_secondary_clock setup_secondary_APIC_clock
55#endif 54#endif
56 55
57extern int is_vsmp_box(void); 56extern int is_vsmp_box(void);
58 57
59static inline void native_apic_write(unsigned long reg, u32 v) 58static inline void native_apic_mem_write(u32 reg, u32 v)
60{ 59{
61 *((volatile u32 *)(APIC_BASE + reg)) = v; 60 *((volatile u32 *)(APIC_BASE + reg)) = v;
62} 61}
63 62
64static inline void native_apic_write_atomic(unsigned long reg, u32 v) 63static inline void native_apic_mem_write_atomic(u32 reg, u32 v)
65{ 64{
66 (void)xchg((u32 *)(APIC_BASE + reg), v); 65 (void)xchg((u32 *)(APIC_BASE + reg), v);
67} 66}
68 67
69static inline u32 native_apic_read(unsigned long reg) 68static inline u32 native_apic_mem_read(u32 reg)
70{ 69{
71 return *((volatile u32 *)(APIC_BASE + reg)); 70 return *((volatile u32 *)(APIC_BASE + reg));
72} 71}
73 72
74extern void apic_wait_icr_idle(void); 73static inline void native_apic_msr_write(u32 reg, u32 v)
75extern u32 safe_apic_wait_icr_idle(void); 74{
75 if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR ||
76 reg == APIC_LVR)
77 return;
78
79 wrmsr(APIC_BASE_MSR + (reg >> 4), v, 0);
80}
81
82static inline u32 native_apic_msr_read(u32 reg)
83{
84 u32 low, high;
85
86 if (reg == APIC_DFR)
87 return -1;
88
89 rdmsr(APIC_BASE_MSR + (reg >> 4), low, high);
90 return low;
91}
92
93#ifndef CONFIG_X86_32
94extern int x2apic, x2apic_preenabled;
95extern void check_x2apic(void);
96extern void enable_x2apic(void);
97extern void enable_IR_x2apic(void);
98extern void x2apic_icr_write(u32 low, u32 id);
99#endif
100
101struct apic_ops {
102 u32 (*read)(u32 reg);
103 void (*write)(u32 reg, u32 v);
104 void (*write_atomic)(u32 reg, u32 v);
105 u64 (*icr_read)(void);
106 void (*icr_write)(u32 low, u32 high);
107 void (*wait_icr_idle)(void);
108 u32 (*safe_wait_icr_idle)(void);
109};
110
111extern struct apic_ops *apic_ops;
112
113#define apic_read (apic_ops->read)
114#define apic_write (apic_ops->write)
115#define apic_write_atomic (apic_ops->write_atomic)
116#define apic_icr_read (apic_ops->icr_read)
117#define apic_icr_write (apic_ops->icr_write)
118#define apic_wait_icr_idle (apic_ops->wait_icr_idle)
119#define safe_apic_wait_icr_idle (apic_ops->safe_wait_icr_idle)
120
76extern int get_physical_broadcast(void); 121extern int get_physical_broadcast(void);
77 122
78#ifdef CONFIG_X86_GOOD_APIC 123#ifdef CONFIG_X86_GOOD_APIC
@@ -85,6 +130,15 @@ extern int get_physical_broadcast(void);
85# define apic_write_around(x, y) apic_write_atomic((x), (y)) 130# define apic_write_around(x, y) apic_write_atomic((x), (y))
86#endif 131#endif
87 132
133#ifdef CONFIG_X86_64
134static inline void ack_x2APIC_irq(void)
135{
136 /* Docs say use 0 for future compatibility */
137 native_apic_msr_write(APIC_EOI, 0);
138}
139#endif
140
141
88static inline void ack_APIC_irq(void) 142static inline void ack_APIC_irq(void)
89{ 143{
90 /* 144 /*
@@ -95,7 +149,11 @@ static inline void ack_APIC_irq(void)
95 */ 149 */
96 150
97 /* Docs say use 0 for future compatibility */ 151 /* Docs say use 0 for future compatibility */
152#ifdef CONFIG_X86_32
98 apic_write_around(APIC_EOI, 0); 153 apic_write_around(APIC_EOI, 0);
154#else
155 native_apic_mem_write(APIC_EOI, 0);
156#endif
99} 157}
100 158
101extern int lapic_get_maxlvt(void); 159extern int lapic_get_maxlvt(void);
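The apic_ops table above is the crux of this patch's APIC rework: every apic_read()/apic_write() now goes through a function pointer, so boot code can switch the whole kernel from MMIO (xAPIC) to MSR (x2APIC) accessors with one assignment. A userspace sketch of the indirection with both backends mocked; note how register 0xB0 (APIC_EOI) maps to MSR 0x800 + (0xB0 >> 4) = 0x80b:

	#include <stdio.h>
	#include <stdint.h>

	struct apic_ops {
		uint32_t (*read)(uint32_t reg);
		void (*write)(uint32_t reg, uint32_t v);
	};

	static uint32_t mem_read(uint32_t reg)
	{
		printf("MMIO read  reg %#x\n", reg);
		return 0;
	}

	static void mem_write(uint32_t reg, uint32_t v)
	{
		printf("MMIO write reg %#x = %#x\n", reg, v);
	}

	static uint32_t msr_read(uint32_t reg)
	{
		printf("MSR  read  %#x\n", 0x800 + (reg >> 4));
		return 0;
	}

	static void msr_write(uint32_t reg, uint32_t v)
	{
		printf("MSR  write %#x = %#x\n", 0x800 + (reg >> 4), v);
	}

	static struct apic_ops xapic_ops = { mem_read, mem_write };
	static struct apic_ops x2apic_ops = { msr_read, msr_write };
	static struct apic_ops *ops = &xapic_ops;

	int main(void)
	{
		ops->write(0xB0, 0);	/* EOI via MMIO */
		ops = &x2apic_ops;	/* what enabling x2APIC amounts to */
		ops->write(0xB0, 0);	/* same call site, now MSR 0x80b */
		return 0;
	}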
diff --git a/include/asm-x86/apicdef.h b/include/asm-x86/apicdef.h
index 6b9008c78731..bcae297b30b2 100644
--- a/include/asm-x86/apicdef.h
+++ b/include/asm-x86/apicdef.h
@@ -105,6 +105,7 @@
105#define APIC_TMICT 0x380 105#define APIC_TMICT 0x380
106#define APIC_TMCCT 0x390 106#define APIC_TMCCT 0x390
107#define APIC_TDCR 0x3E0 107#define APIC_TDCR 0x3E0
108#define APIC_SELF_IPI 0x3F0
108#define APIC_TDR_DIV_TMBASE (1 << 2) 109#define APIC_TDR_DIV_TMBASE (1 << 2)
109#define APIC_TDR_DIV_1 0xB 110#define APIC_TDR_DIV_1 0xB
110#define APIC_TDR_DIV_2 0x0 111#define APIC_TDR_DIV_2 0x0
@@ -128,6 +129,8 @@
128#define APIC_EILVT3 0x530 129#define APIC_EILVT3 0x530
129 130
130#define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) 131#define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
132#define APIC_BASE_MSR 0x800
133#define X2APIC_ENABLE (1UL << 10)
131 134
132#ifdef CONFIG_X86_32 135#ifdef CONFIG_X86_32
133# define MAX_IO_APICS 64 136# define MAX_IO_APICS 64
diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h
index 75ef959db329..5be9510ee012 100644
--- a/include/asm-x86/cpufeature.h
+++ b/include/asm-x86/cpufeature.h
@@ -90,6 +90,7 @@
90#define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */ 90#define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */
91#define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */ 91#define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */
92#define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */ 92#define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */
93#define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */
93 94
94/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ 95/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
95#define X86_FEATURE_XSTORE (5*32+ 2) /* on-CPU RNG present (xstore insn) */ 96#define X86_FEATURE_XSTORE (5*32+ 2) /* on-CPU RNG present (xstore insn) */
@@ -188,6 +189,7 @@ extern const char * const x86_power_flags[32];
188#define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES) 189#define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES)
189#define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) 190#define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
190#define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT) 191#define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT)
192#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC)
191 193
192#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) 194#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
193# define cpu_has_invlpg 1 195# define cpu_has_invlpg 1
diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h
index 0f8504627c41..2871b3fccb21 100644
--- a/include/asm-x86/genapic_64.h
+++ b/include/asm-x86/genapic_64.h
@@ -24,17 +24,24 @@ struct genapic {
24 void (*send_IPI_mask)(cpumask_t mask, int vector); 24 void (*send_IPI_mask)(cpumask_t mask, int vector);
25 void (*send_IPI_allbutself)(int vector); 25 void (*send_IPI_allbutself)(int vector);
26 void (*send_IPI_all)(int vector); 26 void (*send_IPI_all)(int vector);
27 void (*send_IPI_self)(int vector);
27 /* */ 28 /* */
28 unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); 29 unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
29 unsigned int (*phys_pkg_id)(int index_msb); 30 unsigned int (*phys_pkg_id)(int index_msb);
31 unsigned int (*get_apic_id)(unsigned long x);
32 unsigned long (*set_apic_id)(unsigned int id);
33 unsigned long apic_id_mask;
30}; 34};
31 35
32extern struct genapic *genapic; 36extern struct genapic *genapic;
33 37
34extern struct genapic apic_flat; 38extern struct genapic apic_flat;
35extern struct genapic apic_physflat; 39extern struct genapic apic_physflat;
40extern struct genapic apic_x2apic_cluster;
41extern struct genapic apic_x2apic_phys;
36extern int acpi_madt_oem_check(char *, char *); 42extern int acpi_madt_oem_check(char *, char *);
37 43
44extern void apic_send_IPI_self(int vector);
38enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; 45enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
39extern enum uv_system_type get_uv_system_type(void); 46extern enum uv_system_type get_uv_system_type(void);
40extern int is_uv_system(void); 47extern int is_uv_system(void);
diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h
index 77ba51df5668..ef7a995ee81f 100644
--- a/include/asm-x86/hw_irq.h
+++ b/include/asm-x86/hw_irq.h
@@ -73,7 +73,9 @@ extern void enable_IO_APIC(void);
73#endif 73#endif
74 74
75/* IPI functions */ 75/* IPI functions */
76#ifdef CONFIG_X86_32
76extern void send_IPI_self(int vector); 77extern void send_IPI_self(int vector);
78#endif
77extern void send_IPI(int dest, int vector); 79extern void send_IPI(int dest, int vector);
78 80
79/* Statistics */ 81/* Statistics */
diff --git a/include/asm-x86/i8259.h b/include/asm-x86/i8259.h
index 2f98df91f1f2..31112b6c595b 100644
--- a/include/asm-x86/i8259.h
+++ b/include/asm-x86/i8259.h
@@ -57,4 +57,7 @@ static inline void outb_pic(unsigned char value, unsigned int port)
57 57
58extern struct irq_chip i8259A_chip; 58extern struct irq_chip i8259A_chip;
59 59
60extern void mask_8259A(void);
61extern void unmask_8259A(void);
62
60#endif /* __ASM_I8259_H__ */ 63#endif /* __ASM_I8259_H__ */
diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h
index 14f82bbcb5fd..8dc2622714c8 100644
--- a/include/asm-x86/io_apic.h
+++ b/include/asm-x86/io_apic.h
@@ -107,6 +107,20 @@ struct IO_APIC_route_entry {
107 107
108} __attribute__ ((packed)); 108} __attribute__ ((packed));
109 109
110struct IR_IO_APIC_route_entry {
111 __u64 vector : 8,
112 zero : 3,
113 index2 : 1,
114 delivery_status : 1,
115 polarity : 1,
116 irr : 1,
117 trigger : 1,
118 mask : 1,
119 reserved : 31,
120 format : 1,
121 index : 15;
122} __attribute__ ((packed));
123
110#ifdef CONFIG_X86_IO_APIC 124#ifdef CONFIG_X86_IO_APIC
111 125
112/* 126/*
@@ -183,6 +197,12 @@ extern int io_apic_set_pci_routing(int ioapic, int pin, int irq,
183extern int (*ioapic_renumber_irq)(int ioapic, int irq); 197extern int (*ioapic_renumber_irq)(int ioapic, int irq);
184extern void ioapic_init_mappings(void); 198extern void ioapic_init_mappings(void);
185 199
200#ifdef CONFIG_X86_64
201extern int save_mask_IO_APIC_setup(void);
202extern void restore_IO_APIC_setup(void);
203extern void reinit_intr_remapped_IO_APIC(int);
204#endif
205
186#else /* !CONFIG_X86_IO_APIC */ 206#else /* !CONFIG_X86_IO_APIC */
187#define io_apic_assign_pci_irqs 0 207#define io_apic_assign_pci_irqs 0
188static const int timer_through_8259 = 0; 208static const int timer_through_8259 = 0;
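The IR_IO_APIC_route_entry layout above replaces the destination byte of a legacy RTE with a 16-bit IRTE handle split across two fields: the low 15 bits in 'index', bit 15 in 'index2', with format=1 marking the entry as remappable. A standalone demo of the packing (the handle value is arbitrary):

	#include <stdio.h>
	#include <stdint.h>

	struct ir_rte {
		uint64_t vector		 :  8,
			 zero		 :  3,
			 index2		 :  1,
			 delivery_status :  1,
			 polarity	 :  1,
			 irr		 :  1,
			 trigger	 :  1,
			 mask		 :  1,
			 reserved	 : 31,
			 format		 :  1,
			 index		 : 15;
	};

	int main(void)
	{
		uint16_t handle = 0x9abc;	/* arbitrary IRTE index */
		struct ir_rte e = { 0 };

		e.format = 1;			/* remappable format */
		e.index = handle & 0x7fff;	/* bits 14:0 */
		e.index2 = (handle >> 15) & 1;	/* bit 15 */

		printf("index=%#x index2=%u\n",
		       (unsigned int)e.index, (unsigned int)e.index2);
		return 0;
	}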
diff --git a/include/asm-x86/ipi.h b/include/asm-x86/ipi.h
index 196d63c28aa4..3d8d6a6c1f8e 100644
--- a/include/asm-x86/ipi.h
+++ b/include/asm-x86/ipi.h
@@ -49,6 +49,12 @@ static inline int __prepare_ICR2(unsigned int mask)
49 return SET_APIC_DEST_FIELD(mask); 49 return SET_APIC_DEST_FIELD(mask);
50} 50}
51 51
52static inline void __xapic_wait_icr_idle(void)
53{
54 while (native_apic_mem_read(APIC_ICR) & APIC_ICR_BUSY)
55 cpu_relax();
56}
57
52static inline void __send_IPI_shortcut(unsigned int shortcut, int vector, 58static inline void __send_IPI_shortcut(unsigned int shortcut, int vector,
53 unsigned int dest) 59 unsigned int dest)
54{ 60{
@@ -64,7 +70,7 @@ static inline void __send_IPI_shortcut(unsigned int shortcut, int vector,
64 /* 70 /*
65 * Wait for idle. 71 * Wait for idle.
66 */ 72 */
67 apic_wait_icr_idle(); 73 __xapic_wait_icr_idle();
68 74
69 /* 75 /*
70 * No need to touch the target chip field 76 * No need to touch the target chip field
@@ -74,7 +80,7 @@ static inline void __send_IPI_shortcut(unsigned int shortcut, int vector,
74 /* 80 /*
75 * Send the IPI. The write to APIC_ICR fires this off. 81 * Send the IPI. The write to APIC_ICR fires this off.
76 */ 82 */
77 apic_write(APIC_ICR, cfg); 83 native_apic_mem_write(APIC_ICR, cfg);
78} 84}
79 85
80/* 86/*
@@ -92,13 +98,13 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector,
92 if (unlikely(vector == NMI_VECTOR)) 98 if (unlikely(vector == NMI_VECTOR))
93 safe_apic_wait_icr_idle(); 99 safe_apic_wait_icr_idle();
94 else 100 else
95 apic_wait_icr_idle(); 101 __xapic_wait_icr_idle();
96 102
97 /* 103 /*
98 * prepare target chip field 104 * prepare target chip field
99 */ 105 */
100 cfg = __prepare_ICR2(mask); 106 cfg = __prepare_ICR2(mask);
101 apic_write(APIC_ICR2, cfg); 107 native_apic_mem_write(APIC_ICR2, cfg);
102 108
103 /* 109 /*
104 * program the ICR 110 * program the ICR
@@ -108,7 +114,7 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector,
108 /* 114 /*
109 * Send the IPI. The write to APIC_ICR fires this off. 115 * Send the IPI. The write to APIC_ICR fires this off.
110 */ 116 */
111 apic_write(APIC_ICR, cfg); 117 native_apic_mem_write(APIC_ICR, cfg);
112} 118}
113 119
114static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) 120static inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
diff --git a/include/asm-x86/irq_remapping.h b/include/asm-x86/irq_remapping.h
new file mode 100644
index 000000000000..78242c6ffa58
--- /dev/null
+++ b/include/asm-x86/irq_remapping.h
@@ -0,0 +1,8 @@
1#ifndef _ASM_IRQ_REMAPPING_H
2#define _ASM_IRQ_REMAPPING_H
3
4extern int x2apic;
5
6#define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8)
7
8#endif
diff --git a/include/asm-x86/mach-default/mach_apic.h b/include/asm-x86/mach-default/mach_apic.h
index 0b2cde5e1b74..3d2b455581ec 100644
--- a/include/asm-x86/mach-default/mach_apic.h
+++ b/include/asm-x86/mach-default/mach_apic.h
@@ -30,6 +30,8 @@ static inline cpumask_t target_cpus(void)
30#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) 30#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
31#define phys_pkg_id (genapic->phys_pkg_id) 31#define phys_pkg_id (genapic->phys_pkg_id)
32#define vector_allocation_domain (genapic->vector_allocation_domain) 32#define vector_allocation_domain (genapic->vector_allocation_domain)
33#define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID)))
34#define send_IPI_self (genapic->send_IPI_self)
33extern void setup_apic_routing(void); 35extern void setup_apic_routing(void);
34#else 36#else
35#define INT_DELIVERY_MODE dest_LowestPrio 37#define INT_DELIVERY_MODE dest_LowestPrio
@@ -54,7 +56,7 @@ static inline void init_apic_ldr(void)
54 56
55static inline int apic_id_registered(void) 57static inline int apic_id_registered(void)
56{ 58{
57 return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map); 59 return physid_isset(read_apic_id(), phys_cpu_present_map);
58} 60}
59 61
60static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) 62static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
diff --git a/include/asm-x86/mach-default/mach_apicdef.h b/include/asm-x86/mach-default/mach_apicdef.h
index e4b29ba37de6..a55518aa5a2d 100644
--- a/include/asm-x86/mach-default/mach_apicdef.h
+++ b/include/asm-x86/mach-default/mach_apicdef.h
@@ -4,9 +4,9 @@
4#include <asm/apic.h> 4#include <asm/apic.h>
5 5
6#ifdef CONFIG_X86_64 6#ifdef CONFIG_X86_64
7#define APIC_ID_MASK (0xFFu<<24) 7#define APIC_ID_MASK (genapic->apic_id_mask)
8#define GET_APIC_ID(x) (((x)>>24)&0xFFu) 8#define GET_APIC_ID(x) (genapic->get_apic_id(x))
9#define SET_APIC_ID(x) (((x)<<24)) 9#define SET_APIC_ID(x) (genapic->set_apic_id(x))
10#else 10#else
11#define APIC_ID_MASK (0xF<<24) 11#define APIC_ID_MASK (0xF<<24)
12static inline unsigned get_apic_id(unsigned long x) 12static inline unsigned get_apic_id(unsigned long x)
diff --git a/include/asm-x86/mach-es7000/mach_apic.h b/include/asm-x86/mach-es7000/mach_apic.h
index fbc8ad256f5a..b3556ec3bca5 100644
--- a/include/asm-x86/mach-es7000/mach_apic.h
+++ b/include/asm-x86/mach-es7000/mach_apic.h
@@ -141,7 +141,7 @@ static inline void setup_portio_remap(void)
141extern unsigned int boot_cpu_physical_apicid; 141extern unsigned int boot_cpu_physical_apicid;
142static inline int check_phys_apicid_present(int cpu_physical_apicid) 142static inline int check_phys_apicid_present(int cpu_physical_apicid)
143{ 143{
144 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 144 boot_cpu_physical_apicid = read_apic_id();
145 return (1); 145 return (1);
146} 146}
147 147
diff --git a/include/asm-x86/msidef.h b/include/asm-x86/msidef.h
index 296f29ce426d..57fd85935e5a 100644
--- a/include/asm-x86/msidef.h
+++ b/include/asm-x86/msidef.h
@@ -48,4 +48,8 @@
48#define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \ 48#define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \
49 MSI_ADDR_DEST_ID_MASK) 49 MSI_ADDR_DEST_ID_MASK)
50 50
51#define MSI_ADDR_IR_EXT_INT (1 << 4)
52#define MSI_ADDR_IR_SHV (1 << 3)
53#define MSI_ADDR_IR_INDEX1(index) ((index & 0x8000) >> 13)
54#define MSI_ADDR_IR_INDEX2(index) ((index & 0x7fff) << 5)
51#endif /* ASM_MSIDEF_H */ 55#endif /* ASM_MSIDEF_H */
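The four MSI_ADDR_IR_* macros above build the remappable MSI address format: SHV and the extended-interrupt bit flag the message, while the 16-bit IRTE index is split with bit 15 landing in address bit 2 (INDEX1) and bits 14:0 in address bits 19:5 (INDEX2). A quick standalone composition (the index value is arbitrary):

	#include <stdio.h>
	#include <stdint.h>

	#define MSI_ADDR_IR_EXT_INT		(1 << 4)
	#define MSI_ADDR_IR_SHV			(1 << 3)
	#define MSI_ADDR_IR_INDEX1(index)	(((index) & 0x8000) >> 13)
	#define MSI_ADDR_IR_INDEX2(index)	(((index) & 0x7fff) << 5)

	int main(void)
	{
		uint16_t index = 0x9abc;	/* arbitrary IRTE index */
		uint32_t addr = 0xfee00000	/* MSI address base */
				| MSI_ADDR_IR_SHV
				| MSI_ADDR_IR_INDEX1(index)
				| MSI_ADDR_IR_INDEX2(index);

		printf("remappable MSI address for IRTE %#x: %#x\n",
		       index, addr);
		return 0;
	}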
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index ef5e8ec6a6ab..08f89e385a92 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -200,13 +200,6 @@ struct pv_irq_ops {
200 200
201struct pv_apic_ops { 201struct pv_apic_ops {
202#ifdef CONFIG_X86_LOCAL_APIC 202#ifdef CONFIG_X86_LOCAL_APIC
203 /*
204 * Direct APIC operations, principally for VMI. Ideally
205 * these shouldn't be in this interface.
206 */
207 void (*apic_write)(unsigned long reg, u32 v);
208 void (*apic_write_atomic)(unsigned long reg, u32 v);
209 u32 (*apic_read)(unsigned long reg);
210 void (*setup_boot_clock)(void); 203 void (*setup_boot_clock)(void);
211 void (*setup_secondary_clock)(void); 204 void (*setup_secondary_clock)(void);
212 205
@@ -888,24 +881,6 @@ static inline void slow_down_io(void)
888} 881}
889 882
890#ifdef CONFIG_X86_LOCAL_APIC 883#ifdef CONFIG_X86_LOCAL_APIC
891/*
892 * Basic functions accessing APICs.
893 */
894static inline void apic_write(unsigned long reg, u32 v)
895{
896 PVOP_VCALL2(pv_apic_ops.apic_write, reg, v);
897}
898
899static inline void apic_write_atomic(unsigned long reg, u32 v)
900{
901 PVOP_VCALL2(pv_apic_ops.apic_write_atomic, reg, v);
902}
903
904static inline u32 apic_read(unsigned long reg)
905{
906 return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg);
907}
908
909static inline void setup_boot_clock(void) 884static inline void setup_boot_clock(void)
910{ 885{
911 PVOP_VCALL0(pv_apic_ops.setup_boot_clock); 886 PVOP_VCALL0(pv_apic_ops.setup_boot_clock);
diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h
index c2784b3e0b77..1896cdb0076a 100644
--- a/include/asm-x86/smp.h
+++ b/include/asm-x86/smp.h
@@ -163,30 +163,33 @@ extern int safe_smp_processor_id(void);
163 163
164#ifdef CONFIG_X86_LOCAL_APIC 164#ifdef CONFIG_X86_LOCAL_APIC
165 165
166#ifndef CONFIG_X86_64
166static inline int logical_smp_processor_id(void) 167static inline int logical_smp_processor_id(void)
167{ 168{
168 /* we don't want to mark this access volatile - bad code generation */ 169 /* we don't want to mark this access volatile - bad code generation */
169 return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR)); 170 return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
170} 171}
171 172
172#ifndef CONFIG_X86_64 173#include <mach_apicdef.h>
173static inline unsigned int read_apic_id(void) 174static inline unsigned int read_apic_id(void)
174{ 175{
175 return *(u32 *)(APIC_BASE + APIC_ID); 176 unsigned int reg;
177
178 reg = *(u32 *)(APIC_BASE + APIC_ID);
179
180 return GET_APIC_ID(reg);
176} 181}
177#else
178extern unsigned int read_apic_id(void);
179#endif 182#endif
180 183
181 184
182# ifdef APIC_DEFINITION 185# if defined(APIC_DEFINITION) || defined(CONFIG_X86_64)
183extern int hard_smp_processor_id(void); 186extern int hard_smp_processor_id(void);
184# else 187# else
185# include <mach_apicdef.h> 188#include <mach_apicdef.h>
186static inline int hard_smp_processor_id(void) 189static inline int hard_smp_processor_id(void)
187{ 190{
188 /* we don't want to mark this access volatile - bad code generation */ 191 /* we don't want to mark this access volatile - bad code generation */
189 return GET_APIC_ID(read_apic_id()); 192 return read_apic_id();
190} 193}
191# endif /* APIC_DEFINITION */ 194# endif /* APIC_DEFINITION */
192 195
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index 56c73b847551..c360c558e59e 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -25,9 +25,99 @@
25#include <linux/types.h> 25#include <linux/types.h>
26#include <linux/msi.h> 26#include <linux/msi.h>
27 27
28#ifdef CONFIG_DMAR 28#if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP)
29struct intel_iommu; 29struct intel_iommu;
30 30
31struct dmar_drhd_unit {
32 struct list_head list; /* list of drhd units */
33 struct acpi_dmar_header *hdr; /* ACPI header */
 34	u64 reg_base_addr;		/* register base address */
35 struct pci_dev **devices; /* target device array */
36 int devices_cnt; /* target device count */
37 u8 ignored:1; /* ignore drhd */
38 u8 include_all:1;
39 struct intel_iommu *iommu;
40};
41
42extern struct list_head dmar_drhd_units;
43
44#define for_each_drhd_unit(drhd) \
45 list_for_each_entry(drhd, &dmar_drhd_units, list)
46
47extern int dmar_table_init(void);
48extern int early_dmar_detect(void);
49extern int dmar_dev_scope_init(void);
50
51/* Intel IOMMU detection */
52extern void detect_intel_iommu(void);
53
54
55extern int parse_ioapics_under_ir(void);
56extern int alloc_iommu(struct dmar_drhd_unit *);
57#else
58static inline void detect_intel_iommu(void)
59{
60 return;
61}
62
63static inline int dmar_table_init(void)
64{
65 return -ENODEV;
66}
67#endif /* !CONFIG_DMAR && !CONFIG_INTR_REMAP */
68
69#ifdef CONFIG_INTR_REMAP
70extern int intr_remapping_enabled;
71extern int enable_intr_remapping(int);
72
73struct irte {
74 union {
75 struct {
76 __u64 present : 1,
77 fpd : 1,
78 dst_mode : 1,
79 redir_hint : 1,
80 trigger_mode : 1,
81 dlvry_mode : 3,
82 avail : 4,
83 __reserved_1 : 4,
84 vector : 8,
85 __reserved_2 : 8,
86 dest_id : 32;
87 };
88 __u64 low;
89 };
90
91 union {
92 struct {
93 __u64 sid : 16,
94 sq : 2,
95 svt : 2,
96 __reserved_3 : 44;
97 };
98 __u64 high;
99 };
100};
101extern int get_irte(int irq, struct irte *entry);
102extern int modify_irte(int irq, struct irte *irte_modified);
103extern int alloc_irte(struct intel_iommu *iommu, int irq, u16 count);
104extern int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index,
105 u16 sub_handle);
106extern int map_irq_to_irte_handle(int irq, u16 *sub_handle);
107extern int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index);
108extern int flush_irte(int irq);
109extern int free_irte(int irq);
110
111extern int irq_remapped(int irq);
112extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev);
113extern struct intel_iommu *map_ioapic_to_ir(int apic);
114#else
115#define irq_remapped(irq) (0)
116#define enable_intr_remapping(mode) (-1)
117#define intr_remapping_enabled (0)
118#endif
119
120#ifdef CONFIG_DMAR
31extern const char *dmar_get_fault_reason(u8 fault_reason); 121extern const char *dmar_get_fault_reason(u8 fault_reason);
32 122
33/* Can't use the common MSI interrupt functions 123/* Can't use the common MSI interrupt functions
@@ -40,47 +130,30 @@ extern void dmar_msi_write(int irq, struct msi_msg *msg);
40extern int dmar_set_interrupt(struct intel_iommu *iommu); 130extern int dmar_set_interrupt(struct intel_iommu *iommu);
41extern int arch_setup_dmar_msi(unsigned int irq); 131extern int arch_setup_dmar_msi(unsigned int irq);
42 132
43/* Intel IOMMU detection and initialization functions */ 133extern int iommu_detected, no_iommu;
44extern void detect_intel_iommu(void);
45extern int intel_iommu_init(void);
46
47extern int dmar_table_init(void);
48extern int early_dmar_detect(void);
49
50extern struct list_head dmar_drhd_units;
51extern struct list_head dmar_rmrr_units; 134extern struct list_head dmar_rmrr_units;
52
53struct dmar_drhd_unit {
54 struct list_head list; /* list of drhd units */
55 u64 reg_base_addr; /* register base address*/
56 struct pci_dev **devices; /* target device array */
57 int devices_cnt; /* target device count */
58 u8 ignored:1; /* ignore drhd */
59 u8 include_all:1;
60 struct intel_iommu *iommu;
61};
62
63struct dmar_rmrr_unit { 135struct dmar_rmrr_unit {
64 struct list_head list; /* list of rmrr units */ 136 struct list_head list; /* list of rmrr units */
137 struct acpi_dmar_header *hdr; /* ACPI header */
65 u64 base_address; /* reserved base address*/ 138 u64 base_address; /* reserved base address*/
66 u64 end_address; /* reserved end address */ 139 u64 end_address; /* reserved end address */
67 struct pci_dev **devices; /* target devices */ 140 struct pci_dev **devices; /* target devices */
68 int devices_cnt; /* target device count */ 141 int devices_cnt; /* target device count */
69}; 142};
70 143
71#define for_each_drhd_unit(drhd) \
72 list_for_each_entry(drhd, &dmar_drhd_units, list)
73#define for_each_rmrr_units(rmrr) \ 144#define for_each_rmrr_units(rmrr) \
74 list_for_each_entry(rmrr, &dmar_rmrr_units, list) 145 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
146/* Intel DMAR initialization functions */
147extern int intel_iommu_init(void);
148extern int dmar_disabled;
75#else 149#else
76static inline void detect_intel_iommu(void)
77{
78 return;
79}
80static inline int intel_iommu_init(void) 150static inline int intel_iommu_init(void)
81{ 151{
152#ifdef CONFIG_INTR_REMAP
153 return dmar_dev_scope_init();
154#else
82 return -ENODEV; 155 return -ENODEV;
156#endif
83} 157}
84
85#endif /* !CONFIG_DMAR */ 158#endif /* !CONFIG_DMAR */
86#endif /* __DMAR_H__ */ 159#endif /* __DMAR_H__ */
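The struct irte introduced above overlays named bitfields on two u64 halves, so callers can either poke individual fields or move a half wholesale (as modify_irte() does with irte->low). A minimal sketch of filling one out, with illustrative values only (anonymous struct-in-union needs C11 or GNU C):

	#include <stdio.h>
	#include <stdint.h>

	struct irte {
		union {
			struct {
				uint64_t present	:  1,
					 fpd		:  1,
					 dst_mode	:  1,
					 redir_hint	:  1,
					 trigger_mode	:  1,
					 dlvry_mode	:  3,
					 avail		:  4,
					 reserved_1	:  4,
					 vector		:  8,
					 reserved_2	:  8,
					 dest_id	: 32;
			};
			uint64_t low;
		};
		union {
			struct {
				uint64_t sid		: 16,
					 sq		:  2,
					 svt		:  2,
					 reserved_3	: 44;
			};
			uint64_t high;
		};
	};

	int main(void)
	{
		struct irte e = { .low = 0, .high = 0 };

		e.present = 1;
		e.vector = 0x31;	/* illustrative vector */
		e.dest_id = 1;		/* illustrative APIC id */

		printf("low=%#llx high=%#llx\n",
		       (unsigned long long)e.low, (unsigned long long)e.high);
		return 0;
	}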
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 8ccb462ea42c..8d9411bc60f6 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -62,6 +62,7 @@ typedef void (*irq_flow_handler_t)(unsigned int irq,
62#define IRQ_MOVE_PENDING 0x00200000 /* need to re-target IRQ destination */ 62#define IRQ_MOVE_PENDING 0x00200000 /* need to re-target IRQ destination */
63#define IRQ_NO_BALANCING 0x00400000 /* IRQ is excluded from balancing */ 63#define IRQ_NO_BALANCING 0x00400000 /* IRQ is excluded from balancing */
64#define IRQ_SPURIOUS_DISABLED 0x00800000 /* IRQ was disabled by the spurious trap */ 64#define IRQ_SPURIOUS_DISABLED 0x00800000 /* IRQ was disabled by the spurious trap */
65#define IRQ_MOVE_PCNTXT 0x01000000 /* IRQ migration from process context */
65 66
66#ifdef CONFIG_IRQ_PER_CPU 67#ifdef CONFIG_IRQ_PER_CPU
67# define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU) 68# define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU)
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 77a51be36010..909b2231fa93 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -89,7 +89,14 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
89 set_balance_irq_affinity(irq, cpumask); 89 set_balance_irq_affinity(irq, cpumask);
90 90
91#ifdef CONFIG_GENERIC_PENDING_IRQ 91#ifdef CONFIG_GENERIC_PENDING_IRQ
92 set_pending_irq(irq, cpumask); 92 if (desc->status & IRQ_MOVE_PCNTXT) {
93 unsigned long flags;
94
95 spin_lock_irqsave(&desc->lock, flags);
96 desc->chip->set_affinity(irq, cpumask);
97 spin_unlock_irqrestore(&desc->lock, flags);
98 } else
99 set_pending_irq(irq, cpumask);
93#else 100#else
94 desc->affinity = cpumask; 101 desc->affinity = cpumask;
95 desc->chip->set_affinity(irq, cpumask); 102 desc->chip->set_affinity(irq, cpumask);
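The manage.c hunk above is what makes remapped IRQ migration cheap: when a chip sets IRQ_MOVE_PCNTXT it can be retargeted from process context (an IRTE rewrite plus interrupt-entry-cache flush is atomic from the device's point of view), instead of parking the new mask until the next interrupt arrives. A userspace model of the branch, with desc and chip reduced to plain structs:

	#include <stdio.h>

	#define IRQ_MOVE_PCNTXT	0x01000000

	struct irq_desc {
		unsigned int status;
		unsigned int affinity;
		unsigned int pending;	/* stand-in for set_pending_irq() state */
	};

	static void chip_set_affinity(struct irq_desc *desc, unsigned int mask)
	{
		desc->affinity = mask;
		printf("retargeted immediately to %#x\n", mask);
	}

	static void set_affinity(struct irq_desc *desc, unsigned int mask)
	{
		if (desc->status & IRQ_MOVE_PCNTXT)
			chip_set_affinity(desc, mask);	/* safe from process ctx */
		else {
			desc->pending = mask;		/* applied at next IRQ */
			printf("deferred to next interrupt: %#x\n", mask);
		}
	}

	int main(void)
	{
		struct irq_desc remapped = { .status = IRQ_MOVE_PCNTXT };
		struct irq_desc legacy = { .status = 0 };

		set_affinity(&remapped, 0x4);
		set_affinity(&legacy, 0x4);
		return 0;
	}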