aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-07-26 10:29:23 -0400
committerIngo Molnar <mingo@elte.hu>2008-07-26 10:29:23 -0400
commit6dec3a10a7a6093af10cef7ac56021150afd6451 (patch)
treef4d8511f023e3e0c203baf889d4a0f3925882381 /arch
parent29308333fbe2cc61258c1c470f9403960428beb2 (diff)
parent10a010f6953b5a14ba2f0be40a4fce1bea220875 (diff)
Merge branch 'x86/x2apic' into x86/core
Conflicts: include/asm-x86/i8259.h include/asm-x86/msidef.h Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/Kconfig8
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/acpi/boot.c4
-rw-r--r--arch/x86/kernel/apic_32.c41
-rw-r--r--arch/x86/kernel/apic_64.c237
-rw-r--r--arch/x86/kernel/cpu/common_64.c2
-rw-r--r--arch/x86/kernel/cpu/feature_names.c2
-rw-r--r--arch/x86/kernel/genapic_64.c83
-rw-r--r--arch/x86/kernel/genapic_flat_64.c62
-rw-r--r--arch/x86/kernel/genx2apic_cluster.c164
-rw-r--r--arch/x86/kernel/genx2apic_phys.c159
-rw-r--r--arch/x86/kernel/genx2apic_uv_x.c69
-rw-r--r--arch/x86/kernel/i8259.c24
-rw-r--r--arch/x86/kernel/io_apic_32.c5
-rw-r--r--arch/x86/kernel/io_apic_64.c608
-rw-r--r--arch/x86/kernel/mpparse.c2
-rw-r--r--arch/x86/kernel/paravirt.c2
-rw-r--r--arch/x86/kernel/setup.c2
-rw-r--r--arch/x86/kernel/smpboot.c38
-rw-r--r--arch/x86/kernel/vmi_32.c4
-rw-r--r--arch/x86/lguest/boot.c38
-rw-r--r--arch/x86/mach-generic/bigsmp.c5
-rw-r--r--arch/x86/mach-generic/es7000.c3
-rw-r--r--arch/x86/mach-generic/numaq.c4
-rw-r--r--arch/x86/mach-generic/summit.c5
-rw-r--r--arch/x86/xen/enlighten.c45
26 files changed, 1462 insertions, 156 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e3cba0b45600..4d621eb90bbf 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1652,6 +1652,14 @@ config DMAR_FLOPPY_WA
1652 workaround will setup a 1:1 mapping for the first 1652 workaround will setup a 1:1 mapping for the first
1653 16M to make floppy (an ISA device) work. 1653 16M to make floppy (an ISA device) work.
1654 1654
1655config INTR_REMAP
1656 bool "Support for Interrupt Remapping (EXPERIMENTAL)"
1657 depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL
1658 help
1659 Supports Interrupt remapping for IO-APIC and MSI devices.
1660 To use x2apic mode in the CPU's which support x2APIC enhancements or
1661 to support platforms with CPU's having > 8 bit APIC ID, say Y.
1662
1655source "drivers/pci/pcie/Kconfig" 1663source "drivers/pci/pcie/Kconfig"
1656 1664
1657source "drivers/pci/Kconfig" 1665source "drivers/pci/Kconfig"
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 3db651fc8ec5..a07ec14f3312 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -104,6 +104,8 @@ obj-$(CONFIG_OLPC) += olpc.o
104ifeq ($(CONFIG_X86_64),y) 104ifeq ($(CONFIG_X86_64),y)
105 obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o 105 obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
106 obj-y += bios_uv.o 106 obj-y += bios_uv.o
107 obj-y += genx2apic_cluster.o
108 obj-y += genx2apic_phys.o
107 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o 109 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
108 obj-$(CONFIG_AUDIT) += audit_64.o 110 obj-$(CONFIG_AUDIT) += audit_64.o
109 111
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index fa88a1d71290..12e260e8fb2a 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -761,7 +761,7 @@ static void __init acpi_register_lapic_address(unsigned long address)
761 761
762 set_fixmap_nocache(FIX_APIC_BASE, address); 762 set_fixmap_nocache(FIX_APIC_BASE, address);
763 if (boot_cpu_physical_apicid == -1U) { 763 if (boot_cpu_physical_apicid == -1U) {
764 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 764 boot_cpu_physical_apicid = read_apic_id();
765#ifdef CONFIG_X86_32 765#ifdef CONFIG_X86_32
766 apic_version[boot_cpu_physical_apicid] = 766 apic_version[boot_cpu_physical_apicid] =
767 GET_APIC_VERSION(apic_read(APIC_LVR)); 767 GET_APIC_VERSION(apic_read(APIC_LVR));
@@ -1337,7 +1337,9 @@ static void __init acpi_process_madt(void)
1337 acpi_ioapic = 1; 1337 acpi_ioapic = 1;
1338 1338
1339 smp_found_config = 1; 1339 smp_found_config = 1;
1340#ifdef CONFIG_X86_32
1340 setup_apic_routing(); 1341 setup_apic_routing();
1342#endif
1341 } 1343 }
1342 } 1344 }
1343 if (error == -EINVAL) { 1345 if (error == -EINVAL) {
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index d6c898358371..f93c18f5b79d 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -145,13 +145,18 @@ static int modern_apic(void)
145 return lapic_get_version() >= 0x14; 145 return lapic_get_version() >= 0x14;
146} 146}
147 147
148void apic_wait_icr_idle(void) 148/*
149 * Paravirt kernels also might be using these below ops. So we still
150 * use generic apic_read()/apic_write(), which might be pointing to different
151 * ops in PARAVIRT case.
152 */
153void xapic_wait_icr_idle(void)
149{ 154{
150 while (apic_read(APIC_ICR) & APIC_ICR_BUSY) 155 while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
151 cpu_relax(); 156 cpu_relax();
152} 157}
153 158
154u32 safe_apic_wait_icr_idle(void) 159u32 safe_xapic_wait_icr_idle(void)
155{ 160{
156 u32 send_status; 161 u32 send_status;
157 int timeout; 162 int timeout;
@@ -167,6 +172,34 @@ u32 safe_apic_wait_icr_idle(void)
167 return send_status; 172 return send_status;
168} 173}
169 174
175void xapic_icr_write(u32 low, u32 id)
176{
177 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
178 apic_write(APIC_ICR, low);
179}
180
181u64 xapic_icr_read(void)
182{
183 u32 icr1, icr2;
184
185 icr2 = apic_read(APIC_ICR2);
186 icr1 = apic_read(APIC_ICR);
187
188 return icr1 | ((u64)icr2 << 32);
189}
190
191static struct apic_ops xapic_ops = {
192 .read = native_apic_mem_read,
193 .write = native_apic_mem_write,
194 .icr_read = xapic_icr_read,
195 .icr_write = xapic_icr_write,
196 .wait_icr_idle = xapic_wait_icr_idle,
197 .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
198};
199
200struct apic_ops __read_mostly *apic_ops = &xapic_ops;
201EXPORT_SYMBOL_GPL(apic_ops);
202
170/** 203/**
171 * enable_NMI_through_LVT0 - enable NMI through local vector table 0 204 * enable_NMI_through_LVT0 - enable NMI through local vector table 0
172 */ 205 */
@@ -1205,7 +1238,7 @@ void __init init_apic_mappings(void)
1205 * default configuration (or the MP table is broken). 1238 * default configuration (or the MP table is broken).
1206 */ 1239 */
1207 if (boot_cpu_physical_apicid == -1U) 1240 if (boot_cpu_physical_apicid == -1U)
1208 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 1241 boot_cpu_physical_apicid = read_apic_id();
1209 1242
1210} 1243}
1211 1244
@@ -1242,7 +1275,7 @@ int __init APIC_init_uniprocessor(void)
1242 * might be zero if read from MP tables. Get it from LAPIC. 1275 * might be zero if read from MP tables. Get it from LAPIC.
1243 */ 1276 */
1244#ifdef CONFIG_CRASH_DUMP 1277#ifdef CONFIG_CRASH_DUMP
1245 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 1278 boot_cpu_physical_apicid = read_apic_id();
1246#endif 1279#endif
1247 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); 1280 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
1248 1281
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 7f1f030da7ee..cd63c0bc6180 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -27,6 +27,7 @@
27#include <linux/clockchips.h> 27#include <linux/clockchips.h>
28#include <linux/acpi_pmtmr.h> 28#include <linux/acpi_pmtmr.h>
29#include <linux/module.h> 29#include <linux/module.h>
30#include <linux/dmar.h>
30 31
31#include <asm/atomic.h> 32#include <asm/atomic.h>
32#include <asm/smp.h> 33#include <asm/smp.h>
@@ -39,6 +40,7 @@
39#include <asm/proto.h> 40#include <asm/proto.h>
40#include <asm/timex.h> 41#include <asm/timex.h>
41#include <asm/apic.h> 42#include <asm/apic.h>
43#include <asm/i8259.h>
42 44
43#include <mach_ipi.h> 45#include <mach_ipi.h>
44#include <mach_apic.h> 46#include <mach_apic.h>
@@ -46,6 +48,11 @@
46static int disable_apic_timer __cpuinitdata; 48static int disable_apic_timer __cpuinitdata;
47static int apic_calibrate_pmtmr __initdata; 49static int apic_calibrate_pmtmr __initdata;
48int disable_apic; 50int disable_apic;
51int disable_x2apic;
52int x2apic;
53
54/* x2apic enabled before OS handover */
55int x2apic_preenabled;
49 56
50/* Local APIC timer works in C2 */ 57/* Local APIC timer works in C2 */
51int local_apic_timer_c2_ok; 58int local_apic_timer_c2_ok;
@@ -119,13 +126,13 @@ static int modern_apic(void)
119 return lapic_get_version() >= 0x14; 126 return lapic_get_version() >= 0x14;
120} 127}
121 128
122void apic_wait_icr_idle(void) 129void xapic_wait_icr_idle(void)
123{ 130{
124 while (apic_read(APIC_ICR) & APIC_ICR_BUSY) 131 while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
125 cpu_relax(); 132 cpu_relax();
126} 133}
127 134
128u32 safe_apic_wait_icr_idle(void) 135u32 safe_xapic_wait_icr_idle(void)
129{ 136{
130 u32 send_status; 137 u32 send_status;
131 int timeout; 138 int timeout;
@@ -141,6 +148,69 @@ u32 safe_apic_wait_icr_idle(void)
141 return send_status; 148 return send_status;
142} 149}
143 150
151void xapic_icr_write(u32 low, u32 id)
152{
153 apic_write(APIC_ICR2, id << 24);
154 apic_write(APIC_ICR, low);
155}
156
157u64 xapic_icr_read(void)
158{
159 u32 icr1, icr2;
160
161 icr2 = apic_read(APIC_ICR2);
162 icr1 = apic_read(APIC_ICR);
163
164 return (icr1 | ((u64)icr2 << 32));
165}
166
167static struct apic_ops xapic_ops = {
168 .read = native_apic_mem_read,
169 .write = native_apic_mem_write,
170 .icr_read = xapic_icr_read,
171 .icr_write = xapic_icr_write,
172 .wait_icr_idle = xapic_wait_icr_idle,
173 .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
174};
175
176struct apic_ops __read_mostly *apic_ops = &xapic_ops;
177
178EXPORT_SYMBOL_GPL(apic_ops);
179
180static void x2apic_wait_icr_idle(void)
181{
182 /* no need to wait for icr idle in x2apic */
183 return;
184}
185
186static u32 safe_x2apic_wait_icr_idle(void)
187{
188 /* no need to wait for icr idle in x2apic */
189 return 0;
190}
191
192void x2apic_icr_write(u32 low, u32 id)
193{
194 wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
195}
196
197u64 x2apic_icr_read(void)
198{
199 unsigned long val;
200
201 rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
202 return val;
203}
204
205static struct apic_ops x2apic_ops = {
206 .read = native_apic_msr_read,
207 .write = native_apic_msr_write,
208 .icr_read = x2apic_icr_read,
209 .icr_write = x2apic_icr_write,
210 .wait_icr_idle = x2apic_wait_icr_idle,
211 .safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
212};
213
144/** 214/**
145 * enable_NMI_through_LVT0 - enable NMI through local vector table 0 215 * enable_NMI_through_LVT0 - enable NMI through local vector table 0
146 */ 216 */
@@ -630,10 +700,10 @@ int __init verify_local_APIC(void)
630 /* 700 /*
631 * The ID register is read/write in a real APIC. 701 * The ID register is read/write in a real APIC.
632 */ 702 */
633 reg0 = read_apic_id(); 703 reg0 = apic_read(APIC_ID);
634 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); 704 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
635 apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); 705 apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
636 reg1 = read_apic_id(); 706 reg1 = apic_read(APIC_ID);
637 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); 707 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
638 apic_write(APIC_ID, reg0); 708 apic_write(APIC_ID, reg0);
639 if (reg1 != (reg0 ^ APIC_ID_MASK)) 709 if (reg1 != (reg0 ^ APIC_ID_MASK))
@@ -834,6 +904,125 @@ void __cpuinit end_local_APIC_setup(void)
834 apic_pm_activate(); 904 apic_pm_activate();
835} 905}
836 906
907void check_x2apic(void)
908{
909 int msr, msr2;
910
911 rdmsr(MSR_IA32_APICBASE, msr, msr2);
912
913 if (msr & X2APIC_ENABLE) {
914 printk("x2apic enabled by BIOS, switching to x2apic ops\n");
915 x2apic_preenabled = x2apic = 1;
916 apic_ops = &x2apic_ops;
917 }
918}
919
920void enable_x2apic(void)
921{
922 int msr, msr2;
923
924 rdmsr(MSR_IA32_APICBASE, msr, msr2);
925 if (!(msr & X2APIC_ENABLE)) {
926 printk("Enabling x2apic\n");
927 wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
928 }
929}
930
931void enable_IR_x2apic(void)
932{
933#ifdef CONFIG_INTR_REMAP
934 int ret;
935 unsigned long flags;
936
937 if (!cpu_has_x2apic)
938 return;
939
940 if (!x2apic_preenabled && disable_x2apic) {
941 printk(KERN_INFO
942 "Skipped enabling x2apic and Interrupt-remapping "
943 "because of nox2apic\n");
944 return;
945 }
946
947 if (x2apic_preenabled && disable_x2apic)
948 panic("Bios already enabled x2apic, can't enforce nox2apic");
949
950 if (!x2apic_preenabled && skip_ioapic_setup) {
951 printk(KERN_INFO
952 "Skipped enabling x2apic and Interrupt-remapping "
953 "because of skipping io-apic setup\n");
954 return;
955 }
956
957 ret = dmar_table_init();
958 if (ret) {
959 printk(KERN_INFO
960 "dmar_table_init() failed with %d:\n", ret);
961
962 if (x2apic_preenabled)
963 panic("x2apic enabled by bios. But IR enabling failed");
964 else
965 printk(KERN_INFO
966 "Not enabling x2apic,Intr-remapping\n");
967 return;
968 }
969
970 local_irq_save(flags);
971 mask_8259A();
972 save_mask_IO_APIC_setup();
973
974 ret = enable_intr_remapping(1);
975
976 if (ret && x2apic_preenabled) {
977 local_irq_restore(flags);
978 panic("x2apic enabled by bios. But IR enabling failed");
979 }
980
981 if (ret)
982 goto end;
983
984 if (!x2apic) {
985 x2apic = 1;
986 apic_ops = &x2apic_ops;
987 enable_x2apic();
988 }
989end:
990 if (ret)
991 /*
992 * IR enabling failed
993 */
994 restore_IO_APIC_setup();
995 else
996 reinit_intr_remapped_IO_APIC(x2apic_preenabled);
997
998 unmask_8259A();
999 local_irq_restore(flags);
1000
1001 if (!ret) {
1002 if (!x2apic_preenabled)
1003 printk(KERN_INFO
1004 "Enabled x2apic and interrupt-remapping\n");
1005 else
1006 printk(KERN_INFO
1007 "Enabled Interrupt-remapping\n");
1008 } else
1009 printk(KERN_ERR
1010 "Failed to enable Interrupt-remapping and x2apic\n");
1011#else
1012 if (!cpu_has_x2apic)
1013 return;
1014
1015 if (x2apic_preenabled)
1016 panic("x2apic enabled prior OS handover,"
1017 " enable CONFIG_INTR_REMAP");
1018
1019 printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
1020 " and x2apic\n");
1021#endif
1022
1023 return;
1024}
1025
837/* 1026/*
838 * Detect and enable local APICs on non-SMP boards. 1027 * Detect and enable local APICs on non-SMP boards.
839 * Original code written by Keir Fraser. 1028 * Original code written by Keir Fraser.
@@ -873,7 +1062,7 @@ void __init early_init_lapic_mapping(void)
873 * Fetch the APIC ID of the BSP in case we have a 1062 * Fetch the APIC ID of the BSP in case we have a
874 * default configuration (or the MP table is broken). 1063 * default configuration (or the MP table is broken).
875 */ 1064 */
876 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 1065 boot_cpu_physical_apicid = read_apic_id();
877} 1066}
878 1067
879/** 1068/**
@@ -881,6 +1070,11 @@ void __init early_init_lapic_mapping(void)
881 */ 1070 */
882void __init init_apic_mappings(void) 1071void __init init_apic_mappings(void)
883{ 1072{
1073 if (x2apic) {
1074 boot_cpu_physical_apicid = read_apic_id();
1075 return;
1076 }
1077
884 /* 1078 /*
885 * If no local APIC can be found then set up a fake all 1079 * If no local APIC can be found then set up a fake all
886 * zeroes page to simulate the local APIC and another 1080 * zeroes page to simulate the local APIC and another
@@ -900,7 +1094,7 @@ void __init init_apic_mappings(void)
900 * Fetch the APIC ID of the BSP in case we have a 1094 * Fetch the APIC ID of the BSP in case we have a
901 * default configuration (or the MP table is broken). 1095 * default configuration (or the MP table is broken).
902 */ 1096 */
903 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 1097 boot_cpu_physical_apicid = read_apic_id();
904} 1098}
905 1099
906/* 1100/*
@@ -919,6 +1113,9 @@ int __init APIC_init_uniprocessor(void)
919 return -1; 1113 return -1;
920 } 1114 }
921 1115
1116 enable_IR_x2apic();
1117 setup_apic_routing();
1118
922 verify_local_APIC(); 1119 verify_local_APIC();
923 1120
924 connect_bsp_APIC(); 1121 connect_bsp_APIC();
@@ -1100,6 +1297,11 @@ void __cpuinit generic_processor_info(int apicid, int version)
1100 cpu_set(cpu, cpu_present_map); 1297 cpu_set(cpu, cpu_present_map);
1101} 1298}
1102 1299
1300int hard_smp_processor_id(void)
1301{
1302 return read_apic_id();
1303}
1304
1103/* 1305/*
1104 * Power management 1306 * Power management
1105 */ 1307 */
@@ -1136,7 +1338,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
1136 1338
1137 maxlvt = lapic_get_maxlvt(); 1339 maxlvt = lapic_get_maxlvt();
1138 1340
1139 apic_pm_state.apic_id = read_apic_id(); 1341 apic_pm_state.apic_id = apic_read(APIC_ID);
1140 apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); 1342 apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
1141 apic_pm_state.apic_ldr = apic_read(APIC_LDR); 1343 apic_pm_state.apic_ldr = apic_read(APIC_LDR);
1142 apic_pm_state.apic_dfr = apic_read(APIC_DFR); 1344 apic_pm_state.apic_dfr = apic_read(APIC_DFR);
@@ -1171,10 +1373,14 @@ static int lapic_resume(struct sys_device *dev)
1171 maxlvt = lapic_get_maxlvt(); 1373 maxlvt = lapic_get_maxlvt();
1172 1374
1173 local_irq_save(flags); 1375 local_irq_save(flags);
1174 rdmsr(MSR_IA32_APICBASE, l, h); 1376 if (!x2apic) {
1175 l &= ~MSR_IA32_APICBASE_BASE; 1377 rdmsr(MSR_IA32_APICBASE, l, h);
1176 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; 1378 l &= ~MSR_IA32_APICBASE_BASE;
1177 wrmsr(MSR_IA32_APICBASE, l, h); 1379 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
1380 wrmsr(MSR_IA32_APICBASE, l, h);
1381 } else
1382 enable_x2apic();
1383
1178 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); 1384 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
1179 apic_write(APIC_ID, apic_pm_state.apic_id); 1385 apic_write(APIC_ID, apic_pm_state.apic_id);
1180 apic_write(APIC_DFR, apic_pm_state.apic_dfr); 1386 apic_write(APIC_DFR, apic_pm_state.apic_dfr);
@@ -1314,6 +1520,15 @@ __cpuinit int apic_is_clustered_box(void)
1314 return (clusters > 2); 1520 return (clusters > 2);
1315} 1521}
1316 1522
1523static __init int setup_nox2apic(char *str)
1524{
1525 disable_x2apic = 1;
1526 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC);
1527 return 0;
1528}
1529early_param("nox2apic", setup_nox2apic);
1530
1531
1317/* 1532/*
1318 * APIC command line parameters 1533 * APIC command line parameters
1319 */ 1534 */
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
index dd6e3f15017e..6f9b8924bdc0 100644
--- a/arch/x86/kernel/cpu/common_64.c
+++ b/arch/x86/kernel/cpu/common_64.c
@@ -597,6 +597,8 @@ void __cpuinit cpu_init(void)
597 barrier(); 597 barrier();
598 598
599 check_efer(); 599 check_efer();
600 if (cpu != 0 && x2apic)
601 enable_x2apic();
600 602
601 /* 603 /*
602 * set up and load the per-CPU TSS 604 * set up and load the per-CPU TSS
diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c
index e43ad4ad4cba..0bf4d37a0483 100644
--- a/arch/x86/kernel/cpu/feature_names.c
+++ b/arch/x86/kernel/cpu/feature_names.c
@@ -45,7 +45,7 @@ const char * const x86_cap_flags[NCAPINTS*32] = {
45 /* Intel-defined (#2) */ 45 /* Intel-defined (#2) */
46 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", 46 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
47 "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, 47 "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
48 NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt", 48 NULL, NULL, "dca", "sse4_1", "sse4_2", "x2apic", NULL, "popcnt",
49 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 49 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
50 50
51 /* VIA/Cyrix/Centaur-defined */ 51 /* VIA/Cyrix/Centaur-defined */
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index 1fa8be5bd217..b3ba969c50d2 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -21,81 +21,52 @@
21#include <asm/ipi.h> 21#include <asm/ipi.h>
22#include <asm/genapic.h> 22#include <asm/genapic.h>
23 23
24#ifdef CONFIG_ACPI 24extern struct genapic apic_flat;
25#include <acpi/acpi_bus.h> 25extern struct genapic apic_physflat;
26#endif 26extern struct genapic apic_x2xpic_uv_x;
27 27extern struct genapic apic_x2apic_phys;
28DEFINE_PER_CPU(int, x2apic_extra_bits); 28extern struct genapic apic_x2apic_cluster;
29 29
30struct genapic __read_mostly *genapic = &apic_flat; 30struct genapic __read_mostly *genapic = &apic_flat;
31 31
32static enum uv_system_type uv_system_type; 32static struct genapic *apic_probe[] __initdata = {
33 &apic_x2apic_uv_x,
34 &apic_x2apic_phys,
35 &apic_x2apic_cluster,
36 &apic_physflat,
37 NULL,
38};
33 39
34/* 40/*
35 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. 41 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
36 */ 42 */
37void __init setup_apic_routing(void) 43void __init setup_apic_routing(void)
38{ 44{
39 if (uv_system_type == UV_NON_UNIQUE_APIC) 45 if (genapic == &apic_flat) {
40 genapic = &apic_x2apic_uv_x; 46 if (max_physical_apicid >= 8)
41 else 47 genapic = &apic_physflat;
42#ifdef CONFIG_ACPI 48 printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
43 /* 49 }
44 * Quirk: some x86_64 machines can only use physical APIC mode
45 * regardless of how many processors are present (x86_64 ES7000
46 * is an example).
47 */
48 if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
49 (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
50 genapic = &apic_physflat;
51 else
52#endif
53
54 if (max_physical_apicid < 8)
55 genapic = &apic_flat;
56 else
57 genapic = &apic_physflat;
58
59 printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
60} 50}
61 51
62/* Same for both flat and physical. */ 52/* Same for both flat and physical. */
63 53
64void send_IPI_self(int vector) 54void apic_send_IPI_self(int vector)
65{ 55{
66 __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); 56 __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
67} 57}
68 58
69int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) 59int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
70{ 60{
71 if (!strcmp(oem_id, "SGI")) { 61 int i;
72 if (!strcmp(oem_table_id, "UVL")) 62
73 uv_system_type = UV_LEGACY_APIC; 63 for (i = 0; apic_probe[i]; ++i) {
74 else if (!strcmp(oem_table_id, "UVX")) 64 if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
75 uv_system_type = UV_X2APIC; 65 genapic = apic_probe[i];
76 else if (!strcmp(oem_table_id, "UVH")) 66 printk(KERN_INFO "Setting APIC routing to %s.\n",
77 uv_system_type = UV_NON_UNIQUE_APIC; 67 genapic->name);
68 return 1;
69 }
78 } 70 }
79 return 0; 71 return 0;
80} 72}
81
82unsigned int read_apic_id(void)
83{
84 unsigned int id;
85
86 WARN_ON(preemptible() && num_online_cpus() > 1);
87 id = apic_read(APIC_ID);
88 if (uv_system_type >= UV_X2APIC)
89 id |= __get_cpu_var(x2apic_extra_bits);
90 return id;
91}
92
93enum uv_system_type get_uv_system_type(void)
94{
95 return uv_system_type;
96}
97
98int is_uv_system(void)
99{
100 return uv_system_type != UV_NONE;
101}
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index 786548a62d38..9eca5ba7a6b1 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -15,9 +15,20 @@
15#include <linux/kernel.h> 15#include <linux/kernel.h>
16#include <linux/ctype.h> 16#include <linux/ctype.h>
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/hardirq.h>
18#include <asm/smp.h> 19#include <asm/smp.h>
19#include <asm/ipi.h> 20#include <asm/ipi.h>
20#include <asm/genapic.h> 21#include <asm/genapic.h>
22#include <mach_apicdef.h>
23
24#ifdef CONFIG_ACPI
25#include <acpi/acpi_bus.h>
26#endif
27
28static int __init flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
29{
30 return 1;
31}
21 32
22static cpumask_t flat_target_cpus(void) 33static cpumask_t flat_target_cpus(void)
23{ 34{
@@ -95,9 +106,33 @@ static void flat_send_IPI_all(int vector)
95 __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); 106 __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
96} 107}
97 108
109static unsigned int get_apic_id(unsigned long x)
110{
111 unsigned int id;
112
113 id = (((x)>>24) & 0xFFu);
114 return id;
115}
116
117static unsigned long set_apic_id(unsigned int id)
118{
119 unsigned long x;
120
121 x = ((id & 0xFFu)<<24);
122 return x;
123}
124
125static unsigned int read_xapic_id(void)
126{
127 unsigned int id;
128
129 id = get_apic_id(apic_read(APIC_ID));
130 return id;
131}
132
98static int flat_apic_id_registered(void) 133static int flat_apic_id_registered(void)
99{ 134{
100 return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map); 135 return physid_isset(read_xapic_id(), phys_cpu_present_map);
101} 136}
102 137
103static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) 138static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask)
@@ -112,6 +147,7 @@ static unsigned int phys_pkg_id(int index_msb)
112 147
113struct genapic apic_flat = { 148struct genapic apic_flat = {
114 .name = "flat", 149 .name = "flat",
150 .acpi_madt_oem_check = flat_acpi_madt_oem_check,
115 .int_delivery_mode = dest_LowestPrio, 151 .int_delivery_mode = dest_LowestPrio,
116 .int_dest_mode = (APIC_DEST_LOGICAL != 0), 152 .int_dest_mode = (APIC_DEST_LOGICAL != 0),
117 .target_cpus = flat_target_cpus, 153 .target_cpus = flat_target_cpus,
@@ -121,8 +157,12 @@ struct genapic apic_flat = {
121 .send_IPI_all = flat_send_IPI_all, 157 .send_IPI_all = flat_send_IPI_all,
122 .send_IPI_allbutself = flat_send_IPI_allbutself, 158 .send_IPI_allbutself = flat_send_IPI_allbutself,
123 .send_IPI_mask = flat_send_IPI_mask, 159 .send_IPI_mask = flat_send_IPI_mask,
160 .send_IPI_self = apic_send_IPI_self,
124 .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, 161 .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
125 .phys_pkg_id = phys_pkg_id, 162 .phys_pkg_id = phys_pkg_id,
163 .get_apic_id = get_apic_id,
164 .set_apic_id = set_apic_id,
165 .apic_id_mask = (0xFFu<<24),
126}; 166};
127 167
128/* 168/*
@@ -130,6 +170,21 @@ struct genapic apic_flat = {
130 * We cannot use logical delivery in this case because the mask 170 * We cannot use logical delivery in this case because the mask
131 * overflows, so use physical mode. 171 * overflows, so use physical mode.
132 */ 172 */
173static int __init physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
174{
175#ifdef CONFIG_ACPI
176 /*
177 * Quirk: some x86_64 machines can only use physical APIC mode
178 * regardless of how many processors are present (x86_64 ES7000
179 * is an example).
180 */
181 if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
182 (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
183 return 1;
184#endif
185
186 return 0;
187}
133 188
134static cpumask_t physflat_target_cpus(void) 189static cpumask_t physflat_target_cpus(void)
135{ 190{
@@ -176,6 +231,7 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
176 231
177struct genapic apic_physflat = { 232struct genapic apic_physflat = {
178 .name = "physical flat", 233 .name = "physical flat",
234 .acpi_madt_oem_check = physflat_acpi_madt_oem_check,
179 .int_delivery_mode = dest_Fixed, 235 .int_delivery_mode = dest_Fixed,
180 .int_dest_mode = (APIC_DEST_PHYSICAL != 0), 236 .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
181 .target_cpus = physflat_target_cpus, 237 .target_cpus = physflat_target_cpus,
@@ -185,6 +241,10 @@ struct genapic apic_physflat = {
185 .send_IPI_all = physflat_send_IPI_all, 241 .send_IPI_all = physflat_send_IPI_all,
186 .send_IPI_allbutself = physflat_send_IPI_allbutself, 242 .send_IPI_allbutself = physflat_send_IPI_allbutself,
187 .send_IPI_mask = physflat_send_IPI_mask, 243 .send_IPI_mask = physflat_send_IPI_mask,
244 .send_IPI_self = apic_send_IPI_self,
188 .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, 245 .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
189 .phys_pkg_id = phys_pkg_id, 246 .phys_pkg_id = phys_pkg_id,
247 .get_apic_id = get_apic_id,
248 .set_apic_id = set_apic_id,
249 .apic_id_mask = (0xFFu<<24),
190}; 250};
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
new file mode 100644
index 000000000000..ef3f3182d50a
--- /dev/null
+++ b/arch/x86/kernel/genx2apic_cluster.c
@@ -0,0 +1,164 @@
1#include <linux/threads.h>
2#include <linux/cpumask.h>
3#include <linux/string.h>
4#include <linux/kernel.h>
5#include <linux/ctype.h>
6#include <linux/init.h>
7#include <linux/dmar.h>
8
9#include <asm/smp.h>
10#include <asm/ipi.h>
11#include <asm/genapic.h>
12
13DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
14
15static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
16{
17 if (cpu_has_x2apic && intr_remapping_enabled)
18 return 1;
19
20 return 0;
21}
22
23/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
24
25static cpumask_t x2apic_target_cpus(void)
26{
27 return cpumask_of_cpu(0);
28}
29
30/*
31 * for now each logical cpu is in its own vector allocation domain.
32 */
33static cpumask_t x2apic_vector_allocation_domain(int cpu)
34{
35 cpumask_t domain = CPU_MASK_NONE;
36 cpu_set(cpu, domain);
37 return domain;
38}
39
40static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
41 unsigned int dest)
42{
43 unsigned long cfg;
44
45 cfg = __prepare_ICR(0, vector, dest);
46
47 /*
48 * send the IPI.
49 */
50 x2apic_icr_write(cfg, apicid);
51}
52
53/*
54 * for now, we send the IPI's one by one in the cpumask.
55 * TBD: Based on the cpu mask, we can send the IPI's to the cluster group
56 * at once. We have 16 cpu's in a cluster. This will minimize IPI register
57 * writes.
58 */
59static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
60{
61 unsigned long flags;
62 unsigned long query_cpu;
63
64 local_irq_save(flags);
65 for_each_cpu_mask(query_cpu, mask) {
66 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu),
67 vector, APIC_DEST_LOGICAL);
68 }
69 local_irq_restore(flags);
70}
71
72static void x2apic_send_IPI_allbutself(int vector)
73{
74 cpumask_t mask = cpu_online_map;
75
76 cpu_clear(smp_processor_id(), mask);
77
78 if (!cpus_empty(mask))
79 x2apic_send_IPI_mask(mask, vector);
80}
81
82static void x2apic_send_IPI_all(int vector)
83{
84 x2apic_send_IPI_mask(cpu_online_map, vector);
85}
86
87static int x2apic_apic_id_registered(void)
88{
89 return 1;
90}
91
92static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
93{
94 int cpu;
95
96 /*
97 * We're using fixed IRQ delivery, can only return one phys APIC ID.
98 * May as well be the first.
99 */
100 cpu = first_cpu(cpumask);
101 if ((unsigned)cpu < NR_CPUS)
102 return per_cpu(x86_cpu_to_logical_apicid, cpu);
103 else
104 return BAD_APICID;
105}
106
107static unsigned int get_apic_id(unsigned long x)
108{
109 unsigned int id;
110
111 id = x;
112 return id;
113}
114
115static unsigned long set_apic_id(unsigned int id)
116{
117 unsigned long x;
118
119 x = id;
120 return x;
121}
122
123static unsigned int x2apic_read_id(void)
124{
125 return apic_read(APIC_ID);
126}
127
128static unsigned int phys_pkg_id(int index_msb)
129{
130 return x2apic_read_id() >> index_msb;
131}
132
133static void x2apic_send_IPI_self(int vector)
134{
135 apic_write(APIC_SELF_IPI, vector);
136}
137
138static void init_x2apic_ldr(void)
139{
140 int cpu = smp_processor_id();
141
142 per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR);
143 return;
144}
145
146struct genapic apic_x2apic_cluster = {
147 .name = "cluster x2apic",
148 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
149 .int_delivery_mode = dest_LowestPrio,
150 .int_dest_mode = (APIC_DEST_LOGICAL != 0),
151 .target_cpus = x2apic_target_cpus,
152 .vector_allocation_domain = x2apic_vector_allocation_domain,
153 .apic_id_registered = x2apic_apic_id_registered,
154 .init_apic_ldr = init_x2apic_ldr,
155 .send_IPI_all = x2apic_send_IPI_all,
156 .send_IPI_allbutself = x2apic_send_IPI_allbutself,
157 .send_IPI_mask = x2apic_send_IPI_mask,
158 .send_IPI_self = x2apic_send_IPI_self,
159 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
160 .phys_pkg_id = phys_pkg_id,
161 .get_apic_id = get_apic_id,
162 .set_apic_id = set_apic_id,
163 .apic_id_mask = (0xFFFFFFFFu),
164};
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
new file mode 100644
index 000000000000..3229c68aedd4
--- /dev/null
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -0,0 +1,159 @@
1#include <linux/threads.h>
2#include <linux/cpumask.h>
3#include <linux/string.h>
4#include <linux/kernel.h>
5#include <linux/ctype.h>
6#include <linux/init.h>
7#include <linux/dmar.h>
8
9#include <asm/smp.h>
10#include <asm/ipi.h>
11#include <asm/genapic.h>
12
/* Non-zero when the user asked for physical x2apic mode on the command line. */
static int x2apic_phys;

/* early_param handler for "x2apic_phys"; the argument value is ignored. */
static int set_x2apic_phys_mode(char *arg)
{
	x2apic_phys = 1;
	return 0;
}
early_param("x2apic_phys", set_x2apic_phys_mode);
21
22static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
23{
24 if (cpu_has_x2apic && intr_remapping_enabled && x2apic_phys)
25 return 1;
26
27 return 0;
28}
29
/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */

static cpumask_t x2apic_target_cpus(void)
{
	return cpumask_of_cpu(0);
}
36
/*
 * Vector allocation domain for @cpu: just the cpu itself.  With
 * fixed physical delivery each vector targets exactly one cpu, so
 * vectors need not be reserved across a wider domain.
 */
static cpumask_t x2apic_vector_allocation_domain(int cpu)
{
	cpumask_t domain = CPU_MASK_NONE;
	cpu_set(cpu, domain);
	return domain;
}
43
/*
 * Low-level IPI send: build the ICR command word for @vector/@dest
 * mode and write it, together with the destination @apicid, via the
 * 64-bit x2apic ICR MSR.  Caller must have interrupts disabled.
 */
static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
				   unsigned int dest)
{
	unsigned long cfg;

	cfg = __prepare_ICR(0, vector, dest);

	/*
	 * send the IPI.
	 */
	x2apic_icr_write(cfg, apicid);
}
56
/*
 * Send @vector to every cpu in @mask, one physical-destination IPI
 * per cpu.  Interrupts are kept disabled across the whole loop so
 * the per-cpu ICR writes are not interleaved with other senders on
 * this cpu.
 */
static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
{
	unsigned long flags;
	unsigned long query_cpu;

	local_irq_save(flags);
	for_each_cpu_mask(query_cpu, mask) {
		__x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
				       vector, APIC_DEST_PHYSICAL);
	}
	local_irq_restore(flags);
}
69
/*
 * Send @vector to every online cpu except the calling one.  Works on
 * a local copy of cpu_online_map so the global map is never modified.
 */
static void x2apic_send_IPI_allbutself(int vector)
{
	cpumask_t mask = cpu_online_map;

	cpu_clear(smp_processor_id(), mask);

	if (!cpus_empty(mask))
		x2apic_send_IPI_mask(mask, vector);
}
79
/* Send @vector to every online cpu, including the sender. */
static void x2apic_send_IPI_all(int vector)
{
	x2apic_send_IPI_mask(cpu_online_map, vector);
}
84
/* In x2apic mode the APIC id is always valid/registered; report success. */
static int x2apic_apic_id_registered(void)
{
	return 1;
}
89
90static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
91{
92 int cpu;
93
94 /*
95 * We're using fixed IRQ delivery, can only return one phys APIC ID.
96 * May as well be the first.
97 */
98 cpu = first_cpu(cpumask);
99 if ((unsigned)cpu < NR_CPUS)
100 return per_cpu(x86_cpu_to_apicid, cpu);
101 else
102 return BAD_APICID;
103}
104
/*
 * In x2apic mode the APIC id register is a full 32-bit value; no
 * field extraction or masking is required.
 */
static unsigned int get_apic_id(unsigned long x)
{
	return (unsigned int)x;
}
112
/*
 * Inverse of get_apic_id(): widen a 32-bit x2apic id to the register
 * word used when writing the APIC id.  No shifting or masking needed.
 */
static unsigned long set_apic_id(unsigned int id)
{
	return (unsigned long)id;
}
120
/* Read the local APIC id register (full 32 bits wide in x2apic mode). */
static unsigned int x2apic_read_id(void)
{
	return apic_read(APIC_ID);
}
125
/*
 * Physical package id of this cpu: the APIC id with the
 * core/thread bits (below @index_msb) shifted out.
 */
static unsigned int phys_pkg_id(int index_msb)
{
	return x2apic_read_id() >> index_msb;
}
130
131void x2apic_send_IPI_self(int vector)
132{
133 apic_write(APIC_SELF_IPI, vector);
134}
135
/*
 * Physical-mode x2apic does not use logical destinations, so there
 * is no LDR state to cache: intentionally a no-op.
 *
 * NOTE(review): made static (referenced only through the
 * apic_x2apic_phys table in this file; the global symbol could
 * collide with same-named helpers elsewhere) and dropped the
 * redundant bare `return;`.
 */
static void init_x2apic_ldr(void)
{
}
140
/*
 * genapic driver for x2apic physical mode: physical destination mode
 * with fixed delivery.  Selected at boot when the "x2apic_phys"
 * parameter is set and x2apic_acpi_madt_oem_check() claims the
 * platform.
 */
struct genapic apic_x2apic_phys = {
	.name = "physical x2apic",
	.acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
	.int_delivery_mode = dest_Fixed,
	.int_dest_mode = (APIC_DEST_PHYSICAL != 0),
	.target_cpus = x2apic_target_cpus,
	.vector_allocation_domain = x2apic_vector_allocation_domain,
	.apic_id_registered = x2apic_apic_id_registered,
	.init_apic_ldr = init_x2apic_ldr,
	.send_IPI_all = x2apic_send_IPI_all,
	.send_IPI_allbutself = x2apic_send_IPI_allbutself,
	.send_IPI_mask = x2apic_send_IPI_mask,
	.send_IPI_self = x2apic_send_IPI_self,
	.cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
	.phys_pkg_id = phys_pkg_id,
	.get_apic_id = get_apic_id,
	.set_apic_id = set_apic_id,
	/* x2apic ids are full 32-bit values; no narrowing mask. */
	.apic_id_mask = (0xFFFFFFFFu),
};
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 2cfcbded888a..3fe472223a99 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -12,12 +12,12 @@
12#include <linux/threads.h> 12#include <linux/threads.h>
13#include <linux/cpumask.h> 13#include <linux/cpumask.h>
14#include <linux/string.h> 14#include <linux/string.h>
15#include <linux/kernel.h>
16#include <linux/ctype.h> 15#include <linux/ctype.h>
17#include <linux/init.h> 16#include <linux/init.h>
18#include <linux/sched.h> 17#include <linux/sched.h>
19#include <linux/bootmem.h> 18#include <linux/bootmem.h>
20#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/hardirq.h>
21#include <asm/smp.h> 21#include <asm/smp.h>
22#include <asm/ipi.h> 22#include <asm/ipi.h>
23#include <asm/genapic.h> 23#include <asm/genapic.h>
@@ -26,6 +26,35 @@
26#include <asm/uv/uv_hub.h> 26#include <asm/uv/uv_hub.h>
27#include <asm/uv/bios.h> 27#include <asm/uv/bios.h>
28 28
29DEFINE_PER_CPU(int, x2apic_extra_bits);
30
31static enum uv_system_type uv_system_type;
32
33static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
34{
35 if (!strcmp(oem_id, "SGI")) {
36 if (!strcmp(oem_table_id, "UVL"))
37 uv_system_type = UV_LEGACY_APIC;
38 else if (!strcmp(oem_table_id, "UVX"))
39 uv_system_type = UV_X2APIC;
40 else if (!strcmp(oem_table_id, "UVH")) {
41 uv_system_type = UV_NON_UNIQUE_APIC;
42 return 1;
43 }
44 }
45 return 0;
46}
47
48enum uv_system_type get_uv_system_type(void)
49{
50 return uv_system_type;
51}
52
53int is_uv_system(void)
54{
55 return uv_system_type != UV_NONE;
56}
57
29DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); 58DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
30EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info); 59EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info);
31 60
@@ -123,6 +152,10 @@ static int uv_apic_id_registered(void)
123 return 1; 152 return 1;
124} 153}
125 154
155static void uv_init_apic_ldr(void)
156{
157}
158
126static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) 159static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
127{ 160{
128 int cpu; 161 int cpu;
@@ -138,9 +171,34 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
138 return BAD_APICID; 171 return BAD_APICID;
139} 172}
140 173
174static unsigned int get_apic_id(unsigned long x)
175{
176 unsigned int id;
177
178 WARN_ON(preemptible() && num_online_cpus() > 1);
179 id = x | __get_cpu_var(x2apic_extra_bits);
180
181 return id;
182}
183
184static unsigned long set_apic_id(unsigned int id)
185{
186 unsigned long x;
187
188 /* maskout x2apic_extra_bits ? */
189 x = id;
190 return x;
191}
192
193static unsigned int uv_read_apic_id(void)
194{
195
196 return get_apic_id(apic_read(APIC_ID));
197}
198
141static unsigned int phys_pkg_id(int index_msb) 199static unsigned int phys_pkg_id(int index_msb)
142{ 200{
143 return GET_APIC_ID(read_apic_id()) >> index_msb; 201 return uv_read_apic_id() >> index_msb;
144} 202}
145 203
146#ifdef ZZZ /* Needs x2apic patch */ 204#ifdef ZZZ /* Needs x2apic patch */
@@ -152,17 +210,22 @@ static void uv_send_IPI_self(int vector)
152 210
153struct genapic apic_x2apic_uv_x = { 211struct genapic apic_x2apic_uv_x = {
154 .name = "UV large system", 212 .name = "UV large system",
213 .acpi_madt_oem_check = uv_acpi_madt_oem_check,
155 .int_delivery_mode = dest_Fixed, 214 .int_delivery_mode = dest_Fixed,
156 .int_dest_mode = (APIC_DEST_PHYSICAL != 0), 215 .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
157 .target_cpus = uv_target_cpus, 216 .target_cpus = uv_target_cpus,
158 .vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */ 217 .vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */
159 .apic_id_registered = uv_apic_id_registered, 218 .apic_id_registered = uv_apic_id_registered,
219 .init_apic_ldr = uv_init_apic_ldr,
160 .send_IPI_all = uv_send_IPI_all, 220 .send_IPI_all = uv_send_IPI_all,
161 .send_IPI_allbutself = uv_send_IPI_allbutself, 221 .send_IPI_allbutself = uv_send_IPI_allbutself,
162 .send_IPI_mask = uv_send_IPI_mask, 222 .send_IPI_mask = uv_send_IPI_mask,
163 /* ZZZ.send_IPI_self = uv_send_IPI_self, */ 223 /* ZZZ.send_IPI_self = uv_send_IPI_self, */
164 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, 224 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
165 .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */ 225 .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */
226 .get_apic_id = get_apic_id,
227 .set_apic_id = set_apic_id,
228 .apic_id_mask = (0xFFFFFFFFu),
166}; 229};
167 230
168static __cpuinit void set_x2apic_extra_bits(int pnode) 231static __cpuinit void set_x2apic_extra_bits(int pnode)
@@ -399,3 +462,5 @@ void __cpuinit uv_cpu_init(void)
399 if (get_uv_system_type() == UV_NON_UNIQUE_APIC) 462 if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
400 set_x2apic_extra_bits(uv_hub_info->pnode); 463 set_x2apic_extra_bits(uv_hub_info->pnode);
401} 464}
465
466
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index dc92b49d9204..4b8a53d841f7 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -282,6 +282,30 @@ static int __init i8259A_init_sysfs(void)
282 282
283device_initcall(i8259A_init_sysfs); 283device_initcall(i8259A_init_sysfs);
284 284
285void mask_8259A(void)
286{
287 unsigned long flags;
288
289 spin_lock_irqsave(&i8259A_lock, flags);
290
291 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
292 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
293
294 spin_unlock_irqrestore(&i8259A_lock, flags);
295}
296
297void unmask_8259A(void)
298{
299 unsigned long flags;
300
301 spin_lock_irqsave(&i8259A_lock, flags);
302
303 outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
304 outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */
305
306 spin_unlock_irqrestore(&i8259A_lock, flags);
307}
308
285void init_8259A(int auto_eoi) 309void init_8259A(int auto_eoi)
286{ 310{
287 unsigned long flags; 311 unsigned long flags;
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index de9aa0e3a9c5..98e4db5373f3 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -1494,7 +1494,7 @@ void /*__init*/ print_local_APIC(void *dummy)
1494 smp_processor_id(), hard_smp_processor_id()); 1494 smp_processor_id(), hard_smp_processor_id());
1495 v = apic_read(APIC_ID); 1495 v = apic_read(APIC_ID);
1496 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, 1496 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v,
1497 GET_APIC_ID(read_apic_id())); 1497 GET_APIC_ID(v));
1498 v = apic_read(APIC_LVR); 1498 v = apic_read(APIC_LVR);
1499 printk(KERN_INFO "... APIC VERSION: %08x\n", v); 1499 printk(KERN_INFO "... APIC VERSION: %08x\n", v);
1500 ver = GET_APIC_VERSION(v); 1500 ver = GET_APIC_VERSION(v);
@@ -1702,8 +1702,7 @@ void disable_IO_APIC(void)
1702 entry.dest_mode = 0; /* Physical */ 1702 entry.dest_mode = 0; /* Physical */
1703 entry.delivery_mode = dest_ExtINT; /* ExtInt */ 1703 entry.delivery_mode = dest_ExtINT; /* ExtInt */
1704 entry.vector = 0; 1704 entry.vector = 0;
1705 entry.dest.physical.physical_dest = 1705 entry.dest.physical.physical_dest = read_apic_id();
1706 GET_APIC_ID(read_apic_id());
1707 1706
1708 /* 1707 /*
1709 * Add it to the IO-APIC irq-routing table: 1708 * Add it to the IO-APIC irq-routing table:
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 8269434d1707..b9950dae59b7 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -37,6 +37,7 @@
37#include <acpi/acpi_bus.h> 37#include <acpi/acpi_bus.h>
38#endif 38#endif
39#include <linux/bootmem.h> 39#include <linux/bootmem.h>
40#include <linux/dmar.h>
40 41
41#include <asm/idle.h> 42#include <asm/idle.h>
42#include <asm/io.h> 43#include <asm/io.h>
@@ -49,6 +50,7 @@
49#include <asm/nmi.h> 50#include <asm/nmi.h>
50#include <asm/msidef.h> 51#include <asm/msidef.h>
51#include <asm/hypertransport.h> 52#include <asm/hypertransport.h>
53#include <asm/irq_remapping.h>
52 54
53#include <mach_ipi.h> 55#include <mach_ipi.h>
54#include <mach_apic.h> 56#include <mach_apic.h>
@@ -108,6 +110,9 @@ DEFINE_SPINLOCK(vector_lock);
108 */ 110 */
109int nr_ioapic_registers[MAX_IO_APICS]; 111int nr_ioapic_registers[MAX_IO_APICS];
110 112
113/* I/O APIC RTE contents at the OS boot up */
114struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
115
111/* I/O APIC entries */ 116/* I/O APIC entries */
112struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; 117struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
113int nr_ioapics; 118int nr_ioapics;
@@ -303,7 +308,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
303 pin = entry->pin; 308 pin = entry->pin;
304 if (pin == -1) 309 if (pin == -1)
305 break; 310 break;
306 io_apic_write(apic, 0x11 + pin*2, dest); 311 /*
312 * With interrupt-remapping, destination information comes
313 * from interrupt-remapping table entry.
314 */
315 if (!irq_remapped(irq))
316 io_apic_write(apic, 0x11 + pin*2, dest);
307 reg = io_apic_read(apic, 0x10 + pin*2); 317 reg = io_apic_read(apic, 0x10 + pin*2);
308 reg &= ~IO_APIC_REDIR_VECTOR_MASK; 318 reg &= ~IO_APIC_REDIR_VECTOR_MASK;
309 reg |= vector; 319 reg |= vector;
@@ -440,6 +450,69 @@ static void clear_IO_APIC (void)
440 clear_IO_APIC_pin(apic, pin); 450 clear_IO_APIC_pin(apic, pin);
441} 451}
442 452
453/*
454 * Saves and masks all the unmasked IO-APIC RTE's
455 */
456int save_mask_IO_APIC_setup(void)
457{
458 union IO_APIC_reg_01 reg_01;
459 unsigned long flags;
460 int apic, pin;
461
462 /*
463 * The number of IO-APIC IRQ registers (== #pins):
464 */
465 for (apic = 0; apic < nr_ioapics; apic++) {
466 spin_lock_irqsave(&ioapic_lock, flags);
467 reg_01.raw = io_apic_read(apic, 1);
468 spin_unlock_irqrestore(&ioapic_lock, flags);
469 nr_ioapic_registers[apic] = reg_01.bits.entries+1;
470 }
471
472 for (apic = 0; apic < nr_ioapics; apic++) {
473 early_ioapic_entries[apic] =
474 kzalloc(sizeof(struct IO_APIC_route_entry) *
475 nr_ioapic_registers[apic], GFP_KERNEL);
476 if (!early_ioapic_entries[apic])
477 return -ENOMEM;
478 }
479
480 for (apic = 0; apic < nr_ioapics; apic++)
481 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
482 struct IO_APIC_route_entry entry;
483
484 entry = early_ioapic_entries[apic][pin] =
485 ioapic_read_entry(apic, pin);
486 if (!entry.mask) {
487 entry.mask = 1;
488 ioapic_write_entry(apic, pin, entry);
489 }
490 }
491 return 0;
492}
493
494void restore_IO_APIC_setup(void)
495{
496 int apic, pin;
497
498 for (apic = 0; apic < nr_ioapics; apic++)
499 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
500 ioapic_write_entry(apic, pin,
501 early_ioapic_entries[apic][pin]);
502}
503
504void reinit_intr_remapped_IO_APIC(int intr_remapping)
505{
506 /*
507 * for now plain restore of previous settings.
508 * TBD: In the case of OS enabling interrupt-remapping,
509 * IO-APIC RTE's need to be setup to point to interrupt-remapping
510 * table entries. for now, do a plain restore, and wait for
511 * the setup_IO_APIC_irqs() to do proper initialization.
512 */
513 restore_IO_APIC_setup();
514}
515
443int skip_ioapic_setup; 516int skip_ioapic_setup;
444int ioapic_force; 517int ioapic_force;
445 518
@@ -834,18 +907,98 @@ void setup_vector_irq(int cpu)
834 907
835 908
836static struct irq_chip ioapic_chip; 909static struct irq_chip ioapic_chip;
910#ifdef CONFIG_INTR_REMAP
911static struct irq_chip ir_ioapic_chip;
912#endif
837 913
838static void ioapic_register_intr(int irq, unsigned long trigger) 914static void ioapic_register_intr(int irq, unsigned long trigger)
839{ 915{
840 if (trigger) { 916 if (trigger)
841 irq_desc[irq].status |= IRQ_LEVEL; 917 irq_desc[irq].status |= IRQ_LEVEL;
842 set_irq_chip_and_handler_name(irq, &ioapic_chip, 918 else
843 handle_fasteoi_irq, "fasteoi");
844 } else {
845 irq_desc[irq].status &= ~IRQ_LEVEL; 919 irq_desc[irq].status &= ~IRQ_LEVEL;
920
921#ifdef CONFIG_INTR_REMAP
922 if (irq_remapped(irq)) {
923 irq_desc[irq].status |= IRQ_MOVE_PCNTXT;
924 if (trigger)
925 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
926 handle_fasteoi_irq,
927 "fasteoi");
928 else
929 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
930 handle_edge_irq, "edge");
931 return;
932 }
933#endif
934 if (trigger)
935 set_irq_chip_and_handler_name(irq, &ioapic_chip,
936 handle_fasteoi_irq,
937 "fasteoi");
938 else
846 set_irq_chip_and_handler_name(irq, &ioapic_chip, 939 set_irq_chip_and_handler_name(irq, &ioapic_chip,
847 handle_edge_irq, "edge"); 940 handle_edge_irq, "edge");
941}
942
943static int setup_ioapic_entry(int apic, int irq,
944 struct IO_APIC_route_entry *entry,
945 unsigned int destination, int trigger,
946 int polarity, int vector)
947{
948 /*
949 * add it to the IO-APIC irq-routing table:
950 */
951 memset(entry,0,sizeof(*entry));
952
953#ifdef CONFIG_INTR_REMAP
954 if (intr_remapping_enabled) {
955 struct intel_iommu *iommu = map_ioapic_to_ir(apic);
956 struct irte irte;
957 struct IR_IO_APIC_route_entry *ir_entry =
958 (struct IR_IO_APIC_route_entry *) entry;
959 int index;
960
961 if (!iommu)
962 panic("No mapping iommu for ioapic %d\n", apic);
963
964 index = alloc_irte(iommu, irq, 1);
965 if (index < 0)
966 panic("Failed to allocate IRTE for ioapic %d\n", apic);
967
968 memset(&irte, 0, sizeof(irte));
969
970 irte.present = 1;
971 irte.dst_mode = INT_DEST_MODE;
972 irte.trigger_mode = trigger;
973 irte.dlvry_mode = INT_DELIVERY_MODE;
974 irte.vector = vector;
975 irte.dest_id = IRTE_DEST(destination);
976
977 modify_irte(irq, &irte);
978
979 ir_entry->index2 = (index >> 15) & 0x1;
980 ir_entry->zero = 0;
981 ir_entry->format = 1;
982 ir_entry->index = (index & 0x7fff);
983 } else
984#endif
985 {
986 entry->delivery_mode = INT_DELIVERY_MODE;
987 entry->dest_mode = INT_DEST_MODE;
988 entry->dest = destination;
848 } 989 }
990
991 entry->mask = 0; /* enable IRQ */
992 entry->trigger = trigger;
993 entry->polarity = polarity;
994 entry->vector = vector;
995
996 /* Mask level triggered irqs.
997 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
998 */
999 if (trigger)
1000 entry->mask = 1;
1001 return 0;
849} 1002}
850 1003
851static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, 1004static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
@@ -870,24 +1023,15 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
870 apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, 1023 apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
871 irq, trigger, polarity); 1024 irq, trigger, polarity);
872 1025
873 /*
874 * add it to the IO-APIC irq-routing table:
875 */
876 memset(&entry,0,sizeof(entry));
877 1026
878 entry.delivery_mode = INT_DELIVERY_MODE; 1027 if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
879 entry.dest_mode = INT_DEST_MODE; 1028 cpu_mask_to_apicid(mask), trigger, polarity,
880 entry.dest = cpu_mask_to_apicid(mask); 1029 cfg->vector)) {
881 entry.mask = 0; /* enable IRQ */ 1030 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
882 entry.trigger = trigger; 1031 mp_ioapics[apic].mp_apicid, pin);
883 entry.polarity = polarity; 1032 __clear_irq_vector(irq);
884 entry.vector = cfg->vector; 1033 return;
885 1034 }
886 /* Mask level triggered irqs.
887 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
888 */
889 if (trigger)
890 entry.mask = 1;
891 1035
892 ioapic_register_intr(irq, trigger); 1036 ioapic_register_intr(irq, trigger);
893 if (irq < 16) 1037 if (irq < 16)
@@ -939,6 +1083,9 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
939{ 1083{
940 struct IO_APIC_route_entry entry; 1084 struct IO_APIC_route_entry entry;
941 1085
1086 if (intr_remapping_enabled)
1087 return;
1088
942 memset(&entry, 0, sizeof(entry)); 1089 memset(&entry, 0, sizeof(entry));
943 1090
944 /* 1091 /*
@@ -1085,6 +1232,7 @@ static __apicdebuginit void print_APIC_bitfield (int base)
1085void __apicdebuginit print_local_APIC(void * dummy) 1232void __apicdebuginit print_local_APIC(void * dummy)
1086{ 1233{
1087 unsigned int v, ver, maxlvt; 1234 unsigned int v, ver, maxlvt;
1235 unsigned long icr;
1088 1236
1089 if (apic_verbosity == APIC_QUIET) 1237 if (apic_verbosity == APIC_QUIET)
1090 return; 1238 return;
@@ -1092,7 +1240,7 @@ void __apicdebuginit print_local_APIC(void * dummy)
1092 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", 1240 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
1093 smp_processor_id(), hard_smp_processor_id()); 1241 smp_processor_id(), hard_smp_processor_id());
1094 v = apic_read(APIC_ID); 1242 v = apic_read(APIC_ID);
1095 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id())); 1243 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id());
1096 v = apic_read(APIC_LVR); 1244 v = apic_read(APIC_LVR);
1097 printk(KERN_INFO "... APIC VERSION: %08x\n", v); 1245 printk(KERN_INFO "... APIC VERSION: %08x\n", v);
1098 ver = GET_APIC_VERSION(v); 1246 ver = GET_APIC_VERSION(v);
@@ -1128,10 +1276,9 @@ void __apicdebuginit print_local_APIC(void * dummy)
1128 v = apic_read(APIC_ESR); 1276 v = apic_read(APIC_ESR);
1129 printk(KERN_DEBUG "... APIC ESR: %08x\n", v); 1277 printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
1130 1278
1131 v = apic_read(APIC_ICR); 1279 icr = apic_icr_read();
1132 printk(KERN_DEBUG "... APIC ICR: %08x\n", v); 1280 printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
1133 v = apic_read(APIC_ICR2); 1281 printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
1134 printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
1135 1282
1136 v = apic_read(APIC_LVTT); 1283 v = apic_read(APIC_LVTT);
1137 printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); 1284 printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
@@ -1286,7 +1433,7 @@ void disable_IO_APIC(void)
1286 entry.dest_mode = 0; /* Physical */ 1433 entry.dest_mode = 0; /* Physical */
1287 entry.delivery_mode = dest_ExtINT; /* ExtInt */ 1434 entry.delivery_mode = dest_ExtINT; /* ExtInt */
1288 entry.vector = 0; 1435 entry.vector = 0;
1289 entry.dest = GET_APIC_ID(read_apic_id()); 1436 entry.dest = read_apic_id();
1290 1437
1291 /* 1438 /*
1292 * Add it to the IO-APIC irq-routing table: 1439 * Add it to the IO-APIC irq-routing table:
@@ -1392,6 +1539,147 @@ static int ioapic_retrigger_irq(unsigned int irq)
1392 */ 1539 */
1393 1540
1394#ifdef CONFIG_SMP 1541#ifdef CONFIG_SMP
1542
1543#ifdef CONFIG_INTR_REMAP
1544static void ir_irq_migration(struct work_struct *work);
1545
1546static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
1547
1548/*
1549 * Migrate the IO-APIC irq in the presence of intr-remapping.
1550 *
1551 * For edge triggered, irq migration is a simple atomic update(of vector
1552 * and cpu destination) of IRTE and flush the hardware cache.
1553 *
1554 * For level triggered, we need to modify the io-apic RTE aswell with the update
1555 * vector information, along with modifying IRTE with vector and destination.
1556 * So irq migration for level triggered is little bit more complex compared to
1557 * edge triggered migration. But the good news is, we use the same algorithm
1558 * for level triggered migration as we have today, only difference being,
1559 * we now initiate the irq migration from process context instead of the
1560 * interrupt context.
1561 *
1562 * In future, when we do a directed EOI (combined with cpu EOI broadcast
1563 * suppression) to the IO-APIC, level triggered irq migration will also be
1564 * as simple as edge triggered migration and we can do the irq migration
1565 * with a simple atomic update to IO-APIC RTE.
1566 */
1567static void migrate_ioapic_irq(int irq, cpumask_t mask)
1568{
1569 struct irq_cfg *cfg = irq_cfg + irq;
1570 struct irq_desc *desc = irq_desc + irq;
1571 cpumask_t tmp, cleanup_mask;
1572 struct irte irte;
1573 int modify_ioapic_rte = desc->status & IRQ_LEVEL;
1574 unsigned int dest;
1575 unsigned long flags;
1576
1577 cpus_and(tmp, mask, cpu_online_map);
1578 if (cpus_empty(tmp))
1579 return;
1580
1581 if (get_irte(irq, &irte))
1582 return;
1583
1584 if (assign_irq_vector(irq, mask))
1585 return;
1586
1587 cpus_and(tmp, cfg->domain, mask);
1588 dest = cpu_mask_to_apicid(tmp);
1589
1590 if (modify_ioapic_rte) {
1591 spin_lock_irqsave(&ioapic_lock, flags);
1592 __target_IO_APIC_irq(irq, dest, cfg->vector);
1593 spin_unlock_irqrestore(&ioapic_lock, flags);
1594 }
1595
1596 irte.vector = cfg->vector;
1597 irte.dest_id = IRTE_DEST(dest);
1598
1599 /*
1600 * Modified the IRTE and flushes the Interrupt entry cache.
1601 */
1602 modify_irte(irq, &irte);
1603
1604 if (cfg->move_in_progress) {
1605 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
1606 cfg->move_cleanup_count = cpus_weight(cleanup_mask);
1607 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
1608 cfg->move_in_progress = 0;
1609 }
1610
1611 irq_desc[irq].affinity = mask;
1612}
1613
1614static int migrate_irq_remapped_level(int irq)
1615{
1616 int ret = -1;
1617
1618 mask_IO_APIC_irq(irq);
1619
1620 if (io_apic_level_ack_pending(irq)) {
1621 /*
1622 * Interrupt in progress. Migrating irq now will change the
1623 * vector information in the IO-APIC RTE and that will confuse
1624 * the EOI broadcast performed by cpu.
1625 * So, delay the irq migration to the next instance.
1626 */
1627 schedule_delayed_work(&ir_migration_work, 1);
1628 goto unmask;
1629 }
1630
1631 /* everthing is clear. we have right of way */
1632 migrate_ioapic_irq(irq, irq_desc[irq].pending_mask);
1633
1634 ret = 0;
1635 irq_desc[irq].status &= ~IRQ_MOVE_PENDING;
1636 cpus_clear(irq_desc[irq].pending_mask);
1637
1638unmask:
1639 unmask_IO_APIC_irq(irq);
1640 return ret;
1641}
1642
1643static void ir_irq_migration(struct work_struct *work)
1644{
1645 int irq;
1646
1647 for (irq = 0; irq < NR_IRQS; irq++) {
1648 struct irq_desc *desc = irq_desc + irq;
1649 if (desc->status & IRQ_MOVE_PENDING) {
1650 unsigned long flags;
1651
1652 spin_lock_irqsave(&desc->lock, flags);
1653 if (!desc->chip->set_affinity ||
1654 !(desc->status & IRQ_MOVE_PENDING)) {
1655 desc->status &= ~IRQ_MOVE_PENDING;
1656 spin_unlock_irqrestore(&desc->lock, flags);
1657 continue;
1658 }
1659
1660 desc->chip->set_affinity(irq,
1661 irq_desc[irq].pending_mask);
1662 spin_unlock_irqrestore(&desc->lock, flags);
1663 }
1664 }
1665}
1666
1667/*
1668 * Migrates the IRQ destination in the process context.
1669 */
1670static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
1671{
1672 if (irq_desc[irq].status & IRQ_LEVEL) {
1673 irq_desc[irq].status |= IRQ_MOVE_PENDING;
1674 irq_desc[irq].pending_mask = mask;
1675 migrate_irq_remapped_level(irq);
1676 return;
1677 }
1678
1679 migrate_ioapic_irq(irq, mask);
1680}
1681#endif
1682
1395asmlinkage void smp_irq_move_cleanup_interrupt(void) 1683asmlinkage void smp_irq_move_cleanup_interrupt(void)
1396{ 1684{
1397 unsigned vector, me; 1685 unsigned vector, me;
@@ -1448,6 +1736,17 @@ static void irq_complete_move(unsigned int irq)
1448#else 1736#else
1449static inline void irq_complete_move(unsigned int irq) {} 1737static inline void irq_complete_move(unsigned int irq) {}
1450#endif 1738#endif
1739#ifdef CONFIG_INTR_REMAP
1740static void ack_x2apic_level(unsigned int irq)
1741{
1742 ack_x2APIC_irq();
1743}
1744
1745static void ack_x2apic_edge(unsigned int irq)
1746{
1747 ack_x2APIC_irq();
1748}
1749#endif
1451 1750
1452static void ack_apic_edge(unsigned int irq) 1751static void ack_apic_edge(unsigned int irq)
1453{ 1752{
@@ -1522,6 +1821,21 @@ static struct irq_chip ioapic_chip __read_mostly = {
1522 .retrigger = ioapic_retrigger_irq, 1821 .retrigger = ioapic_retrigger_irq,
1523}; 1822};
1524 1823
1824#ifdef CONFIG_INTR_REMAP
1825static struct irq_chip ir_ioapic_chip __read_mostly = {
1826 .name = "IR-IO-APIC",
1827 .startup = startup_ioapic_irq,
1828 .mask = mask_IO_APIC_irq,
1829 .unmask = unmask_IO_APIC_irq,
1830 .ack = ack_x2apic_edge,
1831 .eoi = ack_x2apic_level,
1832#ifdef CONFIG_SMP
1833 .set_affinity = set_ir_ioapic_affinity_irq,
1834#endif
1835 .retrigger = ioapic_retrigger_irq,
1836};
1837#endif
1838
1525static inline void init_IO_APIC_traps(void) 1839static inline void init_IO_APIC_traps(void)
1526{ 1840{
1527 int irq; 1841 int irq;
@@ -1707,6 +2021,8 @@ static inline void __init check_timer(void)
1707 * 8259A. 2021 * 8259A.
1708 */ 2022 */
1709 if (pin1 == -1) { 2023 if (pin1 == -1) {
2024 if (intr_remapping_enabled)
2025 panic("BIOS bug: timer not connected to IO-APIC");
1710 pin1 = pin2; 2026 pin1 = pin2;
1711 apic1 = apic2; 2027 apic1 = apic2;
1712 no_pin1 = 1; 2028 no_pin1 = 1;
@@ -1733,6 +2049,8 @@ static inline void __init check_timer(void)
1733 clear_IO_APIC_pin(0, pin1); 2049 clear_IO_APIC_pin(0, pin1);
1734 goto out; 2050 goto out;
1735 } 2051 }
2052 if (intr_remapping_enabled)
2053 panic("timer doesn't work through Interrupt-remapped IO-APIC");
1736 clear_IO_APIC_pin(apic1, pin1); 2054 clear_IO_APIC_pin(apic1, pin1);
1737 if (!no_pin1) 2055 if (!no_pin1)
1738 apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " 2056 apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -1972,6 +2290,9 @@ void destroy_irq(unsigned int irq)
1972 2290
1973 dynamic_irq_cleanup(irq); 2291 dynamic_irq_cleanup(irq);
1974 2292
2293#ifdef CONFIG_INTR_REMAP
2294 free_irte(irq);
2295#endif
1975 spin_lock_irqsave(&vector_lock, flags); 2296 spin_lock_irqsave(&vector_lock, flags);
1976 __clear_irq_vector(irq); 2297 __clear_irq_vector(irq);
1977 spin_unlock_irqrestore(&vector_lock, flags); 2298 spin_unlock_irqrestore(&vector_lock, flags);
@@ -1990,11 +2311,42 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
1990 2311
1991 tmp = TARGET_CPUS; 2312 tmp = TARGET_CPUS;
1992 err = assign_irq_vector(irq, tmp); 2313 err = assign_irq_vector(irq, tmp);
1993 if (!err) { 2314 if (err)
1994 cpus_and(tmp, cfg->domain, tmp); 2315 return err;
1995 dest = cpu_mask_to_apicid(tmp); 2316
2317 cpus_and(tmp, cfg->domain, tmp);
2318 dest = cpu_mask_to_apicid(tmp);
2319
2320#ifdef CONFIG_INTR_REMAP
2321 if (irq_remapped(irq)) {
2322 struct irte irte;
2323 int ir_index;
2324 u16 sub_handle;
2325
2326 ir_index = map_irq_to_irte_handle(irq, &sub_handle);
2327 BUG_ON(ir_index == -1);
2328
2329 memset (&irte, 0, sizeof(irte));
2330
2331 irte.present = 1;
2332 irte.dst_mode = INT_DEST_MODE;
2333 irte.trigger_mode = 0; /* edge */
2334 irte.dlvry_mode = INT_DELIVERY_MODE;
2335 irte.vector = cfg->vector;
2336 irte.dest_id = IRTE_DEST(dest);
2337
2338 modify_irte(irq, &irte);
1996 2339
1997 msg->address_hi = MSI_ADDR_BASE_HI; 2340 msg->address_hi = MSI_ADDR_BASE_HI;
2341 msg->data = sub_handle;
2342 msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
2343 MSI_ADDR_IR_SHV |
2344 MSI_ADDR_IR_INDEX1(ir_index) |
2345 MSI_ADDR_IR_INDEX2(ir_index);
2346 } else
2347#endif
2348 {
2349 msg->address_hi = MSI_ADDR_BASE_HI;
1998 msg->address_lo = 2350 msg->address_lo =
1999 MSI_ADDR_BASE_LO | 2351 MSI_ADDR_BASE_LO |
2000 ((INT_DEST_MODE == 0) ? 2352 ((INT_DEST_MODE == 0) ?
@@ -2044,6 +2396,55 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2044 write_msi_msg(irq, &msg); 2396 write_msi_msg(irq, &msg);
2045 irq_desc[irq].affinity = mask; 2397 irq_desc[irq].affinity = mask;
2046} 2398}
2399
2400#ifdef CONFIG_INTR_REMAP
2401/*
2402 * Migrate the MSI irq to another cpumask. This migration is
2403 * done in the process context using interrupt-remapping hardware.
2404 */
2405static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2406{
2407 struct irq_cfg *cfg = irq_cfg + irq;
2408 unsigned int dest;
2409 cpumask_t tmp, cleanup_mask;
2410 struct irte irte;
2411
2412 cpus_and(tmp, mask, cpu_online_map);
2413 if (cpus_empty(tmp))
2414 return;
2415
2416 if (get_irte(irq, &irte))
2417 return;
2418
2419 if (assign_irq_vector(irq, mask))
2420 return;
2421
2422 cpus_and(tmp, cfg->domain, mask);
2423 dest = cpu_mask_to_apicid(tmp);
2424
2425 irte.vector = cfg->vector;
2426 irte.dest_id = IRTE_DEST(dest);
2427
2428 /*
2429 * atomically update the IRTE with the new destination and vector.
2430 */
2431 modify_irte(irq, &irte);
2432
2433 /*
2434 * After this point, all the interrupts will start arriving
2435 * at the new destination. So, time to cleanup the previous
2436 * vector allocation.
2437 */
2438 if (cfg->move_in_progress) {
2439 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
2440 cfg->move_cleanup_count = cpus_weight(cleanup_mask);
2441 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
2442 cfg->move_in_progress = 0;
2443 }
2444
2445 irq_desc[irq].affinity = mask;
2446}
2447#endif
2047#endif /* CONFIG_SMP */ 2448#endif /* CONFIG_SMP */
2048 2449
2049/* 2450/*
@@ -2061,26 +2462,157 @@ static struct irq_chip msi_chip = {
2061 .retrigger = ioapic_retrigger_irq, 2462 .retrigger = ioapic_retrigger_irq,
2062}; 2463};
2063 2464
2064int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) 2465#ifdef CONFIG_INTR_REMAP
2466static struct irq_chip msi_ir_chip = {
2467 .name = "IR-PCI-MSI",
2468 .unmask = unmask_msi_irq,
2469 .mask = mask_msi_irq,
2470 .ack = ack_x2apic_edge,
2471#ifdef CONFIG_SMP
2472 .set_affinity = ir_set_msi_irq_affinity,
2473#endif
2474 .retrigger = ioapic_retrigger_irq,
2475};
2476
2477/*
2478 * Map the PCI dev to the corresponding remapping hardware unit
2479 * and allocate 'nvec' consecutive interrupt-remapping table entries
2480 * in it.
2481 */
2482static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
2483{
2484 struct intel_iommu *iommu;
2485 int index;
2486
2487 iommu = map_dev_to_ir(dev);
2488 if (!iommu) {
2489 printk(KERN_ERR
2490 "Unable to map PCI %s to iommu\n", pci_name(dev));
2491 return -ENOENT;
2492 }
2493
2494 index = alloc_irte(iommu, irq, nvec);
2495 if (index < 0) {
2496 printk(KERN_ERR
2497 "Unable to allocate %d IRTE for PCI %s\n", nvec,
2498 pci_name(dev));
2499 return -ENOSPC;
2500 }
2501 return index;
2502}
2503#endif
2504
2505static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
2065{ 2506{
2507 int ret;
2066 struct msi_msg msg; 2508 struct msi_msg msg;
2509
2510 ret = msi_compose_msg(dev, irq, &msg);
2511 if (ret < 0)
2512 return ret;
2513
2514 set_irq_msi(irq, desc);
2515 write_msi_msg(irq, &msg);
2516
2517#ifdef CONFIG_INTR_REMAP
2518 if (irq_remapped(irq)) {
2519 struct irq_desc *desc = irq_desc + irq;
2520 /*
2521 * irq migration in process context
2522 */
2523 desc->status |= IRQ_MOVE_PCNTXT;
2524 set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
2525 } else
2526#endif
2527 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
2528
2529 return 0;
2530}
2531
2532int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
2533{
2067 int irq, ret; 2534 int irq, ret;
2535
2068 irq = create_irq(); 2536 irq = create_irq();
2069 if (irq < 0) 2537 if (irq < 0)
2070 return irq; 2538 return irq;
2071 2539
2072 ret = msi_compose_msg(dev, irq, &msg); 2540#ifdef CONFIG_INTR_REMAP
2541 if (!intr_remapping_enabled)
2542 goto no_ir;
2543
2544 ret = msi_alloc_irte(dev, irq, 1);
2545 if (ret < 0)
2546 goto error;
2547no_ir:
2548#endif
2549 ret = setup_msi_irq(dev, desc, irq);
2073 if (ret < 0) { 2550 if (ret < 0) {
2074 destroy_irq(irq); 2551 destroy_irq(irq);
2075 return ret; 2552 return ret;
2076 } 2553 }
2554 return 0;
2077 2555
2078 set_irq_msi(irq, desc); 2556#ifdef CONFIG_INTR_REMAP
2079 write_msi_msg(irq, &msg); 2557error:
2558 destroy_irq(irq);
2559 return ret;
2560#endif
2561}
2080 2562
2081 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); 2563int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
2564{
2565 int irq, ret, sub_handle;
2566 struct msi_desc *desc;
2567#ifdef CONFIG_INTR_REMAP
2568 struct intel_iommu *iommu = 0;
2569 int index = 0;
2570#endif
2082 2571
2572 sub_handle = 0;
2573 list_for_each_entry(desc, &dev->msi_list, list) {
2574 irq = create_irq();
2575 if (irq < 0)
2576 return irq;
2577#ifdef CONFIG_INTR_REMAP
2578 if (!intr_remapping_enabled)
2579 goto no_ir;
2580
2581 if (!sub_handle) {
2582 /*
2583 * allocate the consecutive block of IRTE's
2584 * for 'nvec'
2585 */
2586 index = msi_alloc_irte(dev, irq, nvec);
2587 if (index < 0) {
2588 ret = index;
2589 goto error;
2590 }
2591 } else {
2592 iommu = map_dev_to_ir(dev);
2593 if (!iommu) {
2594 ret = -ENOENT;
2595 goto error;
2596 }
2597 /*
2598 * setup the mapping between the irq and the IRTE
2599 * base index, the sub_handle pointing to the
2600 * appropriate interrupt remap table entry.
2601 */
2602 set_irte_irq(irq, iommu, index, sub_handle);
2603 }
2604no_ir:
2605#endif
2606 ret = setup_msi_irq(dev, desc, irq);
2607 if (ret < 0)
2608 goto error;
2609 sub_handle++;
2610 }
2083 return 0; 2611 return 0;
2612
2613error:
2614 destroy_irq(irq);
2615 return ret;
2084} 2616}
2085 2617
2086void arch_teardown_msi_irq(unsigned int irq) 2618void arch_teardown_msi_irq(unsigned int irq)
@@ -2328,6 +2860,10 @@ void __init setup_ioapic_dest(void)
2328 setup_IO_APIC_irq(ioapic, pin, irq, 2860 setup_IO_APIC_irq(ioapic, pin, irq,
2329 irq_trigger(irq_entry), 2861 irq_trigger(irq_entry),
2330 irq_polarity(irq_entry)); 2862 irq_polarity(irq_entry));
2863#ifdef CONFIG_INTR_REMAP
2864 else if (intr_remapping_enabled)
2865 set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
2866#endif
2331 else 2867 else
2332 set_ioapic_affinity_irq(irq, TARGET_CPUS); 2868 set_ioapic_affinity_irq(irq, TARGET_CPUS);
2333 } 2869 }
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 6ae005ccaed8..e362c6ab4d35 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -397,7 +397,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
397 generic_bigsmp_probe(); 397 generic_bigsmp_probe();
398#endif 398#endif
399 399
400#ifdef CONFIG_X86_32
400 setup_apic_routing(); 401 setup_apic_routing();
402#endif
401 if (!num_processors) 403 if (!num_processors)
402 printk(KERN_ERR "MPTABLE: no processors registered!\n"); 404 printk(KERN_ERR "MPTABLE: no processors registered!\n");
403 return num_processors; 405 return num_processors;
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 94da4d52d798..5744789a78f4 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -373,8 +373,6 @@ struct pv_cpu_ops pv_cpu_ops = {
373 373
374struct pv_apic_ops pv_apic_ops = { 374struct pv_apic_ops pv_apic_ops = {
375#ifdef CONFIG_X86_LOCAL_APIC 375#ifdef CONFIG_X86_LOCAL_APIC
376 .apic_write = native_apic_write,
377 .apic_read = native_apic_read,
378 .setup_boot_clock = setup_boot_APIC_clock, 376 .setup_boot_clock = setup_boot_APIC_clock,
379 .setup_secondary_clock = setup_secondary_APIC_clock, 377 .setup_secondary_clock = setup_secondary_APIC_clock,
380 .startup_ipi_hook = paravirt_nop, 378 .startup_ipi_hook = paravirt_nop,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b520dae02bf4..792b87853a76 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -731,6 +731,8 @@ void __init setup_arch(char **cmdline_p)
731 num_physpages = max_pfn; 731 num_physpages = max_pfn;
732 732
733 check_efer(); 733 check_efer();
734 if (cpu_has_x2apic)
735 check_x2apic();
734 736
735 /* How many end-of-memory variables you have, grandma! */ 737 /* How many end-of-memory variables you have, grandma! */
736 /* need this before calling reserve_initrd */ 738 /* need this before calling reserve_initrd */
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 332512767f4f..626618bf2f81 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -123,7 +123,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
123 123
124static atomic_t init_deasserted; 124static atomic_t init_deasserted;
125 125
126static int boot_cpu_logical_apicid;
127 126
128/* representing cpus for which sibling maps can be computed */ 127/* representing cpus for which sibling maps can be computed */
129static cpumask_t cpu_sibling_setup_map; 128static cpumask_t cpu_sibling_setup_map;
@@ -165,6 +164,8 @@ static void unmap_cpu_to_node(int cpu)
165#endif 164#endif
166 165
167#ifdef CONFIG_X86_32 166#ifdef CONFIG_X86_32
167static int boot_cpu_logical_apicid;
168
168u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = 169u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
169 { [0 ... NR_CPUS-1] = BAD_APICID }; 170 { [0 ... NR_CPUS-1] = BAD_APICID };
170 171
@@ -210,7 +211,7 @@ static void __cpuinit smp_callin(void)
210 /* 211 /*
211 * (This works even if the APIC is not enabled.) 212 * (This works even if the APIC is not enabled.)
212 */ 213 */
213 phys_id = GET_APIC_ID(read_apic_id()); 214 phys_id = read_apic_id();
214 cpuid = smp_processor_id(); 215 cpuid = smp_processor_id();
215 if (cpu_isset(cpuid, cpu_callin_map)) { 216 if (cpu_isset(cpuid, cpu_callin_map)) {
216 panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, 217 panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
@@ -546,8 +547,7 @@ static inline void __inquire_remote_apic(int apicid)
546 printk(KERN_CONT 547 printk(KERN_CONT
547 "a previous APIC delivery may have failed\n"); 548 "a previous APIC delivery may have failed\n");
548 549
549 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); 550 apic_icr_write(APIC_DM_REMRD | regs[i], apicid);
550 apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]);
551 551
552 timeout = 0; 552 timeout = 0;
553 do { 553 do {
@@ -579,11 +579,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
579 int maxlvt; 579 int maxlvt;
580 580
581 /* Target chip */ 581 /* Target chip */
582 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
583
584 /* Boot on the stack */ 582 /* Boot on the stack */
585 /* Kick the second */ 583 /* Kick the second */
586 apic_write(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); 584 apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid);
587 585
588 pr_debug("Waiting for send to finish...\n"); 586 pr_debug("Waiting for send to finish...\n");
589 send_status = safe_apic_wait_icr_idle(); 587 send_status = safe_apic_wait_icr_idle();
@@ -636,13 +634,11 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
636 /* 634 /*
637 * Turn INIT on target chip 635 * Turn INIT on target chip
638 */ 636 */
639 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
640
641 /* 637 /*
642 * Send IPI 638 * Send IPI
643 */ 639 */
644 apic_write(APIC_ICR, 640 apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
645 APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT); 641 phys_apicid);
646 642
647 pr_debug("Waiting for send to finish...\n"); 643 pr_debug("Waiting for send to finish...\n");
648 send_status = safe_apic_wait_icr_idle(); 644 send_status = safe_apic_wait_icr_idle();
@@ -652,10 +648,8 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
652 pr_debug("Deasserting INIT.\n"); 648 pr_debug("Deasserting INIT.\n");
653 649
654 /* Target chip */ 650 /* Target chip */
655 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
656
657 /* Send IPI */ 651 /* Send IPI */
658 apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); 652 apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);
659 653
660 pr_debug("Waiting for send to finish...\n"); 654 pr_debug("Waiting for send to finish...\n");
661 send_status = safe_apic_wait_icr_idle(); 655 send_status = safe_apic_wait_icr_idle();
@@ -698,11 +692,10 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
698 */ 692 */
699 693
700 /* Target chip */ 694 /* Target chip */
701 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
702
703 /* Boot on the stack */ 695 /* Boot on the stack */
704 /* Kick the second */ 696 /* Kick the second */
705 apic_write(APIC_ICR, APIC_DM_STARTUP | (start_eip >> 12)); 697 apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12),
698 phys_apicid);
706 699
707 /* 700 /*
708 * Give the other CPU some time to accept the IPI. 701 * Give the other CPU some time to accept the IPI.
@@ -1136,10 +1129,17 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1136 * Setup boot CPU information 1129 * Setup boot CPU information
1137 */ 1130 */
1138 smp_store_cpu_info(0); /* Final full version of the data */ 1131 smp_store_cpu_info(0); /* Final full version of the data */
1132#ifdef CONFIG_X86_32
1139 boot_cpu_logical_apicid = logical_smp_processor_id(); 1133 boot_cpu_logical_apicid = logical_smp_processor_id();
1134#endif
1140 current_thread_info()->cpu = 0; /* needed? */ 1135 current_thread_info()->cpu = 0; /* needed? */
1141 set_cpu_sibling_map(0); 1136 set_cpu_sibling_map(0);
1142 1137
1138#ifdef CONFIG_X86_64
1139 enable_IR_x2apic();
1140 setup_apic_routing();
1141#endif
1142
1143 if (smp_sanity_check(max_cpus) < 0) { 1143 if (smp_sanity_check(max_cpus) < 0) {
1144 printk(KERN_INFO "SMP disabled\n"); 1144 printk(KERN_INFO "SMP disabled\n");
1145 disable_smp(); 1145 disable_smp();
@@ -1147,9 +1147,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1147 } 1147 }
1148 1148
1149 preempt_disable(); 1149 preempt_disable();
1150 if (GET_APIC_ID(read_apic_id()) != boot_cpu_physical_apicid) { 1150 if (read_apic_id() != boot_cpu_physical_apicid) {
1151 panic("Boot APIC ID in local APIC unexpected (%d vs %d)", 1151 panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
1152 GET_APIC_ID(read_apic_id()), boot_cpu_physical_apicid); 1152 read_apic_id(), boot_cpu_physical_apicid);
1153 /* Or can we switch back to PIC here? */ 1153 /* Or can we switch back to PIC here? */
1154 } 1154 }
1155 preempt_enable(); 1155 preempt_enable();
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 0a1b1a9d922d..45c27c4e2a6e 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -904,8 +904,8 @@ static inline int __init activate_vmi(void)
904#endif 904#endif
905 905
906#ifdef CONFIG_X86_LOCAL_APIC 906#ifdef CONFIG_X86_LOCAL_APIC
907 para_fill(pv_apic_ops.apic_read, APICRead); 907 para_fill(apic_ops->read, APICRead);
908 para_fill(pv_apic_ops.apic_write, APICWrite); 908 para_fill(apic_ops->write, APICWrite);
909#endif 909#endif
910 910
911 /* 911 /*
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 0313a5eec412..756fc489652b 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -55,6 +55,7 @@
55#include <linux/lguest_launcher.h> 55#include <linux/lguest_launcher.h>
56#include <linux/virtio_console.h> 56#include <linux/virtio_console.h>
57#include <linux/pm.h> 57#include <linux/pm.h>
58#include <asm/apic.h>
58#include <asm/lguest.h> 59#include <asm/lguest.h>
59#include <asm/paravirt.h> 60#include <asm/paravirt.h>
60#include <asm/param.h> 61#include <asm/param.h>
@@ -783,14 +784,44 @@ static void lguest_wbinvd(void)
783 * code qualifies for Advanced. It will also never interrupt anything. It 784 * code qualifies for Advanced. It will also never interrupt anything. It
784 * does, however, allow us to get through the Linux boot code. */ 785 * does, however, allow us to get through the Linux boot code. */
785#ifdef CONFIG_X86_LOCAL_APIC 786#ifdef CONFIG_X86_LOCAL_APIC
786static void lguest_apic_write(unsigned long reg, u32 v) 787static void lguest_apic_write(u32 reg, u32 v)
787{ 788{
788} 789}
789 790
790static u32 lguest_apic_read(unsigned long reg) 791static u32 lguest_apic_read(u32 reg)
791{ 792{
792 return 0; 793 return 0;
793} 794}
795
796static u64 lguest_apic_icr_read(void)
797{
798 return 0;
799}
800
801static void lguest_apic_icr_write(u32 low, u32 id)
802{
803 /* Warn to see if there's any stray references */
804 WARN_ON(1);
805}
806
807static void lguest_apic_wait_icr_idle(void)
808{
809 return;
810}
811
812static u32 lguest_apic_safe_wait_icr_idle(void)
813{
814 return 0;
815}
816
817static struct apic_ops lguest_basic_apic_ops = {
818 .read = lguest_apic_read,
819 .write = lguest_apic_write,
820 .icr_read = lguest_apic_icr_read,
821 .icr_write = lguest_apic_icr_write,
822 .wait_icr_idle = lguest_apic_wait_icr_idle,
823 .safe_wait_icr_idle = lguest_apic_safe_wait_icr_idle,
824};
794#endif 825#endif
795 826
796/* STOP! Until an interrupt comes in. */ 827/* STOP! Until an interrupt comes in. */
@@ -990,8 +1021,7 @@ __init void lguest_init(void)
990 1021
991#ifdef CONFIG_X86_LOCAL_APIC 1022#ifdef CONFIG_X86_LOCAL_APIC
992 /* apic read/write intercepts */ 1023 /* apic read/write intercepts */
993 pv_apic_ops.apic_write = lguest_apic_write; 1024 apic_ops = &lguest_basic_apic_ops;
994 pv_apic_ops.apic_read = lguest_apic_read;
995#endif 1025#endif
996 1026
997 /* time operations */ 1027 /* time operations */
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c
index 59d771714559..b31f2800638e 100644
--- a/arch/x86/mach-generic/bigsmp.c
+++ b/arch/x86/mach-generic/bigsmp.c
@@ -5,17 +5,16 @@
5#define APIC_DEFINITION 1 5#define APIC_DEFINITION 1
6#include <linux/threads.h> 6#include <linux/threads.h>
7#include <linux/cpumask.h> 7#include <linux/cpumask.h>
8#include <asm/smp.h>
9#include <asm/mpspec.h> 8#include <asm/mpspec.h>
10#include <asm/genapic.h> 9#include <asm/genapic.h>
11#include <asm/fixmap.h> 10#include <asm/fixmap.h>
12#include <asm/apicdef.h> 11#include <asm/apicdef.h>
13#include <linux/kernel.h> 12#include <linux/kernel.h>
14#include <linux/smp.h>
15#include <linux/init.h> 13#include <linux/init.h>
16#include <linux/dmi.h> 14#include <linux/dmi.h>
17#include <asm/mach-bigsmp/mach_apic.h>
18#include <asm/mach-bigsmp/mach_apicdef.h> 15#include <asm/mach-bigsmp/mach_apicdef.h>
16#include <linux/smp.h>
17#include <asm/mach-bigsmp/mach_apic.h>
19#include <asm/mach-bigsmp/mach_ipi.h> 18#include <asm/mach-bigsmp/mach_ipi.h>
20#include <asm/mach-default/mach_mpparse.h> 19#include <asm/mach-default/mach_mpparse.h>
21 20
diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c
index 4742626f08c4..9b30547d746e 100644
--- a/arch/x86/mach-generic/es7000.c
+++ b/arch/x86/mach-generic/es7000.c
@@ -4,16 +4,15 @@
4#define APIC_DEFINITION 1 4#define APIC_DEFINITION 1
5#include <linux/threads.h> 5#include <linux/threads.h>
6#include <linux/cpumask.h> 6#include <linux/cpumask.h>
7#include <asm/smp.h>
8#include <asm/mpspec.h> 7#include <asm/mpspec.h>
9#include <asm/genapic.h> 8#include <asm/genapic.h>
10#include <asm/fixmap.h> 9#include <asm/fixmap.h>
11#include <asm/apicdef.h> 10#include <asm/apicdef.h>
12#include <linux/kernel.h> 11#include <linux/kernel.h>
13#include <linux/string.h> 12#include <linux/string.h>
14#include <linux/smp.h>
15#include <linux/init.h> 13#include <linux/init.h>
16#include <asm/mach-es7000/mach_apicdef.h> 14#include <asm/mach-es7000/mach_apicdef.h>
15#include <linux/smp.h>
17#include <asm/mach-es7000/mach_apic.h> 16#include <asm/mach-es7000/mach_apic.h>
18#include <asm/mach-es7000/mach_ipi.h> 17#include <asm/mach-es7000/mach_ipi.h>
19#include <asm/mach-es7000/mach_mpparse.h> 18#include <asm/mach-es7000/mach_mpparse.h>
diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c
index 8091e68764c4..95c07efff6b7 100644
--- a/arch/x86/mach-generic/numaq.c
+++ b/arch/x86/mach-generic/numaq.c
@@ -4,7 +4,6 @@
4#define APIC_DEFINITION 1 4#define APIC_DEFINITION 1
5#include <linux/threads.h> 5#include <linux/threads.h>
6#include <linux/cpumask.h> 6#include <linux/cpumask.h>
7#include <linux/smp.h>
8#include <asm/mpspec.h> 7#include <asm/mpspec.h>
9#include <asm/genapic.h> 8#include <asm/genapic.h>
10#include <asm/fixmap.h> 9#include <asm/fixmap.h>
@@ -12,8 +11,9 @@
12#include <linux/kernel.h> 11#include <linux/kernel.h>
13#include <linux/string.h> 12#include <linux/string.h>
14#include <linux/init.h> 13#include <linux/init.h>
15#include <asm/mach-numaq/mach_apic.h>
16#include <asm/mach-numaq/mach_apicdef.h> 14#include <asm/mach-numaq/mach_apicdef.h>
15#include <linux/smp.h>
16#include <asm/mach-numaq/mach_apic.h>
17#include <asm/mach-numaq/mach_ipi.h> 17#include <asm/mach-numaq/mach_ipi.h>
18#include <asm/mach-numaq/mach_mpparse.h> 18#include <asm/mach-numaq/mach_mpparse.h>
19#include <asm/mach-numaq/mach_wakecpu.h> 19#include <asm/mach-numaq/mach_wakecpu.h>
diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c
index a97ea0f35b1e..752edd96b1bf 100644
--- a/arch/x86/mach-generic/summit.c
+++ b/arch/x86/mach-generic/summit.c
@@ -4,17 +4,16 @@
4#define APIC_DEFINITION 1 4#define APIC_DEFINITION 1
5#include <linux/threads.h> 5#include <linux/threads.h>
6#include <linux/cpumask.h> 6#include <linux/cpumask.h>
7#include <asm/smp.h>
8#include <asm/mpspec.h> 7#include <asm/mpspec.h>
9#include <asm/genapic.h> 8#include <asm/genapic.h>
10#include <asm/fixmap.h> 9#include <asm/fixmap.h>
11#include <asm/apicdef.h> 10#include <asm/apicdef.h>
12#include <linux/kernel.h> 11#include <linux/kernel.h>
13#include <linux/string.h> 12#include <linux/string.h>
14#include <linux/smp.h>
15#include <linux/init.h> 13#include <linux/init.h>
16#include <asm/mach-summit/mach_apic.h>
17#include <asm/mach-summit/mach_apicdef.h> 14#include <asm/mach-summit/mach_apicdef.h>
15#include <linux/smp.h>
16#include <asm/mach-summit/mach_apic.h>
18#include <asm/mach-summit/mach_ipi.h> 17#include <asm/mach-summit/mach_ipi.h>
19#include <asm/mach-summit/mach_mpparse.h> 18#include <asm/mach-summit/mach_mpparse.h>
20 19
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 9ff6e3cbf08f..8d28925ebed9 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -36,6 +36,7 @@
36#include <xen/hvc-console.h> 36#include <xen/hvc-console.h>
37 37
38#include <asm/paravirt.h> 38#include <asm/paravirt.h>
39#include <asm/apic.h>
39#include <asm/page.h> 40#include <asm/page.h>
40#include <asm/xen/hypercall.h> 41#include <asm/xen/hypercall.h>
41#include <asm/xen/hypervisor.h> 42#include <asm/xen/hypervisor.h>
@@ -580,16 +581,47 @@ static void xen_io_delay(void)
580} 581}
581 582
582#ifdef CONFIG_X86_LOCAL_APIC 583#ifdef CONFIG_X86_LOCAL_APIC
583static u32 xen_apic_read(unsigned long reg) 584static u32 xen_apic_read(u32 reg)
584{ 585{
585 return 0; 586 return 0;
586} 587}
587 588
588static void xen_apic_write(unsigned long reg, u32 val) 589static void xen_apic_write(u32 reg, u32 val)
589{ 590{
590 /* Warn to see if there's any stray references */ 591 /* Warn to see if there's any stray references */
591 WARN_ON(1); 592 WARN_ON(1);
592} 593}
594
595static u64 xen_apic_icr_read(void)
596{
597 return 0;
598}
599
600static void xen_apic_icr_write(u32 low, u32 id)
601{
602 /* Warn to see if there's any stray references */
603 WARN_ON(1);
604}
605
606static void xen_apic_wait_icr_idle(void)
607{
608 return;
609}
610
611static u32 xen_safe_apic_wait_icr_idle(void)
612{
613 return 0;
614}
615
616static struct apic_ops xen_basic_apic_ops = {
617 .read = xen_apic_read,
618 .write = xen_apic_write,
619 .icr_read = xen_apic_icr_read,
620 .icr_write = xen_apic_icr_write,
621 .wait_icr_idle = xen_apic_wait_icr_idle,
622 .safe_wait_icr_idle = xen_safe_apic_wait_icr_idle,
623};
624
593#endif 625#endif
594 626
595static void xen_flush_tlb(void) 627static void xen_flush_tlb(void)
@@ -1273,8 +1305,6 @@ static const struct pv_irq_ops xen_irq_ops __initdata = {
1273 1305
1274static const struct pv_apic_ops xen_apic_ops __initdata = { 1306static const struct pv_apic_ops xen_apic_ops __initdata = {
1275#ifdef CONFIG_X86_LOCAL_APIC 1307#ifdef CONFIG_X86_LOCAL_APIC
1276 .apic_write = xen_apic_write,
1277 .apic_read = xen_apic_read,
1278 .setup_boot_clock = paravirt_nop, 1308 .setup_boot_clock = paravirt_nop,
1279 .setup_secondary_clock = paravirt_nop, 1309 .setup_secondary_clock = paravirt_nop,
1280 .startup_ipi_hook = paravirt_nop, 1310 .startup_ipi_hook = paravirt_nop,
@@ -1677,6 +1707,13 @@ asmlinkage void __init xen_start_kernel(void)
1677 pv_apic_ops = xen_apic_ops; 1707 pv_apic_ops = xen_apic_ops;
1678 pv_mmu_ops = xen_mmu_ops; 1708 pv_mmu_ops = xen_mmu_ops;
1679 1709
1710#ifdef CONFIG_X86_LOCAL_APIC
1711 /*
1712 * set up the basic apic ops.
1713 */
1714 apic_ops = &xen_basic_apic_ops;
1715#endif
1716
1680 if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { 1717 if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
1681 pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; 1718 pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
1682 pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; 1719 pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;