aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/acpi/boot.c4
-rw-r--r--arch/x86/kernel/apic_32.c89
-rw-r--r--arch/x86/kernel/apic_64.c237
-rw-r--r--arch/x86/kernel/asm-offsets_64.c2
-rw-r--r--arch/x86/kernel/cpu/common_64.c40
-rw-r--r--arch/x86/kernel/cpu/feature_names.c2
-rw-r--r--arch/x86/kernel/genapic_64.c88
-rw-r--r--arch/x86/kernel/genapic_flat_64.c62
-rw-r--r--arch/x86/kernel/genx2apic_cluster.c164
-rw-r--r--arch/x86/kernel/genx2apic_phys.c159
-rw-r--r--arch/x86/kernel/genx2apic_uv_x.c69
-rw-r--r--arch/x86/kernel/i8259.c24
-rw-r--r--arch/x86/kernel/io_apic_32.c10
-rw-r--r--arch/x86/kernel/io_apic_64.c608
-rw-r--r--arch/x86/kernel/irqinit_32.c49
-rw-r--r--arch/x86/kernel/mpparse.c2
-rw-r--r--arch/x86/kernel/numaq_32.c7
-rw-r--r--arch/x86/kernel/paravirt.c2
-rw-r--r--arch/x86/kernel/setup.c2
-rw-r--r--arch/x86/kernel/signal_64.c11
-rw-r--r--arch/x86/kernel/smpboot.c38
-rw-r--r--arch/x86/kernel/summit_32.c2
-rw-r--r--arch/x86/kernel/syscall_64.c4
-rw-r--r--arch/x86/kernel/traps_64.c9
-rw-r--r--arch/x86/kernel/vmi_32.c4
26 files changed, 1479 insertions, 211 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 3db651fc8ec5..a07ec14f3312 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -104,6 +104,8 @@ obj-$(CONFIG_OLPC) += olpc.o
104ifeq ($(CONFIG_X86_64),y) 104ifeq ($(CONFIG_X86_64),y)
105 obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o 105 obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
106 obj-y += bios_uv.o 106 obj-y += bios_uv.o
107 obj-y += genx2apic_cluster.o
108 obj-y += genx2apic_phys.o
107 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o 109 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
108 obj-$(CONFIG_AUDIT) += audit_64.o 110 obj-$(CONFIG_AUDIT) += audit_64.o
109 111
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index fa88a1d71290..12e260e8fb2a 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -761,7 +761,7 @@ static void __init acpi_register_lapic_address(unsigned long address)
761 761
762 set_fixmap_nocache(FIX_APIC_BASE, address); 762 set_fixmap_nocache(FIX_APIC_BASE, address);
763 if (boot_cpu_physical_apicid == -1U) { 763 if (boot_cpu_physical_apicid == -1U) {
764 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 764 boot_cpu_physical_apicid = read_apic_id();
765#ifdef CONFIG_X86_32 765#ifdef CONFIG_X86_32
766 apic_version[boot_cpu_physical_apicid] = 766 apic_version[boot_cpu_physical_apicid] =
767 GET_APIC_VERSION(apic_read(APIC_LVR)); 767 GET_APIC_VERSION(apic_read(APIC_LVR));
@@ -1337,7 +1337,9 @@ static void __init acpi_process_madt(void)
1337 acpi_ioapic = 1; 1337 acpi_ioapic = 1;
1338 1338
1339 smp_found_config = 1; 1339 smp_found_config = 1;
1340#ifdef CONFIG_X86_32
1340 setup_apic_routing(); 1341 setup_apic_routing();
1342#endif
1341 } 1343 }
1342 } 1344 }
1343 if (error == -EINVAL) { 1345 if (error == -EINVAL) {
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 039a8d4aaf62..84318edad8fb 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -145,13 +145,18 @@ static int modern_apic(void)
145 return lapic_get_version() >= 0x14; 145 return lapic_get_version() >= 0x14;
146} 146}
147 147
148void apic_wait_icr_idle(void) 148/*
149 * Paravirt kernels also might be using these below ops. So we still
150 * use generic apic_read()/apic_write(), which might be pointing to different
151 * ops in PARAVIRT case.
152 */
153void xapic_wait_icr_idle(void)
149{ 154{
150 while (apic_read(APIC_ICR) & APIC_ICR_BUSY) 155 while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
151 cpu_relax(); 156 cpu_relax();
152} 157}
153 158
154u32 safe_apic_wait_icr_idle(void) 159u32 safe_xapic_wait_icr_idle(void)
155{ 160{
156 u32 send_status; 161 u32 send_status;
157 int timeout; 162 int timeout;
@@ -167,6 +172,34 @@ u32 safe_apic_wait_icr_idle(void)
167 return send_status; 172 return send_status;
168} 173}
169 174
175void xapic_icr_write(u32 low, u32 id)
176{
177 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
178 apic_write(APIC_ICR, low);
179}
180
181u64 xapic_icr_read(void)
182{
183 u32 icr1, icr2;
184
185 icr2 = apic_read(APIC_ICR2);
186 icr1 = apic_read(APIC_ICR);
187
188 return icr1 | ((u64)icr2 << 32);
189}
190
191static struct apic_ops xapic_ops = {
192 .read = native_apic_mem_read,
193 .write = native_apic_mem_write,
194 .icr_read = xapic_icr_read,
195 .icr_write = xapic_icr_write,
196 .wait_icr_idle = xapic_wait_icr_idle,
197 .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
198};
199
200struct apic_ops __read_mostly *apic_ops = &xapic_ops;
201EXPORT_SYMBOL_GPL(apic_ops);
202
170/** 203/**
171 * enable_NMI_through_LVT0 - enable NMI through local vector table 0 204 * enable_NMI_through_LVT0 - enable NMI through local vector table 0
172 */ 205 */
@@ -1205,7 +1238,7 @@ void __init init_apic_mappings(void)
1205 * default configuration (or the MP table is broken). 1238 * default configuration (or the MP table is broken).
1206 */ 1239 */
1207 if (boot_cpu_physical_apicid == -1U) 1240 if (boot_cpu_physical_apicid == -1U)
1208 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 1241 boot_cpu_physical_apicid = read_apic_id();
1209 1242
1210} 1243}
1211 1244
@@ -1242,7 +1275,7 @@ int __init APIC_init_uniprocessor(void)
1242 * might be zero if read from MP tables. Get it from LAPIC. 1275 * might be zero if read from MP tables. Get it from LAPIC.
1243 */ 1276 */
1244#ifdef CONFIG_CRASH_DUMP 1277#ifdef CONFIG_CRASH_DUMP
1245 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 1278 boot_cpu_physical_apicid = read_apic_id();
1246#endif 1279#endif
1247 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); 1280 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
1248 1281
@@ -1321,54 +1354,6 @@ void smp_error_interrupt(struct pt_regs *regs)
1321 irq_exit(); 1354 irq_exit();
1322} 1355}
1323 1356
1324#ifdef CONFIG_SMP
1325void __init smp_intr_init(void)
1326{
1327 /*
1328 * IRQ0 must be given a fixed assignment and initialized,
1329 * because it's used before the IO-APIC is set up.
1330 */
1331 set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
1332
1333 /*
1334 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
1335 * IPI, driven by wakeup.
1336 */
1337 alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
1338
1339 /* IPI for invalidation */
1340 alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
1341
1342 /* IPI for generic function call */
1343 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
1344
1345 /* IPI for single call function */
1346 set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
1347 call_function_single_interrupt);
1348}
1349#endif
1350
1351/*
1352 * Initialize APIC interrupts
1353 */
1354void __init apic_intr_init(void)
1355{
1356#ifdef CONFIG_SMP
1357 smp_intr_init();
1358#endif
1359 /* self generated IPI for local APIC timer */
1360 alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
1361
1362 /* IPI vectors for APIC spurious and error interrupts */
1363 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
1364 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
1365
1366 /* thermal monitor LVT interrupt */
1367#ifdef CONFIG_X86_MCE_P4THERMAL
1368 alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
1369#endif
1370}
1371
1372/** 1357/**
1373 * connect_bsp_APIC - attach the APIC to the interrupt system 1358 * connect_bsp_APIC - attach the APIC to the interrupt system
1374 */ 1359 */
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 7f1f030da7ee..cd63c0bc6180 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -27,6 +27,7 @@
27#include <linux/clockchips.h> 27#include <linux/clockchips.h>
28#include <linux/acpi_pmtmr.h> 28#include <linux/acpi_pmtmr.h>
29#include <linux/module.h> 29#include <linux/module.h>
30#include <linux/dmar.h>
30 31
31#include <asm/atomic.h> 32#include <asm/atomic.h>
32#include <asm/smp.h> 33#include <asm/smp.h>
@@ -39,6 +40,7 @@
39#include <asm/proto.h> 40#include <asm/proto.h>
40#include <asm/timex.h> 41#include <asm/timex.h>
41#include <asm/apic.h> 42#include <asm/apic.h>
43#include <asm/i8259.h>
42 44
43#include <mach_ipi.h> 45#include <mach_ipi.h>
44#include <mach_apic.h> 46#include <mach_apic.h>
@@ -46,6 +48,11 @@
46static int disable_apic_timer __cpuinitdata; 48static int disable_apic_timer __cpuinitdata;
47static int apic_calibrate_pmtmr __initdata; 49static int apic_calibrate_pmtmr __initdata;
48int disable_apic; 50int disable_apic;
51int disable_x2apic;
52int x2apic;
53
54/* x2apic enabled before OS handover */
55int x2apic_preenabled;
49 56
50/* Local APIC timer works in C2 */ 57/* Local APIC timer works in C2 */
51int local_apic_timer_c2_ok; 58int local_apic_timer_c2_ok;
@@ -119,13 +126,13 @@ static int modern_apic(void)
119 return lapic_get_version() >= 0x14; 126 return lapic_get_version() >= 0x14;
120} 127}
121 128
122void apic_wait_icr_idle(void) 129void xapic_wait_icr_idle(void)
123{ 130{
124 while (apic_read(APIC_ICR) & APIC_ICR_BUSY) 131 while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
125 cpu_relax(); 132 cpu_relax();
126} 133}
127 134
128u32 safe_apic_wait_icr_idle(void) 135u32 safe_xapic_wait_icr_idle(void)
129{ 136{
130 u32 send_status; 137 u32 send_status;
131 int timeout; 138 int timeout;
@@ -141,6 +148,69 @@ u32 safe_apic_wait_icr_idle(void)
141 return send_status; 148 return send_status;
142} 149}
143 150
151void xapic_icr_write(u32 low, u32 id)
152{
153 apic_write(APIC_ICR2, id << 24);
154 apic_write(APIC_ICR, low);
155}
156
157u64 xapic_icr_read(void)
158{
159 u32 icr1, icr2;
160
161 icr2 = apic_read(APIC_ICR2);
162 icr1 = apic_read(APIC_ICR);
163
164 return (icr1 | ((u64)icr2 << 32));
165}
166
167static struct apic_ops xapic_ops = {
168 .read = native_apic_mem_read,
169 .write = native_apic_mem_write,
170 .icr_read = xapic_icr_read,
171 .icr_write = xapic_icr_write,
172 .wait_icr_idle = xapic_wait_icr_idle,
173 .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
174};
175
176struct apic_ops __read_mostly *apic_ops = &xapic_ops;
177
178EXPORT_SYMBOL_GPL(apic_ops);
179
180static void x2apic_wait_icr_idle(void)
181{
182 /* no need to wait for icr idle in x2apic */
183 return;
184}
185
186static u32 safe_x2apic_wait_icr_idle(void)
187{
188 /* no need to wait for icr idle in x2apic */
189 return 0;
190}
191
192void x2apic_icr_write(u32 low, u32 id)
193{
194 wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
195}
196
197u64 x2apic_icr_read(void)
198{
199 unsigned long val;
200
201 rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
202 return val;
203}
204
205static struct apic_ops x2apic_ops = {
206 .read = native_apic_msr_read,
207 .write = native_apic_msr_write,
208 .icr_read = x2apic_icr_read,
209 .icr_write = x2apic_icr_write,
210 .wait_icr_idle = x2apic_wait_icr_idle,
211 .safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
212};
213
144/** 214/**
145 * enable_NMI_through_LVT0 - enable NMI through local vector table 0 215 * enable_NMI_through_LVT0 - enable NMI through local vector table 0
146 */ 216 */
@@ -630,10 +700,10 @@ int __init verify_local_APIC(void)
630 /* 700 /*
631 * The ID register is read/write in a real APIC. 701 * The ID register is read/write in a real APIC.
632 */ 702 */
633 reg0 = read_apic_id(); 703 reg0 = apic_read(APIC_ID);
634 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); 704 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
635 apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); 705 apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
636 reg1 = read_apic_id(); 706 reg1 = apic_read(APIC_ID);
637 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); 707 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
638 apic_write(APIC_ID, reg0); 708 apic_write(APIC_ID, reg0);
639 if (reg1 != (reg0 ^ APIC_ID_MASK)) 709 if (reg1 != (reg0 ^ APIC_ID_MASK))
@@ -834,6 +904,125 @@ void __cpuinit end_local_APIC_setup(void)
834 apic_pm_activate(); 904 apic_pm_activate();
835} 905}
836 906
907void check_x2apic(void)
908{
909 int msr, msr2;
910
911 rdmsr(MSR_IA32_APICBASE, msr, msr2);
912
913 if (msr & X2APIC_ENABLE) {
914 printk("x2apic enabled by BIOS, switching to x2apic ops\n");
915 x2apic_preenabled = x2apic = 1;
916 apic_ops = &x2apic_ops;
917 }
918}
919
920void enable_x2apic(void)
921{
922 int msr, msr2;
923
924 rdmsr(MSR_IA32_APICBASE, msr, msr2);
925 if (!(msr & X2APIC_ENABLE)) {
926 printk("Enabling x2apic\n");
927 wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
928 }
929}
930
931void enable_IR_x2apic(void)
932{
933#ifdef CONFIG_INTR_REMAP
934 int ret;
935 unsigned long flags;
936
937 if (!cpu_has_x2apic)
938 return;
939
940 if (!x2apic_preenabled && disable_x2apic) {
941 printk(KERN_INFO
942 "Skipped enabling x2apic and Interrupt-remapping "
943 "because of nox2apic\n");
944 return;
945 }
946
947 if (x2apic_preenabled && disable_x2apic)
948 panic("Bios already enabled x2apic, can't enforce nox2apic");
949
950 if (!x2apic_preenabled && skip_ioapic_setup) {
951 printk(KERN_INFO
952 "Skipped enabling x2apic and Interrupt-remapping "
953 "because of skipping io-apic setup\n");
954 return;
955 }
956
957 ret = dmar_table_init();
958 if (ret) {
959 printk(KERN_INFO
960 "dmar_table_init() failed with %d:\n", ret);
961
962 if (x2apic_preenabled)
963 panic("x2apic enabled by bios. But IR enabling failed");
964 else
965 printk(KERN_INFO
966 "Not enabling x2apic,Intr-remapping\n");
967 return;
968 }
969
970 local_irq_save(flags);
971 mask_8259A();
972 save_mask_IO_APIC_setup();
973
974 ret = enable_intr_remapping(1);
975
976 if (ret && x2apic_preenabled) {
977 local_irq_restore(flags);
978 panic("x2apic enabled by bios. But IR enabling failed");
979 }
980
981 if (ret)
982 goto end;
983
984 if (!x2apic) {
985 x2apic = 1;
986 apic_ops = &x2apic_ops;
987 enable_x2apic();
988 }
989end:
990 if (ret)
991 /*
992 * IR enabling failed
993 */
994 restore_IO_APIC_setup();
995 else
996 reinit_intr_remapped_IO_APIC(x2apic_preenabled);
997
998 unmask_8259A();
999 local_irq_restore(flags);
1000
1001 if (!ret) {
1002 if (!x2apic_preenabled)
1003 printk(KERN_INFO
1004 "Enabled x2apic and interrupt-remapping\n");
1005 else
1006 printk(KERN_INFO
1007 "Enabled Interrupt-remapping\n");
1008 } else
1009 printk(KERN_ERR
1010 "Failed to enable Interrupt-remapping and x2apic\n");
1011#else
1012 if (!cpu_has_x2apic)
1013 return;
1014
1015 if (x2apic_preenabled)
1016 panic("x2apic enabled prior OS handover,"
1017 " enable CONFIG_INTR_REMAP");
1018
1019 printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
1020 " and x2apic\n");
1021#endif
1022
1023 return;
1024}
1025
837/* 1026/*
838 * Detect and enable local APICs on non-SMP boards. 1027 * Detect and enable local APICs on non-SMP boards.
839 * Original code written by Keir Fraser. 1028 * Original code written by Keir Fraser.
@@ -873,7 +1062,7 @@ void __init early_init_lapic_mapping(void)
873 * Fetch the APIC ID of the BSP in case we have a 1062 * Fetch the APIC ID of the BSP in case we have a
874 * default configuration (or the MP table is broken). 1063 * default configuration (or the MP table is broken).
875 */ 1064 */
876 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 1065 boot_cpu_physical_apicid = read_apic_id();
877} 1066}
878 1067
879/** 1068/**
@@ -881,6 +1070,11 @@ void __init early_init_lapic_mapping(void)
881 */ 1070 */
882void __init init_apic_mappings(void) 1071void __init init_apic_mappings(void)
883{ 1072{
1073 if (x2apic) {
1074 boot_cpu_physical_apicid = read_apic_id();
1075 return;
1076 }
1077
884 /* 1078 /*
885 * If no local APIC can be found then set up a fake all 1079 * If no local APIC can be found then set up a fake all
886 * zeroes page to simulate the local APIC and another 1080 * zeroes page to simulate the local APIC and another
@@ -900,7 +1094,7 @@ void __init init_apic_mappings(void)
900 * Fetch the APIC ID of the BSP in case we have a 1094 * Fetch the APIC ID of the BSP in case we have a
901 * default configuration (or the MP table is broken). 1095 * default configuration (or the MP table is broken).
902 */ 1096 */
903 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 1097 boot_cpu_physical_apicid = read_apic_id();
904} 1098}
905 1099
906/* 1100/*
@@ -919,6 +1113,9 @@ int __init APIC_init_uniprocessor(void)
919 return -1; 1113 return -1;
920 } 1114 }
921 1115
1116 enable_IR_x2apic();
1117 setup_apic_routing();
1118
922 verify_local_APIC(); 1119 verify_local_APIC();
923 1120
924 connect_bsp_APIC(); 1121 connect_bsp_APIC();
@@ -1100,6 +1297,11 @@ void __cpuinit generic_processor_info(int apicid, int version)
1100 cpu_set(cpu, cpu_present_map); 1297 cpu_set(cpu, cpu_present_map);
1101} 1298}
1102 1299
1300int hard_smp_processor_id(void)
1301{
1302 return read_apic_id();
1303}
1304
1103/* 1305/*
1104 * Power management 1306 * Power management
1105 */ 1307 */
@@ -1136,7 +1338,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
1136 1338
1137 maxlvt = lapic_get_maxlvt(); 1339 maxlvt = lapic_get_maxlvt();
1138 1340
1139 apic_pm_state.apic_id = read_apic_id(); 1341 apic_pm_state.apic_id = apic_read(APIC_ID);
1140 apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); 1342 apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
1141 apic_pm_state.apic_ldr = apic_read(APIC_LDR); 1343 apic_pm_state.apic_ldr = apic_read(APIC_LDR);
1142 apic_pm_state.apic_dfr = apic_read(APIC_DFR); 1344 apic_pm_state.apic_dfr = apic_read(APIC_DFR);
@@ -1171,10 +1373,14 @@ static int lapic_resume(struct sys_device *dev)
1171 maxlvt = lapic_get_maxlvt(); 1373 maxlvt = lapic_get_maxlvt();
1172 1374
1173 local_irq_save(flags); 1375 local_irq_save(flags);
1174 rdmsr(MSR_IA32_APICBASE, l, h); 1376 if (!x2apic) {
1175 l &= ~MSR_IA32_APICBASE_BASE; 1377 rdmsr(MSR_IA32_APICBASE, l, h);
1176 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; 1378 l &= ~MSR_IA32_APICBASE_BASE;
1177 wrmsr(MSR_IA32_APICBASE, l, h); 1379 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
1380 wrmsr(MSR_IA32_APICBASE, l, h);
1381 } else
1382 enable_x2apic();
1383
1178 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); 1384 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
1179 apic_write(APIC_ID, apic_pm_state.apic_id); 1385 apic_write(APIC_ID, apic_pm_state.apic_id);
1180 apic_write(APIC_DFR, apic_pm_state.apic_dfr); 1386 apic_write(APIC_DFR, apic_pm_state.apic_dfr);
@@ -1314,6 +1520,15 @@ __cpuinit int apic_is_clustered_box(void)
1314 return (clusters > 2); 1520 return (clusters > 2);
1315} 1521}
1316 1522
1523static __init int setup_nox2apic(char *str)
1524{
1525 disable_x2apic = 1;
1526 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC);
1527 return 0;
1528}
1529early_param("nox2apic", setup_nox2apic);
1530
1531
1317/* 1532/*
1318 * APIC command line parameters 1533 * APIC command line parameters
1319 */ 1534 */
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index aa89387006fe..505543a75a56 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -22,7 +22,7 @@
22 22
23#define __NO_STUBS 1 23#define __NO_STUBS 1
24#undef __SYSCALL 24#undef __SYSCALL
25#undef _ASM_X86_64_UNISTD_H_ 25#undef ASM_X86__UNISTD_64_H
26#define __SYSCALL(nr, sym) [nr] = 1, 26#define __SYSCALL(nr, sym) [nr] = 1,
27static char syscalls[] = { 27static char syscalls[] = {
28#include <asm/unistd.h> 28#include <asm/unistd.h>
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
index dd6e3f15017e..cc6efe86249d 100644
--- a/arch/x86/kernel/cpu/common_64.c
+++ b/arch/x86/kernel/cpu/common_64.c
@@ -493,17 +493,20 @@ void pda_init(int cpu)
493 /* others are initialized in smpboot.c */ 493 /* others are initialized in smpboot.c */
494 pda->pcurrent = &init_task; 494 pda->pcurrent = &init_task;
495 pda->irqstackptr = boot_cpu_stack; 495 pda->irqstackptr = boot_cpu_stack;
496 pda->irqstackptr += IRQSTACKSIZE - 64;
496 } else { 497 } else {
497 pda->irqstackptr = (char *) 498 if (!pda->irqstackptr) {
498 __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); 499 pda->irqstackptr = (char *)
499 if (!pda->irqstackptr) 500 __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
500 panic("cannot allocate irqstack for cpu %d", cpu); 501 if (!pda->irqstackptr)
502 panic("cannot allocate irqstack for cpu %d",
503 cpu);
504 pda->irqstackptr += IRQSTACKSIZE - 64;
505 }
501 506
502 if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) 507 if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
503 pda->nodenumber = cpu_to_node(cpu); 508 pda->nodenumber = cpu_to_node(cpu);
504 } 509 }
505
506 pda->irqstackptr += IRQSTACKSIZE-64;
507} 510}
508 511
509char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + 512char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
@@ -597,23 +600,28 @@ void __cpuinit cpu_init(void)
597 barrier(); 600 barrier();
598 601
599 check_efer(); 602 check_efer();
603 if (cpu != 0 && x2apic)
604 enable_x2apic();
600 605
601 /* 606 /*
602 * set up and load the per-CPU TSS 607 * set up and load the per-CPU TSS
603 */ 608 */
604 for (v = 0; v < N_EXCEPTION_STACKS; v++) { 609 if (!orig_ist->ist[0]) {
605 static const unsigned int order[N_EXCEPTION_STACKS] = { 610 static const unsigned int order[N_EXCEPTION_STACKS] = {
606 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, 611 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
607 [DEBUG_STACK - 1] = DEBUG_STACK_ORDER 612 [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
608 }; 613 };
609 if (cpu) { 614 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
610 estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); 615 if (cpu) {
611 if (!estacks) 616 estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
612 panic("Cannot allocate exception stack %ld %d\n", 617 if (!estacks)
613 v, cpu); 618 panic("Cannot allocate exception "
619 "stack %ld %d\n", v, cpu);
620 }
621 estacks += PAGE_SIZE << order[v];
622 orig_ist->ist[v] = t->x86_tss.ist[v] =
623 (unsigned long)estacks;
614 } 624 }
615 estacks += PAGE_SIZE << order[v];
616 orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks;
617 } 625 }
618 626
619 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); 627 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c
index e43ad4ad4cba..0bf4d37a0483 100644
--- a/arch/x86/kernel/cpu/feature_names.c
+++ b/arch/x86/kernel/cpu/feature_names.c
@@ -45,7 +45,7 @@ const char * const x86_cap_flags[NCAPINTS*32] = {
45 /* Intel-defined (#2) */ 45 /* Intel-defined (#2) */
46 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", 46 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
47 "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, 47 "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
48 NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt", 48 NULL, NULL, "dca", "sse4_1", "sse4_2", "x2apic", NULL, "popcnt",
49 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 49 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
50 50
51 /* VIA/Cyrix/Centaur-defined */ 51 /* VIA/Cyrix/Centaur-defined */
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index eaff0bbb1444..6c9bfc9e1e95 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -16,87 +16,63 @@
16#include <linux/ctype.h> 16#include <linux/ctype.h>
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/hardirq.h> 18#include <linux/hardirq.h>
19#include <linux/dmar.h>
19 20
20#include <asm/smp.h> 21#include <asm/smp.h>
21#include <asm/ipi.h> 22#include <asm/ipi.h>
22#include <asm/genapic.h> 23#include <asm/genapic.h>
23 24
24#ifdef CONFIG_ACPI 25extern struct genapic apic_flat;
25#include <acpi/acpi_bus.h> 26extern struct genapic apic_physflat;
26#endif 27extern struct genapic apic_x2xpic_uv_x;
27 28extern struct genapic apic_x2apic_phys;
28DEFINE_PER_CPU(int, x2apic_extra_bits); 29extern struct genapic apic_x2apic_cluster;
29 30
30struct genapic __read_mostly *genapic = &apic_flat; 31struct genapic __read_mostly *genapic = &apic_flat;
31 32
32static enum uv_system_type uv_system_type; 33static struct genapic *apic_probe[] __initdata = {
34 &apic_x2apic_uv_x,
35 &apic_x2apic_phys,
36 &apic_x2apic_cluster,
37 &apic_physflat,
38 NULL,
39};
33 40
34/* 41/*
35 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. 42 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
36 */ 43 */
37void __init setup_apic_routing(void) 44void __init setup_apic_routing(void)
38{ 45{
39 if (uv_system_type == UV_NON_UNIQUE_APIC) 46 if (genapic == &apic_x2apic_phys || genapic == &apic_x2apic_cluster) {
40 genapic = &apic_x2apic_uv_x; 47 if (!intr_remapping_enabled)
41 else 48 genapic = &apic_flat;
42#ifdef CONFIG_ACPI 49 }
43 /*
44 * Quirk: some x86_64 machines can only use physical APIC mode
45 * regardless of how many processors are present (x86_64 ES7000
46 * is an example).
47 */
48 if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
49 (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
50 genapic = &apic_physflat;
51 else
52#endif
53
54 if (max_physical_apicid < 8)
55 genapic = &apic_flat;
56 else
57 genapic = &apic_physflat;
58 50
59 printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); 51 if (genapic == &apic_flat) {
52 if (max_physical_apicid >= 8)
53 genapic = &apic_physflat;
54 printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
55 }
60} 56}
61 57
62/* Same for both flat and physical. */ 58/* Same for both flat and physical. */
63 59
64void send_IPI_self(int vector) 60void apic_send_IPI_self(int vector)
65{ 61{
66 __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); 62 __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
67} 63}
68 64
69int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) 65int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
70{ 66{
71 if (!strcmp(oem_id, "SGI")) { 67 int i;
72 if (!strcmp(oem_table_id, "UVL")) 68
73 uv_system_type = UV_LEGACY_APIC; 69 for (i = 0; apic_probe[i]; ++i) {
74 else if (!strcmp(oem_table_id, "UVX")) 70 if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
75 uv_system_type = UV_X2APIC; 71 genapic = apic_probe[i];
76 else if (!strcmp(oem_table_id, "UVH")) 72 printk(KERN_INFO "Setting APIC routing to %s.\n",
77 uv_system_type = UV_NON_UNIQUE_APIC; 73 genapic->name);
74 return 1;
75 }
78 } 76 }
79 return 0; 77 return 0;
80} 78}
81
82unsigned int read_apic_id(void)
83{
84 unsigned int id;
85
86 WARN_ON(preemptible() && num_online_cpus() > 1);
87 id = apic_read(APIC_ID);
88 if (uv_system_type >= UV_X2APIC)
89 id |= __get_cpu_var(x2apic_extra_bits);
90 return id;
91}
92
93enum uv_system_type get_uv_system_type(void)
94{
95 return uv_system_type;
96}
97
98int is_uv_system(void)
99{
100 return uv_system_type != UV_NONE;
101}
102EXPORT_SYMBOL_GPL(is_uv_system);
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index 786548a62d38..9eca5ba7a6b1 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -15,9 +15,20 @@
15#include <linux/kernel.h> 15#include <linux/kernel.h>
16#include <linux/ctype.h> 16#include <linux/ctype.h>
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/hardirq.h>
18#include <asm/smp.h> 19#include <asm/smp.h>
19#include <asm/ipi.h> 20#include <asm/ipi.h>
20#include <asm/genapic.h> 21#include <asm/genapic.h>
22#include <mach_apicdef.h>
23
24#ifdef CONFIG_ACPI
25#include <acpi/acpi_bus.h>
26#endif
27
28static int __init flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
29{
30 return 1;
31}
21 32
22static cpumask_t flat_target_cpus(void) 33static cpumask_t flat_target_cpus(void)
23{ 34{
@@ -95,9 +106,33 @@ static void flat_send_IPI_all(int vector)
95 __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); 106 __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
96} 107}
97 108
109static unsigned int get_apic_id(unsigned long x)
110{
111 unsigned int id;
112
113 id = (((x)>>24) & 0xFFu);
114 return id;
115}
116
117static unsigned long set_apic_id(unsigned int id)
118{
119 unsigned long x;
120
121 x = ((id & 0xFFu)<<24);
122 return x;
123}
124
125static unsigned int read_xapic_id(void)
126{
127 unsigned int id;
128
129 id = get_apic_id(apic_read(APIC_ID));
130 return id;
131}
132
98static int flat_apic_id_registered(void) 133static int flat_apic_id_registered(void)
99{ 134{
100 return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map); 135 return physid_isset(read_xapic_id(), phys_cpu_present_map);
101} 136}
102 137
103static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) 138static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask)
@@ -112,6 +147,7 @@ static unsigned int phys_pkg_id(int index_msb)
112 147
113struct genapic apic_flat = { 148struct genapic apic_flat = {
114 .name = "flat", 149 .name = "flat",
150 .acpi_madt_oem_check = flat_acpi_madt_oem_check,
115 .int_delivery_mode = dest_LowestPrio, 151 .int_delivery_mode = dest_LowestPrio,
116 .int_dest_mode = (APIC_DEST_LOGICAL != 0), 152 .int_dest_mode = (APIC_DEST_LOGICAL != 0),
117 .target_cpus = flat_target_cpus, 153 .target_cpus = flat_target_cpus,
@@ -121,8 +157,12 @@ struct genapic apic_flat = {
121 .send_IPI_all = flat_send_IPI_all, 157 .send_IPI_all = flat_send_IPI_all,
122 .send_IPI_allbutself = flat_send_IPI_allbutself, 158 .send_IPI_allbutself = flat_send_IPI_allbutself,
123 .send_IPI_mask = flat_send_IPI_mask, 159 .send_IPI_mask = flat_send_IPI_mask,
160 .send_IPI_self = apic_send_IPI_self,
124 .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, 161 .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
125 .phys_pkg_id = phys_pkg_id, 162 .phys_pkg_id = phys_pkg_id,
163 .get_apic_id = get_apic_id,
164 .set_apic_id = set_apic_id,
165 .apic_id_mask = (0xFFu<<24),
126}; 166};
127 167
128/* 168/*
@@ -130,6 +170,21 @@ struct genapic apic_flat = {
130 * We cannot use logical delivery in this case because the mask 170 * We cannot use logical delivery in this case because the mask
131 * overflows, so use physical mode. 171 * overflows, so use physical mode.
132 */ 172 */
173static int __init physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
174{
175#ifdef CONFIG_ACPI
176 /*
177 * Quirk: some x86_64 machines can only use physical APIC mode
178 * regardless of how many processors are present (x86_64 ES7000
179 * is an example).
180 */
181 if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
182 (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
183 return 1;
184#endif
185
186 return 0;
187}
133 188
134static cpumask_t physflat_target_cpus(void) 189static cpumask_t physflat_target_cpus(void)
135{ 190{
@@ -176,6 +231,7 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
176 231
177struct genapic apic_physflat = { 232struct genapic apic_physflat = {
178 .name = "physical flat", 233 .name = "physical flat",
234 .acpi_madt_oem_check = physflat_acpi_madt_oem_check,
179 .int_delivery_mode = dest_Fixed, 235 .int_delivery_mode = dest_Fixed,
180 .int_dest_mode = (APIC_DEST_PHYSICAL != 0), 236 .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
181 .target_cpus = physflat_target_cpus, 237 .target_cpus = physflat_target_cpus,
@@ -185,6 +241,10 @@ struct genapic apic_physflat = {
185 .send_IPI_all = physflat_send_IPI_all, 241 .send_IPI_all = physflat_send_IPI_all,
186 .send_IPI_allbutself = physflat_send_IPI_allbutself, 242 .send_IPI_allbutself = physflat_send_IPI_allbutself,
187 .send_IPI_mask = physflat_send_IPI_mask, 243 .send_IPI_mask = physflat_send_IPI_mask,
244 .send_IPI_self = apic_send_IPI_self,
188 .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, 245 .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
189 .phys_pkg_id = phys_pkg_id, 246 .phys_pkg_id = phys_pkg_id,
247 .get_apic_id = get_apic_id,
248 .set_apic_id = set_apic_id,
249 .apic_id_mask = (0xFFu<<24),
190}; 250};
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
new file mode 100644
index 000000000000..fed9f68efd66
--- /dev/null
+++ b/arch/x86/kernel/genx2apic_cluster.c
@@ -0,0 +1,164 @@
1#include <linux/threads.h>
2#include <linux/cpumask.h>
3#include <linux/string.h>
4#include <linux/kernel.h>
5#include <linux/ctype.h>
6#include <linux/init.h>
7#include <linux/dmar.h>
8
9#include <asm/smp.h>
10#include <asm/ipi.h>
11#include <asm/genapic.h>
12
/*
 * Per-CPU copy of the logical APIC ID.  Filled in by init_x2apic_ldr() from
 * the APIC_LDR register and used as the destination when sending IPIs and
 * when translating a cpumask to an APIC ID.
 */
13DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
14
/*
 * MADT OEM check for the cluster x2apic driver: returns 1 whenever
 * cpu_has_x2apic is true, 0 otherwise.  The OEM strings are not examined.
 */
15static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
16{
17 if (cpu_has_x2apic)
18 return 1;
19
20 return 0;
21}
22
23/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
24
/* Returns a cpumask containing only CPU 0. */
25static cpumask_t x2apic_target_cpus(void)
26{
27 return cpumask_of_cpu(0);
28}
29
30/*
31 * for now each logical cpu is in its own vector allocation domain.
 *
 * Returns a cpumask in which only the bit for @cpu is set.
32 */
33static cpumask_t x2apic_vector_allocation_domain(int cpu)
34{
35 cpumask_t domain = CPU_MASK_NONE;
36 cpu_set(cpu, domain);
37 return domain;
38}
39
/*
 * Send one IPI: build the ICR value with __prepare_ICR(0, vector, dest) and
 * write it, together with the destination @apicid, via x2apic_icr_write().
 * @dest is the destination-mode value the caller passes through
 * (APIC_DEST_LOGICAL in this file).
 */
40static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
41 unsigned int dest)
42{
43 unsigned long cfg;
44
45 cfg = __prepare_ICR(0, vector, dest);
46
47 /*
48 * send the IPI.
49 */
50 x2apic_icr_write(cfg, apicid);
51}
52
53/*
54 * for now, we send the IPI's one by one in the cpumask.
55 * TBD: Based on the cpu mask, we can send the IPI's to the cluster group
56 * at once. We have 16 cpu's in a cluster. This will minimize IPI register
57 * writes.
 *
 * Each CPU in @mask is addressed by its per-cpu logical APIC ID with
 * APIC_DEST_LOGICAL.  Local interrupts are disabled for the whole loop.
58 */
59static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
60{
61 unsigned long flags;
62 unsigned long query_cpu;
63
64 local_irq_save(flags);
65 for_each_cpu_mask(query_cpu, mask) {
66 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu),
67 vector, APIC_DEST_LOGICAL);
68 }
69 local_irq_restore(flags);
70}
71
/*
 * Send @vector to every CPU in cpu_online_map except the calling CPU.
 * Does nothing when no other CPU is online.
 */
72static void x2apic_send_IPI_allbutself(int vector)
73{
74 cpumask_t mask = cpu_online_map;
75
76 cpu_clear(smp_processor_id(), mask);
77
78 if (!cpus_empty(mask))
79 x2apic_send_IPI_mask(mask, vector);
80}
81
/* Send @vector to every CPU in cpu_online_map (the caller included). */
82static void x2apic_send_IPI_all(int vector)
83{
84 x2apic_send_IPI_mask(cpu_online_map, vector);
85}
86
/* .apic_id_registered hook: unconditionally reports success (1). */
87static int x2apic_apic_id_registered(void)
88{
89 return 1;
90}
91
/*
 * Translate @cpumask into a single destination: the logical APIC ID of the
 * first CPU in the mask, or BAD_APICID when first_cpu() yields a value that
 * is not below NR_CPUS.
 */
92static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
93{
94 int cpu;
95
96 /*
97 * Only one destination ID can be returned, so use the logical
98 * APIC ID of the first CPU in the mask.
99 */
100 cpu = first_cpu(cpumask);
101 if ((unsigned)cpu < NR_CPUS)
102 return per_cpu(x86_cpu_to_logical_apicid, cpu);
103 else
104 return BAD_APICID;
105}
106
/*
 * .get_apic_id hook: the APIC ID is the register value itself, converted to
 * unsigned int; no shifting or masking is applied.
 */
107static unsigned int get_apic_id(unsigned long x)
108{
109 unsigned int id;
110
111 id = x;
112 return id;
113}
114
/*
 * .set_apic_id hook: inverse of get_apic_id(); returns @id widened to
 * unsigned long with no shifting.
 */
115static unsigned long set_apic_id(unsigned int id)
116{
117 unsigned long x;
118
119 x = id;
120 return x;
121}
122
/* Returns the raw value read from the APIC_ID register. */
123static unsigned int x2apic_read_id(void)
124{
125 return apic_read(APIC_ID);
126}
127
/* .phys_pkg_id hook: the current APIC ID shifted right by @index_msb bits. */
128static unsigned int phys_pkg_id(int index_msb)
129{
130 return x2apic_read_id() >> index_msb;
131}
132
/* Send @vector to the calling CPU by writing it to the APIC_SELF_IPI register. */
133static void x2apic_send_IPI_self(int vector)
134{
135 apic_write(APIC_SELF_IPI, vector);
136}
137
/*
 * .init_apic_ldr hook: cache the value read from APIC_LDR as the calling
 * CPU's x86_cpu_to_logical_apicid.  The register itself is only read.
 */
138static void init_x2apic_ldr(void)
139{
140 int cpu = smp_processor_id();
141
142 per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR);
143 return;
144}
145
/*
 * genapic operations table for "cluster x2apic": logical destination mode
 * with lowest-priority delivery, wired to the helpers defined in this file.
 * The full 32 bits of the APIC ID are significant (apic_id_mask).
 */
146struct genapic apic_x2apic_cluster = {
147 .name = "cluster x2apic",
148 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
149 .int_delivery_mode = dest_LowestPrio,
150 .int_dest_mode = (APIC_DEST_LOGICAL != 0),
151 .target_cpus = x2apic_target_cpus,
152 .vector_allocation_domain = x2apic_vector_allocation_domain,
153 .apic_id_registered = x2apic_apic_id_registered,
154 .init_apic_ldr = init_x2apic_ldr,
155 .send_IPI_all = x2apic_send_IPI_all,
156 .send_IPI_allbutself = x2apic_send_IPI_allbutself,
157 .send_IPI_mask = x2apic_send_IPI_mask,
158 .send_IPI_self = x2apic_send_IPI_self,
159 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
160 .phys_pkg_id = phys_pkg_id,
161 .get_apic_id = get_apic_id,
162 .set_apic_id = set_apic_id,
163 .apic_id_mask = (0xFFFFFFFFu),
164};
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
new file mode 100644
index 000000000000..958d537b4cc9
--- /dev/null
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -0,0 +1,159 @@
1#include <linux/threads.h>
2#include <linux/cpumask.h>
3#include <linux/string.h>
4#include <linux/kernel.h>
5#include <linux/ctype.h>
6#include <linux/init.h>
7#include <linux/dmar.h>
8
9#include <asm/smp.h>
10#include <asm/ipi.h>
11#include <asm/genapic.h>
12
13static int x2apic_phys;
14
15static int set_x2apic_phys_mode(char *arg)
16{
17 x2apic_phys = 1;
18 return 0;
19}
20early_param("x2apic_phys", set_x2apic_phys_mode);
21
/*
 * MADT OEM check for the physical x2apic driver: returns 1 only when
 * cpu_has_x2apic is true and x2apic_phys is non-zero; 0 otherwise.
 * The OEM strings are not examined.
 */
22static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
23{
24 if (cpu_has_x2apic && x2apic_phys)
25 return 1;
26
27 return 0;
28}
29
30/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
31
/* Returns a cpumask containing only CPU 0. */
32static cpumask_t x2apic_target_cpus(void)
33{
34 return cpumask_of_cpu(0);
35}
36
/*
 * Vector allocation domain for @cpu: a cpumask with only that CPU's bit
 * set, i.e. each CPU forms its own domain.
 */
37static cpumask_t x2apic_vector_allocation_domain(int cpu)
38{
39 cpumask_t domain = CPU_MASK_NONE;
40 cpu_set(cpu, domain);
41 return domain;
42}
43
/*
 * Send one IPI: build the ICR value with __prepare_ICR(0, vector, dest) and
 * write it, together with the destination @apicid, via x2apic_icr_write().
 * @dest is the destination-mode value the caller passes through
 * (APIC_DEST_PHYSICAL in this file).
 */
44static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
45 unsigned int dest)
46{
47 unsigned long cfg;
48
49 cfg = __prepare_ICR(0, vector, dest);
50
51 /*
52 * send the IPI.
53 */
54 x2apic_icr_write(cfg, apicid);
55}
56
/*
 * Send @vector to every CPU in @mask, one IPI per CPU, addressing each by
 * its x86_cpu_to_apicid value with APIC_DEST_PHYSICAL.  Local interrupts
 * are disabled for the whole loop.
 */
57static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
58{
59 unsigned long flags;
60 unsigned long query_cpu;
61
62 local_irq_save(flags);
63 for_each_cpu_mask(query_cpu, mask) {
64 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
65 vector, APIC_DEST_PHYSICAL);
66 }
67 local_irq_restore(flags);
68}
69
/*
 * Send @vector to every CPU in cpu_online_map except the calling CPU.
 * Does nothing when no other CPU is online.
 */
70static void x2apic_send_IPI_allbutself(int vector)
71{
72 cpumask_t mask = cpu_online_map;
73
74 cpu_clear(smp_processor_id(), mask);
75
76 if (!cpus_empty(mask))
77 x2apic_send_IPI_mask(mask, vector);
78}
79
/* Send @vector to every CPU in cpu_online_map (the caller included). */
80static void x2apic_send_IPI_all(int vector)
81{
82 x2apic_send_IPI_mask(cpu_online_map, vector);
83}
84
/* .apic_id_registered hook: unconditionally reports success (1). */
85static int x2apic_apic_id_registered(void)
86{
87 return 1;
88}
89
/*
 * Translate @cpumask into a single destination: the x86_cpu_to_apicid value
 * of the first CPU in the mask, or BAD_APICID when first_cpu() yields a
 * value that is not below NR_CPUS.
 */
90static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
91{
92 int cpu;
93
94 /*
95 * We're using fixed IRQ delivery, can only return one phys APIC ID.
96 * May as well be the first.
97 */
98 cpu = first_cpu(cpumask);
99 if ((unsigned)cpu < NR_CPUS)
100 return per_cpu(x86_cpu_to_apicid, cpu);
101 else
102 return BAD_APICID;
103}
104
/*
 * .get_apic_id hook: the APIC ID is the register value itself, converted to
 * unsigned int; no shifting or masking is applied.
 */
105static unsigned int get_apic_id(unsigned long x)
106{
107 unsigned int id;
108
109 id = x;
110 return id;
111}
112
/*
 * .set_apic_id hook: inverse of get_apic_id(); returns @id widened to
 * unsigned long with no shifting.
 */
113static unsigned long set_apic_id(unsigned int id)
114{
115 unsigned long x;
116
117 x = id;
118 return x;
119}
120
/* Returns the raw value read from the APIC_ID register. */
121static unsigned int x2apic_read_id(void)
122{
123 return apic_read(APIC_ID);
124}
125
/* .phys_pkg_id hook: the current APIC ID shifted right by @index_msb bits. */
126static unsigned int phys_pkg_id(int index_msb)
127{
128 return x2apic_read_id() >> index_msb;
129}
130
/*
 * Send @vector to the calling CPU by writing it to the APIC_SELF_IPI
 * register.
 *
 * NOTE(review): this has external linkage, whereas the function of the same
 * name in genx2apic_cluster.c is static and this one is only referenced via
 * apic_x2apic_phys in this file -- confirm whether it can be made static.
 */
131void x2apic_send_IPI_self(int vector)
132{
133 apic_write(APIC_SELF_IPI, vector);
134}
135
/*
 * .init_apic_ldr hook for physical mode: intentionally does nothing.
 *
 * NOTE(review): this has external linkage, whereas the function of the same
 * name in genx2apic_cluster.c is static -- confirm whether it can be made
 * static.
 */
136void init_x2apic_ldr(void)
137{
138 return;
139}
140
/*
 * genapic operations table for "physical x2apic": physical destination mode
 * with fixed delivery, wired to the helpers defined in this file.
 * The full 32 bits of the APIC ID are significant (apic_id_mask).
 */
141struct genapic apic_x2apic_phys = {
142 .name = "physical x2apic",
143 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
144 .int_delivery_mode = dest_Fixed,
145 .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
146 .target_cpus = x2apic_target_cpus,
147 .vector_allocation_domain = x2apic_vector_allocation_domain,
148 .apic_id_registered = x2apic_apic_id_registered,
149 .init_apic_ldr = init_x2apic_ldr,
150 .send_IPI_all = x2apic_send_IPI_all,
151 .send_IPI_allbutself = x2apic_send_IPI_allbutself,
152 .send_IPI_mask = x2apic_send_IPI_mask,
153 .send_IPI_self = x2apic_send_IPI_self,
154 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
155 .phys_pkg_id = phys_pkg_id,
156 .get_apic_id = get_apic_id,
157 .set_apic_id = set_apic_id,
158 .apic_id_mask = (0xFFFFFFFFu),
159};
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 2cfcbded888a..3fe472223a99 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -12,12 +12,12 @@
12#include <linux/threads.h> 12#include <linux/threads.h>
13#include <linux/cpumask.h> 13#include <linux/cpumask.h>
14#include <linux/string.h> 14#include <linux/string.h>
15#include <linux/kernel.h>
16#include <linux/ctype.h> 15#include <linux/ctype.h>
17#include <linux/init.h> 16#include <linux/init.h>
18#include <linux/sched.h> 17#include <linux/sched.h>
19#include <linux/bootmem.h> 18#include <linux/bootmem.h>
20#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/hardirq.h>
21#include <asm/smp.h> 21#include <asm/smp.h>
22#include <asm/ipi.h> 22#include <asm/ipi.h>
23#include <asm/genapic.h> 23#include <asm/genapic.h>
@@ -26,6 +26,35 @@
26#include <asm/uv/uv_hub.h> 26#include <asm/uv/uv_hub.h>
27#include <asm/uv/bios.h> 27#include <asm/uv/bios.h>
28 28
29DEFINE_PER_CPU(int, x2apic_extra_bits);
30
31static enum uv_system_type uv_system_type;
32
/*
 * MADT OEM check for the UV driver.  When @oem_id is "SGI", record the
 * system type in uv_system_type according to @oem_table_id:
 *   "UVL" -> UV_LEGACY_APIC, "UVX" -> UV_X2APIC, "UVH" -> UV_NON_UNIQUE_APIC.
 * Only the "UVH" case returns 1; "UVL" and "UVX" set the type but, like
 * every other input, return 0.
 */
33static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
34{
35 if (!strcmp(oem_id, "SGI")) {
36 if (!strcmp(oem_table_id, "UVL"))
37 uv_system_type = UV_LEGACY_APIC;
38 else if (!strcmp(oem_table_id, "UVX"))
39 uv_system_type = UV_X2APIC;
40 else if (!strcmp(oem_table_id, "UVH")) {
41 uv_system_type = UV_NON_UNIQUE_APIC;
42 return 1;
43 }
44 }
45 return 0;
46}
47
/* Returns the system type recorded by uv_acpi_madt_oem_check(). */
48enum uv_system_type get_uv_system_type(void)
49{
50 return uv_system_type;
51}
52
/* Returns non-zero when the recorded system type is anything but UV_NONE. */
53int is_uv_system(void)
54{
55 return uv_system_type != UV_NONE;
56}
57
29DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); 58DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
30EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info); 59EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info);
31 60
@@ -123,6 +152,10 @@ static int uv_apic_id_registered(void)
123 return 1; 152 return 1;
124} 153}
125 154
/* .init_apic_ldr hook for UV: intentionally empty. */
155static void uv_init_apic_ldr(void)
156{
157}
158
126static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) 159static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
127{ 160{
128 int cpu; 161 int cpu;
@@ -138,9 +171,34 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
138 return BAD_APICID; 171 return BAD_APICID;
139} 172}
140 173
/*
 * .get_apic_id hook for UV: OR the calling CPU's x2apic_extra_bits into the
 * register value @x.  Because a per-cpu variable is read, a warning is
 * raised if the caller is preemptible while more than one CPU is online.
 */
174static unsigned int get_apic_id(unsigned long x)
175{
176 unsigned int id;
177
178 WARN_ON(preemptible() && num_online_cpus() > 1);
179 id = x | __get_cpu_var(x2apic_extra_bits);
180
181 return id;
182}
183
/*
 * .set_apic_id hook for UV: returns @id unchanged, widened to unsigned
 * long.  The extra bits added by get_apic_id() are not removed here.
 */
184static unsigned long set_apic_id(unsigned int id)
185{
186 unsigned long x;
187
188 /* maskout x2apic_extra_bits ? */
189 x = id;
190 return x;
191}
192
/* Reads APIC_ID and passes the value through get_apic_id(). */
193static unsigned int uv_read_apic_id(void)
194{
195
196 return get_apic_id(apic_read(APIC_ID));
197}
198
141static unsigned int phys_pkg_id(int index_msb) 199static unsigned int phys_pkg_id(int index_msb)
142{ 200{
143 return GET_APIC_ID(read_apic_id()) >> index_msb; 201 return uv_read_apic_id() >> index_msb;
144} 202}
145 203
146#ifdef ZZZ /* Needs x2apic patch */ 204#ifdef ZZZ /* Needs x2apic patch */
@@ -152,17 +210,22 @@ static void uv_send_IPI_self(int vector)
152 210
153struct genapic apic_x2apic_uv_x = { 211struct genapic apic_x2apic_uv_x = {
154 .name = "UV large system", 212 .name = "UV large system",
213 .acpi_madt_oem_check = uv_acpi_madt_oem_check,
155 .int_delivery_mode = dest_Fixed, 214 .int_delivery_mode = dest_Fixed,
156 .int_dest_mode = (APIC_DEST_PHYSICAL != 0), 215 .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
157 .target_cpus = uv_target_cpus, 216 .target_cpus = uv_target_cpus,
158 .vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */ 217 .vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */
159 .apic_id_registered = uv_apic_id_registered, 218 .apic_id_registered = uv_apic_id_registered,
219 .init_apic_ldr = uv_init_apic_ldr,
160 .send_IPI_all = uv_send_IPI_all, 220 .send_IPI_all = uv_send_IPI_all,
161 .send_IPI_allbutself = uv_send_IPI_allbutself, 221 .send_IPI_allbutself = uv_send_IPI_allbutself,
162 .send_IPI_mask = uv_send_IPI_mask, 222 .send_IPI_mask = uv_send_IPI_mask,
163 /* ZZZ.send_IPI_self = uv_send_IPI_self, */ 223 /* ZZZ.send_IPI_self = uv_send_IPI_self, */
164 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, 224 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
165 .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */ 225 .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */
226 .get_apic_id = get_apic_id,
227 .set_apic_id = set_apic_id,
228 .apic_id_mask = (0xFFFFFFFFu),
166}; 229};
167 230
168static __cpuinit void set_x2apic_extra_bits(int pnode) 231static __cpuinit void set_x2apic_extra_bits(int pnode)
@@ -399,3 +462,5 @@ void __cpuinit uv_cpu_init(void)
399 if (get_uv_system_type() == UV_NON_UNIQUE_APIC) 462 if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
400 set_x2apic_extra_bits(uv_hub_info->pnode); 463 set_x2apic_extra_bits(uv_hub_info->pnode);
401} 464}
465
466
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index dc92b49d9204..4b8a53d841f7 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -282,6 +282,30 @@ static int __init i8259A_init_sysfs(void)
282 282
283device_initcall(i8259A_init_sysfs); 283device_initcall(i8259A_init_sysfs);
284 284
/*
 * Mask every interrupt line on both 8259A controllers by writing 0xff to
 * their IMRs, under i8259A_lock.  The cached mask variables are not
 * modified, so unmask_8259A() can restore the previous state.
 */
285void mask_8259A(void)
286{
287 unsigned long flags;
288
289 spin_lock_irqsave(&i8259A_lock, flags);
290
291 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
292 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
293
294 spin_unlock_irqrestore(&i8259A_lock, flags);
295}
296
/*
 * Write cached_master_mask / cached_slave_mask back to the two 8259A IMRs,
 * under i8259A_lock.
 */
297void unmask_8259A(void)
298{
299 unsigned long flags;
300
301 spin_lock_irqsave(&i8259A_lock, flags);
302
303 outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
304 outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */
305
306 spin_unlock_irqrestore(&i8259A_lock, flags);
307}
308
285void init_8259A(int auto_eoi) 309void init_8259A(int auto_eoi)
286{ 310{
287 unsigned long flags; 311 unsigned long flags;
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 09cddb57bec4..26ea3ea3fb30 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -46,6 +46,7 @@
46#include <asm/nmi.h> 46#include <asm/nmi.h>
47#include <asm/msidef.h> 47#include <asm/msidef.h>
48#include <asm/hypertransport.h> 48#include <asm/hypertransport.h>
49#include <asm/setup.h>
49 50
50#include <mach_apic.h> 51#include <mach_apic.h>
51#include <mach_apicdef.h> 52#include <mach_apicdef.h>
@@ -1490,7 +1491,7 @@ void /*__init*/ print_local_APIC(void *dummy)
1490 smp_processor_id(), hard_smp_processor_id()); 1491 smp_processor_id(), hard_smp_processor_id());
1491 v = apic_read(APIC_ID); 1492 v = apic_read(APIC_ID);
1492 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, 1493 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v,
1493 GET_APIC_ID(read_apic_id())); 1494 GET_APIC_ID(v));
1494 v = apic_read(APIC_LVR); 1495 v = apic_read(APIC_LVR);
1495 printk(KERN_INFO "... APIC VERSION: %08x\n", v); 1496 printk(KERN_INFO "... APIC VERSION: %08x\n", v);
1496 ver = GET_APIC_VERSION(v); 1497 ver = GET_APIC_VERSION(v);
@@ -1698,8 +1699,7 @@ void disable_IO_APIC(void)
1698 entry.dest_mode = 0; /* Physical */ 1699 entry.dest_mode = 0; /* Physical */
1699 entry.delivery_mode = dest_ExtINT; /* ExtInt */ 1700 entry.delivery_mode = dest_ExtINT; /* ExtInt */
1700 entry.vector = 0; 1701 entry.vector = 0;
1701 entry.dest.physical.physical_dest = 1702 entry.dest.physical.physical_dest = read_apic_id();
1702 GET_APIC_ID(read_apic_id());
1703 1703
1704 /* 1704 /*
1705 * Add it to the IO-APIC irq-routing table: 1705 * Add it to the IO-APIC irq-routing table:
@@ -1725,10 +1725,8 @@ static void __init setup_ioapic_ids_from_mpc(void)
1725 unsigned char old_id; 1725 unsigned char old_id;
1726 unsigned long flags; 1726 unsigned long flags;
1727 1727
1728#ifdef CONFIG_X86_NUMAQ 1728 if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
1729 if (found_numaq)
1730 return; 1729 return;
1731#endif
1732 1730
1733 /* 1731 /*
1734 * Don't check I/O APIC IDs for xAPIC systems. They have 1732 * Don't check I/O APIC IDs for xAPIC systems. They have
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 61a83b70c18f..e63282e78864 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -37,6 +37,7 @@
37#include <acpi/acpi_bus.h> 37#include <acpi/acpi_bus.h>
38#endif 38#endif
39#include <linux/bootmem.h> 39#include <linux/bootmem.h>
40#include <linux/dmar.h>
40 41
41#include <asm/idle.h> 42#include <asm/idle.h>
42#include <asm/io.h> 43#include <asm/io.h>
@@ -49,6 +50,7 @@
49#include <asm/nmi.h> 50#include <asm/nmi.h>
50#include <asm/msidef.h> 51#include <asm/msidef.h>
51#include <asm/hypertransport.h> 52#include <asm/hypertransport.h>
53#include <asm/irq_remapping.h>
52 54
53#include <mach_ipi.h> 55#include <mach_ipi.h>
54#include <mach_apic.h> 56#include <mach_apic.h>
@@ -108,6 +110,9 @@ static DEFINE_SPINLOCK(vector_lock);
108 */ 110 */
109int nr_ioapic_registers[MAX_IO_APICS]; 111int nr_ioapic_registers[MAX_IO_APICS];
110 112
113/* I/O APIC RTE contents at the OS boot up */
114struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
115
111/* I/O APIC entries */ 116/* I/O APIC entries */
112struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; 117struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
113int nr_ioapics; 118int nr_ioapics;
@@ -303,7 +308,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
303 pin = entry->pin; 308 pin = entry->pin;
304 if (pin == -1) 309 if (pin == -1)
305 break; 310 break;
306 io_apic_write(apic, 0x11 + pin*2, dest); 311 /*
312 * With interrupt-remapping, destination information comes
313 * from interrupt-remapping table entry.
314 */
315 if (!irq_remapped(irq))
316 io_apic_write(apic, 0x11 + pin*2, dest);
307 reg = io_apic_read(apic, 0x10 + pin*2); 317 reg = io_apic_read(apic, 0x10 + pin*2);
308 reg &= ~IO_APIC_REDIR_VECTOR_MASK; 318 reg &= ~IO_APIC_REDIR_VECTOR_MASK;
309 reg |= vector; 319 reg |= vector;
@@ -440,6 +450,69 @@ static void clear_IO_APIC (void)
440 clear_IO_APIC_pin(apic, pin); 450 clear_IO_APIC_pin(apic, pin);
441} 451}
442 452
453/*
454 * Saves and masks all the unmasked IO-APIC RTE's
455 */
456int save_mask_IO_APIC_setup(void)
457{
458 union IO_APIC_reg_01 reg_01;
459 unsigned long flags;
460 int apic, pin;
461
462 /*
463 * The number of IO-APIC IRQ registers (== #pins):
464 */
465 for (apic = 0; apic < nr_ioapics; apic++) {
466 spin_lock_irqsave(&ioapic_lock, flags);
467 reg_01.raw = io_apic_read(apic, 1);
468 spin_unlock_irqrestore(&ioapic_lock, flags);
469 nr_ioapic_registers[apic] = reg_01.bits.entries+1;
470 }
471
472 for (apic = 0; apic < nr_ioapics; apic++) {
473 early_ioapic_entries[apic] =
474 kzalloc(sizeof(struct IO_APIC_route_entry) *
475 nr_ioapic_registers[apic], GFP_KERNEL);
476 if (!early_ioapic_entries[apic])
477 return -ENOMEM;
478 }
479
480 for (apic = 0; apic < nr_ioapics; apic++)
481 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
482 struct IO_APIC_route_entry entry;
483
484 entry = early_ioapic_entries[apic][pin] =
485 ioapic_read_entry(apic, pin);
486 if (!entry.mask) {
487 entry.mask = 1;
488 ioapic_write_entry(apic, pin, entry);
489 }
490 }
491 return 0;
492}
493
494void restore_IO_APIC_setup(void)
495{
496 int apic, pin;
497
498 for (apic = 0; apic < nr_ioapics; apic++)
499 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
500 ioapic_write_entry(apic, pin,
501 early_ioapic_entries[apic][pin]);
502}
503
/*
 * Re-initialise the IO-APICs after the interrupt-remapping decision.
 * Currently just calls restore_IO_APIC_setup(); the @intr_remapping
 * argument is not used.
 */
504void reinit_intr_remapped_IO_APIC(int intr_remapping)
505{
506 /*
507 * for now plain restore of previous settings.
508 * TBD: In the case of OS enabling interrupt-remapping,
509 * IO-APIC RTE's need to be setup to point to interrupt-remapping
510 * table entries. for now, do a plain restore, and wait for
511 * the setup_IO_APIC_irqs() to do proper initialization.
512 */
513 restore_IO_APIC_setup();
514}
515
443int skip_ioapic_setup; 516int skip_ioapic_setup;
444int ioapic_force; 517int ioapic_force;
445 518
@@ -839,18 +912,98 @@ void __setup_vector_irq(int cpu)
839} 912}
840 913
841static struct irq_chip ioapic_chip; 914static struct irq_chip ioapic_chip;
915#ifdef CONFIG_INTR_REMAP
916static struct irq_chip ir_ioapic_chip;
917#endif
842 918
843static void ioapic_register_intr(int irq, unsigned long trigger) 919static void ioapic_register_intr(int irq, unsigned long trigger)
844{ 920{
845 if (trigger) { 921 if (trigger)
846 irq_desc[irq].status |= IRQ_LEVEL; 922 irq_desc[irq].status |= IRQ_LEVEL;
847 set_irq_chip_and_handler_name(irq, &ioapic_chip, 923 else
848 handle_fasteoi_irq, "fasteoi");
849 } else {
850 irq_desc[irq].status &= ~IRQ_LEVEL; 924 irq_desc[irq].status &= ~IRQ_LEVEL;
925
926#ifdef CONFIG_INTR_REMAP
927 if (irq_remapped(irq)) {
928 irq_desc[irq].status |= IRQ_MOVE_PCNTXT;
929 if (trigger)
930 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
931 handle_fasteoi_irq,
932 "fasteoi");
933 else
934 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
935 handle_edge_irq, "edge");
936 return;
937 }
938#endif
939 if (trigger)
940 set_irq_chip_and_handler_name(irq, &ioapic_chip,
941 handle_fasteoi_irq,
942 "fasteoi");
943 else
851 set_irq_chip_and_handler_name(irq, &ioapic_chip, 944 set_irq_chip_and_handler_name(irq, &ioapic_chip,
852 handle_edge_irq, "edge"); 945 handle_edge_irq, "edge");
946}
947
/*
 * Fill in *@entry for routing @irq with the given @destination, @trigger,
 * @polarity and @vector.
 *
 * The entry is zeroed first.  With CONFIG_INTR_REMAP and
 * intr_remapping_enabled set, an IRTE is allocated for @irq on the iommu
 * returned by map_ioapic_to_ir(@apic), programmed with the delivery
 * parameters, and the entry is written in remapped format (format = 1)
 * carrying the IRTE index; a missing iommu or a failed IRTE allocation
 * panics.  Otherwise the delivery mode, destination mode and destination
 * are stored directly in the entry.
 *
 * In both cases mask/trigger/polarity/vector are then set through the plain
 * RTE view, and level-triggered entries (@trigger non-zero) start masked.
 *
 * Always returns 0.
 */
948static int setup_ioapic_entry(int apic, int irq,
949 struct IO_APIC_route_entry *entry,
950 unsigned int destination, int trigger,
951 int polarity, int vector)
952{
953 /*
954 * add it to the IO-APIC irq-routing table:
955 */
956 memset(entry,0,sizeof(*entry));
957
958#ifdef CONFIG_INTR_REMAP
959 if (intr_remapping_enabled) {
960 struct intel_iommu *iommu = map_ioapic_to_ir(apic);
961 struct irte irte;
962 struct IR_IO_APIC_route_entry *ir_entry =
963 (struct IR_IO_APIC_route_entry *) entry;
964 int index;
965
966 if (!iommu)
967 panic("No mapping iommu for ioapic %d\n", apic);
968
969 index = alloc_irte(iommu, irq, 1);
970 if (index < 0)
971 panic("Failed to allocate IRTE for ioapic %d\n", apic);
972
973 memset(&irte, 0, sizeof(irte));
974
975 irte.present = 1;
976 irte.dst_mode = INT_DEST_MODE;
977 irte.trigger_mode = trigger;
978 irte.dlvry_mode = INT_DELIVERY_MODE;
979 irte.vector = vector;
980 irte.dest_id = IRTE_DEST(destination);
981
982 modify_irte(irq, &irte);
983
/* The index is split: bit 15 goes to index2, the low 15 bits to index. */
984 ir_entry->index2 = (index >> 15) & 0x1;
985 ir_entry->zero = 0;
986 ir_entry->format = 1;
987 ir_entry->index = (index & 0x7fff);
988 } else
989#endif
990 {
991 entry->delivery_mode = INT_DELIVERY_MODE;
992 entry->dest_mode = INT_DEST_MODE;
993 entry->dest = destination;
853 }
995
996 entry->mask = 0; /* enable IRQ */
997 entry->trigger = trigger;
998 entry->polarity = polarity;
999 entry->vector = vector;
1000
1001 /* Mask level triggered irqs.
1002 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
1003 */
1004 if (trigger)
1005 entry->mask = 1;
1006 return 0;
854} 1007}
855 1008
856static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, 1009static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
@@ -875,24 +1028,15 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
875 apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, 1028 apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
876 irq, trigger, polarity); 1029 irq, trigger, polarity);
877 1030
878 /*
879 * add it to the IO-APIC irq-routing table:
880 */
881 memset(&entry,0,sizeof(entry));
882 1031
883 entry.delivery_mode = INT_DELIVERY_MODE; 1032 if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
884 entry.dest_mode = INT_DEST_MODE; 1033 cpu_mask_to_apicid(mask), trigger, polarity,
885 entry.dest = cpu_mask_to_apicid(mask); 1034 cfg->vector)) {
886 entry.mask = 0; /* enable IRQ */ 1035 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
887 entry.trigger = trigger; 1036 mp_ioapics[apic].mp_apicid, pin);
888 entry.polarity = polarity; 1037 __clear_irq_vector(irq);
889 entry.vector = cfg->vector; 1038 return;
890 1039 }
891 /* Mask level triggered irqs.
892 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
893 */
894 if (trigger)
895 entry.mask = 1;
896 1040
897 ioapic_register_intr(irq, trigger); 1041 ioapic_register_intr(irq, trigger);
898 if (irq < 16) 1042 if (irq < 16)
@@ -944,6 +1088,9 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
944{ 1088{
945 struct IO_APIC_route_entry entry; 1089 struct IO_APIC_route_entry entry;
946 1090
1091 if (intr_remapping_enabled)
1092 return;
1093
947 memset(&entry, 0, sizeof(entry)); 1094 memset(&entry, 0, sizeof(entry));
948 1095
949 /* 1096 /*
@@ -1090,6 +1237,7 @@ static __apicdebuginit void print_APIC_bitfield (int base)
1090void __apicdebuginit print_local_APIC(void * dummy) 1237void __apicdebuginit print_local_APIC(void * dummy)
1091{ 1238{
1092 unsigned int v, ver, maxlvt; 1239 unsigned int v, ver, maxlvt;
1240 unsigned long icr;
1093 1241
1094 if (apic_verbosity == APIC_QUIET) 1242 if (apic_verbosity == APIC_QUIET)
1095 return; 1243 return;
@@ -1097,7 +1245,7 @@ void __apicdebuginit print_local_APIC(void * dummy)
1097 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", 1245 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
1098 smp_processor_id(), hard_smp_processor_id()); 1246 smp_processor_id(), hard_smp_processor_id());
1099 v = apic_read(APIC_ID); 1247 v = apic_read(APIC_ID);
1100 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id())); 1248 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id());
1101 v = apic_read(APIC_LVR); 1249 v = apic_read(APIC_LVR);
1102 printk(KERN_INFO "... APIC VERSION: %08x\n", v); 1250 printk(KERN_INFO "... APIC VERSION: %08x\n", v);
1103 ver = GET_APIC_VERSION(v); 1251 ver = GET_APIC_VERSION(v);
@@ -1133,10 +1281,9 @@ void __apicdebuginit print_local_APIC(void * dummy)
1133 v = apic_read(APIC_ESR); 1281 v = apic_read(APIC_ESR);
1134 printk(KERN_DEBUG "... APIC ESR: %08x\n", v); 1282 printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
1135 1283
1136 v = apic_read(APIC_ICR); 1284 icr = apic_icr_read();
1137 printk(KERN_DEBUG "... APIC ICR: %08x\n", v); 1285 printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
1138 v = apic_read(APIC_ICR2); 1286 printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
1139 printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
1140 1287
1141 v = apic_read(APIC_LVTT); 1288 v = apic_read(APIC_LVTT);
1142 printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); 1289 printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
@@ -1291,7 +1438,7 @@ void disable_IO_APIC(void)
1291 entry.dest_mode = 0; /* Physical */ 1438 entry.dest_mode = 0; /* Physical */
1292 entry.delivery_mode = dest_ExtINT; /* ExtInt */ 1439 entry.delivery_mode = dest_ExtINT; /* ExtInt */
1293 entry.vector = 0; 1440 entry.vector = 0;
1294 entry.dest = GET_APIC_ID(read_apic_id()); 1441 entry.dest = read_apic_id();
1295 1442
1296 /* 1443 /*
1297 * Add it to the IO-APIC irq-routing table: 1444 * Add it to the IO-APIC irq-routing table:
@@ -1397,6 +1544,147 @@ static int ioapic_retrigger_irq(unsigned int irq)
1397 */ 1544 */
1398 1545
1399#ifdef CONFIG_SMP 1546#ifdef CONFIG_SMP
1547
1548#ifdef CONFIG_INTR_REMAP
1549static void ir_irq_migration(struct work_struct *work);
1550
1551static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
1552
1553/*
1554 * Migrate the IO-APIC irq in the presence of intr-remapping.
1555 *
1556 * For edge triggered, irq migration is a simple atomic update(of vector
1557 * and cpu destination) of IRTE and flush the hardware cache.
1558 *
1559 * For level triggered, we need to modify the io-apic RTE as well with the updated
1560 * vector information, along with modifying IRTE with vector and destination.
1561 * So irq migration for level triggered is little bit more complex compared to
1562 * edge triggered migration. But the good news is, we use the same algorithm
1563 * for level triggered migration as we have today, only difference being,
1564 * we now initiate the irq migration from process context instead of the
1565 * interrupt context.
1566 *
1567 * In future, when we do a directed EOI (combined with cpu EOI broadcast
1568 * suppression) to the IO-APIC, level triggered irq migration will also be
1569 * as simple as edge triggered migration and we can do the irq migration
1570 * with a simple atomic update to IO-APIC RTE.
1571 */
/*
 * Move @irq to the CPUs in @mask when it is routed through an
 * interrupt-remapping table entry.
 *
 * Returns silently, changing nothing, if @mask contains no online CPU, if
 * the IRTE for @irq cannot be read, or if assign_irq_vector() fails.
 * Otherwise: for level-triggered irqs the IO-APIC RTE is updated under
 * ioapic_lock, the IRTE gets the new vector and destination, a pending
 * vector move is completed by sending IRQ_MOVE_CLEANUP_VECTOR to the online
 * CPUs of the old domain, and the irq's affinity is set to @mask.
 */
1572static void migrate_ioapic_irq(int irq, cpumask_t mask)
1573{
1574 struct irq_cfg *cfg = irq_cfg + irq;
1575 struct irq_desc *desc = irq_desc + irq;
1576 cpumask_t tmp, cleanup_mask;
1577 struct irte irte;
1578 int modify_ioapic_rte = desc->status & IRQ_LEVEL;
1579 unsigned int dest;
1580 unsigned long flags;
1581
1582 cpus_and(tmp, mask, cpu_online_map);
1583 if (cpus_empty(tmp))
1584 return;
1585
1586 if (get_irte(irq, &irte))
1587 return;
1588
1589 if (assign_irq_vector(irq, mask))
1590 return;
1591
1592 cpus_and(tmp, cfg->domain, mask);
1593 dest = cpu_mask_to_apicid(tmp);
1594
1595 if (modify_ioapic_rte) {
1596 spin_lock_irqsave(&ioapic_lock, flags);
1597 __target_IO_APIC_irq(irq, dest, cfg->vector);
1598 spin_unlock_irqrestore(&ioapic_lock, flags);
1599 }
1600
1601 irte.vector = cfg->vector;
1602 irte.dest_id = IRTE_DEST(dest);
1603
1604 /*
1605 * Modify the IRTE and flush the Interrupt entry cache.
1606 */
1607 modify_irte(irq, &irte);
1608
1609 if (cfg->move_in_progress) {
1610 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
1611 cfg->move_cleanup_count = cpus_weight(cleanup_mask);
1612 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
1613 cfg->move_in_progress = 0;
1614 }
1615
1616 irq_desc[irq].affinity = mask;
1617}
1618
/*
 * Try to migrate a level-triggered, remapped @irq to its pending_mask.
 *
 * The irq is masked at the IO-APIC for the duration.  If a level ack is
 * still pending, the migration is deferred by scheduling ir_migration_work
 * and -1 is returned.  Otherwise the irq is migrated, IRQ_MOVE_PENDING and
 * pending_mask are cleared, and 0 is returned.  The irq is unmasked again
 * on both paths.
 */
1619static int migrate_irq_remapped_level(int irq)
1620{
1621 int ret = -1;
1622
1623 mask_IO_APIC_irq(irq);
1624
1625 if (io_apic_level_ack_pending(irq)) {
1626 /*
1627 * Interrupt in progress. Migrating irq now will change the
1628 * vector information in the IO-APIC RTE and that will confuse
1629 * the EOI broadcast performed by cpu.
1630 * So, delay the irq migration to the next instance.
1631 */
1632 schedule_delayed_work(&ir_migration_work, 1);
1633 goto unmask;
1634 }
1635
1636 /* everything is clear. we have right of way */
1637 migrate_ioapic_irq(irq, irq_desc[irq].pending_mask);
1638
1639 ret = 0;
1640 irq_desc[irq].status &= ~IRQ_MOVE_PENDING;
1641 cpus_clear(irq_desc[irq].pending_mask);
1642
1643unmask:
1644 unmask_IO_APIC_irq(irq);
1645 return ret;
1646}
1647
/*
 * Delayed-work handler for ir_migration_work: retry deferred migrations.
 *
 * Scans all NR_IRQS descriptors; for each one flagged IRQ_MOVE_PENDING it
 * takes desc->lock, re-checks the flag, and calls the chip's set_affinity()
 * with the pending mask.  If the chip has no set_affinity hook (or the flag
 * was cleared meanwhile) the flag is simply dropped.
 */
1648static void ir_irq_migration(struct work_struct *work)
1649{
1650 int irq;
1651
1652 for (irq = 0; irq < NR_IRQS; irq++) {
1653 struct irq_desc *desc = irq_desc + irq;
1654 if (desc->status & IRQ_MOVE_PENDING) {
1655 unsigned long flags;
1656
1657 spin_lock_irqsave(&desc->lock, flags);
1658 if (!desc->chip->set_affinity ||
1659 !(desc->status & IRQ_MOVE_PENDING)) {
1660 desc->status &= ~IRQ_MOVE_PENDING;
1661 spin_unlock_irqrestore(&desc->lock, flags);
1662 continue;
1663 }
1664
1665 desc->chip->set_affinity(irq,
1666 irq_desc[irq].pending_mask);
1667 spin_unlock_irqrestore(&desc->lock, flags);
1668 }
1669 }
1670}
1671
1672/*
1673 * Migrates the IRQ destination in the process context.
 *
 * Level-triggered irqs record @mask as the pending mask, set
 * IRQ_MOVE_PENDING and go through migrate_irq_remapped_level(), which may
 * defer the move; all other irqs are migrated immediately.
1674 */
1675static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
1676{
1677 if (irq_desc[irq].status & IRQ_LEVEL) {
1678 irq_desc[irq].status |= IRQ_MOVE_PENDING;
1679 irq_desc[irq].pending_mask = mask;
1680 migrate_irq_remapped_level(irq);
1681 return;
1682 }
1683
1684 migrate_ioapic_irq(irq, mask);
1685}
1686#endif
1687
1400asmlinkage void smp_irq_move_cleanup_interrupt(void) 1688asmlinkage void smp_irq_move_cleanup_interrupt(void)
1401{ 1689{
1402 unsigned vector, me; 1690 unsigned vector, me;
@@ -1453,6 +1741,17 @@ static void irq_complete_move(unsigned int irq)
1453#else 1741#else
1454static inline void irq_complete_move(unsigned int irq) {} 1742static inline void irq_complete_move(unsigned int irq) {}
1455#endif 1743#endif
1744#ifdef CONFIG_INTR_REMAP
/* .eoi handler of ir_ioapic_chip: just calls ack_x2APIC_irq(); @irq is unused. */
1745static void ack_x2apic_level(unsigned int irq)
1746{
1747 ack_x2APIC_irq();
1748}
1749
/* .ack handler of ir_ioapic_chip: just calls ack_x2APIC_irq(); @irq is unused. */
1750static void ack_x2apic_edge(unsigned int irq)
1751{
1752 ack_x2APIC_irq();
1753}
1754#endif
1456 1755
1457static void ack_apic_edge(unsigned int irq) 1756static void ack_apic_edge(unsigned int irq)
1458{ 1757{
@@ -1527,6 +1826,21 @@ static struct irq_chip ioapic_chip __read_mostly = {
1527 .retrigger = ioapic_retrigger_irq, 1826 .retrigger = ioapic_retrigger_irq,
1528}; 1827};
1529 1828
1829#ifdef CONFIG_INTR_REMAP
1830static struct irq_chip ir_ioapic_chip __read_mostly = {
1831 .name = "IR-IO-APIC",
1832 .startup = startup_ioapic_irq,
1833 .mask = mask_IO_APIC_irq,
1834 .unmask = unmask_IO_APIC_irq,
1835 .ack = ack_x2apic_edge,
1836 .eoi = ack_x2apic_level,
1837#ifdef CONFIG_SMP
1838 .set_affinity = set_ir_ioapic_affinity_irq,
1839#endif
1840 .retrigger = ioapic_retrigger_irq,
1841};
1842#endif
1843
1530static inline void init_IO_APIC_traps(void) 1844static inline void init_IO_APIC_traps(void)
1531{ 1845{
1532 int irq; 1846 int irq;
@@ -1712,6 +2026,8 @@ static inline void __init check_timer(void)
1712 * 8259A. 2026 * 8259A.
1713 */ 2027 */
1714 if (pin1 == -1) { 2028 if (pin1 == -1) {
2029 if (intr_remapping_enabled)
2030 panic("BIOS bug: timer not connected to IO-APIC");
1715 pin1 = pin2; 2031 pin1 = pin2;
1716 apic1 = apic2; 2032 apic1 = apic2;
1717 no_pin1 = 1; 2033 no_pin1 = 1;
@@ -1738,6 +2054,8 @@ static inline void __init check_timer(void)
1738 clear_IO_APIC_pin(0, pin1); 2054 clear_IO_APIC_pin(0, pin1);
1739 goto out; 2055 goto out;
1740 } 2056 }
2057 if (intr_remapping_enabled)
2058 panic("timer doesn't work through Interrupt-remapped IO-APIC");
1741 clear_IO_APIC_pin(apic1, pin1); 2059 clear_IO_APIC_pin(apic1, pin1);
1742 if (!no_pin1) 2060 if (!no_pin1)
1743 apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " 2061 apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -1977,6 +2295,9 @@ void destroy_irq(unsigned int irq)
1977 2295
1978 dynamic_irq_cleanup(irq); 2296 dynamic_irq_cleanup(irq);
1979 2297
2298#ifdef CONFIG_INTR_REMAP
2299 free_irte(irq);
2300#endif
1980 spin_lock_irqsave(&vector_lock, flags); 2301 spin_lock_irqsave(&vector_lock, flags);
1981 __clear_irq_vector(irq); 2302 __clear_irq_vector(irq);
1982 spin_unlock_irqrestore(&vector_lock, flags); 2303 spin_unlock_irqrestore(&vector_lock, flags);
@@ -1995,11 +2316,42 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
1995 2316
1996 tmp = TARGET_CPUS; 2317 tmp = TARGET_CPUS;
1997 err = assign_irq_vector(irq, tmp); 2318 err = assign_irq_vector(irq, tmp);
1998 if (!err) { 2319 if (err)
1999 cpus_and(tmp, cfg->domain, tmp); 2320 return err;
2000 dest = cpu_mask_to_apicid(tmp); 2321
2322 cpus_and(tmp, cfg->domain, tmp);
2323 dest = cpu_mask_to_apicid(tmp);
2324
2325#ifdef CONFIG_INTR_REMAP
2326 if (irq_remapped(irq)) {
2327 struct irte irte;
2328 int ir_index;
2329 u16 sub_handle;
2330
2331 ir_index = map_irq_to_irte_handle(irq, &sub_handle);
2332 BUG_ON(ir_index == -1);
2333
2334 memset (&irte, 0, sizeof(irte));
2335
2336 irte.present = 1;
2337 irte.dst_mode = INT_DEST_MODE;
2338 irte.trigger_mode = 0; /* edge */
2339 irte.dlvry_mode = INT_DELIVERY_MODE;
2340 irte.vector = cfg->vector;
2341 irte.dest_id = IRTE_DEST(dest);
2342
2343 modify_irte(irq, &irte);
2001 2344
2002 msg->address_hi = MSI_ADDR_BASE_HI; 2345 msg->address_hi = MSI_ADDR_BASE_HI;
2346 msg->data = sub_handle;
2347 msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
2348 MSI_ADDR_IR_SHV |
2349 MSI_ADDR_IR_INDEX1(ir_index) |
2350 MSI_ADDR_IR_INDEX2(ir_index);
2351 } else
2352#endif
2353 {
2354 msg->address_hi = MSI_ADDR_BASE_HI;
2003 msg->address_lo = 2355 msg->address_lo =
2004 MSI_ADDR_BASE_LO | 2356 MSI_ADDR_BASE_LO |
2005 ((INT_DEST_MODE == 0) ? 2357 ((INT_DEST_MODE == 0) ?
@@ -2049,6 +2401,55 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2049 write_msi_msg(irq, &msg); 2401 write_msi_msg(irq, &msg);
2050 irq_desc[irq].affinity = mask; 2402 irq_desc[irq].affinity = mask;
2051} 2403}
2404
2405#ifdef CONFIG_INTR_REMAP
2406/*
2407 * Migrate the MSI irq to another cpumask. This migration is
2408 * done in the process context using interrupt-remapping hardware.
2409 */
2410static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2411{
2412 struct irq_cfg *cfg = irq_cfg + irq;
2413 unsigned int dest;
2414 cpumask_t tmp, cleanup_mask;
2415 struct irte irte;
2416
2417 cpus_and(tmp, mask, cpu_online_map);
2418 if (cpus_empty(tmp))
2419 return;
2420
2421 if (get_irte(irq, &irte))
2422 return;
2423
2424 if (assign_irq_vector(irq, mask))
2425 return;
2426
2427 cpus_and(tmp, cfg->domain, mask);
2428 dest = cpu_mask_to_apicid(tmp);
2429
2430 irte.vector = cfg->vector;
2431 irte.dest_id = IRTE_DEST(dest);
2432
2433 /*
2434 * atomically update the IRTE with the new destination and vector.
2435 */
2436 modify_irte(irq, &irte);
2437
2438 /*
2439 * After this point, all the interrupts will start arriving
2440 * at the new destination. So, time to cleanup the previous
2441 * vector allocation.
2442 */
2443 if (cfg->move_in_progress) {
2444 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
2445 cfg->move_cleanup_count = cpus_weight(cleanup_mask);
2446 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
2447 cfg->move_in_progress = 0;
2448 }
2449
2450 irq_desc[irq].affinity = mask;
2451}
2452#endif
2052#endif /* CONFIG_SMP */ 2453#endif /* CONFIG_SMP */
2053 2454
2054/* 2455/*
@@ -2066,26 +2467,157 @@ static struct irq_chip msi_chip = {
2066 .retrigger = ioapic_retrigger_irq, 2467 .retrigger = ioapic_retrigger_irq,
2067}; 2468};
2068 2469
2069int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) 2470#ifdef CONFIG_INTR_REMAP
2471static struct irq_chip msi_ir_chip = {
2472 .name = "IR-PCI-MSI",
2473 .unmask = unmask_msi_irq,
2474 .mask = mask_msi_irq,
2475 .ack = ack_x2apic_edge,
2476#ifdef CONFIG_SMP
2477 .set_affinity = ir_set_msi_irq_affinity,
2478#endif
2479 .retrigger = ioapic_retrigger_irq,
2480};
2481
2482/*
2483 * Map the PCI dev to the corresponding remapping hardware unit
2484 * and allocate 'nvec' consecutive interrupt-remapping table entries
2485 * in it.
2486 */
2487static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
2488{
2489 struct intel_iommu *iommu;
2490 int index;
2491
2492 iommu = map_dev_to_ir(dev);
2493 if (!iommu) {
2494 printk(KERN_ERR
2495 "Unable to map PCI %s to iommu\n", pci_name(dev));
2496 return -ENOENT;
2497 }
2498
2499 index = alloc_irte(iommu, irq, nvec);
2500 if (index < 0) {
2501 printk(KERN_ERR
2502 "Unable to allocate %d IRTE for PCI %s\n", nvec,
2503 pci_name(dev));
2504 return -ENOSPC;
2505 }
2506 return index;
2507}
2508#endif
2509
2510static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
2070{ 2511{
2512 int ret;
2071 struct msi_msg msg; 2513 struct msi_msg msg;
2514
2515 ret = msi_compose_msg(dev, irq, &msg);
2516 if (ret < 0)
2517 return ret;
2518
2519 set_irq_msi(irq, desc);
2520 write_msi_msg(irq, &msg);
2521
2522#ifdef CONFIG_INTR_REMAP
2523 if (irq_remapped(irq)) {
2524 struct irq_desc *desc = irq_desc + irq;
2525 /*
2526 * irq migration in process context
2527 */
2528 desc->status |= IRQ_MOVE_PCNTXT;
2529 set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
2530 } else
2531#endif
2532 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
2533
2534 return 0;
2535}
2536
/*
 * Create an irq for a single MSI and set it up.  When interrupt remapping
 * is built in and enabled, one IRTE is allocated for the irq first.
 *
 * Returns 0 on success.  On failure the negative error from create_irq(),
 * msi_alloc_irte() or setup_msi_irq() is returned, and an irq that was
 * already created is destroyed again.
 */
int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
{
	int irq, ret;

	irq = create_irq();
	if (irq < 0)
		return irq;

#ifdef CONFIG_INTR_REMAP
	if (intr_remapping_enabled) {
		ret = msi_alloc_irte(dev, irq, 1);
		if (ret < 0)
			goto error;
	}
#endif
	ret = setup_msi_irq(dev, desc, irq);
	if (ret < 0)
		goto error;

	return 0;

error:
	destroy_irq(irq);
	return ret;
}
2085 2567
2086 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); 2568int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
2569{
2570 int irq, ret, sub_handle;
2571 struct msi_desc *desc;
2572#ifdef CONFIG_INTR_REMAP
2573 struct intel_iommu *iommu = 0;
2574 int index = 0;
2575#endif
2087 2576
2577 sub_handle = 0;
2578 list_for_each_entry(desc, &dev->msi_list, list) {
2579 irq = create_irq();
2580 if (irq < 0)
2581 return irq;
2582#ifdef CONFIG_INTR_REMAP
2583 if (!intr_remapping_enabled)
2584 goto no_ir;
2585
2586 if (!sub_handle) {
2587 /*
2588 * allocate the consecutive block of IRTE's
2589 * for 'nvec'
2590 */
2591 index = msi_alloc_irte(dev, irq, nvec);
2592 if (index < 0) {
2593 ret = index;
2594 goto error;
2595 }
2596 } else {
2597 iommu = map_dev_to_ir(dev);
2598 if (!iommu) {
2599 ret = -ENOENT;
2600 goto error;
2601 }
2602 /*
2603 * setup the mapping between the irq and the IRTE
2604 * base index, the sub_handle pointing to the
2605 * appropriate interrupt remap table entry.
2606 */
2607 set_irte_irq(irq, iommu, index, sub_handle);
2608 }
2609no_ir:
2610#endif
2611 ret = setup_msi_irq(dev, desc, irq);
2612 if (ret < 0)
2613 goto error;
2614 sub_handle++;
2615 }
2088 return 0; 2616 return 0;
2617
2618error:
2619 destroy_irq(irq);
2620 return ret;
2089} 2621}
2090 2622
2091void arch_teardown_msi_irq(unsigned int irq) 2623void arch_teardown_msi_irq(unsigned int irq)
@@ -2333,6 +2865,10 @@ void __init setup_ioapic_dest(void)
2333 setup_IO_APIC_irq(ioapic, pin, irq, 2865 setup_IO_APIC_irq(ioapic, pin, irq,
2334 irq_trigger(irq_entry), 2866 irq_trigger(irq_entry),
2335 irq_polarity(irq_entry)); 2867 irq_polarity(irq_entry));
2868#ifdef CONFIG_INTR_REMAP
2869 else if (intr_remapping_enabled)
2870 set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
2871#endif
2336 else 2872 else
2337 set_ioapic_affinity_irq(irq, TARGET_CPUS); 2873 set_ioapic_affinity_irq(irq, TARGET_CPUS);
2338 } 2874 }
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index d66914287ee1..9200a1e2752d 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -74,6 +74,15 @@ void __init init_ISA_irqs (void)
74 } 74 }
75} 75}
76 76
77/*
78 * IRQ2 is cascade interrupt to second interrupt controller
79 */
80static struct irqaction irq2 = {
81 .handler = no_action,
82 .mask = CPU_MASK_NONE,
83 .name = "cascade",
84};
85
77/* Overridden in paravirt.c */ 86/* Overridden in paravirt.c */
78void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); 87void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
79 88
@@ -98,6 +107,46 @@ void __init native_init_IRQ(void)
98 set_intr_gate(vector, interrupt[i]); 107 set_intr_gate(vector, interrupt[i]);
99 } 108 }
100 109
110#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP)
111 /*
112 * IRQ0 must be given a fixed assignment and initialized,
113 * because it's used before the IO-APIC is set up.
114 */
115 set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
116
117 /*
118 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
119 * IPI, driven by wakeup.
120 */
121 alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
122
123 /* IPI for invalidation */
124 alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
125
126 /* IPI for generic function call */
127 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
128
129 /* IPI for single call function */
130 set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt);
131#endif
132
133#ifdef CONFIG_X86_LOCAL_APIC
134 /* self generated IPI for local APIC timer */
135 alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
136
137 /* IPI vectors for APIC spurious and error interrupts */
138 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
139 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
140#endif
141
142#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL)
143 /* thermal monitor LVT interrupt */
144 alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
145#endif
146
147 if (!acpi_ioapic)
148 setup_irq(2, &irq2);
149
101 /* setup after call gates are initialised (usually add in 150 /* setup after call gates are initialised (usually add in
102 * the architecture specific gates) 151 * the architecture specific gates)
103 */ 152 */
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 678090508a62..e5d23675bb7c 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -397,7 +397,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
397 generic_bigsmp_probe(); 397 generic_bigsmp_probe();
398#endif 398#endif
399 399
400#ifdef CONFIG_X86_32
400 setup_apic_routing(); 401 setup_apic_routing();
402#endif
401 if (!num_processors) 403 if (!num_processors)
402 printk(KERN_ERR "MPTABLE: no processors registered!\n"); 404 printk(KERN_ERR "MPTABLE: no processors registered!\n");
403 return num_processors; 405 return num_processors;
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index b8c45610b20a..2434467ddf72 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -229,6 +229,12 @@ static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
229 } 229 }
230} 230}
231 231
232static int __init numaq_setup_ioapic_ids(void)
233{
234 /* so can skip it */
235 return 1;
236}
237
232static struct x86_quirks numaq_x86_quirks __initdata = { 238static struct x86_quirks numaq_x86_quirks __initdata = {
233 .arch_pre_time_init = numaq_pre_time_init, 239 .arch_pre_time_init = numaq_pre_time_init,
234 .arch_time_init = NULL, 240 .arch_time_init = NULL,
@@ -243,6 +249,7 @@ static struct x86_quirks numaq_x86_quirks __initdata = {
243 .mpc_oem_bus_info = mpc_oem_bus_info, 249 .mpc_oem_bus_info = mpc_oem_bus_info,
244 .mpc_oem_pci_bus = mpc_oem_pci_bus, 250 .mpc_oem_pci_bus = mpc_oem_pci_bus,
245 .smp_read_mpc_oem = smp_read_mpc_oem, 251 .smp_read_mpc_oem = smp_read_mpc_oem,
252 .setup_ioapic_ids = numaq_setup_ioapic_ids,
246}; 253};
247 254
248void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, 255void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 94da4d52d798..5744789a78f4 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -373,8 +373,6 @@ struct pv_cpu_ops pv_cpu_ops = {
373 373
374struct pv_apic_ops pv_apic_ops = { 374struct pv_apic_ops pv_apic_ops = {
375#ifdef CONFIG_X86_LOCAL_APIC 375#ifdef CONFIG_X86_LOCAL_APIC
376 .apic_write = native_apic_write,
377 .apic_read = native_apic_read,
378 .setup_boot_clock = setup_boot_APIC_clock, 376 .setup_boot_clock = setup_boot_APIC_clock,
379 .setup_secondary_clock = setup_secondary_APIC_clock, 377 .setup_secondary_clock = setup_secondary_APIC_clock,
380 .startup_ipi_hook = paravirt_nop, 378 .startup_ipi_hook = paravirt_nop,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 68b48e3fbcbd..59f07e14d083 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -739,6 +739,8 @@ void __init setup_arch(char **cmdline_p)
739 num_physpages = max_pfn; 739 num_physpages = max_pfn;
740 740
741 check_efer(); 741 check_efer();
742 if (cpu_has_x2apic)
743 check_x2apic();
742 744
743 /* How many end-of-memory variables you have, grandma! */ 745 /* How many end-of-memory variables you have, grandma! */
744 /* need this before calling reserve_initrd */ 746 /* need this before calling reserve_initrd */
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index b45ef8ddd651..ca316b5b742c 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -104,7 +104,16 @@ static inline int restore_i387(struct _fpstate __user *buf)
104 clts(); 104 clts();
105 task_thread_info(current)->status |= TS_USEDFPU; 105 task_thread_info(current)->status |= TS_USEDFPU;
106 } 106 }
107 return restore_fpu_checking((__force struct i387_fxsave_struct *)buf); 107 err = restore_fpu_checking((__force struct i387_fxsave_struct *)buf);
108 if (unlikely(err)) {
109 /*
110 * Encountered an error while doing the restore from the
111 * user buffer, clear the fpu state.
112 */
113 clear_fpu(tsk);
114 clear_used_math();
115 }
116 return err;
108} 117}
109 118
110/* 119/*
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 91055d7fc1b0..04f78ab51b45 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -123,7 +123,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
123 123
124static atomic_t init_deasserted; 124static atomic_t init_deasserted;
125 125
126static int boot_cpu_logical_apicid;
127 126
128/* representing cpus for which sibling maps can be computed */ 127/* representing cpus for which sibling maps can be computed */
129static cpumask_t cpu_sibling_setup_map; 128static cpumask_t cpu_sibling_setup_map;
@@ -165,6 +164,8 @@ static void unmap_cpu_to_node(int cpu)
165#endif 164#endif
166 165
167#ifdef CONFIG_X86_32 166#ifdef CONFIG_X86_32
167static int boot_cpu_logical_apicid;
168
168u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = 169u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
169 { [0 ... NR_CPUS-1] = BAD_APICID }; 170 { [0 ... NR_CPUS-1] = BAD_APICID };
170 171
@@ -210,7 +211,7 @@ static void __cpuinit smp_callin(void)
210 /* 211 /*
211 * (This works even if the APIC is not enabled.) 212 * (This works even if the APIC is not enabled.)
212 */ 213 */
213 phys_id = GET_APIC_ID(read_apic_id()); 214 phys_id = read_apic_id();
214 cpuid = smp_processor_id(); 215 cpuid = smp_processor_id();
215 if (cpu_isset(cpuid, cpu_callin_map)) { 216 if (cpu_isset(cpuid, cpu_callin_map)) {
216 panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, 217 panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
@@ -550,8 +551,7 @@ static inline void __inquire_remote_apic(int apicid)
550 printk(KERN_CONT 551 printk(KERN_CONT
551 "a previous APIC delivery may have failed\n"); 552 "a previous APIC delivery may have failed\n");
552 553
553 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); 554 apic_icr_write(APIC_DM_REMRD | regs[i], apicid);
554 apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]);
555 555
556 timeout = 0; 556 timeout = 0;
557 do { 557 do {
@@ -583,11 +583,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
583 int maxlvt; 583 int maxlvt;
584 584
585 /* Target chip */ 585 /* Target chip */
586 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
587
588 /* Boot on the stack */ 586 /* Boot on the stack */
589 /* Kick the second */ 587 /* Kick the second */
590 apic_write(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); 588 apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid);
591 589
592 pr_debug("Waiting for send to finish...\n"); 590 pr_debug("Waiting for send to finish...\n");
593 send_status = safe_apic_wait_icr_idle(); 591 send_status = safe_apic_wait_icr_idle();
@@ -640,13 +638,11 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
640 /* 638 /*
641 * Turn INIT on target chip 639 * Turn INIT on target chip
642 */ 640 */
643 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
644
645 /* 641 /*
646 * Send IPI 642 * Send IPI
647 */ 643 */
648 apic_write(APIC_ICR, 644 apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
649 APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT); 645 phys_apicid);
650 646
651 pr_debug("Waiting for send to finish...\n"); 647 pr_debug("Waiting for send to finish...\n");
652 send_status = safe_apic_wait_icr_idle(); 648 send_status = safe_apic_wait_icr_idle();
@@ -656,10 +652,8 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
656 pr_debug("Deasserting INIT.\n"); 652 pr_debug("Deasserting INIT.\n");
657 653
658 /* Target chip */ 654 /* Target chip */
659 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
660
661 /* Send IPI */ 655 /* Send IPI */
662 apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); 656 apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);
663 657
664 pr_debug("Waiting for send to finish...\n"); 658 pr_debug("Waiting for send to finish...\n");
665 send_status = safe_apic_wait_icr_idle(); 659 send_status = safe_apic_wait_icr_idle();
@@ -702,11 +696,10 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
702 */ 696 */
703 697
704 /* Target chip */ 698 /* Target chip */
705 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
706
707 /* Boot on the stack */ 699 /* Boot on the stack */
708 /* Kick the second */ 700 /* Kick the second */
709 apic_write(APIC_ICR, APIC_DM_STARTUP | (start_eip >> 12)); 701 apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12),
702 phys_apicid);
710 703
711 /* 704 /*
712 * Give the other CPU some time to accept the IPI. 705 * Give the other CPU some time to accept the IPI.
@@ -1150,10 +1143,17 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1150 * Setup boot CPU information 1143 * Setup boot CPU information
1151 */ 1144 */
1152 smp_store_cpu_info(0); /* Final full version of the data */ 1145 smp_store_cpu_info(0); /* Final full version of the data */
1146#ifdef CONFIG_X86_32
1153 boot_cpu_logical_apicid = logical_smp_processor_id(); 1147 boot_cpu_logical_apicid = logical_smp_processor_id();
1148#endif
1154 current_thread_info()->cpu = 0; /* needed? */ 1149 current_thread_info()->cpu = 0; /* needed? */
1155 set_cpu_sibling_map(0); 1150 set_cpu_sibling_map(0);
1156 1151
1152#ifdef CONFIG_X86_64
1153 enable_IR_x2apic();
1154 setup_apic_routing();
1155#endif
1156
1157 if (smp_sanity_check(max_cpus) < 0) { 1157 if (smp_sanity_check(max_cpus) < 0) {
1158 printk(KERN_INFO "SMP disabled\n"); 1158 printk(KERN_INFO "SMP disabled\n");
1159 disable_smp(); 1159 disable_smp();
@@ -1161,9 +1161,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1161 } 1161 }
1162 1162
1163 preempt_disable(); 1163 preempt_disable();
1164 if (GET_APIC_ID(read_apic_id()) != boot_cpu_physical_apicid) { 1164 if (read_apic_id() != boot_cpu_physical_apicid) {
1165 panic("Boot APIC ID in local APIC unexpected (%d vs %d)", 1165 panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
1166 GET_APIC_ID(read_apic_id()), boot_cpu_physical_apicid); 1166 read_apic_id(), boot_cpu_physical_apicid);
1167 /* Or can we switch back to PIC here? */ 1167 /* Or can we switch back to PIC here? */
1168 } 1168 }
1169 preempt_enable(); 1169 preempt_enable();
diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c
index d67ce5f044ba..7b987852e876 100644
--- a/arch/x86/kernel/summit_32.c
+++ b/arch/x86/kernel/summit_32.c
@@ -30,7 +30,7 @@
30#include <linux/init.h> 30#include <linux/init.h>
31#include <asm/io.h> 31#include <asm/io.h>
32#include <asm/bios_ebda.h> 32#include <asm/bios_ebda.h>
33#include <asm/mach-summit/mach_mpparse.h> 33#include <asm/summit/mpparse.h>
34 34
35static struct rio_table_hdr *rio_table_hdr __initdata; 35static struct rio_table_hdr *rio_table_hdr __initdata;
36static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; 36static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata;
diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c
index 170d43c17487..3d1be4f0fac5 100644
--- a/arch/x86/kernel/syscall_64.c
+++ b/arch/x86/kernel/syscall_64.c
@@ -8,12 +8,12 @@
8#define __NO_STUBS 8#define __NO_STUBS
9 9
10#define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ; 10#define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ;
11#undef _ASM_X86_64_UNISTD_H_ 11#undef ASM_X86__UNISTD_64_H
12#include <asm/unistd_64.h> 12#include <asm/unistd_64.h>
13 13
14#undef __SYSCALL 14#undef __SYSCALL
15#define __SYSCALL(nr, sym) [nr] = sym, 15#define __SYSCALL(nr, sym) [nr] = sym,
16#undef _ASM_X86_64_UNISTD_H_ 16#undef ASM_X86__UNISTD_64_H
17 17
18typedef void (*sys_call_ptr_t)(void); 18typedef void (*sys_call_ptr_t)(void);
19 19
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 3f18d73f420c..513caaca7115 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -1131,7 +1131,14 @@ asmlinkage void math_state_restore(void)
1131 } 1131 }
1132 1132
1133 clts(); /* Allow maths ops (or we recurse) */ 1133 clts(); /* Allow maths ops (or we recurse) */
1134 restore_fpu_checking(&me->thread.xstate->fxsave); 1134 /*
1135 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
1136 */
1137 if (unlikely(restore_fpu_checking(&me->thread.xstate->fxsave))) {
1138 stts();
1139 force_sig(SIGSEGV, me);
1140 return;
1141 }
1135 task_thread_info(me)->status |= TS_USEDFPU; 1142 task_thread_info(me)->status |= TS_USEDFPU;
1136 me->fpu_counter++; 1143 me->fpu_counter++;
1137} 1144}
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 6ca515d6db54..61531d5c9507 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -905,8 +905,8 @@ static inline int __init activate_vmi(void)
905#endif 905#endif
906 906
907#ifdef CONFIG_X86_LOCAL_APIC 907#ifdef CONFIG_X86_LOCAL_APIC
908 para_fill(pv_apic_ops.apic_read, APICRead); 908 para_fill(apic_ops->read, APICRead);
909 para_fill(pv_apic_ops.apic_write, APICWrite); 909 para_fill(apic_ops->write, APICWrite);
910#endif 910#endif
911 911
912 /* 912 /*