Diffstat (limited to 'arch/x86/kernel/apic')
-rw-r--r--	arch/x86/kernel/apic/Makefile		|   22
-rw-r--r--	arch/x86/kernel/apic/apic.c		|  554
-rw-r--r--	arch/x86/kernel/apic/apic_flat_64.c	|   30
-rw-r--r--	arch/x86/kernel/apic/apic_noop.c	|   17
-rw-r--r--	arch/x86/kernel/apic/bigsmp_32.c	|   45
-rw-r--r--	arch/x86/kernel/apic/es7000_32.c	|   46
-rw-r--r--	arch/x86/kernel/apic/hw_nmi.c		|   48
-rw-r--r--	arch/x86/kernel/apic/io_apic.c		| 1639
-rw-r--r--	arch/x86/kernel/apic/ipi.c		|   12
-rw-r--r--	arch/x86/kernel/apic/nmi.c		|  567
-rw-r--r--	arch/x86/kernel/apic/numaq_32.c		|   62
-rw-r--r--	arch/x86/kernel/apic/probe_32.c		|  120
-rw-r--r--	arch/x86/kernel/apic/probe_64.c		|   69
-rw-r--r--	arch/x86/kernel/apic/summit_32.c	|   50
-rw-r--r--	arch/x86/kernel/apic/x2apic_cluster.c	|  224
-rw-r--r--	arch/x86/kernel/apic/x2apic_phys.c	|  117
-rw-r--r--	arch/x86/kernel/apic/x2apic_uv_x.c	|  219
17 files changed, 1504 insertions, 2337 deletions
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 910f20b457c4..767fd04f2843 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -2,23 +2,25 @@
 # Makefile for local APIC drivers and for the IO-APIC code
 #
 
-obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o apic_noop.o probe_$(BITS).o ipi.o
-ifneq ($(CONFIG_HARDLOCKUP_DETECTOR),y)
-obj-$(CONFIG_X86_LOCAL_APIC)	+= nmi.o
-endif
-obj-$(CONFIG_HARDLOCKUP_DETECTOR)	+= hw_nmi.o
+obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o apic_noop.o ipi.o
+obj-y				+= hw_nmi.o
 
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
 obj-$(CONFIG_SMP)		+= ipi.o
 
 ifeq ($(CONFIG_X86_64),y)
-obj-y				+= apic_flat_64.o
-obj-$(CONFIG_X86_X2APIC)	+= x2apic_cluster.o
-obj-$(CONFIG_X86_X2APIC)	+= x2apic_phys.o
+# APIC probe will depend on the listing order here
 obj-$(CONFIG_X86_UV)		+= x2apic_uv_x.o
+obj-$(CONFIG_X86_X2APIC)	+= x2apic_phys.o
+obj-$(CONFIG_X86_X2APIC)	+= x2apic_cluster.o
+obj-y				+= apic_flat_64.o
 endif
 
-obj-$(CONFIG_X86_BIGSMP)	+= bigsmp_32.o
+# APIC probe will depend on the listing order here
 obj-$(CONFIG_X86_NUMAQ)		+= numaq_32.o
-obj-$(CONFIG_X86_ES7000)	+= es7000_32.o
 obj-$(CONFIG_X86_SUMMIT)	+= summit_32.o
+obj-$(CONFIG_X86_BIGSMP)	+= bigsmp_32.o
+obj-$(CONFIG_X86_ES7000)	+= es7000_32.o
+
+# For 32bit, probe_32 needs to be listed last
+obj-$(CONFIG_X86_LOCAL_APIC)	+= probe_$(BITS).o
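Both "listing order" comments matter because, with this series, APIC drivers are registered through the apic_driver()/apic_drivers() macros (see the bottoms of apic_flat_64.c, bigsmp_32.c and es7000_32.c below), which collect driver pointers in a dedicated linker section, so probing walks them in link order. A rough C sketch of that probe loop, assuming section boundary symbols named __apicdrivers/__apicdrivers_end as in kernels of this era:

/* Illustrative only: the .apicdrivers section is filled in link order,
 * which is exactly what the Makefile ordering above controls. */
extern struct apic *__apicdrivers[], *__apicdrivers_end[];

static struct apic *probe_in_link_order(void)
{
	struct apic **drv;

	for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
		/* the first driver whose probe() accepts the system wins */
		if ((*drv)->probe && (*drv)->probe())
			return *drv;
	}
	return NULL;
}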
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index e3b534cda49a..b9338b8cf420 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -24,14 +24,13 @@
 #include <linux/ftrace.h>
 #include <linux/ioport.h>
 #include <linux/module.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/delay.h>
 #include <linux/timex.h>
 #include <linux/dmar.h>
 #include <linux/init.h>
 #include <linux/cpu.h>
 #include <linux/dmi.h>
-#include <linux/nmi.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
 
@@ -44,14 +43,15 @@
 #include <asm/i8259.h>
 #include <asm/proto.h>
 #include <asm/apic.h>
+#include <asm/io_apic.h>
 #include <asm/desc.h>
 #include <asm/hpet.h>
 #include <asm/idle.h>
 #include <asm/mtrr.h>
 #include <asm/smp.h>
 #include <asm/mce.h>
-#include <asm/kvm_para.h>
 #include <asm/tsc.h>
+#include <asm/hypervisor.h>
 
 unsigned int num_processors;
 
@@ -79,12 +79,21 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
 EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
 
 #ifdef CONFIG_X86_32
+
+/*
+ * On x86_32, the mapping between cpu and logical apicid may vary
+ * depending on apic in use. The following early percpu variable is
+ * used for the mapping. This is where the behaviors of x86_64 and 32
+ * actually diverge. Let's keep it ugly for now.
+ */
+DEFINE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid, BAD_APICID);
+
 /*
  * Knob to control our willingness to enable the local APIC.
  *
  * +1=force-enable
  */
-static int force_enable_local_apic;
+static int force_enable_local_apic __initdata;
 /*
  * APIC command line parameters
  */
@@ -154,7 +163,7 @@ early_param("nox2apic", setup_nox2apic);
 unsigned long mp_lapic_addr;
 int disable_apic;
 /* Disable local APIC timer from the kernel commandline or via dmi quirk */
-static int disable_apic_timer __cpuinitdata;
+static int disable_apic_timer __initdata;
 /* Local APIC timer works in C2 */
 int local_apic_timer_c2_ok;
 EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
@@ -178,29 +187,8 @@ static struct resource lapic_resource = {
 
 static unsigned int calibration_result;
 
-static int lapic_next_event(unsigned long delta,
-			    struct clock_event_device *evt);
-static void lapic_timer_setup(enum clock_event_mode mode,
-			      struct clock_event_device *evt);
-static void lapic_timer_broadcast(const struct cpumask *mask);
 static void apic_pm_activate(void);
 
-/*
- * The local apic timer can be used for any function which is CPU local.
- */
-static struct clock_event_device lapic_clockevent = {
-	.name		= "lapic",
-	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
-			| CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY,
-	.shift		= 32,
-	.set_mode	= lapic_timer_setup,
-	.set_next_event	= lapic_next_event,
-	.broadcast	= lapic_timer_broadcast,
-	.rating		= 100,
-	.irq		= -1,
-};
-static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
-
 static unsigned long apic_phys;
 
 /*
@@ -239,7 +227,7 @@ static int modern_apic(void)
  * right after this call apic become NOOP driven
  * so apic->write/read doesn't do anything
  */
-void apic_disable(void)
+static void __init apic_disable(void)
 {
 	pr_info("APIC: switched to apic NOOP\n");
 	apic = &apic_noop;
@@ -283,23 +271,6 @@ u64 native_apic_icr_read(void)
 	return icr1 | ((u64)icr2 << 32);
 }
 
-/**
- * enable_NMI_through_LVT0 - enable NMI through local vector table 0
- */
-void __cpuinit enable_NMI_through_LVT0(void)
-{
-	unsigned int v;
-
-	/* unmask and set to NMI */
-	v = APIC_DM_NMI;
-
-	/* Level triggered for 82489DX (32bit mode) */
-	if (!lapic_is_integrated())
-		v |= APIC_LVT_LEVEL_TRIGGER;
-
-	apic_write(APIC_LVT0, v);
-}
-
 #ifdef CONFIG_X86_32
 /**
  * get_physical_broadcast - Get number of physical broadcast IDs
@@ -370,38 +341,89 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
 }
 
 /*
- * Setup extended LVT, AMD specific (K8, family 10h)
+ * Setup extended LVT, AMD specific
  *
- * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
- * MCE interrupts are supported. Thus MCE offset must be set to 0.
+ * Software should use the LVT offsets the BIOS provides. The offsets
+ * are determined by the subsystems using it like those for MCE
+ * threshold or IBS. On K8 only offset 0 (APIC500) and MCE interrupts
+ * are supported. Beginning with family 10h at least 4 offsets are
+ * available.
  *
- * If mask=1, the LVT entry does not generate interrupts while mask=0
- * enables the vector. See also the BKDGs.
+ * Since the offsets must be consistent for all cores, we keep track
+ * of the LVT offsets in software and reserve the offset for the same
+ * vector also to be used on other cores. An offset is freed by
+ * setting the entry to APIC_EILVT_MASKED.
+ *
+ * If the BIOS is right, there should be no conflicts. Otherwise a
+ * "[Firmware Bug]: ..." error message is generated. However, if
+ * software does not properly determine the offsets, it is not
+ * necessarily a BIOS bug.
  */
 
-#define APIC_EILVT_LVTOFF_MCE 0
-#define APIC_EILVT_LVTOFF_IBS 1
+static atomic_t eilvt_offsets[APIC_EILVT_NR_MAX];
 
-static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
+static inline int eilvt_entry_is_changeable(unsigned int old, unsigned int new)
 {
-	unsigned long reg = (lvt_off << 4) + APIC_EILVTn(0);
-	unsigned int  v   = (mask << 16) | (msg_type << 8) | vector;
-
-	apic_write(reg, v);
+	return (old & APIC_EILVT_MASKED)
+		|| (new == APIC_EILVT_MASKED)
+		|| ((new & ~APIC_EILVT_MASKED) == old);
 }
 
-u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask)
+static unsigned int reserve_eilvt_offset(int offset, unsigned int new)
 {
-	setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask);
-	return APIC_EILVT_LVTOFF_MCE;
+	unsigned int rsvd;			/* 0: uninitialized */
+
+	if (offset >= APIC_EILVT_NR_MAX)
+		return ~0;
+
+	rsvd = atomic_read(&eilvt_offsets[offset]) & ~APIC_EILVT_MASKED;
+	do {
+		if (rsvd &&
+		    !eilvt_entry_is_changeable(rsvd, new))
+			/* may not change if vectors are different */
+			return rsvd;
+		rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new);
+	} while (rsvd != new);
+
+	return new;
 }
 
-u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
+/*
+ * If mask=1, the LVT entry does not generate interrupts while mask=0
+ * enables the vector. See also the BKDGs. Must be called with
+ * preemption disabled.
+ */
+
+int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask)
 {
-	setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
-	return APIC_EILVT_LVTOFF_IBS;
+	unsigned long reg = APIC_EILVTn(offset);
+	unsigned int new, old, reserved;
+
+	new = (mask << 16) | (msg_type << 8) | vector;
+	old = apic_read(reg);
+	reserved = reserve_eilvt_offset(offset, new);
+
+	if (reserved != new) {
+		pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
+		       "vector 0x%x, but the register is already in use for "
+		       "vector 0x%x on another cpu\n",
+		       smp_processor_id(), reg, offset, new, reserved);
+		return -EINVAL;
+	}
+
+	if (!eilvt_entry_is_changeable(old, new)) {
+		pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
+		       "vector 0x%x, but the register is already in use for "
+		       "vector 0x%x on this cpu\n",
+		       smp_processor_id(), reg, offset, new, old);
+		return -EBUSY;
+	}
+
+	apic_write(reg, new);
+
+	return 0;
 }
-EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs);
+EXPORT_SYMBOL_GPL(setup_APIC_eilvt);
 
 /*
  * Program the next event, relative to now
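reserve_eilvt_offset() above claims an offset with a lock-free atomic_cmpxchg() loop: a slot may go from free to a concrete value, or be re-confirmed with the same value, but never silently change owners. A minimal user-space C11 sketch of the same reservation idea (the names and the 4-slot table are illustrative, not the kernel API):

#include <stdatomic.h>
#include <stdio.h>

#define SLOT_FREE	0u

static _Atomic unsigned int slots[4];	/* stand-in for eilvt_offsets[] */

/* Returns the value that owns slot i afterwards; equals 'new' on success. */
static unsigned int reserve_slot(int i, unsigned int new)
{
	unsigned int rsvd = atomic_load(&slots[i]);

	for (;;) {
		if (rsvd == new)
			return new;		/* already reserved for us */
		if (rsvd != SLOT_FREE)
			return rsvd;		/* owned by someone else */
		/* try to claim the free slot; on failure rsvd is reloaded */
		if (atomic_compare_exchange_weak(&slots[i], &rsvd, new))
			return new;
	}
}

int main(void)
{
	printf("%u\n", reserve_slot(0, 0x41));	/* 65: reservation succeeds */
	printf("%u\n", reserve_slot(0, 0x42));	/* 65: slot already taken */
	return 0;
}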
@@ -459,6 +481,23 @@ static void lapic_timer_broadcast(const struct cpumask *mask)
 #endif
 }
 
+
+/*
+ * The local apic timer can be used for any function which is CPU local.
+ */
+static struct clock_event_device lapic_clockevent = {
+	.name		= "lapic",
+	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
+			| CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY,
+	.shift		= 32,
+	.set_mode	= lapic_timer_setup,
+	.set_next_event	= lapic_next_event,
+	.broadcast	= lapic_timer_broadcast,
+	.rating		= 100,
+	.irq		= -1,
+};
+static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
+
 /*
  * Setup the local APIC timer for this CPU. Copy the initialized values
  * of the boot CPU and register the clock event in the framework.
@@ -467,7 +506,7 @@ static void __cpuinit setup_APIC_timer(void)
 {
 	struct clock_event_device *levt = &__get_cpu_var(lapic_events);
 
-	if (cpu_has(&current_cpu_data, X86_FEATURE_ARAT)) {
+	if (this_cpu_has(X86_FEATURE_ARAT)) {
 		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
 		/* Make LAPIC timer preferrable over percpu HPET */
 		lapic_clockevent.rating = 150;
@@ -635,7 +674,7 @@ static int __init calibrate_APIC_clock(void)
 	lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
 				       lapic_clockevent.shift);
 	lapic_clockevent.max_delta_ns =
-		clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
+		clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent);
 	lapic_clockevent.min_delta_ns =
 		clockevent_delta2ns(0xF, &lapic_clockevent);
 
@@ -750,11 +789,7 @@ void __init setup_boot_APIC_clock(void)
 	 * PIT/HPET going. Otherwise register lapic as a dummy
 	 * device.
 	 */
-	if (nmi_watchdog != NMI_IO_APIC)
-		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
-	else
-		pr_warning("APIC timer registered as dummy,"
-			" due to nmi_watchdog=%d!\n", nmi_watchdog);
+	lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
 
 	/* Setup the lapic or request the broadcast */
 	setup_APIC_timer();
@@ -1146,12 +1181,15 @@ static void __cpuinit lapic_setup_esr(void)
 			oldvalue, value);
 }
 
-
 /**
  * setup_local_APIC - setup the local APIC
+ *
+ * Used to setup local APIC while initializing BSP or bringing up APs.
+ * Always called with preemption disabled.
  */
 void __cpuinit setup_local_APIC(void)
 {
+	int cpu = smp_processor_id();
 	unsigned int value, queued;
 	int i, j, acked = 0;
 	unsigned long long tsc = 0, ntsc;
@@ -1161,7 +1199,7 @@ void __cpuinit setup_local_APIC(void)
 		rdtscll(tsc);
 
 	if (disable_apic) {
-		arch_disable_smp_support();
+		disable_ioapic_support();
 		return;
 	}
 
@@ -1176,8 +1214,6 @@ void __cpuinit setup_local_APIC(void)
 #endif
 	perf_events_lapic_init();
 
-	preempt_disable();
-
 	/*
 	 * Double-check whether this APIC is really registered.
 	 * This is meaningless in clustered apic mode, so we skip it.
@@ -1191,6 +1227,30 @@ void __cpuinit setup_local_APIC(void)
 	 */
 	apic->init_apic_ldr();
 
+#ifdef CONFIG_X86_32
+	/*
+	 * APIC LDR is initialized. If logical_apicid mapping was
+	 * initialized during get_smp_config(), make sure it matches the
+	 * actual value.
+	 */
+	i = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
+	WARN_ON(i != BAD_APICID && i != logical_smp_processor_id());
+	/* always use the value from LDR */
+	early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
+		logical_smp_processor_id();
+
+	/*
+	 * Some NUMA implementations (NUMAQ) don't initialize apicid to
+	 * node mapping during NUMA init. Now that logical apicid is
+	 * guaranteed to be known, give it another chance. This is already
+	 * a bit too late - percpu allocation has already happened without
+	 * proper NUMA affinity.
+	 */
+	if (apic->x86_32_numa_cpu_node)
+		set_apicid_to_node(early_per_cpu(x86_cpu_to_apicid, cpu),
+				   apic->x86_32_numa_cpu_node(cpu));
+#endif
+
 	/*
 	 * Set Task Priority to 'accept all'. We never change this
 	 * later on.
@@ -1293,21 +1353,19 @@ void __cpuinit setup_local_APIC(void)
 	 * TODO: set up through-local-APIC from through-I/O-APIC? --macro
 	 */
 	value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
-	if (!smp_processor_id() && (pic_mode || !value)) {
+	if (!cpu && (pic_mode || !value)) {
 		value = APIC_DM_EXTINT;
-		apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n",
-				smp_processor_id());
+		apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", cpu);
 	} else {
 		value = APIC_DM_EXTINT | APIC_LVT_MASKED;
-		apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n",
-				smp_processor_id());
+		apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", cpu);
 	}
 	apic_write(APIC_LVT0, value);
 
 	/*
 	 * only the BP should see the LINT1 NMI signal, obviously.
 	 */
-	if (!smp_processor_id())
+	if (!cpu)
 		value = APIC_DM_NMI;
 	else
 		value = APIC_DM_NMI | APIC_LVT_MASKED;
@@ -1315,11 +1373,9 @@ void __cpuinit setup_local_APIC(void)
 		value |= APIC_LVT_LEVEL_TRIGGER;
 	apic_write(APIC_LVT1, value);
 
-	preempt_enable();
-
 #ifdef CONFIG_X86_MCE_INTEL
 	/* Recheck CMCI information after local APIC is up on CPU #0 */
-	if (smp_processor_id() == 0)
+	if (!cpu)
 		cmci_recheck();
 #endif
 }
@@ -1338,10 +1394,22 @@ void __cpuinit end_local_APIC_setup(void)
 	}
 #endif
 
-	setup_apic_nmi_watchdog(NULL);
 	apic_pm_activate();
 }
 
+void __init bsp_end_local_APIC_setup(void)
+{
+	end_local_APIC_setup();
+
+	/*
+	 * Now that local APIC setup is completed for BP, configure the fault
+	 * handling for interrupt remapping.
+	 */
+	if (intr_remapping_enabled)
+		enable_drhd_fault_handling();
+
+}
+
 #ifdef CONFIG_X86_X2APIC
 void check_x2apic(void)
 {
@@ -1394,7 +1462,6 @@ int __init enable_IR(void)
 void __init enable_IR_x2apic(void)
 {
 	unsigned long flags;
-	struct IO_APIC_route_entry **ioapic_entries = NULL;
 	int ret, x2apic_enabled = 0;
 	int dmar_table_init_ret;
 
@@ -1402,13 +1469,7 @@ void __init enable_IR_x2apic(void)
 	if (dmar_table_init_ret && !x2apic_supported())
 		return;
 
-	ioapic_entries = alloc_ioapic_entries();
-	if (!ioapic_entries) {
-		pr_err("Allocate ioapic_entries failed\n");
-		goto out;
-	}
-
-	ret = save_IO_APIC_setup(ioapic_entries);
+	ret = save_ioapic_entries();
 	if (ret) {
 		pr_info("Saving IO-APIC state failed: %d\n", ret);
 		goto out;
@@ -1416,7 +1477,7 @@ void __init enable_IR_x2apic(void)
 
 	local_irq_save(flags);
 	legacy_pic->mask_all();
-	mask_IO_APIC_setup(ioapic_entries);
+	mask_ioapic_entries();
 
 	if (dmar_table_init_ret)
 		ret = 0;
@@ -1427,7 +1488,8 @@ void __init enable_IR_x2apic(void)
 	/* IR is required if there is APIC ID > 255 even when running
 	 * under KVM
 	 */
-	if (max_physical_apicid > 255 || !kvm_para_available())
+	if (max_physical_apicid > 255 ||
+	    !hypervisor_x2apic_available())
 		goto nox2apic;
 	/*
 	 * without IR all CPUs can be addressed by IOAPIC/MSI
@@ -1446,14 +1508,11 @@ void __init enable_IR_x2apic(void)
 
 nox2apic:
 	if (!ret) /* IR enabling failed */
-		restore_IO_APIC_setup(ioapic_entries);
+		restore_ioapic_entries();
 	legacy_pic->restore_mask();
 	local_irq_restore(flags);
 
 out:
-	if (ioapic_entries)
-		free_ioapic_entries(ioapic_entries);
-
 	if (x2apic_enabled)
 		return;
 
@@ -1481,13 +1540,60 @@ static int __init detect_init_APIC(void)
 	return 0;
 }
 #else
+
+static int __init apic_verify(void)
+{
+	u32 features, h, l;
+
+	/*
+	 * The APIC feature bit should now be enabled
+	 * in `cpuid'
+	 */
+	features = cpuid_edx(1);
+	if (!(features & (1 << X86_FEATURE_APIC))) {
+		pr_warning("Could not enable APIC!\n");
+		return -1;
+	}
+	set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
+	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+
+	/* The BIOS may have set up the APIC at some other address */
+	rdmsr(MSR_IA32_APICBASE, l, h);
+	if (l & MSR_IA32_APICBASE_ENABLE)
+		mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
+
+	pr_info("Found and enabled local APIC!\n");
+	return 0;
+}
+
+int __init apic_force_enable(unsigned long addr)
+{
+	u32 h, l;
+
+	if (disable_apic)
+		return -1;
+
+	/*
+	 * Some BIOSes disable the local APIC in the APIC_BASE
+	 * MSR. This can only be done in software for Intel P6 or later
+	 * and AMD K7 (Model > 1) or later.
+	 */
+	rdmsr(MSR_IA32_APICBASE, l, h);
+	if (!(l & MSR_IA32_APICBASE_ENABLE)) {
+		pr_info("Local APIC disabled by BIOS -- reenabling.\n");
+		l &= ~MSR_IA32_APICBASE_BASE;
+		l |= MSR_IA32_APICBASE_ENABLE | addr;
+		wrmsr(MSR_IA32_APICBASE, l, h);
+		enabled_via_apicbase = 1;
+	}
+	return apic_verify();
+}
+
 /*
  * Detect and initialize APIC
  */
 static int __init detect_init_APIC(void)
 {
-	u32 h, l, features;
-
 	/* Disabled by kernel option? */
 	if (disable_apic)
 		return -1;
@@ -1517,38 +1623,12 @@ static int __init detect_init_APIC(void)
1517 "you can enable it with \"lapic\"\n"); 1623 "you can enable it with \"lapic\"\n");
1518 return -1; 1624 return -1;
1519 } 1625 }
1520 /* 1626 if (apic_force_enable(APIC_DEFAULT_PHYS_BASE))
1521 * Some BIOSes disable the local APIC in the APIC_BASE 1627 return -1;
1522 * MSR. This can only be done in software for Intel P6 or later 1628 } else {
1523 * and AMD K7 (Model > 1) or later. 1629 if (apic_verify())
1524 */ 1630 return -1;
1525 rdmsr(MSR_IA32_APICBASE, l, h);
1526 if (!(l & MSR_IA32_APICBASE_ENABLE)) {
1527 pr_info("Local APIC disabled by BIOS -- reenabling.\n");
1528 l &= ~MSR_IA32_APICBASE_BASE;
1529 l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
1530 wrmsr(MSR_IA32_APICBASE, l, h);
1531 enabled_via_apicbase = 1;
1532 }
1533 }
1534 /*
1535 * The APIC feature bit should now be enabled
1536 * in `cpuid'
1537 */
1538 features = cpuid_edx(1);
1539 if (!(features & (1 << X86_FEATURE_APIC))) {
1540 pr_warning("Could not enable APIC!\n");
1541 return -1;
1542 } 1631 }
1543 set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
1544 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
1545
1546 /* The BIOS may have set up the APIC at some other address */
1547 rdmsr(MSR_IA32_APICBASE, l, h);
1548 if (l & MSR_IA32_APICBASE_ENABLE)
1549 mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
1550
1551 pr_info("Found and enabled local APIC!\n");
1552 1632
1553 apic_pm_activate(); 1633 apic_pm_activate();
1554 1634
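Both apic_verify() and apic_force_enable() inspect the IA32_APICBASE MSR (0x1B), as the hunk above shows. A small stand-alone sketch decoding such a value, assuming the architectural bit layout the kernel's MSR_IA32_APICBASE_* constants encode (global enable = bit 11, BSP = bit 8, base mask = 0xfffff000):

#include <stdint.h>
#include <stdio.h>

#define APICBASE_ENABLE	(1u << 11)	/* xAPIC global enable */
#define APICBASE_BSP	(1u << 8)	/* this CPU is the bootstrap processor */
#define APICBASE_BASE	0xfffff000u	/* base address bits in the low word */

static void decode_apicbase(uint32_t lo)
{
	printf("APIC %s, %s, base %#x\n",
	       (lo & APICBASE_ENABLE) ? "enabled" : "disabled",
	       (lo & APICBASE_BSP) ? "BSP" : "AP",
	       lo & APICBASE_BASE);
}

int main(void)
{
	decode_apicbase(0xfee00900);	/* typical BSP value: enabled, base 0xfee00000 */
	return 0;
}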
@@ -1560,28 +1640,6 @@ no_apic:
 }
 #endif
 
-#ifdef CONFIG_X86_64
-void __init early_init_lapic_mapping(void)
-{
-	/*
-	 * If no local APIC can be found then go out
-	 * : it means there is no mpatable and MADT
-	 */
-	if (!smp_found_config)
-		return;
-
-	set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
-	apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
-		    APIC_BASE, mp_lapic_addr);
-
-	/*
-	 * Fetch the APIC ID of the BSP in case we have a
-	 * default configuration (or the MP table is broken).
-	 */
-	boot_cpu_physical_apicid = read_apic_id();
-}
-#endif
-
 /**
  * init_apic_mappings - initialize APIC mappings
  */
@@ -1607,10 +1665,7 @@ void __init init_apic_mappings(void)
 		 * acpi_register_lapic_address()
 		 */
 		if (!acpi_lapic && !smp_found_config)
-			set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
-
-		apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n",
-					APIC_BASE, apic_phys);
+			register_lapic_address(apic_phys);
 	}
 
 	/*
@@ -1632,11 +1687,27 @@ void __init init_apic_mappings(void)
 	}
 }
 
+void __init register_lapic_address(unsigned long address)
+{
+	mp_lapic_addr = address;
+
+	if (!x2apic_mode) {
+		set_fixmap_nocache(FIX_APIC_BASE, address);
+		apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
+			    APIC_BASE, mp_lapic_addr);
+	}
+	if (boot_cpu_physical_apicid == -1U) {
+		boot_cpu_physical_apicid = read_apic_id();
+		apic_version[boot_cpu_physical_apicid] =
+			 GET_APIC_VERSION(apic_read(APIC_LVR));
+	}
+}
+
 /*
  * This initializes the IO-APIC and APIC hardware if this is
  * a UP kernel.
  */
-int apic_version[MAX_APICS];
+int apic_version[MAX_LOCAL_APIC];
 
 int __init APIC_init_uniprocessor(void)
 {
@@ -1665,10 +1736,7 @@ int __init APIC_init_uniprocessor(void)
 	}
 #endif
 
-#ifndef CONFIG_SMP
-	enable_IR_x2apic();
 	default_setup_apic_routing();
-#endif
 
 	verify_local_APIC();
 	connect_bsp_APIC();
1697 enable_IO_APIC(); 1765 enable_IO_APIC();
1698#endif 1766#endif
1699 1767
1700 end_local_APIC_setup(); 1768 bsp_end_local_APIC_setup();
1701 1769
1702#ifdef CONFIG_X86_IO_APIC 1770#ifdef CONFIG_X86_IO_APIC
1703 if (smp_found_config && !skip_ioapic_setup && nr_ioapics) 1771 if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
1704 setup_IO_APIC(); 1772 setup_IO_APIC();
1705 else { 1773 else {
1706 nr_ioapics = 0; 1774 nr_ioapics = 0;
1707 localise_nmi_watchdog();
1708 } 1775 }
1709#else
1710 localise_nmi_watchdog();
1711#endif 1776#endif
1712 1777
1713 x86_init.timers.setup_percpu_clockev(); 1778 x86_init.timers.setup_percpu_clockev();
1714#ifdef CONFIG_X86_64
1715 check_nmi_watchdog();
1716#endif
1717
1718 return 0; 1779 return 0;
1719} 1780}
1720 1781
@@ -1753,30 +1814,41 @@ void smp_spurious_interrupt(struct pt_regs *regs)
 	 */
 void smp_error_interrupt(struct pt_regs *regs)
 {
-	u32 v, v1;
+	u32 v0, v1;
+	u32 i = 0;
+	static const char * const error_interrupt_reason[] = {
+		"Send CS error",		/* APIC Error Bit 0 */
+		"Receive CS error",		/* APIC Error Bit 1 */
+		"Send accept error",		/* APIC Error Bit 2 */
+		"Receive accept error",		/* APIC Error Bit 3 */
+		"Redirectable IPI",		/* APIC Error Bit 4 */
+		"Send illegal vector",		/* APIC Error Bit 5 */
+		"Received illegal vector",	/* APIC Error Bit 6 */
+		"Illegal register address",	/* APIC Error Bit 7 */
+	};
 
 	exit_idle();
 	irq_enter();
 	/* First tickle the hardware, only then report what went on. -- REW */
-	v = apic_read(APIC_ESR);
+	v0 = apic_read(APIC_ESR);
 	apic_write(APIC_ESR, 0);
 	v1 = apic_read(APIC_ESR);
 	ack_APIC_irq();
 	atomic_inc(&irq_err_count);
 
-	/*
-	 * Here is what the APIC error bits mean:
-	 * 0: Send CS error
-	 * 1: Receive CS error
-	 * 2: Send accept error
-	 * 3: Receive accept error
-	 * 4: Reserved
-	 * 5: Send illegal vector
-	 * 6: Received illegal vector
-	 * 7: Illegal register address
-	 */
-	pr_debug("APIC error on CPU%d: %02x(%02x)\n",
-		smp_processor_id(), v , v1);
+	apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x(%02x)",
+		    smp_processor_id(), v0 , v1);
+
+	v1 = v1 & 0xff;
+	while (v1) {
+		if (v1 & 0x1)
+			apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]);
+		i++;
+		v1 >>= 1;
+	};
+
+	apic_printk(APIC_DEBUG, KERN_CONT "\n");
 
 	irq_exit();
 }
 
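The replacement loop walks the latched ESR value bit by bit and prints one reason string per set bit. The same decode extracted into a stand-alone sketch for clarity (printf stands in for apic_printk):

#include <stdio.h>

static const char * const error_interrupt_reason[] = {
	"Send CS error", "Receive CS error",
	"Send accept error", "Receive accept error",
	"Redirectable IPI", "Send illegal vector",
	"Received illegal vector", "Illegal register address",
};

static void print_esr(unsigned int v1)
{
	unsigned int i = 0;

	for (v1 &= 0xff; v1; i++, v1 >>= 1)
		if (v1 & 0x1)
			printf(" : %s", error_interrupt_reason[i]);
	printf("\n");
}

int main(void)
{
	print_esr(0x44);	/* bits 2 and 6: send accept / received illegal vector */
	return 0;
}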
@@ -1873,17 +1945,6 @@ void __cpuinit generic_processor_info(int apicid, int version)
 {
 	int cpu;
 
-	/*
-	 * Validate version
-	 */
-	if (version == 0x0) {
-		pr_warning("BIOS bug, APIC version is 0 for CPU#%d! "
-			   "fixing up to 0x10. (tell your hw vendor)\n",
-			   version);
-		version = 0x10;
-	}
-	apic_version[apicid] = version;
-
 	if (num_processors >= nr_cpu_ids) {
 		int max = nr_cpu_ids;
 		int thiscpu = max + disabled_cpus;
@@ -1897,22 +1958,34 @@ void __cpuinit generic_processor_info(int apicid, int version)
 	}
 
 	num_processors++;
-	cpu = cpumask_next_zero(-1, cpu_present_mask);
-
-	if (version != apic_version[boot_cpu_physical_apicid])
-		WARN_ONCE(1,
-			"ACPI: apic version mismatch, bootcpu: %x cpu %d: %x\n",
-			apic_version[boot_cpu_physical_apicid], cpu, version);
-
-	physid_set(apicid, phys_cpu_present_map);
 	if (apicid == boot_cpu_physical_apicid) {
 		/*
 		 * x86_bios_cpu_apicid is required to have processors listed
 		 * in same order as logical cpu numbers. Hence the first
 		 * entry is BSP, and so on.
+		 * boot_cpu_init() already hold bit 0 in cpu_present_mask
+		 * for BSP.
 		 */
 		cpu = 0;
+	} else
+		cpu = cpumask_next_zero(-1, cpu_present_mask);
+
+	/*
+	 * Validate version
+	 */
+	if (version == 0x0) {
+		pr_warning("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n",
+			   cpu, apicid);
+		version = 0x10;
 	}
+	apic_version[apicid] = version;
+
+	if (version != apic_version[boot_cpu_physical_apicid]) {
+		pr_warning("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n",
+			   apic_version[boot_cpu_physical_apicid], cpu, version);
+	}
+
+	physid_set(apicid, phys_cpu_present_map);
 	if (apicid > max_physical_apicid)
 		max_physical_apicid = apicid;
 
@@ -1920,7 +1993,10 @@ void __cpuinit generic_processor_info(int apicid, int version)
 	early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
 	early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
 #endif
-
+#ifdef CONFIG_X86_32
+	early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
+		apic->x86_32_early_logical_apicid(cpu);
+#endif
 	set_cpu_possible(cpu, true);
 	set_cpu_present(cpu, true);
 }
@@ -1940,17 +2016,6 @@ void default_init_apic_ldr(void)
 	apic_write(APIC_LDR, val);
 }
 
-#ifdef CONFIG_X86_32
-int default_apicid_to_node(int logical_apicid)
-{
-#ifdef CONFIG_SMP
-	return apicid_2_node[hard_smp_processor_id()];
-#else
-	return 0;
-#endif
-}
-#endif
-
 /*
  * Power management
  */
@@ -1979,7 +2044,7 @@ static struct {
 	unsigned int apic_thmr;
 } apic_pm_state;
 
-static int lapic_suspend(struct sys_device *dev, pm_message_t state)
+static int lapic_suspend(void)
 {
 	unsigned long flags;
 	int maxlvt;
@@ -2017,34 +2082,24 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
 	return 0;
 }
 
-static int lapic_resume(struct sys_device *dev)
+static void lapic_resume(void)
 {
 	unsigned int l, h;
 	unsigned long flags;
 	int maxlvt;
-	int ret = 0;
-	struct IO_APIC_route_entry **ioapic_entries = NULL;
 
 	if (!apic_pm_state.active)
-		return 0;
+		return;
 
 	local_irq_save(flags);
 	if (intr_remapping_enabled) {
-		ioapic_entries = alloc_ioapic_entries();
-		if (!ioapic_entries) {
-			WARN(1, "Alloc ioapic_entries in lapic resume failed.");
-			ret = -ENOMEM;
-			goto restore;
-		}
-
-		ret = save_IO_APIC_setup(ioapic_entries);
-		if (ret) {
-			WARN(1, "Saving IO-APIC state failed: %d\n", ret);
-			free_ioapic_entries(ioapic_entries);
-			goto restore;
-		}
-
-		mask_IO_APIC_setup(ioapic_entries);
+		/*
+		 * IO-APIC and PIC have their own resume routines.
+		 * We just mask them here to make sure the interrupt
+		 * subsystem is completely quiet while we enable x2apic
+		 * and interrupt-remapping.
+		 */
+		mask_ioapic_entries();
 		legacy_pic->mask_all();
 	}
 
@@ -2087,16 +2142,10 @@ static int lapic_resume(struct sys_device *dev)
 	apic_write(APIC_ESR, 0);
 	apic_read(APIC_ESR);
 
-	if (intr_remapping_enabled) {
+	if (intr_remapping_enabled)
 		reenable_intr_remapping(x2apic_mode);
-		legacy_pic->restore_mask();
-		restore_IO_APIC_setup(ioapic_entries);
-		free_ioapic_entries(ioapic_entries);
-	}
-restore:
-	local_irq_restore(flags);
 
-	return ret;
+	local_irq_restore(flags);
 }
 
 /*
@@ -2104,17 +2153,11 @@ restore:
  * are needed on every CPU up until machine_halt/restart/poweroff.
  */
 
-static struct sysdev_class lapic_sysclass = {
-	.name		= "lapic",
+static struct syscore_ops lapic_syscore_ops = {
 	.resume		= lapic_resume,
 	.suspend	= lapic_suspend,
 };
 
-static struct sys_device device_lapic = {
-	.id	= 0,
-	.cls	= &lapic_sysclass,
-};
-
 static void __cpuinit apic_pm_activate(void)
 {
 	apic_pm_state.active = 1;
 }
2122 2165
2123static int __init init_lapic_sysfs(void) 2166static int __init init_lapic_sysfs(void)
2124{ 2167{
2125 int error;
2126
2127 if (!cpu_has_apic)
2128 return 0;
2129 /* XXX: remove suspend/resume procs if !apic_pm_state.active? */ 2168 /* XXX: remove suspend/resume procs if !apic_pm_state.active? */
2169 if (cpu_has_apic)
2170 register_syscore_ops(&lapic_syscore_ops);
2130 2171
2131 error = sysdev_class_register(&lapic_sysclass); 2172 return 0;
2132 if (!error)
2133 error = sysdev_register(&device_lapic);
2134 return error;
2135} 2173}
2136 2174
2137/* local apic needs to resume before other devices access its registers. */ 2175/* local apic needs to resume before other devices access its registers. */
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 09d3b17ce0c2..f7a41e4cae47 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -16,6 +16,7 @@
 #include <linux/ctype.h>
 #include <linux/init.h>
 #include <linux/hardirq.h>
+#include <linux/module.h>
 #include <asm/smp.h>
 #include <asm/apic.h>
 #include <asm/ipi.h>
@@ -24,6 +25,12 @@
 #include <acpi/acpi_bus.h>
 #endif
 
+static struct apic apic_physflat;
+static struct apic apic_flat;
+
+struct apic __read_mostly *apic = &apic_flat;
+EXPORT_SYMBOL_GPL(apic);
+
 static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 {
 	return 1;
@@ -164,7 +171,7 @@ static int flat_phys_pkg_id(int initial_apic_id, int index_msb)
 	return initial_apic_id >> index_msb;
 }
 
-struct apic apic_flat = {
+static struct apic apic_flat = {
 	.name				= "flat",
 	.probe				= NULL,
 	.acpi_madt_oem_check		= flat_acpi_madt_oem_check,
@@ -185,8 +192,6 @@ struct apic apic_flat = {
 	.ioapic_phys_id_map		= NULL,
 	.setup_apic_routing		= NULL,
 	.multi_timer_check		= NULL,
-	.apicid_to_node			= NULL,
-	.cpu_to_logical_apicid		= NULL,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= NULL,
 	.setup_portio_remap		= NULL,
@@ -314,10 +319,18 @@ physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 	return per_cpu(x86_cpu_to_apicid, cpu);
 }
 
-struct apic apic_physflat = {
+static int physflat_probe(void)
+{
+	if (apic == &apic_physflat || num_possible_cpus() > 8)
+		return 1;
+
+	return 0;
+}
+
+static struct apic apic_physflat = {
 
 	.name				= "physical flat",
-	.probe				= NULL,
+	.probe				= physflat_probe,
 	.acpi_madt_oem_check		= physflat_acpi_madt_oem_check,
 	.apic_id_registered		= flat_apic_id_registered,
 
@@ -337,8 +350,6 @@ struct apic apic_physflat = {
 	.ioapic_phys_id_map		= NULL,
 	.setup_apic_routing		= NULL,
 	.multi_timer_check		= NULL,
-	.apicid_to_node			= NULL,
-	.cpu_to_logical_apicid		= NULL,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= NULL,
 	.setup_portio_remap		= NULL,
@@ -373,3 +384,8 @@ struct apic apic_physflat = {
 	.wait_icr_idle			= native_apic_wait_icr_idle,
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
 };
+
+/*
+ * We need to check for physflat first, so this order is important.
+ */
+apic_drivers(apic_physflat, apic_flat);
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index e31b9ffe25f5..775b82bc655c 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -54,11 +54,6 @@ static u64 noop_apic_icr_read(void)
 	return 0;
 }
 
-static int noop_cpu_to_logical_apicid(int cpu)
-{
-	return 0;
-}
-
 static int noop_phys_pkg_id(int cpuid_apic, int index_msb)
 {
 	return 0;
@@ -113,12 +108,6 @@ static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask)
 	cpumask_set_cpu(cpu, retmask);
 }
 
-int noop_apicid_to_node(int logical_apicid)
-{
-	/* we're always on node 0 */
-	return 0;
-}
-
 static u32 noop_apic_read(u32 reg)
 {
 	WARN_ON_ONCE((cpu_has_apic && !disable_apic));
@@ -153,9 +142,7 @@ struct apic apic_noop = {
 	.ioapic_phys_id_map		= default_ioapic_phys_id_map,
 	.setup_apic_routing		= NULL,
 	.multi_timer_check		= NULL,
-	.apicid_to_node			= noop_apicid_to_node,
 
-	.cpu_to_logical_apicid		= noop_cpu_to_logical_apicid,
 	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= physid_set_mask_of_physid,
 
@@ -197,4 +184,8 @@ struct apic apic_noop = {
 	.icr_write			= noop_apic_icr_write,
 	.wait_icr_idle			= noop_apic_wait_icr_idle,
 	.safe_wait_icr_idle		= noop_safe_apic_wait_icr_idle,
+
+#ifdef CONFIG_X86_32
+	.x86_32_early_logical_apicid	= noop_x86_32_early_logical_apicid,
+#endif
 };
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index cb804c5091b9..efd737e827f4 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -45,6 +45,12 @@ static unsigned long bigsmp_check_apicid_present(int bit)
 	return 1;
 }
 
+static int bigsmp_early_logical_apicid(int cpu)
+{
+	/* on bigsmp, logical apicid is the same as physical */
+	return early_per_cpu(x86_cpu_to_apicid, cpu);
+}
+
 static inline unsigned long calculate_ldr(int cpu)
 {
 	unsigned long val, id;
@@ -80,11 +86,6 @@ static void bigsmp_setup_apic_routing(void)
 			nr_ioapics);
 }
 
-static int bigsmp_apicid_to_node(int logical_apicid)
-{
-	return apicid_2_node[hard_smp_processor_id()];
-}
-
 static int bigsmp_cpu_present_to_apicid(int mps_cpu)
 {
 	if (mps_cpu < nr_cpu_ids)
@@ -93,14 +94,6 @@ static int bigsmp_cpu_present_to_apicid(int mps_cpu)
 	return BAD_APICID;
 }
 
-/* Mapping from cpu number to logical apicid */
-static inline int bigsmp_cpu_to_logical_apicid(int cpu)
-{
-	if (cpu >= nr_cpu_ids)
-		return BAD_APICID;
-	return cpu_physical_id(cpu);
-}
-
 static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
 {
 	/* For clustered we don't have a good way to do this yet - hack */
@@ -115,7 +108,11 @@ static int bigsmp_check_phys_apicid_present(int phys_apicid)
 /* As we are using single CPU as destination, pick only one CPU here */
 static unsigned int bigsmp_cpu_mask_to_apicid(const struct cpumask *cpumask)
 {
-	return bigsmp_cpu_to_logical_apicid(cpumask_first(cpumask));
+	int cpu = cpumask_first(cpumask);
+
+	if (cpu < nr_cpu_ids)
+		return cpu_physical_id(cpu);
+	return BAD_APICID;
 }
 
 static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
@@ -129,9 +126,9 @@ static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
 	 */
 	for_each_cpu_and(cpu, cpumask, andmask) {
 		if (cpumask_test_cpu(cpu, cpu_online_mask))
-			break;
+			return cpu_physical_id(cpu);
 	}
-	return bigsmp_cpu_to_logical_apicid(cpu);
+	return BAD_APICID;
 }
 
 static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb)
@@ -196,7 +193,7 @@ static int probe_bigsmp(void)
 	return dmi_bigsmp;
 }
 
-struct apic apic_bigsmp = {
+static struct apic apic_bigsmp = {
 
 	.name				= "bigsmp",
 	.probe				= probe_bigsmp,
@@ -219,8 +216,6 @@ struct apic apic_bigsmp = {
 	.ioapic_phys_id_map		= bigsmp_ioapic_phys_id_map,
 	.setup_apic_routing		= bigsmp_setup_apic_routing,
 	.multi_timer_check		= NULL,
-	.apicid_to_node			= bigsmp_apicid_to_node,
-	.cpu_to_logical_apicid		= bigsmp_cpu_to_logical_apicid,
 	.cpu_present_to_apicid		= bigsmp_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= physid_set_mask_of_physid,
 	.setup_portio_remap		= NULL,
@@ -256,4 +251,16 @@ struct apic apic_bigsmp = {
 	.icr_write			= native_apic_icr_write,
 	.wait_icr_idle			= native_apic_wait_icr_idle,
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
+
+	.x86_32_early_logical_apicid	= bigsmp_early_logical_apicid,
 };
+
+struct apic * __init generic_bigsmp_probe(void)
+{
+	if (probe_bigsmp())
+		return &apic_bigsmp;
+
+	return NULL;
+}
+
+apic_driver(apic_bigsmp);
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 8593582d8022..9536b3fe43f8 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -460,6 +460,12 @@ static unsigned long es7000_check_apicid_present(int bit)
 	return physid_isset(bit, phys_cpu_present_map);
 }
 
+static int es7000_early_logical_apicid(int cpu)
+{
+	/* on es7000, logical apicid is the same as physical */
+	return early_per_cpu(x86_bios_cpu_apicid, cpu);
+}
+
 static unsigned long calculate_ldr(int cpu)
 {
 	unsigned long id = per_cpu(x86_bios_cpu_apicid, cpu);
@@ -504,12 +510,6 @@ static void es7000_setup_apic_routing(void)
 		nr_ioapics, cpumask_bits(es7000_target_cpus())[0]);
 }
 
-static int es7000_apicid_to_node(int logical_apicid)
-{
-	return 0;
-}
-
-
 static int es7000_cpu_present_to_apicid(int mps_cpu)
 {
 	if (!mps_cpu)
@@ -528,18 +528,6 @@ static void es7000_apicid_to_cpu_present(int phys_apicid, physid_mask_t *retmap)
 	++cpu_id;
 }
 
-/* Mapping from cpu number to logical apicid */
-static int es7000_cpu_to_logical_apicid(int cpu)
-{
-#ifdef CONFIG_SMP
-	if (cpu >= nr_cpu_ids)
-		return BAD_APICID;
-	return cpu_2_logical_apicid[cpu];
-#else
-	return logical_smp_processor_id();
-#endif
-}
-
 static void es7000_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
 {
 	/* For clustered we don't have a good way to do this yet - hack */
@@ -561,7 +549,7 @@ static unsigned int es7000_cpu_mask_to_apicid(const struct cpumask *cpumask)
 	 * The cpus in the mask must all be on the apic cluster.
 	 */
 	for_each_cpu(cpu, cpumask) {
-		int new_apicid = es7000_cpu_to_logical_apicid(cpu);
+		int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
 
 		if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
 			WARN(1, "Not a valid mask!");
@@ -578,7 +566,7 @@ static unsigned int
 es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask,
 			      const struct cpumask *andmask)
 {
-	int apicid = es7000_cpu_to_logical_apicid(0);
+	int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
 	cpumask_var_t cpumask;
 
 	if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
@@ -632,7 +620,7 @@ static int es7000_mps_oem_check_cluster(struct mpc_table *mpc, char *oem,
 }
 
 /* We've been warned by a false positive warning.Use __refdata to keep calm. */
-struct apic __refdata apic_es7000_cluster = {
+static struct apic __refdata apic_es7000_cluster = {
 
 	.name				= "es7000",
 	.probe				= probe_es7000,
@@ -655,8 +643,6 @@ struct apic __refdata apic_es7000_cluster = {
 	.ioapic_phys_id_map		= es7000_ioapic_phys_id_map,
 	.setup_apic_routing		= es7000_setup_apic_routing,
 	.multi_timer_check		= NULL,
-	.apicid_to_node			= es7000_apicid_to_node,
-	.cpu_to_logical_apicid		= es7000_cpu_to_logical_apicid,
 	.cpu_present_to_apicid		= es7000_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= es7000_apicid_to_cpu_present,
 	.setup_portio_remap		= NULL,
@@ -695,9 +681,11 @@ struct apic __refdata apic_es7000_cluster = {
 	.icr_write			= native_apic_icr_write,
 	.wait_icr_idle			= native_apic_wait_icr_idle,
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
+
+	.x86_32_early_logical_apicid	= es7000_early_logical_apicid,
 };
 
-struct apic __refdata apic_es7000 = {
+static struct apic __refdata apic_es7000 = {
 
 	.name				= "es7000",
 	.probe				= probe_es7000,
@@ -720,8 +708,6 @@ struct apic __refdata apic_es7000 = {
 	.ioapic_phys_id_map		= es7000_ioapic_phys_id_map,
 	.setup_apic_routing		= es7000_setup_apic_routing,
 	.multi_timer_check		= NULL,
-	.apicid_to_node			= es7000_apicid_to_node,
-	.cpu_to_logical_apicid		= es7000_cpu_to_logical_apicid,
 	.cpu_present_to_apicid		= es7000_cpu_present_to_apicid,
 	.apicid_to_cpu_present		= es7000_apicid_to_cpu_present,
 	.setup_portio_remap		= NULL,
@@ -758,4 +744,12 @@ struct apic __refdata apic_es7000 = {
 	.icr_write			= native_apic_icr_write,
 	.wait_icr_idle			= native_apic_wait_icr_idle,
 	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
+
+	.x86_32_early_logical_apicid	= es7000_early_logical_apicid,
 };
+
+/*
+ * Need to check for es7000 followed by es7000_cluster, so this order
+ * in apic_drivers is important.
+ */
+apic_drivers(apic_es7000, apic_es7000_cluster);
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index cefd6942f0e9..d5e57db0f7be 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -16,20 +16,33 @@
16#include <linux/kprobes.h> 16#include <linux/kprobes.h>
17#include <linux/nmi.h> 17#include <linux/nmi.h>
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/delay.h>
19 20
21#ifdef CONFIG_HARDLOCKUP_DETECTOR
22u64 hw_nmi_get_sample_period(int watchdog_thresh)
23{
24 return (u64)(cpu_khz) * 1000 * watchdog_thresh;
25}
26#endif
27
28#ifdef arch_trigger_all_cpu_backtrace
20/* For reliability, we're prepared to waste bits here. */ 29/* For reliability, we're prepared to waste bits here. */
21static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; 30static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
22 31
23u64 hw_nmi_get_sample_period(void) 32/* "in progress" flag of arch_trigger_all_cpu_backtrace */
24{ 33static unsigned long backtrace_flag;
25 return (u64)(cpu_khz) * 1000 * 60;
26}
27 34
28#ifdef ARCH_HAS_NMI_WATCHDOG
29void arch_trigger_all_cpu_backtrace(void) 35void arch_trigger_all_cpu_backtrace(void)
30{ 36{
31 int i; 37 int i;
32 38
39 if (test_and_set_bit(0, &backtrace_flag))
40 /*
41 * If there is already a trigger_all_cpu_backtrace() in progress
42 * (backtrace_flag == 1), don't output duplicate CPU dump info.
43 */
44 return;
45
33 cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); 46 cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
34 47
35 printk(KERN_INFO "sending NMI to all CPUs:\n"); 48 printk(KERN_INFO "sending NMI to all CPUs:\n");
@@ -41,6 +54,9 @@ void arch_trigger_all_cpu_backtrace(void)
41 break; 54 break;
42 mdelay(1); 55 mdelay(1);
43 } 56 }
57
58 clear_bit(0, &backtrace_flag);
59 smp_mb__after_clear_bit();
44} 60}
45 61
46static int __kprobes 62static int __kprobes
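backtrace_flag, added above, is a one-shot guard: the first caller atomically claims it, concurrent callers bail out, and the flag is released once the dump completes. A hedged userspace analogue of the same protocol, using C11 atomics in place of test_and_set_bit()/clear_bit():

#include <stdatomic.h>
#include <stdio.h>

static atomic_flag in_progress = ATOMIC_FLAG_INIT;

static void trigger_all_cpu_backtrace(void)
{
	/* Only one dump at a time; a concurrent caller just returns. */
	if (atomic_flag_test_and_set(&in_progress)) {
		puts("backtrace already in progress, skipping");
		return;
	}

	puts("sending NMI to all CPUs (simulated)");

	/* Release the guard; the kernel pairs the clear with
	 * smp_mb__after_clear_bit() for the same ordering reasons. */
	atomic_flag_clear(&in_progress);
}

int main(void)
{
	trigger_all_cpu_backtrace();
	trigger_all_cpu_backtrace();	/* runs again once the first completed */
	return 0;
}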
@@ -49,11 +65,10 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
49{ 65{
50 struct die_args *args = __args; 66 struct die_args *args = __args;
51 struct pt_regs *regs; 67 struct pt_regs *regs;
52 int cpu = smp_processor_id(); 68 int cpu;
53 69
54 switch (cmd) { 70 switch (cmd) {
55 case DIE_NMI: 71 case DIE_NMI:
56 case DIE_NMI_IPI:
57 break; 72 break;
58 73
59 default: 74 default:
@@ -61,6 +76,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
61 } 76 }
62 77
63 regs = args->regs; 78 regs = args->regs;
79 cpu = smp_processor_id();
64 80
65 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { 81 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
66 static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED; 82 static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;
@@ -68,7 +84,6 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
68 arch_spin_lock(&lock); 84 arch_spin_lock(&lock);
69 printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); 85 printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
70 show_regs(regs); 86 show_regs(regs);
71 dump_stack();
72 arch_spin_unlock(&lock); 87 arch_spin_unlock(&lock);
73 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); 88 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
74 return NOTIFY_STOP; 89 return NOTIFY_STOP;
@@ -80,7 +95,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
80static __read_mostly struct notifier_block backtrace_notifier = { 95static __read_mostly struct notifier_block backtrace_notifier = {
81 .notifier_call = arch_trigger_all_cpu_backtrace_handler, 96 .notifier_call = arch_trigger_all_cpu_backtrace_handler,
82 .next = NULL, 97 .next = NULL,
83 .priority = 1 98 .priority = NMI_LOCAL_LOW_PRIOR,
84}; 99};
85 100
86static int __init register_trigger_all_cpu_backtrace(void) 101static int __init register_trigger_all_cpu_backtrace(void)
@@ -90,18 +105,3 @@ static int __init register_trigger_all_cpu_backtrace(void)
90} 105}
91early_initcall(register_trigger_all_cpu_backtrace); 106early_initcall(register_trigger_all_cpu_backtrace);
92#endif 107#endif
93
94/* STUB calls to mimic old nmi_watchdog behaviour */
95#if defined(CONFIG_X86_LOCAL_APIC)
96unsigned int nmi_watchdog = NMI_NONE;
97EXPORT_SYMBOL(nmi_watchdog);
98void acpi_nmi_enable(void) { return; }
99void acpi_nmi_disable(void) { return; }
100#endif
101atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
102EXPORT_SYMBOL(nmi_active);
103int unknown_nmi_panic;
104void cpu_nmi_set_wd_enabled(void) { return; }
105void stop_apic_nmi_watchdog(void *unused) { return; }
106void setup_apic_nmi_watchdog(void *unused) { return; }
107int __init check_nmi_watchdog(void) { return 0; }
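The reworked hw_nmi_get_sample_period() near the top of this file is plain scaling: cpu_khz * 1000 is cycles per second, and multiplying by watchdog_thresh (in seconds) gives the cycle budget for one watchdog window. A quick standalone check of that arithmetic, with a made-up clock value:

#include <stdint.h>
#include <stdio.h>

/* Mirrors the computation above; 2400000 kHz = 2.4 GHz (illustrative). */
static uint64_t sample_period(unsigned int cpu_khz, int watchdog_thresh)
{
	return (uint64_t)cpu_khz * 1000 * watchdog_thresh;
}

int main(void)
{
	/* 2.4 GHz, 10 s threshold -> 24e9 cycles, which overflows 32 bits;
	 * hence the cast to a 64-bit type before multiplying. */
	printf("%llu cycles per window\n",
	       (unsigned long long)sample_period(2400000, 10));
	return 0;
}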
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 5c5b8f3dddb5..e5293394b548 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -30,7 +30,7 @@
30#include <linux/compiler.h> 30#include <linux/compiler.h>
31#include <linux/acpi.h> 31#include <linux/acpi.h>
32#include <linux/module.h> 32#include <linux/module.h>
33#include <linux/sysdev.h> 33#include <linux/syscore_ops.h>
34#include <linux/msi.h> 34#include <linux/msi.h>
35#include <linux/htirq.h> 35#include <linux/htirq.h>
36#include <linux/freezer.h> 36#include <linux/freezer.h>
@@ -54,7 +54,6 @@
54#include <asm/dma.h> 54#include <asm/dma.h>
55#include <asm/timer.h> 55#include <asm/timer.h>
56#include <asm/i8259.h> 56#include <asm/i8259.h>
57#include <asm/nmi.h>
58#include <asm/msidef.h> 57#include <asm/msidef.h>
59#include <asm/hypertransport.h> 58#include <asm/hypertransport.h>
60#include <asm/setup.h> 59#include <asm/setup.h>
@@ -77,17 +76,40 @@ int sis_apic_bug = -1;
77static DEFINE_RAW_SPINLOCK(ioapic_lock); 76static DEFINE_RAW_SPINLOCK(ioapic_lock);
78static DEFINE_RAW_SPINLOCK(vector_lock); 77static DEFINE_RAW_SPINLOCK(vector_lock);
79 78
80/* 79static struct ioapic {
81 * # of IRQ routing registers 80 /*
82 */ 81 * # of IRQ routing registers
83int nr_ioapic_registers[MAX_IO_APICS]; 82 */
83 int nr_registers;
84 /*
85 * Saved state during suspend/resume, or while enabling intr-remap.
86 */
87 struct IO_APIC_route_entry *saved_registers;
88 /* I/O APIC config */
89 struct mpc_ioapic mp_config;
90 /* IO APIC gsi routing info */
91 struct mp_ioapic_gsi gsi_config;
92 DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
93} ioapics[MAX_IO_APICS];
84 94
85/* I/O APIC entries */ 95#define mpc_ioapic_ver(id) ioapics[id].mp_config.apicver
86struct mpc_ioapic mp_ioapics[MAX_IO_APICS];
87int nr_ioapics;
88 96
89/* IO APIC gsi routing info */ 97int mpc_ioapic_id(int id)
90struct mp_ioapic_gsi mp_gsi_routing[MAX_IO_APICS]; 98{
99 return ioapics[id].mp_config.apicid;
100}
101
102unsigned int mpc_ioapic_addr(int id)
103{
104 return ioapics[id].mp_config.apicaddr;
105}
106
107struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int id)
108{
109 return &ioapics[id].gsi_config;
110}
111
112int nr_ioapics;
91 113
92/* The one past the highest gsi number used */ 114/* The one past the highest gsi number used */
93u32 gsi_top; 115u32 gsi_top;
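This hunk folds the old parallel arrays (nr_ioapic_registers[], mp_ioapics[], mp_gsi_routing[]) into a single struct ioapic array and exports the fields through small accessor functions. The same before/after shape in miniature, with invented names:

#include <stdio.h>

#define MAX_DEVS 4

/* One struct keeps each device's fields together... */
static struct dev {
	int nr_registers;
	int apicid;
} devs[MAX_DEVS];

/* ...and accessors replace direct pokes like mp_ioapics[i].apicid. */
static int dev_id(int i)   { return devs[i].apicid; }
static int dev_regs(int i) { return devs[i].nr_registers; }

int main(void)
{
	devs[0] = (struct dev){ .nr_registers = 24, .apicid = 2 };
	printf("dev 0: id %d, %d registers\n", dev_id(0), dev_regs(0));
	return 0;
}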
@@ -109,7 +131,10 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
109 131
110int skip_ioapic_setup; 132int skip_ioapic_setup;
111 133
112void arch_disable_smp_support(void) 134/**
135 * disable_ioapic_support() - disables ioapic support at runtime
136 */
137void disable_ioapic_support(void)
113{ 138{
114#ifdef CONFIG_PCI 139#ifdef CONFIG_PCI
115 noioapicquirk = 1; 140 noioapicquirk = 1;
@@ -121,25 +146,45 @@ void arch_disable_smp_support(void)
121static int __init parse_noapic(char *str) 146static int __init parse_noapic(char *str)
122{ 147{
123 /* disable IO-APIC */ 148 /* disable IO-APIC */
124 arch_disable_smp_support(); 149 disable_ioapic_support();
125 return 0; 150 return 0;
126} 151}
127early_param("noapic", parse_noapic); 152early_param("noapic", parse_noapic);
128 153
154static int io_apic_setup_irq_pin(unsigned int irq, int node,
155 struct io_apic_irq_attr *attr);
156
157/* Will be called in mpparse/acpi/sfi codes for saving IRQ info */
158void mp_save_irq(struct mpc_intsrc *m)
159{
160 int i;
161
162 apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
163 " IRQ %02x, APIC ID %x, APIC INT %02x\n",
164 m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus,
165 m->srcbusirq, m->dstapic, m->dstirq);
166
167 for (i = 0; i < mp_irq_entries; i++) {
168 if (!memcmp(&mp_irqs[i], m, sizeof(*m)))
169 return;
170 }
171
172 memcpy(&mp_irqs[mp_irq_entries], m, sizeof(*m));
173 if (++mp_irq_entries == MAX_IRQ_SOURCES)
174 panic("Max # of irq sources exceeded!!\n");
175}
176
129struct irq_pin_list { 177struct irq_pin_list {
130 int apic, pin; 178 int apic, pin;
131 struct irq_pin_list *next; 179 struct irq_pin_list *next;
132}; 180};
133 181
134static struct irq_pin_list *get_one_free_irq_2_pin(int node) 182static struct irq_pin_list *alloc_irq_pin_list(int node)
135{ 183{
136 struct irq_pin_list *pin; 184 return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node);
137
138 pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
139
140 return pin;
141} 185}
142 186
187
143/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ 188/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
144#ifdef CONFIG_SPARSE_IRQ 189#ifdef CONFIG_SPARSE_IRQ
145static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY]; 190static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY];
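mp_save_irq() above is a dedup-then-append helper: it memcmp()s the incoming record against every saved entry, appends only when the record is new, and panics if the fixed-size table fills. The same pattern as a self-contained sketch (types and limits are illustrative):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_SOURCES 8	/* stands in for MAX_IRQ_SOURCES */

struct intsrc { int type, bus, irq; };

static struct intsrc saved[MAX_SOURCES];
static int nr_saved;

static void save_irq(const struct intsrc *m)
{
	for (int i = 0; i < nr_saved; i++)
		if (!memcmp(&saved[i], m, sizeof(*m)))
			return;		/* duplicate: already recorded */

	memcpy(&saved[nr_saved], m, sizeof(*m));
	if (++nr_saved == MAX_SOURCES) {
		fprintf(stderr, "max # of irq sources exceeded\n");
		exit(1);	/* the kernel panics at this point */
	}
}

int main(void)
{
	struct intsrc a = { 0, 1, 4 };

	save_irq(&a);
	save_irq(&a);	/* ignored as a duplicate */
	printf("%d entry saved\n", nr_saved);
	return 0;
}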
@@ -150,25 +195,32 @@ static struct irq_cfg irq_cfgx[NR_IRQS];
150int __init arch_early_irq_init(void) 195int __init arch_early_irq_init(void)
151{ 196{
152 struct irq_cfg *cfg; 197 struct irq_cfg *cfg;
153 struct irq_desc *desc; 198 int count, node, i;
154 int count;
155 int node;
156 int i;
157 199
158 if (!legacy_pic->nr_legacy_irqs) { 200 if (!legacy_pic->nr_legacy_irqs) {
159 nr_irqs_gsi = 0; 201 nr_irqs_gsi = 0;
160 io_apic_irqs = ~0UL; 202 io_apic_irqs = ~0UL;
161 } 203 }
162 204
205 for (i = 0; i < nr_ioapics; i++) {
206 ioapics[i].saved_registers =
207 kzalloc(sizeof(struct IO_APIC_route_entry) *
208 ioapics[i].nr_registers, GFP_KERNEL);
209 if (!ioapics[i].saved_registers)
210 pr_err("IOAPIC %d: suspend/resume impossible!\n", i);
211 }
212
163 cfg = irq_cfgx; 213 cfg = irq_cfgx;
164 count = ARRAY_SIZE(irq_cfgx); 214 count = ARRAY_SIZE(irq_cfgx);
165 node= cpu_to_node(boot_cpu_id); 215 node = cpu_to_node(0);
216
217 /* Make sure the legacy interrupts are marked in the bitmap */
218 irq_reserve_irqs(0, legacy_pic->nr_legacy_irqs);
166 219
167 for (i = 0; i < count; i++) { 220 for (i = 0; i < count; i++) {
168 desc = irq_to_desc(i); 221 irq_set_chip_data(i, &cfg[i]);
169 desc->chip_data = &cfg[i]; 222 zalloc_cpumask_var_node(&cfg[i].domain, GFP_KERNEL, node);
170 zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node); 223 zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node);
171 zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
172 /* 224 /*
173 * For legacy IRQ's, start with assigning irq0 to irq15 to 225 * For legacy IRQ's, start with assigning irq0 to irq15 to
174 * IRQ0_VECTOR to IRQ15_VECTOR on cpu 0. 226 * IRQ0_VECTOR to IRQ15_VECTOR on cpu 0.
@@ -183,170 +235,88 @@ int __init arch_early_irq_init(void)
183} 235}
184 236
185#ifdef CONFIG_SPARSE_IRQ 237#ifdef CONFIG_SPARSE_IRQ
186struct irq_cfg *irq_cfg(unsigned int irq) 238static struct irq_cfg *irq_cfg(unsigned int irq)
187{ 239{
188 struct irq_cfg *cfg = NULL; 240 return irq_get_chip_data(irq);
189 struct irq_desc *desc;
190
191 desc = irq_to_desc(irq);
192 if (desc)
193 cfg = desc->chip_data;
194
195 return cfg;
196} 241}
197 242
198static struct irq_cfg *get_one_free_irq_cfg(int node) 243static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
199{ 244{
200 struct irq_cfg *cfg; 245 struct irq_cfg *cfg;
201 246
202 cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); 247 cfg = kzalloc_node(sizeof(*cfg), GFP_KERNEL, node);
203 if (cfg) { 248 if (!cfg)
204 if (!zalloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) { 249 return NULL;
205 kfree(cfg); 250 if (!zalloc_cpumask_var_node(&cfg->domain, GFP_KERNEL, node))
206 cfg = NULL; 251 goto out_cfg;
207 } else if (!zalloc_cpumask_var_node(&cfg->old_domain, 252 if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_KERNEL, node))
208 GFP_ATOMIC, node)) { 253 goto out_domain;
209 free_cpumask_var(cfg->domain);
210 kfree(cfg);
211 cfg = NULL;
212 }
213 }
214
215 return cfg; 254 return cfg;
255out_domain:
256 free_cpumask_var(cfg->domain);
257out_cfg:
258 kfree(cfg);
259 return NULL;
216} 260}
217 261
218int arch_init_chip_data(struct irq_desc *desc, int node) 262static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg)
219{
220 struct irq_cfg *cfg;
221
222 cfg = desc->chip_data;
223 if (!cfg) {
224 desc->chip_data = get_one_free_irq_cfg(node);
225 if (!desc->chip_data) {
226 printk(KERN_ERR "can not alloc irq_cfg\n");
227 BUG_ON(1);
228 }
229 }
230
231 return 0;
232}
233
234/* for move_irq_desc */
235static void
236init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int node)
237{ 263{
238 struct irq_pin_list *old_entry, *head, *tail, *entry; 264 if (!cfg)
239
240 cfg->irq_2_pin = NULL;
241 old_entry = old_cfg->irq_2_pin;
242 if (!old_entry)
243 return;
244
245 entry = get_one_free_irq_2_pin(node);
246 if (!entry)
247 return; 265 return;
266 irq_set_chip_data(at, NULL);
267 free_cpumask_var(cfg->domain);
268 free_cpumask_var(cfg->old_domain);
269 kfree(cfg);
270}
248 271
249 entry->apic = old_entry->apic; 272#else
250 entry->pin = old_entry->pin;
251 head = entry;
252 tail = entry;
253 old_entry = old_entry->next;
254 while (old_entry) {
255 entry = get_one_free_irq_2_pin(node);
256 if (!entry) {
257 entry = head;
258 while (entry) {
259 head = entry->next;
260 kfree(entry);
261 entry = head;
262 }
263 /* still use the old one */
264 return;
265 }
266 entry->apic = old_entry->apic;
267 entry->pin = old_entry->pin;
268 tail->next = entry;
269 tail = entry;
270 old_entry = old_entry->next;
271 }
272 273
273 tail->next = NULL; 274struct irq_cfg *irq_cfg(unsigned int irq)
274 cfg->irq_2_pin = head; 275{
276 return irq < nr_irqs ? irq_cfgx + irq : NULL;
275} 277}
276 278
277static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg) 279static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
278{ 280{
279 struct irq_pin_list *entry, *next; 281 return irq_cfgx + irq;
280 282}
281 if (old_cfg->irq_2_pin == cfg->irq_2_pin)
282 return;
283 283
284 entry = old_cfg->irq_2_pin; 284static inline void free_irq_cfg(unsigned int at, struct irq_cfg *cfg) { }
285 285
286 while (entry) { 286#endif
287 next = entry->next;
288 kfree(entry);
289 entry = next;
290 }
291 old_cfg->irq_2_pin = NULL;
292}
293 287
294void arch_init_copy_chip_data(struct irq_desc *old_desc, 288static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
295 struct irq_desc *desc, int node)
296{ 289{
290 int res = irq_alloc_desc_at(at, node);
297 struct irq_cfg *cfg; 291 struct irq_cfg *cfg;
298 struct irq_cfg *old_cfg;
299 292
300 cfg = get_one_free_irq_cfg(node); 293 if (res < 0) {
301 294 if (res != -EEXIST)
302 if (!cfg) 295 return NULL;
303 return; 296 cfg = irq_get_chip_data(at);
304 297 if (cfg)
305 desc->chip_data = cfg; 298 return cfg;
306 299 }
307 old_cfg = old_desc->chip_data;
308
309 cfg->vector = old_cfg->vector;
310 cfg->move_in_progress = old_cfg->move_in_progress;
311 cpumask_copy(cfg->domain, old_cfg->domain);
312 cpumask_copy(cfg->old_domain, old_cfg->old_domain);
313
314 init_copy_irq_2_pin(old_cfg, cfg, node);
315}
316 300
317static void free_irq_cfg(struct irq_cfg *cfg) 301 cfg = alloc_irq_cfg(at, node);
318{ 302 if (cfg)
319 free_cpumask_var(cfg->domain); 303 irq_set_chip_data(at, cfg);
320 free_cpumask_var(cfg->old_domain); 304 else
321 kfree(cfg); 305 irq_free_desc(at);
306 return cfg;
322} 307}
323 308
324void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) 309static int alloc_irq_from(unsigned int from, int node)
325{ 310{
326 struct irq_cfg *old_cfg, *cfg; 311 return irq_alloc_desc_from(from, node);
327
328 old_cfg = old_desc->chip_data;
329 cfg = desc->chip_data;
330
331 if (old_cfg == cfg)
332 return;
333
334 if (old_cfg) {
335 free_irq_2_pin(old_cfg, cfg);
336 free_irq_cfg(old_cfg);
337 old_desc->chip_data = NULL;
338 }
339} 312}
340/* end for move_irq_desc */
341 313
342#else 314static void free_irq_at(unsigned int at, struct irq_cfg *cfg)
343struct irq_cfg *irq_cfg(unsigned int irq)
344{ 315{
345 return irq < nr_irqs ? irq_cfgx + irq : NULL; 316 free_irq_cfg(at, cfg);
317 irq_free_desc(at);
346} 318}
347 319
348#endif
349
350struct io_apic { 320struct io_apic {
351 unsigned int index; 321 unsigned int index;
352 unsigned int unused[3]; 322 unsigned int unused[3];
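The rewritten alloc_irq_cfg() above swaps nested if/else cleanup for the kernel's goto-unwind idiom: each fallible allocation jumps to a label that releases everything acquired so far, in reverse order. A minimal userspace rendering of that shape:

#include <stdlib.h>

struct cfg { void *domain, *old_domain; };

/* Later failures free earlier allocations by falling through the
 * labels in reverse acquisition order, as in alloc_irq_cfg(). */
static struct cfg *alloc_cfg(void)
{
	struct cfg *cfg = calloc(1, sizeof(*cfg));

	if (!cfg)
		return NULL;
	cfg->domain = calloc(1, 64);
	if (!cfg->domain)
		goto out_cfg;
	cfg->old_domain = calloc(1, 64);
	if (!cfg->old_domain)
		goto out_domain;
	return cfg;

out_domain:
	free(cfg->domain);
out_cfg:
	free(cfg);
	return NULL;
}

int main(void)
{
	struct cfg *c = alloc_cfg();

	if (c) {
		free(c->old_domain);
		free(c->domain);
		free(c);
	}
	return 0;
}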
@@ -358,7 +328,7 @@ struct io_apic {
358static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) 328static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
359{ 329{
360 return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) 330 return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
361 + (mp_ioapics[idx].apicaddr & ~PAGE_MASK); 331 + (mpc_ioapic_addr(idx) & ~PAGE_MASK);
362} 332}
363 333
364static inline void io_apic_eoi(unsigned int apic, unsigned int vector) 334static inline void io_apic_eoi(unsigned int apic, unsigned int vector)
@@ -451,7 +421,7 @@ __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
451 io_apic_write(apic, 0x10 + 2*pin, eu.w1); 421 io_apic_write(apic, 0x10 + 2*pin, eu.w1);
452} 422}
453 423
454void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) 424static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
455{ 425{
456 unsigned long flags; 426 unsigned long flags;
457 raw_spin_lock_irqsave(&ioapic_lock, flags); 427 raw_spin_lock_irqsave(&ioapic_lock, flags);
@@ -481,7 +451,7 @@ static void ioapic_mask_entry(int apic, int pin)
481 * fast in the common case, and fast for shared ISA-space IRQs. 451 * fast in the common case, and fast for shared ISA-space IRQs.
482 */ 452 */
483static int 453static int
484add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin) 454__add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
485{ 455{
486 struct irq_pin_list **last, *entry; 456 struct irq_pin_list **last, *entry;
487 457
@@ -493,7 +463,7 @@ add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin)
493 last = &entry->next; 463 last = &entry->next;
494 } 464 }
495 465
496 entry = get_one_free_irq_2_pin(node); 466 entry = alloc_irq_pin_list(node);
497 if (!entry) { 467 if (!entry) {
498 printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n", 468 printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n",
499 node, apic, pin); 469 node, apic, pin);
@@ -508,7 +478,7 @@ add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin)
508 478
509static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) 479static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
510{ 480{
511 if (add_pin_to_irq_node_nopanic(cfg, node, apic, pin)) 481 if (__add_pin_to_irq_node(cfg, node, apic, pin))
512 panic("IO-APIC: failed to add irq-pin. Can not proceed\n"); 482 panic("IO-APIC: failed to add irq-pin. Can not proceed\n");
513} 483}
514 484
@@ -571,11 +541,6 @@ static void __unmask_and_level_IO_APIC_irq(struct irq_pin_list *entry)
571 IO_APIC_REDIR_LEVEL_TRIGGER, NULL); 541 IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
572} 542}
573 543
574static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
575{
576 io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
577}
578
579static void io_apic_sync(struct irq_pin_list *entry) 544static void io_apic_sync(struct irq_pin_list *entry)
580{ 545{
581 /* 546 /*
@@ -587,44 +552,37 @@ static void io_apic_sync(struct irq_pin_list *entry)
587 readl(&io_apic->data); 552 readl(&io_apic->data);
588} 553}
589 554
590static void __mask_IO_APIC_irq(struct irq_cfg *cfg) 555static void mask_ioapic(struct irq_cfg *cfg)
591{ 556{
557 unsigned long flags;
558
559 raw_spin_lock_irqsave(&ioapic_lock, flags);
592 io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); 560 io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
561 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
593} 562}
594 563
595static void mask_IO_APIC_irq_desc(struct irq_desc *desc) 564static void mask_ioapic_irq(struct irq_data *data)
596{ 565{
597 struct irq_cfg *cfg = desc->chip_data; 566 mask_ioapic(data->chip_data);
598 unsigned long flags; 567}
599
600 BUG_ON(!cfg);
601 568
602 raw_spin_lock_irqsave(&ioapic_lock, flags); 569static void __unmask_ioapic(struct irq_cfg *cfg)
603 __mask_IO_APIC_irq(cfg); 570{
604 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 571 io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
605} 572}
606 573
607static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) 574static void unmask_ioapic(struct irq_cfg *cfg)
608{ 575{
609 struct irq_cfg *cfg = desc->chip_data;
610 unsigned long flags; 576 unsigned long flags;
611 577
612 raw_spin_lock_irqsave(&ioapic_lock, flags); 578 raw_spin_lock_irqsave(&ioapic_lock, flags);
613 __unmask_IO_APIC_irq(cfg); 579 __unmask_ioapic(cfg);
614 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 580 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
615} 581}
616 582
617static void mask_IO_APIC_irq(unsigned int irq) 583static void unmask_ioapic_irq(struct irq_data *data)
618{ 584{
619 struct irq_desc *desc = irq_to_desc(irq); 585 unmask_ioapic(data->chip_data);
620
621 mask_IO_APIC_irq_desc(desc);
622}
623static void unmask_IO_APIC_irq(unsigned int irq)
624{
625 struct irq_desc *desc = irq_to_desc(irq);
626
627 unmask_IO_APIC_irq_desc(desc);
628} 586}
629 587
630static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) 588static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
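Note the naming convention the mask/unmask rework settles on: __unmask_ioapic() assumes the caller already holds ioapic_lock, while unmask_ioapic() is the self-locking wrapper around it. A sketch of that two-level pattern, with a pthread mutex standing in for the raw spinlock (link with -lpthread):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t ioapic_lock = PTHREAD_MUTEX_INITIALIZER;
static int masked = 1;

/* Double-underscore variant: caller must hold ioapic_lock. */
static void __unmask(void)
{
	masked = 0;
}

/* Public variant: wraps the raw helper in the lock. */
static void unmask(void)
{
	pthread_mutex_lock(&ioapic_lock);
	__unmask();
	pthread_mutex_unlock(&ioapic_lock);
}

int main(void)
{
	unmask();
	printf("masked=%d\n", masked);
	return 0;
}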
@@ -646,7 +604,7 @@ static void clear_IO_APIC (void)
646 int apic, pin; 604 int apic, pin;
647 605
648 for (apic = 0; apic < nr_ioapics; apic++) 606 for (apic = 0; apic < nr_ioapics; apic++)
649 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) 607 for (pin = 0; pin < ioapics[apic].nr_registers; pin++)
650 clear_IO_APIC_pin(apic, pin); 608 clear_IO_APIC_pin(apic, pin);
651} 609}
652 610
@@ -688,74 +646,43 @@ static int __init ioapic_pirq_setup(char *str)
688__setup("pirq=", ioapic_pirq_setup); 646__setup("pirq=", ioapic_pirq_setup);
689#endif /* CONFIG_X86_32 */ 647#endif /* CONFIG_X86_32 */
690 648
691struct IO_APIC_route_entry **alloc_ioapic_entries(void)
692{
693 int apic;
694 struct IO_APIC_route_entry **ioapic_entries;
695
696 ioapic_entries = kzalloc(sizeof(*ioapic_entries) * nr_ioapics,
697 GFP_ATOMIC);
698 if (!ioapic_entries)
699 return 0;
700
701 for (apic = 0; apic < nr_ioapics; apic++) {
702 ioapic_entries[apic] =
703 kzalloc(sizeof(struct IO_APIC_route_entry) *
704 nr_ioapic_registers[apic], GFP_ATOMIC);
705 if (!ioapic_entries[apic])
706 goto nomem;
707 }
708
709 return ioapic_entries;
710
711nomem:
712 while (--apic >= 0)
713 kfree(ioapic_entries[apic]);
714 kfree(ioapic_entries);
715
716 return 0;
717}
718
719/* 649/*
720 * Saves all the IO-APIC RTE's 650 * Saves all the IO-APIC RTE's
721 */ 651 */
722int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries) 652int save_ioapic_entries(void)
723{ 653{
724 int apic, pin; 654 int apic, pin;
725 655 int err = 0;
726 if (!ioapic_entries)
727 return -ENOMEM;
728 656
729 for (apic = 0; apic < nr_ioapics; apic++) { 657 for (apic = 0; apic < nr_ioapics; apic++) {
730 if (!ioapic_entries[apic]) 658 if (!ioapics[apic].saved_registers) {
731 return -ENOMEM; 659 err = -ENOMEM;
660 continue;
661 }
732 662
733 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) 663 for (pin = 0; pin < ioapics[apic].nr_registers; pin++)
734 ioapic_entries[apic][pin] = 664 ioapics[apic].saved_registers[pin] =
735 ioapic_read_entry(apic, pin); 665 ioapic_read_entry(apic, pin);
736 } 666 }
737 667
738 return 0; 668 return err;
739} 669}
740 670
741/* 671/*
742 * Mask all IO APIC entries. 672 * Mask all IO APIC entries.
743 */ 673 */
744void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries) 674void mask_ioapic_entries(void)
745{ 675{
746 int apic, pin; 676 int apic, pin;
747 677
748 if (!ioapic_entries)
749 return;
750
751 for (apic = 0; apic < nr_ioapics; apic++) { 678 for (apic = 0; apic < nr_ioapics; apic++) {
752 if (!ioapic_entries[apic]) 679 if (!ioapics[apic].saved_registers)
753 break; 680 continue;
754 681
755 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { 682 for (pin = 0; pin < ioapics[apic].nr_registers; pin++) {
756 struct IO_APIC_route_entry entry; 683 struct IO_APIC_route_entry entry;
757 684
758 entry = ioapic_entries[apic][pin]; 685 entry = ioapics[apic].saved_registers[pin];
759 if (!entry.mask) { 686 if (!entry.mask) {
760 entry.mask = 1; 687 entry.mask = 1;
761 ioapic_write_entry(apic, pin, entry); 688 ioapic_write_entry(apic, pin, entry);
@@ -765,36 +692,23 @@ void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries)
765} 692}
766 693
767/* 694/*
768 * Restore IO APIC entries which were saved in ioapic_entries. 695 * Restore IO APIC entries which were saved in the ioapic structure.
769 */ 696 */
770int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries) 697int restore_ioapic_entries(void)
771{ 698{
772 int apic, pin; 699 int apic, pin;
773 700
774 if (!ioapic_entries)
775 return -ENOMEM;
776
777 for (apic = 0; apic < nr_ioapics; apic++) { 701 for (apic = 0; apic < nr_ioapics; apic++) {
778 if (!ioapic_entries[apic]) 702 if (!ioapics[apic].saved_registers)
779 return -ENOMEM; 703 continue;
780 704
781 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) 705 for (pin = 0; pin < ioapics[apic].nr_registers; pin++)
782 ioapic_write_entry(apic, pin, 706 ioapic_write_entry(apic, pin,
783 ioapic_entries[apic][pin]); 707 ioapics[apic].saved_registers[pin]);
784 } 708 }
785 return 0; 709 return 0;
786} 710}
787 711
788void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries)
789{
790 int apic;
791
792 for (apic = 0; apic < nr_ioapics; apic++)
793 kfree(ioapic_entries[apic]);
794
795 kfree(ioapic_entries);
796}
797
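With the external ioapic_entries table and its alloc/free helpers gone, the suspend/resume buffers live in ioapics[].saved_registers, and the save and mask paths skip (rather than abort on) a controller whose buffer failed to allocate. The per-device skip-on-NULL pattern, condensed:

#include <stdio.h>
#include <stdlib.h>

#define NR_DEVS 2

struct dev {
	int nr_regs;
	int *saved;	/* NULL if the save buffer could not be allocated */
	int regs[4];
};

static struct dev devs[NR_DEVS] = {
	{ .nr_regs = 4 },
	{ .nr_regs = 4 },
};

static int save_all(void)
{
	int err = 0;

	for (int d = 0; d < NR_DEVS; d++) {
		if (!devs[d].saved) {
			err = -1;	/* note the failure, keep going */
			continue;
		}
		for (int r = 0; r < devs[d].nr_regs; r++)
			devs[d].saved[r] = devs[d].regs[r];
	}
	return err;
}

int main(void)
{
	devs[0].saved = calloc(4, sizeof(int));	/* dev 1 left unsaveable */
	printf("save_all() = %d\n", save_all());
	return 0;
}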
798/* 712/*
799 * Find the IRQ entry number of a certain pin. 713 * Find the IRQ entry number of a certain pin.
800 */ 714 */
@@ -804,7 +718,7 @@ static int find_irq_entry(int apic, int pin, int type)
804 718
805 for (i = 0; i < mp_irq_entries; i++) 719 for (i = 0; i < mp_irq_entries; i++)
806 if (mp_irqs[i].irqtype == type && 720 if (mp_irqs[i].irqtype == type &&
807 (mp_irqs[i].dstapic == mp_ioapics[apic].apicid || 721 (mp_irqs[i].dstapic == mpc_ioapic_id(apic) ||
808 mp_irqs[i].dstapic == MP_APIC_ALL) && 722 mp_irqs[i].dstapic == MP_APIC_ALL) &&
809 mp_irqs[i].dstirq == pin) 723 mp_irqs[i].dstirq == pin)
810 return i; 724 return i;
@@ -846,7 +760,7 @@ static int __init find_isa_irq_apic(int irq, int type)
846 if (i < mp_irq_entries) { 760 if (i < mp_irq_entries) {
847 int apic; 761 int apic;
848 for(apic = 0; apic < nr_ioapics; apic++) { 762 for(apic = 0; apic < nr_ioapics; apic++) {
849 if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic) 763 if (mpc_ioapic_id(apic) == mp_irqs[i].dstapic)
850 return apic; 764 return apic;
851 } 765 }
852 } 766 }
@@ -897,7 +811,7 @@ static int EISA_ELCR(unsigned int irq)
897#define default_MCA_trigger(idx) (1) 811#define default_MCA_trigger(idx) (1)
898#define default_MCA_polarity(idx) default_ISA_polarity(idx) 812#define default_MCA_polarity(idx) default_ISA_polarity(idx)
899 813
900static int MPBIOS_polarity(int idx) 814static int irq_polarity(int idx)
901{ 815{
902 int bus = mp_irqs[idx].srcbus; 816 int bus = mp_irqs[idx].srcbus;
903 int polarity; 817 int polarity;
@@ -939,7 +853,7 @@ static int MPBIOS_polarity(int idx)
939 return polarity; 853 return polarity;
940} 854}
941 855
942static int MPBIOS_trigger(int idx) 856static int irq_trigger(int idx)
943{ 857{
944 int bus = mp_irqs[idx].srcbus; 858 int bus = mp_irqs[idx].srcbus;
945 int trigger; 859 int trigger;
@@ -1011,20 +925,11 @@ static int MPBIOS_trigger(int idx)
1011 return trigger; 925 return trigger;
1012} 926}
1013 927
1014static inline int irq_polarity(int idx)
1015{
1016 return MPBIOS_polarity(idx);
1017}
1018
1019static inline int irq_trigger(int idx)
1020{
1021 return MPBIOS_trigger(idx);
1022}
1023
1024static int pin_2_irq(int idx, int apic, int pin) 928static int pin_2_irq(int idx, int apic, int pin)
1025{ 929{
1026 int irq; 930 int irq;
1027 int bus = mp_irqs[idx].srcbus; 931 int bus = mp_irqs[idx].srcbus;
932 struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(apic);
1028 933
1029 /* 934 /*
1030 * Debugging check, we are in big trouble if this message pops up! 935 * Debugging check, we are in big trouble if this message pops up!
@@ -1035,7 +940,7 @@ static int pin_2_irq(int idx, int apic, int pin)
1035 if (test_bit(bus, mp_bus_not_pci)) { 940 if (test_bit(bus, mp_bus_not_pci)) {
1036 irq = mp_irqs[idx].srcbusirq; 941 irq = mp_irqs[idx].srcbusirq;
1037 } else { 942 } else {
1038 u32 gsi = mp_gsi_routing[apic].gsi_base + pin; 943 u32 gsi = gsi_cfg->gsi_base + pin;
1039 944
1040 if (gsi >= NR_IRQS_LEGACY) 945 if (gsi >= NR_IRQS_LEGACY)
1041 irq = gsi; 946 irq = gsi;
@@ -1086,7 +991,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
1086 int lbus = mp_irqs[i].srcbus; 991 int lbus = mp_irqs[i].srcbus;
1087 992
1088 for (apic = 0; apic < nr_ioapics; apic++) 993 for (apic = 0; apic < nr_ioapics; apic++)
1089 if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic || 994 if (mpc_ioapic_id(apic) == mp_irqs[i].dstapic ||
1090 mp_irqs[i].dstapic == MP_APIC_ALL) 995 mp_irqs[i].dstapic == MP_APIC_ALL)
1091 break; 996 break;
1092 997
@@ -1259,7 +1164,6 @@ void __setup_vector_irq(int cpu)
1259 /* Initialize vector_irq on a new cpu */ 1164 /* Initialize vector_irq on a new cpu */
1260 int irq, vector; 1165 int irq, vector;
1261 struct irq_cfg *cfg; 1166 struct irq_cfg *cfg;
1262 struct irq_desc *desc;
1263 1167
1264 /* 1168 /*
1265 * vector_lock will make sure that we don't run into irq vector 1169 * vector_lock will make sure that we don't run into irq vector
@@ -1268,9 +1172,10 @@ void __setup_vector_irq(int cpu)
1268 */ 1172 */
1269 raw_spin_lock(&vector_lock); 1173 raw_spin_lock(&vector_lock);
1270 /* Mark the inuse vectors */ 1174 /* Mark the inuse vectors */
1271 for_each_irq_desc(irq, desc) { 1175 for_each_active_irq(irq) {
1272 cfg = desc->chip_data; 1176 cfg = irq_get_chip_data(irq);
1273 1177 if (!cfg)
1178 continue;
1274 /* 1179 /*
1275 * If it is a legacy IRQ handled by the legacy PIC, this cpu 1180 * If it is a legacy IRQ handled by the legacy PIC, this cpu
1276 * will be part of the irq_cfg's domain. 1181 * will be part of the irq_cfg's domain.
@@ -1299,17 +1204,13 @@ void __setup_vector_irq(int cpu)
1299static struct irq_chip ioapic_chip; 1204static struct irq_chip ioapic_chip;
1300static struct irq_chip ir_ioapic_chip; 1205static struct irq_chip ir_ioapic_chip;
1301 1206
1302#define IOAPIC_AUTO -1
1303#define IOAPIC_EDGE 0
1304#define IOAPIC_LEVEL 1
1305
1306#ifdef CONFIG_X86_32 1207#ifdef CONFIG_X86_32
1307static inline int IO_APIC_irq_trigger(int irq) 1208static inline int IO_APIC_irq_trigger(int irq)
1308{ 1209{
1309 int apic, idx, pin; 1210 int apic, idx, pin;
1310 1211
1311 for (apic = 0; apic < nr_ioapics; apic++) { 1212 for (apic = 0; apic < nr_ioapics; apic++) {
1312 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { 1213 for (pin = 0; pin < ioapics[apic].nr_registers; pin++) {
1313 idx = find_irq_entry(apic, pin, mp_INT); 1214 idx = find_irq_entry(apic, pin, mp_INT);
1314 if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) 1215 if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
1315 return irq_trigger(idx); 1216 return irq_trigger(idx);
@@ -1327,41 +1228,37 @@ static inline int IO_APIC_irq_trigger(int irq)
1327} 1228}
1328#endif 1229#endif
1329 1230
1330static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger) 1231static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg,
1232 unsigned long trigger)
1331{ 1233{
1234 struct irq_chip *chip = &ioapic_chip;
1235 irq_flow_handler_t hdl;
1236 bool fasteoi;
1332 1237
1333 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || 1238 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1334 trigger == IOAPIC_LEVEL) 1239 trigger == IOAPIC_LEVEL) {
1335 desc->status |= IRQ_LEVEL; 1240 irq_set_status_flags(irq, IRQ_LEVEL);
1336 else 1241 fasteoi = true;
1337 desc->status &= ~IRQ_LEVEL; 1242 } else {
1338 1243 irq_clear_status_flags(irq, IRQ_LEVEL);
1339 if (irq_remapped(irq)) { 1244 fasteoi = false;
1340 desc->status |= IRQ_MOVE_PCNTXT;
1341 if (trigger)
1342 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
1343 handle_fasteoi_irq,
1344 "fasteoi");
1345 else
1346 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
1347 handle_edge_irq, "edge");
1348 return;
1349 } 1245 }
1350 1246
1351 if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || 1247 if (irq_remapped(cfg)) {
1352 trigger == IOAPIC_LEVEL) 1248 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
1353 set_irq_chip_and_handler_name(irq, &ioapic_chip, 1249 chip = &ir_ioapic_chip;
1354 handle_fasteoi_irq, 1250 fasteoi = trigger != 0;
1355 "fasteoi"); 1251 }
1356 else 1252
1357 set_irq_chip_and_handler_name(irq, &ioapic_chip, 1253 hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq;
1358 handle_edge_irq, "edge"); 1254 irq_set_chip_and_handler_name(irq, chip, hdl,
1255 fasteoi ? "fasteoi" : "edge");
1359} 1256}
1360 1257
1361int setup_ioapic_entry(int apic_id, int irq, 1258static int setup_ioapic_entry(int apic_id, int irq,
1362 struct IO_APIC_route_entry *entry, 1259 struct IO_APIC_route_entry *entry,
1363 unsigned int destination, int trigger, 1260 unsigned int destination, int trigger,
1364 int polarity, int vector, int pin) 1261 int polarity, int vector, int pin)
1365{ 1262{
1366 /* 1263 /*
1367 * add it to the IO-APIC irq-routing table: 1264 * add it to the IO-APIC irq-routing table:
@@ -1382,21 +1279,7 @@ int setup_ioapic_entry(int apic_id, int irq,
1382 if (index < 0) 1279 if (index < 0)
1383 panic("Failed to allocate IRTE for ioapic %d\n", apic_id); 1280 panic("Failed to allocate IRTE for ioapic %d\n", apic_id);
1384 1281
1385 memset(&irte, 0, sizeof(irte)); 1282 prepare_irte(&irte, vector, destination);
1386
1387 irte.present = 1;
1388 irte.dst_mode = apic->irq_dest_mode;
1389 /*
1390 * Trigger mode in the IRTE will always be edge, and the
1391 * actual level or edge trigger will be setup in the IO-APIC
1392 * RTE. This will help simplify level triggered irq migration.
1393 * For more details, see the comments above explaining IO-APIC
1394 * irq migration in the presence of interrupt-remapping.
1395 */
1396 irte.trigger_mode = 0;
1397 irte.dlvry_mode = apic->irq_delivery_mode;
1398 irte.vector = vector;
1399 irte.dest_id = IRTE_DEST(destination);
1400 1283
1401 /* Set source-id of interrupt request */ 1284 /* Set source-id of interrupt request */
1402 set_ioapic_sid(&irte, apic_id); 1285 set_ioapic_sid(&irte, apic_id);
@@ -1431,18 +1314,14 @@ int setup_ioapic_entry(int apic_id, int irq,
1431 return 0; 1314 return 0;
1432} 1315}
1433 1316
1434static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq_desc *desc, 1317static void setup_ioapic_irq(int apic_id, int pin, unsigned int irq,
1435 int trigger, int polarity) 1318 struct irq_cfg *cfg, int trigger, int polarity)
1436{ 1319{
1437 struct irq_cfg *cfg;
1438 struct IO_APIC_route_entry entry; 1320 struct IO_APIC_route_entry entry;
1439 unsigned int dest; 1321 unsigned int dest;
1440 1322
1441 if (!IO_APIC_IRQ(irq)) 1323 if (!IO_APIC_IRQ(irq))
1442 return; 1324 return;
1443
1444 cfg = desc->chip_data;
1445
1446 /* 1325 /*
1447 * For legacy irqs, cfg->domain starts with cpu 0 for legacy 1326 * For legacy irqs, cfg->domain starts with cpu 0 for legacy
1448 * controllers like 8259. Now that IO-APIC can handle this irq, update 1327 * controllers like 8259. Now that IO-APIC can handle this irq, update
@@ -1459,58 +1338,45 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
1459 apic_printk(APIC_VERBOSE,KERN_DEBUG 1338 apic_printk(APIC_VERBOSE,KERN_DEBUG
1460 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " 1339 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
1461 "IRQ %d Mode:%i Active:%i)\n", 1340 "IRQ %d Mode:%i Active:%i)\n",
1462 apic_id, mp_ioapics[apic_id].apicid, pin, cfg->vector, 1341 apic_id, mpc_ioapic_id(apic_id), pin, cfg->vector,
1463 irq, trigger, polarity); 1342 irq, trigger, polarity);
1464 1343
1465 1344
1466 if (setup_ioapic_entry(mp_ioapics[apic_id].apicid, irq, &entry, 1345 if (setup_ioapic_entry(mpc_ioapic_id(apic_id), irq, &entry,
1467 dest, trigger, polarity, cfg->vector, pin)) { 1346 dest, trigger, polarity, cfg->vector, pin)) {
1468 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", 1347 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
1469 mp_ioapics[apic_id].apicid, pin); 1348 mpc_ioapic_id(apic_id), pin);
1470 __clear_irq_vector(irq, cfg); 1349 __clear_irq_vector(irq, cfg);
1471 return; 1350 return;
1472 } 1351 }
1473 1352
1474 ioapic_register_intr(irq, desc, trigger); 1353 ioapic_register_intr(irq, cfg, trigger);
1475 if (irq < legacy_pic->nr_legacy_irqs) 1354 if (irq < legacy_pic->nr_legacy_irqs)
1476 legacy_pic->chip->mask(irq); 1355 legacy_pic->mask(irq);
1477 1356
1478 ioapic_write_entry(apic_id, pin, entry); 1357 ioapic_write_entry(apic_id, pin, entry);
1479} 1358}
1480 1359
1481static struct { 1360static bool __init io_apic_pin_not_connected(int idx, int apic_id, int pin)
1482 DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
1483} mp_ioapic_routing[MAX_IO_APICS];
1484
1485static void __init setup_IO_APIC_irqs(void)
1486{ 1361{
1487 int apic_id, pin, idx, irq; 1362 if (idx != -1)
1488 int notcon = 0; 1363 return false;
1489 struct irq_desc *desc;
1490 struct irq_cfg *cfg;
1491 int node = cpu_to_node(boot_cpu_id);
1492 1364
1493 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); 1365 apic_printk(APIC_VERBOSE, KERN_DEBUG " apic %d pin %d not connected\n",
1366 mpc_ioapic_id(apic_id), pin);
1367 return true;
1368}
1494 1369
1495 for (apic_id = 0; apic_id < nr_ioapics; apic_id++) 1370static void __init __io_apic_setup_irqs(unsigned int apic_id)
1496 for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { 1371{
1372 int idx, node = cpu_to_node(0);
1373 struct io_apic_irq_attr attr;
1374 unsigned int pin, irq;
1375
1376 for (pin = 0; pin < ioapics[apic_id].nr_registers; pin++) {
1497 idx = find_irq_entry(apic_id, pin, mp_INT); 1377 idx = find_irq_entry(apic_id, pin, mp_INT);
1498 if (idx == -1) { 1378 if (io_apic_pin_not_connected(idx, apic_id, pin))
1499 if (!notcon) {
1500 notcon = 1;
1501 apic_printk(APIC_VERBOSE,
1502 KERN_DEBUG " %d-%d",
1503 mp_ioapics[apic_id].apicid, pin);
1504 } else
1505 apic_printk(APIC_VERBOSE, " %d-%d",
1506 mp_ioapics[apic_id].apicid, pin);
1507 continue; 1379 continue;
1508 }
1509 if (notcon) {
1510 apic_printk(APIC_VERBOSE,
1511 " (apicid-pin) not connected\n");
1512 notcon = 0;
1513 }
1514 1380
1515 irq = pin_2_irq(idx, apic_id, pin); 1381 irq = pin_2_irq(idx, apic_id, pin);
1516 1382
@@ -1522,27 +1388,24 @@ static void __init setup_IO_APIC_irqs(void)
1522 * installed and if it returns 1: 1388 * installed and if it returns 1:
1523 */ 1389 */
1524 if (apic->multi_timer_check && 1390 if (apic->multi_timer_check &&
1525 apic->multi_timer_check(apic_id, irq)) 1391 apic->multi_timer_check(apic_id, irq))
1526 continue; 1392 continue;
1527 1393
1528 desc = irq_to_desc_alloc_node(irq, node); 1394 set_io_apic_irq_attr(&attr, apic_id, pin, irq_trigger(idx),
1529 if (!desc) { 1395 irq_polarity(idx));
1530 printk(KERN_INFO "can not get irq_desc for %d\n", irq); 1396
1531 continue; 1397 io_apic_setup_irq_pin(irq, node, &attr);
1532 }
1533 cfg = desc->chip_data;
1534 add_pin_to_irq_node(cfg, node, apic_id, pin);
1535 /*
1536 * don't mark it in pin_programmed, so later acpi could
1537 * set it correctly when irq < 16
1538 */
1539 setup_IO_APIC_irq(apic_id, pin, irq, desc,
1540 irq_trigger(idx), irq_polarity(idx));
1541 } 1398 }
1399}
1542 1400
1543 if (notcon) 1401static void __init setup_IO_APIC_irqs(void)
1544 apic_printk(APIC_VERBOSE, 1402{
1545 " (apicid-pin) not connected\n"); 1403 unsigned int apic_id;
1404
1405 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
1406
1407 for (apic_id = 0; apic_id < nr_ioapics; apic_id++)
1408 __io_apic_setup_irqs(apic_id);
1546} 1409}
1547 1410
1548/* 1411/*
@@ -1552,10 +1415,8 @@ static void __init setup_IO_APIC_irqs(void)
1552 */ 1415 */
1553void setup_IO_APIC_irq_extra(u32 gsi) 1416void setup_IO_APIC_irq_extra(u32 gsi)
1554{ 1417{
1555 int apic_id = 0, pin, idx, irq; 1418 int apic_id = 0, pin, idx, irq, node = cpu_to_node(0);
1556 int node = cpu_to_node(boot_cpu_id); 1419 struct io_apic_irq_attr attr;
1557 struct irq_desc *desc;
1558 struct irq_cfg *cfg;
1559 1420
1560 /* 1421 /*
1561 * Convert 'gsi' to 'ioapic.pin'. 1422 * Convert 'gsi' to 'ioapic.pin'.
@@ -1570,29 +1431,15 @@ void setup_IO_APIC_irq_extra(u32 gsi)
1570 return; 1431 return;
1571 1432
1572 irq = pin_2_irq(idx, apic_id, pin); 1433 irq = pin_2_irq(idx, apic_id, pin);
1573#ifdef CONFIG_SPARSE_IRQ
1574 desc = irq_to_desc(irq);
1575 if (desc)
1576 return;
1577#endif
1578 desc = irq_to_desc_alloc_node(irq, node);
1579 if (!desc) {
1580 printk(KERN_INFO "can not get irq_desc for %d\n", irq);
1581 return;
1582 }
1583 1434
1584 cfg = desc->chip_data; 1435 /* Only handle the non legacy irqs on secondary ioapics */
1585 add_pin_to_irq_node(cfg, node, apic_id, pin); 1436 if (apic_id == 0 || irq < NR_IRQS_LEGACY)
1586
1587 if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) {
1588 pr_debug("Pin %d-%d already programmed\n",
1589 mp_ioapics[apic_id].apicid, pin);
1590 return; 1437 return;
1591 }
1592 set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed);
1593 1438
1594 setup_IO_APIC_irq(apic_id, pin, irq, desc, 1439 set_io_apic_irq_attr(&attr, apic_id, pin, irq_trigger(idx),
1595 irq_trigger(idx), irq_polarity(idx)); 1440 irq_polarity(idx));
1441
1442 io_apic_setup_irq_pin_once(irq, node, &attr);
1596} 1443}
1597 1444
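io_apic_setup_irq_pin_once(), called above, evidently pairs the per-IOAPIC pin_programmed bitmap from the new struct ioapic with a set-if-unset test so each pin is configured at most once; its body is not shown in this hunk, so the following guard is an assumption about its shape, not a copy of it:

#include <stdio.h>

#define MAX_PINS 64

static unsigned long long pin_programmed;	/* one bit per pin */

/* Hypothetical once-guard: 0 on first setup, -1 on repeats. */
static int setup_pin_once(int pin)
{
	unsigned long long bit = 1ULL << pin;

	if (pin >= MAX_PINS || (pin_programmed & bit))
		return -1;	/* out of range or already programmed */
	pin_programmed |= bit;
	printf("programming pin %d\n", pin);
	return 0;
}

int main(void)
{
	setup_pin_once(9);
	setup_pin_once(9);	/* skipped the second time */
	return 0;
}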
1598/* 1445/*
@@ -1624,7 +1471,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin,
1624 * The timer IRQ doesn't have to know that behind the 1471 * The timer IRQ doesn't have to know that behind the
1625 * scene we may have a 8259A-master in AEOI mode ... 1472 * scene we may have a 8259A-master in AEOI mode ...
1626 */ 1473 */
1627 set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); 1474 irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq,
1475 "edge");
1628 1476
1629 /* 1477 /*
1630 * Add it to the IO-APIC irq-routing table: 1478 * Add it to the IO-APIC irq-routing table:
@@ -1642,13 +1490,12 @@ __apicdebuginit(void) print_IO_APIC(void)
1642 union IO_APIC_reg_03 reg_03; 1490 union IO_APIC_reg_03 reg_03;
1643 unsigned long flags; 1491 unsigned long flags;
1644 struct irq_cfg *cfg; 1492 struct irq_cfg *cfg;
1645 struct irq_desc *desc;
1646 unsigned int irq; 1493 unsigned int irq;
1647 1494
1648 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); 1495 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
1649 for (i = 0; i < nr_ioapics; i++) 1496 for (i = 0; i < nr_ioapics; i++)
1650 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", 1497 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
1651 mp_ioapics[i].apicid, nr_ioapic_registers[i]); 1498 mpc_ioapic_id(i), ioapics[i].nr_registers);
1652 1499
1653 /* 1500 /*
1654 * We are a bit conservative about what we expect. We have to 1501 * We are a bit conservative about what we expect. We have to
@@ -1668,7 +1515,7 @@ __apicdebuginit(void) print_IO_APIC(void)
1668 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 1515 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
1669 1516
1670 printk("\n"); 1517 printk("\n");
1671 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid); 1518 printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(apic));
1672 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); 1519 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
1673 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); 1520 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
1674 printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); 1521 printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
@@ -1729,10 +1576,10 @@ __apicdebuginit(void) print_IO_APIC(void)
1729 } 1576 }
1730 } 1577 }
1731 printk(KERN_DEBUG "IRQ to pin mappings:\n"); 1578 printk(KERN_DEBUG "IRQ to pin mappings:\n");
1732 for_each_irq_desc(irq, desc) { 1579 for_each_active_irq(irq) {
1733 struct irq_pin_list *entry; 1580 struct irq_pin_list *entry;
1734 1581
1735 cfg = desc->chip_data; 1582 cfg = irq_get_chip_data(irq);
1736 if (!cfg) 1583 if (!cfg)
1737 continue; 1584 continue;
1738 entry = cfg->irq_2_pin; 1585 entry = cfg->irq_2_pin;
@@ -1962,7 +1809,7 @@ void __init enable_IO_APIC(void)
1962 for(apic = 0; apic < nr_ioapics; apic++) { 1809 for(apic = 0; apic < nr_ioapics; apic++) {
1963 int pin; 1810 int pin;
1964 /* See if any of the pins is in ExtINT mode */ 1811 /* See if any of the pins is in ExtINT mode */
1965 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { 1812 for (pin = 0; pin < ioapics[apic].nr_registers; pin++) {
1966 struct IO_APIC_route_entry entry; 1813 struct IO_APIC_route_entry entry;
1967 entry = ioapic_read_entry(apic, pin); 1814 entry = ioapic_read_entry(apic, pin);
1968 1815
@@ -2023,7 +1870,7 @@ void disable_IO_APIC(void)
2023 * 1870 *
2024 * With interrupt-remapping, for now we will use virtual wire A mode, 1871 * With interrupt-remapping, for now we will use virtual wire A mode,
2025 * as virtual wire B is little complex (need to configure both 1872 * as virtual wire B is little complex (need to configure both
2026 * IOAPIC RTE aswell as interrupt-remapping table entry). 1873 * IOAPIC RTE as well as interrupt-remapping table entry).
2027 * As this gets called during crash dump, keep this simple for now. 1874 * As this gets called during crash dump, keep this simple for now.
2028 */ 1875 */
2029 if (ioapic_i8259.pin != -1 && !intr_remapping_enabled) { 1876 if (ioapic_i8259.pin != -1 && !intr_remapping_enabled) {
@@ -2061,8 +1908,7 @@ void disable_IO_APIC(void)
2061 * 1908 *
2062 * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999 1909 * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
2063 */ 1910 */
2064 1911void __init setup_ioapic_ids_from_mpc_nocheck(void)
2065void __init setup_ioapic_ids_from_mpc(void)
2066{ 1912{
2067 union IO_APIC_reg_00 reg_00; 1913 union IO_APIC_reg_00 reg_00;
2068 physid_mask_t phys_id_present_map; 1914 physid_mask_t phys_id_present_map;
@@ -2071,15 +1917,6 @@ void __init setup_ioapic_ids_from_mpc(void)
2071 unsigned char old_id; 1917 unsigned char old_id;
2072 unsigned long flags; 1918 unsigned long flags;
2073 1919
2074 if (acpi_ioapic)
2075 return;
2076 /*
2077 * Don't check I/O APIC IDs for xAPIC systems. They have
2078 * no meaning without the serial APIC bus.
2079 */
2080 if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
2081 || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
2082 return;
2083 /* 1920 /*
2084 * This is broken; anything with a real cpu count has to 1921 * This is broken; anything with a real cpu count has to
2085 * circumvent this idiocy regardless. 1922 * circumvent this idiocy regardless.
@@ -2096,14 +1933,14 @@ void __init setup_ioapic_ids_from_mpc(void)
2096 reg_00.raw = io_apic_read(apic_id, 0); 1933 reg_00.raw = io_apic_read(apic_id, 0);
2097 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 1934 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2098 1935
2099 old_id = mp_ioapics[apic_id].apicid; 1936 old_id = mpc_ioapic_id(apic_id);
2100 1937
2101 if (mp_ioapics[apic_id].apicid >= get_physical_broadcast()) { 1938 if (mpc_ioapic_id(apic_id) >= get_physical_broadcast()) {
2102 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", 1939 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
2103 apic_id, mp_ioapics[apic_id].apicid); 1940 apic_id, mpc_ioapic_id(apic_id));
2104 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", 1941 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
2105 reg_00.bits.ID); 1942 reg_00.bits.ID);
2106 mp_ioapics[apic_id].apicid = reg_00.bits.ID; 1943 ioapics[apic_id].mp_config.apicid = reg_00.bits.ID;
2107 } 1944 }
2108 1945
2109 /* 1946 /*
@@ -2112,9 +1949,9 @@ void __init setup_ioapic_ids_from_mpc(void)
2112 * 'stuck on smp_invalidate_needed IPI wait' messages. 1949 * 'stuck on smp_invalidate_needed IPI wait' messages.
2113 */ 1950 */
2114 if (apic->check_apicid_used(&phys_id_present_map, 1951 if (apic->check_apicid_used(&phys_id_present_map,
2115 mp_ioapics[apic_id].apicid)) { 1952 mpc_ioapic_id(apic_id))) {
2116 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", 1953 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
2117 apic_id, mp_ioapics[apic_id].apicid); 1954 apic_id, mpc_ioapic_id(apic_id));
2118 for (i = 0; i < get_physical_broadcast(); i++) 1955 for (i = 0; i < get_physical_broadcast(); i++)
2119 if (!physid_isset(i, phys_id_present_map)) 1956 if (!physid_isset(i, phys_id_present_map))
2120 break; 1957 break;
@@ -2123,36 +1960,39 @@ void __init setup_ioapic_ids_from_mpc(void)
2123 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", 1960 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
2124 i); 1961 i);
2125 physid_set(i, phys_id_present_map); 1962 physid_set(i, phys_id_present_map);
2126 mp_ioapics[apic_id].apicid = i; 1963 ioapics[apic_id].mp_config.apicid = i;
2127 } else { 1964 } else {
2128 physid_mask_t tmp; 1965 physid_mask_t tmp;
2129 apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid, &tmp); 1966 apic->apicid_to_cpu_present(mpc_ioapic_id(apic_id),
1967 &tmp);
2130 apic_printk(APIC_VERBOSE, "Setting %d in the " 1968 apic_printk(APIC_VERBOSE, "Setting %d in the "
2131 "phys_id_present_map\n", 1969 "phys_id_present_map\n",
2132 mp_ioapics[apic_id].apicid); 1970 mpc_ioapic_id(apic_id));
2133 physids_or(phys_id_present_map, phys_id_present_map, tmp); 1971 physids_or(phys_id_present_map, phys_id_present_map, tmp);
2134 } 1972 }
2135 1973
2136
2137 /* 1974 /*
2138 * We need to adjust the IRQ routing table 1975 * We need to adjust the IRQ routing table
2139 * if the ID changed. 1976 * if the ID changed.
2140 */ 1977 */
2141 if (old_id != mp_ioapics[apic_id].apicid) 1978 if (old_id != mpc_ioapic_id(apic_id))
2142 for (i = 0; i < mp_irq_entries; i++) 1979 for (i = 0; i < mp_irq_entries; i++)
2143 if (mp_irqs[i].dstapic == old_id) 1980 if (mp_irqs[i].dstapic == old_id)
2144 mp_irqs[i].dstapic 1981 mp_irqs[i].dstapic
2145 = mp_ioapics[apic_id].apicid; 1982 = mpc_ioapic_id(apic_id);
2146 1983
2147 /* 1984 /*
2148 * Read the right value from the MPC table and 1985 * Update the ID register according to the right value
2149 * write it into the ID register. 1986 * from the MPC table if they are different.
2150 */ 1987 */
1988 if (mpc_ioapic_id(apic_id) == reg_00.bits.ID)
1989 continue;
1990
2151 apic_printk(APIC_VERBOSE, KERN_INFO 1991 apic_printk(APIC_VERBOSE, KERN_INFO
2152 "...changing IO-APIC physical APIC ID to %d ...", 1992 "...changing IO-APIC physical APIC ID to %d ...",
2153 mp_ioapics[apic_id].apicid); 1993 mpc_ioapic_id(apic_id));
2154 1994
2155 reg_00.bits.ID = mp_ioapics[apic_id].apicid; 1995 reg_00.bits.ID = mpc_ioapic_id(apic_id);
2156 raw_spin_lock_irqsave(&ioapic_lock, flags); 1996 raw_spin_lock_irqsave(&ioapic_lock, flags);
2157 io_apic_write(apic_id, 0, reg_00.raw); 1997 io_apic_write(apic_id, 0, reg_00.raw);
2158 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 1998 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -2163,12 +2003,27 @@ void __init setup_ioapic_ids_from_mpc(void)
2163 raw_spin_lock_irqsave(&ioapic_lock, flags); 2003 raw_spin_lock_irqsave(&ioapic_lock, flags);
2164 reg_00.raw = io_apic_read(apic_id, 0); 2004 reg_00.raw = io_apic_read(apic_id, 0);
2165 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 2005 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2166 if (reg_00.bits.ID != mp_ioapics[apic_id].apicid) 2006 if (reg_00.bits.ID != mpc_ioapic_id(apic_id))
2167 printk("could not set ID!\n"); 2007 printk("could not set ID!\n");
2168 else 2008 else
2169 apic_printk(APIC_VERBOSE, " ok.\n"); 2009 apic_printk(APIC_VERBOSE, " ok.\n");
2170 } 2010 }
2171} 2011}
2012
2013void __init setup_ioapic_ids_from_mpc(void)
2014{
2015
2016 if (acpi_ioapic)
2017 return;
2018 /*
2019 * Don't check I/O APIC IDs for xAPIC systems. They have
2020 * no meaning without the serial APIC bus.
2021 */
2022 if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
2023 || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
2024 return;
2025 setup_ioapic_ids_from_mpc_nocheck();
2026}
2172#endif 2027#endif
2173 2028
2174int no_timer_check __initdata; 2029int no_timer_check __initdata;
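The split just above is a stock kernel refactor: the policy checks (acpi_ioapic, xAPIC detection) stay in the original entry point, while the mechanism moves into a _nocheck variant that other callers can reach directly. In outline, with invented guard values:

#include <stdbool.h>
#include <stdio.h>

static bool acpi_mode = true;	/* illustrative stand-ins for the guards */
static bool xapic = false;

/* Mechanism: does the work unconditionally. */
static void setup_ids_nocheck(void)
{
	puts("rewriting IO-APIC IDs from the MP table");
}

/* Policy: the original entry point keeps its early returns. */
static void setup_ids(void)
{
	if (acpi_mode)
		return;
	if (xapic)
		return;	/* IDs are meaningless without the serial APIC bus */
	setup_ids_nocheck();
}

int main(void)
{
	setup_ids();		/* guarded: does nothing here */
	setup_ids_nocheck();	/* a direct caller bypasses the guards */
	return 0;
}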
@@ -2239,29 +2094,26 @@ static int __init timer_irq_works(void)
2239 * an edge even if it isn't on the 8259A... 2094 * an edge even if it isn't on the 8259A...
2240 */ 2095 */
2241 2096
2242static unsigned int startup_ioapic_irq(unsigned int irq) 2097static unsigned int startup_ioapic_irq(struct irq_data *data)
2243{ 2098{
2244 int was_pending = 0; 2099 int was_pending = 0, irq = data->irq;
2245 unsigned long flags; 2100 unsigned long flags;
2246 struct irq_cfg *cfg;
2247 2101
2248 raw_spin_lock_irqsave(&ioapic_lock, flags); 2102 raw_spin_lock_irqsave(&ioapic_lock, flags);
2249 if (irq < legacy_pic->nr_legacy_irqs) { 2103 if (irq < legacy_pic->nr_legacy_irqs) {
2250 legacy_pic->chip->mask(irq); 2104 legacy_pic->mask(irq);
2251 if (legacy_pic->irq_pending(irq)) 2105 if (legacy_pic->irq_pending(irq))
2252 was_pending = 1; 2106 was_pending = 1;
2253 } 2107 }
2254 cfg = irq_cfg(irq); 2108 __unmask_ioapic(data->chip_data);
2255 __unmask_IO_APIC_irq(cfg);
2256 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 2109 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2257 2110
2258 return was_pending; 2111 return was_pending;
2259} 2112}
2260 2113
2261static int ioapic_retrigger_irq(unsigned int irq) 2114static int ioapic_retrigger_irq(struct irq_data *data)
2262{ 2115{
2263 2116 struct irq_cfg *cfg = data->chip_data;
2264 struct irq_cfg *cfg = irq_cfg(irq);
2265 unsigned long flags; 2117 unsigned long flags;
2266 2118
2267 raw_spin_lock_irqsave(&vector_lock, flags); 2119 raw_spin_lock_irqsave(&vector_lock, flags);
@@ -2312,7 +2164,7 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
2312 * With interrupt-remapping, destination information comes 2164 * With interrupt-remapping, destination information comes
2313 * from interrupt-remapping table entry. 2165 * from interrupt-remapping table entry.
2314 */ 2166 */
2315 if (!irq_remapped(irq)) 2167 if (!irq_remapped(cfg))
2316 io_apic_write(apic, 0x11 + pin*2, dest); 2168 io_apic_write(apic, 0x11 + pin*2, dest);
2317 reg = io_apic_read(apic, 0x10 + pin*2); 2169 reg = io_apic_read(apic, 0x10 + pin*2);
2318 reg &= ~IO_APIC_REDIR_VECTOR_MASK; 2170 reg &= ~IO_APIC_REDIR_VECTOR_MASK;
@@ -2322,65 +2174,46 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
2322} 2174}
2323 2175
2324/* 2176/*
2325 * Either sets desc->affinity to a valid value, and returns 2177 * Either sets data->affinity to a valid value, and returns
2326 * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and 2178 * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and
2327 * leaves desc->affinity untouched. 2179 * leaves data->affinity untouched.
2328 */ 2180 */
2329unsigned int 2181int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
2330set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask, 2182 unsigned int *dest_id)
2331 unsigned int *dest_id)
2332{ 2183{
2333 struct irq_cfg *cfg; 2184 struct irq_cfg *cfg = data->chip_data;
2334 unsigned int irq;
2335 2185
2336 if (!cpumask_intersects(mask, cpu_online_mask)) 2186 if (!cpumask_intersects(mask, cpu_online_mask))
2337 return -1; 2187 return -1;
2338 2188
2339 irq = desc->irq; 2189 if (assign_irq_vector(data->irq, data->chip_data, mask))
2340 cfg = desc->chip_data;
2341 if (assign_irq_vector(irq, cfg, mask))
2342 return -1; 2190 return -1;
2343 2191
2344 cpumask_copy(desc->affinity, mask); 2192 cpumask_copy(data->affinity, mask);
2345 2193
2346 *dest_id = apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain); 2194 *dest_id = apic->cpu_mask_to_apicid_and(mask, cfg->domain);
2347 return 0; 2195 return 0;
2348} 2196}
2349 2197
2350static int 2198static int
2351set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) 2199ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
2200 bool force)
2352{ 2201{
2353 struct irq_cfg *cfg; 2202 unsigned int dest, irq = data->irq;
2354 unsigned long flags; 2203 unsigned long flags;
2355 unsigned int dest; 2204 int ret;
2356 unsigned int irq;
2357 int ret = -1;
2358
2359 irq = desc->irq;
2360 cfg = desc->chip_data;
2361 2205
2362 raw_spin_lock_irqsave(&ioapic_lock, flags); 2206 raw_spin_lock_irqsave(&ioapic_lock, flags);
2363 ret = set_desc_affinity(desc, mask, &dest); 2207 ret = __ioapic_set_affinity(data, mask, &dest);
2364 if (!ret) { 2208 if (!ret) {
2365 /* Only the high 8 bits are valid. */ 2209 /* Only the high 8 bits are valid. */
2366 dest = SET_APIC_LOGICAL_ID(dest); 2210 dest = SET_APIC_LOGICAL_ID(dest);
2367 __target_IO_APIC_irq(irq, dest, cfg); 2211 __target_IO_APIC_irq(irq, dest, data->chip_data);
2368 } 2212 }
2369 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 2213 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2370
2371 return ret; 2214 return ret;
2372} 2215}
2373 2216
2374static int
2375set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
2376{
2377 struct irq_desc *desc;
2378
2379 desc = irq_to_desc(irq);
2380
2381 return set_ioapic_affinity_irq_desc(desc, mask);
2382}
2383
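This conversion follows the irq_chip overhaul: affinity callbacks now receive a struct irq_data that already carries the irq number, chip_data and affinity mask, so the irq_to_desc() lookup wrappers above can simply be deleted. Schematically, with the structure cut down to the fields this hunk touches:

#include <stdio.h>

struct irq_data {
	unsigned int irq;
	void *chip_data;	/* the irq_cfg in the real code */
	unsigned long affinity;
};

/* New-style callback: everything needed arrives in one argument. */
static int set_affinity(struct irq_data *data, unsigned long mask)
{
	if (!mask)
		return -1;	/* no online CPU in the mask */
	data->affinity = mask;
	return 0;
}

int main(void)
{
	struct irq_data d = { .irq = 9 };

	printf("ret=%d affinity=%#lx\n", set_affinity(&d, 0x3UL), d.affinity);
	return 0;
}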
2384#ifdef CONFIG_INTR_REMAP 2217#ifdef CONFIG_INTR_REMAP
2385 2218
2386/* 2219/*
@@ -2395,24 +2228,21 @@ set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
2395 * the interrupt-remapping table entry. 2228 * the interrupt-remapping table entry.
2396 */ 2229 */
2397static int 2230static int
2398migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) 2231ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
2232 bool force)
2399{ 2233{
2400 struct irq_cfg *cfg; 2234 struct irq_cfg *cfg = data->chip_data;
2235 unsigned int dest, irq = data->irq;
2401 struct irte irte; 2236 struct irte irte;
2402 unsigned int dest;
2403 unsigned int irq;
2404 int ret = -1;
2405 2237
2406 if (!cpumask_intersects(mask, cpu_online_mask)) 2238 if (!cpumask_intersects(mask, cpu_online_mask))
2407 return ret; 2239 return -EINVAL;
2408 2240
2409 irq = desc->irq;
2410 if (get_irte(irq, &irte)) 2241 if (get_irte(irq, &irte))
2411 return ret; 2242 return -EBUSY;
2412 2243
2413 cfg = desc->chip_data;
2414 if (assign_irq_vector(irq, cfg, mask)) 2244 if (assign_irq_vector(irq, cfg, mask))
2415 return ret; 2245 return -EBUSY;
2416 2246
2417 dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); 2247 dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
2418 2248
@@ -2427,29 +2257,14 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
2427 if (cfg->move_in_progress) 2257 if (cfg->move_in_progress)
2428 send_cleanup_vector(cfg); 2258 send_cleanup_vector(cfg);
2429 2259
2430 cpumask_copy(desc->affinity, mask); 2260 cpumask_copy(data->affinity, mask);
2431
2432 return 0; 2261 return 0;
2433} 2262}
2434 2263
2435/*
2436 * Migrates the IRQ destination in the process context.
2437 */
2438static int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
2439 const struct cpumask *mask)
2440{
2441 return migrate_ioapic_irq_desc(desc, mask);
2442}
2443static int set_ir_ioapic_affinity_irq(unsigned int irq,
2444 const struct cpumask *mask)
2445{
2446 struct irq_desc *desc = irq_to_desc(irq);
2447
2448 return set_ir_ioapic_affinity_irq_desc(desc, mask);
2449}
2450#else 2264#else
2451static inline int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, 2265static inline int
2452 const struct cpumask *mask) 2266ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
2267 bool force)
2453{ 2268{
2454 return 0; 2269 return 0;
2455} 2270}
@@ -2469,7 +2284,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
2469 unsigned int irr; 2284 unsigned int irr;
2470 struct irq_desc *desc; 2285 struct irq_desc *desc;
2471 struct irq_cfg *cfg; 2286 struct irq_cfg *cfg;
2472 irq = __get_cpu_var(vector_irq)[vector]; 2287 irq = __this_cpu_read(vector_irq[vector]);
2473 2288
2474 if (irq == -1) 2289 if (irq == -1)
2475 continue; 2290 continue;
@@ -2503,7 +2318,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
2503 apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR); 2318 apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
2504 goto unlock; 2319 goto unlock;
2505 } 2320 }
2506 __get_cpu_var(vector_irq)[vector] = -1; 2321 __this_cpu_write(vector_irq[vector], -1);
2507unlock: 2322unlock:
2508 raw_spin_unlock(&desc->lock); 2323 raw_spin_unlock(&desc->lock);
2509 } 2324 }
@@ -2511,10 +2326,8 @@ unlock:
2511 irq_exit(); 2326 irq_exit();
2512} 2327}
2513 2328
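The __get_cpu_var() to __this_cpu_read()/__this_cpu_write() switch in the cleanup handler above is an accessor change only: the __this_cpu_*() forms let the compiler fold the per-cpu offset into a single segment-relative memory operation instead of first materializing this CPU's base address. A hedged sketch with a made-up per-cpu variable:

DEFINE_PER_CPU(int, example_count);		/* illustrative only */

static void example(void)
{
	int v;

	v = __get_cpu_var(example_count);	/* old: compute address, then load */
	v = __this_cpu_read(example_count);	/* new: one %gs-relative load */
	__this_cpu_write(example_count, v + 1);
}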
2514static void __irq_complete_move(struct irq_desc **descp, unsigned vector) 2329static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
2515{ 2330{
2516 struct irq_desc *desc = *descp;
2517 struct irq_cfg *cfg = desc->chip_data;
2518 unsigned me; 2331 unsigned me;
2519 2332
2520 if (likely(!cfg->move_in_progress)) 2333 if (likely(!cfg->move_in_progress))
@@ -2526,31 +2339,28 @@ static void __irq_complete_move(struct irq_desc **descp, unsigned vector)
2526 send_cleanup_vector(cfg); 2339 send_cleanup_vector(cfg);
2527} 2340}
2528 2341
2529static void irq_complete_move(struct irq_desc **descp) 2342static void irq_complete_move(struct irq_cfg *cfg)
2530{ 2343{
2531 __irq_complete_move(descp, ~get_irq_regs()->orig_ax); 2344 __irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
2532} 2345}
2533 2346
2534void irq_force_complete_move(int irq) 2347void irq_force_complete_move(int irq)
2535{ 2348{
2536 struct irq_desc *desc = irq_to_desc(irq); 2349 struct irq_cfg *cfg = irq_get_chip_data(irq);
2537 struct irq_cfg *cfg = desc->chip_data;
2538 2350
2539 if (!cfg) 2351 if (!cfg)
2540 return; 2352 return;
2541 2353
2542 __irq_complete_move(&desc, cfg->vector); 2354 __irq_complete_move(cfg, cfg->vector);
2543} 2355}
2544#else 2356#else
2545static inline void irq_complete_move(struct irq_desc **descp) {} 2357static inline void irq_complete_move(struct irq_cfg *cfg) { }
2546#endif 2358#endif
2547 2359
2548static void ack_apic_edge(unsigned int irq) 2360static void ack_apic_edge(struct irq_data *data)
2549{ 2361{
2550 struct irq_desc *desc = irq_to_desc(irq); 2362 irq_complete_move(data->chip_data);
2551 2363 irq_move_irq(data);
2552 irq_complete_move(&desc);
2553 move_native_irq(irq);
2554 ack_APIC_irq(); 2364 ack_APIC_irq();
2555} 2365}
2556 2366
@@ -2572,19 +2382,21 @@ atomic_t irq_mis_count;
2572 * Otherwise, we simulate the EOI message manually by changing the trigger 2382 * Otherwise, we simulate the EOI message manually by changing the trigger
2573 * mode to edge and then back to level, with RTE being masked during this. 2383 * mode to edge and then back to level, with RTE being masked during this.
2574*/ 2384*/
2575static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) 2385static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
2576{ 2386{
2577 struct irq_pin_list *entry; 2387 struct irq_pin_list *entry;
2388 unsigned long flags;
2578 2389
2390 raw_spin_lock_irqsave(&ioapic_lock, flags);
2579 for_each_irq_pin(entry, cfg->irq_2_pin) { 2391 for_each_irq_pin(entry, cfg->irq_2_pin) {
2580 if (mp_ioapics[entry->apic].apicver >= 0x20) { 2392 if (mpc_ioapic_ver(entry->apic) >= 0x20) {
2581 /* 2393 /*
2582 * Intr-remapping uses pin number as the virtual vector 2394 * Intr-remapping uses pin number as the virtual vector
2583 * in the RTE. Actual vector is programmed in 2395 * in the RTE. Actual vector is programmed in
2584 * intr-remapping table entry. Hence for the io-apic 2396 * intr-remapping table entry. Hence for the io-apic
2585 * EOI we use the pin number. 2397 * EOI we use the pin number.
2586 */ 2398 */
2587 if (irq_remapped(irq)) 2399 if (irq_remapped(cfg))
2588 io_apic_eoi(entry->apic, entry->pin); 2400 io_apic_eoi(entry->apic, entry->pin);
2589 else 2401 else
2590 io_apic_eoi(entry->apic, cfg->vector); 2402 io_apic_eoi(entry->apic, cfg->vector);
@@ -2593,36 +2405,21 @@ static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
2593 __unmask_and_level_IO_APIC_irq(entry); 2405 __unmask_and_level_IO_APIC_irq(entry);
2594 } 2406 }
2595 } 2407 }
2596}
2597
2598static void eoi_ioapic_irq(struct irq_desc *desc)
2599{
2600 struct irq_cfg *cfg;
2601 unsigned long flags;
2602 unsigned int irq;
2603
2604 irq = desc->irq;
2605 cfg = desc->chip_data;
2606
2607 raw_spin_lock_irqsave(&ioapic_lock, flags);
2608 __eoi_ioapic_irq(irq, cfg);
2609 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 2408 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2610} 2409}
2611 2410
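eoi_ioapic_irq() above now takes ioapic_lock itself; for IO-APICs older than version 0x20, which have no EOI register, it falls back on the mask+edge/unmask+level trick described in the comment. A hedged sketch of that fallback, using the entry helpers defined elsewhere in io_apic.c (the caller is assumed to hold ioapic_lock):

static void example_manual_eoi(int apic, int pin)
{
	struct IO_APIC_route_entry entry = ioapic_read_entry(apic, pin);

	entry.mask = 1;			/* keep the pin quiet while flipping */
	entry.trigger = 0;		/* level -> edge clears Remote IRR */
	ioapic_write_entry(apic, pin, entry);

	entry.trigger = 1;		/* restore level trigger */
	entry.mask = 0;			/* and unmask */
	ioapic_write_entry(apic, pin, entry);
}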
2612static void ack_apic_level(unsigned int irq) 2411static void ack_apic_level(struct irq_data *data)
2613{ 2412{
2614 struct irq_desc *desc = irq_to_desc(irq); 2413 struct irq_cfg *cfg = data->chip_data;
2414 int i, do_unmask_irq = 0, irq = data->irq;
2615 unsigned long v; 2415 unsigned long v;
2616 int i;
2617 struct irq_cfg *cfg;
2618 int do_unmask_irq = 0;
2619 2416
2620 irq_complete_move(&desc); 2417 irq_complete_move(cfg);
2621#ifdef CONFIG_GENERIC_PENDING_IRQ 2418#ifdef CONFIG_GENERIC_PENDING_IRQ
2622 /* If we are moving the irq we need to mask it */ 2419 /* If we are moving the irq we need to mask it */
2623 if (unlikely(desc->status & IRQ_MOVE_PENDING)) { 2420 if (unlikely(irqd_is_setaffinity_pending(data))) {
2624 do_unmask_irq = 1; 2421 do_unmask_irq = 1;
2625 mask_IO_APIC_irq_desc(desc); 2422 mask_ioapic(cfg);
2626 } 2423 }
2627#endif 2424#endif
2628 2425
@@ -2658,7 +2455,6 @@ static void ack_apic_level(unsigned int irq)
2658 * we use the above logic (mask+edge followed by unmask+level) from 2455 * we use the above logic (mask+edge followed by unmask+level) from
2659 * Manfred Spraul to clear the remote IRR. 2456 * Manfred Spraul to clear the remote IRR.
2660 */ 2457 */
2661 cfg = desc->chip_data;
2662 i = cfg->vector; 2458 i = cfg->vector;
2663 v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); 2459 v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
2664 2460
@@ -2678,7 +2474,7 @@ static void ack_apic_level(unsigned int irq)
2678 if (!(v & (1 << (i & 0x1f)))) { 2474 if (!(v & (1 << (i & 0x1f)))) {
2679 atomic_inc(&irq_mis_count); 2475 atomic_inc(&irq_mis_count);
2680 2476
2681 eoi_ioapic_irq(desc); 2477 eoi_ioapic_irq(irq, cfg);
2682 } 2478 }
2683 2479
2684 /* Now we can move and renable the irq */ 2480 /* Now we can move and renable the irq */
@@ -2709,61 +2505,57 @@ static void ack_apic_level(unsigned int irq)
2709 * accurate and is causing problems then it is a hardware bug 2505 * accurate and is causing problems then it is a hardware bug
2710 * and you can go talk to the chipset vendor about it. 2506 * and you can go talk to the chipset vendor about it.
2711 */ 2507 */
2712 cfg = desc->chip_data;
2713 if (!io_apic_level_ack_pending(cfg)) 2508 if (!io_apic_level_ack_pending(cfg))
2714 move_masked_irq(irq); 2509 irq_move_masked_irq(data);
2715 unmask_IO_APIC_irq_desc(desc); 2510 unmask_ioapic(cfg);
2716 } 2511 }
2717} 2512}
2718 2513
2719#ifdef CONFIG_INTR_REMAP 2514#ifdef CONFIG_INTR_REMAP
2720static void ir_ack_apic_edge(unsigned int irq) 2515static void ir_ack_apic_edge(struct irq_data *data)
2721{ 2516{
2722 ack_APIC_irq(); 2517 ack_APIC_irq();
2723} 2518}
2724 2519
2725static void ir_ack_apic_level(unsigned int irq) 2520static void ir_ack_apic_level(struct irq_data *data)
2726{ 2521{
2727 struct irq_desc *desc = irq_to_desc(irq);
2728
2729 ack_APIC_irq(); 2522 ack_APIC_irq();
2730 eoi_ioapic_irq(desc); 2523 eoi_ioapic_irq(data->irq, data->chip_data);
2731} 2524}
2732#endif /* CONFIG_INTR_REMAP */ 2525#endif /* CONFIG_INTR_REMAP */
2733 2526
2734static struct irq_chip ioapic_chip __read_mostly = { 2527static struct irq_chip ioapic_chip __read_mostly = {
2735 .name = "IO-APIC", 2528 .name = "IO-APIC",
2736 .startup = startup_ioapic_irq, 2529 .irq_startup = startup_ioapic_irq,
2737 .mask = mask_IO_APIC_irq, 2530 .irq_mask = mask_ioapic_irq,
2738 .unmask = unmask_IO_APIC_irq, 2531 .irq_unmask = unmask_ioapic_irq,
2739 .ack = ack_apic_edge, 2532 .irq_ack = ack_apic_edge,
2740 .eoi = ack_apic_level, 2533 .irq_eoi = ack_apic_level,
2741#ifdef CONFIG_SMP 2534#ifdef CONFIG_SMP
2742 .set_affinity = set_ioapic_affinity_irq, 2535 .irq_set_affinity = ioapic_set_affinity,
2743#endif 2536#endif
2744 .retrigger = ioapic_retrigger_irq, 2537 .irq_retrigger = ioapic_retrigger_irq,
2745}; 2538};
2746 2539
2747static struct irq_chip ir_ioapic_chip __read_mostly = { 2540static struct irq_chip ir_ioapic_chip __read_mostly = {
2748 .name = "IR-IO-APIC", 2541 .name = "IR-IO-APIC",
2749 .startup = startup_ioapic_irq, 2542 .irq_startup = startup_ioapic_irq,
2750 .mask = mask_IO_APIC_irq, 2543 .irq_mask = mask_ioapic_irq,
2751 .unmask = unmask_IO_APIC_irq, 2544 .irq_unmask = unmask_ioapic_irq,
2752#ifdef CONFIG_INTR_REMAP 2545#ifdef CONFIG_INTR_REMAP
2753 .ack = ir_ack_apic_edge, 2546 .irq_ack = ir_ack_apic_edge,
2754 .eoi = ir_ack_apic_level, 2547 .irq_eoi = ir_ack_apic_level,
2755#ifdef CONFIG_SMP 2548#ifdef CONFIG_SMP
2756 .set_affinity = set_ir_ioapic_affinity_irq, 2549 .irq_set_affinity = ir_ioapic_set_affinity,
2757#endif 2550#endif
2758#endif 2551#endif
2759 .retrigger = ioapic_retrigger_irq, 2552 .irq_retrigger = ioapic_retrigger_irq,
2760}; 2553};
2761 2554
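The two chip tables above show the mechanical half of the genirq migration: every method gains an irq_ prefix and takes struct irq_data instead of an IRQ number. Roughly, the prototype change as it stood in the generic IRQ layer of this era:

/* old-style irq_chip methods: keyed by bare IRQ number */
void (*mask)(unsigned int irq);
int  (*set_affinity)(unsigned int irq, const struct cpumask *dest);

/* new-style irq_chip methods: everything travels in irq_data */
void (*irq_mask)(struct irq_data *data);
int  (*irq_set_affinity)(struct irq_data *data,
			 const struct cpumask *dest, bool force);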
2762static inline void init_IO_APIC_traps(void) 2555static inline void init_IO_APIC_traps(void)
2763{ 2556{
2764 int irq;
2765 struct irq_desc *desc;
2766 struct irq_cfg *cfg; 2557 struct irq_cfg *cfg;
2558 unsigned int irq;
2767 2559
2768 /* 2560 /*
2769 * NOTE! The local APIC isn't very good at handling 2561 * NOTE! The local APIC isn't very good at handling
@@ -2776,8 +2568,8 @@ static inline void init_IO_APIC_traps(void)
2776 * Also, we've got to be careful not to trash gate 2568 * Also, we've got to be careful not to trash gate
2777 * 0x80, because int 0x80 is hm, kind of importantish. ;) 2569 * 0x80, because int 0x80 is hm, kind of importantish. ;)
2778 */ 2570 */
2779 for_each_irq_desc(irq, desc) { 2571 for_each_active_irq(irq) {
2780 cfg = desc->chip_data; 2572 cfg = irq_get_chip_data(irq);
2781 if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { 2573 if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
2782 /* 2574 /*
2783 * Hmm.. We don't have an entry for this, 2575 * Hmm.. We don't have an entry for this,
@@ -2788,7 +2580,7 @@ static inline void init_IO_APIC_traps(void)
2788 legacy_pic->make_irq(irq); 2580 legacy_pic->make_irq(irq);
2789 else 2581 else
2790 /* Strange. Oh, well.. */ 2582 /* Strange. Oh, well.. */
2791 desc->chip = &no_irq_chip; 2583 irq_set_chip(irq, &no_irq_chip);
2792 } 2584 }
2793 } 2585 }
2794} 2586}
@@ -2797,7 +2589,7 @@ static inline void init_IO_APIC_traps(void)
2797 * The local APIC irq-chip implementation: 2589 * The local APIC irq-chip implementation:
2798 */ 2590 */
2799 2591
2800static void mask_lapic_irq(unsigned int irq) 2592static void mask_lapic_irq(struct irq_data *data)
2801{ 2593{
2802 unsigned long v; 2594 unsigned long v;
2803 2595
@@ -2805,7 +2597,7 @@ static void mask_lapic_irq(unsigned int irq)
2805 apic_write(APIC_LVT0, v | APIC_LVT_MASKED); 2597 apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
2806} 2598}
2807 2599
2808static void unmask_lapic_irq(unsigned int irq) 2600static void unmask_lapic_irq(struct irq_data *data)
2809{ 2601{
2810 unsigned long v; 2602 unsigned long v;
2811 2603
@@ -2813,43 +2605,25 @@ static void unmask_lapic_irq(unsigned int irq)
2813 apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); 2605 apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
2814} 2606}
2815 2607
2816static void ack_lapic_irq(unsigned int irq) 2608static void ack_lapic_irq(struct irq_data *data)
2817{ 2609{
2818 ack_APIC_irq(); 2610 ack_APIC_irq();
2819} 2611}
2820 2612
2821static struct irq_chip lapic_chip __read_mostly = { 2613static struct irq_chip lapic_chip __read_mostly = {
2822 .name = "local-APIC", 2614 .name = "local-APIC",
2823 .mask = mask_lapic_irq, 2615 .irq_mask = mask_lapic_irq,
2824 .unmask = unmask_lapic_irq, 2616 .irq_unmask = unmask_lapic_irq,
2825 .ack = ack_lapic_irq, 2617 .irq_ack = ack_lapic_irq,
2826}; 2618};
2827 2619
2828static void lapic_register_intr(int irq, struct irq_desc *desc) 2620static void lapic_register_intr(int irq)
2829{ 2621{
2830 desc->status &= ~IRQ_LEVEL; 2622 irq_clear_status_flags(irq, IRQ_LEVEL);
2831 set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, 2623 irq_set_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
2832 "edge"); 2624 "edge");
2833} 2625}
2834 2626
2835static void __init setup_nmi(void)
2836{
2837 /*
2838 * Dirty trick to enable the NMI watchdog ...
2839 * We put the 8259A master into AEOI mode and
2840 * unmask on all local APICs LVT0 as NMI.
2841 *
2842 * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
2843 * is from Maciej W. Rozycki - so we do not have to EOI from
2844 * the NMI handler or the timer interrupt.
2845 */
2846 apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
2847
2848 enable_NMI_through_LVT0();
2849
2850 apic_printk(APIC_VERBOSE, " done.\n");
2851}
2852
2853/* 2627/*
2854 * This looks a bit hackish but it's about the only one way of sending 2628 * This looks a bit hackish but it's about the only one way of sending
2855 * a few INTA cycles to 8259As and any associated glue logic. ICR does 2629 * a few INTA cycles to 8259As and any associated glue logic. ICR does
@@ -2930,9 +2704,8 @@ int timer_through_8259 __initdata;
2930 */ 2704 */
2931static inline void __init check_timer(void) 2705static inline void __init check_timer(void)
2932{ 2706{
2933 struct irq_desc *desc = irq_to_desc(0); 2707 struct irq_cfg *cfg = irq_get_chip_data(0);
2934 struct irq_cfg *cfg = desc->chip_data; 2708 int node = cpu_to_node(0);
2935 int node = cpu_to_node(boot_cpu_id);
2936 int apic1, pin1, apic2, pin2; 2709 int apic1, pin1, apic2, pin2;
2937 unsigned long flags; 2710 unsigned long flags;
2938 int no_pin1 = 0; 2711 int no_pin1 = 0;
@@ -2942,7 +2715,7 @@ static inline void __init check_timer(void)
2942 /* 2715 /*
2943 * get/set the timer IRQ vector: 2716 * get/set the timer IRQ vector:
2944 */ 2717 */
2945 legacy_pic->chip->mask(0); 2718 legacy_pic->mask(0);
2946 assign_irq_vector(0, cfg, apic->target_cpus()); 2719 assign_irq_vector(0, cfg, apic->target_cpus());
2947 2720
2948 /* 2721 /*
@@ -2956,15 +2729,6 @@ static inline void __init check_timer(void)
2956 */ 2729 */
2957 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); 2730 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
2958 legacy_pic->init(1); 2731 legacy_pic->init(1);
2959#ifdef CONFIG_X86_32
2960 {
2961 unsigned int ver;
2962
2963 ver = apic_read(APIC_LVR);
2964 ver = GET_APIC_VERSION(ver);
2965 timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
2966 }
2967#endif
2968 2732
2969 pin1 = find_isa_irq_pin(0, mp_INT); 2733 pin1 = find_isa_irq_pin(0, mp_INT);
2970 apic1 = find_isa_irq_apic(0, mp_INT); 2734 apic1 = find_isa_irq_apic(0, mp_INT);
@@ -3001,7 +2765,7 @@ static inline void __init check_timer(void)
3001 add_pin_to_irq_node(cfg, node, apic1, pin1); 2765 add_pin_to_irq_node(cfg, node, apic1, pin1);
3002 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); 2766 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
3003 } else { 2767 } else {
3004 /* for edge trigger, setup_IO_APIC_irq already 2768 /* for edge trigger, setup_ioapic_irq already
3005 * leave it unmasked. 2769 * leave it unmasked.
3006 * so only need to unmask if it is level-trigger 2770 * so only need to unmask if it is level-trigger
3007 * do we really have level trigger timer? 2771 * do we really have level trigger timer?
@@ -3009,13 +2773,9 @@ static inline void __init check_timer(void)
3009 int idx; 2773 int idx;
3010 idx = find_irq_entry(apic1, pin1, mp_INT); 2774 idx = find_irq_entry(apic1, pin1, mp_INT);
3011 if (idx != -1 && irq_trigger(idx)) 2775 if (idx != -1 && irq_trigger(idx))
3012 unmask_IO_APIC_irq_desc(desc); 2776 unmask_ioapic(cfg);
3013 } 2777 }
3014 if (timer_irq_works()) { 2778 if (timer_irq_works()) {
3015 if (nmi_watchdog == NMI_IO_APIC) {
3016 setup_nmi();
3017 legacy_pic->chip->unmask(0);
3018 }
3019 if (disable_timer_pin_1 > 0) 2779 if (disable_timer_pin_1 > 0)
3020 clear_IO_APIC_pin(0, pin1); 2780 clear_IO_APIC_pin(0, pin1);
3021 goto out; 2781 goto out;
@@ -3037,48 +2797,34 @@ static inline void __init check_timer(void)
3037 */ 2797 */
3038 replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2); 2798 replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2);
3039 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); 2799 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
3040 legacy_pic->chip->unmask(0); 2800 legacy_pic->unmask(0);
3041 if (timer_irq_works()) { 2801 if (timer_irq_works()) {
3042 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); 2802 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
3043 timer_through_8259 = 1; 2803 timer_through_8259 = 1;
3044 if (nmi_watchdog == NMI_IO_APIC) {
3045 legacy_pic->chip->mask(0);
3046 setup_nmi();
3047 legacy_pic->chip->unmask(0);
3048 }
3049 goto out; 2804 goto out;
3050 } 2805 }
3051 /* 2806 /*
3052 * Cleanup, just in case ... 2807 * Cleanup, just in case ...
3053 */ 2808 */
3054 local_irq_disable(); 2809 local_irq_disable();
3055 legacy_pic->chip->mask(0); 2810 legacy_pic->mask(0);
3056 clear_IO_APIC_pin(apic2, pin2); 2811 clear_IO_APIC_pin(apic2, pin2);
3057 apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); 2812 apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
3058 } 2813 }
3059 2814
3060 if (nmi_watchdog == NMI_IO_APIC) {
3061 apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
3062 "through the IO-APIC - disabling NMI Watchdog!\n");
3063 nmi_watchdog = NMI_NONE;
3064 }
3065#ifdef CONFIG_X86_32
3066 timer_ack = 0;
3067#endif
3068
3069 apic_printk(APIC_QUIET, KERN_INFO 2815 apic_printk(APIC_QUIET, KERN_INFO
3070 "...trying to set up timer as Virtual Wire IRQ...\n"); 2816 "...trying to set up timer as Virtual Wire IRQ...\n");
3071 2817
3072 lapic_register_intr(0, desc); 2818 lapic_register_intr(0);
3073 apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ 2819 apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
3074 legacy_pic->chip->unmask(0); 2820 legacy_pic->unmask(0);
3075 2821
3076 if (timer_irq_works()) { 2822 if (timer_irq_works()) {
3077 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); 2823 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
3078 goto out; 2824 goto out;
3079 } 2825 }
3080 local_irq_disable(); 2826 local_irq_disable();
3081 legacy_pic->chip->mask(0); 2827 legacy_pic->mask(0);
3082 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); 2828 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
3083 apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); 2829 apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
3084 2830
@@ -3144,7 +2890,7 @@ void __init setup_IO_APIC(void)
3144} 2890}
3145 2891
3146/* 2892/*
3147 * Called after all the initialization is done. If we didnt find any 2893 * Called after all the initialization is done. If we didn't find any
3148 * APIC bugs then we can allow the modify fast path 2894 * APIC bugs then we can allow the modify fast path
3149 */ 2895 */
3150 2896
@@ -3157,136 +2903,84 @@ static int __init io_apic_bug_finalize(void)
3157 2903
3158late_initcall(io_apic_bug_finalize); 2904late_initcall(io_apic_bug_finalize);
3159 2905
3160struct sysfs_ioapic_data { 2906static void resume_ioapic_id(int ioapic_id)
3161 struct sys_device dev;
3162 struct IO_APIC_route_entry entry[0];
3163};
3164static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
3165
3166static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
3167{ 2907{
3168 struct IO_APIC_route_entry *entry;
3169 struct sysfs_ioapic_data *data;
3170 int i;
3171
3172 data = container_of(dev, struct sysfs_ioapic_data, dev);
3173 entry = data->entry;
3174 for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ )
3175 *entry = ioapic_read_entry(dev->id, i);
3176
3177 return 0;
3178}
3179
3180static int ioapic_resume(struct sys_device *dev)
3181{
3182 struct IO_APIC_route_entry *entry;
3183 struct sysfs_ioapic_data *data;
3184 unsigned long flags; 2908 unsigned long flags;
3185 union IO_APIC_reg_00 reg_00; 2909 union IO_APIC_reg_00 reg_00;
3186 int i;
3187 2910
3188 data = container_of(dev, struct sysfs_ioapic_data, dev);
3189 entry = data->entry;
3190 2911
3191 raw_spin_lock_irqsave(&ioapic_lock, flags); 2912 raw_spin_lock_irqsave(&ioapic_lock, flags);
3192 reg_00.raw = io_apic_read(dev->id, 0); 2913 reg_00.raw = io_apic_read(ioapic_id, 0);
3193 if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) { 2914 if (reg_00.bits.ID != mpc_ioapic_id(ioapic_id)) {
3194 reg_00.bits.ID = mp_ioapics[dev->id].apicid; 2915 reg_00.bits.ID = mpc_ioapic_id(ioapic_id);
3195 io_apic_write(dev->id, 0, reg_00.raw); 2916 io_apic_write(ioapic_id, 0, reg_00.raw);
3196 } 2917 }
3197 raw_spin_unlock_irqrestore(&ioapic_lock, flags); 2918 raw_spin_unlock_irqrestore(&ioapic_lock, flags);
3198 for (i = 0; i < nr_ioapic_registers[dev->id]; i++) 2919}
3199 ioapic_write_entry(dev->id, i, entry[i]);
3200 2920
3201 return 0; 2921static void ioapic_resume(void)
2922{
2923 int ioapic_id;
2924
2925 for (ioapic_id = nr_ioapics - 1; ioapic_id >= 0; ioapic_id--)
2926 resume_ioapic_id(ioapic_id);
2927
2928 restore_ioapic_entries();
3202} 2929}
3203 2930
3204static struct sysdev_class ioapic_sysdev_class = { 2931static struct syscore_ops ioapic_syscore_ops = {
3205 .name = "ioapic", 2932 .suspend = save_ioapic_entries,
3206 .suspend = ioapic_suspend,
3207 .resume = ioapic_resume, 2933 .resume = ioapic_resume,
3208}; 2934};
3209 2935
3210static int __init ioapic_init_sysfs(void) 2936static int __init ioapic_init_ops(void)
3211{ 2937{
3212 struct sys_device * dev; 2938 register_syscore_ops(&ioapic_syscore_ops);
3213 int i, size, error;
3214
3215 error = sysdev_class_register(&ioapic_sysdev_class);
3216 if (error)
3217 return error;
3218
3219 for (i = 0; i < nr_ioapics; i++ ) {
3220 size = sizeof(struct sys_device) + nr_ioapic_registers[i]
3221 * sizeof(struct IO_APIC_route_entry);
3222 mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
3223 if (!mp_ioapic_data[i]) {
3224 printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
3225 continue;
3226 }
3227 dev = &mp_ioapic_data[i]->dev;
3228 dev->id = i;
3229 dev->cls = &ioapic_sysdev_class;
3230 error = sysdev_register(dev);
3231 if (error) {
3232 kfree(mp_ioapic_data[i]);
3233 mp_ioapic_data[i] = NULL;
3234 printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
3235 continue;
3236 }
3237 }
3238 2939
3239 return 0; 2940 return 0;
3240} 2941}
3241 2942
3242device_initcall(ioapic_init_sysfs); 2943device_initcall(ioapic_init_ops);
3243 2944
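The per-IO-APIC sysdev objects and their kzalloc'd entry buffers are gone; a single syscore_ops hooks save_ioapic_entries()/restore_ioapic_entries(), which now own the entry bookkeeping. A minimal sketch of the syscore pattern itself, assuming only the <linux/syscore_ops.h> interface (names are illustrative):

#include <linux/syscore_ops.h>

static int example_suspend(void)
{
	/* runs late, on one CPU, with interrupts disabled; 0 on success */
	return 0;
}

static void example_resume(void)
{
	/* mirror of suspend: restore saved hardware state */
}

static struct syscore_ops example_syscore_ops = {
	.suspend = example_suspend,
	.resume  = example_resume,
};

static int __init example_init(void)
{
	register_syscore_ops(&example_syscore_ops);
	return 0;
}
device_initcall(example_init);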
3244/* 2945/*
3245 * Dynamic irq allocate and deallocation 2946 * Dynamic irq allocate and deallocation
3246 */ 2947 */
3247unsigned int create_irq_nr(unsigned int irq_want, int node) 2948unsigned int create_irq_nr(unsigned int from, int node)
3248{ 2949{
3249 /* Allocate an unused irq */ 2950 struct irq_cfg *cfg;
3250 unsigned int irq;
3251 unsigned int new;
3252 unsigned long flags; 2951 unsigned long flags;
3253 struct irq_cfg *cfg_new = NULL; 2952 unsigned int ret = 0;
3254 struct irq_desc *desc_new = NULL; 2953 int irq;
3255
3256 irq = 0;
3257 if (irq_want < nr_irqs_gsi)
3258 irq_want = nr_irqs_gsi;
3259
3260 raw_spin_lock_irqsave(&vector_lock, flags);
3261 for (new = irq_want; new < nr_irqs; new++) {
3262 desc_new = irq_to_desc_alloc_node(new, node);
3263 if (!desc_new) {
3264 printk(KERN_INFO "can not get irq_desc for %d\n", new);
3265 continue;
3266 }
3267 cfg_new = desc_new->chip_data;
3268
3269 if (cfg_new->vector != 0)
3270 continue;
3271 2954
3272 desc_new = move_irq_desc(desc_new, node); 2955 if (from < nr_irqs_gsi)
3273 cfg_new = desc_new->chip_data; 2956 from = nr_irqs_gsi;
3274 2957
3275 if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) 2958 irq = alloc_irq_from(from, node);
3276 irq = new; 2959 if (irq < 0)
3277 break; 2960 return 0;
2961 cfg = alloc_irq_cfg(irq, node);
2962 if (!cfg) {
2963 free_irq_at(irq, NULL);
2964 return 0;
3278 } 2965 }
3279 raw_spin_unlock_irqrestore(&vector_lock, flags);
3280 2966
3281 if (irq > 0) 2967 raw_spin_lock_irqsave(&vector_lock, flags);
3282 dynamic_irq_init_keep_chip_data(irq); 2968 if (!__assign_irq_vector(irq, cfg, apic->target_cpus()))
2969 ret = irq;
2970 raw_spin_unlock_irqrestore(&vector_lock, flags);
3283 2971
3284 return irq; 2972 if (ret) {
2973 irq_set_chip_data(irq, cfg);
2974 irq_clear_status_flags(irq, IRQ_NOREQUEST);
2975 } else {
2976 free_irq_at(irq, cfg);
2977 }
2978 return ret;
3285} 2979}
3286 2980
3287int create_irq(void) 2981int create_irq(void)
3288{ 2982{
3289 int node = cpu_to_node(boot_cpu_id); 2983 int node = cpu_to_node(0);
3290 unsigned int irq_want; 2984 unsigned int irq_want;
3291 int irq; 2985 int irq;
3292 2986
@@ -3301,14 +2995,17 @@ int create_irq(void)
3301 2995
3302void destroy_irq(unsigned int irq) 2996void destroy_irq(unsigned int irq)
3303{ 2997{
2998 struct irq_cfg *cfg = irq_get_chip_data(irq);
3304 unsigned long flags; 2999 unsigned long flags;
3305 3000
3306 dynamic_irq_cleanup_keep_chip_data(irq); 3001 irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
3307 3002
3308 free_irte(irq); 3003 if (irq_remapped(cfg))
3004 free_irte(irq);
3309 raw_spin_lock_irqsave(&vector_lock, flags); 3005 raw_spin_lock_irqsave(&vector_lock, flags);
3310 __clear_irq_vector(irq, get_irq_chip_data(irq)); 3006 __clear_irq_vector(irq, cfg);
3311 raw_spin_unlock_irqrestore(&vector_lock, flags); 3007 raw_spin_unlock_irqrestore(&vector_lock, flags);
3008 free_irq_at(irq, cfg);
3312} 3009}
3313 3010
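create_irq_nr() now delegates descriptor management to the sparse-irq allocator (alloc_irq_from()/free_irq_at()) instead of scanning and moving descriptors by hand, and destroy_irq() frees the IRTE only for remapped interrupts. Both keep 0 as the failure return rather than a negative errno; a hedged usage sketch:

static int example_alloc_dynamic_irq(void)
{
	unsigned int irq;

	irq = create_irq_nr(0, cpu_to_node(0));	/* 'from' is clamped up to nr_irqs_gsi */
	if (!irq)
		return -ENOSPC;			/* 0 means no free irq or vector */

	/* ... irq_set_chip_and_handler_name(), request_irq(), etc. ... */

	destroy_irq(irq);			/* releases vector, IRTE and descriptor */
	return 0;
}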
3314/* 3011/*
@@ -3332,7 +3029,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
3332 3029
3333 dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); 3030 dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
3334 3031
3335 if (irq_remapped(irq)) { 3032 if (irq_remapped(cfg)) {
3336 struct irte irte; 3033 struct irte irte;
3337 int ir_index; 3034 int ir_index;
3338 u16 sub_handle; 3035 u16 sub_handle;
@@ -3340,14 +3037,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
3340 ir_index = map_irq_to_irte_handle(irq, &sub_handle); 3037 ir_index = map_irq_to_irte_handle(irq, &sub_handle);
3341 BUG_ON(ir_index == -1); 3038 BUG_ON(ir_index == -1);
3342 3039
3343 memset (&irte, 0, sizeof(irte)); 3040 prepare_irte(&irte, cfg->vector, dest);
3344
3345 irte.present = 1;
3346 irte.dst_mode = apic->irq_dest_mode;
3347 irte.trigger_mode = 0; /* edge */
3348 irte.dlvry_mode = apic->irq_delivery_mode;
3349 irte.vector = cfg->vector;
3350 irte.dest_id = IRTE_DEST(dest);
3351 3041
3352 /* Set source-id of interrupt request */ 3042 /* Set source-id of interrupt request */
3353 if (pdev) 3043 if (pdev)
@@ -3392,26 +3082,24 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
3392} 3082}
3393 3083
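The open-coded IRTE setup deleted from msi_compose_msg() is folded into prepare_irte(). A hedged reconstruction of what that helper must do, pieced together from the removed lines (the real helper lives in the interrupt-remapping code and may set further bits):

static void prepare_irte(struct irte *irte, int vector, unsigned int dest)
{
	memset(irte, 0, sizeof(*irte));

	irte->present = 1;
	irte->dst_mode = apic->irq_dest_mode;
	irte->trigger_mode = 0;			/* edge */
	irte->dlvry_mode = apic->irq_delivery_mode;
	irte->vector = vector;
	irte->dest_id = IRTE_DEST(dest);
}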
3394#ifdef CONFIG_SMP 3084#ifdef CONFIG_SMP
3395static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) 3085static int
3086msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
3396{ 3087{
3397 struct irq_desc *desc = irq_to_desc(irq); 3088 struct irq_cfg *cfg = data->chip_data;
3398 struct irq_cfg *cfg;
3399 struct msi_msg msg; 3089 struct msi_msg msg;
3400 unsigned int dest; 3090 unsigned int dest;
3401 3091
3402 if (set_desc_affinity(desc, mask, &dest)) 3092 if (__ioapic_set_affinity(data, mask, &dest))
3403 return -1; 3093 return -1;
3404 3094
3405 cfg = desc->chip_data; 3095 __get_cached_msi_msg(data->msi_desc, &msg);
3406
3407 get_cached_msi_msg_desc(desc, &msg);
3408 3096
3409 msg.data &= ~MSI_DATA_VECTOR_MASK; 3097 msg.data &= ~MSI_DATA_VECTOR_MASK;
3410 msg.data |= MSI_DATA_VECTOR(cfg->vector); 3098 msg.data |= MSI_DATA_VECTOR(cfg->vector);
3411 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; 3099 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
3412 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3100 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3413 3101
3414 write_msi_msg_desc(desc, &msg); 3102 __write_msi_msg(data->msi_desc, &msg);
3415 3103
3416 return 0; 3104 return 0;
3417} 3105}
@@ -3421,17 +3109,17 @@ static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
3421 * done in the process context using interrupt-remapping hardware. 3109 * done in the process context using interrupt-remapping hardware.
3422 */ 3110 */
3423static int 3111static int
3424ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) 3112ir_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
3113 bool force)
3425{ 3114{
3426 struct irq_desc *desc = irq_to_desc(irq); 3115 struct irq_cfg *cfg = data->chip_data;
3427 struct irq_cfg *cfg = desc->chip_data; 3116 unsigned int dest, irq = data->irq;
3428 unsigned int dest;
3429 struct irte irte; 3117 struct irte irte;
3430 3118
3431 if (get_irte(irq, &irte)) 3119 if (get_irte(irq, &irte))
3432 return -1; 3120 return -1;
3433 3121
3434 if (set_desc_affinity(desc, mask, &dest)) 3122 if (__ioapic_set_affinity(data, mask, &dest))
3435 return -1; 3123 return -1;
3436 3124
3437 irte.vector = cfg->vector; 3125 irte.vector = cfg->vector;
@@ -3461,27 +3149,27 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
3461 * which implement the MSI or MSI-X Capability Structure. 3149 * which implement the MSI or MSI-X Capability Structure.
3462 */ 3150 */
3463static struct irq_chip msi_chip = { 3151static struct irq_chip msi_chip = {
3464 .name = "PCI-MSI", 3152 .name = "PCI-MSI",
3465 .unmask = unmask_msi_irq, 3153 .irq_unmask = unmask_msi_irq,
3466 .mask = mask_msi_irq, 3154 .irq_mask = mask_msi_irq,
3467 .ack = ack_apic_edge, 3155 .irq_ack = ack_apic_edge,
3468#ifdef CONFIG_SMP 3156#ifdef CONFIG_SMP
3469 .set_affinity = set_msi_irq_affinity, 3157 .irq_set_affinity = msi_set_affinity,
3470#endif 3158#endif
3471 .retrigger = ioapic_retrigger_irq, 3159 .irq_retrigger = ioapic_retrigger_irq,
3472}; 3160};
3473 3161
3474static struct irq_chip msi_ir_chip = { 3162static struct irq_chip msi_ir_chip = {
3475 .name = "IR-PCI-MSI", 3163 .name = "IR-PCI-MSI",
3476 .unmask = unmask_msi_irq, 3164 .irq_unmask = unmask_msi_irq,
3477 .mask = mask_msi_irq, 3165 .irq_mask = mask_msi_irq,
3478#ifdef CONFIG_INTR_REMAP 3166#ifdef CONFIG_INTR_REMAP
3479 .ack = ir_ack_apic_edge, 3167 .irq_ack = ir_ack_apic_edge,
3480#ifdef CONFIG_SMP 3168#ifdef CONFIG_SMP
3481 .set_affinity = ir_set_msi_irq_affinity, 3169 .irq_set_affinity = ir_msi_set_affinity,
3482#endif 3170#endif
3483#endif 3171#endif
3484 .retrigger = ioapic_retrigger_irq, 3172 .irq_retrigger = ioapic_retrigger_irq,
3485}; 3173};
3486 3174
3487/* 3175/*
@@ -3513,40 +3201,35 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
3513 3201
3514static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) 3202static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3515{ 3203{
3516 int ret; 3204 struct irq_chip *chip = &msi_chip;
3517 struct msi_msg msg; 3205 struct msi_msg msg;
3206 int ret;
3518 3207
3519 ret = msi_compose_msg(dev, irq, &msg, -1); 3208 ret = msi_compose_msg(dev, irq, &msg, -1);
3520 if (ret < 0) 3209 if (ret < 0)
3521 return ret; 3210 return ret;
3522 3211
3523 set_irq_msi(irq, msidesc); 3212 irq_set_msi_desc(irq, msidesc);
3524 write_msi_msg(irq, &msg); 3213 write_msi_msg(irq, &msg);
3525 3214
3526 if (irq_remapped(irq)) { 3215 if (irq_remapped(irq_get_chip_data(irq))) {
3527 struct irq_desc *desc = irq_to_desc(irq); 3216 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
3528 /* 3217 chip = &msi_ir_chip;
3529 * irq migration in process context 3218 }
3530 */ 3219
3531 desc->status |= IRQ_MOVE_PCNTXT; 3220 irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
3532 set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
3533 } else
3534 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
3535 3221
3536 dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq); 3222 dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);
3537 3223
3538 return 0; 3224 return 0;
3539} 3225}
3540 3226
3541int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) 3227int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3542{ 3228{
3543 unsigned int irq; 3229 int node, ret, sub_handle, index = 0;
3544 int ret, sub_handle; 3230 unsigned int irq, irq_want;
3545 struct msi_desc *msidesc; 3231 struct msi_desc *msidesc;
3546 unsigned int irq_want;
3547 struct intel_iommu *iommu = NULL; 3232 struct intel_iommu *iommu = NULL;
3548 int index = 0;
3549 int node;
3550 3233
3551 /* x86 doesn't support multiple MSI yet */ 3234 /* x86 doesn't support multiple MSI yet */
3552 if (type == PCI_CAP_ID_MSI && nvec > 1) 3235 if (type == PCI_CAP_ID_MSI && nvec > 1)
@@ -3599,31 +3282,31 @@ error:
3599 return ret; 3282 return ret;
3600} 3283}
3601 3284
3602void arch_teardown_msi_irq(unsigned int irq) 3285void native_teardown_msi_irq(unsigned int irq)
3603{ 3286{
3604 destroy_irq(irq); 3287 destroy_irq(irq);
3605} 3288}
3606 3289
3607#if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP) 3290#if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP)
3608#ifdef CONFIG_SMP 3291#ifdef CONFIG_SMP
3609static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) 3292static int
3293dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
3294 bool force)
3610{ 3295{
3611 struct irq_desc *desc = irq_to_desc(irq); 3296 struct irq_cfg *cfg = data->chip_data;
3612 struct irq_cfg *cfg; 3297 unsigned int dest, irq = data->irq;
3613 struct msi_msg msg; 3298 struct msi_msg msg;
3614 unsigned int dest;
3615 3299
3616 if (set_desc_affinity(desc, mask, &dest)) 3300 if (__ioapic_set_affinity(data, mask, &dest))
3617 return -1; 3301 return -1;
3618 3302
3619 cfg = desc->chip_data;
3620
3621 dmar_msi_read(irq, &msg); 3303 dmar_msi_read(irq, &msg);
3622 3304
3623 msg.data &= ~MSI_DATA_VECTOR_MASK; 3305 msg.data &= ~MSI_DATA_VECTOR_MASK;
3624 msg.data |= MSI_DATA_VECTOR(cfg->vector); 3306 msg.data |= MSI_DATA_VECTOR(cfg->vector);
3625 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; 3307 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
3626 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3308 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3309 msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest);
3627 3310
3628 dmar_msi_write(irq, &msg); 3311 dmar_msi_write(irq, &msg);
3629 3312
@@ -3633,14 +3316,14 @@ static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3633#endif /* CONFIG_SMP */ 3316#endif /* CONFIG_SMP */
3634 3317
3635static struct irq_chip dmar_msi_type = { 3318static struct irq_chip dmar_msi_type = {
3636 .name = "DMAR_MSI", 3319 .name = "DMAR_MSI",
3637 .unmask = dmar_msi_unmask, 3320 .irq_unmask = dmar_msi_unmask,
3638 .mask = dmar_msi_mask, 3321 .irq_mask = dmar_msi_mask,
3639 .ack = ack_apic_edge, 3322 .irq_ack = ack_apic_edge,
3640#ifdef CONFIG_SMP 3323#ifdef CONFIG_SMP
3641 .set_affinity = dmar_msi_set_affinity, 3324 .irq_set_affinity = dmar_msi_set_affinity,
3642#endif 3325#endif
3643 .retrigger = ioapic_retrigger_irq, 3326 .irq_retrigger = ioapic_retrigger_irq,
3644}; 3327};
3645 3328
3646int arch_setup_dmar_msi(unsigned int irq) 3329int arch_setup_dmar_msi(unsigned int irq)
@@ -3652,8 +3335,8 @@ int arch_setup_dmar_msi(unsigned int irq)
3652 if (ret < 0) 3335 if (ret < 0)
3653 return ret; 3336 return ret;
3654 dmar_msi_write(irq, &msg); 3337 dmar_msi_write(irq, &msg);
3655 set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, 3338 irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
3656 "edge"); 3339 "edge");
3657 return 0; 3340 return 0;
3658} 3341}
3659#endif 3342#endif
@@ -3661,26 +3344,24 @@ int arch_setup_dmar_msi(unsigned int irq)
3661#ifdef CONFIG_HPET_TIMER 3344#ifdef CONFIG_HPET_TIMER
3662 3345
3663#ifdef CONFIG_SMP 3346#ifdef CONFIG_SMP
3664static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) 3347static int hpet_msi_set_affinity(struct irq_data *data,
3348 const struct cpumask *mask, bool force)
3665{ 3349{
3666 struct irq_desc *desc = irq_to_desc(irq); 3350 struct irq_cfg *cfg = data->chip_data;
3667 struct irq_cfg *cfg;
3668 struct msi_msg msg; 3351 struct msi_msg msg;
3669 unsigned int dest; 3352 unsigned int dest;
3670 3353
3671 if (set_desc_affinity(desc, mask, &dest)) 3354 if (__ioapic_set_affinity(data, mask, &dest))
3672 return -1; 3355 return -1;
3673 3356
3674 cfg = desc->chip_data; 3357 hpet_msi_read(data->handler_data, &msg);
3675
3676 hpet_msi_read(irq, &msg);
3677 3358
3678 msg.data &= ~MSI_DATA_VECTOR_MASK; 3359 msg.data &= ~MSI_DATA_VECTOR_MASK;
3679 msg.data |= MSI_DATA_VECTOR(cfg->vector); 3360 msg.data |= MSI_DATA_VECTOR(cfg->vector);
3680 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; 3361 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
3681 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3362 msg.address_lo |= MSI_ADDR_DEST_ID(dest);
3682 3363
3683 hpet_msi_write(irq, &msg); 3364 hpet_msi_write(data->handler_data, &msg);
3684 3365
3685 return 0; 3366 return 0;
3686} 3367}
@@ -3688,34 +3369,34 @@ static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
3688#endif /* CONFIG_SMP */ 3369#endif /* CONFIG_SMP */
3689 3370
3690static struct irq_chip ir_hpet_msi_type = { 3371static struct irq_chip ir_hpet_msi_type = {
3691 .name = "IR-HPET_MSI", 3372 .name = "IR-HPET_MSI",
3692 .unmask = hpet_msi_unmask, 3373 .irq_unmask = hpet_msi_unmask,
3693 .mask = hpet_msi_mask, 3374 .irq_mask = hpet_msi_mask,
3694#ifdef CONFIG_INTR_REMAP 3375#ifdef CONFIG_INTR_REMAP
3695 .ack = ir_ack_apic_edge, 3376 .irq_ack = ir_ack_apic_edge,
3696#ifdef CONFIG_SMP 3377#ifdef CONFIG_SMP
3697 .set_affinity = ir_set_msi_irq_affinity, 3378 .irq_set_affinity = ir_msi_set_affinity,
3698#endif 3379#endif
3699#endif 3380#endif
3700 .retrigger = ioapic_retrigger_irq, 3381 .irq_retrigger = ioapic_retrigger_irq,
3701}; 3382};
3702 3383
3703static struct irq_chip hpet_msi_type = { 3384static struct irq_chip hpet_msi_type = {
3704 .name = "HPET_MSI", 3385 .name = "HPET_MSI",
3705 .unmask = hpet_msi_unmask, 3386 .irq_unmask = hpet_msi_unmask,
3706 .mask = hpet_msi_mask, 3387 .irq_mask = hpet_msi_mask,
3707 .ack = ack_apic_edge, 3388 .irq_ack = ack_apic_edge,
3708#ifdef CONFIG_SMP 3389#ifdef CONFIG_SMP
3709 .set_affinity = hpet_msi_set_affinity, 3390 .irq_set_affinity = hpet_msi_set_affinity,
3710#endif 3391#endif
3711 .retrigger = ioapic_retrigger_irq, 3392 .irq_retrigger = ioapic_retrigger_irq,
3712}; 3393};
3713 3394
3714int arch_setup_hpet_msi(unsigned int irq, unsigned int id) 3395int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
3715{ 3396{
3716 int ret; 3397 struct irq_chip *chip = &hpet_msi_type;
3717 struct msi_msg msg; 3398 struct msi_msg msg;
3718 struct irq_desc *desc = irq_to_desc(irq); 3399 int ret;
3719 3400
3720 if (intr_remapping_enabled) { 3401 if (intr_remapping_enabled) {
3721 struct intel_iommu *iommu = map_hpet_to_ir(id); 3402 struct intel_iommu *iommu = map_hpet_to_ir(id);
@@ -3733,15 +3414,12 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
3733 if (ret < 0) 3414 if (ret < 0)
3734 return ret; 3415 return ret;
3735 3416
3736 hpet_msi_write(irq, &msg); 3417 hpet_msi_write(irq_get_handler_data(irq), &msg);
3737 desc->status |= IRQ_MOVE_PCNTXT; 3418 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
3738 if (irq_remapped(irq)) 3419 if (irq_remapped(irq_get_chip_data(irq)))
3739 set_irq_chip_and_handler_name(irq, &ir_hpet_msi_type, 3420 chip = &ir_hpet_msi_type;
3740 handle_edge_irq, "edge");
3741 else
3742 set_irq_chip_and_handler_name(irq, &hpet_msi_type,
3743 handle_edge_irq, "edge");
3744 3421
3422 irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
3745 return 0; 3423 return 0;
3746} 3424}
3747#endif 3425#endif
@@ -3768,33 +3446,30 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
3768 write_ht_irq_msg(irq, &msg); 3446 write_ht_irq_msg(irq, &msg);
3769} 3447}
3770 3448
3771static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) 3449static int
3450ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
3772{ 3451{
3773 struct irq_desc *desc = irq_to_desc(irq); 3452 struct irq_cfg *cfg = data->chip_data;
3774 struct irq_cfg *cfg;
3775 unsigned int dest; 3453 unsigned int dest;
3776 3454
3777 if (set_desc_affinity(desc, mask, &dest)) 3455 if (__ioapic_set_affinity(data, mask, &dest))
3778 return -1; 3456 return -1;
3779 3457
3780 cfg = desc->chip_data; 3458 target_ht_irq(data->irq, dest, cfg->vector);
3781
3782 target_ht_irq(irq, dest, cfg->vector);
3783
3784 return 0; 3459 return 0;
3785} 3460}
3786 3461
3787#endif 3462#endif
3788 3463
3789static struct irq_chip ht_irq_chip = { 3464static struct irq_chip ht_irq_chip = {
3790 .name = "PCI-HT", 3465 .name = "PCI-HT",
3791 .mask = mask_ht_irq, 3466 .irq_mask = mask_ht_irq,
3792 .unmask = unmask_ht_irq, 3467 .irq_unmask = unmask_ht_irq,
3793 .ack = ack_apic_edge, 3468 .irq_ack = ack_apic_edge,
3794#ifdef CONFIG_SMP 3469#ifdef CONFIG_SMP
3795 .set_affinity = set_ht_irq_affinity, 3470 .irq_set_affinity = ht_set_affinity,
3796#endif 3471#endif
3797 .retrigger = ioapic_retrigger_irq, 3472 .irq_retrigger = ioapic_retrigger_irq,
3798}; 3473};
3799 3474
3800int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) 3475int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
@@ -3831,7 +3506,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3831 3506
3832 write_ht_irq_msg(irq, &msg); 3507 write_ht_irq_msg(irq, &msg);
3833 3508
3834 set_irq_chip_and_handler_name(irq, &ht_irq_chip, 3509 irq_set_chip_and_handler_name(irq, &ht_irq_chip,
3835 handle_edge_irq, "edge"); 3510 handle_edge_irq, "edge");
3836 3511
3837 dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq); 3512 dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
@@ -3840,7 +3515,40 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3840} 3515}
3841#endif /* CONFIG_HT_IRQ */ 3516#endif /* CONFIG_HT_IRQ */
3842 3517
3843int __init io_apic_get_redir_entries (int ioapic) 3518static int
3519io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
3520{
3521 struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node);
3522 int ret;
3523
3524 if (!cfg)
3525 return -EINVAL;
3526 ret = __add_pin_to_irq_node(cfg, node, attr->ioapic, attr->ioapic_pin);
3527 if (!ret)
3528 setup_ioapic_irq(attr->ioapic, attr->ioapic_pin, irq, cfg,
3529 attr->trigger, attr->polarity);
3530 return ret;
3531}
3532
3533int io_apic_setup_irq_pin_once(unsigned int irq, int node,
3534 struct io_apic_irq_attr *attr)
3535{
3536 unsigned int id = attr->ioapic, pin = attr->ioapic_pin;
3537 int ret;
3538
3539 /* Avoid redundant programming */
3540 if (test_bit(pin, ioapics[id].pin_programmed)) {
3541 pr_debug("Pin %d-%d already programmed\n",
3542 mpc_ioapic_id(id), pin);
3543 return 0;
3544 }
3545 ret = io_apic_setup_irq_pin(irq, node, attr);
3546 if (!ret)
3547 set_bit(pin, ioapics[id].pin_programmed);
3548 return ret;
3549}
3550
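io_apic_setup_irq_pin_once() keeps the old PRT deduplication, now via the per-IO-APIC pin_programmed bitmap, and wraps the allocation helper above it. A hedged caller sketch; set_io_apic_irq_attr() is assumed from this era's <asm/hw_irq.h>, and the pin values are made up:

static int example_program_pin(unsigned int irq)
{
	struct io_apic_irq_attr attr;

	/* IO-APIC 0, pin 16, level-triggered, active-low -- made-up values */
	set_io_apic_irq_attr(&attr, 0, 16, 1, 1);

	return io_apic_setup_irq_pin_once(irq, cpu_to_node(0), &attr);
}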
3551static int __init io_apic_get_redir_entries(int ioapic)
3844{ 3552{
3845 union IO_APIC_reg_01 reg_01; 3553 union IO_APIC_reg_01 reg_01;
3846 unsigned long flags; 3554 unsigned long flags;
@@ -3856,7 +3564,7 @@ int __init io_apic_get_redir_entries (int ioapic)
3856 return reg_01.bits.entries + 1; 3564 return reg_01.bits.entries + 1;
3857} 3565}
3858 3566
3859void __init probe_nr_irqs_gsi(void) 3567static void __init probe_nr_irqs_gsi(void)
3860{ 3568{
3861 int nr; 3569 int nr;
3862 3570
@@ -3867,6 +3575,11 @@ void __init probe_nr_irqs_gsi(void)
3867 printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); 3575 printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi);
3868} 3576}
3869 3577
3578int get_nr_irqs_gsi(void)
3579{
3580 return nr_irqs_gsi;
3581}
3582
3870#ifdef CONFIG_SPARSE_IRQ 3583#ifdef CONFIG_SPARSE_IRQ
3871int __init arch_probe_nr_irqs(void) 3584int __init arch_probe_nr_irqs(void)
3872{ 3585{
@@ -3885,104 +3598,28 @@ int __init arch_probe_nr_irqs(void)
3885 if (nr < nr_irqs) 3598 if (nr < nr_irqs)
3886 nr_irqs = nr; 3599 nr_irqs = nr;
3887 3600
3888 return 0; 3601 return NR_IRQS_LEGACY;
3889} 3602}
3890#endif 3603#endif
3891 3604
3892static int __io_apic_set_pci_routing(struct device *dev, int irq, 3605int io_apic_set_pci_routing(struct device *dev, int irq,
3893 struct io_apic_irq_attr *irq_attr) 3606 struct io_apic_irq_attr *irq_attr)
3894{ 3607{
3895 struct irq_desc *desc;
3896 struct irq_cfg *cfg;
3897 int node; 3608 int node;
3898 int ioapic, pin;
3899 int trigger, polarity;
3900 3609
3901 ioapic = irq_attr->ioapic;
3902 if (!IO_APIC_IRQ(irq)) { 3610 if (!IO_APIC_IRQ(irq)) {
3903 apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", 3611 apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
3904 ioapic); 3612 irq_attr->ioapic);
3905 return -EINVAL; 3613 return -EINVAL;
3906 } 3614 }
3907 3615
3908 if (dev) 3616 node = dev ? dev_to_node(dev) : cpu_to_node(0);
3909 node = dev_to_node(dev);
3910 else
3911 node = cpu_to_node(boot_cpu_id);
3912
3913 desc = irq_to_desc_alloc_node(irq, node);
3914 if (!desc) {
3915 printk(KERN_INFO "can not get irq_desc %d\n", irq);
3916 return 0;
3917 }
3918
3919 pin = irq_attr->ioapic_pin;
3920 trigger = irq_attr->trigger;
3921 polarity = irq_attr->polarity;
3922 3617
3923 /* 3618 return io_apic_setup_irq_pin_once(irq, node, irq_attr);
3924 * IRQs < 16 are already in the irq_2_pin[] map
3925 */
3926 if (irq >= legacy_pic->nr_legacy_irqs) {
3927 cfg = desc->chip_data;
3928 if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) {
3929 printk(KERN_INFO "can not add pin %d for irq %d\n",
3930 pin, irq);
3931 return 0;
3932 }
3933 }
3934
3935 setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity);
3936
3937 return 0;
3938}
3939
3940int io_apic_set_pci_routing(struct device *dev, int irq,
3941 struct io_apic_irq_attr *irq_attr)
3942{
3943 int ioapic, pin;
3944 /*
3945 * Avoid pin reprogramming. PRTs typically include entries
3946 * with redundant pin->gsi mappings (but unique PCI devices);
3947 * we only program the IOAPIC on the first.
3948 */
3949 ioapic = irq_attr->ioapic;
3950 pin = irq_attr->ioapic_pin;
3951 if (test_bit(pin, mp_ioapic_routing[ioapic].pin_programmed)) {
3952 pr_debug("Pin %d-%d already programmed\n",
3953 mp_ioapics[ioapic].apicid, pin);
3954 return 0;
3955 }
3956 set_bit(pin, mp_ioapic_routing[ioapic].pin_programmed);
3957
3958 return __io_apic_set_pci_routing(dev, irq, irq_attr);
3959}
3960
3961u8 __init io_apic_unique_id(u8 id)
3962{
3963#ifdef CONFIG_X86_32
3964 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
3965 !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
3966 return io_apic_get_unique_id(nr_ioapics, id);
3967 else
3968 return id;
3969#else
3970 int i;
3971 DECLARE_BITMAP(used, 256);
3972
3973 bitmap_zero(used, 256);
3974 for (i = 0; i < nr_ioapics; i++) {
3975 struct mpc_ioapic *ia = &mp_ioapics[i];
3976 __set_bit(ia->apicid, used);
3977 }
3978 if (!test_bit(id, used))
3979 return id;
3980 return find_first_zero_bit(used, 256);
3981#endif
3982} 3619}
3983 3620
3984#ifdef CONFIG_X86_32 3621#ifdef CONFIG_X86_32
3985int __init io_apic_get_unique_id(int ioapic, int apic_id) 3622static int __init io_apic_get_unique_id(int ioapic, int apic_id)
3986{ 3623{
3987 union IO_APIC_reg_00 reg_00; 3624 union IO_APIC_reg_00 reg_00;
3988 static physid_mask_t apic_id_map = PHYSID_MASK_NONE; 3625 static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
@@ -4055,9 +3692,32 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
4055 3692
4056 return apic_id; 3693 return apic_id;
4057} 3694}
3695
3696static u8 __init io_apic_unique_id(u8 id)
3697{
3698 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
3699 !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
3700 return io_apic_get_unique_id(nr_ioapics, id);
3701 else
3702 return id;
3703}
3704#else
3705static u8 __init io_apic_unique_id(u8 id)
3706{
3707 int i;
3708 DECLARE_BITMAP(used, 256);
3709
3710 bitmap_zero(used, 256);
3711 for (i = 0; i < nr_ioapics; i++) {
3712 __set_bit(mpc_ioapic_id(i), used);
3713 }
3714 if (!test_bit(id, used))
3715 return id;
3716 return find_first_zero_bit(used, 256);
3717}
4058#endif 3718#endif
4059 3719
4060int __init io_apic_get_version(int ioapic) 3720static int __init io_apic_get_version(int ioapic)
4061{ 3721{
4062 union IO_APIC_reg_01 reg_01; 3722 union IO_APIC_reg_01 reg_01;
4063 unsigned long flags; 3723 unsigned long flags;
@@ -4102,14 +3762,14 @@ int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity)
4102void __init setup_ioapic_dest(void) 3762void __init setup_ioapic_dest(void)
4103{ 3763{
4104 int pin, ioapic, irq, irq_entry; 3764 int pin, ioapic, irq, irq_entry;
4105 struct irq_desc *desc;
4106 const struct cpumask *mask; 3765 const struct cpumask *mask;
3766 struct irq_data *idata;
4107 3767
4108 if (skip_ioapic_setup == 1) 3768 if (skip_ioapic_setup == 1)
4109 return; 3769 return;
4110 3770
4111 for (ioapic = 0; ioapic < nr_ioapics; ioapic++) 3771 for (ioapic = 0; ioapic < nr_ioapics; ioapic++)
4112 for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { 3772 for (pin = 0; pin < ioapics[ioapic].nr_registers; pin++) {
4113 irq_entry = find_irq_entry(ioapic, pin, mp_INT); 3773 irq_entry = find_irq_entry(ioapic, pin, mp_INT);
4114 if (irq_entry == -1) 3774 if (irq_entry == -1)
4115 continue; 3775 continue;
@@ -4118,21 +3778,20 @@ void __init setup_ioapic_dest(void)
4118 if ((ioapic > 0) && (irq > 16)) 3778 if ((ioapic > 0) && (irq > 16))
4119 continue; 3779 continue;
4120 3780
4121 desc = irq_to_desc(irq); 3781 idata = irq_get_irq_data(irq);
4122 3782
4123 /* 3783 /*
4124 * Honour affinities which have been set in early boot 3784 * Honour affinities which have been set in early boot
4125 */ 3785 */
4126 if (desc->status & 3786 if (!irqd_can_balance(idata) || irqd_affinity_was_set(idata))
4127 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) 3787 mask = idata->affinity;
4128 mask = desc->affinity;
4129 else 3788 else
4130 mask = apic->target_cpus(); 3789 mask = apic->target_cpus();
4131 3790
4132 if (intr_remapping_enabled) 3791 if (intr_remapping_enabled)
4133 set_ir_ioapic_affinity_irq_desc(desc, mask); 3792 ir_ioapic_set_affinity(idata, mask, false);
4134 else 3793 else
4135 set_ioapic_affinity_irq_desc(desc, mask); 3794 ioapic_set_affinity(idata, mask, false);
4136 } 3795 }
4137 3796
4138} 3797}
@@ -4172,7 +3831,7 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics)
4172 return res; 3831 return res;
4173} 3832}
4174 3833
4175void __init ioapic_init_mappings(void) 3834void __init ioapic_and_gsi_init(void)
4176{ 3835{
4177 unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; 3836 unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
4178 struct resource *ioapic_res; 3837 struct resource *ioapic_res;
@@ -4181,7 +3840,7 @@ void __init ioapic_init_mappings(void)
4181 ioapic_res = ioapic_setup_resources(nr_ioapics); 3840 ioapic_res = ioapic_setup_resources(nr_ioapics);
4182 for (i = 0; i < nr_ioapics; i++) { 3841 for (i = 0; i < nr_ioapics; i++) {
4183 if (smp_found_config) { 3842 if (smp_found_config) {
4184 ioapic_phys = mp_ioapics[i].apicaddr; 3843 ioapic_phys = mpc_ioapic_addr(i);
4185#ifdef CONFIG_X86_32 3844#ifdef CONFIG_X86_32
4186 if (!ioapic_phys) { 3845 if (!ioapic_phys) {
4187 printk(KERN_ERR 3846 printk(KERN_ERR
@@ -4210,6 +3869,8 @@ fake_ioapic_page:
4210 ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1; 3869 ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1;
4211 ioapic_res++; 3870 ioapic_res++;
4212 } 3871 }
3872
3873 probe_nr_irqs_gsi();
4213} 3874}
4214 3875
4215void __init ioapic_insert_resources(void) 3876void __init ioapic_insert_resources(void)
@@ -4234,10 +3895,14 @@ int mp_find_ioapic(u32 gsi)
4234{ 3895{
4235 int i = 0; 3896 int i = 0;
4236 3897
3898 if (nr_ioapics == 0)
3899 return -1;
3900
4237 /* Find the IOAPIC that manages this GSI. */ 3901 /* Find the IOAPIC that manages this GSI. */
4238 for (i = 0; i < nr_ioapics; i++) { 3902 for (i = 0; i < nr_ioapics; i++) {
4239 if ((gsi >= mp_gsi_routing[i].gsi_base) 3903 struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(i);
4240 && (gsi <= mp_gsi_routing[i].gsi_end)) 3904 if ((gsi >= gsi_cfg->gsi_base)
3905 && (gsi <= gsi_cfg->gsi_end))
4241 return i; 3906 return i;
4242 } 3907 }
4243 3908
@@ -4247,18 +3912,22 @@ int mp_find_ioapic(u32 gsi)
4247 3912
4248int mp_find_ioapic_pin(int ioapic, u32 gsi) 3913int mp_find_ioapic_pin(int ioapic, u32 gsi)
4249{ 3914{
3915 struct mp_ioapic_gsi *gsi_cfg;
3916
4250 if (WARN_ON(ioapic == -1)) 3917 if (WARN_ON(ioapic == -1))
4251 return -1; 3918 return -1;
4252 if (WARN_ON(gsi > mp_gsi_routing[ioapic].gsi_end)) 3919
3920 gsi_cfg = mp_ioapic_gsi_routing(ioapic);
3921 if (WARN_ON(gsi > gsi_cfg->gsi_end))
4253 return -1; 3922 return -1;
4254 3923
4255 return gsi - mp_gsi_routing[ioapic].gsi_base; 3924 return gsi - gsi_cfg->gsi_base;
4256} 3925}
4257 3926
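mp_find_ioapic() now also tolerates nr_ioapics == 0, and both lookups go through mp_ioapic_gsi_routing() instead of poking mp_gsi_routing[] directly. A hedged sketch of the usual GSI translation:

static int example_gsi_to_pin(u32 gsi)
{
	int ioapic, pin;

	ioapic = mp_find_ioapic(gsi);
	if (ioapic < 0)
		return -ENODEV;		/* no IO-APIC owns this GSI */

	pin = mp_find_ioapic_pin(ioapic, gsi);	/* index into that APIC's redirection table */
	return pin;
}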
4258static int bad_ioapic(unsigned long address) 3927static __init int bad_ioapic(unsigned long address)
4259{ 3928{
4260 if (nr_ioapics >= MAX_IO_APICS) { 3929 if (nr_ioapics >= MAX_IO_APICS) {
4261 printk(KERN_WARNING "WARING: Max # of I/O APICs (%d) exceeded " 3930 printk(KERN_WARNING "WARNING: Max # of I/O APICs (%d) exceeded "
4262 "(found %d), skipping\n", MAX_IO_APICS, nr_ioapics); 3931 "(found %d), skipping\n", MAX_IO_APICS, nr_ioapics);
4263 return 1; 3932 return 1;
4264 } 3933 }
@@ -4274,40 +3943,42 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
4274{ 3943{
4275 int idx = 0; 3944 int idx = 0;
4276 int entries; 3945 int entries;
3946 struct mp_ioapic_gsi *gsi_cfg;
4277 3947
4278 if (bad_ioapic(address)) 3948 if (bad_ioapic(address))
4279 return; 3949 return;
4280 3950
4281 idx = nr_ioapics; 3951 idx = nr_ioapics;
4282 3952
4283 mp_ioapics[idx].type = MP_IOAPIC; 3953 ioapics[idx].mp_config.type = MP_IOAPIC;
4284 mp_ioapics[idx].flags = MPC_APIC_USABLE; 3954 ioapics[idx].mp_config.flags = MPC_APIC_USABLE;
4285 mp_ioapics[idx].apicaddr = address; 3955 ioapics[idx].mp_config.apicaddr = address;
4286 3956
4287 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); 3957 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
4288 mp_ioapics[idx].apicid = io_apic_unique_id(id); 3958 ioapics[idx].mp_config.apicid = io_apic_unique_id(id);
4289 mp_ioapics[idx].apicver = io_apic_get_version(idx); 3959 ioapics[idx].mp_config.apicver = io_apic_get_version(idx);
4290 3960
4291 /* 3961 /*
4292 * Build basic GSI lookup table to facilitate gsi->io_apic lookups 3962 * Build basic GSI lookup table to facilitate gsi->io_apic lookups
4293 * and to prevent reprogramming of IOAPIC pins (PCI GSIs). 3963 * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
4294 */ 3964 */
4295 entries = io_apic_get_redir_entries(idx); 3965 entries = io_apic_get_redir_entries(idx);
4296 mp_gsi_routing[idx].gsi_base = gsi_base; 3966 gsi_cfg = mp_ioapic_gsi_routing(idx);
4297 mp_gsi_routing[idx].gsi_end = gsi_base + entries - 1; 3967 gsi_cfg->gsi_base = gsi_base;
3968 gsi_cfg->gsi_end = gsi_base + entries - 1;
4298 3969
4299 /* 3970 /*
4300 * The number of IO-APIC IRQ registers (== #pins): 3971 * The number of IO-APIC IRQ registers (== #pins):
4301 */ 3972 */
4302 nr_ioapic_registers[idx] = entries; 3973 ioapics[idx].nr_registers = entries;
4303 3974
4304 if (mp_gsi_routing[idx].gsi_end >= gsi_top) 3975 if (gsi_cfg->gsi_end >= gsi_top)
4305 gsi_top = mp_gsi_routing[idx].gsi_end + 1; 3976 gsi_top = gsi_cfg->gsi_end + 1;
4306 3977
4307 printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " 3978 printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
4308 "GSI %d-%d\n", idx, mp_ioapics[idx].apicid, 3979 "GSI %d-%d\n", idx, mpc_ioapic_id(idx),
4309 mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr, 3980 mpc_ioapic_ver(idx), mpc_ioapic_addr(idx),
4310 mp_gsi_routing[idx].gsi_base, mp_gsi_routing[idx].gsi_end); 3981 gsi_cfg->gsi_base, gsi_cfg->gsi_end);
4311 3982
4312 nr_ioapics++; 3983 nr_ioapics++;
4313} 3984}
@@ -4315,20 +3986,16 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
4315/* Enable IOAPIC early just for system timer */ 3986/* Enable IOAPIC early just for system timer */
4316void __init pre_init_apic_IRQ0(void) 3987void __init pre_init_apic_IRQ0(void)
4317{ 3988{
4318 struct irq_cfg *cfg; 3989 struct io_apic_irq_attr attr = { 0, 0, 0, 0 };
4319 struct irq_desc *desc;
4320 3990
4321 printk(KERN_INFO "Early APIC setup for system timer0\n"); 3991 printk(KERN_INFO "Early APIC setup for system timer0\n");
4322#ifndef CONFIG_SMP 3992#ifndef CONFIG_SMP
4323 phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); 3993 physid_set_mask_of_physid(boot_cpu_physical_apicid,
3994 &phys_cpu_present_map);
4324#endif 3995#endif
4325 desc = irq_to_desc_alloc_node(0, 0);
4326
4327 setup_local_APIC(); 3996 setup_local_APIC();
4328 3997
4329 cfg = irq_cfg(0); 3998 io_apic_setup_irq_pin(0, 0, &attr);
4330 add_pin_to_irq_node(cfg, 0, 0, 0); 3999 irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq,
4331 set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); 4000 "edge");
4332
4333 setup_IO_APIC_irq(0, 0, 0, desc, 0, 0);
4334} 4001}
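
The mp_find_ioapic()/mp_find_ioapic_pin() hunks above route the lookup through the new mp_ioapic_gsi accessor instead of poking mp_gsi_routing[] directly, but the algorithm is unchanged: find the chip whose [gsi_base, gsi_end] range covers the GSI, then take the offset into that range as the pin. A minimal userspace sketch of that range walk follows; the table contents are invented for illustration.

#include <stdio.h>

struct mp_ioapic_gsi { unsigned int gsi_base, gsi_end; };

static struct mp_ioapic_gsi gsi_cfg[] = {
	{ 0, 23 },	/* IOAPIC 0: 24 redirection entries */
	{ 24, 55 },	/* IOAPIC 1: 32 redirection entries */
};
static const int nr_ioapics = 2;

static int mp_find_ioapic(unsigned int gsi)
{
	int i;

	for (i = 0; i < nr_ioapics; i++)
		if (gsi >= gsi_cfg[i].gsi_base && gsi <= gsi_cfg[i].gsi_end)
			return i;
	return -1;
}

static int mp_find_ioapic_pin(int ioapic, unsigned int gsi)
{
	if (ioapic == -1 || gsi > gsi_cfg[ioapic].gsi_end)
		return -1;
	return gsi - gsi_cfg[ioapic].gsi_base;	/* pin = offset into the chip */
}

int main(void)
{
	unsigned int gsi = 30;
	int apic = mp_find_ioapic(gsi);

	/* prints: GSI 30 -> IOAPIC 1 pin 6 */
	printf("GSI %u -> IOAPIC %d pin %d\n", gsi, apic,
	       mp_find_ioapic_pin(apic, gsi));
	return 0;
}
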
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 08385e090a6f..cce91bf26676 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -56,6 +56,8 @@ void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
56 local_irq_restore(flags); 56 local_irq_restore(flags);
57} 57}
58 58
59#ifdef CONFIG_X86_32
60
59void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, 61void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
60 int vector) 62 int vector)
61{ 63{
@@ -71,8 +73,8 @@ void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
71 local_irq_save(flags); 73 local_irq_save(flags);
72 for_each_cpu(query_cpu, mask) 74 for_each_cpu(query_cpu, mask)
73 __default_send_IPI_dest_field( 75 __default_send_IPI_dest_field(
74 apic->cpu_to_logical_apicid(query_cpu), vector, 76 early_per_cpu(x86_cpu_to_logical_apicid, query_cpu),
75 apic->dest_logical); 77 vector, apic->dest_logical);
76 local_irq_restore(flags); 78 local_irq_restore(flags);
77} 79}
78 80
@@ -90,14 +92,12 @@ void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
90 if (query_cpu == this_cpu) 92 if (query_cpu == this_cpu)
91 continue; 93 continue;
92 __default_send_IPI_dest_field( 94 __default_send_IPI_dest_field(
93 apic->cpu_to_logical_apicid(query_cpu), vector, 95 early_per_cpu(x86_cpu_to_logical_apicid, query_cpu),
94 apic->dest_logical); 96 vector, apic->dest_logical);
95 } 97 }
96 local_irq_restore(flags); 98 local_irq_restore(flags);
97} 99}
98 100
99#ifdef CONFIG_X86_32
100
101/* 101/*
102 * This is only used on smaller machines. 102 * This is only used on smaller machines.
103 */ 103 */
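
The ipi.c hunk moves the logical-mode senders under CONFIG_X86_32 and reads each destination from the x86_cpu_to_logical_apicid early-per-cpu map rather than through the removed apic->cpu_to_logical_apicid callback. A rough userspace model of the sequence-send loop, with an invented ID table standing in for the per-cpu map and a printf standing in for the ICR write:

#include <stdio.h>

#define NR_CPUS 8

/* stand-in for the x86_cpu_to_logical_apicid map (flat logical IDs) */
static unsigned int cpu_to_logical_apicid[NR_CPUS] = {
	0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80
};

static void send_ipi_dest_field(unsigned int dest, int vector)
{
	printf("ICR write: dest=%#04x vector=%d\n", dest, vector);
}

static void send_ipi_mask_sequence_logical(unsigned long mask, int vector)
{
	int cpu;

	/* The kernel brackets this loop with local_irq_save/restore so the
	 * two-part ICR write cannot be interleaved with another sender. */
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		if (mask & (1UL << cpu))
			send_ipi_dest_field(cpu_to_logical_apicid[cpu], vector);
}

int main(void)
{
	send_ipi_mask_sequence_logical(0x0b /* CPUs 0, 1 and 3 */, 0xfd);
	return 0;
}
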
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
deleted file mode 100644
index a43f71cb30f8..000000000000
--- a/arch/x86/kernel/apic/nmi.c
+++ /dev/null
@@ -1,567 +0,0 @@
1/*
2 * NMI watchdog support on APIC systems
3 *
4 * Started by Ingo Molnar <mingo@redhat.com>
5 *
6 * Fixes:
7 * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
8 * Mikael Pettersson : Power Management for local APIC NMI watchdog.
9 * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog.
10 * Pavel Machek and
11 * Mikael Pettersson : PM converted to driver model. Disable/enable API.
12 */
13
14#include <asm/apic.h>
15
16#include <linux/nmi.h>
17#include <linux/mm.h>
18#include <linux/delay.h>
19#include <linux/interrupt.h>
20#include <linux/module.h>
21#include <linux/slab.h>
22#include <linux/sysdev.h>
23#include <linux/sysctl.h>
24#include <linux/percpu.h>
25#include <linux/kprobes.h>
26#include <linux/cpumask.h>
27#include <linux/kernel_stat.h>
28#include <linux/kdebug.h>
29#include <linux/smp.h>
30
31#include <asm/i8259.h>
32#include <asm/io_apic.h>
33#include <asm/proto.h>
34#include <asm/timer.h>
35
36#include <asm/mce.h>
37
38#include <asm/mach_traps.h>
39
40int unknown_nmi_panic;
41int nmi_watchdog_enabled;
42
43/* For reliability, we're prepared to waste bits here. */
44static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
45
46/* nmi_active:
47 * >0: the lapic NMI watchdog is active, but can be disabled
48 * <0: the lapic NMI watchdog has not been set up, and cannot
49 * be enabled
50 * 0: the lapic NMI watchdog is disabled, but can be enabled
51 */
52atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
53EXPORT_SYMBOL(nmi_active);
54
55unsigned int nmi_watchdog = NMI_NONE;
56EXPORT_SYMBOL(nmi_watchdog);
57
58static int panic_on_timeout;
59
60static unsigned int nmi_hz = HZ;
61static DEFINE_PER_CPU(short, wd_enabled);
62static int endflag __initdata;
63
64static inline unsigned int get_nmi_count(int cpu)
65{
66 return per_cpu(irq_stat, cpu).__nmi_count;
67}
68
69static inline int mce_in_progress(void)
70{
71#if defined(CONFIG_X86_MCE)
72 return atomic_read(&mce_entry) > 0;
73#endif
74 return 0;
75}
76
77/*
78 * Take the local apic timer and PIT/HPET into account. We don't
79 * know which one is active, when we have highres/dyntick on
80 */
81static inline unsigned int get_timer_irqs(int cpu)
82{
83 return per_cpu(irq_stat, cpu).apic_timer_irqs +
84 per_cpu(irq_stat, cpu).irq0_irqs;
85}
86
87#ifdef CONFIG_SMP
88/*
89 * The performance counters used by NMI_LOCAL_APIC don't trigger when
90 * the CPU is idle. To make sure the NMI watchdog really ticks on all
91 * CPUs during the test make them busy.
92 */
93static __init void nmi_cpu_busy(void *data)
94{
95 local_irq_enable_in_hardirq();
96 /*
97 * Intentionally don't use cpu_relax here. This is
98 * to make sure that the performance counter really ticks,
99 * even if there is a simulator or similar that catches the
100 * pause instruction. On a real HT machine this is fine because
101 * all other CPUs are busy with "useless" delay loops and don't
102 * care if they get somewhat less cycles.
103 */
104 while (endflag == 0)
105 mb();
106}
107#endif
108
109static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count)
110{
111 printk(KERN_CONT "\n");
112
113 printk(KERN_WARNING
114 "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n",
115 cpu, prev_nmi_count[cpu], get_nmi_count(cpu));
116
117 printk(KERN_WARNING
118 "Please report this to bugzilla.kernel.org,\n");
119 printk(KERN_WARNING
120 "and attach the output of the 'dmesg' command.\n");
121
122 per_cpu(wd_enabled, cpu) = 0;
123 atomic_dec(&nmi_active);
124}
125
126static void __acpi_nmi_disable(void *__unused)
127{
128 apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
129}
130
131int __init check_nmi_watchdog(void)
132{
133 unsigned int *prev_nmi_count;
134 int cpu;
135
136 if (!nmi_watchdog_active() || !atomic_read(&nmi_active))
137 return 0;
138
139 prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
140 if (!prev_nmi_count)
141 goto error;
142
143 printk(KERN_INFO "Testing NMI watchdog ... ");
144
145#ifdef CONFIG_SMP
146 if (nmi_watchdog == NMI_LOCAL_APIC)
147 smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);
148#endif
149
150 for_each_possible_cpu(cpu)
151 prev_nmi_count[cpu] = get_nmi_count(cpu);
152 local_irq_enable();
153 mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */
154
155 for_each_online_cpu(cpu) {
156 if (!per_cpu(wd_enabled, cpu))
157 continue;
158 if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
159 report_broken_nmi(cpu, prev_nmi_count);
160 }
161 endflag = 1;
162 if (!atomic_read(&nmi_active)) {
163 kfree(prev_nmi_count);
164 atomic_set(&nmi_active, -1);
165 goto error;
166 }
167 printk("OK.\n");
168
169 /*
170 * now that we know it works we can reduce NMI frequency to
171 * something more reasonable; makes a difference in some configs
172 */
173 if (nmi_watchdog == NMI_LOCAL_APIC)
174 nmi_hz = lapic_adjust_nmi_hz(1);
175
176 kfree(prev_nmi_count);
177 return 0;
178error:
179 if (nmi_watchdog == NMI_IO_APIC) {
180 if (!timer_through_8259)
181 legacy_pic->chip->mask(0);
182 on_each_cpu(__acpi_nmi_disable, NULL, 1);
183 }
184
185#ifdef CONFIG_X86_32
186 timer_ack = 0;
187#endif
188 return -1;
189}
190
191static int __init setup_nmi_watchdog(char *str)
192{
193 unsigned int nmi;
194
195 if (!strncmp(str, "panic", 5)) {
196 panic_on_timeout = 1;
197 str = strchr(str, ',');
198 if (!str)
199 return 1;
200 ++str;
201 }
202
203 if (!strncmp(str, "lapic", 5))
204 nmi_watchdog = NMI_LOCAL_APIC;
205 else if (!strncmp(str, "ioapic", 6))
206 nmi_watchdog = NMI_IO_APIC;
207 else {
208 get_option(&str, &nmi);
209 if (nmi >= NMI_INVALID)
210 return 0;
211 nmi_watchdog = nmi;
212 }
213
214 return 1;
215}
216__setup("nmi_watchdog=", setup_nmi_watchdog);
217
218/*
219 * Suspend/resume support
220 */
221#ifdef CONFIG_PM
222
223static int nmi_pm_active; /* nmi_active before suspend */
224
225static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
226{
227 /* only CPU0 goes here, other CPUs should be offline */
228 nmi_pm_active = atomic_read(&nmi_active);
229 stop_apic_nmi_watchdog(NULL);
230 BUG_ON(atomic_read(&nmi_active) != 0);
231 return 0;
232}
233
234static int lapic_nmi_resume(struct sys_device *dev)
235{
236 /* only CPU0 goes here, other CPUs should be offline */
237 if (nmi_pm_active > 0) {
238 setup_apic_nmi_watchdog(NULL);
239 touch_nmi_watchdog();
240 }
241 return 0;
242}
243
244static struct sysdev_class nmi_sysclass = {
245 .name = "lapic_nmi",
246 .resume = lapic_nmi_resume,
247 .suspend = lapic_nmi_suspend,
248};
249
250static struct sys_device device_lapic_nmi = {
251 .id = 0,
252 .cls = &nmi_sysclass,
253};
254
255static int __init init_lapic_nmi_sysfs(void)
256{
257 int error;
258
259 /*
260 * should really be a BUG_ON but b/c this is an
261 * init call, it just doesn't work. -dcz
262 */
263 if (nmi_watchdog != NMI_LOCAL_APIC)
264 return 0;
265
266 if (atomic_read(&nmi_active) < 0)
267 return 0;
268
269 error = sysdev_class_register(&nmi_sysclass);
270 if (!error)
271 error = sysdev_register(&device_lapic_nmi);
272 return error;
273}
274
275/* must come after the local APIC's device_initcall() */
276late_initcall(init_lapic_nmi_sysfs);
277
278#endif /* CONFIG_PM */
279
280static void __acpi_nmi_enable(void *__unused)
281{
282 apic_write(APIC_LVT0, APIC_DM_NMI);
283}
284
285/*
286 * Enable timer based NMIs on all CPUs:
287 */
288void acpi_nmi_enable(void)
289{
290 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
291 on_each_cpu(__acpi_nmi_enable, NULL, 1);
292}
293
294/*
295 * Disable timer based NMIs on all CPUs:
296 */
297void acpi_nmi_disable(void)
298{
299 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
300 on_each_cpu(__acpi_nmi_disable, NULL, 1);
301}
302
303/*
304 * This function is called as soon the LAPIC NMI watchdog driver has everything
305 * in place and it's ready to check if the NMIs belong to the NMI watchdog
306 */
307void cpu_nmi_set_wd_enabled(void)
308{
309 __get_cpu_var(wd_enabled) = 1;
310}
311
312void setup_apic_nmi_watchdog(void *unused)
313{
314 if (__get_cpu_var(wd_enabled))
315 return;
316
317 /* cheap hack to support suspend/resume */
318 /* if cpu0 is not active neither should the other cpus */
319 if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0)
320 return;
321
322 switch (nmi_watchdog) {
323 case NMI_LOCAL_APIC:
324 if (lapic_watchdog_init(nmi_hz) < 0) {
325 __get_cpu_var(wd_enabled) = 0;
326 return;
327 }
328 /* FALL THROUGH */
329 case NMI_IO_APIC:
330 __get_cpu_var(wd_enabled) = 1;
331 atomic_inc(&nmi_active);
332 }
333}
334
335void stop_apic_nmi_watchdog(void *unused)
336{
337 /* only support LOCAL and IO APICs for now */
338 if (!nmi_watchdog_active())
339 return;
340 if (__get_cpu_var(wd_enabled) == 0)
341 return;
342 if (nmi_watchdog == NMI_LOCAL_APIC)
343 lapic_watchdog_stop();
344 else
345 __acpi_nmi_disable(NULL);
346 __get_cpu_var(wd_enabled) = 0;
347 atomic_dec(&nmi_active);
348}
349
350/*
351 * the best way to detect whether a CPU has a 'hard lockup' problem
 352 * is to check its local APIC timer IRQ counts. If they are not
353 * changing then that CPU has some problem.
354 *
355 * as these watchdog NMI IRQs are generated on every CPU, we only
356 * have to check the current processor.
357 *
358 * since NMIs don't listen to _any_ locks, we have to be extremely
359 * careful not to rely on unsafe variables. The printk might lock
360 * up though, so we have to break up any console locks first ...
361 * [when there will be more tty-related locks, break them up here too!]
362 */
363
364static DEFINE_PER_CPU(unsigned, last_irq_sum);
365static DEFINE_PER_CPU(long, alert_counter);
366static DEFINE_PER_CPU(int, nmi_touch);
367
368void touch_nmi_watchdog(void)
369{
370 if (nmi_watchdog_active()) {
371 unsigned cpu;
372
373 /*
374 * Tell other CPUs to reset their alert counters. We cannot
375 * do it ourselves because the alert count increase is not
376 * atomic.
377 */
378 for_each_present_cpu(cpu) {
379 if (per_cpu(nmi_touch, cpu) != 1)
380 per_cpu(nmi_touch, cpu) = 1;
381 }
382 }
383
384 /*
385 * Tickle the softlockup detector too:
386 */
387 touch_softlockup_watchdog();
388}
389EXPORT_SYMBOL(touch_nmi_watchdog);
390
391notrace __kprobes int
392nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
393{
394 /*
395 * Since current_thread_info()-> is always on the stack, and we
396 * always switch the stack NMI-atomically, it's safe to use
397 * smp_processor_id().
398 */
399 unsigned int sum;
400 int touched = 0;
401 int cpu = smp_processor_id();
402 int rc = 0;
403
404 sum = get_timer_irqs(cpu);
405
406 if (__get_cpu_var(nmi_touch)) {
407 __get_cpu_var(nmi_touch) = 0;
408 touched = 1;
409 }
410
411 /* We can be called before check_nmi_watchdog, hence NULL check. */
412 if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
413 static DEFINE_RAW_SPINLOCK(lock); /* Serialise the printks */
414
415 raw_spin_lock(&lock);
416 printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
417 show_regs(regs);
418 dump_stack();
419 raw_spin_unlock(&lock);
420 cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
421
422 rc = 1;
423 }
424
425 /* Could check oops_in_progress here too, but it's safer not to */
426 if (mce_in_progress())
427 touched = 1;
428
 429 /* if none of the timers is firing, this cpu isn't doing much */
430 if (!touched && __get_cpu_var(last_irq_sum) == sum) {
431 /*
432 * Ayiee, looks like this CPU is stuck ...
433 * wait a few IRQs (5 seconds) before doing the oops ...
434 */
435 __this_cpu_inc(alert_counter);
436 if (__this_cpu_read(alert_counter) == 5 * nmi_hz)
437 /*
438 * die_nmi will return ONLY if NOTIFY_STOP happens..
439 */
440 die_nmi("BUG: NMI Watchdog detected LOCKUP",
441 regs, panic_on_timeout);
442 } else {
443 __get_cpu_var(last_irq_sum) = sum;
444 __this_cpu_write(alert_counter, 0);
445 }
446
447 /* see if the nmi watchdog went off */
448 if (!__get_cpu_var(wd_enabled))
449 return rc;
450 switch (nmi_watchdog) {
451 case NMI_LOCAL_APIC:
452 rc |= lapic_wd_event(nmi_hz);
453 break;
454 case NMI_IO_APIC:
455 /*
456 * don't know how to accurately check for this.
457 * just assume it was a watchdog timer interrupt
458 * This matches the old behaviour.
459 */
460 rc = 1;
461 break;
462 }
463 return rc;
464}
465
466#ifdef CONFIG_SYSCTL
467
468static void enable_ioapic_nmi_watchdog_single(void *unused)
469{
470 __get_cpu_var(wd_enabled) = 1;
471 atomic_inc(&nmi_active);
472 __acpi_nmi_enable(NULL);
473}
474
475static void enable_ioapic_nmi_watchdog(void)
476{
477 on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1);
478 touch_nmi_watchdog();
479}
480
481static void disable_ioapic_nmi_watchdog(void)
482{
483 on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
484}
485
486static int __init setup_unknown_nmi_panic(char *str)
487{
488 unknown_nmi_panic = 1;
489 return 1;
490}
491__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
492
493static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
494{
495 unsigned char reason = get_nmi_reason();
496 char buf[64];
497
498 sprintf(buf, "NMI received for unknown reason %02x\n", reason);
499 die_nmi(buf, regs, 1); /* Always panic here */
500 return 0;
501}
502
503/*
504 * proc handler for /proc/sys/kernel/nmi
505 */
506int proc_nmi_enabled(struct ctl_table *table, int write,
507 void __user *buffer, size_t *length, loff_t *ppos)
508{
509 int old_state;
510
511 nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
512 old_state = nmi_watchdog_enabled;
513 proc_dointvec(table, write, buffer, length, ppos);
514 if (!!old_state == !!nmi_watchdog_enabled)
515 return 0;
516
517 if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) {
518 printk(KERN_WARNING
519 "NMI watchdog is permanently disabled\n");
520 return -EIO;
521 }
522
523 if (nmi_watchdog == NMI_LOCAL_APIC) {
524 if (nmi_watchdog_enabled)
525 enable_lapic_nmi_watchdog();
526 else
527 disable_lapic_nmi_watchdog();
528 } else if (nmi_watchdog == NMI_IO_APIC) {
529 if (nmi_watchdog_enabled)
530 enable_ioapic_nmi_watchdog();
531 else
532 disable_ioapic_nmi_watchdog();
533 } else {
534 printk(KERN_WARNING
535 "NMI watchdog doesn't know what hardware to touch\n");
536 return -EIO;
537 }
538 return 0;
539}
540
541#endif /* CONFIG_SYSCTL */
542
543int do_nmi_callback(struct pt_regs *regs, int cpu)
544{
545#ifdef CONFIG_SYSCTL
546 if (unknown_nmi_panic)
547 return unknown_nmi_panic_callback(regs, cpu);
548#endif
549 return 0;
550}
551
552void arch_trigger_all_cpu_backtrace(void)
553{
554 int i;
555
556 cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
557
558 printk(KERN_INFO "sending NMI to all CPUs:\n");
559 apic->send_IPI_all(NMI_VECTOR);
560
561 /* Wait for up to 10 seconds for all CPUs to do the backtrace */
562 for (i = 0; i < 10 * 1000; i++) {
563 if (cpumask_empty(to_cpumask(backtrace_mask)))
564 break;
565 mdelay(1);
566 }
567}
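
nmi.c is deleted outright; the perf-based hardlockup detector (hw_nmi.c plus the generic watchdog code) takes over this role. The heart of the removed file was the heuristic in nmi_watchdog_tick(): sample the CPU's timer-interrupt count on every watchdog NMI and declare a lockup once it stops moving for about five seconds' worth of ticks. A compact model of just that counter logic, with invented tick values:

#include <stdio.h>

#define NMI_HZ 1	/* the watchdog slows itself to ~1 NMI/s after the self-test */

static unsigned int last_irq_sum;
static long alert_counter;

static int watchdog_tick(unsigned int timer_irqs_seen)
{
	if (timer_irqs_seen == last_irq_sum) {
		if (++alert_counter == 5 * NMI_HZ)
			return 1;	/* stuck for ~5s: would die_nmi() here */
	} else {
		last_irq_sum = timer_irqs_seen;
		alert_counter = 0;
	}
	return 0;
}

int main(void)
{
	unsigned int sum = 100;
	int tick;

	for (tick = 0; tick < 8; tick++) {
		/* timer interrupts stop advancing after tick 2 */
		if (tick < 3)
			sum++;
		if (watchdog_tick(sum))
			printf("tick %d: LOCKUP detected\n", tick);
	}
	return 0;
}
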
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 3e28401f161c..c4a61ca1349a 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -26,6 +26,7 @@
26#include <linux/nodemask.h> 26#include <linux/nodemask.h>
27#include <linux/topology.h> 27#include <linux/topology.h>
28#include <linux/bootmem.h> 28#include <linux/bootmem.h>
29#include <linux/memblock.h>
29#include <linux/threads.h> 30#include <linux/threads.h>
30#include <linux/cpumask.h> 31#include <linux/cpumask.h>
31#include <linux/kernel.h> 32#include <linux/kernel.h>
@@ -47,8 +48,6 @@
47#include <asm/e820.h> 48#include <asm/e820.h>
48#include <asm/ipi.h> 49#include <asm/ipi.h>
49 50
50#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))
51
52int found_numaq; 51int found_numaq;
53 52
54/* 53/*
@@ -78,31 +77,20 @@ int quad_local_to_mp_bus_id[NR_CPUS/4][4];
78static inline void numaq_register_node(int node, struct sys_cfg_data *scd) 77static inline void numaq_register_node(int node, struct sys_cfg_data *scd)
79{ 78{
80 struct eachquadmem *eq = scd->eq + node; 79 struct eachquadmem *eq = scd->eq + node;
80 u64 start = (u64)(eq->hi_shrd_mem_start - eq->priv_mem_size) << 20;
81 u64 end = (u64)(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size) << 20;
82 int ret;
81 83
82 node_set_online(node); 84 node_set(node, numa_nodes_parsed);
83 85 ret = numa_add_memblk(node, start, end);
84 /* Convert to pages */ 86 BUG_ON(ret < 0);
85 node_start_pfn[node] =
86 MB_TO_PAGES(eq->hi_shrd_mem_start - eq->priv_mem_size);
87
88 node_end_pfn[node] =
89 MB_TO_PAGES(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size);
90
91 e820_register_active_regions(node, node_start_pfn[node],
92 node_end_pfn[node]);
93
94 memory_present(node, node_start_pfn[node], node_end_pfn[node]);
95
96 node_remap_size[node] = node_memmap_size_bytes(node,
97 node_start_pfn[node],
98 node_end_pfn[node]);
99} 87}
100 88
101/* 89/*
102 * Function: smp_dump_qct() 90 * Function: smp_dump_qct()
103 * 91 *
104 * Description: gets memory layout from the quad config table. This 92 * Description: gets memory layout from the quad config table. This
105 * function also updates node_online_map with the nodes (quads) present. 93 * function also updates numa_nodes_parsed with the nodes (quads) present.
106 */ 94 */
107static void __init smp_dump_qct(void) 95static void __init smp_dump_qct(void)
108{ 96{
@@ -111,7 +99,6 @@ static void __init smp_dump_qct(void)
111 99
112 scd = (void *)__va(SYS_CFG_DATA_PRIV_ADDR); 100 scd = (void *)__va(SYS_CFG_DATA_PRIV_ADDR);
113 101
114 nodes_clear(node_online_map);
115 for_each_node(node) { 102 for_each_node(node) {
116 if (scd->quads_present31_0 & (1 << node)) 103 if (scd->quads_present31_0 & (1 << node))
117 numaq_register_node(node, scd); 104 numaq_register_node(node, scd);
@@ -281,14 +268,14 @@ static __init void early_check_numaq(void)
281 } 268 }
282} 269}
283 270
284int __init get_memcfg_numaq(void) 271int __init numaq_numa_init(void)
285{ 272{
286 early_check_numaq(); 273 early_check_numaq();
287 if (!found_numaq) 274 if (!found_numaq)
288 return 0; 275 return -ENOENT;
289 smp_dump_qct(); 276 smp_dump_qct();
290 277
291 return 1; 278 return 0;
292} 279}
293 280
294#define NUMAQ_APIC_DFR_VALUE (APIC_DFR_CLUSTER) 281#define NUMAQ_APIC_DFR_VALUE (APIC_DFR_CLUSTER)
@@ -372,13 +359,6 @@ static inline void numaq_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask
372 return physids_promote(0xFUL, retmap); 359 return physids_promote(0xFUL, retmap);
373} 360}
374 361
375static inline int numaq_cpu_to_logical_apicid(int cpu)
376{
377 if (cpu >= nr_cpu_ids)
378 return BAD_APICID;
379 return cpu_2_logical_apicid[cpu];
380}
381
382/* 362/*
383 * Supporting over 60 cpus on NUMA-Q requires a locality-dependent 363 * Supporting over 60 cpus on NUMA-Q requires a locality-dependent
384 * cpu to APIC ID relation to properly interact with the intelligent 364 * cpu to APIC ID relation to properly interact with the intelligent
@@ -397,6 +377,15 @@ static inline int numaq_apicid_to_node(int logical_apicid)
397 return logical_apicid >> 4; 377 return logical_apicid >> 4;
398} 378}
399 379
380static int numaq_numa_cpu_node(int cpu)
381{
382 int logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
383
384 if (logical_apicid != BAD_APICID)
385 return numaq_apicid_to_node(logical_apicid);
386 return NUMA_NO_NODE;
387}
388
400static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap) 389static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap)
401{ 390{
402 int node = numaq_apicid_to_node(logical_apicid); 391 int node = numaq_apicid_to_node(logical_apicid);
@@ -483,8 +472,8 @@ static void numaq_setup_portio_remap(void)
483 (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD); 472 (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD);
484} 473}
485 474
486/* Use __refdata to keep false positive warning calm. */ 475/* Use __refdata to keep false positive warning calm. */
487struct apic __refdata apic_numaq = { 476static struct apic __refdata apic_numaq = {
488 477
489 .name = "NUMAQ", 478 .name = "NUMAQ",
490 .probe = probe_numaq, 479 .probe = probe_numaq,
@@ -507,8 +496,6 @@ struct apic __refdata apic_numaq = {
507 .ioapic_phys_id_map = numaq_ioapic_phys_id_map, 496 .ioapic_phys_id_map = numaq_ioapic_phys_id_map,
508 .setup_apic_routing = numaq_setup_apic_routing, 497 .setup_apic_routing = numaq_setup_apic_routing,
509 .multi_timer_check = numaq_multi_timer_check, 498 .multi_timer_check = numaq_multi_timer_check,
510 .apicid_to_node = numaq_apicid_to_node,
511 .cpu_to_logical_apicid = numaq_cpu_to_logical_apicid,
512 .cpu_present_to_apicid = numaq_cpu_present_to_apicid, 499 .cpu_present_to_apicid = numaq_cpu_present_to_apicid,
513 .apicid_to_cpu_present = numaq_apicid_to_cpu_present, 500 .apicid_to_cpu_present = numaq_apicid_to_cpu_present,
514 .setup_portio_remap = numaq_setup_portio_remap, 501 .setup_portio_remap = numaq_setup_portio_remap,
@@ -546,4 +533,9 @@ struct apic __refdata apic_numaq = {
546 .icr_write = native_apic_icr_write, 533 .icr_write = native_apic_icr_write,
547 .wait_icr_idle = native_apic_wait_icr_idle, 534 .wait_icr_idle = native_apic_wait_icr_idle,
548 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, 535 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
536
537 .x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid,
538 .x86_32_numa_cpu_node = numaq_numa_cpu_node,
549}; 539};
540
541apic_driver(apic_numaq);
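
In numaq_32.c the node registration now hands a plain byte range to numa_add_memblk() instead of filling node_start_pfn/node_end_pfn by hand. The quad config table speaks in megabytes, so the only arithmetic left is the shift by 20. A small sketch of that conversion, with made-up field values:

#include <stdio.h>

struct eachquadmem {
	unsigned int hi_shrd_mem_start;	/* MB */
	unsigned int hi_shrd_mem_size;	/* MB */
	unsigned int priv_mem_size;	/* MB */
};

int main(void)
{
	struct eachquadmem eq = { 4096, 2048, 256 };
	unsigned long long start, end;

	start = (unsigned long long)(eq.hi_shrd_mem_start - eq.priv_mem_size) << 20;
	end   = (unsigned long long)(eq.hi_shrd_mem_start + eq.hi_shrd_mem_size) << 20;

	/* prints: node spans [0xf0000000, 0x180000000), i.e. 3840MB..6144MB */
	printf("node spans [%#llx, %#llx)\n", start, end);
	return 0;
}
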
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 99d2fe016084..b5254ad044ab 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -52,29 +52,9 @@ static int __init print_ipi_mode(void)
52} 52}
53late_initcall(print_ipi_mode); 53late_initcall(print_ipi_mode);
54 54
55void __init default_setup_apic_routing(void) 55static int default_x86_32_early_logical_apicid(int cpu)
56{ 56{
57 int version = apic_version[boot_cpu_physical_apicid]; 57 return 1 << cpu;
58
59 if (num_possible_cpus() > 8) {
60 switch (boot_cpu_data.x86_vendor) {
61 case X86_VENDOR_INTEL:
62 if (!APIC_XAPIC(version)) {
63 def_to_bigsmp = 0;
64 break;
65 }
66 /* If P4 and above fall through */
67 case X86_VENDOR_AMD:
68 def_to_bigsmp = 1;
69 }
70 }
71
72#ifdef CONFIG_X86_BIGSMP
73 generic_bigsmp_probe();
74#endif
75
76 if (apic->setup_apic_routing)
77 apic->setup_apic_routing();
78} 58}
79 59
80static void setup_apic_flat_routing(void) 60static void setup_apic_flat_routing(void)
@@ -107,7 +87,7 @@ static int probe_default(void)
107 return 1; 87 return 1;
108} 88}
109 89
110struct apic apic_default = { 90static struct apic apic_default = {
111 91
112 .name = "default", 92 .name = "default",
113 .probe = probe_default, 93 .probe = probe_default,
@@ -130,8 +110,6 @@ struct apic apic_default = {
130 .ioapic_phys_id_map = default_ioapic_phys_id_map, 110 .ioapic_phys_id_map = default_ioapic_phys_id_map,
131 .setup_apic_routing = setup_apic_flat_routing, 111 .setup_apic_routing = setup_apic_flat_routing,
132 .multi_timer_check = NULL, 112 .multi_timer_check = NULL,
133 .apicid_to_node = default_apicid_to_node,
134 .cpu_to_logical_apicid = default_cpu_to_logical_apicid,
135 .cpu_present_to_apicid = default_cpu_present_to_apicid, 113 .cpu_present_to_apicid = default_cpu_present_to_apicid,
136 .apicid_to_cpu_present = physid_set_mask_of_physid, 114 .apicid_to_cpu_present = physid_set_mask_of_physid,
137 .setup_portio_remap = NULL, 115 .setup_portio_remap = NULL,
@@ -167,46 +145,26 @@ struct apic apic_default = {
167 .icr_write = native_apic_icr_write, 145 .icr_write = native_apic_icr_write,
168 .wait_icr_idle = native_apic_wait_icr_idle, 146 .wait_icr_idle = native_apic_wait_icr_idle,
169 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, 147 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
148
149 .x86_32_early_logical_apicid = default_x86_32_early_logical_apicid,
170}; 150};
171 151
172extern struct apic apic_numaq; 152apic_driver(apic_default);
173extern struct apic apic_summit;
174extern struct apic apic_bigsmp;
175extern struct apic apic_es7000;
176extern struct apic apic_es7000_cluster;
177 153
178struct apic *apic = &apic_default; 154struct apic *apic = &apic_default;
179EXPORT_SYMBOL_GPL(apic); 155EXPORT_SYMBOL_GPL(apic);
180 156
181static struct apic *apic_probe[] __initdata = {
182#ifdef CONFIG_X86_NUMAQ
183 &apic_numaq,
184#endif
185#ifdef CONFIG_X86_SUMMIT
186 &apic_summit,
187#endif
188#ifdef CONFIG_X86_BIGSMP
189 &apic_bigsmp,
190#endif
191#ifdef CONFIG_X86_ES7000
192 &apic_es7000,
193 &apic_es7000_cluster,
194#endif
195 &apic_default, /* must be last */
196 NULL,
197};
198
199static int cmdline_apic __initdata; 157static int cmdline_apic __initdata;
200static int __init parse_apic(char *arg) 158static int __init parse_apic(char *arg)
201{ 159{
202 int i; 160 struct apic **drv;
203 161
204 if (!arg) 162 if (!arg)
205 return -EINVAL; 163 return -EINVAL;
206 164
207 for (i = 0; apic_probe[i]; i++) { 165 for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
208 if (!strcmp(apic_probe[i]->name, arg)) { 166 if (!strcmp((*drv)->name, arg)) {
209 apic = apic_probe[i]; 167 apic = *drv;
210 cmdline_apic = 1; 168 cmdline_apic = 1;
211 return 0; 169 return 0;
212 } 170 }
@@ -217,38 +175,58 @@ static int __init parse_apic(char *arg)
217} 175}
218early_param("apic", parse_apic); 176early_param("apic", parse_apic);
219 177
220void __init generic_bigsmp_probe(void) 178void __init default_setup_apic_routing(void)
221{ 179{
180 int version = apic_version[boot_cpu_physical_apicid];
181
182 if (num_possible_cpus() > 8) {
183 switch (boot_cpu_data.x86_vendor) {
184 case X86_VENDOR_INTEL:
185 if (!APIC_XAPIC(version)) {
186 def_to_bigsmp = 0;
187 break;
188 }
189 /* If P4 and above fall through */
190 case X86_VENDOR_AMD:
191 def_to_bigsmp = 1;
192 }
193 }
194
222#ifdef CONFIG_X86_BIGSMP 195#ifdef CONFIG_X86_BIGSMP
223 /* 196 /*
224 * This routine is used to switch to bigsmp mode when 197 * This is used to switch to bigsmp mode when
225 * - There is no apic= option specified by the user 198 * - There is no apic= option specified by the user
226 * - generic_apic_probe() has chosen apic_default as the sub_arch 199 * - generic_apic_probe() has chosen apic_default as the sub_arch
227 * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support 200 * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support
228 */ 201 */
229 202
230 if (!cmdline_apic && apic == &apic_default) { 203 if (!cmdline_apic && apic == &apic_default) {
231 if (apic_bigsmp.probe()) { 204 struct apic *bigsmp = generic_bigsmp_probe();
232 apic = &apic_bigsmp; 205 if (bigsmp) {
206 apic = bigsmp;
233 printk(KERN_INFO "Overriding APIC driver with %s\n", 207 printk(KERN_INFO "Overriding APIC driver with %s\n",
234 apic->name); 208 apic->name);
235 } 209 }
236 } 210 }
237#endif 211#endif
212
213 if (apic->setup_apic_routing)
214 apic->setup_apic_routing();
238} 215}
239 216
240void __init generic_apic_probe(void) 217void __init generic_apic_probe(void)
241{ 218{
242 if (!cmdline_apic) { 219 if (!cmdline_apic) {
243 int i; 220 struct apic **drv;
244 for (i = 0; apic_probe[i]; i++) { 221
245 if (apic_probe[i]->probe()) { 222 for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
246 apic = apic_probe[i]; 223 if ((*drv)->probe()) {
224 apic = *drv;
247 break; 225 break;
248 } 226 }
249 } 227 }
250 /* Not visible without early console */ 228 /* Not visible without early console */
251 if (!apic_probe[i]) 229 if (drv == __apicdrivers_end)
252 panic("Didn't find an APIC driver"); 230 panic("Didn't find an APIC driver");
253 } 231 }
254 printk(KERN_INFO "Using APIC driver %s\n", apic->name); 232 printk(KERN_INFO "Using APIC driver %s\n", apic->name);
@@ -259,16 +237,16 @@ void __init generic_apic_probe(void)
259int __init 237int __init
260generic_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) 238generic_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
261{ 239{
262 int i; 240 struct apic **drv;
263 241
264 for (i = 0; apic_probe[i]; ++i) { 242 for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
265 if (!apic_probe[i]->mps_oem_check) 243 if (!((*drv)->mps_oem_check))
266 continue; 244 continue;
267 if (!apic_probe[i]->mps_oem_check(mpc, oem, productid)) 245 if (!(*drv)->mps_oem_check(mpc, oem, productid))
268 continue; 246 continue;
269 247
270 if (!cmdline_apic) { 248 if (!cmdline_apic) {
271 apic = apic_probe[i]; 249 apic = *drv;
272 printk(KERN_INFO "Switched to APIC driver `%s'.\n", 250 printk(KERN_INFO "Switched to APIC driver `%s'.\n",
273 apic->name); 251 apic->name);
274 } 252 }
@@ -279,16 +257,16 @@ generic_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
279 257
280int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) 258int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
281{ 259{
282 int i; 260 struct apic **drv;
283 261
284 for (i = 0; apic_probe[i]; ++i) { 262 for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
285 if (!apic_probe[i]->acpi_madt_oem_check) 263 if (!(*drv)->acpi_madt_oem_check)
286 continue; 264 continue;
287 if (!apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) 265 if (!(*drv)->acpi_madt_oem_check(oem_id, oem_table_id))
288 continue; 266 continue;
289 267
290 if (!cmdline_apic) { 268 if (!cmdline_apic) {
291 apic = apic_probe[i]; 269 apic = *drv;
292 printk(KERN_INFO "Switched to APIC driver `%s'.\n", 270 printk(KERN_INFO "Switched to APIC driver `%s'.\n",
293 apic->name); 271 apic->name);
294 } 272 }
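
probe_32.c drops the hand-maintained apic_probe[] array: each driver now registers itself with apic_driver(), which places a pointer in a dedicated section bounded by __apicdrivers and __apicdrivers_end, so probe priority follows placement order in that section and every probe loop simply walks the span. A userspace model of the walk, using a plain array in place of the linker section:

#include <stdio.h>

struct apic {
	const char *name;
	int (*probe)(void);
};

static int probe_no(void)  { return 0; }
static int probe_yes(void) { return 1; }

static struct apic apic_bigsmp  = { "bigsmp",  probe_no  };
static struct apic apic_default = { "default", probe_yes };	/* listed last */

/* stand-in for the .apicdrivers linker section */
static struct apic *apicdrivers[] = { &apic_bigsmp, &apic_default };
#define __apicdrivers	  (&apicdrivers[0])
#define __apicdrivers_end (&apicdrivers[2])

int main(void)
{
	struct apic **drv, *apic = NULL;

	for (drv = __apicdrivers; drv < __apicdrivers_end; drv++)
		if ((*drv)->probe()) {
			apic = *drv;	/* first successful probe wins */
			break;
		}
	if (!apic) {
		fprintf(stderr, "Didn't find an APIC driver\n");
		return 1;
	}
	printf("Using APIC driver %s\n", apic->name);
	return 0;
}
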
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index 83e9be4778e2..3fe986698929 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -23,27 +23,6 @@
23#include <asm/ipi.h> 23#include <asm/ipi.h>
24#include <asm/setup.h> 24#include <asm/setup.h>
25 25
26extern struct apic apic_flat;
27extern struct apic apic_physflat;
28extern struct apic apic_x2xpic_uv_x;
29extern struct apic apic_x2apic_phys;
30extern struct apic apic_x2apic_cluster;
31
32struct apic __read_mostly *apic = &apic_flat;
33EXPORT_SYMBOL_GPL(apic);
34
35static struct apic *apic_probe[] __initdata = {
36#ifdef CONFIG_X86_UV
37 &apic_x2apic_uv_x,
38#endif
39#ifdef CONFIG_X86_X2APIC
40 &apic_x2apic_phys,
41 &apic_x2apic_cluster,
42#endif
43 &apic_physflat,
44 NULL,
45};
46
47static int apicid_phys_pkg_id(int initial_apic_id, int index_msb) 26static int apicid_phys_pkg_id(int initial_apic_id, int index_msb)
48{ 27{
49 return hard_smp_processor_id() >> index_msb; 28 return hard_smp_processor_id() >> index_msb;
@@ -54,35 +33,25 @@ static int apicid_phys_pkg_id(int initial_apic_id, int index_msb)
54 */ 33 */
55void __init default_setup_apic_routing(void) 34void __init default_setup_apic_routing(void)
56{ 35{
57#ifdef CONFIG_X86_X2APIC 36 struct apic **drv;
58 if (x2apic_mode
59#ifdef CONFIG_X86_UV
60 && apic != &apic_x2apic_uv_x
61#endif
62 ) {
63 if (x2apic_phys)
64 apic = &apic_x2apic_phys;
65 else
66 apic = &apic_x2apic_cluster;
67 }
68#endif
69 37
70 if (apic == &apic_flat && num_possible_cpus() > 8) 38 enable_IR_x2apic();
71 apic = &apic_physflat;
72 39
73 printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); 40 for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
41 if ((*drv)->probe && (*drv)->probe()) {
42 if (apic != *drv) {
43 apic = *drv;
44 pr_info("Switched APIC routing to %s.\n",
45 apic->name);
46 }
47 break;
48 }
49 }
74 50
75 if (is_vsmp_box()) { 51 if (is_vsmp_box()) {
76 /* need to update phys_pkg_id */ 52 /* need to update phys_pkg_id */
77 apic->phys_pkg_id = apicid_phys_pkg_id; 53 apic->phys_pkg_id = apicid_phys_pkg_id;
78 } 54 }
79
80 /*
81 * Now that apic routing model is selected, configure the
82 * fault handling for intr remapping.
83 */
84 if (intr_remapping_enabled)
85 enable_drhd_fault_handling();
86} 55}
87 56
88/* Same for both flat and physical. */ 57/* Same for both flat and physical. */
@@ -94,13 +63,15 @@ void apic_send_IPI_self(int vector)
94 63
95int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) 64int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
96{ 65{
97 int i; 66 struct apic **drv;
98 67
99 for (i = 0; apic_probe[i]; ++i) { 68 for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
100 if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { 69 if ((*drv)->acpi_madt_oem_check(oem_id, oem_table_id)) {
101 apic = apic_probe[i]; 70 if (apic != *drv) {
102 printk(KERN_INFO "Setting APIC routing to %s.\n", 71 apic = *drv;
103 apic->name); 72 pr_info("Setting APIC routing to %s.\n",
73 apic->name);
74 }
104 return 1; 75 return 1;
105 } 76 }
106 } 77 }
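
On 64-bit the same __apicdrivers walk replaces the open-coded x2apic selection, while the vSMP quirk keeps overriding phys_pkg_id with apicid_phys_pkg_id(), which just shifts the APIC ID right by index_msb (the number of ID bits addressing cores/threads within a package). A tiny worked example of that shift, assuming a topology of four APIC IDs per package:

#include <stdio.h>

static int apicid_phys_pkg_id(int apicid, int index_msb)
{
	return apicid >> index_msb;
}

int main(void)
{
	/* 4 APIC IDs per package -> index_msb = 2 */
	int apicid;

	for (apicid = 0; apicid < 8; apicid++)
		printf("apicid %d -> pkg %d\n",
		       apicid, apicid_phys_pkg_id(apicid, 2));
	return 0;
}
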
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 9b419263d90d..19114423c58c 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -194,11 +194,10 @@ static unsigned long summit_check_apicid_present(int bit)
194 return 1; 194 return 1;
195} 195}
196 196
197static void summit_init_apic_ldr(void) 197static int summit_early_logical_apicid(int cpu)
198{ 198{
199 unsigned long val, id;
200 int count = 0; 199 int count = 0;
201 u8 my_id = (u8)hard_smp_processor_id(); 200 u8 my_id = early_per_cpu(x86_cpu_to_apicid, cpu);
202 u8 my_cluster = APIC_CLUSTER(my_id); 201 u8 my_cluster = APIC_CLUSTER(my_id);
203#ifdef CONFIG_SMP 202#ifdef CONFIG_SMP
204 u8 lid; 203 u8 lid;
@@ -206,7 +205,7 @@ static void summit_init_apic_ldr(void)
206 205
207 /* Create logical APIC IDs by counting CPUs already in cluster. */ 206 /* Create logical APIC IDs by counting CPUs already in cluster. */
208 for (count = 0, i = nr_cpu_ids; --i >= 0; ) { 207 for (count = 0, i = nr_cpu_ids; --i >= 0; ) {
209 lid = cpu_2_logical_apicid[i]; 208 lid = early_per_cpu(x86_cpu_to_logical_apicid, i);
210 if (lid != BAD_APICID && APIC_CLUSTER(lid) == my_cluster) 209 if (lid != BAD_APICID && APIC_CLUSTER(lid) == my_cluster)
211 ++count; 210 ++count;
212 } 211 }
@@ -214,7 +213,15 @@ static void summit_init_apic_ldr(void)
214 /* We only have a 4 wide bitmap in cluster mode. If a deranged 213 /* We only have a 4 wide bitmap in cluster mode. If a deranged
215 * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */ 214 * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */
216 BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT); 215 BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT);
217 id = my_cluster | (1UL << count); 216 return my_cluster | (1UL << count);
217}
218
219static void summit_init_apic_ldr(void)
220{
221 int cpu = smp_processor_id();
222 unsigned long id = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
223 unsigned long val;
224
218 apic_write(APIC_DFR, SUMMIT_APIC_DFR_VALUE); 225 apic_write(APIC_DFR, SUMMIT_APIC_DFR_VALUE);
219 val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; 226 val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
220 val |= SET_APIC_LOGICAL_ID(id); 227 val |= SET_APIC_LOGICAL_ID(id);
@@ -232,27 +239,6 @@ static void summit_setup_apic_routing(void)
232 nr_ioapics); 239 nr_ioapics);
233} 240}
234 241
235static int summit_apicid_to_node(int logical_apicid)
236{
237#ifdef CONFIG_SMP
238 return apicid_2_node[hard_smp_processor_id()];
239#else
240 return 0;
241#endif
242}
243
244/* Mapping from cpu number to logical apicid */
245static inline int summit_cpu_to_logical_apicid(int cpu)
246{
247#ifdef CONFIG_SMP
248 if (cpu >= nr_cpu_ids)
249 return BAD_APICID;
250 return cpu_2_logical_apicid[cpu];
251#else
252 return logical_smp_processor_id();
253#endif
254}
255
256static int summit_cpu_present_to_apicid(int mps_cpu) 242static int summit_cpu_present_to_apicid(int mps_cpu)
257{ 243{
258 if (mps_cpu < nr_cpu_ids) 244 if (mps_cpu < nr_cpu_ids)
@@ -286,7 +272,7 @@ static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask)
286 * The cpus in the mask must all be on the apic cluster. 272 * The cpus in the mask must all be on the apic cluster.
287 */ 273 */
288 for_each_cpu(cpu, cpumask) { 274 for_each_cpu(cpu, cpumask) {
289 int new_apicid = summit_cpu_to_logical_apicid(cpu); 275 int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
290 276
291 if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { 277 if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
292 printk("%s: Not a valid mask!\n", __func__); 278 printk("%s: Not a valid mask!\n", __func__);
@@ -301,7 +287,7 @@ static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask)
301static unsigned int summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, 287static unsigned int summit_cpu_mask_to_apicid_and(const struct cpumask *inmask,
302 const struct cpumask *andmask) 288 const struct cpumask *andmask)
303{ 289{
304 int apicid = summit_cpu_to_logical_apicid(0); 290 int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
305 cpumask_var_t cpumask; 291 cpumask_var_t cpumask;
306 292
307 if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) 293 if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
@@ -505,7 +491,7 @@ void setup_summit(void)
505} 491}
506#endif 492#endif
507 493
508struct apic apic_summit = { 494static struct apic apic_summit = {
509 495
510 .name = "summit", 496 .name = "summit",
511 .probe = probe_summit, 497 .probe = probe_summit,
@@ -528,8 +514,6 @@ struct apic apic_summit = {
528 .ioapic_phys_id_map = summit_ioapic_phys_id_map, 514 .ioapic_phys_id_map = summit_ioapic_phys_id_map,
529 .setup_apic_routing = summit_setup_apic_routing, 515 .setup_apic_routing = summit_setup_apic_routing,
530 .multi_timer_check = NULL, 516 .multi_timer_check = NULL,
531 .apicid_to_node = summit_apicid_to_node,
532 .cpu_to_logical_apicid = summit_cpu_to_logical_apicid,
533 .cpu_present_to_apicid = summit_cpu_present_to_apicid, 517 .cpu_present_to_apicid = summit_cpu_present_to_apicid,
534 .apicid_to_cpu_present = summit_apicid_to_cpu_present, 518 .apicid_to_cpu_present = summit_apicid_to_cpu_present,
535 .setup_portio_remap = NULL, 519 .setup_portio_remap = NULL,
@@ -565,4 +549,8 @@ struct apic apic_summit = {
565 .icr_write = native_apic_icr_write, 549 .icr_write = native_apic_icr_write,
566 .wait_icr_idle = native_apic_wait_icr_idle, 550 .wait_icr_idle = native_apic_wait_icr_idle,
567 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, 551 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
552
553 .x86_32_early_logical_apicid = summit_early_logical_apicid,
568}; 554};
555
556apic_driver(apic_summit);
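
summit_32.c splits the logical-ID computation out of summit_init_apic_ldr() into summit_early_logical_apicid(), which can run before the APIC is programmed: count how many CPUs already sit in this CPU's cluster and claim the next bit of the 4-wide member bitmap. A standalone model of that assignment; the APIC IDs are invented and APIC_CLUSTER is taken as the upper nibble, as in cluster-mode xAPIC:

#include <stdio.h>

#define APIC_CLUSTER(id)	((id) & 0xf0)
#define BAD_APICID		0xff

static unsigned char logical_apicid[8] = {
	BAD_APICID, BAD_APICID, BAD_APICID, BAD_APICID,
	BAD_APICID, BAD_APICID, BAD_APICID, BAD_APICID,
};
static unsigned char phys_apicid[8] = {
	0x00, 0x01, 0x10, 0x11, 0x02, 0x12, 0x03, 0x13
};

static int early_logical_apicid(int cpu)
{
	unsigned char my_cluster = APIC_CLUSTER(phys_apicid[cpu]);
	int i, count = 0;

	/* count CPUs already assigned a logical ID in my cluster */
	for (i = 0; i < 8; i++)
		if (logical_apicid[i] != BAD_APICID &&
		    APIC_CLUSTER(logical_apicid[i]) == my_cluster)
			count++;
	return my_cluster | (1 << count);	/* next one-hot member bit */
}

int main(void)
{
	int cpu;

	for (cpu = 0; cpu < 8; cpu++) {
		logical_apicid[cpu] = early_logical_apicid(cpu);
		printf("cpu %d: phys %#04x -> logical %#04x\n", cpu,
		       (unsigned)phys_apicid[cpu], (unsigned)logical_apicid[cpu]);
	}
	return 0;
}
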
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index cf69c59f4910..500795875827 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -5,118 +5,95 @@
5#include <linux/ctype.h> 5#include <linux/ctype.h>
6#include <linux/init.h> 6#include <linux/init.h>
7#include <linux/dmar.h> 7#include <linux/dmar.h>
8#include <linux/cpu.h>
8 9
9#include <asm/smp.h> 10#include <asm/smp.h>
10#include <asm/apic.h> 11#include <asm/x2apic.h>
11#include <asm/ipi.h>
12 12
13static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); 13static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
14static DEFINE_PER_CPU(cpumask_var_t, cpus_in_cluster);
15static DEFINE_PER_CPU(cpumask_var_t, ipi_mask);
14 16
15static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) 17static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
16{ 18{
17 return x2apic_enabled(); 19 return x2apic_enabled();
18} 20}
19 21
20/* 22static inline u32 x2apic_cluster(int cpu)
21 * need to use more than cpu 0, because we need more vectors when
22 * MSI-X are used.
23 */
24static const struct cpumask *x2apic_target_cpus(void)
25{ 23{
26 return cpu_online_mask; 24 return per_cpu(x86_cpu_to_logical_apicid, cpu) >> 16;
27}
28
29/*
30 * for now each logical cpu is in its own vector allocation domain.
31 */
32static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
33{
34 cpumask_clear(retmask);
35 cpumask_set_cpu(cpu, retmask);
36} 25}
37 26
38static void 27static void
39 __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest) 28__x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
40{ 29{
41 unsigned long cfg; 30 struct cpumask *cpus_in_cluster_ptr;
31 struct cpumask *ipi_mask_ptr;
32 unsigned int cpu, this_cpu;
33 unsigned long flags;
34 u32 dest;
35
36 x2apic_wrmsr_fence();
37
38 local_irq_save(flags);
42 39
43 cfg = __prepare_ICR(0, vector, dest); 40 this_cpu = smp_processor_id();
44 41
45 /* 42 /*
 46 * send the IPI. 43 * We are to modify mask, so we need our own copy
44 * and be sure it's manipulated with irq off.
47 */ 45 */
48 native_x2apic_icr_write(cfg, apicid); 46 ipi_mask_ptr = __raw_get_cpu_var(ipi_mask);
49} 47 cpumask_copy(ipi_mask_ptr, mask);
50 48
51/* 49 /*
52 * for now, we send the IPI's one by one in the cpumask. 50 * The idea is to send one IPI per cluster.
53 * TBD: Based on the cpu mask, we can send the IPI's to the cluster group 51 */
54 * at once. We have 16 cpu's in a cluster. This will minimize IPI register 52 for_each_cpu(cpu, ipi_mask_ptr) {
55 * writes. 53 unsigned long i;
56 */
57static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
58{
59 unsigned long query_cpu;
60 unsigned long flags;
61 54
62 x2apic_wrmsr_fence(); 55 cpus_in_cluster_ptr = per_cpu(cpus_in_cluster, cpu);
56 dest = 0;
63 57
64 local_irq_save(flags); 58 /* Collect cpus in cluster. */
65 for_each_cpu(query_cpu, mask) { 59 for_each_cpu_and(i, ipi_mask_ptr, cpus_in_cluster_ptr) {
66 __x2apic_send_IPI_dest( 60 if (apic_dest == APIC_DEST_ALLINC || i != this_cpu)
67 per_cpu(x86_cpu_to_logical_apicid, query_cpu), 61 dest |= per_cpu(x86_cpu_to_logical_apicid, i);
68 vector, apic->dest_logical); 62 }
63
64 if (!dest)
65 continue;
66
67 __x2apic_send_IPI_dest(dest, vector, apic->dest_logical);
68 /*
69 * Cluster sibling cpus should be discared now so
70 * we would not send IPI them second time.
71 */
72 cpumask_andnot(ipi_mask_ptr, ipi_mask_ptr, cpus_in_cluster_ptr);
69 } 73 }
74
70 local_irq_restore(flags); 75 local_irq_restore(flags);
71} 76}
72 77
78static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
79{
80 __x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLINC);
81}
82
73static void 83static void
74 x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) 84 x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
75{ 85{
76 unsigned long this_cpu = smp_processor_id(); 86 __x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLBUT);
77 unsigned long query_cpu;
78 unsigned long flags;
79
80 x2apic_wrmsr_fence();
81
82 local_irq_save(flags);
83 for_each_cpu(query_cpu, mask) {
84 if (query_cpu == this_cpu)
85 continue;
86 __x2apic_send_IPI_dest(
87 per_cpu(x86_cpu_to_logical_apicid, query_cpu),
88 vector, apic->dest_logical);
89 }
90 local_irq_restore(flags);
91} 87}
92 88
93static void x2apic_send_IPI_allbutself(int vector) 89static void x2apic_send_IPI_allbutself(int vector)
94{ 90{
95 unsigned long this_cpu = smp_processor_id(); 91 __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLBUT);
96 unsigned long query_cpu;
97 unsigned long flags;
98
99 x2apic_wrmsr_fence();
100
101 local_irq_save(flags);
102 for_each_online_cpu(query_cpu) {
103 if (query_cpu == this_cpu)
104 continue;
105 __x2apic_send_IPI_dest(
106 per_cpu(x86_cpu_to_logical_apicid, query_cpu),
107 vector, apic->dest_logical);
108 }
109 local_irq_restore(flags);
110} 92}
111 93
112static void x2apic_send_IPI_all(int vector) 94static void x2apic_send_IPI_all(int vector)
113{ 95{
114 x2apic_send_IPI_mask(cpu_online_mask, vector); 96 __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC);
115}
116
117static int x2apic_apic_id_registered(void)
118{
119 return 1;
120} 97}
121 98
122static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) 99static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
@@ -151,43 +128,90 @@ x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
151 return per_cpu(x86_cpu_to_logical_apicid, cpu); 128 return per_cpu(x86_cpu_to_logical_apicid, cpu);
152} 129}
153 130
154static unsigned int x2apic_cluster_phys_get_apic_id(unsigned long x) 131static void init_x2apic_ldr(void)
155{ 132{
156 unsigned int id; 133 unsigned int this_cpu = smp_processor_id();
134 unsigned int cpu;
157 135
158 id = x; 136 per_cpu(x86_cpu_to_logical_apicid, this_cpu) = apic_read(APIC_LDR);
159 return id; 137
138 __cpu_set(this_cpu, per_cpu(cpus_in_cluster, this_cpu));
139 for_each_online_cpu(cpu) {
140 if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu))
141 continue;
142 __cpu_set(this_cpu, per_cpu(cpus_in_cluster, cpu));
143 __cpu_set(cpu, per_cpu(cpus_in_cluster, this_cpu));
144 }
160} 145}
161 146
162static unsigned long set_apic_id(unsigned int id) 147 /*
148 * At CPU state changes, update the x2apic cluster sibling info.
149 */
150static int __cpuinit
151update_clusterinfo(struct notifier_block *nfb, unsigned long action, void *hcpu)
163{ 152{
164 unsigned long x; 153 unsigned int this_cpu = (unsigned long)hcpu;
154 unsigned int cpu;
155 int err = 0;
156
157 switch (action) {
158 case CPU_UP_PREPARE:
159 if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, this_cpu),
160 GFP_KERNEL)) {
161 err = -ENOMEM;
162 } else if (!zalloc_cpumask_var(&per_cpu(ipi_mask, this_cpu),
163 GFP_KERNEL)) {
164 free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu));
165 err = -ENOMEM;
166 }
167 break;
168 case CPU_UP_CANCELED:
169 case CPU_UP_CANCELED_FROZEN:
170 case CPU_DEAD:
171 for_each_online_cpu(cpu) {
172 if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu))
173 continue;
174 __cpu_clear(this_cpu, per_cpu(cpus_in_cluster, cpu));
175 __cpu_clear(cpu, per_cpu(cpus_in_cluster, this_cpu));
176 }
177 free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu));
178 free_cpumask_var(per_cpu(ipi_mask, this_cpu));
179 break;
180 }
165 181
166 x = id; 182 return notifier_from_errno(err);
167 return x;
168} 183}
169 184
170static int x2apic_cluster_phys_pkg_id(int initial_apicid, int index_msb) 185static struct notifier_block __refdata x2apic_cpu_notifier = {
171{ 186 .notifier_call = update_clusterinfo,
172 return initial_apicid >> index_msb; 187};
173}
174 188
175static void x2apic_send_IPI_self(int vector) 189static int x2apic_init_cpu_notifier(void)
176{ 190{
177 apic_write(APIC_SELF_IPI, vector); 191 int cpu = smp_processor_id();
192
193 zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL);
194 zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL);
195
196 BUG_ON(!per_cpu(cpus_in_cluster, cpu) || !per_cpu(ipi_mask, cpu));
197
198 __cpu_set(cpu, per_cpu(cpus_in_cluster, cpu));
199 register_hotcpu_notifier(&x2apic_cpu_notifier);
200 return 1;
178} 201}
179 202
180static void init_x2apic_ldr(void) 203static int x2apic_cluster_probe(void)
181{ 204{
182 int cpu = smp_processor_id(); 205 if (x2apic_mode)
183 206 return x2apic_init_cpu_notifier();
184 per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR); 207 else
208 return 0;
185} 209}
186 210
187struct apic apic_x2apic_cluster = { 211static struct apic apic_x2apic_cluster = {
188 212
189 .name = "cluster x2apic", 213 .name = "cluster x2apic",
190 .probe = NULL, 214 .probe = x2apic_cluster_probe,
191 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, 215 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
192 .apic_id_registered = x2apic_apic_id_registered, 216 .apic_id_registered = x2apic_apic_id_registered,
193 217
@@ -206,18 +230,16 @@ struct apic apic_x2apic_cluster = {
206 .ioapic_phys_id_map = NULL, 230 .ioapic_phys_id_map = NULL,
207 .setup_apic_routing = NULL, 231 .setup_apic_routing = NULL,
208 .multi_timer_check = NULL, 232 .multi_timer_check = NULL,
209 .apicid_to_node = NULL,
210 .cpu_to_logical_apicid = NULL,
211 .cpu_present_to_apicid = default_cpu_present_to_apicid, 233 .cpu_present_to_apicid = default_cpu_present_to_apicid,
212 .apicid_to_cpu_present = NULL, 234 .apicid_to_cpu_present = NULL,
213 .setup_portio_remap = NULL, 235 .setup_portio_remap = NULL,
214 .check_phys_apicid_present = default_check_phys_apicid_present, 236 .check_phys_apicid_present = default_check_phys_apicid_present,
215 .enable_apic_mode = NULL, 237 .enable_apic_mode = NULL,
216 .phys_pkg_id = x2apic_cluster_phys_pkg_id, 238 .phys_pkg_id = x2apic_phys_pkg_id,
217 .mps_oem_check = NULL, 239 .mps_oem_check = NULL,
218 240
219 .get_apic_id = x2apic_cluster_phys_get_apic_id, 241 .get_apic_id = x2apic_get_apic_id,
220 .set_apic_id = set_apic_id, 242 .set_apic_id = x2apic_set_apic_id,
221 .apic_id_mask = 0xFFFFFFFFu, 243 .apic_id_mask = 0xFFFFFFFFu,
222 244
223 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, 245 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
@@ -242,3 +264,5 @@ struct apic apic_x2apic_cluster = {
242 .wait_icr_idle = native_x2apic_wait_icr_idle, 264 .wait_icr_idle = native_x2apic_wait_icr_idle,
243 .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle, 265 .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle,
244}; 266};
267
268apic_driver(apic_x2apic_cluster);
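
The rewritten cluster driver coalesces IPIs: an x2apic logical ID carries the cluster number in bits 31:16 and a one-hot member bit in 15:0, so the logical IDs of all pending targets in one cluster can be OR-ed into a single destination and the whole cluster covered by one ICR write. A userspace model of that coalescing loop, with an invented four-CPU, two-cluster layout:

#include <stdio.h>

#define NR_CPUS 4

static unsigned int logical_apicid[NR_CPUS] = {
	0x00010001, 0x00010002,	/* cluster 1, members 0 and 1 */
	0x00020001, 0x00020002,	/* cluster 2, members 0 and 1 */
};

static unsigned int cluster(int cpu) { return logical_apicid[cpu] >> 16; }

static void send_ipi_mask(unsigned long mask, int vector)
{
	int cpu, i;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		unsigned int dest = 0;

		if (!(mask & (1UL << cpu)))
			continue;
		/* collect every still-pending CPU in this CPU's cluster */
		for (i = 0; i < NR_CPUS; i++)
			if ((mask & (1UL << i)) && cluster(i) == cluster(cpu)) {
				dest |= logical_apicid[i];
				mask &= ~(1UL << i);	/* don't IPI it twice */
			}
		printf("ICR write: dest=%#010x vector=%d\n", dest, vector);
	}
}

int main(void)
{
	send_ipi_mask(0xf, 0xfd);	/* all 4 CPUs, but only 2 ICR writes */
	return 0;
}
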
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 8972f38c5ced..f5373dfde21e 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -7,11 +7,12 @@
7#include <linux/dmar.h> 7#include <linux/dmar.h>
8 8
9#include <asm/smp.h> 9#include <asm/smp.h>
10#include <asm/apic.h> 10#include <asm/x2apic.h>
11#include <asm/ipi.h>
12 11
13int x2apic_phys; 12int x2apic_phys;
14 13
14static struct apic apic_x2apic_phys;
15
15static int set_x2apic_phys_mode(char *arg) 16static int set_x2apic_phys_mode(char *arg)
16{ 17{
17 x2apic_phys = 1; 18 x2apic_phys = 1;
@@ -27,94 +28,46 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
27 return 0; 28 return 0;
28} 29}
29 30
30/* 31static void
31 * need to use more than cpu 0, because we need more vectors when 32__x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
32 * MSI-X are used.
33 */
34static const struct cpumask *x2apic_target_cpus(void)
35{
36 return cpu_online_mask;
37}
38
39static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
40{
41 cpumask_clear(retmask);
42 cpumask_set_cpu(cpu, retmask);
43}
44
45static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
46 unsigned int dest)
47{
48 unsigned long cfg;
49
50 cfg = __prepare_ICR(0, vector, dest);
51
52 /*
53 * send the IPI.
54 */
55 native_x2apic_icr_write(cfg, apicid);
56}
57
58static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
59{ 33{
60 unsigned long query_cpu; 34 unsigned long query_cpu;
35 unsigned long this_cpu;
61 unsigned long flags; 36 unsigned long flags;
62 37
63 x2apic_wrmsr_fence(); 38 x2apic_wrmsr_fence();
64 39
65 local_irq_save(flags); 40 local_irq_save(flags);
41
42 this_cpu = smp_processor_id();
66 for_each_cpu(query_cpu, mask) { 43 for_each_cpu(query_cpu, mask) {
44 if (apic_dest == APIC_DEST_ALLBUT && this_cpu == query_cpu)
45 continue;
67 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), 46 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
68 vector, APIC_DEST_PHYSICAL); 47 vector, APIC_DEST_PHYSICAL);
69 } 48 }
70 local_irq_restore(flags); 49 local_irq_restore(flags);
71} 50}
72 51
52static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
53{
54 __x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLINC);
55}
56
73static void 57static void
74 x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) 58 x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
75{ 59{
76 unsigned long this_cpu = smp_processor_id(); 60 __x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLBUT);
77 unsigned long query_cpu;
78 unsigned long flags;
79
80 x2apic_wrmsr_fence();
81
82 local_irq_save(flags);
83 for_each_cpu(query_cpu, mask) {
84 if (query_cpu != this_cpu)
85 __x2apic_send_IPI_dest(
86 per_cpu(x86_cpu_to_apicid, query_cpu),
87 vector, APIC_DEST_PHYSICAL);
88 }
89 local_irq_restore(flags);
90} 61}
91 62
92static void x2apic_send_IPI_allbutself(int vector) 63static void x2apic_send_IPI_allbutself(int vector)
93{ 64{
94 unsigned long this_cpu = smp_processor_id(); 65 __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLBUT);
95 unsigned long query_cpu;
96 unsigned long flags;
97
98 x2apic_wrmsr_fence();
99
100 local_irq_save(flags);
101 for_each_online_cpu(query_cpu) {
102 if (query_cpu == this_cpu)
103 continue;
104 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
105 vector, APIC_DEST_PHYSICAL);
106 }
107 local_irq_restore(flags);
108} 66}
109 67
110static void x2apic_send_IPI_all(int vector) 68static void x2apic_send_IPI_all(int vector)
111{ 69{
112 x2apic_send_IPI_mask(cpu_online_mask, vector); 70 __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC);
113}
114
115static int x2apic_apic_id_registered(void)
116{
117 return 1;
118} 71}
119 72
120static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) 73static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
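
The helper above bottoms out in native_x2apic_icr_write(). In x2apic mode the ICR is a single 64-bit MSR with the destination apicid in the upper half, so each IPI is one wrmsr; that is why the loop needs only the single x2apic_wrmsr_fence() up front. A minimal sketch of that accessor, assuming the <asm/apic.h> definition of this era:

	/* Illustrative: one MSR write per IPI; dest apicid in bits 63:32. */
	static inline void native_x2apic_icr_write(u32 low, u32 id)
	{
		wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64)id) << 32 | low);
	}
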
@@ -149,34 +102,22 @@ x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
149 return per_cpu(x86_cpu_to_apicid, cpu); 102 return per_cpu(x86_cpu_to_apicid, cpu);
150} 103}
151 104
152static unsigned int x2apic_phys_get_apic_id(unsigned long x) 105static void init_x2apic_ldr(void)
153{
154 return x;
155}
156
157static unsigned long set_apic_id(unsigned int id)
158{
159 return id;
160}
161
162static int x2apic_phys_pkg_id(int initial_apicid, int index_msb)
163{ 106{
164 return initial_apicid >> index_msb;
165} 107}
166 108
167static void x2apic_send_IPI_self(int vector) 109static int x2apic_phys_probe(void)
168{ 110{
169 apic_write(APIC_SELF_IPI, vector); 111 if (x2apic_mode && x2apic_phys)
170} 112 return 1;
171 113
172static void init_x2apic_ldr(void) 114 return apic == &apic_x2apic_phys;
173{
174} 115}
175 116
176struct apic apic_x2apic_phys = { 117static struct apic apic_x2apic_phys = {
177 118
178 .name = "physical x2apic", 119 .name = "physical x2apic",
179 .probe = NULL, 120 .probe = x2apic_phys_probe,
180 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, 121 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
181 .apic_id_registered = x2apic_apic_id_registered, 122 .apic_id_registered = x2apic_apic_id_registered,
182 123
@@ -195,8 +136,6 @@ struct apic apic_x2apic_phys = {
195 .ioapic_phys_id_map = NULL, 136 .ioapic_phys_id_map = NULL,
196 .setup_apic_routing = NULL, 137 .setup_apic_routing = NULL,
197 .multi_timer_check = NULL, 138 .multi_timer_check = NULL,
198 .apicid_to_node = NULL,
199 .cpu_to_logical_apicid = NULL,
200 .cpu_present_to_apicid = default_cpu_present_to_apicid, 139 .cpu_present_to_apicid = default_cpu_present_to_apicid,
201 .apicid_to_cpu_present = NULL, 140 .apicid_to_cpu_present = NULL,
202 .setup_portio_remap = NULL, 141 .setup_portio_remap = NULL,
@@ -205,8 +144,8 @@ struct apic apic_x2apic_phys = {
205 .phys_pkg_id = x2apic_phys_pkg_id, 144 .phys_pkg_id = x2apic_phys_pkg_id,
206 .mps_oem_check = NULL, 145 .mps_oem_check = NULL,
207 146
208 .get_apic_id = x2apic_phys_get_apic_id, 147 .get_apic_id = x2apic_get_apic_id,
209 .set_apic_id = set_apic_id, 148 .set_apic_id = x2apic_set_apic_id,
210 .apic_id_mask = 0xFFFFFFFFu, 149 .apic_id_mask = 0xFFFFFFFFu,
211 150
212 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, 151 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
@@ -231,3 +170,5 @@ struct apic apic_x2apic_phys = {
231 .wait_icr_idle = native_x2apic_wait_icr_idle, 170 .wait_icr_idle = native_x2apic_wait_icr_idle,
232 .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle, 171 .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle,
233}; 172};
173
174apic_driver(apic_x2apic_phys);
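
The apic_driver() lines added here and in the cluster diff replace the old NULL .probe convention: each driver drops a pointer into a dedicated .apicdrivers linker section, and the generic probe code walks that section in link order, which is why the Makefile listing order now matters. A rough sketch of the macro, assuming the <asm/apic.h> definition introduced by this series:

	/* Sketch: collect &apic_x2apic_phys etc. into .apicdrivers so the
	 * probe loop can walk the drivers in the order they were linked. */
	#define apic_driver(sym)					\
		static struct apic *__apicdrivers_##sym __used		\
		__aligned(sizeof(struct apic *))			\
		__section(.apicdrivers) = { &sym }
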
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index f744f54cb248..adc66c3a1fef 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -5,7 +5,7 @@
5 * 5 *
6 * SGI UV APIC functions (note: not an Intel compatible APIC) 6 * SGI UV APIC functions (note: not an Intel compatible APIC)
7 * 7 *
8 * Copyright (C) 2007-2009 Silicon Graphics, Inc. All rights reserved. 8 * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved.
9 */ 9 */
10#include <linux/cpumask.h> 10#include <linux/cpumask.h>
11#include <linux/hardirq.h> 11#include <linux/hardirq.h>
@@ -23,6 +23,8 @@
23#include <linux/io.h> 23#include <linux/io.h>
24#include <linux/pci.h> 24#include <linux/pci.h>
25#include <linux/kdebug.h> 25#include <linux/kdebug.h>
26#include <linux/delay.h>
27#include <linux/crash_dump.h>
26 28
27#include <asm/uv/uv_mmrs.h> 29#include <asm/uv/uv_mmrs.h>
28#include <asm/uv/uv_hub.h> 30#include <asm/uv/uv_hub.h>
@@ -34,6 +36,14 @@
34#include <asm/ipi.h> 36#include <asm/ipi.h>
35#include <asm/smp.h> 37#include <asm/smp.h>
36#include <asm/x86_init.h> 38#include <asm/x86_init.h>
39#include <asm/emergency-restart.h>
40#include <asm/nmi.h>
41
42/* The BMC sets a bit in this MMR before sending an NMI */
43#define UVH_NMI_MMR UVH_SCRATCH5
44#define UVH_NMI_MMR_CLEAR (UVH_NMI_MMR + 8)
45#define UV_NMI_PENDING_MASK (1UL << 63)
46DEFINE_PER_CPU(unsigned long, cpu_last_nmi_count);
37 47
38DEFINE_PER_CPU(int, x2apic_extra_bits); 48DEFINE_PER_CPU(int, x2apic_extra_bits);
39 49
@@ -41,10 +51,25 @@ DEFINE_PER_CPU(int, x2apic_extra_bits);
41 51
42static enum uv_system_type uv_system_type; 52static enum uv_system_type uv_system_type;
43static u64 gru_start_paddr, gru_end_paddr; 53static u64 gru_start_paddr, gru_end_paddr;
54static union uvh_apicid uvh_apicid;
44int uv_min_hub_revision_id; 55int uv_min_hub_revision_id;
45EXPORT_SYMBOL_GPL(uv_min_hub_revision_id); 56EXPORT_SYMBOL_GPL(uv_min_hub_revision_id);
57unsigned int uv_apicid_hibits;
58EXPORT_SYMBOL_GPL(uv_apicid_hibits);
46static DEFINE_SPINLOCK(uv_nmi_lock); 59static DEFINE_SPINLOCK(uv_nmi_lock);
47 60
61static struct apic apic_x2apic_uv_x;
62
63static unsigned long __init uv_early_read_mmr(unsigned long addr)
64{
65 unsigned long val, *mmr;
66
67 mmr = early_ioremap(UV_LOCAL_MMR_BASE | addr, sizeof(*mmr));
68 val = *mmr;
69 early_iounmap(mmr, sizeof(*mmr));
70 return val;
71}
72
48static inline bool is_GRU_range(u64 start, u64 end) 73static inline bool is_GRU_range(u64 start, u64 end)
49{ 74{
50 return start >= gru_start_paddr && end <= gru_end_paddr; 75 return start >= gru_start_paddr && end <= gru_end_paddr;
@@ -55,27 +80,63 @@ static bool uv_is_untracked_pat_range(u64 start, u64 end)
55 return is_ISA_range(start, end) || is_GRU_range(start, end); 80 return is_ISA_range(start, end) || is_GRU_range(start, end);
56} 81}
57 82
58static int early_get_nodeid(void) 83static int __init early_get_pnodeid(void)
59{ 84{
60 union uvh_node_id_u node_id; 85 union uvh_node_id_u node_id;
61 unsigned long *mmr; 86 union uvh_rh_gam_config_mmr_u m_n_config;
62 87 int pnode;
63 mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_NODE_ID, sizeof(*mmr));
64 node_id.v = *mmr;
65 early_iounmap(mmr, sizeof(*mmr));
66 88
67 /* Currently, all blades have same revision number */ 89 /* Currently, all blades have same revision number */
90 node_id.v = uv_early_read_mmr(UVH_NODE_ID);
91 m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_CONFIG_MMR);
68 uv_min_hub_revision_id = node_id.s.revision; 92 uv_min_hub_revision_id = node_id.s.revision;
69 93
70 return node_id.s.node_id; 94 if (node_id.s.part_number == UV2_HUB_PART_NUMBER)
95 uv_min_hub_revision_id += UV2_HUB_REVISION_BASE - 1;
96
97 uv_hub_info->hub_revision = uv_min_hub_revision_id;
98 pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1);
99 return pnode;
100}
101
102static void __init early_get_apic_pnode_shift(void)
103{
104 uvh_apicid.v = uv_early_read_mmr(UVH_APICID);
105 if (!uvh_apicid.v)
106 /*
107 * Old BIOS, use the default value
108 */
109 uvh_apicid.s.pnode_shift = UV_APIC_PNODE_SHIFT;
110}
111
112/*
113 * Add an extra bit as dictated by the BIOS to the destination apicid of
114 * interrupts potentially passing through the UV HUB. This prevents
115 * a deadlock between interrupts and IO port operations.
116 */
117static void __init uv_set_apicid_hibit(void)
118{
119 union uv1h_lb_target_physical_apic_id_mask_u apicid_mask;
120
121 if (is_uv1_hub()) {
122 apicid_mask.v =
123 uv_early_read_mmr(UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK);
124 uv_apicid_hibits =
125 apicid_mask.s1.bit_enables & UV_APICID_HIBIT_MASK;
126 }
71} 127}
72 128
73static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) 129static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
74{ 130{
75 int nodeid; 131 int pnodeid, is_uv1, is_uv2;
76 132
77 if (!strcmp(oem_id, "SGI")) { 133 is_uv1 = !strcmp(oem_id, "SGI");
78 nodeid = early_get_nodeid(); 134 is_uv2 = !strcmp(oem_id, "SGI2");
135 if (is_uv1 || is_uv2) {
136 uv_hub_info->hub_revision =
137 is_uv1 ? UV1_HUB_REVISION_BASE : UV2_HUB_REVISION_BASE;
138 pnodeid = early_get_pnodeid();
139 early_get_apic_pnode_shift();
79 x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; 140 x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range;
80 x86_platform.nmi_init = uv_nmi_init; 141 x86_platform.nmi_init = uv_nmi_init;
81 if (!strcmp(oem_table_id, "UVL")) 142 if (!strcmp(oem_table_id, "UVL"))
@@ -83,9 +144,10 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
83 else if (!strcmp(oem_table_id, "UVX")) 144 else if (!strcmp(oem_table_id, "UVX"))
84 uv_system_type = UV_X2APIC; 145 uv_system_type = UV_X2APIC;
85 else if (!strcmp(oem_table_id, "UVH")) { 146 else if (!strcmp(oem_table_id, "UVH")) {
86 __get_cpu_var(x2apic_extra_bits) = 147 __this_cpu_write(x2apic_extra_bits,
87 nodeid << (UV_APIC_PNODE_SHIFT - 1); 148 pnodeid << uvh_apicid.s.pnode_shift);
88 uv_system_type = UV_NON_UNIQUE_APIC; 149 uv_system_type = UV_NON_UNIQUE_APIC;
150 uv_set_apicid_hibit();
89 return 1; 151 return 1;
90 } 152 }
91 } 153 }
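
early_get_pnodeid() above derives the pnode by dropping the low bit of node_id and masking to the socket-node width n_skt. A worked example with hypothetical register values:

	/* Hypothetical values, for illustration only. */
	unsigned int node_id = 0x35;	/* UVH_NODE_ID.s.node_id */
	unsigned int n_skt = 6;		/* UVH_RH_GAM_CONFIG_MMR.s.n_skt */
	unsigned int pnode = (node_id >> 1) & ((1 << n_skt) - 1);
	/* (0x35 >> 1) == 0x1a; 0x1a & 0x3f == 0x1a, so pnode == 0x1a */
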
@@ -139,6 +201,7 @@ static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_ri
139 int pnode; 201 int pnode;
140 202
141 pnode = uv_apicid_to_pnode(phys_apicid); 203 pnode = uv_apicid_to_pnode(phys_apicid);
204 phys_apicid |= uv_apicid_hibits;
142 val = (1UL << UVH_IPI_INT_SEND_SHFT) | 205 val = (1UL << UVH_IPI_INT_SEND_SHFT) |
143 (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | 206 (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
144 ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | 207 ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
@@ -220,7 +283,7 @@ static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask)
220 int cpu = cpumask_first(cpumask); 283 int cpu = cpumask_first(cpumask);
221 284
222 if ((unsigned)cpu < nr_cpu_ids) 285 if ((unsigned)cpu < nr_cpu_ids)
223 return per_cpu(x86_cpu_to_apicid, cpu); 286 return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
224 else 287 else
225 return BAD_APICID; 288 return BAD_APICID;
226} 289}
@@ -239,7 +302,7 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
239 if (cpumask_test_cpu(cpu, cpu_online_mask)) 302 if (cpumask_test_cpu(cpu, cpu_online_mask))
240 break; 303 break;
241 } 304 }
242 return per_cpu(x86_cpu_to_apicid, cpu); 305 return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
243} 306}
244 307
245static unsigned int x2apic_get_apic_id(unsigned long x) 308static unsigned int x2apic_get_apic_id(unsigned long x)
@@ -247,7 +310,7 @@ static unsigned int x2apic_get_apic_id(unsigned long x)
247 unsigned int id; 310 unsigned int id;
248 311
249 WARN_ON(preemptible() && num_online_cpus() > 1); 312 WARN_ON(preemptible() && num_online_cpus() > 1);
250 id = x | __get_cpu_var(x2apic_extra_bits); 313 id = x | __this_cpu_read(x2apic_extra_bits);
251 314
252 return id; 315 return id;
253} 316}
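
The __get_cpu_var() to __this_cpu_read()/__this_cpu_write() conversions in this file are behavior-preserving: __this_cpu_read() compiles to a single %gs-relative load instead of first materializing this cpu's address of the variable. Schematically (illustrative, not the real macro expansion):

	/* Before: compute the per-cpu address, then dereference it. */
	id = x | __get_cpu_var(x2apic_extra_bits);
	/* After: one segment-prefixed load, e.g. mov %gs:offset, %reg. */
	id = x | __this_cpu_read(x2apic_extra_bits);
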
@@ -277,10 +340,15 @@ static void uv_send_IPI_self(int vector)
277 apic_write(APIC_SELF_IPI, vector); 340 apic_write(APIC_SELF_IPI, vector);
278} 341}
279 342
280struct apic __refdata apic_x2apic_uv_x = { 343static int uv_probe(void)
344{
345 return apic == &apic_x2apic_uv_x;
346}
347
348static struct apic __refdata apic_x2apic_uv_x = {
281 349
282 .name = "UV large system", 350 .name = "UV large system",
283 .probe = NULL, 351 .probe = uv_probe,
284 .acpi_madt_oem_check = uv_acpi_madt_oem_check, 352 .acpi_madt_oem_check = uv_acpi_madt_oem_check,
285 .apic_id_registered = uv_apic_id_registered, 353 .apic_id_registered = uv_apic_id_registered,
286 354
@@ -299,8 +367,6 @@ struct apic __refdata apic_x2apic_uv_x = {
299 .ioapic_phys_id_map = NULL, 367 .ioapic_phys_id_map = NULL,
300 .setup_apic_routing = NULL, 368 .setup_apic_routing = NULL,
301 .multi_timer_check = NULL, 369 .multi_timer_check = NULL,
302 .apicid_to_node = NULL,
303 .cpu_to_logical_apicid = NULL,
304 .cpu_present_to_apicid = default_cpu_present_to_apicid, 370 .cpu_present_to_apicid = default_cpu_present_to_apicid,
305 .apicid_to_cpu_present = NULL, 371 .apicid_to_cpu_present = NULL,
306 .setup_portio_remap = NULL, 372 .setup_portio_remap = NULL,
@@ -339,7 +405,7 @@ struct apic __refdata apic_x2apic_uv_x = {
339 405
340static __cpuinit void set_x2apic_extra_bits(int pnode) 406static __cpuinit void set_x2apic_extra_bits(int pnode)
341{ 407{
342 __get_cpu_var(x2apic_extra_bits) = (pnode << 6); 408 __this_cpu_write(x2apic_extra_bits, pnode << uvh_apicid.s.pnode_shift);
343} 409}
344 410
345/* 411/*
@@ -363,14 +429,14 @@ struct redir_addr {
363#define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 429#define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT
364 430
365static __initdata struct redir_addr redir_addrs[] = { 431static __initdata struct redir_addr redir_addrs[] = {
366 {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_SI_ALIAS0_OVERLAY_CONFIG}, 432 {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR},
367 {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_SI_ALIAS1_OVERLAY_CONFIG}, 433 {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR},
368 {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_SI_ALIAS2_OVERLAY_CONFIG}, 434 {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR},
369}; 435};
370 436
371static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size) 437static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
372{ 438{
373 union uvh_si_alias0_overlay_config_u alias; 439 union uvh_rh_gam_alias210_overlay_config_2_mmr_u alias;
374 union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect; 440 union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect;
375 int i; 441 int i;
376 442
@@ -430,12 +496,19 @@ static __init void map_mmr_high(int max_pnode)
430static __init void map_mmioh_high(int max_pnode) 496static __init void map_mmioh_high(int max_pnode)
431{ 497{
432 union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh; 498 union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
433 int shift = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT; 499 int shift;
434 500
435 mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); 501 mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR);
436 if (mmioh.s.enable) 502 if (is_uv1_hub() && mmioh.s1.enable) {
437 map_high("MMIOH", mmioh.s.base, shift, mmioh.s.m_io, 503 shift = UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT;
504 map_high("MMIOH", mmioh.s1.base, shift, mmioh.s1.m_io,
438 max_pnode, map_uc); 505 max_pnode, map_uc);
506 }
507 if (is_uv2_hub() && mmioh.s2.enable) {
508 shift = UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT;
509 map_high("MMIOH", mmioh.s2.base, shift, mmioh.s2.m_io,
510 max_pnode, map_uc);
511 }
439} 512}
440 513
441static __init void map_low_mmrs(void) 514static __init void map_low_mmrs(void)
@@ -559,14 +632,14 @@ late_initcall(uv_init_heartbeat);
559 632
560/* Direct Legacy VGA I/O traffic to designated IOH */ 633/* Direct Legacy VGA I/O traffic to designated IOH */
561int uv_set_vga_state(struct pci_dev *pdev, bool decode, 634int uv_set_vga_state(struct pci_dev *pdev, bool decode,
562 unsigned int command_bits, bool change_bridge) 635 unsigned int command_bits, u32 flags)
563{ 636{
564 int domain, bus, rc; 637 int domain, bus, rc;
565 638
566 PR_DEVEL("devfn %x decode %d cmd %x chg_brdg %d\n", 639 PR_DEVEL("devfn %x decode %d cmd %x flags %d\n",
567 pdev->devfn, decode, command_bits, change_bridge); 640 pdev->devfn, decode, command_bits, flags);
568 641
569 if (!change_bridge) 642 if (!(flags & PCI_VGA_STATE_CHANGE_BRIDGE))
570 return 0; 643 return 0;
571 644
572 if ((command_bits & PCI_COMMAND_IO) == 0) 645 if ((command_bits & PCI_COMMAND_IO) == 0)
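
The signature change from a single bool to a u32 flags word lets callers request bridge and decode changes independently. An illustrative caller, assuming the PCI_VGA_STATE_CHANGE_BRIDGE flag from <linux/pci.h>:

	/* Illustrative: request a bridge-level VGA routing change. */
	uv_set_vga_state(pdev, true, PCI_COMMAND_IO,
			 PCI_VGA_STATE_CHANGE_BRIDGE);
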
@@ -602,18 +675,46 @@ void __cpuinit uv_cpu_init(void)
602 */ 675 */
603int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) 676int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
604{ 677{
605 if (reason != DIE_NMI_IPI) 678 unsigned long real_uv_nmi;
679 int bid;
680
681 if (reason != DIE_NMIUNKNOWN)
606 return NOTIFY_OK; 682 return NOTIFY_OK;
607 683
608 if (in_crash_kexec) 684 if (in_crash_kexec)
609 /* do nothing if entering the crash kernel */ 685 /* do nothing if entering the crash kernel */
610 return NOTIFY_OK; 686 return NOTIFY_OK;
687
688 /*
689 * Each blade has an MMR that indicates when an NMI has been sent
690 * to cpus on the blade. If an NMI is detected, atomically
691 * clear the MMR and update a per-blade NMI count used to
692 * cause each cpu on the blade to notice a new NMI.
693 */
694 bid = uv_numa_blade_id();
695 real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
696
697 if (unlikely(real_uv_nmi)) {
698 spin_lock(&uv_blade_info[bid].nmi_lock);
699 real_uv_nmi = (uv_read_local_mmr(UVH_NMI_MMR) & UV_NMI_PENDING_MASK);
700 if (real_uv_nmi) {
701 uv_blade_info[bid].nmi_count++;
702 uv_write_local_mmr(UVH_NMI_MMR_CLEAR, UV_NMI_PENDING_MASK);
703 }
704 spin_unlock(&uv_blade_info[bid].nmi_lock);
705 }
706
707 if (likely(__get_cpu_var(cpu_last_nmi_count) == uv_blade_info[bid].nmi_count))
708 return NOTIFY_DONE;
709
710 __get_cpu_var(cpu_last_nmi_count) = uv_blade_info[bid].nmi_count;
711
611 /* 712 /*
612 * Use a lock so only one cpu prints at a time 713 * Use a lock so only one cpu prints at a time.
613 * to prevent intermixed output. 714 * This prevents intermixed output.
614 */ 715 */
615 spin_lock(&uv_nmi_lock); 716 spin_lock(&uv_nmi_lock);
616 pr_info("NMI stack dump cpu %u:\n", smp_processor_id()); 717 pr_info("UV NMI stack dump cpu %u:\n", smp_processor_id());
617 dump_stack(); 718 dump_stack();
618 spin_unlock(&uv_nmi_lock); 719 spin_unlock(&uv_nmi_lock);
619 720
@@ -621,7 +722,8 @@ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
621} 722}
622 723
623static struct notifier_block uv_dump_stack_nmi_nb = { 724static struct notifier_block uv_dump_stack_nmi_nb = {
624 .notifier_call = uv_handle_nmi 725 .notifier_call = uv_handle_nmi,
726 .priority = NMI_LOCAL_LOW_PRIOR - 1,
625}; 727};
626 728
627void uv_register_nmi_notifier(void) 729void uv_register_nmi_notifier(void)
@@ -644,28 +746,34 @@ void uv_nmi_init(void)
644 746
645void __init uv_system_init(void) 747void __init uv_system_init(void)
646{ 748{
647 union uvh_si_addr_map_config_u m_n_config; 749 union uvh_rh_gam_config_mmr_u m_n_config;
750 union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
648 union uvh_node_id_u node_id; 751 union uvh_node_id_u node_id;
649 unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size; 752 unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
650 int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val; 753 int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val, n_io;
651 int gnode_extra, max_pnode = 0; 754 int gnode_extra, max_pnode = 0;
652 unsigned long mmr_base, present, paddr; 755 unsigned long mmr_base, present, paddr;
653 unsigned short pnode_mask; 756 unsigned short pnode_mask, pnode_io_mask;
654 757
758 printk(KERN_INFO "UV: Found %s hub\n", is_uv1_hub() ? "UV1" : "UV2");
655 map_low_mmrs(); 759 map_low_mmrs();
656 760
657 m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); 761 m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR);
658 m_val = m_n_config.s.m_skt; 762 m_val = m_n_config.s.m_skt;
659 n_val = m_n_config.s.n_skt; 763 n_val = m_n_config.s.n_skt;
764 mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR);
765 n_io = is_uv1_hub() ? mmioh.s1.n_io : mmioh.s2.n_io;
660 mmr_base = 766 mmr_base =
661 uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & 767 uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
662 ~UV_MMR_ENABLE; 768 ~UV_MMR_ENABLE;
663 pnode_mask = (1 << n_val) - 1; 769 pnode_mask = (1 << n_val) - 1;
770 pnode_io_mask = (1 << n_io) - 1;
771
664 node_id.v = uv_read_local_mmr(UVH_NODE_ID); 772 node_id.v = uv_read_local_mmr(UVH_NODE_ID);
665 gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1; 773 gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1;
666 gnode_upper = ((unsigned long)gnode_extra << m_val); 774 gnode_upper = ((unsigned long)gnode_extra << m_val);
667 printk(KERN_DEBUG "UV: N %d, M %d, gnode_upper 0x%lx, gnode_extra 0x%x\n", 775 printk(KERN_INFO "UV: N %d, M %d, N_IO: %d, gnode_upper 0x%lx, gnode_extra 0x%x, pnode_mask 0x%x, pnode_io_mask 0x%x\n",
668 n_val, m_val, gnode_upper, gnode_extra); 776 n_val, m_val, n_io, gnode_upper, gnode_extra, pnode_mask, pnode_io_mask);
669 777
670 printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base); 778 printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base);
671 779
@@ -675,8 +783,9 @@ void __init uv_system_init(void)
675 printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades()); 783 printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades());
676 784
677 bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); 785 bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
678 uv_blade_info = kmalloc(bytes, GFP_KERNEL); 786 uv_blade_info = kzalloc(bytes, GFP_KERNEL);
679 BUG_ON(!uv_blade_info); 787 BUG_ON(!uv_blade_info);
788
680 for (blade = 0; blade < uv_num_possible_blades(); blade++) 789 for (blade = 0; blade < uv_num_possible_blades(); blade++)
681 uv_blade_info[blade].memory_nid = -1; 790 uv_blade_info[blade].memory_nid = -1;
682 791
@@ -698,10 +807,11 @@ void __init uv_system_init(void)
698 for (j = 0; j < 64; j++) { 807 for (j = 0; j < 64; j++) {
699 if (!test_bit(j, &present)) 808 if (!test_bit(j, &present))
700 continue; 809 continue;
701 pnode = (i * 64 + j); 810 pnode = (i * 64 + j) & pnode_mask;
702 uv_blade_info[blade].pnode = pnode; 811 uv_blade_info[blade].pnode = pnode;
703 uv_blade_info[blade].nr_possible_cpus = 0; 812 uv_blade_info[blade].nr_possible_cpus = 0;
704 uv_blade_info[blade].nr_online_cpus = 0; 813 uv_blade_info[blade].nr_online_cpus = 0;
814 spin_lock_init(&uv_blade_info[blade].nmi_lock);
705 max_pnode = max(pnode, max_pnode); 815 max_pnode = max(pnode, max_pnode);
706 blade++; 816 blade++;
707 } 817 }
@@ -716,6 +826,13 @@ void __init uv_system_init(void)
716 int apicid = per_cpu(x86_cpu_to_apicid, cpu); 826 int apicid = per_cpu(x86_cpu_to_apicid, cpu);
717 827
718 nid = cpu_to_node(cpu); 828 nid = cpu_to_node(cpu);
829 /*
830 * apic_pnode_shift must be set before calling uv_apicid_to_pnode().
831 */
832 uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
833 uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift;
834 uv_cpu_hub_info(cpu)->hub_revision = uv_hub_info->hub_revision;
835
719 pnode = uv_apicid_to_pnode(apicid); 836 pnode = uv_apicid_to_pnode(apicid);
720 blade = boot_pnode_to_blade(pnode); 837 blade = boot_pnode_to_blade(pnode);
721 lcpu = uv_blade_info[blade].nr_possible_cpus; 838 lcpu = uv_blade_info[blade].nr_possible_cpus;
@@ -731,7 +848,6 @@ void __init uv_system_init(void)
731 uv_cpu_hub_info(cpu)->numa_blade_id = blade; 848 uv_cpu_hub_info(cpu)->numa_blade_id = blade;
732 uv_cpu_hub_info(cpu)->blade_processor_id = lcpu; 849 uv_cpu_hub_info(cpu)->blade_processor_id = lcpu;
733 uv_cpu_hub_info(cpu)->pnode = pnode; 850 uv_cpu_hub_info(cpu)->pnode = pnode;
734 uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
735 uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1; 851 uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1;
736 uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; 852 uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
737 uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra; 853 uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra;
@@ -755,7 +871,7 @@ void __init uv_system_init(void)
755 871
756 map_gru_high(max_pnode); 872 map_gru_high(max_pnode);
757 map_mmr_high(max_pnode); 873 map_mmr_high(max_pnode);
758 map_mmioh_high(max_pnode); 874 map_mmioh_high(max_pnode & pnode_io_mask);
759 875
760 uv_cpu_init(); 876 uv_cpu_init();
761 uv_scir_register_cpu_notifier(); 877 uv_scir_register_cpu_notifier();
@@ -764,4 +880,13 @@ void __init uv_system_init(void)
764 880
765 /* register Legacy VGA I/O redirection handler */ 881 /* register Legacy VGA I/O redirection handler */
766 pci_register_set_vga_state(uv_set_vga_state); 882 pci_register_set_vga_state(uv_set_vga_state);
883
884 /*
885 * For a kdump kernel the reset must be BOOT_ACPI, not BOOT_EFI, as
886 * EFI is not enabled in the kdump kernel.
887 */
888 if (is_kdump_kernel())
889 reboot_type = BOOT_ACPI;
767} 890}
891
892apic_driver(apic_x2apic_uv_x);
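
With every x86 apic driver now registered through apic_driver(), the 64-bit probe reduces to a first-match walk over the section; a sketch of that loop, assuming the probe_64.c shape this series converges on:

	/* Sketch: pick the first driver in .apicdrivers whose probe() hits. */
	struct apic **drv;

	for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
		if ((*drv)->probe && (*drv)->probe()) {
			apic = *drv;
			printk(KERN_INFO "Switched APIC routing to %s.\n",
			       apic->name);
			break;
		}
	}
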