aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorKerstin Jonsson <kerstin.jonsson@ericsson.com>2010-05-24 15:13:15 -0400
committerH. Peter Anvin <hpa@linux.intel.com>2010-05-24 16:31:35 -0400
commit8c3ba8d049247dc06b6dcee1711a11b26647aa44 (patch)
treeac39a120922bcfb9c97195b9ecc70e6eaf0f633a /arch/x86
parent7e125f7b9cbfce4101191b8076d606c517a73066 (diff)
x86, apic: ack all pending irqs when crashed/on kexec
When the SMP kernel decides to crash_kexec() the local APICs may have pending interrupts in their vector tables. The setup routine for the local APIC has a deficient mechanism for clearing these interrupts, it only handles interrupts that has already been dispatched to the local core for servicing (the ISR register) safely, it doesn't consider lower prioritized queued interrupts stored in the IRR register. If you have more than one pending interrupt within the same 32 bit word in the LAPIC vector table registers you may find yourself entering the IO APIC setup with pending interrupts left in the LAPIC. This is a situation for wich the IO APIC setup is not prepared. Depending of what/which interrupt vector/vectors are stuck in the APIC tables your system may show various degrees of malfunctioning. That was the reason why the check_timer() failed in our system, the timer interrupts was blocked by pending interrupts from the old kernel when routed trough the IO APIC. Additional comment from Jiri Bohac: ============== If this should go into stable release, I'd add some kind of limit on the number of iterations, just to be safe from hard to debug lock-ups: +if (loops++ > MAX_LOOPS) { + printk("LAPIC pending clean-up") + break; +} while (queued); with MAX_LOOPS something like 1E9 this would leave plenty of time for the pending IRQs to be cleared and would and still cause at most a second of delay if the loop were to lock-up for whatever reason. [trenn@suse.de: V2: Use tsc if avail to bail out after 1 sec due to possible virtual apic_read calls which may take rather long (suggested by: Avi Kivity <avi@redhat.com>) If no tsc is available bail out quickly after cpu_khz, if we broke out too early and still have irqs pending (which should never happen?) we still get a WARN_ON... V3: - Fixed indentation -> checkpatch clean - max_loops must be signed V4: - Fix typo, mixed up tsc and ntsc in first rdtscll() call V5: Adjust WARN_ON() condition to also catch error in cpu_has_tsc case] Cc: <jbohac@novell.com> Cc: Yinghai Lu <yinghai@kernel.org> Cc: Kerstin Jonsson <kerstin.jonsson@ericsson.com> Cc: Avi Kivity <avi@redhat.com> Cc: Suresh Siddha <suresh.b.siddha@intel.com> Tested-by: Eric W. Biederman <ebiederm@xmission.com> Signed-off-by: Thomas Renninger <trenn@suse.de> LKML-Reference: <201005241913.o4OJDGWM010865@imap1.linux-foundation.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/kernel/apic/apic.c41
1 files changed, 33 insertions, 8 deletions
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index e5a4a1e01618..c02cc692985c 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -51,6 +51,7 @@
51#include <asm/smp.h> 51#include <asm/smp.h>
52#include <asm/mce.h> 52#include <asm/mce.h>
53#include <asm/kvm_para.h> 53#include <asm/kvm_para.h>
54#include <asm/tsc.h>
54 55
55unsigned int num_processors; 56unsigned int num_processors;
56 57
@@ -1151,8 +1152,13 @@ static void __cpuinit lapic_setup_esr(void)
1151 */ 1152 */
1152void __cpuinit setup_local_APIC(void) 1153void __cpuinit setup_local_APIC(void)
1153{ 1154{
1154 unsigned int value; 1155 unsigned int value, queued;
1155 int i, j; 1156 int i, j, acked = 0;
1157 unsigned long long tsc = 0, ntsc;
1158 long long max_loops = cpu_khz;
1159
1160 if (cpu_has_tsc)
1161 rdtscll(tsc);
1156 1162
1157 if (disable_apic) { 1163 if (disable_apic) {
1158 arch_disable_smp_support(); 1164 arch_disable_smp_support();
@@ -1204,13 +1210,32 @@ void __cpuinit setup_local_APIC(void)
1204 * the interrupt. Hence a vector might get locked. It was noticed 1210 * the interrupt. Hence a vector might get locked. It was noticed
1205 * for timer irq (vector 0x31). Issue an extra EOI to clear ISR. 1211 * for timer irq (vector 0x31). Issue an extra EOI to clear ISR.
1206 */ 1212 */
1207 for (i = APIC_ISR_NR - 1; i >= 0; i--) { 1213 do {
1208 value = apic_read(APIC_ISR + i*0x10); 1214 queued = 0;
1209 for (j = 31; j >= 0; j--) { 1215 for (i = APIC_ISR_NR - 1; i >= 0; i--)
1210 if (value & (1<<j)) 1216 queued |= apic_read(APIC_IRR + i*0x10);
1211 ack_APIC_irq(); 1217
1218 for (i = APIC_ISR_NR - 1; i >= 0; i--) {
1219 value = apic_read(APIC_ISR + i*0x10);
1220 for (j = 31; j >= 0; j--) {
1221 if (value & (1<<j)) {
1222 ack_APIC_irq();
1223 acked++;
1224 }
1225 }
1212 } 1226 }
1213 } 1227 if (acked > 256) {
1228 printk(KERN_ERR "LAPIC pending interrupts after %d EOI\n",
1229 acked);
1230 break;
1231 }
1232 if (cpu_has_tsc) {
1233 rdtscll(ntsc);
1234 max_loops = (cpu_khz << 10) - (ntsc - tsc);
1235 } else
1236 max_loops--;
1237 } while (queued && max_loops > 0);
1238 WARN_ON(max_loops <= 0);
1214 1239
1215 /* 1240 /*
1216 * Now that we are all set up, enable the APIC 1241 * Now that we are all set up, enable the APIC