aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/apic/io_apic.c
diff options
context:
space:
mode:
author Brandon Philips <bphilips@suse.de> 2010-02-07 16:02:50 -0500
committer H. Peter Anvin <hpa@zytor.com> 2010-02-19 00:53:15 -0500
commit eb5b3794062824ba12d883901eea49ea89d0a678 (patch)
tree a0a53204d7efa00d11ded6bcc166c36dbdd087d6 /arch/x86/kernel/apic/io_apic.c
parent 0a832320f1bae6a4169bf683e201378f2437cfc1 (diff)
x86, irq: Keep chip_data in create_irq_nr and destroy_irq
Version 4: use get_irq_chip_data() in destroy_irq() to get rid of some local vars.

When two drivers are setting up MSI-X at the same time via pci_enable_msix() there is a race. See this dmesg excerpt:

[ 85.170610] ixgbe 0000:02:00.1: irq 97 for MSI/MSI-X
[ 85.170611] alloc irq_desc for 99 on node -1
[ 85.170613] igb 0000:08:00.1: irq 98 for MSI/MSI-X
[ 85.170614] alloc kstat_irqs on node -1
[ 85.170616] alloc irq_2_iommu on node -1
[ 85.170617] alloc irq_desc for 100 on node -1
[ 85.170619] alloc kstat_irqs on node -1
[ 85.170621] alloc irq_2_iommu on node -1
[ 85.170625] ixgbe 0000:02:00.1: irq 99 for MSI/MSI-X
[ 85.170626] alloc irq_desc for 101 on node -1
[ 85.170628] igb 0000:08:00.1: irq 100 for MSI/MSI-X
[ 85.170630] alloc kstat_irqs on node -1
[ 85.170631] alloc irq_2_iommu on node -1
[ 85.170635] alloc irq_desc for 102 on node -1
[ 85.170636] alloc kstat_irqs on node -1
[ 85.170639] alloc irq_2_iommu on node -1
[ 85.170646] BUG: unable to handle kernel NULL pointer dereference at 0000000000000088

As you can see, igb and ixgbe are both alternating on create_irq_nr() via pci_enable_msix() in their probe function.

ixgbe: While looping through irq_desc_ptrs[] via create_irq_nr() ixgbe chooses irq_desc_ptrs[102] and exits the loop, drops vector_lock and calls dynamic_irq_init. Then it sets irq_desc_ptrs[102]->chip_data = NULL via dynamic_irq_init().

igb: Grabs the vector_lock now and starts looping over irq_desc_ptrs[] via create_irq_nr(). It gets to irq_desc_ptrs[102] and does this:

    cfg_new = irq_desc_ptrs[102]->chip_data;
    if (cfg_new->vector != 0)
        continue;

This hits the NULL deref.

Another possible race exists via pci_disable_msix() in a driver or in a number of error paths that call free_msi_irqs():

    destroy_irq()
        dynamic_irq_cleanup() which sets desc->chip_data = NULL
        ...race window...
        desc->chip_data = cfg;

Remove the save and restore code for cfg in create_irq_nr() and destroy_irq() and take the desc->lock when checking the irq_cfg.
Reported-and-analyzed-by: Brandon Philips <bphilips@suse.de>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <20100207210250.GB8256@jenkins.home.ifup.org>
Signed-off-by: Brandon Philips <bphilips@suse.de>
Cc: stable@kernel.org
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Diffstat (limited to 'arch/x86/kernel/apic/io_apic.c')
-rw-r--r-- arch/x86/kernel/apic/io_apic.c 20
1 files changed, 5 insertions, 15 deletions
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 5e4cce254e4..e93a76bc867 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3278,12 +3278,9 @@ unsigned int create_irq_nr(unsigned int irq_want, int node)
3278 } 3278 }
3279 spin_unlock_irqrestore(&vector_lock, flags); 3279 spin_unlock_irqrestore(&vector_lock, flags);
3280 3280
3281 if (irq > 0) { 3281 if (irq > 0)
3282 dynamic_irq_init(irq); 3282 dynamic_irq_init_keep_chip_data(irq);
3283 /* restore it, in case dynamic_irq_init clear it */ 3283
3284 if (desc_new)
3285 desc_new->chip_data = cfg_new;
3286 }
3287 return irq; 3284 return irq;
3288} 3285}
3289 3286
@@ -3305,19 +3302,12 @@ int create_irq(void)
3305void destroy_irq(unsigned int irq) 3302void destroy_irq(unsigned int irq)
3306{ 3303{
3307 unsigned long flags; 3304 unsigned long flags;
3308 struct irq_cfg *cfg;
3309 struct irq_desc *desc;
3310 3305
3311 /* store it, in case dynamic_irq_cleanup clear it */ 3306 dynamic_irq_cleanup_keep_chip_data(irq);
3312 desc = irq_to_desc(irq);
3313 cfg = desc->chip_data;
3314 dynamic_irq_cleanup(irq);
3315 /* connect back irq_cfg */
3316 desc->chip_data = cfg;
3317 3307
3318 free_irte(irq); 3308 free_irte(irq);
3319 spin_lock_irqsave(&vector_lock, flags); 3309 spin_lock_irqsave(&vector_lock, flags);
3320 __clear_irq_vector(irq, cfg); 3310 __clear_irq_vector(irq, get_irq_chip_data(irq));
3321 spin_unlock_irqrestore(&vector_lock, flags); 3311 spin_unlock_irqrestore(&vector_lock, flags);
3322} 3312}
3323 3313