aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorBrandon Phiilps <bphilips@suse.de>2010-02-10 04:20:06 -0500
committerH. Peter Anvin <hpa@zytor.com>2010-02-10 17:27:28 -0500
commitced5b697a76d325e7a7ac7d382dbbb632c765093 (patch)
tree1a0a56d4415afcd16d034aa3bc5c0a6ba06c8a52 /arch
parente28cab42f384745c8a947a9ccd51e4aae52f5d51 (diff)
x86: Avoid race condition in pci_enable_msix()
Keep chip_data in create_irq_nr and destroy_irq. When two drivers are setting up MSI-X at the same time via pci_enable_msix() there is a race. See this dmesg excerpt: [ 85.170610] ixgbe 0000:02:00.1: irq 97 for MSI/MSI-X [ 85.170611] alloc irq_desc for 99 on node -1 [ 85.170613] igb 0000:08:00.1: irq 98 for MSI/MSI-X [ 85.170614] alloc kstat_irqs on node -1 [ 85.170616] alloc irq_2_iommu on node -1 [ 85.170617] alloc irq_desc for 100 on node -1 [ 85.170619] alloc kstat_irqs on node -1 [ 85.170621] alloc irq_2_iommu on node -1 [ 85.170625] ixgbe 0000:02:00.1: irq 99 for MSI/MSI-X [ 85.170626] alloc irq_desc for 101 on node -1 [ 85.170628] igb 0000:08:00.1: irq 100 for MSI/MSI-X [ 85.170630] alloc kstat_irqs on node -1 [ 85.170631] alloc irq_2_iommu on node -1 [ 85.170635] alloc irq_desc for 102 on node -1 [ 85.170636] alloc kstat_irqs on node -1 [ 85.170639] alloc irq_2_iommu on node -1 [ 85.170646] BUG: unable to handle kernel NULL pointer dereference at 0000000000000088 As you can see igb and ixgbe are both alternating on create_irq_nr() via pci_enable_msix() in their probe function. ixgbe: While looping through irq_desc_ptrs[] via create_irq_nr() ixgbe choses irq_desc_ptrs[102] and exits the loop, drops vector_lock and calls dynamic_irq_init. Then it sets irq_desc_ptrs[102]->chip_data = NULL via dynamic_irq_init(). igb: Grabs the vector_lock now and starts looping over irq_desc_ptrs[] via create_irq_nr(). It gets to irq_desc_ptrs[102] and does this: cfg_new = irq_desc_ptrs[102]->chip_data; if (cfg_new->vector != 0) continue; This hits the NULL deref. Another possible race exists via pci_disable_msix() in a driver or in the number of error paths that call free_msi_irqs(): destroy_irq() dynamic_irq_cleanup() which sets desc->chip_data = NULL ...race window... desc->chip_data = cfg; Remove the save and restore code for cfg in create_irq_nr() and destroy_irq() and take the desc->lock when checking the irq_cfg. Reported-and-analyzed-by: Brandon Philips <bphilips@suse.de> Signed-off-by: Yinghai Lu <yinghai@kernel.org> LKML-Reference: <1265793639-15071-3-git-send-email-yinghai@kernel.org> Signed-off-by: Brandon Phililps <bphilips@suse.de> Cc: stable@kernel.org Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/kernel/apic/io_apic.c18
1 files changed, 5 insertions, 13 deletions
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 53243ca7816d..c86591b906fa 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3228,12 +3228,9 @@ unsigned int create_irq_nr(unsigned int irq_want, int node)
3228 } 3228 }
3229 spin_unlock_irqrestore(&vector_lock, flags); 3229 spin_unlock_irqrestore(&vector_lock, flags);
3230 3230
3231 if (irq > 0) { 3231 if (irq > 0)
3232 dynamic_irq_init(irq); 3232 dynamic_irq_init_keep_chip_data(irq);
3233 /* restore it, in case dynamic_irq_init clear it */ 3233
3234 if (desc_new)
3235 desc_new->chip_data = cfg_new;
3236 }
3237 return irq; 3234 return irq;
3238} 3235}
3239 3236
@@ -3256,17 +3253,12 @@ void destroy_irq(unsigned int irq)
3256{ 3253{
3257 unsigned long flags; 3254 unsigned long flags;
3258 struct irq_cfg *cfg; 3255 struct irq_cfg *cfg;
3259 struct irq_desc *desc;
3260 3256
3261 /* store it, in case dynamic_irq_cleanup clear it */ 3257 dynamic_irq_cleanup_keep_chip_data(irq);
3262 desc = irq_to_desc(irq);
3263 cfg = desc->chip_data;
3264 dynamic_irq_cleanup(irq);
3265 /* connect back irq_cfg */
3266 desc->chip_data = cfg;
3267 3258
3268 free_irte(irq); 3259 free_irte(irq);
3269 spin_lock_irqsave(&vector_lock, flags); 3260 spin_lock_irqsave(&vector_lock, flags);
3261 cfg = irq_to_desc(irq)->chip_data;
3270 __clear_irq_vector(irq, cfg); 3262 __clear_irq_vector(irq, cfg);
3271 spin_unlock_irqrestore(&vector_lock, flags); 3263 spin_unlock_irqrestore(&vector_lock, flags);
3272} 3264}