diff options
author | Brandon Phiilps <bphilips@suse.de> | 2010-02-10 04:20:06 -0500 |
---|---|---|
committer | H. Peter Anvin <hpa@zytor.com> | 2010-02-10 17:27:28 -0500 |
commit | ced5b697a76d325e7a7ac7d382dbbb632c765093 (patch) | |
tree | 1a0a56d4415afcd16d034aa3bc5c0a6ba06c8a52 /arch/x86/kernel/apic | |
parent | e28cab42f384745c8a947a9ccd51e4aae52f5d51 (diff) |
x86: Avoid race condition in pci_enable_msix()
Keep chip_data in create_irq_nr and destroy_irq.
When two drivers are setting up MSI-X at the same time via
pci_enable_msix() there is a race. See this dmesg excerpt:
[ 85.170610] ixgbe 0000:02:00.1: irq 97 for MSI/MSI-X
[ 85.170611] alloc irq_desc for 99 on node -1
[ 85.170613] igb 0000:08:00.1: irq 98 for MSI/MSI-X
[ 85.170614] alloc kstat_irqs on node -1
[ 85.170616] alloc irq_2_iommu on node -1
[ 85.170617] alloc irq_desc for 100 on node -1
[ 85.170619] alloc kstat_irqs on node -1
[ 85.170621] alloc irq_2_iommu on node -1
[ 85.170625] ixgbe 0000:02:00.1: irq 99 for MSI/MSI-X
[ 85.170626] alloc irq_desc for 101 on node -1
[ 85.170628] igb 0000:08:00.1: irq 100 for MSI/MSI-X
[ 85.170630] alloc kstat_irqs on node -1
[ 85.170631] alloc irq_2_iommu on node -1
[ 85.170635] alloc irq_desc for 102 on node -1
[ 85.170636] alloc kstat_irqs on node -1
[ 85.170639] alloc irq_2_iommu on node -1
[ 85.170646] BUG: unable to handle kernel NULL pointer dereference
at 0000000000000088
As you can see igb and ixgbe are both alternating on create_irq_nr()
via pci_enable_msix() in their probe function.
ixgbe: While looping through irq_desc_ptrs[] via create_irq_nr() ixgbe
choses irq_desc_ptrs[102] and exits the loop, drops vector_lock and
calls dynamic_irq_init. Then it sets irq_desc_ptrs[102]->chip_data =
NULL via dynamic_irq_init().
igb: Grabs the vector_lock now and starts looping over irq_desc_ptrs[]
via create_irq_nr(). It gets to irq_desc_ptrs[102] and does this:
cfg_new = irq_desc_ptrs[102]->chip_data;
if (cfg_new->vector != 0)
continue;
This hits the NULL deref.
Another possible race exists via pci_disable_msix() in a driver or in
the number of error paths that call free_msi_irqs():
destroy_irq()
dynamic_irq_cleanup() which sets desc->chip_data = NULL
...race window...
desc->chip_data = cfg;
Remove the save and restore code for cfg in create_irq_nr() and
destroy_irq() and take the desc->lock when checking the irq_cfg.
Reported-and-analyzed-by: Brandon Philips <bphilips@suse.de>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <1265793639-15071-3-git-send-email-yinghai@kernel.org>
Signed-off-by: Brandon Phililps <bphilips@suse.de>
Cc: stable@kernel.org
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Diffstat (limited to 'arch/x86/kernel/apic')
-rw-r--r-- | arch/x86/kernel/apic/io_apic.c | 18 |
1 files changed, 5 insertions, 13 deletions
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 53243ca7816d..c86591b906fa 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -3228,12 +3228,9 @@ unsigned int create_irq_nr(unsigned int irq_want, int node) | |||
3228 | } | 3228 | } |
3229 | spin_unlock_irqrestore(&vector_lock, flags); | 3229 | spin_unlock_irqrestore(&vector_lock, flags); |
3230 | 3230 | ||
3231 | if (irq > 0) { | 3231 | if (irq > 0) |
3232 | dynamic_irq_init(irq); | 3232 | dynamic_irq_init_keep_chip_data(irq); |
3233 | /* restore it, in case dynamic_irq_init clear it */ | 3233 | |
3234 | if (desc_new) | ||
3235 | desc_new->chip_data = cfg_new; | ||
3236 | } | ||
3237 | return irq; | 3234 | return irq; |
3238 | } | 3235 | } |
3239 | 3236 | ||
@@ -3256,17 +3253,12 @@ void destroy_irq(unsigned int irq) | |||
3256 | { | 3253 | { |
3257 | unsigned long flags; | 3254 | unsigned long flags; |
3258 | struct irq_cfg *cfg; | 3255 | struct irq_cfg *cfg; |
3259 | struct irq_desc *desc; | ||
3260 | 3256 | ||
3261 | /* store it, in case dynamic_irq_cleanup clear it */ | 3257 | dynamic_irq_cleanup_keep_chip_data(irq); |
3262 | desc = irq_to_desc(irq); | ||
3263 | cfg = desc->chip_data; | ||
3264 | dynamic_irq_cleanup(irq); | ||
3265 | /* connect back irq_cfg */ | ||
3266 | desc->chip_data = cfg; | ||
3267 | 3258 | ||
3268 | free_irte(irq); | 3259 | free_irte(irq); |
3269 | spin_lock_irqsave(&vector_lock, flags); | 3260 | spin_lock_irqsave(&vector_lock, flags); |
3261 | cfg = irq_to_desc(irq)->chip_data; | ||
3270 | __clear_irq_vector(irq, cfg); | 3262 | __clear_irq_vector(irq, cfg); |
3271 | spin_unlock_irqrestore(&vector_lock, flags); | 3263 | spin_unlock_irqrestore(&vector_lock, flags); |
3272 | } | 3264 | } |