From ced5b697a76d325e7a7ac7d382dbbb632c765093 Mon Sep 17 00:00:00 2001 From: Brandon Phiilps Date: Wed, 10 Feb 2010 01:20:06 -0800 Subject: x86: Avoid race condition in pci_enable_msix() Keep chip_data in create_irq_nr and destroy_irq. When two drivers are setting up MSI-X at the same time via pci_enable_msix() there is a race. See this dmesg excerpt: [ 85.170610] ixgbe 0000:02:00.1: irq 97 for MSI/MSI-X [ 85.170611] alloc irq_desc for 99 on node -1 [ 85.170613] igb 0000:08:00.1: irq 98 for MSI/MSI-X [ 85.170614] alloc kstat_irqs on node -1 [ 85.170616] alloc irq_2_iommu on node -1 [ 85.170617] alloc irq_desc for 100 on node -1 [ 85.170619] alloc kstat_irqs on node -1 [ 85.170621] alloc irq_2_iommu on node -1 [ 85.170625] ixgbe 0000:02:00.1: irq 99 for MSI/MSI-X [ 85.170626] alloc irq_desc for 101 on node -1 [ 85.170628] igb 0000:08:00.1: irq 100 for MSI/MSI-X [ 85.170630] alloc kstat_irqs on node -1 [ 85.170631] alloc irq_2_iommu on node -1 [ 85.170635] alloc irq_desc for 102 on node -1 [ 85.170636] alloc kstat_irqs on node -1 [ 85.170639] alloc irq_2_iommu on node -1 [ 85.170646] BUG: unable to handle kernel NULL pointer dereference at 0000000000000088 As you can see igb and ixgbe are both alternating on create_irq_nr() via pci_enable_msix() in their probe function. ixgbe: While looping through irq_desc_ptrs[] via create_irq_nr() ixgbe choses irq_desc_ptrs[102] and exits the loop, drops vector_lock and calls dynamic_irq_init. Then it sets irq_desc_ptrs[102]->chip_data = NULL via dynamic_irq_init(). igb: Grabs the vector_lock now and starts looping over irq_desc_ptrs[] via create_irq_nr(). It gets to irq_desc_ptrs[102] and does this: cfg_new = irq_desc_ptrs[102]->chip_data; if (cfg_new->vector != 0) continue; This hits the NULL deref. Another possible race exists via pci_disable_msix() in a driver or in the number of error paths that call free_msi_irqs(): destroy_irq() dynamic_irq_cleanup() which sets desc->chip_data = NULL ...race window... desc->chip_data = cfg; Remove the save and restore code for cfg in create_irq_nr() and destroy_irq() and take the desc->lock when checking the irq_cfg. Reported-and-analyzed-by: Brandon Philips Signed-off-by: Yinghai Lu LKML-Reference: <1265793639-15071-3-git-send-email-yinghai@kernel.org> Signed-off-by: Brandon Phililps Cc: stable@kernel.org Signed-off-by: H. Peter Anvin --- kernel/irq/chip.c | 52 +++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 9 deletions(-) (limited to 'kernel/irq/chip.c') diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index ecc3fa28f666..d70394f12ee9 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -18,11 +18,7 @@ #include "internals.h" -/** - * dynamic_irq_init - initialize a dynamically allocated irq - * @irq: irq number to initialize - */ -void dynamic_irq_init(unsigned int irq) +static void dynamic_irq_init_x(unsigned int irq, bool keep_chip_data) { struct irq_desc *desc; unsigned long flags; @@ -41,7 +37,8 @@ void dynamic_irq_init(unsigned int irq) desc->depth = 1; desc->msi_desc = NULL; desc->handler_data = NULL; - desc->chip_data = NULL; + if (!keep_chip_data) + desc->chip_data = NULL; desc->action = NULL; desc->irq_count = 0; desc->irqs_unhandled = 0; @@ -55,10 +52,26 @@ void dynamic_irq_init(unsigned int irq) } /** - * dynamic_irq_cleanup - cleanup a dynamically allocated irq + * dynamic_irq_init - initialize a dynamically allocated irq * @irq: irq number to initialize */ -void dynamic_irq_cleanup(unsigned int irq) +void dynamic_irq_init(unsigned int irq) +{ + dynamic_irq_init_x(irq, false); +} + +/** + * dynamic_irq_init_keep_chip_data - initialize a dynamically allocated irq + * @irq: irq number to initialize + * + * does not set irq_to_desc(irq)->chip_data to NULL + */ +void dynamic_irq_init_keep_chip_data(unsigned int irq) +{ + dynamic_irq_init_x(irq, true); +} + +static void dynamic_irq_cleanup_x(unsigned int irq, bool keep_chip_data) { struct irq_desc *desc = irq_to_desc(irq); unsigned long flags; @@ -77,7 +90,8 @@ void dynamic_irq_cleanup(unsigned int irq) } desc->msi_desc = NULL; desc->handler_data = NULL; - desc->chip_data = NULL; + if (!keep_chip_data) + desc->chip_data = NULL; desc->handle_irq = handle_bad_irq; desc->chip = &no_irq_chip; desc->name = NULL; @@ -85,6 +99,26 @@ void dynamic_irq_cleanup(unsigned int irq) raw_spin_unlock_irqrestore(&desc->lock, flags); } +/** + * dynamic_irq_cleanup - cleanup a dynamically allocated irq + * @irq: irq number to initialize + */ +void dynamic_irq_cleanup(unsigned int irq) +{ + dynamic_irq_cleanup_x(irq, false); +} + +/** + * dynamic_irq_cleanup_keep_chip_data - cleanup a dynamically allocated irq + * @irq: irq number to initialize + * + * does not set irq_to_desc(irq)->chip_data to NULL + */ +void dynamic_irq_cleanup_keep_chip_data(unsigned int irq) +{ + dynamic_irq_cleanup_x(irq, true); +} + /** * set_irq_chip - set the irq chip for an irq -- cgit v1.2.2 From dfff0615d28bdb3e8d213e5537dd069265912667 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 12 Feb 2010 21:58:11 +0100 Subject: tree-wide: fix typos "ass?o[sc]iac?te" -> "associate" in comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Uwe Kleine-König Signed-off-by: Jiri Kosina --- kernel/irq/chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/irq/chip.c') diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index ecc3fa28f666..ec8a96382461 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -520,7 +520,7 @@ out: * signal. The occurence is latched into the irq controller hardware * and must be acked in order to be reenabled. After the ack another * interrupt can happen on the same source even before the first one - * is handled by the assosiacted event handler. If this happens it + * is handled by the associated event handler. If this happens it * might be necessary to disable (mask) the interrupt depending on the * controller hardware. This requires to reenable the interrupt inside * of the loop which handles the interrupts which have arrived while -- cgit v1.2.2 From 0b1adaa031a55e44f5dd942f234bf09d28e8a0d6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 9 Mar 2010 19:45:54 +0100 Subject: genirq: Prevent oneshot irq thread race Lars-Peter pointed out that the oneshot threaded interrupt handler code has the following race: CPU0 CPU1 hande_level_irq(irq X) mask_ack_irq(irq X) handle_IRQ_event(irq X) wake_up(thread_handler) thread handler(irq X) runs finalize_oneshot(irq X) does not unmask due to !(desc->status & IRQ_MASKED) return from irq does not unmask due to (desc->status & IRQ_ONESHOT) This leaves the interrupt line masked forever. The reason for this is the inconsistent handling of the IRQ_MASKED flag. Instead of setting it in the mask function the oneshot support sets the flag after waking up the irq thread. The solution for this is to set/clear the IRQ_MASKED status whenever we mask/unmask an interrupt line. That's the easy part, but that cleanup opens another race: CPU0 CPU1 hande_level_irq(irq) mask_ack_irq(irq) handle_IRQ_event(irq) wake_up(thread_handler) thread handler(irq) runs finalize_oneshot_irq(irq) unmask(irq) irq triggers again handle_level_irq(irq) mask_ack_irq(irq) return from irq due to IRQ_INPROGRESS return from irq does not unmask due to (desc->status & IRQ_ONESHOT) This requires that we synchronize finalize_oneshot_irq() with the primary handler. If IRQ_INPROGESS is set we wait until the primary handler on the other CPU has returned before unmasking the interrupt line again. We probably have never seen that problem because it does not happen on UP and on SMP the irqbalancer protects us by pinning the primary handler and the thread to the same CPU. Reported-by: Lars-Peter Clausen Signed-off-by: Thomas Gleixner Cc: stable@kernel.org --- kernel/irq/chip.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) (limited to 'kernel/irq/chip.c') diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index d70394f12ee9..71eba24a39a2 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -359,6 +359,23 @@ static inline void mask_ack_irq(struct irq_desc *desc, int irq) if (desc->chip->ack) desc->chip->ack(irq); } + desc->status |= IRQ_MASKED; +} + +static inline void mask_irq(struct irq_desc *desc, int irq) +{ + if (desc->chip->mask) { + desc->chip->mask(irq); + desc->status |= IRQ_MASKED; + } +} + +static inline void unmask_irq(struct irq_desc *desc, int irq) +{ + if (desc->chip->unmask) { + desc->chip->unmask(irq); + desc->status &= ~IRQ_MASKED; + } } /* @@ -484,10 +501,8 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc) raw_spin_lock(&desc->lock); desc->status &= ~IRQ_INPROGRESS; - if (unlikely(desc->status & IRQ_ONESHOT)) - desc->status |= IRQ_MASKED; - else if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask) - desc->chip->unmask(irq); + if (!(desc->status & (IRQ_DISABLED | IRQ_ONESHOT))) + unmask_irq(desc, irq); out_unlock: raw_spin_unlock(&desc->lock); } @@ -524,8 +539,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) action = desc->action; if (unlikely(!action || (desc->status & IRQ_DISABLED))) { desc->status |= IRQ_PENDING; - if (desc->chip->mask) - desc->chip->mask(irq); + mask_irq(desc, irq); goto out; } @@ -593,7 +607,7 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) irqreturn_t action_ret; if (unlikely(!action)) { - desc->chip->mask(irq); + mask_irq(desc, irq); goto out_unlock; } @@ -605,8 +619,7 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) if (unlikely((desc->status & (IRQ_PENDING | IRQ_MASKED | IRQ_DISABLED)) == (IRQ_PENDING | IRQ_MASKED))) { - desc->chip->unmask(irq); - desc->status &= ~IRQ_MASKED; + unmask_irq(desc, irq); } desc->status &= ~IRQ_PENDING; -- cgit v1.2.2 From 860652bfb890bd861c999ec39fcffabe5b712f85 Mon Sep 17 00:00:00 2001 From: Henrik Kretzschmar Date: Wed, 24 Mar 2010 12:59:20 +0100 Subject: genirq: Move two IRQ functions from .init.text to .text Both functions should not be marked as __init, since they be called from modules after the init section is freed. Signed-off-by: Henrik Kretzschmar Cc: Yinghai Lu Cc: Peter Zijlstra Cc: Jiri Kosina LKML-Reference: <1269431961-5731-1-git-send-email-henne@nachtwindheim.de> Signed-off-by: Thomas Gleixner --- kernel/irq/chip.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/irq/chip.c') diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 71eba24a39a2..3c2d6e7737f0 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -729,7 +729,7 @@ set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip, __set_irq_handler(irq, handle, 0, name); } -void __init set_irq_noprobe(unsigned int irq) +void set_irq_noprobe(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); unsigned long flags; @@ -744,7 +744,7 @@ void __init set_irq_noprobe(unsigned int irq) raw_spin_unlock_irqrestore(&desc->lock, flags); } -void __init set_irq_probe(unsigned int irq) +void set_irq_probe(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); unsigned long flags; -- cgit v1.2.2