diff options
| author | Brandon Philips <bphilips@suse.de> | 2010-02-10 04:20:06 -0500 |
|---|---|---|
| committer | H. Peter Anvin <hpa@zytor.com> | 2010-02-10 17:27:28 -0500 |
| commit | ced5b697a76d325e7a7ac7d382dbbb632c765093 (patch) | |
| tree | 1a0a56d4415afcd16d034aa3bc5c0a6ba06c8a52 /kernel | |
| parent | e28cab42f384745c8a947a9ccd51e4aae52f5d51 (diff) | |
x86: Avoid race condition in pci_enable_msix()
Keep chip_data in create_irq_nr and destroy_irq.
When two drivers are setting up MSI-X at the same time via
pci_enable_msix() there is a race. See this dmesg excerpt:
[ 85.170610] ixgbe 0000:02:00.1: irq 97 for MSI/MSI-X
[ 85.170611] alloc irq_desc for 99 on node -1
[ 85.170613] igb 0000:08:00.1: irq 98 for MSI/MSI-X
[ 85.170614] alloc kstat_irqs on node -1
[ 85.170616] alloc irq_2_iommu on node -1
[ 85.170617] alloc irq_desc for 100 on node -1
[ 85.170619] alloc kstat_irqs on node -1
[ 85.170621] alloc irq_2_iommu on node -1
[ 85.170625] ixgbe 0000:02:00.1: irq 99 for MSI/MSI-X
[ 85.170626] alloc irq_desc for 101 on node -1
[ 85.170628] igb 0000:08:00.1: irq 100 for MSI/MSI-X
[ 85.170630] alloc kstat_irqs on node -1
[ 85.170631] alloc irq_2_iommu on node -1
[ 85.170635] alloc irq_desc for 102 on node -1
[ 85.170636] alloc kstat_irqs on node -1
[ 85.170639] alloc irq_2_iommu on node -1
[ 85.170646] BUG: unable to handle kernel NULL pointer dereference
at 0000000000000088
As you can see igb and ixgbe are both alternating on create_irq_nr()
via pci_enable_msix() in their probe function.
ixgbe: While looping through irq_desc_ptrs[] via create_irq_nr() ixgbe
chooses irq_desc_ptrs[102] and exits the loop, drops vector_lock and
calls dynamic_irq_init. Then it sets irq_desc_ptrs[102]->chip_data =
NULL via dynamic_irq_init().
igb: Grabs the vector_lock now and starts looping over irq_desc_ptrs[]
via create_irq_nr(). It gets to irq_desc_ptrs[102] and does this:
cfg_new = irq_desc_ptrs[102]->chip_data;
if (cfg_new->vector != 0)
continue;
This hits the NULL deref.
Another possible race exists via pci_disable_msix() in a driver or in
any of the error paths that call free_msi_irqs():
destroy_irq()
dynamic_irq_cleanup() which sets desc->chip_data = NULL
...race window...
desc->chip_data = cfg;
Remove the save and restore code for cfg in create_irq_nr() and
destroy_irq() and take the desc->lock when checking the irq_cfg.
Reported-and-analyzed-by: Brandon Philips <bphilips@suse.de>
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <1265793639-15071-3-git-send-email-yinghai@kernel.org>
Signed-off-by: Brandon Philips <bphilips@suse.de>
Cc: stable@kernel.org
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/irq/chip.c | 52 |
1 files changed, 43 insertions, 9 deletions
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index ecc3fa28f666..d70394f12ee9 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c | |||
| @@ -18,11 +18,7 @@ | |||
| 18 | 18 | ||
| 19 | #include "internals.h" | 19 | #include "internals.h" |
| 20 | 20 | ||
| 21 | /** | 21 | static void dynamic_irq_init_x(unsigned int irq, bool keep_chip_data) |
| 22 | * dynamic_irq_init - initialize a dynamically allocated irq | ||
| 23 | * @irq: irq number to initialize | ||
| 24 | */ | ||
| 25 | void dynamic_irq_init(unsigned int irq) | ||
| 26 | { | 22 | { |
| 27 | struct irq_desc *desc; | 23 | struct irq_desc *desc; |
| 28 | unsigned long flags; | 24 | unsigned long flags; |
| @@ -41,7 +37,8 @@ void dynamic_irq_init(unsigned int irq) | |||
| 41 | desc->depth = 1; | 37 | desc->depth = 1; |
| 42 | desc->msi_desc = NULL; | 38 | desc->msi_desc = NULL; |
| 43 | desc->handler_data = NULL; | 39 | desc->handler_data = NULL; |
| 44 | desc->chip_data = NULL; | 40 | if (!keep_chip_data) |
| 41 | desc->chip_data = NULL; | ||
| 45 | desc->action = NULL; | 42 | desc->action = NULL; |
| 46 | desc->irq_count = 0; | 43 | desc->irq_count = 0; |
| 47 | desc->irqs_unhandled = 0; | 44 | desc->irqs_unhandled = 0; |
| @@ -55,10 +52,26 @@ void dynamic_irq_init(unsigned int irq) | |||
| 55 | } | 52 | } |
| 56 | 53 | ||
| 57 | /** | 54 | /** |
| 58 | * dynamic_irq_cleanup - cleanup a dynamically allocated irq | 55 | * dynamic_irq_init - initialize a dynamically allocated irq |
| 59 | * @irq: irq number to initialize | 56 | * @irq: irq number to initialize |
| 60 | */ | 57 | */ |
| 61 | void dynamic_irq_cleanup(unsigned int irq) | 58 | void dynamic_irq_init(unsigned int irq) |
| 59 | { | ||
| 60 | dynamic_irq_init_x(irq, false); | ||
| 61 | } | ||
| 62 | |||
| 63 | /** | ||
| 64 | * dynamic_irq_init_keep_chip_data - initialize a dynamically allocated irq | ||
| 65 | * @irq: irq number to initialize | ||
| 66 | * | ||
| 67 | * does not set irq_to_desc(irq)->chip_data to NULL | ||
| 68 | */ | ||
| 69 | void dynamic_irq_init_keep_chip_data(unsigned int irq) | ||
| 70 | { | ||
| 71 | dynamic_irq_init_x(irq, true); | ||
| 72 | } | ||
| 73 | |||
| 74 | static void dynamic_irq_cleanup_x(unsigned int irq, bool keep_chip_data) | ||
| 62 | { | 75 | { |
| 63 | struct irq_desc *desc = irq_to_desc(irq); | 76 | struct irq_desc *desc = irq_to_desc(irq); |
| 64 | unsigned long flags; | 77 | unsigned long flags; |
| @@ -77,7 +90,8 @@ void dynamic_irq_cleanup(unsigned int irq) | |||
| 77 | } | 90 | } |
| 78 | desc->msi_desc = NULL; | 91 | desc->msi_desc = NULL; |
| 79 | desc->handler_data = NULL; | 92 | desc->handler_data = NULL; |
| 80 | desc->chip_data = NULL; | 93 | if (!keep_chip_data) |
| 94 | desc->chip_data = NULL; | ||
| 81 | desc->handle_irq = handle_bad_irq; | 95 | desc->handle_irq = handle_bad_irq; |
| 82 | desc->chip = &no_irq_chip; | 96 | desc->chip = &no_irq_chip; |
| 83 | desc->name = NULL; | 97 | desc->name = NULL; |
| @@ -85,6 +99,26 @@ void dynamic_irq_cleanup(unsigned int irq) | |||
| 85 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 99 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
| 86 | } | 100 | } |
| 87 | 101 | ||
| 102 | /** | ||
| 103 | * dynamic_irq_cleanup - cleanup a dynamically allocated irq | ||
| 104 | * @irq: irq number to initialize | ||
| 105 | */ | ||
| 106 | void dynamic_irq_cleanup(unsigned int irq) | ||
| 107 | { | ||
| 108 | dynamic_irq_cleanup_x(irq, false); | ||
| 109 | } | ||
| 110 | |||
| 111 | /** | ||
| 112 | * dynamic_irq_cleanup_keep_chip_data - cleanup a dynamically allocated irq | ||
| 113 | * @irq: irq number to initialize | ||
| 114 | * | ||
| 115 | * does not set irq_to_desc(irq)->chip_data to NULL | ||
| 116 | */ | ||
| 117 | void dynamic_irq_cleanup_keep_chip_data(unsigned int irq) | ||
| 118 | { | ||
| 119 | dynamic_irq_cleanup_x(irq, true); | ||
| 120 | } | ||
| 121 | |||
| 88 | 122 | ||
| 89 | /** | 123 | /** |
| 90 | * set_irq_chip - set the irq chip for an irq | 124 | * set_irq_chip - set the irq chip for an irq |
