aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2017-12-29 04:47:22 -0500
committerThomas Gleixner <tglx@linutronix.de>2017-12-29 15:13:05 -0500
commitbc976233a872c0f20f018fb1e89264a541584e25 (patch)
treefe2808dd6cebfe0d671f36b7edda2efeba99e045
parent702cb0a02813299d6911b775c637906ae21b737d (diff)
genirq/msi, x86/vector: Prevent reservation mode for non maskable MSI
The new reservation mode for interrupts assigns a dummy vector when the interrupt is allocated and assigns a real vector when the interrupt is requested. The reservation mode prevents vector pressure when devices with a large number of queues/interrupts are initialized, but only a minimal subset of those queues/interrupts is actually used. This mode has an issue with MSI interrupts which cannot be masked. If the driver is not careful or the hardware emits an interrupt before the device irq is requested by the driver then the interrupt ends up on the dummy vector as a spurious interrupt which can cause malfunction of the device or in the worst case a lockup of the machine. Change the logic for the reservation mode so that the early activation of MSI interrupts checks whether: - the device is a PCI/MSI device - the reservation mode of the underlying irqdomain is activated - PCI/MSI masking is globally enabled - the PCI/MSI device uses either MSI-X, which supports masking, or MSI with the maskbit supported. If one of those conditions is false, then clear the reservation mode flag in the irq data of the interrupt and invoke irq_domain_activate_irq() with the reserve argument cleared. In the x86 vector code, clear the can_reserve flag in the vector allocation data so a subsequent free_irq() won't create the same situation again. The interrupt stays assigned to a real vector until pci_disable_msi() is invoked and all allocations are undone. Fixes: 4900be83602b ("x86/vector/msi: Switch to global reservation mode") Reported-by: Alexandru Chirvasitu <achirvasub@gmail.com> Reported-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Alexandru Chirvasitu <achirvasub@gmail.com> Tested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com> Cc: Dou Liyang <douly.fnst@cn.fujitsu.com> Cc: Pavel Machek <pavel@ucw.cz> Cc: Maciej W. 
Rozycki <macro@linux-mips.org> Cc: Mikael Pettersson <mikpelinux@gmail.com> Cc: Josh Poulson <jopoulso@microsoft.com> Cc: Mihai Costache <v-micos@microsoft.com> Cc: Stephen Hemminger <sthemmin@microsoft.com> Cc: Marc Zyngier <marc.zyngier@arm.com> Cc: linux-pci@vger.kernel.org Cc: Haiyang Zhang <haiyangz@microsoft.com> Cc: Dexuan Cui <decui@microsoft.com> Cc: Simon Xiao <sixiao@microsoft.com> Cc: Saeed Mahameed <saeedm@mellanox.com> Cc: Jork Loeser <Jork.Loeser@microsoft.com> Cc: Bjorn Helgaas <bhelgaas@google.com> Cc: devel@linuxdriverproject.org Cc: KY Srinivasan <kys@microsoft.com> Cc: Alan Cox <alan@linux.intel.com> Cc: Sakari Ailus <sakari.ailus@intel.com> Cc: linux-media@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1712291406420.1899@nanos Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1712291409460.1899@nanos
-rw-r--r--arch/x86/kernel/apic/vector.c12
-rw-r--r--kernel/irq/msi.c37
2 files changed, 44 insertions, 5 deletions
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 52c85c8147e9..f8b03bb8e725 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -369,8 +369,18 @@ static int activate_reserved(struct irq_data *irqd)
369 int ret; 369 int ret;
370 370
371 ret = assign_irq_vector_any_locked(irqd); 371 ret = assign_irq_vector_any_locked(irqd);
372 if (!ret) 372 if (!ret) {
373 apicd->has_reserved = false; 373 apicd->has_reserved = false;
374 /*
375 * Core might have disabled reservation mode after
376 * allocating the irq descriptor. Ideally this should
377 * happen before allocation time, but that would require
378 * completely convoluted ways of transporting that
379 * information.
380 */
381 if (!irqd_can_reserve(irqd))
382 apicd->can_reserve = false;
383 }
374 return ret; 384 return ret;
375} 385}
376 386
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 9ba954331171..2f3c4f5382cc 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -339,11 +339,38 @@ int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
339 return ret; 339 return ret;
340} 340}
341 341
342static bool msi_check_reservation_mode(struct msi_domain_info *info) 342/*
343 * Carefully check whether the device can use reservation mode. If
344 * reservation mode is enabled then the early activation will assign a
345 * dummy vector to the device. If the PCI/MSI device does not support
346 * masking of the entry then this can result in spurious interrupts when
347 * the device driver is not absolutely careful. But even then a malfunction
348 * of the hardware could result in a spurious interrupt on the dummy vector
349 * and render the device unusable. If the entry can be masked then the core
350 * logic will prevent the spurious interrupt and reservation mode can be
351 * used. For now reservation mode is restricted to PCI/MSI.
352 */
353static bool msi_check_reservation_mode(struct irq_domain *domain,
354 struct msi_domain_info *info,
355 struct device *dev)
343{ 356{
357 struct msi_desc *desc;
358
359 if (domain->bus_token != DOMAIN_BUS_PCI_MSI)
360 return false;
361
344 if (!(info->flags & MSI_FLAG_MUST_REACTIVATE)) 362 if (!(info->flags & MSI_FLAG_MUST_REACTIVATE))
345 return false; 363 return false;
346 return true; 364
365 if (IS_ENABLED(CONFIG_PCI_MSI) && pci_msi_ignore_mask)
366 return false;
367
368 /*
369 * Checking the first MSI descriptor is sufficient. MSIX supports
370 * masking and MSI does so when the maskbit is set.
371 */
372 desc = first_msi_entry(dev);
373 return desc->msi_attrib.is_msix || desc->msi_attrib.maskbit;
347} 374}
348 375
349/** 376/**
@@ -394,7 +421,7 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
394 if (ops->msi_finish) 421 if (ops->msi_finish)
395 ops->msi_finish(&arg, 0); 422 ops->msi_finish(&arg, 0);
396 423
397 can_reserve = msi_check_reservation_mode(info); 424 can_reserve = msi_check_reservation_mode(domain, info, dev);
398 425
399 for_each_msi_entry(desc, dev) { 426 for_each_msi_entry(desc, dev) {
400 virq = desc->irq; 427 virq = desc->irq;
@@ -412,7 +439,9 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
412 continue; 439 continue;
413 440
414 irq_data = irq_domain_get_irq_data(domain, desc->irq); 441 irq_data = irq_domain_get_irq_data(domain, desc->irq);
415 ret = irq_domain_activate_irq(irq_data, true); 442 if (!can_reserve)
443 irqd_clr_can_reserve(irq_data);
444 ret = irq_domain_activate_irq(irq_data, can_reserve);
416 if (ret) 445 if (ret)
417 goto cleanup; 446 goto cleanup;
418 } 447 }