aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Christoph Hellwig <hch@lst.de> 2016-07-12 05:20:18 -0400
committer: Bjorn Helgaas <bhelgaas@google.com> 2016-07-21 16:57:03 -0400
commit: 4ef33685aa0957d771e068b60a5f3ca6b47ade1c (patch)
tree: 34e9fee3bd177657e18f56a4d5d0701f6f3c20f5
parent: aff171641d181ea573380efc3f559c9de4741fc5 (diff)
PCI: Spread interrupt vectors in pci_alloc_irq_vectors()
Set the affinity_mask in the PCI device before allocating vectors so that the affinity can be propagated through the MSI descriptor structures to the core IRQ code. To facilitate this, new __pci_enable_msi_range() and __pci_enable_msix_range() helpers are factored out of their non-prefixed variants; these helpers assign the new IRQ affinity mask in the PCI device so that the low-level interrupt code can perform the interrupt affinity assignment and do node-local allocations. A new PCI_IRQ_NOAFFINITY flag is added to pci_alloc_irq_vectors() so that this function can also be used by drivers that don't wish to use the automatic affinity assignment. [bhelgaas: omit "else" after "return" consistently] Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Reviewed-by: Alexander Gordeev <agordeev@redhat.com>
-rw-r--r-- Documentation/PCI/MSI-HOWTO.txt | 4
-rw-r--r-- drivers/pci/msi.c | 134
-rw-r--r-- include/linux/pci.h | 2
3 files changed, 95 insertions, 45 deletions
diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt
index 0ac612b8c3fb..c55df2911136 100644
--- a/Documentation/PCI/MSI-HOWTO.txt
+++ b/Documentation/PCI/MSI-HOWTO.txt
@@ -99,6 +99,10 @@ PCI_IRQ_NOMSI and PCI_IRQ_NOMSIX flag in case a device claims to support
99MSI or MSI-X, but the support is broken, or to pass PCI_IRQ_NOLEGACY in 99MSI or MSI-X, but the support is broken, or to pass PCI_IRQ_NOLEGACY in
100case the device does not support legacy interrupt lines. 100case the device does not support legacy interrupt lines.
101 101
102By default this function will spread the interrupts around the available
103CPUs, but this feature can be disabled by passing the PCI_IRQ_NOAFFINITY
104flag.
105
102To get the Linux IRQ numbers passed to request_irq() and free_irq() and the 106To get the Linux IRQ numbers passed to request_irq() and free_irq() and the
103vectors, use the following function: 107vectors, use the following function:
104 108
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 5e5ab478ea7d..a02981efdad5 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -569,6 +569,7 @@ static struct msi_desc *msi_setup_entry(struct pci_dev *dev, int nvec)
569 entry->msi_attrib.multi_cap = (control & PCI_MSI_FLAGS_QMASK) >> 1; 569 entry->msi_attrib.multi_cap = (control & PCI_MSI_FLAGS_QMASK) >> 1;
570 entry->msi_attrib.multiple = ilog2(__roundup_pow_of_two(nvec)); 570 entry->msi_attrib.multiple = ilog2(__roundup_pow_of_two(nvec));
571 entry->nvec_used = nvec; 571 entry->nvec_used = nvec;
572 entry->affinity = dev->irq_affinity;
572 573
573 if (control & PCI_MSI_FLAGS_64BIT) 574 if (control & PCI_MSI_FLAGS_64BIT)
574 entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_64; 575 entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_64;
@@ -680,10 +681,18 @@ static void __iomem *msix_map_region(struct pci_dev *dev, unsigned nr_entries)
680static int msix_setup_entries(struct pci_dev *dev, void __iomem *base, 681static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
681 struct msix_entry *entries, int nvec) 682 struct msix_entry *entries, int nvec)
682{ 683{
684 const struct cpumask *mask = NULL;
683 struct msi_desc *entry; 685 struct msi_desc *entry;
684 int i; 686 int cpu = -1, i;
685 687
686 for (i = 0; i < nvec; i++) { 688 for (i = 0; i < nvec; i++) {
689 if (dev->irq_affinity) {
690 cpu = cpumask_next(cpu, dev->irq_affinity);
691 if (cpu >= nr_cpu_ids)
692 cpu = cpumask_first(dev->irq_affinity);
693 mask = cpumask_of(cpu);
694 }
695
687 entry = alloc_msi_entry(&dev->dev); 696 entry = alloc_msi_entry(&dev->dev);
688 if (!entry) { 697 if (!entry) {
689 if (!i) 698 if (!i)
@@ -703,6 +712,7 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
703 entry->msi_attrib.default_irq = dev->irq; 712 entry->msi_attrib.default_irq = dev->irq;
704 entry->mask_base = base; 713 entry->mask_base = base;
705 entry->nvec_used = 1; 714 entry->nvec_used = 1;
715 entry->affinity = mask;
706 716
707 list_add_tail(&entry->list, dev_to_msi_list(&dev->dev)); 717 list_add_tail(&entry->list, dev_to_msi_list(&dev->dev));
708 } 718 }
@@ -1028,19 +1038,8 @@ int pci_msi_enabled(void)
1028} 1038}
1029EXPORT_SYMBOL(pci_msi_enabled); 1039EXPORT_SYMBOL(pci_msi_enabled);
1030 1040
1031/** 1041static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
1032 * pci_enable_msi_range - configure device's MSI capability structure 1042 unsigned int flags)
1033 * @dev: device to configure
1034 * @minvec: minimal number of interrupts to configure
1035 * @maxvec: maximum number of interrupts to configure
1036 *
1037 * This function tries to allocate a maximum possible number of interrupts in a
1038 * range between @minvec and @maxvec. It returns a negative errno if an error
1039 * occurs. If it succeeds, it returns the actual number of interrupts allocated
1040 * and updates the @dev's irq member to the lowest new interrupt number;
1041 * the other interrupt numbers allocated to this device are consecutive.
1042 **/
1043int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec)
1044{ 1043{
1045 int nvec; 1044 int nvec;
1046 int rc; 1045 int rc;
@@ -1063,25 +1062,85 @@ int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec)
1063 nvec = pci_msi_vec_count(dev); 1062 nvec = pci_msi_vec_count(dev);
1064 if (nvec < 0) 1063 if (nvec < 0)
1065 return nvec; 1064 return nvec;
1066 else if (nvec < minvec) 1065 if (nvec < minvec)
1067 return -EINVAL; 1066 return -EINVAL;
1068 else if (nvec > maxvec) 1067
1068 if (nvec > maxvec)
1069 nvec = maxvec; 1069 nvec = maxvec;
1070 1070
1071 do { 1071 for (;;) {
1072 if (!(flags & PCI_IRQ_NOAFFINITY)) {
1073 dev->irq_affinity = irq_create_affinity_mask(&nvec);
1074 if (nvec < minvec)
1075 return -ENOSPC;
1076 }
1077
1072 rc = msi_capability_init(dev, nvec); 1078 rc = msi_capability_init(dev, nvec);
1073 if (rc < 0) { 1079 if (rc == 0)
1080 return nvec;
1081
1082 kfree(dev->irq_affinity);
1083 dev->irq_affinity = NULL;
1084
1085 if (rc < 0)
1074 return rc; 1086 return rc;
1075 } else if (rc > 0) { 1087 if (rc < minvec)
1076 if (rc < minvec) 1088 return -ENOSPC;
1089
1090 nvec = rc;
1091 }
1092}
1093
1094/**
1095 * pci_enable_msi_range - configure device's MSI capability structure
1096 * @dev: device to configure
1097 * @minvec: minimal number of interrupts to configure
1098 * @maxvec: maximum number of interrupts to configure
1099 *
1100 * This function tries to allocate a maximum possible number of interrupts in a
1101 * range between @minvec and @maxvec. It returns a negative errno if an error
1102 * occurs. If it succeeds, it returns the actual number of interrupts allocated
1103 * and updates the @dev's irq member to the lowest new interrupt number;
1104 * the other interrupt numbers allocated to this device are consecutive.
1105 **/
1106int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec)
1107{
1108 return __pci_enable_msi_range(dev, minvec, maxvec, PCI_IRQ_NOAFFINITY);
1109}
1110EXPORT_SYMBOL(pci_enable_msi_range);
1111
1112static int __pci_enable_msix_range(struct pci_dev *dev,
1113 struct msix_entry *entries, int minvec, int maxvec,
1114 unsigned int flags)
1115{
1116 int nvec = maxvec;
1117 int rc;
1118
1119 if (maxvec < minvec)
1120 return -ERANGE;
1121
1122 for (;;) {
1123 if (!(flags & PCI_IRQ_NOAFFINITY)) {
1124 dev->irq_affinity = irq_create_affinity_mask(&nvec);
1125 if (nvec < minvec)
1077 return -ENOSPC; 1126 return -ENOSPC;
1078 nvec = rc;
1079 } 1127 }
1080 } while (rc);
1081 1128
1082 return nvec; 1129 rc = pci_enable_msix(dev, entries, nvec);
1130 if (rc == 0)
1131 return nvec;
1132
1133 kfree(dev->irq_affinity);
1134 dev->irq_affinity = NULL;
1135
1136 if (rc < 0)
1137 return rc;
1138 if (rc < minvec)
1139 return -ENOSPC;
1140
1141 nvec = rc;
1142 }
1083} 1143}
1084EXPORT_SYMBOL(pci_enable_msi_range);
1085 1144
1086/** 1145/**
1087 * pci_enable_msix_range - configure device's MSI-X capability structure 1146 * pci_enable_msix_range - configure device's MSI-X capability structure
@@ -1099,26 +1158,10 @@ EXPORT_SYMBOL(pci_enable_msi_range);
1099 * with new allocated MSI-X interrupts. 1158 * with new allocated MSI-X interrupts.
1100 **/ 1159 **/
1101int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, 1160int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
1102 int minvec, int maxvec) 1161 int minvec, int maxvec)
1103{ 1162{
1104 int nvec = maxvec; 1163 return __pci_enable_msix_range(dev, entries, minvec, maxvec,
1105 int rc; 1164 PCI_IRQ_NOAFFINITY);
1106
1107 if (maxvec < minvec)
1108 return -ERANGE;
1109
1110 do {
1111 rc = pci_enable_msix(dev, entries, nvec);
1112 if (rc < 0) {
1113 return rc;
1114 } else if (rc > 0) {
1115 if (rc < minvec)
1116 return -ENOSPC;
1117 nvec = rc;
1118 }
1119 } while (rc);
1120
1121 return nvec;
1122} 1165}
1123EXPORT_SYMBOL(pci_enable_msix_range); 1166EXPORT_SYMBOL(pci_enable_msix_range);
1124 1167
@@ -1145,13 +1188,14 @@ int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
1145 int vecs = -ENOSPC; 1188 int vecs = -ENOSPC;
1146 1189
1147 if (!(flags & PCI_IRQ_NOMSIX)) { 1190 if (!(flags & PCI_IRQ_NOMSIX)) {
1148 vecs = pci_enable_msix_range(dev, NULL, min_vecs, max_vecs); 1191 vecs = __pci_enable_msix_range(dev, NULL, min_vecs, max_vecs,
1192 flags);
1149 if (vecs > 0) 1193 if (vecs > 0)
1150 return vecs; 1194 return vecs;
1151 } 1195 }
1152 1196
1153 if (!(flags & PCI_IRQ_NOMSI)) { 1197 if (!(flags & PCI_IRQ_NOMSI)) {
1154 vecs = pci_enable_msi_range(dev, min_vecs, max_vecs); 1198 vecs = __pci_enable_msi_range(dev, min_vecs, max_vecs, flags);
1155 if (vecs > 0) 1199 if (vecs > 0)
1156 return vecs; 1200 return vecs;
1157 } 1201 }
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 52ecd49e8049..f1406619f868 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -320,6 +320,7 @@ struct pci_dev {
320 * directly, use the values stored here. They might be different! 320 * directly, use the values stored here. They might be different!
321 */ 321 */
322 unsigned int irq; 322 unsigned int irq;
323 struct cpumask *irq_affinity;
323 struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */ 324 struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
324 325
325 bool match_driver; /* Skip attaching driver */ 326 bool match_driver; /* Skip attaching driver */
@@ -1240,6 +1241,7 @@ int pci_set_vga_state(struct pci_dev *pdev, bool decode,
1240#define PCI_IRQ_NOLEGACY (1 << 0) /* don't use legacy interrupts */ 1241#define PCI_IRQ_NOLEGACY (1 << 0) /* don't use legacy interrupts */
1241#define PCI_IRQ_NOMSI (1 << 1) /* don't use MSI interrupts */ 1242#define PCI_IRQ_NOMSI (1 << 1) /* don't use MSI interrupts */
1242#define PCI_IRQ_NOMSIX (1 << 2) /* don't use MSI-X interrupts */ 1243#define PCI_IRQ_NOMSIX (1 << 2) /* don't use MSI-X interrupts */
1244#define PCI_IRQ_NOAFFINITY (1 << 3) /* don't auto-assign affinity */
1243 1245
1244/* kmem_cache style wrapper around pci_alloc_consistent() */ 1246/* kmem_cache style wrapper around pci_alloc_consistent() */
1245 1247