diff options
author | Christoph Hellwig <hch@lst.de> | 2016-07-12 05:20:18 -0400 |
---|---|---|
committer | Bjorn Helgaas <bhelgaas@google.com> | 2016-07-21 16:57:03 -0400 |
commit | 4ef33685aa0957d771e068b60a5f3ca6b47ade1c (patch) | |
tree | 34e9fee3bd177657e18f56a4d5d0701f6f3c20f5 | |
parent | aff171641d181ea573380efc3f559c9de4741fc5 (diff) |
PCI: Spread interrupt vectors in pci_alloc_irq_vectors()
Set the affinity_mask in the PCI device before allocating vectors so that
the affinity can be propagated through the MSI descriptor structures to the
core IRQ code. To facilitate this, new __pci_enable_msi_range() and
__pci_enable_msix_range() helpers are factored out of their unprefixed
variants; the helpers assign the new IRQ affinity mask in the PCI device so
that the low-level interrupt code can perform the interrupt affinity
assignment and do node-local allocations.
A new PCI_IRQ_NOAFFINITY flag is added to pci_alloc_irq_vectors() so that
this function can also be used by drivers that don't wish to use the
automatic affinity assignment.
[bhelgaas: omit "else" after "return" consistently]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Alexander Gordeev <agordeev@redhat.com>
-rw-r--r-- | Documentation/PCI/MSI-HOWTO.txt | 4 | ||||
-rw-r--r-- | drivers/pci/msi.c | 134 | ||||
-rw-r--r-- | include/linux/pci.h | 2 |
3 files changed, 95 insertions, 45 deletions
diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt index 0ac612b8c3fb..c55df2911136 100644 --- a/Documentation/PCI/MSI-HOWTO.txt +++ b/Documentation/PCI/MSI-HOWTO.txt | |||
@@ -99,6 +99,10 @@ PCI_IRQ_NOMSI and PCI_IRQ_NOMSIX flag in case a device claims to support | |||
99 | MSI or MSI-X, but the support is broken, or to pass PCI_IRQ_NOLEGACY in | 99 | MSI or MSI-X, but the support is broken, or to pass PCI_IRQ_NOLEGACY in |
100 | case the device does not support legacy interrupt lines. | 100 | case the device does not support legacy interrupt lines. |
101 | 101 | ||
102 | By default this function will spread the interrupts around the available | ||
103 | CPUs, but this feature can be disabled by passing the PCI_IRQ_NOAFFINITY | ||
104 | flag. | ||
105 | |||
102 | To get the Linux IRQ numbers passed to request_irq() and free_irq() and the | 106 | To get the Linux IRQ numbers passed to request_irq() and free_irq() and the |
103 | vectors, use the following function: | 107 | vectors, use the following function: |
104 | 108 | ||
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 5e5ab478ea7d..a02981efdad5 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c | |||
@@ -569,6 +569,7 @@ static struct msi_desc *msi_setup_entry(struct pci_dev *dev, int nvec) | |||
569 | entry->msi_attrib.multi_cap = (control & PCI_MSI_FLAGS_QMASK) >> 1; | 569 | entry->msi_attrib.multi_cap = (control & PCI_MSI_FLAGS_QMASK) >> 1; |
570 | entry->msi_attrib.multiple = ilog2(__roundup_pow_of_two(nvec)); | 570 | entry->msi_attrib.multiple = ilog2(__roundup_pow_of_two(nvec)); |
571 | entry->nvec_used = nvec; | 571 | entry->nvec_used = nvec; |
572 | entry->affinity = dev->irq_affinity; | ||
572 | 573 | ||
573 | if (control & PCI_MSI_FLAGS_64BIT) | 574 | if (control & PCI_MSI_FLAGS_64BIT) |
574 | entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_64; | 575 | entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_64; |
@@ -680,10 +681,18 @@ static void __iomem *msix_map_region(struct pci_dev *dev, unsigned nr_entries) | |||
680 | static int msix_setup_entries(struct pci_dev *dev, void __iomem *base, | 681 | static int msix_setup_entries(struct pci_dev *dev, void __iomem *base, |
681 | struct msix_entry *entries, int nvec) | 682 | struct msix_entry *entries, int nvec) |
682 | { | 683 | { |
684 | const struct cpumask *mask = NULL; | ||
683 | struct msi_desc *entry; | 685 | struct msi_desc *entry; |
684 | int i; | 686 | int cpu = -1, i; |
685 | 687 | ||
686 | for (i = 0; i < nvec; i++) { | 688 | for (i = 0; i < nvec; i++) { |
689 | if (dev->irq_affinity) { | ||
690 | cpu = cpumask_next(cpu, dev->irq_affinity); | ||
691 | if (cpu >= nr_cpu_ids) | ||
692 | cpu = cpumask_first(dev->irq_affinity); | ||
693 | mask = cpumask_of(cpu); | ||
694 | } | ||
695 | |||
687 | entry = alloc_msi_entry(&dev->dev); | 696 | entry = alloc_msi_entry(&dev->dev); |
688 | if (!entry) { | 697 | if (!entry) { |
689 | if (!i) | 698 | if (!i) |
@@ -703,6 +712,7 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base, | |||
703 | entry->msi_attrib.default_irq = dev->irq; | 712 | entry->msi_attrib.default_irq = dev->irq; |
704 | entry->mask_base = base; | 713 | entry->mask_base = base; |
705 | entry->nvec_used = 1; | 714 | entry->nvec_used = 1; |
715 | entry->affinity = mask; | ||
706 | 716 | ||
707 | list_add_tail(&entry->list, dev_to_msi_list(&dev->dev)); | 717 | list_add_tail(&entry->list, dev_to_msi_list(&dev->dev)); |
708 | } | 718 | } |
@@ -1028,19 +1038,8 @@ int pci_msi_enabled(void) | |||
1028 | } | 1038 | } |
1029 | EXPORT_SYMBOL(pci_msi_enabled); | 1039 | EXPORT_SYMBOL(pci_msi_enabled); |
1030 | 1040 | ||
1031 | /** | 1041 | static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, |
1032 | * pci_enable_msi_range - configure device's MSI capability structure | 1042 | unsigned int flags) |
1033 | * @dev: device to configure | ||
1034 | * @minvec: minimal number of interrupts to configure | ||
1035 | * @maxvec: maximum number of interrupts to configure | ||
1036 | * | ||
1037 | * This function tries to allocate a maximum possible number of interrupts in a | ||
1038 | * range between @minvec and @maxvec. It returns a negative errno if an error | ||
1039 | * occurs. If it succeeds, it returns the actual number of interrupts allocated | ||
1040 | * and updates the @dev's irq member to the lowest new interrupt number; | ||
1041 | * the other interrupt numbers allocated to this device are consecutive. | ||
1042 | **/ | ||
1043 | int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec) | ||
1044 | { | 1043 | { |
1045 | int nvec; | 1044 | int nvec; |
1046 | int rc; | 1045 | int rc; |
@@ -1063,25 +1062,85 @@ int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec) | |||
1063 | nvec = pci_msi_vec_count(dev); | 1062 | nvec = pci_msi_vec_count(dev); |
1064 | if (nvec < 0) | 1063 | if (nvec < 0) |
1065 | return nvec; | 1064 | return nvec; |
1066 | else if (nvec < minvec) | 1065 | if (nvec < minvec) |
1067 | return -EINVAL; | 1066 | return -EINVAL; |
1068 | else if (nvec > maxvec) | 1067 | |
1068 | if (nvec > maxvec) | ||
1069 | nvec = maxvec; | 1069 | nvec = maxvec; |
1070 | 1070 | ||
1071 | do { | 1071 | for (;;) { |
1072 | if (!(flags & PCI_IRQ_NOAFFINITY)) { | ||
1073 | dev->irq_affinity = irq_create_affinity_mask(&nvec); | ||
1074 | if (nvec < minvec) | ||
1075 | return -ENOSPC; | ||
1076 | } | ||
1077 | |||
1072 | rc = msi_capability_init(dev, nvec); | 1078 | rc = msi_capability_init(dev, nvec); |
1073 | if (rc < 0) { | 1079 | if (rc == 0) |
1080 | return nvec; | ||
1081 | |||
1082 | kfree(dev->irq_affinity); | ||
1083 | dev->irq_affinity = NULL; | ||
1084 | |||
1085 | if (rc < 0) | ||
1074 | return rc; | 1086 | return rc; |
1075 | } else if (rc > 0) { | 1087 | if (rc < minvec) |
1076 | if (rc < minvec) | 1088 | return -ENOSPC; |
1089 | |||
1090 | nvec = rc; | ||
1091 | } | ||
1092 | } | ||
1093 | |||
1094 | /** | ||
1095 | * pci_enable_msi_range - configure device's MSI capability structure | ||
1096 | * @dev: device to configure | ||
1097 | * @minvec: minimal number of interrupts to configure | ||
1098 | * @maxvec: maximum number of interrupts to configure | ||
1099 | * | ||
1100 | * This function tries to allocate a maximum possible number of interrupts in a | ||
1101 | * range between @minvec and @maxvec. It returns a negative errno if an error | ||
1102 | * occurs. If it succeeds, it returns the actual number of interrupts allocated | ||
1103 | * and updates the @dev's irq member to the lowest new interrupt number; | ||
1104 | * the other interrupt numbers allocated to this device are consecutive. | ||
1105 | **/ | ||
1106 | int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec) | ||
1107 | { | ||
1108 | return __pci_enable_msi_range(dev, minvec, maxvec, PCI_IRQ_NOAFFINITY); | ||
1109 | } | ||
1110 | EXPORT_SYMBOL(pci_enable_msi_range); | ||
1111 | |||
1112 | static int __pci_enable_msix_range(struct pci_dev *dev, | ||
1113 | struct msix_entry *entries, int minvec, int maxvec, | ||
1114 | unsigned int flags) | ||
1115 | { | ||
1116 | int nvec = maxvec; | ||
1117 | int rc; | ||
1118 | |||
1119 | if (maxvec < minvec) | ||
1120 | return -ERANGE; | ||
1121 | |||
1122 | for (;;) { | ||
1123 | if (!(flags & PCI_IRQ_NOAFFINITY)) { | ||
1124 | dev->irq_affinity = irq_create_affinity_mask(&nvec); | ||
1125 | if (nvec < minvec) | ||
1077 | return -ENOSPC; | 1126 | return -ENOSPC; |
1078 | nvec = rc; | ||
1079 | } | 1127 | } |
1080 | } while (rc); | ||
1081 | 1128 | ||
1082 | return nvec; | 1129 | rc = pci_enable_msix(dev, entries, nvec); |
1130 | if (rc == 0) | ||
1131 | return nvec; | ||
1132 | |||
1133 | kfree(dev->irq_affinity); | ||
1134 | dev->irq_affinity = NULL; | ||
1135 | |||
1136 | if (rc < 0) | ||
1137 | return rc; | ||
1138 | if (rc < minvec) | ||
1139 | return -ENOSPC; | ||
1140 | |||
1141 | nvec = rc; | ||
1142 | } | ||
1083 | } | 1143 | } |
1084 | EXPORT_SYMBOL(pci_enable_msi_range); | ||
1085 | 1144 | ||
1086 | /** | 1145 | /** |
1087 | * pci_enable_msix_range - configure device's MSI-X capability structure | 1146 | * pci_enable_msix_range - configure device's MSI-X capability structure |
@@ -1099,26 +1158,10 @@ EXPORT_SYMBOL(pci_enable_msi_range); | |||
1099 | * with new allocated MSI-X interrupts. | 1158 | * with new allocated MSI-X interrupts. |
1100 | **/ | 1159 | **/ |
1101 | int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, | 1160 | int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, |
1102 | int minvec, int maxvec) | 1161 | int minvec, int maxvec) |
1103 | { | 1162 | { |
1104 | int nvec = maxvec; | 1163 | return __pci_enable_msix_range(dev, entries, minvec, maxvec, |
1105 | int rc; | 1164 | PCI_IRQ_NOAFFINITY); |
1106 | |||
1107 | if (maxvec < minvec) | ||
1108 | return -ERANGE; | ||
1109 | |||
1110 | do { | ||
1111 | rc = pci_enable_msix(dev, entries, nvec); | ||
1112 | if (rc < 0) { | ||
1113 | return rc; | ||
1114 | } else if (rc > 0) { | ||
1115 | if (rc < minvec) | ||
1116 | return -ENOSPC; | ||
1117 | nvec = rc; | ||
1118 | } | ||
1119 | } while (rc); | ||
1120 | |||
1121 | return nvec; | ||
1122 | } | 1165 | } |
1123 | EXPORT_SYMBOL(pci_enable_msix_range); | 1166 | EXPORT_SYMBOL(pci_enable_msix_range); |
1124 | 1167 | ||
@@ -1145,13 +1188,14 @@ int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, | |||
1145 | int vecs = -ENOSPC; | 1188 | int vecs = -ENOSPC; |
1146 | 1189 | ||
1147 | if (!(flags & PCI_IRQ_NOMSIX)) { | 1190 | if (!(flags & PCI_IRQ_NOMSIX)) { |
1148 | vecs = pci_enable_msix_range(dev, NULL, min_vecs, max_vecs); | 1191 | vecs = __pci_enable_msix_range(dev, NULL, min_vecs, max_vecs, |
1192 | flags); | ||
1149 | if (vecs > 0) | 1193 | if (vecs > 0) |
1150 | return vecs; | 1194 | return vecs; |
1151 | } | 1195 | } |
1152 | 1196 | ||
1153 | if (!(flags & PCI_IRQ_NOMSI)) { | 1197 | if (!(flags & PCI_IRQ_NOMSI)) { |
1154 | vecs = pci_enable_msi_range(dev, min_vecs, max_vecs); | 1198 | vecs = __pci_enable_msi_range(dev, min_vecs, max_vecs, flags); |
1155 | if (vecs > 0) | 1199 | if (vecs > 0) |
1156 | return vecs; | 1200 | return vecs; |
1157 | } | 1201 | } |
diff --git a/include/linux/pci.h b/include/linux/pci.h index 52ecd49e8049..f1406619f868 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h | |||
@@ -320,6 +320,7 @@ struct pci_dev { | |||
320 | * directly, use the values stored here. They might be different! | 320 | * directly, use the values stored here. They might be different! |
321 | */ | 321 | */ |
322 | unsigned int irq; | 322 | unsigned int irq; |
323 | struct cpumask *irq_affinity; | ||
323 | struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */ | 324 | struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */ |
324 | 325 | ||
325 | bool match_driver; /* Skip attaching driver */ | 326 | bool match_driver; /* Skip attaching driver */ |
@@ -1240,6 +1241,7 @@ int pci_set_vga_state(struct pci_dev *pdev, bool decode, | |||
1240 | #define PCI_IRQ_NOLEGACY (1 << 0) /* don't use legacy interrupts */ | 1241 | #define PCI_IRQ_NOLEGACY (1 << 0) /* don't use legacy interrupts */ |
1241 | #define PCI_IRQ_NOMSI (1 << 1) /* don't use MSI interrupts */ | 1242 | #define PCI_IRQ_NOMSI (1 << 1) /* don't use MSI interrupts */ |
1242 | #define PCI_IRQ_NOMSIX (1 << 2) /* don't use MSI-X interrupts */ | 1243 | #define PCI_IRQ_NOMSIX (1 << 2) /* don't use MSI-X interrupts */ |
1244 | #define PCI_IRQ_NOAFFINITY (1 << 3) /* don't auto-assign affinity */ | ||
1243 | 1245 | ||
1244 | /* kmem_cache style wrapper around pci_alloc_consistent() */ | 1246 | /* kmem_cache style wrapper around pci_alloc_consistent() */ |
1245 | 1247 | ||