author     Thomas Gleixner <tglx@linutronix.de>   2016-09-15 14:54:40 -0400
committer  Thomas Gleixner <tglx@linutronix.de>   2016-09-15 14:54:40 -0400
commit     0a30d69195604f136a4e3bfaf453f742e583ce95 (patch)
tree       6589250e91787090ac98b0efff1ae7d8022b4594
parent     16217dc79dbc599b110dda26d0421df47904bba4 (diff)
parent     ee8d41e53efe14bfc5ea5866e1178b06d78a7c95 (diff)
Merge branch 'irq/for-block' into irq/core

Add the new irq spreading infrastructure.
-rw-r--r--  drivers/base/platform-msi.c            3
-rw-r--r--  drivers/pci/msi.c                    161
-rw-r--r--  drivers/staging/fsl-mc/bus/mc-msi.c    3
-rw-r--r--  include/linux/interrupt.h             14
-rw-r--r--  include/linux/msi.h                    5
-rw-r--r--  include/linux/pci.h                    6
-rw-r--r--  kernel/irq/affinity.c                167
-rw-r--r--  kernel/irq/irqdesc.c                  31
-rw-r--r--  kernel/irq/msi.c                      26
9 files changed, 293 insertions(+), 123 deletions(-)
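
For context before the individual diffs, here is a minimal sketch of how a
driver is expected to consume the interfaces this merge touches. Everything
below is illustrative, not part of the commit: the foo_* names and vector
count are made up, and PCI_IRQ_ALL_TYPES is assumed from the pre-existing
pci.h flag definitions rather than added here.

  /*
   * Ask for up to nr_queues vectors, let the PCI core spread them across
   * CPUs (PCI_IRQ_AFFINITY), then read back the mask chosen per vector.
   */
  static int foo_setup_irqs(struct pci_dev *pdev, unsigned int nr_queues)
  {
          int i, nvec;

          nvec = pci_alloc_irq_vectors(pdev, 1, nr_queues,
                                       PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY);
          if (nvec < 0)
                  return nvec;

          for (i = 0; i < nvec; i++) {
                  /* Linux irq number for vector i, e.g. for request_irq() */
                  int irq = pci_irq_vector(pdev, i);
                  /* Spread mask computed by irq_create_affinity_masks() */
                  const struct cpumask *mask = pci_irq_get_affinity(pdev, i);

                  /* ... request_irq(irq, ...) and bind queue i to @mask ... */
          }
          return 0;
  }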
diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
index 279e53989374..be6a599bc0c1 100644
--- a/drivers/base/platform-msi.c
+++ b/drivers/base/platform-msi.c
@@ -142,13 +142,12 @@ static int platform_msi_alloc_descs_with_irq(struct device *dev, int virq,
         }
 
         for (i = 0; i < nvec; i++) {
-                desc = alloc_msi_entry(dev);
+                desc = alloc_msi_entry(dev, 1, NULL);
                 if (!desc)
                         break;
 
                 desc->platform.msi_priv_data = data;
                 desc->platform.msi_index = base + i;
-                desc->nvec_used = 1;
                 desc->irq = virq ? virq + i : 0;
 
                 list_add_tail(&desc->list, dev_to_msi_list(dev));
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 137b4c5fb638..bfdd0744b686 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -550,15 +550,23 @@ error_attrs:
         return ret;
 }
 
-static struct msi_desc *msi_setup_entry(struct pci_dev *dev, int nvec)
+static struct msi_desc *
+msi_setup_entry(struct pci_dev *dev, int nvec, bool affinity)
 {
-        u16 control;
+        struct cpumask *masks = NULL;
         struct msi_desc *entry;
+        u16 control;
+
+        if (affinity) {
+                masks = irq_create_affinity_masks(dev->irq_affinity, nvec);
+                if (!masks)
+                        pr_err("Unable to allocate affinity masks, ignoring\n");
+        }
 
         /* MSI Entry Initialization */
-        entry = alloc_msi_entry(&dev->dev);
+        entry = alloc_msi_entry(&dev->dev, nvec, masks);
         if (!entry)
-                return NULL;
+                goto out;
 
         pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
 
@@ -569,8 +577,6 @@ static struct msi_desc *msi_setup_entry(struct pci_dev *dev, int nvec)
         entry->msi_attrib.default_irq = dev->irq;  /* Save IOAPIC IRQ */
         entry->msi_attrib.multi_cap = (control & PCI_MSI_FLAGS_QMASK) >> 1;
         entry->msi_attrib.multiple = ilog2(__roundup_pow_of_two(nvec));
-        entry->nvec_used = nvec;
-        entry->affinity = dev->irq_affinity;
 
         if (control & PCI_MSI_FLAGS_64BIT)
                 entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_64;
@@ -581,6 +587,8 @@ static struct msi_desc *msi_setup_entry(struct pci_dev *dev, int nvec)
         if (entry->msi_attrib.maskbit)
                 pci_read_config_dword(dev, entry->mask_pos, &entry->masked);
 
+out:
+        kfree(masks);
         return entry;
 }
 
@@ -609,7 +617,7 @@ static int msi_verify_entries(struct pci_dev *dev)
  * an error, and a positive return value indicates the number of interrupts
  * which could have been allocated.
  */
-static int msi_capability_init(struct pci_dev *dev, int nvec)
+static int msi_capability_init(struct pci_dev *dev, int nvec, bool affinity)
 {
         struct msi_desc *entry;
         int ret;
@@ -617,7 +625,7 @@ static int msi_capability_init(struct pci_dev *dev, int nvec)
 
         pci_msi_set_enable(dev, 0);  /* Disable MSI during set up */
 
-        entry = msi_setup_entry(dev, nvec);
+        entry = msi_setup_entry(dev, nvec, affinity);
         if (!entry)
                 return -ENOMEM;
 
@@ -680,28 +688,29 @@ static void __iomem *msix_map_region(struct pci_dev *dev, unsigned nr_entries)
 }
 
 static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
-                              struct msix_entry *entries, int nvec)
+                              struct msix_entry *entries, int nvec,
+                              bool affinity)
 {
-        const struct cpumask *mask = NULL;
+        struct cpumask *curmsk, *masks = NULL;
         struct msi_desc *entry;
-        int cpu = -1, i;
+        int ret, i;
 
-        for (i = 0; i < nvec; i++) {
-                if (dev->irq_affinity) {
-                        cpu = cpumask_next(cpu, dev->irq_affinity);
-                        if (cpu >= nr_cpu_ids)
-                                cpu = cpumask_first(dev->irq_affinity);
-                        mask = cpumask_of(cpu);
-                }
+        if (affinity) {
+                masks = irq_create_affinity_masks(dev->irq_affinity, nvec);
+                if (!masks)
+                        pr_err("Unable to allocate affinity masks, ignoring\n");
+        }
 
-                entry = alloc_msi_entry(&dev->dev);
+        for (i = 0, curmsk = masks; i < nvec; i++) {
+                entry = alloc_msi_entry(&dev->dev, 1, curmsk);
                 if (!entry) {
                         if (!i)
                                 iounmap(base);
                         else
                                 free_msi_irqs(dev);
                         /* Not enough memory. Don't try again */
-                        return -ENOMEM;
+                        ret = -ENOMEM;
+                        goto out;
                 }
 
                 entry->msi_attrib.is_msix = 1;
@@ -712,12 +721,14 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
                 entry->msi_attrib.entry_nr = i;
                 entry->msi_attrib.default_irq = dev->irq;
                 entry->mask_base = base;
-                entry->nvec_used = 1;
-                entry->affinity = mask;
 
                 list_add_tail(&entry->list, dev_to_msi_list(&dev->dev));
+                if (masks)
+                        curmsk++;
         }
-
-        return 0;
+        ret = 0;
+out:
+        kfree(masks);
+        return ret;
 }
 
@@ -746,8 +757,8 @@ static void msix_program_entries(struct pci_dev *dev,
  * single MSI-X irq. A return of zero indicates the successful setup of
  * requested MSI-X entries with allocated irqs or non-zero for otherwise.
  **/
-static int msix_capability_init(struct pci_dev *dev,
-                                struct msix_entry *entries, int nvec)
+static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries,
+                                int nvec, bool affinity)
 {
         int ret;
         u16 control;
@@ -762,7 +773,7 @@ static int msix_capability_init(struct pci_dev *dev,
         if (!base)
                 return -ENOMEM;
 
-        ret = msix_setup_entries(dev, base, entries, nvec);
+        ret = msix_setup_entries(dev, base, entries, nvec, affinity);
         if (ret)
                 return ret;
 
@@ -942,22 +953,8 @@ int pci_msix_vec_count(struct pci_dev *dev)
 }
 EXPORT_SYMBOL(pci_msix_vec_count);
 
-/**
- * pci_enable_msix - configure device's MSI-X capability structure
- * @dev: pointer to the pci_dev data structure of MSI-X device function
- * @entries: pointer to an array of MSI-X entries (optional)
- * @nvec: number of MSI-X irqs requested for allocation by device driver
- *
- * Setup the MSI-X capability structure of device function with the number
- * of requested irqs upon its software driver call to request for
- * MSI-X mode enabled on its hardware device function. A return of zero
- * indicates the successful configuration of MSI-X capability structure
- * with new allocated MSI-X irqs. A return of < 0 indicates a failure.
- * Or a return of > 0 indicates that driver request is exceeding the number
- * of irqs or MSI-X vectors available. Driver should use the returned value to
- * re-send its request.
- **/
-int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
+static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
+                             int nvec, bool affinity)
 {
         int nr_entries;
         int i, j;
@@ -989,7 +986,27 @@ int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
                 dev_info(&dev->dev, "can't enable MSI-X (MSI IRQ already assigned)\n");
                 return -EINVAL;
         }
-        return msix_capability_init(dev, entries, nvec);
+        return msix_capability_init(dev, entries, nvec, affinity);
+}
+
+/**
+ * pci_enable_msix - configure device's MSI-X capability structure
+ * @dev: pointer to the pci_dev data structure of MSI-X device function
+ * @entries: pointer to an array of MSI-X entries (optional)
+ * @nvec: number of MSI-X irqs requested for allocation by device driver
+ *
+ * Setup the MSI-X capability structure of device function with the number
+ * of requested irqs upon its software driver call to request for
+ * MSI-X mode enabled on its hardware device function. A return of zero
+ * indicates the successful configuration of MSI-X capability structure
+ * with new allocated MSI-X irqs. A return of < 0 indicates a failure.
+ * Or a return of > 0 indicates that driver request is exceeding the number
+ * of irqs or MSI-X vectors available. Driver should use the returned value to
+ * re-send its request.
+ **/
+int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
+{
+        return __pci_enable_msix(dev, entries, nvec, false);
 }
 EXPORT_SYMBOL(pci_enable_msix);
 
@@ -1042,6 +1059,7 @@ EXPORT_SYMBOL(pci_msi_enabled);
 static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
                 unsigned int flags)
 {
+        bool affinity = flags & PCI_IRQ_AFFINITY;
         int nvec;
         int rc;
 
@@ -1070,19 +1088,17 @@ static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
         nvec = maxvec;
 
         for (;;) {
-                if (flags & PCI_IRQ_AFFINITY) {
-                        dev->irq_affinity = irq_create_affinity_mask(&nvec);
+                if (affinity) {
+                        nvec = irq_calc_affinity_vectors(dev->irq_affinity,
+                                        nvec);
                         if (nvec < minvec)
                                 return -ENOSPC;
                 }
 
-                rc = msi_capability_init(dev, nvec);
+                rc = msi_capability_init(dev, nvec, affinity);
                 if (rc == 0)
                         return nvec;
 
-                kfree(dev->irq_affinity);
-                dev->irq_affinity = NULL;
-
                 if (rc < 0)
                         return rc;
                 if (rc < minvec)
@@ -1114,26 +1130,24 @@ static int __pci_enable_msix_range(struct pci_dev *dev,
                 struct msix_entry *entries, int minvec, int maxvec,
                 unsigned int flags)
 {
-        int nvec = maxvec;
-        int rc;
+        bool affinity = flags & PCI_IRQ_AFFINITY;
+        int rc, nvec = maxvec;
 
         if (maxvec < minvec)
                 return -ERANGE;
 
         for (;;) {
-                if (flags & PCI_IRQ_AFFINITY) {
-                        dev->irq_affinity = irq_create_affinity_mask(&nvec);
+                if (affinity) {
+                        nvec = irq_calc_affinity_vectors(dev->irq_affinity,
+                                        nvec);
                         if (nvec < minvec)
                                 return -ENOSPC;
                 }
 
-                rc = pci_enable_msix(dev, entries, nvec);
+                rc = __pci_enable_msix(dev, entries, nvec, affinity);
                 if (rc == 0)
                         return nvec;
 
-                kfree(dev->irq_affinity);
-                dev->irq_affinity = NULL;
-
                 if (rc < 0)
                         return rc;
                 if (rc < minvec)
@@ -1257,6 +1271,37 @@ int pci_irq_vector(struct pci_dev *dev, unsigned int nr)
 }
 EXPORT_SYMBOL(pci_irq_vector);
 
+/**
+ * pci_irq_get_affinity - return the affinity of a particular msi vector
+ * @dev:        PCI device to operate on
+ * @nr:         device-relative interrupt vector index (0-based).
+ */
+const struct cpumask *pci_irq_get_affinity(struct pci_dev *dev, int nr)
+{
+        if (dev->msix_enabled) {
+                struct msi_desc *entry;
+                int i = 0;
+
+                for_each_pci_msi_entry(entry, dev) {
+                        if (i == nr)
+                                return entry->affinity;
+                        i++;
+                }
+                WARN_ON_ONCE(1);
+                return NULL;
+        } else if (dev->msi_enabled) {
+                struct msi_desc *entry = first_pci_msi_entry(dev);
+
+                if (WARN_ON_ONCE(!entry || nr >= entry->nvec_used))
+                        return NULL;
+
+                return &entry->affinity[nr];
+        } else {
+                return cpu_possible_mask;
+        }
+}
+EXPORT_SYMBOL(pci_irq_get_affinity);
+
 struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc)
 {
         return to_pci_dev(desc->dev);
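
A short sketch of how the new pci_irq_get_affinity() can be consumed from a
driver to inspect the spreading result; the function name and calling context
are made up for illustration:

  static void foo_dump_affinity(struct pci_dev *pdev, int nvec)
  {
          int i;

          for (i = 0; i < nvec; i++) {
                  const struct cpumask *mask = pci_irq_get_affinity(pdev, i);

                  /*
                   * NULL is only returned on inconsistent state; legacy
                   * interrupts report cpu_possible_mask.
                   */
                  if (mask)
                          dev_info(&pdev->dev, "vector %d -> CPUs %*pbl\n",
                                   i, cpumask_pr_args(mask));
          }
  }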
diff --git a/drivers/staging/fsl-mc/bus/mc-msi.c b/drivers/staging/fsl-mc/bus/mc-msi.c
index c7be156ae5e0..4fd8e41ef468 100644
--- a/drivers/staging/fsl-mc/bus/mc-msi.c
+++ b/drivers/staging/fsl-mc/bus/mc-msi.c
@@ -213,7 +213,7 @@ static int fsl_mc_msi_alloc_descs(struct device *dev, unsigned int irq_count)
         struct msi_desc *msi_desc;
 
         for (i = 0; i < irq_count; i++) {
-                msi_desc = alloc_msi_entry(dev);
+                msi_desc = alloc_msi_entry(dev, 1, NULL);
                 if (!msi_desc) {
                         dev_err(dev, "Failed to allocate msi entry\n");
                         error = -ENOMEM;
@@ -221,7 +221,6 @@ static int fsl_mc_msi_alloc_descs(struct device *dev, unsigned int irq_count)
                 }
 
                 msi_desc->fsl_mc.msi_index = i;
-                msi_desc->nvec_used = 1;
                 INIT_LIST_HEAD(&msi_desc->list);
                 list_add_tail(&msi_desc->list, dev_to_msi_list(dev));
         }
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index b6683f0ffc9f..72f0721f75e7 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -278,7 +278,8 @@ extern int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m);
 extern int
 irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify);
 
-struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs);
+struct cpumask *irq_create_affinity_masks(const struct cpumask *affinity, int nvec);
+int irq_calc_affinity_vectors(const struct cpumask *affinity, int maxvec);
 
 #else /* CONFIG_SMP */
 
@@ -311,11 +312,18 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify)
         return 0;
 }
 
-static inline struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs)
+static inline struct cpumask *
+irq_create_affinity_masks(const struct cpumask *affinity, int nvec)
 {
-        *nr_vecs = 1;
         return NULL;
 }
+
+static inline int
+irq_calc_affinity_vectors(const struct cpumask *affinity, int maxvec)
+{
+        return maxvec;
+}
+
 #endif /* CONFIG_SMP */
 
 /*
diff --git a/include/linux/msi.h b/include/linux/msi.h
index e8c81fbd5f9c..0db320b7bb15 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -68,7 +68,7 @@ struct msi_desc {
         unsigned int            nvec_used;
         struct device           *dev;
         struct msi_msg          msg;
-        const struct cpumask    *affinity;
+        struct cpumask          *affinity;
 
         union {
                 /* PCI MSI/X specific data */
@@ -123,7 +123,8 @@ static inline void *msi_desc_to_pci_sysdata(struct msi_desc *desc)
 }
 #endif /* CONFIG_PCI_MSI */
 
-struct msi_desc *alloc_msi_entry(struct device *dev);
+struct msi_desc *alloc_msi_entry(struct device *dev, int nvec,
+                                 const struct cpumask *affinity);
 void free_msi_entry(struct msi_desc *entry);
 void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
 void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 0ab835965669..3b0a8004f313 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1300,6 +1300,7 @@ int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
                 unsigned int max_vecs, unsigned int flags);
 void pci_free_irq_vectors(struct pci_dev *dev);
 int pci_irq_vector(struct pci_dev *dev, unsigned int nr);
+const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, int vec);
 
 #else
 static inline int pci_msi_vec_count(struct pci_dev *dev) { return -ENOSYS; }
@@ -1342,6 +1343,11 @@ static inline int pci_irq_vector(struct pci_dev *dev, unsigned int nr)
                 return -EINVAL;
         return dev->irq;
 }
+static inline const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev,
+                int vec)
+{
+        return cpu_possible_mask;
+}
 #endif
 
 #ifdef CONFIG_PCIEPORTBUS
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index 32f6cfcff212..17f51d63da56 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -4,60 +4,151 @@
 #include <linux/slab.h>
 #include <linux/cpu.h>
 
-static int get_first_sibling(unsigned int cpu)
-{
-        unsigned int ret;
-
-        ret = cpumask_first(topology_sibling_cpumask(cpu));
-        if (ret < nr_cpu_ids)
-                return ret;
-        return cpu;
-}
-
-/*
- * Take a map of online CPUs and the number of available interrupt vectors
- * and generate an output cpumask suitable for spreading MSI/MSI-X vectors
- * so that they are distributed as good as possible around the CPUs. If
- * more vectors than CPUs are available we'll map one to each CPU,
- * otherwise we map one to the first sibling of each socket.
- *
- * If there are more vectors than CPUs we will still only have one bit
- * set per CPU, but interrupt code will keep on assigning the vectors from
- * the start of the bitmap until we run out of vectors.
- */
-struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs)
-{
-        struct cpumask *affinity_mask;
-        unsigned int max_vecs = *nr_vecs;
-
-        if (max_vecs == 1)
-                return NULL;
-
-        affinity_mask = kzalloc(cpumask_size(), GFP_KERNEL);
-        if (!affinity_mask) {
-                *nr_vecs = 1;
-                return NULL;
-        }
-
-        get_online_cpus();
-        if (max_vecs >= num_online_cpus()) {
-                cpumask_copy(affinity_mask, cpu_online_mask);
-                *nr_vecs = num_online_cpus();
-        } else {
-                unsigned int vecs = 0, cpu;
-
-                for_each_online_cpu(cpu) {
-                        if (cpu == get_first_sibling(cpu)) {
-                                cpumask_set_cpu(cpu, affinity_mask);
-                                vecs++;
-                        }
-
-                        if (--max_vecs == 0)
-                                break;
-                }
-                *nr_vecs = vecs;
-        }
-        put_online_cpus();
-
-        return affinity_mask;
-}
+static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk,
+                                int cpus_per_vec)
+{
+        const struct cpumask *siblmsk;
+        int cpu, sibl;
+
+        for ( ; cpus_per_vec > 0; ) {
+                cpu = cpumask_first(nmsk);
+
+                /* Should not happen, but I'm too lazy to think about it */
+                if (cpu >= nr_cpu_ids)
+                        return;
+
+                cpumask_clear_cpu(cpu, nmsk);
+                cpumask_set_cpu(cpu, irqmsk);
+                cpus_per_vec--;
+
+                /* If the cpu has siblings, use them first */
+                siblmsk = topology_sibling_cpumask(cpu);
+                for (sibl = -1; cpus_per_vec > 0; ) {
+                        sibl = cpumask_next(sibl, siblmsk);
+                        if (sibl >= nr_cpu_ids)
+                                break;
+                        if (!cpumask_test_and_clear_cpu(sibl, nmsk))
+                                continue;
+                        cpumask_set_cpu(sibl, irqmsk);
+                        cpus_per_vec--;
+                }
+        }
+}
+
+static int get_nodes_in_cpumask(const struct cpumask *mask, nodemask_t *nodemsk)
+{
+        int n, nodes;
+
+        /* Calculate the number of nodes in the supplied affinity mask */
+        for (n = 0, nodes = 0; n < num_online_nodes(); n++) {
+                if (cpumask_intersects(mask, cpumask_of_node(n))) {
+                        node_set(n, *nodemsk);
+                        nodes++;
+                }
+        }
+        return nodes;
+}
+
+/**
+ * irq_create_affinity_masks - Create affinity masks for multiqueue spreading
+ * @affinity:   The affinity mask to spread. If NULL cpu_online_mask
+ *              is used
+ * @nvec:       The number of vectors
+ *
+ * Returns the masks pointer or NULL if allocation failed.
+ */
+struct cpumask *irq_create_affinity_masks(const struct cpumask *affinity,
+                                          int nvec)
+{
+        int n, nodes, vecs_per_node, cpus_per_vec, extra_vecs, curvec = 0;
+        nodemask_t nodemsk = NODE_MASK_NONE;
+        struct cpumask *masks;
+        cpumask_var_t nmsk;
+
+        if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
+                return NULL;
+
+        masks = kzalloc(nvec * sizeof(*masks), GFP_KERNEL);
+        if (!masks)
+                goto out;
+
+        /* Stabilize the cpumasks */
+        get_online_cpus();
+        /* If the supplied affinity mask is NULL, use cpu online mask */
+        if (!affinity)
+                affinity = cpu_online_mask;
+
+        nodes = get_nodes_in_cpumask(affinity, &nodemsk);
+
+        /*
+         * If the number of nodes in the mask is less than or equal to the
+         * number of vectors we just spread the vectors across the nodes.
+         */
+        if (nvec <= nodes) {
+                for_each_node_mask(n, nodemsk) {
+                        cpumask_copy(masks + curvec, cpumask_of_node(n));
+                        if (++curvec == nvec)
+                                break;
+                }
+                goto outonl;
+        }
+
+        /* Spread the vectors per node */
+        vecs_per_node = nvec / nodes;
+        /* Account for rounding errors */
+        extra_vecs = nvec - (nodes * vecs_per_node);
+
+        for_each_node_mask(n, nodemsk) {
+                int ncpus, v, vecs_to_assign = vecs_per_node;
+
+                /* Get the cpus on this node which are in the mask */
+                cpumask_and(nmsk, affinity, cpumask_of_node(n));
+
+                /* Calculate the number of cpus per vector */
+                ncpus = cpumask_weight(nmsk);
+
+                for (v = 0; curvec < nvec && v < vecs_to_assign; curvec++, v++) {
+                        cpus_per_vec = ncpus / vecs_to_assign;
+
+                        /* Account for extra vectors to compensate rounding errors */
+                        if (extra_vecs) {
+                                cpus_per_vec++;
+                                if (!--extra_vecs)
+                                        vecs_per_node++;
+                        }
+                        irq_spread_init_one(masks + curvec, nmsk, cpus_per_vec);
+                }
+
+                if (curvec >= nvec)
+                        break;
+        }
+
+outonl:
+        put_online_cpus();
+out:
+        free_cpumask_var(nmsk);
+        return masks;
+}
+
+/**
+ * irq_calc_affinity_vectors - Calculate the optimal number of vectors for a given affinity mask
+ * @affinity:   The affinity mask to spread. If NULL cpu_online_mask
+ *              is used
+ * @maxvec:     The maximum number of vectors available
+ */
+int irq_calc_affinity_vectors(const struct cpumask *affinity, int maxvec)
+{
+        int cpus, ret;
+
+        /* Stabilize the cpumasks */
+        get_online_cpus();
+        /* If the supplied affinity mask is NULL, use cpu online mask */
+        if (!affinity)
+                affinity = cpu_online_mask;
+
+        cpus = cpumask_weight(affinity);
+        ret = (cpus < maxvec) ? cpus : maxvec;
+
+        put_online_cpus();
+        return ret;
+}
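
To make the spreading arithmetic in irq_create_affinity_masks() concrete,
consider a made-up topology: 16 CPUs in two NUMA nodes, CPUs 0-7 on node 0,
8-15 on node 1, hyperthread siblings paired (0/1, 2/3, ...), and nvec = 8:

  nodes         = 2              (both nodes intersect the mask)
  vecs_per_node = 8 / 2     = 4  (vectors assigned per node)
  extra_vecs    = 8 - 2 * 4 = 0  (no rounding error to compensate)
  cpus_per_vec  = 8 / 4     = 2  (cpus covered by each vector)

irq_spread_init_one() consumes a cpu and then its thread siblings first, so
under these assumptions each vector ends up covering exactly one core:
{0,1}, {2,3}, ..., {14,15}. A hypothetical in-kernel call site would look
like:

  static void foo_show_spread(void)
  {
          struct cpumask *masks;

          /* NULL affinity means: spread across cpu_online_mask */
          masks = irq_create_affinity_masks(NULL, 8);
          if (masks) {
                  /* masks[0..7] hold the spread; the caller owns the array */
                  kfree(masks);
          }
  }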
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 93b51727abaa..00bb0aeea1d0 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -424,25 +424,24 @@ static int alloc_descs(unsigned int start, unsigned int cnt, int node,
         const struct cpumask *mask = NULL;
         struct irq_desc *desc;
         unsigned int flags;
-        int i, cpu = -1;
+        int i;
 
-        if (affinity && cpumask_empty(affinity))
-                return -EINVAL;
+        /* Validate affinity mask(s) */
+        if (affinity) {
+                for (i = 0, mask = affinity; i < cnt; i++, mask++) {
+                        if (cpumask_empty(mask))
+                                return -EINVAL;
+                }
+        }
 
         flags = affinity ? IRQD_AFFINITY_MANAGED : 0;
+        mask = NULL;
 
         for (i = 0; i < cnt; i++) {
                 if (affinity) {
-                        cpu = cpumask_next(cpu, affinity);
-                        if (cpu >= nr_cpu_ids)
-                                cpu = cpumask_first(affinity);
-                        node = cpu_to_node(cpu);
-
-                        /*
-                         * For single allocations we use the caller provided
-                         * mask otherwise we use the mask of the target cpu
-                         */
-                        mask = cnt == 1 ? affinity : cpumask_of(cpu);
+                        node = cpu_to_node(cpumask_first(affinity));
+                        mask = affinity;
+                        affinity++;
                 }
                 desc = alloc_desc(start + i, node, flags, mask, owner);
                 if (!desc)
@@ -670,9 +669,9 @@ EXPORT_SYMBOL_GPL(irq_free_descs);
  * @cnt:        Number of consecutive irqs to allocate.
  * @node:       Preferred node on which the irq descriptor should be allocated
  * @owner:      Owning module (can be NULL)
- * @affinity:   Optional pointer to an affinity mask which hints where the
- *              irq descriptors should be allocated and which default
- *              affinities to use
+ * @affinity:   Optional pointer to an affinity mask array of size @cnt which
+ *              hints where the irq descriptors should be allocated and which
+ *              default affinities to use
  *
  * Returns the first irq number or error code
  */
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 19e9dfbe97fa..8a3e872798f3 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -18,20 +18,42 @@
 /* Temporary solution for building, will be removed later */
 #include <linux/pci.h>
 
-struct msi_desc *alloc_msi_entry(struct device *dev)
+/**
+ * alloc_msi_entry - Allocate and initialize an msi_entry
+ * @dev:        Pointer to the device for which this is allocated
+ * @nvec:       The number of vectors used in this entry
+ * @affinity:   Optional pointer to an affinity mask array of size @nvec
+ *
+ * If @affinity is not NULL then an affinity array[@nvec] is allocated
+ * and the affinity masks from @affinity are copied.
+ */
+struct msi_desc *
+alloc_msi_entry(struct device *dev, int nvec, const struct cpumask *affinity)
 {
-        struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+        struct msi_desc *desc;
+
+        desc = kzalloc(sizeof(*desc), GFP_KERNEL);
         if (!desc)
                 return NULL;
 
         INIT_LIST_HEAD(&desc->list);
         desc->dev = dev;
+        desc->nvec_used = nvec;
+        if (affinity) {
+                desc->affinity = kmemdup(affinity,
+                        nvec * sizeof(*desc->affinity), GFP_KERNEL);
+                if (!desc->affinity) {
+                        kfree(desc);
+                        return NULL;
+                }
+        }
 
         return desc;
 }
 
 void free_msi_entry(struct msi_desc *entry)
 {
+        kfree(entry->affinity);
         kfree(entry);
 }
 
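
One ownership detail worth noting in the alloc_msi_entry() change above: the
@affinity array is duplicated with kmemdup(), so the caller keeps ownership
of the array it passed in. That is why msi_setup_entry() and
msix_setup_entries() can unconditionally kfree(masks) on their way out, and
why free_msi_entry() now frees desc->affinity as well. A sketch of the
pattern (the calling context is hypothetical):

  static struct msi_desc *foo_alloc_desc(struct device *dev, int nvec)
  {
          struct cpumask *masks;
          struct msi_desc *desc;

          masks = irq_create_affinity_masks(NULL, nvec); /* may be NULL */
          desc = alloc_msi_entry(dev, nvec, masks);      /* copies masks[] */
          kfree(masks);           /* safe: desc holds its own copy (or none) */
          return desc;            /* free_msi_entry() frees desc->affinity */
  }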