author		Thomas Gleixner <tglx@linutronix.de>	2016-09-14 10:18:49 -0400
committer	Thomas Gleixner <tglx@linutronix.de>	2016-09-14 16:11:09 -0400
commit		e75eafb9b0395c338230b0eef2cc92ca8d20dee2
tree		1bb64d6fbd2a82db976c8fe145a49d1e90f9e2aa
parent		34c3d9819fda464be4f1bec59b63353814f76c73
genirq/msi: Switch to new irq spreading infrastructure
Switch MSI over to the new spreading code. If a PCI device contains a valid
pointer to a cpumask, then this mask is used for spreading; otherwise the
online CPU mask is used. This allows a driver to restrict the spread to a
subset of CPUs, e.g. the CPUs on a particular node.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Christoph Hellwig <hch@lst.de>
Cc: axboe@fb.com
Cc: keith.busch@intel.com
Cc: agordeev@redhat.com
Cc: linux-block@vger.kernel.org
Link: http://lkml.kernel.org/r/1473862739-15032-4-git-send-email-hch@lst.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r--	drivers/pci/msi.c	128
-rw-r--r--	kernel/irq/irqdesc.c	31
2 files changed, 87 insertions(+), 72 deletions(-)
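
To make the new behaviour concrete, here is a minimal driver-side sketch, not part of the patch: example_setup_irqs() and the static node mask are illustrative assumptions, while dev->irq_affinity, PCI_IRQ_AFFINITY and pci_alloc_irq_vectors() are the real interfaces this series wires together.

#include <linux/cpumask.h>
#include <linux/pci.h>
#include <linux/topology.h>

/*
 * Hypothetical driver code: restrict the irq spread to the CPUs of the
 * device's NUMA node.  With dev->irq_affinity left NULL, the core falls
 * back to spreading across the online CPU mask instead.
 */
static int example_setup_irqs(struct pci_dev *pdev, int min_vecs, int max_vecs)
{
	static struct cpumask node_mask;

	/* Hint for irq_create_affinity_masks(): spread only over this node. */
	cpumask_copy(&node_mask, cpumask_of_node(dev_to_node(&pdev->dev)));
	pdev->irq_affinity = &node_mask;

	return pci_alloc_irq_vectors(pdev, min_vecs, max_vecs,
				     PCI_IRQ_MSIX | PCI_IRQ_AFFINITY);
}
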
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 0db72ba24003..06100dde0e86 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -549,15 +549,23 @@ error_attrs:
 	return ret;
 }
 
-static struct msi_desc *msi_setup_entry(struct pci_dev *dev, int nvec)
+static struct msi_desc *
+msi_setup_entry(struct pci_dev *dev, int nvec, bool affinity)
 {
-	u16 control;
+	struct cpumask *masks = NULL;
 	struct msi_desc *entry;
+	u16 control;
+
+	if (affinity) {
+		masks = irq_create_affinity_masks(dev->irq_affinity, nvec);
+		if (!masks)
+			pr_err("Unable to allocate affinity masks, ignoring\n");
+	}
 
 	/* MSI Entry Initialization */
-	entry = alloc_msi_entry(&dev->dev, nvec, NULL);
+	entry = alloc_msi_entry(&dev->dev, nvec, masks);
 	if (!entry)
-		return NULL;
+		goto out;
 
 	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
 
@@ -568,7 +576,6 @@ static struct msi_desc *msi_setup_entry(struct pci_dev *dev, int nvec)
 	entry->msi_attrib.default_irq = dev->irq;	/* Save IOAPIC IRQ */
 	entry->msi_attrib.multi_cap = (control & PCI_MSI_FLAGS_QMASK) >> 1;
 	entry->msi_attrib.multiple = ilog2(__roundup_pow_of_two(nvec));
-	entry->affinity = dev->irq_affinity;
 
 	if (control & PCI_MSI_FLAGS_64BIT)
 		entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_64;
@@ -579,6 +586,8 @@ static struct msi_desc *msi_setup_entry(struct pci_dev *dev, int nvec)
 	if (entry->msi_attrib.maskbit)
 		pci_read_config_dword(dev, entry->mask_pos, &entry->masked);
 
+out:
+	kfree(masks);
 	return entry;
 }
 
@@ -607,7 +616,7 @@ static int msi_verify_entries(struct pci_dev *dev)
  * an error, and a positive return value indicates the number of interrupts
  * which could have been allocated.
  */
-static int msi_capability_init(struct pci_dev *dev, int nvec)
+static int msi_capability_init(struct pci_dev *dev, int nvec, bool affinity)
 {
 	struct msi_desc *entry;
 	int ret;
@@ -615,7 +624,7 @@ static int msi_capability_init(struct pci_dev *dev, int nvec)
 
 	pci_msi_set_enable(dev, 0);	/* Disable MSI during set up */
 
-	entry = msi_setup_entry(dev, nvec);
+	entry = msi_setup_entry(dev, nvec, affinity);
 	if (!entry)
 		return -ENOMEM;
 
@@ -678,28 +687,29 @@ static void __iomem *msix_map_region(struct pci_dev *dev, unsigned nr_entries)
 }
 
 static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
-			      struct msix_entry *entries, int nvec)
+			      struct msix_entry *entries, int nvec,
+			      bool affinity)
 {
-	const struct cpumask *mask = NULL;
+	struct cpumask *curmsk, *masks = NULL;
 	struct msi_desc *entry;
-	int cpu = -1, i;
-
-	for (i = 0; i < nvec; i++) {
-		if (dev->irq_affinity) {
-			cpu = cpumask_next(cpu, dev->irq_affinity);
-			if (cpu >= nr_cpu_ids)
-				cpu = cpumask_first(dev->irq_affinity);
-			mask = cpumask_of(cpu);
-		}
+	int ret, i;
 
-		entry = alloc_msi_entry(&dev->dev, 1, NULL);
+	if (affinity) {
+		masks = irq_create_affinity_masks(dev->irq_affinity, nvec);
+		if (!masks)
+			pr_err("Unable to allocate affinity masks, ignoring\n");
+	}
+
+	for (i = 0, curmsk = masks; i < nvec; i++) {
+		entry = alloc_msi_entry(&dev->dev, 1, curmsk);
 		if (!entry) {
 			if (!i)
 				iounmap(base);
 			else
 				free_msi_irqs(dev);
 			/* No enough memory. Don't try again */
-			return -ENOMEM;
+			ret = -ENOMEM;
+			goto out;
 		}
 
 		entry->msi_attrib.is_msix = 1;
@@ -710,11 +720,14 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
 		entry->msi_attrib.entry_nr = i;
 		entry->msi_attrib.default_irq = dev->irq;
 		entry->mask_base = base;
-		entry->affinity = mask;
 
 		list_add_tail(&entry->list, dev_to_msi_list(&dev->dev));
+		if (masks)
+			curmsk++;
 	}
-
-	return 0;
+	ret = 0;
+out:
+	kfree(masks);
+	return ret;
 }
 
@@ -743,8 +756,8 @@ static void msix_program_entries(struct pci_dev *dev,
  * single MSI-X irq. A return of zero indicates the successful setup of
  * requested MSI-X entries with allocated irqs or non-zero for otherwise.
 **/
-static int msix_capability_init(struct pci_dev *dev,
-				struct msix_entry *entries, int nvec)
+static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries,
+				int nvec, bool affinity)
 {
 	int ret;
 	u16 control;
@@ -759,7 +772,7 @@ static int msix_capability_init(struct pci_dev *dev,
 	if (!base)
 		return -ENOMEM;
 
-	ret = msix_setup_entries(dev, base, entries, nvec);
+	ret = msix_setup_entries(dev, base, entries, nvec, affinity);
 	if (ret)
 		return ret;
 
@@ -939,22 +952,8 @@ int pci_msix_vec_count(struct pci_dev *dev)
 }
 EXPORT_SYMBOL(pci_msix_vec_count);
 
-/**
- * pci_enable_msix - configure device's MSI-X capability structure
- * @dev: pointer to the pci_dev data structure of MSI-X device function
- * @entries: pointer to an array of MSI-X entries (optional)
- * @nvec: number of MSI-X irqs requested for allocation by device driver
- *
- * Setup the MSI-X capability structure of device function with the number
- * of requested irqs upon its software driver call to request for
- * MSI-X mode enabled on its hardware device function. A return of zero
- * indicates the successful configuration of MSI-X capability structure
- * with new allocated MSI-X irqs. A return of < 0 indicates a failure.
- * Or a return of > 0 indicates that driver request is exceeding the number
- * of irqs or MSI-X vectors available. Driver should use the returned value to
- * re-send its request.
- **/
-int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
+static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
+			     int nvec, bool affinity)
 {
 	int nr_entries;
 	int i, j;
@@ -986,7 +985,27 @@ int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
 		dev_info(&dev->dev, "can't enable MSI-X (MSI IRQ already assigned)\n");
 		return -EINVAL;
 	}
-	return msix_capability_init(dev, entries, nvec);
+	return msix_capability_init(dev, entries, nvec, affinity);
+}
+
+/**
+ * pci_enable_msix - configure device's MSI-X capability structure
+ * @dev: pointer to the pci_dev data structure of MSI-X device function
+ * @entries: pointer to an array of MSI-X entries (optional)
+ * @nvec: number of MSI-X irqs requested for allocation by device driver
+ *
+ * Setup the MSI-X capability structure of device function with the number
+ * of requested irqs upon its software driver call to request for
+ * MSI-X mode enabled on its hardware device function. A return of zero
+ * indicates the successful configuration of MSI-X capability structure
+ * with new allocated MSI-X irqs. A return of < 0 indicates a failure.
+ * Or a return of > 0 indicates that driver request is exceeding the number
+ * of irqs or MSI-X vectors available. Driver should use the returned value to
+ * re-send its request.
+ **/
+int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
+{
+	return __pci_enable_msix(dev, entries, nvec, false);
 }
 EXPORT_SYMBOL(pci_enable_msix);
 
@@ -1039,6 +1058,7 @@ EXPORT_SYMBOL(pci_msi_enabled);
 static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
 				  unsigned int flags)
 {
+	bool affinity = flags & PCI_IRQ_AFFINITY;
 	int nvec;
 	int rc;
 
@@ -1067,19 +1087,17 @@ static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
 		nvec = maxvec;
 
 	for (;;) {
-		if (flags & PCI_IRQ_AFFINITY) {
-			dev->irq_affinity = irq_create_affinity_mask(&nvec);
+		if (affinity) {
+			nvec = irq_calc_affinity_vectors(dev->irq_affinity,
+					nvec);
 			if (nvec < minvec)
 				return -ENOSPC;
 		}
 
-		rc = msi_capability_init(dev, nvec);
+		rc = msi_capability_init(dev, nvec, affinity);
 		if (rc == 0)
 			return nvec;
 
-		kfree(dev->irq_affinity);
-		dev->irq_affinity = NULL;
-
 		if (rc < 0)
 			return rc;
 		if (rc < minvec)
@@ -1111,26 +1129,24 @@ static int __pci_enable_msix_range(struct pci_dev *dev,
 		struct msix_entry *entries, int minvec, int maxvec,
 		unsigned int flags)
 {
-	int nvec = maxvec;
-	int rc;
+	bool affinity = flags & PCI_IRQ_AFFINITY;
+	int rc, nvec = maxvec;
 
 	if (maxvec < minvec)
 		return -ERANGE;
 
 	for (;;) {
-		if (flags & PCI_IRQ_AFFINITY) {
-			dev->irq_affinity = irq_create_affinity_mask(&nvec);
+		if (affinity) {
+			nvec = irq_calc_affinity_vectors(dev->irq_affinity,
+					nvec);
 			if (nvec < minvec)
 				return -ENOSPC;
 		}
 
-		rc = pci_enable_msix(dev, entries, nvec);
+		rc = __pci_enable_msix(dev, entries, nvec, affinity);
 		if (rc == 0)
 			return nvec;
 
-		kfree(dev->irq_affinity);
-		dev->irq_affinity = NULL;
-
 		if (rc < 0)
 			return rc;
 		if (rc < minvec)
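
A note on mask ownership, relevant to the unconditional kfree(masks) in both msi_setup_entry() and msix_setup_entries() above: alloc_msi_entry(), as extended earlier in this series, duplicates the per-vector mask array into the descriptor, so the caller keeps ownership of the array it passed and may free it on every exit path. A simplified sketch of that helper follows; it is reconstructed from the companion genirq change, not part of this diff.

#include <linux/device.h>
#include <linux/msi.h>
#include <linux/slab.h>
#include <linux/string.h>

struct msi_desc *alloc_msi_entry(struct device *dev, int nvec,
				 const struct cpumask *affinity)
{
	struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL);

	if (!desc)
		return NULL;

	INIT_LIST_HEAD(&desc->list);
	desc->dev = dev;
	desc->nvec_used = nvec;
	if (affinity) {
		/* One mask per vector; the caller's array is not kept. */
		desc->affinity = kmemdup(affinity,
					 nvec * sizeof(*desc->affinity),
					 GFP_KERNEL);
		if (!desc->affinity) {
			kfree(desc);
			return NULL;
		}
	}
	return desc;
}
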
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index a623b44f2d4b..5a5a685aba33 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -236,25 +236,24 @@ static int alloc_descs(unsigned int start, unsigned int cnt, int node,
 	const struct cpumask *mask = NULL;
 	struct irq_desc *desc;
 	unsigned int flags;
-	int i, cpu = -1;
+	int i;
 
-	if (affinity && cpumask_empty(affinity))
-		return -EINVAL;
+	/* Validate affinity mask(s) */
+	if (affinity) {
+		for (i = 0, mask = affinity; i < cnt; i++, mask++) {
+			if (cpumask_empty(mask))
+				return -EINVAL;
+		}
+	}
 
 	flags = affinity ? IRQD_AFFINITY_MANAGED : 0;
+	mask = NULL;
 
 	for (i = 0; i < cnt; i++) {
 		if (affinity) {
-			cpu = cpumask_next(cpu, affinity);
-			if (cpu >= nr_cpu_ids)
-				cpu = cpumask_first(affinity);
-			node = cpu_to_node(cpu);
-
-			/*
-			 * For single allocations we use the caller provided
-			 * mask otherwise we use the mask of the target cpu
-			 */
-			mask = cnt == 1 ? affinity : cpumask_of(cpu);
+			node = cpu_to_node(cpumask_first(affinity));
+			mask = affinity;
+			affinity++;
 		}
 		desc = alloc_desc(start + i, node, flags, mask, owner);
 		if (!desc)
@@ -481,9 +480,9 @@ EXPORT_SYMBOL_GPL(irq_free_descs);
 * @cnt: Number of consecutive irqs to allocate.
 * @node: Preferred node on which the irq descriptor should be allocated
 * @owner: Owning module (can be NULL)
- * @affinity: Optional pointer to an affinity mask which hints where the
- *		irq descriptors should be allocated and which default
- *		affinities to use
+ * @affinity: Optional pointer to an affinity mask array of size @cnt which
+ *		hints where the irq descriptors should be allocated and which
+ *		default affinities to use
 *
 * Returns the first irq number or error code
 */
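
The kernel-doc update above is the key contract change on the genirq side: @affinity now points to an array of @cnt masks, consumed one entry per descriptor, instead of a single mask. A hypothetical caller of the new interface might look like this; example_alloc_spread_irqs() is made up, while irq_create_affinity_masks() and __irq_alloc_descs() are the real helpers this series builds on.

#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/numa.h>
#include <linux/slab.h>

/* Allocate nvec irq descriptors with one spread mask per descriptor. */
static int example_alloc_spread_irqs(int nvec)
{
	struct cpumask *masks;
	int virq;

	/* NULL device mask: spread across all online CPUs. */
	masks = irq_create_affinity_masks(NULL, nvec);
	if (!masks)
		return -ENOMEM;

	/* @affinity is the array of nvec masks described above. */
	virq = __irq_alloc_descs(-1, 0, nvec, NUMA_NO_NODE, THIS_MODULE, masks);

	/* alloc_desc() copies each mask into its descriptor. */
	kfree(masks);
	return virq;
}
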