aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMing Lei <ming.lei@redhat.com>2019-02-16 12:13:08 -0500
committerThomas Gleixner <tglx@linutronix.de>2019-02-18 05:21:27 -0500
commit9cfef55bb57e7620c63087be18a76351628f8d0f (patch)
treea9fad17b1a863cd92ad2ae1b95a98ed9fd0c7b83
parent0145c30e896d26e638d27c957d9eed72893c1c92 (diff)
genirq/affinity: Store interrupt sets size in struct irq_affinity
The interrupt affinity spreading mechanism supports to spread out affinities for one or more interrupt sets. An interrupt set contains one or more interrupts. Each set is mapped to a specific functionality of a device, e.g. general I/O queues and read I/O queues of multiqueue block devices. The number of interrupts per set is defined by the driver. It depends on the total number of available interrupts for the device, which is determined by the PCI capabilities and the availability of underlying CPU resources, and the number of queues which the device provides and the driver wants to instantiate. The driver passes initial configuration for the interrupt allocation via a pointer to struct irq_affinity. Right now the allocation mechanism is complex as it requires to have a loop in the driver to determine the maximum number of interrupts which are provided by the PCI capabilities and the underlying CPU resources. This loop would have to be replicated in every driver which wants to utilize this mechanism. That's unwanted code duplication and error prone. In order to move this into generic facilities it is required to have a mechanism, which allows the recalculation of the interrupt sets and their size, in the core code. As the core code does not have any knowledge about the underlying device, a driver specific callback will be added to struct irq_affinity, which will be invoked by the core code. The callback will get the number of available interrupts as an argument, so the driver can calculate the corresponding number and size of interrupt sets. To support this, two modifications for the handling of struct irq_affinity are required: 1) The (optional) interrupt sets size information is contained in a separate array of integers and struct irq_affinity contains a pointer to it. This is cumbersome and as the maximum number of interrupt sets is small, there is no reason to have separate storage. 
Moving the size array into struct irq_affinity avoids indirections and makes the code simpler. 2) At the moment the struct irq_affinity pointer which is handed in from the driver and passed through to several core functions is marked 'const'. With the upcoming callback to recalculate the number and size of interrupt sets, it's necessary to remove the 'const' qualifier. Otherwise the callback would not be able to update the data. Implement #1 and store the interrupt sets size in 'struct irq_affinity'. No functional change. [ tglx: Fixed the memcpy() size so it won't copy beyond the size of the source. Fixed the kernel doc comments for struct irq_affinity and de-'This patch'-ed the changelog ] Signed-off-by: Ming Lei <ming.lei@redhat.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Acked-by: Marc Zyngier <marc.zyngier@arm.com> Cc: Christoph Hellwig <hch@lst.de> Cc: Bjorn Helgaas <helgaas@kernel.org> Cc: Jens Axboe <axboe@kernel.dk> Cc: linux-block@vger.kernel.org Cc: Sagi Grimberg <sagi@grimberg.me> Cc: linux-nvme@lists.infradead.org Cc: linux-pci@vger.kernel.org Cc: Keith Busch <keith.busch@intel.com> Cc: Sumit Saxena <sumit.saxena@broadcom.com> Cc: Kashyap Desai <kashyap.desai@broadcom.com> Cc: Shivasharan Srikanteshwara <shivasharan.srikanteshwara@broadcom.com> Link: https://lkml.kernel.org/r/20190216172228.423723127@linutronix.de
-rw-r--r--drivers/nvme/host/pci.c7
-rw-r--r--include/linux/interrupt.h9
-rw-r--r--kernel/irq/affinity.c16
3 files changed, 21 insertions, 11 deletions
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 9bc585415d9b..21ffd671b6ed 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2081,12 +2081,11 @@ static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int irq_queues)
2081static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues) 2081static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues)
2082{ 2082{
2083 struct pci_dev *pdev = to_pci_dev(dev->dev); 2083 struct pci_dev *pdev = to_pci_dev(dev->dev);
2084 int irq_sets[2];
2085 struct irq_affinity affd = { 2084 struct irq_affinity affd = {
2086 .pre_vectors = 1, 2085 .pre_vectors = 1,
2087 .nr_sets = ARRAY_SIZE(irq_sets), 2086 .nr_sets = 2,
2088 .sets = irq_sets,
2089 }; 2087 };
2088 unsigned int *irq_sets = affd.set_size;
2090 int result = 0; 2089 int result = 0;
2091 unsigned int irq_queues, this_p_queues; 2090 unsigned int irq_queues, this_p_queues;
2092 2091
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 35e7389c2011..5afdfd5dc39b 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -241,20 +241,23 @@ struct irq_affinity_notify {
241 void (*release)(struct kref *ref); 241 void (*release)(struct kref *ref);
242}; 242};
243 243
244#define IRQ_AFFINITY_MAX_SETS 4
245
244/** 246/**
245 * struct irq_affinity - Description for automatic irq affinity assignements 247 * struct irq_affinity - Description for automatic irq affinity assignements
246 * @pre_vectors: Don't apply affinity to @pre_vectors at beginning of 248 * @pre_vectors: Don't apply affinity to @pre_vectors at beginning of
247 * the MSI(-X) vector space 249 * the MSI(-X) vector space
248 * @post_vectors: Don't apply affinity to @post_vectors at end of 250 * @post_vectors: Don't apply affinity to @post_vectors at end of
249 * the MSI(-X) vector space 251 * the MSI(-X) vector space
250 * @nr_sets: Length of passed in *sets array 252 * @nr_sets: The number of interrupt sets for which affinity
251 * @sets: Number of affinitized sets 253 * spreading is required
254 * @set_size: Array holding the size of each interrupt set
252 */ 255 */
253struct irq_affinity { 256struct irq_affinity {
254 unsigned int pre_vectors; 257 unsigned int pre_vectors;
255 unsigned int post_vectors; 258 unsigned int post_vectors;
256 unsigned int nr_sets; 259 unsigned int nr_sets;
257 unsigned int *sets; 260 unsigned int set_size[IRQ_AFFINITY_MAX_SETS];
258}; 261};
259 262
260/** 263/**
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index 82e8799374e9..278289c091bb 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -238,9 +238,10 @@ static int irq_build_affinity_masks(const struct irq_affinity *affd,
238 * Returns the irq_affinity_desc pointer or NULL if allocation failed. 238 * Returns the irq_affinity_desc pointer or NULL if allocation failed.
239 */ 239 */
240struct irq_affinity_desc * 240struct irq_affinity_desc *
241irq_create_affinity_masks(unsigned int nvecs, const struct irq_affinity *affd) 241irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
242{ 242{
243 unsigned int affvecs, curvec, usedvecs, nr_sets, i; 243 unsigned int affvecs, curvec, usedvecs, nr_sets, i;
244 unsigned int set_size[IRQ_AFFINITY_MAX_SETS];
244 struct irq_affinity_desc *masks = NULL; 245 struct irq_affinity_desc *masks = NULL;
245 246
246 /* 247 /*
@@ -250,6 +251,9 @@ irq_create_affinity_masks(unsigned int nvecs, const struct irq_affinity *affd)
250 if (nvecs == affd->pre_vectors + affd->post_vectors) 251 if (nvecs == affd->pre_vectors + affd->post_vectors)
251 return NULL; 252 return NULL;
252 253
254 if (WARN_ON_ONCE(affd->nr_sets > IRQ_AFFINITY_MAX_SETS))
255 return NULL;
256
253 masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL); 257 masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL);
254 if (!masks) 258 if (!masks)
255 return NULL; 259 return NULL;
@@ -263,11 +267,15 @@ irq_create_affinity_masks(unsigned int nvecs, const struct irq_affinity *affd)
263 */ 267 */
264 affvecs = nvecs - affd->pre_vectors - affd->post_vectors; 268 affvecs = nvecs - affd->pre_vectors - affd->post_vectors;
265 nr_sets = affd->nr_sets; 269 nr_sets = affd->nr_sets;
266 if (!nr_sets) 270 if (!nr_sets) {
267 nr_sets = 1; 271 nr_sets = 1;
272 set_size[0] = affvecs;
273 } else {
274 memcpy(set_size, affd->set_size, nr_sets * sizeof(unsigned int));
275 }
268 276
269 for (i = 0, usedvecs = 0; i < nr_sets; i++) { 277 for (i = 0, usedvecs = 0; i < nr_sets; i++) {
270 unsigned int this_vecs = affd->sets ? affd->sets[i] : affvecs; 278 unsigned int this_vecs = set_size[i];
271 int ret; 279 int ret;
272 280
273 ret = irq_build_affinity_masks(affd, curvec, this_vecs, 281 ret = irq_build_affinity_masks(affd, curvec, this_vecs,
@@ -314,7 +322,7 @@ unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec,
314 unsigned int i; 322 unsigned int i;
315 323
316 for (i = 0, set_vecs = 0; i < affd->nr_sets; i++) 324 for (i = 0, set_vecs = 0; i < affd->nr_sets; i++)
317 set_vecs += affd->sets[i]; 325 set_vecs += affd->set_size[i];
318 } else { 326 } else {
319 get_online_cpus(); 327 get_online_cpus();
320 set_vecs = cpumask_weight(cpu_possible_mask); 328 set_vecs = cpumask_weight(cpu_possible_mask);