summaryrefslogtreecommitdiffstats
path: root/kernel/irq/affinity.c
diff options
context:
space:
mode:
authorMing Lei <ming.lei@redhat.com>2019-02-16 12:13:09 -0500
committerThomas Gleixner <tglx@linutronix.de>2019-02-18 05:21:28 -0500
commitc66d4bd110a1f8a68c1a88bfbf866eb50c6464b7 (patch)
tree772a8ffe770a1386abd0a5f541cc2b38f2f4c1cd /kernel/irq/affinity.c
parent9cfef55bb57e7620c63087be18a76351628f8d0f (diff)
genirq/affinity: Add new callback for (re)calculating interrupt sets
The interrupt affinity spreading mechanism supports spreading out affinities for one or more interrupt sets. An interrupt set contains one or more interrupts. Each set is mapped to a specific functionality of a device, e.g. general I/O queues and read I/O queues of multiqueue block devices. The number of interrupts per set is defined by the driver. It depends on the total number of available interrupts for the device, which is determined by the PCI capabilities and the availability of underlying CPU resources, and the number of queues which the device provides and the driver wants to instantiate. The driver passes initial configuration for the interrupt allocation via a pointer to struct irq_affinity. Right now the allocation mechanism is complex as it requires a loop in the driver to determine the maximum number of interrupts which are provided by the PCI capabilities and the underlying CPU resources. This loop would have to be replicated in every driver which wants to utilize this mechanism. That's unwanted code duplication and error prone. In order to move this into generic facilities it is required to have a mechanism, which allows the recalculation of the interrupt sets and their size, in the core code. As the core code does not have any knowledge about the underlying device, a driver specific callback is required in struct irq_affinity, which can be invoked by the core code. The callback gets the number of available interrupts as an argument, so the driver can calculate the corresponding number and size of interrupt sets. At the moment the struct irq_affinity pointer which is handed in from the driver and passed through to several core functions is marked 'const', but for the callback to be able to modify the data in the struct it's required to remove the 'const' qualifier. Add the optional callback to struct irq_affinity, which allows drivers to recalculate the number and size of interrupt sets and remove the 'const' qualifier.
For simple invocations, which do not supply a callback, a default callback is installed, which just sets nr_sets to 1 and transfers the number of spreadable vectors to the set_size array at index 0. This is for now guarded by a check for nr_sets != 0 to keep the NVME driver working until it is converted to the callback mechanism. To make sure that the driver configuration is correct under all circumstances the callback is invoked even when there are no interrupts for queues left, i.e. the pre/post requirements already exhaust the number of available interrupts. At the PCI layer irq_create_affinity_masks() has to be invoked even for the case where the legacy interrupt is used. That ensures that the callback is invoked and the device driver can adjust to that situation. [ tglx: Fixed the simple case (no sets required). Moved the sanity check for nr_sets after the invocation of the callback so it catches broken drivers. Fixed the kernel doc comments for struct irq_affinity and de-'This patch'-ed the changelog ] Signed-off-by: Ming Lei <ming.lei@redhat.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Acked-by: Marc Zyngier <marc.zyngier@arm.com> Cc: Christoph Hellwig <hch@lst.de> Cc: Bjorn Helgaas <helgaas@kernel.org> Cc: Jens Axboe <axboe@kernel.dk> Cc: linux-block@vger.kernel.org Cc: Sagi Grimberg <sagi@grimberg.me> Cc: linux-nvme@lists.infradead.org Cc: linux-pci@vger.kernel.org Cc: Keith Busch <keith.busch@intel.com> Cc: Sumit Saxena <sumit.saxena@broadcom.com> Cc: Kashyap Desai <kashyap.desai@broadcom.com> Cc: Shivasharan Srikanteshwara <shivasharan.srikanteshwara@broadcom.com> Link: https://lkml.kernel.org/r/20190216172228.512444498@linutronix.de
Diffstat (limited to 'kernel/irq/affinity.c')
-rw-r--r--kernel/irq/affinity.c62
1 files changed, 44 insertions, 18 deletions
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index 278289c091bb..d737dc60ab52 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -230,6 +230,12 @@ static int irq_build_affinity_masks(const struct irq_affinity *affd,
230 return ret; 230 return ret;
231} 231}
232 232
233static void default_calc_sets(struct irq_affinity *affd, unsigned int affvecs)
234{
235 affd->nr_sets = 1;
236 affd->set_size[0] = affvecs;
237}
238
233/** 239/**
234 * irq_create_affinity_masks - Create affinity masks for multiqueue spreading 240 * irq_create_affinity_masks - Create affinity masks for multiqueue spreading
235 * @nvecs: The total number of vectors 241 * @nvecs: The total number of vectors
@@ -240,20 +246,46 @@ static int irq_build_affinity_masks(const struct irq_affinity *affd,
240struct irq_affinity_desc * 246struct irq_affinity_desc *
241irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd) 247irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
242{ 248{
243 unsigned int affvecs, curvec, usedvecs, nr_sets, i; 249 unsigned int affvecs, curvec, usedvecs, i;
244 unsigned int set_size[IRQ_AFFINITY_MAX_SETS];
245 struct irq_affinity_desc *masks = NULL; 250 struct irq_affinity_desc *masks = NULL;
246 251
247 /* 252 /*
248 * If there aren't any vectors left after applying the pre/post 253 * Determine the number of vectors which need interrupt affinities
249 * vectors don't bother with assigning affinity. 254 * assigned. If the pre/post request exhausts the available vectors
255 * then nothing to do here except for invoking the calc_sets()
256 * callback so the device driver can adjust to the situation. If there
257 * is only a single vector, then managing the queue is pointless as
258 * well.
250 */ 259 */
251 if (nvecs == affd->pre_vectors + affd->post_vectors) 260 if (nvecs > 1 && nvecs > affd->pre_vectors + affd->post_vectors)
252 return NULL; 261 affvecs = nvecs - affd->pre_vectors - affd->post_vectors;
262 else
263 affvecs = 0;
264
265 /*
266 * Simple invocations do not provide a calc_sets() callback. Install
267 * the generic one. The check for affd->nr_sets is a temporary
268 * workaround and will be removed after the NVME driver is converted
269 * over.
270 */
271 if (!affd->nr_sets && !affd->calc_sets)
272 affd->calc_sets = default_calc_sets;
273
274 /*
275 * If the device driver provided a calc_sets() callback let it
276 * recalculate the number of sets and their size. The check will go
277 * away once the NVME driver is converted over.
278 */
279 if (affd->calc_sets)
280 affd->calc_sets(affd, affvecs);
253 281
254 if (WARN_ON_ONCE(affd->nr_sets > IRQ_AFFINITY_MAX_SETS)) 282 if (WARN_ON_ONCE(affd->nr_sets > IRQ_AFFINITY_MAX_SETS))
255 return NULL; 283 return NULL;
256 284
285 /* Nothing to assign? */
286 if (!affvecs)
287 return NULL;
288
257 masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL); 289 masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL);
258 if (!masks) 290 if (!masks)
259 return NULL; 291 return NULL;
@@ -261,21 +293,13 @@ irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
261 /* Fill out vectors at the beginning that don't need affinity */ 293 /* Fill out vectors at the beginning that don't need affinity */
262 for (curvec = 0; curvec < affd->pre_vectors; curvec++) 294 for (curvec = 0; curvec < affd->pre_vectors; curvec++)
263 cpumask_copy(&masks[curvec].mask, irq_default_affinity); 295 cpumask_copy(&masks[curvec].mask, irq_default_affinity);
296
264 /* 297 /*
265 * Spread on present CPUs starting from affd->pre_vectors. If we 298 * Spread on present CPUs starting from affd->pre_vectors. If we
266 * have multiple sets, build each sets affinity mask separately. 299 * have multiple sets, build each sets affinity mask separately.
267 */ 300 */
268 affvecs = nvecs - affd->pre_vectors - affd->post_vectors; 301 for (i = 0, usedvecs = 0; i < affd->nr_sets; i++) {
269 nr_sets = affd->nr_sets; 302 unsigned int this_vecs = affd->set_size[i];
270 if (!nr_sets) {
271 nr_sets = 1;
272 set_size[0] = affvecs;
273 } else {
274 memcpy(set_size, affd->set_size, nr_sets * sizeof(unsigned int));
275 }
276
277 for (i = 0, usedvecs = 0; i < nr_sets; i++) {
278 unsigned int this_vecs = set_size[i];
279 int ret; 303 int ret;
280 304
281 ret = irq_build_affinity_masks(affd, curvec, this_vecs, 305 ret = irq_build_affinity_masks(affd, curvec, this_vecs,
@@ -318,7 +342,9 @@ unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec,
318 if (resv > minvec) 342 if (resv > minvec)
319 return 0; 343 return 0;
320 344
321 if (affd->nr_sets) { 345 if (affd->calc_sets) {
346 set_vecs = maxvec - resv;
347 } else if (affd->nr_sets) {
322 unsigned int i; 348 unsigned int i;
323 349
324 for (i = 0, set_vecs = 0; i < affd->nr_sets; i++) 350 for (i = 0, set_vecs = 0; i < affd->nr_sets; i++)