author	Thomas Gleixner <tglx@linutronix.de>	2017-09-13 17:29:51 -0400
committer	Thomas Gleixner <tglx@linutronix.de>	2017-09-25 14:52:02 -0400
commit	4900be83602b6be07366d3e69f756c1959f4169a (patch)
tree	aeacf40415a7d27cd63a1cecc93e39db67b22b51
parent	2db1f959d9dc16035f2eb44ed5fdb2789b754d6a (diff)
x86/vector/msi: Switch to global reservation mode
Devices with many queues allocate a huge number of interrupts and get
assigned a vector for each of them, even if the queues are not active
and the interrupts are never requested. This causes problems with the
decision whether the global vector space is sufficient for CPU hot
unplug operations.

Change it to a reservation scheme, which allows overcommitment.

When the interrupt is allocated and initialized, the vector assignment
merely updates the reservation request counter in the matrix allocator.
This counter is used to emit warnings when the reservation exceeds the
available vector space, but does not affect CPU offline operations. Like
the managed interrupts, the corresponding MSI/DMAR/IOAPIC entries are
directed to the special shutdown vector.

When the interrupt is requested, the activation code tries to assign a
real vector. If that succeeds, the interrupt is started up and
functional. If it fails, the subsequent request_irq() fails with
-ENOSPC.

This allows a clear separation of inactive and active modes and
simplifies the final decision whether the global vector space is
sufficient for CPU offline operations.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Juergen Gross <jgross@suse.com>
Tested-by: Yu Chen <yu.c.chen@intel.com>
Acked-by: Juergen Gross <jgross@suse.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Alok Kataria <akataria@vmware.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Rui Zhang <rui.zhang@intel.com>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Len Brown <lenb@kernel.org>
Link: https://lkml.kernel.org/r/20170913213156.184211133@linutronix.de
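To make the overcommit behaviour concrete, the following stand-alone
sketch (illustration only, not kernel code; the "toy_" names are
invented for this example) models the accounting the reservation mode
relies on: reservations made at allocation time may exceed the vector
space and only trigger a warning, while the real allocation at
activation time is what can fail and surface as -ENOSPC from
request_irq().

	#include <stdio.h>

	/* Toy model of the vector matrix accounting (hypothetical names). */
	struct toy_matrix {
		unsigned int size;      /* vectors available                   */
		unsigned int allocated; /* vectors actually handed out         */
		unsigned int reserved;  /* reservation counter, may overcommit */
	};

	/* Allocation/init time: only count the reservation; never fails. */
	static void toy_reserve(struct toy_matrix *m)
	{
		if (++m->reserved > m->size - m->allocated)
			fprintf(stderr, "warning: reservations exceed vector space\n");
	}

	/* Activation (request_irq()) time: assign a real vector; can fail. */
	static int toy_activate(struct toy_matrix *m)
	{
		if (m->allocated >= m->size)
			return -1;	/* the kernel returns -ENOSPC here */
		m->allocated++;
		m->reserved--;
		return 0;
	}

	int main(void)
	{
		struct toy_matrix m = { .size = 2, .allocated = 0, .reserved = 0 };
		int i;

		for (i = 0; i < 4; i++)	/* four queues reserve, all succeed */
			toy_reserve(&m);
		for (i = 0; i < 4; i++)	/* only two activations fit */
			printf("activate %d: %s\n", i,
			       toy_activate(&m) ? "ENOSPC" : "ok");
		return 0;
	}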
-rw-r--r--	arch/x86/kernel/apic/vector.c	97
1 file changed, 63 insertions(+), 34 deletions(-)
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 3f53572c89cb..46a9ae921819 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -33,7 +33,9 @@ struct apic_chip_data {
 	unsigned int		irq;
 	struct hlist_node	clist;
 	unsigned int		move_in_progress	: 1,
-				is_managed		: 1;
+				is_managed		: 1,
+				can_reserve		: 1,
+				has_reserved		: 1;
 };
 
 struct irq_domain *x86_vector_domain;
@@ -175,9 +177,31 @@ static int reserve_managed_vector(struct irq_data *irqd)
 	return ret;
 }
 
+static void reserve_irq_vector_locked(struct irq_data *irqd)
+{
+	struct apic_chip_data *apicd = apic_chip_data(irqd);
+
+	irq_matrix_reserve(vector_matrix);
+	apicd->can_reserve = true;
+	apicd->has_reserved = true;
+	trace_vector_reserve(irqd->irq, 0);
+	vector_assign_managed_shutdown(irqd);
+}
+
+static int reserve_irq_vector(struct irq_data *irqd)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&vector_lock, flags);
+	reserve_irq_vector_locked(irqd);
+	raw_spin_unlock_irqrestore(&vector_lock, flags);
+	return 0;
+}
+
 static int allocate_vector(struct irq_data *irqd, const struct cpumask *dest)
 {
 	struct apic_chip_data *apicd = apic_chip_data(irqd);
+	bool resvd = apicd->has_reserved;
 	unsigned int cpu = apicd->cpu;
 	int vector = apicd->vector;
 
@@ -191,10 +215,10 @@ static int allocate_vector(struct irq_data *irqd, const struct cpumask *dest)
 	if (vector && cpu_online(cpu) && cpumask_test_cpu(cpu, dest))
 		return 0;
 
-	vector = irq_matrix_alloc(vector_matrix, dest, false, &cpu);
+	vector = irq_matrix_alloc(vector_matrix, dest, resvd, &cpu);
 	if (vector > 0)
 		apic_update_vector(irqd, vector, cpu);
-	trace_vector_alloc(irqd->irq, vector, false, vector);
+	trace_vector_alloc(irqd->irq, vector, resvd, vector);
 	return vector;
 }
 
@@ -252,7 +276,11 @@ assign_irq_vector_policy(struct irq_data *irqd, struct irq_alloc_info *info)
 		return reserve_managed_vector(irqd);
 	if (info->mask)
 		return assign_irq_vector(irqd, info->mask);
-	return assign_irq_vector_any(irqd);
+	if (info->type != X86_IRQ_ALLOC_TYPE_MSI &&
+	    info->type != X86_IRQ_ALLOC_TYPE_MSIX)
+		return assign_irq_vector_any(irqd);
+	/* For MSI(X) make only a global reservation with no guarantee */
+	return reserve_irq_vector(irqd);
 }
 
 static int
@@ -314,17 +342,35 @@ static void x86_vector_deactivate(struct irq_domain *dom, struct irq_data *irqd)
 	unsigned long flags;
 
 	trace_vector_deactivate(irqd->irq, apicd->is_managed,
-				false, false);
+				apicd->can_reserve, false);
 
-	if (apicd->is_managed)
+	/* Regular fixed assigned interrupt */
+	if (!apicd->is_managed && !apicd->can_reserve)
+		return;
+	/* If the interrupt has a global reservation, nothing to do */
+	if (apicd->has_reserved)
 		return;
 
 	raw_spin_lock_irqsave(&vector_lock, flags);
 	clear_irq_vector(irqd);
-	vector_assign_managed_shutdown(irqd);
+	if (apicd->can_reserve)
+		reserve_irq_vector_locked(irqd);
+	else
+		vector_assign_managed_shutdown(irqd);
 	raw_spin_unlock_irqrestore(&vector_lock, flags);
 }
 
+static int activate_reserved(struct irq_data *irqd)
+{
+	struct apic_chip_data *apicd = apic_chip_data(irqd);
+	int ret;
+
+	ret = assign_irq_vector_any_locked(irqd);
+	if (!ret)
+		apicd->has_reserved = false;
+	return ret;
+}
+
 static int activate_managed(struct irq_data *irqd)
 {
 	const struct cpumask *dest = irq_data_get_affinity_mask(irqd);
@@ -357,16 +403,19 @@ static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd,
 	int ret = 0;
 
 	trace_vector_activate(irqd->irq, apicd->is_managed,
-			      false, early);
+			      apicd->can_reserve, early);
 
-	if (!apicd->is_managed)
+	/* Nothing to do for fixed assigned vectors */
+	if (!apicd->can_reserve && !apicd->is_managed)
 		return 0;
 
 	raw_spin_lock_irqsave(&vector_lock, flags);
 	if (early || irqd_is_managed_and_shutdown(irqd))
 		vector_assign_managed_shutdown(irqd);
-	else
+	else if (apicd->is_managed)
 		ret = activate_managed(irqd);
+	else if (apicd->has_reserved)
+		ret = activate_reserved(irqd);
 	raw_spin_unlock_irqrestore(&vector_lock, flags);
 	return ret;
 }
@@ -376,8 +425,11 @@ static void vector_free_reserved_and_managed(struct irq_data *irqd)
 	const struct cpumask *dest = irq_data_get_affinity_mask(irqd);
 	struct apic_chip_data *apicd = apic_chip_data(irqd);
 
-	trace_vector_teardown(irqd->irq, apicd->is_managed, false);
+	trace_vector_teardown(irqd->irq, apicd->is_managed,
+			      apicd->has_reserved);
 
+	if (apicd->has_reserved)
+		irq_matrix_remove_reserved(vector_matrix);
 	if (apicd->is_managed)
 		irq_matrix_remove_managed(vector_matrix, dest);
 }
@@ -604,22 +656,6 @@ int __init arch_early_irq_init(void)
 }
 
 #ifdef CONFIG_SMP
-/* Temporary hack to keep things working */
-static void vector_update_shutdown_irqs(void)
-{
-	struct irq_desc *desc;
-	int irq;
-
-	for_each_irq_desc(irq, desc) {
-		struct irq_data *irqd = irq_desc_get_irq_data(desc);
-		struct apic_chip_data *ad = apic_chip_data(irqd);
-
-		if (!ad || !ad->vector || ad->cpu != smp_processor_id())
-			continue;
-		this_cpu_write(vector_irq[ad->vector], desc);
-		irq_matrix_assign(vector_matrix, ad->vector);
-	}
-}
 
 static struct irq_desc *__setup_vector_irq(int vector)
 {
@@ -655,13 +691,6 @@ void lapic_online(void)
 	 */
 	for (vector = 0; vector < NR_VECTORS; vector++)
 		this_cpu_write(vector_irq[vector], __setup_vector_irq(vector));
-
-	/*
-	 * Until the rewrite of the managed interrupt management is in
-	 * place it's necessary to walk the irq descriptors and check for
-	 * interrupts which are targeted at this CPU.
-	 */
-	vector_update_shutdown_irqs();
 }
 
 void lapic_offline(void)