author		Thomas Gleixner <tglx@linutronix.de>	2017-09-13 17:29:50 -0400
committer	Thomas Gleixner <tglx@linutronix.de>	2017-09-25 14:52:01 -0400
commit		2db1f959d9dc16035f2eb44ed5fdb2789b754d6a (patch)
tree		7ef78d79f3ad72b6dbb45151a83276179da60ea9
parent		90ad9e2d91067983f3328e21b306323877e5f48a (diff)
x86/vector: Handle managed interrupts properly
Managed interrupts need to reserve interrupt vectors permanently, but as
long as the interrupt is deactivated, the vector should not be active.

Reserve a new system vector which is used for the initial setup of
MSI/DMAR/IOAPIC entries. At that point the interrupts are disabled in the
corresponding MSI/DMAR/IOAPIC devices, so the vector is never sent to any
CPU.

When the managed interrupt is started up, a real vector is assigned from
the managed vector space and configured in MSI/DMAR/IOAPIC.

This allows a clear separation of inactive and active modes and simplifies
the final decision whether the global vector space is sufficient for CPU
offline operations.

The vector space can be reserved even on offline CPUs and will survive CPU
offline/online operations.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Juergen Gross <jgross@suse.com>
Tested-by: Yu Chen <yu.c.chen@intel.com>
Acked-by: Juergen Gross <jgross@suse.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Alok Kataria <akataria@vmware.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Rui Zhang <rui.zhang@intel.com>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Len Brown <lenb@kernel.org>
Link: https://lkml.kernel.org/r/20170913213156.104616625@linutronix.de
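[Editor's note] The lifecycle described above reduces to three transitions: reserve at allocation time, assign a real vector at startup, park on the shutdown vector again at shutdown. A minimal userspace sketch of that state machine follows; all toy_* names are hypothetical illustrations, not the kernel's API, and this is not kernel code:

#include <stdbool.h>
#include <stdio.h>

#define MANAGED_IRQ_SHUTDOWN_VECTOR	0xef	/* placeholder, never sent by a device */

struct toy_irq {
	bool reserved;	/* vector space reserved at allocation time */
	int  vector;	/* vector currently programmed into MSI/DMAR/IOAPIC */
};

/* Allocation: reserve space only, park the entry on the shutdown vector. */
static void toy_reserve(struct toy_irq *irq)
{
	irq->reserved = true;
	irq->vector = MANAGED_IRQ_SHUTDOWN_VECTOR;
}

/* Startup: assign a real vector from the reserved managed space. */
static int toy_activate(struct toy_irq *irq, int real_vector)
{
	if (!irq->reserved)
		return -1;
	irq->vector = real_vector;
	return 0;
}

/* Shutdown: give the real vector back, keep the reservation. */
static void toy_deactivate(struct toy_irq *irq)
{
	irq->vector = MANAGED_IRQ_SHUTDOWN_VECTOR;
}

int main(void)
{
	struct toy_irq irq = { 0 };

	toy_reserve(&irq);		/* entry programmed, device still masked */
	toy_activate(&irq, 0x41);	/* interrupt started up: vector goes live */
	printf("active vector: 0x%x\n", irq.vector);
	toy_deactivate(&irq);		/* deactivated: parked, reservation kept */
	printf("parked vector: 0x%x\n", irq.vector);
	return 0;
}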
-rw-r--r--	arch/x86/include/asm/irq_vectors.h	  8
-rw-r--r--	arch/x86/kernel/apic/vector.c		190
2 files changed, 174 insertions(+), 24 deletions(-)
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index aaf8d28b5d00..1e9bd28f842d 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -101,12 +101,8 @@
 #define POSTED_INTR_NESTED_VECTOR	0xf0
 #endif
 
-/*
- * Local APIC timer IRQ vector is on a different priority level,
- * to work around the 'lost local interrupt if more than 2 IRQ
- * sources per level' errata.
- */
-#define LOCAL_TIMER_VECTOR		0xef
+#define MANAGED_IRQ_SHUTDOWN_VECTOR	0xef
+#define LOCAL_TIMER_VECTOR		0xee
 
 #define NR_VECTORS			256
 
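[Editor's note] The comment deleted above existed because the local APIC derives an interrupt's priority class from the upper nibble of its vector number, and the cited erratum loses interrupts when more than two sources share a class. LOCAL_TIMER_VECTOR moves from 0xef to 0xee, so both constants now share class 0xe; per the changelog this is presumably harmless because the shutdown vector is never sent to any CPU. A standalone check of the layout (illustrative C, not part of the patch):

#include <assert.h>
#include <stdio.h>

#define MANAGED_IRQ_SHUTDOWN_VECTOR	0xef
#define LOCAL_TIMER_VECTOR		0xee

/* The APIC priority class of a vector is its upper nibble. */
static unsigned int prio_class(unsigned int vector)
{
	return vector >> 4;
}

int main(void)
{
	/* Both vectors land in priority class 0xe ... */
	assert(prio_class(MANAGED_IRQ_SHUTDOWN_VECTOR) == prio_class(LOCAL_TIMER_VECTOR));
	/* ... below the 0xf0+ system vectors such as POSTED_INTR_NESTED_VECTOR. */
	assert(MANAGED_IRQ_SHUTDOWN_VECTOR < 0xf0);
	printf("priority class: 0x%x\n", prio_class(LOCAL_TIMER_VECTOR));
	return 0;
}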
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index f08d44fabef4..3f53572c89cb 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -32,7 +32,8 @@ struct apic_chip_data {
 	unsigned int		prev_cpu;
 	unsigned int		irq;
 	struct hlist_node	clist;
-	u8			move_in_progress : 1;
+	unsigned int		move_in_progress	: 1,
+				is_managed		: 1;
 };
 
 struct irq_domain *x86_vector_domain;
@@ -152,6 +153,28 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec,
 	per_cpu(vector_irq, newcpu)[newvec] = desc;
 }
 
+static void vector_assign_managed_shutdown(struct irq_data *irqd)
+{
+	unsigned int cpu = cpumask_first(cpu_online_mask);
+
+	apic_update_irq_cfg(irqd, MANAGED_IRQ_SHUTDOWN_VECTOR, cpu);
+}
+
+static int reserve_managed_vector(struct irq_data *irqd)
+{
+	const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd);
+	struct apic_chip_data *apicd = apic_chip_data(irqd);
+	unsigned long flags;
+	int ret;
+
+	raw_spin_lock_irqsave(&vector_lock, flags);
+	apicd->is_managed = true;
+	ret = irq_matrix_reserve_managed(vector_matrix, affmsk);
+	raw_spin_unlock_irqrestore(&vector_lock, flags);
+	trace_vector_reserve_managed(irqd->irq, ret);
+	return ret;
+}
+
 static int allocate_vector(struct irq_data *irqd, const struct cpumask *dest)
 {
 	struct apic_chip_data *apicd = apic_chip_data(irqd);
@@ -200,20 +223,65 @@ static int assign_irq_vector(struct irq_data *irqd, const struct cpumask *dest)
 	return ret;
 }
 
-static int assign_irq_vector_policy(struct irq_data *irqd,
-				    struct irq_alloc_info *info, int node)
+static int assign_irq_vector_any_locked(struct irq_data *irqd)
+{
+	int node = irq_data_get_node(irqd);
+
+	if (node != NUMA_NO_NODE) {
+		if (!assign_vector_locked(irqd, cpumask_of_node(node)))
+			return 0;
+	}
+	return assign_vector_locked(irqd, cpu_online_mask);
+}
+
+static int assign_irq_vector_any(struct irq_data *irqd)
+{
+	unsigned long flags;
+	int ret;
+
+	raw_spin_lock_irqsave(&vector_lock, flags);
+	ret = assign_irq_vector_any_locked(irqd);
+	raw_spin_unlock_irqrestore(&vector_lock, flags);
+	return ret;
+}
+
+static int
+assign_irq_vector_policy(struct irq_data *irqd, struct irq_alloc_info *info)
 {
+	if (irqd_affinity_is_managed(irqd))
+		return reserve_managed_vector(irqd);
 	if (info->mask)
 		return assign_irq_vector(irqd, info->mask);
-	if (node != NUMA_NO_NODE &&
-	    !assign_irq_vector(irqd, cpumask_of_node(node)))
+	return assign_irq_vector_any(irqd);
+}
+
+static int
+assign_managed_vector(struct irq_data *irqd, const struct cpumask *dest)
+{
+	const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd);
+	struct apic_chip_data *apicd = apic_chip_data(irqd);
+	int vector, cpu;
+
+	cpumask_and(vector_searchmask, vector_searchmask, affmsk);
+	cpu = cpumask_first(vector_searchmask);
+	if (cpu >= nr_cpu_ids)
+		return -EINVAL;
+	/* set_affinity might call here for nothing */
+	if (apicd->vector && cpumask_test_cpu(apicd->cpu, vector_searchmask))
 		return 0;
-	return assign_irq_vector(irqd, cpu_online_mask);
+	vector = irq_matrix_alloc_managed(vector_matrix, cpu);
+	trace_vector_alloc_managed(irqd->irq, vector, vector);
+	if (vector < 0)
+		return vector;
+	apic_update_vector(irqd, vector, cpu);
+	apic_update_irq_cfg(irqd, vector, cpu);
+	return 0;
 }
 
 static void clear_irq_vector(struct irq_data *irqd)
 {
 	struct apic_chip_data *apicd = apic_chip_data(irqd);
+	bool managed = irqd_affinity_is_managed(irqd);
 	unsigned int vector = apicd->vector;
 
 	lockdep_assert_held(&vector_lock);
@@ -225,7 +293,7 @@ static void clear_irq_vector(struct irq_data *irqd)
 			   apicd->prev_cpu);
 
 	per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_UNUSED;
-	irq_matrix_free(vector_matrix, apicd->cpu, vector, false);
+	irq_matrix_free(vector_matrix, apicd->cpu, vector, managed);
 	apicd->vector = 0;
 
 	/* Clean up move in progress */
@@ -234,12 +302,86 @@ static void clear_irq_vector(struct irq_data *irqd)
 		return;
 
 	per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_UNUSED;
-	irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, false);
+	irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, managed);
 	apicd->prev_vector = 0;
 	apicd->move_in_progress = 0;
 	hlist_del_init(&apicd->clist);
 }
 
+static void x86_vector_deactivate(struct irq_domain *dom, struct irq_data *irqd)
+{
+	struct apic_chip_data *apicd = apic_chip_data(irqd);
+	unsigned long flags;
+
+	trace_vector_deactivate(irqd->irq, apicd->is_managed,
+				false, false);
+
+	if (!apicd->is_managed)
+		return;
+
+	raw_spin_lock_irqsave(&vector_lock, flags);
+	clear_irq_vector(irqd);
+	vector_assign_managed_shutdown(irqd);
+	raw_spin_unlock_irqrestore(&vector_lock, flags);
+}
+
+static int activate_managed(struct irq_data *irqd)
+{
+	const struct cpumask *dest = irq_data_get_affinity_mask(irqd);
+	int ret;
+
+	cpumask_and(vector_searchmask, dest, cpu_online_mask);
+	if (WARN_ON_ONCE(cpumask_empty(vector_searchmask))) {
+		/* Something in the core code broke! Survive gracefully */
+		pr_err("Managed startup for irq %u, but no CPU\n", irqd->irq);
+		return -EINVAL;
+	}
+
+	ret = assign_managed_vector(irqd, vector_searchmask);
+	/*
+	 * This should not happen. The vector reservation got buggered. Handle
+	 * it gracefully.
+	 */
+	if (WARN_ON_ONCE(ret < 0)) {
+		pr_err("Managed startup irq %u, no vector available\n",
+		       irqd->irq);
+	}
+	return ret;
+}
+
+static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd,
+			       bool early)
+{
+	struct apic_chip_data *apicd = apic_chip_data(irqd);
+	unsigned long flags;
+	int ret = 0;
+
+	trace_vector_activate(irqd->irq, apicd->is_managed,
+			      false, early);
+
+	if (!apicd->is_managed)
+		return 0;
+
+	raw_spin_lock_irqsave(&vector_lock, flags);
+	if (early || irqd_is_managed_and_shutdown(irqd))
+		vector_assign_managed_shutdown(irqd);
+	else
+		ret = activate_managed(irqd);
+	raw_spin_unlock_irqrestore(&vector_lock, flags);
+	return ret;
+}
+
+static void vector_free_reserved_and_managed(struct irq_data *irqd)
+{
+	const struct cpumask *dest = irq_data_get_affinity_mask(irqd);
+	struct apic_chip_data *apicd = apic_chip_data(irqd);
+
+	trace_vector_teardown(irqd->irq, apicd->is_managed, false);
+
+	if (apicd->is_managed)
+		irq_matrix_remove_managed(vector_matrix, dest);
+}
+
 static void x86_vector_free_irqs(struct irq_domain *domain,
 				 unsigned int virq, unsigned int nr_irqs)
 {
@@ -253,6 +395,7 @@ static void x86_vector_free_irqs(struct irq_domain *domain,
 		if (irqd && irqd->chip_data) {
 			raw_spin_lock_irqsave(&vector_lock, flags);
 			clear_irq_vector(irqd);
+			vector_free_reserved_and_managed(irqd);
 			apicd = irqd->chip_data;
 			irq_domain_reset_irq_data(irqd);
 			raw_spin_unlock_irqrestore(&vector_lock, flags);
@@ -310,7 +453,7 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
 			continue;
 		}
 
-		err = assign_irq_vector_policy(irqd, info, node);
+		err = assign_irq_vector_policy(irqd, info);
 		trace_vector_setup(virq + i, false, err);
 		if (err)
 			goto error;
@@ -368,6 +511,8 @@ void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d,
 static const struct irq_domain_ops x86_vector_domain_ops = {
 	.alloc		= x86_vector_alloc_irqs,
 	.free		= x86_vector_free_irqs,
+	.activate	= x86_vector_activate,
+	.deactivate	= x86_vector_deactivate,
 #ifdef CONFIG_GENERIC_IRQ_DEBUGFS
 	.debug_show	= x86_vector_debug_show,
 #endif
@@ -531,13 +676,13 @@ static int apic_set_affinity(struct irq_data *irqd,
 {
 	int err;
 
-	if (!IS_ENABLED(CONFIG_SMP))
-		return -EPERM;
-
-	if (!cpumask_intersects(dest, cpu_online_mask))
-		return -EINVAL;
-
-	err = assign_irq_vector(irqd, dest);
+	raw_spin_lock(&vector_lock);
+	cpumask_and(vector_searchmask, dest, cpu_online_mask);
+	if (irqd_affinity_is_managed(irqd))
+		err = assign_managed_vector(irqd, vector_searchmask);
+	else
+		err = assign_vector_locked(irqd, vector_searchmask);
+	raw_spin_unlock(&vector_lock);
 	return err ? err : IRQ_SET_MASK_OK;
 }
 
543 688
@@ -577,9 +722,18 @@ static void free_moved_vector(struct apic_chip_data *apicd)
577{ 722{
578 unsigned int vector = apicd->prev_vector; 723 unsigned int vector = apicd->prev_vector;
579 unsigned int cpu = apicd->prev_cpu; 724 unsigned int cpu = apicd->prev_cpu;
725 bool managed = apicd->is_managed;
726
727 /*
728 * This should never happen. Managed interrupts are not
729 * migrated except on CPU down, which does not involve the
730 * cleanup vector. But try to keep the accounting correct
731 * nevertheless.
732 */
733 WARN_ON_ONCE(managed);
580 734
581 trace_vector_free_moved(apicd->irq, vector, false); 735 trace_vector_free_moved(apicd->irq, vector, managed);
582 irq_matrix_free(vector_matrix, cpu, vector, false); 736 irq_matrix_free(vector_matrix, cpu, vector, managed);
583 __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); 737 __this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
584 hlist_del_init(&apicd->clist); 738 hlist_del_init(&apicd->clist);
585 apicd->prev_vector = 0; 739 apicd->prev_vector = 0;
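
[Editor's note] What makes the permanent reservation work is the managed argument threaded into irq_matrix_free() throughout the hunks above: a freed managed vector returns to the managed reservation rather than to the general free pool, so the guarantee established at allocation time survives shutdown/startup cycles and CPU offline/online. A toy model of that accounting, in userspace C with hypothetical toy_* names (the real irq_matrix tracks this per CPU):

#include <stdio.h>

struct toy_matrix {
	int free;		/* generally allocatable vectors */
	int managed_reserved;	/* reserved for managed interrupts */
	int managed_in_use;	/* managed vectors currently programmed */
};

/* Allocation time: move a vector from the free pool into the reservation. */
static int toy_reserve_managed(struct toy_matrix *m)
{
	if (m->free == 0)
		return -1;
	m->free--;
	m->managed_reserved++;
	return 0;
}

/* Startup: hand out a vector from the reservation. */
static int toy_alloc_managed(struct toy_matrix *m)
{
	if (m->managed_reserved == 0)
		return -1;
	m->managed_reserved--;
	m->managed_in_use++;
	return 0;
}

/* The 'managed' flag mirrors irq_matrix_free(..., managed). */
static void toy_free(struct toy_matrix *m, int managed)
{
	if (managed) {
		m->managed_in_use--;
		m->managed_reserved++;	/* back to the reservation, not the free pool */
	} else {
		m->free++;
	}
}

int main(void)
{
	struct toy_matrix m = { .free = 200 };

	toy_reserve_managed(&m);	/* x86_vector_alloc_irqs() */
	toy_alloc_managed(&m);		/* x86_vector_activate() */
	toy_free(&m, 1);		/* x86_vector_deactivate(): reservation survives */
	printf("free %d, reserved %d, in use %d\n",
	       m.free, m.managed_reserved, m.managed_in_use);
	return 0;
}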