about summary refs log tree commit diff stats
path: root/arch/x86
diff options
context:
space:
mode:
author    Thomas Gleixner <tglx@linutronix.de>  2015-12-31 11:30:54 -0500
committer Thomas Gleixner <tglx@linutronix.de>  2016-01-15 07:44:02 -0500
commit    98229aa36caa9c769b13565523de9b813013c703 (patch)
tree      04f1a0ca0081de1d100102c51bc99e86cb2de1e7 /arch/x86
parent    90a2282e23f0522e4b3f797ad447c5e91bf7fe32 (diff)
x86/irq: Plug vector cleanup race
We still can end up with a stale vector due to the following:

CPU0                          CPU1                      CPU2
lock_vector()
data->move_in_progress=0
sendIPI()
unlock_vector()
                              set_affinity()
                              assign_irq_vector()
                              lock_vector()
                                                        handle_IPI
                              move_in_progress = 1
                                                        lock_vector()
                              unlock_vector()
                                                        move_in_progress == 1

So we need to serialize the vector assignment against a pending cleanup. The
solution is rather simple now. We not only check for the move_in_progress
flag in assign_irq_vector(), we also check whether there is still a cleanup
pending in the old_domain cpumask. If so, we return -EBUSY to the caller and
let him deal with it. Though we have to be careful in the cpu unplug case. If
the cleanout has not yet completed then the following setaffinity() call
would return -EBUSY. Add code which prevents this.

Full context is here: http://lkml.kernel.org/r/5653B688.4050809@stratus.com

Reported-and-tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Cc: andy.shevchenko@gmail.com
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: stable@vger.kernel.org #4.3+
Link: http://lkml.kernel.org/r/20151231160107.207265407@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/kernel/apic/vector.c | 63
1 file changed, 53 insertions(+), 10 deletions(-)
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 5f7883578880..3b670df4ba7b 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -120,7 +120,12 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
120 static int current_offset = VECTOR_OFFSET_START % 16; 120 static int current_offset = VECTOR_OFFSET_START % 16;
121 int cpu, vector; 121 int cpu, vector;
122 122
123 if (d->move_in_progress) 123 /*
124 * If there is still a move in progress or the previous move has not
125 * been cleaned up completely, tell the caller to come back later.
126 */
127 if (d->move_in_progress ||
128 cpumask_intersects(d->old_domain, cpu_online_mask))
124 return -EBUSY; 129 return -EBUSY;
125 130
126 /* Only try and allocate irqs on cpus that are present */ 131 /* Only try and allocate irqs on cpus that are present */
@@ -259,7 +264,12 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
259 data->cfg.vector = 0; 264 data->cfg.vector = 0;
260 cpumask_clear(data->domain); 265 cpumask_clear(data->domain);
261 266
262 if (likely(!data->move_in_progress)) 267 /*
268 * If move is in progress or the old_domain mask is not empty,
269 * i.e. the cleanup IPI has not been processed yet, we need to remove
270 * the old references to desc from all cpus vector tables.
271 */
272 if (!data->move_in_progress && cpumask_empty(data->old_domain))
263 return; 273 return;
264 274
265 desc = irq_to_desc(irq); 275 desc = irq_to_desc(irq);
@@ -579,12 +589,25 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
579 goto unlock; 589 goto unlock;
580 590
581 /* 591 /*
582 * Check if the irq migration is in progress. If so, we 592 * Nothing to cleanup if irq migration is in progress
583 * haven't received the cleanup request yet for this irq. 593 * or this cpu is not set in the cleanup mask.
584 */ 594 */
585 if (data->move_in_progress) 595 if (data->move_in_progress ||
596 !cpumask_test_cpu(me, data->old_domain))
586 goto unlock; 597 goto unlock;
587 598
599 /*
600 * We have two cases to handle here:
601 * 1) vector is unchanged but the target mask got reduced
602 * 2) vector and the target mask has changed
603 *
604 * #1 is obvious, but in #2 we have two vectors with the same
605 * irq descriptor: the old and the new vector. So we need to
606 * make sure that we only cleanup the old vector. The new
607 * vector has the current @vector number in the config and
608 * this cpu is part of the target mask. We better leave that
609 * one alone.
610 */
588 if (vector == data->cfg.vector && 611 if (vector == data->cfg.vector &&
589 cpumask_test_cpu(me, data->domain)) 612 cpumask_test_cpu(me, data->domain))
590 goto unlock; 613 goto unlock;
@@ -602,6 +625,7 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
602 goto unlock; 625 goto unlock;
603 } 626 }
604 __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); 627 __this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
628 cpumask_clear_cpu(me, data->old_domain);
605unlock: 629unlock:
606 raw_spin_unlock(&desc->lock); 630 raw_spin_unlock(&desc->lock);
607 } 631 }
@@ -645,13 +669,32 @@ void irq_force_complete_move(struct irq_desc *desc)
645 __irq_complete_move(cfg, cfg->vector); 669 __irq_complete_move(cfg, cfg->vector);
646 670
647 /* 671 /*
648 * Remove this cpu from the cleanup mask. The IPI might have been sent 672 * This is tricky. If the cleanup of @data->old_domain has not been
649 * just before the cpu was removed from the offline mask, but has not 673 * done yet, then the following setaffinity call will fail with
650 * been processed because the CPU has interrupts disabled and is on 674 * -EBUSY. This can leave the interrupt in a stale state.
651 * the way out. 675 *
676 * The cleanup cannot make progress because we hold @desc->lock. So in
677 * case @data->old_domain is not yet cleaned up, we need to drop the
678 * lock and acquire it again. @desc cannot go away, because the
679 * hotplug code holds the sparse irq lock.
652 */ 680 */
653 raw_spin_lock(&vector_lock); 681 raw_spin_lock(&vector_lock);
654 cpumask_clear_cpu(smp_processor_id(), data->old_domain); 682 /* Clean out all offline cpus (including ourself) first. */
683 cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
684 while (!cpumask_empty(data->old_domain)) {
685 raw_spin_unlock(&vector_lock);
686 raw_spin_unlock(&desc->lock);
687 cpu_relax();
688 raw_spin_lock(&desc->lock);
689 /*
690 * Reevaluate apic_chip_data. It might have been cleared after
691 * we dropped @desc->lock.
692 */
693 data = apic_chip_data(irqdata);
694 if (!data)
695 return;
696 raw_spin_lock(&vector_lock);
697 }
655 raw_spin_unlock(&vector_lock); 698 raw_spin_unlock(&vector_lock);
656} 699}
657#endif 700#endif