Diffstat (limited to 'arch/x86/kernel/apic/vector.c')
-rw-r--r--      arch/x86/kernel/apic/vector.c   221
1 file changed, 145 insertions, 76 deletions
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 908cb37da171..3b670df4ba7b 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -31,7 +31,7 @@ struct apic_chip_data {
 struct irq_domain *x86_vector_domain;
 EXPORT_SYMBOL_GPL(x86_vector_domain);
 static DEFINE_RAW_SPINLOCK(vector_lock);
-static cpumask_var_t vector_cpumask;
+static cpumask_var_t vector_cpumask, vector_searchmask, searched_cpumask;
 static struct irq_chip lapic_controller;
 #ifdef CONFIG_X86_IO_APIC
 static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY];
@@ -118,35 +118,47 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
          */
         static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
         static int current_offset = VECTOR_OFFSET_START % 16;
-        int cpu, err;
+        int cpu, vector;
 
-        if (d->move_in_progress)
+        /*
+         * If there is still a move in progress or the previous move has not
+         * been cleaned up completely, tell the caller to come back later.
+         */
+        if (d->move_in_progress ||
+            cpumask_intersects(d->old_domain, cpu_online_mask))
                 return -EBUSY;
 
         /* Only try and allocate irqs on cpus that are present */
-        err = -ENOSPC;
         cpumask_clear(d->old_domain);
+        cpumask_clear(searched_cpumask);
         cpu = cpumask_first_and(mask, cpu_online_mask);
         while (cpu < nr_cpu_ids) {
-                int new_cpu, vector, offset;
+                int new_cpu, offset;
 
+                /* Get the possible target cpus for @mask/@cpu from the apic */
                 apic->vector_allocation_domain(cpu, vector_cpumask, mask);
 
+                /*
+                 * Clear the offline cpus from @vector_cpumask for searching
+                 * and verify whether the result overlaps with @mask. If true,
+                 * then the call to apic->cpu_mask_to_apicid_and() will
+                 * succeed as well. If not, no point in trying to find a
+                 * vector in this mask.
+                 */
+                cpumask_and(vector_searchmask, vector_cpumask, cpu_online_mask);
+                if (!cpumask_intersects(vector_searchmask, mask))
+                        goto next_cpu;
+
                 if (cpumask_subset(vector_cpumask, d->domain)) {
-                        err = 0;
                         if (cpumask_equal(vector_cpumask, d->domain))
-                                break;
+                                goto success;
                         /*
-                         * New cpumask using the vector is a proper subset of
-                         * the current in use mask. So cleanup the vector
-                         * allocation for the members that are not used anymore.
+                         * Mark the cpus which are not longer in the mask for
+                         * cleanup.
                          */
-                        cpumask_andnot(d->old_domain, d->domain,
-                                       vector_cpumask);
-                        d->move_in_progress =
-                           cpumask_intersects(d->old_domain, cpu_online_mask);
-                        cpumask_and(d->domain, d->domain, vector_cpumask);
-                        break;
+                        cpumask_andnot(d->old_domain, d->domain, vector_cpumask);
+                        vector = d->cfg.vector;
+                        goto update;
                 }
 
                 vector = current_vector;
@@ -158,45 +170,60 @@ next:
                         vector = FIRST_EXTERNAL_VECTOR + offset;
                 }
 
-                if (unlikely(current_vector == vector)) {
-                        cpumask_or(d->old_domain, d->old_domain,
-                                   vector_cpumask);
-                        cpumask_andnot(vector_cpumask, mask, d->old_domain);
-                        cpu = cpumask_first_and(vector_cpumask,
-                                                cpu_online_mask);
-                        continue;
-                }
+                /* If the search wrapped around, try the next cpu */
+                if (unlikely(current_vector == vector))
+                        goto next_cpu;
 
                 if (test_bit(vector, used_vectors))
                         goto next;
 
-                for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask) {
+                for_each_cpu(new_cpu, vector_searchmask) {
                         if (!IS_ERR_OR_NULL(per_cpu(vector_irq, new_cpu)[vector]))
                                 goto next;
                 }
                 /* Found one! */
                 current_vector = vector;
                 current_offset = offset;
-                if (d->cfg.vector) {
+                /* Schedule the old vector for cleanup on all cpus */
+                if (d->cfg.vector)
                         cpumask_copy(d->old_domain, d->domain);
-                        d->move_in_progress =
-                           cpumask_intersects(d->old_domain, cpu_online_mask);
-                }
-                for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask)
+                for_each_cpu(new_cpu, vector_searchmask)
                         per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq);
-                d->cfg.vector = vector;
-                cpumask_copy(d->domain, vector_cpumask);
-                err = 0;
-                break;
-        }
+                goto update;
 
-        if (!err) {
-                /* cache destination APIC IDs into cfg->dest_apicid */
-                err = apic->cpu_mask_to_apicid_and(mask, d->domain,
-                                                   &d->cfg.dest_apicid);
+next_cpu:
+                /*
+                 * We exclude the current @vector_cpumask from the requested
+                 * @mask and try again with the next online cpu in the
+                 * result. We cannot modify @mask, so we use @vector_cpumask
+                 * as a temporary buffer here as it will be reassigned when
+                 * calling apic->vector_allocation_domain() above.
+                 */
+                cpumask_or(searched_cpumask, searched_cpumask, vector_cpumask);
+                cpumask_andnot(vector_cpumask, mask, searched_cpumask);
+                cpu = cpumask_first_and(vector_cpumask, cpu_online_mask);
+                continue;
         }
+        return -ENOSPC;
 
-        return err;
+update:
+        /*
+         * Exclude offline cpus from the cleanup mask and set the
+         * move_in_progress flag when the result is not empty.
+         */
+        cpumask_and(d->old_domain, d->old_domain, cpu_online_mask);
+        d->move_in_progress = !cpumask_empty(d->old_domain);
+        d->cfg.vector = vector;
+        cpumask_copy(d->domain, vector_cpumask);
+success:
+        /*
+         * Cache destination APIC IDs into cfg->dest_apicid. This cannot fail
+         * as we already established, that mask & d->domain & cpu_online_mask
+         * is not empty.
+         */
+        BUG_ON(apic->cpu_mask_to_apicid_and(mask, d->domain,
+                                            &d->cfg.dest_apicid));
+        return 0;
 }
 
 static int assign_irq_vector(int irq, struct apic_chip_data *data,
@@ -226,10 +253,8 @@ static int assign_irq_vector_policy(int irq, int node,
 static void clear_irq_vector(int irq, struct apic_chip_data *data)
 {
         struct irq_desc *desc;
-        unsigned long flags;
         int cpu, vector;
 
-        raw_spin_lock_irqsave(&vector_lock, flags);
         BUG_ON(!data->cfg.vector);
 
         vector = data->cfg.vector;
@@ -239,10 +264,13 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
         data->cfg.vector = 0;
         cpumask_clear(data->domain);
 
-        if (likely(!data->move_in_progress)) {
-                raw_spin_unlock_irqrestore(&vector_lock, flags);
+        /*
+         * If move is in progress or the old_domain mask is not empty,
+         * i.e. the cleanup IPI has not been processed yet, we need to remove
+         * the old references to desc from all cpus vector tables.
+         */
+        if (!data->move_in_progress && cpumask_empty(data->old_domain))
                 return;
-        }
 
         desc = irq_to_desc(irq);
         for_each_cpu_and(cpu, data->old_domain, cpu_online_mask) {
@@ -255,7 +283,6 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
                 }
         }
         data->move_in_progress = 0;
-        raw_spin_unlock_irqrestore(&vector_lock, flags);
 }
 
 void init_irq_alloc_info(struct irq_alloc_info *info,
@@ -276,19 +303,24 @@ void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src)
 static void x86_vector_free_irqs(struct irq_domain *domain,
                                  unsigned int virq, unsigned int nr_irqs)
 {
+        struct apic_chip_data *apic_data;
         struct irq_data *irq_data;
+        unsigned long flags;
         int i;
 
         for (i = 0; i < nr_irqs; i++) {
                 irq_data = irq_domain_get_irq_data(x86_vector_domain, virq + i);
                 if (irq_data && irq_data->chip_data) {
+                        raw_spin_lock_irqsave(&vector_lock, flags);
                         clear_irq_vector(virq + i, irq_data->chip_data);
-                        free_apic_chip_data(irq_data->chip_data);
+                        apic_data = irq_data->chip_data;
+                        irq_domain_reset_irq_data(irq_data);
+                        raw_spin_unlock_irqrestore(&vector_lock, flags);
+                        free_apic_chip_data(apic_data);
 #ifdef CONFIG_X86_IO_APIC
                         if (virq + i < nr_legacy_irqs())
                                 legacy_irq_data[virq + i] = NULL;
 #endif
-                        irq_domain_reset_irq_data(irq_data);
                 }
         }
 }
@@ -406,6 +438,8 @@ int __init arch_early_irq_init(void)
         arch_init_htirq_domain(x86_vector_domain);
 
         BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL));
+        BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL));
+        BUG_ON(!alloc_cpumask_var(&searched_cpumask, GFP_KERNEL));
 
         return arch_early_ioapic_init();
 }
@@ -494,14 +528,7 @@ static int apic_set_affinity(struct irq_data *irq_data,
                 return -EINVAL;
 
         err = assign_irq_vector(irq, data, dest);
-        if (err) {
-                if (assign_irq_vector(irq, data,
-                                      irq_data_get_affinity_mask(irq_data)))
-                        pr_err("Failed to recover vector for irq %d\n", irq);
-                return err;
-        }
-
-        return IRQ_SET_MASK_OK;
+        return err ? err : IRQ_SET_MASK_OK;
 }
 
 static struct irq_chip lapic_controller = {
@@ -513,20 +540,12 @@ static struct irq_chip lapic_controller = {
 #ifdef CONFIG_SMP
 static void __send_cleanup_vector(struct apic_chip_data *data)
 {
-        cpumask_var_t cleanup_mask;
-
-        if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
-                unsigned int i;
-
-                for_each_cpu_and(i, data->old_domain, cpu_online_mask)
-                        apic->send_IPI_mask(cpumask_of(i),
-                                            IRQ_MOVE_CLEANUP_VECTOR);
-        } else {
-                cpumask_and(cleanup_mask, data->old_domain, cpu_online_mask);
-                apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-                free_cpumask_var(cleanup_mask);
-        }
+        raw_spin_lock(&vector_lock);
+        cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
         data->move_in_progress = 0;
+        if (!cpumask_empty(data->old_domain))
+                apic->send_IPI_mask(data->old_domain, IRQ_MOVE_CLEANUP_VECTOR);
+        raw_spin_unlock(&vector_lock);
 }
 
 void send_cleanup_vector(struct irq_cfg *cfg)
@@ -570,12 +589,25 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
                         goto unlock;
 
                 /*
-                 * Check if the irq migration is in progress. If so, we
-                 * haven't received the cleanup request yet for this irq.
+                 * Nothing to cleanup if irq migration is in progress
+                 * or this cpu is not set in the cleanup mask.
                  */
-                if (data->move_in_progress)
+                if (data->move_in_progress ||
+                    !cpumask_test_cpu(me, data->old_domain))
                         goto unlock;
 
+                /*
+                 * We have two cases to handle here:
+                 * 1) vector is unchanged but the target mask got reduced
+                 * 2) vector and the target mask has changed
+                 *
+                 * #1 is obvious, but in #2 we have two vectors with the same
+                 * irq descriptor: the old and the new vector. So we need to
+                 * make sure that we only cleanup the old vector. The new
+                 * vector has the current @vector number in the config and
+                 * this cpu is part of the target mask. We better leave that
+                 * one alone.
+                 */
                 if (vector == data->cfg.vector &&
                     cpumask_test_cpu(me, data->domain))
                         goto unlock;
@@ -593,6 +625,7 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
                         goto unlock;
                 }
                 __this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
+                cpumask_clear_cpu(me, data->old_domain);
 unlock:
                 raw_spin_unlock(&desc->lock);
         }
@@ -621,12 +654,48 @@ void irq_complete_move(struct irq_cfg *cfg)
         __irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
 }
 
-void irq_force_complete_move(int irq)
+/*
+ * Called with @desc->lock held and interrupts disabled.
+ */
+void irq_force_complete_move(struct irq_desc *desc)
 {
-        struct irq_cfg *cfg = irq_cfg(irq);
+        struct irq_data *irqdata = irq_desc_get_irq_data(desc);
+        struct apic_chip_data *data = apic_chip_data(irqdata);
+        struct irq_cfg *cfg = data ? &data->cfg : NULL;
 
-        if (cfg)
-                __irq_complete_move(cfg, cfg->vector);
+        if (!cfg)
+                return;
+
+        __irq_complete_move(cfg, cfg->vector);
+
+        /*
+         * This is tricky. If the cleanup of @data->old_domain has not been
+         * done yet, then the following setaffinity call will fail with
+         * -EBUSY. This can leave the interrupt in a stale state.
+         *
+         * The cleanup cannot make progress because we hold @desc->lock. So in
+         * case @data->old_domain is not yet cleaned up, we need to drop the
+         * lock and acquire it again. @desc cannot go away, because the
+         * hotplug code holds the sparse irq lock.
+         */
+        raw_spin_lock(&vector_lock);
+        /* Clean out all offline cpus (including ourself) first. */
+        cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
+        while (!cpumask_empty(data->old_domain)) {
+                raw_spin_unlock(&vector_lock);
+                raw_spin_unlock(&desc->lock);
+                cpu_relax();
+                raw_spin_lock(&desc->lock);
+                /*
+                 * Reevaluate apic_chip_data. It might have been cleared after
+                 * we dropped @desc->lock.
+                 */
+                data = apic_chip_data(irqdata);
+                if (!data)
+                        return;
+                raw_spin_lock(&vector_lock);
+        }
+        raw_spin_unlock(&vector_lock);
 }
 #endif
 
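Editor's note: the long comment added to irq_force_complete_move() above describes a drop-the-lock-and-retry pattern: the cleanup it waits for cannot run while @desc->lock is held, so the waiter releases the lock, gives the other side a chance to run, reacquires, and rechecks. Below is a minimal, hypothetical userspace sketch of that idea using POSIX threads; the names (desc_lock, state_lock, cleanup_pending) are illustrative stand-ins and are not part of the kernel code in this diff.

/*
 * Hypothetical userspace sketch (not kernel code) of the drop-lock-and-retry
 * pattern used by irq_force_complete_move() above: the waiter cannot keep
 * holding the "descriptor" lock, because the side that clears the pending
 * state needs that lock to make progress.
 */
#include <pthread.h>
#include <sched.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t desc_lock  = PTHREAD_MUTEX_INITIALIZER; /* like desc->lock  */
static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER; /* like vector_lock */
static bool cleanup_pending = true;      /* like !cpumask_empty(data->old_domain)  */

/* The "cleanup" side: needs desc_lock before it can clear the pending flag. */
static void *cleanup_thread(void *arg)
{
        (void)arg;
        pthread_mutex_lock(&desc_lock);
        pthread_mutex_lock(&state_lock);
        cleanup_pending = false;
        pthread_mutex_unlock(&state_lock);
        pthread_mutex_unlock(&desc_lock);
        return NULL;
}

/* The waiter: called with desc_lock held, mirroring the kernel function. */
static void wait_for_cleanup(void)
{
        pthread_mutex_lock(&state_lock);
        while (cleanup_pending) {
                /* Cleanup cannot run while we hold the locks: drop both. */
                pthread_mutex_unlock(&state_lock);
                pthread_mutex_unlock(&desc_lock);
                sched_yield();                   /* stand-in for cpu_relax() */
                pthread_mutex_lock(&desc_lock);  /* reacquire in the same order */
                pthread_mutex_lock(&state_lock); /* and recheck under the lock */
        }
        pthread_mutex_unlock(&state_lock);
}

int main(void)
{
        pthread_t t;

        pthread_mutex_lock(&desc_lock);          /* caller context holds desc_lock */
        pthread_create(&t, NULL, cleanup_thread, NULL);
        wait_for_cleanup();                      /* returns once cleanup has run */
        pthread_mutex_unlock(&desc_lock);
        pthread_join(t, NULL);
        printf("cleanup completed\n");
        return 0;
}

As in the kernel code, both sides take the locks in the same order and the waiter only rechecks the condition after reacquiring them, so the loop terminates as soon as the cleanup side has had a chance to run.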