Diffstat (limited to 'arch/x86/kernel/apic/vector.c')
 arch/x86/kernel/apic/vector.c | 221 ++++++++++++++++++++++++++++--------------
 1 file changed, 145 insertions(+), 76 deletions(-)
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 908cb37da171..3b670df4ba7b 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -31,7 +31,7 @@ struct apic_chip_data {
 struct irq_domain *x86_vector_domain;
 EXPORT_SYMBOL_GPL(x86_vector_domain);
 static DEFINE_RAW_SPINLOCK(vector_lock);
-static cpumask_var_t vector_cpumask;
+static cpumask_var_t vector_cpumask, vector_searchmask, searched_cpumask;
 static struct irq_chip lapic_controller;
 #ifdef CONFIG_X86_IO_APIC
 static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY];
@@ -118,35 +118,47 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
          */
         static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
         static int current_offset = VECTOR_OFFSET_START % 16;
-        int cpu, err;
+        int cpu, vector;
 
-        if (d->move_in_progress)
+        /*
+         * If there is still a move in progress or the previous move has not
+         * been cleaned up completely, tell the caller to come back later.
+         */
+        if (d->move_in_progress ||
+            cpumask_intersects(d->old_domain, cpu_online_mask))
                 return -EBUSY;
 
         /* Only try and allocate irqs on cpus that are present */
-        err = -ENOSPC;
         cpumask_clear(d->old_domain);
+        cpumask_clear(searched_cpumask);
         cpu = cpumask_first_and(mask, cpu_online_mask);
         while (cpu < nr_cpu_ids) {
-                int new_cpu, vector, offset;
+                int new_cpu, offset;
 
+                /* Get the possible target cpus for @mask/@cpu from the apic */
                 apic->vector_allocation_domain(cpu, vector_cpumask, mask);
 
+                /*
+                 * Clear the offline cpus from @vector_cpumask for searching
+                 * and verify whether the result overlaps with @mask. If true,
+                 * then the call to apic->cpu_mask_to_apicid_and() will
+                 * succeed as well. If not, no point in trying to find a
+                 * vector in this mask.
+                 */
+                cpumask_and(vector_searchmask, vector_cpumask, cpu_online_mask);
+                if (!cpumask_intersects(vector_searchmask, mask))
+                        goto next_cpu;
+
                 if (cpumask_subset(vector_cpumask, d->domain)) {
-                        err = 0;
                         if (cpumask_equal(vector_cpumask, d->domain))
-                                break;
+                                goto success;
                         /*
-                         * New cpumask using the vector is a proper subset of
-                         * the current in use mask. So cleanup the vector
-                         * allocation for the members that are not used anymore.
+                         * Mark the cpus which are no longer in the mask for
+                         * cleanup.
                          */
-                        cpumask_andnot(d->old_domain, d->domain,
-                                       vector_cpumask);
-                        d->move_in_progress =
-                           cpumask_intersects(d->old_domain, cpu_online_mask);
-                        cpumask_and(d->domain, d->domain, vector_cpumask);
-                        break;
+                        cpumask_andnot(d->old_domain, d->domain, vector_cpumask);
+                        vector = d->cfg.vector;
+                        goto update;
                 }
 
                 vector = current_vector;
@@ -158,45 +170,60 @@ next:
                         vector = FIRST_EXTERNAL_VECTOR + offset;
                 }
 
-                if (unlikely(current_vector == vector)) {
-                        cpumask_or(d->old_domain, d->old_domain,
-                                   vector_cpumask);
-                        cpumask_andnot(vector_cpumask, mask, d->old_domain);
-                        cpu = cpumask_first_and(vector_cpumask,
-                                                cpu_online_mask);
-                        continue;
-                }
+                /* If the search wrapped around, try the next cpu */
+                if (unlikely(current_vector == vector))
+                        goto next_cpu;
 
                 if (test_bit(vector, used_vectors))
                         goto next;
 
-                for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask) {
+                for_each_cpu(new_cpu, vector_searchmask) {
                         if (!IS_ERR_OR_NULL(per_cpu(vector_irq, new_cpu)[vector]))
                                 goto next;
                 }
                 /* Found one! */
                 current_vector = vector;
                 current_offset = offset;
-                if (d->cfg.vector) {
+                /* Schedule the old vector for cleanup on all cpus */
+                if (d->cfg.vector)
                         cpumask_copy(d->old_domain, d->domain);
-                        d->move_in_progress =
-                           cpumask_intersects(d->old_domain, cpu_online_mask);
-                }
-                for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask)
+                for_each_cpu(new_cpu, vector_searchmask)
                         per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq);
-                d->cfg.vector = vector;
-                cpumask_copy(d->domain, vector_cpumask);
-                err = 0;
-                break;
-        }
+                goto update;
 
-        if (!err) {
-                /* cache destination APIC IDs into cfg->dest_apicid */
-                err = apic->cpu_mask_to_apicid_and(mask, d->domain,
-                                                   &d->cfg.dest_apicid);
+next_cpu:
+                /*
+                 * We exclude the current @vector_cpumask from the requested
+                 * @mask and try again with the next online cpu in the
+                 * result. We cannot modify @mask, so we use @vector_cpumask
+                 * as a temporary buffer here as it will be reassigned when
+                 * calling apic->vector_allocation_domain() above.
+                 */
+                cpumask_or(searched_cpumask, searched_cpumask, vector_cpumask);
+                cpumask_andnot(vector_cpumask, mask, searched_cpumask);
+                cpu = cpumask_first_and(vector_cpumask, cpu_online_mask);
+                continue;
         }
+        return -ENOSPC;
 
-        return err;
+update:
+        /*
+         * Exclude offline cpus from the cleanup mask and set the
+         * move_in_progress flag when the result is not empty.
+         */
+        cpumask_and(d->old_domain, d->old_domain, cpu_online_mask);
+        d->move_in_progress = !cpumask_empty(d->old_domain);
+        d->cfg.vector = vector;
+        cpumask_copy(d->domain, vector_cpumask);
+success:
+        /*
+         * Cache destination APIC IDs into cfg->dest_apicid. This cannot fail
+         * as we already established that mask & d->domain & cpu_online_mask
+         * is not empty.
+         */
+        BUG_ON(apic->cpu_mask_to_apicid_and(mask, d->domain,
+                                            &d->cfg.dest_apicid));
+        return 0;
 }
 
 static int assign_irq_vector(int irq, struct apic_chip_data *data,
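The reworked allocator above replaces the old err/break bookkeeping with explicit next_cpu/update/success exits and a searched_cpumask that accumulates domains that have already been tried. The following is a rough, stand-alone user-space illustration of that control flow only, not kernel code: the uint64_t masks, find_vector(), alloc_domain() and ONLINE_MASK are all invented stand-ins for the cpumask helpers and the apic callback.

#include <stdint.h>
#include <stdio.h>

#define NCPUS        8
#define NVECTORS     16
#define ONLINE_MASK  0x3fULL                     /* assume cpus 0-5 online */

static int vectors_in_use[NCPUS][NVECTORS];      /* per-cpu vector table   */

/* Stand-in for apic->vector_allocation_domain(): one cpu per domain. */
static uint64_t alloc_domain(int cpu) { return 1ULL << cpu; }

static int find_vector(uint64_t mask, uint64_t *domain)
{
        uint64_t searched = 0;                   /* searched_cpumask       */

        for (int cpu = 0; cpu < NCPUS; cpu++) {
                uint64_t bit = 1ULL << cpu;

                if (!(mask & ONLINE_MASK & bit) || (searched & bit))
                        continue;                /* offline or already tried */

                uint64_t dom = alloc_domain(cpu);
                uint64_t search = dom & ONLINE_MASK;   /* vector_searchmask */

                if (!(search & mask)) {          /* nothing usable: "next_cpu" */
                        searched |= dom;
                        continue;
                }

                for (int vec = 0; vec < NVECTORS; vec++) {
                        int busy = 0;

                        for (int c = 0; c < NCPUS; c++)
                                if ((search & (1ULL << c)) && vectors_in_use[c][vec])
                                        busy = 1;
                        if (busy)
                                continue;        /* "goto next"            */
                        for (int c = 0; c < NCPUS; c++)
                                if (search & (1ULL << c))
                                        vectors_in_use[c][vec] = 1;
                        *domain = dom;           /* "goto update"/"success" */
                        return vec;
                }
                searched |= dom;                 /* search exhausted: next cpu */
        }
        return -1;                               /* maps to -ENOSPC        */
}

int main(void)
{
        uint64_t dom = 0;
        int vec = find_vector(0x0eULL, &dom);    /* request cpus 1-3       */

        printf("vector %d on domain 0x%llx\n", vec, (unsigned long long)dom);
        return 0;
}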
@@ -226,10 +253,8 @@ static int assign_irq_vector_policy(int irq, int node,
 static void clear_irq_vector(int irq, struct apic_chip_data *data)
 {
         struct irq_desc *desc;
-        unsigned long flags;
         int cpu, vector;
 
-        raw_spin_lock_irqsave(&vector_lock, flags);
         BUG_ON(!data->cfg.vector);
 
         vector = data->cfg.vector;
@@ -239,10 +264,13 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
         data->cfg.vector = 0;
         cpumask_clear(data->domain);
 
-        if (likely(!data->move_in_progress)) {
-                raw_spin_unlock_irqrestore(&vector_lock, flags);
+        /*
+         * If move is in progress or the old_domain mask is not empty,
+         * i.e. the cleanup IPI has not been processed yet, we need to remove
+         * the old references to desc from all cpus' vector tables.
+         */
+        if (!data->move_in_progress && cpumask_empty(data->old_domain))
                 return;
-        }
 
         desc = irq_to_desc(irq);
         for_each_cpu_and(cpu, data->old_domain, cpu_online_mask) {
@@ -255,7 +283,6 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
                 }
         }
         data->move_in_progress = 0;
-        raw_spin_unlock_irqrestore(&vector_lock, flags);
 }
 
 void init_irq_alloc_info(struct irq_alloc_info *info,
@@ -276,19 +303,24 @@ void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src)
 static void x86_vector_free_irqs(struct irq_domain *domain,
                                  unsigned int virq, unsigned int nr_irqs)
 {
+        struct apic_chip_data *apic_data;
         struct irq_data *irq_data;
+        unsigned long flags;
         int i;
 
         for (i = 0; i < nr_irqs; i++) {
                 irq_data = irq_domain_get_irq_data(x86_vector_domain, virq + i);
                 if (irq_data && irq_data->chip_data) {
+                        raw_spin_lock_irqsave(&vector_lock, flags);
                         clear_irq_vector(virq + i, irq_data->chip_data);
-                        free_apic_chip_data(irq_data->chip_data);
+                        apic_data = irq_data->chip_data;
+                        irq_domain_reset_irq_data(irq_data);
+                        raw_spin_unlock_irqrestore(&vector_lock, flags);
+                        free_apic_chip_data(apic_data);
 #ifdef CONFIG_X86_IO_APIC
                         if (virq + i < nr_legacy_irqs())
                                 legacy_irq_data[virq + i] = NULL;
 #endif
-                        irq_domain_reset_irq_data(irq_data);
                 }
         }
 }
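The x86_vector_free_irqs() change above takes vector_lock around clear_irq_vector() and irq_domain_reset_irq_data(), and only calls free_apic_chip_data() after the lock has been dropped. A minimal stand-alone sketch of that "unpublish under the lock, free outside it" ordering, with invented names and a pthread mutex standing in for the raw spinlock:

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

struct slot { void *chip_data; };

static void free_slot(struct slot *s)
{
        void *data;

        pthread_mutex_lock(&table_lock);
        data = s->chip_data;          /* grab the reference...               */
        s->chip_data = NULL;          /* ...and unpublish it under the lock  */
        pthread_mutex_unlock(&table_lock);

        free(data);                   /* no allocator calls with the lock held */
}

int main(void)
{
        struct slot s = { .chip_data = malloc(32) };

        free_slot(&s);
        return 0;
}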
@@ -406,6 +438,8 @@ int __init arch_early_irq_init(void)
         arch_init_htirq_domain(x86_vector_domain);
 
         BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL));
+        BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL));
+        BUG_ON(!alloc_cpumask_var(&searched_cpumask, GFP_KERNEL));
 
         return arch_early_ioapic_init();
 }
@@ -494,14 +528,7 @@ static int apic_set_affinity(struct irq_data *irq_data,
                 return -EINVAL;
 
         err = assign_irq_vector(irq, data, dest);
-        if (err) {
-                if (assign_irq_vector(irq, data,
-                                      irq_data_get_affinity_mask(irq_data)))
-                        pr_err("Failed to recover vector for irq %d\n", irq);
-                return err;
-        }
-
-        return IRQ_SET_MASK_OK;
+        return err ? err : IRQ_SET_MASK_OK;
 }
 
 static struct irq_chip lapic_controller = {
@@ -513,20 +540,12 @@ static struct irq_chip lapic_controller = {
 #ifdef CONFIG_SMP
 static void __send_cleanup_vector(struct apic_chip_data *data)
 {
-        cpumask_var_t cleanup_mask;
-
-        if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
-                unsigned int i;
-
-                for_each_cpu_and(i, data->old_domain, cpu_online_mask)
-                        apic->send_IPI_mask(cpumask_of(i),
-                                            IRQ_MOVE_CLEANUP_VECTOR);
-        } else {
-                cpumask_and(cleanup_mask, data->old_domain, cpu_online_mask);
-                apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-                free_cpumask_var(cleanup_mask);
-        }
+        raw_spin_lock(&vector_lock);
+        cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
         data->move_in_progress = 0;
+        if (!cpumask_empty(data->old_domain))
+                apic->send_IPI_mask(data->old_domain, IRQ_MOVE_CLEANUP_VECTOR);
+        raw_spin_unlock(&vector_lock);
 }
 
 void send_cleanup_vector(struct irq_cfg *cfg)
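The new __send_cleanup_vector() prunes old_domain against cpu_online_mask and clears move_in_progress under vector_lock, and only sends the cleanup IPI if anything is left, so the GFP_ATOMIC cpumask allocation can go away. A small user-space model of that ordering follows; the names are invented and printf stands in for the IPI:

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

static pthread_mutex_t vector_lock = PTHREAD_MUTEX_INITIALIZER;
static uint64_t online_mask = 0x0fULL;           /* assume cpus 0-3 online  */

struct chip_data {
        uint64_t old_domain;                     /* cpus holding the old vector */
        int      move_in_progress;
};

static void send_cleanup(struct chip_data *data)
{
        pthread_mutex_lock(&vector_lock);
        data->old_domain &= online_mask;         /* offline cpus need no IPI    */
        data->move_in_progress = 0;              /* cleared before sending      */
        if (data->old_domain)
                printf("cleanup IPI to mask 0x%llx\n",
                       (unsigned long long)data->old_domain);
        pthread_mutex_unlock(&vector_lock);
}

int main(void)
{
        struct chip_data d = { .old_domain = 0x32ULL, .move_in_progress = 1 };

        send_cleanup(&d);                        /* cpus 4,5 pruned -> 0x02     */
        return 0;
}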
@@ -570,12 +589,25 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
                         goto unlock;
 
                 /*
-                 * Check if the irq migration is in progress. If so, we
-                 * haven't received the cleanup request yet for this irq.
+                 * Nothing to cleanup if irq migration is in progress
+                 * or this cpu is not set in the cleanup mask.
                  */
-                if (data->move_in_progress)
+                if (data->move_in_progress ||
+                    !cpumask_test_cpu(me, data->old_domain))
                         goto unlock;
 
+                /*
+                 * We have two cases to handle here:
+                 * 1) vector is unchanged but the target mask got reduced
+                 * 2) vector and the target mask have changed
+                 *
+                 * #1 is obvious, but in #2 we have two vectors with the same
+                 * irq descriptor: the old and the new vector. So we need to
+                 * make sure that we only clean up the old vector. The new
+                 * vector has the current @vector number in the config and
+                 * this cpu is part of the target mask. We better leave that
+                 * one alone.
+                 */
                 if (vector == data->cfg.vector &&
                     cpumask_test_cpu(me, data->domain))
                         goto unlock;
@@ -593,6 +625,7 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
                         goto unlock;
                 }
                 __this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
+                cpumask_clear_cpu(me, data->old_domain);
 unlock:
                 raw_spin_unlock(&desc->lock);
         }
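The cleanup interrupt now also bails out when this cpu is not in old_domain, and it still refuses to touch an entry that matches the current vector and domain (case #2 in the comment above). The following is a self-contained sketch of that per-cpu decision; the struct, field and function names are invented for illustration:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct vec_state {
        int      cfg_vector;       /* currently programmed vector      */
        uint64_t domain;           /* cpus using the current vector    */
        uint64_t old_domain;       /* cpus still holding the old one   */
        bool     move_in_progress;
};

static bool should_release(int me, int vector, const struct vec_state *s)
{
        if (s->move_in_progress || !(s->old_domain & (1ULL << me)))
                return false;      /* nothing to clean up on this cpu  */
        if (vector == s->cfg_vector && (s->domain & (1ULL << me)))
                return false;      /* this is the live vector: keep it */
        return true;               /* stale entry: release it and clear
                                      this cpu from old_domain         */
}

int main(void)
{
        struct vec_state s = {
                .cfg_vector = 34, .domain = 0x4ULL,
                .old_domain = 0x5ULL, .move_in_progress = false,
        };

        printf("%d\n", should_release(0, 33, &s));   /* 1: stale old vector */
        printf("%d\n", should_release(2, 34, &s));   /* 0: live vector      */
        return 0;
}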
@@ -621,12 +654,48 @@ void irq_complete_move(struct irq_cfg *cfg)
         __irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
 }
 
-void irq_force_complete_move(int irq)
+/*
+ * Called with @desc->lock held and interrupts disabled.
+ */
+void irq_force_complete_move(struct irq_desc *desc)
 {
-        struct irq_cfg *cfg = irq_cfg(irq);
+        struct irq_data *irqdata = irq_desc_get_irq_data(desc);
+        struct apic_chip_data *data = apic_chip_data(irqdata);
+        struct irq_cfg *cfg = data ? &data->cfg : NULL;
 
-        if (cfg)
-                __irq_complete_move(cfg, cfg->vector);
+        if (!cfg)
+                return;
+
+        __irq_complete_move(cfg, cfg->vector);
+
+        /*
+         * This is tricky. If the cleanup of @data->old_domain has not been
+         * done yet, then the following setaffinity call will fail with
+         * -EBUSY. This can leave the interrupt in a stale state.
+         *
+         * The cleanup cannot make progress because we hold @desc->lock. So in
+         * case @data->old_domain is not yet cleaned up, we need to drop the
+         * lock and acquire it again. @desc cannot go away, because the
+         * hotplug code holds the sparse irq lock.
+         */
+        raw_spin_lock(&vector_lock);
+        /* Clean out all offline cpus (including ourself) first. */
+        cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
+        while (!cpumask_empty(data->old_domain)) {
+                raw_spin_unlock(&vector_lock);
+                raw_spin_unlock(&desc->lock);
+                cpu_relax();
+                raw_spin_lock(&desc->lock);
+                /*
+                 * Reevaluate apic_chip_data. It might have been cleared after
+                 * we dropped @desc->lock.
+                 */
+                data = apic_chip_data(irqdata);
+                if (!data)
+                        return;
+                raw_spin_lock(&vector_lock);
+        }
+        raw_spin_unlock(&vector_lock);
 }
 #endif
 
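The while loop added to irq_force_complete_move() drops both vector_lock and desc->lock so the pending cleanup can run, then retakes desc->lock and revalidates the chip data before looping. A user-space sketch of that lock-drop-and-retry shape follows; the pthread mutexes, flags and wait_for_cleanup() are invented stand-ins for the kernel locks and state:

#include <pthread.h>
#include <sched.h>
#include <stdbool.h>

static pthread_mutex_t desc_lock   = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t vector_lock = PTHREAD_MUTEX_INITIALIZER;
static volatile bool pending_cleanup;    /* cleared by the "cleanup IPI" side */
static volatile bool data_valid = true;  /* chip data may vanish meanwhile    */

/* Called with desc_lock held, mirroring the kernel's calling convention. */
static void wait_for_cleanup(void)
{
        pthread_mutex_lock(&vector_lock);
        while (pending_cleanup) {
                pthread_mutex_unlock(&vector_lock);
                pthread_mutex_unlock(&desc_lock);  /* let the cleanup side run */
                sched_yield();                     /* stand-in for cpu_relax() */
                pthread_mutex_lock(&desc_lock);
                if (!data_valid)                   /* revalidate after retaking */
                        return;
                pthread_mutex_lock(&vector_lock);
        }
        pthread_mutex_unlock(&vector_lock);
}

int main(void)
{
        pthread_mutex_lock(&desc_lock);
        wait_for_cleanup();        /* returns immediately: nothing pending */
        pthread_mutex_unlock(&desc_lock);
        return 0;
}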