Diffstat (limited to 'arch/x86/kernel/apic/vector.c')
-rw-r--r--      arch/x86/kernel/apic/vector.c   221
1 file changed, 145 insertions, 76 deletions
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 908cb37da171..3b670df4ba7b 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -31,7 +31,7 @@ struct apic_chip_data {
 struct irq_domain *x86_vector_domain;
 EXPORT_SYMBOL_GPL(x86_vector_domain);
 static DEFINE_RAW_SPINLOCK(vector_lock);
-static cpumask_var_t vector_cpumask;
+static cpumask_var_t vector_cpumask, vector_searchmask, searched_cpumask;
 static struct irq_chip lapic_controller;
 #ifdef CONFIG_X86_IO_APIC
 static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY];
@@ -118,35 +118,47 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
          */
         static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
         static int current_offset = VECTOR_OFFSET_START % 16;
-        int cpu, err;
+        int cpu, vector;
 
-        if (d->move_in_progress)
+        /*
+         * If there is still a move in progress or the previous move has not
+         * been cleaned up completely, tell the caller to come back later.
+         */
+        if (d->move_in_progress ||
+            cpumask_intersects(d->old_domain, cpu_online_mask))
                 return -EBUSY;
 
         /* Only try and allocate irqs on cpus that are present */
-        err = -ENOSPC;
         cpumask_clear(d->old_domain);
+        cpumask_clear(searched_cpumask);
         cpu = cpumask_first_and(mask, cpu_online_mask);
         while (cpu < nr_cpu_ids) {
-                int new_cpu, vector, offset;
+                int new_cpu, offset;
 
+                /* Get the possible target cpus for @mask/@cpu from the apic */
                 apic->vector_allocation_domain(cpu, vector_cpumask, mask);
 
+                /*
+                 * Clear the offline cpus from @vector_cpumask for searching
+                 * and verify whether the result overlaps with @mask. If true,
+                 * then the call to apic->cpu_mask_to_apicid_and() will
+                 * succeed as well. If not, no point in trying to find a
+                 * vector in this mask.
+                 */
+                cpumask_and(vector_searchmask, vector_cpumask, cpu_online_mask);
+                if (!cpumask_intersects(vector_searchmask, mask))
+                        goto next_cpu;
+
                 if (cpumask_subset(vector_cpumask, d->domain)) {
-                        err = 0;
                         if (cpumask_equal(vector_cpumask, d->domain))
-                                break;
+                                goto success;
                         /*
-                         * New cpumask using the vector is a proper subset of
-                         * the current in use mask. So cleanup the vector
-                         * allocation for the members that are not used anymore.
+                         * Mark the cpus which are not longer in the mask for
+                         * cleanup.
                          */
-                        cpumask_andnot(d->old_domain, d->domain,
-                                       vector_cpumask);
-                        d->move_in_progress =
-                           cpumask_intersects(d->old_domain, cpu_online_mask);
-                        cpumask_and(d->domain, d->domain, vector_cpumask);
-                        break;
+                        cpumask_andnot(d->old_domain, d->domain, vector_cpumask);
+                        vector = d->cfg.vector;
+                        goto update;
                 }
 
                 vector = current_vector;
@@ -158,45 +170,60 @@ next:
                         vector = FIRST_EXTERNAL_VECTOR + offset;
                 }
 
-                if (unlikely(current_vector == vector)) {
-                        cpumask_or(d->old_domain, d->old_domain,
-                                   vector_cpumask);
-                        cpumask_andnot(vector_cpumask, mask, d->old_domain);
-                        cpu = cpumask_first_and(vector_cpumask,
-                                                cpu_online_mask);
-                        continue;
-                }
+                /* If the search wrapped around, try the next cpu */
+                if (unlikely(current_vector == vector))
+                        goto next_cpu;
 
                 if (test_bit(vector, used_vectors))
                         goto next;
 
-                for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask) {
+                for_each_cpu(new_cpu, vector_searchmask) {
                         if (!IS_ERR_OR_NULL(per_cpu(vector_irq, new_cpu)[vector]))
                                 goto next;
                 }
                 /* Found one! */
                 current_vector = vector;
                 current_offset = offset;
-                if (d->cfg.vector) {
+                /* Schedule the old vector for cleanup on all cpus */
+                if (d->cfg.vector)
                         cpumask_copy(d->old_domain, d->domain);
-                        d->move_in_progress =
-                           cpumask_intersects(d->old_domain, cpu_online_mask);
-                }
-                for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask)
+                for_each_cpu(new_cpu, vector_searchmask)
                         per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq);
-                d->cfg.vector = vector;
-                cpumask_copy(d->domain, vector_cpumask);
-                err = 0;
-                break;
-        }
+                goto update;
 
-        if (!err) {
-                /* cache destination APIC IDs into cfg->dest_apicid */
-                err = apic->cpu_mask_to_apicid_and(mask, d->domain,
-                                                   &d->cfg.dest_apicid);
+next_cpu:
+                /*
+                 * We exclude the current @vector_cpumask from the requested
+                 * @mask and try again with the next online cpu in the
+                 * result. We cannot modify @mask, so we use @vector_cpumask
+                 * as a temporary buffer here as it will be reassigned when
+                 * calling apic->vector_allocation_domain() above.
+                 */
+                cpumask_or(searched_cpumask, searched_cpumask, vector_cpumask);
+                cpumask_andnot(vector_cpumask, mask, searched_cpumask);
+                cpu = cpumask_first_and(vector_cpumask, cpu_online_mask);
+                continue;
         }
+        return -ENOSPC;
 
-        return err;
+update:
+        /*
+         * Exclude offline cpus from the cleanup mask and set the
+         * move_in_progress flag when the result is not empty.
+         */
+        cpumask_and(d->old_domain, d->old_domain, cpu_online_mask);
+        d->move_in_progress = !cpumask_empty(d->old_domain);
+        d->cfg.vector = vector;
+        cpumask_copy(d->domain, vector_cpumask);
+success:
+        /*
+         * Cache destination APIC IDs into cfg->dest_apicid. This cannot fail
+         * as we already established, that mask & d->domain & cpu_online_mask
+         * is not empty.
+         */
+        BUG_ON(apic->cpu_mask_to_apicid_and(mask, d->domain,
+                                            &d->cfg.dest_apicid));
+        return 0;
 }
 
 static int assign_irq_vector(int irq, struct apic_chip_data *data,
@@ -226,10 +253,8 @@ static int assign_irq_vector_policy(int irq, int node,
 static void clear_irq_vector(int irq, struct apic_chip_data *data)
 {
         struct irq_desc *desc;
-        unsigned long flags;
         int cpu, vector;
 
-        raw_spin_lock_irqsave(&vector_lock, flags);
         BUG_ON(!data->cfg.vector);
 
         vector = data->cfg.vector;
@@ -239,10 +264,13 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
         data->cfg.vector = 0;
         cpumask_clear(data->domain);
 
-        if (likely(!data->move_in_progress)) {
-                raw_spin_unlock_irqrestore(&vector_lock, flags);
+        /*
+         * If move is in progress or the old_domain mask is not empty,
+         * i.e. the cleanup IPI has not been processed yet, we need to remove
+         * the old references to desc from all cpus vector tables.
+         */
+        if (!data->move_in_progress && cpumask_empty(data->old_domain))
                 return;
-        }
 
         desc = irq_to_desc(irq);
         for_each_cpu_and(cpu, data->old_domain, cpu_online_mask) {
@@ -255,7 +283,6 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
                 }
         }
         data->move_in_progress = 0;
-        raw_spin_unlock_irqrestore(&vector_lock, flags);
 }
 
 void init_irq_alloc_info(struct irq_alloc_info *info,
@@ -276,19 +303,24 @@ void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src)
 static void x86_vector_free_irqs(struct irq_domain *domain,
                                  unsigned int virq, unsigned int nr_irqs)
 {
+        struct apic_chip_data *apic_data;
         struct irq_data *irq_data;
+        unsigned long flags;
         int i;
 
         for (i = 0; i < nr_irqs; i++) {
                 irq_data = irq_domain_get_irq_data(x86_vector_domain, virq + i);
                 if (irq_data && irq_data->chip_data) {
+                        raw_spin_lock_irqsave(&vector_lock, flags);
                         clear_irq_vector(virq + i, irq_data->chip_data);
-                        free_apic_chip_data(irq_data->chip_data);
+                        apic_data = irq_data->chip_data;
+                        irq_domain_reset_irq_data(irq_data);
+                        raw_spin_unlock_irqrestore(&vector_lock, flags);
+                        free_apic_chip_data(apic_data);
 #ifdef CONFIG_X86_IO_APIC
                         if (virq + i < nr_legacy_irqs())
                                 legacy_irq_data[virq + i] = NULL;
 #endif
-                        irq_domain_reset_irq_data(irq_data);
                 }
         }
 }
@@ -406,6 +438,8 @@ int __init arch_early_irq_init(void)
         arch_init_htirq_domain(x86_vector_domain);
 
         BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL));
+        BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL));
+        BUG_ON(!alloc_cpumask_var(&searched_cpumask, GFP_KERNEL));
 
         return arch_early_ioapic_init();
 }
@@ -494,14 +528,7 @@ static int apic_set_affinity(struct irq_data *irq_data,
                 return -EINVAL;
 
         err = assign_irq_vector(irq, data, dest);
-        if (err) {
-                if (assign_irq_vector(irq, data,
-                                      irq_data_get_affinity_mask(irq_data)))
-                        pr_err("Failed to recover vector for irq %d\n", irq);
-                return err;
-        }
-
-        return IRQ_SET_MASK_OK;
+        return err ? err : IRQ_SET_MASK_OK;
 }
 
 static struct irq_chip lapic_controller = {
@@ -513,20 +540,12 @@ static struct irq_chip lapic_controller = {
 #ifdef CONFIG_SMP
 static void __send_cleanup_vector(struct apic_chip_data *data)
 {
-        cpumask_var_t cleanup_mask;
-
-        if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
-                unsigned int i;
-
-                for_each_cpu_and(i, data->old_domain, cpu_online_mask)
-                        apic->send_IPI_mask(cpumask_of(i),
-                                            IRQ_MOVE_CLEANUP_VECTOR);
-        } else {
-                cpumask_and(cleanup_mask, data->old_domain, cpu_online_mask);
-                apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-                free_cpumask_var(cleanup_mask);
-        }
+        raw_spin_lock(&vector_lock);
+        cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
         data->move_in_progress = 0;
+        if (!cpumask_empty(data->old_domain))
+                apic->send_IPI_mask(data->old_domain, IRQ_MOVE_CLEANUP_VECTOR);
+        raw_spin_unlock(&vector_lock);
 }
 
 void send_cleanup_vector(struct irq_cfg *cfg)
@@ -570,12 +589,25 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
                         goto unlock;
 
                 /*
-                 * Check if the irq migration is in progress. If so, we
-                 * haven't received the cleanup request yet for this irq.
+                 * Nothing to cleanup if irq migration is in progress
+                 * or this cpu is not set in the cleanup mask.
                  */
-                if (data->move_in_progress)
+                if (data->move_in_progress ||
+                    !cpumask_test_cpu(me, data->old_domain))
                         goto unlock;
 
+                /*
+                 * We have two cases to handle here:
+                 * 1) vector is unchanged but the target mask got reduced
+                 * 2) vector and the target mask has changed
+                 *
+                 * #1 is obvious, but in #2 we have two vectors with the same
+                 * irq descriptor: the old and the new vector. So we need to
+                 * make sure that we only cleanup the old vector. The new
+                 * vector has the current @vector number in the config and
+                 * this cpu is part of the target mask. We better leave that
+                 * one alone.
+                 */
                 if (vector == data->cfg.vector &&
                     cpumask_test_cpu(me, data->domain))
                         goto unlock;
@@ -593,6 +625,7 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
                         goto unlock;
                 }
                 __this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
+                cpumask_clear_cpu(me, data->old_domain);
 unlock:
                 raw_spin_unlock(&desc->lock);
         }
@@ -621,12 +654,48 @@ void irq_complete_move(struct irq_cfg *cfg)
         __irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
 }
 
-void irq_force_complete_move(int irq)
+/*
+ * Called with @desc->lock held and interrupts disabled.
+ */
+void irq_force_complete_move(struct irq_desc *desc)
 {
-        struct irq_cfg *cfg = irq_cfg(irq);
+        struct irq_data *irqdata = irq_desc_get_irq_data(desc);
+        struct apic_chip_data *data = apic_chip_data(irqdata);
+        struct irq_cfg *cfg = data ? &data->cfg : NULL;
 
-        if (cfg)
-                __irq_complete_move(cfg, cfg->vector);
+        if (!cfg)
+                return;
+
+        __irq_complete_move(cfg, cfg->vector);
+
+        /*
+         * This is tricky. If the cleanup of @data->old_domain has not been
+         * done yet, then the following setaffinity call will fail with
+         * -EBUSY. This can leave the interrupt in a stale state.
+         *
+         * The cleanup cannot make progress because we hold @desc->lock. So in
+         * case @data->old_domain is not yet cleaned up, we need to drop the
+         * lock and acquire it again. @desc cannot go away, because the
+         * hotplug code holds the sparse irq lock.
+         */
+        raw_spin_lock(&vector_lock);
+        /* Clean out all offline cpus (including ourself) first. */
+        cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
+        while (!cpumask_empty(data->old_domain)) {
+                raw_spin_unlock(&vector_lock);
+                raw_spin_unlock(&desc->lock);
+                cpu_relax();
+                raw_spin_lock(&desc->lock);
+                /*
+                 * Reevaluate apic_chip_data. It might have been cleared after
+                 * we dropped @desc->lock.
+                 */
+                data = apic_chip_data(irqdata);
+                if (!data)
+                        return;
+                raw_spin_lock(&vector_lock);
+        }
+        raw_spin_unlock(&vector_lock);
 }
 #endif
 
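Editor's note: the long comment added to irq_force_complete_move() above describes a drop-the-lock-and-retry pattern: the cleanup it waits for cannot run while @desc->lock is held, so the waiter releases the lock, gives the other side a chance to run, reacquires, and rechecks. Below is a minimal, hypothetical userspace sketch of that idea using POSIX threads; the names (desc_lock, state_lock, cleanup_pending) are illustrative stand-ins and are not part of the kernel code in this diff.

/*
 * Hypothetical userspace sketch (not kernel code) of the drop-lock-and-retry
 * pattern used by irq_force_complete_move() above: the waiter cannot keep
 * holding the "descriptor" lock, because the side that clears the pending
 * state needs that lock to make progress.
 */
#include <pthread.h>
#include <sched.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t desc_lock  = PTHREAD_MUTEX_INITIALIZER; /* like desc->lock  */
static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER; /* like vector_lock */
static bool cleanup_pending = true;      /* like !cpumask_empty(data->old_domain)  */

/* The "cleanup" side: needs desc_lock before it can clear the pending flag. */
static void *cleanup_thread(void *arg)
{
        (void)arg;
        pthread_mutex_lock(&desc_lock);
        pthread_mutex_lock(&state_lock);
        cleanup_pending = false;
        pthread_mutex_unlock(&state_lock);
        pthread_mutex_unlock(&desc_lock);
        return NULL;
}

/* The waiter: called with desc_lock held, mirroring the kernel function. */
static void wait_for_cleanup(void)
{
        pthread_mutex_lock(&state_lock);
        while (cleanup_pending) {
                /* Cleanup cannot run while we hold the locks: drop both. */
                pthread_mutex_unlock(&state_lock);
                pthread_mutex_unlock(&desc_lock);
                sched_yield();                   /* stand-in for cpu_relax() */
                pthread_mutex_lock(&desc_lock);  /* reacquire in the same order */
                pthread_mutex_lock(&state_lock); /* and recheck under the lock */
        }
        pthread_mutex_unlock(&state_lock);
}

int main(void)
{
        pthread_t t;

        pthread_mutex_lock(&desc_lock);          /* caller context holds desc_lock */
        pthread_create(&t, NULL, cleanup_thread, NULL);
        wait_for_cleanup();                      /* returns once cleanup has run */
        pthread_mutex_unlock(&desc_lock);
        pthread_join(t, NULL);
        printf("cleanup completed\n");
        return 0;
}

As in the kernel code, both sides take the locks in the same order and the waiter only rechecks the condition after reacquiring them, so the loop terminates as soon as the cleanup side has had a chance to run.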