Diffstat (limited to 'arch/x86/kernel/apic/vector.c')
 -rw-r--r--  arch/x86/kernel/apic/vector.c | 221
 1 file changed, 145 insertions(+), 76 deletions(-)
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 908cb37da171..3b670df4ba7b 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -31,7 +31,7 @@ struct apic_chip_data {
 struct irq_domain *x86_vector_domain;
 EXPORT_SYMBOL_GPL(x86_vector_domain);
 static DEFINE_RAW_SPINLOCK(vector_lock);
-static cpumask_var_t vector_cpumask;
+static cpumask_var_t vector_cpumask, vector_searchmask, searched_cpumask;
 static struct irq_chip lapic_controller;
 #ifdef CONFIG_X86_IO_APIC
 static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY];
@@ -118,35 +118,47 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
          */
         static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
         static int current_offset = VECTOR_OFFSET_START % 16;
-        int cpu, err;
+        int cpu, vector;
 
-        if (d->move_in_progress)
+        /*
+         * If there is still a move in progress or the previous move has not
+         * been cleaned up completely, tell the caller to come back later.
+         */
+        if (d->move_in_progress ||
+            cpumask_intersects(d->old_domain, cpu_online_mask))
                 return -EBUSY;
 
         /* Only try and allocate irqs on cpus that are present */
-        err = -ENOSPC;
         cpumask_clear(d->old_domain);
+        cpumask_clear(searched_cpumask);
         cpu = cpumask_first_and(mask, cpu_online_mask);
         while (cpu < nr_cpu_ids) {
-                int new_cpu, vector, offset;
+                int new_cpu, offset;
 
+                /* Get the possible target cpus for @mask/@cpu from the apic */
                 apic->vector_allocation_domain(cpu, vector_cpumask, mask);
 
+                /*
+                 * Clear the offline cpus from @vector_cpumask for searching
+                 * and verify whether the result overlaps with @mask. If true,
+                 * then the call to apic->cpu_mask_to_apicid_and() will
+                 * succeed as well. If not, no point in trying to find a
+                 * vector in this mask.
+                 */
+                cpumask_and(vector_searchmask, vector_cpumask, cpu_online_mask);
+                if (!cpumask_intersects(vector_searchmask, mask))
+                        goto next_cpu;
+
                 if (cpumask_subset(vector_cpumask, d->domain)) {
-                        err = 0;
                         if (cpumask_equal(vector_cpumask, d->domain))
-                                break;
+                                goto success;
                         /*
-                         * New cpumask using the vector is a proper subset of
-                         * the current in use mask. So cleanup the vector
-                         * allocation for the members that are not used anymore.
+                         * Mark the cpus which are no longer in the mask for
+                         * cleanup.
                          */
-                        cpumask_andnot(d->old_domain, d->domain,
-                                       vector_cpumask);
-                        d->move_in_progress =
-                           cpumask_intersects(d->old_domain, cpu_online_mask);
-                        cpumask_and(d->domain, d->domain, vector_cpumask);
-                        break;
+                        cpumask_andnot(d->old_domain, d->domain, vector_cpumask);
+                        vector = d->cfg.vector;
+                        goto update;
                 }
 
                 vector = current_vector;
@@ -158,45 +170,60 @@ next:
                         vector = FIRST_EXTERNAL_VECTOR + offset;
                 }
 
-                if (unlikely(current_vector == vector)) {
-                        cpumask_or(d->old_domain, d->old_domain,
-                                   vector_cpumask);
-                        cpumask_andnot(vector_cpumask, mask, d->old_domain);
-                        cpu = cpumask_first_and(vector_cpumask,
-                                                cpu_online_mask);
-                        continue;
-                }
+                /* If the search wrapped around, try the next cpu */
+                if (unlikely(current_vector == vector))
+                        goto next_cpu;
 
                 if (test_bit(vector, used_vectors))
                         goto next;
 
-                for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask) {
+                for_each_cpu(new_cpu, vector_searchmask) {
                         if (!IS_ERR_OR_NULL(per_cpu(vector_irq, new_cpu)[vector]))
                                 goto next;
                 }
                 /* Found one! */
                 current_vector = vector;
                 current_offset = offset;
-                if (d->cfg.vector) {
+                /* Schedule the old vector for cleanup on all cpus */
+                if (d->cfg.vector)
                         cpumask_copy(d->old_domain, d->domain);
-                        d->move_in_progress =
-                           cpumask_intersects(d->old_domain, cpu_online_mask);
-                }
-                for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask)
+                for_each_cpu(new_cpu, vector_searchmask)
                         per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq);
-                d->cfg.vector = vector;
-                cpumask_copy(d->domain, vector_cpumask);
-                err = 0;
-                break;
-        }
+                goto update;
 
-        if (!err) {
-                /* cache destination APIC IDs into cfg->dest_apicid */
-                err = apic->cpu_mask_to_apicid_and(mask, d->domain,
-                                                   &d->cfg.dest_apicid);
+next_cpu:
+                /*
+                 * We exclude the current @vector_cpumask from the requested
+                 * @mask and try again with the next online cpu in the
+                 * result. We cannot modify @mask, so we use @vector_cpumask
+                 * as a temporary buffer here as it will be reassigned when
+                 * calling apic->vector_allocation_domain() above.
+                 */
+                cpumask_or(searched_cpumask, searched_cpumask, vector_cpumask);
+                cpumask_andnot(vector_cpumask, mask, searched_cpumask);
+                cpu = cpumask_first_and(vector_cpumask, cpu_online_mask);
+                continue;
         }
+        return -ENOSPC;
 
-        return err;
+update:
+        /*
+         * Exclude offline cpus from the cleanup mask and set the
+         * move_in_progress flag when the result is not empty.
+         */
+        cpumask_and(d->old_domain, d->old_domain, cpu_online_mask);
+        d->move_in_progress = !cpumask_empty(d->old_domain);
+        d->cfg.vector = vector;
+        cpumask_copy(d->domain, vector_cpumask);
+success:
+        /*
+         * Cache destination APIC IDs into cfg->dest_apicid. This cannot fail
+         * as we already established that mask & d->domain & cpu_online_mask
+         * is not empty.
+         */
+        BUG_ON(apic->cpu_mask_to_apicid_and(mask, d->domain,
+                                            &d->cfg.dest_apicid));
+        return 0;
 }
 
 static int assign_irq_vector(int irq, struct apic_chip_data *data,
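
The two hunks above restructure __assign_irq_vector() around three labelled exit paths: next_cpu excludes everything already searched and retries with the remaining online cpus, update commits a newly found vector and arms the cleanup mask, and success only re-caches the destination APIC IDs. As a rough illustration, here is a minimal userspace C model of that control flow; the uint64_t bitmaps, the first_cpu() helper, and the one-cpu allocation domain are simplified stand-ins invented for this sketch, not the kernel's cpumask or apic API.

#include <stdint.h>
#include <stdio.h>

#define NCPUS 8

static uint64_t online = 0xff;          /* hypothetical online mask */
static uint64_t vector_taken[NCPUS];    /* bit v set: vector v busy on cpu */

static int first_cpu(uint64_t m)
{
        return m ? __builtin_ctzll(m) : -1;
}

static int assign_vector(uint64_t mask, int *out_cpu)
{
        uint64_t searched = 0;
        int cpu = first_cpu(mask & online);

        while (cpu >= 0) {
                /* stand-in for apic->vector_allocation_domain() */
                uint64_t domain = 1ull << cpu;

                /* early check: bail out before any vector search */
                if (!(domain & online & mask))
                        goto next_cpu;

                for (int v = 0; v < 64; v++) {
                        if (vector_taken[cpu] & (1ull << v))
                                continue;
                        /* "update" step: commit the vector we found */
                        vector_taken[cpu] |= 1ull << v;
                        *out_cpu = cpu;
                        return v;       /* "success" would only cache ids */
                }
next_cpu:
                /* drop everything searched so far and retry */
                searched |= domain;
                cpu = first_cpu(mask & ~searched & online);
        }
        return -1;      /* maps to -ENOSPC in the kernel */
}

int main(void)
{
        int cpu, vec = assign_vector(0x3, &cpu);

        printf("got vector %d on cpu %d\n", vec, cpu);
        return 0;
}

Note the early check before the vector scan: in the kernel that is what lets the success path assert, via BUG_ON(), that cpu_mask_to_apicid_and() cannot fail.
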
@@ -226,10 +253,8 @@ static int assign_irq_vector_policy(int irq, int node,
 static void clear_irq_vector(int irq, struct apic_chip_data *data)
 {
         struct irq_desc *desc;
-        unsigned long flags;
         int cpu, vector;
 
-        raw_spin_lock_irqsave(&vector_lock, flags);
         BUG_ON(!data->cfg.vector);
 
         vector = data->cfg.vector;
@@ -239,10 +264,13 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
         data->cfg.vector = 0;
         cpumask_clear(data->domain);
 
-        if (likely(!data->move_in_progress)) {
-                raw_spin_unlock_irqrestore(&vector_lock, flags);
+        /*
+         * If move is in progress or the old_domain mask is not empty,
+         * i.e. the cleanup IPI has not been processed yet, we need to remove
+         * the old references to desc from all cpus' vector tables.
+         */
+        if (!data->move_in_progress && cpumask_empty(data->old_domain))
                 return;
-        }
 
         desc = irq_to_desc(irq);
         for_each_cpu_and(cpu, data->old_domain, cpu_online_mask) {
@@ -255,7 +283,6 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
                 }
         }
         data->move_in_progress = 0;
-        raw_spin_unlock_irqrestore(&vector_lock, flags);
 }
 
 void init_irq_alloc_info(struct irq_alloc_info *info,
@@ -276,19 +303,24 @@ void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src)
 static void x86_vector_free_irqs(struct irq_domain *domain,
                                  unsigned int virq, unsigned int nr_irqs)
 {
+        struct apic_chip_data *apic_data;
         struct irq_data *irq_data;
+        unsigned long flags;
         int i;
 
         for (i = 0; i < nr_irqs; i++) {
                 irq_data = irq_domain_get_irq_data(x86_vector_domain, virq + i);
                 if (irq_data && irq_data->chip_data) {
+                        raw_spin_lock_irqsave(&vector_lock, flags);
                         clear_irq_vector(virq + i, irq_data->chip_data);
-                        free_apic_chip_data(irq_data->chip_data);
+                        apic_data = irq_data->chip_data;
+                        irq_domain_reset_irq_data(irq_data);
+                        raw_spin_unlock_irqrestore(&vector_lock, flags);
+                        free_apic_chip_data(apic_data);
 #ifdef CONFIG_X86_IO_APIC
                         if (virq + i < nr_legacy_irqs())
                                 legacy_irq_data[virq + i] = NULL;
 #endif
-                        irq_domain_reset_irq_data(irq_data);
                 }
         }
 }
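
With the two hunks above, vector_lock moves out of clear_irq_vector() and into its caller, which also resets the irq_data while the lock is held and frees the chip data only after dropping it. Below is a minimal pthread sketch of that caller-locked teardown pattern; the names (clear_slot, free_slot) and the mutex are illustrative stand-ins, not the kernel API.

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t vector_lock = PTHREAD_MUTEX_INITIALIZER;

struct chip_data { int vector; };
struct irq_slot  { struct chip_data *chip; };

/* caller must hold vector_lock, as clear_irq_vector() now requires */
static void clear_slot(struct irq_slot *s)
{
        s->chip->vector = 0;
}

static void free_slot(struct irq_slot *s)
{
        struct chip_data *chip;

        pthread_mutex_lock(&vector_lock);
        clear_slot(s);
        chip = s->chip;
        s->chip = NULL;                 /* detach under the lock ...  */
        pthread_mutex_unlock(&vector_lock);
        free(chip);                     /* ... but free outside of it */
}

int main(void)
{
        struct irq_slot s = { .chip = malloc(sizeof(*s.chip)) };

        s.chip->vector = 42;
        free_slot(&s);
        return 0;
}

Detaching under the lock and freeing outside of it keeps the free out of the critical section while guaranteeing no other lock holder can still reach the object.
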
@@ -406,6 +438,8 @@ int __init arch_early_irq_init(void)
         arch_init_htirq_domain(x86_vector_domain);
 
         BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL));
+        BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL));
+        BUG_ON(!alloc_cpumask_var(&searched_cpumask, GFP_KERNEL));
 
         return arch_early_ioapic_init();
 }
@@ -494,14 +528,7 @@ static int apic_set_affinity(struct irq_data *irq_data,
                 return -EINVAL;
 
         err = assign_irq_vector(irq, data, dest);
-        if (err) {
-                if (assign_irq_vector(irq, data,
-                                      irq_data_get_affinity_mask(irq_data)))
-                        pr_err("Failed to recover vector for irq %d\n", irq);
-                return err;
-        }
-
-        return IRQ_SET_MASK_OK;
+        return err ? err : IRQ_SET_MASK_OK;
 }
 
 static struct irq_chip lapic_controller = {
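
The fallback reassignment can go away because, after the rework above, a failing __assign_irq_vector() no longer touches the previously installed vector and domain, so the caller can simply hand the error back. A tiny sketch of that all-or-nothing contract, with hypothetical names throughout:

#include <errno.h>

struct cfg { int vector; };

/* hypothetical setter: commits on success, touches nothing on failure */
static int set_target(struct cfg *cfg, int requested, int busy)
{
        if (busy)
                return -EBUSY;
        cfg->vector = requested;
        return 0;
}

static int set_affinity(struct cfg *cfg, int requested, int busy)
{
        int err = set_target(cfg, requested, busy);

        return err ? err : 0;   /* mirrors: err ? err : IRQ_SET_MASK_OK */
}

int main(void)
{
        struct cfg c = { .vector = 5 };

        /* a failed request must leave the old vector intact */
        return (set_affinity(&c, 9, 1) == -EBUSY && c.vector == 5) ? 0 : 1;
}
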
@@ -513,20 +540,12 @@ static struct irq_chip lapic_controller = {
 #ifdef CONFIG_SMP
 static void __send_cleanup_vector(struct apic_chip_data *data)
 {
-        cpumask_var_t cleanup_mask;
-
-        if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
-                unsigned int i;
-
-                for_each_cpu_and(i, data->old_domain, cpu_online_mask)
-                        apic->send_IPI_mask(cpumask_of(i),
-                                            IRQ_MOVE_CLEANUP_VECTOR);
-        } else {
-                cpumask_and(cleanup_mask, data->old_domain, cpu_online_mask);
-                apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-                free_cpumask_var(cleanup_mask);
-        }
+        raw_spin_lock(&vector_lock);
+        cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
         data->move_in_progress = 0;
+        if (!cpumask_empty(data->old_domain))
+                apic->send_IPI_mask(data->old_domain, IRQ_MOVE_CLEANUP_VECTOR);
+        raw_spin_unlock(&vector_lock);
 }
 
 void send_cleanup_vector(struct irq_cfg *cfg)
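
Instead of allocating a temporary cpumask with GFP_ATOMIC (which can fail and forced the one-IPI-per-cpu fallback), the hunk above trims data->old_domain in place under vector_lock, which now serializes every user of that mask. A simplified model, with a uint64_t bitmap and a pthread mutex standing in for struct cpumask and the raw spinlock:

#include <pthread.h>
#include <stdint.h>

static pthread_mutex_t vector_lock = PTHREAD_MUTEX_INITIALIZER;
static uint64_t cpu_online_mask = 0xff;         /* hypothetical */

struct chip_data {
        uint64_t old_domain;
        int move_in_progress;
};

static void send_ipi(uint64_t mask) { (void)mask; /* stub */ }

static void send_cleanup(struct chip_data *data)
{
        pthread_mutex_lock(&vector_lock);
        data->old_domain &= cpu_online_mask;    /* trim in place, no alloc */
        data->move_in_progress = 0;
        if (data->old_domain)
                send_ipi(data->old_domain);     /* IRQ_MOVE_CLEANUP_VECTOR */
        pthread_mutex_unlock(&vector_lock);
}

int main(void)
{
        /* bit 8 models a cpu that went offline before the cleanup ran */
        struct chip_data d = { .old_domain = 0x103, .move_in_progress = 1 };

        send_cleanup(&d);
        return (d.old_domain == 0x3 && !d.move_in_progress) ? 0 : 1;
}
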
@@ -570,12 +589,25 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
                         goto unlock;
 
                 /*
-                 * Check if the irq migration is in progress. If so, we
-                 * haven't received the cleanup request yet for this irq.
+                 * Nothing to clean up if irq migration is in progress
+                 * or this cpu is not set in the cleanup mask.
                  */
-                if (data->move_in_progress)
+                if (data->move_in_progress ||
+                    !cpumask_test_cpu(me, data->old_domain))
                         goto unlock;
 
+                /*
+                 * We have two cases to handle here:
+                 * 1) vector is unchanged but the target mask got reduced
+                 * 2) vector and the target mask have changed
+                 *
+                 * #1 is obvious, but in #2 we have two vectors with the same
+                 * irq descriptor: the old and the new vector. So we need to
+                 * make sure that we only clean up the old vector. The new
+                 * vector has the current @vector number in the config and
+                 * this cpu is part of the target mask. We better leave that
+                 * one alone.
+                 */
                 if (vector == data->cfg.vector &&
                     cpumask_test_cpu(me, data->domain))
                         goto unlock;
@@ -593,6 +625,7 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
                         goto unlock;
                 }
                 __this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
+                cpumask_clear_cpu(me, data->old_domain);
 unlock:
                 raw_spin_unlock(&desc->lock);
         }
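
The added cpumask_clear_cpu() turns old_domain into a completion handshake: every cpu that handles the cleanup IPI clears its own bit after releasing its vector-table entry, so an empty mask means the cleanup is truly finished, which is exactly what the new -EBUSY check in __assign_irq_vector() and the wait loop below rely on. A sketch of the per-cpu decision logic, again with bitmap stand-ins rather than the kernel types:

#include <stdint.h>
#include <stddef.h>

struct chip_data {
        uint64_t domain, old_domain;
        int vector;
};

/* models one cpu (@me) handling the cleanup IPI for one vector entry */
static void cleanup_on_cpu(struct chip_data *data, int me, int vector,
                           void **vector_table)
{
        /* nothing to do: this cpu is not in the cleanup mask */
        if (!(data->old_domain & (1ull << me)))
                return;
        /* this entry is the *new* vector; it must survive */
        if (vector == data->vector && (data->domain & (1ull << me)))
                return;

        vector_table[vector] = NULL;            /* VECTOR_UNUSED */
        data->old_domain &= ~(1ull << me);      /* signal completion */
}

int main(void)
{
        void *table[256] = { [100] = (void *)1 };
        struct chip_data d = { .domain = 0x2, .old_domain = 0x1, .vector = 110 };

        cleanup_on_cpu(&d, 0, 100, table);
        return (table[100] == NULL && d.old_domain == 0) ? 0 : 1;
}
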
@@ -621,12 +654,48 @@ void irq_complete_move(struct irq_cfg *cfg)
         __irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
 }
 
-void irq_force_complete_move(int irq)
+/*
+ * Called with @desc->lock held and interrupts disabled.
+ */
+void irq_force_complete_move(struct irq_desc *desc)
 {
-        struct irq_cfg *cfg = irq_cfg(irq);
+        struct irq_data *irqdata = irq_desc_get_irq_data(desc);
+        struct apic_chip_data *data = apic_chip_data(irqdata);
+        struct irq_cfg *cfg = data ? &data->cfg : NULL;
 
-        if (cfg)
-                __irq_complete_move(cfg, cfg->vector);
+        if (!cfg)
+                return;
+
+        __irq_complete_move(cfg, cfg->vector);
+
+        /*
+         * This is tricky. If the cleanup of @data->old_domain has not been
+         * done yet, then the following setaffinity call will fail with
+         * -EBUSY. This can leave the interrupt in a stale state.
+         *
+         * The cleanup cannot make progress because we hold @desc->lock. So in
+         * case @data->old_domain is not yet cleaned up, we need to drop the
+         * lock and acquire it again. @desc cannot go away, because the
+         * hotplug code holds the sparse irq lock.
+         */
+        raw_spin_lock(&vector_lock);
+        /* Clean out all offline cpus (including ourself) first. */
+        cpumask_and(data->old_domain, data->old_domain, cpu_online_mask);
+        while (!cpumask_empty(data->old_domain)) {
+                raw_spin_unlock(&vector_lock);
+                raw_spin_unlock(&desc->lock);
+                cpu_relax();
+                raw_spin_lock(&desc->lock);
+                /*
+                 * Reevaluate apic_chip_data. It might have been cleared after
+                 * we dropped @desc->lock.
+                 */
+                data = apic_chip_data(irqdata);
+                if (!data)
+                        return;
+                raw_spin_lock(&vector_lock);
+        }
+        raw_spin_unlock(&vector_lock);
 }
 #endif
 
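
The wait loop is the subtle part of the new irq_force_complete_move(): the cleanup IPI cannot be processed while desc->lock is held, so the function drops both locks, spins, and revalidates the chip data after reacquiring them, since it may have been freed in the meantime. A pthread model of that drop-and-retry idiom follows; the mutexes and the datap pointer check are stand-ins for the raw spinlocks and apic_chip_data(), not the kernel API.

#include <pthread.h>
#include <sched.h>
#include <stdint.h>

static pthread_mutex_t desc_lock   = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t vector_lock = PTHREAD_MUTEX_INITIALIZER;

struct chip_data { uint64_t old_domain; };

/* called with desc_lock held; returns with desc_lock held */
static void wait_for_cleanup(struct chip_data **datap)
{
        pthread_mutex_lock(&vector_lock);
        while (*datap && (*datap)->old_domain) {
                /* drop both locks so the cleanup side can make progress */
                pthread_mutex_unlock(&vector_lock);
                pthread_mutex_unlock(&desc_lock);
                sched_yield();                  /* cpu_relax() stand-in */
                pthread_mutex_lock(&desc_lock);
                /* revalidate: the data may have been torn down meanwhile */
                if (!*datap)
                        return;
                pthread_mutex_lock(&vector_lock);
        }
        pthread_mutex_unlock(&vector_lock);
}

int main(void)
{
        struct chip_data d = { .old_domain = 0 };       /* cleanup done */
        struct chip_data *p = &d;

        pthread_mutex_lock(&desc_lock);
        wait_for_cleanup(&p);
        pthread_mutex_unlock(&desc_lock);
        return 0;
}

The lock ordering (retake desc_lock before vector_lock) matches the nesting the function entered with, which is what makes the retry safe.
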
