author     Yinghai Lu <yhlu.kernel@gmail.com>       2008-08-19 23:50:23 -0400
committer  Ingo Molnar <mingo@elte.hu>              2008-10-16 10:52:52 -0400
commit     8b8e8c1bf7275eca859fe551dfa484134eaf013b (patch)
tree       3e2f950a8f34f419a59a31ddd12e9d7331911e3d /arch
parent     6d50bc26836e16a9589e0b128d527c29e30d722a (diff)
x86: remove irqbalance in kernel for 32 bit
This has been deprecated for years, the user space irqbalanced utility
works better with numa, has configurable policies, etc...
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
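
With the in-kernel balancer gone, 32-bit x86 relies on the same mechanism as 64-bit: the user space irqbalance daemon (or the administrator) steers interrupts by writing CPU masks to /proc/irq/<n>/smp_affinity. The following sketch is not part of this patch; it only illustrates that procfs interface, and the IRQ number and mask in it are made-up example values.

/*
 * Illustrative sketch, not part of this patch: pin an IRQ to a CPU the way
 * a user space policy (such as the irqbalance daemon) would, by writing a
 * hex CPU bitmask to /proc/irq/<n>/smp_affinity.  The IRQ number (19) and
 * the mask (0x2, i.e. CPU1) are hypothetical example values.
 */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
        const unsigned int irq = 19;    /* hypothetical IRQ number */
        const unsigned int mask = 0x2;  /* CPU bitmask: CPU1 only */
        char path[64];
        FILE *f;

        snprintf(path, sizeof(path), "/proc/irq/%u/smp_affinity", irq);
        f = fopen(path, "w");
        if (!f) {
                perror(path);
                return EXIT_FAILURE;
        }
        fprintf(f, "%x\n", mask);       /* the kernel parses a hex cpumask */
        if (fclose(f) != 0) {           /* an error here means the mask was rejected */
                perror(path);
                return EXIT_FAILURE;
        }
        return EXIT_SUCCESS;
}

Run as root; reading the same file back shows the mask currently in effect.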
Diffstat (limited to 'arch')
-rw-r--r--   arch/x86/Kconfig                    8
-rw-r--r--   arch/x86/configs/i386_defconfig     1
-rw-r--r--   arch/x86/kernel/io_apic_32.c      402
-rw-r--r--   arch/x86/kernel/quirks.c            3
4 files changed, 0 insertions, 414 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1004888e9b13..3e0eaaa1a339 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1254,14 +1254,6 @@ config EFI
           resultant kernel should continue to boot on existing non-EFI
           platforms.
 
-config IRQBALANCE
-        def_bool y
-        prompt "Enable kernel irq balancing"
-        depends on X86_32 && SMP && X86_IO_APIC
-        help
-          The default yes will allow the kernel to do irq load balancing.
-          Saying no will keep the kernel from doing irq load balancing.
-
 config SECCOMP
         def_bool y
         prompt "Enable seccomp to safely compute untrusted bytecode"
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 52d0359719d7..13b8c86ae985 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -287,7 +287,6 @@ CONFIG_MTRR=y
 # CONFIG_MTRR_SANITIZER is not set
 CONFIG_X86_PAT=y
 CONFIG_EFI=y
-# CONFIG_IRQBALANCE is not set
 CONFIG_SECCOMP=y
 # CONFIG_HZ_100 is not set
 # CONFIG_HZ_250 is not set
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 204884b1415a..668edf226067 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -371,408 +371,6 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
         spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
-#if defined(CONFIG_IRQBALANCE)
-# include <asm/processor.h>     /* kernel_thread() */
-# include <linux/kernel_stat.h> /* kstat */
-# include <linux/slab.h>        /* kmalloc() */
-# include <linux/timer.h>
-
-#define IRQBALANCE_CHECK_ARCH -999
-#define MAX_BALANCED_IRQ_INTERVAL       (5*HZ)
-#define MIN_BALANCED_IRQ_INTERVAL       (HZ/2)
-#define BALANCED_IRQ_MORE_DELTA         (HZ/10)
-#define BALANCED_IRQ_LESS_DELTA         (HZ)
-
-static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH;
-static int physical_balance __read_mostly;
-static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL;
-
-static struct irq_cpu_info {
-        unsigned long *last_irq;
-        unsigned long *irq_delta;
-        unsigned long irq;
-} irq_cpu_data[NR_CPUS];
-
-#define CPU_IRQ(cpu)            (irq_cpu_data[cpu].irq)
-#define LAST_CPU_IRQ(cpu, irq)  (irq_cpu_data[cpu].last_irq[irq])
-#define IRQ_DELTA(cpu, irq)     (irq_cpu_data[cpu].irq_delta[irq])
-
-#define IDLE_ENOUGH(cpu,now) \
-        (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1))
-
-#define IRQ_ALLOWED(cpu, allowed_mask)  cpu_isset(cpu, allowed_mask)
-
-#define CPU_TO_PACKAGEINDEX(i) (first_cpu(per_cpu(cpu_sibling_map, i)))
-
-static cpumask_t balance_irq_affinity_init __initdata = CPU_MASK_ALL;
-
-static cpumask_t *balance_irq_affinity;
-
-
-static void __init irq_affinity_init_work(void *data)
-{
-        struct dyn_array *da = data;
-
-        int i;
-        struct balance_irq_affinity *affinity;
-
-        affinity = *da->name;
-
-        for (i = 0; i < *da->nr; i++)
-                memcpy(&affinity[i], &balance_irq_affinity_init,
-                        sizeof(struct balance_irq_affinity));
-
-}
-
-DEFINE_DYN_ARRAY(balance_irq_affinity, sizeof(struct balance_irq_affinity), nr_irqs, PAGE_SIZE, irq_affinity_init_work);
-
-
-void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
-{
-        balance_irq_affinity[irq] = mask;
-}
-
-static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
-                        unsigned long now, int direction)
-{
-        int search_idle = 1;
-        int cpu = curr_cpu;
-
-        goto inside;
-
-        do {
-                if (unlikely(cpu == curr_cpu))
-                        search_idle = 0;
-inside:
-                if (direction == 1) {
-                        cpu++;
-                        if (cpu >= NR_CPUS)
-                                cpu = 0;
-                } else {
-                        cpu--;
-                        if (cpu == -1)
-                                cpu = NR_CPUS-1;
-                }
-        } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu, allowed_mask) ||
-                        (search_idle && !IDLE_ENOUGH(cpu, now)));
-
-        return cpu;
-}
-
-static inline void balance_irq(int cpu, int irq)
-{
-        unsigned long now = jiffies;
-        cpumask_t allowed_mask;
-        unsigned int new_cpu;
-
-        if (irqbalance_disabled)
-                return;
-
-        cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]);
-        new_cpu = move(cpu, allowed_mask, now, 1);
-        if (cpu != new_cpu)
-                set_pending_irq(irq, cpumask_of_cpu(new_cpu));
-}
-
-static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
-{
-        int i, j;
-        struct irq_desc *desc;
-
-        for_each_online_cpu(i) {
-                for (j = 0; j < nr_irqs; j++) {
-                        desc = irq_to_desc(j);
-                        if (!desc->action)
-                                continue;
-                        /* Is it a significant load ? */
-                        if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i), j) <
-                                                useful_load_threshold)
-                                continue;
-                        balance_irq(i, j);
-                }
-        }
-        balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
-                balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
-        return;
-}
-
-static void do_irq_balance(void)
-{
-        int i, j;
-        unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
-        unsigned long move_this_load = 0;
-        int max_loaded = 0, min_loaded = 0;
-        int load;
-        unsigned long useful_load_threshold = balanced_irq_interval + 10;
-        int selected_irq;
-        int tmp_loaded, first_attempt = 1;
-        unsigned long tmp_cpu_irq;
-        unsigned long imbalance = 0;
-        cpumask_t allowed_mask, target_cpu_mask, tmp;
-        struct irq_desc *desc;
-
-        for_each_possible_cpu(i) {
-                int package_index;
-                CPU_IRQ(i) = 0;
-                if (!cpu_online(i))
-                        continue;
-                package_index = CPU_TO_PACKAGEINDEX(i);
-                for (j = 0; j < nr_irqs; j++) {
-                        unsigned long value_now, delta;
-                        /* Is this an active IRQ or balancing disabled ? */
-                        desc = irq_to_desc(j);
-                        if (!desc->action || irq_balancing_disabled(j))
-                                continue;
-                        if (package_index == i)
-                                IRQ_DELTA(package_index, j) = 0;
-                        /* Determine the total count per processor per IRQ */
-                        value_now = (unsigned long) kstat_irqs_cpu(j, i);
-
-                        /* Determine the activity per processor per IRQ */
-                        delta = value_now - LAST_CPU_IRQ(i, j);
-
-                        /* Update last_cpu_irq[][] for the next time */
-                        LAST_CPU_IRQ(i, j) = value_now;
-
-                        /* Ignore IRQs whose rate is less than the clock */
-                        if (delta < useful_load_threshold)
-                                continue;
-                        /* update the load for the processor or package total */
-                        IRQ_DELTA(package_index, j) += delta;
-
-                        /* Keep track of the higher numbered sibling as well */
-                        if (i != package_index)
-                                CPU_IRQ(i) += delta;
-                        /*
-                         * We have sibling A and sibling B in the package
-                         *
-                         * cpu_irq[A] = load for cpu A + load for cpu B
-                         * cpu_irq[B] = load for cpu B
-                         */
-                        CPU_IRQ(package_index) += delta;
-                }
-        }
-        /* Find the least loaded processor package */
-        for_each_online_cpu(i) {
-                if (i != CPU_TO_PACKAGEINDEX(i))
-                        continue;
-                if (min_cpu_irq > CPU_IRQ(i)) {
-                        min_cpu_irq = CPU_IRQ(i);
-                        min_loaded = i;
-                }
-        }
-        max_cpu_irq = ULONG_MAX;
-
-tryanothercpu:
-        /*
-         * Look for heaviest loaded processor.
-         * We may come back to get the next heaviest loaded processor.
-         * Skip processors with trivial loads.
-         */
-        tmp_cpu_irq = 0;
-        tmp_loaded = -1;
-        for_each_online_cpu(i) {
-                if (i != CPU_TO_PACKAGEINDEX(i))
-                        continue;
-                if (max_cpu_irq <= CPU_IRQ(i))
-                        continue;
-                if (tmp_cpu_irq < CPU_IRQ(i)) {
-                        tmp_cpu_irq = CPU_IRQ(i);
-                        tmp_loaded = i;
-                }
-        }
-
-        if (tmp_loaded == -1) {
-                /*
-                 * In the case of small number of heavy interrupt sources,
-                 * loading some of the cpus too much. We use Ingo's original
-                 * approach to rotate them around.
-                 */
-                if (!first_attempt && imbalance >= useful_load_threshold) {
-                        rotate_irqs_among_cpus(useful_load_threshold);
-                        return;
-                }
-                goto not_worth_the_effort;
-        }
-
-        first_attempt = 0;              /* heaviest search */
-        max_cpu_irq = tmp_cpu_irq;      /* load */
-        max_loaded = tmp_loaded;        /* processor */
-        imbalance = (max_cpu_irq - min_cpu_irq) / 2;
-
-        /*
-         * if imbalance is less than approx 10% of max load, then
-         * observe diminishing returns action. - quit
-         */
-        if (imbalance < (max_cpu_irq >> 3))
-                goto not_worth_the_effort;
-
-tryanotherirq:
-        /* if we select an IRQ to move that can't go where we want, then
-         * see if there is another one to try.
-         */
-        move_this_load = 0;
-        selected_irq = -1;
-        for (j = 0; j < nr_irqs; j++) {
-                /* Is this an active IRQ? */
-                desc = irq_to_desc(j);
-                if (!desc->action)
-                        continue;
-                if (imbalance <= IRQ_DELTA(max_loaded, j))
-                        continue;
-                /* Try to find the IRQ that is closest to the imbalance
-                 * without going over.
-                 */
-                if (move_this_load < IRQ_DELTA(max_loaded, j)) {
-                        move_this_load = IRQ_DELTA(max_loaded, j);
-                        selected_irq = j;
-                }
-        }
-        if (selected_irq == -1)
-                goto tryanothercpu;
-
-        imbalance = move_this_load;
-
-        /* For physical_balance case, we accumulated both load
-         * values in the one of the siblings cpu_irq[],
-         * to use the same code for physical and logical processors
-         * as much as possible.
-         *
-         * NOTE: the cpu_irq[] array holds the sum of the load for
-         * sibling A and sibling B in the slot for the lowest numbered
-         * sibling (A), _AND_ the load for sibling B in the slot for
-         * the higher numbered sibling.
-         *
-         * We seek the least loaded sibling by making the comparison
-         * (A+B)/2 vs B
-         */
-        load = CPU_IRQ(min_loaded) >> 1;
-        for_each_cpu_mask(j, per_cpu(cpu_sibling_map, min_loaded)) {
-                if (load > CPU_IRQ(j)) {
-                        /* This won't change cpu_sibling_map[min_loaded] */
-                        load = CPU_IRQ(j);
-                        min_loaded = j;
-                }
-        }
-
-        cpus_and(allowed_mask,
-                cpu_online_map,
-                balance_irq_affinity[selected_irq]);
-        target_cpu_mask = cpumask_of_cpu(min_loaded);
-        cpus_and(tmp, target_cpu_mask, allowed_mask);
-
-        if (!cpus_empty(tmp)) {
-                /* mark for change destination */
-                set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
-
-                /* Since we made a change, come back sooner to
-                 * check for more variation.
-                 */
-                balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
-                        balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
-                return;
-        }
-        goto tryanotherirq;
-
-not_worth_the_effort:
-        /*
-         * if we did not find an IRQ to move, then adjust the time interval
-         * upward
-         */
-        balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
-                balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);
-        return;
-}
-
-static int balanced_irq(void *unused)
-{
-        int i;
-        unsigned long prev_balance_time = jiffies;
-        long time_remaining = balanced_irq_interval;
-        struct irq_desc *desc;
-
-        /* push everything to CPU 0 to give us a starting point. */
-        for (i = 0 ; i < nr_irqs ; i++) {
-                desc = irq_to_desc(i);
-                desc->pending_mask = cpumask_of_cpu(0);
-                set_pending_irq(i, cpumask_of_cpu(0));
-        }
-
-        set_freezable();
-        for ( ; ; ) {
-                time_remaining = schedule_timeout_interruptible(time_remaining);
-                try_to_freeze();
-                if (time_after(jiffies,
-                                prev_balance_time+balanced_irq_interval)) {
-                        preempt_disable();
-                        do_irq_balance();
-                        prev_balance_time = jiffies;
-                        time_remaining = balanced_irq_interval;
-                        preempt_enable();
-                }
-        }
-        return 0;
-}
-
-static int __init balanced_irq_init(void)
-{
-        int i;
-        struct cpuinfo_x86 *c;
-        cpumask_t tmp;
-
-        cpus_shift_right(tmp, cpu_online_map, 2);
-        c = &boot_cpu_data;
-        /* When not overwritten by the command line ask subarchitecture. */
-        if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
-                irqbalance_disabled = NO_BALANCE_IRQ;
-        if (irqbalance_disabled)
-                return 0;
-
-        /* disable irqbalance completely if there is only one processor online */
-        if (num_online_cpus() < 2) {
-                irqbalance_disabled = 1;
-                return 0;
-        }
-        /*
-         * Enable physical balance only if more than 1 physical processor
-         * is present
-         */
-        if (smp_num_siblings > 1 && !cpus_empty(tmp))
-                physical_balance = 1;
-
-        for_each_online_cpu(i) {
-                irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * nr_irqs, GFP_KERNEL);
-                irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * nr_irqs, GFP_KERNEL);
-                if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
-                        printk(KERN_ERR "balanced_irq_init: out of memory");
-                        goto failed;
-                }
-        }
-
-        printk(KERN_INFO "Starting balanced_irq\n");
-        if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd")))
-                return 0;
-        printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
-failed:
-        for_each_possible_cpu(i) {
-                kfree(irq_cpu_data[i].irq_delta);
-                irq_cpu_data[i].irq_delta = NULL;
-                kfree(irq_cpu_data[i].last_irq);
-                irq_cpu_data[i].last_irq = NULL;
-        }
-        return 0;
-}
-
-int __devinit irqbalance_disable(char *str)
-{
-        irqbalance_disabled = 1;
-        return 1;
-}
-
-__setup("noirqbalance", irqbalance_disable);
-
-late_initcall(balanced_irq_init);
-#endif /* CONFIG_IRQBALANCE */
 #endif /* CONFIG_SMP */
 
 #ifndef CONFIG_SMP
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index f6a11b9b1f98..67465ed89310 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -35,9 +35,6 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
         if (!(word & (1 << 13))) {
                 dev_info(&dev->dev, "Intel E7520/7320/7525 detected; "
                         "disabling irq balancing and affinity\n");
-#ifdef CONFIG_IRQBALANCE
-                irqbalance_disable("");
-#endif
                 noirqdebug_setup("");
 #ifdef CONFIG_PROC_FS
                 no_irq_affinity = 1;