author    Yinghai Lu <yhlu.kernel@gmail.com>  2008-08-19 23:50:23 -0400
committer Ingo Molnar <mingo@elte.hu>  2008-10-16 10:52:52 -0400
commit    8b8e8c1bf7275eca859fe551dfa484134eaf013b (patch)
tree      3e2f950a8f34f419a59a31ddd12e9d7331911e3d /arch/x86/kernel
parent    6d50bc26836e16a9589e0b128d527c29e30d722a (diff)
x86: remove irqbalance in kernel for 32 bit
This has been deprecated for years; the user space irqbalanced utility works better with NUMA, has configurable policies, etc.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmai.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
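For reference, the user-space balancer steers interrupts through the generic /proc/irq/<n>/smp_affinity interface rather than any arch-specific code. The sketch below is illustrative only and not part of this patch; the IRQ number and CPU mask are made-up example values.

/*
 * Minimal sketch of what a user-space balancer does for one IRQ:
 * write a hexadecimal CPU bitmask to /proc/irq/<n>/smp_affinity.
 * Needs root privileges; values below are illustrative only.
 */
#include <stdio.h>
#include <stdlib.h>

static int set_irq_affinity(unsigned int irq, unsigned long cpu_mask)
{
	char path[64];
	FILE *f;

	snprintf(path, sizeof(path), "/proc/irq/%u/smp_affinity", irq);
	f = fopen(path, "w");
	if (!f)
		return -1;	/* IRQ may not exist or may not be steerable */

	/* The kernel expects a hex CPU bitmask, e.g. "3" = CPU0 | CPU1. */
	if (fprintf(f, "%lx\n", cpu_mask) < 0) {
		fclose(f);
		return -1;
	}
	return fclose(f) ? -1 : 0;
}

int main(void)
{
	/* Example: pin IRQ 19 to CPU 2 (mask 0x4). Both values are made up. */
	if (set_irq_affinity(19, 0x4) != 0) {
		perror("set_irq_affinity");
		return EXIT_FAILURE;
	}
	return EXIT_SUCCESS;
}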
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--   arch/x86/kernel/io_apic_32.c   402
-rw-r--r--   arch/x86/kernel/quirks.c         3
2 files changed, 0 insertions, 405 deletions
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 204884b1415a..668edf226067 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -371,408 +371,6 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
-#if defined(CONFIG_IRQBALANCE)
-# include <asm/processor.h>	/* kernel_thread() */
-# include <linux/kernel_stat.h>	/* kstat */
-# include <linux/slab.h>	/* kmalloc() */
-# include <linux/timer.h>
-
-#define IRQBALANCE_CHECK_ARCH -999
-#define MAX_BALANCED_IRQ_INTERVAL	(5*HZ)
-#define MIN_BALANCED_IRQ_INTERVAL	(HZ/2)
-#define BALANCED_IRQ_MORE_DELTA		(HZ/10)
-#define BALANCED_IRQ_LESS_DELTA		(HZ)
-
-static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH;
-static int physical_balance __read_mostly;
-static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL;
-
-static struct irq_cpu_info {
-	unsigned long *last_irq;
-	unsigned long *irq_delta;
-	unsigned long irq;
-} irq_cpu_data[NR_CPUS];
-
-#define CPU_IRQ(cpu)		(irq_cpu_data[cpu].irq)
-#define LAST_CPU_IRQ(cpu, irq)	(irq_cpu_data[cpu].last_irq[irq])
-#define IRQ_DELTA(cpu, irq)	(irq_cpu_data[cpu].irq_delta[irq])
-
-#define IDLE_ENOUGH(cpu,now) \
-	(idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1))
-
-#define IRQ_ALLOWED(cpu, allowed_mask)	cpu_isset(cpu, allowed_mask)
-
-#define CPU_TO_PACKAGEINDEX(i) (first_cpu(per_cpu(cpu_sibling_map, i)))
-
-static cpumask_t balance_irq_affinity_init __initdata = CPU_MASK_ALL;
-
-static cpumask_t *balance_irq_affinity;
-
-
-static void __init irq_affinity_init_work(void *data)
-{
-	struct dyn_array *da = data;
-
-	int i;
-	struct balance_irq_affinity *affinity;
-
-	affinity = *da->name;
-
-	for (i = 0; i < *da->nr; i++)
-		memcpy(&affinity[i], &balance_irq_affinity_init,
-			sizeof(struct balance_irq_affinity));
-
-}
-
-DEFINE_DYN_ARRAY(balance_irq_affinity, sizeof(struct balance_irq_affinity), nr_irqs, PAGE_SIZE, irq_affinity_init_work);
-
-
-void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
-{
-	balance_irq_affinity[irq] = mask;
-}
-
-static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
-			unsigned long now, int direction)
-{
-	int search_idle = 1;
-	int cpu = curr_cpu;
-
-	goto inside;
-
-	do {
-		if (unlikely(cpu == curr_cpu))
-			search_idle = 0;
-inside:
-		if (direction == 1) {
-			cpu++;
-			if (cpu >= NR_CPUS)
-				cpu = 0;
-		} else {
-			cpu--;
-			if (cpu == -1)
-				cpu = NR_CPUS-1;
-		}
-	} while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu, allowed_mask) ||
-			(search_idle && !IDLE_ENOUGH(cpu, now)));
-
-	return cpu;
-}
-
-static inline void balance_irq(int cpu, int irq)
-{
-	unsigned long now = jiffies;
-	cpumask_t allowed_mask;
-	unsigned int new_cpu;
-
-	if (irqbalance_disabled)
-		return;
-
-	cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]);
-	new_cpu = move(cpu, allowed_mask, now, 1);
-	if (cpu != new_cpu)
-		set_pending_irq(irq, cpumask_of_cpu(new_cpu));
-}
-
-static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
-{
-	int i, j;
-	struct irq_desc *desc;
-
-	for_each_online_cpu(i) {
-		for (j = 0; j < nr_irqs; j++) {
-			desc = irq_to_desc(j);
-			if (!desc->action)
-				continue;
-			/* Is it a significant load ? */
-			if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i), j) <
-						useful_load_threshold)
-				continue;
-			balance_irq(i, j);
-		}
-	}
-	balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
-		balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
-	return;
-}
-
-static void do_irq_balance(void)
-{
-	int i, j;
-	unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
-	unsigned long move_this_load = 0;
-	int max_loaded = 0, min_loaded = 0;
-	int load;
-	unsigned long useful_load_threshold = balanced_irq_interval + 10;
-	int selected_irq;
-	int tmp_loaded, first_attempt = 1;
-	unsigned long tmp_cpu_irq;
-	unsigned long imbalance = 0;
-	cpumask_t allowed_mask, target_cpu_mask, tmp;
-	struct irq_desc *desc;
-
-	for_each_possible_cpu(i) {
-		int package_index;
-		CPU_IRQ(i) = 0;
-		if (!cpu_online(i))
-			continue;
-		package_index = CPU_TO_PACKAGEINDEX(i);
-		for (j = 0; j < nr_irqs; j++) {
-			unsigned long value_now, delta;
-			/* Is this an active IRQ or balancing disabled ? */
-			desc = irq_to_desc(j);
-			if (!desc->action || irq_balancing_disabled(j))
-				continue;
-			if (package_index == i)
-				IRQ_DELTA(package_index, j) = 0;
-			/* Determine the total count per processor per IRQ */
-			value_now = (unsigned long) kstat_irqs_cpu(j, i);
-
-			/* Determine the activity per processor per IRQ */
-			delta = value_now - LAST_CPU_IRQ(i, j);
-
-			/* Update last_cpu_irq[][] for the next time */
-			LAST_CPU_IRQ(i, j) = value_now;
-
-			/* Ignore IRQs whose rate is less than the clock */
-			if (delta < useful_load_threshold)
-				continue;
-			/* update the load for the processor or package total */
-			IRQ_DELTA(package_index, j) += delta;
-
-			/* Keep track of the higher numbered sibling as well */
-			if (i != package_index)
-				CPU_IRQ(i) += delta;
-			/*
-			 * We have sibling A and sibling B in the package
-			 *
-			 * cpu_irq[A] = load for cpu A + load for cpu B
-			 * cpu_irq[B] = load for cpu B
-			 */
-			CPU_IRQ(package_index) += delta;
-		}
-	}
-	/* Find the least loaded processor package */
-	for_each_online_cpu(i) {
-		if (i != CPU_TO_PACKAGEINDEX(i))
-			continue;
-		if (min_cpu_irq > CPU_IRQ(i)) {
-			min_cpu_irq = CPU_IRQ(i);
-			min_loaded = i;
-		}
-	}
-	max_cpu_irq = ULONG_MAX;
-
-tryanothercpu:
-	/*
-	 * Look for heaviest loaded processor.
-	 * We may come back to get the next heaviest loaded processor.
-	 * Skip processors with trivial loads.
-	 */
-	tmp_cpu_irq = 0;
-	tmp_loaded = -1;
-	for_each_online_cpu(i) {
-		if (i != CPU_TO_PACKAGEINDEX(i))
-			continue;
-		if (max_cpu_irq <= CPU_IRQ(i))
-			continue;
-		if (tmp_cpu_irq < CPU_IRQ(i)) {
-			tmp_cpu_irq = CPU_IRQ(i);
-			tmp_loaded = i;
-		}
-	}
-
-	if (tmp_loaded == -1) {
-		/*
-		 * In the case of small number of heavy interrupt sources,
-		 * loading some of the cpus too much. We use Ingo's original
-		 * approach to rotate them around.
-		 */
-		if (!first_attempt && imbalance >= useful_load_threshold) {
-			rotate_irqs_among_cpus(useful_load_threshold);
-			return;
-		}
-		goto not_worth_the_effort;
-	}
-
-	first_attempt = 0;		/* heaviest search */
-	max_cpu_irq = tmp_cpu_irq;	/* load */
-	max_loaded = tmp_loaded;	/* processor */
-	imbalance = (max_cpu_irq - min_cpu_irq) / 2;
-
-	/*
-	 * if imbalance is less than approx 10% of max load, then
-	 * observe diminishing returns action. - quit
-	 */
-	if (imbalance < (max_cpu_irq >> 3))
-		goto not_worth_the_effort;
-
-tryanotherirq:
-	/* if we select an IRQ to move that can't go where we want, then
-	 * see if there is another one to try.
-	 */
-	move_this_load = 0;
-	selected_irq = -1;
-	for (j = 0; j < nr_irqs; j++) {
-		/* Is this an active IRQ? */
-		desc = irq_to_desc(j);
-		if (!desc->action)
-			continue;
-		if (imbalance <= IRQ_DELTA(max_loaded, j))
-			continue;
-		/* Try to find the IRQ that is closest to the imbalance
-		 * without going over.
-		 */
-		if (move_this_load < IRQ_DELTA(max_loaded, j)) {
-			move_this_load = IRQ_DELTA(max_loaded, j);
-			selected_irq = j;
-		}
-	}
-	if (selected_irq == -1)
-		goto tryanothercpu;
-
-	imbalance = move_this_load;
-
-	/* For physical_balance case, we accumulated both load
-	 * values in the one of the siblings cpu_irq[],
-	 * to use the same code for physical and logical processors
-	 * as much as possible.
-	 *
-	 * NOTE: the cpu_irq[] array holds the sum of the load for
-	 * sibling A and sibling B in the slot for the lowest numbered
-	 * sibling (A), _AND_ the load for sibling B in the slot for
-	 * the higher numbered sibling.
-	 *
-	 * We seek the least loaded sibling by making the comparison
-	 * (A+B)/2 vs B
-	 */
-	load = CPU_IRQ(min_loaded) >> 1;
-	for_each_cpu_mask(j, per_cpu(cpu_sibling_map, min_loaded)) {
-		if (load > CPU_IRQ(j)) {
-			/* This won't change cpu_sibling_map[min_loaded] */
-			load = CPU_IRQ(j);
-			min_loaded = j;
-		}
-	}
-
-	cpus_and(allowed_mask,
-		cpu_online_map,
-		balance_irq_affinity[selected_irq]);
-	target_cpu_mask = cpumask_of_cpu(min_loaded);
-	cpus_and(tmp, target_cpu_mask, allowed_mask);
-
-	if (!cpus_empty(tmp)) {
-		/* mark for change destination */
-		set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
-
-		/* Since we made a change, come back sooner to
-		 * check for more variation.
-		 */
-		balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
-			balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
-		return;
-	}
-	goto tryanotherirq;
-
-not_worth_the_effort:
-	/*
-	 * if we did not find an IRQ to move, then adjust the time interval
-	 * upward
-	 */
-	balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
-		balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);
-	return;
-}
-
-static int balanced_irq(void *unused)
-{
-	int i;
-	unsigned long prev_balance_time = jiffies;
-	long time_remaining = balanced_irq_interval;
-	struct irq_desc *desc;
-
-	/* push everything to CPU 0 to give us a starting point.  */
-	for (i = 0 ; i < nr_irqs ; i++) {
-		desc = irq_to_desc(i);
-		desc->pending_mask = cpumask_of_cpu(0);
-		set_pending_irq(i, cpumask_of_cpu(0));
-	}
-
-	set_freezable();
-	for ( ; ; ) {
-		time_remaining = schedule_timeout_interruptible(time_remaining);
-		try_to_freeze();
-		if (time_after(jiffies,
-				prev_balance_time+balanced_irq_interval)) {
-			preempt_disable();
-			do_irq_balance();
-			prev_balance_time = jiffies;
-			time_remaining = balanced_irq_interval;
-			preempt_enable();
-		}
-	}
-	return 0;
-}
-
-static int __init balanced_irq_init(void)
-{
-	int i;
-	struct cpuinfo_x86 *c;
-	cpumask_t tmp;
-
-	cpus_shift_right(tmp, cpu_online_map, 2);
-	c = &boot_cpu_data;
-	/* When not overwritten by the command line ask subarchitecture. */
-	if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
-		irqbalance_disabled = NO_BALANCE_IRQ;
-	if (irqbalance_disabled)
-		return 0;
-
-	/* disable irqbalance completely if there is only one processor online */
-	if (num_online_cpus() < 2) {
-		irqbalance_disabled = 1;
-		return 0;
-	}
-	/*
-	 * Enable physical balance only if more than 1 physical processor
-	 * is present
-	 */
-	if (smp_num_siblings > 1 && !cpus_empty(tmp))
-		physical_balance = 1;
-
-	for_each_online_cpu(i) {
-		irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * nr_irqs, GFP_KERNEL);
-		irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * nr_irqs, GFP_KERNEL);
-		if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
-			printk(KERN_ERR "balanced_irq_init: out of memory");
-			goto failed;
-		}
-	}
-
-	printk(KERN_INFO "Starting balanced_irq\n");
-	if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd")))
-		return 0;
-	printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
-failed:
-	for_each_possible_cpu(i) {
-		kfree(irq_cpu_data[i].irq_delta);
-		irq_cpu_data[i].irq_delta = NULL;
-		kfree(irq_cpu_data[i].last_irq);
-		irq_cpu_data[i].last_irq = NULL;
-	}
-	return 0;
-}
-
-int __devinit irqbalance_disable(char *str)
-{
-	irqbalance_disabled = 1;
-	return 1;
-}
-
-__setup("noirqbalance", irqbalance_disable);
-
-late_initcall(balanced_irq_init);
-#endif /* CONFIG_IRQBALANCE */
 #endif /* CONFIG_SMP */
 
 #ifndef CONFIG_SMP
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index f6a11b9b1f98..67465ed89310 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -35,9 +35,6 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
 	if (!(word & (1 << 13))) {
 		dev_info(&dev->dev, "Intel E7520/7320/7525 detected; "
 			"disabling irq balancing and affinity\n");
-#ifdef CONFIG_IRQBALANCE
-		irqbalance_disable("");
-#endif
 		noirqdebug_setup("");
 #ifdef CONFIG_PROC_FS
 		no_irq_affinity = 1;