aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Yinghai Lu <yhlu.kernel@gmail.com> 2008-08-19 23:50:23 -0400
committer Ingo Molnar <mingo@elte.hu> 2008-10-16 10:52:52 -0400
commit 8b8e8c1bf7275eca859fe551dfa484134eaf013b (patch)
tree 3e2f950a8f34f419a59a31ddd12e9d7331911e3d
parent 6d50bc26836e16a9589e0b128d527c29e30d722a (diff)
x86: remove irqbalance in kernel for 32 bit
This has been deprecated for years, the user space irqbalanced utility
works better with numa, has configurable policies, etc...

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- arch/x86/Kconfig 8
-rw-r--r-- arch/x86/configs/i386_defconfig 1
-rw-r--r-- arch/x86/kernel/io_apic_32.c 402
-rw-r--r-- arch/x86/kernel/quirks.c 3
-rw-r--r-- include/linux/irq.h 14
-rw-r--r-- kernel/irq/manage.c 3
6 files changed, 3 insertions, 428 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1004888e9b13..3e0eaaa1a339 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1254,14 +1254,6 @@ config EFI
1254 resultant kernel should continue to boot on existing non-EFI 1254 resultant kernel should continue to boot on existing non-EFI
1255 platforms. 1255 platforms.
1256 1256
1257config IRQBALANCE
1258 def_bool y
1259 prompt "Enable kernel irq balancing"
1260 depends on X86_32 && SMP && X86_IO_APIC
1261 help
1262 The default yes will allow the kernel to do irq load balancing.
1263 Saying no will keep the kernel from doing irq load balancing.
1264
1265config SECCOMP 1257config SECCOMP
1266 def_bool y 1258 def_bool y
1267 prompt "Enable seccomp to safely compute untrusted bytecode" 1259 prompt "Enable seccomp to safely compute untrusted bytecode"
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 52d0359719d7..13b8c86ae985 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -287,7 +287,6 @@ CONFIG_MTRR=y
287# CONFIG_MTRR_SANITIZER is not set 287# CONFIG_MTRR_SANITIZER is not set
288CONFIG_X86_PAT=y 288CONFIG_X86_PAT=y
289CONFIG_EFI=y 289CONFIG_EFI=y
290# CONFIG_IRQBALANCE is not set
291CONFIG_SECCOMP=y 290CONFIG_SECCOMP=y
292# CONFIG_HZ_100 is not set 291# CONFIG_HZ_100 is not set
293# CONFIG_HZ_250 is not set 292# CONFIG_HZ_250 is not set
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 204884b1415a..668edf226067 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -371,408 +371,6 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
371 spin_unlock_irqrestore(&ioapic_lock, flags); 371 spin_unlock_irqrestore(&ioapic_lock, flags);
372} 372}
373 373
374#if defined(CONFIG_IRQBALANCE)
375# include <asm/processor.h> /* kernel_thread() */
376# include <linux/kernel_stat.h> /* kstat */
377# include <linux/slab.h> /* kmalloc() */
378# include <linux/timer.h>
379
380#define IRQBALANCE_CHECK_ARCH -999
381#define MAX_BALANCED_IRQ_INTERVAL (5*HZ)
382#define MIN_BALANCED_IRQ_INTERVAL (HZ/2)
383#define BALANCED_IRQ_MORE_DELTA (HZ/10)
384#define BALANCED_IRQ_LESS_DELTA (HZ)
385
386static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH;
387static int physical_balance __read_mostly;
388static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL;
389
390static struct irq_cpu_info {
391 unsigned long *last_irq;
392 unsigned long *irq_delta;
393 unsigned long irq;
394} irq_cpu_data[NR_CPUS];
395
396#define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq)
397#define LAST_CPU_IRQ(cpu, irq) (irq_cpu_data[cpu].last_irq[irq])
398#define IRQ_DELTA(cpu, irq) (irq_cpu_data[cpu].irq_delta[irq])
399
400#define IDLE_ENOUGH(cpu,now) \
401 (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1))
402
403#define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask)
404
405#define CPU_TO_PACKAGEINDEX(i) (first_cpu(per_cpu(cpu_sibling_map, i)))
406
407static cpumask_t balance_irq_affinity_init __initdata = CPU_MASK_ALL;
408
409static cpumask_t *balance_irq_affinity;
410
411
412static void __init irq_affinity_init_work(void *data)
413{
414 struct dyn_array *da = data;
415
416 int i;
417 struct balance_irq_affinity *affinity;
418
419 affinity = *da->name;
420
421 for (i = 0; i < *da->nr; i++)
422 memcpy(&affinity[i], &balance_irq_affinity_init,
423 sizeof(struct balance_irq_affinity));
424
425}
426
427DEFINE_DYN_ARRAY(balance_irq_affinity, sizeof(struct balance_irq_affinity), nr_irqs, PAGE_SIZE, irq_affinity_init_work);
428
429
430void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
431{
432 balance_irq_affinity[irq] = mask;
433}
434
435static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
436 unsigned long now, int direction)
437{
438 int search_idle = 1;
439 int cpu = curr_cpu;
440
441 goto inside;
442
443 do {
444 if (unlikely(cpu == curr_cpu))
445 search_idle = 0;
446inside:
447 if (direction == 1) {
448 cpu++;
449 if (cpu >= NR_CPUS)
450 cpu = 0;
451 } else {
452 cpu--;
453 if (cpu == -1)
454 cpu = NR_CPUS-1;
455 }
456 } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu, allowed_mask) ||
457 (search_idle && !IDLE_ENOUGH(cpu, now)));
458
459 return cpu;
460}
461
462static inline void balance_irq(int cpu, int irq)
463{
464 unsigned long now = jiffies;
465 cpumask_t allowed_mask;
466 unsigned int new_cpu;
467
468 if (irqbalance_disabled)
469 return;
470
471 cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]);
472 new_cpu = move(cpu, allowed_mask, now, 1);
473 if (cpu != new_cpu)
474 set_pending_irq(irq, cpumask_of_cpu(new_cpu));
475}
476
477static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
478{
479 int i, j;
480 struct irq_desc *desc;
481
482 for_each_online_cpu(i) {
483 for (j = 0; j < nr_irqs; j++) {
484 desc = irq_to_desc(j);
485 if (!desc->action)
486 continue;
487 /* Is it a significant load ? */
488 if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i), j) <
489 useful_load_threshold)
490 continue;
491 balance_irq(i, j);
492 }
493 }
494 balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
495 balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
496 return;
497}
498
499static void do_irq_balance(void)
500{
501 int i, j;
502 unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
503 unsigned long move_this_load = 0;
504 int max_loaded = 0, min_loaded = 0;
505 int load;
506 unsigned long useful_load_threshold = balanced_irq_interval + 10;
507 int selected_irq;
508 int tmp_loaded, first_attempt = 1;
509 unsigned long tmp_cpu_irq;
510 unsigned long imbalance = 0;
511 cpumask_t allowed_mask, target_cpu_mask, tmp;
512 struct irq_desc *desc;
513
514 for_each_possible_cpu(i) {
515 int package_index;
516 CPU_IRQ(i) = 0;
517 if (!cpu_online(i))
518 continue;
519 package_index = CPU_TO_PACKAGEINDEX(i);
520 for (j = 0; j < nr_irqs; j++) {
521 unsigned long value_now, delta;
522 /* Is this an active IRQ or balancing disabled ? */
523 desc = irq_to_desc(j);
524 if (!desc->action || irq_balancing_disabled(j))
525 continue;
526 if (package_index == i)
527 IRQ_DELTA(package_index, j) = 0;
528 /* Determine the total count per processor per IRQ */
529 value_now = (unsigned long) kstat_irqs_cpu(j, i);
530
531 /* Determine the activity per processor per IRQ */
532 delta = value_now - LAST_CPU_IRQ(i, j);
533
534 /* Update last_cpu_irq[][] for the next time */
535 LAST_CPU_IRQ(i, j) = value_now;
536
537 /* Ignore IRQs whose rate is less than the clock */
538 if (delta < useful_load_threshold)
539 continue;
540 /* update the load for the processor or package total */
541 IRQ_DELTA(package_index, j) += delta;
542
543 /* Keep track of the higher numbered sibling as well */
544 if (i != package_index)
545 CPU_IRQ(i) += delta;
546 /*
547 * We have sibling A and sibling B in the package
548 *
549 * cpu_irq[A] = load for cpu A + load for cpu B
550 * cpu_irq[B] = load for cpu B
551 */
552 CPU_IRQ(package_index) += delta;
553 }
554 }
555 /* Find the least loaded processor package */
556 for_each_online_cpu(i) {
557 if (i != CPU_TO_PACKAGEINDEX(i))
558 continue;
559 if (min_cpu_irq > CPU_IRQ(i)) {
560 min_cpu_irq = CPU_IRQ(i);
561 min_loaded = i;
562 }
563 }
564 max_cpu_irq = ULONG_MAX;
565
566tryanothercpu:
567 /*
568 * Look for heaviest loaded processor.
569 * We may come back to get the next heaviest loaded processor.
570 * Skip processors with trivial loads.
571 */
572 tmp_cpu_irq = 0;
573 tmp_loaded = -1;
574 for_each_online_cpu(i) {
575 if (i != CPU_TO_PACKAGEINDEX(i))
576 continue;
577 if (max_cpu_irq <= CPU_IRQ(i))
578 continue;
579 if (tmp_cpu_irq < CPU_IRQ(i)) {
580 tmp_cpu_irq = CPU_IRQ(i);
581 tmp_loaded = i;
582 }
583 }
584
585 if (tmp_loaded == -1) {
586 /*
587 * In the case of small number of heavy interrupt sources,
588 * loading some of the cpus too much. We use Ingo's original
589 * approach to rotate them around.
590 */
591 if (!first_attempt && imbalance >= useful_load_threshold) {
592 rotate_irqs_among_cpus(useful_load_threshold);
593 return;
594 }
595 goto not_worth_the_effort;
596 }
597
598 first_attempt = 0; /* heaviest search */
599 max_cpu_irq = tmp_cpu_irq; /* load */
600 max_loaded = tmp_loaded; /* processor */
601 imbalance = (max_cpu_irq - min_cpu_irq) / 2;
602
603 /*
604 * if imbalance is less than approx 10% of max load, then
605 * observe diminishing returns action. - quit
606 */
607 if (imbalance < (max_cpu_irq >> 3))
608 goto not_worth_the_effort;
609
610tryanotherirq:
611 /* if we select an IRQ to move that can't go where we want, then
612 * see if there is another one to try.
613 */
614 move_this_load = 0;
615 selected_irq = -1;
616 for (j = 0; j < nr_irqs; j++) {
617 /* Is this an active IRQ? */
618 desc = irq_to_desc(j);
619 if (!desc->action)
620 continue;
621 if (imbalance <= IRQ_DELTA(max_loaded, j))
622 continue;
623 /* Try to find the IRQ that is closest to the imbalance
624 * without going over.
625 */
626 if (move_this_load < IRQ_DELTA(max_loaded, j)) {
627 move_this_load = IRQ_DELTA(max_loaded, j);
628 selected_irq = j;
629 }
630 }
631 if (selected_irq == -1)
632 goto tryanothercpu;
633
634 imbalance = move_this_load;
635
636 /* For physical_balance case, we accumulated both load
637 * values in the one of the siblings cpu_irq[],
638 * to use the same code for physical and logical processors
639 * as much as possible.
640 *
641 * NOTE: the cpu_irq[] array holds the sum of the load for
642 * sibling A and sibling B in the slot for the lowest numbered
643 * sibling (A), _AND_ the load for sibling B in the slot for
644 * the higher numbered sibling.
645 *
646 * We seek the least loaded sibling by making the comparison
647 * (A+B)/2 vs B
648 */
649 load = CPU_IRQ(min_loaded) >> 1;
650 for_each_cpu_mask(j, per_cpu(cpu_sibling_map, min_loaded)) {
651 if (load > CPU_IRQ(j)) {
652 /* This won't change cpu_sibling_map[min_loaded] */
653 load = CPU_IRQ(j);
654 min_loaded = j;
655 }
656 }
657
658 cpus_and(allowed_mask,
659 cpu_online_map,
660 balance_irq_affinity[selected_irq]);
661 target_cpu_mask = cpumask_of_cpu(min_loaded);
662 cpus_and(tmp, target_cpu_mask, allowed_mask);
663
664 if (!cpus_empty(tmp)) {
665 /* mark for change destination */
666 set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
667
668 /* Since we made a change, come back sooner to
669 * check for more variation.
670 */
671 balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
672 balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
673 return;
674 }
675 goto tryanotherirq;
676
677not_worth_the_effort:
678 /*
679 * if we did not find an IRQ to move, then adjust the time interval
680 * upward
681 */
682 balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
683 balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);
684 return;
685}
686
687static int balanced_irq(void *unused)
688{
689 int i;
690 unsigned long prev_balance_time = jiffies;
691 long time_remaining = balanced_irq_interval;
692 struct irq_desc *desc;
693
694 /* push everything to CPU 0 to give us a starting point. */
695 for (i = 0 ; i < nr_irqs ; i++) {
696 desc = irq_to_desc(i);
697 desc->pending_mask = cpumask_of_cpu(0);
698 set_pending_irq(i, cpumask_of_cpu(0));
699 }
700
701 set_freezable();
702 for ( ; ; ) {
703 time_remaining = schedule_timeout_interruptible(time_remaining);
704 try_to_freeze();
705 if (time_after(jiffies,
706 prev_balance_time+balanced_irq_interval)) {
707 preempt_disable();
708 do_irq_balance();
709 prev_balance_time = jiffies;
710 time_remaining = balanced_irq_interval;
711 preempt_enable();
712 }
713 }
714 return 0;
715}
716
717static int __init balanced_irq_init(void)
718{
719 int i;
720 struct cpuinfo_x86 *c;
721 cpumask_t tmp;
722
723 cpus_shift_right(tmp, cpu_online_map, 2);
724 c = &boot_cpu_data;
725 /* When not overwritten by the command line ask subarchitecture. */
726 if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
727 irqbalance_disabled = NO_BALANCE_IRQ;
728 if (irqbalance_disabled)
729 return 0;
730
731 /* disable irqbalance completely if there is only one processor online */
732 if (num_online_cpus() < 2) {
733 irqbalance_disabled = 1;
734 return 0;
735 }
736 /*
737 * Enable physical balance only if more than 1 physical processor
738 * is present
739 */
740 if (smp_num_siblings > 1 && !cpus_empty(tmp))
741 physical_balance = 1;
742
743 for_each_online_cpu(i) {
744 irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * nr_irqs, GFP_KERNEL);
745 irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * nr_irqs, GFP_KERNEL);
746 if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
747 printk(KERN_ERR "balanced_irq_init: out of memory");
748 goto failed;
749 }
750 }
751
752 printk(KERN_INFO "Starting balanced_irq\n");
753 if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd")))
754 return 0;
755 printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
756failed:
757 for_each_possible_cpu(i) {
758 kfree(irq_cpu_data[i].irq_delta);
759 irq_cpu_data[i].irq_delta = NULL;
760 kfree(irq_cpu_data[i].last_irq);
761 irq_cpu_data[i].last_irq = NULL;
762 }
763 return 0;
764}
765
766int __devinit irqbalance_disable(char *str)
767{
768 irqbalance_disabled = 1;
769 return 1;
770}
771
772__setup("noirqbalance", irqbalance_disable);
773
774late_initcall(balanced_irq_init);
775#endif /* CONFIG_IRQBALANCE */
776#endif /* CONFIG_SMP */ 374#endif /* CONFIG_SMP */
777 375
778#ifndef CONFIG_SMP 376#ifndef CONFIG_SMP
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index f6a11b9b1f98..67465ed89310 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -35,9 +35,6 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
35 if (!(word & (1 << 13))) { 35 if (!(word & (1 << 13))) {
36 dev_info(&dev->dev, "Intel E7520/7320/7525 detected; " 36 dev_info(&dev->dev, "Intel E7520/7320/7525 detected; "
37 "disabling irq balancing and affinity\n"); 37 "disabling irq balancing and affinity\n");
38#ifdef CONFIG_IRQBALANCE
39 irqbalance_disable("");
40#endif
41 noirqdebug_setup(""); 38 noirqdebug_setup("");
42#ifdef CONFIG_PROC_FS 39#ifdef CONFIG_PROC_FS
43 no_irq_affinity = 1; 40 no_irq_affinity = 1;
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 704136138dc7..2445d2b3d5dc 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -185,7 +185,7 @@ struct irq_desc {
185 cpumask_t affinity; 185 cpumask_t affinity;
186 unsigned int cpu; 186 unsigned int cpu;
187#endif 187#endif
188#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE) 188#ifdef CONFIG_GENERIC_PENDING_IRQ
189 cpumask_t pending_mask; 189 cpumask_t pending_mask;
190#endif 190#endif
191#ifdef CONFIG_PROC_FS 191#ifdef CONFIG_PROC_FS
@@ -241,13 +241,13 @@ extern int setup_irq(unsigned int irq, struct irqaction *new);
241 241
242#ifdef CONFIG_SMP 242#ifdef CONFIG_SMP
243 243
244#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE) 244#ifdef CONFIG_GENERIC_PENDING_IRQ
245 245
246void set_pending_irq(unsigned int irq, cpumask_t mask); 246void set_pending_irq(unsigned int irq, cpumask_t mask);
247void move_native_irq(int irq); 247void move_native_irq(int irq);
248void move_masked_irq(int irq); 248void move_masked_irq(int irq);
249 249
250#else /* CONFIG_GENERIC_PENDING_IRQ || CONFIG_IRQBALANCE */ 250#else /* CONFIG_GENERIC_PENDING_IRQ */
251 251
252static inline void move_irq(int irq) 252static inline void move_irq(int irq)
253{ 253{
@@ -274,14 +274,6 @@ static inline void set_pending_irq(unsigned int irq, cpumask_t mask)
274 274
275#endif /* CONFIG_SMP */ 275#endif /* CONFIG_SMP */
276 276
277#ifdef CONFIG_IRQBALANCE
278extern void set_balance_irq_affinity(unsigned int irq, cpumask_t mask);
279#else
280static inline void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
281{
282}
283#endif
284
285extern int no_irq_affinity; 277extern int no_irq_affinity;
286 278
287static inline int irq_balancing_disabled(unsigned int irq) 279static inline int irq_balancing_disabled(unsigned int irq)
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 6df49218632a..ddc956861a58 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -86,8 +86,6 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
86 if (!desc->chip->set_affinity) 86 if (!desc->chip->set_affinity)
87 return -EINVAL; 87 return -EINVAL;
88 88
89 set_balance_irq_affinity(irq, cpumask);
90
91#ifdef CONFIG_GENERIC_PENDING_IRQ 89#ifdef CONFIG_GENERIC_PENDING_IRQ
92 if (desc->status & IRQ_MOVE_PCNTXT) { 90 if (desc->status & IRQ_MOVE_PCNTXT) {
93 unsigned long flags; 91 unsigned long flags;
@@ -122,7 +120,6 @@ int irq_select_affinity(unsigned int irq)
122 desc->affinity = mask; 120 desc->affinity = mask;
123 desc->chip->set_affinity(irq, mask); 121 desc->chip->set_affinity(irq, mask);
124 122
125 set_balance_irq_affinity(irq, mask);
126 return 0; 123 return 0;
127} 124}
128#endif 125#endif