aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86_64
diff options
context:
space:
mode:
author James Cleverdon <jamesclv@us.ibm.com> 2005-11-05 11:25:53 -0500
committer Linus Torvalds <torvalds@g5.osdl.org> 2005-11-14 22:55:13 -0500
commit 6004e1b7effcbb385a6b7c790e4b8008682cf679 (patch)
tree b22c05874deeee6ee7ec75f98746717db5830d8d /arch/x86_64
parent 89b831ef8bf5cfbb357dbc0a2e07700d7f20eec5 (diff)
[PATCH] i386/x86-64: Share interrupt vectors when there is a large number of interrupt sources
Here's a patch that builds on Natalie Protasevich's IRQ compression patch and tries to work for MPS boots as well as ACPI. It is meant for a 4-node IBM x460 NUMA box, which was dying because it had interrupt pins with GSI numbers > NR_IRQS and thus overflowed irq_desc.

The problem is that this system has 270 GSIs (which are 1:1 mapped with I/O APIC RTEs) and an 8-node box would have 540. This is much bigger than NR_IRQS (224 for both i386 and x86_64). Also, there aren't enough vectors to go around. There are about 190 usable vectors, not counting the reserved ones and the unused vectors at 0x20 to 0x2F. So, my patch attempts to compress the GSI range and share vectors by sharing IRQs.

Cc: "Protasevich, Natalie" <Natalie.Protasevich@unisys.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch/x86_64')
-rw-r--r-- arch/x86_64/kernel/io_apic.c 80
-rw-r--r-- arch/x86_64/kernel/mpparse.c 2
2 files changed, 74 insertions, 8 deletions
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index c8eee20cd519..97154ab058b4 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -57,7 +57,7 @@ int nr_ioapic_registers[MAX_IO_APICS];
57 * Rough estimation of how many shared IRQs there are, can 57 * Rough estimation of how many shared IRQs there are, can
58 * be changed anytime. 58 * be changed anytime.
59 */ 59 */
60#define MAX_PLUS_SHARED_IRQS NR_IRQS 60#define MAX_PLUS_SHARED_IRQS NR_IRQ_VECTORS
61#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) 61#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
62 62
63/* 63/*
@@ -85,6 +85,7 @@ int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
85 int pin; \ 85 int pin; \
86 struct irq_pin_list *entry = irq_2_pin + irq; \ 86 struct irq_pin_list *entry = irq_2_pin + irq; \
87 \ 87 \
88 BUG_ON(irq >= NR_IRQS); \
88 for (;;) { \ 89 for (;;) { \
89 unsigned int reg; \ 90 unsigned int reg; \
90 pin = entry->pin; \ 91 pin = entry->pin; \
@@ -127,6 +128,8 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
127} 128}
128#endif 129#endif
129 130
131static u8 gsi_2_irq[NR_IRQ_VECTORS] = { [0 ... NR_IRQ_VECTORS-1] = 0xFF };
132
130/* 133/*
131 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are 134 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
132 * shared ISA-space IRQs, so we have to support them. We are super 135 * shared ISA-space IRQs, so we have to support them. We are super
@@ -137,6 +140,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
137 static int first_free_entry = NR_IRQS; 140 static int first_free_entry = NR_IRQS;
138 struct irq_pin_list *entry = irq_2_pin + irq; 141 struct irq_pin_list *entry = irq_2_pin + irq;
139 142
143 BUG_ON(irq >= NR_IRQS);
140 while (entry->next) 144 while (entry->next)
141 entry = irq_2_pin + entry->next; 145 entry = irq_2_pin + entry->next;
142 146
@@ -144,7 +148,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
144 entry->next = first_free_entry; 148 entry->next = first_free_entry;
145 entry = irq_2_pin + entry->next; 149 entry = irq_2_pin + entry->next;
146 if (++first_free_entry >= PIN_MAP_SIZE) 150 if (++first_free_entry >= PIN_MAP_SIZE)
147 panic("io_apic.c: whoops"); 151 panic("io_apic.c: ran out of irq_2_pin entries!");
148 } 152 }
149 entry->apic = apic; 153 entry->apic = apic;
150 entry->pin = pin; 154 entry->pin = pin;
@@ -420,6 +424,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
420 best_guess = irq; 424 best_guess = irq;
421 } 425 }
422 } 426 }
427 BUG_ON(best_guess >= NR_IRQS);
423 return best_guess; 428 return best_guess;
424} 429}
425 430
@@ -610,6 +615,64 @@ static inline int irq_trigger(int idx)
610 return MPBIOS_trigger(idx); 615 return MPBIOS_trigger(idx);
611} 616}
612 617
618static int next_irq = 16;
619
620/*
621 * gsi_irq_sharing -- Name overload! "irq" can be either a legacy IRQ
622 * in the range 0-15, a linux IRQ in the range 0-223, or a GSI number
623 * from ACPI, which can reach 800 in large boxen.
624 *
625 * Compact the sparse GSI space into a sequential IRQ series and reuse
626 * vectors if possible.
627 */
628int gsi_irq_sharing(int gsi)
629{
630 int i, tries, vector;
631
632 BUG_ON(gsi >= NR_IRQ_VECTORS);
633
634 if (platform_legacy_irq(gsi))
635 return gsi;
636
637 if (gsi_2_irq[gsi] != 0xFF)
638 return (int)gsi_2_irq[gsi];
639
640 tries = NR_IRQS;
641 try_again:
642 vector = assign_irq_vector(gsi);
643
644 /*
645 * Sharing vectors means sharing IRQs, so scan irq_vectors for previous
646 * use of vector and if found, return that IRQ. However, we never want
647 * to share legacy IRQs, which usually have a different trigger mode
648 * than PCI.
649 */
650 for (i = 0; i < NR_IRQS; i++)
651 if (IO_APIC_VECTOR(i) == vector)
652 break;
653 if (platform_legacy_irq(i)) {
654 if (--tries >= 0) {
655 IO_APIC_VECTOR(i) = 0;
656 goto try_again;
657 }
658 panic("gsi_irq_sharing: didn't find an IRQ using vector 0x%02X for GSI %d", vector, gsi);
659 }
660 if (i < NR_IRQS) {
661 gsi_2_irq[gsi] = i;
662 printk(KERN_INFO "GSI %d sharing vector 0x%02X and IRQ %d\n",
663 gsi, vector, i);
664 return i;
665 }
666
667 i = next_irq++;
668 BUG_ON(i >= NR_IRQS);
669 gsi_2_irq[gsi] = i;
670 IO_APIC_VECTOR(i) = vector;
671 printk(KERN_INFO "GSI %d assigned vector 0x%02X and IRQ %d\n",
672 gsi, vector, i);
673 return i;
674}
675
613static int pin_2_irq(int idx, int apic, int pin) 676static int pin_2_irq(int idx, int apic, int pin)
614{ 677{
615 int irq, i; 678 int irq, i;
@@ -639,6 +702,7 @@ static int pin_2_irq(int idx, int apic, int pin)
639 while (i < apic) 702 while (i < apic)
640 irq += nr_ioapic_registers[i++]; 703 irq += nr_ioapic_registers[i++];
641 irq += pin; 704 irq += pin;
705 irq = gsi_irq_sharing(irq);
642 break; 706 break;
643 } 707 }
644 default: 708 default:
@@ -648,6 +712,7 @@ static int pin_2_irq(int idx, int apic, int pin)
648 break; 712 break;
649 } 713 }
650 } 714 }
715 BUG_ON(irq >= NR_IRQS);
651 716
652 /* 717 /*
653 * PCI IRQ command line redirection. Yes, limits are hardcoded. 718 * PCI IRQ command line redirection. Yes, limits are hardcoded.
@@ -663,6 +728,7 @@ static int pin_2_irq(int idx, int apic, int pin)
663 } 728 }
664 } 729 }
665 } 730 }
731 BUG_ON(irq >= NR_IRQS);
666 return irq; 732 return irq;
667} 733}
668 734
@@ -690,8 +756,8 @@ int assign_irq_vector(int irq)
690{ 756{
691 static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; 757 static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
692 758
693 BUG_ON(irq >= NR_IRQ_VECTORS); 759 BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
694 if (IO_APIC_VECTOR(irq) > 0) 760 if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0)
695 return IO_APIC_VECTOR(irq); 761 return IO_APIC_VECTOR(irq);
696next: 762next:
697 current_vector += 8; 763 current_vector += 8;
@@ -699,9 +765,8 @@ next:
699 goto next; 765 goto next;
700 766
701 if (current_vector >= FIRST_SYSTEM_VECTOR) { 767 if (current_vector >= FIRST_SYSTEM_VECTOR) {
702 offset++; 768 /* If we run out of vectors on large boxen, must share them. */
703 if (!(offset%8)) 769 offset = (offset + 1) % 8;
704 return -ENOSPC;
705 current_vector = FIRST_DEVICE_VECTOR + offset; 770 current_vector = FIRST_DEVICE_VECTOR + offset;
706 } 771 }
707 772
@@ -1917,6 +1982,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
1917 entry.polarity = active_high_low; 1982 entry.polarity = active_high_low;
1918 entry.mask = 1; /* Disabled (masked) */ 1983 entry.mask = 1; /* Disabled (masked) */
1919 1984
1985 irq = gsi_irq_sharing(irq);
1920 /* 1986 /*
1921 * IRQs < 16 are already in the irq_2_pin[] map 1987 * IRQs < 16 are already in the irq_2_pin[] map
1922 */ 1988 */
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index f16d38d09daf..8f6958e79455 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -218,7 +218,7 @@ static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
218 m->mpc_irqtype, m->mpc_irqflag & 3, 218 m->mpc_irqtype, m->mpc_irqflag & 3,
219 (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, 219 (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
220 m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); 220 m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
221 if (++mp_irq_entries == MAX_IRQ_SOURCES) 221 if (++mp_irq_entries >= MAX_IRQ_SOURCES)
222 panic("Max # of irq sources exceeded!!\n"); 222 panic("Max # of irq sources exceeded!!\n");
223} 223}
224 224