aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86_64/kernel/mpparse.c
diff options
context:
space:
mode:
authorNatalie Protasevich <Natalie.Protasevich@unisys.com>2005-06-23 03:08:41 -0400
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-06-23 12:45:13 -0400
commit701067c4661ebcdc155cc8f696acb24c016c058b (patch)
treec3566fe8dd278707273480c2ecc653bd6d291705 /arch/x86_64/kernel/mpparse.c
parent32ecd42b6f94d3ee320a22827b46bd19ccf924e5 (diff)
[PATCH] x86_64: avoid wasting IRQs
I suggest changing the way IRQs are handed out to PCI devices. Currently, each I/O APIC pin gets associated with an IRQ, whether or not the pin is used. It is expected that each pin can potentially be engaged by a device inserted into the corresponding PCI slot. However, this imposes a severe limitation on systems whose designs employ many I/O APICs while utilizing only a couple of lines of each, such as those based on the P64H2 chipset. It is used in the ES7000, and currently there is no way to boot the system with more than 9 I/O APICs. The simple change below allows booting a system with, say, 64 (or more) I/O APICs, each providing 1 slot, which is otherwise impossible because of the IRQ gaps created for unused lines on each I/O APIC. It does not resolve the problem of the number of devices exceeding the number of possible IRQs, but it eases the pressure on IRQs on any large system with a potentially large number of devices. I only implemented this for the ACPI boot, since if the system is this big and using newer chipsets it is probably (better be!) an ACPI-based system :). The change is completely "mechanical" and does not alter any internal structures or the interrupt model/implementation. The patch works for both i386 and x86_64 archs. It works with MSIs just fine, and should not interfere with implementations like shared vectors, when they get worked out and incorporated. 
To illustrate, below is the interrupt distribution for a 2-cell ES7000 with 20 I/O APICs and an Ethernet card in the last slot, which should be eth1 and which was not configured because its IRQ exceeded the allowable number (it actually turned out huge - 480!): zorro-tb2:~ # cat /proc/interrupts CPU0 CPU1 CPU2 CPU3 CPU4 CPU5 CPU6 CPU7 0: 65716 30012 30007 30002 30009 30010 30010 30010 IO-APIC-edge timer 4: 373 0 725 280 0 0 0 0 IO-APIC-edge serial 8: 0 0 0 0 0 0 0 0 IO-APIC-edge rtc 9: 0 0 0 0 0 0 0 0 IO-APIC-level acpi 14: 39 3 0 0 0 0 0 0 IO-APIC-edge ide0 16: 108 13 0 0 0 0 0 0 IO-APIC-level uhci_hcd:usb1 18: 0 0 0 0 0 0 0 0 IO-APIC-level uhci_hcd:usb3 19: 15 0 0 0 0 0 0 0 IO-APIC-level uhci_hcd:usb2 23: 3 0 0 0 0 0 0 0 IO-APIC-level ehci_hcd:usb4 96: 4240 397 18 0 0 0 0 0 IO-APIC-level aic7xxx 97: 15 0 0 0 0 0 0 0 IO-APIC-level aic7xxx 192: 847 0 0 0 0 0 0 0 IO-APIC-level eth0 NMI: 0 0 0 0 0 0 0 0 LOC: 273423 274528 272829 274228 274092 273761 273827 273694 ERR: 7 MIS: 0 Even though the system doesn't have that many devices, some don't get enabled only because of the IRQ numbering model. 
This is the IRQ picture after the patch was applied: zorro-tb2:~ # cat /proc/interrupts CPU0 CPU1 CPU2 CPU3 CPU4 CPU5 CPU6 CPU7 0: 44169 10004 10004 10001 10004 10003 10004 6135 IO-APIC-edge timer 4: 345 0 0 0 0 244 0 0 IO-APIC-edge serial 8: 0 0 0 0 0 0 0 0 IO-APIC-edge rtc 9: 0 0 0 0 0 0 0 0 IO-APIC-level acpi 14: 39 0 3 0 0 0 0 0 IO-APIC-edge ide0 17: 4425 0 9 0 0 0 0 0 IO-APIC-level aic7xxx 18: 15 0 0 0 0 0 0 0 IO-APIC-level aic7xxx, uhci_hcd:usb3 21: 231 0 0 0 0 0 0 0 IO-APIC-level uhci_hcd:usb1 22: 26 0 0 0 0 0 0 0 IO-APIC-level uhci_hcd:usb2 23: 3 0 0 0 0 0 0 0 IO-APIC-level ehci_hcd:usb4 24: 348 0 0 0 0 0 0 0 IO-APIC-level eth0 25: 6 192 0 0 0 0 0 0 IO-APIC-level eth1 NMI: 0 0 0 0 0 0 0 0 LOC: 107981 107636 108899 108698 108489 108326 108331 108254 ERR: 7 MIS: 0 Not only do we see the card in the last I/O APIC, but we are not even close to using up the available IRQs, since we didn't waste any. Signed-off-by: Natalie Protasevich <Natalie.Protasevich@unisys.com> Acked-by: Andi Kleen <ak@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch/x86_64/kernel/mpparse.c')
-rw-r--r--arch/x86_64/kernel/mpparse.c21
1 files changed, 20 insertions, 1 deletions
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index ed6a5588146d..9c5aa2a790c7 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -906,11 +906,20 @@ void __init mp_config_acpi_legacy_irqs (void)
906 return; 906 return;
907} 907}
908 908
909#define MAX_GSI_NUM 4096
910
909int mp_register_gsi(u32 gsi, int edge_level, int active_high_low) 911int mp_register_gsi(u32 gsi, int edge_level, int active_high_low)
910{ 912{
911 int ioapic = -1; 913 int ioapic = -1;
912 int ioapic_pin = 0; 914 int ioapic_pin = 0;
913 int idx, bit = 0; 915 int idx, bit = 0;
916 static int pci_irq = 16;
917 /*
918 * Mapping between Global System Interrupts, which
919 * represent all possible interrupts, to the IRQs
920 * assigned to actual devices.
921 */
922 static int gsi_to_irq[MAX_GSI_NUM];
914 923
915 if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) 924 if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
916 return gsi; 925 return gsi;
@@ -945,11 +954,21 @@ int mp_register_gsi(u32 gsi, int edge_level, int active_high_low)
945 if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) { 954 if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
946 Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", 955 Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
947 mp_ioapic_routing[ioapic].apic_id, ioapic_pin); 956 mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
948 return gsi; 957 return gsi_to_irq[gsi];
949 } 958 }
950 959
951 mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit); 960 mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
952 961
962 if (edge_level) {
963 /*
964 * For PCI devices assign IRQs in order, avoiding gaps
965 * due to unused I/O APIC pins.
966 */
967 int irq = gsi;
968 gsi = pci_irq++;
969 gsi_to_irq[irq] = gsi;
970 }
971
953 io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, 972 io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
954 edge_level == ACPI_EDGE_SENSITIVE ? 0 : 1, 973 edge_level == ACPI_EDGE_SENSITIVE ? 0 : 1,
955 active_high_low == ACPI_ACTIVE_HIGH ? 0 : 1); 974 active_high_low == ACPI_ACTIVE_HIGH ? 0 : 1);