aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
author Eric W. Biederman <ebiederm@xmission.com> 2006-10-04 05:16:51 -0400
committer Linus Torvalds <torvalds@g5.osdl.org> 2006-10-04 10:55:29 -0400
commit 550f2299ac8ffaba943cf211380d3a8d3fa75301 (patch)
tree 9a8cb7b0d62f48f581f66ab4951aa6c9793131c0 /arch
parent e500f57436b9056a245216c53113613928155eba (diff)
[PATCH] genirq: x86_64 irq: make vector_irq per cpu
This refactors the irq handling code to make the vectors a per cpu resource so the same vector number can be simultaneously used on multiple cpus for different irqs.

This should make systems that were hitting limits on the total number of irqs much more livable.

[akpm@osdl.org: build fix]
[akpm@osdl.org: __target_IO_APIC_irq is unneeded on UP]
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Rajesh Shah <rajesh.shah@intel.com>
Cc: Andi Kleen <ak@muc.de>
Cc: "Protasevich, Natalie" <Natalie.Protasevich@UNISYS.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch')
-rw-r--r-- arch/x86_64/kernel/i8259.c 4
-rw-r--r-- arch/x86_64/kernel/io_apic.c 181
-rw-r--r-- arch/x86_64/kernel/irq.c 2
3 files changed, 138 insertions, 49 deletions
diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c
index bddde431871a..0612a33bb896 100644
--- a/arch/x86_64/kernel/i8259.c
+++ b/arch/x86_64/kernel/i8259.c
@@ -394,7 +394,7 @@ device_initcall(i8259A_init_sysfs);
394 */ 394 */
395 395
396static struct irqaction irq2 = { no_action, 0, CPU_MASK_NONE, "cascade", NULL, NULL}; 396static struct irqaction irq2 = { no_action, 0, CPU_MASK_NONE, "cascade", NULL, NULL};
397int vector_irq[NR_VECTORS] __read_mostly = { 397DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
398 [0 ... FIRST_EXTERNAL_VECTOR - 1] = -1, 398 [0 ... FIRST_EXTERNAL_VECTOR - 1] = -1,
399 [FIRST_EXTERNAL_VECTOR + 0] = 0, 399 [FIRST_EXTERNAL_VECTOR + 0] = 0,
400 [FIRST_EXTERNAL_VECTOR + 1] = 1, 400 [FIRST_EXTERNAL_VECTOR + 1] = 1,
@@ -520,7 +520,7 @@ void __init init_IRQ(void)
520 * IRQ0 must be given a fixed assignment and initialized, 520 * IRQ0 must be given a fixed assignment and initialized,
521 * because it's used before the IO-APIC is set up. 521 * because it's used before the IO-APIC is set up.
522 */ 522 */
523 vector_irq[FIRST_DEVICE_VECTOR] = 0; 523 __get_cpu_var(vector_irq)[FIRST_DEVICE_VECTOR] = 0;
524 524
525 /* 525 /*
526 * The reschedule interrupt is a CPU-to-CPU reschedule-helper 526 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 9f849492c296..7d53f66394a7 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -44,7 +44,7 @@
44#include <asm/nmi.h> 44#include <asm/nmi.h>
45#include <asm/msidef.h> 45#include <asm/msidef.h>
46 46
47static int assign_irq_vector(int irq); 47static int assign_irq_vector(int irq, cpumask_t mask);
48 48
49#define __apicdebuginit __init 49#define __apicdebuginit __init
50 50
@@ -135,11 +135,35 @@ static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
135} 135}
136 136
137#ifdef CONFIG_SMP 137#ifdef CONFIG_SMP
138static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
139{
140 int apic, pin;
141 struct irq_pin_list *entry = irq_2_pin + irq;
142
143 BUG_ON(irq >= NR_IRQS);
144 for (;;) {
145 unsigned int reg;
146 apic = entry->apic;
147 pin = entry->pin;
148 if (pin == -1)
149 break;
150 io_apic_write(apic, 0x11 + pin*2, dest);
151 reg = io_apic_read(apic, 0x10 + pin*2);
152 reg &= ~0x000000ff;
153 reg |= vector;
154 io_apic_modify(apic, reg);
155 if (!entry->next)
156 break;
157 entry = irq_2_pin + entry->next;
158 }
159}
160
138static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) 161static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
139{ 162{
140 unsigned long flags; 163 unsigned long flags;
141 unsigned int dest; 164 unsigned int dest;
142 cpumask_t tmp; 165 cpumask_t tmp;
166 int vector;
143 167
144 cpus_and(tmp, mask, cpu_online_map); 168 cpus_and(tmp, mask, cpu_online_map);
145 if (cpus_empty(tmp)) 169 if (cpus_empty(tmp))
@@ -147,7 +171,13 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
147 171
148 cpus_and(mask, tmp, CPU_MASK_ALL); 172 cpus_and(mask, tmp, CPU_MASK_ALL);
149 173
150 dest = cpu_mask_to_apicid(mask); 174 vector = assign_irq_vector(irq, mask);
175 if (vector < 0)
176 return;
177
178 cpus_clear(tmp);
179 cpu_set(vector >> 8, tmp);
180 dest = cpu_mask_to_apicid(tmp);
151 181
152 /* 182 /*
153 * Only the high 8 bits are valid. 183 * Only the high 8 bits are valid.
@@ -155,7 +185,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
155 dest = SET_APIC_LOGICAL_ID(dest); 185 dest = SET_APIC_LOGICAL_ID(dest);
156 186
157 spin_lock_irqsave(&ioapic_lock, flags); 187 spin_lock_irqsave(&ioapic_lock, flags);
158 __DO_ACTION(1, = dest, ) 188 __target_IO_APIC_irq(irq, dest, vector & 0xff);
159 set_native_irq_info(irq, mask); 189 set_native_irq_info(irq, mask);
160 spin_unlock_irqrestore(&ioapic_lock, flags); 190 spin_unlock_irqrestore(&ioapic_lock, flags);
161} 191}
@@ -512,7 +542,7 @@ int gsi_irq_sharing(int gsi)
512 542
513 tries = NR_IRQS; 543 tries = NR_IRQS;
514 try_again: 544 try_again:
515 vector = assign_irq_vector(gsi); 545 vector = assign_irq_vector(gsi, TARGET_CPUS);
516 546
517 /* 547 /*
518 * Sharing vectors means sharing IRQs, so scan irq_vectors for previous 548 * Sharing vectors means sharing IRQs, so scan irq_vectors for previous
@@ -591,45 +621,77 @@ static inline int IO_APIC_irq_trigger(int irq)
591} 621}
592 622
593/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ 623/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
594u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 }; 624unsigned int irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_EXTERNAL_VECTOR, 0 };
595 625
596static int __assign_irq_vector(int irq) 626static int __assign_irq_vector(int irq, cpumask_t mask)
597{ 627{
598 static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; 628 /*
599 int vector; 629 * NOTE! The local APIC isn't very good at handling
630 * multiple interrupts at the same interrupt level.
631 * As the interrupt level is determined by taking the
632 * vector number and shifting that right by 4, we
633 * want to spread these out a bit so that they don't
634 * all fall in the same interrupt level.
635 *
636 * Also, we've got to be careful not to trash gate
637 * 0x80, because int 0x80 is hm, kind of importantish. ;)
638 */
639 static struct {
640 int vector;
641 int offset;
642 } pos[NR_CPUS] = { [ 0 ... NR_CPUS - 1] = {FIRST_DEVICE_VECTOR, 0} };
643 int old_vector = -1;
644 int cpu;
600 645
601 BUG_ON((unsigned)irq >= NR_IRQ_VECTORS); 646 BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
602 647
603 if (IO_APIC_VECTOR(irq) > 0) { 648 if (IO_APIC_VECTOR(irq) > 0)
604 return IO_APIC_VECTOR(irq); 649 old_vector = IO_APIC_VECTOR(irq);
650 if ((old_vector > 0) && cpu_isset(old_vector >> 8, mask)) {
651 return old_vector;
605 } 652 }
653
654 for_each_cpu_mask(cpu, mask) {
655 int vector, offset;
656 vector = pos[cpu].vector;
657 offset = pos[cpu].offset;
606next: 658next:
607 current_vector += 8; 659 vector += 8;
608 if (current_vector == IA32_SYSCALL_VECTOR) 660 if (vector >= FIRST_SYSTEM_VECTOR) {
609 goto next; 661 /* If we run out of vectors on large boxen, must share them. */
610 662 offset = (offset + 1) % 8;
611 if (current_vector >= FIRST_SYSTEM_VECTOR) { 663 vector = FIRST_DEVICE_VECTOR + offset;
612 /* If we run out of vectors on large boxen, must share them. */ 664 }
613 offset = (offset + 1) % 8; 665 if (unlikely(pos[cpu].vector == vector))
614 current_vector = FIRST_DEVICE_VECTOR + offset; 666 continue;
667 if (vector == IA32_SYSCALL_VECTOR)
668 goto next;
669 if (per_cpu(vector_irq, cpu)[vector] != -1)
670 goto next;
671 /* Found one! */
672 pos[cpu].vector = vector;
673 pos[cpu].offset = offset;
674 if (old_vector >= 0) {
675 int old_cpu = old_vector >> 8;
676 old_vector &= 0xff;
677 per_cpu(vector_irq, old_cpu)[old_vector] = -1;
678 }
679 per_cpu(vector_irq, cpu)[vector] = irq;
680 vector |= cpu << 8;
681 IO_APIC_VECTOR(irq) = vector;
682 return vector;
615 } 683 }
616 684 return -ENOSPC;
617 vector = current_vector;
618 vector_irq[vector] = irq;
619 IO_APIC_VECTOR(irq) = vector;
620
621 return vector;
622} 685}
623 686
624static int assign_irq_vector(int irq) 687static int assign_irq_vector(int irq, cpumask_t mask)
625{ 688{
626 int vector; 689 int vector;
627 unsigned long flags; 690 unsigned long flags;
628 691
629 spin_lock_irqsave(&vector_lock, flags); 692 spin_lock_irqsave(&vector_lock, flags);
630 vector = __assign_irq_vector(irq); 693 vector = __assign_irq_vector(irq, mask);
631 spin_unlock_irqrestore(&vector_lock, flags); 694 spin_unlock_irqrestore(&vector_lock, flags);
632
633 return vector; 695 return vector;
634} 696}
635 697
@@ -699,8 +761,15 @@ static void __init setup_IO_APIC_irqs(void)
699 continue; 761 continue;
700 762
701 if (IO_APIC_IRQ(irq)) { 763 if (IO_APIC_IRQ(irq)) {
702 vector = assign_irq_vector(irq); 764 cpumask_t mask;
703 entry.vector = vector; 765 vector = assign_irq_vector(irq, TARGET_CPUS);
766 if (vector < 0)
767 continue;
768
769 cpus_clear(mask);
770 cpu_set(vector >> 8, mask);
771 entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
772 entry.vector = vector & 0xff;
704 773
705 ioapic_register_intr(irq, vector, IOAPIC_AUTO); 774 ioapic_register_intr(irq, vector, IOAPIC_AUTO);
706 if (!apic && (irq < 16)) 775 if (!apic && (irq < 16))
@@ -1197,7 +1266,14 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
1197 1266
1198static int ioapic_retrigger_irq(unsigned int irq) 1267static int ioapic_retrigger_irq(unsigned int irq)
1199{ 1268{
1200 send_IPI_self(IO_APIC_VECTOR(irq)); 1269 cpumask_t mask;
1270 unsigned vector;
1271
1272 vector = irq_vector[irq];
1273 cpus_clear(mask);
1274 cpu_set(vector >> 8, mask);
1275
1276 send_IPI_mask(mask, vector & 0xff);
1201 1277
1202 return 1; 1278 return 1;
1203} 1279}
@@ -1419,7 +1495,7 @@ static inline void check_timer(void)
1419 * get/set the timer IRQ vector: 1495 * get/set the timer IRQ vector:
1420 */ 1496 */
1421 disable_8259A_irq(0); 1497 disable_8259A_irq(0);
1422 vector = assign_irq_vector(0); 1498 vector = assign_irq_vector(0, TARGET_CPUS);
1423 1499
1424 /* 1500 /*
1425 * Subtle, code in do_timer_interrupt() expects an AEOI 1501 * Subtle, code in do_timer_interrupt() expects an AEOI
@@ -1662,7 +1738,7 @@ int create_irq(void)
1662 continue; 1738 continue;
1663 if (irq_vector[new] != 0) 1739 if (irq_vector[new] != 0)
1664 continue; 1740 continue;
1665 vector = __assign_irq_vector(new); 1741 vector = __assign_irq_vector(new, TARGET_CPUS);
1666 if (likely(vector > 0)) 1742 if (likely(vector > 0))
1667 irq = new; 1743 irq = new;
1668 break; 1744 break;
@@ -1698,12 +1774,12 @@ static int msi_msg_setup(struct pci_dev *pdev, unsigned int irq, struct msi_msg
1698 int vector; 1774 int vector;
1699 unsigned dest; 1775 unsigned dest;
1700 1776
1701 vector = assign_irq_vector(irq); 1777 vector = assign_irq_vector(irq, TARGET_CPUS);
1702 if (vector >= 0) { 1778 if (vector >= 0) {
1703 cpumask_t tmp; 1779 cpumask_t tmp;
1704 1780
1705 cpus_clear(tmp); 1781 cpus_clear(tmp);
1706 cpu_set(first_cpu(cpu_online_map), tmp); 1782 cpu_set(vector >> 8, tmp);
1707 dest = cpu_mask_to_apicid(tmp); 1783 dest = cpu_mask_to_apicid(tmp);
1708 1784
1709 msg->address_hi = MSI_ADDR_BASE_HI; 1785 msg->address_hi = MSI_ADDR_BASE_HI;
@@ -1738,9 +1814,13 @@ static void msi_msg_set_affinity(unsigned int irq, cpumask_t mask, struct msi_ms
1738 int vector; 1814 int vector;
1739 unsigned dest; 1815 unsigned dest;
1740 1816
1741 vector = assign_irq_vector(irq); 1817 vector = assign_irq_vector(irq, mask);
1742 if (vector > 0) { 1818 if (vector > 0) {
1743 dest = cpu_mask_to_apicid(mask); 1819 cpumask_t tmp;
1820
1821 cpus_clear(tmp);
1822 cpu_set(vector >> 8, tmp);
1823 dest = cpu_mask_to_apicid(tmp);
1744 1824
1745 msg->data &= ~MSI_DATA_VECTOR_MASK; 1825 msg->data &= ~MSI_DATA_VECTOR_MASK;
1746 msg->data |= MSI_DATA_VECTOR(vector); 1826 msg->data |= MSI_DATA_VECTOR(vector);
@@ -1783,6 +1863,8 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int p
1783{ 1863{
1784 struct IO_APIC_route_entry entry; 1864 struct IO_APIC_route_entry entry;
1785 unsigned long flags; 1865 unsigned long flags;
1866 int vector;
1867 cpumask_t mask;
1786 1868
1787 if (!IO_APIC_IRQ(irq)) { 1869 if (!IO_APIC_IRQ(irq)) {
1788 apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", 1870 apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
@@ -1790,6 +1872,21 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int p
1790 return -EINVAL; 1872 return -EINVAL;
1791 } 1873 }
1792 1874
1875 irq = gsi_irq_sharing(irq);
1876 /*
1877 * IRQs < 16 are already in the irq_2_pin[] map
1878 */
1879 if (irq >= 16)
1880 add_pin_to_irq(irq, ioapic, pin);
1881
1882
1883 vector = assign_irq_vector(irq, TARGET_CPUS);
1884 if (vector < 0)
1885 return vector;
1886
1887 cpus_clear(mask);
1888 cpu_set(vector >> 8, mask);
1889
1793 /* 1890 /*
1794 * Generate a PCI IRQ routing entry and program the IOAPIC accordingly. 1891 * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
1795 * Note that we mask (disable) IRQs now -- these get enabled when the 1892 * Note that we mask (disable) IRQs now -- these get enabled when the
@@ -1800,19 +1897,11 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int p
1800 1897
1801 entry.delivery_mode = INT_DELIVERY_MODE; 1898 entry.delivery_mode = INT_DELIVERY_MODE;
1802 entry.dest_mode = INT_DEST_MODE; 1899 entry.dest_mode = INT_DEST_MODE;
1803 entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); 1900 entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
1804 entry.trigger = triggering; 1901 entry.trigger = triggering;
1805 entry.polarity = polarity; 1902 entry.polarity = polarity;
1806 entry.mask = 1; /* Disabled (masked) */ 1903 entry.mask = 1; /* Disabled (masked) */
1807 1904 entry.vector = vector & 0xff;
1808 irq = gsi_irq_sharing(irq);
1809 /*
1810 * IRQs < 16 are already in the irq_2_pin[] map
1811 */
1812 if (irq >= 16)
1813 add_pin_to_irq(irq, ioapic, pin);
1814
1815 entry.vector = assign_irq_vector(irq);
1816 1905
1817 apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> " 1906 apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> "
1818 "IRQ %d Mode:%i Active:%i)\n", ioapic, 1907 "IRQ %d Mode:%i Active:%i)\n", ioapic,
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c
index 4542fb031994..506f27c85ca5 100644
--- a/arch/x86_64/kernel/irq.c
+++ b/arch/x86_64/kernel/irq.c
@@ -110,7 +110,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
110 110
111 exit_idle(); 111 exit_idle();
112 irq_enter(); 112 irq_enter();
113 irq = vector_irq[vector]; 113 irq = __get_cpu_var(vector_irq)[vector];
114 114
115 if (unlikely(irq >= NR_IRQS)) { 115 if (unlikely(irq >= NR_IRQS)) {
116 printk(KERN_EMERG "%s: cannot handle IRQ %d\n", 116 printk(KERN_EMERG "%s: cannot handle IRQ %d\n",