aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorEric W. Biederman <ebiederm@xmission.com>2006-10-08 09:47:55 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2006-10-08 15:24:02 -0400
commitc7111c1318ee8890f385813f232fdb32643e2653 (patch)
treee21f8ee81f45f0c127a6233ed77d8052615d1fc8 /arch
parentb940d22d58c41b2ae491dca9232850f6f38f3653 (diff)
[PATCH] x86_64 irq: Allocate a vector across all cpus for genapic_flat.
The problem is that we can't take advantage of lowest priority delivery mode if the vectors are allocated for only one cpu at a time. Nor can we work around hardware that assumes lowest priority delivery mode is always used with several cpus. So this patch introduces the concept of a vector_allocation_domain: a set of cpus that will receive an irq on the same vector. Currently the code for implementing this is placed in the genapic structure so we can vary this depending on how we are using the io_apics. This allows us to restore the previous behaviour of genapic_flat without removing the benefits of having separate vector allocation for large machines. This should also fix the problem report where a hyperthreaded cpu was receiving the irq on the wrong hyperthread when in logical delivery mode, because the previous behaviour is restored. This patch properly records our allocation of the first 16 irqs to the first 16 available vectors on all cpus. This should be fine but it may run into problems with multiple interrupts at the same interrupt level. Except for some badly maintained comments in the code and the behaviour of the interrupt allocator I have no real understanding of that problem. Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> Acked-by: Muli Ben-Yehuda <muli@il.ibm.com> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch')
-rw-r--r--arch/x86_64/kernel/genapic_cluster.c8
-rw-r--r--arch/x86_64/kernel/genapic_flat.c24
-rw-r--r--arch/x86_64/kernel/io_apic.c131
3 files changed, 115 insertions, 48 deletions
diff --git a/arch/x86_64/kernel/genapic_cluster.c b/arch/x86_64/kernel/genapic_cluster.c
index cdb90e671b8..73d76308b95 100644
--- a/arch/x86_64/kernel/genapic_cluster.c
+++ b/arch/x86_64/kernel/genapic_cluster.c
@@ -63,6 +63,13 @@ static cpumask_t cluster_target_cpus(void)
63 return cpumask_of_cpu(0); 63 return cpumask_of_cpu(0);
64} 64}
65 65
66static cpumask_t cluster_vector_allocation_domain(int cpu)
67{
68 cpumask_t domain = CPU_MASK_NONE;
69 cpu_set(cpu, domain);
70 return domain;
71}
72
66static void cluster_send_IPI_mask(cpumask_t mask, int vector) 73static void cluster_send_IPI_mask(cpumask_t mask, int vector)
67{ 74{
68 send_IPI_mask_sequence(mask, vector); 75 send_IPI_mask_sequence(mask, vector);
@@ -119,6 +126,7 @@ struct genapic apic_cluster = {
119 .int_delivery_mode = dest_Fixed, 126 .int_delivery_mode = dest_Fixed,
120 .int_dest_mode = (APIC_DEST_PHYSICAL != 0), 127 .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
121 .target_cpus = cluster_target_cpus, 128 .target_cpus = cluster_target_cpus,
129 .vector_allocation_domain = cluster_vector_allocation_domain,
122 .apic_id_registered = cluster_apic_id_registered, 130 .apic_id_registered = cluster_apic_id_registered,
123 .init_apic_ldr = cluster_init_apic_ldr, 131 .init_apic_ldr = cluster_init_apic_ldr,
124 .send_IPI_all = cluster_send_IPI_all, 132 .send_IPI_all = cluster_send_IPI_all,
diff --git a/arch/x86_64/kernel/genapic_flat.c b/arch/x86_64/kernel/genapic_flat.c
index 50ad153eaac..0dfc223c183 100644
--- a/arch/x86_64/kernel/genapic_flat.c
+++ b/arch/x86_64/kernel/genapic_flat.c
@@ -22,6 +22,20 @@ static cpumask_t flat_target_cpus(void)
22 return cpu_online_map; 22 return cpu_online_map;
23} 23}
24 24
25static cpumask_t flat_vector_allocation_domain(int cpu)
26{
27 /* Careful. Some cpus do not strictly honor the set of cpus
28 * specified in the interrupt destination when using lowest
29 * priority interrupt delivery mode.
30 *
31 * In particular there was a hyperthreading cpu observed to
32 * deliver interrupts to the wrong hyperthread when only one
33 * hyperthread was specified in the interrupt destination.
34 */
35 cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
36 return domain;
37}
38
25/* 39/*
26 * Set up the logical destination ID. 40 * Set up the logical destination ID.
27 * 41 *
@@ -121,6 +135,7 @@ struct genapic apic_flat = {
121 .int_delivery_mode = dest_LowestPrio, 135 .int_delivery_mode = dest_LowestPrio,
122 .int_dest_mode = (APIC_DEST_LOGICAL != 0), 136 .int_dest_mode = (APIC_DEST_LOGICAL != 0),
123 .target_cpus = flat_target_cpus, 137 .target_cpus = flat_target_cpus,
138 .vector_allocation_domain = flat_vector_allocation_domain,
124 .apic_id_registered = flat_apic_id_registered, 139 .apic_id_registered = flat_apic_id_registered,
125 .init_apic_ldr = flat_init_apic_ldr, 140 .init_apic_ldr = flat_init_apic_ldr,
126 .send_IPI_all = flat_send_IPI_all, 141 .send_IPI_all = flat_send_IPI_all,
@@ -141,6 +156,14 @@ static cpumask_t physflat_target_cpus(void)
141 return cpumask_of_cpu(0); 156 return cpumask_of_cpu(0);
142} 157}
143 158
159static cpumask_t physflat_vector_allocation_domain(int cpu)
160{
161 cpumask_t domain = CPU_MASK_NONE;
162 cpu_set(cpu, domain);
163 return domain;
164}
165
166
144static void physflat_send_IPI_mask(cpumask_t cpumask, int vector) 167static void physflat_send_IPI_mask(cpumask_t cpumask, int vector)
145{ 168{
146 send_IPI_mask_sequence(cpumask, vector); 169 send_IPI_mask_sequence(cpumask, vector);
@@ -179,6 +202,7 @@ struct genapic apic_physflat = {
179 .int_delivery_mode = dest_Fixed, 202 .int_delivery_mode = dest_Fixed,
180 .int_dest_mode = (APIC_DEST_PHYSICAL != 0), 203 .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
181 .target_cpus = physflat_target_cpus, 204 .target_cpus = physflat_target_cpus,
205 .vector_allocation_domain = physflat_vector_allocation_domain,
182 .apic_id_registered = flat_apic_id_registered, 206 .apic_id_registered = flat_apic_id_registered,
183 .init_apic_ldr = flat_init_apic_ldr,/*not needed, but shouldn't hurt*/ 207 .init_apic_ldr = flat_init_apic_ldr,/*not needed, but shouldn't hurt*/
184 .send_IPI_all = physflat_send_IPI_all, 208 .send_IPI_all = physflat_send_IPI_all,
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 9c3b9b19d69..771bcf77daf 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -47,7 +47,7 @@
47#include <asm/msidef.h> 47#include <asm/msidef.h>
48#include <asm/hypertransport.h> 48#include <asm/hypertransport.h>
49 49
50static int assign_irq_vector(int irq, cpumask_t mask); 50static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result);
51 51
52#define __apicdebuginit __init 52#define __apicdebuginit __init
53 53
@@ -174,12 +174,10 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
174 174
175 cpus_and(mask, tmp, CPU_MASK_ALL); 175 cpus_and(mask, tmp, CPU_MASK_ALL);
176 176
177 vector = assign_irq_vector(irq, mask); 177 vector = assign_irq_vector(irq, mask, &tmp);
178 if (vector < 0) 178 if (vector < 0)
179 return; 179 return;
180 180
181 cpus_clear(tmp);
182 cpu_set(vector >> 8, tmp);
183 dest = cpu_mask_to_apicid(tmp); 181 dest = cpu_mask_to_apicid(tmp);
184 182
185 /* 183 /*
@@ -188,7 +186,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
188 dest = SET_APIC_LOGICAL_ID(dest); 186 dest = SET_APIC_LOGICAL_ID(dest);
189 187
190 spin_lock_irqsave(&ioapic_lock, flags); 188 spin_lock_irqsave(&ioapic_lock, flags);
191 __target_IO_APIC_irq(irq, dest, vector & 0xff); 189 __target_IO_APIC_irq(irq, dest, vector);
192 set_native_irq_info(irq, mask); 190 set_native_irq_info(irq, mask);
193 spin_unlock_irqrestore(&ioapic_lock, flags); 191 spin_unlock_irqrestore(&ioapic_lock, flags);
194} 192}
@@ -563,9 +561,45 @@ static inline int IO_APIC_irq_trigger(int irq)
563} 561}
564 562
565/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ 563/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
566unsigned int irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_EXTERNAL_VECTOR, 0 }; 564static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = {
565 [0] = FIRST_EXTERNAL_VECTOR + 0,
566 [1] = FIRST_EXTERNAL_VECTOR + 1,
567 [2] = FIRST_EXTERNAL_VECTOR + 2,
568 [3] = FIRST_EXTERNAL_VECTOR + 3,
569 [4] = FIRST_EXTERNAL_VECTOR + 4,
570 [5] = FIRST_EXTERNAL_VECTOR + 5,
571 [6] = FIRST_EXTERNAL_VECTOR + 6,
572 [7] = FIRST_EXTERNAL_VECTOR + 7,
573 [8] = FIRST_EXTERNAL_VECTOR + 8,
574 [9] = FIRST_EXTERNAL_VECTOR + 9,
575 [10] = FIRST_EXTERNAL_VECTOR + 10,
576 [11] = FIRST_EXTERNAL_VECTOR + 11,
577 [12] = FIRST_EXTERNAL_VECTOR + 12,
578 [13] = FIRST_EXTERNAL_VECTOR + 13,
579 [14] = FIRST_EXTERNAL_VECTOR + 14,
580 [15] = FIRST_EXTERNAL_VECTOR + 15,
581};
582
583static cpumask_t irq_domain[NR_IRQ_VECTORS] __read_mostly = {
584 [0] = CPU_MASK_ALL,
585 [1] = CPU_MASK_ALL,
586 [2] = CPU_MASK_ALL,
587 [3] = CPU_MASK_ALL,
588 [4] = CPU_MASK_ALL,
589 [5] = CPU_MASK_ALL,
590 [6] = CPU_MASK_ALL,
591 [7] = CPU_MASK_ALL,
592 [8] = CPU_MASK_ALL,
593 [9] = CPU_MASK_ALL,
594 [10] = CPU_MASK_ALL,
595 [11] = CPU_MASK_ALL,
596 [12] = CPU_MASK_ALL,
597 [13] = CPU_MASK_ALL,
598 [14] = CPU_MASK_ALL,
599 [15] = CPU_MASK_ALL,
600};
567 601
568static int __assign_irq_vector(int irq, cpumask_t mask) 602static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
569{ 603{
570 /* 604 /*
571 * NOTE! The local APIC isn't very good at handling 605 * NOTE! The local APIC isn't very good at handling
@@ -589,14 +623,22 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
589 623
590 if (irq_vector[irq] > 0) 624 if (irq_vector[irq] > 0)
591 old_vector = irq_vector[irq]; 625 old_vector = irq_vector[irq];
592 if ((old_vector > 0) && cpu_isset(old_vector >> 8, mask)) { 626 if (old_vector > 0) {
593 return old_vector; 627 cpus_and(*result, irq_domain[irq], mask);
628 if (!cpus_empty(*result))
629 return old_vector;
594 } 630 }
595 631
596 for_each_cpu_mask(cpu, mask) { 632 for_each_cpu_mask(cpu, mask) {
633 cpumask_t domain;
634 int first, new_cpu;
597 int vector, offset; 635 int vector, offset;
598 vector = pos[cpu].vector; 636
599 offset = pos[cpu].offset; 637 domain = vector_allocation_domain(cpu);
638 first = first_cpu(domain);
639
640 vector = pos[first].vector;
641 offset = pos[first].offset;
600next: 642next:
601 vector += 8; 643 vector += 8;
602 if (vector >= FIRST_SYSTEM_VECTOR) { 644 if (vector >= FIRST_SYSTEM_VECTOR) {
@@ -604,35 +646,40 @@ next:
604 offset = (offset + 1) % 8; 646 offset = (offset + 1) % 8;
605 vector = FIRST_DEVICE_VECTOR + offset; 647 vector = FIRST_DEVICE_VECTOR + offset;
606 } 648 }
607 if (unlikely(pos[cpu].vector == vector)) 649 if (unlikely(pos[first].vector == vector))
608 continue; 650 continue;
609 if (vector == IA32_SYSCALL_VECTOR) 651 if (vector == IA32_SYSCALL_VECTOR)
610 goto next; 652 goto next;
611 if (per_cpu(vector_irq, cpu)[vector] != -1) 653 for_each_cpu_mask(new_cpu, domain)
612 goto next; 654 if (per_cpu(vector_irq, cpu)[vector] != -1)
655 goto next;
613 /* Found one! */ 656 /* Found one! */
614 pos[cpu].vector = vector; 657 for_each_cpu_mask(new_cpu, domain) {
615 pos[cpu].offset = offset; 658 pos[cpu].vector = vector;
659 pos[cpu].offset = offset;
660 }
616 if (old_vector >= 0) { 661 if (old_vector >= 0) {
617 int old_cpu = old_vector >> 8; 662 int old_cpu;
618 old_vector &= 0xff; 663 for_each_cpu_mask(old_cpu, domain)
619 per_cpu(vector_irq, old_cpu)[old_vector] = -1; 664 per_cpu(vector_irq, old_cpu)[old_vector] = -1;
620 } 665 }
621 per_cpu(vector_irq, cpu)[vector] = irq; 666 for_each_cpu_mask(new_cpu, domain)
622 vector |= cpu << 8; 667 per_cpu(vector_irq, new_cpu)[vector] = irq;
623 irq_vector[irq] = vector; 668 irq_vector[irq] = vector;
669 irq_domain[irq] = domain;
670 cpus_and(*result, domain, mask);
624 return vector; 671 return vector;
625 } 672 }
626 return -ENOSPC; 673 return -ENOSPC;
627} 674}
628 675
629static int assign_irq_vector(int irq, cpumask_t mask) 676static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
630{ 677{
631 int vector; 678 int vector;
632 unsigned long flags; 679 unsigned long flags;
633 680
634 spin_lock_irqsave(&vector_lock, flags); 681 spin_lock_irqsave(&vector_lock, flags);
635 vector = __assign_irq_vector(irq, mask); 682 vector = __assign_irq_vector(irq, mask, result);
636 spin_unlock_irqrestore(&vector_lock, flags); 683 spin_unlock_irqrestore(&vector_lock, flags);
637 return vector; 684 return vector;
638} 685}
@@ -704,14 +751,12 @@ static void __init setup_IO_APIC_irqs(void)
704 751
705 if (IO_APIC_IRQ(irq)) { 752 if (IO_APIC_IRQ(irq)) {
706 cpumask_t mask; 753 cpumask_t mask;
707 vector = assign_irq_vector(irq, TARGET_CPUS); 754 vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
708 if (vector < 0) 755 if (vector < 0)
709 continue; 756 continue;
710 757
711 cpus_clear(mask);
712 cpu_set(vector >> 8, mask);
713 entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask); 758 entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
714 entry.vector = vector & 0xff; 759 entry.vector = vector;
715 760
716 ioapic_register_intr(irq, vector, IOAPIC_AUTO); 761 ioapic_register_intr(irq, vector, IOAPIC_AUTO);
717 if (!apic && (irq < 16)) 762 if (!apic && (irq < 16))
@@ -1430,12 +1475,13 @@ static inline void check_timer(void)
1430{ 1475{
1431 int apic1, pin1, apic2, pin2; 1476 int apic1, pin1, apic2, pin2;
1432 int vector; 1477 int vector;
1478 cpumask_t mask;
1433 1479
1434 /* 1480 /*
1435 * get/set the timer IRQ vector: 1481 * get/set the timer IRQ vector:
1436 */ 1482 */
1437 disable_8259A_irq(0); 1483 disable_8259A_irq(0);
1438 vector = assign_irq_vector(0, TARGET_CPUS); 1484 vector = assign_irq_vector(0, TARGET_CPUS, &mask);
1439 1485
1440 /* 1486 /*
1441 * Subtle, code in do_timer_interrupt() expects an AEOI 1487 * Subtle, code in do_timer_interrupt() expects an AEOI
@@ -1667,6 +1713,7 @@ int create_irq(void)
1667 int new; 1713 int new;
1668 int vector = 0; 1714 int vector = 0;
1669 unsigned long flags; 1715 unsigned long flags;
1716 cpumask_t mask;
1670 1717
1671 irq = -ENOSPC; 1718 irq = -ENOSPC;
1672 spin_lock_irqsave(&vector_lock, flags); 1719 spin_lock_irqsave(&vector_lock, flags);
@@ -1675,7 +1722,7 @@ int create_irq(void)
1675 continue; 1722 continue;
1676 if (irq_vector[new] != 0) 1723 if (irq_vector[new] != 0)
1677 continue; 1724 continue;
1678 vector = __assign_irq_vector(new, TARGET_CPUS); 1725 vector = __assign_irq_vector(new, TARGET_CPUS, &mask);
1679 if (likely(vector > 0)) 1726 if (likely(vector > 0))
1680 irq = new; 1727 irq = new;
1681 break; 1728 break;
@@ -1707,13 +1754,10 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
1707{ 1754{
1708 int vector; 1755 int vector;
1709 unsigned dest; 1756 unsigned dest;
1757 cpumask_t tmp;
1710 1758
1711 vector = assign_irq_vector(irq, TARGET_CPUS); 1759 vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
1712 if (vector >= 0) { 1760 if (vector >= 0) {
1713 cpumask_t tmp;
1714
1715 cpus_clear(tmp);
1716 cpu_set(vector >> 8, tmp);
1717 dest = cpu_mask_to_apicid(tmp); 1761 dest = cpu_mask_to_apicid(tmp);
1718 1762
1719 msg->address_hi = MSI_ADDR_BASE_HI; 1763 msg->address_hi = MSI_ADDR_BASE_HI;
@@ -1752,12 +1796,10 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
1752 1796
1753 cpus_and(mask, tmp, CPU_MASK_ALL); 1797 cpus_and(mask, tmp, CPU_MASK_ALL);
1754 1798
1755 vector = assign_irq_vector(irq, mask); 1799 vector = assign_irq_vector(irq, mask, &tmp);
1756 if (vector < 0) 1800 if (vector < 0)
1757 return; 1801 return;
1758 1802
1759 cpus_clear(tmp);
1760 cpu_set(vector >> 8, tmp);
1761 dest = cpu_mask_to_apicid(tmp); 1803 dest = cpu_mask_to_apicid(tmp);
1762 1804
1763 read_msi_msg(irq, &msg); 1805 read_msi_msg(irq, &msg);
@@ -1844,12 +1886,10 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
1844 1886
1845 cpus_and(mask, tmp, CPU_MASK_ALL); 1887 cpus_and(mask, tmp, CPU_MASK_ALL);
1846 1888
1847 vector = assign_irq_vector(irq, mask); 1889 vector = assign_irq_vector(irq, mask, &tmp);
1848 if (vector < 0) 1890 if (vector < 0)
1849 return; 1891 return;
1850 1892
1851 cpus_clear(tmp);
1852 cpu_set(vector >> 8, tmp);
1853 dest = cpu_mask_to_apicid(tmp); 1893 dest = cpu_mask_to_apicid(tmp);
1854 1894
1855 target_ht_irq(irq, dest, vector & 0xff); 1895 target_ht_irq(irq, dest, vector & 0xff);
@@ -1871,15 +1911,13 @@ static struct hw_interrupt_type ht_irq_chip = {
1871int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) 1911int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
1872{ 1912{
1873 int vector; 1913 int vector;
1914 cpumask_t tmp;
1874 1915
1875 vector = assign_irq_vector(irq, TARGET_CPUS); 1916 vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
1876 if (vector >= 0) { 1917 if (vector >= 0) {
1877 u32 low, high; 1918 u32 low, high;
1878 unsigned dest; 1919 unsigned dest;
1879 cpumask_t tmp;
1880 1920
1881 cpus_clear(tmp);
1882 cpu_set(vector >> 8, tmp);
1883 dest = cpu_mask_to_apicid(tmp); 1921 dest = cpu_mask_to_apicid(tmp);
1884 1922
1885 high = HT_IRQ_HIGH_DEST_ID(dest); 1923 high = HT_IRQ_HIGH_DEST_ID(dest);
@@ -1945,13 +1983,10 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int p
1945 add_pin_to_irq(irq, ioapic, pin); 1983 add_pin_to_irq(irq, ioapic, pin);
1946 1984
1947 1985
1948 vector = assign_irq_vector(irq, TARGET_CPUS); 1986 vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
1949 if (vector < 0) 1987 if (vector < 0)
1950 return vector; 1988 return vector;
1951 1989
1952 cpus_clear(mask);
1953 cpu_set(vector >> 8, mask);
1954
1955 /* 1990 /*
1956 * Generate a PCI IRQ routing entry and program the IOAPIC accordingly. 1991 * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
1957 * Note that we mask (disable) IRQs now -- these get enabled when the 1992 * Note that we mask (disable) IRQs now -- these get enabled when the