[PATCH] x86_64 irq: Allocate a vector across all cpus for genapic_flat.

The problem is that we can't take advantage of lowest priority delivery mode if the vectors are allocated for only one cpu at a time. Nor can we work around hardware that assumes lowest priority delivery mode is always used with several cpus. So this patch introduces the concept of a vector_allocation_domain: a set of cpus that will receive an irq on the same vector. Currently the code for implementing this is placed in the genapic structure so we can vary this depending on how we are using the io_apics. This allows us to restore the previous behaviour of genapic_flat without removing the benefits of having separate vector allocation for large machines. This should also fix the problem report where a hyperthreaded cpu was receiving the irq on the wrong hyperthread when in logical delivery mode, because the previous behaviour is restored. This patch properly records our allocation of the first 16 irqs to the first 16 available vectors on all cpus. This should be fine but it may run into problems with multiple interrupts at the same interrupt level. Except for some badly maintained comments in the code and the behaviour of the interrupt allocator I have no real understanding of that problem. Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> Acked-by: Muli Ben-Yehuda <muli@il.ibm.com> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
author: Eric W. Biederman <ebiederm@xmission.com> 2006-10-08 09:47:55 -0400
committer: Linus Torvalds <torvalds@g5.osdl.org> 2006-10-08 15:24:02 -0400
commit: c7111c1318ee8890f385813f232fdb32643e2653 (patch)
tree: e21f8ee81f45f0c127a6233ed77d8052615d1fc8 /arch/x86_64/kernel
parent: b940d22d58c41b2ae491dca9232850f6f38f3653 (diff)
3 files changed, 115 insertions, 48 deletions
diff --git a/arch/x86_64/kernel/genapic_cluster.c b/arch/x86_64/kernel/genapic_cluster.c
index cdb90e671b88..73d76308b955 100644
--- a/arch/x86_64/kernel/genapic_cluster.c
+++ b/arch/x86_64/kernel/genapic_cluster.c
@@ -63,6 +63,13 @@ static cpumask_t cluster_target_cpus(void)
        return cpumask_of_cpu(0);
 }
+static cpumask_t cluster_vector_allocation_domain(int cpu)
+{
+        cpumask_t domain = CPU_MASK_NONE;
+        cpu_set(cpu, domain);
+        return domain;
+}
 static void cluster_send_IPI_mask(cpumask_t mask, int vector)
 {
        send_IPI_mask_sequence(mask, vector);
@@ -119,6 +126,7 @@ struct genapic apic_cluster = {
        .int_delivery_mode = dest_Fixed,
        .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
        .target_cpus = cluster_target_cpus,
+        .vector_allocation_domain = cluster_vector_allocation_domain,
        .apic_id_registered = cluster_apic_id_registered,
        .init_apic_ldr = cluster_init_apic_ldr,
        .send_IPI_all = cluster_send_IPI_all,
diff --git a/arch/x86_64/kernel/genapic_flat.c b/arch/x86_64/kernel/genapic_flat.c
index 50ad153eaac4..0dfc223c1839 100644
--- a/arch/x86_64/kernel/genapic_flat.c
+++ b/arch/x86_64/kernel/genapic_flat.c
@@ -22,6 +22,20 @@ static cpumask_t flat_target_cpus(void)
        return cpu_online_map;
 }
+static cpumask_t flat_vector_allocation_domain(int cpu)
+{
+        /* Careful. Some cpus do not strictly honor the set of cpus
+         * specified in the interrupt destination when using lowest
+         * priority interrupt delivery mode.
+         *
+         * In particular there was a hyperthreading cpu observed to
+         * deliver interrupts to the wrong hyperthread when only one
+         * hyperthread was specified in the interrupt destination.
+         */
+        cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
+        return domain;
+}
 /*
 * Set up the logical destination ID.
 *
@@ -121,6 +135,7 @@ struct genapic apic_flat =  {
        .int_delivery_mode = dest_LowestPrio,
        .int_dest_mode = (APIC_DEST_LOGICAL != 0),
        .target_cpus = flat_target_cpus,
+        .vector_allocation_domain = flat_vector_allocation_domain,
        .apic_id_registered = flat_apic_id_registered,
        .init_apic_ldr = flat_init_apic_ldr,
        .send_IPI_all = flat_send_IPI_all,
@@ -141,6 +156,14 @@ static cpumask_t physflat_target_cpus(void)
        return cpumask_of_cpu(0);
 }
+static cpumask_t physflat_vector_allocation_domain(int cpu)
+{
+        cpumask_t domain = CPU_MASK_NONE;
+        cpu_set(cpu, domain);
+        return domain;
+}
 static void physflat_send_IPI_mask(cpumask_t cpumask, int vector)
 {
        send_IPI_mask_sequence(cpumask, vector);
@@ -179,6 +202,7 @@ struct genapic apic_physflat =  {
        .int_delivery_mode = dest_Fixed,
        .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
        .target_cpus = physflat_target_cpus,
+        .vector_allocation_domain = physflat_vector_allocation_domain,
        .apic_id_registered = flat_apic_id_registered,
        .init_apic_ldr = flat_init_apic_ldr,/*not needed, but shouldn't hurt*/
        .send_IPI_all = physflat_send_IPI_all,
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 9c3b9b19d699..771bcf77daf2 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -47,7 +47,7 @@
 #include <asm/msidef.h>
 #include <asm/hypertransport.h>
-static int assign_irq_vector(int irq, cpumask_t mask);
+static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result);
 #define __apicdebuginit  __init
@@ -174,12 +174,10 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
        cpus_and(mask, tmp, CPU_MASK_ALL);
-        vector = assign_irq_vector(irq, mask);
+        vector = assign_irq_vector(irq, mask, &tmp);
        if (vector < 0)
                return;
-        cpus_clear(tmp);
-        cpu_set(vector >> 8, tmp);
        dest = cpu_mask_to_apicid(tmp);
        /*
@@ -188,7 +186,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
        dest = SET_APIC_LOGICAL_ID(dest);
        spin_lock_irqsave(&ioapic_lock, flags);
-        __target_IO_APIC_irq(irq, dest, vector & 0xff);
+        __target_IO_APIC_irq(irq, dest, vector);
        set_native_irq_info(irq, mask);
        spin_unlock_irqrestore(&ioapic_lock, flags);
 }
@@ -563,9 +561,45 @@ static inline int IO_APIC_irq_trigger(int irq)
 }
 /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
-unsigned int irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_EXTERNAL_VECTOR, 0 };
+static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = {
+        [0] = FIRST_EXTERNAL_VECTOR + 0,
+        [1] = FIRST_EXTERNAL_VECTOR + 1,
+        [2] = FIRST_EXTERNAL_VECTOR + 2,
+        [3] = FIRST_EXTERNAL_VECTOR + 3,
+        [4] = FIRST_EXTERNAL_VECTOR + 4,
+        [5] = FIRST_EXTERNAL_VECTOR + 5,
+        [6] = FIRST_EXTERNAL_VECTOR + 6,
+        [7] = FIRST_EXTERNAL_VECTOR + 7,
+        [8] = FIRST_EXTERNAL_VECTOR + 8,
+        [9] = FIRST_EXTERNAL_VECTOR + 9,
+        [10] = FIRST_EXTERNAL_VECTOR + 10,
+        [11] = FIRST_EXTERNAL_VECTOR + 11,
+        [12] = FIRST_EXTERNAL_VECTOR + 12,
+        [13] = FIRST_EXTERNAL_VECTOR + 13,
+        [14] = FIRST_EXTERNAL_VECTOR + 14,
+        [15] = FIRST_EXTERNAL_VECTOR + 15,
+};
+static cpumask_t irq_domain[NR_IRQ_VECTORS] __read_mostly = {
+        [0] = CPU_MASK_ALL,
+        [1] = CPU_MASK_ALL,
+        [2] = CPU_MASK_ALL,
+        [3] = CPU_MASK_ALL,
+        [4] = CPU_MASK_ALL,
+        [5] = CPU_MASK_ALL,
+        [6] = CPU_MASK_ALL,
+        [7] = CPU_MASK_ALL,
+        [8] = CPU_MASK_ALL,
+        [9] = CPU_MASK_ALL,
+        [10] = CPU_MASK_ALL,
+        [11] = CPU_MASK_ALL,
+        [12] = CPU_MASK_ALL,
+        [13] = CPU_MASK_ALL,
+        [14] = CPU_MASK_ALL,
+        [15] = CPU_MASK_ALL,
+};
-static int __assign_irq_vector(int irq, cpumask_t mask)
+static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
 {
        /*
         * NOTE! The local APIC isn't very good at handling
@@ -589,14 +623,22 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
        if (irq_vector[irq] > 0)
                old_vector = irq_vector[irq];
-        if ((old_vector > 0) && cpu_isset(old_vector >> 8, mask)) {
+        if (old_vector > 0) {
-                return old_vector;
+                cpus_and(*result, irq_domain[irq], mask);
+                if (!cpus_empty(*result))
+                        return old_vector;
        }
        for_each_cpu_mask(cpu, mask) {
+                cpumask_t domain;
+                int first, new_cpu;
                int vector, offset;
-                vector = pos[cpu].vector;
-                offset = pos[cpu].offset;
+                domain = vector_allocation_domain(cpu);
+                first = first_cpu(domain);
+                vector = pos[first].vector;
+                offset = pos[first].offset;
 next:
                vector += 8;
                if (vector >= FIRST_SYSTEM_VECTOR) {
@@ -604,35 +646,40 @@ next:
                        offset = (offset + 1) % 8;
                        vector = FIRST_DEVICE_VECTOR + offset;
                }
-                if (unlikely(pos[cpu].vector == vector))
+                if (unlikely(pos[first].vector == vector))
                        continue;
                if (vector == IA32_SYSCALL_VECTOR)
                        goto next;
-                if (per_cpu(vector_irq, cpu)[vector] != -1)
+                for_each_cpu_mask(new_cpu, domain)
-                        goto next;
+                        if (per_cpu(vector_irq, cpu)[vector] != -1)
+                                goto next;
                /* Found one! */
-                pos[cpu].vector = vector;
+                for_each_cpu_mask(new_cpu, domain) {
-                pos[cpu].offset = offset;
+                        pos[cpu].vector = vector;
+                        pos[cpu].offset = offset;
+                }
                if (old_vector >= 0) {
-                        int old_cpu = old_vector >> 8;
+                        int old_cpu;
-                        old_vector &= 0xff;
+                        for_each_cpu_mask(old_cpu, domain)
-                        per_cpu(vector_irq, old_cpu)[old_vector] = -1;
+                                per_cpu(vector_irq, old_cpu)[old_vector] = -1;
                }
-                per_cpu(vector_irq, cpu)[vector] = irq;
+                for_each_cpu_mask(new_cpu, domain)
-                vector |= cpu << 8;
+                        per_cpu(vector_irq, new_cpu)[vector] = irq;
                irq_vector[irq] = vector;
+                irq_domain[irq] = domain;
+                cpus_and(*result, domain, mask);
                return vector;
        }
        return -ENOSPC;
 }
-static int assign_irq_vector(int irq, cpumask_t mask)
+static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
 {
        int vector;
        unsigned long flags;
        spin_lock_irqsave(&vector_lock, flags);
-        vector = __assign_irq_vector(irq, mask);
+        vector = __assign_irq_vector(irq, mask, result);
        spin_unlock_irqrestore(&vector_lock, flags);
        return vector;
 }
@@ -704,14 +751,12 @@ static void __init setup_IO_APIC_irqs(void)
                if (IO_APIC_IRQ(irq)) {
                        cpumask_t mask;
-                        vector = assign_irq_vector(irq, TARGET_CPUS);
+                        vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
                        if (vector < 0)
                                continue;
-                        cpus_clear(mask);
-                        cpu_set(vector >> 8, mask);
                        entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
-                        entry.vector = vector & 0xff;
+                        entry.vector = vector;
                        ioapic_register_intr(irq, vector, IOAPIC_AUTO);
                        if (!apic && (irq < 16))
@@ -1430,12 +1475,13 @@ static inline void check_timer(void)
 {
        int apic1, pin1, apic2, pin2;
        int vector;
+        cpumask_t mask;
        /*
         * get/set the timer IRQ vector:
         */
        disable_8259A_irq(0);
-        vector = assign_irq_vector(0, TARGET_CPUS);
+        vector = assign_irq_vector(0, TARGET_CPUS, &mask);
        /*
         * Subtle, code in do_timer_interrupt() expects an AEOI
@@ -1667,6 +1713,7 @@ int create_irq(void)
        int new;
        int vector = 0;
        unsigned long flags;
+        cpumask_t mask;
        irq = -ENOSPC;
        spin_lock_irqsave(&vector_lock, flags);
@@ -1675,7 +1722,7 @@ int create_irq(void)
                        continue;
                if (irq_vector[new] != 0)
                        continue;
-                vector = __assign_irq_vector(new, TARGET_CPUS);
+                vector = __assign_irq_vector(new, TARGET_CPUS, &mask);
                if (likely(vector > 0))
                        irq = new;
                break;
@@ -1707,13 +1754,10 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 {
        int vector;
        unsigned dest;
+        cpumask_t tmp;
-        vector = assign_irq_vector(irq, TARGET_CPUS);
+        vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
        if (vector >= 0) {
-                cpumask_t tmp;
-                cpus_clear(tmp);
-                cpu_set(vector >> 8, tmp);
                dest = cpu_mask_to_apicid(tmp);
                msg->address_hi = MSI_ADDR_BASE_HI;
@@ -1752,12 +1796,10 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
        cpus_and(mask, tmp, CPU_MASK_ALL);
-        vector = assign_irq_vector(irq, mask);
+        vector = assign_irq_vector(irq, mask, &tmp);
        if (vector < 0)
                return;
-        cpus_clear(tmp);
-        cpu_set(vector >> 8, tmp);
        dest = cpu_mask_to_apicid(tmp);
        read_msi_msg(irq, &msg);
@@ -1844,12 +1886,10 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
        cpus_and(mask, tmp, CPU_MASK_ALL);
-        vector = assign_irq_vector(irq, mask);
+        vector = assign_irq_vector(irq, mask, &tmp);
        if (vector < 0)
                return;
-        cpus_clear(tmp);
-        cpu_set(vector >> 8, tmp);
        dest = cpu_mask_to_apicid(tmp);
        target_ht_irq(irq, dest, vector & 0xff);
@@ -1871,15 +1911,13 @@ static struct hw_interrupt_type ht_irq_chip = {
 int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 {
        int vector;
+        cpumask_t tmp;
-        vector = assign_irq_vector(irq, TARGET_CPUS);
+        vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
        if (vector >= 0) {
                u32 low, high;
                unsigned dest;
-                cpumask_t tmp;
-                cpus_clear(tmp);
-                cpu_set(vector >> 8, tmp);
                dest = cpu_mask_to_apicid(tmp);
                high =  HT_IRQ_HIGH_DEST_ID(dest);
@@ -1945,13 +1983,10 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int p
                add_pin_to_irq(irq, ioapic, pin);
-        vector = assign_irq_vector(irq, TARGET_CPUS);
+        vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
        if (vector < 0)
                return vector;
-        cpus_clear(mask);
-        cpu_set(vector >> 8, mask);
        /*
         * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
         * Note that we mask (disable) IRQs now -- these get enabled when the
author	Eric W. Biederman <ebiederm@xmission.com>	2006-10-08 09:47:55 -0400
committer	Linus Torvalds <torvalds@g5.osdl.org>	2006-10-08 15:24:02 -0400
commit	c7111c1318ee8890f385813f232fdb32643e2653 (patch)
tree	e21f8ee81f45f0c127a6233ed77d8052615d1fc8 /arch/x86_64/kernel
parent	b940d22d58c41b2ae491dca9232850f6f38f3653 (diff)

diff --git a/arch/x86_64/kernel/genapic_cluster.c b/arch/x86_64/kernel/genapic_cluster.c index cdb90e671b88..73d76308b955 100644 --- a/arch/x86_64/kernel/genapic_cluster.c +++ b/arch/x86_64/kernel/genapic_cluster.c
@@ -63,6 +63,13 @@ static cpumask_t cluster_target_cpus(void)
63	return cpumask_of_cpu(0);	63	return cpumask_of_cpu(0);
64	}	64	}
65		65
		66	static cpumask_t cluster_vector_allocation_domain(int cpu)
		67	{
		68	cpumask_t domain = CPU_MASK_NONE;
		69	cpu_set(cpu, domain);
		70	return domain;
		71	}
		72
66	static void cluster_send_IPI_mask(cpumask_t mask, int vector)	73	static void cluster_send_IPI_mask(cpumask_t mask, int vector)
67	{	74	{
68	send_IPI_mask_sequence(mask, vector);	75	send_IPI_mask_sequence(mask, vector);
@@ -119,6 +126,7 @@ struct genapic apic_cluster = {
119	.int_delivery_mode = dest_Fixed,	126	.int_delivery_mode = dest_Fixed,
120	.int_dest_mode = (APIC_DEST_PHYSICAL != 0),	127	.int_dest_mode = (APIC_DEST_PHYSICAL != 0),
121	.target_cpus = cluster_target_cpus,	128	.target_cpus = cluster_target_cpus,
		129	.vector_allocation_domain = cluster_vector_allocation_domain,
122	.apic_id_registered = cluster_apic_id_registered,	130	.apic_id_registered = cluster_apic_id_registered,
123	.init_apic_ldr = cluster_init_apic_ldr,	131	.init_apic_ldr = cluster_init_apic_ldr,
124	.send_IPI_all = cluster_send_IPI_all,	132	.send_IPI_all = cluster_send_IPI_all,


diff --git a/arch/x86_64/kernel/genapic_flat.c b/arch/x86_64/kernel/genapic_flat.c index 50ad153eaac4..0dfc223c1839 100644 --- a/arch/x86_64/kernel/genapic_flat.c +++ b/arch/x86_64/kernel/genapic_flat.c
@@ -22,6 +22,20 @@ static cpumask_t flat_target_cpus(void)
22	return cpu_online_map;	22	return cpu_online_map;
23	}	23	}
24		24
		25	static cpumask_t flat_vector_allocation_domain(int cpu)
		26	{
		27	/* Careful. Some cpus do not strictly honor the set of cpus
		28	* specified in the interrupt destination when using lowest
		29	* priority interrupt delivery mode.
		30	*
		31	* In particular there was a hyperthreading cpu observed to
		32	* deliver interrupts to the wrong hyperthread when only one
		33	* hyperthread was specified in the interrupt destination.
		34	*/
		35	cpumask_t domain = { { [0] = APIC_ALL_CPUS, } };
		36	return domain;
		37	}
		38
25	/*	39	/*
26	* Set up the logical destination ID.	40	* Set up the logical destination ID.
27	*	41	*
@@ -121,6 +135,7 @@ struct genapic apic_flat = {
121	.int_delivery_mode = dest_LowestPrio,	135	.int_delivery_mode = dest_LowestPrio,
122	.int_dest_mode = (APIC_DEST_LOGICAL != 0),	136	.int_dest_mode = (APIC_DEST_LOGICAL != 0),
123	.target_cpus = flat_target_cpus,	137	.target_cpus = flat_target_cpus,
		138	.vector_allocation_domain = flat_vector_allocation_domain,
124	.apic_id_registered = flat_apic_id_registered,	139	.apic_id_registered = flat_apic_id_registered,
125	.init_apic_ldr = flat_init_apic_ldr,	140	.init_apic_ldr = flat_init_apic_ldr,
126	.send_IPI_all = flat_send_IPI_all,	141	.send_IPI_all = flat_send_IPI_all,
@@ -141,6 +156,14 @@ static cpumask_t physflat_target_cpus(void)
141	return cpumask_of_cpu(0);	156	return cpumask_of_cpu(0);
142	}	157	}
143		158
		159	static cpumask_t physflat_vector_allocation_domain(int cpu)
		160	{
		161	cpumask_t domain = CPU_MASK_NONE;
		162	cpu_set(cpu, domain);
		163	return domain;
		164	}
		165
		166
144	static void physflat_send_IPI_mask(cpumask_t cpumask, int vector)	167	static void physflat_send_IPI_mask(cpumask_t cpumask, int vector)
145	{	168	{
146	send_IPI_mask_sequence(cpumask, vector);	169	send_IPI_mask_sequence(cpumask, vector);
@@ -179,6 +202,7 @@ struct genapic apic_physflat = {
179	.int_delivery_mode = dest_Fixed,	202	.int_delivery_mode = dest_Fixed,
180	.int_dest_mode = (APIC_DEST_PHYSICAL != 0),	203	.int_dest_mode = (APIC_DEST_PHYSICAL != 0),
181	.target_cpus = physflat_target_cpus,	204	.target_cpus = physflat_target_cpus,
		205	.vector_allocation_domain = physflat_vector_allocation_domain,
182	.apic_id_registered = flat_apic_id_registered,	206	.apic_id_registered = flat_apic_id_registered,
183	.init_apic_ldr = flat_init_apic_ldr,/*not needed, but shouldn't hurt*/	207	.init_apic_ldr = flat_init_apic_ldr,/*not needed, but shouldn't hurt*/
184	.send_IPI_all = physflat_send_IPI_all,	208	.send_IPI_all = physflat_send_IPI_all,


diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index 9c3b9b19d699..771bcf77daf2 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c
@@ -47,7 +47,7 @@
47	#include <asm/msidef.h>	47	#include <asm/msidef.h>
48	#include <asm/hypertransport.h>	48	#include <asm/hypertransport.h>
49		49
50	static int assign_irq_vector(int irq, cpumask_t mask);	50	static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result);
51		51
52	#define __apicdebuginit __init	52	#define __apicdebuginit __init
53		53
@@ -174,12 +174,10 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
174		174
175	cpus_and(mask, tmp, CPU_MASK_ALL);	175	cpus_and(mask, tmp, CPU_MASK_ALL);
176		176
177	vector = assign_irq_vector(irq, mask);	177	vector = assign_irq_vector(irq, mask, &tmp);
178	if (vector < 0)	178	if (vector < 0)
179	return;	179	return;
180		180
181	cpus_clear(tmp);
182	cpu_set(vector >> 8, tmp);
183	dest = cpu_mask_to_apicid(tmp);	181	dest = cpu_mask_to_apicid(tmp);
184		182
185	/*	183	/*
@@ -188,7 +186,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
188	dest = SET_APIC_LOGICAL_ID(dest);	186	dest = SET_APIC_LOGICAL_ID(dest);
189		187
190	spin_lock_irqsave(&ioapic_lock, flags);	188	spin_lock_irqsave(&ioapic_lock, flags);
191	__target_IO_APIC_irq(irq, dest, vector & 0xff);	189	__target_IO_APIC_irq(irq, dest, vector);
192	set_native_irq_info(irq, mask);	190	set_native_irq_info(irq, mask);
193	spin_unlock_irqrestore(&ioapic_lock, flags);	191	spin_unlock_irqrestore(&ioapic_lock, flags);
194	}	192	}
@@ -563,9 +561,45 @@ static inline int IO_APIC_irq_trigger(int irq)
563	}	561	}
564		562
565	/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */	563	/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
566	unsigned int irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_EXTERNAL_VECTOR, 0 };	564	static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = {
		565	[0] = FIRST_EXTERNAL_VECTOR + 0,
		566	[1] = FIRST_EXTERNAL_VECTOR + 1,
		567	[2] = FIRST_EXTERNAL_VECTOR + 2,
		568	[3] = FIRST_EXTERNAL_VECTOR + 3,
		569	[4] = FIRST_EXTERNAL_VECTOR + 4,
		570	[5] = FIRST_EXTERNAL_VECTOR + 5,
		571	[6] = FIRST_EXTERNAL_VECTOR + 6,
		572	[7] = FIRST_EXTERNAL_VECTOR + 7,
		573	[8] = FIRST_EXTERNAL_VECTOR + 8,
		574	[9] = FIRST_EXTERNAL_VECTOR + 9,
		575	[10] = FIRST_EXTERNAL_VECTOR + 10,
		576	[11] = FIRST_EXTERNAL_VECTOR + 11,
		577	[12] = FIRST_EXTERNAL_VECTOR + 12,
		578	[13] = FIRST_EXTERNAL_VECTOR + 13,
		579	[14] = FIRST_EXTERNAL_VECTOR + 14,
		580	[15] = FIRST_EXTERNAL_VECTOR + 15,
		581	};
		582
		583	static cpumask_t irq_domain[NR_IRQ_VECTORS] __read_mostly = {
		584	[0] = CPU_MASK_ALL,
		585	[1] = CPU_MASK_ALL,
		586	[2] = CPU_MASK_ALL,
		587	[3] = CPU_MASK_ALL,
		588	[4] = CPU_MASK_ALL,
		589	[5] = CPU_MASK_ALL,
		590	[6] = CPU_MASK_ALL,
		591	[7] = CPU_MASK_ALL,
		592	[8] = CPU_MASK_ALL,
		593	[9] = CPU_MASK_ALL,
		594	[10] = CPU_MASK_ALL,
		595	[11] = CPU_MASK_ALL,
		596	[12] = CPU_MASK_ALL,
		597	[13] = CPU_MASK_ALL,
		598	[14] = CPU_MASK_ALL,
		599	[15] = CPU_MASK_ALL,
		600	};
567		601
568	static int __assign_irq_vector(int irq, cpumask_t mask)	602	static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
569	{	603	{
570	/*	604	/*
571	* NOTE! The local APIC isn't very good at handling	605	* NOTE! The local APIC isn't very good at handling
@@ -589,14 +623,22 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
589		623
590	if (irq_vector[irq] > 0)	624	if (irq_vector[irq] > 0)
591	old_vector = irq_vector[irq];	625	old_vector = irq_vector[irq];
592	if ((old_vector > 0) && cpu_isset(old_vector >> 8, mask)) {	626	if (old_vector > 0) {
593	return old_vector;	627	cpus_and(*result, irq_domain[irq], mask);
		628	if (!cpus_empty(*result))
		629	return old_vector;
594	}	630	}
595		631
596	for_each_cpu_mask(cpu, mask) {	632	for_each_cpu_mask(cpu, mask) {
		633	cpumask_t domain;
		634	int first, new_cpu;
597	int vector, offset;	635	int vector, offset;
598	vector = pos[cpu].vector;	636
599	offset = pos[cpu].offset;	637	domain = vector_allocation_domain(cpu);
		638	first = first_cpu(domain);
		639
		640	vector = pos[first].vector;
		641	offset = pos[first].offset;
600	next:	642	next:
601	vector += 8;	643	vector += 8;
602	if (vector >= FIRST_SYSTEM_VECTOR) {	644	if (vector >= FIRST_SYSTEM_VECTOR) {
@@ -604,35 +646,40 @@ next:
604	offset = (offset + 1) % 8;	646	offset = (offset + 1) % 8;
605	vector = FIRST_DEVICE_VECTOR + offset;	647	vector = FIRST_DEVICE_VECTOR + offset;
606	}	648	}
607	if (unlikely(pos[cpu].vector == vector))	649	if (unlikely(pos[first].vector == vector))
608	continue;	650	continue;
609	if (vector == IA32_SYSCALL_VECTOR)	651	if (vector == IA32_SYSCALL_VECTOR)
610	goto next;	652	goto next;
611	if (per_cpu(vector_irq, cpu)[vector] != -1)	653	for_each_cpu_mask(new_cpu, domain)
612	goto next;	654	if (per_cpu(vector_irq, cpu)[vector] != -1)
		655	goto next;
613	/* Found one! */	656	/* Found one! */
614	pos[cpu].vector = vector;	657	for_each_cpu_mask(new_cpu, domain) {
615	pos[cpu].offset = offset;	658	pos[cpu].vector = vector;
		659	pos[cpu].offset = offset;
		660	}
616	if (old_vector >= 0) {	661	if (old_vector >= 0) {
617	int old_cpu = old_vector >> 8;	662	int old_cpu;
618	old_vector &= 0xff;	663	for_each_cpu_mask(old_cpu, domain)
619	per_cpu(vector_irq, old_cpu)[old_vector] = -1;	664	per_cpu(vector_irq, old_cpu)[old_vector] = -1;
620	}	665	}
621	per_cpu(vector_irq, cpu)[vector] = irq;	666	for_each_cpu_mask(new_cpu, domain)
622	vector |= cpu << 8;	667	per_cpu(vector_irq, new_cpu)[vector] = irq;
623	irq_vector[irq] = vector;	668	irq_vector[irq] = vector;
		669	irq_domain[irq] = domain;
		670	cpus_and(*result, domain, mask);
624	return vector;	671	return vector;
625	}	672	}
626	return -ENOSPC;	673	return -ENOSPC;
627	}	674	}
628		675
629	static int assign_irq_vector(int irq, cpumask_t mask)	676	static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
630	{	677	{
631	int vector;	678	int vector;
632	unsigned long flags;	679	unsigned long flags;
633		680
634	spin_lock_irqsave(&vector_lock, flags);	681	spin_lock_irqsave(&vector_lock, flags);
635	vector = __assign_irq_vector(irq, mask);	682	vector = __assign_irq_vector(irq, mask, result);
636	spin_unlock_irqrestore(&vector_lock, flags);	683	spin_unlock_irqrestore(&vector_lock, flags);
637	return vector;	684	return vector;
638	}	685	}
@@ -704,14 +751,12 @@ static void __init setup_IO_APIC_irqs(void)
704		751
705	if (IO_APIC_IRQ(irq)) {	752	if (IO_APIC_IRQ(irq)) {
706	cpumask_t mask;	753	cpumask_t mask;
707	vector = assign_irq_vector(irq, TARGET_CPUS);	754	vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
708	if (vector < 0)	755	if (vector < 0)
709	continue;	756	continue;
710		757
711	cpus_clear(mask);
712	cpu_set(vector >> 8, mask);
713	entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);	758	entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
714	entry.vector = vector & 0xff;	759	entry.vector = vector;
715		760
716	ioapic_register_intr(irq, vector, IOAPIC_AUTO);	761	ioapic_register_intr(irq, vector, IOAPIC_AUTO);
717	if (!apic && (irq < 16))	762	if (!apic && (irq < 16))
@@ -1430,12 +1475,13 @@ static inline void check_timer(void)
1430	{	1475	{
1431	int apic1, pin1, apic2, pin2;	1476	int apic1, pin1, apic2, pin2;
1432	int vector;	1477	int vector;
		1478	cpumask_t mask;
1433		1479
1434	/*	1480	/*
1435	* get/set the timer IRQ vector:	1481	* get/set the timer IRQ vector:
1436	*/	1482	*/
1437	disable_8259A_irq(0);	1483	disable_8259A_irq(0);
1438	vector = assign_irq_vector(0, TARGET_CPUS);	1484	vector = assign_irq_vector(0, TARGET_CPUS, &mask);
1439		1485
1440	/*	1486	/*
1441	* Subtle, code in do_timer_interrupt() expects an AEOI	1487	* Subtle, code in do_timer_interrupt() expects an AEOI
@@ -1667,6 +1713,7 @@ int create_irq(void)
1667	int new;	1713	int new;
1668	int vector = 0;	1714	int vector = 0;
1669	unsigned long flags;	1715	unsigned long flags;
		1716	cpumask_t mask;
1670		1717
1671	irq = -ENOSPC;	1718	irq = -ENOSPC;
1672	spin_lock_irqsave(&vector_lock, flags);	1719	spin_lock_irqsave(&vector_lock, flags);
@@ -1675,7 +1722,7 @@ int create_irq(void)
1675	continue;	1722	continue;
1676	if (irq_vector[new] != 0)	1723	if (irq_vector[new] != 0)
1677	continue;	1724	continue;
1678	vector = __assign_irq_vector(new, TARGET_CPUS);	1725	vector = __assign_irq_vector(new, TARGET_CPUS, &mask);
1679	if (likely(vector > 0))	1726	if (likely(vector > 0))
1680	irq = new;	1727	irq = new;
1681	break;	1728	break;
@@ -1707,13 +1754,10 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
1707	{	1754	{
1708	int vector;	1755	int vector;
1709	unsigned dest;	1756	unsigned dest;
		1757	cpumask_t tmp;
1710		1758
1711	vector = assign_irq_vector(irq, TARGET_CPUS);	1759	vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
1712	if (vector >= 0) {	1760	if (vector >= 0) {
1713	cpumask_t tmp;
1714
1715	cpus_clear(tmp);
1716	cpu_set(vector >> 8, tmp);
1717	dest = cpu_mask_to_apicid(tmp);	1761	dest = cpu_mask_to_apicid(tmp);
1718		1762
1719	msg->address_hi = MSI_ADDR_BASE_HI;	1763	msg->address_hi = MSI_ADDR_BASE_HI;
@@ -1752,12 +1796,10 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
1752		1796
1753	cpus_and(mask, tmp, CPU_MASK_ALL);	1797	cpus_and(mask, tmp, CPU_MASK_ALL);
1754		1798
1755	vector = assign_irq_vector(irq, mask);	1799	vector = assign_irq_vector(irq, mask, &tmp);
1756	if (vector < 0)	1800	if (vector < 0)
1757	return;	1801	return;
1758		1802
1759	cpus_clear(tmp);
1760	cpu_set(vector >> 8, tmp);
1761	dest = cpu_mask_to_apicid(tmp);	1803	dest = cpu_mask_to_apicid(tmp);
1762		1804
1763	read_msi_msg(irq, &msg);	1805	read_msi_msg(irq, &msg);
@@ -1844,12 +1886,10 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
1844		1886
1845	cpus_and(mask, tmp, CPU_MASK_ALL);	1887	cpus_and(mask, tmp, CPU_MASK_ALL);
1846		1888
1847	vector = assign_irq_vector(irq, mask);	1889	vector = assign_irq_vector(irq, mask, &tmp);
1848	if (vector < 0)	1890	if (vector < 0)
1849	return;	1891	return;
1850		1892
1851	cpus_clear(tmp);
1852	cpu_set(vector >> 8, tmp);
1853	dest = cpu_mask_to_apicid(tmp);	1893	dest = cpu_mask_to_apicid(tmp);
1854		1894
1855	target_ht_irq(irq, dest, vector & 0xff);	1895	target_ht_irq(irq, dest, vector & 0xff);
@@ -1871,15 +1911,13 @@ static struct hw_interrupt_type ht_irq_chip = {
1871	int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)	1911	int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
1872	{	1912	{
1873	int vector;	1913	int vector;
		1914	cpumask_t tmp;
1874		1915
1875	vector = assign_irq_vector(irq, TARGET_CPUS);	1916	vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
1876	if (vector >= 0) {	1917	if (vector >= 0) {
1877	u32 low, high;	1918	u32 low, high;
1878	unsigned dest;	1919	unsigned dest;
1879	cpumask_t tmp;
1880		1920
1881	cpus_clear(tmp);
1882	cpu_set(vector >> 8, tmp);
1883	dest = cpu_mask_to_apicid(tmp);	1921	dest = cpu_mask_to_apicid(tmp);
1884		1922
1885	high = HT_IRQ_HIGH_DEST_ID(dest);	1923	high = HT_IRQ_HIGH_DEST_ID(dest);
@@ -1945,13 +1983,10 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int p
1945	add_pin_to_irq(irq, ioapic, pin);	1983	add_pin_to_irq(irq, ioapic, pin);
1946		1984
1947		1985
1948	vector = assign_irq_vector(irq, TARGET_CPUS);	1986	vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
1949	if (vector < 0)	1987	if (vector < 0)
1950	return vector;	1988	return vector;
1951		1989
1952	cpus_clear(mask);
1953	cpu_set(vector >> 8, mask);
1954
1955	/*	1990	/*
1956	* Generate a PCI IRQ routing entry and program the IOAPIC accordingly.	1991	* Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
1957	* Note that we mask (disable) IRQs now -- these get enabled when the	1992	* Note that we mask (disable) IRQs now -- these get enabled when the