aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAshok Raj <ashok.raj@intel.com>2005-09-06 18:16:15 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2005-09-07 19:57:15 -0400
commit54d5d42404e7705cf3804593189e963350d470e5 (patch)
tree7cf8a7fce163b19672193d8cf4ef6a7f6c131d9e
parentf63ed39c578a2a2d067356a85ce7c28a7c795d8a (diff)
[PATCH] x86/x86_64: deferred handling of writes to /proc/irqxx/smp_affinity
When handling writes to /proc/irq, the current code re-programs RTE entries directly. This is not recommended and could potentially cause chipsets to lock up, or cause missed interrupts.

CONFIG_IRQBALANCE does this correctly: it re-programs only when the interrupt is pending. The same needs to be done for /proc/irq handling as well. Otherwise user-space irq balancers are really not doing the right thing.

- Changed pending_irq_balance_cpumask to pending_irq_cpumask for lack of a generic name.
- Moved move_irq out of IRQBALANCE, and added the same to X86_64.
- Added a new proc handler for write, so we can do the deferred write at irq handling time.
- Display of /proc/irq/XX/smp_affinity used to display CPU_MASK_ALL; it now shows only active cpu masks, or exactly what was set.
- Provided a common move_irq implementation, instead of duplicating it when using the generic irq framework.

Tested on i386/x86_64 and ia64 with CONFIG_PCI_MSI turned on and off. Tested UP builds as well.

MSI testing: TBD. I have cards, but need to look for a crossover cable, although I did test an earlier version of this patch. Will test in a couple of days.

Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Acked-by: Zwane Mwaikambo <zwane@holomorphy.com>
Grudgingly-acked-by: Andi Kleen <ak@muc.de>
Signed-off-by: Coywolf Qi Hunt <coywolf@lovecn.org>
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--arch/i386/Kconfig5
-rw-r--r--arch/i386/kernel/io_apic.c55
-rw-r--r--arch/ia64/Kconfig5
-rw-r--r--arch/ia64/kernel/irq.c39
-rw-r--r--arch/x86_64/Kconfig5
-rw-r--r--arch/x86_64/kernel/io_apic.c102
-rw-r--r--drivers/pci/msi.c17
-rw-r--r--drivers/pci/msi.h5
-rw-r--r--include/asm-ia64/hw_irq.h7
-rw-r--r--include/asm-ia64/irq.h6
-rw-r--r--include/linux/irq.h123
-rw-r--r--kernel/irq/manage.c4
-rw-r--r--kernel/irq/proc.c14
13 files changed, 253 insertions, 134 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 3b3b017e1c15..4b7de3e1e57b 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -1318,6 +1318,11 @@ config GENERIC_IRQ_PROBE
1318 bool 1318 bool
1319 default y 1319 default y
1320 1320
1321config GENERIC_PENDING_IRQ
1322 bool
1323 depends on GENERIC_HARDIRQS && SMP
1324 default y
1325
1321config X86_SMP 1326config X86_SMP
1322 bool 1327 bool
1323 depends on SMP && !X86_VOYAGER 1328 depends on SMP && !X86_VOYAGER
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index 6578f40bd501..4a5940431579 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -33,6 +33,7 @@
33#include <linux/acpi.h> 33#include <linux/acpi.h>
34#include <linux/module.h> 34#include <linux/module.h>
35#include <linux/sysdev.h> 35#include <linux/sysdev.h>
36
36#include <asm/io.h> 37#include <asm/io.h>
37#include <asm/smp.h> 38#include <asm/smp.h>
38#include <asm/desc.h> 39#include <asm/desc.h>
@@ -222,13 +223,21 @@ static void clear_IO_APIC (void)
222 clear_IO_APIC_pin(apic, pin); 223 clear_IO_APIC_pin(apic, pin);
223} 224}
224 225
226#ifdef CONFIG_SMP
225static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) 227static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
226{ 228{
227 unsigned long flags; 229 unsigned long flags;
228 int pin; 230 int pin;
229 struct irq_pin_list *entry = irq_2_pin + irq; 231 struct irq_pin_list *entry = irq_2_pin + irq;
230 unsigned int apicid_value; 232 unsigned int apicid_value;
233 cpumask_t tmp;
231 234
235 cpus_and(tmp, cpumask, cpu_online_map);
236 if (cpus_empty(tmp))
237 tmp = TARGET_CPUS;
238
239 cpus_and(cpumask, tmp, CPU_MASK_ALL);
240
232 apicid_value = cpu_mask_to_apicid(cpumask); 241 apicid_value = cpu_mask_to_apicid(cpumask);
233 /* Prepare to do the io_apic_write */ 242 /* Prepare to do the io_apic_write */
234 apicid_value = apicid_value << 24; 243 apicid_value = apicid_value << 24;
@@ -242,6 +251,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
242 break; 251 break;
243 entry = irq_2_pin + entry->next; 252 entry = irq_2_pin + entry->next;
244 } 253 }
254 set_irq_info(irq, cpumask);
245 spin_unlock_irqrestore(&ioapic_lock, flags); 255 spin_unlock_irqrestore(&ioapic_lock, flags);
246} 256}
247 257
@@ -259,7 +269,6 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
259# define Dprintk(x...) 269# define Dprintk(x...)
260# endif 270# endif
261 271
262cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS];
263 272
264#define IRQBALANCE_CHECK_ARCH -999 273#define IRQBALANCE_CHECK_ARCH -999
265static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH; 274static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH;
@@ -328,12 +337,7 @@ static inline void balance_irq(int cpu, int irq)
328 cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]); 337 cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]);
329 new_cpu = move(cpu, allowed_mask, now, 1); 338 new_cpu = move(cpu, allowed_mask, now, 1);
330 if (cpu != new_cpu) { 339 if (cpu != new_cpu) {
331 irq_desc_t *desc = irq_desc + irq; 340 set_pending_irq(irq, cpumask_of_cpu(new_cpu));
332 unsigned long flags;
333
334 spin_lock_irqsave(&desc->lock, flags);
335 pending_irq_balance_cpumask[irq] = cpumask_of_cpu(new_cpu);
336 spin_unlock_irqrestore(&desc->lock, flags);
337 } 341 }
338} 342}
339 343
@@ -528,16 +532,12 @@ tryanotherirq:
528 cpus_and(tmp, target_cpu_mask, allowed_mask); 532 cpus_and(tmp, target_cpu_mask, allowed_mask);
529 533
530 if (!cpus_empty(tmp)) { 534 if (!cpus_empty(tmp)) {
531 irq_desc_t *desc = irq_desc + selected_irq;
532 unsigned long flags;
533 535
534 Dprintk("irq = %d moved to cpu = %d\n", 536 Dprintk("irq = %d moved to cpu = %d\n",
535 selected_irq, min_loaded); 537 selected_irq, min_loaded);
536 /* mark for change destination */ 538 /* mark for change destination */
537 spin_lock_irqsave(&desc->lock, flags); 539 set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
538 pending_irq_balance_cpumask[selected_irq] = 540
539 cpumask_of_cpu(min_loaded);
540 spin_unlock_irqrestore(&desc->lock, flags);
541 /* Since we made a change, come back sooner to 541 /* Since we made a change, come back sooner to
542 * check for more variation. 542 * check for more variation.
543 */ 543 */
@@ -568,7 +568,8 @@ static int balanced_irq(void *unused)
568 568
569 /* push everything to CPU 0 to give us a starting point. */ 569 /* push everything to CPU 0 to give us a starting point. */
570 for (i = 0 ; i < NR_IRQS ; i++) { 570 for (i = 0 ; i < NR_IRQS ; i++) {
571 pending_irq_balance_cpumask[i] = cpumask_of_cpu(0); 571 pending_irq_cpumask[i] = cpumask_of_cpu(0);
572 set_pending_irq(i, cpumask_of_cpu(0));
572 } 573 }
573 574
574 for ( ; ; ) { 575 for ( ; ; ) {
@@ -647,20 +648,9 @@ int __init irqbalance_disable(char *str)
647 648
648__setup("noirqbalance", irqbalance_disable); 649__setup("noirqbalance", irqbalance_disable);
649 650
650static inline void move_irq(int irq)
651{
652 /* note - we hold the desc->lock */
653 if (unlikely(!cpus_empty(pending_irq_balance_cpumask[irq]))) {
654 set_ioapic_affinity_irq(irq, pending_irq_balance_cpumask[irq]);
655 cpus_clear(pending_irq_balance_cpumask[irq]);
656 }
657}
658
659late_initcall(balanced_irq_init); 651late_initcall(balanced_irq_init);
660
661#else /* !CONFIG_IRQBALANCE */
662static inline void move_irq(int irq) { }
663#endif /* CONFIG_IRQBALANCE */ 652#endif /* CONFIG_IRQBALANCE */
653#endif /* CONFIG_SMP */
664 654
665#ifndef CONFIG_SMP 655#ifndef CONFIG_SMP
666void fastcall send_IPI_self(int vector) 656void fastcall send_IPI_self(int vector)
@@ -820,6 +810,7 @@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
820 * we need to reprogram the ioredtbls to cater for the cpus which have come online 810 * we need to reprogram the ioredtbls to cater for the cpus which have come online
821 * so mask in all cases should simply be TARGET_CPUS 811 * so mask in all cases should simply be TARGET_CPUS
822 */ 812 */
813#ifdef CONFIG_SMP
823void __init setup_ioapic_dest(void) 814void __init setup_ioapic_dest(void)
824{ 815{
825 int pin, ioapic, irq, irq_entry; 816 int pin, ioapic, irq, irq_entry;
@@ -838,6 +829,7 @@ void __init setup_ioapic_dest(void)
838 829
839 } 830 }
840} 831}
832#endif
841 833
842/* 834/*
843 * EISA Edge/Level control register, ELCR 835 * EISA Edge/Level control register, ELCR
@@ -1249,6 +1241,7 @@ static void __init setup_IO_APIC_irqs(void)
1249 spin_lock_irqsave(&ioapic_lock, flags); 1241 spin_lock_irqsave(&ioapic_lock, flags);
1250 io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); 1242 io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
1251 io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); 1243 io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
1244 set_native_irq_info(irq, TARGET_CPUS);
1252 spin_unlock_irqrestore(&ioapic_lock, flags); 1245 spin_unlock_irqrestore(&ioapic_lock, flags);
1253 } 1246 }
1254 } 1247 }
@@ -1944,6 +1937,7 @@ static void ack_edge_ioapic_vector(unsigned int vector)
1944{ 1937{
1945 int irq = vector_to_irq(vector); 1938 int irq = vector_to_irq(vector);
1946 1939
1940 move_irq(vector);
1947 ack_edge_ioapic_irq(irq); 1941 ack_edge_ioapic_irq(irq);
1948} 1942}
1949 1943
@@ -1958,6 +1952,7 @@ static void end_level_ioapic_vector (unsigned int vector)
1958{ 1952{
1959 int irq = vector_to_irq(vector); 1953 int irq = vector_to_irq(vector);
1960 1954
1955 move_irq(vector);
1961 end_level_ioapic_irq(irq); 1956 end_level_ioapic_irq(irq);
1962} 1957}
1963 1958
@@ -1975,14 +1970,17 @@ static void unmask_IO_APIC_vector (unsigned int vector)
1975 unmask_IO_APIC_irq(irq); 1970 unmask_IO_APIC_irq(irq);
1976} 1971}
1977 1972
1973#ifdef CONFIG_SMP
1978static void set_ioapic_affinity_vector (unsigned int vector, 1974static void set_ioapic_affinity_vector (unsigned int vector,
1979 cpumask_t cpu_mask) 1975 cpumask_t cpu_mask)
1980{ 1976{
1981 int irq = vector_to_irq(vector); 1977 int irq = vector_to_irq(vector);
1982 1978
1979 set_native_irq_info(vector, cpu_mask);
1983 set_ioapic_affinity_irq(irq, cpu_mask); 1980 set_ioapic_affinity_irq(irq, cpu_mask);
1984} 1981}
1985#endif 1982#endif
1983#endif
1986 1984
1987/* 1985/*
1988 * Level and edge triggered IO-APIC interrupts need different handling, 1986 * Level and edge triggered IO-APIC interrupts need different handling,
@@ -2000,7 +1998,9 @@ static struct hw_interrupt_type ioapic_edge_type = {
2000 .disable = disable_edge_ioapic, 1998 .disable = disable_edge_ioapic,
2001 .ack = ack_edge_ioapic, 1999 .ack = ack_edge_ioapic,
2002 .end = end_edge_ioapic, 2000 .end = end_edge_ioapic,
2001#ifdef CONFIG_SMP
2003 .set_affinity = set_ioapic_affinity, 2002 .set_affinity = set_ioapic_affinity,
2003#endif
2004}; 2004};
2005 2005
2006static struct hw_interrupt_type ioapic_level_type = { 2006static struct hw_interrupt_type ioapic_level_type = {
@@ -2011,7 +2011,9 @@ static struct hw_interrupt_type ioapic_level_type = {
2011 .disable = disable_level_ioapic, 2011 .disable = disable_level_ioapic,
2012 .ack = mask_and_ack_level_ioapic, 2012 .ack = mask_and_ack_level_ioapic,
2013 .end = end_level_ioapic, 2013 .end = end_level_ioapic,
2014#ifdef CONFIG_SMP
2014 .set_affinity = set_ioapic_affinity, 2015 .set_affinity = set_ioapic_affinity,
2016#endif
2015}; 2017};
2016 2018
2017static inline void init_IO_APIC_traps(void) 2019static inline void init_IO_APIC_traps(void)
@@ -2569,6 +2571,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
2569 spin_lock_irqsave(&ioapic_lock, flags); 2571 spin_lock_irqsave(&ioapic_lock, flags);
2570 io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); 2572 io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
2571 io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); 2573 io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
2574 set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
2572 spin_unlock_irqrestore(&ioapic_lock, flags); 2575 spin_unlock_irqrestore(&ioapic_lock, flags);
2573 2576
2574 return 0; 2577 return 0;
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 3deced637f07..17b5dbf8c311 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -434,6 +434,11 @@ config GENERIC_IRQ_PROBE
434 bool 434 bool
435 default y 435 default y
436 436
437config GENERIC_PENDING_IRQ
438 bool
439 depends on GENERIC_HARDIRQS && SMP
440 default y
441
437source "arch/ia64/hp/sim/Kconfig" 442source "arch/ia64/hp/sim/Kconfig"
438 443
439source "arch/ia64/oprofile/Kconfig" 444source "arch/ia64/oprofile/Kconfig"
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
index 28f2aadc38d0..205d98028261 100644
--- a/arch/ia64/kernel/irq.c
+++ b/arch/ia64/kernel/irq.c
@@ -91,23 +91,8 @@ skip:
91} 91}
92 92
93#ifdef CONFIG_SMP 93#ifdef CONFIG_SMP
94/*
95 * This is updated when the user sets irq affinity via /proc
96 */
97static cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS];
98static unsigned long pending_irq_redir[BITS_TO_LONGS(NR_IRQS)];
99
100static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 }; 94static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 };
101 95
102/*
103 * Arch specific routine for deferred write to iosapic rte to reprogram
104 * intr destination.
105 */
106void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
107{
108 pending_irq_cpumask[irq] = mask_val;
109}
110
111void set_irq_affinity_info (unsigned int irq, int hwid, int redir) 96void set_irq_affinity_info (unsigned int irq, int hwid, int redir)
112{ 97{
113 cpumask_t mask = CPU_MASK_NONE; 98 cpumask_t mask = CPU_MASK_NONE;
@@ -116,32 +101,10 @@ void set_irq_affinity_info (unsigned int irq, int hwid, int redir)
116 101
117 if (irq < NR_IRQS) { 102 if (irq < NR_IRQS) {
118 irq_affinity[irq] = mask; 103 irq_affinity[irq] = mask;
104 set_irq_info(irq, mask);
119 irq_redir[irq] = (char) (redir & 0xff); 105 irq_redir[irq] = (char) (redir & 0xff);
120 } 106 }
121} 107}
122
123
124void move_irq(int irq)
125{
126 /* note - we hold desc->lock */
127 cpumask_t tmp;
128 irq_desc_t *desc = irq_descp(irq);
129 int redir = test_bit(irq, pending_irq_redir);
130
131 if (unlikely(!desc->handler->set_affinity))
132 return;
133
134 if (!cpus_empty(pending_irq_cpumask[irq])) {
135 cpus_and(tmp, pending_irq_cpumask[irq], cpu_online_map);
136 if (unlikely(!cpus_empty(tmp))) {
137 desc->handler->set_affinity(irq | (redir ? IA64_IRQ_REDIRECTED : 0),
138 pending_irq_cpumask[irq]);
139 }
140 cpus_clear(pending_irq_cpumask[irq]);
141 }
142}
143
144
145#endif /* CONFIG_SMP */ 108#endif /* CONFIG_SMP */
146 109
147#ifdef CONFIG_HOTPLUG_CPU 110#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 75e52c57f19c..251ce7cf1a38 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -441,6 +441,11 @@ config ISA_DMA_API
441 bool 441 bool
442 default y 442 default y
443 443
444config GENERIC_PENDING_IRQ
445 bool
446 depends on GENERIC_HARDIRQS && SMP
447 default y
448
444menu "Power management options" 449menu "Power management options"
445 450
446source kernel/power/Kconfig 451source kernel/power/Kconfig
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index d206d7e49cf5..76bcc4e6979d 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -78,6 +78,54 @@ int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1};
78#define vector_to_irq(vector) (vector) 78#define vector_to_irq(vector) (vector)
79#endif 79#endif
80 80
81#define __DO_ACTION(R, ACTION, FINAL) \
82 \
83{ \
84 int pin; \
85 struct irq_pin_list *entry = irq_2_pin + irq; \
86 \
87 for (;;) { \
88 unsigned int reg; \
89 pin = entry->pin; \
90 if (pin == -1) \
91 break; \
92 reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \
93 reg ACTION; \
94 io_apic_modify(entry->apic, reg); \
95 if (!entry->next) \
96 break; \
97 entry = irq_2_pin + entry->next; \
98 } \
99 FINAL; \
100}
101
102#ifdef CONFIG_SMP
103static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
104{
105 unsigned long flags;
106 unsigned int dest;
107 cpumask_t tmp;
108
109 cpus_and(tmp, mask, cpu_online_map);
110 if (cpus_empty(tmp))
111 tmp = TARGET_CPUS;
112
113 cpus_and(mask, tmp, CPU_MASK_ALL);
114
115 dest = cpu_mask_to_apicid(mask);
116
117 /*
118 * Only the high 8 bits are valid.
119 */
120 dest = SET_APIC_LOGICAL_ID(dest);
121
122 spin_lock_irqsave(&ioapic_lock, flags);
123 __DO_ACTION(1, = dest, )
124 set_irq_info(irq, mask);
125 spin_unlock_irqrestore(&ioapic_lock, flags);
126}
127#endif
128
81/* 129/*
82 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are 130 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
83 * shared ISA-space IRQs, so we have to support them. We are super 131 * shared ISA-space IRQs, so we have to support them. We are super
@@ -101,26 +149,6 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
101 entry->pin = pin; 149 entry->pin = pin;
102} 150}
103 151
104#define __DO_ACTION(R, ACTION, FINAL) \
105 \
106{ \
107 int pin; \
108 struct irq_pin_list *entry = irq_2_pin + irq; \
109 \
110 for (;;) { \
111 unsigned int reg; \
112 pin = entry->pin; \
113 if (pin == -1) \
114 break; \
115 reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \
116 reg ACTION; \
117 io_apic_modify(entry->apic, reg); \
118 if (!entry->next) \
119 break; \
120 entry = irq_2_pin + entry->next; \
121 } \
122 FINAL; \
123}
124 152
125#define DO_ACTION(name,R,ACTION, FINAL) \ 153#define DO_ACTION(name,R,ACTION, FINAL) \
126 \ 154 \
@@ -767,6 +795,7 @@ static void __init setup_IO_APIC_irqs(void)
767 spin_lock_irqsave(&ioapic_lock, flags); 795 spin_lock_irqsave(&ioapic_lock, flags);
768 io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); 796 io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
769 io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); 797 io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
798 set_native_irq_info(irq, TARGET_CPUS);
770 spin_unlock_irqrestore(&ioapic_lock, flags); 799 spin_unlock_irqrestore(&ioapic_lock, flags);
771 } 800 }
772 } 801 }
@@ -1314,6 +1343,7 @@ static unsigned int startup_edge_ioapic_irq(unsigned int irq)
1314 */ 1343 */
1315static void ack_edge_ioapic_irq(unsigned int irq) 1344static void ack_edge_ioapic_irq(unsigned int irq)
1316{ 1345{
1346 move_irq(irq);
1317 if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) 1347 if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
1318 == (IRQ_PENDING | IRQ_DISABLED)) 1348 == (IRQ_PENDING | IRQ_DISABLED))
1319 mask_IO_APIC_irq(irq); 1349 mask_IO_APIC_irq(irq);
@@ -1343,26 +1373,10 @@ static unsigned int startup_level_ioapic_irq (unsigned int irq)
1343 1373
1344static void end_level_ioapic_irq (unsigned int irq) 1374static void end_level_ioapic_irq (unsigned int irq)
1345{ 1375{
1376 move_irq(irq);
1346 ack_APIC_irq(); 1377 ack_APIC_irq();
1347} 1378}
1348 1379
1349static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
1350{
1351 unsigned long flags;
1352 unsigned int dest;
1353
1354 dest = cpu_mask_to_apicid(mask);
1355
1356 /*
1357 * Only the high 8 bits are valid.
1358 */
1359 dest = SET_APIC_LOGICAL_ID(dest);
1360
1361 spin_lock_irqsave(&ioapic_lock, flags);
1362 __DO_ACTION(1, = dest, )
1363 spin_unlock_irqrestore(&ioapic_lock, flags);
1364}
1365
1366#ifdef CONFIG_PCI_MSI 1380#ifdef CONFIG_PCI_MSI
1367static unsigned int startup_edge_ioapic_vector(unsigned int vector) 1381static unsigned int startup_edge_ioapic_vector(unsigned int vector)
1368{ 1382{
@@ -1375,6 +1389,7 @@ static void ack_edge_ioapic_vector(unsigned int vector)
1375{ 1389{
1376 int irq = vector_to_irq(vector); 1390 int irq = vector_to_irq(vector);
1377 1391
1392 move_native_irq(vector);
1378 ack_edge_ioapic_irq(irq); 1393 ack_edge_ioapic_irq(irq);
1379} 1394}
1380 1395
@@ -1389,6 +1404,7 @@ static void end_level_ioapic_vector (unsigned int vector)
1389{ 1404{
1390 int irq = vector_to_irq(vector); 1405 int irq = vector_to_irq(vector);
1391 1406
1407 move_native_irq(vector);
1392 end_level_ioapic_irq(irq); 1408 end_level_ioapic_irq(irq);
1393} 1409}
1394 1410
@@ -1406,14 +1422,17 @@ static void unmask_IO_APIC_vector (unsigned int vector)
1406 unmask_IO_APIC_irq(irq); 1422 unmask_IO_APIC_irq(irq);
1407} 1423}
1408 1424
1425#ifdef CONFIG_SMP
1409static void set_ioapic_affinity_vector (unsigned int vector, 1426static void set_ioapic_affinity_vector (unsigned int vector,
1410 cpumask_t cpu_mask) 1427 cpumask_t cpu_mask)
1411{ 1428{
1412 int irq = vector_to_irq(vector); 1429 int irq = vector_to_irq(vector);
1413 1430
1431 set_native_irq_info(vector, cpu_mask);
1414 set_ioapic_affinity_irq(irq, cpu_mask); 1432 set_ioapic_affinity_irq(irq, cpu_mask);
1415} 1433}
1416#endif 1434#endif // CONFIG_SMP
1435#endif // CONFIG_PCI_MSI
1417 1436
1418/* 1437/*
1419 * Level and edge triggered IO-APIC interrupts need different handling, 1438 * Level and edge triggered IO-APIC interrupts need different handling,
@@ -1432,7 +1451,9 @@ static struct hw_interrupt_type ioapic_edge_type = {
1432 .disable = disable_edge_ioapic, 1451 .disable = disable_edge_ioapic,
1433 .ack = ack_edge_ioapic, 1452 .ack = ack_edge_ioapic,
1434 .end = end_edge_ioapic, 1453 .end = end_edge_ioapic,
1454#ifdef CONFIG_SMP
1435 .set_affinity = set_ioapic_affinity, 1455 .set_affinity = set_ioapic_affinity,
1456#endif
1436}; 1457};
1437 1458
1438static struct hw_interrupt_type ioapic_level_type = { 1459static struct hw_interrupt_type ioapic_level_type = {
@@ -1443,7 +1464,9 @@ static struct hw_interrupt_type ioapic_level_type = {
1443 .disable = disable_level_ioapic, 1464 .disable = disable_level_ioapic,
1444 .ack = mask_and_ack_level_ioapic, 1465 .ack = mask_and_ack_level_ioapic,
1445 .end = end_level_ioapic, 1466 .end = end_level_ioapic,
1467#ifdef CONFIG_SMP
1446 .set_affinity = set_ioapic_affinity, 1468 .set_affinity = set_ioapic_affinity,
1469#endif
1447}; 1470};
1448 1471
1449static inline void init_IO_APIC_traps(void) 1472static inline void init_IO_APIC_traps(void)
@@ -1918,6 +1941,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
1918 spin_lock_irqsave(&ioapic_lock, flags); 1941 spin_lock_irqsave(&ioapic_lock, flags);
1919 io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); 1942 io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
1920 io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); 1943 io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
1944 set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
1921 spin_unlock_irqrestore(&ioapic_lock, flags); 1945 spin_unlock_irqrestore(&ioapic_lock, flags);
1922 1946
1923 return 0; 1947 return 0;
@@ -1931,6 +1955,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
1931 * we need to reprogram the ioredtbls to cater for the cpus which have come online 1955 * we need to reprogram the ioredtbls to cater for the cpus which have come online
1932 * so mask in all cases should simply be TARGET_CPUS 1956 * so mask in all cases should simply be TARGET_CPUS
1933 */ 1957 */
1958#ifdef CONFIG_SMP
1934void __init setup_ioapic_dest(void) 1959void __init setup_ioapic_dest(void)
1935{ 1960{
1936 int pin, ioapic, irq, irq_entry; 1961 int pin, ioapic, irq, irq_entry;
@@ -1949,3 +1974,4 @@ void __init setup_ioapic_dest(void)
1949 1974
1950 } 1975 }
1951} 1976}
1977#endif
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 2b85aa39f954..532f73bb2224 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -91,6 +91,7 @@ static void set_msi_affinity(unsigned int vector, cpumask_t cpu_mask)
91{ 91{
92 struct msi_desc *entry; 92 struct msi_desc *entry;
93 struct msg_address address; 93 struct msg_address address;
94 unsigned int irq = vector;
94 95
95 entry = (struct msi_desc *)msi_desc[vector]; 96 entry = (struct msi_desc *)msi_desc[vector];
96 if (!entry || !entry->dev) 97 if (!entry || !entry->dev)
@@ -112,6 +113,7 @@ static void set_msi_affinity(unsigned int vector, cpumask_t cpu_mask)
112 entry->msi_attrib.current_cpu = cpu_mask_to_apicid(cpu_mask); 113 entry->msi_attrib.current_cpu = cpu_mask_to_apicid(cpu_mask);
113 pci_write_config_dword(entry->dev, msi_lower_address_reg(pos), 114 pci_write_config_dword(entry->dev, msi_lower_address_reg(pos),
114 address.lo_address.value); 115 address.lo_address.value);
116 set_native_irq_info(irq, cpu_mask);
115 break; 117 break;
116 } 118 }
117 case PCI_CAP_ID_MSIX: 119 case PCI_CAP_ID_MSIX:
@@ -125,22 +127,13 @@ static void set_msi_affinity(unsigned int vector, cpumask_t cpu_mask)
125 MSI_TARGET_CPU_SHIFT); 127 MSI_TARGET_CPU_SHIFT);
126 entry->msi_attrib.current_cpu = cpu_mask_to_apicid(cpu_mask); 128 entry->msi_attrib.current_cpu = cpu_mask_to_apicid(cpu_mask);
127 writel(address.lo_address.value, entry->mask_base + offset); 129 writel(address.lo_address.value, entry->mask_base + offset);
130 set_native_irq_info(irq, cpu_mask);
128 break; 131 break;
129 } 132 }
130 default: 133 default:
131 break; 134 break;
132 } 135 }
133} 136}
134
135#ifdef CONFIG_IRQBALANCE
136static inline void move_msi(int vector)
137{
138 if (!cpus_empty(pending_irq_balance_cpumask[vector])) {
139 set_msi_affinity(vector, pending_irq_balance_cpumask[vector]);
140 cpus_clear(pending_irq_balance_cpumask[vector]);
141 }
142}
143#endif /* CONFIG_IRQBALANCE */
144#endif /* CONFIG_SMP */ 137#endif /* CONFIG_SMP */
145 138
146static void mask_MSI_irq(unsigned int vector) 139static void mask_MSI_irq(unsigned int vector)
@@ -191,13 +184,13 @@ static void shutdown_msi_irq(unsigned int vector)
191 184
192static void end_msi_irq_wo_maskbit(unsigned int vector) 185static void end_msi_irq_wo_maskbit(unsigned int vector)
193{ 186{
194 move_msi(vector); 187 move_native_irq(vector);
195 ack_APIC_irq(); 188 ack_APIC_irq();
196} 189}
197 190
198static void end_msi_irq_w_maskbit(unsigned int vector) 191static void end_msi_irq_w_maskbit(unsigned int vector)
199{ 192{
200 move_msi(vector); 193 move_native_irq(vector);
201 unmask_MSI_irq(vector); 194 unmask_MSI_irq(vector);
202 ack_APIC_irq(); 195 ack_APIC_irq();
203} 196}
diff --git a/drivers/pci/msi.h b/drivers/pci/msi.h
index 390f1851c0f1..402136a5c9e4 100644
--- a/drivers/pci/msi.h
+++ b/drivers/pci/msi.h
@@ -19,7 +19,6 @@
19#define NR_HP_RESERVED_VECTORS 20 19#define NR_HP_RESERVED_VECTORS 20
20 20
21extern int vector_irq[NR_VECTORS]; 21extern int vector_irq[NR_VECTORS];
22extern cpumask_t pending_irq_balance_cpumask[NR_IRQS];
23extern void (*interrupt[NR_IRQS])(void); 22extern void (*interrupt[NR_IRQS])(void);
24extern int pci_vector_resources(int last, int nr_released); 23extern int pci_vector_resources(int last, int nr_released);
25 24
@@ -29,10 +28,6 @@ extern int pci_vector_resources(int last, int nr_released);
29#define set_msi_irq_affinity NULL 28#define set_msi_irq_affinity NULL
30#endif 29#endif
31 30
32#ifndef CONFIG_IRQBALANCE
33static inline void move_msi(int vector) {}
34#endif
35
36/* 31/*
37 * MSI-X Address Register 32 * MSI-X Address Register
38 */ 33 */
diff --git a/include/asm-ia64/hw_irq.h b/include/asm-ia64/hw_irq.h
index 041ab8c51a64..0cf119b42f7d 100644
--- a/include/asm-ia64/hw_irq.h
+++ b/include/asm-ia64/hw_irq.h
@@ -116,13 +116,6 @@ __ia64_local_vector_to_irq (ia64_vector vec)
116 * and to obtain the irq descriptor for a given irq number. 116 * and to obtain the irq descriptor for a given irq number.
117 */ 117 */
118 118
119/* Return a pointer to the irq descriptor for IRQ. */
120static inline irq_desc_t *
121irq_descp (int irq)
122{
123 return irq_desc + irq;
124}
125
126/* Extract the IA-64 vector that corresponds to IRQ. */ 119/* Extract the IA-64 vector that corresponds to IRQ. */
127static inline ia64_vector 120static inline ia64_vector
128irq_to_vector (int irq) 121irq_to_vector (int irq)
diff --git a/include/asm-ia64/irq.h b/include/asm-ia64/irq.h
index bd07d11d9f37..5d930fdc0bea 100644
--- a/include/asm-ia64/irq.h
+++ b/include/asm-ia64/irq.h
@@ -30,12 +30,6 @@ extern void disable_irq_nosync (unsigned int);
30extern void enable_irq (unsigned int); 30extern void enable_irq (unsigned int);
31extern void set_irq_affinity_info (unsigned int irq, int dest, int redir); 31extern void set_irq_affinity_info (unsigned int irq, int dest, int redir);
32 32
33#ifdef CONFIG_SMP
34extern void move_irq(int irq);
35#else
36#define move_irq(irq)
37#endif
38
39struct irqaction; 33struct irqaction;
40struct pt_regs; 34struct pt_regs;
41int handle_IRQ_event(unsigned int, struct pt_regs *, struct irqaction *); 35int handle_IRQ_event(unsigned int, struct pt_regs *, struct irqaction *);
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 069d3b84d311..4a362b9ec966 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -71,16 +71,139 @@ typedef struct irq_desc {
71 unsigned int irq_count; /* For detecting broken interrupts */ 71 unsigned int irq_count; /* For detecting broken interrupts */
72 unsigned int irqs_unhandled; 72 unsigned int irqs_unhandled;
73 spinlock_t lock; 73 spinlock_t lock;
74#if defined (CONFIG_GENERIC_PENDING_IRQ) || defined (CONFIG_IRQBALANCE)
75 unsigned int move_irq; /* Flag need to re-target intr dest*/
76#endif
74} ____cacheline_aligned irq_desc_t; 77} ____cacheline_aligned irq_desc_t;
75 78
76extern irq_desc_t irq_desc [NR_IRQS]; 79extern irq_desc_t irq_desc [NR_IRQS];
77 80
81/* Return a pointer to the irq descriptor for IRQ. */
82static inline irq_desc_t *
83irq_descp (int irq)
84{
85 return irq_desc + irq;
86}
87
78#include <asm/hw_irq.h> /* the arch dependent stuff */ 88#include <asm/hw_irq.h> /* the arch dependent stuff */
79 89
80extern int setup_irq(unsigned int irq, struct irqaction * new); 90extern int setup_irq(unsigned int irq, struct irqaction * new);
81 91
82#ifdef CONFIG_GENERIC_HARDIRQS 92#ifdef CONFIG_GENERIC_HARDIRQS
83extern cpumask_t irq_affinity[NR_IRQS]; 93extern cpumask_t irq_affinity[NR_IRQS];
94
95#ifdef CONFIG_SMP
96static inline void set_native_irq_info(int irq, cpumask_t mask)
97{
98 irq_affinity[irq] = mask;
99}
100#else
101static inline void set_native_irq_info(int irq, cpumask_t mask)
102{
103}
104#endif
105
106#ifdef CONFIG_SMP
107
108#if defined (CONFIG_GENERIC_PENDING_IRQ) || defined (CONFIG_IRQBALANCE)
109extern cpumask_t pending_irq_cpumask[NR_IRQS];
110
111static inline void set_pending_irq(unsigned int irq, cpumask_t mask)
112{
113 irq_desc_t *desc = irq_desc + irq;
114 unsigned long flags;
115
116 spin_lock_irqsave(&desc->lock, flags);
117 desc->move_irq = 1;
118 pending_irq_cpumask[irq] = mask;
119 spin_unlock_irqrestore(&desc->lock, flags);
120}
121
122static inline void
123move_native_irq(int irq)
124{
125 cpumask_t tmp;
126 irq_desc_t *desc = irq_descp(irq);
127
128 if (likely (!desc->move_irq))
129 return;
130
131 desc->move_irq = 0;
132
133 if (likely(cpus_empty(pending_irq_cpumask[irq])))
134 return;
135
136 if (!desc->handler->set_affinity)
137 return;
138
139 /* note - we hold the desc->lock */
140 cpus_and(tmp, pending_irq_cpumask[irq], cpu_online_map);
141
142 /*
143 * If there was a valid mask to work with, please
144 * do the disable, re-program, enable sequence.
145 * This is *not* particularly important for level-triggered interrupts,
146 * but in the edge-triggered case we might be setting the rte
147 * while an active trigger is coming in. This could
148 * cause some ioapics to malfunction.
149 * Being paranoid, I guess!
150 */
151 if (unlikely(!cpus_empty(tmp))) {
152 desc->handler->disable(irq);
153 desc->handler->set_affinity(irq,tmp);
154 desc->handler->enable(irq);
155 }
156 cpus_clear(pending_irq_cpumask[irq]);
157}
158
159#ifdef CONFIG_PCI_MSI
160/*
161 * Wonder why these are dummies?
162 * For example, set_ioapic_affinity_vector() calls its set_ioapic_affinity_irq()
163 * counterpart after translating the vector to irq info. We need to perform
164 * this operation on the real irq when we don't use vectors, i.e. when
165 * use_pci_vector() is false.
166 */
167static inline void move_irq(int irq)
168{
169}
170
171static inline void set_irq_info(int irq, cpumask_t mask)
172{
173}
174
175#else // CONFIG_PCI_MSI
176
177static inline void move_irq(int irq)
178{
179 move_native_irq(irq);
180}
181
182static inline void set_irq_info(int irq, cpumask_t mask)
183{
184 set_native_irq_info(irq, mask);
185}
186#endif // CONFIG_PCI_MSI
187
188#else // CONFIG_GENERIC_PENDING_IRQ || CONFIG_IRQBALANCE
189
190#define move_irq(x)
191#define move_native_irq(x)
192#define set_pending_irq(x,y)
193static inline void set_irq_info(int irq, cpumask_t mask)
194{
195 set_native_irq_info(irq, mask);
196}
197
198#endif // CONFIG_GENERIC_PENDING_IRQ
199
200#else // CONFIG_SMP
201
202#define move_irq(x)
203#define move_native_irq(x)
204
205#endif // CONFIG_SMP
206
84extern int no_irq_affinity; 207extern int no_irq_affinity;
85extern int noirqdebug_setup(char *str); 208extern int noirqdebug_setup(char *str);
86 209
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index ac6700985705..1cfdb08ddf20 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -18,6 +18,10 @@
18 18
19cpumask_t irq_affinity[NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL }; 19cpumask_t irq_affinity[NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL };
20 20
21#if defined (CONFIG_GENERIC_PENDING_IRQ) || defined (CONFIG_IRQBALANCE)
22cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS];
23#endif
24
21/** 25/**
22 * synchronize_irq - wait for pending IRQ handlers (on other CPUs) 26 * synchronize_irq - wait for pending IRQ handlers (on other CPUs)
23 * 27 *
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 85d08daa6600..f26e534c6585 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -19,12 +19,22 @@ static struct proc_dir_entry *root_irq_dir, *irq_dir[NR_IRQS];
19 */ 19 */
20static struct proc_dir_entry *smp_affinity_entry[NR_IRQS]; 20static struct proc_dir_entry *smp_affinity_entry[NR_IRQS];
21 21
22void __attribute__((weak)) 22#ifdef CONFIG_GENERIC_PENDING_IRQ
23proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) 23void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
24{
25 /*
26 * Save these away for later use. Re-program when the
27 * interrupt is pending
28 */
29 set_pending_irq(irq, mask_val);
30}
31#else
32void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
24{ 33{
25 irq_affinity[irq] = mask_val; 34 irq_affinity[irq] = mask_val;
26 irq_desc[irq].handler->set_affinity(irq, mask_val); 35 irq_desc[irq].handler->set_affinity(irq, mask_val);
27} 36}
37#endif
28 38
29static int irq_affinity_read_proc(char *page, char **start, off_t off, 39static int irq_affinity_read_proc(char *page, char **start, off_t off,
30 int count, int *eof, void *data) 40 int count, int *eof, void *data)