author	Hong H. Pham <hong.pham@windriver.com>	2009-06-04 05:10:11 -0400
committer	David S. Miller <davem@davemloft.net>	2009-06-16 07:56:28 -0400
commit	280ff97494e0fef4124bee5c52e39b23a18dd283 (patch)
tree	e906ca3c5e0a6238882d181ab5b01fb3f40ba5df /arch/sparc/kernel/irq_64.c
parent	4fd78a5f1edf62ab1ca3d23efee4a8a336edb2b6 (diff)
sparc64: fix and optimize irq distribution
irq_choose_cpu() should compare the affinity mask against cpu_online_map
rather than CPU_MASK_ALL, since irq_select_affinity() sets the interrupt's
affinity mask to cpu_online_map "and" CPU_MASK_ALL (which ends up being just
cpu_online_map).  The mask comparison in irq_choose_cpu() will always fail
since the two masks are not the same.  So the CPU chosen is the first CPU in
the intersection of cpu_online_map and CPU_MASK_ALL, which is always CPU0.
That means all interrupts are reassigned to CPU0...

Distributing interrupts to CPUs in a linearly increasing round robin fashion
is not optimal for the UltraSPARC T1/T2.  Also, the irq_rover in
irq_choose_cpu() causes an interrupt to be assigned to a different processor
each time the interrupt is allocated and released.  This may lead to an
unbalanced distribution over time.

A static mapping of interrupts to processors is done to optimize and balance
interrupt distribution.  For the T1/T2, interrupts are spread to different
cores first, and then to strands within a core.

The following are benchmarks showing the effects of interrupt distribution
on a T2.  The test was done with iperf using a pair of T5220 boxes, each
with a 10GBe NIU (XAUI) connected back to back.

  TCP     | Stock       Linear RR IRQ  Optimized IRQ
  Streams | 2.6.30-rc5  Distribution   Distribution
          | GBits/sec   GBits/sec      GBits/sec
  --------+-----------------------------------------
      1       0.839        0.862          0.868
      8       1.16         4.96           5.88
     16       1.15         6.40           8.04
    100       1.09         7.28           8.68

Signed-off-by: Hong H. Pham <hong.pham@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
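For illustration, the "cores first, then strands" spreading described above can
be sketched roughly as follows.  This is a minimal user-space sketch, not the
map_to_cpu()/cpumap.c implementation the patch actually relies on (which builds
its map from the machine description); the 8x8 topology, the core-major CPU
numbering, and the helper name map_irq_index_to_cpu() are assumptions made only
for the example.

/*
 * Illustrative sketch only -- not the cpumap.c code referenced by this
 * patch.  Assumes a hypothetical T2-like topology of 8 cores x 8 strands
 * with CPU ids numbered core-major (cpu = core * strands_per_core + strand).
 */
#include <stdio.h>

#define NUM_CORES        8
#define STRANDS_PER_CORE 8

/* Map the Nth allocated interrupt to a CPU: spread across cores first,
 * then across strands within a core. */
static int map_irq_index_to_cpu(int index)
{
	int core   = index % NUM_CORES;
	int strand = (index / NUM_CORES) % STRANDS_PER_CORE;

	return core * STRANDS_PER_CORE + strand;
}

int main(void)
{
	/* The first 8 interrupts land on strand 0 of cores 0..7, the next 8
	 * on strand 1 of cores 0..7, and so on. */
	for (int i = 0; i < 16; i++)
		printf("irq index %2d -> cpu %2d\n", i, map_irq_index_to_cpu(i));
	return 0;
}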
Diffstat (limited to 'arch/sparc/kernel/irq_64.c')
-rw-r--r--  arch/sparc/kernel/irq_64.c  29
1 file changed, 4 insertions(+), 25 deletions(-)
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index e5e78f9cfc95..bd075054942b 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -45,6 +45,7 @@
 #include <asm/cacheflush.h>
 
 #include "entry.h"
+#include "cpumap.h"
 
 #define NUM_IVECS	(IMAP_INR + 1)
 
@@ -256,35 +257,13 @@ static int irq_choose_cpu(unsigned int virt_irq)
 	int cpuid;
 
 	cpumask_copy(&mask, irq_desc[virt_irq].affinity);
-	if (cpus_equal(mask, CPU_MASK_ALL)) {
-		static int irq_rover;
-		static DEFINE_SPINLOCK(irq_rover_lock);
-		unsigned long flags;
-
-		/* Round-robin distribution... */
-	do_round_robin:
-		spin_lock_irqsave(&irq_rover_lock, flags);
-
-		while (!cpu_online(irq_rover)) {
-			if (++irq_rover >= nr_cpu_ids)
-				irq_rover = 0;
-		}
-		cpuid = irq_rover;
-		do {
-			if (++irq_rover >= nr_cpu_ids)
-				irq_rover = 0;
-		} while (!cpu_online(irq_rover));
-
-		spin_unlock_irqrestore(&irq_rover_lock, flags);
+	if (cpus_equal(mask, cpu_online_map)) {
+		cpuid = map_to_cpu(virt_irq);
 	} else {
 		cpumask_t tmp;
 
 		cpus_and(tmp, cpu_online_map, mask);
-
-		if (cpus_empty(tmp))
-			goto do_round_robin;
-
-		cpuid = first_cpu(tmp);
+		cpuid = cpus_empty(tmp) ? map_to_cpu(virt_irq) : first_cpu(tmp);
 	}
 
 	return cpuid;
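As a worked illustration of the failure mode described in the commit message
(a toy model, not kernel code): treating cpumasks as 64-bit words for an
assumed NR_CPUS=64 machine with 16 strands online, the old comparison against
CPU_MASK_ALL never matches, so every interrupt fell through to first_cpu() of
the online map, i.e. CPU0; the new comparison against cpu_online_map matches
and lets map_to_cpu() spread the interrupt.

/*
 * Toy demonstration only -- models cpumasks as plain 64-bit words for an
 * assumed NR_CPUS=64 box with CPUs 0..15 online.  Names are illustrative,
 * not the kernel cpumask API.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

int main(void)
{
	uint64_t cpu_mask_all   = ~0ULL;            /* CPU_MASK_ALL: every possible CPU set */
	uint64_t cpu_online_map = (1ULL << 16) - 1; /* only CPUs 0..15 are online */

	/* irq_select_affinity(): affinity = CPU_MASK_ALL & cpu_online_map */
	uint64_t affinity = cpu_mask_all & cpu_online_map;

	bool old_match = (affinity == cpu_mask_all);   /* old test: false unless all CPUs online */
	bool new_match = (affinity == cpu_online_map); /* new test: true in the common case */

	printf("old comparison matches: %d\nnew comparison matches: %d\n",
	       old_match, new_match);

	/* With the old test failing, the else branch returned
	 * first_cpu(cpu_online_map & affinity), i.e. CPU0, for every interrupt. */
	return 0;
}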