author     Hong H. Pham <hong.pham@windriver.com>      2009-06-04 05:10:11 -0400
committer  David S. Miller <davem@davemloft.net>       2009-06-16 07:56:28 -0400
commit     280ff97494e0fef4124bee5c52e39b23a18dd283
tree       e906ca3c5e0a6238882d181ab5b01fb3f40ba5df /arch/sparc/kernel/irq_64.c
parent     4fd78a5f1edf62ab1ca3d23efee4a8a336edb2b6
sparc64: fix and optimize irq distribution
irq_choose_cpu() should compare the affinity mask against cpu_online_map
rather than CPU_MASK_ALL, since irq_select_affinity() sets the interrupt's
affinity mask to cpu_online_map ANDed with CPU_MASK_ALL (which ends up being
just cpu_online_map). The mask comparison in irq_choose_cpu() will always
fail since the two masks are not the same. So the CPU chosen is the first CPU
in the intersection of cpu_online_map and CPU_MASK_ALL, which is always CPU0.
That means all interrupts are reassigned to CPU0...
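For illustration, here is a minimal user-space sketch of the failing
comparison. Cpumasks are modelled as 64-bit bitmaps instead of the kernel's
cpumask API, and the 8-online-CPU machine and the __builtin_ctzll() stand-in
for first_cpu() are assumptions made for the demo:

  /*
   * Sketch: why cpus_equal(affinity, CPU_MASK_ALL) never matches when
   * irq_select_affinity() has set affinity = online & ALL.
   */
  #include <stdint.h>
  #include <stdio.h>

  #define CPU_MASK_ALL (~0ULL)            /* all possible CPU bits set */

  int main(void)
  {
          uint64_t cpu_online_map = 0xffULL;  /* assume CPUs 0-7 online */

          /* irq_select_affinity() effectively computes
           * affinity = online & ALL, i.e. just cpu_online_map. */
          uint64_t affinity = cpu_online_map & CPU_MASK_ALL;

          if (affinity == CPU_MASK_ALL)   /* old test: never true here */
                  printf("round-robin branch taken\n");
          else                            /* always falls through... */
                  printf("first_cpu() -> CPU%d\n",
                         (int)__builtin_ctzll(cpu_online_map & affinity));
          return 0;
  }

On any machine with fewer online CPUs than the mask can hold, this prints
"first_cpu() -> CPU0", matching the behavior described above.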
Distributing interrupts to CPUs in a linearly increasing round-robin fashion
is not optimal for the UltraSPARC T1/T2. Also, the irq_rover in
irq_choose_cpu() causes an interrupt to be assigned to a different
processor each time the interrupt is allocated and released, which may lead
to an unbalanced distribution over time.
This patch introduces a static mapping of interrupts to processors to
optimize and balance interrupt distribution. On the T1/T2, interrupts are
spread across different cores first, and then across strands within a core.
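As a rough sketch of cores-first spreading: the fixed 8-core x 8-strand
topology and the sketch_map_to_cpu() helper below are hypothetical; the real
map is built from the machine description at boot in
arch/sparc/kernel/cpumap.c.

  #include <stdio.h>

  #define NUM_CORES        8
  #define STRANDS_PER_CORE 8

  /* Give each core one interrupt before doubling up on any strand. */
  static int sketch_map_to_cpu(unsigned int index)
  {
          unsigned int core   = index % NUM_CORES;
          unsigned int strand = (index / NUM_CORES) % STRANDS_PER_CORE;

          return core * STRANDS_PER_CORE + strand;
  }

  int main(void)
  {
          for (unsigned int i = 0; i < 16; i++)
                  printf("irq %2u -> cpu %2d\n", i, sketch_map_to_cpu(i));
          return 0;
  }

With this mapping, the first eight interrupts land on strand 0 of each of
the eight cores, and only the ninth wraps around to strand 1 of core 0,
which is the cores-before-strands order described above.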
The following benchmarks show the effects of interrupt distribution on a T2.
The test was done with iperf using a pair of T5220 boxes, each with a 10GBe
NIU (XAUI) connected back to back.
TCP     |   Stock      Linear RR IRQ   Optimized IRQ
Streams | 2.6.30-rc5   Distribution    Distribution
        |  GBits/sec    GBits/sec       GBits/sec
--------+-----------------------------------------
      1 |    0.839        0.862           0.868
      8 |    1.16         4.96            5.88
     16 |    1.15         6.40            8.04
    100 |    1.09         7.28            8.68
Signed-off-by: Hong H. Pham <hong.pham@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/sparc/kernel/irq_64.c')
-rw-r--r--  arch/sparc/kernel/irq_64.c | 29
1 file changed, 4 insertions(+), 25 deletions(-)
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index e5e78f9cfc95..bd075054942b 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -45,6 +45,7 @@
 #include <asm/cacheflush.h>
 
 #include "entry.h"
+#include "cpumap.h"
 
 #define NUM_IVECS	(IMAP_INR + 1)
 
@@ -256,35 +257,13 @@ static int irq_choose_cpu(unsigned int virt_irq)
 	int cpuid;
 
 	cpumask_copy(&mask, irq_desc[virt_irq].affinity);
-	if (cpus_equal(mask, CPU_MASK_ALL)) {
-		static int irq_rover;
-		static DEFINE_SPINLOCK(irq_rover_lock);
-		unsigned long flags;
-
-		/* Round-robin distribution... */
-	do_round_robin:
-		spin_lock_irqsave(&irq_rover_lock, flags);
-
-		while (!cpu_online(irq_rover)) {
-			if (++irq_rover >= nr_cpu_ids)
-				irq_rover = 0;
-		}
-		cpuid = irq_rover;
-		do {
-			if (++irq_rover >= nr_cpu_ids)
-				irq_rover = 0;
-		} while (!cpu_online(irq_rover));
-
-		spin_unlock_irqrestore(&irq_rover_lock, flags);
+	if (cpus_equal(mask, cpu_online_map)) {
+		cpuid = map_to_cpu(virt_irq);
 	} else {
 		cpumask_t tmp;
 
 		cpus_and(tmp, cpu_online_map, mask);
-
-		if (cpus_empty(tmp))
-			goto do_round_robin;
-
-		cpuid = first_cpu(tmp);
+		cpuid = cpus_empty(tmp) ? map_to_cpu(virt_irq) : first_cpu(tmp);
 	}
 
 	return cpuid;