diff options
author | Arnd Bergmann <arnd@arndb.de> | 2006-01-05 09:05:29 -0500 |
---|---|---|
committer | Paul Mackerras <paulus@samba.org> | 2006-01-08 23:44:57 -0500 |
commit | 2fb9d2063626374dd8a2514b3a730facac8235d8 (patch) | |
tree | b410dcdbc5aee656c37951be36951130450549e7 | |
parent | aeb013772a2cc85a8d0baffd64977d2888bc781d (diff) |
[PATCH] spufs: set irq affinity for running threads
So far, all SPU triggered interrupts always end up on
the first SMT thread, which is a bad solution.
This patch implements setting the affinity to the
CPU that was running last when entering execution on
an SPU. This should result in a significant reduction
in IPI calls and better cache locality for SPE thread
specific data.
Signed-off-by: Arnd Bergmann <arndb@de.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
-rw-r--r-- | arch/powerpc/platforms/cell/interrupt.c | 42 | ||||
-rw-r--r-- | arch/powerpc/platforms/cell/interrupt.h | 1 | ||||
-rw-r--r-- | arch/powerpc/platforms/cell/spu_base.c | 8 | ||||
-rw-r--r-- | arch/powerpc/platforms/cell/spufs/sched.c | 5 | ||||
-rw-r--r-- | include/asm-powerpc/spu.h | 1 |
5 files changed, 41 insertions, 16 deletions
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index 7fbe78a9327d..63aa52acf441 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/config.h> | 23 | #include <linux/config.h> |
24 | #include <linux/interrupt.h> | 24 | #include <linux/interrupt.h> |
25 | #include <linux/irq.h> | 25 | #include <linux/irq.h> |
26 | #include <linux/module.h> | ||
26 | #include <linux/percpu.h> | 27 | #include <linux/percpu.h> |
27 | #include <linux/types.h> | 28 | #include <linux/types.h> |
28 | 29 | ||
@@ -55,6 +56,7 @@ struct iic_regs { | |||
55 | 56 | ||
56 | struct iic { | 57 | struct iic { |
57 | struct iic_regs __iomem *regs; | 58 | struct iic_regs __iomem *regs; |
59 | u8 target_id; | ||
58 | }; | 60 | }; |
59 | 61 | ||
60 | static DEFINE_PER_CPU(struct iic, iic); | 62 | static DEFINE_PER_CPU(struct iic, iic); |
@@ -172,12 +174,11 @@ int iic_get_irq(struct pt_regs *regs) | |||
172 | return irq; | 174 | return irq; |
173 | } | 175 | } |
174 | 176 | ||
175 | static struct iic_regs __iomem *find_iic(int cpu) | 177 | static int setup_iic(int cpu, struct iic *iic) |
176 | { | 178 | { |
177 | struct device_node *np; | 179 | struct device_node *np; |
178 | int nodeid = cpu / 2; | 180 | int nodeid = cpu / 2; |
179 | unsigned long regs; | 181 | unsigned long regs; |
180 | struct iic_regs __iomem *iic_regs; | ||
181 | 182 | ||
182 | for (np = of_find_node_by_type(NULL, "cpu"); | 183 | for (np = of_find_node_by_type(NULL, "cpu"); |
183 | np; | 184 | np; |
@@ -188,20 +189,23 @@ static struct iic_regs __iomem *find_iic(int cpu) | |||
188 | 189 | ||
189 | if (!np) { | 190 | if (!np) { |
190 | printk(KERN_WARNING "IIC: CPU %d not found\n", cpu); | 191 | printk(KERN_WARNING "IIC: CPU %d not found\n", cpu); |
191 | iic_regs = NULL; | 192 | iic->regs = NULL; |
192 | } else { | 193 | iic->target_id = 0xff; |
193 | regs = *(long *)get_property(np, "iic", NULL); | 194 | return -ENODEV; |
194 | |||
195 | /* hack until we have decided on the devtree info */ | ||
196 | regs += 0x400; | ||
197 | if (cpu & 1) | ||
198 | regs += 0x20; | ||
199 | |||
200 | printk(KERN_DEBUG "IIC for CPU %d at %lx\n", cpu, regs); | ||
201 | iic_regs = __ioremap(regs, sizeof(struct iic_regs), | ||
202 | _PAGE_NO_CACHE); | ||
203 | } | 195 | } |
204 | return iic_regs; | 196 | |
197 | regs = *(long *)get_property(np, "iic", NULL); | ||
198 | |||
199 | /* hack until we have decided on the devtree info */ | ||
200 | regs += 0x400; | ||
201 | if (cpu & 1) | ||
202 | regs += 0x20; | ||
203 | |||
204 | printk(KERN_DEBUG "IIC for CPU %d at %lx\n", cpu, regs); | ||
205 | iic->regs = __ioremap(regs, sizeof(struct iic_regs), | ||
206 | _PAGE_NO_CACHE); | ||
207 | iic->target_id = (nodeid << 4) + ((cpu & 1) ? 0xf : 0xe); | ||
208 | return 0; | ||
205 | } | 209 | } |
206 | 210 | ||
207 | #ifdef CONFIG_SMP | 211 | #ifdef CONFIG_SMP |
@@ -227,6 +231,12 @@ void iic_cause_IPI(int cpu, int mesg) | |||
227 | out_be64(&per_cpu(iic, cpu).regs->generate, (IIC_NUM_IPIS - 1 - mesg) << 4); | 231 | out_be64(&per_cpu(iic, cpu).regs->generate, (IIC_NUM_IPIS - 1 - mesg) << 4); |
228 | } | 232 | } |
229 | 233 | ||
234 | u8 iic_get_target_id(int cpu) | ||
235 | { | ||
236 | return per_cpu(iic, cpu).target_id; | ||
237 | } | ||
238 | EXPORT_SYMBOL_GPL(iic_get_target_id); | ||
239 | |||
230 | static irqreturn_t iic_ipi_action(int irq, void *dev_id, struct pt_regs *regs) | 240 | static irqreturn_t iic_ipi_action(int irq, void *dev_id, struct pt_regs *regs) |
231 | { | 241 | { |
232 | smp_message_recv(iic_irq_to_ipi(irq), regs); | 242 | smp_message_recv(iic_irq_to_ipi(irq), regs); |
@@ -276,7 +286,7 @@ void iic_init_IRQ(void) | |||
276 | irq_offset = 0; | 286 | irq_offset = 0; |
277 | for_each_cpu(cpu) { | 287 | for_each_cpu(cpu) { |
278 | iic = &per_cpu(iic, cpu); | 288 | iic = &per_cpu(iic, cpu); |
279 | iic->regs = find_iic(cpu); | 289 | setup_iic(cpu, iic); |
280 | if (iic->regs) | 290 | if (iic->regs) |
281 | out_be64(&iic->regs->prio, 0xff); | 291 | out_be64(&iic->regs->prio, 0xff); |
282 | } | 292 | } |
diff --git a/arch/powerpc/platforms/cell/interrupt.h b/arch/powerpc/platforms/cell/interrupt.h index 37d58e6fd0c6..a14bd38791c0 100644 --- a/arch/powerpc/platforms/cell/interrupt.h +++ b/arch/powerpc/platforms/cell/interrupt.h | |||
@@ -54,6 +54,7 @@ extern void iic_setup_cpu(void); | |||
54 | extern void iic_local_enable(void); | 54 | extern void iic_local_enable(void); |
55 | extern void iic_local_disable(void); | 55 | extern void iic_local_disable(void); |
56 | 56 | ||
57 | extern u8 iic_get_target_id(int cpu); | ||
57 | 58 | ||
58 | extern void spider_init_IRQ(void); | 59 | extern void spider_init_IRQ(void); |
59 | extern int spider_get_irq(unsigned long int_pending); | 60 | extern int spider_get_irq(unsigned long int_pending); |
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index 7fe3fa3da0e9..d75ae03df686 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c | |||
@@ -507,6 +507,14 @@ int spu_irq_class_1_bottom(struct spu *spu) | |||
507 | return ret; | 507 | return ret; |
508 | } | 508 | } |
509 | 509 | ||
510 | void spu_irq_setaffinity(struct spu *spu, int cpu) | ||
511 | { | ||
512 | u64 target = iic_get_target_id(cpu); | ||
513 | u64 route = target << 48 | target << 32 | target << 16; | ||
514 | spu_int_route_set(spu, route); | ||
515 | } | ||
516 | EXPORT_SYMBOL_GPL(spu_irq_setaffinity); | ||
517 | |||
510 | static void __iomem * __init map_spe_prop(struct device_node *n, | 518 | static void __iomem * __init map_spe_prop(struct device_node *n, |
511 | const char *name) | 519 | const char *name) |
512 | { | 520 | { |
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index c34198c29159..963182fbd1aa 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c | |||
@@ -357,6 +357,11 @@ int spu_activate(struct spu_context *ctx, u64 flags) | |||
357 | if (!spu) | 357 | if (!spu) |
358 | return (signal_pending(current)) ? -ERESTARTSYS : -EAGAIN; | 358 | return (signal_pending(current)) ? -ERESTARTSYS : -EAGAIN; |
359 | bind_context(spu, ctx); | 359 | bind_context(spu, ctx); |
360 | /* | ||
361 | * We're likely to wait for interrupts on the same | ||
362 | * CPU that we are now on, so send them here. | ||
363 | */ | ||
364 | spu_irq_setaffinity(spu, raw_smp_processor_id()); | ||
360 | put_active_spu(spu); | 365 | put_active_spu(spu); |
361 | return 0; | 366 | return 0; |
362 | } | 367 | } |
diff --git a/include/asm-powerpc/spu.h b/include/asm-powerpc/spu.h index 692aa60e9903..38bacf2f6e0c 100644 --- a/include/asm-powerpc/spu.h +++ b/include/asm-powerpc/spu.h | |||
@@ -147,6 +147,7 @@ struct spu *spu_alloc(void); | |||
147 | void spu_free(struct spu *spu); | 147 | void spu_free(struct spu *spu); |
148 | int spu_irq_class_0_bottom(struct spu *spu); | 148 | int spu_irq_class_0_bottom(struct spu *spu); |
149 | int spu_irq_class_1_bottom(struct spu *spu); | 149 | int spu_irq_class_1_bottom(struct spu *spu); |
150 | void spu_irq_setaffinity(struct spu *spu, int cpu); | ||
150 | 151 | ||
151 | extern struct spufs_calls { | 152 | extern struct spufs_calls { |
152 | asmlinkage long (*create_thread)(const char __user *name, | 153 | asmlinkage long (*create_thread)(const char __user *name, |