author		Mark Nutter <mnutter@us.ibm.com>	2006-10-04 11:26:12 -0400
committer	Paul Mackerras <paulus@samba.org>	2006-10-04 19:21:00 -0400
commit		a68cf983f635930ea35f9e96b27d96598550dea0 (patch)
tree		531ea91b3f293130a0828c7579c514830b31e9e1
parent		27d5bf2a35c0762f1358e9ef39776733cd942121 (diff)
[POWERPC] spufs: scheduler support for NUMA.
This patch adds NUMA support to the spufs scheduler.

The new arch/powerpc/platforms/cell/spufs/sched.c is greatly simplified, in an attempt to reduce complexity while adding support for NUMA scheduler domains. SPUs are allocated starting from the calling thread's node, moving to others as supported by current->cpus_allowed. Preemption is gone as it was buggy, but should be re-enabled in another patch when stable.

The new arch/powerpc/platforms/cell/spu_base.c maintains idle lists on a per-node basis and allows the caller to specify which node(s) an SPU should be allocated from, while passing -1 tells spu_alloc() that any node is allowed.

Since the patch removes the currently implemented preemptive scheduling, it is technically a regression, but practically all users have since migrated to this version, as it is part of the IBM SDK and the yellowdog distribution, so there is not much point holding it back while the new preemptive scheduling patch gets delayed further.

Signed-off-by: Arnd Bergmann <arnd.bergmann@de.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
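[Editor's note] To illustrate the allocation order described above, here is a minimal, hypothetical caller-side sketch (not part of this patch): it mirrors the node walk that spu_get_idle() performs in sched.c below, starting at the calling thread's node and wrapping around. The helper name example_get_local_spu() is invented for the sketch; spu_alloc_node(), spu_alloc() and spu_free() are the interfaces added/kept by this patch.

    #include <linux/smp.h>		/* raw_smp_processor_id() */
    #include <linux/topology.h>	/* cpu_to_node() */
    #include <linux/numa.h>		/* MAX_NUMNODES */
    #include <asm/spu.h>		/* spu_alloc_node(), spu_free() */

    /* Hypothetical caller: try the local node first, then the others. */
    static struct spu *example_get_local_spu(void)
    {
    	int node = cpu_to_node(raw_smp_processor_id());
    	struct spu *spu = NULL;
    	int n;

    	for (n = 0; n < MAX_NUMNODES; n++, node++) {
    		if (node >= MAX_NUMNODES)
    			node = 0;		/* wrap around */
    		spu = spu_alloc_node(node);	/* per-node idle list */
    		if (spu)
    			break;
    	}
    	return spu;	/* handed back later with spu_free() */
    }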
-rw-r--r--	arch/powerpc/platforms/cell/spu_base.c	 51
-rw-r--r--	arch/powerpc/platforms/cell/spufs/sched.c	444
-rw-r--r--	include/asm-powerpc/spu.h	  1
3 files changed, 198 insertions, 298 deletions
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index f78680346e5f..5931973845b1 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -317,7 +317,7 @@ static void spu_free_irqs(struct spu *spu)
 	free_irq(spu->irqs[2], spu);
 }
 
-static LIST_HEAD(spu_list);
+static struct list_head spu_list[MAX_NUMNODES];
 static DEFINE_MUTEX(spu_mutex);
 
 static void spu_init_channels(struct spu *spu)
@@ -354,32 +354,42 @@ static void spu_init_channels(struct spu *spu)
 	}
 }
 
-struct spu *spu_alloc(void)
+struct spu *spu_alloc_node(int node)
 {
-	struct spu *spu;
+	struct spu *spu = NULL;
 
 	mutex_lock(&spu_mutex);
-	if (!list_empty(&spu_list)) {
-		spu = list_entry(spu_list.next, struct spu, list);
+	if (!list_empty(&spu_list[node])) {
+		spu = list_entry(spu_list[node].next, struct spu, list);
 		list_del_init(&spu->list);
-		pr_debug("Got SPU %x %d\n", spu->isrc, spu->number);
-	} else {
-		pr_debug("No SPU left\n");
-		spu = NULL;
+		pr_debug("Got SPU %x %d %d\n",
+			 spu->isrc, spu->number, spu->node);
+		spu_init_channels(spu);
 	}
 	mutex_unlock(&spu_mutex);
 
-	if (spu)
-		spu_init_channels(spu);
+	return spu;
+}
+EXPORT_SYMBOL_GPL(spu_alloc_node);
+
+struct spu *spu_alloc(void)
+{
+	struct spu *spu = NULL;
+	int node;
+
+	for (node = 0; node < MAX_NUMNODES; node++) {
+		spu = spu_alloc_node(node);
+		if (spu)
+			break;
+	}
 
 	return spu;
 }
-EXPORT_SYMBOL_GPL(spu_alloc);
 
 void spu_free(struct spu *spu)
 {
 	mutex_lock(&spu_mutex);
-	list_add_tail(&spu->list, &spu_list);
+	list_add_tail(&spu->list, &spu_list[spu->node]);
 	mutex_unlock(&spu_mutex);
 }
 EXPORT_SYMBOL_GPL(spu_free);
@@ -712,7 +722,7 @@ static int __init create_spu(struct device_node *spe)
 	if (ret)
 		goto out_free_irqs;
 
-	list_add(&spu->list, &spu_list);
+	list_add(&spu->list, &spu_list[spu->node]);
 	mutex_unlock(&spu_mutex);
 
 	pr_debug(KERN_DEBUG "Using SPE %s %02x %p %p %p %p %d\n",
@@ -745,9 +755,13 @@ static void destroy_spu(struct spu *spu)
 static void cleanup_spu_base(void)
 {
 	struct spu *spu, *tmp;
+	int node;
+
 	mutex_lock(&spu_mutex);
-	list_for_each_entry_safe(spu, tmp, &spu_list, list)
-		destroy_spu(spu);
+	for (node = 0; node < MAX_NUMNODES; node++) {
+		list_for_each_entry_safe(spu, tmp, &spu_list[node], list)
+			destroy_spu(spu);
+	}
 	mutex_unlock(&spu_mutex);
 	sysdev_class_unregister(&spu_sysdev_class);
 }
@@ -756,13 +770,16 @@ module_exit(cleanup_spu_base);
 static int __init init_spu_base(void)
 {
 	struct device_node *node;
-	int ret;
+	int i, ret;
 
 	/* create sysdev class for spus */
 	ret = sysdev_class_register(&spu_sysdev_class);
 	if (ret)
 		return ret;
 
+	for (i = 0; i < MAX_NUMNODES; i++)
+		INIT_LIST_HEAD(&spu_list[i]);
+
 	ret = -ENODEV;
 	for (node = of_find_node_by_type(NULL, "spe");
 	     node; node = of_find_node_by_type(node, "spe")) {
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 1350294484b6..bd4e2c3d5d08 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -3,11 +3,7 @@
  * Copyright (C) IBM 2005
  * Author: Mark Nutter <mnutter@us.ibm.com>
  *
- * SPU scheduler, based on Linux thread priority. For now use
- * a simple "cooperative" yield model with no preemption. SPU
- * scheduling will eventually be preemptive: When a thread with
- * a higher static priority gets ready to run, then an active SPU
- * context will be preempted and returned to the waitq.
+ * 2006-03-31	NUMA domains added.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -37,6 +33,8 @@
 #include <linux/smp_lock.h>
 #include <linux/stddef.h>
 #include <linux/unistd.h>
+#include <linux/numa.h>
+#include <linux/mutex.h>
 
 #include <asm/io.h>
 #include <asm/mmu_context.h>
@@ -49,125 +47,38 @@
 
 #define SPU_BITMAP_SIZE (((MAX_PRIO+BITS_PER_LONG)/BITS_PER_LONG)+1)
 struct spu_prio_array {
-	atomic_t nr_blocked;
 	unsigned long bitmap[SPU_BITMAP_SIZE];
 	wait_queue_head_t waitq[MAX_PRIO];
+	struct list_head active_list[MAX_NUMNODES];
+	struct mutex active_mutex[MAX_NUMNODES];
 };
 
-/* spu_runqueue - This is the main runqueue data structure for SPUs. */
-struct spu_runqueue {
-	struct semaphore sem;
-	unsigned long nr_active;
-	unsigned long nr_idle;
-	unsigned long nr_switches;
-	struct list_head active_list;
-	struct list_head idle_list;
-	struct spu_prio_array prio;
-};
-
-static struct spu_runqueue *spu_runqueues = NULL;
-
-static inline struct spu_runqueue *spu_rq(void)
-{
-	/* Future: make this a per-NODE array,
-	 * and use cpu_to_node(smp_processor_id())
-	 */
-	return spu_runqueues;
-}
-
-static inline struct spu *del_idle(struct spu_runqueue *rq)
-{
-	struct spu *spu;
-
-	BUG_ON(rq->nr_idle <= 0);
-	BUG_ON(list_empty(&rq->idle_list));
-	/* Future: Move SPU out of low-power SRI state. */
-	spu = list_entry(rq->idle_list.next, struct spu, sched_list);
-	list_del_init(&spu->sched_list);
-	rq->nr_idle--;
-	return spu;
-}
-
-static inline void del_active(struct spu_runqueue *rq, struct spu *spu)
-{
-	BUG_ON(rq->nr_active <= 0);
-	BUG_ON(list_empty(&rq->active_list));
-	list_del_init(&spu->sched_list);
-	rq->nr_active--;
-}
-
-static inline void add_idle(struct spu_runqueue *rq, struct spu *spu)
-{
-	/* Future: Put SPU into low-power SRI state. */
-	list_add_tail(&spu->sched_list, &rq->idle_list);
-	rq->nr_idle++;
-}
-
-static inline void add_active(struct spu_runqueue *rq, struct spu *spu)
-{
-	rq->nr_active++;
-	rq->nr_switches++;
-	list_add_tail(&spu->sched_list, &rq->active_list);
-}
-
-static void prio_wakeup(struct spu_runqueue *rq)
-{
-	if (atomic_read(&rq->prio.nr_blocked) && rq->nr_idle) {
-		int best = sched_find_first_bit(rq->prio.bitmap);
-		if (best < MAX_PRIO) {
-			wait_queue_head_t *wq = &rq->prio.waitq[best];
-			wake_up_interruptible_nr(wq, 1);
-		}
-	}
-}
-
-static void prio_wait(struct spu_runqueue *rq, struct spu_context *ctx,
-		      u64 flags)
-{
-	int prio = current->prio;
-	wait_queue_head_t *wq = &rq->prio.waitq[prio];
-	DEFINE_WAIT(wait);
-
-	__set_bit(prio, rq->prio.bitmap);
-	atomic_inc(&rq->prio.nr_blocked);
-	prepare_to_wait_exclusive(wq, &wait, TASK_INTERRUPTIBLE);
-	if (!signal_pending(current)) {
-		up(&rq->sem);
-		up_write(&ctx->state_sema);
-		pr_debug("%s: pid=%d prio=%d\n", __FUNCTION__,
-			 current->pid, current->prio);
-		schedule();
-		down_write(&ctx->state_sema);
-		down(&rq->sem);
-	}
-	finish_wait(wq, &wait);
-	atomic_dec(&rq->prio.nr_blocked);
-	if (!waitqueue_active(wq))
-		__clear_bit(prio, rq->prio.bitmap);
-}
+static struct spu_prio_array *spu_prio;
 
-static inline int is_best_prio(struct spu_runqueue *rq)
+static inline int node_allowed(int node)
 {
-	int best_prio;
+	cpumask_t mask;
 
-	best_prio = sched_find_first_bit(rq->prio.bitmap);
-	return (current->prio < best_prio) ? 1 : 0;
+	if (!nr_cpus_node(node))
+		return 0;
+	mask = node_to_cpumask(node);
+	if (!cpus_intersects(mask, current->cpus_allowed))
+		return 0;
+	return 1;
 }
 
 static inline void mm_needs_global_tlbie(struct mm_struct *mm)
 {
+	int nr = (NR_CPUS > 1) ? NR_CPUS : NR_CPUS + 1;
+
 	/* Global TLBIE broadcast required with SPEs. */
-#if (NR_CPUS > 1)
-	__cpus_setall(&mm->cpu_vm_mask, NR_CPUS);
-#else
-	__cpus_setall(&mm->cpu_vm_mask, NR_CPUS+1); /* is this ok? */
-#endif
+	__cpus_setall(&mm->cpu_vm_mask, nr);
 }
 
 static inline void bind_context(struct spu *spu, struct spu_context *ctx)
 {
-	pr_debug("%s: pid=%d SPU=%d\n", __FUNCTION__, current->pid,
-		 spu->number);
+	pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid,
+		 spu->number, spu->node);
 	spu->ctx = ctx;
 	spu->flags = 0;
 	ctx->flags = 0;
@@ -185,12 +96,13 @@ static inline void bind_context(struct spu *spu, struct spu_context *ctx)
 	spu_unmap_mappings(ctx);
 	spu_restore(&ctx->csa, spu);
 	spu->timestamp = jiffies;
+	spu_cpu_affinity_set(spu, raw_smp_processor_id());
 }
 
 static inline void unbind_context(struct spu *spu, struct spu_context *ctx)
 {
-	pr_debug("%s: unbind pid=%d SPU=%d\n", __FUNCTION__,
-		 spu->pid, spu->number);
+	pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__,
+		 spu->pid, spu->number, spu->node);
 	spu_unmap_mappings(ctx);
 	spu_save(&ctx->csa, spu);
 	spu->timestamp = jiffies;
@@ -209,163 +121,148 @@ static inline void unbind_context(struct spu *spu, struct spu_context *ctx)
 	spu->ctx = NULL;
 }
 
-static void spu_reaper(void *data)
+static inline void spu_add_wq(wait_queue_head_t * wq, wait_queue_t * wait,
+			      int prio)
 {
-	struct spu_context *ctx = data;
-	struct spu *spu;
-
-	down_write(&ctx->state_sema);
-	spu = ctx->spu;
-	if (spu && test_bit(SPU_CONTEXT_PREEMPT, &ctx->flags)) {
-		if (atomic_read(&spu->rq->prio.nr_blocked)) {
-			pr_debug("%s: spu=%d\n", __func__, spu->number);
-			ctx->ops->runcntl_stop(ctx);
-			spu_deactivate(ctx);
-			wake_up_all(&ctx->stop_wq);
-		} else {
-			clear_bit(SPU_CONTEXT_PREEMPT, &ctx->flags);
-		}
-	}
-	up_write(&ctx->state_sema);
-	put_spu_context(ctx);
+	prepare_to_wait_exclusive(wq, wait, TASK_INTERRUPTIBLE);
+	set_bit(prio, spu_prio->bitmap);
 }
 
-static void schedule_spu_reaper(struct spu_runqueue *rq, struct spu *spu)
+static inline void spu_del_wq(wait_queue_head_t * wq, wait_queue_t * wait,
+			      int prio)
 {
-	struct spu_context *ctx = get_spu_context(spu->ctx);
-	unsigned long now = jiffies;
-	unsigned long expire = spu->timestamp + SPU_MIN_TIMESLICE;
-
-	set_bit(SPU_CONTEXT_PREEMPT, &ctx->flags);
-	INIT_WORK(&ctx->reap_work, spu_reaper, ctx);
-	if (time_after(now, expire))
-		schedule_work(&ctx->reap_work);
-	else
-		schedule_delayed_work(&ctx->reap_work, expire - now);
-}
+	u64 flags;
 
-static void check_preempt_active(struct spu_runqueue *rq)
-{
-	struct list_head *p;
-	struct spu *worst = NULL;
-
-	list_for_each(p, &rq->active_list) {
-		struct spu *spu = list_entry(p, struct spu, sched_list);
-		struct spu_context *ctx = spu->ctx;
-		if (!test_bit(SPU_CONTEXT_PREEMPT, &ctx->flags)) {
-			if (!worst || (spu->prio > worst->prio)) {
-				worst = spu;
-			}
-		}
-	}
-	if (worst && (current->prio < worst->prio))
-		schedule_spu_reaper(rq, worst);
+	__set_current_state(TASK_RUNNING);
+
+	spin_lock_irqsave(&wq->lock, flags);
+
+	remove_wait_queue_locked(wq, wait);
+	if (list_empty(&wq->task_list))
+		clear_bit(prio, spu_prio->bitmap);
+
+	spin_unlock_irqrestore(&wq->lock, flags);
 }
 
-static struct spu *get_idle_spu(struct spu_context *ctx, u64 flags)
+static void spu_prio_wait(struct spu_context *ctx, u64 flags)
 {
-	struct spu_runqueue *rq;
-	struct spu *spu = NULL;
+	int prio = current->prio;
+	wait_queue_head_t *wq = &spu_prio->waitq[prio];
+	DEFINE_WAIT(wait);
 
-	rq = spu_rq();
-	down(&rq->sem);
-	for (;;) {
-		if (rq->nr_idle > 0) {
-			if (is_best_prio(rq)) {
-				/* Fall through. */
-				spu = del_idle(rq);
-				break;
-			} else {
-				prio_wakeup(rq);
-				up(&rq->sem);
-				yield();
-				if (signal_pending(current)) {
-					return NULL;
-				}
-				rq = spu_rq();
-				down(&rq->sem);
-				continue;
-			}
-		} else {
-			check_preempt_active(rq);
-			prio_wait(rq, ctx, flags);
-			if (signal_pending(current)) {
-				prio_wakeup(rq);
-				spu = NULL;
-				break;
-			}
-			continue;
-		}
+	if (ctx->spu)
+		return;
+
+	spu_add_wq(wq, &wait, prio);
+
+	if (!signal_pending(current)) {
+		up_write(&ctx->state_sema);
+		pr_debug("%s: pid=%d prio=%d\n", __FUNCTION__,
+			 current->pid, current->prio);
+		schedule();
+		down_write(&ctx->state_sema);
 	}
-	up(&rq->sem);
-	return spu;
+
+	spu_del_wq(wq, &wait, prio);
 }
 
-static void put_idle_spu(struct spu *spu)
+static void spu_prio_wakeup(void)
 {
-	struct spu_runqueue *rq = spu->rq;
-
-	down(&rq->sem);
-	add_idle(rq, spu);
-	prio_wakeup(rq);
-	up(&rq->sem);
+	int best = sched_find_first_bit(spu_prio->bitmap);
+	if (best < MAX_PRIO) {
+		wait_queue_head_t *wq = &spu_prio->waitq[best];
+		wake_up_interruptible_nr(wq, 1);
+	}
 }
 
 static int get_active_spu(struct spu *spu)
 {
-	struct spu_runqueue *rq = spu->rq;
-	struct list_head *p;
+	int node = spu->node;
 	struct spu *tmp;
 	int rc = 0;
 
-	down(&rq->sem);
-	list_for_each(p, &rq->active_list) {
-		tmp = list_entry(p, struct spu, sched_list);
+	mutex_lock(&spu_prio->active_mutex[node]);
+	list_for_each_entry(tmp, &spu_prio->active_list[node], list) {
 		if (tmp == spu) {
-			del_active(rq, spu);
+			list_del_init(&spu->list);
 			rc = 1;
 			break;
 		}
 	}
-	up(&rq->sem);
+	mutex_unlock(&spu_prio->active_mutex[node]);
 	return rc;
 }
 
 static void put_active_spu(struct spu *spu)
 {
-	struct spu_runqueue *rq = spu->rq;
+	int node = spu->node;
+
+	mutex_lock(&spu_prio->active_mutex[node]);
+	list_add_tail(&spu->list, &spu_prio->active_list[node]);
+	mutex_unlock(&spu_prio->active_mutex[node]);
+}
+
+static struct spu *spu_get_idle(struct spu_context *ctx, u64 flags)
+{
+	struct spu *spu = NULL;
+	int node = cpu_to_node(raw_smp_processor_id());
+	int n;
+
+	for (n = 0; n < MAX_NUMNODES; n++, node++) {
+		node = (node < MAX_NUMNODES) ? node : 0;
+		if (!node_allowed(node))
+			continue;
+		spu = spu_alloc_node(node);
+		if (spu)
+			break;
+	}
+	return spu;
+}
 
-	down(&rq->sem);
-	add_active(rq, spu);
-	up(&rq->sem);
+static inline struct spu *spu_get(struct spu_context *ctx, u64 flags)
+{
+	/* Future: spu_get_idle() if possible,
+	 * otherwise try to preempt an active
+	 * context.
+	 */
+	return spu_get_idle(ctx, flags);
 }
 
-/* Lock order:
- * spu_activate() & spu_deactivate() require the
- * caller to have down_write(&ctx->state_sema).
+/* The three externally callable interfaces
+ * for the scheduler begin here.
  *
- * The rq->sem is breifly held (inside or outside a
- * given ctx lock) for list management, but is never
- * held during save/restore.
+ * spu_activate - bind a context to SPU, waiting as needed.
+ * spu_deactivate - unbind a context from its SPU.
+ * spu_yield - yield an SPU if others are waiting.
  */
 
 int spu_activate(struct spu_context *ctx, u64 flags)
 {
 	struct spu *spu;
+	int ret = 0;
 
-	if (ctx->spu)
-		return 0;
-	spu = get_idle_spu(ctx, flags);
-	if (!spu)
-		return (signal_pending(current)) ? -ERESTARTSYS : -EAGAIN;
-	bind_context(spu, ctx);
-	/*
-	 * We're likely to wait for interrupts on the same
-	 * CPU that we are now on, so send them here.
-	 */
-	spu_cpu_affinity_set(spu, raw_smp_processor_id());
-	put_active_spu(spu);
-	return 0;
+	for (;;) {
+		if (ctx->spu)
+			return 0;
+		spu = spu_get(ctx, flags);
+		if (spu != NULL) {
+			if (ctx->spu != NULL) {
+				spu_free(spu);
+				spu_prio_wakeup();
+				break;
+			}
+			bind_context(spu, ctx);
+			put_active_spu(spu);
+			break;
+		}
+		spu_prio_wait(ctx, flags);
+		if (signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			spu_prio_wakeup();
+			break;
+		}
+	}
+	return ret;
 }
 
 void spu_deactivate(struct spu_context *ctx)
@@ -378,8 +275,10 @@ void spu_deactivate(struct spu_context *ctx)
 		return;
 	needs_idle = get_active_spu(spu);
 	unbind_context(spu, ctx);
-	if (needs_idle)
-		put_idle_spu(spu);
+	if (needs_idle) {
+		spu_free(spu);
+		spu_prio_wakeup();
+	}
 }
 
 void spu_yield(struct spu_context *ctx)
@@ -387,77 +286,60 @@ void spu_yield(struct spu_context *ctx)
 	struct spu *spu;
 	int need_yield = 0;
 
-	down_write(&ctx->state_sema);
-	spu = ctx->spu;
-	if (spu && (sched_find_first_bit(spu->rq->prio.bitmap) < MAX_PRIO)) {
-		pr_debug("%s: yielding SPU %d\n", __FUNCTION__, spu->number);
-		spu_deactivate(ctx);
-		ctx->state = SPU_STATE_SAVED;
-		need_yield = 1;
-	} else if (spu) {
-		spu->prio = MAX_PRIO;
+	if (down_write_trylock(&ctx->state_sema)) {
+		if ((spu = ctx->spu) != NULL) {
+			int best = sched_find_first_bit(spu_prio->bitmap);
+			if (best < MAX_PRIO) {
+				pr_debug("%s: yielding SPU %d NODE %d\n",
+					 __FUNCTION__, spu->number, spu->node);
+				spu_deactivate(ctx);
+				ctx->state = SPU_STATE_SAVED;
+				need_yield = 1;
+			} else {
+				spu->prio = MAX_PRIO;
+			}
+		}
+		up_write(&ctx->state_sema);
 	}
-	up_write(&ctx->state_sema);
 	if (unlikely(need_yield))
 		yield();
 }
 
 int __init spu_sched_init(void)
 {
-	struct spu_runqueue *rq;
-	struct spu *spu;
 	int i;
 
-	rq = spu_runqueues = kmalloc(sizeof(struct spu_runqueue), GFP_KERNEL);
-	if (!rq) {
-		printk(KERN_WARNING "%s: Unable to allocate runqueues.\n",
+	spu_prio = kzalloc(sizeof(struct spu_prio_array), GFP_KERNEL);
+	if (!spu_prio) {
+		printk(KERN_WARNING "%s: Unable to allocate priority queue.\n",
 		       __FUNCTION__);
 		return 1;
 	}
-	memset(rq, 0, sizeof(struct spu_runqueue));
-	init_MUTEX(&rq->sem);
-	INIT_LIST_HEAD(&rq->active_list);
-	INIT_LIST_HEAD(&rq->idle_list);
-	rq->nr_active = 0;
-	rq->nr_idle = 0;
-	rq->nr_switches = 0;
-	atomic_set(&rq->prio.nr_blocked, 0);
 	for (i = 0; i < MAX_PRIO; i++) {
-		init_waitqueue_head(&rq->prio.waitq[i]);
-		__clear_bit(i, rq->prio.bitmap);
+		init_waitqueue_head(&spu_prio->waitq[i]);
+		__clear_bit(i, spu_prio->bitmap);
 	}
-	__set_bit(MAX_PRIO, rq->prio.bitmap);
-	for (;;) {
-		spu = spu_alloc();
-		if (!spu)
-			break;
-		pr_debug("%s: adding SPU[%d]\n", __FUNCTION__, spu->number);
-		add_idle(rq, spu);
-		spu->rq = rq;
-		spu->timestamp = jiffies;
-	}
-	if (!rq->nr_idle) {
-		printk(KERN_WARNING "%s: No available SPUs.\n", __FUNCTION__);
-		kfree(rq);
-		return 1;
+	__set_bit(MAX_PRIO, spu_prio->bitmap);
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		mutex_init(&spu_prio->active_mutex[i]);
+		INIT_LIST_HEAD(&spu_prio->active_list[i]);
 	}
 	return 0;
 }
 
 void __exit spu_sched_exit(void)
 {
-	struct spu_runqueue *rq = spu_rq();
-	struct spu *spu;
+	struct spu *spu, *tmp;
+	int node;
 
-	if (!rq) {
-		printk(KERN_WARNING "%s: no runqueues!\n", __FUNCTION__);
-		return;
-	}
-	while (rq->nr_idle > 0) {
-		spu = del_idle(rq);
-		if (!spu)
-			break;
-		spu_free(spu);
+	for (node = 0; node < MAX_NUMNODES; node++) {
+		mutex_lock(&spu_prio->active_mutex[node]);
+		list_for_each_entry_safe(spu, tmp, &spu_prio->active_list[node],
+					 list) {
+			list_del_init(&spu->list);
+			spu_free(spu);
+		}
+		mutex_unlock(&spu_prio->active_mutex[node]);
 	}
-	kfree(rq);
+	kfree(spu_prio);
 }
diff --git a/include/asm-powerpc/spu.h b/include/asm-powerpc/spu.h
index b42b53c40f5d..f6c0a95e8209 100644
--- a/include/asm-powerpc/spu.h
+++ b/include/asm-powerpc/spu.h
@@ -147,6 +147,7 @@ struct spu {
 };
 
 struct spu *spu_alloc(void);
+struct spu *spu_alloc_node(int node);
 void spu_free(struct spu *spu);
 int spu_irq_class_0_bottom(struct spu *spu);
 int spu_irq_class_1_bottom(struct spu *spu);