-rw-r--r--	arch/powerpc/platforms/cell/spu_base.c    |  51
-rw-r--r--	arch/powerpc/platforms/cell/spufs/sched.c | 444
-rw-r--r--	include/asm-powerpc/spu.h                 |   1
3 files changed, 198 insertions(+), 298 deletions(-)
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index f78680346e5f..5931973845b1 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -317,7 +317,7 @@ static void spu_free_irqs(struct spu *spu)
 	free_irq(spu->irqs[2], spu);
 }
 
-static LIST_HEAD(spu_list);
+static struct list_head spu_list[MAX_NUMNODES];
 static DEFINE_MUTEX(spu_mutex);
 
 static void spu_init_channels(struct spu *spu)
@@ -354,32 +354,42 @@ static void spu_init_channels(struct spu *spu)
 	}
 }
 
-struct spu *spu_alloc(void)
+struct spu *spu_alloc_node(int node)
 {
-	struct spu *spu;
+	struct spu *spu = NULL;
 
 	mutex_lock(&spu_mutex);
-	if (!list_empty(&spu_list)) {
-		spu = list_entry(spu_list.next, struct spu, list);
+	if (!list_empty(&spu_list[node])) {
+		spu = list_entry(spu_list[node].next, struct spu, list);
 		list_del_init(&spu->list);
-		pr_debug("Got SPU %x %d\n", spu->isrc, spu->number);
-	} else {
-		pr_debug("No SPU left\n");
-		spu = NULL;
+		pr_debug("Got SPU %x %d %d\n",
+			 spu->isrc, spu->number, spu->node);
+		spu_init_channels(spu);
 	}
 	mutex_unlock(&spu_mutex);
 
-	if (spu)
-		spu_init_channels(spu);
+	return spu;
+}
+EXPORT_SYMBOL_GPL(spu_alloc_node);
+
+struct spu *spu_alloc(void)
+{
+	struct spu *spu = NULL;
+	int node;
+
+	for (node = 0; node < MAX_NUMNODES; node++) {
+		spu = spu_alloc_node(node);
+		if (spu)
+			break;
+	}
 
 	return spu;
 }
-EXPORT_SYMBOL_GPL(spu_alloc);
 
 void spu_free(struct spu *spu)
 {
 	mutex_lock(&spu_mutex);
-	list_add_tail(&spu->list, &spu_list);
+	list_add_tail(&spu->list, &spu_list[spu->node]);
 	mutex_unlock(&spu_mutex);
 }
 EXPORT_SYMBOL_GPL(spu_free);
@@ -712,7 +722,7 @@ static int __init create_spu(struct device_node *spe)
 	if (ret)
 		goto out_free_irqs;
 
-	list_add(&spu->list, &spu_list);
+	list_add(&spu->list, &spu_list[spu->node]);
 	mutex_unlock(&spu_mutex);
 
 	pr_debug(KERN_DEBUG "Using SPE %s %02x %p %p %p %p %d\n",
@@ -745,9 +755,13 @@ static void destroy_spu(struct spu *spu)
 static void cleanup_spu_base(void)
 {
 	struct spu *spu, *tmp;
+	int node;
+
 	mutex_lock(&spu_mutex);
-	list_for_each_entry_safe(spu, tmp, &spu_list, list)
-		destroy_spu(spu);
+	for (node = 0; node < MAX_NUMNODES; node++) {
+		list_for_each_entry_safe(spu, tmp, &spu_list[node], list)
+			destroy_spu(spu);
+	}
 	mutex_unlock(&spu_mutex);
 	sysdev_class_unregister(&spu_sysdev_class);
 }
@@ -756,13 +770,16 @@ module_exit(cleanup_spu_base);
 static int __init init_spu_base(void)
 {
 	struct device_node *node;
-	int ret;
+	int i, ret;
 
 	/* create sysdev class for spus */
 	ret = sysdev_class_register(&spu_sysdev_class);
 	if (ret)
		return ret;
 
+	for (i = 0; i < MAX_NUMNODES; i++)
+		INIT_LIST_HEAD(&spu_list[i]);
+
 	ret = -ENODEV;
 	for (node = of_find_node_by_type(NULL, "spe");
 	     node; node = of_find_node_by_type(node, "spe")) {
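
The spu_base.c hunks above replace the single global spu_list with one free list per NUMA node, still guarded by spu_mutex: spu_alloc_node() hands out an SPU from one specific node's list, and spu_alloc() simply walks every node until one succeeds. The following stand-alone user-space sketch is not part of the patch (all toy_* names are hypothetical, locking omitted); it only models that per-node free-list pattern:

/* Toy model of the per-node free lists introduced above. */
#include <stdio.h>

#define TOY_MAX_NODES 4

struct toy_spu {
	int number;
	int node;
	struct toy_spu *next;	/* next free SPU on the same node */
};

static struct toy_spu *free_list[TOY_MAX_NODES];

static void toy_free(struct toy_spu *spu)
{
	spu->next = free_list[spu->node];
	free_list[spu->node] = spu;
}

/* Like spu_alloc_node(): take a free SPU from one specific node, or NULL. */
static struct toy_spu *toy_alloc_node(int node)
{
	struct toy_spu *spu = free_list[node];

	if (spu)
		free_list[node] = spu->next;
	return spu;
}

/* Like spu_alloc(): try every node in turn when there is no preference. */
static struct toy_spu *toy_alloc(void)
{
	struct toy_spu *spu = NULL;
	int node;

	for (node = 0; node < TOY_MAX_NODES; node++) {
		spu = toy_alloc_node(node);
		if (spu)
			break;
	}
	return spu;
}

int main(void)
{
	struct toy_spu spus[2] = { { 0, 1, NULL }, { 1, 1, NULL } };

	toy_free(&spus[0]);
	toy_free(&spus[1]);

	struct toy_spu *local = toy_alloc_node(1);	/* node-local request */
	struct toy_spu *any = toy_alloc();		/* any-node fallback */

	printf("local: SPU %d on node %d\n", local->number, local->node);
	printf("any:   SPU %d on node %d\n", any->number, any->node);
	return 0;
}
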
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 1350294484b6..bd4e2c3d5d08 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -3,11 +3,7 @@
  * Copyright (C) IBM 2005
  * Author: Mark Nutter <mnutter@us.ibm.com>
  *
- * SPU scheduler, based on Linux thread priority. For now use
- * a simple "cooperative" yield model with no preemption. SPU
- * scheduling will eventually be preemptive: When a thread with
- * a higher static priority gets ready to run, then an active SPU
- * context will be preempted and returned to the waitq.
+ * 2006-03-31	NUMA domains added.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -37,6 +33,8 @@
 #include <linux/smp_lock.h>
 #include <linux/stddef.h>
 #include <linux/unistd.h>
+#include <linux/numa.h>
+#include <linux/mutex.h>
 
 #include <asm/io.h>
 #include <asm/mmu_context.h>
@@ -49,125 +47,38 @@
 
 #define SPU_BITMAP_SIZE (((MAX_PRIO+BITS_PER_LONG)/BITS_PER_LONG)+1)
 struct spu_prio_array {
-	atomic_t nr_blocked;
 	unsigned long bitmap[SPU_BITMAP_SIZE];
 	wait_queue_head_t waitq[MAX_PRIO];
+	struct list_head active_list[MAX_NUMNODES];
+	struct mutex active_mutex[MAX_NUMNODES];
 };
 
-/* spu_runqueue - This is the main runqueue data structure for SPUs. */
-struct spu_runqueue {
-	struct semaphore sem;
-	unsigned long nr_active;
-	unsigned long nr_idle;
-	unsigned long nr_switches;
-	struct list_head active_list;
-	struct list_head idle_list;
-	struct spu_prio_array prio;
-};
-
-static struct spu_runqueue *spu_runqueues = NULL;
-
-static inline struct spu_runqueue *spu_rq(void)
-{
-	/* Future: make this a per-NODE array,
-	 * and use cpu_to_node(smp_processor_id())
-	 */
-	return spu_runqueues;
-}
-
-static inline struct spu *del_idle(struct spu_runqueue *rq)
-{
-	struct spu *spu;
-
-	BUG_ON(rq->nr_idle <= 0);
-	BUG_ON(list_empty(&rq->idle_list));
-	/* Future: Move SPU out of low-power SRI state. */
-	spu = list_entry(rq->idle_list.next, struct spu, sched_list);
-	list_del_init(&spu->sched_list);
-	rq->nr_idle--;
-	return spu;
-}
-
-static inline void del_active(struct spu_runqueue *rq, struct spu *spu)
-{
-	BUG_ON(rq->nr_active <= 0);
-	BUG_ON(list_empty(&rq->active_list));
-	list_del_init(&spu->sched_list);
-	rq->nr_active--;
-}
-
-static inline void add_idle(struct spu_runqueue *rq, struct spu *spu)
-{
-	/* Future: Put SPU into low-power SRI state. */
-	list_add_tail(&spu->sched_list, &rq->idle_list);
-	rq->nr_idle++;
-}
-
-static inline void add_active(struct spu_runqueue *rq, struct spu *spu)
-{
-	rq->nr_active++;
-	rq->nr_switches++;
-	list_add_tail(&spu->sched_list, &rq->active_list);
-}
-
-static void prio_wakeup(struct spu_runqueue *rq)
-{
-	if (atomic_read(&rq->prio.nr_blocked) && rq->nr_idle) {
-		int best = sched_find_first_bit(rq->prio.bitmap);
-		if (best < MAX_PRIO) {
-			wait_queue_head_t *wq = &rq->prio.waitq[best];
-			wake_up_interruptible_nr(wq, 1);
-		}
-	}
-}
-
-static void prio_wait(struct spu_runqueue *rq, struct spu_context *ctx,
-		      u64 flags)
-{
-	int prio = current->prio;
-	wait_queue_head_t *wq = &rq->prio.waitq[prio];
-	DEFINE_WAIT(wait);
-
-	__set_bit(prio, rq->prio.bitmap);
-	atomic_inc(&rq->prio.nr_blocked);
-	prepare_to_wait_exclusive(wq, &wait, TASK_INTERRUPTIBLE);
-	if (!signal_pending(current)) {
-		up(&rq->sem);
-		up_write(&ctx->state_sema);
-		pr_debug("%s: pid=%d prio=%d\n", __FUNCTION__,
-			 current->pid, current->prio);
-		schedule();
-		down_write(&ctx->state_sema);
-		down(&rq->sem);
-	}
-	finish_wait(wq, &wait);
-	atomic_dec(&rq->prio.nr_blocked);
-	if (!waitqueue_active(wq))
-		__clear_bit(prio, rq->prio.bitmap);
-}
+static struct spu_prio_array *spu_prio;
 
-static inline int is_best_prio(struct spu_runqueue *rq)
+static inline int node_allowed(int node)
 {
-	int best_prio;
+	cpumask_t mask;
 
-	best_prio = sched_find_first_bit(rq->prio.bitmap);
-	return (current->prio < best_prio) ? 1 : 0;
+	if (!nr_cpus_node(node))
+		return 0;
+	mask = node_to_cpumask(node);
+	if (!cpus_intersects(mask, current->cpus_allowed))
+		return 0;
+	return 1;
 }
 
 static inline void mm_needs_global_tlbie(struct mm_struct *mm)
 {
+	int nr = (NR_CPUS > 1) ? NR_CPUS : NR_CPUS + 1;
+
 	/* Global TLBIE broadcast required with SPEs. */
-#if (NR_CPUS > 1)
-	__cpus_setall(&mm->cpu_vm_mask, NR_CPUS);
-#else
-	__cpus_setall(&mm->cpu_vm_mask, NR_CPUS+1); /* is this ok? */
-#endif
+	__cpus_setall(&mm->cpu_vm_mask, nr);
 }
 
 static inline void bind_context(struct spu *spu, struct spu_context *ctx)
 {
-	pr_debug("%s: pid=%d SPU=%d\n", __FUNCTION__, current->pid,
-		 spu->number);
+	pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid,
+		 spu->number, spu->node);
 	spu->ctx = ctx;
 	spu->flags = 0;
 	ctx->flags = 0;
@@ -185,12 +96,13 @@ static inline void bind_context(struct spu *spu, struct spu_context *ctx)
 	spu_unmap_mappings(ctx);
 	spu_restore(&ctx->csa, spu);
 	spu->timestamp = jiffies;
+	spu_cpu_affinity_set(spu, raw_smp_processor_id());
 }
 
 static inline void unbind_context(struct spu *spu, struct spu_context *ctx)
 {
-	pr_debug("%s: unbind pid=%d SPU=%d\n", __FUNCTION__,
-		 spu->pid, spu->number);
+	pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__,
+		 spu->pid, spu->number, spu->node);
 	spu_unmap_mappings(ctx);
 	spu_save(&ctx->csa, spu);
 	spu->timestamp = jiffies;
@@ -209,163 +121,148 @@ static inline void unbind_context(struct spu *spu, struct spu_context *ctx)
 	spu->ctx = NULL;
 }
 
-static void spu_reaper(void *data)
+static inline void spu_add_wq(wait_queue_head_t * wq, wait_queue_t * wait,
+			      int prio)
 {
-	struct spu_context *ctx = data;
-	struct spu *spu;
-
-	down_write(&ctx->state_sema);
-	spu = ctx->spu;
-	if (spu && test_bit(SPU_CONTEXT_PREEMPT, &ctx->flags)) {
-		if (atomic_read(&spu->rq->prio.nr_blocked)) {
-			pr_debug("%s: spu=%d\n", __func__, spu->number);
-			ctx->ops->runcntl_stop(ctx);
-			spu_deactivate(ctx);
-			wake_up_all(&ctx->stop_wq);
-		} else {
-			clear_bit(SPU_CONTEXT_PREEMPT, &ctx->flags);
-		}
-	}
-	up_write(&ctx->state_sema);
-	put_spu_context(ctx);
+	prepare_to_wait_exclusive(wq, wait, TASK_INTERRUPTIBLE);
+	set_bit(prio, spu_prio->bitmap);
 }
 
-static void schedule_spu_reaper(struct spu_runqueue *rq, struct spu *spu)
+static inline void spu_del_wq(wait_queue_head_t * wq, wait_queue_t * wait,
+			      int prio)
 {
-	struct spu_context *ctx = get_spu_context(spu->ctx);
-	unsigned long now = jiffies;
-	unsigned long expire = spu->timestamp + SPU_MIN_TIMESLICE;
-
-	set_bit(SPU_CONTEXT_PREEMPT, &ctx->flags);
-	INIT_WORK(&ctx->reap_work, spu_reaper, ctx);
-	if (time_after(now, expire))
-		schedule_work(&ctx->reap_work);
-	else
-		schedule_delayed_work(&ctx->reap_work, expire - now);
-}
+	u64 flags;
 
-static void check_preempt_active(struct spu_runqueue *rq)
-{
-	struct list_head *p;
-	struct spu *worst = NULL;
-
-	list_for_each(p, &rq->active_list) {
-		struct spu *spu = list_entry(p, struct spu, sched_list);
-		struct spu_context *ctx = spu->ctx;
-		if (!test_bit(SPU_CONTEXT_PREEMPT, &ctx->flags)) {
-			if (!worst || (spu->prio > worst->prio)) {
-				worst = spu;
-			}
-		}
-	}
-	if (worst && (current->prio < worst->prio))
-		schedule_spu_reaper(rq, worst);
+	__set_current_state(TASK_RUNNING);
+
+	spin_lock_irqsave(&wq->lock, flags);
+
+	remove_wait_queue_locked(wq, wait);
+	if (list_empty(&wq->task_list))
+		clear_bit(prio, spu_prio->bitmap);
+
+	spin_unlock_irqrestore(&wq->lock, flags);
 }
 
-static struct spu *get_idle_spu(struct spu_context *ctx, u64 flags)
+static void spu_prio_wait(struct spu_context *ctx, u64 flags)
 {
-	struct spu_runqueue *rq;
-	struct spu *spu = NULL;
+	int prio = current->prio;
+	wait_queue_head_t *wq = &spu_prio->waitq[prio];
+	DEFINE_WAIT(wait);
 
-	rq = spu_rq();
-	down(&rq->sem);
-	for (;;) {
-		if (rq->nr_idle > 0) {
-			if (is_best_prio(rq)) {
-				/* Fall through. */
-				spu = del_idle(rq);
-				break;
-			} else {
-				prio_wakeup(rq);
-				up(&rq->sem);
-				yield();
-				if (signal_pending(current)) {
-					return NULL;
-				}
-				rq = spu_rq();
-				down(&rq->sem);
-				continue;
-			}
-		} else {
-			check_preempt_active(rq);
-			prio_wait(rq, ctx, flags);
-			if (signal_pending(current)) {
-				prio_wakeup(rq);
-				spu = NULL;
-				break;
-			}
-			continue;
-		}
+	if (ctx->spu)
+		return;
+
+	spu_add_wq(wq, &wait, prio);
+
+	if (!signal_pending(current)) {
+		up_write(&ctx->state_sema);
+		pr_debug("%s: pid=%d prio=%d\n", __FUNCTION__,
+			 current->pid, current->prio);
+		schedule();
+		down_write(&ctx->state_sema);
 	}
-	up(&rq->sem);
-	return spu;
+
+	spu_del_wq(wq, &wait, prio);
 }
 
-static void put_idle_spu(struct spu *spu)
+static void spu_prio_wakeup(void)
 {
-	struct spu_runqueue *rq = spu->rq;
-
-	down(&rq->sem);
-	add_idle(rq, spu);
-	prio_wakeup(rq);
-	up(&rq->sem);
+	int best = sched_find_first_bit(spu_prio->bitmap);
+	if (best < MAX_PRIO) {
+		wait_queue_head_t *wq = &spu_prio->waitq[best];
+		wake_up_interruptible_nr(wq, 1);
+	}
 }
 
 static int get_active_spu(struct spu *spu)
 {
-	struct spu_runqueue *rq = spu->rq;
-	struct list_head *p;
+	int node = spu->node;
 	struct spu *tmp;
 	int rc = 0;
 
-	down(&rq->sem);
-	list_for_each(p, &rq->active_list) {
-		tmp = list_entry(p, struct spu, sched_list);
+	mutex_lock(&spu_prio->active_mutex[node]);
+	list_for_each_entry(tmp, &spu_prio->active_list[node], list) {
 		if (tmp == spu) {
-			del_active(rq, spu);
+			list_del_init(&spu->list);
 			rc = 1;
 			break;
 		}
 	}
-	up(&rq->sem);
+	mutex_unlock(&spu_prio->active_mutex[node]);
 	return rc;
 }
 
 static void put_active_spu(struct spu *spu)
 {
-	struct spu_runqueue *rq = spu->rq;
+	int node = spu->node;
+
+	mutex_lock(&spu_prio->active_mutex[node]);
+	list_add_tail(&spu->list, &spu_prio->active_list[node]);
+	mutex_unlock(&spu_prio->active_mutex[node]);
+}
+
+static struct spu *spu_get_idle(struct spu_context *ctx, u64 flags)
+{
+	struct spu *spu = NULL;
+	int node = cpu_to_node(raw_smp_processor_id());
+	int n;
+
+	for (n = 0; n < MAX_NUMNODES; n++, node++) {
+		node = (node < MAX_NUMNODES) ? node : 0;
+		if (!node_allowed(node))
+			continue;
+		spu = spu_alloc_node(node);
+		if (spu)
+			break;
+	}
+	return spu;
+}
 
-	down(&rq->sem);
-	add_active(rq, spu);
-	up(&rq->sem);
+static inline struct spu *spu_get(struct spu_context *ctx, u64 flags)
+{
+	/* Future: spu_get_idle() if possible,
+	 * otherwise try to preempt an active
+	 * context.
+	 */
+	return spu_get_idle(ctx, flags);
 }
 
-/* Lock order:
- *	spu_activate() & spu_deactivate() require the
- *	caller to have down_write(&ctx->state_sema).
+/* The three externally callable interfaces
+ * for the scheduler begin here.
  *
- * The rq->sem is breifly held (inside or outside a
- * given ctx lock) for list management, but is never
- * held during save/restore.
+ * spu_activate - bind a context to SPU, waiting as needed.
+ * spu_deactivate - unbind a context from its SPU.
+ * spu_yield - yield an SPU if others are waiting.
  */
 
 int spu_activate(struct spu_context *ctx, u64 flags)
 {
 	struct spu *spu;
+	int ret = 0;
 
-	if (ctx->spu)
-		return 0;
-	spu = get_idle_spu(ctx, flags);
-	if (!spu)
-		return (signal_pending(current)) ? -ERESTARTSYS : -EAGAIN;
-	bind_context(spu, ctx);
-	/*
-	 * We're likely to wait for interrupts on the same
-	 * CPU that we are now on, so send them here.
-	 */
-	spu_cpu_affinity_set(spu, raw_smp_processor_id());
-	put_active_spu(spu);
-	return 0;
+	for (;;) {
+		if (ctx->spu)
+			return 0;
+		spu = spu_get(ctx, flags);
+		if (spu != NULL) {
+			if (ctx->spu != NULL) {
+				spu_free(spu);
+				spu_prio_wakeup();
+				break;
+			}
+			bind_context(spu, ctx);
+			put_active_spu(spu);
+			break;
+		}
+		spu_prio_wait(ctx, flags);
+		if (signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			spu_prio_wakeup();
+			break;
+		}
+	}
+	return ret;
 }
 
 void spu_deactivate(struct spu_context *ctx)
@@ -378,8 +275,10 @@ void spu_deactivate(struct spu_context *ctx)
 		return;
 	needs_idle = get_active_spu(spu);
 	unbind_context(spu, ctx);
-	if (needs_idle)
-		put_idle_spu(spu);
+	if (needs_idle) {
+		spu_free(spu);
+		spu_prio_wakeup();
+	}
 }
 
 void spu_yield(struct spu_context *ctx)
@@ -387,77 +286,60 @@ void spu_yield(struct spu_context *ctx)
 	struct spu *spu;
 	int need_yield = 0;
 
-	down_write(&ctx->state_sema);
-	spu = ctx->spu;
-	if (spu && (sched_find_first_bit(spu->rq->prio.bitmap) < MAX_PRIO)) {
-		pr_debug("%s: yielding SPU %d\n", __FUNCTION__, spu->number);
-		spu_deactivate(ctx);
-		ctx->state = SPU_STATE_SAVED;
-		need_yield = 1;
-	} else if (spu) {
-		spu->prio = MAX_PRIO;
+	if (down_write_trylock(&ctx->state_sema)) {
+		if ((spu = ctx->spu) != NULL) {
+			int best = sched_find_first_bit(spu_prio->bitmap);
+			if (best < MAX_PRIO) {
+				pr_debug("%s: yielding SPU %d NODE %d\n",
+					 __FUNCTION__, spu->number, spu->node);
+				spu_deactivate(ctx);
+				ctx->state = SPU_STATE_SAVED;
+				need_yield = 1;
+			} else {
+				spu->prio = MAX_PRIO;
+			}
+		}
+		up_write(&ctx->state_sema);
 	}
-	up_write(&ctx->state_sema);
 	if (unlikely(need_yield))
 		yield();
 }
 
 int __init spu_sched_init(void)
 {
-	struct spu_runqueue *rq;
-	struct spu *spu;
 	int i;
 
-	rq = spu_runqueues = kmalloc(sizeof(struct spu_runqueue), GFP_KERNEL);
-	if (!rq) {
-		printk(KERN_WARNING "%s: Unable to allocate runqueues.\n",
+	spu_prio = kzalloc(sizeof(struct spu_prio_array), GFP_KERNEL);
+	if (!spu_prio) {
+		printk(KERN_WARNING "%s: Unable to allocate priority queue.\n",
 		       __FUNCTION__);
 		return 1;
 	}
-	memset(rq, 0, sizeof(struct spu_runqueue));
-	init_MUTEX(&rq->sem);
-	INIT_LIST_HEAD(&rq->active_list);
-	INIT_LIST_HEAD(&rq->idle_list);
-	rq->nr_active = 0;
-	rq->nr_idle = 0;
-	rq->nr_switches = 0;
-	atomic_set(&rq->prio.nr_blocked, 0);
 	for (i = 0; i < MAX_PRIO; i++) {
-		init_waitqueue_head(&rq->prio.waitq[i]);
-		__clear_bit(i, rq->prio.bitmap);
+		init_waitqueue_head(&spu_prio->waitq[i]);
+		__clear_bit(i, spu_prio->bitmap);
 	}
-	__set_bit(MAX_PRIO, rq->prio.bitmap);
-	for (;;) {
-		spu = spu_alloc();
-		if (!spu)
-			break;
-		pr_debug("%s: adding SPU[%d]\n", __FUNCTION__, spu->number);
-		add_idle(rq, spu);
-		spu->rq = rq;
-		spu->timestamp = jiffies;
-	}
-	if (!rq->nr_idle) {
-		printk(KERN_WARNING "%s: No available SPUs.\n", __FUNCTION__);
-		kfree(rq);
-		return 1;
+	__set_bit(MAX_PRIO, spu_prio->bitmap);
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		mutex_init(&spu_prio->active_mutex[i]);
+		INIT_LIST_HEAD(&spu_prio->active_list[i]);
 	}
 	return 0;
 }
 
 void __exit spu_sched_exit(void)
 {
-	struct spu_runqueue *rq = spu_rq();
-	struct spu *spu;
+	struct spu *spu, *tmp;
+	int node;
 
-	if (!rq) {
-		printk(KERN_WARNING "%s: no runqueues!\n", __FUNCTION__);
-		return;
-	}
-	while (rq->nr_idle > 0) {
-		spu = del_idle(rq);
-		if (!spu)
-			break;
-		spu_free(spu);
+	for (node = 0; node < MAX_NUMNODES; node++) {
+		mutex_lock(&spu_prio->active_mutex[node]);
+		list_for_each_entry_safe(spu, tmp, &spu_prio->active_list[node],
+					 list) {
+			list_del_init(&spu->list);
+			spu_free(spu);
+		}
+		mutex_unlock(&spu_prio->active_mutex[node]);
 	}
-	kfree(rq);
+	kfree(spu_prio);
 }
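
On the scheduler side, spu_get_idle() above looks for a free SPU starting at the node of the CPU the caller is running on, wrapping around MAX_NUMNODES and skipping nodes that node_allowed() rejects (nodes with no CPUs, or nodes outside current->cpus_allowed). The stand-alone sketch below is not part of the patch (toy_* names are hypothetical); it only models that node search order:

/* Toy model of the node-local-first search used by spu_get_idle(). */
#include <stdbool.h>
#include <stdio.h>

#define TOY_MAX_NODES 4

/* Stand-in for node_allowed(): bit N set means the task may run on node N. */
static bool toy_node_allowed(int node, unsigned int allowed_nodes)
{
	return (allowed_nodes >> node) & 1;
}

/* Return the first allowed node with a free SPU, preferring the node the
 * caller runs on and wrapping around; -1 if nothing is free anywhere. */
static int toy_pick_node(int current_node, unsigned int allowed_nodes,
			 const int free_spus[TOY_MAX_NODES])
{
	int node = current_node;
	int n;

	for (n = 0; n < TOY_MAX_NODES; n++, node++) {
		node = (node < TOY_MAX_NODES) ? node : 0;
		if (!toy_node_allowed(node, allowed_nodes))
			continue;
		if (free_spus[node] > 0)
			return node;
	}
	return -1;
}

int main(void)
{
	int free_spus[TOY_MAX_NODES] = { 0, 0, 3, 1 };

	/* Task runs on node 1, may use nodes 1-3: node 2 is picked first. */
	printf("picked node %d\n", toy_pick_node(1, 0xE, free_spus));
	return 0;
}
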
diff --git a/include/asm-powerpc/spu.h b/include/asm-powerpc/spu.h
index b42b53c40f5d..f6c0a95e8209 100644
--- a/include/asm-powerpc/spu.h
+++ b/include/asm-powerpc/spu.h
@@ -147,6 +147,7 @@ struct spu {
 };
 
 struct spu *spu_alloc(void);
+struct spu *spu_alloc_node(int node);
 void spu_free(struct spu *spu);
 int spu_irq_class_0_bottom(struct spu *spu);
 int spu_irq_class_1_bottom(struct spu *spu);