 arch/powerpc/platforms/cell/spu_base.c    |  51
 arch/powerpc/platforms/cell/spufs/sched.c | 444
 include/asm-powerpc/spu.h                 |   1
 3 files changed, 198 insertions, 298 deletions

diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index f78680346e5f..5931973845b1 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -317,7 +317,7 @@ static void spu_free_irqs(struct spu *spu)
 	free_irq(spu->irqs[2], spu);
 }
 
-static LIST_HEAD(spu_list);
+static struct list_head spu_list[MAX_NUMNODES];
 static DEFINE_MUTEX(spu_mutex);
 
 static void spu_init_channels(struct spu *spu)
@@ -354,32 +354,42 @@ static void spu_init_channels(struct spu *spu)
 	}
 }
 
-struct spu *spu_alloc(void)
+struct spu *spu_alloc_node(int node)
 {
-	struct spu *spu;
+	struct spu *spu = NULL;
 
 	mutex_lock(&spu_mutex);
-	if (!list_empty(&spu_list)) {
-		spu = list_entry(spu_list.next, struct spu, list);
+	if (!list_empty(&spu_list[node])) {
+		spu = list_entry(spu_list[node].next, struct spu, list);
 		list_del_init(&spu->list);
-		pr_debug("Got SPU %x %d\n", spu->isrc, spu->number);
-	} else {
-		pr_debug("No SPU left\n");
-		spu = NULL;
+		pr_debug("Got SPU %x %d %d\n",
+			 spu->isrc, spu->number, spu->node);
+		spu_init_channels(spu);
 	}
 	mutex_unlock(&spu_mutex);
 
-	if (spu)
-		spu_init_channels(spu);
+	return spu;
+}
+EXPORT_SYMBOL_GPL(spu_alloc_node);
+
+struct spu *spu_alloc(void)
+{
+	struct spu *spu = NULL;
+	int node;
+
+	for (node = 0; node < MAX_NUMNODES; node++) {
+		spu = spu_alloc_node(node);
+		if (spu)
+			break;
+	}
 
 	return spu;
 }
-EXPORT_SYMBOL_GPL(spu_alloc);
 
 void spu_free(struct spu *spu)
 {
 	mutex_lock(&spu_mutex);
-	list_add_tail(&spu->list, &spu_list);
+	list_add_tail(&spu->list, &spu_list[spu->node]);
 	mutex_unlock(&spu_mutex);
 }
 EXPORT_SYMBOL_GPL(spu_free);
@@ -712,7 +722,7 @@ static int __init create_spu(struct device_node *spe)
 	if (ret)
 		goto out_free_irqs;
 
-	list_add(&spu->list, &spu_list);
+	list_add(&spu->list, &spu_list[spu->node]);
 	mutex_unlock(&spu_mutex);
 
 	pr_debug(KERN_DEBUG "Using SPE %s %02x %p %p %p %p %d\n",
@@ -745,9 +755,13 @@ static void destroy_spu(struct spu *spu)
 static void cleanup_spu_base(void)
 {
 	struct spu *spu, *tmp;
+	int node;
+
 	mutex_lock(&spu_mutex);
-	list_for_each_entry_safe(spu, tmp, &spu_list, list)
-		destroy_spu(spu);
+	for (node = 0; node < MAX_NUMNODES; node++) {
+		list_for_each_entry_safe(spu, tmp, &spu_list[node], list)
+			destroy_spu(spu);
+	}
 	mutex_unlock(&spu_mutex);
 	sysdev_class_unregister(&spu_sysdev_class);
 }
@@ -756,13 +770,16 @@ module_exit(cleanup_spu_base);
 static int __init init_spu_base(void)
 {
 	struct device_node *node;
-	int ret;
+	int i, ret;
 
 	/* create sysdev class for spus */
 	ret = sysdev_class_register(&spu_sysdev_class);
 	if (ret)
 		return ret;
 
+	for (i = 0; i < MAX_NUMNODES; i++)
+		INIT_LIST_HEAD(&spu_list[i]);
+
 	ret = -ENODEV;
 	for (node = of_find_node_by_type(NULL, "spe");
 	     node; node = of_find_node_by_type(node, "spe")) {
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 1350294484b6..bd4e2c3d5d08 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -3,11 +3,7 @@
  * Copyright (C) IBM 2005
  * Author: Mark Nutter <mnutter@us.ibm.com>
  *
- * SPU scheduler, based on Linux thread priority. For now use
- * a simple "cooperative" yield model with no preemption. SPU
- * scheduling will eventually be preemptive: When a thread with
- * a higher static priority gets ready to run, then an active SPU
- * context will be preempted and returned to the waitq.
+ * 2006-03-31	NUMA domains added.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -37,6 +33,8 @@
 #include <linux/smp_lock.h>
 #include <linux/stddef.h>
 #include <linux/unistd.h>
+#include <linux/numa.h>
+#include <linux/mutex.h>
 
 #include <asm/io.h>
 #include <asm/mmu_context.h>
@@ -49,125 +47,38 @@
 
 #define SPU_BITMAP_SIZE (((MAX_PRIO+BITS_PER_LONG)/BITS_PER_LONG)+1)
 struct spu_prio_array {
-	atomic_t nr_blocked;
 	unsigned long bitmap[SPU_BITMAP_SIZE];
 	wait_queue_head_t waitq[MAX_PRIO];
+	struct list_head active_list[MAX_NUMNODES];
+	struct mutex active_mutex[MAX_NUMNODES];
 };
 
-/* spu_runqueue - This is the main runqueue data structure for SPUs. */
-struct spu_runqueue {
-	struct semaphore sem;
-	unsigned long nr_active;
-	unsigned long nr_idle;
-	unsigned long nr_switches;
-	struct list_head active_list;
-	struct list_head idle_list;
-	struct spu_prio_array prio;
-};
-
-static struct spu_runqueue *spu_runqueues = NULL;
-
-static inline struct spu_runqueue *spu_rq(void)
-{
-	/* Future: make this a per-NODE array,
-	 * and use cpu_to_node(smp_processor_id())
-	 */
-	return spu_runqueues;
-}
-
-static inline struct spu *del_idle(struct spu_runqueue *rq)
-{
-	struct spu *spu;
-
-	BUG_ON(rq->nr_idle <= 0);
-	BUG_ON(list_empty(&rq->idle_list));
-	/* Future: Move SPU out of low-power SRI state. */
-	spu = list_entry(rq->idle_list.next, struct spu, sched_list);
-	list_del_init(&spu->sched_list);
-	rq->nr_idle--;
-	return spu;
-}
-
-static inline void del_active(struct spu_runqueue *rq, struct spu *spu)
-{
-	BUG_ON(rq->nr_active <= 0);
-	BUG_ON(list_empty(&rq->active_list));
-	list_del_init(&spu->sched_list);
-	rq->nr_active--;
-}
-
-static inline void add_idle(struct spu_runqueue *rq, struct spu *spu)
-{
-	/* Future: Put SPU into low-power SRI state. */
-	list_add_tail(&spu->sched_list, &rq->idle_list);
-	rq->nr_idle++;
-}
-
-static inline void add_active(struct spu_runqueue *rq, struct spu *spu)
-{
-	rq->nr_active++;
-	rq->nr_switches++;
-	list_add_tail(&spu->sched_list, &rq->active_list);
-}
-
-static void prio_wakeup(struct spu_runqueue *rq)
-{
-	if (atomic_read(&rq->prio.nr_blocked) && rq->nr_idle) {
-		int best = sched_find_first_bit(rq->prio.bitmap);
-		if (best < MAX_PRIO) {
-			wait_queue_head_t *wq = &rq->prio.waitq[best];
-			wake_up_interruptible_nr(wq, 1);
-		}
-	}
-}
-
-static void prio_wait(struct spu_runqueue *rq, struct spu_context *ctx,
-		      u64 flags)
-{
-	int prio = current->prio;
-	wait_queue_head_t *wq = &rq->prio.waitq[prio];
-	DEFINE_WAIT(wait);
-
-	__set_bit(prio, rq->prio.bitmap);
-	atomic_inc(&rq->prio.nr_blocked);
-	prepare_to_wait_exclusive(wq, &wait, TASK_INTERRUPTIBLE);
-	if (!signal_pending(current)) {
-		up(&rq->sem);
-		up_write(&ctx->state_sema);
-		pr_debug("%s: pid=%d prio=%d\n", __FUNCTION__,
-			 current->pid, current->prio);
-		schedule();
-		down_write(&ctx->state_sema);
-		down(&rq->sem);
-	}
-	finish_wait(wq, &wait);
-	atomic_dec(&rq->prio.nr_blocked);
-	if (!waitqueue_active(wq))
-		__clear_bit(prio, rq->prio.bitmap);
-}
+static struct spu_prio_array *spu_prio;
 
-static inline int is_best_prio(struct spu_runqueue *rq)
+static inline int node_allowed(int node)
 {
-	int best_prio;
+	cpumask_t mask;
 
-	best_prio = sched_find_first_bit(rq->prio.bitmap);
-	return (current->prio < best_prio) ? 1 : 0;
+	if (!nr_cpus_node(node))
+		return 0;
+	mask = node_to_cpumask(node);
+	if (!cpus_intersects(mask, current->cpus_allowed))
+		return 0;
+	return 1;
 }
 
 static inline void mm_needs_global_tlbie(struct mm_struct *mm)
 {
+	int nr = (NR_CPUS > 1) ? NR_CPUS : NR_CPUS + 1;
+
 	/* Global TLBIE broadcast required with SPEs. */
-#if (NR_CPUS > 1)
-	__cpus_setall(&mm->cpu_vm_mask, NR_CPUS);
-#else
-	__cpus_setall(&mm->cpu_vm_mask, NR_CPUS+1); /* is this ok? */
-#endif
+	__cpus_setall(&mm->cpu_vm_mask, nr);
 }
 
 static inline void bind_context(struct spu *spu, struct spu_context *ctx)
 {
-	pr_debug("%s: pid=%d SPU=%d\n", __FUNCTION__, current->pid,
-		 spu->number);
+	pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid,
+		 spu->number, spu->node);
 	spu->ctx = ctx;
 	spu->flags = 0;
 	ctx->flags = 0;
@@ -185,12 +96,13 @@ static inline void bind_context(struct spu *spu, struct spu_context *ctx)
 	spu_unmap_mappings(ctx);
 	spu_restore(&ctx->csa, spu);
 	spu->timestamp = jiffies;
+	spu_cpu_affinity_set(spu, raw_smp_processor_id());
 }
 
 static inline void unbind_context(struct spu *spu, struct spu_context *ctx)
 {
-	pr_debug("%s: unbind pid=%d SPU=%d\n", __FUNCTION__,
-		 spu->pid, spu->number);
+	pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__,
+		 spu->pid, spu->number, spu->node);
 	spu_unmap_mappings(ctx);
 	spu_save(&ctx->csa, spu);
 	spu->timestamp = jiffies;
@@ -209,163 +121,148 @@ static inline void unbind_context(struct spu *spu, struct spu_context *ctx)
 	spu->ctx = NULL;
 }
 
-static void spu_reaper(void *data)
+static inline void spu_add_wq(wait_queue_head_t * wq, wait_queue_t * wait,
+			      int prio)
 {
-	struct spu_context *ctx = data;
-	struct spu *spu;
-
-	down_write(&ctx->state_sema);
-	spu = ctx->spu;
-	if (spu && test_bit(SPU_CONTEXT_PREEMPT, &ctx->flags)) {
-		if (atomic_read(&spu->rq->prio.nr_blocked)) {
-			pr_debug("%s: spu=%d\n", __func__, spu->number);
-			ctx->ops->runcntl_stop(ctx);
-			spu_deactivate(ctx);
-			wake_up_all(&ctx->stop_wq);
-		} else {
-			clear_bit(SPU_CONTEXT_PREEMPT, &ctx->flags);
-		}
-	}
-	up_write(&ctx->state_sema);
-	put_spu_context(ctx);
+	prepare_to_wait_exclusive(wq, wait, TASK_INTERRUPTIBLE);
+	set_bit(prio, spu_prio->bitmap);
 }
 
-static void schedule_spu_reaper(struct spu_runqueue *rq, struct spu *spu)
+static inline void spu_del_wq(wait_queue_head_t * wq, wait_queue_t * wait,
+			      int prio)
 {
-	struct spu_context *ctx = get_spu_context(spu->ctx);
-	unsigned long now = jiffies;
-	unsigned long expire = spu->timestamp + SPU_MIN_TIMESLICE;
-
-	set_bit(SPU_CONTEXT_PREEMPT, &ctx->flags);
-	INIT_WORK(&ctx->reap_work, spu_reaper, ctx);
-	if (time_after(now, expire))
-		schedule_work(&ctx->reap_work);
-	else
-		schedule_delayed_work(&ctx->reap_work, expire - now);
-}
+	u64 flags;
 
-static void check_preempt_active(struct spu_runqueue *rq)
-{
-	struct list_head *p;
-	struct spu *worst = NULL;
-
-	list_for_each(p, &rq->active_list) {
-		struct spu *spu = list_entry(p, struct spu, sched_list);
-		struct spu_context *ctx = spu->ctx;
-		if (!test_bit(SPU_CONTEXT_PREEMPT, &ctx->flags)) {
-			if (!worst || (spu->prio > worst->prio)) {
-				worst = spu;
-			}
-		}
-	}
-	if (worst && (current->prio < worst->prio))
-		schedule_spu_reaper(rq, worst);
+	__set_current_state(TASK_RUNNING);
+
+	spin_lock_irqsave(&wq->lock, flags);
+
+	remove_wait_queue_locked(wq, wait);
+	if (list_empty(&wq->task_list))
+		clear_bit(prio, spu_prio->bitmap);
+
+	spin_unlock_irqrestore(&wq->lock, flags);
 }
 
-static struct spu *get_idle_spu(struct spu_context *ctx, u64 flags)
+static void spu_prio_wait(struct spu_context *ctx, u64 flags)
 {
-	struct spu_runqueue *rq;
-	struct spu *spu = NULL;
+	int prio = current->prio;
+	wait_queue_head_t *wq = &spu_prio->waitq[prio];
+	DEFINE_WAIT(wait);
 
-	rq = spu_rq();
-	down(&rq->sem);
-	for (;;) {
-		if (rq->nr_idle > 0) {
-			if (is_best_prio(rq)) {
-				/* Fall through. */
-				spu = del_idle(rq);
-				break;
-			} else {
-				prio_wakeup(rq);
-				up(&rq->sem);
-				yield();
-				if (signal_pending(current)) {
-					return NULL;
-				}
-				rq = spu_rq();
-				down(&rq->sem);
-				continue;
-			}
-		} else {
-			check_preempt_active(rq);
-			prio_wait(rq, ctx, flags);
-			if (signal_pending(current)) {
-				prio_wakeup(rq);
-				spu = NULL;
-				break;
-			}
-			continue;
-		}
+	if (ctx->spu)
+		return;
+
+	spu_add_wq(wq, &wait, prio);
+
+	if (!signal_pending(current)) {
+		up_write(&ctx->state_sema);
+		pr_debug("%s: pid=%d prio=%d\n", __FUNCTION__,
+			 current->pid, current->prio);
+		schedule();
+		down_write(&ctx->state_sema);
 	}
-	up(&rq->sem);
-	return spu;
+
+	spu_del_wq(wq, &wait, prio);
 }
 
-static void put_idle_spu(struct spu *spu)
+static void spu_prio_wakeup(void)
 {
-	struct spu_runqueue *rq = spu->rq;
-
-	down(&rq->sem);
-	add_idle(rq, spu);
-	prio_wakeup(rq);
-	up(&rq->sem);
+	int best = sched_find_first_bit(spu_prio->bitmap);
+	if (best < MAX_PRIO) {
+		wait_queue_head_t *wq = &spu_prio->waitq[best];
+		wake_up_interruptible_nr(wq, 1);
+	}
 }
 
 static int get_active_spu(struct spu *spu)
 {
-	struct spu_runqueue *rq = spu->rq;
-	struct list_head *p;
+	int node = spu->node;
 	struct spu *tmp;
 	int rc = 0;
 
-	down(&rq->sem);
-	list_for_each(p, &rq->active_list) {
-		tmp = list_entry(p, struct spu, sched_list);
+	mutex_lock(&spu_prio->active_mutex[node]);
+	list_for_each_entry(tmp, &spu_prio->active_list[node], list) {
 		if (tmp == spu) {
-			del_active(rq, spu);
+			list_del_init(&spu->list);
 			rc = 1;
 			break;
 		}
 	}
-	up(&rq->sem);
+	mutex_unlock(&spu_prio->active_mutex[node]);
 	return rc;
 }
 
 static void put_active_spu(struct spu *spu)
 {
-	struct spu_runqueue *rq = spu->rq;
+	int node = spu->node;
+
+	mutex_lock(&spu_prio->active_mutex[node]);
+	list_add_tail(&spu->list, &spu_prio->active_list[node]);
+	mutex_unlock(&spu_prio->active_mutex[node]);
+}
+
+static struct spu *spu_get_idle(struct spu_context *ctx, u64 flags)
+{
+	struct spu *spu = NULL;
+	int node = cpu_to_node(raw_smp_processor_id());
+	int n;
+
+	for (n = 0; n < MAX_NUMNODES; n++, node++) {
+		node = (node < MAX_NUMNODES) ? node : 0;
+		if (!node_allowed(node))
+			continue;
+		spu = spu_alloc_node(node);
+		if (spu)
+			break;
+	}
+	return spu;
+}
 
-	down(&rq->sem);
-	add_active(rq, spu);
-	up(&rq->sem);
+static inline struct spu *spu_get(struct spu_context *ctx, u64 flags)
+{
+	/* Future: spu_get_idle() if possible,
+	 * otherwise try to preempt an active
+	 * context.
+	 */
+	return spu_get_idle(ctx, flags);
 }
 
-/* Lock order:
- * spu_activate() & spu_deactivate() require the
- * caller to have down_write(&ctx->state_sema).
+/* The three externally callable interfaces
+ * for the scheduler begin here.
  *
- * The rq->sem is breifly held (inside or outside a
- * given ctx lock) for list management, but is never
- * held during save/restore.
+ * spu_activate - bind a context to SPU, waiting as needed.
+ * spu_deactivate - unbind a context from its SPU.
+ * spu_yield - yield an SPU if others are waiting.
  */
 
 int spu_activate(struct spu_context *ctx, u64 flags)
 {
 	struct spu *spu;
+	int ret = 0;
 
-	if (ctx->spu)
-		return 0;
-	spu = get_idle_spu(ctx, flags);
-	if (!spu)
-		return (signal_pending(current)) ? -ERESTARTSYS : -EAGAIN;
-	bind_context(spu, ctx);
-	/*
-	 * We're likely to wait for interrupts on the same
-	 * CPU that we are now on, so send them here.
-	 */
-	spu_cpu_affinity_set(spu, raw_smp_processor_id());
-	put_active_spu(spu);
-	return 0;
+	for (;;) {
+		if (ctx->spu)
+			return 0;
+		spu = spu_get(ctx, flags);
+		if (spu != NULL) {
+			if (ctx->spu != NULL) {
+				spu_free(spu);
+				spu_prio_wakeup();
+				break;
+			}
+			bind_context(spu, ctx);
+			put_active_spu(spu);
+			break;
+		}
+		spu_prio_wait(ctx, flags);
+		if (signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			spu_prio_wakeup();
+			break;
+		}
+	}
+	return ret;
 }
 
 void spu_deactivate(struct spu_context *ctx)
@@ -378,8 +275,10 @@ void spu_deactivate(struct spu_context *ctx)
 		return;
 	needs_idle = get_active_spu(spu);
 	unbind_context(spu, ctx);
-	if (needs_idle)
-		put_idle_spu(spu);
+	if (needs_idle) {
+		spu_free(spu);
+		spu_prio_wakeup();
+	}
 }
 
 void spu_yield(struct spu_context *ctx)
@@ -387,77 +286,60 @@ void spu_yield(struct spu_context *ctx)
 	struct spu *spu;
 	int need_yield = 0;
 
-	down_write(&ctx->state_sema);
-	spu = ctx->spu;
-	if (spu && (sched_find_first_bit(spu->rq->prio.bitmap) < MAX_PRIO)) {
-		pr_debug("%s: yielding SPU %d\n", __FUNCTION__, spu->number);
-		spu_deactivate(ctx);
-		ctx->state = SPU_STATE_SAVED;
-		need_yield = 1;
-	} else if (spu) {
-		spu->prio = MAX_PRIO;
+	if (down_write_trylock(&ctx->state_sema)) {
+		if ((spu = ctx->spu) != NULL) {
+			int best = sched_find_first_bit(spu_prio->bitmap);
+			if (best < MAX_PRIO) {
+				pr_debug("%s: yielding SPU %d NODE %d\n",
+					__FUNCTION__, spu->number, spu->node);
+				spu_deactivate(ctx);
+				ctx->state = SPU_STATE_SAVED;
+				need_yield = 1;
+			} else {
+				spu->prio = MAX_PRIO;
+			}
+		}
+		up_write(&ctx->state_sema);
 	}
-	up_write(&ctx->state_sema);
 	if (unlikely(need_yield))
 		yield();
 }
 
 int __init spu_sched_init(void)
 {
-	struct spu_runqueue *rq;
-	struct spu *spu;
 	int i;
 
-	rq = spu_runqueues = kmalloc(sizeof(struct spu_runqueue), GFP_KERNEL);
-	if (!rq) {
-		printk(KERN_WARNING "%s: Unable to allocate runqueues.\n",
+	spu_prio = kzalloc(sizeof(struct spu_prio_array), GFP_KERNEL);
+	if (!spu_prio) {
+		printk(KERN_WARNING "%s: Unable to allocate priority queue.\n",
 		       __FUNCTION__);
 		return 1;
 	}
-	memset(rq, 0, sizeof(struct spu_runqueue));
-	init_MUTEX(&rq->sem);
-	INIT_LIST_HEAD(&rq->active_list);
-	INIT_LIST_HEAD(&rq->idle_list);
-	rq->nr_active = 0;
-	rq->nr_idle = 0;
-	rq->nr_switches = 0;
-	atomic_set(&rq->prio.nr_blocked, 0);
 	for (i = 0; i < MAX_PRIO; i++) {
-		init_waitqueue_head(&rq->prio.waitq[i]);
-		__clear_bit(i, rq->prio.bitmap);
+		init_waitqueue_head(&spu_prio->waitq[i]);
+		__clear_bit(i, spu_prio->bitmap);
 	}
-	__set_bit(MAX_PRIO, rq->prio.bitmap);
-	for (;;) {
-		spu = spu_alloc();
-		if (!spu)
-			break;
-		pr_debug("%s: adding SPU[%d]\n", __FUNCTION__, spu->number);
-		add_idle(rq, spu);
-		spu->rq = rq;
-		spu->timestamp = jiffies;
-	}
-	if (!rq->nr_idle) {
-		printk(KERN_WARNING "%s: No available SPUs.\n", __FUNCTION__);
-		kfree(rq);
-		return 1;
+	__set_bit(MAX_PRIO, spu_prio->bitmap);
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		mutex_init(&spu_prio->active_mutex[i]);
+		INIT_LIST_HEAD(&spu_prio->active_list[i]);
 	}
 	return 0;
 }
 
 void __exit spu_sched_exit(void)
 {
-	struct spu_runqueue *rq = spu_rq();
-	struct spu *spu;
+	struct spu *spu, *tmp;
+	int node;
 
-	if (!rq) {
-		printk(KERN_WARNING "%s: no runqueues!\n", __FUNCTION__);
-		return;
-	}
-	while (rq->nr_idle > 0) {
-		spu = del_idle(rq);
-		if (!spu)
-			break;
-		spu_free(spu);
+	for (node = 0; node < MAX_NUMNODES; node++) {
+		mutex_lock(&spu_prio->active_mutex[node]);
+		list_for_each_entry_safe(spu, tmp, &spu_prio->active_list[node],
+					 list) {
+			list_del_init(&spu->list);
+			spu_free(spu);
+		}
+		mutex_unlock(&spu_prio->active_mutex[node]);
 	}
-	kfree(rq);
+	kfree(spu_prio);
 }
diff --git a/include/asm-powerpc/spu.h b/include/asm-powerpc/spu.h
index b42b53c40f5d..f6c0a95e8209 100644
--- a/include/asm-powerpc/spu.h
+++ b/include/asm-powerpc/spu.h
@@ -147,6 +147,7 @@ struct spu {
 };
 
 struct spu *spu_alloc(void);
+struct spu *spu_alloc_node(int node);
 void spu_free(struct spu *spu);
 int spu_irq_class_0_bottom(struct spu *spu);
 int spu_irq_class_1_bottom(struct spu *spu);
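For illustration only, and not part of the patch: below is a minimal user-space sketch of the node-local allocation order that spu_get_idle() and spu_alloc_node() implement above, namely start at the caller's own NUMA node and fall back to the remaining nodes in round-robin order. The free_spus[] table, the node_allowed() stub, and alloc_spu_starting_at() are made-up stand-ins for the kernel structures, assuming a MAX_NUMNODES of 4.

#include <stdio.h>

#define MAX_NUMNODES 4

/* Pretend pool: number of free SPUs on each NUMA node. */
static int free_spus[MAX_NUMNODES] = { 0, 0, 2, 1 };

/* Stand-in for the kernel's node_allowed(): accept any valid node. */
static int node_allowed(int node)
{
	return node >= 0 && node < MAX_NUMNODES;
}

/*
 * Same search order as spu_get_idle(): try the starting node first,
 * then every other node in round-robin order.  Returns the node an
 * SPU was taken from, or -1 if none is free.
 */
static int alloc_spu_starting_at(int node)
{
	int n;

	for (n = 0; n < MAX_NUMNODES; n++, node++) {
		node = (node < MAX_NUMNODES) ? node : 0;
		if (!node_allowed(node))
			continue;
		if (free_spus[node] > 0) {
			free_spus[node]--;
			return node;
		}
	}
	return -1;
}

int main(void)
{
	/* A caller on node 1 prefers node 1, then nodes 2, 3, 0. */
	printf("got SPU from node %d\n", alloc_spu_starting_at(1));
	printf("got SPU from node %d\n", alloc_spu_starting_at(1));
	printf("got SPU from node %d\n", alloc_spu_starting_at(1));
	return 0;
}

Built with a plain cc, the three calls drain node 2 first and then fall back to node 3, mirroring how a context running near a given node is handed the nearest available SPU before a remote one.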
