author     Arnd Bergmann <arnd@arndb.de>              2005-11-15 15:53:52 -0500
committer  Paul Mackerras <paulus@samba.org>          2006-01-08 22:49:30 -0500
commit     8b3d6663c6217e4f50cc3720935a96da9b984117 (patch)
tree       5295c29787ac66c26ddf715868fda7fcd3ad5f97 /arch/powerpc/platforms/cell/spu_base.c
parent     05b841174c289ca62a6b42d883b8791d9ac3a4bd (diff)
[PATCH] spufs: cooperative scheduler support
This adds a scheduler for SPUs to make it possible to use
more logical SPUs than there are physical ones present in
the system.
Currently, there is no support for preempting a running
SPU thread; a thread has to leave the SPU either by
triggering an event on the SPU that causes it to return to
the owning thread or by having a signal sent to it.
This patch also adds operations that enable accessing an SPU
in either runnable or saved state. We use an RW semaphore
to protect the state of the SPU from changing underneath
us while we are holding it for reading. In order to change
the state, the semaphore is acquired for writing and a
context save or restore is executed before downgrading the
semaphore to read-only.
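
As a minimal sketch of that locking pattern (illustration only, not
code from this patch; the structure and function names below are made
up for the example, only the rw_semaphore calls from <linux/rwsem.h>
are real kernel API):

#include <linux/rwsem.h>

struct example_spu_ctx {
	struct rw_semaphore state_sema;	/* guards 'state' */
	int state;			/* e.g. runnable vs. saved */
};

/* Readers hold the semaphore shared; the state cannot change here. */
static void example_access(struct example_spu_ctx *ctx)
{
	down_read(&ctx->state_sema);
	/* ... access the SPU in whichever state it currently is ... */
	up_read(&ctx->state_sema);
}

/* A state change takes the semaphore exclusive, performs the context
 * save or restore, then downgrades it so the caller continues as a
 * reader of the new state. */
static void example_change_state(struct example_spu_ctx *ctx, int new_state)
{
	down_write(&ctx->state_sema);
	/* ... context save or restore would run here ... */
	ctx->state = new_state;
	downgrade_write(&ctx->state_sema);
	/* ... access the SPU under the read hold ... */
	up_read(&ctx->state_sema);
}

The downgrade_write() step is what lets the state changer keep acting
as a reader immediately after the save or restore, without a window in
which another writer could change the state again.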
From: Mark Nutter <mnutter@us.ibm.com>,
Uli Weigand <Ulrich.Weigand@de.ibm.com>
Signed-off-by: Arnd Bergmann <arndb@de.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Diffstat (limited to 'arch/powerpc/platforms/cell/spu_base.c')
-rw-r--r--	arch/powerpc/platforms/cell/spu_base.c	138
1 files changed, 76 insertions, 62 deletions
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index 44492d87cdf7..408c455cff08 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -69,51 +69,49 @@ static void spu_restart_dma(struct spu *spu)
 
 static int __spu_trap_data_seg(struct spu *spu, unsigned long ea)
 {
-	struct spu_priv2 __iomem *priv2;
-	struct mm_struct *mm;
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	struct mm_struct *mm = spu->mm;
+	u64 esid, vsid;
 
 	pr_debug("%s\n", __FUNCTION__);
 
 	if (test_bit(SPU_CONTEXT_SWITCH_ACTIVE_nr, &spu->flags)) {
+		/* SLBs are pre-loaded for context switch, so
+		 * we should never get here!
+		 */
 		printk("%s: invalid access during switch!\n", __func__);
 		return 1;
 	}
-
-	if (REGION_ID(ea) != USER_REGION_ID) {
+	if (!mm || (REGION_ID(ea) != USER_REGION_ID)) {
+		/* Future: support kernel segments so that drivers
+		 * can use SPUs.
+		 */
 		pr_debug("invalid region access at %016lx\n", ea);
 		return 1;
 	}
 
-	priv2 = spu->priv2;
-	mm = spu->mm;
+	esid = (ea & ESID_MASK) | SLB_ESID_V;
+	vsid = (get_vsid(mm->context.id, ea) << SLB_VSID_SHIFT) | SLB_VSID_USER;
+	if (in_hugepage_area(mm->context, ea))
+		vsid |= SLB_VSID_L;
 
+	out_be64(&priv2->slb_index_W, spu->slb_replace);
+	out_be64(&priv2->slb_vsid_RW, vsid);
+	out_be64(&priv2->slb_esid_RW, esid);
+
+	spu->slb_replace++;
 	if (spu->slb_replace >= 8)
 		spu->slb_replace = 0;
 
-	out_be64(&priv2->slb_index_W, spu->slb_replace);
-	out_be64(&priv2->slb_vsid_RW,
-		(get_vsid(mm->context.id, ea) << SLB_VSID_SHIFT)
-						| SLB_VSID_USER);
-	out_be64(&priv2->slb_esid_RW, (ea & ESID_MASK) | SLB_ESID_V);
-
 	spu_restart_dma(spu);
 
-	pr_debug("set slb %d context %lx, ea %016lx, vsid %016lx, esid %016lx\n",
-		spu->slb_replace, mm->context.id, ea,
-		(get_vsid(mm->context.id, ea) << SLB_VSID_SHIFT)| SLB_VSID_USER,
-		(ea & ESID_MASK) | SLB_ESID_V);
 	return 0;
 }
 
 extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap); //XXX
-static int __spu_trap_data_map(struct spu *spu, unsigned long ea)
+static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr)
 {
-	unsigned long dsisr;
-	struct spu_priv1 __iomem *priv1;
-
 	pr_debug("%s\n", __FUNCTION__);
-	priv1 = spu->priv1;
-	dsisr = in_be64(&priv1->mfc_dsisr_RW);
 
 	/* Handle kernel space hash faults immediately.
 	   User hash faults need to be deferred to process context. */
@@ -129,14 +127,17 @@ static int __spu_trap_data_map(struct spu *spu, unsigned long ea)
 		return 1;
 	}
 
+	spu->dar = ea;
+	spu->dsisr = dsisr;
+	mb();
 	wake_up(&spu->stop_wq);
 	return 0;
 }
 
 static int __spu_trap_mailbox(struct spu *spu)
 {
-	wake_up_all(&spu->ibox_wq);
-	kill_fasync(&spu->ibox_fasync, SIGIO, POLLIN);
+	if (spu->ibox_callback)
+		spu->ibox_callback(spu);
 
 	/* atomically disable SPU mailbox interrupts */
 	spin_lock(&spu->register_lock);
@@ -171,8 +172,8 @@ static int __spu_trap_tag_group(struct spu *spu)
 
 static int __spu_trap_spubox(struct spu *spu)
 {
-	wake_up_all(&spu->wbox_wq);
-	kill_fasync(&spu->wbox_fasync, SIGIO, POLLOUT);
+	if (spu->wbox_callback)
+		spu->wbox_callback(spu);
 
 	/* atomically disable SPU mailbox interrupts */
 	spin_lock(&spu->register_lock);
@@ -220,17 +221,25 @@ static irqreturn_t
 spu_irq_class_1(int irq, void *data, struct pt_regs *regs)
 {
 	struct spu *spu;
-	unsigned long stat, dar;
+	unsigned long stat, mask, dar, dsisr;
 
 	spu = data;
-	stat = in_be64(&spu->priv1->int_stat_class1_RW);
+
+	/* atomically read & clear class1 status. */
+	spin_lock(&spu->register_lock);
+	mask = in_be64(&spu->priv1->int_mask_class1_RW);
+	stat = in_be64(&spu->priv1->int_stat_class1_RW) & mask;
 	dar = in_be64(&spu->priv1->mfc_dar_RW);
+	dsisr = in_be64(&spu->priv1->mfc_dsisr_RW);
+	out_be64(&spu->priv1->mfc_dsisr_RW, 0UL);
+	out_be64(&spu->priv1->int_stat_class1_RW, stat);
+	spin_unlock(&spu->register_lock);
 
 	if (stat & 1) /* segment fault */
 		__spu_trap_data_seg(spu, dar);
 
 	if (stat & 2) { /* mapping fault */
-		__spu_trap_data_map(spu, dar);
+		__spu_trap_data_map(spu, dar, dsisr);
 	}
 
 	if (stat & 4) /* ls compare & suspend on get */
@@ -239,7 +248,6 @@ spu_irq_class_1(int irq, void *data, struct pt_regs *regs)
 	if (stat & 8) /* ls compare & suspend on put */
 		;
 
-	out_be64(&spu->priv1->int_stat_class1_RW, stat);
 	return stat ? IRQ_HANDLED : IRQ_NONE;
 }
 
@@ -396,8 +404,6 @@ EXPORT_SYMBOL(spu_alloc);
 void spu_free(struct spu *spu)
 {
 	down(&spu_mutex);
-	spu->ibox_fasync = NULL;
-	spu->wbox_fasync = NULL;
 	list_add_tail(&spu->list, &spu_list);
 	up(&spu_mutex);
 }
@@ -405,15 +411,13 @@ EXPORT_SYMBOL(spu_free);
 
 static int spu_handle_mm_fault(struct spu *spu)
 {
-	struct spu_priv1 __iomem *priv1;
 	struct mm_struct *mm = spu->mm;
 	struct vm_area_struct *vma;
 	u64 ea, dsisr, is_write;
 	int ret;
 
-	priv1 = spu->priv1;
-	ea = in_be64(&priv1->mfc_dar_RW);
-	dsisr = in_be64(&priv1->mfc_dsisr_RW);
+	ea = spu->dar;
+	dsisr = spu->dsisr;
 #if 0
 	if (!IS_VALID_EA(ea)) {
 		return -EFAULT;
@@ -476,15 +480,14 @@ bad_area:
 
 static int spu_handle_pte_fault(struct spu *spu)
 {
-	struct spu_priv1 __iomem *priv1;
 	u64 ea, dsisr, access, error = 0UL;
 	int ret = 0;
 
-	priv1 = spu->priv1;
-	ea = in_be64(&priv1->mfc_dar_RW);
-	dsisr = in_be64(&priv1->mfc_dsisr_RW);
-	access = (_PAGE_PRESENT | _PAGE_USER);
+	ea = spu->dar;
+	dsisr = spu->dsisr;
 	if (dsisr & MFC_DSISR_PTE_NOT_FOUND) {
+		access = (_PAGE_PRESENT | _PAGE_USER);
+		access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL;
 		if (hash_page(ea, access, 0x300) != 0)
 			error |= CLASS1_ENABLE_STORAGE_FAULT_INTR;
 	}
@@ -495,18 +498,33 @@ static int spu_handle_pte_fault(struct spu *spu)
 		else
 			error &= ~CLASS1_ENABLE_STORAGE_FAULT_INTR;
 	}
-	if (!error)
+	spu->dar = 0UL;
+	spu->dsisr = 0UL;
+	if (!error) {
 		spu_restart_dma(spu);
-
+	} else {
+		__spu_trap_invalid_dma(spu);
+	}
 	return ret;
 }
 
+static inline int spu_pending(struct spu *spu, u32 * stat)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	u64 pte_fault;
+
+	*stat = in_be32(&prob->spu_status_R);
+	pte_fault = spu->dsisr &
+	    (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED);
+	return (!(*stat & 0x1) || pte_fault || spu->class_0_pending) ? 1 : 0;
+}
+
 int spu_run(struct spu *spu)
 {
 	struct spu_problem __iomem *prob;
 	struct spu_priv1 __iomem *priv1;
 	struct spu_priv2 __iomem *priv2;
-	unsigned long status;
+	u32 status;
 	int ret;
 
 	prob = spu->problem;
@@ -514,21 +532,15 @@ int spu_run(struct spu *spu)
 	priv2 = spu->priv2;
 
 	/* Let SPU run. */
-	spu->mm = current->mm;
 	eieio();
 	out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE);
 
 	do {
 		ret = wait_event_interruptible(spu->stop_wq,
-			(!((status = in_be32(&prob->spu_status_R)) & 0x1))
-			|| (in_be64(&priv1->mfc_dsisr_RW) & MFC_DSISR_PTE_NOT_FOUND)
-			|| spu->class_0_pending);
-
-		if (status & SPU_STATUS_STOPPED_BY_STOP)
-			ret = -EAGAIN;
-		else if (status & SPU_STATUS_STOPPED_BY_HALT)
-			ret = -EIO;
-		else if (in_be64(&priv1->mfc_dsisr_RW) & MFC_DSISR_PTE_NOT_FOUND)
+					       spu_pending(spu, &status));
+
+		if (spu->dsisr &
+		    (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED))
 			ret = spu_handle_pte_fault(spu);
 
 		if (spu->class_0_pending)
@@ -537,7 +549,9 @@ int spu_run(struct spu *spu)
 		if (!ret && signal_pending(current))
 			ret = -ERESTARTSYS;
 
-	} while (!ret);
+	} while (!ret && !(status &
+			   (SPU_STATUS_STOPPED_BY_STOP |
+			    SPU_STATUS_STOPPED_BY_HALT)));
 
 	/* Ensure SPU is stopped. */
 	out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP);
@@ -549,8 +563,6 @@ int spu_run(struct spu *spu)
 	out_be64(&priv1->tlb_invalidate_entry_W, 0UL);
 	eieio();
 
-	spu->mm = NULL;
-
 	/* Check for SPU breakpoint. */
 	if (unlikely(current->ptrace & PT_PTRACED)) {
 		status = in_be32(&prob->spu_status_R);
@@ -669,19 +681,21 @@ static int __init create_spu(struct device_node *spe)
 	spu->stop_code = 0;
 	spu->slb_replace = 0;
 	spu->mm = NULL;
+	spu->ctx = NULL;
+	spu->rq = NULL;
+	spu->pid = 0;
 	spu->class_0_pending = 0;
 	spu->flags = 0UL;
+	spu->dar = 0UL;
+	spu->dsisr = 0UL;
 	spin_lock_init(&spu->register_lock);
 
 	out_be64(&spu->priv1->mfc_sdr_RW, mfspr(SPRN_SDR1));
 	out_be64(&spu->priv1->mfc_sr1_RW, 0x33);
 
 	init_waitqueue_head(&spu->stop_wq);
-	init_waitqueue_head(&spu->wbox_wq);
-	init_waitqueue_head(&spu->ibox_wq);
-
-	spu->ibox_fasync = NULL;
-	spu->wbox_fasync = NULL;
+	spu->ibox_callback = NULL;
+	spu->wbox_callback = NULL;
 
 	down(&spu_mutex);
 	spu->number = number++;