aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorBenjamin Herrenschmidt <benh@kernel.crashing.org>2007-03-09 18:05:37 -0500
committerArnd Bergmann <arnd@klappe.arndb.de>2007-03-09 18:07:50 -0500
commit94b2a4393c500a620de90c3266d595926302e26b (patch)
treecde58177f430751b67a7aa47f3f89042e91357bf /arch
parent50b520d4efbce45281f58112789470ec7965fd33 (diff)
[POWERPC] Fix spu SLB invalidations
The SPU code doesn't properly invalidate SPUs SLBs when necessary, for example when changing a segment size from the hugetlbfs code. In addition, it saves and restores the SLB content on context switches which makes it harder to properly handle those invalidations. This patch removes the saving & restoring for now, something more efficient might be found later on. It also adds a spu_flush_all_slbs(mm) that can be used by the core mm code to flush the SLBs of all SPEs that are running a given mm at the time of the flush. In order to do that, it adds a spinlock to the list of all SPEs and move some bits & pieces from spufs to spu_base.c Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch')
-rw-r--r--arch/powerpc/mm/hash_utils_64.c6
-rw-r--r--arch/powerpc/mm/hugetlbpage.c4
-rw-r--r--arch/powerpc/platforms/cell/spu_base.c81
-rw-r--r--arch/powerpc/platforms/cell/spufs/sched.c13
-rw-r--r--arch/powerpc/platforms/cell/spufs/switch.c62
5 files changed, 84 insertions, 82 deletions
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index c0d2a694fa30..3c7fe2c65b5a 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -685,6 +685,9 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
685 "non-cacheable mapping\n"); 685 "non-cacheable mapping\n");
686 psize = mmu_vmalloc_psize = MMU_PAGE_4K; 686 psize = mmu_vmalloc_psize = MMU_PAGE_4K;
687 } 687 }
688#ifdef CONFIG_SPE_BASE
689 spu_flush_all_slbs(mm);
690#endif
688 } 691 }
689 if (user_region) { 692 if (user_region) {
690 if (psize != get_paca()->context.user_psize) { 693 if (psize != get_paca()->context.user_psize) {
@@ -759,6 +762,9 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
759 mmu_psize_defs[MMU_PAGE_4K].sllp; 762 mmu_psize_defs[MMU_PAGE_4K].sllp;
760 get_paca()->context = mm->context; 763 get_paca()->context = mm->context;
761 slb_flush_and_rebolt(); 764 slb_flush_and_rebolt();
765#ifdef CONFIG_SPE_BASE
766 spu_flush_all_slbs(mm);
767#endif
762 } 768 }
763 } 769 }
764 if (mm->context.user_psize == MMU_PAGE_64K) 770 if (mm->context.user_psize == MMU_PAGE_64K)
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 8c77c791f87e..f6ffaaa7a5bf 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -24,6 +24,7 @@
24#include <asm/machdep.h> 24#include <asm/machdep.h>
25#include <asm/cputable.h> 25#include <asm/cputable.h>
26#include <asm/tlb.h> 26#include <asm/tlb.h>
27#include <asm/spu.h>
27 28
28#include <linux/sysctl.h> 29#include <linux/sysctl.h>
29 30
@@ -513,6 +514,9 @@ int prepare_hugepage_range(unsigned long addr, unsigned long len, pgoff_t pgoff)
513 if ((addr + len) > 0x100000000UL) 514 if ((addr + len) > 0x100000000UL)
514 err = open_high_hpage_areas(current->mm, 515 err = open_high_hpage_areas(current->mm,
515 HTLB_AREA_MASK(addr, len)); 516 HTLB_AREA_MASK(addr, len));
517#ifdef CONFIG_SPE_BASE
518 spu_flush_all_slbs(current->mm);
519#endif
516 if (err) { 520 if (err) {
517 printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)" 521 printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)"
518 " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n", 522 " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n",
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index c43999a10deb..eba7a2641dce 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -38,8 +38,61 @@
38const struct spu_management_ops *spu_management_ops; 38const struct spu_management_ops *spu_management_ops;
39const struct spu_priv1_ops *spu_priv1_ops; 39const struct spu_priv1_ops *spu_priv1_ops;
40 40
41static struct list_head spu_list[MAX_NUMNODES];
42static LIST_HEAD(spu_full_list);
43static DEFINE_MUTEX(spu_mutex);
44static spinlock_t spu_list_lock = SPIN_LOCK_UNLOCKED;
45
41EXPORT_SYMBOL_GPL(spu_priv1_ops); 46EXPORT_SYMBOL_GPL(spu_priv1_ops);
42 47
48void spu_invalidate_slbs(struct spu *spu)
49{
50 struct spu_priv2 __iomem *priv2 = spu->priv2;
51
52 if (spu_mfc_sr1_get(spu) & MFC_STATE1_RELOCATE_MASK)
53 out_be64(&priv2->slb_invalidate_all_W, 0UL);
54}
55EXPORT_SYMBOL_GPL(spu_invalidate_slbs);
56
57/* This is called by the MM core when a segment size is changed, to
58 * request a flush of all the SPEs using a given mm
59 */
60void spu_flush_all_slbs(struct mm_struct *mm)
61{
62 struct spu *spu;
63 unsigned long flags;
64
65 spin_lock_irqsave(&spu_list_lock, flags);
66 list_for_each_entry(spu, &spu_full_list, full_list) {
67 if (spu->mm == mm)
68 spu_invalidate_slbs(spu);
69 }
70 spin_unlock_irqrestore(&spu_list_lock, flags);
71}
72
73/* The hack below stinks... try to do something better one of
74 * these days... Does it even work properly with NR_CPUS == 1 ?
75 */
76static inline void mm_needs_global_tlbie(struct mm_struct *mm)
77{
78 int nr = (NR_CPUS > 1) ? NR_CPUS : NR_CPUS + 1;
79
80 /* Global TLBIE broadcast required with SPEs. */
81 __cpus_setall(&mm->cpu_vm_mask, nr);
82}
83
84void spu_associate_mm(struct spu *spu, struct mm_struct *mm)
85{
86 unsigned long flags;
87
88 spin_lock_irqsave(&spu_list_lock, flags);
89 spu->mm = mm;
90 spin_unlock_irqrestore(&spu_list_lock, flags);
91 if (mm)
92 mm_needs_global_tlbie(mm);
93}
94EXPORT_SYMBOL_GPL(spu_associate_mm);
95
43static int __spu_trap_invalid_dma(struct spu *spu) 96static int __spu_trap_invalid_dma(struct spu *spu)
44{ 97{
45 pr_debug("%s\n", __FUNCTION__); 98 pr_debug("%s\n", __FUNCTION__);
@@ -74,6 +127,7 @@ static int __spu_trap_data_seg(struct spu *spu, unsigned long ea)
74 struct spu_priv2 __iomem *priv2 = spu->priv2; 127 struct spu_priv2 __iomem *priv2 = spu->priv2;
75 struct mm_struct *mm = spu->mm; 128 struct mm_struct *mm = spu->mm;
76 u64 esid, vsid, llp; 129 u64 esid, vsid, llp;
130 int psize;
77 131
78 pr_debug("%s\n", __FUNCTION__); 132 pr_debug("%s\n", __FUNCTION__);
79 133
@@ -90,22 +144,25 @@ static int __spu_trap_data_seg(struct spu *spu, unsigned long ea)
90 case USER_REGION_ID: 144 case USER_REGION_ID:
91#ifdef CONFIG_HUGETLB_PAGE 145#ifdef CONFIG_HUGETLB_PAGE
92 if (in_hugepage_area(mm->context, ea)) 146 if (in_hugepage_area(mm->context, ea))
93 llp = mmu_psize_defs[mmu_huge_psize].sllp; 147 psize = mmu_huge_psize;
94 else 148 else
95#endif 149#endif
96 llp = mmu_psize_defs[mmu_virtual_psize].sllp; 150 psize = mm->context.user_psize;
97 vsid = (get_vsid(mm->context.id, ea) << SLB_VSID_SHIFT) | 151 vsid = (get_vsid(mm->context.id, ea) << SLB_VSID_SHIFT) |
98 SLB_VSID_USER | llp; 152 SLB_VSID_USER;
99 break; 153 break;
100 case VMALLOC_REGION_ID: 154 case VMALLOC_REGION_ID:
101 llp = mmu_psize_defs[mmu_virtual_psize].sllp; 155 if (ea < VMALLOC_END)
156 psize = mmu_vmalloc_psize;
157 else
158 psize = mmu_io_psize;
102 vsid = (get_kernel_vsid(ea) << SLB_VSID_SHIFT) | 159 vsid = (get_kernel_vsid(ea) << SLB_VSID_SHIFT) |
103 SLB_VSID_KERNEL | llp; 160 SLB_VSID_KERNEL;
104 break; 161 break;
105 case KERNEL_REGION_ID: 162 case KERNEL_REGION_ID:
106 llp = mmu_psize_defs[mmu_linear_psize].sllp; 163 psize = mmu_linear_psize;
107 vsid = (get_kernel_vsid(ea) << SLB_VSID_SHIFT) | 164 vsid = (get_kernel_vsid(ea) << SLB_VSID_SHIFT) |
108 SLB_VSID_KERNEL | llp; 165 SLB_VSID_KERNEL;
109 break; 166 break;
110 default: 167 default:
111 /* Future: support kernel segments so that drivers 168 /* Future: support kernel segments so that drivers
@@ -114,9 +171,10 @@ static int __spu_trap_data_seg(struct spu *spu, unsigned long ea)
114 pr_debug("invalid region access at %016lx\n", ea); 171 pr_debug("invalid region access at %016lx\n", ea);
115 return 1; 172 return 1;
116 } 173 }
174 llp = mmu_psize_defs[psize].sllp;
117 175
118 out_be64(&priv2->slb_index_W, spu->slb_replace); 176 out_be64(&priv2->slb_index_W, spu->slb_replace);
119 out_be64(&priv2->slb_vsid_RW, vsid); 177 out_be64(&priv2->slb_vsid_RW, vsid | llp);
120 out_be64(&priv2->slb_esid_RW, esid); 178 out_be64(&priv2->slb_esid_RW, esid);
121 179
122 spu->slb_replace++; 180 spu->slb_replace++;
@@ -330,10 +388,6 @@ static void spu_free_irqs(struct spu *spu)
330 free_irq(spu->irqs[2], spu); 388 free_irq(spu->irqs[2], spu);
331} 389}
332 390
333static struct list_head spu_list[MAX_NUMNODES];
334static LIST_HEAD(spu_full_list);
335static DEFINE_MUTEX(spu_mutex);
336
337static void spu_init_channels(struct spu *spu) 391static void spu_init_channels(struct spu *spu)
338{ 392{
339 static const struct { 393 static const struct {
@@ -593,6 +647,7 @@ static int __init create_spu(void *data)
593 struct spu *spu; 647 struct spu *spu;
594 int ret; 648 int ret;
595 static int number; 649 static int number;
650 unsigned long flags;
596 651
597 ret = -ENOMEM; 652 ret = -ENOMEM;
598 spu = kzalloc(sizeof (*spu), GFP_KERNEL); 653 spu = kzalloc(sizeof (*spu), GFP_KERNEL);
@@ -620,8 +675,10 @@ static int __init create_spu(void *data)
620 goto out_free_irqs; 675 goto out_free_irqs;
621 676
622 mutex_lock(&spu_mutex); 677 mutex_lock(&spu_mutex);
678 spin_lock_irqsave(&spu_list_lock, flags);
623 list_add(&spu->list, &spu_list[spu->node]); 679 list_add(&spu->list, &spu_list[spu->node]);
624 list_add(&spu->full_list, &spu_full_list); 680 list_add(&spu->full_list, &spu_full_list);
681 spin_unlock_irqrestore(&spu_list_lock, flags);
625 mutex_unlock(&spu_mutex); 682 mutex_unlock(&spu_mutex);
626 683
627 goto out; 684 goto out;
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 7dbf57c30282..39823cec0844 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -127,14 +127,6 @@ static void spu_remove_from_active_list(struct spu *spu)
127 mutex_unlock(&spu_prio->active_mutex[node]); 127 mutex_unlock(&spu_prio->active_mutex[node]);
128} 128}
129 129
130static inline void mm_needs_global_tlbie(struct mm_struct *mm)
131{
132 int nr = (NR_CPUS > 1) ? NR_CPUS : NR_CPUS + 1;
133
134 /* Global TLBIE broadcast required with SPEs. */
135 __cpus_setall(&mm->cpu_vm_mask, nr);
136}
137
138static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier); 130static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier);
139 131
140static void spu_switch_notify(struct spu *spu, struct spu_context *ctx) 132static void spu_switch_notify(struct spu *spu, struct spu_context *ctx)
@@ -167,8 +159,7 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
167 ctx->spu = spu; 159 ctx->spu = spu;
168 ctx->ops = &spu_hw_ops; 160 ctx->ops = &spu_hw_ops;
169 spu->pid = current->pid; 161 spu->pid = current->pid;
170 spu->mm = ctx->owner; 162 spu_associate_mm(spu, ctx->owner);
171 mm_needs_global_tlbie(spu->mm);
172 spu->ibox_callback = spufs_ibox_callback; 163 spu->ibox_callback = spufs_ibox_callback;
173 spu->wbox_callback = spufs_wbox_callback; 164 spu->wbox_callback = spufs_wbox_callback;
174 spu->stop_callback = spufs_stop_callback; 165 spu->stop_callback = spufs_stop_callback;
@@ -205,7 +196,7 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
205 spu->stop_callback = NULL; 196 spu->stop_callback = NULL;
206 spu->mfc_callback = NULL; 197 spu->mfc_callback = NULL;
207 spu->dma_callback = NULL; 198 spu->dma_callback = NULL;
208 spu->mm = NULL; 199 spu_associate_mm(spu, NULL);
209 spu->pid = 0; 200 spu->pid = 0;
210 ctx->ops = &spu_backing_ops; 201 ctx->ops = &spu_backing_ops;
211 ctx->spu = NULL; 202 ctx->spu = NULL;
diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c
index c08981ff7fc6..fd91c73de34e 100644
--- a/arch/powerpc/platforms/cell/spufs/switch.c
+++ b/arch/powerpc/platforms/cell/spufs/switch.c
@@ -468,26 +468,6 @@ static inline void wait_purge_complete(struct spu_state *csa, struct spu *spu)
468 MFC_CNTL_PURGE_DMA_COMPLETE); 468 MFC_CNTL_PURGE_DMA_COMPLETE);
469} 469}
470 470
471static inline void save_mfc_slbs(struct spu_state *csa, struct spu *spu)
472{
473 struct spu_priv2 __iomem *priv2 = spu->priv2;
474 int i;
475
476 /* Save, Step 29:
477 * If MFC_SR1[R]='1', save SLBs in CSA.
478 */
479 if (spu_mfc_sr1_get(spu) & MFC_STATE1_RELOCATE_MASK) {
480 csa->priv2.slb_index_W = in_be64(&priv2->slb_index_W);
481 for (i = 0; i < 8; i++) {
482 out_be64(&priv2->slb_index_W, i);
483 eieio();
484 csa->slb_esid_RW[i] = in_be64(&priv2->slb_esid_RW);
485 csa->slb_vsid_RW[i] = in_be64(&priv2->slb_vsid_RW);
486 eieio();
487 }
488 }
489}
490
491static inline void setup_mfc_sr1(struct spu_state *csa, struct spu *spu) 471static inline void setup_mfc_sr1(struct spu_state *csa, struct spu *spu)
492{ 472{
493 /* Save, Step 30: 473 /* Save, Step 30:
@@ -708,20 +688,6 @@ static inline void resume_mfc_queue(struct spu_state *csa, struct spu *spu)
708 out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESUME_DMA_QUEUE); 688 out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESUME_DMA_QUEUE);
709} 689}
710 690
711static inline void invalidate_slbs(struct spu_state *csa, struct spu *spu)
712{
713 struct spu_priv2 __iomem *priv2 = spu->priv2;
714
715 /* Save, Step 45:
716 * Restore, Step 19:
717 * If MFC_SR1[R]=1, write 0 to SLB_Invalidate_All.
718 */
719 if (spu_mfc_sr1_get(spu) & MFC_STATE1_RELOCATE_MASK) {
720 out_be64(&priv2->slb_invalidate_all_W, 0UL);
721 eieio();
722 }
723}
724
725static inline void get_kernel_slb(u64 ea, u64 slb[2]) 691static inline void get_kernel_slb(u64 ea, u64 slb[2])
726{ 692{
727 u64 llp; 693 u64 llp;
@@ -765,7 +731,7 @@ static inline void setup_mfc_slbs(struct spu_state *csa, struct spu *spu)
765 * MFC_SR1[R]=1 (in other words, assume that 731 * MFC_SR1[R]=1 (in other words, assume that
766 * translation is desired by OS environment). 732 * translation is desired by OS environment).
767 */ 733 */
768 invalidate_slbs(csa, spu); 734 spu_invalidate_slbs(spu);
769 get_kernel_slb((unsigned long)&spu_save_code[0], code_slb); 735 get_kernel_slb((unsigned long)&spu_save_code[0], code_slb);
770 get_kernel_slb((unsigned long)csa->lscsa, lscsa_slb); 736 get_kernel_slb((unsigned long)csa->lscsa, lscsa_slb);
771 load_mfc_slb(spu, code_slb, 0); 737 load_mfc_slb(spu, code_slb, 0);
@@ -1718,27 +1684,6 @@ static inline void check_ppuint_mb_stat(struct spu_state *csa, struct spu *spu)
1718 } 1684 }
1719} 1685}
1720 1686
1721static inline void restore_mfc_slbs(struct spu_state *csa, struct spu *spu)
1722{
1723 struct spu_priv2 __iomem *priv2 = spu->priv2;
1724 int i;
1725
1726 /* Restore, Step 68:
1727 * If MFC_SR1[R]='1', restore SLBs from CSA.
1728 */
1729 if (csa->priv1.mfc_sr1_RW & MFC_STATE1_RELOCATE_MASK) {
1730 for (i = 0; i < 8; i++) {
1731 out_be64(&priv2->slb_index_W, i);
1732 eieio();
1733 out_be64(&priv2->slb_esid_RW, csa->slb_esid_RW[i]);
1734 out_be64(&priv2->slb_vsid_RW, csa->slb_vsid_RW[i]);
1735 eieio();
1736 }
1737 out_be64(&priv2->slb_index_W, csa->priv2.slb_index_W);
1738 eieio();
1739 }
1740}
1741
1742static inline void restore_mfc_sr1(struct spu_state *csa, struct spu *spu) 1687static inline void restore_mfc_sr1(struct spu_state *csa, struct spu *spu)
1743{ 1688{
1744 /* Restore, Step 69: 1689 /* Restore, Step 69:
@@ -1875,7 +1820,6 @@ static void save_csa(struct spu_state *prev, struct spu *spu)
1875 set_mfc_tclass_id(prev, spu); /* Step 26. */ 1820 set_mfc_tclass_id(prev, spu); /* Step 26. */
1876 purge_mfc_queue(prev, spu); /* Step 27. */ 1821 purge_mfc_queue(prev, spu); /* Step 27. */
1877 wait_purge_complete(prev, spu); /* Step 28. */ 1822 wait_purge_complete(prev, spu); /* Step 28. */
1878 save_mfc_slbs(prev, spu); /* Step 29. */
1879 setup_mfc_sr1(prev, spu); /* Step 30. */ 1823 setup_mfc_sr1(prev, spu); /* Step 30. */
1880 save_spu_npc(prev, spu); /* Step 31. */ 1824 save_spu_npc(prev, spu); /* Step 31. */
1881 save_spu_privcntl(prev, spu); /* Step 32. */ 1825 save_spu_privcntl(prev, spu); /* Step 32. */
@@ -1987,7 +1931,7 @@ static void harvest(struct spu_state *prev, struct spu *spu)
1987 reset_spu_privcntl(prev, spu); /* Step 16. */ 1931 reset_spu_privcntl(prev, spu); /* Step 16. */
1988 reset_spu_lslr(prev, spu); /* Step 17. */ 1932 reset_spu_lslr(prev, spu); /* Step 17. */
1989 setup_mfc_sr1(prev, spu); /* Step 18. */ 1933 setup_mfc_sr1(prev, spu); /* Step 18. */
1990 invalidate_slbs(prev, spu); /* Step 19. */ 1934 spu_invalidate_slbs(spu); /* Step 19. */
1991 reset_ch_part1(prev, spu); /* Step 20. */ 1935 reset_ch_part1(prev, spu); /* Step 20. */
1992 reset_ch_part2(prev, spu); /* Step 21. */ 1936 reset_ch_part2(prev, spu); /* Step 21. */
1993 enable_interrupts(prev, spu); /* Step 22. */ 1937 enable_interrupts(prev, spu); /* Step 22. */
@@ -2055,7 +1999,7 @@ static void restore_csa(struct spu_state *next, struct spu *spu)
2055 restore_spu_mb(next, spu); /* Step 65. */ 1999 restore_spu_mb(next, spu); /* Step 65. */
2056 check_ppu_mb_stat(next, spu); /* Step 66. */ 2000 check_ppu_mb_stat(next, spu); /* Step 66. */
2057 check_ppuint_mb_stat(next, spu); /* Step 67. */ 2001 check_ppuint_mb_stat(next, spu); /* Step 67. */
2058 restore_mfc_slbs(next, spu); /* Step 68. */ 2002 spu_invalidate_slbs(spu); /* Modified Step 68. */
2059 restore_mfc_sr1(next, spu); /* Step 69. */ 2003 restore_mfc_sr1(next, spu); /* Step 69. */
2060 restore_other_spu_access(next, spu); /* Step 70. */ 2004 restore_other_spu_access(next, spu); /* Step 70. */
2061 restore_spu_runcntl(next, spu); /* Step 71. */ 2005 restore_spu_runcntl(next, spu); /* Step 71. */