author		Benjamin Herrenschmidt <benh@kernel.crashing.org>	2007-05-08 02:27:29 -0400
committer	Paul Mackerras <paulus@samba.org>			2007-05-09 02:35:00 -0400
commit		f1fa74f4afe96b0e4ac2beaa61fa4f4667acdcbb (patch)
tree		287c3b73db9d871565b453c91b0a00559300306b
parent		16c2d476232523260c495eafbf9cdc1be984b7df (diff)
[POWERPC] Spufs support for 64K LS mappings on 4K kernels
This adds an option to spufs, when the kernel is configured for 4K pages, to give it the ability to use 64K pages for SPE local store mappings. Currently, we are optimistic and try order-4 allocations when creating contexts. If that fails, the code will fall back to 4K automatically.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
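From user space the new option is transparent. The sketch below is an editor's illustration, not part of the patch: the mount point /spu and the context name "ctx0" are hypothetical, and LS_SIZE reflects the 256KB SPE local store. It shows the mapping path the diff modifies; whether the kernel backs the VMA with 64K or 4K pages depends only on whether the order-4 allocation succeeded at context creation.

/*
 * Hypothetical user-space illustration (not part of the patch).
 * Assumes spufs is mounted at /spu and a context "ctx0" exists.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

#define LS_SIZE	0x40000		/* SPE local store is 256KB */

int main(void)
{
	void *ls;
	int fd = open("/spu/ctx0/mem", O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* MAP_SHARED is mandatory: spufs_mem_mmap() rejects private
	 * mappings with -EINVAL. Passing NULL as the hint lets the
	 * file's get_unmapped_area hook pick an aligned address. */
	ls = mmap(NULL, LS_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (ls == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	((volatile char *)ls)[0] = 0;	/* touch: faults in one LS page */
	munmap(ls, LS_SIZE);
	close(fd);
	return 0;
}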
-rw-r--r--	arch/powerpc/platforms/cell/Kconfig             |  15
-rw-r--r--	arch/powerpc/platforms/cell/spufs/Makefile      |   2
-rw-r--r--	arch/powerpc/platforms/cell/spufs/context.c     |   4
-rw-r--r--	arch/powerpc/platforms/cell/spufs/file.c        |  80
-rw-r--r--	arch/powerpc/platforms/cell/spufs/lscsa_alloc.c | 181
-rw-r--r--	arch/powerpc/platforms/cell/spufs/switch.c      |  28
-rw-r--r--	include/asm-powerpc/spu_csa.h                   |  10
7 files changed, 283 insertions(+), 37 deletions(-)
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
index 82551770917c..9b2b386ccf48 100644
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -35,6 +35,21 @@ config SPU_FS
 	  Units on machines implementing the Broadband Processor
 	  Architecture.
 
+config SPU_FS_64K_LS
+	bool "Use 64K pages to map SPE local store"
+	# we depend on PPC_MM_SLICES for now rather than selecting
+	# it because we depend on hugetlbfs hooks being present. We
+	# will fix that when the generic code has been improved to
+	# not require hijacking hugetlbfs hooks.
+	depends on SPU_FS && PPC_MM_SLICES && !PPC_64K_PAGES
+	default y
+	select PPC_HAS_HASH_64K
+	help
+	  This option causes SPE local stores to be mapped in process
+	  address spaces using 64K pages while the rest of the kernel
+	  uses 4K pages. This can improve performance of applications
+	  using multiple SPEs by lowering the TLB pressure on them.
+
 config SPU_BASE
 	bool
 	default n
diff --git a/arch/powerpc/platforms/cell/spufs/Makefile b/arch/powerpc/platforms/cell/spufs/Makefile
index 2cd89c11af5a..328afcf89503 100644
--- a/arch/powerpc/platforms/cell/spufs/Makefile
+++ b/arch/powerpc/platforms/cell/spufs/Makefile
@@ -1,4 +1,4 @@
-obj-y			+= switch.o fault.o
+obj-y			+= switch.o fault.o lscsa_alloc.o
 
 obj-$(CONFIG_SPU_FS) += spufs.o
 spufs-y += inode.o file.o context.o syscalls.o coredump.o
diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c
index a87d9ca3dba2..8654749e317b 100644
--- a/arch/powerpc/platforms/cell/spufs/context.c
+++ b/arch/powerpc/platforms/cell/spufs/context.c
@@ -36,10 +36,8 @@ struct spu_context *alloc_spu_context(struct spu_gang *gang)
 	/* Binding to physical processor deferred
 	 * until spu_activate().
 	 */
-	spu_init_csa(&ctx->csa);
-	if (!ctx->csa.lscsa) {
+	if (spu_init_csa(&ctx->csa))
 		goto out_free;
-	}
 	spin_lock_init(&ctx->mmio_lock);
 	spin_lock_init(&ctx->mapping_lock);
 	kref_init(&ctx->kref);
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index d010b2464a98..45614c73c784 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -118,14 +118,32 @@ spufs_mem_write(struct file *file, const char __user *buffer,
 static unsigned long spufs_mem_mmap_nopfn(struct vm_area_struct *vma,
 					  unsigned long address)
 {
 	struct spu_context *ctx = vma->vm_file->private_data;
-	unsigned long pfn, offset = address - vma->vm_start;
-
-	offset += vma->vm_pgoff << PAGE_SHIFT;
+	unsigned long pfn, offset, addr0 = address;
+#ifdef CONFIG_SPU_FS_64K_LS
+	struct spu_state *csa = &ctx->csa;
+	int psize;
+
+	/* Check what page size we are using */
+	psize = get_slice_psize(vma->vm_mm, address);
+
+	/* Some sanity checking */
+	BUG_ON(csa->use_big_pages != (psize == MMU_PAGE_64K));
+
+	/* Wow, 64K, cool, we need to align the address though */
+	if (csa->use_big_pages) {
+		BUG_ON(vma->vm_start & 0xffff);
+		address &= ~0xfffful;
+	}
+#endif /* CONFIG_SPU_FS_64K_LS */
 
+	offset = (address - vma->vm_start) + (vma->vm_pgoff << PAGE_SHIFT);
 	if (offset >= LS_SIZE)
 		return NOPFN_SIGBUS;
 
+	pr_debug("spufs_mem_mmap_nopfn address=0x%lx -> 0x%lx, offset=0x%lx\n",
+		 addr0, address, offset);
+
 	spu_acquire(ctx);
 
 	if (ctx->state == SPU_STATE_SAVED) {
@@ -149,9 +167,24 @@ static struct vm_operations_struct spufs_mem_mmap_vmops = {
 	.nopfn = spufs_mem_mmap_nopfn,
 };
 
-static int
-spufs_mem_mmap(struct file *file, struct vm_area_struct *vma)
-{
+static int spufs_mem_mmap(struct file *file, struct vm_area_struct *vma)
+{
+#ifdef CONFIG_SPU_FS_64K_LS
+	struct spu_context *ctx = file->private_data;
+	struct spu_state *csa = &ctx->csa;
+
+	/* Sanity check VMA alignment */
+	if (csa->use_big_pages) {
+		pr_debug("spufs_mem_mmap 64K, start=0x%lx, end=0x%lx,"
+			 " pgoff=0x%lx\n", vma->vm_start, vma->vm_end,
+			 vma->vm_pgoff);
+		if (vma->vm_start & 0xffff)
+			return -EINVAL;
+		if (vma->vm_pgoff & 0xf)
+			return -EINVAL;
+	}
+#endif /* CONFIG_SPU_FS_64K_LS */
+
 	if (!(vma->vm_flags & VM_SHARED))
 		return -EINVAL;
 
@@ -163,13 +196,34 @@ spufs_mem_mmap(struct file *file, struct vm_area_struct *vma)
 	return 0;
 }
 
+#ifdef CONFIG_SPU_FS_64K_LS
+unsigned long spufs_get_unmapped_area(struct file *file, unsigned long addr,
+				      unsigned long len, unsigned long pgoff,
+				      unsigned long flags)
+{
+	struct spu_context *ctx = file->private_data;
+	struct spu_state *csa = &ctx->csa;
+
+	/* If not using big pages, fallback to normal MM g_u_a */
+	if (!csa->use_big_pages)
+		return current->mm->get_unmapped_area(file, addr, len,
+						      pgoff, flags);
+
+	/* Else, try to obtain a 64K pages slice */
+	return slice_get_unmapped_area(addr, len, flags,
+				       MMU_PAGE_64K, 1, 0);
+}
+#endif /* CONFIG_SPU_FS_64K_LS */
+
 static const struct file_operations spufs_mem_fops = {
 	.open			= spufs_mem_open,
-	.release		= spufs_mem_release,
 	.read			= spufs_mem_read,
 	.write			= spufs_mem_write,
 	.llseek			= generic_file_llseek,
 	.mmap			= spufs_mem_mmap,
+#ifdef CONFIG_SPU_FS_64K_LS
+	.get_unmapped_area	= spufs_get_unmapped_area,
+#endif
 };
 
 static unsigned long spufs_ps_nopfn(struct vm_area_struct *vma,
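The .get_unmapped_area hook added above is effective because the core mm gives a file-level hook precedence over the mm's default placement logic. Roughly, in this kernel era (a simplified sketch of get_unmapped_area() in mm/mmap.c, with validation trimmed; not part of this patch):

/* Simplified sketch of the core-mm dispatch that makes the spufs
 * hook effective: a file's get_unmapped_area overrides the mm's
 * default, which is how spufs can force the mapping into a 64K
 * slice via slice_get_unmapped_area(). */
unsigned long get_unmapped_area(struct file *file, unsigned long addr,
				unsigned long len, unsigned long pgoff,
				unsigned long flags)
{
	unsigned long (*get_area)(struct file *, unsigned long,
				  unsigned long, unsigned long,
				  unsigned long);

	get_area = current->mm->get_unmapped_area;
	if (file && file->f_op && file->f_op->get_unmapped_area)
		get_area = file->f_op->get_unmapped_area;
	return get_area(file, addr, len, pgoff, flags);
	/* (The real function also range-checks len and the result.) */
}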
diff --git a/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
new file mode 100644
index 000000000000..f4b3c052dabf
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
@@ -0,0 +1,181 @@
+/*
+ * SPU local store allocation routines
+ *
+ * Copyright 2007 Benjamin Herrenschmidt, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+#include <asm/mmu.h>
+
+static int spu_alloc_lscsa_std(struct spu_state *csa)
+{
+	struct spu_lscsa *lscsa;
+	unsigned char *p;
+
+	lscsa = vmalloc(sizeof(struct spu_lscsa));
+	if (!lscsa)
+		return -ENOMEM;
+	memset(lscsa, 0, sizeof(struct spu_lscsa));
+	csa->lscsa = lscsa;
+
+	/* Set LS pages reserved to allow for user-space mapping. */
+	for (p = lscsa->ls; p < lscsa->ls + LS_SIZE; p += PAGE_SIZE)
+		SetPageReserved(vmalloc_to_page(p));
+
+	return 0;
+}
+
+static void spu_free_lscsa_std(struct spu_state *csa)
+{
+	/* Clear reserved bit before vfree. */
+	unsigned char *p;
+
+	if (csa->lscsa == NULL)
+		return;
+
+	for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE)
+		ClearPageReserved(vmalloc_to_page(p));
+
+	vfree(csa->lscsa);
+}
+
+#ifdef CONFIG_SPU_FS_64K_LS
+
+#define SPU_64K_PAGE_SHIFT	16
+#define SPU_64K_PAGE_ORDER	(SPU_64K_PAGE_SHIFT - PAGE_SHIFT)
+#define SPU_64K_PAGE_COUNT	(1ul << SPU_64K_PAGE_ORDER)
+
+int spu_alloc_lscsa(struct spu_state *csa)
+{
+	struct page **pgarray;
+	unsigned char *p;
+	int i, j, n_4k;
+
+	/* Check availability of 64K pages */
+	if (mmu_psize_defs[MMU_PAGE_64K].shift == 0)
+		goto fail;
+
+	csa->use_big_pages = 1;
+
+	pr_debug("spu_alloc_lscsa(csa=0x%p), trying to allocate 64K pages\n",
+		 csa);
+
+	/* First try to allocate our 64K pages. We need 5 of them
+	 * with the current implementation. In the future, we should try
+	 * to separate the lscsa with the actual local store image, thus
+	 * allowing us to require only 4 64K pages per context
+	 */
+	for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++) {
+		/* XXX This is likely to fail, we should use a special pool
+		 * similar to what hugetlbfs does.
+		 */
+		csa->lscsa_pages[i] = alloc_pages(GFP_KERNEL,
+						  SPU_64K_PAGE_ORDER);
+		if (csa->lscsa_pages[i] == NULL)
+			goto fail;
+	}
+
+	pr_debug(" success ! creating vmap...\n");
+
+	/* Now we need to create a vmalloc mapping of these for the kernel
+	 * and SPU context switch code to use. Currently, we stick to a
+	 * normal kernel vmalloc mapping, which in our case will be 4K
+	 */
+	n_4k = SPU_64K_PAGE_COUNT * SPU_LSCSA_NUM_BIG_PAGES;
+	pgarray = kmalloc(sizeof(struct page *) * n_4k, GFP_KERNEL);
+	if (pgarray == NULL)
+		goto fail;
+	for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++)
+		for (j = 0; j < SPU_64K_PAGE_COUNT; j++)
+			/* We assume all the struct page's are contiguous
+			 * which should be hopefully the case for an order 4
+			 * allocation..
+			 */
+			pgarray[i * SPU_64K_PAGE_COUNT + j] =
+				csa->lscsa_pages[i] + j;
+	csa->lscsa = vmap(pgarray, n_4k, VM_USERMAP, PAGE_KERNEL);
+	kfree(pgarray);
+	if (csa->lscsa == NULL)
+		goto fail;
+
+	memset(csa->lscsa, 0, sizeof(struct spu_lscsa));
+
+	/* Set LS pages reserved to allow for user-space mapping.
+	 *
+	 * XXX isn't that a bit obsolete ? I think we should just
+	 * make sure the page count is high enough. Anyway, won't harm
+	 * for now
+	 */
+	for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE)
+		SetPageReserved(vmalloc_to_page(p));
+
+	pr_debug(" all good !\n");
+
+	return 0;
+fail:
+	pr_debug("spufs: failed to allocate lscsa 64K pages, falling back\n");
+	spu_free_lscsa(csa);
+	return spu_alloc_lscsa_std(csa);
+}
+
+void spu_free_lscsa(struct spu_state *csa)
+{
+	unsigned char *p;
+	int i;
+
+	if (!csa->use_big_pages) {
+		spu_free_lscsa_std(csa);
+		return;
+	}
+	csa->use_big_pages = 0;
+
+	if (csa->lscsa == NULL)
+		goto free_pages;
+
+	for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE)
+		ClearPageReserved(vmalloc_to_page(p));
+
+	vunmap(csa->lscsa);
+	csa->lscsa = NULL;
+
+ free_pages:
+
+	for (i = 0; i < SPU_LSCSA_NUM_BIG_PAGES; i++)
+		if (csa->lscsa_pages[i])
+			__free_pages(csa->lscsa_pages[i], SPU_64K_PAGE_ORDER);
+}
+
+#else /* CONFIG_SPU_FS_64K_LS */
+
+int spu_alloc_lscsa(struct spu_state *csa)
+{
+	return spu_alloc_lscsa_std(csa);
+}
+
+void spu_free_lscsa(struct spu_state *csa)
+{
+	spu_free_lscsa_std(csa);
+}
+
+#endif /* !defined(CONFIG_SPU_FS_64K_LS) */
diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c
index 29dc59cefc38..71a0b41adb8c 100644
--- a/arch/powerpc/platforms/cell/spufs/switch.c
+++ b/arch/powerpc/platforms/cell/spufs/switch.c
@@ -2188,40 +2188,30 @@ static void init_priv2(struct spu_state *csa)
  * as it is by far the largest of the context save regions,
  * and may need to be pinned or otherwise specially aligned.
  */
-void spu_init_csa(struct spu_state *csa)
+int spu_init_csa(struct spu_state *csa)
 {
-	struct spu_lscsa *lscsa;
-	unsigned char *p;
+	int rc;
 
 	if (!csa)
-		return;
+		return -EINVAL;
 	memset(csa, 0, sizeof(struct spu_state));
 
-	lscsa = vmalloc(sizeof(struct spu_lscsa));
-	if (!lscsa)
-		return;
-
-	memset(lscsa, 0, sizeof(struct spu_lscsa));
-	csa->lscsa = lscsa;
+	rc = spu_alloc_lscsa(csa);
+	if (rc)
+		return rc;
+
 	spin_lock_init(&csa->register_lock);
 
-	/* Set LS pages reserved to allow for user-space mapping. */
-	for (p = lscsa->ls; p < lscsa->ls + LS_SIZE; p += PAGE_SIZE)
-		SetPageReserved(vmalloc_to_page(p));
-
 	init_prob(csa);
 	init_priv1(csa);
 	init_priv2(csa);
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(spu_init_csa);
 
 void spu_fini_csa(struct spu_state *csa)
 {
-	/* Clear reserved bit before vfree. */
-	unsigned char *p;
-	for (p = csa->lscsa->ls; p < csa->lscsa->ls + LS_SIZE; p += PAGE_SIZE)
-		ClearPageReserved(vmalloc_to_page(p));
-
-	vfree(csa->lscsa);
+	spu_free_lscsa(csa);
 }
 EXPORT_SYMBOL_GPL(spu_fini_csa);
diff --git a/include/asm-powerpc/spu_csa.h b/include/asm-powerpc/spu_csa.h
index 02e56a6685a2..c48ae185c874 100644
--- a/include/asm-powerpc/spu_csa.h
+++ b/include/asm-powerpc/spu_csa.h
@@ -235,6 +235,12 @@ struct spu_priv2_collapsed {
  */
 struct spu_state {
 	struct spu_lscsa *lscsa;
+#ifdef CONFIG_SPU_FS_64K_LS
+	int		use_big_pages;
+	/* One struct page per 64k page */
+#define SPU_LSCSA_NUM_BIG_PAGES	(sizeof(struct spu_lscsa) / 0x10000)
+	struct page	*lscsa_pages[SPU_LSCSA_NUM_BIG_PAGES];
+#endif
 	struct spu_problem_collapsed prob;
 	struct spu_priv1_collapsed priv1;
 	struct spu_priv2_collapsed priv2;
@@ -247,12 +253,14 @@ struct spu_state {
 	spinlock_t register_lock;
 };
 
-extern void spu_init_csa(struct spu_state *csa);
+extern int spu_init_csa(struct spu_state *csa);
 extern void spu_fini_csa(struct spu_state *csa);
 extern int spu_save(struct spu_state *prev, struct spu *spu);
 extern int spu_restore(struct spu_state *new, struct spu *spu);
 extern int spu_switch(struct spu_state *prev, struct spu_state *new,
 		      struct spu *spu);
+extern int spu_alloc_lscsa(struct spu_state *csa);
+extern void spu_free_lscsa(struct spu_state *csa);
 
 #endif /* !__SPU__ */
 #endif /* __KERNEL__ */
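As a closing note, the SPU_LSCSA_NUM_BIG_PAGES definition above lines up with the "5 of them" comment in spu_alloc_lscsa(). A back-of-envelope check (editor's sketch; the authoritative value is the sizeof expression itself):

/* Back-of-envelope check of SPU_LSCSA_NUM_BIG_PAGES (editor's
 * sketch, not part of the patch): the 256KB local store alone
 * spans 4 x 64K pages, and the register save state in struct
 * spu_lscsa occupies the 5th. Since the macro uses plain integer
 * division, the "5 pages" figure implies the struct's layout pads
 * it to exactly 5 x 64K = 320KB on this configuration.
 */
#define SZ_64K		0x10000
#define LS_SIZE		0x40000			/* 256KB local store */
#define LS_BIG_PAGES	(LS_SIZE / SZ_64K)	/* == 4 */
/* sizeof(struct spu_lscsa) / SZ_64K == LS_BIG_PAGES + 1 == 5 */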