diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/Makefile.nvgpu | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a_allocator.h | 32 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c | 532 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 6 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/page_allocator_priv.h | 100 |
5 files changed, 668 insertions, 3 deletions
diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu index e3c115c6..7e703706 100644 --- a/drivers/gpu/nvgpu/Makefile.nvgpu +++ b/drivers/gpu/nvgpu/Makefile.nvgpu | |||
@@ -55,6 +55,7 @@ nvgpu-y := \ | |||
55 | gk20a/gk20a_allocator.o \ | 55 | gk20a/gk20a_allocator.o \ |
56 | gk20a/gk20a_allocator_bitmap.o \ | 56 | gk20a/gk20a_allocator_bitmap.o \ |
57 | gk20a/gk20a_allocator_buddy.o \ | 57 | gk20a/gk20a_allocator_buddy.o \ |
58 | gk20a/gk20a_allocator_page.o \ | ||
58 | gk20a/cde_gk20a.o \ | 59 | gk20a/cde_gk20a.o \ |
59 | gk20a/platform_gk20a_generic.o \ | 60 | gk20a/platform_gk20a_generic.o \ |
60 | gk20a/tsg_gk20a.o \ | 61 | gk20a/tsg_gk20a.o \ |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h index f3b6dab3..9becf053 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h | |||
@@ -99,9 +99,32 @@ struct gk20a_allocator { | |||
99 | * allocations you need to keep track of the meta-data yourself (in this | 99 | * allocations you need to keep track of the meta-data yourself (in this |
100 | * case the base and length of the allocation as opposed to just the base | 100 | * case the base and length of the allocation as opposed to just the base |
101 | * of the allocation). | 101 | * of the allocation). |
102 | * | ||
103 | * GPU_ALLOC_4K_VIDMEM_PAGES | ||
104 | * | ||
105 | * We manage vidmem pages at a large page granularity for performance | ||
106 | * reasons; however, this can lead to wasting memory. For page allocators | ||
107 | * setting this flag will tell the allocator to manage pools of 4K pages | ||
108 | * inside internally allocated large pages. | ||
109 | * | ||
110 | * GPU_ALLOC_FORCE_CONTIG | ||
111 | * | ||
112 | * Force allocations to be contiguous. Currently only relevant for page | ||
113 | * allocators since all other allocators are naturally contiguous. | ||
114 | * | ||
115 | * GPU_ALLOC_NO_SCATTER_GATHER | ||
116 | * | ||
117 | * The page allocator normally returns a scatter gather data structure for | ||
118 | * allocations (to handle discontiguous pages). However, at times that can | ||
119 | * be annoying so this flag forces the page allocator to return a u64 | ||
120 | * pointing to the allocation base (requires GPU_ALLOC_FORCE_CONTIG to be | ||
121 | * set as well). | ||
102 | */ | 122 | */ |
103 | #define GPU_ALLOC_GVA_SPACE 0x1 | 123 | #define GPU_ALLOC_GVA_SPACE 0x1 |
104 | #define GPU_ALLOC_NO_ALLOC_PAGE 0x2 | 124 | #define GPU_ALLOC_NO_ALLOC_PAGE 0x2 |
125 | #define GPU_ALLOC_4K_VIDMEM_PAGES 0x4 | ||
126 | #define GPU_ALLOC_FORCE_CONTIG 0x8 | ||
127 | #define GPU_ALLOC_NO_SCATTER_GATHER 0x10 | ||
105 | 128 | ||
106 | static inline void alloc_lock(struct gk20a_allocator *a) | 129 | static inline void alloc_lock(struct gk20a_allocator *a) |
107 | { | 130 | { |
@@ -131,6 +154,13 @@ int gk20a_bitmap_allocator_init(struct gk20a_allocator *__a, | |||
131 | const char *name, u64 base, u64 length, | 154 | const char *name, u64 base, u64 length, |
132 | u64 blk_size, u64 flags); | 155 | u64 blk_size, u64 flags); |
133 | 156 | ||
157 | /* | ||
158 | * Page allocator initializers. | ||
159 | */ | ||
160 | int gk20a_page_allocator_init(struct gk20a_allocator *__a, | ||
161 | const char *name, u64 base, u64 length, | ||
162 | u64 blk_size, u64 flags); | ||
163 | |||
134 | #define GPU_BALLOC_MAX_ORDER 31 | 164 | #define GPU_BALLOC_MAX_ORDER 31 |
135 | 165 | ||
136 | /* | 166 | /* |
@@ -199,7 +229,7 @@ void gk20a_alloc_debugfs_init(struct platform_device *pdev); | |||
199 | } while (0) | 229 | } while (0) |
200 | 230 | ||
201 | #define __alloc_dbg(a, fmt, arg...) \ | 231 | #define __alloc_dbg(a, fmt, arg...) \ |
202 | pr_info("%-25s %25s() " fmt, (a)->name, __func__, ##arg) | 232 | pr_warn("%-25s %25s() " fmt, (a)->name, __func__, ##arg) |
203 | 233 | ||
204 | #if defined(ALLOCATOR_DEBUG) | 234 | #if defined(ALLOCATOR_DEBUG) |
205 | /* | 235 | /* |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c new file mode 100644 index 00000000..534027cc --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c | |||
@@ -0,0 +1,532 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #include <linux/kernel.h> | ||
18 | #include <linux/slab.h> | ||
19 | #include <linux/bitops.h> | ||
20 | |||
21 | #include "gk20a_allocator.h" | ||
22 | #include "buddy_allocator_priv.h" | ||
23 | #include "page_allocator_priv.h" | ||
24 | |||
/*
 * Debug print helper for the page allocator: forwards to alloc_dbg() on
 * behalf of the generic allocator that owns page allocator @a.
 */
#define palloc_dbg(a, fmt, arg...)			\
	alloc_dbg(palloc_owner(a), fmt, ##arg)

/*
 * Slab caches for the allocation meta-data (struct gk20a_page_alloc and
 * struct page_alloc_chunk). They are file-global and are created on first
 * use by gk20a_page_allocator_init() while holding meta_data_cache_lock.
 */
static struct kmem_cache *page_alloc_cache;
static struct kmem_cache *page_alloc_chunk_cache;
static DEFINE_MUTEX(meta_data_cache_lock);
31 | |||
32 | static u64 gk20a_page_alloc_length(struct gk20a_allocator *a) | ||
33 | { | ||
34 | struct gk20a_page_allocator *va = a->priv; | ||
35 | |||
36 | return gk20a_alloc_length(&va->source_allocator); | ||
37 | } | ||
38 | |||
39 | static u64 gk20a_page_alloc_base(struct gk20a_allocator *a) | ||
40 | { | ||
41 | struct gk20a_page_allocator *va = a->priv; | ||
42 | |||
43 | return gk20a_alloc_base(&va->source_allocator); | ||
44 | } | ||
45 | |||
46 | static int gk20a_page_alloc_inited(struct gk20a_allocator *a) | ||
47 | { | ||
48 | struct gk20a_page_allocator *va = a->priv; | ||
49 | |||
50 | return gk20a_alloc_initialized(&va->source_allocator); | ||
51 | } | ||
52 | |||
53 | static u64 gk20a_page_alloc_end(struct gk20a_allocator *a) | ||
54 | { | ||
55 | struct gk20a_page_allocator *va = a->priv; | ||
56 | |||
57 | return gk20a_alloc_end(&va->source_allocator); | ||
58 | } | ||
59 | |||
60 | static int __insert_page_alloc(struct gk20a_page_allocator *a, | ||
61 | struct gk20a_page_alloc *alloc) | ||
62 | { | ||
63 | struct rb_node **new = &a->allocs.rb_node; | ||
64 | struct rb_node *parent = NULL; | ||
65 | |||
66 | while (*new) { | ||
67 | struct gk20a_page_alloc *tmp = | ||
68 | container_of(*new, struct gk20a_page_alloc, | ||
69 | tree_entry); | ||
70 | |||
71 | parent = *new; | ||
72 | if (alloc->base < tmp->base) { | ||
73 | new = &((*new)->rb_left); | ||
74 | } else if (alloc->base > tmp->base) { | ||
75 | new = &((*new)->rb_right); | ||
76 | } else { | ||
77 | WARN(1, "Duplicate entries in allocated list!\n"); | ||
78 | return 0; | ||
79 | } | ||
80 | } | ||
81 | |||
82 | rb_link_node(&alloc->tree_entry, parent, new); | ||
83 | rb_insert_color(&alloc->tree_entry, &a->allocs); | ||
84 | |||
85 | return 0; | ||
86 | } | ||
87 | |||
88 | static struct gk20a_page_alloc *__find_page_alloc( | ||
89 | struct gk20a_page_allocator *a, | ||
90 | u64 addr) | ||
91 | { | ||
92 | struct rb_node *node = a->allocs.rb_node; | ||
93 | struct gk20a_page_alloc *alloc; | ||
94 | |||
95 | while (node) { | ||
96 | alloc = container_of(node, struct gk20a_page_alloc, tree_entry); | ||
97 | |||
98 | if (addr < alloc->base) | ||
99 | node = node->rb_left; | ||
100 | else if (addr > alloc->base) | ||
101 | node = node->rb_right; | ||
102 | else | ||
103 | break; | ||
104 | } | ||
105 | |||
106 | if (!node) | ||
107 | return NULL; | ||
108 | |||
109 | rb_erase(node, &a->allocs); | ||
110 | |||
111 | return alloc; | ||
112 | } | ||
113 | |||
114 | /* | ||
115 | * Allocate physical pages. Since the underlying allocator is a buddy allocator | ||
116 | * the returned pages are always contiguous. However, since there could be | ||
117 | * fragmentation in the space this allocator will collate smaller non-contiguous | ||
118 | * allocations together if necessary. | ||
119 | */ | ||
120 | static struct gk20a_page_alloc *__gk20a_alloc_pages( | ||
121 | struct gk20a_page_allocator *a, u64 pages) | ||
122 | { | ||
123 | struct gk20a_page_alloc *alloc; | ||
124 | struct page_alloc_chunk *c; | ||
125 | u64 max_chunk_len = pages << a->page_shift; | ||
126 | int i = 0; | ||
127 | |||
128 | alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL); | ||
129 | if (!alloc) | ||
130 | goto fail; | ||
131 | |||
132 | INIT_LIST_HEAD(&alloc->alloc_chunks); | ||
133 | alloc->length = pages << a->page_shift; | ||
134 | |||
135 | while (pages) { | ||
136 | u64 chunk_addr = 0; | ||
137 | u64 chunk_pages = 1 << __fls(pages); | ||
138 | u64 chunk_len = chunk_pages << a->page_shift; | ||
139 | |||
140 | /* | ||
141 | * Take care of the possibility that the allocation must be | ||
142 | * contiguous. If this is not the first iteration then that | ||
143 | * means the first iteration failed to alloc the entire | ||
144 | * requested size. The buddy allocator guarantees any given | ||
145 | * single alloc is contiguous. | ||
146 | */ | ||
147 | if (a->flags & GPU_ALLOC_FORCE_CONTIG && i != 0) | ||
148 | goto fail_cleanup; | ||
149 | |||
150 | if (chunk_len > max_chunk_len) | ||
151 | chunk_len = max_chunk_len; | ||
152 | |||
153 | /* | ||
154 | * Keep attempting to allocate in smaller chunks until the alloc | ||
155 | * either succeeds or is smaller than the page_size of the | ||
156 | * allocator (i.e the allocator is OOM). | ||
157 | */ | ||
158 | do { | ||
159 | chunk_addr = gk20a_alloc(&a->source_allocator, | ||
160 | chunk_len); | ||
161 | |||
162 | /* Divide by 2 and try again */ | ||
163 | if (!chunk_addr) { | ||
164 | palloc_dbg(a, "balloc failed: 0x%llx\n", | ||
165 | chunk_len); | ||
166 | chunk_len >>= 1; | ||
167 | max_chunk_len = chunk_len; | ||
168 | } | ||
169 | } while (!chunk_addr && chunk_len >= a->page_size); | ||
170 | |||
171 | if (!chunk_addr) { | ||
172 | palloc_dbg(a, "bailing @ 0x%llx\n", chunk_len); | ||
173 | goto fail_cleanup; | ||
174 | } | ||
175 | |||
176 | c = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL); | ||
177 | if (!c) { | ||
178 | gk20a_free(&a->source_allocator, chunk_addr); | ||
179 | goto fail_cleanup; | ||
180 | } | ||
181 | |||
182 | pages -= chunk_pages; | ||
183 | |||
184 | c->base = chunk_addr; | ||
185 | c->length = chunk_len; | ||
186 | list_add(&c->list_entry, &alloc->alloc_chunks); | ||
187 | |||
188 | i++; | ||
189 | } | ||
190 | |||
191 | alloc->nr_chunks = i; | ||
192 | c = list_first_entry(&alloc->alloc_chunks, | ||
193 | struct page_alloc_chunk, list_entry); | ||
194 | alloc->base = c->base; | ||
195 | |||
196 | return alloc; | ||
197 | |||
198 | fail_cleanup: | ||
199 | while (!list_empty(&alloc->alloc_chunks)) { | ||
200 | c = list_first_entry(&alloc->alloc_chunks, | ||
201 | struct page_alloc_chunk, list_entry); | ||
202 | list_del(&c->list_entry); | ||
203 | kfree(c); | ||
204 | } | ||
205 | kfree(alloc); | ||
206 | fail: | ||
207 | return ERR_PTR(-ENOMEM); | ||
208 | } | ||
209 | |||
210 | /* | ||
211 | * Allocate enough pages to satisfy @len. Page size is determined at | ||
212 | * initialization of the allocator. | ||
213 | * | ||
214 | * The return is actually a pointer to a struct gk20a_page_alloc pointer. This | ||
215 | * is because it doesn't make a lot of sense to return the address of the first | ||
216 | * page in the list of pages (since they could be discontiguous). This has | ||
217 | * precedent in the dma_alloc APIs, though, it's really just an annoying | ||
218 | * artifact of the fact that the gk20a_alloc() API requires a u64 return type. | ||
219 | */ | ||
220 | static u64 gk20a_page_alloc(struct gk20a_allocator *__a, u64 len) | ||
221 | { | ||
222 | struct gk20a_page_allocator *a = page_allocator(__a); | ||
223 | struct gk20a_page_alloc *alloc = NULL; | ||
224 | struct page_alloc_chunk *c; | ||
225 | u64 real_len; | ||
226 | u64 pages; | ||
227 | int i = 0; | ||
228 | |||
229 | /* | ||
230 | * If we want contig pages we have to round up to a power of two. It's | ||
231 | * easier to do that here than in the buddy allocator. | ||
232 | */ | ||
233 | real_len = a->flags & GPU_ALLOC_FORCE_CONTIG ? | ||
234 | roundup_pow_of_two(len) : len; | ||
235 | |||
236 | pages = ALIGN(real_len, a->page_size) >> a->page_shift; | ||
237 | |||
238 | alloc_lock(__a); | ||
239 | |||
240 | alloc = __gk20a_alloc_pages(a, pages); | ||
241 | if (IS_ERR(alloc)) { | ||
242 | alloc_unlock(__a); | ||
243 | palloc_dbg(a, "Alloc 0x%llx (%llu) (failed)\n", | ||
244 | pages << a->page_shift, pages); | ||
245 | return 0; | ||
246 | } | ||
247 | |||
248 | __insert_page_alloc(a, alloc); | ||
249 | alloc_unlock(__a); | ||
250 | |||
251 | palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n", | ||
252 | pages << a->page_shift, pages, alloc->base); | ||
253 | list_for_each_entry(c, &alloc->alloc_chunks, list_entry) { | ||
254 | palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n", | ||
255 | i++, c->base, c->length); | ||
256 | } | ||
257 | |||
258 | a->nr_allocs++; | ||
259 | a->pages_alloced += pages; | ||
260 | |||
261 | if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER) | ||
262 | return alloc->base; | ||
263 | else | ||
264 | return (u64) (uintptr_t) alloc; | ||
265 | } | ||
266 | |||
267 | static void __gk20a_free_pages(struct gk20a_page_allocator *a, | ||
268 | struct gk20a_page_alloc *alloc) | ||
269 | { | ||
270 | struct page_alloc_chunk *chunk; | ||
271 | |||
272 | while (!list_empty(&alloc->alloc_chunks)) { | ||
273 | chunk = list_first_entry(&alloc->alloc_chunks, | ||
274 | struct page_alloc_chunk, | ||
275 | list_entry); | ||
276 | list_del(&chunk->list_entry); | ||
277 | |||
278 | gk20a_free(&a->source_allocator, chunk->base); | ||
279 | kfree(chunk); | ||
280 | } | ||
281 | |||
282 | kfree(alloc); | ||
283 | } | ||
284 | |||
285 | /* | ||
286 | * Note: this will remove the gk20a_page_alloc struct from the RB tree | ||
287 | * if it's found. | ||
288 | */ | ||
289 | static void gk20a_page_free(struct gk20a_allocator *__a, u64 base) | ||
290 | { | ||
291 | struct gk20a_page_allocator *a = page_allocator(__a); | ||
292 | struct gk20a_page_alloc *alloc; | ||
293 | |||
294 | alloc_lock(__a); | ||
295 | |||
296 | if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER) | ||
297 | alloc = __find_page_alloc(a, base); | ||
298 | else | ||
299 | alloc = __find_page_alloc(a, | ||
300 | ((struct gk20a_page_alloc *)(uintptr_t)base)->base); | ||
301 | |||
302 | if (!alloc) { | ||
303 | palloc_dbg(a, "Hrm, found no alloc?\n"); | ||
304 | goto done; | ||
305 | } | ||
306 | |||
307 | a->nr_frees++; | ||
308 | a->pages_freed += (alloc->length >> a->page_shift); | ||
309 | |||
310 | /* | ||
311 | * Frees *alloc. | ||
312 | */ | ||
313 | __gk20a_free_pages(a, alloc); | ||
314 | |||
315 | palloc_dbg(a, "Free 0x%010llx id=0x%010llx\n", | ||
316 | alloc->length, alloc->base); | ||
317 | |||
318 | done: | ||
319 | alloc_unlock(__a); | ||
320 | } | ||
321 | |||
322 | static struct gk20a_page_alloc *__gk20a_alloc_pages_fixed( | ||
323 | struct gk20a_page_allocator *a, u64 base, u64 length) | ||
324 | { | ||
325 | struct gk20a_page_alloc *alloc; | ||
326 | struct page_alloc_chunk *c; | ||
327 | |||
328 | alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL); | ||
329 | c = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL); | ||
330 | if (!alloc || !c) | ||
331 | goto fail; | ||
332 | |||
333 | alloc->base = gk20a_alloc_fixed(&a->source_allocator, base, length); | ||
334 | if (!alloc->base) { | ||
335 | WARN(1, "gk20a: failed to fixed alloc pages @ 0x%010llx", base); | ||
336 | goto fail; | ||
337 | } | ||
338 | |||
339 | alloc->nr_chunks = 1; | ||
340 | alloc->length = length; | ||
341 | INIT_LIST_HEAD(&alloc->alloc_chunks); | ||
342 | |||
343 | c->base = alloc->base; | ||
344 | c->length = length; | ||
345 | list_add(&c->list_entry, &alloc->alloc_chunks); | ||
346 | |||
347 | return alloc; | ||
348 | |||
349 | fail: | ||
350 | kfree(c); | ||
351 | kfree(alloc); | ||
352 | return ERR_PTR(-ENOMEM); | ||
353 | } | ||
354 | |||
355 | static u64 gk20a_page_alloc_fixed(struct gk20a_allocator *__a, | ||
356 | u64 base, u64 len) | ||
357 | { | ||
358 | struct gk20a_page_allocator *a = page_allocator(__a); | ||
359 | struct gk20a_page_alloc *alloc = NULL; | ||
360 | struct page_alloc_chunk *c; | ||
361 | u64 aligned_len, pages; | ||
362 | int i = 0; | ||
363 | |||
364 | aligned_len = ALIGN(len, a->page_size); | ||
365 | pages = aligned_len >> a->page_shift; | ||
366 | |||
367 | alloc_lock(__a); | ||
368 | |||
369 | alloc = __gk20a_alloc_pages_fixed(a, base, aligned_len); | ||
370 | if (IS_ERR(alloc)) { | ||
371 | alloc_unlock(__a); | ||
372 | return 0; | ||
373 | } | ||
374 | |||
375 | __insert_page_alloc(a, alloc); | ||
376 | alloc_unlock(__a); | ||
377 | |||
378 | palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n", | ||
379 | alloc->base, aligned_len, pages); | ||
380 | list_for_each_entry(c, &alloc->alloc_chunks, list_entry) { | ||
381 | palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n", | ||
382 | i++, c->base, c->length); | ||
383 | } | ||
384 | |||
385 | a->nr_fixed_allocs++; | ||
386 | a->pages_alloced += pages; | ||
387 | |||
388 | if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER) | ||
389 | return alloc->base; | ||
390 | else | ||
391 | return (u64) (uintptr_t) alloc; | ||
392 | } | ||
393 | |||
394 | static void gk20a_page_free_fixed(struct gk20a_allocator *__a, | ||
395 | u64 base, u64 len) | ||
396 | { | ||
397 | struct gk20a_page_allocator *a = page_allocator(__a); | ||
398 | struct gk20a_page_alloc *alloc; | ||
399 | |||
400 | alloc_lock(__a); | ||
401 | |||
402 | if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER) { | ||
403 | alloc = __find_page_alloc(a, base); | ||
404 | if (!alloc) | ||
405 | goto done; | ||
406 | } else { | ||
407 | alloc = (struct gk20a_page_alloc *) (uintptr_t) base; | ||
408 | } | ||
409 | |||
410 | /* | ||
411 | * This works for the time being since the buddy allocator | ||
412 | * uses the same free function for both fixed and regular | ||
413 | * allocs. This would have to be updated if the underlying | ||
414 | * allocator were to change. | ||
415 | */ | ||
416 | __gk20a_free_pages(a, alloc); | ||
417 | |||
418 | palloc_dbg(a, "Free [fixed] 0x%010llx + 0x%llx\n", | ||
419 | alloc->base, alloc->length); | ||
420 | a->nr_fixed_frees++; | ||
421 | a->pages_freed += (alloc->length >> a->page_shift); | ||
422 | |||
423 | done: | ||
424 | alloc_unlock(__a); | ||
425 | } | ||
426 | |||
427 | static void gk20a_page_allocator_destroy(struct gk20a_allocator *__a) | ||
428 | { | ||
429 | struct gk20a_page_allocator *a = page_allocator(__a); | ||
430 | |||
431 | alloc_lock(__a); | ||
432 | kfree(a); | ||
433 | __a->priv = NULL; | ||
434 | alloc_unlock(__a); | ||
435 | } | ||
436 | |||
437 | static void gk20a_page_print_stats(struct gk20a_allocator *__a, | ||
438 | struct seq_file *s, int lock) | ||
439 | { | ||
440 | struct gk20a_page_allocator *a = page_allocator(__a); | ||
441 | |||
442 | if (lock) | ||
443 | alloc_lock(__a); | ||
444 | |||
445 | __alloc_pstat(s, __a, "Page allocator:\n"); | ||
446 | __alloc_pstat(s, __a, " allocs %lld\n", a->nr_allocs); | ||
447 | __alloc_pstat(s, __a, " frees %lld\n", a->nr_frees); | ||
448 | __alloc_pstat(s, __a, " fixed_allocs %lld\n", a->nr_fixed_allocs); | ||
449 | __alloc_pstat(s, __a, " fixed_frees %lld\n", a->nr_fixed_frees); | ||
450 | __alloc_pstat(s, __a, " pages alloced %lld\n", a->pages_alloced); | ||
451 | __alloc_pstat(s, __a, " pages freed %lld\n", a->pages_freed); | ||
452 | __alloc_pstat(s, __a, "\n"); | ||
453 | __alloc_pstat(s, __a, "Source alloc: %s\n", | ||
454 | a->source_allocator.name); | ||
455 | |||
456 | gk20a_alloc_print_stats(&a->source_allocator, s, lock); | ||
457 | |||
458 | if (lock) | ||
459 | alloc_unlock(__a); | ||
460 | } | ||
461 | |||
462 | static const struct gk20a_allocator_ops page_ops = { | ||
463 | .alloc = gk20a_page_alloc, | ||
464 | .free = gk20a_page_free, | ||
465 | |||
466 | .alloc_fixed = gk20a_page_alloc_fixed, | ||
467 | .free_fixed = gk20a_page_free_fixed, | ||
468 | |||
469 | .base = gk20a_page_alloc_base, | ||
470 | .length = gk20a_page_alloc_length, | ||
471 | .end = gk20a_page_alloc_end, | ||
472 | .inited = gk20a_page_alloc_inited, | ||
473 | |||
474 | .fini = gk20a_page_allocator_destroy, | ||
475 | |||
476 | .print_stats = gk20a_page_print_stats, | ||
477 | }; | ||
478 | |||
479 | int gk20a_page_allocator_init(struct gk20a_allocator *__a, | ||
480 | const char *name, u64 base, u64 length, | ||
481 | u64 blk_size, u64 flags) | ||
482 | { | ||
483 | struct gk20a_page_allocator *a; | ||
484 | char buddy_name[sizeof(__a->name)]; | ||
485 | int err; | ||
486 | |||
487 | mutex_lock(&meta_data_cache_lock); | ||
488 | if (!page_alloc_cache) | ||
489 | page_alloc_cache = KMEM_CACHE(gk20a_page_alloc, 0); | ||
490 | if (!page_alloc_chunk_cache) | ||
491 | page_alloc_chunk_cache = KMEM_CACHE(page_alloc_chunk, 0); | ||
492 | mutex_unlock(&meta_data_cache_lock); | ||
493 | |||
494 | if (!page_alloc_cache || !page_alloc_chunk_cache) | ||
495 | return -ENOMEM; | ||
496 | |||
497 | a = kzalloc(sizeof(struct gk20a_page_allocator), GFP_KERNEL); | ||
498 | if (!a) | ||
499 | return -ENOMEM; | ||
500 | |||
501 | err = __gk20a_alloc_common_init(__a, name, a, false, &page_ops); | ||
502 | if (err) | ||
503 | goto fail; | ||
504 | |||
505 | a->base = base; | ||
506 | a->length = length; | ||
507 | a->page_size = blk_size; | ||
508 | a->page_shift = __ffs(blk_size); | ||
509 | a->allocs = RB_ROOT; | ||
510 | a->owner = __a; | ||
511 | a->flags = flags; | ||
512 | |||
513 | snprintf(buddy_name, sizeof(buddy_name), "%s-src", name); | ||
514 | |||
515 | err = gk20a_buddy_allocator_init(&a->source_allocator, buddy_name, base, | ||
516 | length, blk_size, 0); | ||
517 | if (err) | ||
518 | goto fail; | ||
519 | |||
520 | gk20a_init_alloc_debug(__a); | ||
521 | palloc_dbg(a, "New allocator: type page\n"); | ||
522 | palloc_dbg(a, " base 0x%llx\n", a->base); | ||
523 | palloc_dbg(a, " size 0x%llx\n", a->length); | ||
524 | palloc_dbg(a, " page_size 0x%llx\n", a->page_size); | ||
525 | palloc_dbg(a, " flags 0x%llx\n", a->flags); | ||
526 | |||
527 | return 0; | ||
528 | |||
529 | fail: | ||
530 | kfree(a); | ||
531 | return err; | ||
532 | } | ||
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index b63444d0..cab10902 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -764,8 +764,10 @@ static int gk20a_init_vidmem(struct mm_gk20a *mm) | |||
764 | if (!size) | 764 | if (!size) |
765 | return 0; | 765 | return 0; |
766 | 766 | ||
767 | err = gk20a_buddy_allocator_init(&g->mm.vidmem.allocator, "vidmem", | 767 | err = gk20a_page_allocator_init(&g->mm.vidmem.allocator, "vidmem", |
768 | SZ_4K, size - SZ_4K, SZ_4K, 0); | 768 | SZ_4K, size - SZ_4K, SZ_4K, |
769 | GPU_ALLOC_FORCE_CONTIG | | ||
770 | GPU_ALLOC_NO_SCATTER_GATHER); | ||
769 | if (err) { | 771 | if (err) { |
770 | gk20a_err(d, "Failed to register vidmem for size %zu: %d", | 772 | gk20a_err(d, "Failed to register vidmem for size %zu: %d", |
771 | size, err); | 773 | size, err); |
diff --git a/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h b/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h new file mode 100644 index 00000000..bce5b75e --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h | |||
@@ -0,0 +1,100 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify it | ||
5 | * under the terms and conditions of the GNU General Public License, | ||
6 | * version 2, as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
11 | * more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
15 | */ | ||
16 | |||
17 | #ifndef PAGE_ALLOCATOR_PRIV_H | ||
18 | #define PAGE_ALLOCATOR_PRIV_H | ||
19 | |||
20 | #include <linux/list.h> | ||
21 | #include <linux/rbtree.h> | ||
22 | |||
23 | #include "gk20a_allocator.h" | ||
24 | |||
25 | struct gk20a_allocator; | ||
26 | |||
27 | struct page_alloc_chunk { | ||
28 | struct list_head list_entry; | ||
29 | |||
30 | u64 base; | ||
31 | u64 length; | ||
32 | }; | ||
33 | |||
34 | /* | ||
35 | * Struct to handle internal management of page allocation. It holds a list | ||
36 | * of the chunks of page that make up the overall allocation - much like a | ||
37 | * scatter gather table. | ||
38 | */ | ||
39 | struct gk20a_page_alloc { | ||
40 | struct list_head alloc_chunks; | ||
41 | |||
42 | int nr_chunks; | ||
43 | u64 length; | ||
44 | |||
45 | /* | ||
46 | * Only useful for the RB tree - since the alloc will have discontiguous | ||
47 | * pages the base is essentially irrelevant except for the fact that it | ||
48 | * is guarenteed to be unique. | ||
49 | */ | ||
50 | u64 base; | ||
51 | |||
52 | struct rb_node tree_entry; | ||
53 | }; | ||
54 | |||
55 | struct gk20a_page_allocator { | ||
56 | struct gk20a_allocator *owner; /* Owner of this allocator. */ | ||
57 | |||
58 | /* | ||
59 | * Use a buddy allocator to manage the allocation of the underlying | ||
60 | * pages. This lets us abstract the discontiguous allocation handling | ||
61 | * out of the annoyingly complicated buddy allocator. | ||
62 | */ | ||
63 | struct gk20a_allocator source_allocator; | ||
64 | |||
65 | /* | ||
66 | * Page params. | ||
67 | */ | ||
68 | u64 base; | ||
69 | u64 length; | ||
70 | u64 page_size; | ||
71 | u32 page_shift; | ||
72 | |||
73 | struct rb_root allocs; /* Outstanding allocations. */ | ||
74 | |||
75 | u64 flags; | ||
76 | |||
77 | /* | ||
78 | * Stat tracking. | ||
79 | */ | ||
80 | u64 nr_allocs; | ||
81 | u64 nr_frees; | ||
82 | u64 nr_fixed_allocs; | ||
83 | u64 nr_fixed_frees; | ||
84 | u64 pages_alloced; | ||
85 | u64 pages_freed; | ||
86 | }; | ||
87 | |||
88 | static inline struct gk20a_page_allocator *page_allocator( | ||
89 | struct gk20a_allocator *a) | ||
90 | { | ||
91 | return (struct gk20a_page_allocator *)(a)->priv; | ||
92 | } | ||
93 | |||
94 | static inline struct gk20a_allocator *palloc_owner( | ||
95 | struct gk20a_page_allocator *a) | ||
96 | { | ||
97 | return a->owner; | ||
98 | } | ||
99 | |||
100 | #endif | ||