author		Alex Waterman <alexw@nvidia.com>	2016-07-21 21:26:20 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2016-08-30 13:04:10 -0400
commit		448df6ed271a7b73a1a5e3dfbba826e745b82922 (patch)
tree		284c4c4ede880629df1c342e2d5516c462d5ed56 /drivers/gpu
parent		954258e121d2436097d5fc1abc6b7d73ddd784f6 (diff)
gpu: nvgpu: Implement a vidmem allocator

Implement an allocator suitable for managing the video memory on dGPUs.
It works by allocating chunks from an underlying buddy allocator and
collating the chunks together, much like a scatter-gather table (sgt)
does in the wider Linux kernel. This makes it possible to satisfy large
buffer requests out of potentially fragmented memory; the GMMU can then
map the physical vidmem into contiguous GVA spaces.

Jira DNVGPU-96

Change-Id: Ic1d7800b033a170b77790aa23fad6858443d0e89
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1197203
(cherry picked from commit fa44684a843956ae384fef6d7a79b9cbbd04f73e)
Reviewed-on: http://git-master/r/1185231
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
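For orientation, here is a minimal usage sketch of the API this patch adds. The
wrapper function and the sizes (vidmem_alloc_example, SZ_1G, SZ_2M) are
illustrative assumptions; only gk20a_page_allocator_init(), gk20a_alloc(),
gk20a_free() and the GPU_ALLOC_* flags come from the diff below.

    /* Illustrative only: carve a page allocator out of a 1 GB vidmem region
     * and take a 2 MB physically contiguous buffer from it. */
    static int vidmem_alloc_example(void)
    {
            struct gk20a_allocator vidmem;
            u64 addr;
            int err;

            err = gk20a_page_allocator_init(&vidmem, "vidmem-example",
                            SZ_4K, SZ_1G - SZ_4K, SZ_4K,
                            GPU_ALLOC_FORCE_CONTIG |
                            GPU_ALLOC_NO_SCATTER_GATHER);
            if (err)
                    return err;

            /* With GPU_ALLOC_NO_SCATTER_GATHER the returned u64 is the base
             * address of the allocation rather than a gk20a_page_alloc. */
            addr = gk20a_alloc(&vidmem, SZ_2M);
            if (!addr)
                    return -ENOMEM;

            gk20a_free(&vidmem, addr);
            return 0;
    }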
Diffstat (limited to 'drivers/gpu')
-rw-r--r--	drivers/gpu/nvgpu/Makefile.nvgpu		1
-rw-r--r--	drivers/gpu/nvgpu/gk20a/gk20a_allocator.h	32
-rw-r--r--	drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c	532
-rw-r--r--	drivers/gpu/nvgpu/gk20a/mm_gk20a.c		6
-rw-r--r--	drivers/gpu/nvgpu/gk20a/page_allocator_priv.h	100
5 files changed, 668 insertions, 3 deletions
diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu
index e3c115c6..7e703706 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu
@@ -55,6 +55,7 @@ nvgpu-y := \
 	gk20a/gk20a_allocator.o \
 	gk20a/gk20a_allocator_bitmap.o \
 	gk20a/gk20a_allocator_buddy.o \
+	gk20a/gk20a_allocator_page.o \
 	gk20a/cde_gk20a.o \
 	gk20a/platform_gk20a_generic.o \
 	gk20a/tsg_gk20a.o \
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
index f3b6dab3..9becf053 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
@@ -99,9 +99,32 @@ struct gk20a_allocator {
  * allocations you need to keep track of the meta-data yourself (in this
  * case the base and length of the allocation as opposed to just the base
  * of the allocation).
+ *
+ * GPU_ALLOC_4K_VIDMEM_PAGES
+ *
+ *   We manage vidmem pages at a large page granularity for performance
+ *   reasons; however, this can lead to wasting memory. For page allocators
+ *   setting this flag will tell the allocator to manage pools of 4K pages
+ *   inside internally allocated large pages.
+ *
+ * GPU_ALLOC_FORCE_CONTIG
+ *
+ *   Force allocations to be contiguous. Currently only relevant for page
+ *   allocators since all other allocators are naturally contiguous.
+ *
+ * GPU_ALLOC_NO_SCATTER_GATHER
+ *
+ *   The page allocator normally returns a scatter gather data structure for
+ *   allocations (to handle discontiguous pages). However, at times that can
+ *   be annoying so this flag forces the page allocator to return a u64
+ *   pointing to the allocation base (requires GPU_ALLOC_FORCE_CONTIG to be
+ *   set as well).
  */
 #define GPU_ALLOC_GVA_SPACE		0x1
 #define GPU_ALLOC_NO_ALLOC_PAGE	0x2
+#define GPU_ALLOC_4K_VIDMEM_PAGES	0x4
+#define GPU_ALLOC_FORCE_CONTIG		0x8
+#define GPU_ALLOC_NO_SCATTER_GATHER	0x10

 static inline void alloc_lock(struct gk20a_allocator *a)
 {
@@ -131,6 +154,13 @@ int gk20a_bitmap_allocator_init(struct gk20a_allocator *__a,
 			const char *name, u64 base, u64 length,
 			u64 blk_size, u64 flags);

+/*
+ * Page allocator initializers.
+ */
+int gk20a_page_allocator_init(struct gk20a_allocator *__a,
+			const char *name, u64 base, u64 length,
+			u64 blk_size, u64 flags);
+
 #define GPU_BALLOC_MAX_ORDER		31

 /*
@@ -199,7 +229,7 @@ void gk20a_alloc_debugfs_init(struct platform_device *pdev);
 	} while (0)

 #define __alloc_dbg(a, fmt, arg...)			\
-	pr_info("%-25s %25s() " fmt, (a)->name, __func__, ##arg)
+	pr_warn("%-25s %25s() " fmt, (a)->name, __func__, ##arg)

 #if defined(ALLOCATOR_DEBUG)
 /*
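As the new comment block spells out, GPU_ALLOC_NO_SCATTER_GATHER is only
meaningful together with GPU_ALLOC_FORCE_CONTIG, since a bare base address
cannot describe a discontiguous allocation. A hypothetical sanity check (not
part of this patch) would make the dependency explicit:

    /* Hypothetical helper, not in this patch: reject flag combinations the
     * page allocator cannot honour. */
    static int gpu_alloc_check_flags(u64 flags)
    {
            if ((flags & GPU_ALLOC_NO_SCATTER_GATHER) &&
                !(flags & GPU_ALLOC_FORCE_CONTIG))
                    return -EINVAL;

            return 0;
    }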
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c
new file mode 100644
index 00000000..534027cc
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c
@@ -0,0 +1,532 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/bitops.h>
+
+#include "gk20a_allocator.h"
+#include "buddy_allocator_priv.h"
+#include "page_allocator_priv.h"
+
+#define palloc_dbg(a, fmt, arg...)			\
+	alloc_dbg(palloc_owner(a), fmt, ##arg)
+
+static struct kmem_cache *page_alloc_cache;
+static struct kmem_cache *page_alloc_chunk_cache;
+static DEFINE_MUTEX(meta_data_cache_lock);
+
+static u64 gk20a_page_alloc_length(struct gk20a_allocator *a)
+{
+	struct gk20a_page_allocator *va = a->priv;
+
+	return gk20a_alloc_length(&va->source_allocator);
+}
+
+static u64 gk20a_page_alloc_base(struct gk20a_allocator *a)
+{
+	struct gk20a_page_allocator *va = a->priv;
+
+	return gk20a_alloc_base(&va->source_allocator);
+}
+
+static int gk20a_page_alloc_inited(struct gk20a_allocator *a)
+{
+	struct gk20a_page_allocator *va = a->priv;
+
+	return gk20a_alloc_initialized(&va->source_allocator);
+}
+
+static u64 gk20a_page_alloc_end(struct gk20a_allocator *a)
+{
+	struct gk20a_page_allocator *va = a->priv;
+
+	return gk20a_alloc_end(&va->source_allocator);
+}
+
+static int __insert_page_alloc(struct gk20a_page_allocator *a,
+			       struct gk20a_page_alloc *alloc)
+{
+	struct rb_node **new = &a->allocs.rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*new) {
+		struct gk20a_page_alloc *tmp =
+			container_of(*new, struct gk20a_page_alloc,
+				     tree_entry);
+
+		parent = *new;
+		if (alloc->base < tmp->base) {
+			new = &((*new)->rb_left);
+		} else if (alloc->base > tmp->base) {
+			new = &((*new)->rb_right);
+		} else {
+			WARN(1, "Duplicate entries in allocated list!\n");
+			return 0;
+		}
+	}
+
+	rb_link_node(&alloc->tree_entry, parent, new);
+	rb_insert_color(&alloc->tree_entry, &a->allocs);
+
+	return 0;
+}
+
+static struct gk20a_page_alloc *__find_page_alloc(
+	struct gk20a_page_allocator *a,
+	u64 addr)
+{
+	struct rb_node *node = a->allocs.rb_node;
+	struct gk20a_page_alloc *alloc;
+
+	while (node) {
+		alloc = container_of(node, struct gk20a_page_alloc, tree_entry);
+
+		if (addr < alloc->base)
+			node = node->rb_left;
+		else if (addr > alloc->base)
+			node = node->rb_right;
+		else
+			break;
+	}
+
+	if (!node)
+		return NULL;
+
+	rb_erase(node, &a->allocs);
+
+	return alloc;
+}
+
+/*
+ * Allocate physical pages. Since the underlying allocator is a buddy allocator
+ * the returned pages are always contiguous. However, since there could be
+ * fragmentation in the space this allocator will collate smaller non-contiguous
+ * allocations together if necessary.
+ */
+static struct gk20a_page_alloc *__gk20a_alloc_pages(
+	struct gk20a_page_allocator *a, u64 pages)
+{
+	struct gk20a_page_alloc *alloc;
+	struct page_alloc_chunk *c;
+	u64 max_chunk_len = pages << a->page_shift;
+	int i = 0;
+
+	alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL);
+	if (!alloc)
+		goto fail;
+
+	INIT_LIST_HEAD(&alloc->alloc_chunks);
+	alloc->length = pages << a->page_shift;
+
+	while (pages) {
+		u64 chunk_addr = 0;
+		u64 chunk_pages = 1 << __fls(pages);
+		u64 chunk_len = chunk_pages << a->page_shift;
+
+		/*
+		 * Take care of the possibility that the allocation must be
+		 * contiguous. If this is not the first iteration then that
+		 * means the first iteration failed to alloc the entire
+		 * requested size. The buddy allocator guarantees any given
+		 * single alloc is contiguous.
+		 */
+		if (a->flags & GPU_ALLOC_FORCE_CONTIG && i != 0)
+			goto fail_cleanup;
+
+		if (chunk_len > max_chunk_len)
+			chunk_len = max_chunk_len;
+
+		/*
+		 * Keep attempting to allocate in smaller chunks until the alloc
+		 * either succeeds or is smaller than the page_size of the
+		 * allocator (i.e the allocator is OOM).
+		 */
+		do {
+			chunk_addr = gk20a_alloc(&a->source_allocator,
+						 chunk_len);
+
+			/* Divide by 2 and try again */
+			if (!chunk_addr) {
+				palloc_dbg(a, "balloc failed: 0x%llx\n",
+					   chunk_len);
+				chunk_len >>= 1;
+				max_chunk_len = chunk_len;
+			}
+		} while (!chunk_addr && chunk_len >= a->page_size);
+
+		if (!chunk_addr) {
+			palloc_dbg(a, "bailing @ 0x%llx\n", chunk_len);
+			goto fail_cleanup;
+		}
+
+		c = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL);
+		if (!c) {
+			gk20a_free(&a->source_allocator, chunk_addr);
+			goto fail_cleanup;
+		}
+
+		pages -= chunk_pages;
+
+		c->base = chunk_addr;
+		c->length = chunk_len;
+		list_add(&c->list_entry, &alloc->alloc_chunks);
+
+		i++;
+	}
+
+	alloc->nr_chunks = i;
+	c = list_first_entry(&alloc->alloc_chunks,
+			     struct page_alloc_chunk, list_entry);
+	alloc->base = c->base;
+
+	return alloc;
+
+fail_cleanup:
+	while (!list_empty(&alloc->alloc_chunks)) {
+		c = list_first_entry(&alloc->alloc_chunks,
+				     struct page_alloc_chunk, list_entry);
+		list_del(&c->list_entry);
+		kfree(c);
+	}
+	kfree(alloc);
+fail:
+	return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Allocate enough pages to satisfy @len. Page size is determined at
+ * initialization of the allocator.
+ *
+ * The return is actually a pointer to a struct gk20a_page_alloc pointer. This
+ * is because it doesn't make a lot of sense to return the address of the first
+ * page in the list of pages (since they could be discontiguous). This has
+ * precedent in the dma_alloc APIs, though, it's really just an annoying
+ * artifact of the fact that the gk20a_alloc() API requires a u64 return type.
+ */
+static u64 gk20a_page_alloc(struct gk20a_allocator *__a, u64 len)
+{
+	struct gk20a_page_allocator *a = page_allocator(__a);
+	struct gk20a_page_alloc *alloc = NULL;
+	struct page_alloc_chunk *c;
+	u64 real_len;
+	u64 pages;
+	int i = 0;
+
+	/*
+	 * If we want contig pages we have to round up to a power of two. It's
+	 * easier to do that here than in the buddy allocator.
+	 */
+	real_len = a->flags & GPU_ALLOC_FORCE_CONTIG ?
+		roundup_pow_of_two(len) : len;
+
+	pages = ALIGN(real_len, a->page_size) >> a->page_shift;
+
+	alloc_lock(__a);
+
+	alloc = __gk20a_alloc_pages(a, pages);
+	if (IS_ERR(alloc)) {
+		alloc_unlock(__a);
+		palloc_dbg(a, "Alloc 0x%llx (%llu) (failed)\n",
+			   pages << a->page_shift, pages);
+		return 0;
+	}
+
+	__insert_page_alloc(a, alloc);
+	alloc_unlock(__a);
+
+	palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n",
+		   pages << a->page_shift, pages, alloc->base);
+	list_for_each_entry(c, &alloc->alloc_chunks, list_entry) {
+		palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n",
+			   i++, c->base, c->length);
+	}
+
+	a->nr_allocs++;
+	a->pages_alloced += pages;
+
+	if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER)
+		return alloc->base;
+	else
+		return (u64) (uintptr_t) alloc;
+}
+
+static void __gk20a_free_pages(struct gk20a_page_allocator *a,
+			       struct gk20a_page_alloc *alloc)
+{
+	struct page_alloc_chunk *chunk;
+
+	while (!list_empty(&alloc->alloc_chunks)) {
+		chunk = list_first_entry(&alloc->alloc_chunks,
+					 struct page_alloc_chunk,
+					 list_entry);
+		list_del(&chunk->list_entry);
+
+		gk20a_free(&a->source_allocator, chunk->base);
+		kfree(chunk);
+	}
+
+	kfree(alloc);
+}
+
+/*
+ * Note: this will remove the gk20a_page_alloc struct from the RB tree
+ * if it's found.
+ */
+static void gk20a_page_free(struct gk20a_allocator *__a, u64 base)
+{
+	struct gk20a_page_allocator *a = page_allocator(__a);
+	struct gk20a_page_alloc *alloc;
+
+	alloc_lock(__a);
+
+	if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER)
+		alloc = __find_page_alloc(a, base);
+	else
+		alloc = __find_page_alloc(a,
+			((struct gk20a_page_alloc *)(uintptr_t)base)->base);
+
+	if (!alloc) {
+		palloc_dbg(a, "Hrm, found no alloc?\n");
+		goto done;
+	}
+
+	a->nr_frees++;
+	a->pages_freed += (alloc->length >> a->page_shift);
+
+	/*
+	 * Frees *alloc.
+	 */
+	__gk20a_free_pages(a, alloc);
+
+	palloc_dbg(a, "Free 0x%010llx id=0x%010llx\n",
+		   alloc->length, alloc->base);
+
+done:
+	alloc_unlock(__a);
+}
+
+static struct gk20a_page_alloc *__gk20a_alloc_pages_fixed(
+	struct gk20a_page_allocator *a, u64 base, u64 length)
+{
+	struct gk20a_page_alloc *alloc;
+	struct page_alloc_chunk *c;
+
+	alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL);
+	c = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL);
+	if (!alloc || !c)
+		goto fail;
+
+	alloc->base = gk20a_alloc_fixed(&a->source_allocator, base, length);
+	if (!alloc->base) {
+		WARN(1, "gk20a: failed to fixed alloc pages @ 0x%010llx", base);
+		goto fail;
+	}
+
+	alloc->nr_chunks = 1;
+	alloc->length = length;
+	INIT_LIST_HEAD(&alloc->alloc_chunks);
+
+	c->base = alloc->base;
+	c->length = length;
+	list_add(&c->list_entry, &alloc->alloc_chunks);
+
+	return alloc;
+
+fail:
+	kfree(c);
+	kfree(alloc);
+	return ERR_PTR(-ENOMEM);
+}
+
+static u64 gk20a_page_alloc_fixed(struct gk20a_allocator *__a,
+				  u64 base, u64 len)
+{
+	struct gk20a_page_allocator *a = page_allocator(__a);
+	struct gk20a_page_alloc *alloc = NULL;
+	struct page_alloc_chunk *c;
+	u64 aligned_len, pages;
+	int i = 0;
+
+	aligned_len = ALIGN(len, a->page_size);
+	pages = aligned_len >> a->page_shift;
+
+	alloc_lock(__a);
+
+	alloc = __gk20a_alloc_pages_fixed(a, base, aligned_len);
+	if (IS_ERR(alloc)) {
+		alloc_unlock(__a);
+		return 0;
+	}
+
+	__insert_page_alloc(a, alloc);
+	alloc_unlock(__a);
+
+	palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n",
+		   alloc->base, aligned_len, pages);
+	list_for_each_entry(c, &alloc->alloc_chunks, list_entry) {
+		palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n",
+			   i++, c->base, c->length);
+	}
+
+	a->nr_fixed_allocs++;
+	a->pages_alloced += pages;
+
+	if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER)
+		return alloc->base;
+	else
+		return (u64) (uintptr_t) alloc;
+}
+
+static void gk20a_page_free_fixed(struct gk20a_allocator *__a,
+				  u64 base, u64 len)
+{
+	struct gk20a_page_allocator *a = page_allocator(__a);
+	struct gk20a_page_alloc *alloc;
+
+	alloc_lock(__a);
+
+	if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER) {
+		alloc = __find_page_alloc(a, base);
+		if (!alloc)
+			goto done;
+	} else {
+		alloc = (struct gk20a_page_alloc *) (uintptr_t) base;
+	}
+
+	/*
+	 * This works for the time being since the buddy allocator
+	 * uses the same free function for both fixed and regular
+	 * allocs. This would have to be updated if the underlying
+	 * allocator were to change.
+	 */
+	__gk20a_free_pages(a, alloc);
+
+	palloc_dbg(a, "Free [fixed] 0x%010llx + 0x%llx\n",
+		   alloc->base, alloc->length);
+	a->nr_fixed_frees++;
+	a->pages_freed += (alloc->length >> a->page_shift);
+
+done:
+	alloc_unlock(__a);
+}
+
+static void gk20a_page_allocator_destroy(struct gk20a_allocator *__a)
+{
+	struct gk20a_page_allocator *a = page_allocator(__a);
+
+	alloc_lock(__a);
+	kfree(a);
+	__a->priv = NULL;
+	alloc_unlock(__a);
+}
+
+static void gk20a_page_print_stats(struct gk20a_allocator *__a,
+				   struct seq_file *s, int lock)
+{
+	struct gk20a_page_allocator *a = page_allocator(__a);
+
+	if (lock)
+		alloc_lock(__a);
+
+	__alloc_pstat(s, __a, "Page allocator:\n");
+	__alloc_pstat(s, __a, " allocs %lld\n", a->nr_allocs);
+	__alloc_pstat(s, __a, " frees %lld\n", a->nr_frees);
+	__alloc_pstat(s, __a, " fixed_allocs %lld\n", a->nr_fixed_allocs);
+	__alloc_pstat(s, __a, " fixed_frees %lld\n", a->nr_fixed_frees);
+	__alloc_pstat(s, __a, " pages alloced %lld\n", a->pages_alloced);
+	__alloc_pstat(s, __a, " pages freed %lld\n", a->pages_freed);
+	__alloc_pstat(s, __a, "\n");
+	__alloc_pstat(s, __a, "Source alloc: %s\n",
+		      a->source_allocator.name);
+
+	gk20a_alloc_print_stats(&a->source_allocator, s, lock);
+
+	if (lock)
+		alloc_unlock(__a);
+}
+
+static const struct gk20a_allocator_ops page_ops = {
+	.alloc		= gk20a_page_alloc,
+	.free		= gk20a_page_free,
+
+	.alloc_fixed	= gk20a_page_alloc_fixed,
+	.free_fixed	= gk20a_page_free_fixed,
+
+	.base		= gk20a_page_alloc_base,
+	.length		= gk20a_page_alloc_length,
+	.end		= gk20a_page_alloc_end,
+	.inited		= gk20a_page_alloc_inited,
+
+	.fini		= gk20a_page_allocator_destroy,
+
+	.print_stats	= gk20a_page_print_stats,
+};
+
+int gk20a_page_allocator_init(struct gk20a_allocator *__a,
+			      const char *name, u64 base, u64 length,
+			      u64 blk_size, u64 flags)
+{
+	struct gk20a_page_allocator *a;
+	char buddy_name[sizeof(__a->name)];
+	int err;
+
+	mutex_lock(&meta_data_cache_lock);
+	if (!page_alloc_cache)
+		page_alloc_cache = KMEM_CACHE(gk20a_page_alloc, 0);
+	if (!page_alloc_chunk_cache)
+		page_alloc_chunk_cache = KMEM_CACHE(page_alloc_chunk, 0);
+	mutex_unlock(&meta_data_cache_lock);
+
+	if (!page_alloc_cache || !page_alloc_chunk_cache)
+		return -ENOMEM;
+
+	a = kzalloc(sizeof(struct gk20a_page_allocator), GFP_KERNEL);
+	if (!a)
+		return -ENOMEM;
+
+	err = __gk20a_alloc_common_init(__a, name, a, false, &page_ops);
+	if (err)
+		goto fail;
+
+	a->base = base;
+	a->length = length;
+	a->page_size = blk_size;
+	a->page_shift = __ffs(blk_size);
+	a->allocs = RB_ROOT;
+	a->owner = __a;
+	a->flags = flags;
+
+	snprintf(buddy_name, sizeof(buddy_name), "%s-src", name);
+
+	err = gk20a_buddy_allocator_init(&a->source_allocator, buddy_name, base,
+					 length, blk_size, 0);
+	if (err)
+		goto fail;
+
+	gk20a_init_alloc_debug(__a);
+	palloc_dbg(a, "New allocator: type page\n");
+	palloc_dbg(a, " base 0x%llx\n", a->base);
+	palloc_dbg(a, " size 0x%llx\n", a->length);
+	palloc_dbg(a, " page_size 0x%llx\n", a->page_size);
+	palloc_dbg(a, " flags 0x%llx\n", a->flags);
+
+	return 0;
+
+fail:
+	kfree(a);
+	return err;
+}
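The core of __gk20a_alloc_pages() above is the power-of-two splitting loop:
each pass takes the largest power-of-two number of pages that still fits the
remaining request (1 << __fls(pages)), and only halves a chunk when the buddy
allocator cannot supply it. A stand-alone sketch of just that arithmetic
(ordinary user-space C, not part of the patch; the retry-on-failure path and
the max_chunk_len clamp are omitted):

    /* Illustration only: a 13-page request is carved into 8 + 4 + 1 pages
     * when every buddy allocation succeeds on the first try. */
    #include <stdio.h>
    #include <stdint.h>

    /* Index of the most significant set bit, i.e. what __fls() returns. */
    static unsigned int msb(uint64_t x)
    {
            unsigned int r = 0;

            while (x >>= 1)
                    r++;
            return r;
    }

    int main(void)
    {
            uint64_t pages = 13;

            while (pages) {
                    uint64_t chunk_pages = 1ULL << msb(pages);

                    printf("chunk: %llu pages\n",
                           (unsigned long long)chunk_pages);
                    pages -= chunk_pages;
            }
            return 0;
    }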
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index b63444d0..cab10902 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -764,8 +764,10 @@ static int gk20a_init_vidmem(struct mm_gk20a *mm)
 	if (!size)
 		return 0;

-	err = gk20a_buddy_allocator_init(&g->mm.vidmem.allocator, "vidmem",
-			SZ_4K, size - SZ_4K, SZ_4K, 0);
+	err = gk20a_page_allocator_init(&g->mm.vidmem.allocator, "vidmem",
+			SZ_4K, size - SZ_4K, SZ_4K,
+			GPU_ALLOC_FORCE_CONTIG |
+			GPU_ALLOC_NO_SCATTER_GATHER);
 	if (err) {
 		gk20a_err(d, "Failed to register vidmem for size %zu: %d",
 			size, err);
diff --git a/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h b/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h
new file mode 100644
index 00000000..bce5b75e
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef PAGE_ALLOCATOR_PRIV_H
+#define PAGE_ALLOCATOR_PRIV_H
+
+#include <linux/list.h>
+#include <linux/rbtree.h>
+
+#include "gk20a_allocator.h"
+
+struct gk20a_allocator;
+
+struct page_alloc_chunk {
+	struct list_head list_entry;
+
+	u64 base;
+	u64 length;
+};
+
+/*
+ * Struct to handle internal management of page allocation. It holds a list
+ * of the chunks of page that make up the overall allocation - much like a
+ * scatter gather table.
+ */
+struct gk20a_page_alloc {
+	struct list_head alloc_chunks;
+
+	int nr_chunks;
+	u64 length;
+
+	/*
+	 * Only useful for the RB tree - since the alloc will have discontiguous
+	 * pages the base is essentially irrelevant except for the fact that it
+	 * is guarenteed to be unique.
+	 */
+	u64 base;
+
+	struct rb_node tree_entry;
+};
+
+struct gk20a_page_allocator {
+	struct gk20a_allocator *owner;	/* Owner of this allocator. */
+
+	/*
+	 * Use a buddy allocator to manage the allocation of the underlying
+	 * pages. This lets us abstract the discontiguous allocation handling
+	 * out of the annoyingly complicated buddy allocator.
+	 */
+	struct gk20a_allocator source_allocator;
+
+	/*
+	 * Page params.
+	 */
+	u64 base;
+	u64 length;
+	u64 page_size;
+	u32 page_shift;
+
+	struct rb_root allocs;		/* Outstanding allocations. */
+
+	u64 flags;
+
+	/*
+	 * Stat tracking.
+	 */
+	u64 nr_allocs;
+	u64 nr_frees;
+	u64 nr_fixed_allocs;
+	u64 nr_fixed_frees;
+	u64 pages_alloced;
+	u64 pages_freed;
+};
+
+static inline struct gk20a_page_allocator *page_allocator(
+	struct gk20a_allocator *a)
+{
+	return (struct gk20a_page_allocator *)(a)->priv;
+}
+
+static inline struct gk20a_allocator *palloc_owner(
+	struct gk20a_page_allocator *a)
+{
+	return a->owner;
+}
+
+#endif
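Callers that do not set GPU_ALLOC_NO_SCATTER_GATHER get back a struct
gk20a_page_alloc and are expected to walk its chunk list much as they would a
scatter-gather table. A minimal sketch of such a walk (illustrative only;
map_each_chunk is not part of this patch):

    /* Illustrative only: visit each physically contiguous chunk of an
     * allocation, e.g. to program GMMU mappings one chunk at a time. */
    static void map_each_chunk(struct gk20a_page_alloc *alloc)
    {
            struct page_alloc_chunk *c;

            list_for_each_entry(c, &alloc->alloc_chunks, list_entry)
                    pr_info("chunk: 0x%llx + 0x%llx\n", c->base, c->length);
    }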