Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
 -rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c  457
 -rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c                7
 -rw-r--r--  drivers/gpu/nvgpu/gk20a/page_allocator_priv.h    69
 3 files changed, 486 insertions, 47 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c
index 2e5d46b9..b8e38620 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c
@@ -17,6 +17,7 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/bitops.h>
+#include <linux/mm.h>
 
 #include "gk20a_allocator.h"
 #include "buddy_allocator_priv.h"
@@ -27,8 +28,59 @@
 
 static struct kmem_cache *page_alloc_cache;
 static struct kmem_cache *page_alloc_chunk_cache;
+static struct kmem_cache *page_alloc_slab_page_cache;
 static DEFINE_MUTEX(meta_data_cache_lock);
 
+/*
+ * Handle the book-keeping for these operations.
+ */
+static inline void add_slab_page_to_empty(struct page_alloc_slab *slab,
+					  struct page_alloc_slab_page *page)
+{
+	BUG_ON(page->state != SP_NONE);
+	list_add(&page->list_entry, &slab->empty);
+	slab->nr_empty++;
+	page->state = SP_EMPTY;
+}
+static inline void add_slab_page_to_partial(struct page_alloc_slab *slab,
+					    struct page_alloc_slab_page *page)
+{
+	BUG_ON(page->state != SP_NONE);
+	list_add(&page->list_entry, &slab->partial);
+	slab->nr_partial++;
+	page->state = SP_PARTIAL;
+}
+static inline void add_slab_page_to_full(struct page_alloc_slab *slab,
+					 struct page_alloc_slab_page *page)
+{
+	BUG_ON(page->state != SP_NONE);
+	list_add(&page->list_entry, &slab->full);
+	slab->nr_full++;
+	page->state = SP_FULL;
+}
+
+static inline void del_slab_page_from_empty(struct page_alloc_slab *slab,
+					    struct page_alloc_slab_page *page)
+{
+	list_del_init(&page->list_entry);
+	slab->nr_empty--;
+	page->state = SP_NONE;
+}
+static inline void del_slab_page_from_partial(struct page_alloc_slab *slab,
+					      struct page_alloc_slab_page *page)
+{
+	list_del_init(&page->list_entry);
+	slab->nr_partial--;
+	page->state = SP_NONE;
+}
+static inline void del_slab_page_from_full(struct page_alloc_slab *slab,
+					   struct page_alloc_slab_page *page)
+{
+	list_del_init(&page->list_entry);
+	slab->nr_full--;
+	page->state = SP_NONE;
+}
+
 static u64 gk20a_page_alloc_length(struct gk20a_allocator *a)
 {
 	struct gk20a_page_allocator *va = a->priv;
@@ -80,6 +132,26 @@ static void gk20a_page_release_co(struct gk20a_allocator *a,
 	gk20a_alloc_release_carveout(&va->source_allocator, co);
 }
 
+static void __gk20a_free_pages(struct gk20a_page_allocator *a,
+			       struct gk20a_page_alloc *alloc,
+			       bool free_buddy_alloc)
+{
+	struct page_alloc_chunk *chunk;
+
+	while (!list_empty(&alloc->alloc_chunks)) {
+		chunk = list_first_entry(&alloc->alloc_chunks,
+					 struct page_alloc_chunk,
+					 list_entry);
+		list_del(&chunk->list_entry);
+
+		if (free_buddy_alloc)
+			gk20a_free(&a->source_allocator, chunk->base);
+		kfree(chunk);
+	}
+
+	kfree(alloc);
+}
+
 static int __insert_page_alloc(struct gk20a_page_allocator *a,
 			       struct gk20a_page_alloc *alloc)
 {
@@ -134,13 +206,236 @@ static struct gk20a_page_alloc *__find_page_alloc(
 	return alloc;
 }
 
+static struct page_alloc_slab_page *alloc_slab_page(
+	struct gk20a_page_allocator *a,
+	struct page_alloc_slab *slab)
+{
+	struct page_alloc_slab_page *slab_page;
+
+	slab_page = kmem_cache_alloc(page_alloc_slab_page_cache, GFP_KERNEL);
+	if (!slab_page) {
+		palloc_dbg(a, "OOM: unable to alloc slab_page struct!\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	memset(slab_page, 0, sizeof(*slab_page));
+
+	slab_page->page_addr = gk20a_alloc(&a->source_allocator, a->page_size);
+	if (!slab_page->page_addr) {
+		kfree(slab_page);
+		palloc_dbg(a, "OOM: vidmem is full!\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	INIT_LIST_HEAD(&slab_page->list_entry);
+	slab_page->slab_size = slab->slab_size;
+	slab_page->nr_objects = a->page_size / slab->slab_size;
+	slab_page->nr_objects_alloced = 0;
+	slab_page->owner = slab;
+	slab_page->state = SP_NONE;
+
+	a->pages_alloced++;
+
+	palloc_dbg(a, "Allocated new slab page @ 0x%012llx size=%u\n",
+		   slab_page->page_addr, slab_page->slab_size);
+
+	return slab_page;
+}
+
+static void free_slab_page(struct gk20a_page_allocator *a,
+			   struct page_alloc_slab_page *slab_page)
+{
+	palloc_dbg(a, "Freeing slab page @ 0x%012llx\n", slab_page->page_addr);
+
+	BUG_ON((slab_page->state != SP_NONE && slab_page->state != SP_EMPTY) ||
+	       slab_page->nr_objects_alloced != 0 ||
+	       slab_page->bitmap != 0);
+
+	gk20a_free(&a->source_allocator, slab_page->page_addr);
+	a->pages_freed++;
+
+	kmem_cache_free(page_alloc_slab_page_cache, slab_page);
+}
+
+/*
+ * This expects @alloc to have 1 empty page_alloc_chunk already added to the
+ * alloc_chunks list.
+ */
+static int __do_slab_alloc(struct gk20a_page_allocator *a,
+			   struct page_alloc_slab *slab,
+			   struct gk20a_page_alloc *alloc)
+{
+	struct page_alloc_slab_page *slab_page = NULL;
+	struct page_alloc_chunk *chunk;
+	unsigned long offs;
+
+	/*
+	 * Check the partial and empty lists to see if we have some space
+	 * readily available. Take the slab_page out of whatever list it
+	 * was in since it may be put back into a different list later.
+	 */
+	if (!list_empty(&slab->partial)) {
+		slab_page = list_first_entry(&slab->partial,
+					     struct page_alloc_slab_page,
+					     list_entry);
+		del_slab_page_from_partial(slab, slab_page);
+	} else if (!list_empty(&slab->empty)) {
+		slab_page = list_first_entry(&slab->empty,
+					     struct page_alloc_slab_page,
+					     list_entry);
+		del_slab_page_from_empty(slab, slab_page);
+	}
+
+	if (!slab_page) {
+		slab_page = alloc_slab_page(a, slab);
+		if (IS_ERR(slab_page))
+			return PTR_ERR(slab_page);
+	}
+
+	/*
+	 * We now have a slab_page. Do the alloc.
+	 */
+	offs = bitmap_find_next_zero_area(&slab_page->bitmap,
+					  slab_page->nr_objects,
+					  0, 1, 0);
+	if (offs >= slab_page->nr_objects) {
+		WARN(1, "Empty/partial slab with no free objects?");
+
+		/* Add the buggy page to the full list... This isn't ideal. */
+		add_slab_page_to_full(slab, slab_page);
+		return -ENOMEM;
+	}
+
+	bitmap_set(&slab_page->bitmap, offs, 1);
+	slab_page->nr_objects_alloced++;
+
+	if (slab_page->nr_objects_alloced < slab_page->nr_objects)
+		add_slab_page_to_partial(slab, slab_page);
+	else if (slab_page->nr_objects_alloced == slab_page->nr_objects)
+		add_slab_page_to_full(slab, slab_page);
+	else
+		BUG(); /* Should be impossible to hit this. */
+
+	/*
+	 * Handle building the gk20a_page_alloc struct. We expect one
+	 * page_alloc_chunk to be present.
+	 */
+	alloc->slab_page = slab_page;
+	alloc->nr_chunks = 1;
+	alloc->length = slab_page->slab_size;
+	alloc->base = slab_page->page_addr + (offs * slab_page->slab_size);
+
+	chunk = list_first_entry(&alloc->alloc_chunks,
+				 struct page_alloc_chunk, list_entry);
+	chunk->base = alloc->base;
+	chunk->length = alloc->length;
+
+	return 0;
+}
+
+/*
+ * Allocate from a slab instead of directly from the page allocator.
+ */
+static struct gk20a_page_alloc *__gk20a_alloc_slab(
+	struct gk20a_page_allocator *a, u64 len)
+{
+	int err, slab_nr;
+	struct page_alloc_slab *slab;
+	struct gk20a_page_alloc *alloc = NULL;
+	struct page_alloc_chunk *chunk = NULL;
+
+	/*
+	 * Align the length to a page and then divide by the page size (4k for
+	 * this code). ilog2() of that then gets us the correct slab to use.
+	 */
+	slab_nr = (int)ilog2(PAGE_ALIGN(len) >> 12);
+	slab = &a->slabs[slab_nr];
+
+	alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL);
+	if (!alloc) {
+		palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n");
+		goto fail;
+	}
+	chunk = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL);
+	if (!chunk) {
+		palloc_dbg(a, "OOM: could not alloc alloc_chunk struct!\n");
+		goto fail;
+	}
+
+	INIT_LIST_HEAD(&alloc->alloc_chunks);
+	list_add(&chunk->list_entry, &alloc->alloc_chunks);
+
+	err = __do_slab_alloc(a, slab, alloc);
+	if (err)
+		goto fail;
+
+	palloc_dbg(a, "Alloc 0x%04llx sr=%d id=0x%010llx [slab]\n",
+		   len, slab_nr, alloc->base);
+	a->nr_slab_allocs++;
+
+	return alloc;
+
+fail:
+	kfree(alloc);
+	kfree(chunk);
+	return ERR_PTR(-ENOMEM);
+}
+
+static void __gk20a_free_slab(struct gk20a_page_allocator *a,
+			      struct gk20a_page_alloc *alloc)
+{
+	struct page_alloc_slab_page *slab_page = alloc->slab_page;
+	struct page_alloc_slab *slab = slab_page->owner;
+	enum slab_page_state new_state;
+	int offs;
+
+	offs = (alloc->base - slab_page->page_addr) / slab_page->slab_size;
+	bitmap_clear(&slab_page->bitmap, offs, 1);
+
+	slab_page->nr_objects_alloced--;
+
+	if (slab_page->nr_objects_alloced == 0)
+		new_state = SP_EMPTY;
+	else
+		new_state = SP_PARTIAL;
+
+	/*
+	 * Need to migrate the page to a different list.
+	 */
+	if (new_state != slab_page->state) {
+		/* Delete - can't be in empty. */
+		if (slab_page->state == SP_PARTIAL)
+			del_slab_page_from_partial(slab, slab_page);
+		else
+			del_slab_page_from_full(slab, slab_page);
+
+		/* And add. */
+		if (new_state == SP_EMPTY) {
+			if (list_empty(&slab->empty))
+				add_slab_page_to_empty(slab, slab_page);
+			else
+				free_slab_page(a, slab_page);
+		} else {
+			add_slab_page_to_partial(slab, slab_page);
+		}
+	}
+
+	/*
+	 * Now handle the page_alloc.
+	 */
+	__gk20a_free_pages(a, alloc, false);
+	a->nr_slab_frees++;
+
+	return;
+}
+
 /*
  * Allocate physical pages. Since the underlying allocator is a buddy allocator
  * the returned pages are always contiguous. However, since there could be
  * fragmentation in the space this allocator will collate smaller non-contiguous
  * allocations together if necessary.
  */
-static struct gk20a_page_alloc *__gk20a_alloc_pages(
+static struct gk20a_page_alloc *__do_gk20a_alloc_pages(
 		struct gk20a_page_allocator *a, u64 pages)
 {
 	struct gk20a_page_alloc *alloc;
@@ -152,6 +447,8 @@ static struct gk20a_page_alloc *__gk20a_alloc_pages(
 	if (!alloc)
 		goto fail;
 
+	memset(alloc, 0, sizeof(*alloc));
+
 	INIT_LIST_HEAD(&alloc->alloc_chunks);
 	alloc->length = pages << a->page_shift;
 
@@ -233,6 +530,33 @@ fail:
 	return ERR_PTR(-ENOMEM);
 }
 
+static struct gk20a_page_alloc *__gk20a_alloc_pages(
+	struct gk20a_page_allocator *a, u64 len)
+{
+	struct gk20a_page_alloc *alloc = NULL;
+	struct page_alloc_chunk *c;
+	u64 pages;
+	int i = 0;
+
+	pages = ALIGN(len, a->page_size) >> a->page_shift;
+
+	alloc = __do_gk20a_alloc_pages(a, pages);
+	if (IS_ERR(alloc)) {
+		palloc_dbg(a, "Alloc 0x%llx (%llu) (failed)\n",
+			   pages << a->page_shift, pages);
+		return NULL;
+	}
+
+	palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n",
+		   pages << a->page_shift, pages, alloc->base);
+	list_for_each_entry(c, &alloc->alloc_chunks, list_entry) {
+		palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n",
+			   i++, c->base, c->length);
+	}
+
+	return alloc;
+}
+
 /*
  * Allocate enough pages to satisfy @len. Page size is determined at
  * initialization of the allocator.
@@ -247,10 +571,7 @@ static u64 gk20a_page_alloc(struct gk20a_allocator *__a, u64 len)
 {
 	struct gk20a_page_allocator *a = page_allocator(__a);
 	struct gk20a_page_alloc *alloc = NULL;
-	struct page_alloc_chunk *c;
 	u64 real_len;
-	u64 pages;
-	int i = 0;
 
 	/*
 	 * If we want contig pages we have to round up to a power of two. It's
@@ -259,30 +580,24 @@ static u64 gk20a_page_alloc(struct gk20a_allocator *__a, u64 len)
 	real_len = a->flags & GPU_ALLOC_FORCE_CONTIG ?
 		roundup_pow_of_two(len) : len;
 
-	pages = ALIGN(real_len, a->page_size) >> a->page_shift;
-
 	alloc_lock(__a);
+	if (a->flags & GPU_ALLOC_4K_VIDMEM_PAGES &&
+	    real_len <= (a->page_size / 2))
+		alloc = __gk20a_alloc_slab(a, real_len);
+	else
+		alloc = __gk20a_alloc_pages(a, real_len);
 
-	alloc = __gk20a_alloc_pages(a, pages);
-	if (IS_ERR(alloc)) {
+	if (!alloc) {
 		alloc_unlock(__a);
-		palloc_dbg(a, "Alloc 0x%llx (%llu) (failed)\n",
-			   pages << a->page_shift, pages);
 		return 0;
 	}
 
 	__insert_page_alloc(a, alloc);
-	alloc_unlock(__a);
-
-	palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n",
-		   pages << a->page_shift, pages, alloc->base);
-	list_for_each_entry(c, &alloc->alloc_chunks, list_entry) {
-		palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n",
-			   i++, c->base, c->length);
-	}
 
 	a->nr_allocs++;
-	a->pages_alloced += pages;
+	if (real_len > a->page_size / 2)
+		a->pages_alloced += alloc->length >> a->page_shift;
+	alloc_unlock(__a);
 
 	if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER)
 		return alloc->base;
@@ -290,24 +605,6 @@ static u64 gk20a_page_alloc(struct gk20a_allocator *__a, u64 len)
 	return (u64) (uintptr_t) alloc;
 }
 
-static void __gk20a_free_pages(struct gk20a_page_allocator *a,
-			       struct gk20a_page_alloc *alloc)
-{
-	struct page_alloc_chunk *chunk;
-
-	while (!list_empty(&alloc->alloc_chunks)) {
-		chunk = list_first_entry(&alloc->alloc_chunks,
-					 struct page_alloc_chunk,
-					 list_entry);
-		list_del(&chunk->list_entry);
-
-		gk20a_free(&a->source_allocator, chunk->base);
-		kfree(chunk);
-	}
-
-	kfree(alloc);
-}
-
 /*
  * Note: this will remove the gk20a_page_alloc struct from the RB tree
  * if it's found.
@@ -331,14 +628,18 @@ static void gk20a_page_free(struct gk20a_allocator *__a, u64 base)
 	}
 
 	a->nr_frees++;
-	a->pages_freed += (alloc->length >> a->page_shift);
 
 	/*
 	 * Frees *alloc.
 	 */
-	__gk20a_free_pages(a, alloc);
+	if (alloc->slab_page) {
+		__gk20a_free_slab(a, alloc);
+	} else {
+		a->pages_freed += (alloc->length >> a->page_shift);
+		__gk20a_free_pages(a, alloc, true);
+	}
 
-	palloc_dbg(a, "Free 0x%010llx id=0x%010llx\n",
+	palloc_dbg(a, "Free 0x%llx id=0x%010llx\n",
 		   alloc->length, alloc->base);
 
 done:
@@ -439,7 +740,7 @@ static void gk20a_page_free_fixed(struct gk20a_allocator *__a,
 	 * allocs. This would have to be updated if the underlying
 	 * allocator were to change.
 	 */
-	__gk20a_free_pages(a, alloc);
+	__gk20a_free_pages(a, alloc, true);
 
 	palloc_dbg(a, "Free [fixed] 0x%010llx + 0x%llx\n",
 		   alloc->base, alloc->length);
@@ -464,6 +765,7 @@ static void gk20a_page_print_stats(struct gk20a_allocator *__a,
 		struct seq_file *s, int lock)
 {
 	struct gk20a_page_allocator *a = page_allocator(__a);
+	int i;
 
 	if (lock)
 		alloc_lock(__a);
@@ -473,12 +775,33 @@ static void gk20a_page_print_stats(struct gk20a_allocator *__a,
 	__alloc_pstat(s, __a, " frees %lld\n", a->nr_frees);
 	__alloc_pstat(s, __a, " fixed_allocs %lld\n", a->nr_fixed_allocs);
 	__alloc_pstat(s, __a, " fixed_frees %lld\n", a->nr_fixed_frees);
+	__alloc_pstat(s, __a, " slab_allocs %lld\n", a->nr_slab_allocs);
+	__alloc_pstat(s, __a, " slab_frees %lld\n", a->nr_slab_frees);
 	__alloc_pstat(s, __a, " pages alloced %lld\n", a->pages_alloced);
 	__alloc_pstat(s, __a, " pages freed %lld\n", a->pages_freed);
 	__alloc_pstat(s, __a, "\n");
+
+	/*
+	 * Slab info.
+	 */
+	if (a->flags & GPU_ALLOC_4K_VIDMEM_PAGES) {
+		__alloc_pstat(s, __a, "Slabs:\n");
+		__alloc_pstat(s, __a, " size empty partial full\n");
+		__alloc_pstat(s, __a, " ---- ----- ------- ----\n");
+
+		for (i = 0; i < a->nr_slabs; i++) {
+			struct page_alloc_slab *slab = &a->slabs[i];
+
+			__alloc_pstat(s, __a, " %-9u %-9d %-9u %u\n",
+				      slab->slab_size,
+				      slab->nr_empty, slab->nr_partial,
+				      slab->nr_full);
+		}
+		__alloc_pstat(s, __a, "\n");
+	}
+
 	__alloc_pstat(s, __a, "Source alloc: %s\n",
 		      a->source_allocator.name);
-
 	gk20a_alloc_print_stats(&a->source_allocator, s, lock);
 
 	if (lock)
@@ -506,6 +829,43 @@ static const struct gk20a_allocator_ops page_ops = {
 	.print_stats = gk20a_page_print_stats,
 };
 
+/*
+ * nr_slabs is computed as follows: divide page_size by 4096 to get the number
+ * of 4k pages in page_size. Then take the base 2 log of that to get the number
+ * of slabs. For a 64k page_size that works out like this:
+ *
+ *   1024*64 / 1024*4 = 16
+ *   ilog2(16) = 4
+ *
+ * That gives buckets of 1, 2, 4, and 8 pages (i.e. 4k, 8k, 16k, 32k).
+ */
+static int gk20a_page_alloc_init_slabs(struct gk20a_page_allocator *a)
+{
+	size_t nr_slabs = ilog2(a->page_size >> 12);
+	int i;
+
+	a->slabs = kcalloc(nr_slabs,
+			   sizeof(struct page_alloc_slab),
+			   GFP_KERNEL);
+	if (!a->slabs)
+		return -ENOMEM;
+	a->nr_slabs = nr_slabs;
+
+	for (i = 0; i < nr_slabs; i++) {
+		struct page_alloc_slab *slab = &a->slabs[i];
+
+		slab->slab_size = SZ_4K * (1 << i);
+		INIT_LIST_HEAD(&slab->empty);
+		INIT_LIST_HEAD(&slab->partial);
+		INIT_LIST_HEAD(&slab->full);
+		slab->nr_empty = 0;
+		slab->nr_partial = 0;
+		slab->nr_full = 0;
+	}
+
+	return 0;
+}
+
 int gk20a_page_allocator_init(struct gk20a_allocator *__a,
 			      const char *name, u64 base, u64 length,
 			      u64 blk_size, u64 flags)
@@ -519,11 +879,17 @@ int gk20a_page_allocator_init(struct gk20a_allocator *__a,
 		page_alloc_cache = KMEM_CACHE(gk20a_page_alloc, 0);
 	if (!page_alloc_chunk_cache)
 		page_alloc_chunk_cache = KMEM_CACHE(page_alloc_chunk, 0);
+	if (!page_alloc_slab_page_cache)
+		page_alloc_slab_page_cache =
+			KMEM_CACHE(page_alloc_slab_page, 0);
 	mutex_unlock(&meta_data_cache_lock);
 
 	if (!page_alloc_cache || !page_alloc_chunk_cache)
 		return -ENOMEM;
 
+	if (blk_size < SZ_4K)
+		return -EINVAL;
+
 	a = kzalloc(sizeof(struct gk20a_page_allocator), GFP_KERNEL);
 	if (!a)
 		return -ENOMEM;
@@ -540,6 +906,12 @@ int gk20a_page_allocator_init(struct gk20a_allocator *__a,
 	a->owner = __a;
 	a->flags = flags;
 
+	if (flags & GPU_ALLOC_4K_VIDMEM_PAGES && blk_size > SZ_4K) {
+		err = gk20a_page_alloc_init_slabs(a);
+		if (err)
+			goto fail;
+	}
+
 	snprintf(buddy_name, sizeof(buddy_name), "%s-src", name);
 
 	err = gk20a_buddy_allocator_init(&a->source_allocator, buddy_name, base,
@@ -553,6 +925,7 @@ int gk20a_page_allocator_init(struct gk20a_allocator *__a,
 	palloc_dbg(a, " size 0x%llx\n", a->length);
 	palloc_dbg(a, " page_size 0x%llx\n", a->page_size);
 	palloc_dbg(a, " flags 0x%llx\n", a->flags);
+	palloc_dbg(a, " slabs: %d\n", a->nr_slabs);
 
 	return 0;
 
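Note (editorial, not part of the patch): the slab path above hinges on two small pieces of arithmetic. With GPU_ALLOC_4K_VIDMEM_PAGES set, gk20a_page_alloc() routes any request of at most half a large page to __gk20a_alloc_slab(), which picks its bucket via ilog2(PAGE_ALIGN(len) >> 12). The stand-alone user-space C sketch below mirrors that selection for a 64k large page; ALIGN_UP(), ilog2_u64() and slab_index_for_len() are illustrative names, not driver symbols.

/*
 * Illustrative sketch of the slab bucket selection and the slab-vs-page
 * decision described above. Compile with any C compiler; nothing here is
 * kernel code.
 */
#include <stdio.h>
#include <stdint.h>

#define SZ_4K		4096ULL
#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))

/* Same result as the kernel's ilog2() for the values used here. */
static unsigned int ilog2_u64(uint64_t v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

/* Mirrors: slab_nr = (int)ilog2(PAGE_ALIGN(len) >> 12); */
static unsigned int slab_index_for_len(uint64_t len)
{
	return ilog2_u64(ALIGN_UP(len, SZ_4K) >> 12);
}

int main(void)
{
	const uint64_t page_size = 64 * 1024;	/* large vidmem page */
	const uint64_t lens[] = { 100, 4096, 8192, 16384, 32768, 65536 };
	size_t i;

	for (i = 0; i < sizeof(lens) / sizeof(lens[0]); i++) {
		uint64_t len = lens[i];

		/* gk20a_page_alloc(): slab path only for len <= page_size/2. */
		if (len <= page_size / 2) {
			unsigned int nr = slab_index_for_len(len);

			printf("len=%6llu -> slab bucket %u (%lluk objects)\n",
			       (unsigned long long)len, nr,
			       (unsigned long long)((SZ_4K << nr) / 1024));
		} else {
			printf("len=%6llu -> page allocator path\n",
			       (unsigned long long)len);
		}
	}
	return 0;
}

For a 64k page size this yields buckets 0-3 (4k, 8k, 16k, 32k); anything larger than 32k falls through to the buddy-backed page path.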
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 1bacb70a..df58feb2 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -925,8 +925,11 @@ static int gk20a_init_vidmem(struct mm_gk20a *mm)
 				bootstrap_base, bootstrap_size,
 				SZ_4K, 0);
 
-	err = gk20a_page_allocator_init(&g->mm.vidmem.allocator, "vidmem",
-			base, size - base, default_page_size, 0);
+	err = gk20a_page_allocator_init(&g->mm.vidmem.allocator,
+					"vidmem",
+					base, size - base,
+					default_page_size,
+					GPU_ALLOC_4K_VIDMEM_PAGES);
 	if (err) {
 		gk20a_err(d, "Failed to register vidmem for size %zu: %d",
 			  size, err);
diff --git a/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h b/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h
index 3d4e3c43..7d7f43c2 100644
--- a/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h
+++ b/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h
@@ -19,12 +19,63 @@
 
 #include <linux/list.h>
 #include <linux/rbtree.h>
-#include <gk20a/gk20a_allocator.h>
 
 #include "gk20a_allocator.h"
 
 struct gk20a_allocator;
 
+/*
+ * This allocator implements the ability to do SLAB style allocation since the
+ * GPU has two page sizes available - 4k and 64k/128k. When the default
+ * granularity is the large page size (64k/128k) small allocations become very
+ * space inefficient. This is most notable in PDE and PTE blocks which are 4k
+ * in size.
+ *
+ * Thus we need the ability to suballocate in 64k pages. The way we do this for
+ * the GPU is as follows. We have several buckets for sub-64K allocations:
+ *
+ *   B0 - 4k
+ *   B1 - 8k
+ *   B2 - 16k
+ *   B3 - 32k
+ *   B4 - 64k (for when large pages are 128k)
+ *
+ * When an allocation comes in for less than the large page size (from now on
+ * assumed to be 64k) the allocation is satisfied by one of the buckets.
+ */
+struct page_alloc_slab {
+	struct list_head empty;
+	struct list_head partial;
+	struct list_head full;
+
+	int nr_empty;
+	int nr_partial;
+	int nr_full;
+
+	u32 slab_size;
+};
+
+enum slab_page_state {
+	SP_EMPTY,
+	SP_PARTIAL,
+	SP_FULL,
+	SP_NONE
+};
+
+struct page_alloc_slab_page {
+	unsigned long bitmap;
+	u64 page_addr;
+	u32 slab_size;
+
+	u32 nr_objects;
+	u32 nr_objects_alloced;
+
+	enum slab_page_state state;
+
+	struct page_alloc_slab *owner;
+	struct list_head list_entry;
+};
+
 struct page_alloc_chunk {
 	struct list_head list_entry;
 
@@ -34,7 +85,7 @@ struct page_alloc_chunk {
 
 /*
  * Struct to handle internal management of page allocation. It holds a list
- * of the chunks of page that make up the overall allocation - much like a
+ * of the chunks of pages that make up the overall allocation - much like a
  * scatter gather table.
  */
 struct gk20a_page_alloc {
@@ -44,13 +95,20 @@ struct gk20a_page_alloc {
 	u64 length;
 
 	/*
-	 * Only useful for the RB tree - since the alloc will have discontiguous
+	 * Only useful for the RB tree - since the alloc may have discontiguous
 	 * pages the base is essentially irrelevant except for the fact that it
 	 * is guarenteed to be unique.
 	 */
 	u64 base;
 
 	struct rb_node tree_entry;
+
+	/*
+	 * Set if this is a slab alloc. Points back to the slab page that owns
+	 * this particular allocation. nr_chunks will always be 1 if this is
+	 * set.
+	 */
+	struct page_alloc_slab_page *slab_page;
 };
 
 struct gk20a_page_allocator {
@@ -73,6 +131,9 @@ struct gk20a_page_allocator {
 
 	struct rb_root allocs;		/* Outstanding allocations. */
 
+	struct page_alloc_slab *slabs;
+	int nr_slabs;
+
 	u64 flags;
 
 	/*
@@ -82,6 +143,8 @@ struct gk20a_page_allocator {
 	u64 nr_frees;
 	u64 nr_fixed_allocs;
 	u64 nr_fixed_frees;
+	u64 nr_slab_allocs;
+	u64 nr_slab_frees;
 	u64 pages_alloced;
 	u64 pages_freed;
 };
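Note (editorial, not part of the patch): to make the empty/partial/full bookkeeping in page_allocator_priv.h concrete, here is a minimal stand-alone sketch of the slab-page state machine; struct slab_page, sp_alloc() and sp_free() are invented names, not driver symbols. A page with no objects allocated is empty, one with some objects in use is partial, and one whose bitmap is exhausted is full - the same transitions the add_/del_slab_page_* helpers drive in gk20a_allocator_page.c.

/*
 * Stand-alone illustration of the slab page state machine. Compile with any
 * C compiler; nothing here is kernel code.
 */
#include <stdio.h>
#include <stdint.h>

enum sp_state { SP_EMPTY, SP_PARTIAL, SP_FULL, SP_NONE };

struct slab_page {
	uint64_t bitmap;		/* one bit per object, like page_alloc_slab_page */
	unsigned int nr_objects;	/* page_size / slab_size */
	unsigned int nr_alloced;
	enum sp_state state;
};

static enum sp_state state_for(const struct slab_page *p)
{
	if (p->nr_alloced == 0)
		return SP_EMPTY;
	if (p->nr_alloced == p->nr_objects)
		return SP_FULL;
	return SP_PARTIAL;
}

/* Grab the first clear bit, mark it used, and recompute the state. */
static int sp_alloc(struct slab_page *p)
{
	unsigned int i;

	for (i = 0; i < p->nr_objects; i++) {
		if (!(p->bitmap & (1ULL << i))) {
			p->bitmap |= 1ULL << i;
			p->nr_alloced++;
			p->state = state_for(p);
			return (int)i;	/* object index within the page */
		}
	}
	return -1;			/* page is full */
}

static void sp_free(struct slab_page *p, int idx)
{
	p->bitmap &= ~(1ULL << idx);
	p->nr_alloced--;
	p->state = state_for(p);
}

int main(void)
{
	/* A 64k page carved into 16k objects -> 4 objects per page. */
	struct slab_page p = { .nr_objects = 4, .state = SP_NONE };
	int a = sp_alloc(&p);		/* -> SP_PARTIAL */
	int b = sp_alloc(&p);		/* still SP_PARTIAL */

	printf("after 2 allocs: state=%d alloced=%u\n", p.state, p.nr_alloced);

	sp_free(&p, a);
	sp_free(&p, b);			/* back to SP_EMPTY */
	printf("after 2 frees:  state=%d alloced=%u\n", p.state, p.nr_alloced);
	return 0;
}

In the driver the same transitions decide which of the slab's empty/partial/full lists a page sits on, and __gk20a_free_slab() keeps at most one fully-empty page cached before handing pages back to the buddy allocator.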