author     Alex Waterman <alexw@nvidia.com>  2016-09-22 13:13:31 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>  2016-10-18 15:24:33 -0400
commit     fc4f0ddddb50da826b527b7791028a67539f34c8 (patch)
tree       0aa570dda2b3714e4ddc103c48821eb2f9903d10 /drivers
parent     641444188f18dbf56dda980e31f1b404dbb6f166 (diff)
gpu: nvgpu: SLAB allocation for page allocator
Add the ability to do "SLAB" allocation in the page allocator. This is
generally useful since the allocator manages 64K pages but often we only
need 4k chunks (for example when allocating memory for page table entries).

Bug 1799159
JIRA DNVGPU-100

Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1225322
(cherry picked from commit 299a5639243e44be504391d9155b4ae17d914aa2)
Change-Id: Ib3a8558d40ba16bd3a413f4fd38b146beaa3c66b
Reviewed-on: http://git-master/r/1227924
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
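Editor's aside: to make the bucket math concrete before diving into the diff, here is a minimal standalone sketch of how a small request length maps onto a slab bucket. It mirrors the ilog2(PAGE_ALIGN(len) >> 12) computation that __gk20a_alloc_slab() performs below; the function and macro names are illustrative only and are not part of the driver.

/*
 * Standalone sketch of the slab-bucket selection introduced by this patch,
 * assuming 4K slabs under a 64K large page. Illustrative code, not driver
 * code.
 */
#include <stdio.h>
#include <stdint.h>

#define SLAB_MIN 4096ULL /* smallest slab: 4K */

/* Round len up to 4K, divide by 4K, then take floor(log2()) of the result. */
static int slab_bucket(uint64_t len)
{
	uint64_t pages4k = (len + SLAB_MIN - 1) / SLAB_MIN; /* PAGE_ALIGN(len) >> 12 */
	int nr = 0;

	while (pages4k >>= 1) /* same result as the kernel's ilog2() */
		nr++;
	return nr;
}

int main(void)
{
	uint64_t lens[] = { 64, 4096, 8192, 16384, 32768 };
	unsigned int i;

	for (i = 0; i < sizeof(lens) / sizeof(lens[0]); i++)
		printf("len=%llu -> bucket %d (slab size %lluK)\n",
		       (unsigned long long)lens[i], slab_bucket(lens[i]),
		       (unsigned long long)(SLAB_MIN << slab_bucket(lens[i])) / 1024);
	return 0;
}

Requests larger than half the large page size bypass the slabs entirely and take the regular page path, as the gk20a_page_alloc() hunk further down shows.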
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c  | 457
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c              |   7
-rw-r--r--  drivers/gpu/nvgpu/gk20a/page_allocator_priv.h   |  69
3 files changed, 486 insertions, 47 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c
index 2e5d46b9..b8e38620 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_page.c
@@ -17,6 +17,7 @@
17#include <linux/kernel.h> 17#include <linux/kernel.h>
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include <linux/bitops.h> 19#include <linux/bitops.h>
20#include <linux/mm.h>
20 21
21#include "gk20a_allocator.h" 22#include "gk20a_allocator.h"
22#include "buddy_allocator_priv.h" 23#include "buddy_allocator_priv.h"
@@ -27,8 +28,59 @@
27 28
28static struct kmem_cache *page_alloc_cache; 29static struct kmem_cache *page_alloc_cache;
29static struct kmem_cache *page_alloc_chunk_cache; 30static struct kmem_cache *page_alloc_chunk_cache;
31static struct kmem_cache *page_alloc_slab_page_cache;
30static DEFINE_MUTEX(meta_data_cache_lock); 32static DEFINE_MUTEX(meta_data_cache_lock);
31 33
34/*
35 * Handle the book-keeping for these operations.
36 */
37static inline void add_slab_page_to_empty(struct page_alloc_slab *slab,
38 struct page_alloc_slab_page *page)
39{
40 BUG_ON(page->state != SP_NONE);
41 list_add(&page->list_entry, &slab->empty);
42 slab->nr_empty++;
43 page->state = SP_EMPTY;
44}
45static inline void add_slab_page_to_partial(struct page_alloc_slab *slab,
46 struct page_alloc_slab_page *page)
47{
48 BUG_ON(page->state != SP_NONE);
49 list_add(&page->list_entry, &slab->partial);
50 slab->nr_partial++;
51 page->state = SP_PARTIAL;
52}
53static inline void add_slab_page_to_full(struct page_alloc_slab *slab,
54 struct page_alloc_slab_page *page)
55{
56 BUG_ON(page->state != SP_NONE);
57 list_add(&page->list_entry, &slab->full);
58 slab->nr_full++;
59 page->state = SP_FULL;
60}
61
62static inline void del_slab_page_from_empty(struct page_alloc_slab *slab,
63 struct page_alloc_slab_page *page)
64{
65 list_del_init(&page->list_entry);
66 slab->nr_empty--;
67 page->state = SP_NONE;
68}
69static inline void del_slab_page_from_partial(struct page_alloc_slab *slab,
70 struct page_alloc_slab_page *page)
71{
72 list_del_init(&page->list_entry);
73 slab->nr_partial--;
74 page->state = SP_NONE;
75}
76static inline void del_slab_page_from_full(struct page_alloc_slab *slab,
77 struct page_alloc_slab_page *page)
78{
79 list_del_init(&page->list_entry);
80 slab->nr_full--;
81 page->state = SP_NONE;
82}
83
32static u64 gk20a_page_alloc_length(struct gk20a_allocator *a) 84static u64 gk20a_page_alloc_length(struct gk20a_allocator *a)
33{ 85{
34 struct gk20a_page_allocator *va = a->priv; 86 struct gk20a_page_allocator *va = a->priv;
@@ -80,6 +132,26 @@ static void gk20a_page_release_co(struct gk20a_allocator *a,
80 gk20a_alloc_release_carveout(&va->source_allocator, co); 132 gk20a_alloc_release_carveout(&va->source_allocator, co);
81} 133}
82 134
135static void __gk20a_free_pages(struct gk20a_page_allocator *a,
136 struct gk20a_page_alloc *alloc,
137 bool free_buddy_alloc)
138{
139 struct page_alloc_chunk *chunk;
140
141 while (!list_empty(&alloc->alloc_chunks)) {
142 chunk = list_first_entry(&alloc->alloc_chunks,
143 struct page_alloc_chunk,
144 list_entry);
145 list_del(&chunk->list_entry);
146
147 if (free_buddy_alloc)
148 gk20a_free(&a->source_allocator, chunk->base);
149 kfree(chunk);
150 }
151
152 kfree(alloc);
153}
154
83static int __insert_page_alloc(struct gk20a_page_allocator *a, 155static int __insert_page_alloc(struct gk20a_page_allocator *a,
84 struct gk20a_page_alloc *alloc) 156 struct gk20a_page_alloc *alloc)
85{ 157{
@@ -134,13 +206,236 @@ static struct gk20a_page_alloc *__find_page_alloc(
134 return alloc; 206 return alloc;
135} 207}
136 208
209static struct page_alloc_slab_page *alloc_slab_page(
210 struct gk20a_page_allocator *a,
211 struct page_alloc_slab *slab)
212{
213 struct page_alloc_slab_page *slab_page;
214
215 slab_page = kmem_cache_alloc(page_alloc_slab_page_cache, GFP_KERNEL);
216 if (!slab_page) {
217 palloc_dbg(a, "OOM: unable to alloc slab_page struct!\n");
218 return ERR_PTR(-ENOMEM);
219 }
220
221 memset(slab_page, 0, sizeof(*slab_page));
222
223 slab_page->page_addr = gk20a_alloc(&a->source_allocator, a->page_size);
224 if (!slab_page->page_addr) {
225 kfree(slab_page);
226 palloc_dbg(a, "OOM: vidmem is full!\n");
227 return ERR_PTR(-ENOMEM);
228 }
229
230 INIT_LIST_HEAD(&slab_page->list_entry);
231 slab_page->slab_size = slab->slab_size;
232 slab_page->nr_objects = a->page_size / slab->slab_size;
233 slab_page->nr_objects_alloced = 0;
234 slab_page->owner = slab;
235 slab_page->state = SP_NONE;
236
237 a->pages_alloced++;
238
239 palloc_dbg(a, "Allocated new slab page @ 0x%012llx size=%u\n",
240 slab_page->page_addr, slab_page->slab_size);
241
242 return slab_page;
243}
244
245static void free_slab_page(struct gk20a_page_allocator *a,
246 struct page_alloc_slab_page *slab_page)
247{
248 palloc_dbg(a, "Freeing slab page @ 0x%012llx\n", slab_page->page_addr);
249
250 BUG_ON((slab_page->state != SP_NONE && slab_page->state != SP_EMPTY) ||
251 slab_page->nr_objects_alloced != 0 ||
252 slab_page->bitmap != 0);
253
254 gk20a_free(&a->source_allocator, slab_page->page_addr);
255 a->pages_freed++;
256
257 kmem_cache_free(page_alloc_slab_page_cache, slab_page);
258}
259
260/*
261 * This expects @alloc to have 1 empty page_alloc_chunk already added to the
262 * alloc_chunks list.
263 */
264static int __do_slab_alloc(struct gk20a_page_allocator *a,
265 struct page_alloc_slab *slab,
266 struct gk20a_page_alloc *alloc)
267{
268 struct page_alloc_slab_page *slab_page = NULL;
269 struct page_alloc_chunk *chunk;
270 unsigned long offs;
271
272 /*
273 * Check the partial and empty lists to see if we have some space
274 * readily available. Take the slab_page out of whatever list it
275 * was in since it may be put back into a different list later.
276 */
277 if (!list_empty(&slab->partial)) {
278 slab_page = list_first_entry(&slab->partial,
279 struct page_alloc_slab_page,
280 list_entry);
281 del_slab_page_from_partial(slab, slab_page);
282 } else if (!list_empty(&slab->empty)) {
283 slab_page = list_first_entry(&slab->empty,
284 struct page_alloc_slab_page,
285 list_entry);
286 del_slab_page_from_empty(slab, slab_page);
287 }
288
289 if (!slab_page) {
290 slab_page = alloc_slab_page(a, slab);
291 if (IS_ERR(slab_page))
292 return PTR_ERR(slab_page);
293 }
294
295 /*
296 * We now have a slab_page. Do the alloc.
297 */
298 offs = bitmap_find_next_zero_area(&slab_page->bitmap,
299 slab_page->nr_objects,
300 0, 1, 0);
301 if (offs >= slab_page->nr_objects) {
302 WARN(1, "Empty/partial slab with no free objects?");
303
304 /* Add the buggy page to the full list... This isn't ideal. */
305 add_slab_page_to_full(slab, slab_page);
306 return -ENOMEM;
307 }
308
309 bitmap_set(&slab_page->bitmap, offs, 1);
310 slab_page->nr_objects_alloced++;
311
312 if (slab_page->nr_objects_alloced < slab_page->nr_objects)
313 add_slab_page_to_partial(slab, slab_page);
314 else if (slab_page->nr_objects_alloced == slab_page->nr_objects)
315 add_slab_page_to_full(slab, slab_page);
316 else
317 BUG(); /* Should be impossible to hit this. */
318
319 /*
320 * Handle building the gk20a_page_alloc struct. We expect one
321 * page_alloc_chunk to be present.
322 */
323 alloc->slab_page = slab_page;
324 alloc->nr_chunks = 1;
325 alloc->length = slab_page->slab_size;
326 alloc->base = slab_page->page_addr + (offs * slab_page->slab_size);
327
328 chunk = list_first_entry(&alloc->alloc_chunks,
329 struct page_alloc_chunk, list_entry);
330 chunk->base = alloc->base;
331 chunk->length = alloc->length;
332
333 return 0;
334}
335
336/*
337 * Allocate from a slab instead of directly from the page allocator.
338 */
339static struct gk20a_page_alloc *__gk20a_alloc_slab(
340 struct gk20a_page_allocator *a, u64 len)
341{
342 int err, slab_nr;
343 struct page_alloc_slab *slab;
344 struct gk20a_page_alloc *alloc = NULL;
345 struct page_alloc_chunk *chunk = NULL;
346
347 /*
348 * Align the length to a page and then divide by the page size (4k for
349 * this code). ilog2() of that then gets us the correct slab to use.
350 */
351 slab_nr = (int)ilog2(PAGE_ALIGN(len) >> 12);
352 slab = &a->slabs[slab_nr];
353
354 alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL);
355 if (!alloc) {
356 palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n");
357 goto fail;
358 }
359 chunk = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL);
360 if (!chunk) {
361 palloc_dbg(a, "OOM: could not alloc alloc_chunk struct!\n");
362 goto fail;
363 }
364
365 INIT_LIST_HEAD(&alloc->alloc_chunks);
366 list_add(&chunk->list_entry, &alloc->alloc_chunks);
367
368 err = __do_slab_alloc(a, slab, alloc);
369 if (err)
370 goto fail;
371
372 palloc_dbg(a, "Alloc 0x%04llx sr=%d id=0x%010llx [slab]\n",
373 len, slab_nr, alloc->base);
374 a->nr_slab_allocs++;
375
376 return alloc;
377
378fail:
379 kfree(alloc);
380 kfree(chunk);
381 return ERR_PTR(-ENOMEM);
382}
383
384static void __gk20a_free_slab(struct gk20a_page_allocator *a,
385 struct gk20a_page_alloc *alloc)
386{
387 struct page_alloc_slab_page *slab_page = alloc->slab_page;
388 struct page_alloc_slab *slab = slab_page->owner;
389 enum slab_page_state new_state;
390 int offs;
391
392 offs = (alloc->base - slab_page->page_addr) / slab_page->slab_size;
393 bitmap_clear(&slab_page->bitmap, offs, 1);
394
395 slab_page->nr_objects_alloced--;
396
397 if (slab_page->nr_objects_alloced == 0)
398 new_state = SP_EMPTY;
399 else
400 new_state = SP_PARTIAL;
401
402 /*
403 * Need to migrate the page to a different list.
404 */
405 if (new_state != slab_page->state) {
406 /* Delete - can't be in empty. */
407 if (slab_page->state == SP_PARTIAL)
408 del_slab_page_from_partial(slab, slab_page);
409 else
410 del_slab_page_from_full(slab, slab_page);
411
412 /* And add. */
413 if (new_state == SP_EMPTY) {
414 if (list_empty(&slab->empty))
415 add_slab_page_to_empty(slab, slab_page);
416 else
417 free_slab_page(a, slab_page);
418 } else {
419 add_slab_page_to_partial(slab, slab_page);
420 }
421 }
422
423 /*
424 * Now handle the page_alloc.
425 */
426 __gk20a_free_pages(a, alloc, false);
427 a->nr_slab_frees++;
428
429 return;
430}
431
137/* 432/*
138 * Allocate physical pages. Since the underlying allocator is a buddy allocator 433 * Allocate physical pages. Since the underlying allocator is a buddy allocator
139 * the returned pages are always contiguous. However, since there could be 434 * the returned pages are always contiguous. However, since there could be
140 * fragmentation in the space this allocator will collate smaller non-contiguous 435 * fragmentation in the space this allocator will collate smaller non-contiguous
141 * allocations together if necessary. 436 * allocations together if necessary.
142 */ 437 */
143static struct gk20a_page_alloc *__gk20a_alloc_pages(
438static struct gk20a_page_alloc *__do_gk20a_alloc_pages(
144 struct gk20a_page_allocator *a, u64 pages) 439 struct gk20a_page_allocator *a, u64 pages)
145{ 440{
146 struct gk20a_page_alloc *alloc; 441 struct gk20a_page_alloc *alloc;
@@ -152,6 +447,8 @@ static struct gk20a_page_alloc *__gk20a_alloc_pages(
152 if (!alloc) 447 if (!alloc)
153 goto fail; 448 goto fail;
154 449
450 memset(alloc, 0, sizeof(*alloc));
451
155 INIT_LIST_HEAD(&alloc->alloc_chunks); 452 INIT_LIST_HEAD(&alloc->alloc_chunks);
156 alloc->length = pages << a->page_shift; 453 alloc->length = pages << a->page_shift;
157 454
@@ -233,6 +530,33 @@ fail:
233 return ERR_PTR(-ENOMEM); 530 return ERR_PTR(-ENOMEM);
234} 531}
235 532
533static struct gk20a_page_alloc *__gk20a_alloc_pages(
534 struct gk20a_page_allocator *a, u64 len)
535{
536 struct gk20a_page_alloc *alloc = NULL;
537 struct page_alloc_chunk *c;
538 u64 pages;
539 int i = 0;
540
541 pages = ALIGN(len, a->page_size) >> a->page_shift;
542
543 alloc = __do_gk20a_alloc_pages(a, pages);
544 if (IS_ERR(alloc)) {
545 palloc_dbg(a, "Alloc 0x%llx (%llu) (failed)\n",
546 pages << a->page_shift, pages);
547 return NULL;
548 }
549
550 palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n",
551 pages << a->page_shift, pages, alloc->base);
552 list_for_each_entry(c, &alloc->alloc_chunks, list_entry) {
553 palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n",
554 i++, c->base, c->length);
555 }
556
557 return alloc;
558}
559
236/* 560/*
237 * Allocate enough pages to satisfy @len. Page size is determined at 561 * Allocate enough pages to satisfy @len. Page size is determined at
238 * initialization of the allocator. 562 * initialization of the allocator.
@@ -247,10 +571,7 @@ static u64 gk20a_page_alloc(struct gk20a_allocator *__a, u64 len)
247{ 571{
248 struct gk20a_page_allocator *a = page_allocator(__a); 572 struct gk20a_page_allocator *a = page_allocator(__a);
249 struct gk20a_page_alloc *alloc = NULL; 573 struct gk20a_page_alloc *alloc = NULL;
250 struct page_alloc_chunk *c;
251 u64 real_len; 574 u64 real_len;
252 u64 pages;
253 int i = 0;
254 575
255 /* 576 /*
256 * If we want contig pages we have to round up to a power of two. It's 577 * If we want contig pages we have to round up to a power of two. It's
@@ -259,30 +580,24 @@ static u64 gk20a_page_alloc(struct gk20a_allocator *__a, u64 len)
259 real_len = a->flags & GPU_ALLOC_FORCE_CONTIG ? 580 real_len = a->flags & GPU_ALLOC_FORCE_CONTIG ?
260 roundup_pow_of_two(len) : len; 581 roundup_pow_of_two(len) : len;
261 582
262 pages = ALIGN(real_len, a->page_size) >> a->page_shift;
263
264 alloc_lock(__a); 583 alloc_lock(__a);
584 if (a->flags & GPU_ALLOC_4K_VIDMEM_PAGES &&
585 real_len <= (a->page_size / 2))
586 alloc = __gk20a_alloc_slab(a, real_len);
587 else
588 alloc = __gk20a_alloc_pages(a, real_len);
265 589
266 alloc = __gk20a_alloc_pages(a, pages);
590 if (!alloc) {
267 if (IS_ERR(alloc)) {
268 alloc_unlock(__a); 591 alloc_unlock(__a);
269 palloc_dbg(a, "Alloc 0x%llx (%llu) (failed)\n",
270 pages << a->page_shift, pages);
271 return 0; 592 return 0;
272 } 593 }
273 594
274 __insert_page_alloc(a, alloc); 595 __insert_page_alloc(a, alloc);
275 alloc_unlock(__a);
276
277 palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n",
278 pages << a->page_shift, pages, alloc->base);
279 list_for_each_entry(c, &alloc->alloc_chunks, list_entry) {
280 palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n",
281 i++, c->base, c->length);
282 }
283 596
284 a->nr_allocs++; 597 a->nr_allocs++;
285 a->pages_alloced += pages;
598 if (real_len > a->page_size / 2)
599 a->pages_alloced += alloc->length >> a->page_shift;
600 alloc_unlock(__a);
286 601
287 if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER) 602 if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER)
288 return alloc->base; 603 return alloc->base;
@@ -290,24 +605,6 @@ static u64 gk20a_page_alloc(struct gk20a_allocator *__a, u64 len)
290 return (u64) (uintptr_t) alloc; 605 return (u64) (uintptr_t) alloc;
291} 606}
292 607
293static void __gk20a_free_pages(struct gk20a_page_allocator *a,
294 struct gk20a_page_alloc *alloc)
295{
296 struct page_alloc_chunk *chunk;
297
298 while (!list_empty(&alloc->alloc_chunks)) {
299 chunk = list_first_entry(&alloc->alloc_chunks,
300 struct page_alloc_chunk,
301 list_entry);
302 list_del(&chunk->list_entry);
303
304 gk20a_free(&a->source_allocator, chunk->base);
305 kfree(chunk);
306 }
307
308 kfree(alloc);
309}
310
311/* 608/*
312 * Note: this will remove the gk20a_page_alloc struct from the RB tree 609 * Note: this will remove the gk20a_page_alloc struct from the RB tree
313 * if it's found. 610 * if it's found.
@@ -331,14 +628,18 @@ static void gk20a_page_free(struct gk20a_allocator *__a, u64 base)
331 } 628 }
332 629
333 a->nr_frees++; 630 a->nr_frees++;
334 a->pages_freed += (alloc->length >> a->page_shift);
335 631
336 /* 632 /*
337 * Frees *alloc. 633 * Frees *alloc.
338 */ 634 */
339 __gk20a_free_pages(a, alloc);
635 if (alloc->slab_page) {
636 __gk20a_free_slab(a, alloc);
637 } else {
638 a->pages_freed += (alloc->length >> a->page_shift);
639 __gk20a_free_pages(a, alloc, true);
640 }
340 641
341 palloc_dbg(a, "Free 0x%010llx id=0x%010llx\n",
642 palloc_dbg(a, "Free 0x%llx id=0x%010llx\n",
342 alloc->length, alloc->base); 643 alloc->length, alloc->base);
343 644
344done: 645done:
@@ -439,7 +740,7 @@ static void gk20a_page_free_fixed(struct gk20a_allocator *__a,
439 * allocs. This would have to be updated if the underlying 740 * allocs. This would have to be updated if the underlying
440 * allocator were to change. 741 * allocator were to change.
441 */ 742 */
442 __gk20a_free_pages(a, alloc);
743 __gk20a_free_pages(a, alloc, true);
443 744
444 palloc_dbg(a, "Free [fixed] 0x%010llx + 0x%llx\n", 745 palloc_dbg(a, "Free [fixed] 0x%010llx + 0x%llx\n",
445 alloc->base, alloc->length); 746 alloc->base, alloc->length);
@@ -464,6 +765,7 @@ static void gk20a_page_print_stats(struct gk20a_allocator *__a,
464 struct seq_file *s, int lock) 765 struct seq_file *s, int lock)
465{ 766{
466 struct gk20a_page_allocator *a = page_allocator(__a); 767 struct gk20a_page_allocator *a = page_allocator(__a);
768 int i;
467 769
468 if (lock) 770 if (lock)
469 alloc_lock(__a); 771 alloc_lock(__a);
@@ -473,12 +775,33 @@ static void gk20a_page_print_stats(struct gk20a_allocator *__a,
473 __alloc_pstat(s, __a, " frees %lld\n", a->nr_frees); 775 __alloc_pstat(s, __a, " frees %lld\n", a->nr_frees);
474 __alloc_pstat(s, __a, " fixed_allocs %lld\n", a->nr_fixed_allocs); 776 __alloc_pstat(s, __a, " fixed_allocs %lld\n", a->nr_fixed_allocs);
475 __alloc_pstat(s, __a, " fixed_frees %lld\n", a->nr_fixed_frees); 777 __alloc_pstat(s, __a, " fixed_frees %lld\n", a->nr_fixed_frees);
778 __alloc_pstat(s, __a, " slab_allocs %lld\n", a->nr_slab_allocs);
779 __alloc_pstat(s, __a, " slab_frees %lld\n", a->nr_slab_frees);
476 __alloc_pstat(s, __a, " pages alloced %lld\n", a->pages_alloced); 780 __alloc_pstat(s, __a, " pages alloced %lld\n", a->pages_alloced);
477 __alloc_pstat(s, __a, " pages freed %lld\n", a->pages_freed); 781 __alloc_pstat(s, __a, " pages freed %lld\n", a->pages_freed);
478 __alloc_pstat(s, __a, "\n"); 782 __alloc_pstat(s, __a, "\n");
783
784 /*
785 * Slab info.
786 */
787 if (a->flags & GPU_ALLOC_4K_VIDMEM_PAGES) {
788 __alloc_pstat(s, __a, "Slabs:\n");
789 __alloc_pstat(s, __a, " size empty partial full\n");
790 __alloc_pstat(s, __a, " ---- ----- ------- ----\n");
791
792 for (i = 0; i < a->nr_slabs; i++) {
793 struct page_alloc_slab *slab = &a->slabs[i];
794
795 __alloc_pstat(s, __a, " %-9u %-9d %-9u %u\n",
796 slab->slab_size,
797 slab->nr_empty, slab->nr_partial,
798 slab->nr_full);
799 }
800 __alloc_pstat(s, __a, "\n");
801 }
802
479 __alloc_pstat(s, __a, "Source alloc: %s\n", 803 __alloc_pstat(s, __a, "Source alloc: %s\n",
480 a->source_allocator.name); 804 a->source_allocator.name);
481
482 gk20a_alloc_print_stats(&a->source_allocator, s, lock); 805 gk20a_alloc_print_stats(&a->source_allocator, s, lock);
483 806
484 if (lock) 807 if (lock)
@@ -506,6 +829,43 @@ static const struct gk20a_allocator_ops page_ops = {
506 .print_stats = gk20a_page_print_stats, 829 .print_stats = gk20a_page_print_stats,
507}; 830};
508 831
832/*
833 * nr_slabs is computed as follows: divide page_size by 4096 to get number of
834 * 4k pages in page_size. Then take the base 2 log of that to get number of
835 * slabs. For 64k page_size that works out like:
836 *
837 * 1024*64 / 1024*4 = 16
838 * ilog2(16) = 4
839 *
840 * That gives buckets of 1, 2, 4, and 8 pages (i.e. 4k, 8k, 16k, 32k).
841 */
842static int gk20a_page_alloc_init_slabs(struct gk20a_page_allocator *a)
843{
844 size_t nr_slabs = ilog2(a->page_size >> 12);
845 int i;
846
847 a->slabs = kcalloc(nr_slabs,
848 sizeof(struct page_alloc_slab),
849 GFP_KERNEL);
850 if (!a->slabs)
851 return -ENOMEM;
852 a->nr_slabs = nr_slabs;
853
854 for (i = 0; i < nr_slabs; i++) {
855 struct page_alloc_slab *slab = &a->slabs[i];
856
857 slab->slab_size = SZ_4K * (1 << i);
858 INIT_LIST_HEAD(&slab->empty);
859 INIT_LIST_HEAD(&slab->partial);
860 INIT_LIST_HEAD(&slab->full);
861 slab->nr_empty = 0;
862 slab->nr_partial = 0;
863 slab->nr_full = 0;
864 }
865
866 return 0;
867}
868
509int gk20a_page_allocator_init(struct gk20a_allocator *__a, 869int gk20a_page_allocator_init(struct gk20a_allocator *__a,
510 const char *name, u64 base, u64 length, 870 const char *name, u64 base, u64 length,
511 u64 blk_size, u64 flags) 871 u64 blk_size, u64 flags)
@@ -519,11 +879,17 @@ int gk20a_page_allocator_init(struct gk20a_allocator *__a,
519 page_alloc_cache = KMEM_CACHE(gk20a_page_alloc, 0); 879 page_alloc_cache = KMEM_CACHE(gk20a_page_alloc, 0);
520 if (!page_alloc_chunk_cache) 880 if (!page_alloc_chunk_cache)
521 page_alloc_chunk_cache = KMEM_CACHE(page_alloc_chunk, 0); 881 page_alloc_chunk_cache = KMEM_CACHE(page_alloc_chunk, 0);
882 if (!page_alloc_slab_page_cache)
883 page_alloc_slab_page_cache =
884 KMEM_CACHE(page_alloc_slab_page, 0);
522 mutex_unlock(&meta_data_cache_lock); 885 mutex_unlock(&meta_data_cache_lock);
523 886
524 if (!page_alloc_cache || !page_alloc_chunk_cache) 887 if (!page_alloc_cache || !page_alloc_chunk_cache)
525 return -ENOMEM; 888 return -ENOMEM;
526 889
890 if (blk_size < SZ_4K)
891 return -EINVAL;
892
527 a = kzalloc(sizeof(struct gk20a_page_allocator), GFP_KERNEL); 893 a = kzalloc(sizeof(struct gk20a_page_allocator), GFP_KERNEL);
528 if (!a) 894 if (!a)
529 return -ENOMEM; 895 return -ENOMEM;
@@ -540,6 +906,12 @@ int gk20a_page_allocator_init(struct gk20a_allocator *__a,
540 a->owner = __a; 906 a->owner = __a;
541 a->flags = flags; 907 a->flags = flags;
542 908
909 if (flags & GPU_ALLOC_4K_VIDMEM_PAGES && blk_size > SZ_4K) {
910 err = gk20a_page_alloc_init_slabs(a);
911 if (err)
912 goto fail;
913 }
914
543 snprintf(buddy_name, sizeof(buddy_name), "%s-src", name); 915 snprintf(buddy_name, sizeof(buddy_name), "%s-src", name);
544 916
545 err = gk20a_buddy_allocator_init(&a->source_allocator, buddy_name, base, 917 err = gk20a_buddy_allocator_init(&a->source_allocator, buddy_name, base,
@@ -553,6 +925,7 @@ int gk20a_page_allocator_init(struct gk20a_allocator *__a,
553 palloc_dbg(a, " size 0x%llx\n", a->length); 925 palloc_dbg(a, " size 0x%llx\n", a->length);
554 palloc_dbg(a, " page_size 0x%llx\n", a->page_size); 926 palloc_dbg(a, " page_size 0x%llx\n", a->page_size);
555 palloc_dbg(a, " flags 0x%llx\n", a->flags); 927 palloc_dbg(a, " flags 0x%llx\n", a->flags);
928 palloc_dbg(a, " slabs: %d\n", a->nr_slabs);
556 929
557 return 0; 930 return 0;
558 931
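Editor's aside: the slab_page->bitmap manipulated above by __do_slab_alloc() and __gk20a_free_slab() is a single unsigned long because a 64K page sliced into 4K objects holds at most 16 objects (32 for a 128K page), which always fits in one word. The following userspace model of that bookkeeping is a sketch with made-up names, not driver code.

/*
 * Userspace model (not driver code) of the per-slab-page bitmap. One bit
 * per object slot; nr_objects = page_size / slab_size, at most 16 for a
 * 64K page, so one unsigned long is always wide enough.
 */
#include <stdio.h>

struct slab_page_model {
	unsigned long bitmap;     /* set bit => object slot in use */
	unsigned int nr_objects;  /* page_size / slab_size */
};

/* Rough equivalent of bitmap_find_next_zero_area() + bitmap_set() for one bit. */
static int model_alloc(struct slab_page_model *p)
{
	unsigned int i;

	for (i = 0; i < p->nr_objects; i++) {
		if (!(p->bitmap & (1UL << i))) {
			p->bitmap |= 1UL << i;
			return (int)i; /* object offset within the page */
		}
	}
	return -1; /* page is full */
}

/* Rough equivalent of bitmap_clear() for one bit. */
static void model_free(struct slab_page_model *p, int slot)
{
	p->bitmap &= ~(1UL << slot);
}

int main(void)
{
	struct slab_page_model p = { .bitmap = 0, .nr_objects = 16 };
	int a = model_alloc(&p);
	int b = model_alloc(&p);

	printf("allocated slots %d and %d, bitmap=0x%lx\n", a, b, p.bitmap);
	model_free(&p, a);
	printf("freed slot %d, bitmap=0x%lx\n", a, p.bitmap);
	return 0;
}

The real code additionally tracks nr_objects_alloced so it can migrate a slab page between the empty, partial, and full lists without rescanning the bitmap.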
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 1bacb70a..df58feb2 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -925,8 +925,11 @@ static int gk20a_init_vidmem(struct mm_gk20a *mm)
925 bootstrap_base, bootstrap_size, 925 bootstrap_base, bootstrap_size,
926 SZ_4K, 0); 926 SZ_4K, 0);
927 927
928 err = gk20a_page_allocator_init(&g->mm.vidmem.allocator, "vidmem",
929 base, size - base, default_page_size, 0);
928 err = gk20a_page_allocator_init(&g->mm.vidmem.allocator,
929 "vidmem",
930 base, size - base,
931 default_page_size,
932 GPU_ALLOC_4K_VIDMEM_PAGES);
930 if (err) { 933 if (err) {
931 gk20a_err(d, "Failed to register vidmem for size %zu: %d", 934 gk20a_err(d, "Failed to register vidmem for size %zu: %d",
932 size, err); 935 size, err);
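Editor's aside: the mm_gk20a.c hunk above simply opts vidmem into the new behaviour by passing GPU_ALLOC_4K_VIDMEM_PAGES. Below is a hedged sketch of what a caller sees; the region base/size, the header list, and the test function are hypothetical, while the init/alloc/free entry points are the ones exercised by this patch. It is not part of the change.

/*
 * Hypothetical smoke test, not part of the patch. Base/size values and the
 * function name are made up. With GPU_ALLOC_4K_VIDMEM_PAGES set, the 4K
 * request below is expected to be carved out of a 64K slab page instead of
 * consuming a whole large page.
 */
#include <linux/types.h>
#include <linux/sizes.h>
#include <linux/errno.h>

#include "gk20a_allocator.h"

static int vidmem_slab_smoke_test(void)
{
	struct gk20a_allocator vidmem;
	u64 handle;
	int err;

	err = gk20a_page_allocator_init(&vidmem, "vidmem-test",
					SZ_1G /* base */, SZ_256M /* length */,
					SZ_64K /* blk_size */,
					GPU_ALLOC_4K_VIDMEM_PAGES);
	if (err)
		return err;

	/* 4K request: lands in the smallest slab bucket. */
	handle = gk20a_alloc(&vidmem, SZ_4K);
	if (!handle)
		return -ENOMEM;

	gk20a_free(&vidmem, handle);
	return 0;
}

Without GPU_ALLOC_NO_SCATTER_GATHER the returned value is an opaque handle to the gk20a_page_alloc rather than a GPU address, which is why it is only checked for zero here.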
diff --git a/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h b/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h
index 3d4e3c43..7d7f43c2 100644
--- a/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h
+++ b/drivers/gpu/nvgpu/gk20a/page_allocator_priv.h
@@ -19,12 +19,63 @@
19 19
20#include <linux/list.h> 20#include <linux/list.h>
21#include <linux/rbtree.h> 21#include <linux/rbtree.h>
22#include <gk20a/gk20a_allocator.h>
23 22
24#include "gk20a_allocator.h" 23#include "gk20a_allocator.h"
25 24
26struct gk20a_allocator; 25struct gk20a_allocator;
27 26
27/*
28 * This allocator implements the ability to do SLAB style allocation since the
29 * GPU has two page sizes available - 4k and 64k/128k. When the default
30 * granularity is the large page size (64k/128k) small allocations become very
31 * space inefficient. This is most notable in PDE and PTE blocks which are 4k
32 * in size.
33 *
34 * Thus we need the ability to suballocate in 64k pages. The way we do this for
35 * the GPU is as follows. We have several buckets for sub-64K allocations:
36 *
37 * B0 - 4k
38 * B1 - 8k
39 * B2 - 16k
40 * B3 - 32k
41 * B4 - 64k (for when large pages are 128k)
42 *
43 * When an allocation comes in for less than the large page size (from now on
44 * assumed to be 64k) the allocation is satisfied by one of the buckets.
45 */
46struct page_alloc_slab {
47 struct list_head empty;
48 struct list_head partial;
49 struct list_head full;
50
51 int nr_empty;
52 int nr_partial;
53 int nr_full;
54
55 u32 slab_size;
56};
57
58enum slab_page_state {
59 SP_EMPTY,
60 SP_PARTIAL,
61 SP_FULL,
62 SP_NONE
63};
64
65struct page_alloc_slab_page {
66 unsigned long bitmap;
67 u64 page_addr;
68 u32 slab_size;
69
70 u32 nr_objects;
71 u32 nr_objects_alloced;
72
73 enum slab_page_state state;
74
75 struct page_alloc_slab *owner;
76 struct list_head list_entry;
77};
78
28struct page_alloc_chunk { 79struct page_alloc_chunk {
29 struct list_head list_entry; 80 struct list_head list_entry;
30 81
@@ -34,7 +85,7 @@ struct page_alloc_chunk {
34 85
35/* 86/*
36 * Struct to handle internal management of page allocation. It holds a list 87 * Struct to handle internal management of page allocation. It holds a list
37 * of the chunks of page that make up the overall allocation - much like a
88 * of the chunks of pages that make up the overall allocation - much like a
38 * scatter gather table. 89 * scatter gather table.
39 */ 90 */
40struct gk20a_page_alloc { 91struct gk20a_page_alloc {
@@ -44,13 +95,20 @@ struct gk20a_page_alloc {
44 u64 length; 95 u64 length;
45 96
46 /* 97 /*
47 * Only useful for the RB tree - since the alloc will have discontiguous
98 * Only useful for the RB tree - since the alloc may have discontiguous
48 * pages the base is essentially irrelevant except for the fact that it 99 * pages the base is essentially irrelevant except for the fact that it
49 * is guarenteed to be unique. 100 * is guarenteed to be unique.
50 */ 101 */
51 u64 base; 102 u64 base;
52 103
53 struct rb_node tree_entry; 104 struct rb_node tree_entry;
105
106 /*
107 * Set if this is a slab alloc. Points back to the slab page that owns
108 * this particular allocation. nr_chunks will always be 1 if this is
109 * set.
110 */
111 struct page_alloc_slab_page *slab_page;
54}; 112};
55 113
56struct gk20a_page_allocator { 114struct gk20a_page_allocator {
@@ -73,6 +131,9 @@ struct gk20a_page_allocator {
73 131
74 struct rb_root allocs; /* Outstanding allocations. */ 132 struct rb_root allocs; /* Outstanding allocations. */
75 133
134 struct page_alloc_slab *slabs;
135 int nr_slabs;
136
76 u64 flags; 137 u64 flags;
77 138
78 /* 139 /*
@@ -82,6 +143,8 @@ struct gk20a_page_allocator {
82 u64 nr_frees; 143 u64 nr_frees;
83 u64 nr_fixed_allocs; 144 u64 nr_fixed_allocs;
84 u64 nr_fixed_frees; 145 u64 nr_fixed_frees;
146 u64 nr_slab_allocs;
147 u64 nr_slab_frees;
85 u64 pages_alloced; 148 u64 pages_alloced;
86 u64 pages_freed; 149 u64 pages_freed;
87}; 150};
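Editor's aside: to tie the header comment above back to gk20a_page_alloc_init_slabs(), here is the same sizing arithmetic done standalone for the two large-page sizes the comment mentions. Illustrative code only, not part of the driver.

/*
 * Standalone check of the slab layout math: nr_slabs = ilog2(page_size / 4K),
 * and slab i has size 4K << i.
 */
#include <stdio.h>

static void show_slabs(unsigned long page_size)
{
	unsigned long pages4k = page_size / 4096;
	int nr_slabs = 0;
	int i;

	while (pages4k >>= 1) /* ilog2(page_size >> 12) */
		nr_slabs++;

	printf("page_size=%3luK -> %d slabs:", page_size / 1024, nr_slabs);
	for (i = 0; i < nr_slabs; i++)
		printf(" %luK", (4096UL << i) / 1024);
	printf("\n");
}

int main(void)
{
	show_slabs(64 * 1024);   /* B0..B3: 4K 8K 16K 32K     */
	show_slabs(128 * 1024);  /* B0..B4: 4K 8K 16K 32K 64K */
	return 0;
}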