author     Alex Waterman <alexw@nvidia.com>                     2017-05-25 19:56:50 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>  2017-09-22 15:52:48 -0400
commit     0090ee5aca268a3c359f34c74b8c521df3bd8593 (patch)
tree       2779dc64554cdb38b717ce09c0e3dcbf36107ed3 /drivers/gpu/nvgpu/common/mm/page_allocator.c
parent     e32cc0108cf2ef5de7a17f0f6c0aa9af7faf23ed (diff)
gpu: nvgpu: nvgpu SGL implementation
The last major item preventing the core MM code in the nvgpu
driver from being platform agnostic is the usage of Linux
scatter-gather tables and scatter-gather lists. These data
structures are used throughout the mapping code to handle
discontiguous DMA allocations and are also overloaded to
represent VIDMEM allocs.
The notion of a scatter-gather table is crucial for a HW device
that can handle discontiguous DMA. The GPU has an MMU which
allows it to do page gathering and present a virtually
contiguous buffer to the GPU HW. As a result it makes sense
for the GPU driver to use some sort of scatter-gather concept
to maximize memory usage efficiency.
To that end this patch keeps the notion of a scatter-gather
list but implements it in the nvgpu common code. It is based
heavily on the Linux SGL concept: a singly linked list of
blocks, each representing a contiguous chunk of memory. To map
or otherwise use a DMA allocation, SW must iterate over each
block in the SGL, as sketched below.
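For illustration, the SGL node implied by the accessors used in
this patch looks roughly like the following. This is a sketch, not
the actual header: the real definition lives in the nvgpu common
code and may carry more fields, and sgl_map_one_chunk() is a
hypothetical consumer callback.

    /*
     * Sketch of one SGL node: a NULL-terminated singly linked
     * list in which each node describes one contiguous chunk.
     */
    struct nvgpu_mem_sgl {
            struct nvgpu_mem_sgl *next;  /* NULL terminates the list. */
            u64 phys;                    /* Physical (or VIDMEM) address. */
            u64 dma;                     /* DMA address; == phys in this file. */
            u64 length;                  /* Chunk length in bytes. */
    };

    /* Typical consumer loop over a discontiguous allocation. */
    while (sgl) {
            sgl_map_one_chunk(nvgpu_mem_sgl_phys(sgl),
                              nvgpu_mem_sgl_length(sgl));
            sgl = nvgpu_mem_sgl_next(sgl);
    }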
This patch implements the most basic level of support for this
data structure. There are certainly easy optimizations that
could be done to speed up the current implementation. However,
this patch's goal is simply to divest the core MM code of
any last Linux'isms. Speed and efficiency come next.
Change-Id: Icf44641db22d87fa1d003debbd9f71b605258e42
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1530867
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/common/mm/page_allocator.c')
-rw-r--r--  drivers/gpu/nvgpu/common/mm/page_allocator.c | 142
1 file changed, 76 insertions(+), 66 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/mm/page_allocator.c b/drivers/gpu/nvgpu/common/mm/page_allocator.c
index 72ff8f2d..6d92b457 100644
--- a/drivers/gpu/nvgpu/common/mm/page_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/page_allocator.c
@@ -147,19 +147,16 @@ static void __nvgpu_free_pages(struct nvgpu_page_allocator *a,
                                struct nvgpu_page_alloc *alloc,
                                bool free_buddy_alloc)
 {
-        struct page_alloc_chunk *chunk;
+        struct nvgpu_mem_sgl *sgl = alloc->sgl;
 
-        while (!nvgpu_list_empty(&alloc->alloc_chunks)) {
-                chunk = nvgpu_list_first_entry(&alloc->alloc_chunks,
-                                               page_alloc_chunk,
-                                               list_entry);
-                nvgpu_list_del(&chunk->list_entry);
-
-                if (free_buddy_alloc)
-                        nvgpu_free(&a->source_allocator, chunk->base);
-                nvgpu_kmem_cache_free(a->chunk_cache, chunk);
+        if (free_buddy_alloc) {
+                while (sgl) {
+                        nvgpu_free(&a->source_allocator, sgl->phys);
+                        sgl = nvgpu_mem_sgl_next(sgl);
+                }
         }
 
+        nvgpu_mem_sgl_free(a->owner->g, alloc->sgl);
         nvgpu_kmem_cache_free(a->alloc_cache, alloc);
 }
 
@@ -243,15 +240,14 @@ static void free_slab_page(struct nvgpu_page_allocator *a,
 }
 
 /*
- * This expects @alloc to have 1 empty page_alloc_chunk already added to the
- * alloc_chunks list.
+ * This expects @alloc to have 1 empty sgl_entry ready for usage.
  */
 static int __do_slab_alloc(struct nvgpu_page_allocator *a,
                            struct page_alloc_slab *slab,
                            struct nvgpu_page_alloc *alloc)
 {
         struct page_alloc_slab_page *slab_page = NULL;
-        struct page_alloc_chunk *chunk;
+        struct nvgpu_mem_sgl *sgl;
         unsigned long offs;
 
         /*
@@ -302,18 +298,19 @@ static int __do_slab_alloc(struct nvgpu_page_allocator *a,
                 BUG(); /* Should be impossible to hit this. */
 
         /*
-         * Handle building the nvgpu_page_alloc struct. We expect one
-         * page_alloc_chunk to be present.
+         * Handle building the nvgpu_page_alloc struct. We expect one sgl
+         * to be present.
          */
         alloc->slab_page = slab_page;
         alloc->nr_chunks = 1;
         alloc->length = slab_page->slab_size;
         alloc->base = slab_page->page_addr + (offs * slab_page->slab_size);
 
-        chunk = nvgpu_list_first_entry(&alloc->alloc_chunks,
-                                       page_alloc_chunk, list_entry);
-        chunk->base = alloc->base;
-        chunk->length = alloc->length;
+        sgl = alloc->sgl;
+        sgl->phys = alloc->base;
+        sgl->dma = alloc->base;
+        sgl->length = alloc->length;
+        sgl->next = NULL;
 
         return 0;
 }
@@ -327,7 +324,7 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_slab(
         int err, slab_nr;
         struct page_alloc_slab *slab;
         struct nvgpu_page_alloc *alloc = NULL;
-        struct page_alloc_chunk *chunk = NULL;
+        struct nvgpu_mem_sgl *sgl = NULL;
 
         /*
          * Align the length to a page and then divide by the page size (4k for
@@ -341,15 +338,13 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_slab(
                 palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n");
                 goto fail;
         }
-        chunk = nvgpu_kmem_cache_alloc(a->chunk_cache);
-        if (!chunk) {
-                palloc_dbg(a, "OOM: could not alloc alloc_chunk struct!\n");
+        sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl));
+        if (!sgl) {
+                palloc_dbg(a, "OOM: could not alloc sgl struct!\n");
                 goto fail;
         }
 
-        nvgpu_init_list_node(&alloc->alloc_chunks);
-        nvgpu_list_add(&chunk->list_entry, &alloc->alloc_chunks);
-
+        alloc->sgl = sgl;
         err = __do_slab_alloc(a, slab, alloc);
         if (err)
                 goto fail;
@@ -363,8 +358,8 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_slab(
 fail:
         if (alloc)
                 nvgpu_kmem_cache_free(a->alloc_cache, alloc);
-        if (chunk)
-                nvgpu_kmem_cache_free(a->chunk_cache, chunk);
+        if (sgl)
+                nvgpu_kfree(a->owner->g, sgl);
         return NULL;
 }
 
@@ -426,7 +421,7 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages(
                 struct nvgpu_page_allocator *a, u64 pages)
 {
         struct nvgpu_page_alloc *alloc;
-        struct page_alloc_chunk *c;
+        struct nvgpu_mem_sgl *sgl, *prev_sgl = NULL;
         u64 max_chunk_len = pages << a->page_shift;
         int i = 0;
 
@@ -436,7 +431,6 @@
 
         memset(alloc, 0, sizeof(*alloc));
 
-        nvgpu_init_list_node(&alloc->alloc_chunks);
         alloc->length = pages << a->page_shift;
 
         while (pages) {
@@ -482,36 +476,48 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages(
                         goto fail_cleanup;
                 }
 
-                c = nvgpu_kmem_cache_alloc(a->chunk_cache);
-                if (!c) {
+                sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl));
+                if (!sgl) {
                         nvgpu_free(&a->source_allocator, chunk_addr);
                         goto fail_cleanup;
                 }
 
                 pages -= chunk_pages;
 
-                c->base = chunk_addr;
-                c->length = chunk_len;
-                nvgpu_list_add(&c->list_entry, &alloc->alloc_chunks);
+                sgl->phys = chunk_addr;
+                sgl->dma = chunk_addr;
+                sgl->length = chunk_len;
+
+                /*
+                 * Build the singly linked list with a head node that is part of
+                 * the list.
+                 */
+                if (prev_sgl)
+                        prev_sgl->next = sgl;
+                else
+                        alloc->sgl = sgl;
+
+                prev_sgl = sgl;
 
                 i++;
         }
 
         alloc->nr_chunks = i;
-        c = nvgpu_list_first_entry(&alloc->alloc_chunks,
-                                   page_alloc_chunk, list_entry);
-        alloc->base = c->base;
+        alloc->base = alloc->sgl->phys;
 
         return alloc;
 
 fail_cleanup:
-        while (!nvgpu_list_empty(&alloc->alloc_chunks)) {
-                c = nvgpu_list_first_entry(&alloc->alloc_chunks,
-                                           page_alloc_chunk, list_entry);
-                nvgpu_list_del(&c->list_entry);
-                nvgpu_free(&a->source_allocator, c->base);
-                nvgpu_kmem_cache_free(a->chunk_cache, c);
+        sgl = alloc->sgl;
+        while (sgl) {
+                struct nvgpu_mem_sgl *next = sgl->next;
+
+                nvgpu_free(&a->source_allocator, sgl->phys);
+                nvgpu_kfree(a->owner->g, sgl);
+
+                sgl = next;
         }
+
         nvgpu_kmem_cache_free(a->alloc_cache, alloc);
 fail:
         return NULL;
@@ -521,7 +527,7 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages(
                 struct nvgpu_page_allocator *a, u64 len)
 {
         struct nvgpu_page_alloc *alloc = NULL;
-        struct page_alloc_chunk *c;
+        struct nvgpu_mem_sgl *sgl;
         u64 pages;
         int i = 0;
 
@@ -536,11 +542,15 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages(
 
         palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n",
                    pages << a->page_shift, pages, alloc->base);
-        nvgpu_list_for_each_entry(c, &alloc->alloc_chunks,
-                                  page_alloc_chunk, list_entry) {
+        sgl = alloc->sgl;
+        while (sgl) {
                 palloc_dbg(a, "  Chunk %2d: 0x%010llx + 0x%llx\n",
-                           i++, c->base, c->length);
+                           i++,
+                           nvgpu_mem_sgl_phys(sgl),
+                           nvgpu_mem_sgl_length(sgl));
+                sgl = sgl->next;
         }
+        palloc_dbg(a, "Alloc done\n");
 
         return alloc;
 }
@@ -638,11 +648,11 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed(
                 struct nvgpu_page_allocator *a, u64 base, u64 length, u32 unused)
 {
         struct nvgpu_page_alloc *alloc;
-        struct page_alloc_chunk *c;
+        struct nvgpu_mem_sgl *sgl;
 
         alloc = nvgpu_kmem_cache_alloc(a->alloc_cache);
-        c = nvgpu_kmem_cache_alloc(a->chunk_cache);
-        if (!alloc || !c)
+        sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl));
+        if (!alloc || !sgl)
                 goto fail;
 
         alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length, 0);
@@ -653,17 +663,18 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed(
 
         alloc->nr_chunks = 1;
         alloc->length = length;
-        nvgpu_init_list_node(&alloc->alloc_chunks);
+        alloc->sgl = sgl;
 
-        c->base = alloc->base;
-        c->length = length;
-        nvgpu_list_add(&c->list_entry, &alloc->alloc_chunks);
+        sgl->phys = alloc->base;
+        sgl->dma = alloc->base;
+        sgl->length = length;
+        sgl->next = NULL;
 
         return alloc;
 
 fail:
-        if (c)
-                nvgpu_kmem_cache_free(a->chunk_cache, c);
+        if (sgl)
+                nvgpu_kfree(a->owner->g, sgl);
         if (alloc)
                 nvgpu_kmem_cache_free(a->alloc_cache, alloc);
         return NULL;
@@ -677,7 +688,7 @@ static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a,
 {
         struct nvgpu_page_allocator *a = page_allocator(__a);
         struct nvgpu_page_alloc *alloc = NULL;
-        struct page_alloc_chunk *c;
+        struct nvgpu_mem_sgl *sgl;
         u64 aligned_len, pages;
         int i = 0;
 
@@ -697,10 +708,13 @@ static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a,
 
         palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n",
                    alloc->base, aligned_len, pages);
-        nvgpu_list_for_each_entry(c, &alloc->alloc_chunks,
-                                  page_alloc_chunk, list_entry) {
+        sgl = alloc->sgl;
+        while (sgl) {
                 palloc_dbg(a, "  Chunk %2d: 0x%010llx + 0x%llx\n",
-                           i++, c->base, c->length);
+                           i++,
+                           nvgpu_mem_sgl_phys(sgl),
+                           nvgpu_mem_sgl_length(sgl));
+                sgl = sgl->next;
         }
 
         a->nr_fixed_allocs++;
@@ -896,11 +910,9 @@ int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
 
         a->alloc_cache = nvgpu_kmem_cache_create(g,
                                 sizeof(struct nvgpu_page_alloc));
-        a->chunk_cache = nvgpu_kmem_cache_create(g,
-                                sizeof(struct page_alloc_chunk));
         a->slab_page_cache = nvgpu_kmem_cache_create(g,
                                 sizeof(struct page_alloc_slab_page));
-        if (!a->alloc_cache || !a->chunk_cache || !a->slab_page_cache) {
+        if (!a->alloc_cache || !a->slab_page_cache) {
                 err = -ENOMEM;
                 goto fail;
         }
@@ -941,8 +953,6 @@ int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
 fail:
         if (a->alloc_cache)
                 nvgpu_kmem_cache_destroy(a->alloc_cache);
-        if (a->chunk_cache)
-                nvgpu_kmem_cache_destroy(a->chunk_cache);
         if (a->slab_page_cache)
                 nvgpu_kmem_cache_destroy(a->slab_page_cache);
         nvgpu_kfree(g, a);
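
As a closing usage note, the list-building pattern in
__do_nvgpu_alloc_pages() above generalizes to any producer of an
SGL: chain nodes through ->next while tracking the previous node,
letting the first node double as the head. A minimal sketch of that
pattern follows; sgl_append() is a hypothetical helper (it does not
exist in the driver) and assumes the node layout sketched earlier.

    /* Append one chunk to a NULL-terminated SGL; returns the new tail. */
    static struct nvgpu_mem_sgl *sgl_append(struct gk20a *g,
                                            struct nvgpu_mem_sgl **head,
                                            struct nvgpu_mem_sgl *tail,
                                            u64 phys, u64 length)
    {
            struct nvgpu_mem_sgl *sgl = nvgpu_kzalloc(g, sizeof(*sgl));

            if (!sgl)
                    return NULL;

            sgl->phys = phys;
            sgl->dma = phys;
            sgl->length = length;

            if (tail)
                    tail->next = sgl;  /* Link after the current tail. */
            else
                    *head = sgl;       /* First node becomes the head. */

            return sgl;
    }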