author     Alex Waterman <alexw@nvidia.com>                      2017-05-25 19:56:50 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>   2017-09-22 15:52:48 -0400
commit     0090ee5aca268a3c359f34c74b8c521df3bd8593 (patch)
tree       2779dc64554cdb38b717ce09c0e3dcbf36107ed3 /drivers/gpu/nvgpu/common
parent     e32cc0108cf2ef5de7a17f0f6c0aa9af7faf23ed (diff)
gpu: nvgpu: nvgpu SGL implementation
The last major item preventing the core MM code in the nvgpu driver from being platform agnostic is the usage of Linux scatter-gather tables and scatter-gather lists. These data structures are used throughout the mapping code to handle discontiguous DMA allocations and are also overloaded to represent VIDMEM allocs.

The notion of a scatter-gather table is crucial to a HW device that can handle discontiguous DMA. The GPU has an MMU which allows the GPU to do page gathering and present a virtually contiguous buffer to the GPU HW. As a result it makes sense for the GPU driver to use some sort of scatter-gather concept to maximize memory usage efficiency.

To that end this patch keeps the notion of a scatter-gather list but implements it in the nvgpu common code. It is based heavily on the Linux SGL concept. It is a singly linked list of blocks, each representing a chunk of memory. To map or use a DMA allocation, SW must iterate over each block in the SGL.

This patch implements the most basic level of support for this data structure. There are certainly easy optimizations that could be done to speed up the current implementation. However, this patch's goal is simply to divest the core MM code of any last Linuxisms. Speed and efficiency come next.

Change-Id: Icf44641db22d87fa1d003debbd9f71b605258e42
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1530867
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
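For orientation, the sketch below shows roughly the shape of the data structure and the iteration pattern this patch introduces. It is only inferred from the accessors added in common/mm/nvgpu_mem.c in the diff below; the real struct definition lives in a header outside this diffstat (assumed here to be include/nvgpu/nvgpu_mem.h), and walk_sgl() is a hypothetical caller, not part of the patch.

    /* Assumed layout of the new nvgpu SGL: a singly linked list of chunks. */
    struct nvgpu_mem_sgl {
            struct nvgpu_mem_sgl *next;   /* Next chunk; NULL terminates the list. */
            u64 phys;                     /* Physical address of the chunk. */
            u64 dma;                      /* DMA (IOVA) address; 0 if not IOMMU-mapped. */
            u64 length;                   /* Chunk length in bytes. */
    };

    /* Hypothetical consumer: visit every chunk of a discontiguous allocation. */
    static void walk_sgl(struct gk20a *g, struct nvgpu_mem_sgl *sgl)
    {
            while (sgl) {
                    u64 phys = nvgpu_mem_sgl_phys(sgl);
                    u64 len = nvgpu_mem_sgl_length(sgl);

                    /* Map or otherwise consume [phys, phys + len) here. */
                    sgl = nvgpu_mem_sgl_next(sgl);
            }
    }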
Diffstat (limited to 'drivers/gpu/nvgpu/common')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/nvgpu_mem.c    114
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vm.c            25
-rw-r--r--  drivers/gpu/nvgpu/common/mm/gmmu.c            109
-rw-r--r--  drivers/gpu/nvgpu/common/mm/nvgpu_mem.c        73
-rw-r--r--  drivers/gpu/nvgpu/common/mm/page_allocator.c  142
-rw-r--r--  drivers/gpu/nvgpu/common/pramin.c              27
6 files changed, 345 insertions, 145 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
index e4991d0d..eb54f3fd 100644
--- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
@@ -21,6 +21,7 @@
 #include <nvgpu/log.h>
 #include <nvgpu/bug.h>
 #include <nvgpu/enabled.h>
+#include <nvgpu/kmem.h>
 
 #include <nvgpu/linux/dma.h>
 
@@ -395,3 +396,116 @@ int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest,
 
 	return 0;
 }
+
+static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_dup(struct gk20a *g,
+						 struct nvgpu_mem_sgl *sgl)
+{
+	struct nvgpu_mem_sgl *head, *next;
+
+	head = nvgpu_kzalloc(g, sizeof(*sgl));
+	if (!head)
+		return NULL;
+
+	next = head;
+	while (true) {
+		nvgpu_log(g, gpu_dbg_sgl,
+			  "  phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx",
+			  sgl->phys, sgl->dma, sgl->length);
+
+		next->dma = sgl->dma;
+		next->phys = sgl->phys;
+		next->length = sgl->length;
+		next->next = NULL;
+
+		sgl = nvgpu_mem_sgl_next(sgl);
+		if (!sgl)
+			break;
+
+		next->next = nvgpu_kzalloc(g, sizeof(*sgl));
+		if (!next->next) {
+			nvgpu_mem_sgl_free(g, head);
+			return NULL;
+		}
+		next = next->next;
+	}
+
+	return head;
+}
+
+static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_create_from_vidmem(
+	struct gk20a *g,
+	struct scatterlist *linux_sgl)
+{
+	struct nvgpu_page_alloc *vidmem_alloc;
+
+	vidmem_alloc = get_vidmem_page_alloc(linux_sgl);
+	if (!vidmem_alloc)
+		return NULL;
+
+	nvgpu_log(g, gpu_dbg_sgl, "Vidmem sgl:");
+
+	return __nvgpu_mem_sgl_dup(g, vidmem_alloc->sgl);
+}
+
+struct nvgpu_mem_sgl *nvgpu_mem_sgl_create(struct gk20a *g,
+					   struct sg_table *sgt)
+{
+	struct nvgpu_mem_sgl *head, *sgl, *next;
+	struct scatterlist *linux_sgl = sgt->sgl;
+
+	if (is_vidmem_page_alloc(sg_dma_address(linux_sgl)))
+		return __nvgpu_mem_sgl_create_from_vidmem(g, linux_sgl);
+
+	head = nvgpu_kzalloc(g, sizeof(*sgl));
+	if (!head)
+		return NULL;
+
+	nvgpu_log(g, gpu_dbg_sgl, "Making sgl:");
+
+	sgl = head;
+	while (true) {
+		sgl->dma = sg_dma_address(linux_sgl);
+		sgl->phys = sg_phys(linux_sgl);
+		sgl->length = linux_sgl->length;
+
+		/*
+		 * We don't like offsets in the pages here. This will cause
+		 * problems.
+		 */
+		if (WARN_ON(linux_sgl->offset)) {
+			nvgpu_mem_sgl_free(g, head);
+			return NULL;
+		}
+
+		nvgpu_log(g, gpu_dbg_sgl,
+			  "  phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx",
+			  sgl->phys, sgl->dma, sgl->length);
+
+		/*
+		 * When there's no more SGL ents for the Linux SGL we are
+		 * done. Don't bother making any more SGL ents for the nvgpu
+		 * SGL.
+		 */
+		linux_sgl = sg_next(linux_sgl);
+		if (!linux_sgl)
+			break;
+
+		next = nvgpu_kzalloc(g, sizeof(*sgl));
+		if (!next) {
+			nvgpu_mem_sgl_free(g, head);
+			return NULL;
+		}
+
+		sgl->next = next;
+		sgl = next;
+	}
+
+	nvgpu_log(g, gpu_dbg_sgl, "Done!");
+	return head;
+}
+
+struct nvgpu_mem_sgl *nvgpu_mem_sgl_create_from_mem(struct gk20a *g,
+						    struct nvgpu_mem *mem)
+{
+	return nvgpu_mem_sgl_create(g, mem->priv.sgt);
+}
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
index 86d8bec9..4a4429dc 100644
--- a/drivers/gpu/nvgpu/common/linux/vm.c
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -21,8 +21,11 @@
 #include <nvgpu/lock.h>
 #include <nvgpu/rbtree.h>
 #include <nvgpu/vm_area.h>
+#include <nvgpu/nvgpu_mem.h>
 #include <nvgpu/page_allocator.h>
 
+#include <nvgpu/linux/nvgpu_mem.h>
+
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
 #include "gk20a/kind_gk20a.h"
@@ -66,17 +69,19 @@ static u64 nvgpu_get_buffer_alignment(struct gk20a *g, struct scatterlist *sgl,
 
 	if (aperture == APERTURE_VIDMEM) {
 		struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl);
-		struct page_alloc_chunk *chunk = NULL;
+		struct nvgpu_mem_sgl *sgl_vid = alloc->sgl;
 
-		nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks,
-					  page_alloc_chunk, list_entry) {
-			chunk_align = 1ULL << __ffs(chunk->base |
-						    chunk->length);
+		while (sgl_vid) {
+			chunk_align = 1ULL <<
+				__ffs(nvgpu_mem_sgl_phys(sgl_vid) |
+				      nvgpu_mem_sgl_length(sgl_vid));
 
 			if (align)
 				align = min(align, chunk_align);
 			else
 				align = chunk_align;
+
+			sgl_vid = nvgpu_mem_sgl_next(sgl_vid);
 		}
 
 		return align;
@@ -237,6 +242,7 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
 	struct nvgpu_vm_area *vm_area = NULL;
 	u32 ctag_offset;
 	enum nvgpu_aperture aperture;
+	struct nvgpu_mem_sgl *nvgpu_sgl;
 
 	/*
 	 * The kind used as part of the key for map caching. HW may
@@ -393,9 +399,12 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
 		ctag_offset += buffer_offset >>
 			       ilog2(g->ops.fb.compression_page_size(g));
 
+	nvgpu_sgl = nvgpu_mem_sgl_create(g, bfr.sgt);
+
 	/* update gmmu ptes */
-	map_offset = g->ops.mm.gmmu_map(vm, map_offset,
-					bfr.sgt,
+	map_offset = g->ops.mm.gmmu_map(vm,
+					map_offset,
+					nvgpu_sgl,
 					buffer_offset, /* sg offset */
 					mapping_size,
 					bfr.pgsz_idx,
@@ -410,6 +419,8 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
 	if (!map_offset)
 		goto clean_up;
 
+	nvgpu_mem_sgl_free(g, nvgpu_sgl);
+
 	mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer));
 	if (!mapped_buffer) {
 		nvgpu_warn(g, "oom allocating tracking buffer");
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 7f486d68..41f5acdd 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -65,11 +65,14 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
 	struct gk20a *g = gk20a_from_vm(vm);
 	u64 vaddr;
 
-	struct sg_table *sgt = mem->priv.sgt;
+	struct nvgpu_mem_sgl *sgl = nvgpu_mem_sgl_create_from_mem(g, mem);
+
+	if (!sgl)
+		return -ENOMEM;
 
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
 	vaddr = g->ops.mm.gmmu_map(vm, addr,
-				   sgt,    /* sg table */
+				   sgl,    /* sg list */
				   0,      /* sg offset */
				   size,
				   gmmu_page_size_kernel,
@@ -82,8 +85,11 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
				   NULL,   /* mapping_batch handle */
				   aperture);
 	nvgpu_mutex_release(&vm->update_gmmu_lock);
+
+	nvgpu_mem_sgl_free(g, sgl);
+
 	if (!vaddr) {
-		nvgpu_err(g, "failed to allocate va space");
+		nvgpu_err(g, "failed to map buffer!");
 		return 0;
 	}
 
@@ -91,7 +97,7 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
 }
 
 /*
- * Convenience wrapper over __nvgpu_gmmu_map() for non-fixed mappings.
+ * Map a nvgpu_mem into the GMMU. This is for kernel space to use.
  */
 u64 nvgpu_gmmu_map(struct vm_gk20a *vm,
		   struct nvgpu_mem *mem,
@@ -106,7 +112,7 @@ u64 nvgpu_gmmu_map(struct vm_gk20a *vm,
 }
 
 /*
- * Like nvgpu_gmmu_map() except it can work on a fixed address instead.
+ * Like nvgpu_gmmu_map() except this can work on a fixed address.
  */
 u64 nvgpu_gmmu_map_fixed(struct vm_gk20a *vm,
			 struct nvgpu_mem *mem,
@@ -407,7 +413,7 @@ static int __set_pd_level(struct vm_gk20a *vm,
	 */
	target_addr = next_pd ?
		nvgpu_pde_phys_addr(g, next_pd) :
-		g->ops.mm.gpu_phys_addr(g, attrs, phys_addr);
+		phys_addr;
 
	l->update_entry(vm, l,
			pd, pd_idx,
@@ -458,18 +464,16 @@ static int __set_pd_level(struct vm_gk20a *vm,
  * VIDMEM version of the update_ptes logic.
  */
 static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm,
-						 struct sg_table *sgt,
+						 struct nvgpu_mem_sgl *sgl,
						 u64 space_to_skip,
						 u64 virt_addr,
						 u64 length,
						 struct nvgpu_gmmu_attrs *attrs)
 {
-	struct nvgpu_page_alloc *alloc = NULL;
-	struct page_alloc_chunk *chunk = NULL;
	u64 phys_addr, chunk_length;
	int err = 0;
 
-	if (!sgt) {
+	if (!sgl) {
		/*
		 * This is considered an unmap. Just pass in 0 as the physical
		 * address for the entire GPU range.
@@ -482,22 +486,21 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm,
		return err;
	}
 
-	alloc = get_vidmem_page_alloc(sgt->sgl);
-
	/*
	 * Otherwise iterate across all the chunks in this allocation and
	 * map them.
	 */
-	nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks,
-				  page_alloc_chunk, list_entry) {
+	while (sgl) {
		if (space_to_skip &&
-		    space_to_skip >= chunk->length) {
-			space_to_skip -= chunk->length;
+		    space_to_skip >= nvgpu_mem_sgl_length(sgl)) {
+			space_to_skip -= nvgpu_mem_sgl_length(sgl);
+			sgl = nvgpu_mem_sgl_next(sgl);
			continue;
		}
 
-		phys_addr = chunk->base + space_to_skip;
-		chunk_length = min(length, (chunk->length - space_to_skip));
+		phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip;
+		chunk_length = min(length, (nvgpu_mem_sgl_length(sgl) -
+					    space_to_skip));
 
		err = __set_pd_level(vm, &vm->pdb,
				     0,
@@ -518,23 +521,24 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm,
 
		if (length == 0)
			break;
+
+		sgl = nvgpu_mem_sgl_next(sgl);
	}
 
	return err;
 }
 
 static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
-						 struct sg_table *sgt,
+						 struct nvgpu_mem_sgl *sgl,
						 u64 space_to_skip,
						 u64 virt_addr,
						 u64 length,
						 struct nvgpu_gmmu_attrs *attrs)
 {
	int err;
-	struct scatterlist *sgl;
	struct gk20a *g = gk20a_from_vm(vm);
 
-	if (!sgt) {
+	if (!sgl) {
		/*
		 * This is considered an unmap. Just pass in 0 as the physical
		 * address for the entire GPU range.
@@ -548,19 +552,15 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
	}
 
	/*
-	 * At this point we have a Linux scatter-gather list pointing to some
-	 * number of discontiguous chunks of memory. Iterate over that list and
+	 * At this point we have a scatter-gather list pointing to some number
+	 * of discontiguous chunks of memory. We must iterate over that list and
	 * generate a GMMU map call for each chunk. There are two possibilities:
-	 * either the IOMMU is enabled or not. When the IOMMU is enabled the
+	 * either an IOMMU is enabled or not. When an IOMMU is enabled the
	 * mapping is simple since the "physical" address is actually a virtual
-	 * IO address and will be contiguous. The no-IOMMU case is more
-	 * complicated. We will have to iterate over the SGT and do a separate
-	 * map for each chunk of the SGT.
+	 * IO address and will be contiguous.
	 */
-	sgl = sgt->sgl;
-
	if (!g->mm.bypass_smmu) {
-		u64 io_addr = nvgpu_mem_get_addr_sgl(g, sgl);
+		u64 io_addr = nvgpu_mem_sgl_gpu_addr(g, sgl, attrs);
 
		io_addr += space_to_skip;
 
@@ -585,14 +585,16 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
		/*
		 * Cut out sgl ents for space_to_skip.
		 */
-		if (space_to_skip && space_to_skip >= sgl->length) {
-			space_to_skip -= sgl->length;
-			sgl = sg_next(sgl);
+		if (space_to_skip &&
+		    space_to_skip >= nvgpu_mem_sgl_length(sgl)) {
+			space_to_skip -= nvgpu_mem_sgl_length(sgl);
+			sgl = nvgpu_mem_sgl_next(sgl);
			continue;
		}
 
-		phys_addr = sg_phys(sgl) + space_to_skip;
-		chunk_length = min(length, sgl->length - space_to_skip);
+		phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip;
+		chunk_length = min(length,
+				   nvgpu_mem_sgl_length(sgl) - space_to_skip);
 
		err = __set_pd_level(vm, &vm->pdb,
				     0,
@@ -600,13 +602,11 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
				     virt_addr,
				     chunk_length,
				     attrs);
-		if (err)
-			return err;
 
		space_to_skip = 0;
		virt_addr += chunk_length;
		length -= chunk_length;
-		sgl = sg_next(sgl);
+		sgl = nvgpu_mem_sgl_next(sgl);
 
		if (length == 0)
			break;
@@ -624,22 +624,20 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
  * implementations. But the logic around that is generic to all chips. Every
  * chip has some number of PDE levels and then a PTE level.
  *
- * Each chunk of the incoming SGT is sent to the chip specific implementation
+ * Each chunk of the incoming SGL is sent to the chip specific implementation
  * of page table update.
  *
  * [*] Note: the "physical" address may actually be an IO virtual address in the
  *     case of SMMU usage.
  */
 static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
-					  struct sg_table *sgt,
+					  struct nvgpu_mem_sgl *sgl,
					  u64 space_to_skip,
					  u64 virt_addr,
					  u64 length,
					  struct nvgpu_gmmu_attrs *attrs)
 {
	struct gk20a *g = gk20a_from_vm(vm);
-	struct nvgpu_page_alloc *alloc;
-	u64 phys_addr = 0;
	u32 page_size;
	int err;
 
@@ -665,25 +663,16 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
		return err;
	}
 
-	if (sgt) {
-		if (attrs->aperture == APERTURE_VIDMEM) {
-			alloc = get_vidmem_page_alloc(sgt->sgl);
-
-			phys_addr = alloc->base;
-		} else
-			phys_addr = nvgpu_mem_get_addr_sgl(g, sgt->sgl);
-	}
-
	__gmmu_dbg(g, attrs,
		   "vm=%s "
		   "%-5s GPU virt %#-12llx +%#-9llx phys %#-12llx "
		   "phys offset: %#-4llx; pgsz: %3dkb perm=%-2s | "
		   "kind=%#02x APT=%-6s %c%c%c%c%c",
		   vm->name,
-		   sgt ? "MAP" : "UNMAP",
+		   sgl ? "MAP" : "UNMAP",
		   virt_addr,
		   length,
-		   phys_addr,
+		   sgl ? nvgpu_mem_sgl_phys(sgl) : 0,
		   space_to_skip,
		   page_size >> 10,
		   nvgpu_gmmu_perm_str(attrs->rw_flag),
@@ -696,19 +685,19 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
		   attrs->valid ? 'V' : '-');
 
	/*
-	 * Handle VIDMEM progamming. Currently uses a different scatter list
-	 * format.
+	 * For historical reasons these are separate, but soon these will be
+	 * unified.
	 */
	if (attrs->aperture == APERTURE_VIDMEM)
		err = __nvgpu_gmmu_update_page_table_vidmem(vm,
-							    sgt,
+							    sgl,
							    space_to_skip,
							    virt_addr,
							    length,
							    attrs);
	else
		err = __nvgpu_gmmu_update_page_table_sysmem(vm,
-							    sgt,
+							    sgl,
							    space_to_skip,
							    virt_addr,
							    length,
@@ -717,7 +706,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
	unmap_gmmu_pages(g, &vm->pdb);
	nvgpu_smp_mb();
 
-	__gmmu_dbg(g, attrs, "%-5s Done!", sgt ? "MAP" : "UNMAP");
+	__gmmu_dbg(g, attrs, "%-5s Done!", sgl ? "MAP" : "UNMAP");
 
	return err;
 }
@@ -736,7 +725,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
  */
 u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
			  u64 vaddr,
-			  struct sg_table *sgt,
+			  struct nvgpu_mem_sgl *sgl,
			  u64 buffer_offset,
			  u64 size,
			  int pgsz_idx,
@@ -785,7 +774,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
		allocated = true;
	}
 
-	err = __nvgpu_gmmu_update_page_table(vm, sgt, buffer_offset,
+	err = __nvgpu_gmmu_update_page_table(vm, sgl, buffer_offset,
					     vaddr, size, &attrs);
	if (err) {
		nvgpu_err(g, "failed to update ptes on map");
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
new file mode 100644
index 00000000..7296c673
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <nvgpu/kmem.h>
+#include <nvgpu/nvgpu_mem.h>
+
+#include "gk20a/gk20a.h"
+
+struct nvgpu_mem_sgl *nvgpu_mem_sgl_next(struct nvgpu_mem_sgl *sgl)
+{
+	return sgl->next;
+}
+
+u64 nvgpu_mem_sgl_phys(struct nvgpu_mem_sgl *sgl)
+{
+	return sgl->phys;
+}
+
+u64 nvgpu_mem_sgl_dma(struct nvgpu_mem_sgl *sgl)
+{
+	return sgl->dma;
+}
+
+u64 nvgpu_mem_sgl_length(struct nvgpu_mem_sgl *sgl)
+{
+	return sgl->length;
+}
+
+/*
+ * This builds a GPU address for the %sgl based on whether an IOMMU is present
+ * or not. It also handles turning the physical address into the true GPU
+ * physical address that should be programmed into the page tables.
+ */
+u64 nvgpu_mem_sgl_gpu_addr(struct gk20a *g, struct nvgpu_mem_sgl *sgl,
+			   struct nvgpu_gmmu_attrs *attrs)
+{
+	if (nvgpu_mem_sgl_dma(sgl) == 0)
+		return g->ops.mm.gpu_phys_addr(g, attrs,
+					       nvgpu_mem_sgl_phys(sgl));
+
+	if (nvgpu_mem_sgl_dma(sgl) == DMA_ERROR_CODE)
+		return 0;
+
+	return gk20a_mm_smmu_vaddr_translate(g, nvgpu_mem_sgl_dma(sgl));
+}
+
+void nvgpu_mem_sgl_free(struct gk20a *g, struct nvgpu_mem_sgl *sgl)
+{
+	struct nvgpu_mem_sgl *next;
+
+	/*
+	 * Free each of the elements. We expect each element to have been
+	 * nvgpu_k[mz]alloc()ed.
+	 */
+	while (sgl) {
+		next = nvgpu_mem_sgl_next(sgl);
+		nvgpu_kfree(g, sgl);
+		sgl = next;
+	}
+}
diff --git a/drivers/gpu/nvgpu/common/mm/page_allocator.c b/drivers/gpu/nvgpu/common/mm/page_allocator.c
index 72ff8f2d..6d92b457 100644
--- a/drivers/gpu/nvgpu/common/mm/page_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/page_allocator.c
@@ -147,19 +147,16 @@ static void __nvgpu_free_pages(struct nvgpu_page_allocator *a,
			       struct nvgpu_page_alloc *alloc,
			       bool free_buddy_alloc)
 {
-	struct page_alloc_chunk *chunk;
+	struct nvgpu_mem_sgl *sgl = alloc->sgl;
 
-	while (!nvgpu_list_empty(&alloc->alloc_chunks)) {
-		chunk = nvgpu_list_first_entry(&alloc->alloc_chunks,
-					       page_alloc_chunk,
-					       list_entry);
-		nvgpu_list_del(&chunk->list_entry);
-
-		if (free_buddy_alloc)
-			nvgpu_free(&a->source_allocator, chunk->base);
-		nvgpu_kmem_cache_free(a->chunk_cache, chunk);
+	if (free_buddy_alloc) {
+		while (sgl) {
+			nvgpu_free(&a->source_allocator, sgl->phys);
+			sgl = nvgpu_mem_sgl_next(sgl);
+		}
	}
 
+	nvgpu_mem_sgl_free(a->owner->g, alloc->sgl);
	nvgpu_kmem_cache_free(a->alloc_cache, alloc);
 }
 
@@ -243,15 +240,14 @@ static void free_slab_page(struct nvgpu_page_allocator *a,
 }
 
 /*
- * This expects @alloc to have 1 empty page_alloc_chunk already added to the
- * alloc_chunks list.
+ * This expects @alloc to have 1 empty sgl_entry ready for usage.
  */
 static int __do_slab_alloc(struct nvgpu_page_allocator *a,
			   struct page_alloc_slab *slab,
			   struct nvgpu_page_alloc *alloc)
 {
	struct page_alloc_slab_page *slab_page = NULL;
-	struct page_alloc_chunk *chunk;
+	struct nvgpu_mem_sgl *sgl;
	unsigned long offs;
 
	/*
@@ -302,18 +298,19 @@ static int __do_slab_alloc(struct nvgpu_page_allocator *a,
		BUG(); /* Should be impossible to hit this. */
 
	/*
-	 * Handle building the nvgpu_page_alloc struct. We expect one
-	 * page_alloc_chunk to be present.
+	 * Handle building the nvgpu_page_alloc struct. We expect one sgl
+	 * to be present.
	 */
	alloc->slab_page = slab_page;
	alloc->nr_chunks = 1;
	alloc->length = slab_page->slab_size;
	alloc->base = slab_page->page_addr + (offs * slab_page->slab_size);
 
-	chunk = nvgpu_list_first_entry(&alloc->alloc_chunks,
-				       page_alloc_chunk, list_entry);
-	chunk->base = alloc->base;
-	chunk->length = alloc->length;
+	sgl = alloc->sgl;
+	sgl->phys = alloc->base;
+	sgl->dma = alloc->base;
+	sgl->length = alloc->length;
+	sgl->next = NULL;
 
	return 0;
 }
@@ -327,7 +324,7 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_slab(
	int err, slab_nr;
	struct page_alloc_slab *slab;
	struct nvgpu_page_alloc *alloc = NULL;
-	struct page_alloc_chunk *chunk = NULL;
+	struct nvgpu_mem_sgl *sgl = NULL;
 
	/*
	 * Align the length to a page and then divide by the page size (4k for
@@ -341,15 +338,13 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_slab(
		palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n");
		goto fail;
	}
-	chunk = nvgpu_kmem_cache_alloc(a->chunk_cache);
-	if (!chunk) {
-		palloc_dbg(a, "OOM: could not alloc alloc_chunk struct!\n");
+	sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl));
+	if (!sgl) {
+		palloc_dbg(a, "OOM: could not alloc sgl struct!\n");
		goto fail;
	}
 
-	nvgpu_init_list_node(&alloc->alloc_chunks);
-	nvgpu_list_add(&chunk->list_entry, &alloc->alloc_chunks);
-
+	alloc->sgl = sgl;
	err = __do_slab_alloc(a, slab, alloc);
	if (err)
		goto fail;
@@ -363,8 +358,8 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_slab(
 fail:
	if (alloc)
		nvgpu_kmem_cache_free(a->alloc_cache, alloc);
-	if (chunk)
-		nvgpu_kmem_cache_free(a->chunk_cache, chunk);
+	if (sgl)
+		nvgpu_kfree(a->owner->g, sgl);
	return NULL;
 }
 
@@ -426,7 +421,7 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages(
	struct nvgpu_page_allocator *a, u64 pages)
 {
	struct nvgpu_page_alloc *alloc;
-	struct page_alloc_chunk *c;
+	struct nvgpu_mem_sgl *sgl, *prev_sgl = NULL;
	u64 max_chunk_len = pages << a->page_shift;
	int i = 0;
 
@@ -436,7 +431,6 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages(
 
	memset(alloc, 0, sizeof(*alloc));
 
-	nvgpu_init_list_node(&alloc->alloc_chunks);
	alloc->length = pages << a->page_shift;
 
	while (pages) {
@@ -482,36 +476,48 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages(
			goto fail_cleanup;
		}
 
-		c = nvgpu_kmem_cache_alloc(a->chunk_cache);
-		if (!c) {
+		sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl));
+		if (!sgl) {
			nvgpu_free(&a->source_allocator, chunk_addr);
			goto fail_cleanup;
		}
 
		pages -= chunk_pages;
 
-		c->base = chunk_addr;
-		c->length = chunk_len;
-		nvgpu_list_add(&c->list_entry, &alloc->alloc_chunks);
+		sgl->phys = chunk_addr;
+		sgl->dma = chunk_addr;
+		sgl->length = chunk_len;
+
+		/*
+		 * Build the singly linked list with a head node that is part of
+		 * the list.
+		 */
+		if (prev_sgl)
+			prev_sgl->next = sgl;
+		else
+			alloc->sgl = sgl;
+
+		prev_sgl = sgl;
 
		i++;
	}
 
	alloc->nr_chunks = i;
-	c = nvgpu_list_first_entry(&alloc->alloc_chunks,
-				   page_alloc_chunk, list_entry);
-	alloc->base = c->base;
+	alloc->base = alloc->sgl->phys;
 
	return alloc;
 
 fail_cleanup:
-	while (!nvgpu_list_empty(&alloc->alloc_chunks)) {
-		c = nvgpu_list_first_entry(&alloc->alloc_chunks,
-					   page_alloc_chunk, list_entry);
-		nvgpu_list_del(&c->list_entry);
-		nvgpu_free(&a->source_allocator, c->base);
-		nvgpu_kmem_cache_free(a->chunk_cache, c);
+	sgl = alloc->sgl;
+	while (sgl) {
+		struct nvgpu_mem_sgl *next = sgl->next;
+
+		nvgpu_free(&a->source_allocator, sgl->phys);
+		nvgpu_kfree(a->owner->g, sgl);
+
+		sgl = next;
	}
+
	nvgpu_kmem_cache_free(a->alloc_cache, alloc);
 fail:
	return NULL;
@@ -521,7 +527,7 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages(
	struct nvgpu_page_allocator *a, u64 len)
 {
	struct nvgpu_page_alloc *alloc = NULL;
-	struct page_alloc_chunk *c;
+	struct nvgpu_mem_sgl *sgl;
	u64 pages;
	int i = 0;
 
@@ -536,11 +542,15 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages(
 
	palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n",
		   pages << a->page_shift, pages, alloc->base);
-	nvgpu_list_for_each_entry(c, &alloc->alloc_chunks,
-				  page_alloc_chunk, list_entry) {
+	sgl = alloc->sgl;
+	while (sgl) {
		palloc_dbg(a, "  Chunk %2d: 0x%010llx + 0x%llx\n",
-			   i++, c->base, c->length);
+			   i++,
+			   nvgpu_mem_sgl_phys(sgl),
+			   nvgpu_mem_sgl_length(sgl));
+		sgl = sgl->next;
	}
+	palloc_dbg(a, "Alloc done\n");
 
	return alloc;
 }
@@ -638,11 +648,11 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed(
	struct nvgpu_page_allocator *a, u64 base, u64 length, u32 unused)
 {
	struct nvgpu_page_alloc *alloc;
-	struct page_alloc_chunk *c;
+	struct nvgpu_mem_sgl *sgl;
 
	alloc = nvgpu_kmem_cache_alloc(a->alloc_cache);
-	c = nvgpu_kmem_cache_alloc(a->chunk_cache);
-	if (!alloc || !c)
+	sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl));
+	if (!alloc || !sgl)
		goto fail;
 
	alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length, 0);
@@ -653,17 +663,18 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed(
 
	alloc->nr_chunks = 1;
	alloc->length = length;
-	nvgpu_init_list_node(&alloc->alloc_chunks);
+	alloc->sgl = sgl;
 
-	c->base = alloc->base;
-	c->length = length;
-	nvgpu_list_add(&c->list_entry, &alloc->alloc_chunks);
+	sgl->phys = alloc->base;
+	sgl->dma = alloc->base;
+	sgl->length = length;
+	sgl->next = NULL;
 
	return alloc;
 
 fail:
-	if (c)
-		nvgpu_kmem_cache_free(a->chunk_cache, c);
+	if (sgl)
+		nvgpu_kfree(a->owner->g, sgl);
	if (alloc)
		nvgpu_kmem_cache_free(a->alloc_cache, alloc);
	return NULL;
@@ -677,7 +688,7 @@ static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a,
 {
	struct nvgpu_page_allocator *a = page_allocator(__a);
	struct nvgpu_page_alloc *alloc = NULL;
-	struct page_alloc_chunk *c;
+	struct nvgpu_mem_sgl *sgl;
	u64 aligned_len, pages;
	int i = 0;
 
@@ -697,10 +708,13 @@ static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a,
 
	palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n",
		   alloc->base, aligned_len, pages);
-	nvgpu_list_for_each_entry(c, &alloc->alloc_chunks,
-				  page_alloc_chunk, list_entry) {
+	sgl = alloc->sgl;
+	while (sgl) {
		palloc_dbg(a, "  Chunk %2d: 0x%010llx + 0x%llx\n",
-			   i++, c->base, c->length);
+			   i++,
+			   nvgpu_mem_sgl_phys(sgl),
+			   nvgpu_mem_sgl_length(sgl));
+		sgl = sgl->next;
	}
 
	a->nr_fixed_allocs++;
@@ -896,11 +910,9 @@ int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
 
	a->alloc_cache = nvgpu_kmem_cache_create(g,
					 sizeof(struct nvgpu_page_alloc));
-	a->chunk_cache = nvgpu_kmem_cache_create(g,
-					 sizeof(struct page_alloc_chunk));
	a->slab_page_cache = nvgpu_kmem_cache_create(g,
					 sizeof(struct page_alloc_slab_page));
-	if (!a->alloc_cache || !a->chunk_cache || !a->slab_page_cache) {
+	if (!a->alloc_cache || !a->slab_page_cache) {
		err = -ENOMEM;
		goto fail;
	}
@@ -941,8 +953,6 @@ int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
 fail:
	if (a->alloc_cache)
		nvgpu_kmem_cache_destroy(a->alloc_cache);
-	if (a->chunk_cache)
-		nvgpu_kmem_cache_destroy(a->chunk_cache);
	if (a->slab_page_cache)
		nvgpu_kmem_cache_destroy(a->slab_page_cache);
	nvgpu_kfree(g, a);
diff --git a/drivers/gpu/nvgpu/common/pramin.c b/drivers/gpu/nvgpu/common/pramin.c
index 425bfdb4..bb7d930e 100644
--- a/drivers/gpu/nvgpu/common/pramin.c
+++ b/drivers/gpu/nvgpu/common/pramin.c
@@ -84,37 +84,40 @@ void nvgpu_pramin_access_batched(struct gk20a *g, struct nvgpu_mem *mem,
	u32 offset, u32 size, pramin_access_batch_fn loop, u32 **arg)
 {
	struct nvgpu_page_alloc *alloc = NULL;
-	struct page_alloc_chunk *chunk = NULL;
+	struct nvgpu_mem_sgl *sgl;
	u32 byteoff, start_reg, until_end, n;
 
	alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl);
-	nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks,
-				  page_alloc_chunk, list_entry) {
-		if (offset >= chunk->length)
-			offset -= chunk->length;
-		else
+	sgl = alloc->sgl;
+	while (sgl) {
+		if (offset >= nvgpu_mem_sgl_length(sgl)) {
+			offset -= nvgpu_mem_sgl_length(sgl);
+			sgl = sgl->next;
+		} else {
			break;
+		}
	}
 
	while (size) {
-		byteoff = g->ops.pramin.enter(g, mem, chunk,
+		u32 sgl_len = (u32)nvgpu_mem_sgl_length(sgl);
+
+		byteoff = g->ops.pramin.enter(g, mem, sgl,
					      offset / sizeof(u32));
		start_reg = g->ops.pramin.data032_r(byteoff / sizeof(u32));
		until_end = SZ_1M - (byteoff & (SZ_1M - 1));
 
-		n = min3(size, until_end, (u32)(chunk->length - offset));
+		n = min3(size, until_end, (u32)(sgl_len - offset));
 
		loop(g, start_reg, n / sizeof(u32), arg);
 
		/* read back to synchronize accesses */
		gk20a_readl(g, start_reg);
-		g->ops.pramin.exit(g, mem, chunk);
+		g->ops.pramin.exit(g, mem, sgl);
 
		size -= n;
 
-		if (n == (chunk->length - offset)) {
-			chunk = nvgpu_list_next_entry(chunk, page_alloc_chunk,
-						      list_entry);
+		if (n == (sgl_len - offset)) {
+			sgl = nvgpu_mem_sgl_next(sgl);
			offset = 0;
		} else {
			offset += n;