author     Alex Waterman <alexw@nvidia.com>  2017-05-25 19:56:50 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>  2017-09-22 15:52:48 -0400
commit     0090ee5aca268a3c359f34c74b8c521df3bd8593 (patch)
tree       2779dc64554cdb38b717ce09c0e3dcbf36107ed3 /drivers
parent     e32cc0108cf2ef5de7a17f0f6c0aa9af7faf23ed (diff)
gpu: nvgpu: nvgpu SGL implementation
The last major item preventing the core MM code in the nvgpu driver from being platform agnostic is the usage of Linux scatter-gather tables and scatter-gather lists. These data structures are used throughout the mapping code to handle discontiguous DMA allocations and are also overloaded to represent VIDMEM allocs.

The notion of a scatter-gather table is crucial to a HW device that can handle discontiguous DMA. The GPU has an MMU which allows the GPU to do page gathering and present a virtually contiguous buffer to the GPU HW. As a result, it makes sense for the GPU driver to use some sort of scatter-gather concept to maximize memory usage efficiency.

To that end, this patch keeps the notion of a scatter-gather list but implements it in the nvgpu common code. It is based heavily on the Linux SGL concept: a singly linked list of blocks, each representing a chunk of memory. To map or use a DMA allocation, SW must iterate over each block in the SGL.

This patch implements the most basic level of support for this data structure. There are certainly easy optimizations that could be done to speed up the current implementation; however, this patch's goal is simply to divest the core MM code of any last Linux-isms. Speed and efficiency come next.

Change-Id: Icf44641db22d87fa1d003debbd9f71b605258e42
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1530867
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
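For orientation, the structure described above reduces to a small struct plus a walk loop. The sketch below restates the struct and the loop shape the patch introduces (the real definition and accessors are in the include/nvgpu/nvgpu_mem.h hunk further down); the example_walk_sgl() helper itself is illustrative only and not part of the patch.

/* Shape of the nvgpu SGL introduced by this patch. */
struct nvgpu_mem_sgl {
	struct nvgpu_mem_sgl *next;	/* Singly linked list of chunks.     */
	u64 phys;			/* Physical address of this chunk.   */
	u64 dma;			/* DMA (IOMMU) address, if distinct. */
	u64 length;			/* Chunk length in bytes.            */
};

/*
 * Illustrative only: the loop shape every consumer of the SGL uses,
 * i.e. walk the chunks one at a time and map/program each range.
 */
static void example_walk_sgl(struct nvgpu_mem_sgl *sgl)
{
	while (sgl) {
		/* Use [sgl->phys, sgl->phys + sgl->length) here. */
		sgl = sgl->next;
	}
}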
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/gpu/nvgpu/Makefile.nvgpu                   |   1
-rw-r--r--  drivers/gpu/nvgpu/common/linux/nvgpu_mem.c         | 114
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vm.c                |  25
-rw-r--r--  drivers/gpu/nvgpu/common/mm/gmmu.c                 | 109
-rw-r--r--  drivers/gpu/nvgpu/common/mm/nvgpu_mem.c            |  73
-rw-r--r--  drivers/gpu/nvgpu/common/mm/page_allocator.c       | 142
-rw-r--r--  drivers/gpu/nvgpu/common/pramin.c                  |  27
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h                    |   9
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c                 |  20
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h                 |  43
-rw-r--r--  drivers/gpu/nvgpu/gk20a/pramin_gk20a.c             |  13
-rw-r--r--  drivers/gpu/nvgpu/gk20a/pramin_gk20a.h             |   6
-rw-r--r--  drivers/gpu/nvgpu/gp10b/gr_gp10b.c                 |   2
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/gmmu.h             |   2
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h  |   2
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/log.h              |   1
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h        |  45
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/page_allocator.h   |  22
-rw-r--r--  drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c       |  55
-rw-r--r--  drivers/gpu/nvgpu/vgpu/mm_vgpu.c                   |   4
20 files changed, 474 insertions, 241 deletions
diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu
index d02870fb..6e475fcb 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu
@@ -55,6 +55,7 @@ nvgpu-y := \
55 common/mm/pd_cache.o \ 55 common/mm/pd_cache.o \
56 common/mm/vm.o \ 56 common/mm/vm.o \
57 common/mm/vm_area.o \ 57 common/mm/vm_area.o \
58 common/mm/nvgpu_mem.o \
58 common/bus.o \ 59 common/bus.o \
59 common/enabled.o \ 60 common/enabled.o \
60 common/pramin.o \ 61 common/pramin.o \
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
index e4991d0d..eb54f3fd 100644
--- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
@@ -21,6 +21,7 @@
21#include <nvgpu/log.h> 21#include <nvgpu/log.h>
22#include <nvgpu/bug.h> 22#include <nvgpu/bug.h>
23#include <nvgpu/enabled.h> 23#include <nvgpu/enabled.h>
24#include <nvgpu/kmem.h>
24 25
25#include <nvgpu/linux/dma.h> 26#include <nvgpu/linux/dma.h>
26 27
@@ -395,3 +396,116 @@ int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest,
395 396
396 return 0; 397 return 0;
397} 398}
399
400static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_dup(struct gk20a *g,
401 struct nvgpu_mem_sgl *sgl)
402{
403 struct nvgpu_mem_sgl *head, *next;
404
405 head = nvgpu_kzalloc(g, sizeof(*sgl));
406 if (!head)
407 return NULL;
408
409 next = head;
410 while (true) {
411 nvgpu_log(g, gpu_dbg_sgl,
412 " phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx",
413 sgl->phys, sgl->dma, sgl->length);
414
415 next->dma = sgl->dma;
416 next->phys = sgl->phys;
417 next->length = sgl->length;
418 next->next = NULL;
419
420 sgl = nvgpu_mem_sgl_next(sgl);
421 if (!sgl)
422 break;
423
424 next->next = nvgpu_kzalloc(g, sizeof(*sgl));
425 if (!next->next) {
426 nvgpu_mem_sgl_free(g, head);
427 return NULL;
428 }
429 next = next->next;
430 }
431
432 return head;
433}
434
435static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_create_from_vidmem(
436 struct gk20a *g,
437 struct scatterlist *linux_sgl)
438{
439 struct nvgpu_page_alloc *vidmem_alloc;
440
441 vidmem_alloc = get_vidmem_page_alloc(linux_sgl);
442 if (!vidmem_alloc)
443 return NULL;
444
445 nvgpu_log(g, gpu_dbg_sgl, "Vidmem sgl:");
446
447 return __nvgpu_mem_sgl_dup(g, vidmem_alloc->sgl);
448}
449
450struct nvgpu_mem_sgl *nvgpu_mem_sgl_create(struct gk20a *g,
451 struct sg_table *sgt)
452{
453 struct nvgpu_mem_sgl *head, *sgl, *next;
454 struct scatterlist *linux_sgl = sgt->sgl;
455
456 if (is_vidmem_page_alloc(sg_dma_address(linux_sgl)))
457 return __nvgpu_mem_sgl_create_from_vidmem(g, linux_sgl);
458
459 head = nvgpu_kzalloc(g, sizeof(*sgl));
460 if (!head)
461 return NULL;
462
463 nvgpu_log(g, gpu_dbg_sgl, "Making sgl:");
464
465 sgl = head;
466 while (true) {
467 sgl->dma = sg_dma_address(linux_sgl);
468 sgl->phys = sg_phys(linux_sgl);
469 sgl->length = linux_sgl->length;
470
471 /*
472 * We don't like offsets in the pages here. This will cause
473 * problems.
474 */
475 if (WARN_ON(linux_sgl->offset)) {
476 nvgpu_mem_sgl_free(g, head);
477 return NULL;
478 }
479
480 nvgpu_log(g, gpu_dbg_sgl,
481 " phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx",
482 sgl->phys, sgl->dma, sgl->length);
483
484 /*
485 * When there's no more SGL ents for the Linux SGL we are
486 * done. Don't bother making any more SGL ents for the nvgpu
487 * SGL.
488 */
489 linux_sgl = sg_next(linux_sgl);
490 if (!linux_sgl)
491 break;
492
493 next = nvgpu_kzalloc(g, sizeof(*sgl));
494 if (!next) {
495 nvgpu_mem_sgl_free(g, head);
496 return NULL;
497 }
498
499 sgl->next = next;
500 sgl = next;
501 }
502
503 nvgpu_log(g, gpu_dbg_sgl, "Done!");
504 return head;
505}
506
507struct nvgpu_mem_sgl *nvgpu_mem_sgl_create_from_mem(struct gk20a *g,
508 struct nvgpu_mem *mem)
509{
510 return nvgpu_mem_sgl_create(g, mem->priv.sgt);
511}
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
index 86d8bec9..4a4429dc 100644
--- a/drivers/gpu/nvgpu/common/linux/vm.c
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -21,8 +21,11 @@
21#include <nvgpu/lock.h> 21#include <nvgpu/lock.h>
22#include <nvgpu/rbtree.h> 22#include <nvgpu/rbtree.h>
23#include <nvgpu/vm_area.h> 23#include <nvgpu/vm_area.h>
24#include <nvgpu/nvgpu_mem.h>
24#include <nvgpu/page_allocator.h> 25#include <nvgpu/page_allocator.h>
25 26
27#include <nvgpu/linux/nvgpu_mem.h>
28
26#include "gk20a/gk20a.h" 29#include "gk20a/gk20a.h"
27#include "gk20a/mm_gk20a.h" 30#include "gk20a/mm_gk20a.h"
28#include "gk20a/kind_gk20a.h" 31#include "gk20a/kind_gk20a.h"
@@ -66,17 +69,19 @@ static u64 nvgpu_get_buffer_alignment(struct gk20a *g, struct scatterlist *sgl,
66 69
67 if (aperture == APERTURE_VIDMEM) { 70 if (aperture == APERTURE_VIDMEM) {
68 struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl); 71 struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl);
69 struct page_alloc_chunk *chunk = NULL; 72 struct nvgpu_mem_sgl *sgl_vid = alloc->sgl;
70 73
71 nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks, 74 while (sgl_vid) {
72 page_alloc_chunk, list_entry) { 75 chunk_align = 1ULL <<
73 chunk_align = 1ULL << __ffs(chunk->base | 76 __ffs(nvgpu_mem_sgl_phys(sgl_vid) |
74 chunk->length); 77 nvgpu_mem_sgl_length(sgl_vid));
75 78
76 if (align) 79 if (align)
77 align = min(align, chunk_align); 80 align = min(align, chunk_align);
78 else 81 else
79 align = chunk_align; 82 align = chunk_align;
83
84 sgl_vid = nvgpu_mem_sgl_next(sgl_vid);
80 } 85 }
81 86
82 return align; 87 return align;
@@ -237,6 +242,7 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
237 struct nvgpu_vm_area *vm_area = NULL; 242 struct nvgpu_vm_area *vm_area = NULL;
238 u32 ctag_offset; 243 u32 ctag_offset;
239 enum nvgpu_aperture aperture; 244 enum nvgpu_aperture aperture;
245 struct nvgpu_mem_sgl *nvgpu_sgl;
240 246
241 /* 247 /*
242 * The kind used as part of the key for map caching. HW may 248 * The kind used as part of the key for map caching. HW may
@@ -393,9 +399,12 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
393 ctag_offset += buffer_offset >> 399 ctag_offset += buffer_offset >>
394 ilog2(g->ops.fb.compression_page_size(g)); 400 ilog2(g->ops.fb.compression_page_size(g));
395 401
402 nvgpu_sgl = nvgpu_mem_sgl_create(g, bfr.sgt);
403
396 /* update gmmu ptes */ 404 /* update gmmu ptes */
397 map_offset = g->ops.mm.gmmu_map(vm, map_offset, 405 map_offset = g->ops.mm.gmmu_map(vm,
398 bfr.sgt, 406 map_offset,
407 nvgpu_sgl,
399 buffer_offset, /* sg offset */ 408 buffer_offset, /* sg offset */
400 mapping_size, 409 mapping_size,
401 bfr.pgsz_idx, 410 bfr.pgsz_idx,
@@ -410,6 +419,8 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
410 if (!map_offset) 419 if (!map_offset)
411 goto clean_up; 420 goto clean_up;
412 421
422 nvgpu_mem_sgl_free(g, nvgpu_sgl);
423
413 mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer)); 424 mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer));
414 if (!mapped_buffer) { 425 if (!mapped_buffer) {
415 nvgpu_warn(g, "oom allocating tracking buffer"); 426 nvgpu_warn(g, "oom allocating tracking buffer");
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 7f486d68..41f5acdd 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -65,11 +65,14 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
65 struct gk20a *g = gk20a_from_vm(vm); 65 struct gk20a *g = gk20a_from_vm(vm);
66 u64 vaddr; 66 u64 vaddr;
67 67
68 struct sg_table *sgt = mem->priv.sgt; 68 struct nvgpu_mem_sgl *sgl = nvgpu_mem_sgl_create_from_mem(g, mem);
69
70 if (!sgl)
71 return -ENOMEM;
69 72
70 nvgpu_mutex_acquire(&vm->update_gmmu_lock); 73 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
71 vaddr = g->ops.mm.gmmu_map(vm, addr, 74 vaddr = g->ops.mm.gmmu_map(vm, addr,
72 sgt, /* sg table */ 75 sgl, /* sg list */
73 0, /* sg offset */ 76 0, /* sg offset */
74 size, 77 size,
75 gmmu_page_size_kernel, 78 gmmu_page_size_kernel,
@@ -82,8 +85,11 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
82 NULL, /* mapping_batch handle */ 85 NULL, /* mapping_batch handle */
83 aperture); 86 aperture);
84 nvgpu_mutex_release(&vm->update_gmmu_lock); 87 nvgpu_mutex_release(&vm->update_gmmu_lock);
88
89 nvgpu_mem_sgl_free(g, sgl);
90
85 if (!vaddr) { 91 if (!vaddr) {
86 nvgpu_err(g, "failed to allocate va space"); 92 nvgpu_err(g, "failed to map buffer!");
87 return 0; 93 return 0;
88 } 94 }
89 95
@@ -91,7 +97,7 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
91} 97}
92 98
93/* 99/*
94 * Convenience wrapper over __nvgpu_gmmu_map() for non-fixed mappings. 100 * Map a nvgpu_mem into the GMMU. This is for kernel space to use.
95 */ 101 */
96u64 nvgpu_gmmu_map(struct vm_gk20a *vm, 102u64 nvgpu_gmmu_map(struct vm_gk20a *vm,
97 struct nvgpu_mem *mem, 103 struct nvgpu_mem *mem,
@@ -106,7 +112,7 @@ u64 nvgpu_gmmu_map(struct vm_gk20a *vm,
106} 112}
107 113
108/* 114/*
109 * Like nvgpu_gmmu_map() except it can work on a fixed address instead. 115 * Like nvgpu_gmmu_map() except this can work on a fixed address.
110 */ 116 */
111u64 nvgpu_gmmu_map_fixed(struct vm_gk20a *vm, 117u64 nvgpu_gmmu_map_fixed(struct vm_gk20a *vm,
112 struct nvgpu_mem *mem, 118 struct nvgpu_mem *mem,
@@ -407,7 +413,7 @@ static int __set_pd_level(struct vm_gk20a *vm,
407 */ 413 */
408 target_addr = next_pd ? 414 target_addr = next_pd ?
409 nvgpu_pde_phys_addr(g, next_pd) : 415 nvgpu_pde_phys_addr(g, next_pd) :
410 g->ops.mm.gpu_phys_addr(g, attrs, phys_addr); 416 phys_addr;
411 417
412 l->update_entry(vm, l, 418 l->update_entry(vm, l,
413 pd, pd_idx, 419 pd, pd_idx,
@@ -458,18 +464,16 @@ static int __set_pd_level(struct vm_gk20a *vm,
458 * VIDMEM version of the update_ptes logic. 464 * VIDMEM version of the update_ptes logic.
459 */ 465 */
460static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm, 466static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm,
461 struct sg_table *sgt, 467 struct nvgpu_mem_sgl *sgl,
462 u64 space_to_skip, 468 u64 space_to_skip,
463 u64 virt_addr, 469 u64 virt_addr,
464 u64 length, 470 u64 length,
465 struct nvgpu_gmmu_attrs *attrs) 471 struct nvgpu_gmmu_attrs *attrs)
466{ 472{
467 struct nvgpu_page_alloc *alloc = NULL;
468 struct page_alloc_chunk *chunk = NULL;
469 u64 phys_addr, chunk_length; 473 u64 phys_addr, chunk_length;
470 int err = 0; 474 int err = 0;
471 475
472 if (!sgt) { 476 if (!sgl) {
473 /* 477 /*
474 * This is considered an unmap. Just pass in 0 as the physical 478 * This is considered an unmap. Just pass in 0 as the physical
475 * address for the entire GPU range. 479 * address for the entire GPU range.
@@ -482,22 +486,21 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm,
482 return err; 486 return err;
483 } 487 }
484 488
485 alloc = get_vidmem_page_alloc(sgt->sgl);
486
487 /* 489 /*
488 * Otherwise iterate across all the chunks in this allocation and 490 * Otherwise iterate across all the chunks in this allocation and
489 * map them. 491 * map them.
490 */ 492 */
491 nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks, 493 while (sgl) {
492 page_alloc_chunk, list_entry) {
493 if (space_to_skip && 494 if (space_to_skip &&
494 space_to_skip >= chunk->length) { 495 space_to_skip >= nvgpu_mem_sgl_length(sgl)) {
495 space_to_skip -= chunk->length; 496 space_to_skip -= nvgpu_mem_sgl_length(sgl);
497 sgl = nvgpu_mem_sgl_next(sgl);
496 continue; 498 continue;
497 } 499 }
498 500
499 phys_addr = chunk->base + space_to_skip; 501 phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip;
500 chunk_length = min(length, (chunk->length - space_to_skip)); 502 chunk_length = min(length, (nvgpu_mem_sgl_length(sgl) -
503 space_to_skip));
501 504
502 err = __set_pd_level(vm, &vm->pdb, 505 err = __set_pd_level(vm, &vm->pdb,
503 0, 506 0,
@@ -518,23 +521,24 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm,
518 521
519 if (length == 0) 522 if (length == 0)
520 break; 523 break;
524
525 sgl = nvgpu_mem_sgl_next(sgl);
521 } 526 }
522 527
523 return err; 528 return err;
524} 529}
525 530
526static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, 531static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
527 struct sg_table *sgt, 532 struct nvgpu_mem_sgl *sgl,
528 u64 space_to_skip, 533 u64 space_to_skip,
529 u64 virt_addr, 534 u64 virt_addr,
530 u64 length, 535 u64 length,
531 struct nvgpu_gmmu_attrs *attrs) 536 struct nvgpu_gmmu_attrs *attrs)
532{ 537{
533 int err; 538 int err;
534 struct scatterlist *sgl;
535 struct gk20a *g = gk20a_from_vm(vm); 539 struct gk20a *g = gk20a_from_vm(vm);
536 540
537 if (!sgt) { 541 if (!sgl) {
538 /* 542 /*
539 * This is considered an unmap. Just pass in 0 as the physical 543 * This is considered an unmap. Just pass in 0 as the physical
540 * address for the entire GPU range. 544 * address for the entire GPU range.
@@ -548,19 +552,15 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
548 } 552 }
549 553
550 /* 554 /*
551 * At this point we have a Linux scatter-gather list pointing to some 555 * At this point we have a scatter-gather list pointing to some number
552 * number of discontiguous chunks of memory. Iterate over that list and 556 * of discontiguous chunks of memory. We must iterate over that list and
553 * generate a GMMU map call for each chunk. There are two possibilities: 557 * generate a GMMU map call for each chunk. There are two possibilities:
554 * either the IOMMU is enabled or not. When the IOMMU is enabled the 558 * either an IOMMU is enabled or not. When an IOMMU is enabled the
555 * mapping is simple since the "physical" address is actually a virtual 559 * mapping is simple since the "physical" address is actually a virtual
556 * IO address and will be contiguous. The no-IOMMU case is more 560 * IO address and will be contiguous.
557 * complicated. We will have to iterate over the SGT and do a separate
558 * map for each chunk of the SGT.
559 */ 561 */
560 sgl = sgt->sgl;
561
562 if (!g->mm.bypass_smmu) { 562 if (!g->mm.bypass_smmu) {
563 u64 io_addr = nvgpu_mem_get_addr_sgl(g, sgl); 563 u64 io_addr = nvgpu_mem_sgl_gpu_addr(g, sgl, attrs);
564 564
565 io_addr += space_to_skip; 565 io_addr += space_to_skip;
566 566
@@ -585,14 +585,16 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
585 /* 585 /*
586 * Cut out sgl ents for space_to_skip. 586 * Cut out sgl ents for space_to_skip.
587 */ 587 */
588 if (space_to_skip && space_to_skip >= sgl->length) { 588 if (space_to_skip &&
589 space_to_skip -= sgl->length; 589 space_to_skip >= nvgpu_mem_sgl_length(sgl)) {
590 sgl = sg_next(sgl); 590 space_to_skip -= nvgpu_mem_sgl_length(sgl);
591 sgl = nvgpu_mem_sgl_next(sgl);
591 continue; 592 continue;
592 } 593 }
593 594
594 phys_addr = sg_phys(sgl) + space_to_skip; 595 phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip;
595 chunk_length = min(length, sgl->length - space_to_skip); 596 chunk_length = min(length,
597 nvgpu_mem_sgl_length(sgl) - space_to_skip);
596 598
597 err = __set_pd_level(vm, &vm->pdb, 599 err = __set_pd_level(vm, &vm->pdb,
598 0, 600 0,
@@ -600,13 +602,11 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
600 virt_addr, 602 virt_addr,
601 chunk_length, 603 chunk_length,
602 attrs); 604 attrs);
603 if (err)
604 return err;
605 605
606 space_to_skip = 0; 606 space_to_skip = 0;
607 virt_addr += chunk_length; 607 virt_addr += chunk_length;
608 length -= chunk_length; 608 length -= chunk_length;
609 sgl = sg_next(sgl); 609 sgl = nvgpu_mem_sgl_next(sgl);
610 610
611 if (length == 0) 611 if (length == 0)
612 break; 612 break;
@@ -624,22 +624,20 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
624 * implementations. But the logic around that is generic to all chips. Every 624 * implementations. But the logic around that is generic to all chips. Every
625 * chip has some number of PDE levels and then a PTE level. 625 * chip has some number of PDE levels and then a PTE level.
626 * 626 *
627 * Each chunk of the incoming SGT is sent to the chip specific implementation 627 * Each chunk of the incoming SGL is sent to the chip specific implementation
628 * of page table update. 628 * of page table update.
629 * 629 *
630 * [*] Note: the "physical" address may actually be an IO virtual address in the 630 * [*] Note: the "physical" address may actually be an IO virtual address in the
631 * case of SMMU usage. 631 * case of SMMU usage.
632 */ 632 */
633static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, 633static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
634 struct sg_table *sgt, 634 struct nvgpu_mem_sgl *sgl,
635 u64 space_to_skip, 635 u64 space_to_skip,
636 u64 virt_addr, 636 u64 virt_addr,
637 u64 length, 637 u64 length,
638 struct nvgpu_gmmu_attrs *attrs) 638 struct nvgpu_gmmu_attrs *attrs)
639{ 639{
640 struct gk20a *g = gk20a_from_vm(vm); 640 struct gk20a *g = gk20a_from_vm(vm);
641 struct nvgpu_page_alloc *alloc;
642 u64 phys_addr = 0;
643 u32 page_size; 641 u32 page_size;
644 int err; 642 int err;
645 643
@@ -665,25 +663,16 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
665 return err; 663 return err;
666 } 664 }
667 665
668 if (sgt) {
669 if (attrs->aperture == APERTURE_VIDMEM) {
670 alloc = get_vidmem_page_alloc(sgt->sgl);
671
672 phys_addr = alloc->base;
673 } else
674 phys_addr = nvgpu_mem_get_addr_sgl(g, sgt->sgl);
675 }
676
677 __gmmu_dbg(g, attrs, 666 __gmmu_dbg(g, attrs,
678 "vm=%s " 667 "vm=%s "
679 "%-5s GPU virt %#-12llx +%#-9llx phys %#-12llx " 668 "%-5s GPU virt %#-12llx +%#-9llx phys %#-12llx "
680 "phys offset: %#-4llx; pgsz: %3dkb perm=%-2s | " 669 "phys offset: %#-4llx; pgsz: %3dkb perm=%-2s | "
681 "kind=%#02x APT=%-6s %c%c%c%c%c", 670 "kind=%#02x APT=%-6s %c%c%c%c%c",
682 vm->name, 671 vm->name,
683 sgt ? "MAP" : "UNMAP", 672 sgl ? "MAP" : "UNMAP",
684 virt_addr, 673 virt_addr,
685 length, 674 length,
686 phys_addr, 675 sgl ? nvgpu_mem_sgl_phys(sgl) : 0,
687 space_to_skip, 676 space_to_skip,
688 page_size >> 10, 677 page_size >> 10,
689 nvgpu_gmmu_perm_str(attrs->rw_flag), 678 nvgpu_gmmu_perm_str(attrs->rw_flag),
@@ -696,19 +685,19 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
696 attrs->valid ? 'V' : '-'); 685 attrs->valid ? 'V' : '-');
697 686
698 /* 687 /*
699 * Handle VIDMEM progamming. Currently uses a different scatter list 688 * For historical reasons these are separate, but soon these will be
700 * format. 689 * unified.
701 */ 690 */
702 if (attrs->aperture == APERTURE_VIDMEM) 691 if (attrs->aperture == APERTURE_VIDMEM)
703 err = __nvgpu_gmmu_update_page_table_vidmem(vm, 692 err = __nvgpu_gmmu_update_page_table_vidmem(vm,
704 sgt, 693 sgl,
705 space_to_skip, 694 space_to_skip,
706 virt_addr, 695 virt_addr,
707 length, 696 length,
708 attrs); 697 attrs);
709 else 698 else
710 err = __nvgpu_gmmu_update_page_table_sysmem(vm, 699 err = __nvgpu_gmmu_update_page_table_sysmem(vm,
711 sgt, 700 sgl,
712 space_to_skip, 701 space_to_skip,
713 virt_addr, 702 virt_addr,
714 length, 703 length,
@@ -717,7 +706,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
717 unmap_gmmu_pages(g, &vm->pdb); 706 unmap_gmmu_pages(g, &vm->pdb);
718 nvgpu_smp_mb(); 707 nvgpu_smp_mb();
719 708
720 __gmmu_dbg(g, attrs, "%-5s Done!", sgt ? "MAP" : "UNMAP"); 709 __gmmu_dbg(g, attrs, "%-5s Done!", sgl ? "MAP" : "UNMAP");
721 710
722 return err; 711 return err;
723} 712}
@@ -736,7 +725,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
736 */ 725 */
737u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, 726u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
738 u64 vaddr, 727 u64 vaddr,
739 struct sg_table *sgt, 728 struct nvgpu_mem_sgl *sgl,
740 u64 buffer_offset, 729 u64 buffer_offset,
741 u64 size, 730 u64 size,
742 int pgsz_idx, 731 int pgsz_idx,
@@ -785,7 +774,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
785 allocated = true; 774 allocated = true;
786 } 775 }
787 776
788 err = __nvgpu_gmmu_update_page_table(vm, sgt, buffer_offset, 777 err = __nvgpu_gmmu_update_page_table(vm, sgl, buffer_offset,
789 vaddr, size, &attrs); 778 vaddr, size, &attrs);
790 if (err) { 779 if (err) {
791 nvgpu_err(g, "failed to update ptes on map"); 780 nvgpu_err(g, "failed to update ptes on map");
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
new file mode 100644
index 00000000..7296c673
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
@@ -0,0 +1,73 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <nvgpu/kmem.h>
18#include <nvgpu/nvgpu_mem.h>
19
20#include "gk20a/gk20a.h"
21
22struct nvgpu_mem_sgl *nvgpu_mem_sgl_next(struct nvgpu_mem_sgl *sgl)
23{
24 return sgl->next;
25}
26
27u64 nvgpu_mem_sgl_phys(struct nvgpu_mem_sgl *sgl)
28{
29 return sgl->phys;
30}
31
32u64 nvgpu_mem_sgl_dma(struct nvgpu_mem_sgl *sgl)
33{
34 return sgl->dma;
35}
36
37u64 nvgpu_mem_sgl_length(struct nvgpu_mem_sgl *sgl)
38{
39 return sgl->length;
40}
41
42/*
43 * This builds a GPU address for the %sgl based on whether an IOMMU is present
44 * or not. It also handles turning the physical address into the true GPU
45 * physical address that should be programmed into the page tables.
46 */
47u64 nvgpu_mem_sgl_gpu_addr(struct gk20a *g, struct nvgpu_mem_sgl *sgl,
48 struct nvgpu_gmmu_attrs *attrs)
49{
50 if (nvgpu_mem_sgl_dma(sgl) == 0)
51 return g->ops.mm.gpu_phys_addr(g, attrs,
52 nvgpu_mem_sgl_phys(sgl));
53
54 if (nvgpu_mem_sgl_dma(sgl) == DMA_ERROR_CODE)
55 return 0;
56
57 return gk20a_mm_smmu_vaddr_translate(g, nvgpu_mem_sgl_dma(sgl));
58}
59
60void nvgpu_mem_sgl_free(struct gk20a *g, struct nvgpu_mem_sgl *sgl)
61{
62 struct nvgpu_mem_sgl *next;
63
64 /*
65 * Free each of the elements. We expect each element to have been
66 * nvgpu_k[mz]alloc()ed.
67 */
68 while (sgl) {
69 next = nvgpu_mem_sgl_next(sgl);
70 nvgpu_kfree(g, sgl);
71 sgl = next;
72 }
73}
diff --git a/drivers/gpu/nvgpu/common/mm/page_allocator.c b/drivers/gpu/nvgpu/common/mm/page_allocator.c
index 72ff8f2d..6d92b457 100644
--- a/drivers/gpu/nvgpu/common/mm/page_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/page_allocator.c
@@ -147,19 +147,16 @@ static void __nvgpu_free_pages(struct nvgpu_page_allocator *a,
147 struct nvgpu_page_alloc *alloc, 147 struct nvgpu_page_alloc *alloc,
148 bool free_buddy_alloc) 148 bool free_buddy_alloc)
149{ 149{
150 struct page_alloc_chunk *chunk; 150 struct nvgpu_mem_sgl *sgl = alloc->sgl;
151 151
152 while (!nvgpu_list_empty(&alloc->alloc_chunks)) { 152 if (free_buddy_alloc) {
153 chunk = nvgpu_list_first_entry(&alloc->alloc_chunks, 153 while (sgl) {
154 page_alloc_chunk, 154 nvgpu_free(&a->source_allocator, sgl->phys);
155 list_entry); 155 sgl = nvgpu_mem_sgl_next(sgl);
156 nvgpu_list_del(&chunk->list_entry); 156 }
157
158 if (free_buddy_alloc)
159 nvgpu_free(&a->source_allocator, chunk->base);
160 nvgpu_kmem_cache_free(a->chunk_cache, chunk);
161 } 157 }
162 158
159 nvgpu_mem_sgl_free(a->owner->g, alloc->sgl);
163 nvgpu_kmem_cache_free(a->alloc_cache, alloc); 160 nvgpu_kmem_cache_free(a->alloc_cache, alloc);
164} 161}
165 162
@@ -243,15 +240,14 @@ static void free_slab_page(struct nvgpu_page_allocator *a,
243} 240}
244 241
245/* 242/*
246 * This expects @alloc to have 1 empty page_alloc_chunk already added to the 243 * This expects @alloc to have 1 empty sgl_entry ready for usage.
247 * alloc_chunks list.
248 */ 244 */
249static int __do_slab_alloc(struct nvgpu_page_allocator *a, 245static int __do_slab_alloc(struct nvgpu_page_allocator *a,
250 struct page_alloc_slab *slab, 246 struct page_alloc_slab *slab,
251 struct nvgpu_page_alloc *alloc) 247 struct nvgpu_page_alloc *alloc)
252{ 248{
253 struct page_alloc_slab_page *slab_page = NULL; 249 struct page_alloc_slab_page *slab_page = NULL;
254 struct page_alloc_chunk *chunk; 250 struct nvgpu_mem_sgl *sgl;
255 unsigned long offs; 251 unsigned long offs;
256 252
257 /* 253 /*
@@ -302,18 +298,19 @@ static int __do_slab_alloc(struct nvgpu_page_allocator *a,
302 BUG(); /* Should be impossible to hit this. */ 298 BUG(); /* Should be impossible to hit this. */
303 299
304 /* 300 /*
305 * Handle building the nvgpu_page_alloc struct. We expect one 301 * Handle building the nvgpu_page_alloc struct. We expect one sgl
306 * page_alloc_chunk to be present. 302 * to be present.
307 */ 303 */
308 alloc->slab_page = slab_page; 304 alloc->slab_page = slab_page;
309 alloc->nr_chunks = 1; 305 alloc->nr_chunks = 1;
310 alloc->length = slab_page->slab_size; 306 alloc->length = slab_page->slab_size;
311 alloc->base = slab_page->page_addr + (offs * slab_page->slab_size); 307 alloc->base = slab_page->page_addr + (offs * slab_page->slab_size);
312 308
313 chunk = nvgpu_list_first_entry(&alloc->alloc_chunks, 309 sgl = alloc->sgl;
314 page_alloc_chunk, list_entry); 310 sgl->phys = alloc->base;
315 chunk->base = alloc->base; 311 sgl->dma = alloc->base;
316 chunk->length = alloc->length; 312 sgl->length = alloc->length;
313 sgl->next = NULL;
317 314
318 return 0; 315 return 0;
319} 316}
@@ -327,7 +324,7 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_slab(
327 int err, slab_nr; 324 int err, slab_nr;
328 struct page_alloc_slab *slab; 325 struct page_alloc_slab *slab;
329 struct nvgpu_page_alloc *alloc = NULL; 326 struct nvgpu_page_alloc *alloc = NULL;
330 struct page_alloc_chunk *chunk = NULL; 327 struct nvgpu_mem_sgl *sgl = NULL;
331 328
332 /* 329 /*
333 * Align the length to a page and then divide by the page size (4k for 330 * Align the length to a page and then divide by the page size (4k for
@@ -341,15 +338,13 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_slab(
341 palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n"); 338 palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n");
342 goto fail; 339 goto fail;
343 } 340 }
344 chunk = nvgpu_kmem_cache_alloc(a->chunk_cache); 341 sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl));
345 if (!chunk) { 342 if (!sgl) {
346 palloc_dbg(a, "OOM: could not alloc alloc_chunk struct!\n"); 343 palloc_dbg(a, "OOM: could not alloc sgl struct!\n");
347 goto fail; 344 goto fail;
348 } 345 }
349 346
350 nvgpu_init_list_node(&alloc->alloc_chunks); 347 alloc->sgl = sgl;
351 nvgpu_list_add(&chunk->list_entry, &alloc->alloc_chunks);
352
353 err = __do_slab_alloc(a, slab, alloc); 348 err = __do_slab_alloc(a, slab, alloc);
354 if (err) 349 if (err)
355 goto fail; 350 goto fail;
@@ -363,8 +358,8 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_slab(
363fail: 358fail:
364 if (alloc) 359 if (alloc)
365 nvgpu_kmem_cache_free(a->alloc_cache, alloc); 360 nvgpu_kmem_cache_free(a->alloc_cache, alloc);
366 if (chunk) 361 if (sgl)
367 nvgpu_kmem_cache_free(a->chunk_cache, chunk); 362 nvgpu_kfree(a->owner->g, sgl);
368 return NULL; 363 return NULL;
369} 364}
370 365
@@ -426,7 +421,7 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages(
426 struct nvgpu_page_allocator *a, u64 pages) 421 struct nvgpu_page_allocator *a, u64 pages)
427{ 422{
428 struct nvgpu_page_alloc *alloc; 423 struct nvgpu_page_alloc *alloc;
429 struct page_alloc_chunk *c; 424 struct nvgpu_mem_sgl *sgl, *prev_sgl = NULL;
430 u64 max_chunk_len = pages << a->page_shift; 425 u64 max_chunk_len = pages << a->page_shift;
431 int i = 0; 426 int i = 0;
432 427
@@ -436,7 +431,6 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages(
436 431
437 memset(alloc, 0, sizeof(*alloc)); 432 memset(alloc, 0, sizeof(*alloc));
438 433
439 nvgpu_init_list_node(&alloc->alloc_chunks);
440 alloc->length = pages << a->page_shift; 434 alloc->length = pages << a->page_shift;
441 435
442 while (pages) { 436 while (pages) {
@@ -482,36 +476,48 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages(
482 goto fail_cleanup; 476 goto fail_cleanup;
483 } 477 }
484 478
485 c = nvgpu_kmem_cache_alloc(a->chunk_cache); 479 sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl));
486 if (!c) { 480 if (!sgl) {
487 nvgpu_free(&a->source_allocator, chunk_addr); 481 nvgpu_free(&a->source_allocator, chunk_addr);
488 goto fail_cleanup; 482 goto fail_cleanup;
489 } 483 }
490 484
491 pages -= chunk_pages; 485 pages -= chunk_pages;
492 486
493 c->base = chunk_addr; 487 sgl->phys = chunk_addr;
494 c->length = chunk_len; 488 sgl->dma = chunk_addr;
495 nvgpu_list_add(&c->list_entry, &alloc->alloc_chunks); 489 sgl->length = chunk_len;
490
491 /*
492 * Build the singly linked list with a head node that is part of
493 * the list.
494 */
495 if (prev_sgl)
496 prev_sgl->next = sgl;
497 else
498 alloc->sgl = sgl;
499
500 prev_sgl = sgl;
496 501
497 i++; 502 i++;
498 } 503 }
499 504
500 alloc->nr_chunks = i; 505 alloc->nr_chunks = i;
501 c = nvgpu_list_first_entry(&alloc->alloc_chunks, 506 alloc->base = alloc->sgl->phys;
502 page_alloc_chunk, list_entry);
503 alloc->base = c->base;
504 507
505 return alloc; 508 return alloc;
506 509
507fail_cleanup: 510fail_cleanup:
508 while (!nvgpu_list_empty(&alloc->alloc_chunks)) { 511 sgl = alloc->sgl;
509 c = nvgpu_list_first_entry(&alloc->alloc_chunks, 512 while (sgl) {
510 page_alloc_chunk, list_entry); 513 struct nvgpu_mem_sgl *next = sgl->next;
511 nvgpu_list_del(&c->list_entry); 514
512 nvgpu_free(&a->source_allocator, c->base); 515 nvgpu_free(&a->source_allocator, sgl->phys);
513 nvgpu_kmem_cache_free(a->chunk_cache, c); 516 nvgpu_kfree(a->owner->g, sgl);
517
518 sgl = next;
514 } 519 }
520
515 nvgpu_kmem_cache_free(a->alloc_cache, alloc); 521 nvgpu_kmem_cache_free(a->alloc_cache, alloc);
516fail: 522fail:
517 return NULL; 523 return NULL;
@@ -521,7 +527,7 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages(
521 struct nvgpu_page_allocator *a, u64 len) 527 struct nvgpu_page_allocator *a, u64 len)
522{ 528{
523 struct nvgpu_page_alloc *alloc = NULL; 529 struct nvgpu_page_alloc *alloc = NULL;
524 struct page_alloc_chunk *c; 530 struct nvgpu_mem_sgl *sgl;
525 u64 pages; 531 u64 pages;
526 int i = 0; 532 int i = 0;
527 533
@@ -536,11 +542,15 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages(
536 542
537 palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n", 543 palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n",
538 pages << a->page_shift, pages, alloc->base); 544 pages << a->page_shift, pages, alloc->base);
539 nvgpu_list_for_each_entry(c, &alloc->alloc_chunks, 545 sgl = alloc->sgl;
540 page_alloc_chunk, list_entry) { 546 while (sgl) {
541 palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n", 547 palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n",
542 i++, c->base, c->length); 548 i++,
549 nvgpu_mem_sgl_phys(sgl),
550 nvgpu_mem_sgl_length(sgl));
551 sgl = sgl->next;
543 } 552 }
553 palloc_dbg(a, "Alloc done\n");
544 554
545 return alloc; 555 return alloc;
546} 556}
@@ -638,11 +648,11 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed(
638 struct nvgpu_page_allocator *a, u64 base, u64 length, u32 unused) 648 struct nvgpu_page_allocator *a, u64 base, u64 length, u32 unused)
639{ 649{
640 struct nvgpu_page_alloc *alloc; 650 struct nvgpu_page_alloc *alloc;
641 struct page_alloc_chunk *c; 651 struct nvgpu_mem_sgl *sgl;
642 652
643 alloc = nvgpu_kmem_cache_alloc(a->alloc_cache); 653 alloc = nvgpu_kmem_cache_alloc(a->alloc_cache);
644 c = nvgpu_kmem_cache_alloc(a->chunk_cache); 654 sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl));
645 if (!alloc || !c) 655 if (!alloc || !sgl)
646 goto fail; 656 goto fail;
647 657
648 alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length, 0); 658 alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length, 0);
@@ -653,17 +663,18 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed(
653 663
654 alloc->nr_chunks = 1; 664 alloc->nr_chunks = 1;
655 alloc->length = length; 665 alloc->length = length;
656 nvgpu_init_list_node(&alloc->alloc_chunks); 666 alloc->sgl = sgl;
657 667
658 c->base = alloc->base; 668 sgl->phys = alloc->base;
659 c->length = length; 669 sgl->dma = alloc->base;
660 nvgpu_list_add(&c->list_entry, &alloc->alloc_chunks); 670 sgl->length = length;
671 sgl->next = NULL;
661 672
662 return alloc; 673 return alloc;
663 674
664fail: 675fail:
665 if (c) 676 if (sgl)
666 nvgpu_kmem_cache_free(a->chunk_cache, c); 677 nvgpu_kfree(a->owner->g, sgl);
667 if (alloc) 678 if (alloc)
668 nvgpu_kmem_cache_free(a->alloc_cache, alloc); 679 nvgpu_kmem_cache_free(a->alloc_cache, alloc);
669 return NULL; 680 return NULL;
@@ -677,7 +688,7 @@ static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a,
677{ 688{
678 struct nvgpu_page_allocator *a = page_allocator(__a); 689 struct nvgpu_page_allocator *a = page_allocator(__a);
679 struct nvgpu_page_alloc *alloc = NULL; 690 struct nvgpu_page_alloc *alloc = NULL;
680 struct page_alloc_chunk *c; 691 struct nvgpu_mem_sgl *sgl;
681 u64 aligned_len, pages; 692 u64 aligned_len, pages;
682 int i = 0; 693 int i = 0;
683 694
@@ -697,10 +708,13 @@ static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a,
697 708
698 palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n", 709 palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n",
699 alloc->base, aligned_len, pages); 710 alloc->base, aligned_len, pages);
700 nvgpu_list_for_each_entry(c, &alloc->alloc_chunks, 711 sgl = alloc->sgl;
701 page_alloc_chunk, list_entry) { 712 while (sgl) {
702 palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n", 713 palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n",
703 i++, c->base, c->length); 714 i++,
715 nvgpu_mem_sgl_phys(sgl),
716 nvgpu_mem_sgl_length(sgl));
717 sgl = sgl->next;
704 } 718 }
705 719
706 a->nr_fixed_allocs++; 720 a->nr_fixed_allocs++;
@@ -896,11 +910,9 @@ int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
896 910
897 a->alloc_cache = nvgpu_kmem_cache_create(g, 911 a->alloc_cache = nvgpu_kmem_cache_create(g,
898 sizeof(struct nvgpu_page_alloc)); 912 sizeof(struct nvgpu_page_alloc));
899 a->chunk_cache = nvgpu_kmem_cache_create(g,
900 sizeof(struct page_alloc_chunk));
901 a->slab_page_cache = nvgpu_kmem_cache_create(g, 913 a->slab_page_cache = nvgpu_kmem_cache_create(g,
902 sizeof(struct page_alloc_slab_page)); 914 sizeof(struct page_alloc_slab_page));
903 if (!a->alloc_cache || !a->chunk_cache || !a->slab_page_cache) { 915 if (!a->alloc_cache || !a->slab_page_cache) {
904 err = -ENOMEM; 916 err = -ENOMEM;
905 goto fail; 917 goto fail;
906 } 918 }
@@ -941,8 +953,6 @@ int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
941fail: 953fail:
942 if (a->alloc_cache) 954 if (a->alloc_cache)
943 nvgpu_kmem_cache_destroy(a->alloc_cache); 955 nvgpu_kmem_cache_destroy(a->alloc_cache);
944 if (a->chunk_cache)
945 nvgpu_kmem_cache_destroy(a->chunk_cache);
946 if (a->slab_page_cache) 956 if (a->slab_page_cache)
947 nvgpu_kmem_cache_destroy(a->slab_page_cache); 957 nvgpu_kmem_cache_destroy(a->slab_page_cache);
948 nvgpu_kfree(g, a); 958 nvgpu_kfree(g, a);
diff --git a/drivers/gpu/nvgpu/common/pramin.c b/drivers/gpu/nvgpu/common/pramin.c
index 425bfdb4..bb7d930e 100644
--- a/drivers/gpu/nvgpu/common/pramin.c
+++ b/drivers/gpu/nvgpu/common/pramin.c
@@ -84,37 +84,40 @@ void nvgpu_pramin_access_batched(struct gk20a *g, struct nvgpu_mem *mem,
84 u32 offset, u32 size, pramin_access_batch_fn loop, u32 **arg) 84 u32 offset, u32 size, pramin_access_batch_fn loop, u32 **arg)
85{ 85{
86 struct nvgpu_page_alloc *alloc = NULL; 86 struct nvgpu_page_alloc *alloc = NULL;
87 struct page_alloc_chunk *chunk = NULL; 87 struct nvgpu_mem_sgl *sgl;
88 u32 byteoff, start_reg, until_end, n; 88 u32 byteoff, start_reg, until_end, n;
89 89
90 alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl); 90 alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl);
91 nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks, 91 sgl = alloc->sgl;
92 page_alloc_chunk, list_entry) { 92 while (sgl) {
93 if (offset >= chunk->length) 93 if (offset >= nvgpu_mem_sgl_length(sgl)) {
94 offset -= chunk->length; 94 offset -= nvgpu_mem_sgl_length(sgl);
95 else 95 sgl = sgl->next;
96 } else {
96 break; 97 break;
98 }
97 } 99 }
98 100
99 while (size) { 101 while (size) {
100 byteoff = g->ops.pramin.enter(g, mem, chunk, 102 u32 sgl_len = (u32)nvgpu_mem_sgl_length(sgl);
103
104 byteoff = g->ops.pramin.enter(g, mem, sgl,
101 offset / sizeof(u32)); 105 offset / sizeof(u32));
102 start_reg = g->ops.pramin.data032_r(byteoff / sizeof(u32)); 106 start_reg = g->ops.pramin.data032_r(byteoff / sizeof(u32));
103 until_end = SZ_1M - (byteoff & (SZ_1M - 1)); 107 until_end = SZ_1M - (byteoff & (SZ_1M - 1));
104 108
105 n = min3(size, until_end, (u32)(chunk->length - offset)); 109 n = min3(size, until_end, (u32)(sgl_len - offset));
106 110
107 loop(g, start_reg, n / sizeof(u32), arg); 111 loop(g, start_reg, n / sizeof(u32), arg);
108 112
109 /* read back to synchronize accesses */ 113 /* read back to synchronize accesses */
110 gk20a_readl(g, start_reg); 114 gk20a_readl(g, start_reg);
111 g->ops.pramin.exit(g, mem, chunk); 115 g->ops.pramin.exit(g, mem, sgl);
112 116
113 size -= n; 117 size -= n;
114 118
115 if (n == (chunk->length - offset)) { 119 if (n == (sgl_len - offset)) {
116 chunk = nvgpu_list_next_entry(chunk, page_alloc_chunk, 120 sgl = nvgpu_mem_sgl_next(sgl);
117 list_entry);
118 offset = 0; 121 offset = 0;
119 } else { 122 } else {
120 offset += n; 123 offset += n;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 7eee2d51..355228db 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -34,6 +34,7 @@ struct gk20a_debug_output;
34struct nvgpu_clk_pll_debug_data; 34struct nvgpu_clk_pll_debug_data;
35struct nvgpu_nvhost_dev; 35struct nvgpu_nvhost_dev;
36struct nvgpu_cpu_time_correlation_sample; 36struct nvgpu_cpu_time_correlation_sample;
37struct nvgpu_mem_sgl;
37 38
38#include <nvgpu/lock.h> 39#include <nvgpu/lock.h>
39#include <nvgpu/thread.h> 40#include <nvgpu/thread.h>
@@ -70,8 +71,6 @@ struct nvgpu_cpu_time_correlation_sample;
70#endif 71#endif
71#include "ecc_gk20a.h" 72#include "ecc_gk20a.h"
72 73
73struct page_alloc_chunk;
74
75/* PTIMER_REF_FREQ_HZ corresponds to a period of 32 nanoseconds. 74/* PTIMER_REF_FREQ_HZ corresponds to a period of 32 nanoseconds.
76 32 ns is the resolution of ptimer. */ 75 32 ns is the resolution of ptimer. */
77#define PTIMER_REF_FREQ_HZ 31250000 76#define PTIMER_REF_FREQ_HZ 31250000
@@ -701,7 +700,7 @@ struct gpu_ops {
701 bool (*support_sparse)(struct gk20a *g); 700 bool (*support_sparse)(struct gk20a *g);
702 u64 (*gmmu_map)(struct vm_gk20a *vm, 701 u64 (*gmmu_map)(struct vm_gk20a *vm,
703 u64 map_offset, 702 u64 map_offset,
704 struct sg_table *sgt, 703 struct nvgpu_mem_sgl *sgl,
705 u64 buffer_offset, 704 u64 buffer_offset,
706 u64 size, 705 u64 size,
707 int pgsz_idx, 706 int pgsz_idx,
@@ -761,9 +760,9 @@ struct gpu_ops {
761 size_t size); 760 size_t size);
762 struct { 761 struct {
763 u32 (*enter)(struct gk20a *g, struct nvgpu_mem *mem, 762 u32 (*enter)(struct gk20a *g, struct nvgpu_mem *mem,
764 struct page_alloc_chunk *chunk, u32 w); 763 struct nvgpu_mem_sgl *sgl, u32 w);
765 void (*exit)(struct gk20a *g, struct nvgpu_mem *mem, 764 void (*exit)(struct gk20a *g, struct nvgpu_mem *mem,
766 struct page_alloc_chunk *chunk); 765 struct nvgpu_mem_sgl *sgl);
767 u32 (*data032_r)(u32 i); 766 u32 (*data032_r)(u32 i);
768 } pramin; 767 } pramin;
769 struct { 768 struct {
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 97b7aa80..cd34e769 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1151,7 +1151,7 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem)
1151 struct gk20a_fence *gk20a_fence_out = NULL; 1151 struct gk20a_fence *gk20a_fence_out = NULL;
1152 struct gk20a_fence *gk20a_last_fence = NULL; 1152 struct gk20a_fence *gk20a_last_fence = NULL;
1153 struct nvgpu_page_alloc *alloc = NULL; 1153 struct nvgpu_page_alloc *alloc = NULL;
1154 struct page_alloc_chunk *chunk = NULL; 1154 struct nvgpu_mem_sgl *sgl = NULL;
1155 int err = 0; 1155 int err = 0;
1156 1156
1157 if (g->mm.vidmem.ce_ctx_id == (u32)~0) 1157 if (g->mm.vidmem.ce_ctx_id == (u32)~0)
@@ -1159,16 +1159,16 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem)
1159 1159
1160 alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl); 1160 alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl);
1161 1161
1162 nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks, 1162 sgl = alloc->sgl;
1163 page_alloc_chunk, list_entry) { 1163 while (sgl) {
1164 if (gk20a_last_fence) 1164 if (gk20a_last_fence)
1165 gk20a_fence_put(gk20a_last_fence); 1165 gk20a_fence_put(gk20a_last_fence);
1166 1166
1167 err = gk20a_ce_execute_ops(g, 1167 err = gk20a_ce_execute_ops(g,
1168 g->mm.vidmem.ce_ctx_id, 1168 g->mm.vidmem.ce_ctx_id,
1169 0, 1169 0,
1170 chunk->base, 1170 nvgpu_mem_sgl_phys(sgl),
1171 chunk->length, 1171 nvgpu_mem_sgl_length(sgl),
1172 0x00000000, 1172 0x00000000,
1173 NVGPU_CE_DST_LOCATION_LOCAL_FB, 1173 NVGPU_CE_DST_LOCATION_LOCAL_FB,
1174 NVGPU_CE_MEMSET, 1174 NVGPU_CE_MEMSET,
@@ -1183,6 +1183,7 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem)
1183 } 1183 }
1184 1184
1185 gk20a_last_fence = gk20a_fence_out; 1185 gk20a_last_fence = gk20a_fence_out;
1186 sgl = nvgpu_mem_sgl_next(sgl);
1186 } 1187 }
1187 1188
1188 if (gk20a_last_fence) { 1189 if (gk20a_last_fence) {
@@ -1262,10 +1263,10 @@ dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr)
1262 return addr; 1263 return addr;
1263} 1264}
1264 1265
1265u64 gk20a_mm_smmu_vaddr_translate(struct gk20a *g, dma_addr_t iova) 1266u64 gk20a_mm_smmu_vaddr_translate(struct gk20a *g, u64 iova)
1266{ 1267{
1267 /* ensure it is not vidmem allocation */ 1268 /* ensure it is not vidmem allocation */
1268 WARN_ON(is_vidmem_page_alloc((u64)iova)); 1269 WARN_ON(is_vidmem_page_alloc(iova));
1269 1270
1270 if (device_is_iommuable(dev_from_gk20a(g)) && 1271 if (device_is_iommuable(dev_from_gk20a(g)) &&
1271 g->ops.mm.get_physical_addr_bits) 1272 g->ops.mm.get_physical_addr_bits)
@@ -2167,11 +2168,6 @@ u32 gk20a_mm_get_physical_addr_bits(struct gk20a *g)
2167 return 34; 2168 return 34;
2168} 2169}
2169 2170
2170u64 gk20a_mm_gpu_phys_addr(struct gk20a *g, u64 phys, u32 flags)
2171{
2172 return phys;
2173}
2174
2175const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g, 2171const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g,
2176 u32 big_page_size) 2172 u32 big_page_size)
2177{ 2173{
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index c77bebf8..2fdc1729 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -336,7 +336,6 @@ void gk20a_mm_dump_vm(struct vm_gk20a *vm,
336 336
337int gk20a_mm_suspend(struct gk20a *g); 337int gk20a_mm_suspend(struct gk20a *g);
338 338
339u64 gk20a_mm_gpu_phys_addr(struct gk20a *g, u64 phys, u32 flags);
340u64 gk20a_mm_smmu_vaddr_translate(struct gk20a *g, dma_addr_t iova); 339u64 gk20a_mm_smmu_vaddr_translate(struct gk20a *g, dma_addr_t iova);
341 340
342void gk20a_mm_ltc_isr(struct gk20a *g); 341void gk20a_mm_ltc_isr(struct gk20a *g);
@@ -361,29 +360,29 @@ static inline phys_addr_t gk20a_mem_phys(struct nvgpu_mem *mem)
361} 360}
362 361
363u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, 362u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
364 u64 map_offset, 363 u64 map_offset,
365 struct sg_table *sgt, 364 struct nvgpu_mem_sgl *sgl,
366 u64 buffer_offset, 365 u64 buffer_offset,
367 u64 size, 366 u64 size,
368 int pgsz_idx, 367 int pgsz_idx,
369 u8 kind_v, 368 u8 kind_v,
370 u32 ctag_offset, 369 u32 ctag_offset,
371 u32 flags, 370 u32 flags,
372 int rw_flag, 371 int rw_flag,
373 bool clear_ctags, 372 bool clear_ctags,
374 bool sparse, 373 bool sparse,
375 bool priv, 374 bool priv,
376 struct vm_gk20a_mapping_batch *batch, 375 struct vm_gk20a_mapping_batch *batch,
377 enum nvgpu_aperture aperture); 376 enum nvgpu_aperture aperture);
378 377
379void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm, 378void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
380 u64 vaddr, 379 u64 vaddr,
381 u64 size, 380 u64 size,
382 int pgsz_idx, 381 int pgsz_idx,
383 bool va_allocated, 382 bool va_allocated,
384 int rw_flag, 383 int rw_flag,
385 bool sparse, 384 bool sparse,
386 struct vm_gk20a_mapping_batch *batch); 385 struct vm_gk20a_mapping_batch *batch);
387 386
388struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf); 387struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf);
389void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, 388void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf,
diff --git a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c
index 9d19e9e5..8a34a63c 100644
--- a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c
@@ -26,9 +26,9 @@
26 26
27/* WARNING: returns pramin_window_lock taken, complement with pramin_exit() */ 27/* WARNING: returns pramin_window_lock taken, complement with pramin_exit() */
28u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem, 28u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem,
29 struct page_alloc_chunk *chunk, u32 w) 29 struct nvgpu_mem_sgl *sgl, u32 w)
30{ 30{
31 u64 bufbase = chunk->base; 31 u64 bufbase = nvgpu_mem_sgl_phys(sgl);
32 u64 addr = bufbase + w * sizeof(u32); 32 u64 addr = bufbase + w * sizeof(u32);
33 u32 hi = (u32)((addr & ~(u64)0xfffff) 33 u32 hi = (u32)((addr & ~(u64)0xfffff)
34 >> bus_bar0_window_target_bar0_window_base_shift_v()); 34 >> bus_bar0_window_target_bar0_window_base_shift_v());
@@ -40,8 +40,9 @@ u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem,
40 40
41 gk20a_dbg(gpu_dbg_mem, 41 gk20a_dbg(gpu_dbg_mem,
42 "0x%08x:%08x begin for %p,%p at [%llx,%llx] (sz %llx)", 42 "0x%08x:%08x begin for %p,%p at [%llx,%llx] (sz %llx)",
43 hi, lo, mem, chunk, bufbase, 43 hi, lo, mem, sgl, bufbase,
44 bufbase + chunk->length, chunk->length); 44 bufbase + nvgpu_mem_sgl_phys(sgl),
45 nvgpu_mem_sgl_length(sgl));
45 46
46 WARN_ON(!bufbase); 47 WARN_ON(!bufbase);
47 48
@@ -57,9 +58,9 @@ u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem,
57} 58}
58 59
59void gk20a_pramin_exit(struct gk20a *g, struct nvgpu_mem *mem, 60void gk20a_pramin_exit(struct gk20a *g, struct nvgpu_mem *mem,
60 struct page_alloc_chunk *chunk) 61 struct nvgpu_mem_sgl *sgl)
61{ 62{
62 gk20a_dbg(gpu_dbg_mem, "end for %p,%p", mem, chunk); 63 gk20a_dbg(gpu_dbg_mem, "end for %p,%p", mem, sgl);
63 64
64 nvgpu_spinlock_release(&g->mm.pramin_window_lock); 65 nvgpu_spinlock_release(&g->mm.pramin_window_lock);
65} 66}
diff --git a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h
index 1a1ac871..fc5ba919 100644
--- a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h
@@ -19,10 +19,10 @@
19 19
20struct gk20a; 20struct gk20a;
21struct nvgpu_mem; 21struct nvgpu_mem;
22struct page_alloc_chunk; 22struct nvgpu_mem_sgl;
23 23
24u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem, 24u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem,
25 struct page_alloc_chunk *chunk, u32 w); 25 struct nvgpu_mem_sgl *sgl, u32 w);
26void gk20a_pramin_exit(struct gk20a *g, struct nvgpu_mem *mem, 26void gk20a_pramin_exit(struct gk20a *g, struct nvgpu_mem *mem,
27 struct page_alloc_chunk *chunk); 27 struct nvgpu_mem_sgl *sgl);
28#endif 28#endif
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index fc27b120..c276f5a6 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -904,7 +904,7 @@ int gr_gp10b_alloc_buffer(struct vm_gk20a *vm, size_t size,
904 904
905 mem->gpu_va = nvgpu_gmmu_map(vm, 905 mem->gpu_va = nvgpu_gmmu_map(vm,
906 mem, 906 mem,
907 size, 907 mem->aligned_size,
908 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, 908 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
909 gk20a_mem_flag_none, 909 gk20a_mem_flag_none,
910 false, 910 false,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
index de129a5f..11060300 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
@@ -27,8 +27,6 @@
27#include <nvgpu/gmmu_t19x.h> 27#include <nvgpu/gmmu_t19x.h>
28#endif 28#endif
29 29
30struct scatterlist;
31
32/* 30/*
33 * This is the GMMU API visible to blocks outside of the GMMU. Basically this 31 * This is the GMMU API visible to blocks outside of the GMMU. Basically this
34 * API supports all the different types of mappings that might be done in the 32 * API supports all the different types of mappings that might be done in the
diff --git a/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h
index e2d4d336..f96c2801 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h
@@ -32,6 +32,8 @@ struct nvgpu_mem_priv {
32}; 32};
33 33
34u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl); 34u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl);
35struct nvgpu_mem_sgl *nvgpu_mem_sgl_create(struct gk20a *g,
36 struct sg_table *sgt);
35 37
36/** 38/**
37 * __nvgpu_mem_create_from_pages - Create an nvgpu_mem from physical pages. 39 * __nvgpu_mem_create_from_pages - Create an nvgpu_mem from physical pages.
diff --git a/drivers/gpu/nvgpu/include/nvgpu/log.h b/drivers/gpu/nvgpu/include/nvgpu/log.h
index 4cac3e70..cfce8c5b 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/log.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/log.h
@@ -71,6 +71,7 @@ enum nvgpu_log_categories {
71 gpu_dbg_pd_cache = BIT(20), /* PD cache traces. */ 71 gpu_dbg_pd_cache = BIT(20), /* PD cache traces. */
72 gpu_dbg_alloc = BIT(21), /* Allocator debugging. */ 72 gpu_dbg_alloc = BIT(21), /* Allocator debugging. */
73 gpu_dbg_dma = BIT(22), /* DMA allocation prints. */ 73 gpu_dbg_dma = BIT(22), /* DMA allocation prints. */
74 gpu_dbg_sgl = BIT(23), /* SGL related traces. */
74 gpu_dbg_mem = BIT(31), /* memory accesses; very verbose. */ 75 gpu_dbg_mem = BIT(31), /* memory accesses; very verbose. */
75}; 76};
76 77
diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
index a112623e..7d19cf81 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
@@ -33,6 +33,8 @@ struct gk20a;
33struct nvgpu_allocator; 33struct nvgpu_allocator;
34struct nvgpu_gmmu_attrs; 34struct nvgpu_gmmu_attrs;
35 35
36#define NVGPU_MEM_DMA_ERROR (~0ULL)
37
36/* 38/*
37 * Real location of a buffer - nvgpu_aperture_mask() will deduce what will be 39 * Real location of a buffer - nvgpu_aperture_mask() will deduce what will be
38 * told to the gpu about the aperture, but this flag designates where the 40 * told to the gpu about the aperture, but this flag designates where the
@@ -44,6 +46,28 @@ enum nvgpu_aperture {
44 APERTURE_VIDMEM 46 APERTURE_VIDMEM
45}; 47};
46 48
49/*
50 * This struct holds the necessary information for describing a struct
51 * nvgpu_mem's scatter gather list.
52 *
53 * These are created in a platform dependent way. As a result the function
54 * definition for allocating these lives in the <nvgpu/_OS_/nvgpu_mem.h> file.
55 */
56struct nvgpu_mem_sgl {
57 /*
58 * Internally this is implemented as a singly linked list.
59 */
60 struct nvgpu_mem_sgl *next;
61
62 /*
63 * There is both a phys address and a DMA address since some systems,
64 * for example ones with an IOMMU, may see these as different addresses.
65 */
66 u64 phys;
67 u64 dma;
68 u64 length;
69};
70
47struct nvgpu_mem { 71struct nvgpu_mem {
48 /* 72 /*
49 * Populated for all nvgpu_mem structs - vidmem or system. 73 * Populated for all nvgpu_mem structs - vidmem or system.
@@ -176,6 +200,27 @@ int nvgpu_mem_create_from_mem(struct gk20a *g,
176 struct nvgpu_mem *dest, struct nvgpu_mem *src, 200 struct nvgpu_mem *dest, struct nvgpu_mem *src,
177 int start_page, int nr_pages); 201 int start_page, int nr_pages);
178 202
203/**
204 * nvgpu_mem_sgl_create_from_mem - Create a scatter list from an nvgpu_mem.
205 *
206 * @g - The GPU.
207 * @mem - The source memory allocation to use.
208 *
209 * Create a scatter gather list from the passed @mem struct. This list lets the
210 * calling code iterate across each chunk of a DMA allocation for when that DMA
211 * allocation is not completely contiguous.
212 */
213struct nvgpu_mem_sgl *nvgpu_mem_sgl_create_from_mem(struct gk20a *g,
214 struct nvgpu_mem *mem);
215void nvgpu_mem_sgl_free(struct gk20a *g, struct nvgpu_mem_sgl *sgl);
216
217struct nvgpu_mem_sgl *nvgpu_mem_sgl_next(struct nvgpu_mem_sgl *sgl);
218u64 nvgpu_mem_sgl_phys(struct nvgpu_mem_sgl *sgl);
219u64 nvgpu_mem_sgl_dma(struct nvgpu_mem_sgl *sgl);
220u64 nvgpu_mem_sgl_length(struct nvgpu_mem_sgl *sgl);
221u64 nvgpu_mem_sgl_gpu_addr(struct gk20a *g, struct nvgpu_mem_sgl *sgl,
222 struct nvgpu_gmmu_attrs *attrs);
223
179/* 224/*
180 * Buffer accessors - wrap between begin() and end() if there is no permanent 225 * Buffer accessors - wrap between begin() and end() if there is no permanent
181 * kernel mapping for this buffer. 226 * kernel mapping for this buffer.
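The nvgpu_mem_sgl_create_from_mem() comment in the hunk above spells out the intended usage: build a transient SGL from an nvgpu_mem, walk its chunks, then free it. Below is a minimal consumer sketch under that contract; the example_dump_chunks() name is made up for illustration, while the accessors, nvgpu_mem_sgl_free(), and the gpu_dbg_sgl log category are the ones this patch adds.

/* Illustrative only: log every chunk of a DMA allocation. */
static void example_dump_chunks(struct gk20a *g, struct nvgpu_mem *mem)
{
	struct nvgpu_mem_sgl *head, *sgl;

	head = nvgpu_mem_sgl_create_from_mem(g, mem);
	if (!head)
		return;

	for (sgl = head; sgl; sgl = nvgpu_mem_sgl_next(sgl))
		nvgpu_log(g, gpu_dbg_sgl, "  chunk: 0x%llx + 0x%llx",
			  nvgpu_mem_sgl_phys(sgl),
			  nvgpu_mem_sgl_length(sgl));

	nvgpu_mem_sgl_free(g, head);
}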
diff --git a/drivers/gpu/nvgpu/include/nvgpu/page_allocator.h b/drivers/gpu/nvgpu/include/nvgpu/page_allocator.h
index 9a5ef8d3..de83ca7f 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/page_allocator.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/page_allocator.h
@@ -18,6 +18,7 @@
18#define PAGE_ALLOCATOR_PRIV_H 18#define PAGE_ALLOCATOR_PRIV_H
19 19
20#include <nvgpu/allocator.h> 20#include <nvgpu/allocator.h>
21#include <nvgpu/nvgpu_mem.h>
21#include <nvgpu/kmem.h> 22#include <nvgpu/kmem.h>
22#include <nvgpu/list.h> 23#include <nvgpu/list.h>
23#include <nvgpu/rbtree.h> 24#include <nvgpu/rbtree.h>
@@ -83,27 +84,17 @@ page_alloc_slab_page_from_list_entry(struct nvgpu_list_node *node)
83 ((uintptr_t)node - offsetof(struct page_alloc_slab_page, list_entry)); 84 ((uintptr_t)node - offsetof(struct page_alloc_slab_page, list_entry));
84}; 85};
85 86
86struct page_alloc_chunk {
87 struct nvgpu_list_node list_entry;
88
89 u64 base;
90 u64 length;
91};
92
93static inline struct page_alloc_chunk *
94page_alloc_chunk_from_list_entry(struct nvgpu_list_node *node)
95{
96 return (struct page_alloc_chunk *)
97 ((uintptr_t)node - offsetof(struct page_alloc_chunk, list_entry));
98};
99
100/* 87/*
101 * Struct to handle internal management of page allocation. It holds a list 88 * Struct to handle internal management of page allocation. It holds a list
102 * of the chunks of pages that make up the overall allocation - much like a 89 * of the chunks of pages that make up the overall allocation - much like a
103 * scatter gather table. 90 * scatter gather table.
104 */ 91 */
105struct nvgpu_page_alloc { 92struct nvgpu_page_alloc {
106 struct nvgpu_list_node alloc_chunks; 93 /*
94 * nvgpu_mem_sgl for describing the actual allocation. Convenient for
95 * GMMU mapping.
96 */
97 struct nvgpu_mem_sgl *sgl;
107 98
108 int nr_chunks; 99 int nr_chunks;
109 u64 length; 100 u64 length;
@@ -156,7 +147,6 @@ struct nvgpu_page_allocator {
156 int nr_slabs; 147 int nr_slabs;
157 148
158 struct nvgpu_kmem_cache *alloc_cache; 149 struct nvgpu_kmem_cache *alloc_cache;
159 struct nvgpu_kmem_cache *chunk_cache;
160 struct nvgpu_kmem_cache *slab_page_cache; 150 struct nvgpu_kmem_cache *slab_page_cache;
161 151
162 u64 flags; 152 u64 flags;
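
With page_alloc_chunk and its chunk_cache gone, allocator code that used to iterate alloc_chunks via page_alloc_chunk_from_list_entry() now walks the embedded SGL instead. A minimal sketch of that pattern, assuming a populated struct nvgpu_page_alloc (the helper name page_alloc_total_length is illustrative):

/* Sum the lengths of the chunks backing one page allocation. */
static u64 page_alloc_total_length(struct nvgpu_page_alloc *alloc)
{
	struct nvgpu_mem_sgl *sgl;
	u64 total = 0;

	for (sgl = alloc->sgl; sgl; sgl = nvgpu_mem_sgl_next(sgl))
		total += nvgpu_mem_sgl_length(sgl);

	return total;
}

The result should match alloc->length, which the allocator still records alongside nr_chunks.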
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
index 85c436e5..ee9b791a 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
@@ -13,7 +13,6 @@
13 * more details. 13 * more details.
14 */ 14 */
15 15
16#include <linux/dma-mapping.h>
17#include "vgpu/vgpu.h" 16#include "vgpu/vgpu.h"
18#include "vgpu_mm_gp10b.h" 17#include "vgpu_mm_gp10b.h"
19#include "gk20a/mm_gk20a.h" 18#include "gk20a/mm_gk20a.h"
@@ -41,7 +40,7 @@ static inline int add_mem_desc(struct tegra_vgpu_mem_desc *mem_desc,
41 40
42static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm, 41static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
43 u64 map_offset, 42 u64 map_offset,
44 struct sg_table *sgt, 43 struct nvgpu_mem_sgl *sgl,
45 u64 buffer_offset, 44 u64 buffer_offset,
46 u64 size, 45 u64 size,
47 int pgsz_idx, 46 int pgsz_idx,
@@ -61,10 +60,9 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
61 struct tegra_vgpu_as_map_ex_params *p = &msg.params.as_map_ex; 60 struct tegra_vgpu_as_map_ex_params *p = &msg.params.as_map_ex;
62 struct tegra_vgpu_mem_desc *mem_desc; 61 struct tegra_vgpu_mem_desc *mem_desc;
63 u32 page_size = vm->gmmu_page_sizes[pgsz_idx]; 62 u32 page_size = vm->gmmu_page_sizes[pgsz_idx];
63 u64 buffer_size = PAGE_ALIGN(size);
64 u64 space_to_skip = buffer_offset; 64 u64 space_to_skip = buffer_offset;
65 u64 buffer_size = 0;
66 u32 mem_desc_count = 0, i; 65 u32 mem_desc_count = 0, i;
67 struct scatterlist *sgl;
68 void *handle = NULL; 66 void *handle = NULL;
69 size_t oob_size; 67 size_t oob_size;
70 u8 prot; 68 u8 prot;
@@ -73,7 +71,7 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
73 71
74 /* FIXME: add support for sparse mappings */ 72 /* FIXME: add support for sparse mappings */
75 73
76 if (WARN_ON(!sgt) || WARN_ON(!g->mm.bypass_smmu)) 74 if (WARN_ON(!sgl) || WARN_ON(!g->mm.bypass_smmu))
77 return 0; 75 return 0;
78 76
79 if (space_to_skip & (page_size - 1)) 77 if (space_to_skip & (page_size - 1))
@@ -100,33 +98,36 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
100 goto fail; 98 goto fail;
101 } 99 }
102 100
103 sgl = sgt->sgl; 101 while (sgl) {
104 while (space_to_skip && sgl && 102 u64 phys_addr;
105 (space_to_skip + page_size > sgl->length)) { 103 u64 chunk_length;
106 space_to_skip -= sgl->length; 104
107 sgl = sg_next(sgl); 105 /*
108 } 106 * Cut out sgl ents for space_to_skip.
109 WARN_ON(!sgl); 107 */
108 if (space_to_skip &&
109 space_to_skip >= nvgpu_mem_sgl_length(sgl)) {
110 space_to_skip -= nvgpu_mem_sgl_length(sgl);
111 sgl = nvgpu_mem_sgl_next(sgl);
112 continue;
113 }
110 114
111 if (add_mem_desc(&mem_desc[mem_desc_count++], 115 phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip;
112 sg_phys(sgl) + space_to_skip, 116 chunk_length = min(size,
113 sgl->length - space_to_skip, 117 nvgpu_mem_sgl_length(sgl) - space_to_skip);
114 &oob_size)) {
115 err = -ENOMEM;
116 goto fail;
117 }
118 buffer_size += sgl->length - space_to_skip;
119 118
120 sgl = sg_next(sgl); 119 if (add_mem_desc(&mem_desc[mem_desc_count++], phys_addr,
121 while (sgl && buffer_size < size) { 120 chunk_length, &oob_size)) {
122 if (add_mem_desc(&mem_desc[mem_desc_count++], sg_phys(sgl),
123 sgl->length, &oob_size)) {
124 err = -ENOMEM; 121 err = -ENOMEM;
125 goto fail; 122 goto fail;
126 } 123 }
127 124
128 buffer_size += sgl->length; 125 space_to_skip = 0;
129 sgl = sg_next(sgl); 126 size -= chunk_length;
127 sgl = nvgpu_mem_sgl_next(sgl);
128
129 if (size == 0)
130 break;
130 } 131 }
131 132
132 if (rw_flag == gk20a_mem_flag_read_only) 133 if (rw_flag == gk20a_mem_flag_read_only)
@@ -153,7 +154,7 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
153 msg.handle = vgpu_get_handle(g); 154 msg.handle = vgpu_get_handle(g);
154 p->handle = vm->handle; 155 p->handle = vm->handle;
155 p->gpu_va = map_offset; 156 p->gpu_va = map_offset;
156 p->size = size; 157 p->size = buffer_size;
157 p->mem_desc_count = mem_desc_count; 158 p->mem_desc_count = mem_desc_count;
158 p->pgsz_idx = pgsz_idx; 159 p->pgsz_idx = pgsz_idx;
159 p->iova = 0; 160 p->iova = 0;
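
The rewritten loop above folds the old two-stage walk (skip leading entries, then copy whole entries) into a single pass over the SGL. Condensed to its essentials it looks like the sketch below; walk_map_chunks and the emit_desc callback are illustrative stand-ins for the add_mem_desc() bookkeeping, not part of the patch:

/*
 * Skip @space_to_skip bytes into the SGL, then emit one descriptor per
 * chunk until @size bytes have been covered.
 */
static void walk_map_chunks(struct nvgpu_mem_sgl *sgl, u64 space_to_skip,
			    u64 size, void (*emit_desc)(u64 phys, u64 len))
{
	while (sgl && size) {
		u64 len = nvgpu_mem_sgl_length(sgl);

		if (space_to_skip >= len) {
			/* Entry lies entirely within the skipped prefix. */
			space_to_skip -= len;
		} else {
			/* Map from the offset into this entry onward. */
			u64 chunk = min(size, len - space_to_skip);

			emit_desc(nvgpu_mem_sgl_phys(sgl) + space_to_skip,
				  chunk);
			space_to_skip = 0;
			size -= chunk;
		}
		sgl = nvgpu_mem_sgl_next(sgl);
	}
}

Separately, the size reported to the vgpu server (p->size) is now the page-aligned buffer_size computed up front rather than the caller's raw size.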
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index ef9e00c8..5da6f158 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -78,7 +78,7 @@ int vgpu_init_mm_support(struct gk20a *g)
78 78
79static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm, 79static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
80 u64 map_offset, 80 u64 map_offset,
81 struct sg_table *sgt, 81 struct nvgpu_mem_sgl *sgl,
82 u64 buffer_offset, 82 u64 buffer_offset,
83 u64 size, 83 u64 size,
84 int pgsz_idx, 84 int pgsz_idx,
@@ -98,7 +98,7 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
98 struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d); 98 struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d);
99 struct tegra_vgpu_cmd_msg msg; 99 struct tegra_vgpu_cmd_msg msg;
100 struct tegra_vgpu_as_map_params *p = &msg.params.as_map; 100 struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
101 u64 addr = nvgpu_mem_get_addr_sgl(g, sgt->sgl); 101 u64 addr = nvgpu_mem_sgl_gpu_addr(g, sgl, NULL);
102 u8 prot; 102 u8 prot;
103 103
104 gk20a_dbg_fn(""); 104 gk20a_dbg_fn("");