From 17c581d75514c32d1e8c1e416beb33b3ccce22a5 Mon Sep 17 00:00:00 2001 From: Sunny He Date: Tue, 15 Aug 2017 12:01:04 -0700 Subject: gpu: nvgpu: SGL passthrough implementation The basic nvgpu_mem_sgl implementation provides support for OS specific scatter-gather list implementations by simply copying them node by node. This is inefficient, taking extra time and memory. This patch implements an nvgpu_mem_sgt struct to act as a header which is inserted at the front of any scatter- gather list implementation. This labels every struct with a set of ops which can be used to interact with the attached scatter gather list. Since nvgpu common code only has to interact with these function pointers, any sgl implementation can be used. Initialization only requires the allocation of a single struct, removing the need to copy or iterate through the sgl being converted. Jira NVGPU-186 Change-Id: I2994f804a4a4cc141b702e987e9081d8560ba2e8 Signed-off-by: Sunny He Reviewed-on: https://git-master.nvidia.com/r/1541426 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/linux/nvgpu_mem.c | 144 ++++++++++++--------------- drivers/gpu/nvgpu/common/linux/vm.c | 17 ++-- drivers/gpu/nvgpu/common/mm/gmmu.c | 67 +++++++------ drivers/gpu/nvgpu/common/mm/nvgpu_mem.c | 47 +++------ drivers/gpu/nvgpu/common/mm/page_allocator.c | 114 +++++++++++++++++---- drivers/gpu/nvgpu/common/pramin.c | 21 ++-- 6 files changed, 225 insertions(+), 185 deletions(-) (limited to 'drivers/gpu/nvgpu/common') diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c index eb54f3fd..8d8909dd 100644 --- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c +++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c @@ -397,42 +397,59 @@ int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest, return 0; } -static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_dup(struct gk20a *g, - struct nvgpu_mem_sgl *sgl) +static void *nvgpu_mem_linux_sgl_next(void *sgl) { - struct nvgpu_mem_sgl *head, *next; + return sg_next((struct scatterlist *)sgl); +} - head = nvgpu_kzalloc(g, sizeof(*sgl)); - if (!head) - return NULL; +static u64 nvgpu_mem_linux_sgl_phys(void *sgl) +{ + return (u64)sg_phys((struct scatterlist *)sgl); +} - next = head; - while (true) { - nvgpu_log(g, gpu_dbg_sgl, - " phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx", - sgl->phys, sgl->dma, sgl->length); - - next->dma = sgl->dma; - next->phys = sgl->phys; - next->length = sgl->length; - next->next = NULL; - - sgl = nvgpu_mem_sgl_next(sgl); - if (!sgl) - break; - - next->next = nvgpu_kzalloc(g, sizeof(*sgl)); - if (!next->next) { - nvgpu_mem_sgl_free(g, head); - return NULL; - } - next = next->next; - } +static u64 nvgpu_mem_linux_sgl_dma(void *sgl) +{ + return (u64)sg_dma_address((struct scatterlist *)sgl); +} - return head; +static u64 nvgpu_mem_linux_sgl_length(void *sgl) +{ + return (u64)((struct scatterlist *)sgl)->length; } -static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_create_from_vidmem( +static u64 nvgpu_mem_linux_sgl_gpu_addr(struct gk20a *g, void *sgl, + struct nvgpu_gmmu_attrs *attrs) +{ + if (sg_dma_address((struct scatterlist *)sgl) == 0) + return g->ops.mm.gpu_phys_addr(g, attrs, + sg_phys((struct scatterlist *)sgl)); + + if (sg_dma_address((struct scatterlist *)sgl) == DMA_ERROR_CODE) + return 0; + + return gk20a_mm_smmu_vaddr_translate(g, + sg_dma_address((struct scatterlist *)sgl)); +} + +static void nvgpu_mem_linux_sgl_free(struct gk20a *g, struct nvgpu_sgt *sgt) +{ + /* + * Free this SGT. 
All we do is free the passed SGT. The actual Linux + * SGT/SGL needs to be freed separately. + */ + nvgpu_kfree(g, sgt); +} + +static const struct nvgpu_sgt_ops nvgpu_linux_sgt_ops = { + .sgl_next = nvgpu_mem_linux_sgl_next, + .sgl_phys = nvgpu_mem_linux_sgl_phys, + .sgl_dma = nvgpu_mem_linux_sgl_dma, + .sgl_length = nvgpu_mem_linux_sgl_length, + .sgl_gpu_addr = nvgpu_mem_linux_sgl_gpu_addr, + .sgt_free = nvgpu_mem_linux_sgl_free, +}; + +static struct nvgpu_sgt *__nvgpu_mem_get_sgl_from_vidmem( struct gk20a *g, struct scatterlist *linux_sgl) { @@ -442,70 +459,31 @@ static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_create_from_vidmem( if (!vidmem_alloc) return NULL; - nvgpu_log(g, gpu_dbg_sgl, "Vidmem sgl:"); - - return __nvgpu_mem_sgl_dup(g, vidmem_alloc->sgl); + return &vidmem_alloc->sgt; } -struct nvgpu_mem_sgl *nvgpu_mem_sgl_create(struct gk20a *g, - struct sg_table *sgt) +struct nvgpu_sgt *nvgpu_linux_sgt_create(struct gk20a *g, struct sg_table *sgt) { - struct nvgpu_mem_sgl *head, *sgl, *next; + struct nvgpu_sgt *nvgpu_sgt; struct scatterlist *linux_sgl = sgt->sgl; if (is_vidmem_page_alloc(sg_dma_address(linux_sgl))) - return __nvgpu_mem_sgl_create_from_vidmem(g, linux_sgl); + return __nvgpu_mem_get_sgl_from_vidmem(g, linux_sgl); - head = nvgpu_kzalloc(g, sizeof(*sgl)); - if (!head) + nvgpu_sgt = nvgpu_kzalloc(g, sizeof(*nvgpu_sgt)); + if (!nvgpu_sgt) return NULL; - nvgpu_log(g, gpu_dbg_sgl, "Making sgl:"); - - sgl = head; - while (true) { - sgl->dma = sg_dma_address(linux_sgl); - sgl->phys = sg_phys(linux_sgl); - sgl->length = linux_sgl->length; - - /* - * We don't like offsets in the pages here. This will cause - * problems. - */ - if (WARN_ON(linux_sgl->offset)) { - nvgpu_mem_sgl_free(g, head); - return NULL; - } - - nvgpu_log(g, gpu_dbg_sgl, - " phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx", - sgl->phys, sgl->dma, sgl->length); - - /* - * When there's no more SGL ents for the Linux SGL we are - * done. Don't bother making any more SGL ents for the nvgpu - * SGL. 
- */ - linux_sgl = sg_next(linux_sgl); - if (!linux_sgl) - break; - - next = nvgpu_kzalloc(g, sizeof(*sgl)); - if (!next) { - nvgpu_mem_sgl_free(g, head); - return NULL; - } - - sgl->next = next; - sgl = next; - } + nvgpu_log(g, gpu_dbg_sgl, "Making Linux SGL!"); + + nvgpu_sgt->sgl = sgt->sgl; + nvgpu_sgt->ops = &nvgpu_linux_sgt_ops; - nvgpu_log(g, gpu_dbg_sgl, "Done!"); - return head; + return nvgpu_sgt; } -struct nvgpu_mem_sgl *nvgpu_mem_sgl_create_from_mem(struct gk20a *g, - struct nvgpu_mem *mem) +struct nvgpu_sgt *nvgpu_sgt_create_from_mem(struct gk20a *g, + struct nvgpu_mem *mem) { - return nvgpu_mem_sgl_create(g, mem->priv.sgt); + return nvgpu_linux_sgt_create(g, mem->priv.sgt); } diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c index 4a4429dc..2e29f0f7 100644 --- a/drivers/gpu/nvgpu/common/linux/vm.c +++ b/drivers/gpu/nvgpu/common/linux/vm.c @@ -69,19 +69,20 @@ static u64 nvgpu_get_buffer_alignment(struct gk20a *g, struct scatterlist *sgl, if (aperture == APERTURE_VIDMEM) { struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl); - struct nvgpu_mem_sgl *sgl_vid = alloc->sgl; + struct nvgpu_sgt *sgt = &alloc->sgt; + void *sgl_vid = sgt->sgl; while (sgl_vid) { chunk_align = 1ULL << - __ffs(nvgpu_mem_sgl_phys(sgl_vid) | - nvgpu_mem_sgl_length(sgl_vid)); + __ffs(nvgpu_sgt_get_phys(sgt, sgl_vid)) | + nvgpu_sgt_get_length(sgt, sgl_vid); if (align) align = min(align, chunk_align); else align = chunk_align; - sgl_vid = nvgpu_mem_sgl_next(sgl_vid); + sgl_vid = nvgpu_sgt_get_next(sgt, sgl_vid); } return align; @@ -242,7 +243,7 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm, struct nvgpu_vm_area *vm_area = NULL; u32 ctag_offset; enum nvgpu_aperture aperture; - struct nvgpu_mem_sgl *nvgpu_sgl; + struct nvgpu_sgt *nvgpu_sgt; /* * The kind used as part of the key for map caching. HW may @@ -399,12 +400,12 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm, ctag_offset += buffer_offset >> ilog2(g->ops.fb.compression_page_size(g)); - nvgpu_sgl = nvgpu_mem_sgl_create(g, bfr.sgt); + nvgpu_sgt = nvgpu_linux_sgt_create(g, bfr.sgt); /* update gmmu ptes */ map_offset = g->ops.mm.gmmu_map(vm, map_offset, - nvgpu_sgl, + nvgpu_sgt, buffer_offset, /* sg offset */ mapping_size, bfr.pgsz_idx, @@ -419,7 +420,7 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm, if (!map_offset) goto clean_up; - nvgpu_mem_sgl_free(g, nvgpu_sgl); + nvgpu_sgt_free(nvgpu_sgt, g); mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer)); if (!mapped_buffer) { diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c index 41f5acdd..66bce8f0 100644 --- a/drivers/gpu/nvgpu/common/mm/gmmu.c +++ b/drivers/gpu/nvgpu/common/mm/gmmu.c @@ -65,14 +65,14 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm, struct gk20a *g = gk20a_from_vm(vm); u64 vaddr; - struct nvgpu_mem_sgl *sgl = nvgpu_mem_sgl_create_from_mem(g, mem); + struct nvgpu_sgt *sgt = nvgpu_sgt_create_from_mem(g, mem); - if (!sgl) + if (!sgt) return -ENOMEM; nvgpu_mutex_acquire(&vm->update_gmmu_lock); vaddr = g->ops.mm.gmmu_map(vm, addr, - sgl, /* sg list */ + sgt, /* sg list */ 0, /* sg offset */ size, gmmu_page_size_kernel, @@ -86,7 +86,7 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm, aperture); nvgpu_mutex_release(&vm->update_gmmu_lock); - nvgpu_mem_sgl_free(g, sgl); + nvgpu_sgt_free(sgt, g); if (!vaddr) { nvgpu_err(g, "failed to map buffer!"); @@ -464,7 +464,7 @@ static int __set_pd_level(struct vm_gk20a *vm, * VIDMEM version of the update_ptes logic. 
*/ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm, - struct nvgpu_mem_sgl *sgl, + struct nvgpu_sgt *sgt, u64 space_to_skip, u64 virt_addr, u64 length, @@ -472,8 +472,9 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm, { u64 phys_addr, chunk_length; int err = 0; + void *sgl; - if (!sgl) { + if (!sgt) { /* * This is considered an unmap. Just pass in 0 as the physical * address for the entire GPU range. @@ -490,16 +491,17 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm, * Otherwise iterate across all the chunks in this allocation and * map them. */ + sgl = sgt->sgl; while (sgl) { if (space_to_skip && - space_to_skip >= nvgpu_mem_sgl_length(sgl)) { - space_to_skip -= nvgpu_mem_sgl_length(sgl); - sgl = nvgpu_mem_sgl_next(sgl); + space_to_skip >= nvgpu_sgt_get_length(sgt, sgl)) { + space_to_skip -= nvgpu_sgt_get_length(sgt, sgl); + sgl = nvgpu_sgt_get_next(sgt, sgl); continue; } - phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip; - chunk_length = min(length, (nvgpu_mem_sgl_length(sgl) - + phys_addr = nvgpu_sgt_get_phys(sgt, sgl) + space_to_skip; + chunk_length = min(length, (nvgpu_sgt_get_length(sgt, sgl) - space_to_skip)); err = __set_pd_level(vm, &vm->pdb, @@ -518,27 +520,27 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm, */ virt_addr += chunk_length; length -= chunk_length; + sgl = nvgpu_sgt_get_next(sgt, sgl); if (length == 0) break; - - sgl = nvgpu_mem_sgl_next(sgl); } return err; } static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, - struct nvgpu_mem_sgl *sgl, + struct nvgpu_sgt *sgt, u64 space_to_skip, u64 virt_addr, u64 length, struct nvgpu_gmmu_attrs *attrs) { - int err; struct gk20a *g = gk20a_from_vm(vm); + void *sgl; + int err; - if (!sgl) { + if (!sgt) { /* * This is considered an unmap. Just pass in 0 as the physical * address for the entire GPU range. @@ -559,8 +561,10 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, * mapping is simple since the "physical" address is actually a virtual * IO address and will be contiguous. */ + sgl = sgt->sgl; + if (!g->mm.bypass_smmu) { - u64 io_addr = nvgpu_mem_sgl_gpu_addr(g, sgl, attrs); + u64 io_addr = nvgpu_sgt_get_gpu_addr(sgt, g, sgl, attrs); io_addr += space_to_skip; @@ -586,15 +590,15 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, * Cut out sgl ents for space_to_skip. */ if (space_to_skip && - space_to_skip >= nvgpu_mem_sgl_length(sgl)) { - space_to_skip -= nvgpu_mem_sgl_length(sgl); - sgl = nvgpu_mem_sgl_next(sgl); + space_to_skip >= nvgpu_sgt_get_length(sgt, sgl)) { + space_to_skip -= nvgpu_sgt_get_length(sgt, sgl); + sgl = nvgpu_sgt_get_next(sgt, sgl); continue; } - phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip; + phys_addr = nvgpu_sgt_get_phys(sgt, sgl) + space_to_skip; chunk_length = min(length, - nvgpu_mem_sgl_length(sgl) - space_to_skip); + nvgpu_sgt_get_length(sgt, sgl) - space_to_skip); err = __set_pd_level(vm, &vm->pdb, 0, @@ -606,7 +610,7 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, space_to_skip = 0; virt_addr += chunk_length; length -= chunk_length; - sgl = nvgpu_mem_sgl_next(sgl); + sgl = nvgpu_sgt_get_next(sgt, sgl); if (length == 0) break; @@ -631,7 +635,7 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, * case of SMMU usage. 
*/ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, - struct nvgpu_mem_sgl *sgl, + struct nvgpu_sgt *sgt, u64 space_to_skip, u64 virt_addr, u64 length, @@ -669,10 +673,10 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, "phys offset: %#-4llx; pgsz: %3dkb perm=%-2s | " "kind=%#02x APT=%-6s %c%c%c%c%c", vm->name, - sgl ? "MAP" : "UNMAP", + sgt ? "MAP" : "UNMAP", virt_addr, length, - sgl ? nvgpu_mem_sgl_phys(sgl) : 0, + sgt ? nvgpu_sgt_get_phys(sgt, sgt->sgl) : 0, space_to_skip, page_size >> 10, nvgpu_gmmu_perm_str(attrs->rw_flag), @@ -690,14 +694,14 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, */ if (attrs->aperture == APERTURE_VIDMEM) err = __nvgpu_gmmu_update_page_table_vidmem(vm, - sgl, + sgt, space_to_skip, virt_addr, length, attrs); else err = __nvgpu_gmmu_update_page_table_sysmem(vm, - sgl, + sgt, space_to_skip, virt_addr, length, @@ -706,7 +710,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, unmap_gmmu_pages(g, &vm->pdb); nvgpu_smp_mb(); - __gmmu_dbg(g, attrs, "%-5s Done!", sgl ? "MAP" : "UNMAP"); + __gmmu_dbg(g, attrs, "%-5s Done!", sgt ? "MAP" : "UNMAP"); return err; } @@ -725,7 +729,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, */ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, u64 vaddr, - struct nvgpu_mem_sgl *sgl, + struct nvgpu_sgt *sgt, u64 buffer_offset, u64 size, int pgsz_idx, @@ -774,7 +778,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, allocated = true; } - err = __nvgpu_gmmu_update_page_table(vm, sgl, buffer_offset, + err = __nvgpu_gmmu_update_page_table(vm, sgt, buffer_offset, vaddr, size, &attrs); if (err) { nvgpu_err(g, "failed to update ptes on map"); @@ -787,6 +791,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, batch->need_tlb_invalidate = true; return vaddr; + fail_validate: if (allocated) __nvgpu_vm_free_va(vm, vaddr, pgsz_idx); diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c index 7296c673..6decec24 100644 --- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c +++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c @@ -19,55 +19,34 @@ #include "gk20a/gk20a.h" -struct nvgpu_mem_sgl *nvgpu_mem_sgl_next(struct nvgpu_mem_sgl *sgl) +void *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt, void *sgl) { - return sgl->next; + return sgt->ops->sgl_next(sgl); } -u64 nvgpu_mem_sgl_phys(struct nvgpu_mem_sgl *sgl) +u64 nvgpu_sgt_get_phys(struct nvgpu_sgt *sgt, void *sgl) { - return sgl->phys; + return sgt->ops->sgl_phys(sgl); } -u64 nvgpu_mem_sgl_dma(struct nvgpu_mem_sgl *sgl) +u64 nvgpu_sgt_get_dma(struct nvgpu_sgt *sgt, void *sgl) { - return sgl->dma; + return sgt->ops->sgl_dma(sgl); } -u64 nvgpu_mem_sgl_length(struct nvgpu_mem_sgl *sgl) +u64 nvgpu_sgt_get_length(struct nvgpu_sgt *sgt, void *sgl) { - return sgl->length; + return sgt->ops->sgl_length(sgl); } -/* - * This builds a GPU address for the %sgl based on whether an IOMMU is present - * or not. It also handles turning the physical address into the true GPU - * physical address that should be programmed into the page tables. 
- */ -u64 nvgpu_mem_sgl_gpu_addr(struct gk20a *g, struct nvgpu_mem_sgl *sgl, +u64 nvgpu_sgt_get_gpu_addr(struct nvgpu_sgt *sgt, struct gk20a *g, void *sgl, struct nvgpu_gmmu_attrs *attrs) { - if (nvgpu_mem_sgl_dma(sgl) == 0) - return g->ops.mm.gpu_phys_addr(g, attrs, - nvgpu_mem_sgl_phys(sgl)); - - if (nvgpu_mem_sgl_dma(sgl) == DMA_ERROR_CODE) - return 0; - - return gk20a_mm_smmu_vaddr_translate(g, nvgpu_mem_sgl_dma(sgl)); + return sgt->ops->sgl_gpu_addr(g, sgl, attrs); } -void nvgpu_mem_sgl_free(struct gk20a *g, struct nvgpu_mem_sgl *sgl) +void nvgpu_sgt_free(struct nvgpu_sgt *sgt, struct gk20a *g) { - struct nvgpu_mem_sgl *next; - - /* - * Free each of the elements. We expect each element to have been - * nvgpu_k[mz]alloc()ed. - */ - while (sgl) { - next = nvgpu_mem_sgl_next(sgl); - nvgpu_kfree(g, sgl); - sgl = next; - } + if (sgt && sgt->ops->sgt_free) + sgt->ops->sgt_free(g, sgt); } diff --git a/drivers/gpu/nvgpu/common/mm/page_allocator.c b/drivers/gpu/nvgpu/common/mm/page_allocator.c index 6d92b457..9c35f528 100644 --- a/drivers/gpu/nvgpu/common/mm/page_allocator.c +++ b/drivers/gpu/nvgpu/common/mm/page_allocator.c @@ -143,20 +143,93 @@ static void nvgpu_page_release_co(struct nvgpu_allocator *a, nvgpu_alloc_release_carveout(&va->source_allocator, co); } +static void *nvgpu_page_alloc_sgl_next(void *sgl) +{ + struct nvgpu_mem_sgl *nvgpu_sgl = sgl; + + return nvgpu_sgl->next; +} + +static u64 nvgpu_page_alloc_sgl_phys(void *sgl) +{ + struct nvgpu_mem_sgl *nvgpu_sgl = sgl; + + return nvgpu_sgl->phys; +} + +static u64 nvgpu_page_alloc_sgl_dma(void *sgl) +{ + struct nvgpu_mem_sgl *nvgpu_sgl = sgl; + + return nvgpu_sgl->dma; +} + +static u64 nvgpu_page_alloc_sgl_length(void *sgl) +{ + struct nvgpu_mem_sgl *nvgpu_sgl = sgl; + + return nvgpu_sgl->length; +} + +static u64 nvgpu_page_alloc_sgl_gpu_addr(struct gk20a *g, void *sgl, + struct nvgpu_gmmu_attrs *attrs) +{ + struct nvgpu_mem_sgl *nvgpu_sgl = sgl; + + return nvgpu_sgl->phys; +} + +static void nvgpu_page_alloc_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt) +{ + /* + * No-op here. The free is handled by the page_alloc free() functions. + */ +} + +/* + * These implement the generic scatter gather ops for pages allocated + * by the page allocator. however, the primary aim for this, is of course, + * vidmem. + */ +static const struct nvgpu_sgt_ops page_alloc_sgl_ops = { + .sgl_next = nvgpu_page_alloc_sgl_next, + .sgl_phys = nvgpu_page_alloc_sgl_phys, + .sgl_dma = nvgpu_page_alloc_sgl_dma, + .sgl_length = nvgpu_page_alloc_sgl_length, + .sgl_gpu_addr = nvgpu_page_alloc_sgl_gpu_addr, + .sgt_free = nvgpu_page_alloc_sgt_free, +}; + +/* + * This actually frees the sgl memory. Used by the page_alloc free() functions. 
+ */ +static void nvgpu_page_alloc_sgl_proper_free(struct gk20a *g, + struct nvgpu_mem_sgl *sgl) +{ + struct nvgpu_mem_sgl *next; + + while (sgl) { + next = sgl->next; + nvgpu_kfree(g, sgl); + sgl = next; + } +} + static void __nvgpu_free_pages(struct nvgpu_page_allocator *a, struct nvgpu_page_alloc *alloc, bool free_buddy_alloc) { - struct nvgpu_mem_sgl *sgl = alloc->sgl; + struct nvgpu_mem_sgl *sgl = alloc->sgt.sgl; if (free_buddy_alloc) { while (sgl) { - nvgpu_free(&a->source_allocator, sgl->phys); - sgl = nvgpu_mem_sgl_next(sgl); + nvgpu_free(&a->source_allocator, + nvgpu_sgt_get_phys(&alloc->sgt, sgl)); + sgl = nvgpu_sgt_get_next(&alloc->sgt, sgl); } } - nvgpu_mem_sgl_free(a->owner->g, alloc->sgl); + nvgpu_page_alloc_sgl_proper_free(a->owner->g, sgl); nvgpu_kmem_cache_free(a->alloc_cache, alloc); } @@ -306,7 +379,7 @@ static int __do_slab_alloc(struct nvgpu_page_allocator *a, alloc->length = slab_page->slab_size; alloc->base = slab_page->page_addr + (offs * slab_page->slab_size); - sgl = alloc->sgl; + sgl = alloc->sgt.sgl; sgl->phys = alloc->base; sgl->dma = alloc->base; sgl->length = alloc->length; @@ -338,13 +411,16 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_slab( palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n"); goto fail; } + + alloc->sgt.ops = &page_alloc_sgl_ops; + sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl)); if (!sgl) { palloc_dbg(a, "OOM: could not alloc sgl struct!\n"); goto fail; } - alloc->sgl = sgl; + alloc->sgt.sgl = sgl; err = __do_slab_alloc(a, slab, alloc); if (err) goto fail; @@ -432,6 +508,7 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages( memset(alloc, 0, sizeof(*alloc)); alloc->length = pages << a->page_shift; + alloc->sgt.ops = &page_alloc_sgl_ops; while (pages) { u64 chunk_addr = 0; @@ -495,7 +572,7 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages( if (prev_sgl) prev_sgl->next = sgl; else - alloc->sgl = sgl; + alloc->sgt.sgl = sgl; prev_sgl = sgl; @@ -503,12 +580,12 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages( } alloc->nr_chunks = i; - alloc->base = alloc->sgl->phys; + alloc->base = ((struct nvgpu_mem_sgl *)alloc->sgt.sgl)->phys; return alloc; fail_cleanup: - sgl = alloc->sgl; + sgl = alloc->sgt.sgl; while (sgl) { struct nvgpu_mem_sgl *next = sgl->next; @@ -542,13 +619,13 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages( palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n", pages << a->page_shift, pages, alloc->base); - sgl = alloc->sgl; + sgl = alloc->sgt.sgl; while (sgl) { palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n", i++, - nvgpu_mem_sgl_phys(sgl), - nvgpu_mem_sgl_length(sgl)); - sgl = sgl->next; + nvgpu_sgt_get_phys(&alloc->sgt, sgl), + nvgpu_sgt_get_length(&alloc->sgt, sgl)); + sgl = nvgpu_sgt_get_next(&alloc->sgt, sgl); } palloc_dbg(a, "Alloc done\n"); @@ -655,6 +732,7 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed( if (!alloc || !sgl) goto fail; + alloc->sgt.ops = &page_alloc_sgl_ops; alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length, 0); if (!alloc->base) { WARN(1, "nvgpu: failed to fixed alloc pages @ 0x%010llx", base); @@ -663,7 +741,7 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed( alloc->nr_chunks = 1; alloc->length = length; - alloc->sgl = sgl; + alloc->sgt.sgl = sgl; sgl->phys = alloc->base; sgl->dma = alloc->base; @@ -708,13 +786,13 @@ static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a, palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n", alloc->base, aligned_len, pages); - sgl = alloc->sgl; + sgl = alloc->sgt.sgl; while (sgl) 
{ palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n", i++, - nvgpu_mem_sgl_phys(sgl), - nvgpu_mem_sgl_length(sgl)); - sgl = sgl->next; + nvgpu_sgt_get_phys(&alloc->sgt, sgl), + nvgpu_sgt_get_length(&alloc->sgt, sgl)); + sgl = nvgpu_sgt_get_next(&alloc->sgt, sgl); } a->nr_fixed_allocs++; diff --git a/drivers/gpu/nvgpu/common/pramin.c b/drivers/gpu/nvgpu/common/pramin.c index bb7d930e..ae9c9b1f 100644 --- a/drivers/gpu/nvgpu/common/pramin.c +++ b/drivers/gpu/nvgpu/common/pramin.c @@ -84,24 +84,23 @@ void nvgpu_pramin_access_batched(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, u32 size, pramin_access_batch_fn loop, u32 **arg) { struct nvgpu_page_alloc *alloc = NULL; - struct nvgpu_mem_sgl *sgl; + struct nvgpu_sgt *sgt; + void *sgl; u32 byteoff, start_reg, until_end, n; alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl); - sgl = alloc->sgl; - while (sgl) { - if (offset >= nvgpu_mem_sgl_length(sgl)) { - offset -= nvgpu_mem_sgl_length(sgl); - sgl = sgl->next; - } else { + sgt = &alloc->sgt; + for (sgl = sgt->sgl; sgl; sgl = nvgpu_sgt_get_next(sgt, sgl)) { + if (offset >= nvgpu_sgt_get_length(sgt, sgl)) + offset -= nvgpu_sgt_get_length(sgt, sgl); + else break; - } } while (size) { - u32 sgl_len = (u32)nvgpu_mem_sgl_length(sgl); + u32 sgl_len = (u32)nvgpu_sgt_get_length(sgt, sgl); - byteoff = g->ops.pramin.enter(g, mem, sgl, + byteoff = g->ops.pramin.enter(g, mem, sgt, sgl, offset / sizeof(u32)); start_reg = g->ops.pramin.data032_r(byteoff / sizeof(u32)); until_end = SZ_1M - (byteoff & (SZ_1M - 1)); @@ -117,7 +116,7 @@ void nvgpu_pramin_access_batched(struct gk20a *g, struct nvgpu_mem *mem, size -= n; if (n == (sgl_len - offset)) { - sgl = nvgpu_mem_sgl_next(sgl); + sgl = nvgpu_sgt_get_next(sgt, sgl); offset = 0; } else { offset += n; -- cgit v1.2.2
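
Illustrative sketch (not part of the patch above): the commit message describes an nvgpu_sgt "header" that carries an ops table plus an opaque SGL pointer, so common code can walk any scatter-gather list implementation without copying it node by node. The self-contained userspace C program below is a minimal sketch of that pattern, together with the space_to_skip chunk walk used by the GMMU update loops in this change. All demo_* names are invented for the example and are not nvgpu APIs.

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

/* Ops table: common code only ever touches these function pointers. */
struct demo_sgt_ops {
	void *(*sgl_next)(void *sgl);
	uint64_t (*sgl_phys)(void *sgl);
	uint64_t (*sgl_length)(void *sgl);
};

/*
 * The "header" that fronts any scatter-gather list implementation:
 * an opaque sgl pointer plus the ops that know how to walk it.
 */
struct demo_sgt {
	const struct demo_sgt_ops *ops;
	void *sgl;
};

/* One possible backing store: a linked list of physical chunks. */
struct demo_chunk {
	uint64_t phys;
	uint64_t length;
	struct demo_chunk *next;
};

static void *demo_chunk_next(void *sgl)
{
	return ((struct demo_chunk *)sgl)->next;
}

static uint64_t demo_chunk_phys(void *sgl)
{
	return ((struct demo_chunk *)sgl)->phys;
}

static uint64_t demo_chunk_length(void *sgl)
{
	return ((struct demo_chunk *)sgl)->length;
}

static const struct demo_sgt_ops demo_chunk_ops = {
	.sgl_next   = demo_chunk_next,
	.sgl_phys   = demo_chunk_phys,
	.sgl_length = demo_chunk_length,
};

/*
 * Chunk-by-chunk mapping walk in the style of the vidmem/sysmem update
 * loops above: skip the first space_to_skip bytes of the list, then
 * "map" length bytes. The walker knows nothing about the list's type.
 */
static void demo_sgt_map_range(struct demo_sgt *sgt, uint64_t space_to_skip,
			       uint64_t virt_addr, uint64_t length)
{
	void *sgl = sgt->sgl;

	while (sgl && length) {
		uint64_t sgl_length = sgt->ops->sgl_length(sgl);
		uint64_t phys_addr, chunk_length;

		/* Chunks that fall entirely inside the skip are consumed. */
		if (space_to_skip && space_to_skip >= sgl_length) {
			space_to_skip -= sgl_length;
			sgl = sgt->ops->sgl_next(sgl);
			continue;
		}

		phys_addr = sgt->ops->sgl_phys(sgl) + space_to_skip;
		chunk_length = sgl_length - space_to_skip;
		if (chunk_length > length)
			chunk_length = length;

		/* A real implementation would program PTEs here. */
		printf("map virt 0x%llx -> phys 0x%llx (0x%llx bytes)\n",
		       (unsigned long long)virt_addr,
		       (unsigned long long)phys_addr,
		       (unsigned long long)chunk_length);

		space_to_skip = 0;
		virt_addr += chunk_length;
		length -= chunk_length;
		sgl = sgt->ops->sgl_next(sgl);
	}
}

int main(void)
{
	struct demo_chunk c1 = { 0x200000, 0x3000, NULL };
	struct demo_chunk c0 = { 0x100000, 0x1000, &c1 };
	struct demo_sgt sgt = { &demo_chunk_ops, &c0 };

	/* Skip 0x800 bytes into the list, then map 0x2000 bytes. */
	demo_sgt_map_range(&sgt, 0x800, 0x40000000ULL, 0x2000);
	return 0;
}

Because callers go only through the ops pointers, the Linux scatterlist, the page-allocator chunk list, and the vidmem allocation can all back the same nvgpu_sgt interface, which is what lets nvgpu_linux_sgt_create() allocate a single small struct instead of duplicating the whole list.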