From 17c581d75514c32d1e8c1e416beb33b3ccce22a5 Mon Sep 17 00:00:00 2001
From: Sunny He
Date: Tue, 15 Aug 2017 12:01:04 -0700
Subject: gpu: nvgpu: SGL passthrough implementation

The basic nvgpu_mem_sgl implementation provides support for OS specific
scatter-gather list implementations by simply copying them node by node.
This is inefficient, taking extra time and memory.

This patch implements an nvgpu_mem_sgt struct to act as a header which
is inserted at the front of any scatter-gather list implementation.
This labels every struct with a set of ops which can be used to
interact with the attached scatter gather list.

Since nvgpu common code only has to interact with these function
pointers, any sgl implementation can be used. Initialization only
requires the allocation of a single struct, removing the need to copy
or iterate through the sgl being converted.

Jira NVGPU-186

Change-Id: I2994f804a4a4cc141b702e987e9081d8560ba2e8
Signed-off-by: Sunny He
Reviewed-on: https://git-master.nvidia.com/r/1541426
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/gk20a/gk20a.h        |  8 ++++----
 drivers/gpu/nvgpu/gk20a/mm_gk20a.c     | 12 +++++++-----
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h     |  2 +-
 drivers/gpu/nvgpu/gk20a/pramin_gk20a.c | 10 +++++-----
 drivers/gpu/nvgpu/gk20a/pramin_gk20a.h |  4 ++--
 5 files changed, 19 insertions(+), 17 deletions(-)

(limited to 'drivers/gpu/nvgpu/gk20a')

diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 355228db..13c62691 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -34,7 +34,7 @@ struct gk20a_debug_output;
 struct nvgpu_clk_pll_debug_data;
 struct nvgpu_nvhost_dev;
 struct nvgpu_cpu_time_correlation_sample;
-struct nvgpu_mem_sgl;
+struct nvgpu_sgt;
 
 #include
 #include
@@ -700,7 +700,7 @@ struct gpu_ops {
 		bool (*support_sparse)(struct gk20a *g);
 		u64 (*gmmu_map)(struct vm_gk20a *vm,
 				u64 map_offset,
-				struct nvgpu_mem_sgl *sgl,
+				struct nvgpu_sgt *sgt,
 				u64 buffer_offset,
 				u64 size,
 				int pgsz_idx,
@@ -760,9 +760,9 @@
 				size_t size);
 	struct {
 		u32 (*enter)(struct gk20a *g, struct nvgpu_mem *mem,
-			     struct nvgpu_mem_sgl *sgl, u32 w);
+			     struct nvgpu_sgt *sgt, void *sgl, u32 w);
 		void (*exit)(struct gk20a *g, struct nvgpu_mem *mem,
-			     struct nvgpu_mem_sgl *sgl);
+			     void *sgl);
 		u32 (*data032_r)(u32 i);
 	} pramin;
 	struct {
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index cd34e769..0e0326dd 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1151,7 +1151,8 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem)
 	struct gk20a_fence *gk20a_fence_out = NULL;
 	struct gk20a_fence *gk20a_last_fence = NULL;
 	struct nvgpu_page_alloc *alloc = NULL;
-	struct nvgpu_mem_sgl *sgl = NULL;
+	struct nvgpu_sgt *sgt = NULL;
+	void *sgl = NULL;
 	int err = 0;
 
 	if (g->mm.vidmem.ce_ctx_id == (u32)~0)
@@ -1159,7 +1160,8 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem)
 
 	alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl);
 
-	sgl = alloc->sgl;
+	sgt = &alloc->sgt;
+	sgl = sgt->sgl;
 	while (sgl) {
 		if (gk20a_last_fence)
 			gk20a_fence_put(gk20a_last_fence);
@@ -1167,8 +1169,8 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem)
 		err = gk20a_ce_execute_ops(g,
 				g->mm.vidmem.ce_ctx_id,
 				0,
-				nvgpu_mem_sgl_phys(sgl),
-				nvgpu_mem_sgl_length(sgl),
+				nvgpu_sgt_get_phys(sgt, sgl),
+				nvgpu_sgt_get_length(sgt, sgl),
 				0x00000000,
 				NVGPU_CE_DST_LOCATION_LOCAL_FB,
 				NVGPU_CE_MEMSET,
@@ -1183,7 +1185,7 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem)
 		}
 
 		gk20a_last_fence = gk20a_fence_out;
-		sgl = nvgpu_mem_sgl_next(sgl);
+		sgl = nvgpu_sgt_get_next(sgt, sgl);
 	}
 
 	if (gk20a_last_fence) {
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 2fdc1729..9c5e0fae 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -361,7 +361,7 @@ static inline phys_addr_t gk20a_mem_phys(struct nvgpu_mem *mem)
 
 u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 			  u64 map_offset,
-			  struct nvgpu_mem_sgl *sgl,
+			  struct nvgpu_sgt *sgt,
 			  u64 buffer_offset,
 			  u64 size,
 			  int pgsz_idx,
diff --git a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c
index 8a34a63c..aaba4ffc 100644
--- a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c
@@ -26,9 +26,9 @@
 
 /* WARNING: returns pramin_window_lock taken, complement with pramin_exit() */
 u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem,
-		struct nvgpu_mem_sgl *sgl, u32 w)
+		struct nvgpu_sgt *sgt, void *sgl, u32 w)
 {
-	u64 bufbase = nvgpu_mem_sgl_phys(sgl);
+	u64 bufbase = nvgpu_sgt_get_phys(sgt, sgl);
 	u64 addr = bufbase + w * sizeof(u32);
 	u32 hi = (u32)((addr & ~(u64)0xfffff) >>
 		bus_bar0_window_target_bar0_window_base_shift_v());
@@ -41,8 +41,8 @@ u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem,
 	gk20a_dbg(gpu_dbg_mem,
 			"0x%08x:%08x begin for %p,%p at [%llx,%llx] (sz %llx)",
 			hi, lo, mem, sgl, bufbase,
-			bufbase + nvgpu_mem_sgl_phys(sgl),
-			nvgpu_mem_sgl_length(sgl));
+			bufbase + nvgpu_sgt_get_phys(sgt, sgl),
+			nvgpu_sgt_get_length(sgt, sgl));
 
 	WARN_ON(!bufbase);
 
@@ -58,7 +58,7 @@ u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem,
 }
 
 void gk20a_pramin_exit(struct gk20a *g, struct nvgpu_mem *mem,
-		struct nvgpu_mem_sgl *sgl)
+		void *sgl)
 {
 	gk20a_dbg(gpu_dbg_mem, "end for %p,%p", mem, sgl);
 
diff --git a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h
index fc5ba919..29e76978 100644
--- a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h
@@ -22,7 +22,7 @@ struct nvgpu_mem;
 struct nvgpu_mem_sgl;
 
 u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem,
-		struct nvgpu_mem_sgl *sgl, u32 w);
+		struct nvgpu_sgt *sgt, void *sgl, u32 w);
 void gk20a_pramin_exit(struct gk20a *g, struct nvgpu_mem *mem,
-		struct nvgpu_mem_sgl *sgl);
+		void *sgl);
 #endif
--
cgit v1.2.2
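
Editor's note: the hunks above only show call sites, so the shape of the nvgpu_sgt header they rely on can be pictured roughly as follows. This is an illustrative sketch, not the in-tree definition from the nvgpu headers: the ops member names and the accessor wrappers are inferred from the calls visible in this patch (nvgpu_sgt_get_phys(), nvgpu_sgt_get_length(), nvgpu_sgt_get_next()) and should be treated as assumptions.

/*
 * Illustrative sketch only -- member and callback names are inferred
 * from the call sites in this patch, not copied from nvgpu_mem.h.
 */
#include <linux/types.h>

struct nvgpu_sgt_ops {
	void *(*sgl_next)(void *sgl);   /* advance to the next SGL node     */
	u64 (*sgl_phys)(void *sgl);     /* physical address of this node    */
	u64 (*sgl_length)(void *sgl);   /* length in bytes of this node     */
};

/*
 * Header placed in front of an OS-specific scatter-gather list.
 * Common code never touches the list directly, only through ops,
 * so any sgl implementation can sit behind the opaque pointer.
 */
struct nvgpu_sgt {
	const struct nvgpu_sgt_ops *ops;
	void *sgl;      /* opaque head node, e.g. a struct scatterlist */
};

static inline u64 nvgpu_sgt_get_phys(struct nvgpu_sgt *sgt, void *sgl)
{
	return sgt->ops->sgl_phys(sgl);
}

static inline u64 nvgpu_sgt_get_length(struct nvgpu_sgt *sgt, void *sgl)
{
	return sgt->ops->sgl_length(sgl);
}

static inline void *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt, void *sgl)
{
	return sgt->ops->sgl_next(sgl);
}

Under this reading, common code such as gk20a_gmmu_clear_vidmem_mem() walks any backing list purely through the function pointers, in the pattern visible in the mm_gk20a.c hunks: sgl = sgt->sgl; while (sgl) { ...; sgl = nvgpu_sgt_get_next(sgt, sgl); }.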