summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/gk20a
diff options
context:
space:
mode:
authorSunny He <suhe@nvidia.com>2017-08-15 15:01:04 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2017-09-22 15:55:24 -0400
commit17c581d75514c32d1e8c1e416beb33b3ccce22a5 (patch)
treea25d063f19b8e1f83f61af418f3aa2ac32fe0cce /drivers/gpu/nvgpu/gk20a
parent0090ee5aca268a3c359f34c74b8c521df3bd8593 (diff)
gpu: nvgpu: SGL passthrough implementation
The basic nvgpu_mem_sgl implementation provides support for OS specific scatter-gather list implementations by simply copying them node by node. This is inefficient, taking extra time and memory. This patch implements an nvgpu_mem_sgt struct to act as a header which is inserted at the front of any scatter-gather list implementation. This labels every struct with a set of ops which can be used to interact with the attached scatter-gather list. Since nvgpu common code only has to interact with these function pointers, any sgl implementation can be used. Initialization only requires the allocation of a single struct, removing the need to copy or iterate through the sgl being converted. Jira NVGPU-186 Change-Id: I2994f804a4a4cc141b702e987e9081d8560ba2e8 Signed-off-by: Sunny He <suhe@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1541426 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r--drivers/gpu/nvgpu/gk20a/gk20a.h8
-rw-r--r--drivers/gpu/nvgpu/gk20a/mm_gk20a.c12
-rw-r--r--drivers/gpu/nvgpu/gk20a/mm_gk20a.h2
-rw-r--r--drivers/gpu/nvgpu/gk20a/pramin_gk20a.c10
-rw-r--r--drivers/gpu/nvgpu/gk20a/pramin_gk20a.h4
5 files changed, 19 insertions, 17 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 355228db..13c62691 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -34,7 +34,7 @@ struct gk20a_debug_output;
34struct nvgpu_clk_pll_debug_data; 34struct nvgpu_clk_pll_debug_data;
35struct nvgpu_nvhost_dev; 35struct nvgpu_nvhost_dev;
36struct nvgpu_cpu_time_correlation_sample; 36struct nvgpu_cpu_time_correlation_sample;
37struct nvgpu_mem_sgl; 37struct nvgpu_mem_sgt;
38 38
39#include <nvgpu/lock.h> 39#include <nvgpu/lock.h>
40#include <nvgpu/thread.h> 40#include <nvgpu/thread.h>
@@ -700,7 +700,7 @@ struct gpu_ops {
700 bool (*support_sparse)(struct gk20a *g); 700 bool (*support_sparse)(struct gk20a *g);
701 u64 (*gmmu_map)(struct vm_gk20a *vm, 701 u64 (*gmmu_map)(struct vm_gk20a *vm,
702 u64 map_offset, 702 u64 map_offset,
703 struct nvgpu_mem_sgl *sgl, 703 struct nvgpu_sgt *sgt,
704 u64 buffer_offset, 704 u64 buffer_offset,
705 u64 size, 705 u64 size,
706 int pgsz_idx, 706 int pgsz_idx,
@@ -760,9 +760,9 @@ struct gpu_ops {
760 size_t size); 760 size_t size);
761 struct { 761 struct {
762 u32 (*enter)(struct gk20a *g, struct nvgpu_mem *mem, 762 u32 (*enter)(struct gk20a *g, struct nvgpu_mem *mem,
763 struct nvgpu_mem_sgl *sgl, u32 w); 763 struct nvgpu_sgt *sgt, void *sgl, u32 w);
764 void (*exit)(struct gk20a *g, struct nvgpu_mem *mem, 764 void (*exit)(struct gk20a *g, struct nvgpu_mem *mem,
765 struct nvgpu_mem_sgl *sgl); 765 void *sgl);
766 u32 (*data032_r)(u32 i); 766 u32 (*data032_r)(u32 i);
767 } pramin; 767 } pramin;
768 struct { 768 struct {
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index cd34e769..0e0326dd 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1151,7 +1151,8 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem)
1151 struct gk20a_fence *gk20a_fence_out = NULL; 1151 struct gk20a_fence *gk20a_fence_out = NULL;
1152 struct gk20a_fence *gk20a_last_fence = NULL; 1152 struct gk20a_fence *gk20a_last_fence = NULL;
1153 struct nvgpu_page_alloc *alloc = NULL; 1153 struct nvgpu_page_alloc *alloc = NULL;
1154 struct nvgpu_mem_sgl *sgl = NULL; 1154 struct nvgpu_sgt *sgt = NULL;
1155 void *sgl = NULL;
1155 int err = 0; 1156 int err = 0;
1156 1157
1157 if (g->mm.vidmem.ce_ctx_id == (u32)~0) 1158 if (g->mm.vidmem.ce_ctx_id == (u32)~0)
@@ -1159,7 +1160,8 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem)
1159 1160
1160 alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl); 1161 alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl);
1161 1162
1162 sgl = alloc->sgl; 1163 sgt = &alloc->sgt;
1164 sgl = sgt->sgl;
1163 while (sgl) { 1165 while (sgl) {
1164 if (gk20a_last_fence) 1166 if (gk20a_last_fence)
1165 gk20a_fence_put(gk20a_last_fence); 1167 gk20a_fence_put(gk20a_last_fence);
@@ -1167,8 +1169,8 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem)
1167 err = gk20a_ce_execute_ops(g, 1169 err = gk20a_ce_execute_ops(g,
1168 g->mm.vidmem.ce_ctx_id, 1170 g->mm.vidmem.ce_ctx_id,
1169 0, 1171 0,
1170 nvgpu_mem_sgl_phys(sgl), 1172 nvgpu_sgt_get_phys(sgt, sgl),
1171 nvgpu_mem_sgl_length(sgl), 1173 nvgpu_sgt_get_length(sgt, sgl),
1172 0x00000000, 1174 0x00000000,
1173 NVGPU_CE_DST_LOCATION_LOCAL_FB, 1175 NVGPU_CE_DST_LOCATION_LOCAL_FB,
1174 NVGPU_CE_MEMSET, 1176 NVGPU_CE_MEMSET,
@@ -1183,7 +1185,7 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem)
1183 } 1185 }
1184 1186
1185 gk20a_last_fence = gk20a_fence_out; 1187 gk20a_last_fence = gk20a_fence_out;
1186 sgl = nvgpu_mem_sgl_next(sgl); 1188 sgl = nvgpu_sgt_get_next(sgt, sgl);
1187 } 1189 }
1188 1190
1189 if (gk20a_last_fence) { 1191 if (gk20a_last_fence) {
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 2fdc1729..9c5e0fae 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -361,7 +361,7 @@ static inline phys_addr_t gk20a_mem_phys(struct nvgpu_mem *mem)
361 361
362u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, 362u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
363 u64 map_offset, 363 u64 map_offset,
364 struct nvgpu_mem_sgl *sgl, 364 struct nvgpu_sgt *sgt,
365 u64 buffer_offset, 365 u64 buffer_offset,
366 u64 size, 366 u64 size,
367 int pgsz_idx, 367 int pgsz_idx,
diff --git a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c
index 8a34a63c..aaba4ffc 100644
--- a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c
@@ -26,9 +26,9 @@
26 26
27/* WARNING: returns pramin_window_lock taken, complement with pramin_exit() */ 27/* WARNING: returns pramin_window_lock taken, complement with pramin_exit() */
28u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem, 28u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem,
29 struct nvgpu_mem_sgl *sgl, u32 w) 29 struct nvgpu_sgt *sgt, void *sgl, u32 w)
30{ 30{
31 u64 bufbase = nvgpu_mem_sgl_phys(sgl); 31 u64 bufbase = nvgpu_sgt_get_phys(sgt, sgl);
32 u64 addr = bufbase + w * sizeof(u32); 32 u64 addr = bufbase + w * sizeof(u32);
33 u32 hi = (u32)((addr & ~(u64)0xfffff) 33 u32 hi = (u32)((addr & ~(u64)0xfffff)
34 >> bus_bar0_window_target_bar0_window_base_shift_v()); 34 >> bus_bar0_window_target_bar0_window_base_shift_v());
@@ -41,8 +41,8 @@ u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem,
41 gk20a_dbg(gpu_dbg_mem, 41 gk20a_dbg(gpu_dbg_mem,
42 "0x%08x:%08x begin for %p,%p at [%llx,%llx] (sz %llx)", 42 "0x%08x:%08x begin for %p,%p at [%llx,%llx] (sz %llx)",
43 hi, lo, mem, sgl, bufbase, 43 hi, lo, mem, sgl, bufbase,
44 bufbase + nvgpu_mem_sgl_phys(sgl), 44 bufbase + nvgpu_sgt_get_phys(sgt, sgl),
45 nvgpu_mem_sgl_length(sgl)); 45 nvgpu_sgt_get_length(sgt, sgl));
46 46
47 WARN_ON(!bufbase); 47 WARN_ON(!bufbase);
48 48
@@ -58,7 +58,7 @@ u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem,
58} 58}
59 59
60void gk20a_pramin_exit(struct gk20a *g, struct nvgpu_mem *mem, 60void gk20a_pramin_exit(struct gk20a *g, struct nvgpu_mem *mem,
61 struct nvgpu_mem_sgl *sgl) 61 void *sgl)
62{ 62{
63 gk20a_dbg(gpu_dbg_mem, "end for %p,%p", mem, sgl); 63 gk20a_dbg(gpu_dbg_mem, "end for %p,%p", mem, sgl);
64 64
diff --git a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h
index fc5ba919..29e76978 100644
--- a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h
@@ -22,7 +22,7 @@ struct nvgpu_mem;
22struct nvgpu_mem_sgl; 22struct nvgpu_mem_sgl;
23 23
24u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem, 24u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem,
25 struct nvgpu_mem_sgl *sgl, u32 w); 25 struct nvgpu_sgt *sgt, void *sgl, u32 w);
26void gk20a_pramin_exit(struct gk20a *g, struct nvgpu_mem *mem, 26void gk20a_pramin_exit(struct gk20a *g, struct nvgpu_mem *mem,
27 struct nvgpu_mem_sgl *sgl); 27 void *sgl);
28#endif 28#endif