author    Sunny He <suhe@nvidia.com>  2017-08-15 15:01:04 -0400
committer mobile promotions <svcmobile_promotions@nvidia.com>  2017-09-22 15:55:24 -0400
commit    17c581d75514c32d1e8c1e416beb33b3ccce22a5 (patch)
tree      a25d063f19b8e1f83f61af418f3aa2ac32fe0cce /drivers/gpu/nvgpu/vgpu
parent    0090ee5aca268a3c359f34c74b8c521df3bd8593 (diff)
gpu: nvgpu: SGL passthrough implementation
The basic nvgpu_mem_sgl implementation provides support for OS specific scatter-gather list implementations by simply copying them node by node. This is inefficient, taking extra time and memory.

This patch implements an nvgpu_mem_sgt struct to act as a header which is inserted at the front of any scatter-gather list implementation. This labels every struct with a set of ops which can be used to interact with the attached scatter-gather list. Since nvgpu common code only has to interact with these function pointers, any sgl implementation can be used. Initialization only requires the allocation of a single struct, removing the need to copy or iterate through the sgl being converted.

Jira NVGPU-186

Change-Id: I2994f804a4a4cc141b702e987e9081d8560ba2e8
Signed-off-by: Sunny He <suhe@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1541426
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
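For reference, the sketch below illustrates the header-plus-ops pattern the message describes. The accessor calls (nvgpu_sgt_get_next(), nvgpu_sgt_get_length(), nvgpu_sgt_get_phys()) and the sgt->sgl member are taken from the diff below; the ops field and the individual callback names (sgl_next, sgl_phys, sgl_length, sgl_gpu_addr) are assumptions made for illustration, not the verbatim nvgpu definitions.

/*
 * Illustrative sketch only -- field and callback names are assumed,
 * not copied from the nvgpu headers.
 */
typedef unsigned long long u64;      /* stand-in for the kernel type */

struct gk20a;                        /* opaque driver context */

struct nvgpu_sgt_ops {
	void *(*sgl_next)(void *sgl);    /* advance to the next SGL node */
	u64 (*sgl_phys)(void *sgl);      /* physical address of a node   */
	u64 (*sgl_length)(void *sgl);    /* byte length of a node        */
	u64 (*sgl_gpu_addr)(struct gk20a *g, void *sgl, void *attrs);
};

/*
 * Header inserted at the front of any OS-specific scatter-gather list:
 * one small allocation labels the list with its accessor ops, so common
 * code never needs to know the underlying node layout.
 */
struct nvgpu_sgt {
	const struct nvgpu_sgt_ops *ops; /* backend-provided accessors       */
	void *sgl;                       /* opaque pointer to the native SGL */
};

Common code such as the vgpu mapping paths below then only carries the sgt handle plus an opaque sgl cursor.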
Diffstat (limited to 'drivers/gpu/nvgpu/vgpu')
-rw-r--r--  drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c  19
-rw-r--r--  drivers/gpu/nvgpu/vgpu/mm_vgpu.c               4
2 files changed, 12 insertions, 11 deletions
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
index ee9b791a..d9324363 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
@@ -40,7 +40,7 @@ static inline int add_mem_desc(struct tegra_vgpu_mem_desc *mem_desc,
 
 static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
 			u64 map_offset,
-			struct nvgpu_mem_sgl *sgl,
+			struct nvgpu_sgt *sgt,
 			u64 buffer_offset,
 			u64 size,
 			int pgsz_idx,
@@ -66,12 +66,13 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
 	void *handle = NULL;
 	size_t oob_size;
 	u8 prot;
+	void *sgl;
 
 	gk20a_dbg_fn("");
 
 	/* FIXME: add support for sparse mappings */
 
-	if (WARN_ON(!sgl) || WARN_ON(!g->mm.bypass_smmu))
+	if (WARN_ON(!sgt) || WARN_ON(!g->mm.bypass_smmu))
 		return 0;
 
 	if (space_to_skip & (page_size - 1))
@@ -97,7 +98,7 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
 		err = -EINVAL;
 		goto fail;
 	}
-
+	sgl = sgt->sgl;
 	while (sgl) {
 		u64 phys_addr;
 		u64 chunk_length;
@@ -106,15 +107,15 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
 		 * Cut out sgl ents for space_to_skip.
 		 */
 		if (space_to_skip &&
-		    space_to_skip >= nvgpu_mem_sgl_length(sgl)) {
-			space_to_skip -= nvgpu_mem_sgl_length(sgl);
-			sgl = nvgpu_mem_sgl_next(sgl);
+		    space_to_skip >= nvgpu_sgt_get_length(sgt, sgl)) {
+			space_to_skip -= nvgpu_sgt_get_length(sgt, sgl);
+			sgl = nvgpu_sgt_get_next(sgt, sgl);
 			continue;
 		}
 
-		phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip;
+		phys_addr = nvgpu_sgt_get_phys(sgt, sgl) + space_to_skip;
 		chunk_length = min(size,
-			nvgpu_mem_sgl_length(sgl) - space_to_skip);
+			nvgpu_sgt_get_length(sgt, sgl) - space_to_skip);
 
 		if (add_mem_desc(&mem_desc[mem_desc_count++], phys_addr,
 				 chunk_length, &oob_size)) {
@@ -124,7 +125,7 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
 
 		space_to_skip = 0;
 		size -= chunk_length;
-		sgl = nvgpu_mem_sgl_next(sgl);
+		sgl = nvgpu_sgt_get_next(sgt, sgl);
 
 		if (size == 0)
 			break;
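With the header in place, the loop above walks the list purely through the sgt handle. Under the same assumed ops layout as the sketch after the commit message, the accessors it calls would be thin dispatch wrappers along these lines (a sketch, not the verbatim nvgpu code):

/* Assumed dispatch-only wrappers; the call names match the diff above,
 * the ops indirection follows the earlier sketch. */
static inline void *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt, void *sgl)
{
	return sgt->ops->sgl_next(sgl);
}

static inline u64 nvgpu_sgt_get_phys(struct nvgpu_sgt *sgt, void *sgl)
{
	return sgt->ops->sgl_phys(sgl);
}

static inline u64 nvgpu_sgt_get_length(struct nvgpu_sgt *sgt, void *sgl)
{
	return sgt->ops->sgl_length(sgl);
}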
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index 5da6f158..adb01ae5 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -78,7 +78,7 @@ int vgpu_init_mm_support(struct gk20a *g)
 
 static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
 			u64 map_offset,
-			struct nvgpu_mem_sgl *sgl,
+			struct nvgpu_sgt *sgt,
 			u64 buffer_offset,
 			u64 size,
 			int pgsz_idx,
@@ -98,7 +98,7 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
 	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d);
 	struct tegra_vgpu_cmd_msg msg;
 	struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
-	u64 addr = nvgpu_mem_sgl_gpu_addr(g, sgl, NULL);
+	u64 addr = nvgpu_sgt_get_gpu_addr(sgt, g, sgt->sgl, NULL);
 	u8 prot;
 
 	gk20a_dbg_fn("");
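The nvgpu_sgt_get_gpu_addr() call above follows the same pattern, dispatching to the backend's GPU-address callback with sgt->sgl as the cursor. To show why initialization is a single allocation rather than a node-by-node copy, here is a hypothetical backend wiring; every my_os_* name and the use of nvgpu_kzalloc() are assumptions for illustration, not code from this patch:

/* Hypothetical backend node accessors (prototypes only). */
static void *my_os_sgl_next(void *sgl);
static u64 my_os_sgl_phys(void *sgl);
static u64 my_os_sgl_length(void *sgl);

static const struct nvgpu_sgt_ops my_os_sgt_ops = {
	.sgl_next   = my_os_sgl_next,
	.sgl_phys   = my_os_sgl_phys,
	.sgl_length = my_os_sgl_length,
};

/*
 * Converting a native scatter-gather list is one small allocation:
 * the header simply points at the existing nodes, nothing is copied.
 */
struct nvgpu_sgt *my_os_sgt_create(struct gk20a *g, void *native_sgl)
{
	struct nvgpu_sgt *sgt = nvgpu_kzalloc(g, sizeof(*sgt)); /* assumed allocator */

	if (!sgt)
		return NULL;

	sgt->ops = &my_os_sgt_ops;
	sgt->sgl = native_sgl;

	return sgt;
}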