author	Alex Waterman <alexw@nvidia.com>	2017-05-25 19:56:50 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-09-22 15:52:48 -0400
commit	0090ee5aca268a3c359f34c74b8c521df3bd8593
tree	2779dc64554cdb38b717ce09c0e3dcbf36107ed3 /drivers/gpu/nvgpu/vgpu
parent	e32cc0108cf2ef5de7a17f0f6c0aa9af7faf23ed
gpu: nvgpu: nvgpu SGL implementation
The last major item preventing the core MM code in the nvgpu driver from being platform agnostic is the usage of Linux scatter-gather tables and scatter-gather lists. These data structures are used throughout the mapping code to handle discontiguous DMA allocations and are also overloaded to represent VIDMEM allocs.

The notion of a scatter-gather table is crucial to a HW device that can handle discontiguous DMA. The GPU has an MMU which allows the GPU to do page gathering and present a virtually contiguous buffer to the GPU HW. As a result it makes sense for the GPU driver to use some sort of scatter-gather concept to maximize memory usage efficiency.

To that end this patch keeps the notion of a scatter-gather list but implements it in the nvgpu common code. It is based heavily on the Linux SGL concept. It is a singly linked list of blocks, each representing a chunk of memory. To map or use a DMA allocation, SW must iterate over each block in the SGL.

This patch implements the most basic level of support for this data structure. There are certainly easy optimizations that could be done to speed up the current implementation. However, this patch's goal is simply to divest the core MM code of any last Linux-isms. Speed and efficiency come next.

Change-Id: Icf44641db22d87fa1d003debbd9f71b605258e42
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1530867
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
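For readers unfamiliar with the new structure, the sketch below illustrates the kind of singly linked SGL the commit message describes. The accessor names (nvgpu_mem_sgl_next(), nvgpu_mem_sgl_phys(), nvgpu_mem_sgl_length()) match those used in the diff below, but the struct layout, the u64 typedef, and the sgl_total_length() helper are illustrative assumptions, not the actual nvgpu definitions.

/*
 * Hypothetical sketch of an nvgpu SGL node and its accessors. The accessor
 * names follow the diff below; the field layout is assumed for illustration.
 */
typedef unsigned long long u64;

struct nvgpu_mem_sgl {
	struct nvgpu_mem_sgl *next;	/* singly linked list of chunks */
	u64 phys;			/* physical address of this chunk */
	u64 dma;			/* DMA/IOMMU address, if mapped */
	u64 length;			/* chunk length in bytes */
};

static inline struct nvgpu_mem_sgl *nvgpu_mem_sgl_next(struct nvgpu_mem_sgl *sgl)
{
	return sgl->next;
}

static inline u64 nvgpu_mem_sgl_phys(struct nvgpu_mem_sgl *sgl)
{
	return sgl->phys;
}

static inline u64 nvgpu_mem_sgl_length(struct nvgpu_mem_sgl *sgl)
{
	return sgl->length;
}

/* Mapping code walks the list one chunk at a time, e.g. to total its size. */
static u64 sgl_total_length(struct nvgpu_mem_sgl *sgl)
{
	u64 total = 0;

	while (sgl) {
		total += nvgpu_mem_sgl_length(sgl);
		sgl = nvgpu_mem_sgl_next(sgl);
	}

	return total;
}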
Diffstat (limited to 'drivers/gpu/nvgpu/vgpu')
-rw-r--r--	drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c	55
-rw-r--r--	drivers/gpu/nvgpu/vgpu/mm_vgpu.c	4
2 files changed, 30 insertions(+), 29 deletions(-)
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
index 85c436e5..ee9b791a 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
@@ -13,7 +13,6 @@
  * more details.
  */
 
-#include <linux/dma-mapping.h>
 #include "vgpu/vgpu.h"
 #include "vgpu_mm_gp10b.h"
 #include "gk20a/mm_gk20a.h"
@@ -41,7 +40,7 @@ static inline int add_mem_desc(struct tegra_vgpu_mem_desc *mem_desc,
 
 static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
 			u64 map_offset,
-			struct sg_table *sgt,
+			struct nvgpu_mem_sgl *sgl,
 			u64 buffer_offset,
 			u64 size,
 			int pgsz_idx,
@@ -61,10 +60,9 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
 	struct tegra_vgpu_as_map_ex_params *p = &msg.params.as_map_ex;
 	struct tegra_vgpu_mem_desc *mem_desc;
 	u32 page_size = vm->gmmu_page_sizes[pgsz_idx];
+	u64 buffer_size = PAGE_ALIGN(size);
 	u64 space_to_skip = buffer_offset;
-	u64 buffer_size = 0;
 	u32 mem_desc_count = 0, i;
-	struct scatterlist *sgl;
 	void *handle = NULL;
 	size_t oob_size;
 	u8 prot;
@@ -73,7 +71,7 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
 
 	/* FIXME: add support for sparse mappings */
 
-	if (WARN_ON(!sgt) || WARN_ON(!g->mm.bypass_smmu))
+	if (WARN_ON(!sgl) || WARN_ON(!g->mm.bypass_smmu))
 		return 0;
 
 	if (space_to_skip & (page_size - 1))
@@ -100,33 +98,36 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
 		goto fail;
 	}
 
-	sgl = sgt->sgl;
-	while (space_to_skip && sgl &&
-	       (space_to_skip + page_size > sgl->length)) {
-		space_to_skip -= sgl->length;
-		sgl = sg_next(sgl);
-	}
-	WARN_ON(!sgl);
+	while (sgl) {
+		u64 phys_addr;
+		u64 chunk_length;
+
+		/*
+		 * Cut out sgl ents for space_to_skip.
+		 */
+		if (space_to_skip &&
+		    space_to_skip >= nvgpu_mem_sgl_length(sgl)) {
+			space_to_skip -= nvgpu_mem_sgl_length(sgl);
+			sgl = nvgpu_mem_sgl_next(sgl);
+			continue;
+		}
 
-	if (add_mem_desc(&mem_desc[mem_desc_count++],
-			 sg_phys(sgl) + space_to_skip,
-			 sgl->length - space_to_skip,
-			 &oob_size)) {
-		err = -ENOMEM;
-		goto fail;
-	}
-	buffer_size += sgl->length - space_to_skip;
+		phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip;
+		chunk_length = min(size,
+				   nvgpu_mem_sgl_length(sgl) - space_to_skip);
 
-	sgl = sg_next(sgl);
-	while (sgl && buffer_size < size) {
-		if (add_mem_desc(&mem_desc[mem_desc_count++], sg_phys(sgl),
-				 sgl->length, &oob_size)) {
+		if (add_mem_desc(&mem_desc[mem_desc_count++], phys_addr,
+				 chunk_length, &oob_size)) {
 			err = -ENOMEM;
 			goto fail;
 		}
 
-		buffer_size += sgl->length;
-		sgl = sg_next(sgl);
+		space_to_skip = 0;
+		size -= chunk_length;
+		sgl = nvgpu_mem_sgl_next(sgl);
+
+		if (size == 0)
+			break;
 	}
 
 	if (rw_flag == gk20a_mem_flag_read_only)
@@ -153,7 +154,7 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
 	msg.handle = vgpu_get_handle(g);
 	p->handle = vm->handle;
 	p->gpu_va = map_offset;
-	p->size = size;
+	p->size = buffer_size;
 	p->mem_desc_count = mem_desc_count;
 	p->pgsz_idx = pgsz_idx;
 	p->iova = 0;
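To make the rewritten mapping loop above easier to follow, here is a standalone sketch of the same skip-and-chunk walk. walk_sgl_chunks() and its emit callback are hypothetical helpers invented for illustration (only add_mem_desc() and the nvgpu_mem_sgl_* accessors appear in the actual patch), and the code builds on the assumed accessor definitions sketched after the commit message.

/*
 * Hypothetical illustration of the chunking walk in the hunk above:
 * skip space_to_skip bytes at the head of the SGL, then hand out
 * (phys_addr, chunk_length) pairs until size bytes have been covered.
 */
static int walk_sgl_chunks(struct nvgpu_mem_sgl *sgl,
			   u64 space_to_skip, u64 size,
			   int (*emit)(u64 phys_addr, u64 chunk_length))
{
	while (sgl && size) {
		u64 len = nvgpu_mem_sgl_length(sgl);
		u64 phys_addr, chunk_length;

		/* Chunks that fall entirely inside the offset are skipped. */
		if (space_to_skip && space_to_skip >= len) {
			space_to_skip -= len;
			sgl = nvgpu_mem_sgl_next(sgl);
			continue;
		}

		/* Start partway into the first usable chunk, if needed. */
		phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip;
		chunk_length = size < (len - space_to_skip) ?
			size : (len - space_to_skip);

		if (emit(phys_addr, chunk_length))
			return -1;

		space_to_skip = 0;
		size -= chunk_length;
		sgl = nvgpu_mem_sgl_next(sgl);
	}

	return 0;
}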
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index ef9e00c8..5da6f158 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -78,7 +78,7 @@ int vgpu_init_mm_support(struct gk20a *g)
 
 static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
 			u64 map_offset,
-			struct sg_table *sgt,
+			struct nvgpu_mem_sgl *sgl,
 			u64 buffer_offset,
 			u64 size,
 			int pgsz_idx,
@@ -98,7 +98,7 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
 	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d);
 	struct tegra_vgpu_cmd_msg msg;
 	struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
-	u64 addr = nvgpu_mem_get_addr_sgl(g, sgt->sgl);
+	u64 addr = nvgpu_mem_sgl_gpu_addr(g, sgl, NULL);
 	u8 prot;
 
 	gk20a_dbg_fn("");